-rwxr-xr-x 5716 libntruprime-20241021/scripts-build/selected raw
#!/usr/bin/env python3
"""Select one implementation/compiler pair per architecture.

Usage: selected OPERATION PRIMITIVE HOST TRIM < implementation-list

Standard input holds one '/'-separated entry per line of the form
operation/primitive/implementation/compiler; entries for other
operations or primitives are ignored.  The script also reads, relative
to the current directory:

  compilerarch/COMPILER      architecture string the compiler targets
  compilerversion/COMPILER   version string of the compiler
  allarches                  one architecture string per line
  priority/O-P-IMPL          optional priority measurements (see main)

It prints 'operation', 'primitive', 'note:', 'selected' and 'impl'
lines on standard output.
"""

import functools
import os
import sys

# Shared tables, filled in by main() and read by the helpers below.
impls = []         # [implementation, compiler] pairs taken from stdin
icarch = {}        # (implementation, compiler) -> architecture string
iccompiler = {}    # (implementation, compiler) -> compiler version string
prioritydata = []  # (impl, prio, score, host, cpuid, machine, compiler) rows
host = None        # host name given on the command line

# Feature name -> (word index, bit index) within the decoded cpuid words.
_CPUIDBITS = {
    'mmx': (18, 23),
    'sse': (18, 25),
    'sse2': (18, 26),
    'sse3': (17, 0),
    'ssse3': (17, 9),
    'fma': (17, 12),
    'sse41': (17, 19),
    'sse42': (17, 20),
    'popcnt': (17, 23),
    'osxsave': (17, 27),
    'avx': (17, 28),
    'bmi1': (20, 3),
    'avx2': (20, 5),
    'bmi2': (20, 8),
    'avx512f': (20, 16),
    'adx': (20, 19),
    'avx512ifma': (20, 21),
    'avx512vl': (20, 31),
    'waitpkg': (21, 5),
    'vaes': (21, 9),
    'sse4a': (25, 6),
    'xmmsaved': (27, 1),
    'ymmsaved': (27, 2),
}

# Architecture parts that cpuidsupports() accepts after the base name.
_KNOWNPARTS = (
    'sse3', 'ssse3', 'fma', 'sse41', 'sse42', 'sse4a', 'popcnt', 'adx',
    'avx', 'bmi1', 'bmi2', 'avx2', 'avx512f', 'avx512vl', 'avx512ifma',
    'vaes', 'waitpkg',
)


def archkey(a):
    """Return the sort key for architecture string a.

    Architectures with more '+' parts sort first; 'default' sorts last.
    """
    if a == 'default':
        return 1, a  # put default last
    return -a.count('+'), a


def asupportsic(a, i, c):
    """Return True if every '+' part of icarch[i,c] is also a part of a.

    The base name before the first '+' is ignored on both sides.
    """
    available = a.split('+')[1:]
    required = icarch[i, c].split('+')[1:]
    return all(part in available for part in required)


@functools.lru_cache(maxsize=None)
def _cpuidflags(cpuid):
    """Decode a cpuid hex string into a feature-name -> bool mapping.

    The string is read as 32 words of 8 hex digits each.  Results are
    memoised per string because selectic() asks about the same cpuid
    many times; callers must treat the returned dict as read-only.

    Raises ValueError if any of the 32 words cannot be parsed as hex.
    """
    words = [int('0x' + cpuid[8*j:8*j+8], 16) for j in range(32)]
    return {
        name: bool(words[word] & (1 << bit))
        for name, (word, bit) in _CPUIDBITS.items()
    }


def cpuidsupports(cpuid, a):
    """Return True if the machine described by cpuid supports architecture a.

    cpuid is a hex string (32 words of 8 digits); a is a base name
    optionally followed by '+'-separated parts.  An architecture with
    no parts is supported by every cpuid.

    Raises ValueError for a malformed cpuid, or when a part that is not
    in the known list is reached.
    """
    flags = _cpuidflags(cpuid)
    for apart in a.split('+')[1:]:
        if apart not in _KNOWNPARTS:
            raise ValueError('cpuidsupports does not understand %s' % apart)
        # Every known part additionally requires mmx, sse and sse2.
        if not (flags['mmx'] and flags['sse'] and flags['sse2']):
            return False
        if not flags[apart]:
            return False
        # Parts named avx* also need the osxsave/xmmsaved/ymmsaved bits.
        if apart.startswith('avx'):
            if not (flags['osxsave'] and flags['xmmsaved']
                    and flags['ymmsaved']):
                return False
    return True


def selectic(a, aexclude):
    """Choose the (implementation, compiler) pair to use for architecture a.

    aexclude is a collection of architectures already handled; priority
    rows from machines supporting any of them do not count as direct
    matches and receive less weight.

    Only pairs whose compiler architecture is a subset of a are
    candidates.  If prioritydata has rows from this host whose cpuid
    supports a and none of aexclude, only those rows are used;
    otherwise all rows are used.  Each row contributes to a weighted
    average priority per candidate, and the candidate with the lowest
    average wins.  Candidates with no priority rows are not ranked; if
    nothing can be ranked, the first candidate is returned.

    Prints 'note:' lines describing the decision.  Raises ValueError if
    no pair is compatible with a.
    """
    if len(aexclude) > 0:
        print('note: considering other machines supporting %s' % a)
    else:
        print('note: considering machines supporting %s' % a)

    # requirement: icarch[i,c] is a subset of a
    compatibleimpls = [(i, c) for i, c in impls if asupportsic(a, i, c)]
    if not compatibleimpls:
        # Explicit check rather than assert, so it survives python -O.
        raise ValueError('no implementation is compatible with %s' % a)

    # desideratum: good performance based on prioritydata
    directmatches = any(
        priohost == host
        and cpuidsupports(cpuid, a)
        and all(not cpuidsupports(cpuid, b) for b in aexclude)
        for i, prio, score, priohost, cpuid, machine, c in prioritydata
    )
    if not directmatches:
        print('note: no direct matches, so extrapolating from all machines')

    totalprio = {ic: 0 for ic in compatibleimpls}
    totalweight = {ic: 0 for ic in compatibleimpls}

    for prioi, prio, score, priohost, cpuid, machine, prioc in prioritydata:
        if directmatches:
            # Restrict to rows that are themselves direct matches.
            if priohost != host:
                continue
            if any(cpuidsupports(cpuid, b) for b in aexclude):
                continue
            if not cpuidsupports(cpuid, a):
                continue
        for i, c in compatibleimpls:
            if i != prioi:
                continue
            # XXX: use more serious machine learning here
            weight = 1.0
            if priohost == host:
                weight *= 10
            if cpuidsupports(cpuid, a):
                weight *= 10
            if all(not cpuidsupports(cpuid, b) for b in aexclude):
                weight *= 10
            # Rows measured with a similar compiler string count more.
            weight *= 1 + len(os.path.commonprefix([iccompiler[i, c], prioc]))
            if iccompiler[i, c] == prioc:
                weight *= 10
            # print('note: weight %s from %s %s %s %s for %s %s' % (weight,prio,machine,prioi,prioc,i,c))
            totalprio[i, c] += prio * weight
            totalweight[i, c] += weight

    # note that implementations without priority data are excluded from ranking
    ranking = sorted(
        (totalprio[i, c] / totalweight[i, c], i, c)
        for i, c in compatibleimpls
        if totalweight[i, c] > 0
    )
    for prio, i, c in ranking:
        print('note: priority %s for %s %s' % (prio, i, c))

    if len(ranking) == 0:
        return compatibleimpls[0]
    return ranking[0][1:]


def main():
    """Read the inputs, select per architecture, and print the report."""
    global host
    o, p, host, trim = sys.argv[1:5]

    # Keep only the stdin entries for the requested operation/primitive.
    for line in sys.stdin:
        parts = line.strip().split('/')
        if parts[0] != o:
            continue
        if parts[1] != p:
            continue
        impls.append(parts[2:])

    print('operation %s' % o)
    print('primitive %s' % p)

    for i, c in impls:
        with open('compilerarch/%s' % c) as f:
            icarch[i, c] = f.read().strip()
        with open('compilerversion/%s' % c) as f:
            iccompiler[i, c] = f.read().strip()

    allimpls = sorted(set(i for i, c in impls))
    with open('allarches') as f:
        allarches = f.read().splitlines()
    allarches = sorted(set(allarches), key=archkey)

    # Priority rows: prio score host cpuid version machine compiler...
    # Rows with fewer than 7 whitespace-separated fields are skipped;
    # fields from the seventh onwards are rejoined as the compiler string.
    for i in allimpls:
        priorityfn = 'priority/%s-%s-%s' % (o, p, i)
        if not os.path.exists(priorityfn):
            continue
        with open(priorityfn) as f:
            for line in f:
                fields = line.split()
                if len(fields) < 7:
                    continue
                prio, score, priohost, cpuid, version, machine = fields[:6]
                c = ' '.join(fields[6:])
                prioritydata.append(
                    (i, float(prio), score, priohost, cpuid, machine, c)
                )

    # Architectures are visited in archkey order; each one is then added
    # to the exclusion set passed to later selections.
    usedimpls = set()
    handledarches = set()
    for a in allarches:
        i, c = selectic(a, handledarches)
        usedimpls.add((i, c))
        print('selected %s %s %s' % (a, i, c))
        handledarches.add(a)

    # With trim == 'False' every pair is listed, not just the selected ones.
    for i, c in impls:
        if (i, c) in usedimpls or trim == 'False':
            print('impl %s %s' % (i, c))


if __name__ == '__main__':
    main()