#!/usr/bin/env python3

import os
import sys

# Benchmark lines that count towards an implementation's score.
# tune maps a benchmarked function name to a pair (size,weight):
#   size   - handle() compares this with the third field of a benchmark
#            line; '*' accepts any value
#   weight - factor applied to the measured cycles when a score is summed
# XXX: marker kept from the original table; what it flags is not stated
# (presumably that these sizes and weights are provisional -- confirm).
def _build_tune():
  # Assemble the table one name family at a time.
  anysize = ('*',1)

  # (size, first numeric suffix, second numeric suffix) as they appear in
  # the per-size decode_*/encode_* function names.
  params = (
    ('653','1541','4621'),
    ('761','1531','4591'),
    ('857','1723','5167'),
    ('953','2115','6343'),
    ('1013','2393','7177'),
    ('1277','2627','7879'),
  )

  table = {}

  for n in ('897','1039','1184','1349','1455','1847'):
    table['verify_' + n] = anysize

  for n,a,b in params:
    for suffix in ('3',a,b,'int16','int32'):
      table['decode_%sx%s' % (n,suffix)] = anysize
  table['decode_int16'] = anysize

  for n,a,b in params:
    for suffix in ('3',a,a + 'round',b,'freeze3','int16'):
      table['encode_%sx%s' % (n,suffix)] = anysize
  table['encode_int16'] = anysize

  # The sort and hash entries are the only ones pinned to a specific size.
  table['sort_int32'] = ('1597',1)
  table['sort_uint32'] = ('1597',1)

  for op in ('inv3','inv','mult3','mult','scale3','weight','wforce'):
    for n,a,b in params:
      table['core_%ssntrup%s' % (op,n)] = anysize

  table['hashblocks_sha512'] = ('8090',1)
  table['hash_sha512'] = ('8090',1)

  for n,a,b in params:
    table['kem_sntrup%s_dec' % n] = anysize

  return table

tune = _build_tune()

# Accumulators filled while the benchmark files are read.
# data maps (benchmark,version,arch,cpuid,o,p,implementation,compiler) to the
# list of (shortfun,cycles) measurements that handle() collected for it.
data = dict()
# impls is created here but not used by the code visible in this file.
impls = set()

def handle(benchmark):
  """Read one benchmark log and record its tuned cycle counts in data.

  benchmark is a file name inside the benchmarks/ directory.  Every line is
  split on whitespace and interpreted by its leading fields:

    ntruprime version V        V becomes the current version
    ntruprime arch A           A becomes the current arch
    cpuid ...                  remaining fields, concatenated, become cpuid
    cpucycles selected 0 N     N is subtracted from every cycle count
    randombytes selected 26 N  N is also subtracted for *_keypair functions
    OP NUM implementation I compiler C...
                               sets the current implementation and compiler
    FUN NUM SIZE CYCLES ...    a measurement; kept when FUN is in tune and
                               tune[FUN][0] is SIZE or '*'

  Each kept measurement is appended as (FUN,cycles) to
  data[benchmark,version,arch,cpuid,o,p,I,C], where o and p are the first
  two '_'-separated parts of FUN and cycles is never stored below 1.

  Returns nothing.  Asserts that a measurement's OP and NUM agree with the
  most recent implementation line.  A measurement that appears before the
  header lines it needs fails with UnboundLocalError.
  """
  with open('benchmarks/%s' % benchmark) as f:
    for rawline in f:
      fields = rawline.split()

      if fields[:2] == ['ntruprime','version']:
        version = fields[2]
        continue
      if fields[:2] == ['ntruprime','arch']:
        arch = fields[2]
        continue

      # These three headers start with different words, so at most one matches.
      if fields[:1] == ['cpuid']:
        cpuid = ''.join(fields[1:])
      elif fields[:3] == ['cpucycles','selected','0']:
        cpucyclesoverhead = int(fields[3])
      elif fields[:3] == ['randombytes','selected','26']:
        randombytesoverhead = int(fields[3])

      # Implementation and measurement lines both have at least five fields
      # and a numeric second field.
      if len(fields) < 5 or not fields[1].isnumeric(): continue
      shortfun = fields[0]

      if fields[2] == 'implementation':
        if fields[4] == 'compiler':
          implop,implnum,i = fields[0],fields[1],fields[3]
          c = ' '.join(fields[5:])
        continue

      # Measurement line: keep it only if tune asks for this function and size.
      if shortfun not in tune: continue
      wantedsize = tune[shortfun][0]
      if wantedsize != fields[2] and wantedsize != '*': continue
      if not fields[3].isnumeric(): continue

      nameparts = shortfun.split('_')
      o = nameparts[0]
      p = nameparts[1]
      assert implop == '%s_%s' % (o,p)
      assert implnum == fields[1]

      cycles = int(fields[3]) - cpucyclesoverhead
      if shortfun.endswith('_keypair'): cycles -= randombytesoverhead
      cycles = max(cycles,1)

      key = benchmark,version,arch,cpuid,o,p,i,c
      data.setdefault(key,[]).append((shortfun,cycles))

# Feed every entry of benchmarks/ to handle(), in sorted name order.
benchmarkfiles = os.listdir('benchmarks')
benchmarkfiles.sort()
for benchmark in benchmarkfiles:
  handle(benchmark)

# impldata[o,p,i] lists one (benchmark,version,arch,cpuid,c,score) row per
# measured combination of implementation i; bestscore[benchmark,o,p] is the
# lowest score any implementation/compiler reached in that benchmark.
impldata = {}
bestscore = {}

for key in sorted(data):
  benchmark,version,arch,cpuid,o,p,i,c = key
  measurements = data[key]

  # Every tune entry belonging to this o_p must have been measured exactly
  # once, and nothing else may have been recorded.
  measured = sorted(name for name,_ in measurements)
  expected = sorted(name for name in tune if name.split('_')[:2] == [o,p])
  assert measured == expected

  # Weighted sum of cycles; lower is better.
  score = sum(cycles*tune[name][1] for name,cycles in measurements)

  impldata.setdefault((o,p,i),[]).append((benchmark,version,arch,cpuid,c,score))

  previous = bestscore.get((benchmark,o,p))
  bestscore[benchmark,o,p] = score if previous is None else min(score,previous)

# Write one file priority/o-p-i per implementation.  Each row starts with
# the score relative to the best score in the same benchmark (1.000000 for
# the best), followed by the raw score and the identifying fields.
os.makedirs('priority',exist_ok=True)
for (o,p,i),rows in impldata.items():
  with open('priority/%s-%s-%s' % (o,p,i),'w') as f:
    for benchmark,version,arch,cpuid,c,score in rows:
      best = bestscore[benchmark,o,p]
      # Skip rows that would divide by zero or give a meaningless ratio.
      if best <= 0: continue
      ratio = score/best
      f.write('%.6f %s %s %s %s %s %s\n' % (ratio,score,arch,cpuid,version,benchmark,c))