#!/usr/bin/env python3

import os
import sys
import subprocess

# host architecture name, taken from the first command-line argument;
# used below as the key into the per-architecture capstone tables and
# as the expected prefix of every compiler architecture string
host = sys.argv[1]

def warn(w):
  """Report warning w on stderr (prefixed with 'warning: ') and in fresult.

  fresult is the module-level result file opened by the driver loop.
  """
  text = '%s\n' % w
  sys.stderr.write('warning: ' + text)
  fresult.write(text)

# ===== setting up capstone

def capstone_disassembler():
  """Build a capstone disassembler and extension-checking rules for host.

  Returns a tuple (disassembler, bigendian, overrides, extrules):
    disassembler: capstone.Cs object for host, with detail enabled
    bigendian: True if the host mode includes CS_MODE_BIG_ENDIAN
    overrides: dict mapping mnemonic -> set of extension names that
      replace the capstone groups for that mnemonic
    extrules: tuple (aliases, base), where aliases maps capstone group
      names to canonical extension names and base is the set of
      extensions that can be assumed for host

  If capstone cannot be imported, host is not in the table, or capstone
  fails to initialize, this issues a warning and exits with status 0.
  """
  try:
    import capstone
  except Exception:
    # not a bare except: SystemExit and KeyboardInterrupt should propagate
    warn('this script does not know how to check instruction-set extensions without python3-capstone')
    sys.exit(0)

  # XXX: for arm32 should trace from entry points to see thumb vs. arm
  # each entry: capstone architecture constant, then mode constants to OR together
  capstoneopts = {
    'amd64':        ('CS_ARCH_X86','CS_MODE_64','CS_MODE_LITTLE_ENDIAN'),
    'arm32':        ('CS_ARCH_ARM','CS_MODE_THUMB','CS_MODE_LITTLE_ENDIAN'),
    'arm64':        ('CS_ARCH_ARM64','CS_MODE_LITTLE_ENDIAN'),
    'm68k':         ('CS_ARCH_M68K','CS_MODE_M68K_040','CS_MODE_BIG_ENDIAN'),
    'mips32big':    ('CS_ARCH_MIPS','CS_MODE_MIPS32','CS_MODE_BIG_ENDIAN'),
    'mips32little': ('CS_ARCH_MIPS','CS_MODE_MIPS32','CS_MODE_LITTLE_ENDIAN'),
    'mips64big':    ('CS_ARCH_MIPS','CS_MODE_MIPS64','CS_MODE_BIG_ENDIAN'),
    'mips64little': ('CS_ARCH_MIPS','CS_MODE_MIPS64','CS_MODE_LITTLE_ENDIAN'),
    'mipsn32big':   ('CS_ARCH_MIPS','CS_MODE_MIPS32','CS_MODE_BIG_ENDIAN'),
    'mipsn32little':('CS_ARCH_MIPS','CS_MODE_MIPS32','CS_MODE_LITTLE_ENDIAN'),
    'ppc32big':     ('CS_ARCH_PPC','CS_MODE_32','CS_MODE_BIG_ENDIAN'),
    'ppc32little':  ('CS_ARCH_PPC','CS_MODE_32','CS_MODE_LITTLE_ENDIAN'),
    'ppc64big':     ('CS_ARCH_PPC','CS_MODE_64','CS_MODE_BIG_ENDIAN'),
    'ppc64little':  ('CS_ARCH_PPC','CS_MODE_64','CS_MODE_LITTLE_ENDIAN'),
    # the RISC-V mode must match the host width: RV32 for riscv32, RV64 for riscv64
    'riscv32':      ('CS_ARCH_RISCV','CS_MODE_RISCV32','CS_MODE_RISCVC'),
    'riscv64':      ('CS_ARCH_RISCV','CS_MODE_RISCV64','CS_MODE_RISCVC'),
    's390':         ('CS_ARCH_SYSZ','CS_MODE_BIG_ENDIAN'),
    's390x':        ('CS_ARCH_SYSZ','CS_MODE_BIG_ENDIAN'),
    'sparc32':      ('CS_ARCH_SPARC','CS_MODE_BIG_ENDIAN'),
    'sparc64':      ('CS_ARCH_SPARC','CS_MODE_V9','CS_MODE_BIG_ENDIAN'),
    'x86':          ('CS_ARCH_X86','CS_MODE_32','CS_MODE_LITTLE_ENDIAN'),
  }

  if host not in capstoneopts:
    warn(f'this script does not know how to check instruction-set extensions for {host}')
    sys.exit(0)

  try:
    bigendian = False
    # constants are looked up by name so that a capstone build lacking
    # one of them is reported below instead of failing at table definition
    capstonearch = getattr(capstone,capstoneopts[host][0])
    capstonemode = 0
    for opt in capstoneopts[host][1:]:
      if opt == 'CS_MODE_BIG_ENDIAN': bigendian = True
      capstonemode |= getattr(capstone,opt)
    disassembler = capstone.Cs(capstonearch,capstonemode)
    disassembler.detail = True
  except Exception as e:
    warn(f'failed to initialize python3-capstone: {e}')
    sys.exit(0)

  # script checks these overrides against objdump mnemonics
  # and against capstone mnemonics
  capstoneoverrides = {
    'amd64':{
      'xgetbv':{'xsave'},
      'vpmadd52huq':{'avx512vl','avx512ifma'},
      'vpmadd52luq':{'avx512vl','avx512ifma'},
    },
    'arm64':{
      'aese':{'aes'},
      'aesd':{'aes'},
      'aesmc':{'aes'},
      'aesimc':{'aes'},
      'sha1c':{'sha1'},
      'sha1p':{'sha1'},
      'sha1m':{'sha1'},
      'sha1h':{'sha1'},
      'sha1u0':{'sha1'},
      'sha1u1':{'sha1'},
      'sha256h':{'sha256'},
      'sha256h2':{'sha256'},
      'sha256u0':{'sha256'},
      'sha256u1':{'sha256'},
      'sha512h':{'sha512'},
      'sha512h2':{'sha512'},
      'sha512u0':{'sha512'},
      'sha512u1':{'sha512'},
    },
  }
  capstonealiases = {
    'amd64':{
      'int':'interrupt',
      'sse1':'sse',
      'novlx':'avx',
      'vlx':'avx512vl',
      'dqi':'avx512dq',
      'bmi':'bmi1',
      'avx512':'avx512f',
    }
  }
  # base: instruction sets that can be assumed
  # (see also cross-arch list below)
  # script checks base after applying aliases
  capstonebase = {
    'amd64':{'fpu','cmov','sse','sse2','mode64'},
    'arm32':{'int','v5t','v6','v6m','thumb','thumb1only','thumb2','T2EXTRACTPACK','THUMB2DSP','mulops'},
    'arm64':{'neon','fparmv8'},
    'mips64big':{'stdenc','mips3','mips32','mips4_32','mips32r2','mips64r2','notmips32r6','notmips64r6','notinmicromips'},
    'mips64little':{'stdenc','mips3','mips32','mips4_32','mips32r2','mips64r2','notmips32r6','notmips64r6','notinmicromips'},
    'ppc64big':{'mode32','vsx','altivec'},
    'ppc64little':{'mode32','vsx','altivec','p8altivec','p8vector'},
    's390':{'distinctops','highword'},
    's390x':{'distinctops','highword'},
    'x86':{'mode32','fpu','cmov','not64bitmode'},
  }

  overrides = capstoneoverrides.get(host,{})
  aliases = capstonealiases.get(host,{})

  # cross-arch groups that are always allowed, added on top of the host base
  base = capstonebase.get(host,set())
  for ext in 'call','ret','return','jump','branch_relative','privilege','interrupt':
    base.add(ext)

  extrules = aliases,base
  return disassembler,bigendian,overrides,extrules

# ===== main work

# carch maps each file name under compilerarch/ to the architecture
# string stored in that file; 'default' stands for the plain host, and
# the part before the first '+' must always be the host
carch = {}
for c in os.listdir('compilerarch'):
  with open('compilerarch/%s' % c) as f:
    arch = f.read().strip()
  if arch == 'default':
    arch = host
  assert arch.split('+')[0] == host
  carch[c] = arch

def checkext(obj,archlist,ext,extrules,insnstr):
  """Record extension ext and warn for each arch that does not allow it.

  obj: object file name used in warnings
  archlist: architecture strings of the form 'host+ext1+ext2...'
  ext: extension (or capstone group) name; aliases are applied first
  extrules: tuple (aliases, base) as returned by capstone_disassembler
  insnstr: description of the instruction, used in warnings

  The resolved name is always added to the module-level extset;
  extensions in base never produce a warning.
  """
  renames,assumed = extrules
  ext = renames.get(ext,ext)
  extset.add(ext)
  if ext in assumed:
    return
  for arch in archlist:
    allowed = arch.split('+')[1:]
    if ext in allowed:
      continue
    warn(f'{obj}: {arch} instruction set does not allow {ext} for {insnstr}')

def dir2objs(d):
  """Return the .o files in directory d and, recursively, in its dependencies.

  If d contains a file named 'dependencies', each line of that file
  (stripped) is treated as another directory to collect from.
  """
  found = []
  for fn in os.listdir(d):
    if fn.endswith('.o'):
      found.append(d+'/'+fn)

  deps = d+'/dependencies'
  if not os.path.exists(deps):
    return found

  with open(deps) as f:
    for line in f:
      found.extend(dir2objs(line.strip()))
  return found

def doit(d):
  """Check the instruction-set extensions used by the object files under d.

  Collects the .o files of d (and its dependencies), disassembles them
  with 'objdump -d', feeds the raw instruction bytes to capstone, and
  calls checkext() for every extension that an instruction requires.
  Problems running objdump are reported through warn() and end the
  check for this directory.
  """
  disassembler,bigendian,overrides,extrules = capstone_disassembler()

  objs = dir2objs(d)
  if len(objs) == 0: return

  # archlist: architecture strings that every instruction must be allowed by;
  # the compiler name is the fourth '/'-separated component of d, if present
  if len(d.split('/')) < 4:
    archlist = [host] # no instruction-set extensions allowed
  else:
    compiler = d.split('/')[3]
    if compiler in carch:
      archlist = [carch[compiler]]
    else:
      # not a known compiler name: d/compilerdirs lists one compiler per line
      archlist = []
      with open(d+'/compilerdirs') as f:
        for arch in f:
          archlist += [carch[arch.strip()]]

  try:
    # XXX: think about how to safely exclude glink section on ppc64
    p = subprocess.Popen(['objdump','-d']+objs,stdout=subprocess.PIPE,stderr=subprocess.STDOUT,universal_newlines=True)
    out,err = p.communicate()
  except Exception as e:
    warn('objdump exception: %s' % e)
    return
  # NOTE(review): stderr is redirected into stdout above, so communicate()
  # returns None for err and this branch looks unreachable -- confirm intent
  if err:
    warn('objdump error: %s' % err)
    return
  if p.returncode:
    warn('objdump failure: %s' % p.returncode)
    return

  obj = 'unknown-object-file'
  todo = b''
  # on x86 and amd64, objdump can split insns across lines
  # and todo accumulates instruction bytes not yet processed

  for line in out.splitlines():
    # a line starting with the next expected file name followed by ':'
    # switches the current object; objs is consumed in order
    if len(objs) > 0 and line.startswith('%s:'%objs[0]):
      obj,objs = objs[0],objs[1:]
    # only lines containing a tab are parsed as disassembly
    if '\t' in line:
      # drop everything up to and including the first tab,
      # then cut at the first '#' and at the first '<'
      line = line[line.index('\t')+1:]
      if '#' in line:
        line = line[:line.index('#')]
      if '<' in line:
        line = line[:line.index('<')]

      # leading whitespace-separated tokens that parse as hex are
      # instruction bytes; the first token that does not parse is taken
      # as the mnemonic and ends the scan of this line
      # NOTE(review): a mnemonic made only of an even number of hex digits
      # (e.g. 'fadd') passes bytes.fromhex and is treated as bytes -- confirm
      for x in line.split():
        try:
          x = bytes.fromhex(x)
        except:
          if x in overrides:
            for ext in overrides[x]:
              checkext(obj,archlist,ext,extrules,f'{x} (from objdump)')
          break
        # each hex token is byte-reversed on hosts without CS_MODE_BIG_ENDIAN
        # (a single-byte token is unchanged by this)
        if not bigendian:
          x = bytes(reversed(bytearray(x)))
        todo += x

      # disassemble whatever has accumulated (0x1000 is the address passed
      # to capstone) and remove each decoded instruction from todo
      for insn in disassembler.disasm(todo,0x1000):
        assert todo.startswith(insn.bytes)
        todo = todo[len(insn.bytes):]
        # an override for the mnemonic replaces capstone's own groups
        if insn.mnemonic in overrides:
          for ext in overrides[insn.mnemonic]:
            checkext(obj,archlist,ext,extrules,f'{insn.mnemonic} {insn.op_str} (from capstone)')
        else:
          for g in insn.groups:
            checkext(obj,archlist,insn.group_name(g),extrules,f'{insn.mnemonic} {insn.op_str} (from capstone)')

      # outside x86/amd64, leftover bytes after a line count as a failure
      if host not in ('x86','amd64'):
        if todo != b'':
          warn(f'capstone disassembly failed, could be capstone bug: {line}')
          todo = b''
      # cap the carried-over bytes so one undecodable run cannot grow forever
      if len(todo) > 32:
        warn(f'capstone disassembly has built up more than 32 bytes, resetting')
        todo = b''

# for each directory named on the command line: warnings produced while
# checking go to result-insns, and the sorted set of extensions that were
# seen goes to info-insns, one per line
for d in sys.argv[2:]:
  d = d.strip()
  extset = set()  # filled by checkext() while doit() runs
  with open('%s/result-insns' % d,'w') as fresult:  # warn() writes here
    doit(d)
  with open('%s/info-insns' % d,'w') as finfo:
    finfo.writelines(ext+'\n' for ext in sorted(extset))