-rw-r--r-- 1301 libntruprime-20240825/src/core/scale3sntrupP/avx/core.c raw
#include <immintrin.h> #include "crypto_core.h" #define p {P} #define q {Q} #include "crypto_decode_{P}xint16.h" #define crypto_decode_pxint16 crypto_decode_{P}xint16 #include "crypto_encode_{P}xint16.h" #define crypto_encode_pxint16 crypto_encode_{P}xint16 #include "crypto_int16.h" typedef crypto_int16 Fq; /* out = 3*in in Rq */ void crypto_core(unsigned char *outbytes,const unsigned char *inbytes,const unsigned char *kbytes,const unsigned char *cbytes) { int i = p-16; __m256i save = _mm256_loadu_si256((__m256i *) (inbytes+2*i)); /* in case outbytes = inbytes */ for (;;) { do { __m256i x = _mm256_loadu_si256((__m256i *) inbytes); __m256i xneg; x = _mm256_mullo_epi16(x,_mm256_set1_epi16(3)); x = _mm256_sub_epi16(x,_mm256_set1_epi16((q+1)/2)); xneg = _mm256_srai_epi16(x,15); x = _mm256_add_epi16(x,_mm256_set1_epi16(q)&xneg); xneg = _mm256_srai_epi16(x,15); x = _mm256_add_epi16(x,_mm256_set1_epi16(q)&xneg); x = _mm256_sub_epi16(x,_mm256_set1_epi16((q-1)/2)); _mm256_storeu_si256((__m256i *) outbytes,x); inbytes += 32; outbytes += 32; i -= 16; } while (i >= 0); if (i <= -16) break; inbytes += 2*i; outbytes += 2*i; _mm256_storeu_si256((__m256i *) outbytes,save); } }