-rw-r--r-- 1301 libntruprime-20240825/src/core/scale3sntrupP/avx/core.c raw
#include <immintrin.h>
#include "crypto_core.h"
#define p {P}
#define q {Q}
#include "crypto_decode_{P}xint16.h"
#define crypto_decode_pxint16 crypto_decode_{P}xint16
#include "crypto_encode_{P}xint16.h"
#define crypto_encode_pxint16 crypto_encode_{P}xint16
#include "crypto_int16.h"
typedef crypto_int16 Fq;
/*
 * Multiply 16 packed signed 16-bit lanes by 3 and reduce modulo q.
 *
 * Steps, all lane-wise and branch-free:
 *   1. v = 3*v (low 16 bits of the product)
 *   2. v -= (q+1)/2
 *   3. twice: add q to every lane that is currently negative
 *   4. v -= (q-1)/2
 *
 * NOTE(review): presumably each input lane lies in -(q-1)/2..(q-1)/2 and
 * 3*(q-1)/2 fits in int16; under that assumption the result is 3*v mod q
 * in the same centered range. Nothing here checks the input range.
 */
static inline __m256i scale3_lanes(__m256i v)
{
  const __m256i modulus = _mm256_set1_epi16(q);
  __m256i negmask;

  v = _mm256_mullo_epi16(v,_mm256_set1_epi16(3));
  v = _mm256_sub_epi16(v,_mm256_set1_epi16((q+1)/2));

  /* arithmetic shift by 15 gives all-ones in negative lanes, zero elsewhere */
  negmask = _mm256_srai_epi16(v,15);
  v = _mm256_add_epi16(v,_mm256_and_si256(modulus,negmask));
  negmask = _mm256_srai_epi16(v,15);
  v = _mm256_add_epi16(v,_mm256_and_si256(modulus,negmask));

  return _mm256_sub_epi16(v,_mm256_set1_epi16((q-1)/2));
}

/*
 * out = 3*in in Rq
 *
 * outbytes: receives p 16-bit values (2*p bytes), written 32 bytes at a time
 *           with unaligned stores.
 * inbytes:  p 16-bit values (2*p bytes), read with unaligned loads.
 * kbytes, cbytes: not used by this function.
 *
 * The data is handled in vectors of 16 values starting from offset 0.  When
 * p is not a multiple of 16, one extra vector covering the last 16 values
 * (offset p-16) is processed; it overlaps values already handled.
 */
void crypto_core(unsigned char *outbytes,const unsigned char *inbytes,const unsigned char *kbytes,const unsigned char *cbytes)
{
  const int tail = p-16;
  /* Snapshot of the last 16 input values, taken before any store,
     in case outbytes = inbytes. */
  const __m256i tail_in = _mm256_loadu_si256((const __m256i *) (inbytes+2*tail));
  __m256i v;
  int pos = 0;

  /* Full vectors from the front; always runs at least once. */
  do {
    v = _mm256_loadu_si256((const __m256i *) (inbytes+2*pos));
    _mm256_storeu_si256((__m256i *) (outbytes+2*pos),scale3_lanes(v));
    pos += 16;
  } while (pos <= tail);

  if (pos < p) {
    /* Leftover values: first put the saved, unscaled input back into the
       output window so that, when output aliases input, the reload below
       does not see values that were already multiplied. */
    _mm256_storeu_si256((__m256i *) (outbytes+2*tail),tail_in);
    v = _mm256_loadu_si256((const __m256i *) (inbytes+2*tail));
    _mm256_storeu_si256((__m256i *) (outbytes+2*tail),scale3_lanes(v));
  }
}