/* WARNING: auto-generated (by autogen/test); do not edit */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <time.h>
#include <assert.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <fcntl.h>
#include <sys/resource.h>
#include "crypto_uint8.h"
#include "crypto_uint32.h"
#include "crypto_uint64.h"
#include "crypto_declassify.h"
#include <djbsort.h> /* -ldjbsort */

/* Test-selection filters; 0 means "no filter".  Within this file they are
   only read: targeto is compared with "sort", targetp with the primitive
   name, targeti with an implementation name, targetn (via atol) with the
   implementation number, and targetoffset (via atol) with the offset index.
   NOTE(review): presumably set from the command line elsewhere -- confirm. */
static const char *targeto = 0;
static const char *targetp = 0;
static const char *targeti = 0;
static const char *targetn = 0;
static const char *targetoffset = 0;

/* Overall result flag: stays 1 until fail() is used. */
static int ok = 1;

/* fail(...) clears ok and then evaluates to printf, so it takes
   printf-style arguments. */
#define fail ((ok = 0),printf)

/* ----- valgrind support */

/* Set to 1 by valgrind_init() when the valgrind_multiplier environment
   variable is present; many helpers below change behavior when it is set. */
static int valgrind = 0;
/* Byte XORed into buffers by secret(); computed by valgrind_init(). */
static unsigned char valgrind_undefined_byte = 0;
/* Volatile slot that valgrind_malloc_1() passes its allocation through. */
static char *volatile valgrind_pointer = 0;

/* Allocates one byte with malloc (aborting on failure) and returns the
   pointer after storing it to, and re-reading it from, valgrind_pointer.
   The allocated byte is not initialized here.
   NOTE(review): the round trip through the volatile global presumably
   keeps the compiler from reasoning about the allocation -- confirm. */
static char *valgrind_malloc_1(void)
{
  char *x = (char *) malloc(1);
  if (!x) abort();
  *(char **volatile) &valgrind_pointer = x;
  return valgrind_pointer;
}

/* Enables valgrind mode when the environment variable valgrind_multiplier
   is set; does nothing otherwise.
   valgrind_undefined_byte is derived from the (never initialized) byte
   returned by valgrind_malloc_1(): (x[0]+1) * atoi(e), then XORed with
   x[0]+1.  When atoi(e) is 1 the two terms cancel and the result is 0.
   NOTE(review): the point is presumably to obtain a byte that valgrind
   tracks as undefined while having a harmless numeric value -- confirm. */
static void valgrind_init(void)
{
  char *e = getenv("valgrind_multiplier");
  char *x;
  if (!e) return;
  x = valgrind_malloc_1();
  /* deliberate read of uninitialized heap memory */
  valgrind_undefined_byte = x[0]+1;
  valgrind_undefined_byte *= atoi(e);
  valgrind_undefined_byte ^= x[0]+1;
  free(x);
  valgrind = 1;
}

/* XORs valgrind_undefined_byte into each of the xlen bytes starting at
   xvoid.  Does nothing when xlen <= 0. */
static void secret(void *xvoid,long long xlen)
{
  unsigned char *bytes = (unsigned char *) xvoid;
  long long pos;

  for (pos = 0;pos < xlen;++pos)
    bytes[pos] ^= valgrind_undefined_byte;
}

/* Thin wrapper that forwards x and xlen to crypto_declassify().
   NOTE(review): presumably tells valgrind that the xlen bytes at x are
   defined again after secret() -- confirm against crypto_declassify.h. */
static void declassify(void *x,long long xlen)
{
  crypto_declassify(x,xlen);
}

/* ----- rng and hash, from supercop/try-anything.c */

/* Short aliases for the crypto_uint* types used by the cipher code below. */
typedef crypto_uint8 u8;
typedef crypto_uint32 u32;
typedef crypto_uint64 u64;

/* Loop header counting i from 0 up to (but not including) n.  i must be
   declared by the caller; n is re-evaluated on every iteration and is not
   parenthesized in the expansion. */
#define FOR(i,n) for (i = 0;i < n;++i)

/* Rotates the low 32 bits of x left by c bit positions.  Callers in this
   file use 0 < c < 32; c == 0 would shift by 32. */
static u32 L32(u32 x,int c)
{
  const u32 upper = x << c;
  const u32 lower = (x&0xffffffff) >> (32 - c);
  return upper | lower;
}

/* Loads four bytes from x as a little-endian 32-bit word (x[0] is the
   least significant byte). */
static u32 ld32(const u8 *x)
{
  u32 word = 0;
  int pos;

  for (pos = 3;pos >= 0;--pos)
    word = (word<<8)|x[pos];
  return word;
}

/* Stores the low 32 bits of u into x[0..3] in little-endian byte order. */
static void st32(u8 *x,u32 u)
{
  int pos = 0;

  while (pos < 4) {
    x[pos] = u;   /* conversion to u8 keeps the low byte */
    u >>= 8;
    ++pos;
  }
}

/* 16-byte constant (plus terminating NUL) that core_salsa() and
   core_chacha() load as four little-endian words into their state. */
static const u8 sigma[17] = "expand 32-byte k";

/* Salsa-style block function: writes 64 bytes to out, computed from the
   16-byte input block in and the 32-byte key k.
   State layout (16 words): words 0,5,10,15 come from sigma, words 1..4
   from k[0..15], words 6..9 from in[0..15], words 11..14 from k[16..31].
   The state is mixed by 20 iterations of the round below and then added
   word-wise to the initial state before being stored little-endian. */
static void core_salsa(u8 *out,const u8 *in,const u8 *k)
{
  u32 w[16],x[16],y[16],t[4];
  int i,j,m;

  FOR(i,4) {
    x[5*i] = ld32(sigma+4*i);
    x[1+i] = ld32(k+4*i);
    x[6+i] = ld32(in+4*i);
    x[11+i] = ld32(k+16+4*i);
  }

  /* keep a copy of the initial state for the final addition */
  FOR(i,16) y[i] = x[i];

  FOR(i,20) {
    FOR(j,4) {
      /* gather four words, mix them, scatter the result into w */
      FOR(m,4) t[m] = x[(5*j+4*m)%16];
      t[1] ^= L32(t[0]+t[3], 7);
      t[2] ^= L32(t[1]+t[0], 9);
      t[3] ^= L32(t[2]+t[1],13);
      t[0] ^= L32(t[3]+t[2],18);
      FOR(m,4) w[4*j+(j+m)%4] = t[m];
    }
    FOR(m,16) x[m] = w[m];
  }

  FOR(i,16) st32(out + 4 * i,x[i] + y[i]);
}

/* Writes b bytes of core_salsa() keystream to c, using the 8-byte nonce n
   and the 32-byte key k.  The 16-byte block input is n followed by an
   8-byte little-endian block counter that starts at 0.  n is fully copied
   before any output is written, so c may alias n.  b == 0 is a no-op. */
static void salsa20(u8 *c,u64 b,const u8 *n,const u8 *k)
{
  u8 block_in[16],block_out[64];
  u32 carry,pos;

  if (b == 0) return;

  for (pos = 0;pos < 16;++pos) block_in[pos] = 0;
  for (pos = 0;pos < 8;++pos) block_in[pos] = n[pos];

  /* full 64-byte blocks */
  for (;b >= 64;b -= 64,c += 64) {
    core_salsa(block_out,block_in,k);
    for (pos = 0;pos < 64;++pos) c[pos] = block_out[pos];

    /* add 1 to the counter held in block_in[8..15] */
    carry = 1;
    for (pos = 8;pos < 16;++pos) {
      carry += (u32) block_in[pos];
      block_in[pos] = carry;
      carry >>= 8;
    }
  }

  /* trailing partial block */
  if (b != 0) {
    core_salsa(block_out,block_in,k);
    for (pos = 0;pos < b;++pos) c[pos] = block_out[pos];
  }
}

/* Adds 1 to the 8-byte little-endian counter at n, propagating the carry
   while bytes wrap to zero; a carry out of n[7] is dropped. */
static void increment(u8 *n)
{
  int pos;

  for (pos = 0;pos < 8;++pos) {
    ++n[pos];
    if (n[pos]) return;   /* no wrap, so no further carry */
  }
}

/* 8-byte nonce for the test-vector stream; starts as all zeros, is
   replaced by testvector_mutate() and incremented by testvector(). */
static unsigned char testvector_n[8];

/* Mixes seed into the test-vector nonce: derives a 32-byte key from the
   current nonce under a fixed reseed key, XORs the eight little-endian
   bytes of seed into the start of that key, and replaces testvector_n
   with 8 bytes of salsa20 output under the resulting key. */
static void testvector_mutate(unsigned long long seed)
{
  const static unsigned char reseed[33] = "inject new seed for test vectors";
  unsigned char key[32];
  int pos;

  salsa20(key,sizeof key,testvector_n,reseed);
  for (pos = 0;pos < 8;++pos)
    key[pos] ^= (unsigned char) (seed >> (8 * pos));
  salsa20(testvector_n,sizeof testvector_n,testvector_n,key);
}

/* Fills x[0..xlen-1] with salsa20 output under a fixed key and the current
   testvector_n, then increments testvector_n so the next call produces a
   different stream. */
static void testvector(unsigned char *x,unsigned long long xlen)
{
  const static unsigned char testvector_k[33] = "generate inputs for test vectors";
  salsa20(x,xlen,testvector_n,testvector_k);
  increment(testvector_n);
}

/* Returns a 64-bit value assembled little-endian from 8 bytes produced by
   testvector() (so it advances the test-vector stream). */
static unsigned long long myrandom(void)
{
  unsigned char bytes[8];
  unsigned long long value = 0;
  int pos;

  testvector(bytes,8);
  for (pos = 7;pos >= 0;--pos)
    value = (value<<8)|bytes[pos];
  return value;
}

/* 8-byte nonce for the canary stream; starts as all zeros and is
   incremented by every canary() call. */
static unsigned char canary_n[8];

/* Fills x[0..xlen-1] with salsa20 output under a fixed key and the current
   canary_n, then increments canary_n.  Used to write guard bytes around
   test buffers. */
static void canary(unsigned char *x,unsigned long long xlen)
{
  const static unsigned char canary_k[33] = "generate pad to catch overwrites";
  salsa20(x,xlen,canary_n,canary_k);
  increment(canary_n);
}

/* Writes fresh 16-byte canaries immediately before and after the xlen-byte
   buffer x and copies the same canaries around x2.  Skipped entirely in
   valgrind mode. */
static void double_canary(unsigned char *x2,unsigned char *x,unsigned long long xlen)
{
  if (!valgrind) {
    unsigned char *const head = x - 16;
    unsigned char *const tail = x + xlen;

    canary(head,16);
    canary(tail,16);
    memcpy(x2 - 16,head,16);
    memcpy(x2 + xlen,tail,16);
  }
}

/* Fills x with xlen test-vector bytes and mirrors them into x2.
   In valgrind mode only the xlen data bytes are copied.  Otherwise 16-byte
   canaries are written on both sides of x, and the canaries plus data
   (xlen + 32 bytes starting at x - 16) are copied to x2 - 16. */
static void input_prepare(unsigned char *x2,unsigned char *x,unsigned long long xlen)
{
  testvector(x,xlen);

  if (!valgrind) {
    canary(x - 16,16);
    canary(x + xlen,16);
    memcpy(x2 - 16,x - 16,xlen + 32);
  } else {
    memcpy(x2,x,xlen);
  }
}

/* Checks that the 16-byte canaries before and after x still match those
   around x2, and reports a failure naming fun for each side that differs.
   Does nothing in valgrind mode. */
static void output_compare(const unsigned char *x2,const unsigned char *x,unsigned long long xlen,const char *fun)
{
  if (!valgrind) {
    const int head_differs = memcmp(x2 - 16,x - 16,16) != 0;

    if (head_differs)
      fail("failure: %s writes before output\n",fun);
    if (memcmp(x2 + xlen,x + xlen,16) != 0)
      fail("failure: %s writes after output\n",fun);
  }
}

/* ----- knownrandombytes */

/* Constant asserted by knownrandombytes_main(); its name documents that
   this generator must not be used for real cryptography. */
static const int knownrandombytes_is_only_for_testing_not_for_cryptographic_use = 1;
/* Forward declaration; the definition appears later in this file. */
extern void knownrandombytes(void *,long long);

/* Quarter round used by core_chacha(): four add / xor / rotate steps with
   rotation counts 16, 12, 8 and 7.  The arguments are evaluated several
   times and the expansion is a bare sequence of statements ending in ';',
   so pass side-effect-free lvalues and use it only inside braces. */
#define QUARTERROUND(a,b,c,d) \
  a += b; d = L32(d^a,16); \
  c += d; b = L32(b^c,12); \
  a += b; d = L32(d^a, 8); \
  c += d; b = L32(b^c, 7);

/* ChaCha-style block function: writes 64 bytes to out, computed from the
   16-byte input block in and the 32-byte key k.
   State layout (16 words): words 0..3 from sigma, words 4..11 from k,
   words 12..15 from in.  Ten iterations each apply four quarter rounds on
   the columns and four on the diagonals; the result is added word-wise to
   the initial state and stored little-endian. */
static void core_chacha(u8 *out,const u8 *in,const u8 *k)
{
  u32 x[16],y[16];
  int i,j;
  FOR(i,4) {
    x[i] = ld32(sigma+4*i);
    x[12+i] = ld32(in+4*i);
  }
  FOR(i,8) x[4+i] = ld32(k+4*i);
  /* keep a copy of the initial state for the final addition */
  FOR(i,16) y[i] = x[i];
  FOR(i,10) {
    /* column quarter rounds */
    FOR(j,4) { QUARTERROUND(x[j],x[j+4],x[j+8],x[j+12]) }
    /* diagonal quarter rounds */
    FOR(j,4) { QUARTERROUND(x[j],x[((j+1)&3)+4],x[((j+2)&3)+8],x[((j+3)&3)+12]) }
  }
  FOR(i,16) st32(out+4*i,x[i]+y[i]);
}

/* Writes b bytes of core_chacha() keystream to c, using the 8-byte nonce n
   and the 32-byte key k.  The 16-byte block input is an 8-byte
   little-endian block counter (starting at 0) followed by n.
   b == 0 is a no-op. */
static void chacha20(u8 *c,u64 b,const u8 *n,const u8 *k)
{
  u8 block_in[16],block_out[64];
  u32 carry,pos;

  if (b == 0) return;

  for (pos = 0;pos < 16;++pos) block_in[pos] = 0;
  for (pos = 0;pos < 8;++pos) block_in[pos+8] = n[pos];

  /* full 64-byte blocks */
  for (;b >= 64;b -= 64,c += 64) {
    core_chacha(block_out,block_in,k);
    for (pos = 0;pos < 64;++pos) c[pos] = block_out[pos];

    /* add 1 to the counter held in block_in[0..7] */
    carry = 1;
    for (pos = 0;pos < 8;++pos) {
      carry += (u32) block_in[pos];
      block_in[pos] = carry;
      carry >>= 8;
    }
  }

  /* trailing partial block */
  if (b != 0) {
    core_chacha(block_out,block_in,k);
    for (pos = 0;pos < b;++pos) c[pos] = block_out[pos];
  }
}

/* Number of output bytes crypto_rng() produces per call, in addition to
   the 32-byte replacement key. */
#define crypto_rng_OUTPUTBYTES 736

/* Expands the 32-byte key g into 32 + crypto_rng_OUTPUTBYTES bytes of
   chacha20 output under an all-zero nonce.  The first 32 bytes become the
   new key n and the rest is written to r.  The whole stream is generated
   before n is written, so n may alias g.  Always returns 0. */
static int crypto_rng(
        unsigned char *r, /* random output */
        unsigned char *n, /* new key */
  const unsigned char *g  /* old key */
)
{
  static const unsigned char nonce[8] = {0};
  unsigned char stream[32+crypto_rng_OUTPUTBYTES];
  const unsigned char *const newkey = stream;
  const unsigned char *const output = stream+32;

  chacha20(stream,sizeof stream,nonce,g);
  memcpy(n,newkey,32);
  memcpy(r,output,crypto_rng_OUTPUTBYTES);
  return 0;
}

/* State of the deterministic generator behind knownrandombytes():
   _g is the current 32-byte key (initially all zeros), _r buffers one
   crypto_rng() output block, and _pos is the index of the next unused byte
   in _r.  _pos starts at the block size so the first request refills. */
static unsigned char knownrandombytes_g[32];
static unsigned char knownrandombytes_r[crypto_rng_OUTPUTBYTES];
static unsigned long long knownrandombytes_pos = crypto_rng_OUTPUTBYTES;

/* Copies xlen bytes from the deterministic generator into xvoid.
   The buffer knownrandombytes_r is refilled (and the key knownrandombytes_g
   replaced) via crypto_rng() whenever it has been used up; every byte is
   zeroed in the buffer once it has been handed out. */
void knownrandombytes_main(void *xvoid,long long xlen)
{
  unsigned char *out = (unsigned char *) xvoid;
  assert(knownrandombytes_is_only_for_testing_not_for_cryptographic_use);

  for (;xlen > 0;--xlen) {
    if (knownrandombytes_pos == crypto_rng_OUTPUTBYTES) {
      crypto_rng(knownrandombytes_r,knownrandombytes_g,knownrandombytes_g);
      knownrandombytes_pos = 0;
    }
    *out = knownrandombytes_r[knownrandombytes_pos];
    knownrandombytes_r[knownrandombytes_pos] = 0;
    ++knownrandombytes_pos;
    ++out;
  }
}

/* Fills xvoid with xlen deterministic bytes via knownrandombytes_main()
   and then passes the buffer through secret(), which XORs
   valgrind_undefined_byte into every byte. */
void knownrandombytes(void *xvoid,long long xlen)
{
  knownrandombytes_main(xvoid,xlen);
  secret(xvoid,xlen);
}

/* ----- memory handling */

/* Allocates a test buffer and aborts on allocation failure.
   In valgrind mode this is a plain malloc(len).  Otherwise len + 256 bytes
   are allocated and every byte is overwritten with a value from random();
   the extra 256 bytes leave room for aligned() and the canaries. */
static void *callocplus(long long len)
{
  unsigned char *buf;
  long long total;
  long long pos;

  if (valgrind) {
    buf = (unsigned char *) malloc(len);
    if (!buf) abort();
    return buf;
  }

  total = len + 256;
  buf = (unsigned char *) calloc(1,total);
  if (!buf) abort();
  for (pos = 0;pos < total;++pos) buf[pos] = random();
  return buf;
}

/* Returns the pointer the tests should use inside a callocplus() buffer.
   In valgrind mode x is returned unchanged.  Otherwise the result is the
   first 64-byte-aligned address at or after x + 64, and the len bytes
   starting there are set to zero. */
static void *aligned(void *x,long long len)
{
  unsigned char *start;
  unsigned char *cursor;
  long long remaining;

  if (valgrind) return x;

  start = (unsigned char *) x;
  start += 64;
  start += 63 & (-(unsigned long) start);   /* round up to a multiple of 64 */

  cursor = start;
  for (remaining = len;remaining > 0;--remaining) *cursor++ = 0;
  return start;
}

/* ----- catching SIGILL, SIGBUS, SIGSEGV, etc. */

#include "limits.inc"

/* Runs test(impl), isolating it in a child process unless in valgrind mode.
   valgrind mode: test(impl) is called directly in this process.
   Otherwise: stdout is flushed, the process forks, and the child resets
   ok, applies limits(), runs the test and exits with 100 if a check failed
   or 0 if none did.  The parent waits for the child and records a failure
   for any nonzero wait status, which also covers a child killed by a
   signal.  A fork or waitpid error is fatal: a message is written to
   stderr and the process exits with status 111. */
static void forked(void (*test)(long long),long long impl)
{
  if (valgrind) {
    test(impl);
    return;
  }
  /* flush first so buffered output is not duplicated into the child */
  fflush(stdout);
  pid_t child = fork();
  int childstatus = -1;
  if (child == -1) {
    fprintf(stderr,"fatal: fork failed: %s\n",strerror(errno));
    exit(111);
  }
  if (child == 0) {
    ok = 1;
    limits();
    test(impl);
    if (!ok) exit(100);
    exit(0);
  }
  if (waitpid(child,&childstatus,0) != child) {
    fprintf(stderr,"fatal: wait failed: %s\n",strerror(errno));
    exit(111);
  }
  if (childstatus)
    fail("failure: process failed, status %d\n",childstatus);
  fflush(stdout);
}

/* ----- sorting */

/* Buffers for the sort_int32 tests.  storage_* hold the raw callocplus()
   allocations (kept so they can be freed); test_* are the pointers the
   test actually uses.  x is sorted by the function under test, x2 holds a
   copy that is sorted with qsort for comparison.  test_sort_int32_offset
   is the current offset index, which is mixed into the test-vector seed. */
static void *storage_sort_int32_x;
static unsigned char *test_sort_int32_x;
static void *storage_sort_int32_x2;
static unsigned char *test_sort_int32_x2;
static long long test_sort_int32_offset;

/* qsort-style comparator for int32_t in ascending order:
   returns -1, 0 or 1. */
static int int32_cmp(const void *x,const void *y)
{
  const int32_t lhs = *(const int32_t *) x;
  const int32_t rhs = *(const int32_t *) y;
  return (lhs > rhs) - (lhs < rhs);
}

/* Tests one sort_int32 implementation: impl >= 0 selects a dispatch entry,
   a negative impl the default djbsort_int32.
   The test-vector stream is reseeded first (before any filtering), then
   the targeti / targetn filters may skip the run.  Four stages of random
   array lengths follow; each iteration sorts x with the function under
   test, checks the canaries and the ordering, and compares the result
   with qsort applied to the copy in x2. */
static void test_sort_int32_impl(long long impl)
{
  /* {loops,maxtest} for stages 0..3 */
  static const long long stages[4][2] = {
    {1024,128},{4096,4096},{128,65536},{4,1048576}
  };
  unsigned char *const x = test_sort_int32_x;
  unsigned char *const x2 = test_sort_int32_x2;
  int32_t *const xs = (int32_t *) x;
  void (*crypto_sort)(int32_t *,long long);
  long long stage,loop,i;

  testvector_mutate(valgrind);
  testvector_mutate(1);
  testvector_mutate(impl);
  testvector_mutate(test_sort_int32_offset);

  if (targeti != 0 && strcmp(targeti,".") != 0)
    if (strcmp(targeti,djbsort_dispatch_int32_implementation(impl)) != 0) return;
  if (targetn != 0 && atol(targetn) != impl) return;

  if (impl < 0) {
    crypto_sort = djbsort_int32;
    printf("sort_int32 selected implementation %s compiler %s\n",djbsort_int32_implementation(),djbsort_int32_compiler());
  } else {
    crypto_sort = djbsort_dispatch_int32(impl);
    printf("sort_int32 %lld implementation %s compiler %s\n",impl,djbsort_dispatch_int32_implementation(impl),djbsort_dispatch_int32_compiler(impl));
  }

  for (stage = 0;stage < 4;++stage) {
    const long long loops = stages[stage][0];
    const long long maxtest = stages[stage][1];

    for (loop = 0;loop < loops;++loop) {
      const long long xwords = myrandom() % (maxtest + 1);
      const long long xlen = 4*xwords;

      input_prepare(x2,x,xlen);
      secret(x,xlen);
      crypto_sort(xs,xwords);
      declassify(x,xlen);
      output_compare(x2,x,xlen,"crypto_sort");

      /* report at most one ordering failure per array */
      for (i = 1;i < xwords;++i) {
        if (int32_cmp(&xs[i-1],&xs[i]) <= 0) continue;
        fail("failure: crypto_sort output is not in order\n");
        break;
      }

      double_canary(x2,x,xlen);
      qsort(x2,xwords,4,int32_cmp);
      output_compare(x2,x,xlen,"crypto_sort");
      if (memcmp(x2,x,xlen) != 0)
        fail("failure: crypto_sort does not match qsort\n");
    }
  }
}

/* Driver for the sort_int32 tests.
   Honors the targeto ("sort") and targetp ("int32") filters, allocates the
   two test buffers, and for each offset index 0 and 1 runs every
   implementation (index -1 is the default one) through forked().
   Offset index k places the buffers 4*k bytes past the aligned base.  The
   pointers are derived from the base on every iteration, so an offset
   selected with targetoffset is really applied even though the other
   offset was skipped.  In valgrind mode only offset 0 is run. */
static void test_sort_int32(void)
{
  unsigned char *base_x;
  unsigned char *base_x2;

  if (targeto && strcmp(targeto,"sort")) return;
  if (targetp && strcmp(targetp,"int32")) return;
  storage_sort_int32_x = callocplus(4*1048576);
  base_x = (unsigned char *) aligned(storage_sort_int32_x,4*1048576);
  storage_sort_int32_x2 = callocplus(4*1048576);
  base_x2 = (unsigned char *) aligned(storage_sort_int32_x2,4*1048576);
  test_sort_int32_x = base_x;
  test_sort_int32_x2 = base_x2;

  for (long long offset = 0;offset < 2;++offset) {
    if (targetoffset && atol(targetoffset) != offset) continue;
    if (offset && valgrind) break;
    /* shift by one element per offset index, independent of skipped offsets */
    test_sort_int32_x = base_x + 4*offset;
    test_sort_int32_x2 = base_x2 + 4*offset;
    printf("sort_int32 offset %lld\n",offset);
    test_sort_int32_offset = offset;
    for (long long impl = -1;impl < djbsort_numimpl_int32();++impl)
      forked(test_sort_int32_impl,impl);
  }
  free(storage_sort_int32_x2);
  free(storage_sort_int32_x);
}

/* Buffers for the sort_int32down tests.  storage_* hold the raw
   callocplus() allocations (kept so they can be freed); test_* are the
   pointers the test actually uses.  x is sorted by the function under
   test, x2 holds a copy that is sorted with qsort for comparison.
   test_sort_int32down_offset is the current offset index, which is mixed
   into the test-vector seed. */
static void *storage_sort_int32down_x;
static unsigned char *test_sort_int32down_x;
static void *storage_sort_int32down_x2;
static unsigned char *test_sort_int32down_x2;
static long long test_sort_int32down_offset;

/* qsort-style comparator for int32_t in descending order:
   returns 1 when *x < *y, -1 when *x > *y, 0 when equal. */
static int int32down_cmp(const void *x,const void *y)
{
  const int32_t lhs = *(const int32_t *) x;
  const int32_t rhs = *(const int32_t *) y;
  return (lhs < rhs) - (lhs > rhs);
}

/* Tests one sort_int32down implementation: impl >= 0 selects a dispatch
   entry, a negative impl the default djbsort_int32down.
   The test-vector stream is reseeded first (before any filtering), then
   the targeti / targetn filters may skip the run.  Four stages of random
   array lengths follow; each iteration sorts x with the function under
   test, checks the canaries and the descending order, and compares the
   result with qsort applied to the copy in x2. */
static void test_sort_int32down_impl(long long impl)
{
  /* {loops,maxtest} for stages 0..3 */
  static const long long stages[4][2] = {
    {1024,128},{4096,4096},{128,65536},{4,1048576}
  };
  unsigned char *const x = test_sort_int32down_x;
  unsigned char *const x2 = test_sort_int32down_x2;
  int32_t *const xs = (int32_t *) x;
  void (*crypto_sort)(int32_t *,long long);
  long long stage,loop,i;

  testvector_mutate(valgrind);
  testvector_mutate(2);
  testvector_mutate(impl);
  testvector_mutate(test_sort_int32down_offset);

  if (targeti != 0 && strcmp(targeti,".") != 0)
    if (strcmp(targeti,djbsort_dispatch_int32down_implementation(impl)) != 0) return;
  if (targetn != 0 && atol(targetn) != impl) return;

  if (impl < 0) {
    crypto_sort = djbsort_int32down;
    printf("sort_int32down selected implementation %s compiler %s\n",djbsort_int32down_implementation(),djbsort_int32down_compiler());
  } else {
    crypto_sort = djbsort_dispatch_int32down(impl);
    printf("sort_int32down %lld implementation %s compiler %s\n",impl,djbsort_dispatch_int32down_implementation(impl),djbsort_dispatch_int32down_compiler(impl));
  }

  for (stage = 0;stage < 4;++stage) {
    const long long loops = stages[stage][0];
    const long long maxtest = stages[stage][1];

    for (loop = 0;loop < loops;++loop) {
      const long long xwords = myrandom() % (maxtest + 1);
      const long long xlen = 4*xwords;

      input_prepare(x2,x,xlen);
      secret(x,xlen);
      crypto_sort(xs,xwords);
      declassify(x,xlen);
      output_compare(x2,x,xlen,"crypto_sort");

      /* report at most one ordering failure per array */
      for (i = 1;i < xwords;++i) {
        if (int32down_cmp(&xs[i-1],&xs[i]) <= 0) continue;
        fail("failure: crypto_sort output is not in order\n");
        break;
      }

      double_canary(x2,x,xlen);
      qsort(x2,xwords,4,int32down_cmp);
      output_compare(x2,x,xlen,"crypto_sort");
      if (memcmp(x2,x,xlen) != 0)
        fail("failure: crypto_sort does not match qsort\n");
    }
  }
}

/* Driver for the sort_int32down tests.
   Honors the targeto ("sort") and targetp ("int32down") filters, allocates
   the two test buffers, and for each offset index 0 and 1 runs every
   implementation (index -1 is the default one) through forked().
   Offset index k places the buffers 4*k bytes past the aligned base.  The
   pointers are derived from the base on every iteration, so an offset
   selected with targetoffset is really applied even though the other
   offset was skipped.  In valgrind mode only offset 0 is run. */
static void test_sort_int32down(void)
{
  unsigned char *base_x;
  unsigned char *base_x2;

  if (targeto && strcmp(targeto,"sort")) return;
  if (targetp && strcmp(targetp,"int32down")) return;
  storage_sort_int32down_x = callocplus(4*1048576);
  base_x = (unsigned char *) aligned(storage_sort_int32down_x,4*1048576);
  storage_sort_int32down_x2 = callocplus(4*1048576);
  base_x2 = (unsigned char *) aligned(storage_sort_int32down_x2,4*1048576);
  test_sort_int32down_x = base_x;
  test_sort_int32down_x2 = base_x2;

  for (long long offset = 0;offset < 2;++offset) {
    if (targetoffset && atol(targetoffset) != offset) continue;
    if (offset && valgrind) break;
    /* shift by one element per offset index, independent of skipped offsets */
    test_sort_int32down_x = base_x + 4*offset;
    test_sort_int32down_x2 = base_x2 + 4*offset;
    printf("sort_int32down offset %lld\n",offset);
    test_sort_int32down_offset = offset;
    for (long long impl = -1;impl < djbsort_numimpl_int32down();++impl)
      forked(test_sort_int32down_impl,impl);
  }
  free(storage_sort_int32down_x2);
  free(storage_sort_int32down_x);
}

/* Buffers for the sort_uint32 tests.  storage_* hold the raw callocplus()
   allocations (kept so they can be freed); test_* are the pointers the
   test actually uses.  x is sorted by the function under test, x2 holds a
   copy that is sorted with qsort for comparison.  test_sort_uint32_offset
   is the current offset index, which is mixed into the test-vector seed. */
static void *storage_sort_uint32_x;
static unsigned char *test_sort_uint32_x;
static void *storage_sort_uint32_x2;
static unsigned char *test_sort_uint32_x2;
static long long test_sort_uint32_offset;

/* qsort-style comparator for uint32_t in ascending order:
   returns -1, 0 or 1. */
static int uint32_cmp(const void *x,const void *y)
{
  const uint32_t lhs = *(const uint32_t *) x;
  const uint32_t rhs = *(const uint32_t *) y;
  return (lhs > rhs) - (lhs < rhs);
}

/* Tests one sort_uint32 implementation: impl >= 0 selects a dispatch
   entry, a negative impl the default djbsort_uint32.
   The test-vector stream is reseeded first (before any filtering), then
   the targeti / targetn filters may skip the run.  Four stages of random
   array lengths follow; each iteration sorts x with the function under
   test, checks the canaries and the ordering, and compares the result
   with qsort applied to the copy in x2. */
static void test_sort_uint32_impl(long long impl)
{
  /* {loops,maxtest} for stages 0..3 */
  static const long long stages[4][2] = {
    {1024,128},{4096,4096},{128,65536},{4,1048576}
  };
  unsigned char *const x = test_sort_uint32_x;
  unsigned char *const x2 = test_sort_uint32_x2;
  uint32_t *const xs = (uint32_t *) x;
  void (*crypto_sort)(uint32_t *,long long);
  long long stage,loop,i;

  testvector_mutate(valgrind);
  testvector_mutate(3);
  testvector_mutate(impl);
  testvector_mutate(test_sort_uint32_offset);

  if (targeti != 0 && strcmp(targeti,".") != 0)
    if (strcmp(targeti,djbsort_dispatch_uint32_implementation(impl)) != 0) return;
  if (targetn != 0 && atol(targetn) != impl) return;

  if (impl < 0) {
    crypto_sort = djbsort_uint32;
    printf("sort_uint32 selected implementation %s compiler %s\n",djbsort_uint32_implementation(),djbsort_uint32_compiler());
  } else {
    crypto_sort = djbsort_dispatch_uint32(impl);
    printf("sort_uint32 %lld implementation %s compiler %s\n",impl,djbsort_dispatch_uint32_implementation(impl),djbsort_dispatch_uint32_compiler(impl));
  }

  for (stage = 0;stage < 4;++stage) {
    const long long loops = stages[stage][0];
    const long long maxtest = stages[stage][1];

    for (loop = 0;loop < loops;++loop) {
      const long long xwords = myrandom() % (maxtest + 1);
      const long long xlen = 4*xwords;

      input_prepare(x2,x,xlen);
      secret(x,xlen);
      crypto_sort(xs,xwords);
      declassify(x,xlen);
      output_compare(x2,x,xlen,"crypto_sort");

      /* report at most one ordering failure per array */
      for (i = 1;i < xwords;++i) {
        if (uint32_cmp(&xs[i-1],&xs[i]) <= 0) continue;
        fail("failure: crypto_sort output is not in order\n");
        break;
      }

      double_canary(x2,x,xlen);
      qsort(x2,xwords,4,uint32_cmp);
      output_compare(x2,x,xlen,"crypto_sort");
      if (memcmp(x2,x,xlen) != 0)
        fail("failure: crypto_sort does not match qsort\n");
    }
  }
}

/* Driver for the sort_uint32 tests.
   Honors the targeto ("sort") and targetp ("uint32") filters, allocates
   the two test buffers, and for each offset index 0 and 1 runs every
   implementation (index -1 is the default one) through forked().
   Offset index k places the buffers 4*k bytes past the aligned base.  The
   pointers are derived from the base on every iteration, so an offset
   selected with targetoffset is really applied even though the other
   offset was skipped.  In valgrind mode only offset 0 is run. */
static void test_sort_uint32(void)
{
  unsigned char *base_x;
  unsigned char *base_x2;

  if (targeto && strcmp(targeto,"sort")) return;
  if (targetp && strcmp(targetp,"uint32")) return;
  storage_sort_uint32_x = callocplus(4*1048576);
  base_x = (unsigned char *) aligned(storage_sort_uint32_x,4*1048576);
  storage_sort_uint32_x2 = callocplus(4*1048576);
  base_x2 = (unsigned char *) aligned(storage_sort_uint32_x2,4*1048576);
  test_sort_uint32_x = base_x;
  test_sort_uint32_x2 = base_x2;

  for (long long offset = 0;offset < 2;++offset) {
    if (targetoffset && atol(targetoffset) != offset) continue;
    if (offset && valgrind) break;
    /* shift by one element per offset index, independent of skipped offsets */
    test_sort_uint32_x = base_x + 4*offset;
    test_sort_uint32_x2 = base_x2 + 4*offset;
    printf("sort_uint32 offset %lld\n",offset);
    test_sort_uint32_offset = offset;
    for (long long impl = -1;impl < djbsort_numimpl_uint32();++impl)
      forked(test_sort_uint32_impl,impl);
  }
  free(storage_sort_uint32_x2);
  free(storage_sort_uint32_x);
}

/* Buffers for the sort_uint32down tests.  storage_* hold the raw
   callocplus() allocations (kept so they can be freed); test_* are the
   pointers the test actually uses.  x is sorted by the function under
   test, x2 holds a copy that is sorted with qsort for comparison.
   test_sort_uint32down_offset is the current offset index, which is mixed
   into the test-vector seed. */
static void *storage_sort_uint32down_x;
static unsigned char *test_sort_uint32down_x;
static void *storage_sort_uint32down_x2;
static unsigned char *test_sort_uint32down_x2;
static long long test_sort_uint32down_offset;

/* qsort-style comparator for uint32_t in descending order:
   returns 1 when *x < *y, -1 when *x > *y, 0 when equal. */
static int uint32down_cmp(const void *x,const void *y)
{
  const uint32_t lhs = *(const uint32_t *) x;
  const uint32_t rhs = *(const uint32_t *) y;
  return (lhs < rhs) - (lhs > rhs);
}

/* Tests one sort_uint32down implementation: impl >= 0 selects a dispatch
   entry, a negative impl the default djbsort_uint32down.
   The test-vector stream is reseeded first (before any filtering), then
   the targeti / targetn filters may skip the run.  Four stages of random
   array lengths follow; each iteration sorts x with the function under
   test, checks the canaries and the descending order, and compares the
   result with qsort applied to the copy in x2. */
static void test_sort_uint32down_impl(long long impl)
{
  /* {loops,maxtest} for stages 0..3 */
  static const long long stages[4][2] = {
    {1024,128},{4096,4096},{128,65536},{4,1048576}
  };
  unsigned char *const x = test_sort_uint32down_x;
  unsigned char *const x2 = test_sort_uint32down_x2;
  uint32_t *const xs = (uint32_t *) x;
  void (*crypto_sort)(uint32_t *,long long);
  long long stage,loop,i;

  testvector_mutate(valgrind);
  testvector_mutate(4);
  testvector_mutate(impl);
  testvector_mutate(test_sort_uint32down_offset);

  if (targeti != 0 && strcmp(targeti,".") != 0)
    if (strcmp(targeti,djbsort_dispatch_uint32down_implementation(impl)) != 0) return;
  if (targetn != 0 && atol(targetn) != impl) return;

  if (impl < 0) {
    crypto_sort = djbsort_uint32down;
    printf("sort_uint32down selected implementation %s compiler %s\n",djbsort_uint32down_implementation(),djbsort_uint32down_compiler());
  } else {
    crypto_sort = djbsort_dispatch_uint32down(impl);
    printf("sort_uint32down %lld implementation %s compiler %s\n",impl,djbsort_dispatch_uint32down_implementation(impl),djbsort_dispatch_uint32down_compiler(impl));
  }

  for (stage = 0;stage < 4;++stage) {
    const long long loops = stages[stage][0];
    const long long maxtest = stages[stage][1];

    for (loop = 0;loop < loops;++loop) {
      const long long xwords = myrandom() % (maxtest + 1);
      const long long xlen = 4*xwords;

      input_prepare(x2,x,xlen);
      secret(x,xlen);
      crypto_sort(xs,xwords);
      declassify(x,xlen);
      output_compare(x2,x,xlen,"crypto_sort");

      /* report at most one ordering failure per array */
      for (i = 1;i < xwords;++i) {
        if (uint32down_cmp(&xs[i-1],&xs[i]) <= 0) continue;
        fail("failure: crypto_sort output is not in order\n");
        break;
      }

      double_canary(x2,x,xlen);
      qsort(x2,xwords,4,uint32down_cmp);
      output_compare(x2,x,xlen,"crypto_sort");
      if (memcmp(x2,x,xlen) != 0)
        fail("failure: crypto_sort does not match qsort\n");
    }
  }
}

/* Driver for the sort_uint32down tests.
   Honors the targeto ("sort") and targetp ("uint32down") filters,
   allocates the two test buffers, and for each offset index 0 and 1 runs
   every implementation (index -1 is the default one) through forked().
   Offset index k places the buffers 4*k bytes past the aligned base.  The
   pointers are derived from the base on every iteration, so an offset
   selected with targetoffset is really applied even though the other
   offset was skipped.  In valgrind mode only offset 0 is run. */
static void test_sort_uint32down(void)
{
  unsigned char *base_x;
  unsigned char *base_x2;

  if (targeto && strcmp(targeto,"sort")) return;
  if (targetp && strcmp(targetp,"uint32down")) return;
  storage_sort_uint32down_x = callocplus(4*1048576);
  base_x = (unsigned char *) aligned(storage_sort_uint32down_x,4*1048576);
  storage_sort_uint32down_x2 = callocplus(4*1048576);
  base_x2 = (unsigned char *) aligned(storage_sort_uint32down_x2,4*1048576);
  test_sort_uint32down_x = base_x;
  test_sort_uint32down_x2 = base_x2;

  for (long long offset = 0;offset < 2;++offset) {
    if (targetoffset && atol(targetoffset) != offset) continue;
    if (offset && valgrind) break;
    /* shift by one element per offset index, independent of skipped offsets */
    test_sort_uint32down_x = base_x + 4*offset;
    test_sort_uint32down_x2 = base_x2 + 4*offset;
    printf("sort_uint32down offset %lld\n",offset);
    test_sort_uint32down_offset = offset;
    for (long long impl = -1;impl < djbsort_numimpl_uint32down();++impl)
      forked(test_sort_uint32down_impl,impl);
  }
  free(storage_sort_uint32down_x2);
  free(storage_sort_uint32down_x);
}

/* Buffers for the sort_float32 tests.  storage_* hold the raw callocplus()
   allocations (kept so they can be freed); test_* are the pointers the
   test actually uses.  x is sorted by the function under test, x2 holds a
   copy that is sorted with qsort for comparison.  test_sort_float32_offset
   is the current offset index, which is mixed into the test-vector seed. */
static void *storage_sort_float32_x;
static unsigned char *test_sort_float32_x;
static void *storage_sort_float32_x2;
static unsigned char *test_sort_float32_x2;
static long long test_sort_float32_offset;

/* qsort-style comparator for 32-bit floats in ascending order:
   returns -1, 0 or 1.
   If neither operand is NaN the values are compared as floats (so -0.0 and
   +0.0 compare equal).  If either is NaN both are compared by bit pattern
   instead, using a key under which sign-bit-set values order below
   sign-bit-clear values and larger magnitudes are further from zero.
   The bytes are read with memcpy rather than through casted pointers, so
   the function does not depend on the effective type of the storage, and
   the key is computed in unsigned arithmetic to avoid shifting negative
   values. */
static int float32_cmp(const void *x,const void *y)
{
  float a,b;
  memcpy(&a,x,sizeof a);
  memcpy(&b,y,sizeof b);
  if (a != a || b != b) { /* NaN handling */
    uint32_t ai,bi;
    memcpy(&ai,x,sizeof ai);
    memcpy(&bi,y,sizeof bi);
    /* sign bit set: invert all bits; sign bit clear: set the sign bit.
       Unsigned order of the keys equals the signed order used before. */
    ai ^= (((uint32_t) (0 - (ai >> 31))) >> 1) ^ 0x80000000u;
    bi ^= (((uint32_t) (0 - (bi >> 31))) >> 1) ^ 0x80000000u;
    if (ai < bi) return -1;
    if (ai > bi) return 1;
    return 0;
  }
  if (a < b) return -1;
  if (a > b) return 1;
  return 0;
}

/* Tests one sort_float32 implementation: impl >= 0 selects a dispatch
   entry, a negative impl the default djbsort_float32.
   The test-vector stream is reseeded first (before any filtering), then
   the targeti / targetn filters may skip the run.  Four stages of random
   array lengths follow; each iteration sorts x with the function under
   test, checks the canaries and the ordering, and compares the result
   with qsort applied to the copy in x2. */
static void test_sort_float32_impl(long long impl)
{
  /* {loops,maxtest} for stages 0..3 */
  static const long long stages[4][2] = {
    {1024,128},{4096,4096},{128,65536},{4,1048576}
  };
  unsigned char *const x = test_sort_float32_x;
  unsigned char *const x2 = test_sort_float32_x2;
  float *const xs = (float *) x;
  void (*crypto_sort)(float *,long long);
  long long stage,loop,i;

  testvector_mutate(valgrind);
  testvector_mutate(5);
  testvector_mutate(impl);
  testvector_mutate(test_sort_float32_offset);

  if (targeti != 0 && strcmp(targeti,".") != 0)
    if (strcmp(targeti,djbsort_dispatch_float32_implementation(impl)) != 0) return;
  if (targetn != 0 && atol(targetn) != impl) return;

  if (impl < 0) {
    crypto_sort = djbsort_float32;
    printf("sort_float32 selected implementation %s compiler %s\n",djbsort_float32_implementation(),djbsort_float32_compiler());
  } else {
    crypto_sort = djbsort_dispatch_float32(impl);
    printf("sort_float32 %lld implementation %s compiler %s\n",impl,djbsort_dispatch_float32_implementation(impl),djbsort_dispatch_float32_compiler(impl));
  }

  for (stage = 0;stage < 4;++stage) {
    const long long loops = stages[stage][0];
    const long long maxtest = stages[stage][1];

    for (loop = 0;loop < loops;++loop) {
      const long long xwords = myrandom() % (maxtest + 1);
      const long long xlen = 4*xwords;

      input_prepare(x2,x,xlen);
      secret(x,xlen);
      crypto_sort(xs,xwords);
      declassify(x,xlen);
      output_compare(x2,x,xlen,"crypto_sort");

      /* report at most one ordering failure per array */
      for (i = 1;i < xwords;++i) {
        if (float32_cmp(&xs[i-1],&xs[i]) <= 0) continue;
        fail("failure: crypto_sort output is not in order\n");
        break;
      }

      double_canary(x2,x,xlen);
      qsort(x2,xwords,4,float32_cmp);
      output_compare(x2,x,xlen,"crypto_sort");
      if (memcmp(x2,x,xlen) != 0)
        fail("failure: crypto_sort does not match qsort\n");
    }
  }
}

/* Driver for the sort_float32 tests.
   Honors the targeto ("sort") and targetp ("float32") filters, allocates
   the two test buffers, and for each offset index 0 and 1 runs every
   implementation (index -1 is the default one) through forked().
   Offset index k places the buffers 4*k bytes past the aligned base.  The
   pointers are derived from the base on every iteration, so an offset
   selected with targetoffset is really applied even though the other
   offset was skipped.  In valgrind mode only offset 0 is run. */
static void test_sort_float32(void)
{
  unsigned char *base_x;
  unsigned char *base_x2;

  if (targeto && strcmp(targeto,"sort")) return;
  if (targetp && strcmp(targetp,"float32")) return;
  storage_sort_float32_x = callocplus(4*1048576);
  base_x = (unsigned char *) aligned(storage_sort_float32_x,4*1048576);
  storage_sort_float32_x2 = callocplus(4*1048576);
  base_x2 = (unsigned char *) aligned(storage_sort_float32_x2,4*1048576);
  test_sort_float32_x = base_x;
  test_sort_float32_x2 = base_x2;

  for (long long offset = 0;offset < 2;++offset) {
    if (targetoffset && atol(targetoffset) != offset) continue;
    if (offset && valgrind) break;
    /* shift by one element per offset index, independent of skipped offsets */
    test_sort_float32_x = base_x + 4*offset;
    test_sort_float32_x2 = base_x2 + 4*offset;
    printf("sort_float32 offset %lld\n",offset);
    test_sort_float32_offset = offset;
    for (long long impl = -1;impl < djbsort_numimpl_float32();++impl)
      forked(test_sort_float32_impl,impl);
  }
  free(storage_sort_float32_x2);
  free(storage_sort_float32_x);
}

/* Buffers for the sort_float32down tests.  storage_* hold the raw
   callocplus() allocations (kept so they can be freed); test_* are the
   pointers the test actually uses.  x is sorted by the function under
   test, x2 holds a copy that is sorted with qsort for comparison.
   test_sort_float32down_offset is the current offset index, which is mixed
   into the test-vector seed. */
static void *storage_sort_float32down_x;
static unsigned char *test_sort_float32down_x;
static void *storage_sort_float32down_x2;
static unsigned char *test_sort_float32down_x2;
static long long test_sort_float32down_offset;

/* qsort-style comparator for 32-bit floats in descending order:
   returns 1 when *x orders below *y, -1 when above, 0 when equal.
   If neither operand is NaN the values are compared as floats (so -0.0 and
   +0.0 compare equal).  If either is NaN both are compared by bit pattern
   instead, using a key under which sign-bit-set values order below
   sign-bit-clear values and larger magnitudes are further from zero.
   The bytes are read with memcpy rather than through casted pointers, so
   the function does not depend on the effective type of the storage, and
   the key is computed in unsigned arithmetic to avoid shifting negative
   values. */
static int float32down_cmp(const void *x,const void *y)
{
  float a,b;
  memcpy(&a,x,sizeof a);
  memcpy(&b,y,sizeof b);
  if (a != a || b != b) { /* NaN handling */
    uint32_t ai,bi;
    memcpy(&ai,x,sizeof ai);
    memcpy(&bi,y,sizeof bi);
    /* sign bit set: invert all bits; sign bit clear: set the sign bit.
       Unsigned order of the keys equals the signed order used before. */
    ai ^= (((uint32_t) (0 - (ai >> 31))) >> 1) ^ 0x80000000u;
    bi ^= (((uint32_t) (0 - (bi >> 31))) >> 1) ^ 0x80000000u;
    if (ai < bi) return 1;
    if (ai > bi) return -1;
    return 0;
  }
  if (a < b) return 1;
  if (a > b) return -1;
  return 0;
}

/* Tests one sort_float32down implementation: impl >= 0 selects a dispatch
   entry, a negative impl the default djbsort_float32down.
   The test-vector stream is reseeded first (before any filtering), then
   the targeti / targetn filters may skip the run.  Four stages of random
   array lengths follow; each iteration sorts x with the function under
   test, checks the canaries and the descending order, and compares the
   result with qsort applied to the copy in x2. */
static void test_sort_float32down_impl(long long impl)
{
  /* {loops,maxtest} for stages 0..3 */
  static const long long stages[4][2] = {
    {1024,128},{4096,4096},{128,65536},{4,1048576}
  };
  unsigned char *const x = test_sort_float32down_x;
  unsigned char *const x2 = test_sort_float32down_x2;
  float *const xs = (float *) x;
  void (*crypto_sort)(float *,long long);
  long long stage,loop,i;

  testvector_mutate(valgrind);
  testvector_mutate(6);
  testvector_mutate(impl);
  testvector_mutate(test_sort_float32down_offset);

  if (targeti != 0 && strcmp(targeti,".") != 0)
    if (strcmp(targeti,djbsort_dispatch_float32down_implementation(impl)) != 0) return;
  if (targetn != 0 && atol(targetn) != impl) return;

  if (impl < 0) {
    crypto_sort = djbsort_float32down;
    printf("sort_float32down selected implementation %s compiler %s\n",djbsort_float32down_implementation(),djbsort_float32down_compiler());
  } else {
    crypto_sort = djbsort_dispatch_float32down(impl);
    printf("sort_float32down %lld implementation %s compiler %s\n",impl,djbsort_dispatch_float32down_implementation(impl),djbsort_dispatch_float32down_compiler(impl));
  }

  for (stage = 0;stage < 4;++stage) {
    const long long loops = stages[stage][0];
    const long long maxtest = stages[stage][1];

    for (loop = 0;loop < loops;++loop) {
      const long long xwords = myrandom() % (maxtest + 1);
      const long long xlen = 4*xwords;

      input_prepare(x2,x,xlen);
      secret(x,xlen);
      crypto_sort(xs,xwords);
      declassify(x,xlen);
      output_compare(x2,x,xlen,"crypto_sort");

      /* report at most one ordering failure per array */
      for (i = 1;i < xwords;++i) {
        if (float32down_cmp(&xs[i-1],&xs[i]) <= 0) continue;
        fail("failure: crypto_sort output is not in order\n");
        break;
      }

      double_canary(x2,x,xlen);
      qsort(x2,xwords,4,float32down_cmp);
      output_compare(x2,x,xlen,"crypto_sort");
      if (memcmp(x2,x,xlen) != 0)
        fail("failure: crypto_sort does not match qsort\n");
    }
  }
}

/* Driver for the sort_float32down tests.
   Honors the targeto ("sort") and targetp ("float32down") filters,
   allocates the two test buffers, and for each offset index 0 and 1 runs
   every implementation (index -1 is the default one) through forked().
   Offset index k places the buffers 4*k bytes past the aligned base.  The
   pointers are derived from the base on every iteration, so an offset
   selected with targetoffset is really applied even though the other
   offset was skipped.  In valgrind mode only offset 0 is run. */
static void test_sort_float32down(void)
{
  unsigned char *base_x;
  unsigned char *base_x2;

  if (targeto && strcmp(targeto,"sort")) return;
  if (targetp && strcmp(targetp,"float32down")) return;
  storage_sort_float32down_x = callocplus(4*1048576);
  base_x = (unsigned char *) aligned(storage_sort_float32down_x,4*1048576);
  storage_sort_float32down_x2 = callocplus(4*1048576);
  base_x2 = (unsigned char *) aligned(storage_sort_float32down_x2,4*1048576);
  test_sort_float32down_x = base_x;
  test_sort_float32down_x2 = base_x2;

  for (long long offset = 0;offset < 2;++offset) {
    if (targetoffset && atol(targetoffset) != offset) continue;
    if (offset && valgrind) break;
    /* shift by one element per offset index, independent of skipped offsets */
    test_sort_float32down_x = base_x + 4*offset;
    test_sort_float32down_x2 = base_x2 + 4*offset;
    printf("sort_float32down offset %lld\n",offset);
    test_sort_float32down_offset = offset;
    for (long long impl = -1;impl < djbsort_numimpl_float32down();++impl)
      forked(test_sort_float32down_impl,impl);
  }
  free(storage_sort_float32down_x2);
  free(storage_sort_float32down_x);
}

/* Buffers for the sort_int64 tests.  storage_* hold the raw callocplus()
   allocations (kept so they can be freed); test_* are the pointers the
   test actually uses.  x is sorted by the function under test, x2 holds a
   copy that is sorted with qsort for comparison.  test_sort_int64_offset
   is the current offset index, which is mixed into the test-vector seed. */
static void *storage_sort_int64_x;
static unsigned char *test_sort_int64_x;
static void *storage_sort_int64_x2;
static unsigned char *test_sort_int64_x2;
static long long test_sort_int64_offset;

/* qsort-style comparator for int64_t in ascending order:
   returns -1, 0 or 1. */
static int int64_cmp(const void *x,const void *y)
{
  const int64_t lhs = *(const int64_t *) x;
  const int64_t rhs = *(const int64_t *) y;
  return (lhs > rhs) - (lhs < rhs);
}

/* Tests one sort_int64 implementation: impl >= 0 selects a dispatch entry,
   a negative impl the default djbsort_int64.
   The test-vector stream is reseeded first (before any filtering), then
   the targeti / targetn filters may skip the run.  Four stages of random
   array lengths follow; each iteration sorts x with the function under
   test, checks the canaries and the ordering, and compares the result
   with qsort applied to the copy in x2. */
static void test_sort_int64_impl(long long impl)
{
  /* {loops,maxtest} for stages 0..3 */
  static const long long stages[4][2] = {
    {1024,128},{4096,4096},{128,65536},{4,1048576}
  };
  unsigned char *const x = test_sort_int64_x;
  unsigned char *const x2 = test_sort_int64_x2;
  int64_t *const xs = (int64_t *) x;
  void (*crypto_sort)(int64_t *,long long);
  long long stage,loop,i;

  testvector_mutate(valgrind);
  testvector_mutate(7);
  testvector_mutate(impl);
  testvector_mutate(test_sort_int64_offset);

  if (targeti != 0 && strcmp(targeti,".") != 0)
    if (strcmp(targeti,djbsort_dispatch_int64_implementation(impl)) != 0) return;
  if (targetn != 0 && atol(targetn) != impl) return;

  if (impl < 0) {
    crypto_sort = djbsort_int64;
    printf("sort_int64 selected implementation %s compiler %s\n",djbsort_int64_implementation(),djbsort_int64_compiler());
  } else {
    crypto_sort = djbsort_dispatch_int64(impl);
    printf("sort_int64 %lld implementation %s compiler %s\n",impl,djbsort_dispatch_int64_implementation(impl),djbsort_dispatch_int64_compiler(impl));
  }

  for (stage = 0;stage < 4;++stage) {
    const long long loops = stages[stage][0];
    const long long maxtest = stages[stage][1];

    for (loop = 0;loop < loops;++loop) {
      const long long xwords = myrandom() % (maxtest + 1);
      const long long xlen = 8*xwords;

      input_prepare(x2,x,xlen);
      secret(x,xlen);
      crypto_sort(xs,xwords);
      declassify(x,xlen);
      output_compare(x2,x,xlen,"crypto_sort");

      /* report at most one ordering failure per array */
      for (i = 1;i < xwords;++i) {
        if (int64_cmp(&xs[i-1],&xs[i]) <= 0) continue;
        fail("failure: crypto_sort output is not in order\n");
        break;
      }

      double_canary(x2,x,xlen);
      qsort(x2,xwords,8,int64_cmp);
      output_compare(x2,x,xlen,"crypto_sort");
      if (memcmp(x2,x,xlen) != 0)
        fail("failure: crypto_sort does not match qsort\n");
    }
  }
}

/* Driver for the sort_int64 tests.
   Honors the targeto ("sort") and targetp ("int64") filters, allocates the
   two test buffers, and for each offset index 0 and 1 runs every
   implementation (index -1 is the default one) through forked().
   Offset index k places the buffers 8*k bytes past the aligned base.  The
   pointers are derived from the base on every iteration, so an offset
   selected with targetoffset is really applied even though the other
   offset was skipped.  In valgrind mode only offset 0 is run. */
static void test_sort_int64(void)
{
  unsigned char *base_x;
  unsigned char *base_x2;

  if (targeto && strcmp(targeto,"sort")) return;
  if (targetp && strcmp(targetp,"int64")) return;
  storage_sort_int64_x = callocplus(8*1048576);
  base_x = (unsigned char *) aligned(storage_sort_int64_x,8*1048576);
  storage_sort_int64_x2 = callocplus(8*1048576);
  base_x2 = (unsigned char *) aligned(storage_sort_int64_x2,8*1048576);
  test_sort_int64_x = base_x;
  test_sort_int64_x2 = base_x2;

  for (long long offset = 0;offset < 2;++offset) {
    if (targetoffset && atol(targetoffset) != offset) continue;
    if (offset && valgrind) break;
    /* shift by one element per offset index, independent of skipped offsets */
    test_sort_int64_x = base_x + 8*offset;
    test_sort_int64_x2 = base_x2 + 8*offset;
    printf("sort_int64 offset %lld\n",offset);
    test_sort_int64_offset = offset;
    for (long long impl = -1;impl < djbsort_numimpl_int64();++impl)
      forked(test_sort_int64_impl,impl);
  }
  free(storage_sort_int64_x2);
  free(storage_sort_int64_x);
}

/* State shared by test_sort_int64down() and test_sort_int64down_impl(). */
static void *storage_sort_int64down_x; /* callocplus() result backing the sorted buffer; released with free() */
static unsigned char *test_sort_int64down_x; /* buffer handed to the sort under test */
static void *storage_sort_int64down_x2; /* callocplus() result backing the reference buffer; released with free() */
static unsigned char *test_sort_int64down_x2; /* reference buffer that is sorted with qsort() */
static long long test_sort_int64down_offset; /* offset index of the current pass; fed to testvector_mutate() */

/*
 * qsort() comparator giving descending order on int64_t values:
 * returns 1 when *x < *y, -1 when *x > *y, and 0 when they are equal.
 */
static int int64down_cmp(const void *x,const void *y)
{
  const int64_t *lhs = x;
  const int64_t *rhs = y;
  /* each relational result is 0 or 1, so the difference is -1, 0 or 1 */
  return (*lhs < *rhs) - (*lhs > *rhs);
}

/*
 * Check one sort_int64down implementation against qsort().
 *
 * impl >= 0 selects djbsort_dispatch_int64down(impl); a negative impl tests
 * the default entry point djbsort_int64down.  The targeti/targetn filters
 * can end the test early, but only after the four testvector_mutate()
 * calls have been made.
 *
 * Each trial sorts a random number of words with the implementation under
 * test, verifies that adjacent elements respect int64down_cmp, and then
 * compares the bytes against a qsort() of the reference buffer.
 * input_prepare(), output_compare() and double_canary() are defined
 * elsewhere in this file; presumably they fill both buffers with the same
 * data and guard the bytes around them -- confirm there.
 */
static void test_sort_int64down_impl(long long impl)
{
  /* per stage: { number of trials, largest array length in words } */
  static const long long stage_params[4][2] = {
    { 1024, 128 },
    { 4096, 4096 },
    { 128, 65536 },
    { 4, 1048576 },
  };
  unsigned char *const buf = test_sort_int64down_x;
  unsigned char *const ref = test_sort_int64down_x2;
  int64_t *const words = (int64_t *) buf;
  void (*crypto_sort)(int64_t *,long long);

  testvector_mutate(valgrind);
  testvector_mutate(8);
  testvector_mutate(impl);
  testvector_mutate(test_sort_int64down_offset);

  /* "." as implementation filter matches every implementation */
  if (targeti && strcmp(targeti,".") != 0) {
    if (strcmp(targeti,djbsort_dispatch_int64down_implementation(impl)) != 0) return;
  }
  if (targetn && atol(targetn) != impl) return;

  if (impl < 0) {
    crypto_sort = djbsort_int64down;
    printf("sort_int64down selected implementation %s compiler %s\n",djbsort_int64down_implementation(),djbsort_int64down_compiler());
  } else {
    crypto_sort = djbsort_dispatch_int64down(impl);
    printf("sort_int64down %lld implementation %s compiler %s\n",impl,djbsort_dispatch_int64down_implementation(impl),djbsort_dispatch_int64down_compiler(impl));
  }

  for (int stage = 0;stage < 4;++stage) {
    const long long trials = stage_params[stage][0];
    const long long maxwords = stage_params[stage][1];

    for (long long trial = 0;trial < trials;++trial) {
      const long long nwords = myrandom() % (maxwords + 1);
      const long long nbytes = 8*nwords;
      long long pos;

      input_prepare(ref,buf,nbytes);
      secret(buf,nbytes);
      crypto_sort(words,nwords);
      declassify(buf,nbytes);
      output_compare(ref,buf,nbytes,"crypto_sort");

      /* stop at the first adjacent pair that breaks the descending order */
      pos = 1;
      while (pos < nwords && int64down_cmp(&words[pos-1],&words[pos]) <= 0) ++pos;
      if (pos < nwords) fail("failure: crypto_sort output is not in order\n");

      double_canary(ref,buf,nbytes);
      qsort(ref,nwords,8,int64down_cmp);
      output_compare(ref,buf,nbytes,"crypto_sort");
      if (memcmp(ref,buf,nbytes) != 0) fail("failure: crypto_sort does not match qsort\n");
    }
  }
}

/*
 * Driver for the sort_int64down tests.
 *
 * Returns immediately unless the command-line filters (targeto/targetp)
 * select operation "sort" and primitive "int64down".  Otherwise it obtains
 * two buffers sized for the largest test (1048576 words of 8 bytes) and,
 * for each buffer offset 0 and 1, runs test_sort_int64down_impl through
 * forked() for impl = -1 and for every impl below
 * djbsort_numimpl_int64down().
 *
 * Offset 1 is skipped under valgrind; targetoffset restricts the run to a
 * single offset.
 */
static void test_sort_int64down(void)
{
  unsigned char *base_x;
  unsigned char *base_x2;

  if (targeto && strcmp(targeto,"sort")) return;
  if (targetp && strcmp(targetp,"int64down")) return;
  storage_sort_int64down_x = callocplus(8*1048576);
  test_sort_int64down_x = (unsigned char *) aligned(storage_sort_int64down_x,8*1048576);
  storage_sort_int64down_x2 = callocplus(8*1048576);
  test_sort_int64down_x2 = (unsigned char *) aligned(storage_sort_int64down_x2,8*1048576);
  base_x = test_sort_int64down_x;
  base_x2 = test_sort_int64down_x2;

  for (long long offset = 0;offset < 2;++offset) {
    if (targetoffset && atol(targetoffset) != offset) continue;
    if (offset && valgrind) break;
    printf("sort_int64down offset %lld\n",offset);
    test_sort_int64down_offset = offset;
    /*
     * Position the buffers from the fixed base rather than bumping them at
     * the end of each pass: a pass skipped by the targetoffset filter must
     * not leave a later pass running at the wrong position.
     */
    test_sort_int64down_x = base_x + 8*offset;
    test_sort_int64down_x2 = base_x2 + 8*offset;
    for (long long impl = -1;impl < djbsort_numimpl_int64down();++impl)
      forked(test_sort_int64down_impl,impl);
  }
  free(storage_sort_int64down_x2);
  free(storage_sort_int64down_x);
}

/* State shared by test_sort_uint64() and test_sort_uint64_impl(). */
static void *storage_sort_uint64_x; /* callocplus() result backing the sorted buffer; released with free() */
static unsigned char *test_sort_uint64_x; /* buffer handed to the sort under test */
static void *storage_sort_uint64_x2; /* callocplus() result backing the reference buffer; released with free() */
static unsigned char *test_sort_uint64_x2; /* reference buffer that is sorted with qsort() */
static long long test_sort_uint64_offset; /* offset index of the current pass; fed to testvector_mutate() */

/*
 * qsort() comparator giving ascending order on uint64_t values:
 * returns -1 when *x < *y, 1 when *x > *y, and 0 when they are equal.
 */
static int uint64_cmp(const void *x,const void *y)
{
  const uint64_t *lhs = x;
  const uint64_t *rhs = y;
  /* each relational result is 0 or 1, so the difference is -1, 0 or 1 */
  return (*lhs > *rhs) - (*lhs < *rhs);
}

/*
 * Check one sort_uint64 implementation against qsort().
 *
 * impl >= 0 selects djbsort_dispatch_uint64(impl); a negative impl tests
 * the default entry point djbsort_uint64.  The targeti/targetn filters can
 * end the test early, but only after the four testvector_mutate() calls
 * have been made.
 *
 * Each trial sorts a random number of words with the implementation under
 * test, verifies that adjacent elements respect uint64_cmp, and then
 * compares the bytes against a qsort() of the reference buffer.
 * input_prepare(), output_compare() and double_canary() are defined
 * elsewhere in this file; presumably they fill both buffers with the same
 * data and guard the bytes around them -- confirm there.
 */
static void test_sort_uint64_impl(long long impl)
{
  /* per stage: { number of trials, largest array length in words } */
  static const long long stage_params[4][2] = {
    { 1024, 128 },
    { 4096, 4096 },
    { 128, 65536 },
    { 4, 1048576 },
  };
  unsigned char *const buf = test_sort_uint64_x;
  unsigned char *const ref = test_sort_uint64_x2;
  uint64_t *const words = (uint64_t *) buf;
  void (*crypto_sort)(uint64_t *,long long);

  testvector_mutate(valgrind);
  testvector_mutate(9);
  testvector_mutate(impl);
  testvector_mutate(test_sort_uint64_offset);

  /* "." as implementation filter matches every implementation */
  if (targeti && strcmp(targeti,".") != 0) {
    if (strcmp(targeti,djbsort_dispatch_uint64_implementation(impl)) != 0) return;
  }
  if (targetn && atol(targetn) != impl) return;

  if (impl < 0) {
    crypto_sort = djbsort_uint64;
    printf("sort_uint64 selected implementation %s compiler %s\n",djbsort_uint64_implementation(),djbsort_uint64_compiler());
  } else {
    crypto_sort = djbsort_dispatch_uint64(impl);
    printf("sort_uint64 %lld implementation %s compiler %s\n",impl,djbsort_dispatch_uint64_implementation(impl),djbsort_dispatch_uint64_compiler(impl));
  }

  for (int stage = 0;stage < 4;++stage) {
    const long long trials = stage_params[stage][0];
    const long long maxwords = stage_params[stage][1];

    for (long long trial = 0;trial < trials;++trial) {
      const long long nwords = myrandom() % (maxwords + 1);
      const long long nbytes = 8*nwords;
      long long pos;

      input_prepare(ref,buf,nbytes);
      secret(buf,nbytes);
      crypto_sort(words,nwords);
      declassify(buf,nbytes);
      output_compare(ref,buf,nbytes,"crypto_sort");

      /* stop at the first adjacent pair that breaks the ascending order */
      pos = 1;
      while (pos < nwords && uint64_cmp(&words[pos-1],&words[pos]) <= 0) ++pos;
      if (pos < nwords) fail("failure: crypto_sort output is not in order\n");

      double_canary(ref,buf,nbytes);
      qsort(ref,nwords,8,uint64_cmp);
      output_compare(ref,buf,nbytes,"crypto_sort");
      if (memcmp(ref,buf,nbytes) != 0) fail("failure: crypto_sort does not match qsort\n");
    }
  }
}

/*
 * Driver for the sort_uint64 tests.
 *
 * Returns immediately unless the command-line filters (targeto/targetp)
 * select operation "sort" and primitive "uint64".  Otherwise it obtains two
 * buffers sized for the largest test (1048576 words of 8 bytes) and, for
 * each buffer offset 0 and 1, runs test_sort_uint64_impl through forked()
 * for impl = -1 and for every impl below djbsort_numimpl_uint64().
 *
 * Offset 1 is skipped under valgrind; targetoffset restricts the run to a
 * single offset.
 */
static void test_sort_uint64(void)
{
  unsigned char *base_x;
  unsigned char *base_x2;

  if (targeto && strcmp(targeto,"sort")) return;
  if (targetp && strcmp(targetp,"uint64")) return;
  storage_sort_uint64_x = callocplus(8*1048576);
  test_sort_uint64_x = (unsigned char *) aligned(storage_sort_uint64_x,8*1048576);
  storage_sort_uint64_x2 = callocplus(8*1048576);
  test_sort_uint64_x2 = (unsigned char *) aligned(storage_sort_uint64_x2,8*1048576);
  base_x = test_sort_uint64_x;
  base_x2 = test_sort_uint64_x2;

  for (long long offset = 0;offset < 2;++offset) {
    if (targetoffset && atol(targetoffset) != offset) continue;
    if (offset && valgrind) break;
    printf("sort_uint64 offset %lld\n",offset);
    test_sort_uint64_offset = offset;
    /*
     * Position the buffers from the fixed base rather than bumping them at
     * the end of each pass: a pass skipped by the targetoffset filter must
     * not leave a later pass running at the wrong position.
     */
    test_sort_uint64_x = base_x + 8*offset;
    test_sort_uint64_x2 = base_x2 + 8*offset;
    for (long long impl = -1;impl < djbsort_numimpl_uint64();++impl)
      forked(test_sort_uint64_impl,impl);
  }
  free(storage_sort_uint64_x2);
  free(storage_sort_uint64_x);
}

/* State shared by test_sort_uint64down() and test_sort_uint64down_impl(). */
static void *storage_sort_uint64down_x; /* callocplus() result backing the sorted buffer; released with free() */
static unsigned char *test_sort_uint64down_x; /* buffer handed to the sort under test */
static void *storage_sort_uint64down_x2; /* callocplus() result backing the reference buffer; released with free() */
static unsigned char *test_sort_uint64down_x2; /* reference buffer that is sorted with qsort() */
static long long test_sort_uint64down_offset; /* offset index of the current pass; fed to testvector_mutate() */

/*
 * qsort() comparator giving descending order on uint64_t values:
 * returns 1 when *x < *y, -1 when *x > *y, and 0 when they are equal.
 */
static int uint64down_cmp(const void *x,const void *y)
{
  const uint64_t *lhs = x;
  const uint64_t *rhs = y;
  /* each relational result is 0 or 1, so the difference is -1, 0 or 1 */
  return (*lhs < *rhs) - (*lhs > *rhs);
}

/*
 * Check one sort_uint64down implementation against qsort().
 *
 * impl >= 0 selects djbsort_dispatch_uint64down(impl); a negative impl
 * tests the default entry point djbsort_uint64down.  The targeti/targetn
 * filters can end the test early, but only after the four
 * testvector_mutate() calls have been made.
 *
 * Each trial sorts a random number of words with the implementation under
 * test, verifies that adjacent elements respect uint64down_cmp, and then
 * compares the bytes against a qsort() of the reference buffer.
 * input_prepare(), output_compare() and double_canary() are defined
 * elsewhere in this file; presumably they fill both buffers with the same
 * data and guard the bytes around them -- confirm there.
 */
static void test_sort_uint64down_impl(long long impl)
{
  /* per stage: { number of trials, largest array length in words } */
  static const long long stage_params[4][2] = {
    { 1024, 128 },
    { 4096, 4096 },
    { 128, 65536 },
    { 4, 1048576 },
  };
  unsigned char *const buf = test_sort_uint64down_x;
  unsigned char *const ref = test_sort_uint64down_x2;
  uint64_t *const words = (uint64_t *) buf;
  void (*crypto_sort)(uint64_t *,long long);

  testvector_mutate(valgrind);
  testvector_mutate(10);
  testvector_mutate(impl);
  testvector_mutate(test_sort_uint64down_offset);

  /* "." as implementation filter matches every implementation */
  if (targeti && strcmp(targeti,".") != 0) {
    if (strcmp(targeti,djbsort_dispatch_uint64down_implementation(impl)) != 0) return;
  }
  if (targetn && atol(targetn) != impl) return;

  if (impl < 0) {
    crypto_sort = djbsort_uint64down;
    printf("sort_uint64down selected implementation %s compiler %s\n",djbsort_uint64down_implementation(),djbsort_uint64down_compiler());
  } else {
    crypto_sort = djbsort_dispatch_uint64down(impl);
    printf("sort_uint64down %lld implementation %s compiler %s\n",impl,djbsort_dispatch_uint64down_implementation(impl),djbsort_dispatch_uint64down_compiler(impl));
  }

  for (int stage = 0;stage < 4;++stage) {
    const long long trials = stage_params[stage][0];
    const long long maxwords = stage_params[stage][1];

    for (long long trial = 0;trial < trials;++trial) {
      const long long nwords = myrandom() % (maxwords + 1);
      const long long nbytes = 8*nwords;
      long long pos;

      input_prepare(ref,buf,nbytes);
      secret(buf,nbytes);
      crypto_sort(words,nwords);
      declassify(buf,nbytes);
      output_compare(ref,buf,nbytes,"crypto_sort");

      /* stop at the first adjacent pair that breaks the descending order */
      pos = 1;
      while (pos < nwords && uint64down_cmp(&words[pos-1],&words[pos]) <= 0) ++pos;
      if (pos < nwords) fail("failure: crypto_sort output is not in order\n");

      double_canary(ref,buf,nbytes);
      qsort(ref,nwords,8,uint64down_cmp);
      output_compare(ref,buf,nbytes,"crypto_sort");
      if (memcmp(ref,buf,nbytes) != 0) fail("failure: crypto_sort does not match qsort\n");
    }
  }
}

/*
 * Driver for the sort_uint64down tests.
 *
 * Returns immediately unless the command-line filters (targeto/targetp)
 * select operation "sort" and primitive "uint64down".  Otherwise it obtains
 * two buffers sized for the largest test (1048576 words of 8 bytes) and,
 * for each buffer offset 0 and 1, runs test_sort_uint64down_impl through
 * forked() for impl = -1 and for every impl below
 * djbsort_numimpl_uint64down().
 *
 * Offset 1 is skipped under valgrind; targetoffset restricts the run to a
 * single offset.
 */
static void test_sort_uint64down(void)
{
  unsigned char *base_x;
  unsigned char *base_x2;

  if (targeto && strcmp(targeto,"sort")) return;
  if (targetp && strcmp(targetp,"uint64down")) return;
  storage_sort_uint64down_x = callocplus(8*1048576);
  test_sort_uint64down_x = (unsigned char *) aligned(storage_sort_uint64down_x,8*1048576);
  storage_sort_uint64down_x2 = callocplus(8*1048576);
  test_sort_uint64down_x2 = (unsigned char *) aligned(storage_sort_uint64down_x2,8*1048576);
  base_x = test_sort_uint64down_x;
  base_x2 = test_sort_uint64down_x2;

  for (long long offset = 0;offset < 2;++offset) {
    if (targetoffset && atol(targetoffset) != offset) continue;
    if (offset && valgrind) break;
    printf("sort_uint64down offset %lld\n",offset);
    test_sort_uint64down_offset = offset;
    /*
     * Position the buffers from the fixed base rather than bumping them at
     * the end of each pass: a pass skipped by the targetoffset filter must
     * not leave a later pass running at the wrong position.
     */
    test_sort_uint64down_x = base_x + 8*offset;
    test_sort_uint64down_x2 = base_x2 + 8*offset;
    for (long long impl = -1;impl < djbsort_numimpl_uint64down();++impl)
      forked(test_sort_uint64down_impl,impl);
  }
  free(storage_sort_uint64down_x2);
  free(storage_sort_uint64down_x);
}

/* State shared by test_sort_float64() and test_sort_float64_impl(). */
static void *storage_sort_float64_x; /* callocplus() result backing the sorted buffer; released with free() */
static unsigned char *test_sort_float64_x; /* buffer handed to the sort under test */
static void *storage_sort_float64_x2; /* callocplus() result backing the reference buffer; released with free() */
static unsigned char *test_sort_float64_x2; /* reference buffer that is sorted with qsort() */
static long long test_sort_float64_offset; /* offset index of the current pass; fed to testvector_mutate() */

/*
 * qsort() comparator giving ascending order on 8-byte doubles.
 *
 * When neither operand is a NaN the ordinary floating-point comparison is
 * used (so -0.0 and +0.0 compare equal).  When at least one operand is a
 * NaN the two bit patterns are compared instead, mapped so that patterns
 * with the sign bit set rank below all others and, among those, a larger
 * magnitude ranks lower.
 *
 * The operands are fetched with memcpy() so that the same storage is never
 * read through both a double and an integer lvalue, and the mapping uses
 * only unsigned arithmetic.
 *
 * Returns -1, 0 or 1.
 */
static int float64_cmp(const void *x,const void *y)
{
  double a;
  double b;

  memcpy(&a,x,sizeof a);
  memcpy(&b,y,sizeof b);

  if (a != a || b != b) { /* NaN handling */
    uint64_t ka;
    uint64_t kb;

    memcpy(&ka,x,sizeof ka);
    memcpy(&kb,y,sizeof kb);
    /*
     * Order-preserving key: invert every bit of a sign-bit-set pattern,
     * otherwise set the top bit.  Unsigned order of the keys equals the
     * signed order of "pattern with its low 63 bits flipped when negative".
     */
    ka = (ka >> 63) ? ~ka : (ka | ((uint64_t) 1 << 63));
    kb = (kb >> 63) ? ~kb : (kb | ((uint64_t) 1 << 63));
    if (ka < kb) return -1;
    if (ka > kb) return 1;
    return 0;
  }
  if (a < b) return -1;
  if (a > b) return 1;
  return 0;
}

/*
 * Check one sort_float64 implementation against qsort().
 *
 * impl >= 0 selects djbsort_dispatch_float64(impl); a negative impl tests
 * the default entry point djbsort_float64.  The targeti/targetn filters can
 * end the test early, but only after the four testvector_mutate() calls
 * have been made.
 *
 * Each trial sorts a random number of words with the implementation under
 * test, verifies that adjacent elements respect float64_cmp, and then
 * compares the bytes against a qsort() of the reference buffer.
 * input_prepare(), output_compare() and double_canary() are defined
 * elsewhere in this file; presumably they fill both buffers with the same
 * data and guard the bytes around them -- confirm there.
 */
static void test_sort_float64_impl(long long impl)
{
  /* per stage: { number of trials, largest array length in words } */
  static const long long stage_params[4][2] = {
    { 1024, 128 },
    { 4096, 4096 },
    { 128, 65536 },
    { 4, 1048576 },
  };
  unsigned char *const buf = test_sort_float64_x;
  unsigned char *const ref = test_sort_float64_x2;
  double *const words = (double *) buf;
  void (*crypto_sort)(double *,long long);

  testvector_mutate(valgrind);
  testvector_mutate(11);
  testvector_mutate(impl);
  testvector_mutate(test_sort_float64_offset);

  /* "." as implementation filter matches every implementation */
  if (targeti && strcmp(targeti,".") != 0) {
    if (strcmp(targeti,djbsort_dispatch_float64_implementation(impl)) != 0) return;
  }
  if (targetn && atol(targetn) != impl) return;

  if (impl < 0) {
    crypto_sort = djbsort_float64;
    printf("sort_float64 selected implementation %s compiler %s\n",djbsort_float64_implementation(),djbsort_float64_compiler());
  } else {
    crypto_sort = djbsort_dispatch_float64(impl);
    printf("sort_float64 %lld implementation %s compiler %s\n",impl,djbsort_dispatch_float64_implementation(impl),djbsort_dispatch_float64_compiler(impl));
  }

  for (int stage = 0;stage < 4;++stage) {
    const long long trials = stage_params[stage][0];
    const long long maxwords = stage_params[stage][1];

    for (long long trial = 0;trial < trials;++trial) {
      const long long nwords = myrandom() % (maxwords + 1);
      const long long nbytes = 8*nwords;
      long long pos;

      input_prepare(ref,buf,nbytes);
      secret(buf,nbytes);
      crypto_sort(words,nwords);
      declassify(buf,nbytes);
      output_compare(ref,buf,nbytes,"crypto_sort");

      /* stop at the first adjacent pair that breaks the ascending order */
      pos = 1;
      while (pos < nwords && float64_cmp(&words[pos-1],&words[pos]) <= 0) ++pos;
      if (pos < nwords) fail("failure: crypto_sort output is not in order\n");

      double_canary(ref,buf,nbytes);
      qsort(ref,nwords,8,float64_cmp);
      output_compare(ref,buf,nbytes,"crypto_sort");
      if (memcmp(ref,buf,nbytes) != 0) fail("failure: crypto_sort does not match qsort\n");
    }
  }
}

/*
 * Driver for the sort_float64 tests.
 *
 * Returns immediately unless the command-line filters (targeto/targetp)
 * select operation "sort" and primitive "float64".  Otherwise it obtains
 * two buffers sized for the largest test (1048576 words of 8 bytes) and,
 * for each buffer offset 0 and 1, runs test_sort_float64_impl through
 * forked() for impl = -1 and for every impl below djbsort_numimpl_float64().
 *
 * Offset 1 is skipped under valgrind; targetoffset restricts the run to a
 * single offset.
 */
static void test_sort_float64(void)
{
  unsigned char *base_x;
  unsigned char *base_x2;

  if (targeto && strcmp(targeto,"sort")) return;
  if (targetp && strcmp(targetp,"float64")) return;
  storage_sort_float64_x = callocplus(8*1048576);
  test_sort_float64_x = (unsigned char *) aligned(storage_sort_float64_x,8*1048576);
  storage_sort_float64_x2 = callocplus(8*1048576);
  test_sort_float64_x2 = (unsigned char *) aligned(storage_sort_float64_x2,8*1048576);
  base_x = test_sort_float64_x;
  base_x2 = test_sort_float64_x2;

  for (long long offset = 0;offset < 2;++offset) {
    if (targetoffset && atol(targetoffset) != offset) continue;
    if (offset && valgrind) break;
    printf("sort_float64 offset %lld\n",offset);
    test_sort_float64_offset = offset;
    /*
     * Position the buffers from the fixed base rather than bumping them at
     * the end of each pass: a pass skipped by the targetoffset filter must
     * not leave a later pass running at the wrong position.
     */
    test_sort_float64_x = base_x + 8*offset;
    test_sort_float64_x2 = base_x2 + 8*offset;
    for (long long impl = -1;impl < djbsort_numimpl_float64();++impl)
      forked(test_sort_float64_impl,impl);
  }
  free(storage_sort_float64_x2);
  free(storage_sort_float64_x);
}

/* State shared by test_sort_float64down() and test_sort_float64down_impl(). */
static void *storage_sort_float64down_x; /* callocplus() result backing the sorted buffer; released with free() */
static unsigned char *test_sort_float64down_x; /* buffer handed to the sort under test */
static void *storage_sort_float64down_x2; /* callocplus() result backing the reference buffer; released with free() */
static unsigned char *test_sort_float64down_x2; /* reference buffer that is sorted with qsort() */
static long long test_sort_float64down_offset; /* offset index of the current pass; fed to testvector_mutate() */

/*
 * qsort() comparator giving descending order on 8-byte doubles.
 *
 * When neither operand is a NaN the ordinary floating-point comparison is
 * used (so -0.0 and +0.0 compare equal).  When at least one operand is a
 * NaN the two bit patterns are compared instead, mapped so that patterns
 * with the sign bit set rank below all others and, among those, a larger
 * magnitude ranks lower; the result is then reversed for descending order.
 *
 * The operands are fetched with memcpy() so that the same storage is never
 * read through both a double and an integer lvalue, and the mapping uses
 * only unsigned arithmetic.
 *
 * Returns 1 when *x ranks below *y, -1 when it ranks above, 0 otherwise.
 */
static int float64down_cmp(const void *x,const void *y)
{
  double a;
  double b;

  memcpy(&a,x,sizeof a);
  memcpy(&b,y,sizeof b);

  if (a != a || b != b) { /* NaN handling */
    uint64_t ka;
    uint64_t kb;

    memcpy(&ka,x,sizeof ka);
    memcpy(&kb,y,sizeof kb);
    /*
     * Order-preserving key: invert every bit of a sign-bit-set pattern,
     * otherwise set the top bit.  Unsigned order of the keys equals the
     * signed order of "pattern with its low 63 bits flipped when negative".
     */
    ka = (ka >> 63) ? ~ka : (ka | ((uint64_t) 1 << 63));
    kb = (kb >> 63) ? ~kb : (kb | ((uint64_t) 1 << 63));
    if (ka < kb) return 1;
    if (ka > kb) return -1;
    return 0;
  }
  if (a < b) return 1;
  if (a > b) return -1;
  return 0;
}

/*
 * Check one sort_float64down implementation against qsort().
 *
 * impl >= 0 selects djbsort_dispatch_float64down(impl); a negative impl
 * tests the default entry point djbsort_float64down.  The targeti/targetn
 * filters can end the test early, but only after the four
 * testvector_mutate() calls have been made.
 *
 * Each trial sorts a random number of words with the implementation under
 * test, verifies that adjacent elements respect float64down_cmp, and then
 * compares the bytes against a qsort() of the reference buffer.
 * input_prepare(), output_compare() and double_canary() are defined
 * elsewhere in this file; presumably they fill both buffers with the same
 * data and guard the bytes around them -- confirm there.
 */
static void test_sort_float64down_impl(long long impl)
{
  /* per stage: { number of trials, largest array length in words } */
  static const long long stage_params[4][2] = {
    { 1024, 128 },
    { 4096, 4096 },
    { 128, 65536 },
    { 4, 1048576 },
  };
  unsigned char *const buf = test_sort_float64down_x;
  unsigned char *const ref = test_sort_float64down_x2;
  double *const words = (double *) buf;
  void (*crypto_sort)(double *,long long);

  testvector_mutate(valgrind);
  testvector_mutate(12);
  testvector_mutate(impl);
  testvector_mutate(test_sort_float64down_offset);

  /* "." as implementation filter matches every implementation */
  if (targeti && strcmp(targeti,".") != 0) {
    if (strcmp(targeti,djbsort_dispatch_float64down_implementation(impl)) != 0) return;
  }
  if (targetn && atol(targetn) != impl) return;

  if (impl < 0) {
    crypto_sort = djbsort_float64down;
    printf("sort_float64down selected implementation %s compiler %s\n",djbsort_float64down_implementation(),djbsort_float64down_compiler());
  } else {
    crypto_sort = djbsort_dispatch_float64down(impl);
    printf("sort_float64down %lld implementation %s compiler %s\n",impl,djbsort_dispatch_float64down_implementation(impl),djbsort_dispatch_float64down_compiler(impl));
  }

  for (int stage = 0;stage < 4;++stage) {
    const long long trials = stage_params[stage][0];
    const long long maxwords = stage_params[stage][1];

    for (long long trial = 0;trial < trials;++trial) {
      const long long nwords = myrandom() % (maxwords + 1);
      const long long nbytes = 8*nwords;
      long long pos;

      input_prepare(ref,buf,nbytes);
      secret(buf,nbytes);
      crypto_sort(words,nwords);
      declassify(buf,nbytes);
      output_compare(ref,buf,nbytes,"crypto_sort");

      /* stop at the first adjacent pair that breaks the descending order */
      pos = 1;
      while (pos < nwords && float64down_cmp(&words[pos-1],&words[pos]) <= 0) ++pos;
      if (pos < nwords) fail("failure: crypto_sort output is not in order\n");

      double_canary(ref,buf,nbytes);
      qsort(ref,nwords,8,float64down_cmp);
      output_compare(ref,buf,nbytes,"crypto_sort");
      if (memcmp(ref,buf,nbytes) != 0) fail("failure: crypto_sort does not match qsort\n");
    }
  }
}

/*
 * Driver for the sort_float64down tests.
 *
 * Returns immediately unless the command-line filters (targeto/targetp)
 * select operation "sort" and primitive "float64down".  Otherwise it
 * obtains two buffers sized for the largest test (1048576 words of 8 bytes)
 * and, for each buffer offset 0 and 1, runs test_sort_float64down_impl
 * through forked() for impl = -1 and for every impl below
 * djbsort_numimpl_float64down().
 *
 * Offset 1 is skipped under valgrind; targetoffset restricts the run to a
 * single offset.
 */
static void test_sort_float64down(void)
{
  unsigned char *base_x;
  unsigned char *base_x2;

  if (targeto && strcmp(targeto,"sort")) return;
  if (targetp && strcmp(targetp,"float64down")) return;
  storage_sort_float64down_x = callocplus(8*1048576);
  test_sort_float64down_x = (unsigned char *) aligned(storage_sort_float64down_x,8*1048576);
  storage_sort_float64down_x2 = callocplus(8*1048576);
  test_sort_float64down_x2 = (unsigned char *) aligned(storage_sort_float64down_x2,8*1048576);
  base_x = test_sort_float64down_x;
  base_x2 = test_sort_float64down_x2;

  for (long long offset = 0;offset < 2;++offset) {
    if (targetoffset && atol(targetoffset) != offset) continue;
    if (offset && valgrind) break;
    printf("sort_float64down offset %lld\n",offset);
    test_sort_float64down_offset = offset;
    /*
     * Position the buffers from the fixed base rather than bumping them at
     * the end of each pass: a pass skipped by the targetoffset filter must
     * not leave a later pass running at the wrong position.
     */
    test_sort_float64down_x = base_x + 8*offset;
    test_sort_float64down_x2 = base_x2 + 8*offset;
    for (long long impl = -1;impl < djbsort_numimpl_float64down();++impl)
      forked(test_sort_float64down_impl,impl);
  }
  free(storage_sort_float64down_x2);
  free(storage_sort_float64down_x);
}

/* ----- top level */

#include "print_cpuid.inc"

/*
 * Program entry point.
 *
 * Prints library and CPU information, reads up to five optional positional
 * arguments into the filters targeto, targetp, targeti, targetn and
 * targetoffset (in that order; further arguments are ignored), then runs
 * every test_sort_* driver.
 *
 * Returns 0 when no test called fail(), 100 otherwise.
 */
int main(int argc,char **argv)
{
  /* destinations for the positional arguments, in command-line order */
  const char **filters[5] = {
    &targeto,
    &targetp,
    &targeti,
    &targetn,
    &targetoffset,
  };
  int slot;

  valgrind_init();
  if (valgrind) limits();

  setvbuf(stdout,0,_IOLBF,0);
  printf("djbsort version %s\n",djbsort_version());
  printf("djbsort arch %s\n",djbsort_arch());
  print_cpuid();

  if (valgrind) {
    printf("valgrind %d",(int) valgrind);
    printf(" declassify %d",(int) crypto_declassify_uses_valgrind);
    if (!crypto_declassify_uses_valgrind)
      printf(" (expect false positives)");
    printf("\n");
  }

  /* skip the program name if there is one, then fill the filters in order */
  if (*argv) ++argv;
  for (slot = 0;slot < 5 && *argv;++slot)
    *filters[slot] = *argv++;

  test_sort_int32();
  test_sort_int32down();
  test_sort_uint32();
  test_sort_uint32down();
  test_sort_float32();
  test_sort_float32down();
  test_sort_int64();
  test_sort_int64down();
  test_sort_uint64();
  test_sort_uint64down();
  test_sort_float64();
  test_sort_float64down();

  if (ok) {
    printf("all tests succeeded\n");
    return 0;
  }
  printf("some tests failed\n");
  return 100;
}
