#!/bin/sh
# Abort on the first failing command.  This is set here rather than on the
# shebang line so that it also applies when the script is started as
# "sh scriptname", where shebang options are not seen.
set -e

# Number of times each benchmark binary is executed.
runs=8
# Directory the script was started from; every library gets a subdirectory
# here and the bench*.cc sources are copied from here.
top=$(pwd)

# These are for compiling bench-* including any code in library headers,
# while each library makes its own selection of compiler for its
# precompiled code.  They hold a command plus flags, so they must be
# expanded unquoted.
BENCHCLANG="clang++ -std=c++17 -O3 -march=native"
BENCHGCC="g++ -O3 -march=native"


# =====

# djbsort: benchmark the copy of djbsort that is already installed.
X=djbsort
url=https://sorting.cr.yp.to
# The first output line of djbsort-speed is shown as the version, and its
# third whitespace-separated field is recorded as the date.
version="installed version = $(djbsort-speed | head -1)"
date=$(djbsort-speed | head -1 | awk '{print $3}')
# Banner in bold blue.  ESC is written as \033 so the colour sequence does
# not rely on a raw control byte being present in this file.
printf '\033[1;34m===== %s (%s) version: %s \033[0m\n' "$X" "$url" "$version"

# Prepare a fresh working directory unless the skiprebuild marker exists.
[ -f "$top/$X/skiprebuild" ] || (
  cd "$top"
  rm -rf "$X"
  mkdir "$X"
  cd "$X"
  # assume djbsort installed already, so nothing to build here
  echo "$date" > date
  touch skiprebuild
)

# Compile and run the 32-bit and 64-bit benchmarks unless the skipbench
# marker exists.  Each binary's stdout from all runs is collected in
# bench<bits>-$X.out; progress lines go to stderr.
[ -f "$top/$X/skipbench" ] || (
  for bits in 32 64
  do
    cd "$top/$X"
    cp "$top/bench${bits}-$X.cc" .
    # $BENCHCLANG is intentionally unquoted: it is a command plus flags.
    $BENCHCLANG \
      -o "bench${bits}-$X" "bench${bits}-$X.cc" \
      -ldjbsort -lcpucycles -lm

    for run in $(seq 1 "$runs")
    do
      echo bench "$X" "$bits" "$run" $(date -u) >&2
      "./bench${bits}-$X"
    done > "bench${bits}-$X.out"
  done
  touch "$top/$X/skipbench"
)


# =====

# radixwrapper: built from radixwrapper.h / radixwrapper.c in $top.
X=radixwrapper
url='based on https://cr.yp.to/2026/20260202-sort.c'
version='version included in sortbench'
# The date is read from the installed djbsort-speed output, exactly as in
# the djbsort section.
# NOTE(review): presumably because the wrapper links against -ldjbsort;
# confirm this is intended rather than a copy-paste leftover.
date=$(djbsort-speed | head -1 | awk '{print $3}')
# Banner in bold blue.  ESC is written as \033 so the colour sequence does
# not rely on a raw control byte being present in this file.
printf '\033[1;34m===== %s (%s) version: %s \033[0m\n' "$X" "$url" "$version"

case $(arch) in
  arm*|aarch*)
    echo skipping for $(arch)
    ;;
  *)

    # Copy the wrapper sources into a fresh directory and compile the
    # object file, unless the skiprebuild marker exists.
    [ -f "$top/$X/skiprebuild" ] || (
      cd "$top"
      rm -rf "$X"
      mkdir "$X"
      cp "$X.h" "$X/$X.h"
      # the .c source is stored as .cc so that it is compiled as C++
      cp "$X.c" "$X/$X.cc"
      cd "$X"

      # $BENCHCLANG is intentionally unquoted: it is a command plus flags.
      $BENCHCLANG -c "$X.cc"
      echo "$date" > date
      touch skiprebuild
    )

    # Compile and run the 32-bit benchmark unless the skipbench marker
    # exists.  Stdout of all runs is collected in bench<bits>-$X.out.
    [ -f "$top/$X/skipbench" ] || (
      for bits in 32
      do
        cd "$top/$X"
        cp "$top/bench${bits}-$X.cc" .
        $BENCHCLANG \
          -o "bench${bits}-$X" "bench${bits}-$X.cc" \
          "$X.o" -ldjbsort -lcpucycles -lm

        for run in $(seq 1 "$runs")
        do
          echo bench "$X" "$bits" "$run" $(date -u) >&2
          "./bench${bits}-$X"
        done > "bench${bits}-$X.out"
      done
      touch "$top/$X/skipbench"
    )

esac


# =====

# aspas: header-only use of a pinned aspas_sort commit.
X=aspas
url=https://github.com/vtsynergy/aspas_sort
version=738ec7a1c051f6005ecb0cfec159eff9387b5f9d
date=20180217 # latest as of 20260621
# Banner in bold blue.  ESC is written as \033 so the colour sequence does
# not rely on a raw control byte being present in this file.
printf '\033[1;34m===== %s (%s) version: %s \033[0m\n' "$X" "$url" "$version"

case $(arch) in
  arm*|aarch*)
    echo skipping for $(arch)
    ;;
  *)

    # Clone and check out the pinned commit unless the skiprebuild marker
    # exists; nothing is compiled at this stage.
    [ -f "$top/$X/skiprebuild" ] || (
      cd "$top"
      rm -rf "$X"
      git clone "$url" "$X"
      cd "$X"
      git checkout "$version"
      echo "$date" > date
      touch skiprebuild
    )

    # Compile and run the 32-bit benchmark unless the skipbench marker
    # exists.  Stdout of all runs is collected in bench<bits>-$X.out.
    [ -f "$top/$X/skipbench" ] || (
      for bits in 32
      do
        cd "$top/$X"
        cp "$top/bench${bits}-$X.cc" .
        # $BENCHCLANG is intentionally unquoted: it is a command plus flags.
        $BENCHCLANG \
          -o "bench${bits}-$X" "bench${bits}-$X.cc" \
          -I include -lcpucycles -lm

        for run in $(seq 1 "$runs")
        do
          echo bench "$X" "$bits" "$run" $(date -u) >&2
          "./bench${bits}-$X"
        done > "bench${bits}-$X.out"
      done
      touch "$top/$X/skipbench"
    )

esac


# =====

# far: header-only use of a pinned fast-and-robust commit.
X=far
url=https://github.com/simd-sorting/fast-and-robust
version=054f2e2e9f7c00be4dc8d69567f92ddf9832a8f3
date=20210820 # latest as of 20260621
# Banner in bold blue.  ESC is written as \033 so the colour sequence does
# not rely on a raw control byte being present in this file.
printf '\033[1;34m===== %s (%s) version: %s \033[0m\n' "$X" "$url" "$version"

case $(arch) in
  arm*|aarch*)
    echo skipping for $(arch)
    ;;
  *)

    # Clone and check out the pinned commit unless the skiprebuild marker
    # exists; nothing is compiled at this stage.
    [ -f "$top/$X/skiprebuild" ] || (
      cd "$top"
      rm -rf "$X"
      git clone "$url" "$X"
      cd "$X"
      git checkout "$version"
      echo "$date" > date
      touch skiprebuild
    )

    # Compile and run the 32-bit benchmark unless the skipbench marker
    # exists.  Stdout of all runs is collected in bench<bits>-$X.out
    # (named via $X like every other section).
    [ -f "$top/$X/skipbench" ] || (
      for bits in 32
      do
        cd "$top/$X"
        cp "$top/bench${bits}-$X.cc" .
        # seems noticeably faster with gcc than with clang
        # $BENCHGCC is intentionally unquoted: it is a command plus flags.
        $BENCHGCC \
          -o "bench${bits}-$X" "bench${bits}-$X.cc" \
          -I avx2_sort_demo -lcpucycles -lm

        for run in $(seq 1 "$runs")
        do
          echo bench "$X" "$bits" "$run" $(date -u) >&2
          "./bench${bits}-$X"
        done > "bench${bits}-$X.out"
      done
      touch "$top/$X/skipbench"
    )

esac


# =====

# herf: pinned commit of the herf/radix repository.
X=herf
url=https://github.com/herf/radix
version=97335ff3ab05f052b4028cba0c8eec9e151eb9b2
date=20260213 # latest as of 20260621
# Banner in bold blue.  ESC is written as \033 so the colour sequence does
# not rely on a raw control byte being present in this file.
printf '\033[1;34m===== %s (%s) version: %s \033[0m\n' "$X" "$url" "$version"

# Clone the pinned commit and derive $X.h and $X.cc from its radix.cpp,
# unless the skiprebuild marker exists.
[ -f $top/$X/skiprebuild ] || (
  cd $top
  rm -rf $X
  git clone $url $X
  cd $X
  git checkout $version

  # Generate the header that declares herf_sort, the wrapper appended to the
  # end of $X.cc below.
  (
    echo '#ifndef herf_h'
    echo '#define herf_h'
    echo '#include <inttypes.h>'
    echo 'extern void herf_sort(int32_t *,uint32_t);'
    echo '#endif'
  ) > $X.h

  # Filter radix.cpp (stdin of this subshell) into $X.cc (its stdout).
  # The stages run in order and later ones depend on earlier ones:
  #  - the grep -v stages drop every line matching the given pattern
  #  - sed -n '/Simple test/q; p' keeps only the lines before the first
  #    line containing "Simple test"
  #  - "// memcpy" is turned back into a live memcpy call
  #  - the int32/uint32 typedefs are redirected to int32_t/uint32_t
  #  - real32, farray and sorted are renamed to uint32, array and sort
  #  - the FloatFlip/IFloatFlip/FloatFlipX calls become XOR with 0x80000000
  #    (presumably so the float radix sort orders signed 32-bit integers;
  #    TODO confirm against radix.cpp)
  #  - _mm_prefetch(..., 0) gets the hint _MM_HINT_NTA instead of 0
  #  - the "Configuration".."define C" and "Visual".."utils" line ranges
  #    are deleted
  #  - '#include "herf.h"' is appended after the line matching "stdlib"
  #  - an x86-only #if is appended after the "if PREFETCH" line, then the
  #    range from the first PREFETCH line through "if PREFETCH" is deleted
  # NOTE(review): the one-line "a<text>" sed form used here is accepted by
  # GNU sed; other sed implementations may require "a\" plus a newline.
  #
  # The echo lines after the pipeline append the herf_sort wrapper, which
  # calls RadixSort11 with x as input and a local array y[n] as scratch.
  # NOTE(review): y[n] is a variable-length local array; a very large n
  # could exceed the stack size - confirm against the benchmark sizes.
  ( grep -v 'windows.h' \
    | grep -v 'stdio.h' \
    | grep -v 'real64' \
    | grep -v 'uint8' \
    | grep -v 'typedef float real32' \
    | grep -v 'uint32 ct = 65536' \
    | sed -n -e '/Simple test/q' -e p \
    | sed 's_// memcpy_memcpy_' \
    | sed 's/typedef long int32/typedef int32_t int32/' \
    | sed 's/typedef unsigned long uint32/typedef uint32_t uint32/' \
    | sed 's_real32_uint32_g' \
    | sed 's_farray_array_g' \
    | sed 's_sorted_sort_g' \
    | sed 's/IFloatFlip(ai)/0x80000000 ^ ai/' \
    | sed 's/FloatFlip((uint32&)array\[i\])/0x80000000 ^ array[i]/' \
    | sed 's/FloatFlipX(fi)/fi = 0x80000000 ^ fi/' \
    | sed 's/floatflip/flip/' \
    | sed 's/_mm_prefetch(\(.*\), 0)/_mm_prefetch(\1, _MM_HINT_NTA)/' \
    | sed 's/cpointer(/(cpointer)(/' \
    | sed '/Configuration/, /define C/ d' \
    | sed '/Visual/, /utils/ d' \
    | sed '/stdlib/ a#include "herf.h"' \
    | sed '/if PREFETCH/ a#if defined(__x86_64__) || defined(__i386__)' \
    | sed '/PREFETCH/, /if PREFETCH/ d' \
    | grep -v 'uint32 \*sort = (uint32\*)sort' \
    | grep -v 'uint32 \*array = (uint32\*)array' \
    | grep -v -e ---------

    echo 'void herf_sort(int32 *x,uint32 n)'
    echo '{'
    echo '  int32 y[n];'
    echo '  RadixSort11((uint32 *) x,(uint32 *) y,n);'
    echo '}'
  ) < radix.cpp > $X.cc

  # Record the upstream date and mark the build as done.
  echo $date > date
  touch skiprebuild
)

# Compile the 32-bit benchmark together with the generated $X.cc and run it
# $runs times, unless the skipbench marker exists.  Stdout of all runs is
# collected in bench<bits>-$X.out; progress lines go to stderr.
# Paths are quoted so a working directory containing whitespace still works.
[ -f "$top/$X/skipbench" ] || (
  for bits in 32
  do
    cd "$top/$X"
    cp "$top/bench${bits}-$X.cc" .
    # $BENCHCLANG is intentionally unquoted: it is a command plus flags.
    $BENCHCLANG \
      -o "bench${bits}-$X" "bench${bits}-$X.cc" \
      "$X.cc" -lcpucycles -lm

    for run in $(seq 1 "$runs")
    do
      echo bench "$X" "$bits" "$run" $(date -u) >&2
      "./bench${bits}-$X"
    done > "bench${bits}-$X.out"
  done
  touch "$top/$X/skipbench"
)


# =====

# sid1607: pinned commit of avx2-merge-sort, compiled to merge_sort.o.
X=sid1607
url=https://github.com/sid1607/avx2-merge-sort
version=f1c4e0f5f0e28cde5d3cdf16ebf0ce745201366a
date=20161212 # latest as of 20260621
# Banner in bold blue.  ESC is written as \033 so the colour sequence does
# not rely on a raw control byte being present in this file.
printf '\033[1;34m===== %s (%s) version: %s \033[0m\n' "$X" "$url" "$version"

case $(arch) in
  arm*|aarch*)
    echo skipping for $(arch)
    ;;
  *)

    # Clone the pinned commit and build merge_sort.o unless the skiprebuild
    # marker exists.
    [ -f "$top/$X/skiprebuild" ] || (
      cd "$top"
      rm -rf "$X"
      git clone "$url" "$X"
      cd "$X"
      git checkout "$version"

      # same flags as in Makefile
      # (not calling make here since that produces a test program without merge_sort.o)
      g++ -m64 -std=c++0x -O3 -Wall -mavx2 -c merge_sort.cpp

      echo "$date" > date
      touch skiprebuild
    )

    # Compile and run the 32-bit benchmark unless the skipbench marker
    # exists.  Stdout of all runs is collected in bench<bits>-$X.out.
    [ -f "$top/$X/skipbench" ] || (
      for bits in 32
      do
        cd "$top/$X"
        cp "$top/bench${bits}-$X.cc" .
        # $BENCHCLANG is intentionally unquoted: it is a command plus flags.
        $BENCHCLANG \
          -o "bench${bits}-$X" "bench${bits}-$X.cc" \
          merge_sort.o -lcpucycles -lm

        for run in $(seq 1 "$runs")
        do
          echo bench "$X" "$bits" "$run" $(date -u) >&2
          "./bench${bits}-$X"
        done > "bench${bits}-$X.out"
      done
      touch "$top/$X/skipbench"
    )

esac


# =====

# vxsort: pinned commit of vxsort-cpp, built with cmake.
X=vxsort
url=https://github.com/damageboy/vxsort-cpp
version=2c7f79ba539a5c1ad2acf1c2e3c0fa828da25635
date=20230510 # latest as of 20260621
# Banner in bold blue.  ESC is written as \033 so the colour sequence does
# not rely on a raw control byte being present in this file.
printf '\033[1;34m===== %s (%s) version: %s \033[0m\n' "$X" "$url" "$version"

case $(arch) in
  arm*|aarch*)
    echo skipping for $(arch)
    ;;
  *)

    # Clone the pinned commit, patch CMakeLists.txt and build the library,
    # unless the skiprebuild marker exists.
    [ -f "$top/$X/skiprebuild" ] || (
      cd "$top"
      rm -rf "$X"
      git clone "$url" "$X"
      cd "$X"
      git checkout "$version"

      # Set the required cmake version to 3.15 and comment out the
      # benchmark and test configuration/subdirectories of the upstream
      # build.
      sed -i 's/^cmake_minimum_required(.*)$/cmake_minimum_required(VERSION 3.15)/' CMakeLists.txt
      sed -i 's/include(ConfigGBench)/# include(ConfigGBench)/' CMakeLists.txt
      sed -i 's/include(ConfigGTest)/# include(ConfigGTest)/' CMakeLists.txt
      sed -i 's!add_subdirectory(${PROJECT_SOURCE_DIR}/bench/)!# add_subdirectory(${PROJECT_SOURCE_DIR}/bench/)!' CMakeLists.txt
      sed -i 's!add_subdirectory(${PROJECT_SOURCE_DIR}/tests/)!# add_subdirectory(${PROJECT_SOURCE_DIR}/tests/)!' CMakeLists.txt

      # Out-of-tree build; the subshell keeps the cd local.
      (
        mkdir build-release
        cd build-release
        cmake ..
        make -j 4
      )

      echo "$date" > date
      touch skiprebuild
    )

    # Compile and run the 32-bit and 64-bit benchmarks unless the skipbench
    # marker exists.  Stdout of all runs is collected in bench<bits>-$X.out.
    [ -f "$top/$X/skipbench" ] || (
      for bits in 32 64
      do
        cd "$top/$X"
        cp "$top/bench${bits}-$X.cc" .
        # $BENCHCLANG is intentionally unquoted: it is a command plus flags.
        $BENCHCLANG -std=c++17 \
          -o "bench${bits}-$X" "bench${bits}-$X.cc" \
          -I vxsort \
          -I vxsort/vector_machine \
          -I build-release/_deps/cpu_features-src/include \
          -L build-release/vxsort \
          -L build-release/_deps/cpu_features-build \
          -lvxsort \
          -lcpu_features -lcpucycles -lm

        for run in $(seq 1 "$runs")
        do
          echo bench "$X" "$bits" "$run" $(date -u) >&2
          "./bench${bits}-$X"
        done > "bench${bits}-$X.out"
      done
      touch "$top/$X/skipbench"
    )

esac


# =====

# x86simdsort: pinned commit of x86-simd-sort, built with meson.
X=x86simdsort
url=https://github.com/intel/x86-simd-sort
version=b8c9fd861f25ce0549f45d71058dfc2ba19508e4
date=20260617 # latest as of 20260621
# Banner in bold blue.  ESC is written as \033 so the colour sequence does
# not rely on a raw control byte being present in this file.
printf '\033[1;34m===== %s (%s) version: %s \033[0m\n' "$X" "$url" "$version"

case $(arch) in
  arm*|aarch*)
    echo skipping for $(arch)
    ;;
  *)

    # Clone the pinned commit and build the shared library unless the
    # skiprebuild marker exists.
    [ -f "$top/$X/skiprebuild" ] || (
      cd "$top"
      rm -rf "$X"
      git clone "$url" "$X"
      cd "$X"
      git checkout "$version"

      ( # many -Wmaybe-uninitialized errors with current compiler
        # so save output to a file
        exec > mesonlog 2>&1

        # Kept as separate commands: in "a && b" a failure of "a" is exempt
        # from "set -e", which would let "meson compile" run from the wrong
        # directory after a failed setup.
        meson setup --buildtype release builddir
        cd builddir
        meson compile
      )

      echo "$date" > date
      touch skiprebuild
    )

    # Compile and run the 32-bit and 64-bit benchmarks unless the skipbench
    # marker exists.  Stdout of all runs is collected in bench<bits>-$X.out.
    [ -f "$top/$X/skipbench" ] || (
      for bits in 32 64
      do
        cd "$top/$X"
        cp "$top/bench${bits}-$X.cc" .
        # $BENCHGCC is intentionally unquoted: it is a command plus flags.
        $BENCHGCC -std=c++20 \
          -o "bench${bits}-$X" "bench${bits}-$X.cc" \
          -I lib -L builddir -L builddir/lib \
          -lx86simdsortcpp -lcpucycles -lm

        for run in $(seq 1 "$runs")
        do
          echo bench "$X" "$bits" "$run" $(date -u) >&2
          # Prepend the build directory to the runtime library search path,
          # keeping any existing LD_LIBRARY_PATH entries after it.
          env LD_LIBRARY_PATH="$top/$X/builddir${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" \
            "./bench${bits}-$X"
        done > "bench${bits}-$X.out"
      done
      touch "$top/$X/skipbench"
    )

esac


# =====

# vqsort: pinned commit of Google Highway, built with cmake.
X=vqsort
url=https://github.com/google/highway
version=fc7fe272166535357951bf9f49077cf3b68086c1
date=20260620 # latest as of 20260621
# Banner in bold blue.  ESC is written as \033 so the colour sequence does
# not rely on a raw control byte being present in this file.
printf '\033[1;34m===== %s (%s) version: %s \033[0m\n' "$X" "$url" "$version"

# Clone the pinned commit and build the libraries unless the skiprebuild
# marker exists.
[ -f "$top/$X/skiprebuild" ] || (
  cd "$top"
  rm -rf "$X"
  git clone "$url" "$X"
  cd "$X"

  (
    git checkout "$version"
    # Kept as separate commands: in "a && b" a failure of "a" is exempt from
    # "set -e", which would let cmake run against the wrong directory.
    mkdir -p build
    cd build
    cmake -DBUILD_TESTING=OFF -DHWY_ENABLE_TESTS=OFF -DHWY_TEST_STANDALONE=ON ..
    make -j 4
  )

  echo "$date" > date
  touch skiprebuild
)

# Compile and run the 32-bit and 64-bit benchmarks unless the skipbench
# marker exists.  Stdout of all runs is collected in bench<bits>-$X.out.
[ -f "$top/$X/skipbench" ] || (
  for bits in 32 64
  do
    cd "$top/$X"
    cp "$top/bench${bits}-$X.cc" .
    # $BENCHCLANG is intentionally unquoted: it is a command plus flags.
    $BENCHCLANG \
      -o "bench${bits}-$X" "bench${bits}-$X.cc" \
      -I . -I build/googletest-src/googletest/include -L build -L build/lib \
      -lhwy_contrib -lhwy -pthread -lcpucycles -lm

    for run in $(seq 1 "$runs")
    do
      echo bench "$X" "$bits" "$run" $(date -u) >&2
      "./bench${bits}-$X"
    done > "bench${bits}-$X.out"
  done
  touch "$top/$X/skipbench"
)


# =====

# stdsort: std::sort from the C++ standard library shipped with the system.
X=stdsort
url='std::sort'
version='installed version'
# Banner in bold blue.  ESC is written as \033 so the colour sequence does
# not rely on a raw control byte being present in this file.
printf '\033[1;34m===== %s (%s) version: %s \033[0m\n' "$X" "$url" "$version"

# Prepare a fresh working directory unless the skiprebuild marker exists.
# No date file is written for this section.
[ -f "$top/$X/skiprebuild" ] || (
  cd "$top"
  rm -rf "$X"
  mkdir "$X"
  cd "$X"
  # will use whichever version comes with OS, so nothing to build here
  touch skiprebuild
)

# Compile and run the 32-bit and 64-bit benchmarks unless the skipbench
# marker exists.  Stdout of all runs is collected in bench<bits>-$X.out.
[ -f "$top/$X/skipbench" ] || (
  for bits in 32 64
  do
    cd "$top/$X"
    cp "$top/bench${bits}-$X.cc" .
    # $BENCHCLANG is intentionally unquoted: it is a command plus flags.
    $BENCHCLANG \
      -o "bench${bits}-$X" "bench${bits}-$X.cc" \
      -lcpucycles -lm

    for run in $(seq 1 "$runs")
    do
      echo bench "$X" "$bits" "$run" $(date -u) >&2
      "./bench${bits}-$X"
    done > "bench${bits}-$X.out"
  done
  touch "$top/$X/skipbench"
)


# =====

echo '[1;34m===== create plot32.pdf, plot64.pdf [0m'

cd $top
for bits in 32 64
do
  ./plot $bits \
  stdsort black \
  herf slategray \
  aspas darkcyan \
  sid1607 black \
  vqsort red \
  vxsort darkviolet \
  x86simdsort darkgreen \
  far sienna \
  djbsort blue \
  radixwrapper orangered
done
