commit 18c9a8b8905304cf5f8fc15825769046a3144866
parent f25a10e19eca294c4e6a99e4f80ce5cfd11a0e5f
Author: Sebastiano Tronto <sebastiano@tronto.net>
Date: Sun, 18 Aug 2024 14:26:45 +0200
Reorganized folder structure
Diffstat:
44 files changed, 2246 insertions(+), 2281 deletions(-)
diff --git a/LICENSE b/LICENSE
@@ -2,7 +2,9 @@ The following license applies to every C source code and header file
distributed with this LICENSE file.
h48 - a prototype for an optimal rubik's cube solver
- Copyright (C) 2023 Sebastiano Tronto <sebastiano@tronto.net>
+ Copyright (C) 2023-2024
+ Sebastiano Tronto <sebastiano@tronto.net>
+ Enrico Tenuti <>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
diff --git a/Makefile b/Makefile
@@ -3,13 +3,13 @@ include config.mk
all: cube.o debugcube.o
cube.s: clean
- ${CC} -D${CUBETYPE} ${CFLAGS} -c -S -o cube.s src/cube.c
+ ${CC} -D${CUBETYPE} ${CFLAGS} -c -S -o cube.s src/nissy.c
cube.o: clean
- ${CC} -D${CUBETYPE} ${CFLAGS} -c -o cube.o src/cube.c
+ ${CC} -D${CUBETYPE} ${CFLAGS} -c -o cube.o src/nissy.c
debugcube.o: clean
- ${CC} -D${CUBETYPE} ${DBGFLAGS} -c -o debugcube.o src/cube.c
+ ${CC} -D${CUBETYPE} ${DBGFLAGS} -c -o debugcube.o src/nissy.c
clean:
rm -rf *.o run
diff --git a/shell.c b/shell.c
@@ -7,7 +7,7 @@
#include <string.h>
#include <time.h>
-#include "src/cube.h"
+#include "src/nissy.h"
#define PRINTCUBE_BUFFER_SIZE 1024 /* Should be enough */
#define SOLUTIONS_BUFFER_SIZE 500000 /* Should be enough */
diff --git a/src/arch/arch.h b/src/arch/arch.h
@@ -0,0 +1,41 @@
+#if defined(CUBE_AVX2)
+
+#include <immintrin.h>
+
+typedef __m256i cube_t;
+
+#if !defined(TEST_H)
+#include "common.h"
+#include "avx2.h"
+#endif
+
+#elif defined(CUBE_NEON)
+
+#include <stdlib.h>
+#include <arm_neon.h>
+
+typedef struct {
+ uint8x16_t corner;
+ uint8x16_t edge;
+} cube_t;
+
+#if !defined(TEST_H)
+#include "common.h"
+#include "neon.h"
+#endif
+
+#else
+
+#include <stdlib.h>
+
+typedef struct {
+ uint8_t corner[8];
+ uint8_t edge[12];
+} cube_t;
+
+#if !defined(TEST_H)
+#include "common.h"
+#include "portable.h"
+#endif
+
+#endif
diff --git a/src/arch/avx2.h b/src/arch/avx2.h
@@ -0,0 +1,319 @@
+#define _co2_avx2 _mm256_set_epi64x(0, 0, 0, INT64_C(0x6060606060606060))
+#define _cocw_avx2 _mm256_set_epi64x(0, 0, 0, INT64_C(0x2020202020202020))
+#define _cp_avx2 _mm256_set_epi64x(0, 0, 0, INT64_C(0x0707070707070707))
+#define _ep_avx2 \
+ _mm256_set_epi64x(INT64_C(0x0F0F0F0F), INT64_C(0x0F0F0F0F0F0F0F0F), 0, 0)
+#define _eo_avx2 \
+ _mm256_set_epi64x(INT64_C(0x10101010), INT64_C(0x1010101010101010), 0, 0)
+
+#define static_cube(c_ufr, c_ubl, c_dfl, c_dbr, c_ufl, c_ubr, c_dfr, c_dbl, \
+ e_uf, e_ub, e_db, e_df, e_ur, e_ul, e_dl, e_dr, e_fr, e_fl, e_bl, e_br) \
+ _mm256_set_epi8(0, 0, 0, 0, e_br, e_bl, e_fl, e_fr, \
+ e_dr, e_dl, e_ul, e_ur, e_df, e_db, e_ub, e_uf, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ c_dbl, c_dfr, c_ubr, c_ufl, c_dbr, c_dfl, c_ubl, c_ufr)
+#define zero _mm256_set_epi64x(0, 0, 0, 0)
+#define solved static_cube( \
+ 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)
+
+_static void
+pieces(cube_t *cube, uint8_t c[static 8], uint8_t e[static 12])
+{
+ uint8_t aux[32];
+
+ _mm256_storeu_si256((__m256i_u *)aux, *cube);
+ memcpy(c, aux, 8);
+ memcpy(e, aux+16, 12);
+}
+
+_static_inline bool
+equal(cube_t c1, cube_t c2)
+{
+ int32_t mask;
+ __m256i cmp;
+
+ cmp = _mm256_cmpeq_epi8(c1, c2);
+ mask = _mm256_movemask_epi8(cmp);
+
+ return mask == ~0;
+}
+
+_static_inline cube_t
+invertco(cube_t c)
+{
+ cube_t co, shleft, shright, summed, newco, cleanco, ret;
+
+ co = _mm256_and_si256(c, _co2_avx2);
+ shleft = _mm256_slli_epi32(co, 1);
+ shright = _mm256_srli_epi32(co, 1);
+ summed = _mm256_or_si256(shleft, shright);
+ newco = _mm256_and_si256(summed, _co2_avx2);
+ cleanco = _mm256_xor_si256(c, co);
+ ret = _mm256_or_si256(cleanco, newco);
+
+ return ret;
+}
+
+_static_inline cube_t
+compose_epcpeo(cube_t c1, cube_t c2)
+{
+ cube_t b, s, eo2;
+
+ /* Permute and clean unused bits */
+ s = _mm256_shuffle_epi8(c1, c2);
+ b = _mm256_set_epi8(
+ ~0, ~0, ~0, ~0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, 0, 0, 0, 0, 0, 0, 0, 0
+ );
+ s = _mm256_andnot_si256(b, s);
+
+ /* Change EO */
+ eo2 = _mm256_and_si256(c2, _eo_avx2);
+ s = _mm256_xor_si256(s, eo2);
+
+ return s;
+}
+
+_static_inline cube_t
+compose_edges(cube_t c1, cube_t c2)
+{
+ return compose_epcpeo(c1, c2);
+}
+
+_static_inline cube_t
+compose_corners(cube_t c1, cube_t c2)
+{
+ /*
+ * We do a full compose. Minor optimizations are possible, like
+ * saving one instruction by not doing EO, but it should not
+ * be significant.
+ */
+ return compose(c1, c2);
+}
+
+_static_inline cube_t
+compose(cube_t c1, cube_t c2)
+{
+ cube_t s, co1, co2, aux, auy1, auy2, auz1, auz2;
+
+ s = compose_epcpeo(c1, c2);
+
+ /* Change CO */
+ co1 = _mm256_and_si256(s, _co2_avx2);
+ co2 = _mm256_and_si256(c2, _co2_avx2);
+ aux = _mm256_add_epi8(co1, co2);
+ auy1 = _mm256_add_epi8(aux, _cocw_avx2);
+ auy2 = _mm256_srli_epi32(auy1, 2);
+ auz1 = _mm256_add_epi8(aux, auy2);
+ auz2 = _mm256_and_si256(auz1, _co2_avx2);
+
+ /* Put together */
+ s = _mm256_andnot_si256(_co2_avx2, s);
+ s = _mm256_or_si256(s, auz2);
+
+ return s;
+}
+
+_static_inline cube_t
+cleanaftershuffle(cube_t c)
+{
+ __m256i b;
+
+ b = _mm256_set_epi8(
+ ~0, ~0, ~0, ~0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, 0, 0, 0, 0, 0, 0, 0, 0
+ );
+
+ return _mm256_andnot_si256(b, c);
+}
+
+_static_inline cube_t
+inverse(cube_t c)
+{
+ /* Method taken from Andrew Skalski's vcube[1]. The addition sequence
+ * was generated using [2].
+ * [1] https://github.com/Voltara/vcube
+ * [2] http://wwwhomes.uni-bielefeld.de/achim/addition_chain.html
+ */
+ cube_t v3, vi, vo, vp, ret;
+
+ v3 = _mm256_shuffle_epi8(c, c);
+ v3 = _mm256_shuffle_epi8(v3, c);
+ vi = _mm256_shuffle_epi8(v3, v3);
+ vi = _mm256_shuffle_epi8(vi, vi);
+ vi = _mm256_shuffle_epi8(vi, vi);
+ vi = _mm256_shuffle_epi8(vi, v3);
+ vi = _mm256_shuffle_epi8(vi, vi);
+ vi = _mm256_shuffle_epi8(vi, vi);
+ vi = _mm256_shuffle_epi8(vi, vi);
+ vi = _mm256_shuffle_epi8(vi, vi);
+ vi = _mm256_shuffle_epi8(vi, c);
+ vi = _mm256_shuffle_epi8(vi, vi);
+ vi = _mm256_shuffle_epi8(vi, vi);
+ vi = _mm256_shuffle_epi8(vi, vi);
+ vi = _mm256_shuffle_epi8(vi, vi);
+ vi = _mm256_shuffle_epi8(vi, vi);
+ vi = _mm256_shuffle_epi8(vi, v3);
+ vi = _mm256_shuffle_epi8(vi, vi);
+ vi = _mm256_shuffle_epi8(vi, c);
+
+ vo = _mm256_and_si256(c, _mm256_or_si256(_eo_avx2, _co2_avx2));
+ vo = _mm256_shuffle_epi8(vo, vi);
+ vp = _mm256_andnot_si256(_mm256_or_si256(_eo_avx2, _co2_avx2), vi);
+ ret = _mm256_or_si256(vp, vo);
+ ret = cleanaftershuffle(ret);
+
+ return invertco(ret);
+}
+
+_static_inline int64_t
+coord_co(cube_t c)
+{
+ cube_t co;
+ int64_t mem[4], ret, i, p;
+
+ co = _mm256_and_si256(c, _co2_avx2);
+ _mm256_storeu_si256((__m256i *)mem, co);
+
+ mem[0] >>= 5;
+ for (i = 0, ret = 0, p = 1; i < 7; i++, mem[0] >>= 8, p *= 3)
+ ret += (mem[0] & 3) * p;
+
+ return ret;
+}
+
+_static_inline int64_t
+coord_csep(cube_t c)
+{
+ cube_t cp, shifted;
+ int64_t mask;
+
+ cp = _mm256_and_si256(c, _cp_avx2);
+ shifted = _mm256_slli_epi32(cp, 5);
+ mask = _mm256_movemask_epi8(shifted);
+
+ return mask & 0x7F;
+}
+
+_static_inline int64_t
+coord_cocsep(cube_t c)
+{
+ return (coord_co(c) << 7) + coord_csep(c);
+}
+
+_static_inline int64_t
+coord_eo(cube_t c)
+{
+ cube_t eo, shifted;
+ int64_t mask;
+
+ eo = _mm256_and_si256(c, _eo_avx2);
+ shifted = _mm256_slli_epi32(eo, 3);
+ mask = _mm256_movemask_epi8(shifted);
+
+ return mask >> 17;
+}
+
+_static_inline int64_t
+coord_esep(cube_t c)
+{
+ cube_t ep;
+ int64_t e, mem[4], i, j, jj, k, l, ret1, ret2, bit1, bit2, is1;
+
+ ep = _mm256_and_si256(c, _ep_avx2);
+ _mm256_storeu_si256((__m256i *)mem, ep);
+
+ mem[3] <<= 8;
+ ret1 = ret2 = 0;
+ k = l = 4;
+ for (i = 0, j = 0; i < 12; i++, mem[i/8 + 2] >>= 8) {
+ e = mem[i/8 + 2];
+
+ bit1 = (e & _esepbit1) >> 2;
+ bit2 = (e & _esepbit2) >> 3;
+ is1 = (1 - bit2) * bit1;
+
+ ret1 += bit2 * binomial[11-i][k];
+ k -= bit2;
+
+ jj = j < 8;
+ ret2 += jj * is1 * binomial[7-(j*jj)][l];
+ l -= is1;
+ j += (1-bit2);
+ }
+
+ return ret1 * 70 + ret2;
+}
+
+_static_inline void
+copy_corners(cube_t *dest, cube_t src)
+{
+ *dest = _mm256_blend_epi32(*dest, src, 0x0F);
+}
+
+_static_inline void
+copy_edges(cube_t *dest, cube_t src)
+{
+ *dest = _mm256_blend_epi32(*dest, src, 0xF0);
+}
+
+_static_inline void
+set_eo(cube_t *cube, int64_t eo)
+{
+ int64_t eo12, eotop, eobot;
+ __m256i veo;
+
+ eo12 = (eo << 1) + (_mm_popcnt_u64(eo) % 2);
+ eotop = (eo12 & (1 << 11)) << 17 |
+ (eo12 & (1 << 10)) << 10 |
+ (eo12 & (1 << 9)) << 3 |
+ (eo12 & (1 << 8)) >> 4;
+ eobot = (eo12 & (1 << 7)) << 53 |
+ (eo12 & (1 << 6)) << 46 |
+ (eo12 & (1 << 5)) << 39 |
+ (eo12 & (1 << 4)) << 32 |
+ (eo12 & (1 << 3)) << 25 |
+ (eo12 & (1 << 2)) << 18 |
+ (eo12 & (1 << 1)) << 11 |
+ (eo12 & 1) << 4;
+ veo = _mm256_set_epi64x(eotop, eobot, 0, 0);
+
+ *cube = _mm256_andnot_si256(_eo_avx2, *cube);
+ *cube = _mm256_or_si256(*cube, veo);
+}
+
+_static_inline cube_t
+invcoord_esep(int64_t esep)
+{
+ cube_t eee, ret;
+ int64_t bit1, bit2, i, j, jj, k, l, s, v, w, is1, set1, set2;
+	uint8_t mem[32] = {0};
+ uint8_t slice[3] = {0};
+
+ set1 = esep % 70;
+ set2 = esep / 70;
+
+ for (i = 0, j = 0, k = 4, l = 4; i < 12; i++) {
+ v = binomial[11-i][k];
+ jj = j < 8;
+ w = jj * binomial[7-(j*jj)][l];
+ bit2 = set2 >= v;
+ bit1 = set1 >= w;
+ is1 = (1 - bit2) * bit1;
+
+ set2 -= bit2 * v;
+ k -= bit2;
+ set1 -= is1 * w;
+ l -= is1;
+ j += (1-bit2);
+ s = 2*bit2 + (1-bit2)*bit1;
+
+ mem[i+16] = (slice[s]++) | (uint8_t)(s << 2);
+ }
+
+ ret = solved;
+ eee = _mm256_loadu_si256((__m256i_u *)&mem);
+ copy_edges(&ret, eee);
+
+ return ret;
+}
diff --git a/src/arch/common.h b/src/arch/common.h
@@ -0,0 +1,19 @@
+_static void pieces(cube_t *, uint8_t [static 8], uint8_t [static 12]);
+_static_inline bool equal(cube_t, cube_t);
+_static_inline cube_t invertco(cube_t);
+_static_inline cube_t compose_epcpeo(cube_t, cube_t);
+_static_inline cube_t compose_edges(cube_t, cube_t);
+_static_inline cube_t compose_corners(cube_t, cube_t);
+_static_inline cube_t compose(cube_t, cube_t);
+_static_inline cube_t inverse(cube_t);
+
+_static_inline int64_t coord_co(cube_t);
+_static_inline int64_t coord_csep(cube_t);
+_static_inline int64_t coord_cocsep(cube_t);
+_static_inline int64_t coord_eo(cube_t);
+_static_inline int64_t coord_esep(cube_t);
+
+_static_inline void copy_corners(cube_t *, cube_t);
+_static_inline void copy_edges(cube_t *, cube_t);
+_static_inline void set_eo(cube_t *, int64_t);
+_static_inline cube_t invcoord_esep(int64_t);
diff --git a/src/arch/neon.h b/src/arch/neon.h
@@ -0,0 +1,348 @@
+#define _co2_neon vdupq_n_u8(0x60)
+#define _cocw_neon vdupq_n_u8(0x20)
+#define _cp_neon vdupq_n_u8(0x07)
+#define _ep_neon vcombine_u8(vdupq_n_u8(0x0F), vdupq_n_u8(0x0F))
+#define _eo_neon vcombine_u8(vdupq_n_u8(0x10), vdupq_n_u8(0x10))
+
+// static cube
+#define static_cube(c_ufr, c_ubl, c_dfl, c_dbr, c_ufl, c_ubr, c_dfr, c_dbl, \
+ e_uf, e_ub, e_db, e_df, e_ur, e_ul, e_dl, e_dr, e_fr, e_fl, e_bl, e_br) \
+ ((cube_t){ \
+ .corner = {c_ufr, c_ubl, c_dfl, c_dbr, c_ufl, c_ubr, c_dfr, c_dbl, 0, 0, 0, 0, 0, 0, 0, 0}, \
+ .edge = {e_uf, e_ub, e_db, e_df, e_ur, e_ul, e_dl, e_dr, e_fr, e_fl, e_bl, e_br, 0, 0, 0, 0}})
+
+// zero cube
+#define zero \
+ (cube_t) \
+ { \
+ .corner = vdupq_n_u8(0), \
+ .edge = vdupq_n_u8(0) \
+ }
+
+// solved cube
+#define solved static_cube( \
+ 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)
+
+_static void
+pieces(cube_t *cube, uint8_t c[static 8], uint8_t e[static 12])
+{
+ // First 8 bytes of the corner vector are copied from the c array
+ vst1_u8(c, vget_low_u8(cube->corner));
+
+ // 12 bytes of the edge vector are copied from the e array
+ // First 8 bytes
+ vst1_u8(e, vget_low_u8(cube->edge));
+ // Next 4 bytes
+ vst1_lane_u32((uint32_t *)(e + 8), vreinterpret_u32_u8(vget_high_u8(cube->edge)), 0);
+}
+
+_static_inline bool
+equal(cube_t c1, cube_t c2)
+{
+ uint8x16_t cmp_corner, cmp_edge;
+ uint64x2_t cmp_corner_u64, cmp_edge_u64;
+ uint64x2_t cmp_result;
+
+ // compare the corner vectors
+ cmp_corner = vceqq_u8(c1.corner, c2.corner);
+ // compare the edge vectors
+ cmp_edge = vceqq_u8(c1.edge, c2.edge);
+
+ // convert the comparison vectors to 64-bit vectors
+ cmp_corner_u64 = vreinterpretq_u64_u8(cmp_corner);
+ cmp_edge_u64 = vreinterpretq_u64_u8(cmp_edge);
+
+ // combine the comparison vectors
+ cmp_result = vandq_u64(cmp_corner_u64, cmp_edge_u64);
+
+ // check if all the bits are set
+ return vgetq_lane_u64(cmp_result, 0) == ~0ULL && vgetq_lane_u64(cmp_result, 1) == ~0ULL;
+}
+
+_static_inline cube_t
+invertco(cube_t c)
+{
+ cube_t ret;
+ uint8x16_t co, shleft, shright, summed, newco, cleanco;
+
+ co = vandq_u8(c.corner, _co2_neon);
+ shleft = vshlq_n_u8(co, 1);
+ shright = vshrq_n_u8(co, 1);
+ summed = vorrq_u8(shleft, shright);
+ newco = vandq_u8(summed, _co2_neon);
+ cleanco = veorq_u8(c.corner, co);
+ ret.corner = vorrq_u8(cleanco, newco);
+ ret.edge = c.edge;
+
+ return ret;
+}
+
+_static_inline uint8x16_t compose_edges_slim(uint8x16_t, uint8x16_t);
+_static_inline uint8x16_t compose_corners_slim(uint8x16_t, uint8x16_t);
+
+_static_inline cube_t
+compose_edges(cube_t c1, cube_t c2)
+{
+ cube_t ret = {0};
+ ret.edge = compose_edges_slim(c1.edge, c2.edge);
+ return ret;
+}
+
+_static_inline cube_t
+compose_corners(cube_t c1, cube_t c2)
+{
+ cube_t ret = {0};
+ ret.corner = compose_corners_slim(c1.corner, c2.corner);
+ return ret;
+}
+
+_static_inline uint8x16_t
+compose_edges_slim(uint8x16_t edge1, uint8x16_t edge2)
+{
+ // Masks
+ uint8x16_t p_bits = vdupq_n_u8(_pbits);
+ uint8x16_t eo_bit = vdupq_n_u8(_eobit);
+
+ // Find the index and permutation
+ uint8x16_t p = vandq_u8(edge2, p_bits);
+ uint8x16_t piece1 = vqtbl1q_u8(edge1, p);
+
+ // Calculate the orientation through XOR
+ uint8x16_t orien = vandq_u8(veorq_u8(edge2, piece1), eo_bit);
+
+ // Combine the results
+ uint8x16_t ret = vorrq_u8(vandq_u8(piece1, p_bits), orien);
+
+ // Mask to clear the last 32 bits of the result
+ uint8x16_t mask_last_32 = vsetq_lane_u32(0, vreinterpretq_u32_u8(ret), 3);
+ ret = vreinterpretq_u8_u32(mask_last_32);
+
+ return ret;
+}
+
+_static_inline uint8x16_t
+compose_corners_slim(uint8x16_t corner1, uint8x16_t corner2)
+{
+ // Masks
+ uint8x16_t p_bits = vdupq_n_u8(_pbits);
+ uint8x16_t cobits = vdupq_n_u8(_cobits);
+ uint8x16_t cobits2 = vdupq_n_u8(_cobits2);
+ uint8x16_t twist_cw = vdupq_n_u8(_ctwist_cw);
+
+ // Find the index and permutation
+ uint8x16_t p = vandq_u8(corner2, p_bits);
+ uint8x16_t piece1 = vqtbl1q_u8(corner1, p);
+
+ // Calculate the orientation
+ uint8x16_t aux = vaddq_u8(vandq_u8(corner2, cobits), vandq_u8(piece1, cobits));
+ uint8x16_t auy = vshrq_n_u8(vaddq_u8(aux, twist_cw), 2);
+ uint8x16_t orien = vandq_u8(vaddq_u8(aux, auy), cobits2);
+
+ // Combine the results
+ uint8x16_t ret = vorrq_u8(vandq_u8(piece1, p_bits), orien);
+
+ // Mask to clear the last 64 bits of the result
+ uint8x16_t mask_last_64 = vsetq_lane_u64(0, vreinterpretq_u64_u8(ret), 1);
+ ret = vreinterpretq_u8_u64(mask_last_64);
+
+ return ret;
+}
+
+_static_inline cube_t
+compose(cube_t c1, cube_t c2)
+{
+ cube_t ret = {0};
+
+ ret.edge = compose_edges_slim(c1.edge, c2.edge);
+ ret.corner = compose_corners_slim(c1.corner, c2.corner);
+
+ return ret;
+}
+
+_static_inline cube_t
+inverse(cube_t cube)
+{
+ uint8_t i, piece, orien;
+ cube_t ret;
+
+ // Temp arrays to store the NEON vectors
+ uint8_t edges[16];
+ uint8_t corners[16];
+
+ // Copy the NEON vectors to the arrays
+ vst1q_u8(edges, cube.edge);
+ vst1q_u8(corners, cube.corner);
+
+ uint8_t edge_result[16] = {0};
+ uint8_t corner_result[16] = {0};
+
+ // Process the edges
+ for (i = 0; i < 12; i++)
+ {
+ piece = edges[i];
+ orien = piece & _eobit;
+ edge_result[piece & _pbits] = i | orien;
+ }
+
+ // Process the corners
+ for (i = 0; i < 8; i++)
+ {
+ piece = corners[i];
+ orien = ((piece << 1) | (piece >> 1)) & _cobits2;
+ corner_result[piece & _pbits] = i | orien;
+ }
+
+ // Copy the results back to the NEON vectors
+ ret.edge = vld1q_u8(edge_result);
+ ret.corner = vld1q_u8(corner_result);
+
+ return ret;
+}
+
+_static_inline int64_t
+coord_co(cube_t c)
+{
+ // Temp array to store the NEON vector
+ uint8_t mem[16];
+ vst1q_u8(mem, c.corner);
+
+ int i, p;
+ int64_t ret;
+
+ for (ret = 0, i = 0, p = 1; i < 7; i++, p *= 3)
+ ret += p * (mem[i] >> _coshift);
+
+ return ret;
+}
+
+_static_inline int64_t
+coord_csep(cube_t c)
+{
+ // Temp array to store the NEON vector
+ uint8_t mem[16];
+ vst1q_u8(mem, c.corner);
+
+ int64_t ret = 0;
+ int i, p;
+ for (ret = 0, i = 0, p = 1; i < 7; i++, p *= 2)
+ ret += p * ((mem[i] & _csepbit) >> 2);
+
+	return ret;
+}
+
+_static_inline int64_t
+coord_cocsep(cube_t c)
+{
+ return (coord_co(c) << 7) + coord_csep(c);
+}
+
+_static_inline int64_t
+coord_eo(cube_t c)
+{
+ int64_t ret = 0;
+ int64_t p = 1;
+
+ // Temp array to store the NEON vector
+ uint8_t mem[16];
+ vst1q_u8(mem, c.edge);
+
+ for (int i = 1; i < 12; i++, p *= 2)
+ {
+ ret += p * (mem[i] >> _eoshift);
+ }
+
+ return ret;
+}
+
+_static_inline int64_t
+coord_esep(cube_t c)
+{
+ int64_t i, j, jj, k, l, ret1, ret2, bit1, bit2, is1;
+
+ // Temp array to store the NEON vector
+ uint8_t mem[16];
+ vst1q_u8(mem, c.edge);
+
+ for (i = 0, j = 0, k = 4, l = 4, ret1 = 0, ret2 = 0; i < 12; i++)
+ {
+ bit1 = (mem[i] & _esepbit1) >> 2;
+ bit2 = (mem[i] & _esepbit2) >> 3;
+ is1 = (1 - bit2) * bit1;
+
+ ret1 += bit2 * binomial[11 - i][k];
+ k -= bit2;
+
+ jj = j < 8;
+ ret2 += jj * is1 * binomial[7 - (j * jj)][l];
+ l -= is1;
+ j += (1 - bit2);
+ }
+
+ return ret1 * 70 + ret2;
+}
+
+_static_inline void
+copy_corners(cube_t *dst, cube_t src)
+{
+ dst->corner = src.corner;
+}
+
+_static_inline void
+copy_edges(cube_t *dst, cube_t src)
+{
+ dst->edge = src.edge;
+}
+
+_static_inline void
+set_eo(cube_t *cube, int64_t eo)
+{
+ // Temp array to store the NEON vector
+ uint8_t mem[16];
+ vst1q_u8(mem, cube->edge);
+ uint8_t i, sum, flip;
+
+ for (sum = 0, i = 1; i < 12; i++, eo >>= 1)
+ {
+ flip = eo % 2;
+ sum += flip;
+ mem[i] = (mem[i] & ~_eobit) | (_eobit * flip);
+ }
+ mem[0] = (mem[0] & ~_eobit) | (_eobit * (sum % 2));
+
+ // Copy the results back to the NEON vector
+ cube->edge = vld1q_u8(mem);
+ return;
+}
+
+_static_inline cube_t
+invcoord_esep(int64_t esep)
+{
+ cube_t ret;
+ int64_t bit1, bit2, i, j, jj, k, l, s, v, w, is1, set1, set2;
+ uint8_t slice[3] = {0};
+
+ ret = solved;
+	uint8_t mem[16] = {0};
+ set1 = esep % 70;
+ set2 = esep / 70;
+
+ for (i = 0, j = 0, k = 4, l = 4; i < 12; i++)
+ {
+ v = binomial[11 - i][k];
+ jj = j < 8;
+ w = jj * binomial[7 - (j * jj)][l];
+ bit2 = set2 >= v;
+ bit1 = set1 >= w;
+ is1 = (1 - bit2) * bit1;
+
+ set2 -= bit2 * v;
+ k -= bit2;
+ set1 -= is1 * w;
+ l -= is1;
+ j += (1 - bit2);
+ s = 2 * bit2 + (1 - bit2) * bit1;
+
+ mem[i] = (slice[s]++) | (uint8_t)(s << 2);
+ }
+
+ ret.edge = vld1q_u8(mem);
+ return ret;
+}
diff --git a/src/arch/portable.h b/src/arch/portable.h
@@ -0,0 +1,272 @@
+#define static_cube(c_ufr, c_ubl, c_dfl, c_dbr, c_ufl, c_ubr, c_dfr, c_dbl, \
+ e_uf, e_ub, e_db, e_df, e_ur, e_ul, e_dl, e_dr, e_fr, e_fl, e_bl, e_br) \
+ ((cube_t) { \
+ .corner = { c_ufr, c_ubl, c_dfl, c_dbr, c_ufl, c_ubr, c_dfr, c_dbl }, \
+ .edge = { e_uf, e_ub, e_db, e_df, e_ur, e_ul, \
+ e_dl, e_dr, e_fr, e_fl, e_bl, e_br } })
+#define zero static_cube( \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
+#define solved static_cube( \
+ 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)
+
+_static void
+pieces(cube_t *cube, uint8_t c[static 8], uint8_t e[static 12])
+{
+ memcpy(c, cube->corner, 8);
+ memcpy(e, cube->edge, 12);
+}
+
+_static_inline bool
+equal(cube_t c1, cube_t c2)
+{
+ uint8_t i;
+ bool ret;
+
+ ret = true;
+ for (i = 0; i < 8; i++)
+ ret = ret && c1.corner[i] == c2.corner[i];
+ for (i = 0; i < 12; i++)
+ ret = ret && c1.edge[i] == c2.edge[i];
+
+ return ret;
+}
+
+_static_inline cube_t
+invertco(cube_t c)
+{
+ uint8_t i, piece, orien;
+ cube_t ret;
+
+ ret = c;
+ for (i = 0; i < 8; i++) {
+ piece = c.corner[i];
+ orien = ((piece << 1) | (piece >> 1)) & _cobits2;
+ ret.corner[i] = (piece & _pbits) | orien;
+ }
+
+ return ret;
+}
+
+_static_inline void
+compose_edges_inplace(cube_t c1, cube_t c2, cube_t *ret)
+{
+ uint8_t i, piece1, piece2, p, orien;
+
+ for (i = 0; i < 12; i++) {
+ piece2 = c2.edge[i];
+ p = piece2 & _pbits;
+ piece1 = c1.edge[p];
+ orien = (piece2 ^ piece1) & _eobit;
+ ret->edge[i] = (piece1 & _pbits) | orien;
+ }
+}
+
+_static_inline void
+compose_corners_inplace(cube_t c1, cube_t c2, cube_t *ret)
+{
+ uint8_t i, piece1, piece2, p, orien, aux, auy;
+
+ for (i = 0; i < 8; i++) {
+ piece2 = c2.corner[i];
+ p = piece2 & _pbits;
+ piece1 = c1.corner[p];
+ aux = (piece2 & _cobits) + (piece1 & _cobits);
+ auy = (aux + _ctwist_cw) >> 2;
+ orien = (aux + auy) & _cobits2;
+ ret->corner[i] = (piece1 & _pbits) | orien;
+ }
+}
+
+_static_inline cube_t
+compose_edges(cube_t c1, cube_t c2)
+{
+ cube_t ret = zero;
+
+ compose_edges_inplace(c1, c2, &ret);
+
+ return ret;
+}
+
+_static_inline cube_t
+compose_corners(cube_t c1, cube_t c2)
+{
+ cube_t ret = zero;
+
+ compose_corners_inplace(c1, c2, &ret);
+
+ return ret;
+}
+
+_static_inline cube_t
+compose(cube_t c1, cube_t c2)
+{
+ cube_t ret = zero;
+
+ compose_edges_inplace(c1, c2, &ret);
+ compose_corners_inplace(c1, c2, &ret);
+
+ return ret;
+}
+
+_static_inline cube_t
+inverse(cube_t cube)
+{
+ uint8_t i, piece, orien;
+ cube_t ret;
+
+ for (i = 0; i < 12; i++) {
+ piece = cube.edge[i];
+ orien = piece & _eobit;
+ ret.edge[piece & _pbits] = i | orien;
+ }
+
+ for (i = 0; i < 8; i++) {
+ piece = cube.corner[i];
+ orien = ((piece << 1) | (piece >> 1)) & _cobits2;
+ ret.corner[piece & _pbits] = i | orien;
+ }
+
+ return ret;
+}
+
+_static_inline int64_t
+coord_co(cube_t c)
+{
+ int i, p;
+ int64_t ret;
+
+ for (ret = 0, i = 0, p = 1; i < 7; i++, p *= 3)
+ ret += p * (c.corner[i] >> _coshift);
+
+ return ret;
+}
+
+/*
+For corner separation, we consider the axis (a.k.a. tetrad) each
+corner belongs to as 0 or 1 and we translate this sequence into binary.
+Ignoring the last bit, we have a value up to 2^7, but not all values are
+possible. Encoding this as a number from 0 to C(8,4) would save about 40%
+of space, but we are not going to use this coordinate in large tables.
+*/
+_static_inline int64_t
+coord_csep(cube_t c)
+{
+ int i, p;
+ int64_t ret;
+
+ for (ret = 0, i = 0, p = 1; i < 7; i++, p *= 2)
+ ret += p * ((c.corner[i] & _csepbit) >> 2);
+
+ return ret;
+}
+
+_static_inline int64_t
+coord_cocsep(cube_t c)
+{
+ return (coord_co(c) << 7) + coord_csep(c);
+}
+
+_static_inline int64_t
+coord_eo(cube_t c)
+{
+ int i, p;
+ int64_t ret;
+
+ for (ret = 0, i = 1, p = 1; i < 12; i++, p *= 2)
+ ret += p * (c.edge[i] >> _eoshift);
+
+ return ret;
+}
+
+/*
+We encode the edge separation as a number from 0 to C(12,4)*C(8,4).
+It can be seen as the composition of two "subset index" coordinates.
+*/
+_static_inline int64_t
+coord_esep(cube_t c)
+{
+ int64_t i, j, jj, k, l, ret1, ret2, bit1, bit2, is1;
+
+ for (i = 0, j = 0, k = 4, l = 4, ret1 = 0, ret2 = 0; i < 12; i++) {
+ /* Simple version:
+ if (c.edge[i] & _esepbit2) {
+ ret1 += binomial[11-i][k--];
+ } else {
+ if (c.edge[i] & _esepbit1)
+ ret2 += binomial[7-j][l--];
+ j++;
+ }
+ */
+
+ bit1 = (c.edge[i] & _esepbit1) >> 2;
+ bit2 = (c.edge[i] & _esepbit2) >> 3;
+ is1 = (1 - bit2) * bit1;
+
+ ret1 += bit2 * binomial[11-i][k];
+ k -= bit2;
+
+ jj = j < 8;
+ ret2 += jj * is1 * binomial[7-(j*jj)][l];
+ l -= is1;
+ j += (1-bit2);
+ }
+
+ return ret1 * 70 + ret2;
+}
+
+_static_inline void
+copy_corners(cube_t *dest, cube_t src)
+{
+ memcpy(&dest->corner, src.corner, sizeof(src.corner));
+}
+
+_static_inline void
+copy_edges(cube_t *dest, cube_t src)
+{
+ memcpy(&dest->edge, src.edge, sizeof(src.edge));
+}
+
+_static_inline void
+set_eo(cube_t *cube, int64_t eo)
+{
+ uint8_t i, sum, flip;
+
+ for (sum = 0, i = 1; i < 12; i++, eo >>= 1) {
+ flip = eo % 2;
+ sum += flip;
+ cube->edge[i] = (cube->edge[i] & ~_eobit) | (_eobit * flip);
+ }
+ cube->edge[0] = (cube->edge[0] & ~_eobit) | (_eobit * (sum % 2));
+}
+
+_static_inline cube_t
+invcoord_esep(int64_t esep)
+{
+ cube_t ret;
+ int64_t bit1, bit2, i, j, jj, k, l, s, v, w, is1, set1, set2;
+ uint8_t slice[3] = {0};
+
+ ret = solved;
+ set1 = esep % 70;
+ set2 = esep / 70;
+
+ for (i = 0, j = 0, k = 4, l = 4; i < 12; i++) {
+ v = binomial[11-i][k];
+ jj = j < 8;
+ w = jj * binomial[7-(j*jj)][l];
+ bit2 = set2 >= v;
+ bit1 = set1 >= w;
+ is1 = (1 - bit2) * bit1;
+
+ set2 -= bit2 * v;
+ k -= bit2;
+ set1 -= is1 * w;
+ l -= is1;
+ j += (1-bit2);
+ s = 2*bit2 + (1-bit2)*bit1;
+
+ ret.edge[i] = (slice[s]++) | (uint8_t)(s << 2);
+ }
+
+ return ret;
+}
diff --git a/src/constant_cubes.h b/src/core/constant_cubes.h
diff --git a/src/core/core.h b/src/core/core.h
@@ -0,0 +1,13 @@
+#include "constant_cubes.h"
+#include "io_moves.h"
+#include "io_trans.h"
+#include "cube.h"
+#include "io_cube.h"
+#include "moves.h"
+
+/* TODO: work in progress */
+#if 0
+#include "transform.h"
+#else
+#include "transform_with_switch.h"
+#endif
diff --git a/src/cube_generic.h b/src/core/cube.h
diff --git a/src/io_cube.h b/src/core/io_cube.h
diff --git a/src/core/io_moves.h b/src/core/io_moves.h
@@ -0,0 +1,62 @@
+_static uint8_t readmove(char);
+_static uint8_t readmodifier(char);
+_static int writemoves(uint8_t *, int, char *);
+
+_static uint8_t
+readmove(char c)
+{
+ switch (c) {
+ case 'U':
+ return _move_U;
+ case 'D':
+ return _move_D;
+ case 'R':
+ return _move_R;
+ case 'L':
+ return _move_L;
+ case 'F':
+ return _move_F;
+ case 'B':
+ return _move_B;
+ default:
+ return _error;
+ }
+}
+
+_static uint8_t
+readmodifier(char c)
+{
+ switch (c) {
+ case '1': /* Fallthrough */
+ case '2': /* Fallthrough */
+ case '3':
+ return c - '0' - 1;
+ case '\'':
+ return 2;
+ default:
+ return 0;
+ }
+}
+
+_static int
+writemoves(uint8_t *m, int n, char *buf)
+{
+ int i;
+ size_t len;
+ const char *s;
+ char *b;
+
+ for (i = 0, b = buf; i < n; i++, b++) {
+ s = movestr[m[i]];
+ len = strlen(s);
+ memcpy(b, s, len);
+ b += len;
+ *b = ' ';
+ }
+
+ if (b != buf)
+ b--; /* Remove last space */
+ *b = '\0';
+
+ return b - buf;
+}
diff --git a/src/core/io_trans.h b/src/core/io_trans.h
@@ -0,0 +1,25 @@
+_static uint8_t readtrans(const char *);
+_static void writetrans(uint8_t, char *);
+
+_static uint8_t
+readtrans(const char *buf)
+{
+ uint8_t t;
+
+ for (t = 0; t < 48; t++)
+ if (!strncmp(buf, transstr[t], 11))
+ return t;
+
+ LOG("readtrans error\n");
+ return _error;
+}
+
+_static void
+writetrans(uint8_t t, char *buf)
+{
+ if (t >= 48)
+ memcpy(buf, "error trans", 11);
+ else
+ memcpy(buf, transstr[t], 11);
+ buf[11] = '\0';
+}
diff --git a/src/moves.h b/src/core/moves.h
diff --git a/src/cube_transform.h b/src/core/transform.h
diff --git a/src/cube_transform_with_switch.h b/src/core/transform_with_switch.h
diff --git a/src/cube.c b/src/cube.c
@@ -1,55 +0,0 @@
-#include <inttypes.h>
-#include <stdarg.h>
-#include <stdbool.h>
-#include <string.h>
-
-void (*nissy_log)(const char *, ...);
-
-#define LOG(...) if (nissy_log != NULL) nissy_log(__VA_ARGS__);
-
-#ifdef DEBUG
-#define _static
-#define _static_inline
-#define DBG_WARN(condition, ...) if (!(condition)) LOG(__VA_ARGS__);
-#define DBG_ASSERT(condition, retval, ...) \
- if (!(condition)) { LOG(__VA_ARGS__); return retval; }
-#else
-#define _static static
-#define _static_inline static inline
-#define DBG_WARN(condition, ...)
-#define DBG_ASSERT(condition, retval, ...)
-#endif
-
-#include "constants.h"
-#include "utils.h"
-
-#if defined(CUBE_AVX2)
-#include <immintrin.h>
-#include "cube_avx2.h"
-#elif defined(CUBE_NEON)
-#include <stdlib.h>
-#include <arm_neon.h>
-#include "cube_neon.h"
-#else
-#include <stdlib.h> /* TODO: check if can be removed */
-#include "cube_portable.h"
-#endif
-
-#include "io_move_trans.h"
-#include "constant_cubes.h"
-#include "cube_generic.h"
-#include "io_cube.h"
-
-/* TODO: work in progress */
-#if 0
-#include "constant_cubes_transform.h"
-#include "cube_transform.h"
-#else
-#include "cube_transform_with_switch.h"
-#endif
-
-#include "moves.h"
-#include "solve_h48.h"
-#include "solve_generic.h"
-
-#include "cube_public.h"
diff --git a/src/cube_avx2.h b/src/cube_avx2.h
@@ -1,341 +0,0 @@
-typedef __m256i cube_t;
-
-#define _co2_avx2 _mm256_set_epi64x(0, 0, 0, INT64_C(0x6060606060606060))
-#define _cocw_avx2 _mm256_set_epi64x(0, 0, 0, INT64_C(0x2020202020202020))
-#define _cp_avx2 _mm256_set_epi64x(0, 0, 0, INT64_C(0x0707070707070707))
-#define _ep_avx2 \
- _mm256_set_epi64x(INT64_C(0x0F0F0F0F), INT64_C(0x0F0F0F0F0F0F0F0F), 0, 0)
-#define _eo_avx2 \
- _mm256_set_epi64x(INT64_C(0x10101010), INT64_C(0x1010101010101010), 0, 0)
-
-#define static_cube(c_ufr, c_ubl, c_dfl, c_dbr, c_ufl, c_ubr, c_dfr, c_dbl, \
- e_uf, e_ub, e_db, e_df, e_ur, e_ul, e_dl, e_dr, e_fr, e_fl, e_bl, e_br) \
- _mm256_set_epi8(0, 0, 0, 0, e_br, e_bl, e_fl, e_fr, \
- e_dr, e_dl, e_ul, e_ur, e_df, e_db, e_ub, e_uf, \
- 0, 0, 0, 0, 0, 0, 0, 0, \
- c_dbl, c_dfr, c_ubr, c_ufl, c_dbr, c_dfl, c_ubl, c_ufr)
-#define zero _mm256_set_epi64x(0, 0, 0, 0)
-#define solved static_cube( \
- 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)
-
-_static void pieces(cube_t *, uint8_t [static 8], uint8_t [static 12]);
-_static_inline bool equal(cube_t, cube_t);
-_static_inline cube_t invertco(cube_t);
-_static_inline cube_t compose_epcpeo(cube_t, cube_t);
-_static_inline cube_t compose_edges(cube_t, cube_t);
-_static_inline cube_t compose_corners(cube_t, cube_t);
-_static_inline cube_t compose(cube_t, cube_t);
-_static_inline cube_t inverse(cube_t);
-
-_static_inline int64_t coord_co(cube_t);
-_static_inline int64_t coord_csep(cube_t);
-_static_inline int64_t coord_cocsep(cube_t);
-_static_inline int64_t coord_eo(cube_t);
-_static_inline int64_t coord_esep(cube_t);
-
-_static_inline void copy_corners(cube_t *, cube_t);
-_static_inline void copy_edges(cube_t *, cube_t);
-_static_inline void set_eo(cube_t *, int64_t);
-_static_inline cube_t invcoord_esep(int64_t);
-
-_static void
-pieces(cube_t *cube, uint8_t c[static 8], uint8_t e[static 12])
-{
- uint8_t aux[32];
-
- _mm256_storeu_si256((__m256i_u *)aux, *cube);
- memcpy(c, aux, 8);
- memcpy(e, aux+16, 12);
-}
-
-_static_inline bool
-equal(cube_t c1, cube_t c2)
-{
- int32_t mask;
- __m256i cmp;
-
- cmp = _mm256_cmpeq_epi8(c1, c2);
- mask = _mm256_movemask_epi8(cmp);
-
- return mask == ~0;
-}
-
-_static_inline cube_t
-invertco(cube_t c)
-{
- cube_t co, shleft, shright, summed, newco, cleanco, ret;
-
- co = _mm256_and_si256(c, _co2_avx2);
- shleft = _mm256_slli_epi32(co, 1);
- shright = _mm256_srli_epi32(co, 1);
- summed = _mm256_or_si256(shleft, shright);
- newco = _mm256_and_si256(summed, _co2_avx2);
- cleanco = _mm256_xor_si256(c, co);
- ret = _mm256_or_si256(cleanco, newco);
-
- return ret;
-}
-
-_static_inline cube_t
-compose_epcpeo(cube_t c1, cube_t c2)
-{
- cube_t b, s, eo2;
-
- /* Permute and clean unused bits */
- s = _mm256_shuffle_epi8(c1, c2);
- b = _mm256_set_epi8(
- ~0, ~0, ~0, ~0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, 0, 0, 0, 0, 0, 0, 0, 0
- );
- s = _mm256_andnot_si256(b, s);
-
- /* Change EO */
- eo2 = _mm256_and_si256(c2, _eo_avx2);
- s = _mm256_xor_si256(s, eo2);
-
- return s;
-}
-
-_static_inline cube_t
-compose_edges(cube_t c1, cube_t c2)
-{
- return compose_epcpeo(c1, c2);
-}
-
-_static_inline cube_t
-compose_corners(cube_t c1, cube_t c2)
-{
- /*
- * We do a full compose. Minor optimizations are possible, like
- * saving one instruction by not doing EO, but it should not
- * be significant.
- */
- return compose(c1, c2);
-}
-
-_static_inline cube_t
-compose(cube_t c1, cube_t c2)
-{
- cube_t s, co1, co2, aux, auy1, auy2, auz1, auz2;
-
- s = compose_epcpeo(c1, c2);
-
- /* Change CO */
- co1 = _mm256_and_si256(s, _co2_avx2);
- co2 = _mm256_and_si256(c2, _co2_avx2);
- aux = _mm256_add_epi8(co1, co2);
- auy1 = _mm256_add_epi8(aux, _cocw_avx2);
- auy2 = _mm256_srli_epi32(auy1, 2);
- auz1 = _mm256_add_epi8(aux, auy2);
- auz2 = _mm256_and_si256(auz1, _co2_avx2);
-
- /* Put together */
- s = _mm256_andnot_si256(_co2_avx2, s);
- s = _mm256_or_si256(s, auz2);
-
- return s;
-}
-
-_static_inline cube_t
-cleanaftershuffle(cube_t c)
-{
- __m256i b;
-
- b = _mm256_set_epi8(
- ~0, ~0, ~0, ~0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, 0, 0, 0, 0, 0, 0, 0, 0
- );
-
- return _mm256_andnot_si256(b, c);
-}
-
-_static_inline cube_t
-inverse(cube_t c)
-{
- /* Method taken from Andrew Skalski's vcube[1]. The addition sequence
- * was generated using [2].
- * [1] https://github.com/Voltara/vcube
- * [2] http://wwwhomes.uni-bielefeld.de/achim/addition_chain.html
- */
- cube_t v3, vi, vo, vp, ret;
-
- v3 = _mm256_shuffle_epi8(c, c);
- v3 = _mm256_shuffle_epi8(v3, c);
- vi = _mm256_shuffle_epi8(v3, v3);
- vi = _mm256_shuffle_epi8(vi, vi);
- vi = _mm256_shuffle_epi8(vi, vi);
- vi = _mm256_shuffle_epi8(vi, v3);
- vi = _mm256_shuffle_epi8(vi, vi);
- vi = _mm256_shuffle_epi8(vi, vi);
- vi = _mm256_shuffle_epi8(vi, vi);
- vi = _mm256_shuffle_epi8(vi, vi);
- vi = _mm256_shuffle_epi8(vi, c);
- vi = _mm256_shuffle_epi8(vi, vi);
- vi = _mm256_shuffle_epi8(vi, vi);
- vi = _mm256_shuffle_epi8(vi, vi);
- vi = _mm256_shuffle_epi8(vi, vi);
- vi = _mm256_shuffle_epi8(vi, vi);
- vi = _mm256_shuffle_epi8(vi, v3);
- vi = _mm256_shuffle_epi8(vi, vi);
- vi = _mm256_shuffle_epi8(vi, c);
-
- vo = _mm256_and_si256(c, _mm256_or_si256(_eo_avx2, _co2_avx2));
- vo = _mm256_shuffle_epi8(vo, vi);
- vp = _mm256_andnot_si256(_mm256_or_si256(_eo_avx2, _co2_avx2), vi);
- ret = _mm256_or_si256(vp, vo);
- ret = cleanaftershuffle(ret);
-
- return invertco(ret);
-}
-
-_static_inline int64_t
-coord_co(cube_t c)
-{
- cube_t co;
- int64_t mem[4], ret, i, p;
-
- co = _mm256_and_si256(c, _co2_avx2);
- _mm256_storeu_si256((__m256i *)mem, co);
-
- mem[0] >>= 5;
- for (i = 0, ret = 0, p = 1; i < 7; i++, mem[0] >>= 8, p *= 3)
- ret += (mem[0] & 3) * p;
-
- return ret;
-}
-
-_static_inline int64_t
-coord_csep(cube_t c)
-{
- cube_t cp, shifted;
- int64_t mask;
-
- cp = _mm256_and_si256(c, _cp_avx2);
- shifted = _mm256_slli_epi32(cp, 5);
- mask = _mm256_movemask_epi8(shifted);
-
- return mask & 0x7F;
-}
-
-_static_inline int64_t
-coord_cocsep(cube_t c)
-{
- return (coord_co(c) << 7) + coord_csep(c);
-}
-
-_static_inline int64_t
-coord_eo(cube_t c)
-{
- cube_t eo, shifted;
- int64_t mask;
-
- eo = _mm256_and_si256(c, _eo_avx2);
- shifted = _mm256_slli_epi32(eo, 3);
- mask = _mm256_movemask_epi8(shifted);
-
- return mask >> 17;
-}
-
-_static_inline int64_t
-coord_esep(cube_t c)
-{
- cube_t ep;
- int64_t e, mem[4], i, j, jj, k, l, ret1, ret2, bit1, bit2, is1;
-
- ep = _mm256_and_si256(c, _ep_avx2);
- _mm256_storeu_si256((__m256i *)mem, ep);
-
- mem[3] <<= 8;
- ret1 = ret2 = 0;
- k = l = 4;
- for (i = 0, j = 0; i < 12; i++, mem[i/8 + 2] >>= 8) {
- e = mem[i/8 + 2];
-
- bit1 = (e & _esepbit1) >> 2;
- bit2 = (e & _esepbit2) >> 3;
- is1 = (1 - bit2) * bit1;
-
- ret1 += bit2 * binomial[11-i][k];
- k -= bit2;
-
- jj = j < 8;
- ret2 += jj * is1 * binomial[7-(j*jj)][l];
- l -= is1;
- j += (1-bit2);
- }
-
- return ret1 * 70 + ret2;
-}
-
-_static_inline void
-copy_corners(cube_t *dest, cube_t src)
-{
- *dest = _mm256_blend_epi32(*dest, src, 0x0F);
-}
-
-_static_inline void
-copy_edges(cube_t *dest, cube_t src)
-{
- *dest = _mm256_blend_epi32(*dest, src, 0xF0);
-}
-
-_static_inline void
-set_eo(cube_t *cube, int64_t eo)
-{
- int64_t eo12, eotop, eobot;
- __m256i veo;
-
- eo12 = (eo << 1) + (_mm_popcnt_u64(eo) % 2);
- eotop = (eo12 & (1 << 11)) << 17 |
- (eo12 & (1 << 10)) << 10 |
- (eo12 & (1 << 9)) << 3 |
- (eo12 & (1 << 8)) >> 4;
- eobot = (eo12 & (1 << 7)) << 53 |
- (eo12 & (1 << 6)) << 46 |
- (eo12 & (1 << 5)) << 39 |
- (eo12 & (1 << 4)) << 32 |
- (eo12 & (1 << 3)) << 25 |
- (eo12 & (1 << 2)) << 18 |
- (eo12 & (1 << 1)) << 11 |
- (eo12 & 1) << 4;
- veo = _mm256_set_epi64x(eotop, eobot, 0, 0);
-
- *cube = _mm256_andnot_si256(_eo_avx2, *cube);
- *cube = _mm256_or_si256(*cube, veo);
-}
-
-_static_inline cube_t
-invcoord_esep(int64_t esep)
-{
- cube_t eee, ret;
- int64_t bit1, bit2, i, j, jj, k, l, s, v, w, is1, set1, set2;
- uint8_t mem[32];
- uint8_t slice[3] = {0};
-
- set1 = esep % 70;
- set2 = esep / 70;
-
- for (i = 0, j = 0, k = 4, l = 4; i < 12; i++) {
- v = binomial[11-i][k];
- jj = j < 8;
- w = jj * binomial[7-(j*jj)][l];
- bit2 = set2 >= v;
- bit1 = set1 >= w;
- is1 = (1 - bit2) * bit1;
-
- set2 -= bit2 * v;
- k -= bit2;
- set1 -= is1 * w;
- l -= is1;
- j += (1-bit2);
- s = 2*bit2 + (1-bit2)*bit1;
-
- mem[i+16] = (slice[s]++) | (uint8_t)(s << 2);
- }
-
- ret = solved;
- eee = _mm256_loadu_si256((__m256i_u *)&mem);
- copy_edges(&ret, eee);
-
- return ret;
-}
diff --git a/src/cube_neon.h b/src/cube_neon.h
@@ -1,377 +0,0 @@
-// cube_t
-typedef struct
-{
- uint8x16_t corner;
- uint8x16_t edge;
-} cube_t;
-
-#define _co2_neon vdupq_n_u8(0x60)
-#define _cocw_neon vdupq_n_u8(0x20)
-#define _cp_neon vdupq_n_u8(0x07)
-#define _ep_neon vcombine_u8(vdupq_n_u8(0x0F), vdupq_n_u8(0x0F))
-#define _eo_neon vcombine_u8(vdupq_n_u8(0x10), vdupq_n_u8(0x10))
-
-// static cube
-#define static_cube(c_ufr, c_ubl, c_dfl, c_dbr, c_ufl, c_ubr, c_dfr, c_dbl, \
- e_uf, e_ub, e_db, e_df, e_ur, e_ul, e_dl, e_dr, e_fr, e_fl, e_bl, e_br) \
- ((cube_t){ \
- .corner = {c_ufr, c_ubl, c_dfl, c_dbr, c_ufl, c_ubr, c_dfr, c_dbl, 0, 0, 0, 0, 0, 0, 0, 0}, \
- .edge = {e_uf, e_ub, e_db, e_df, e_ur, e_ul, e_dl, e_dr, e_fr, e_fl, e_bl, e_br, 0, 0, 0, 0}})
-
-// zero cube
-#define zero \
- (cube_t) \
- { \
- .corner = vdupq_n_u8(0), \
- .edge = vdupq_n_u8(0) \
- }
-
-// solved cube
-#define solved static_cube( \
- 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)
-
-// Functions
-_static void pieces(cube_t *, uint8_t[static 8], uint8_t[static 12]);
-_static_inline bool equal(cube_t, cube_t);
-_static_inline cube_t invertco(cube_t);
-_static_inline cube_t compose_edges(cube_t, cube_t);
-_static_inline cube_t compose_corners(cube_t, cube_t);
-_static_inline uint8x16_t compose_edges_slim(uint8x16_t, uint8x16_t);
-_static_inline uint8x16_t compose_corners_slim(uint8x16_t, uint8x16_t);
-_static_inline cube_t compose(cube_t, cube_t);
-_static_inline cube_t inverse(cube_t);
-
-_static_inline int64_t coord_co(cube_t);
-_static_inline int64_t coord_csep(cube_t);
-_static_inline int64_t coord_cocsep(cube_t);
-_static_inline int64_t coord_eo(cube_t);
-_static_inline int64_t coord_esep(cube_t);
-
-_static_inline void copy_corners(cube_t *, cube_t);
-_static_inline void copy_edges(cube_t *, cube_t);
-_static_inline void set_eo(cube_t *, int64_t);
-_static_inline cube_t invcoord_esep(int64_t);
-
-_static void
-pieces(cube_t *cube, uint8_t c[static 8], uint8_t e[static 12])
-{
- // First 8 bytes of the corner vector are copied from the c array
- vst1_u8(c, vget_low_u8(cube->corner));
-
- // 12 bytes of the edge vector are copied from the e array
- // First 8 bytes
- vst1_u8(e, vget_low_u8(cube->edge));
- // Next 4 bytes
- vst1_lane_u32((uint32_t *)(e + 8), vreinterpret_u32_u8(vget_high_u8(cube->edge)), 0);
-}
-
-_static_inline bool
-equal(cube_t c1, cube_t c2)
-{
- uint8x16_t cmp_corner, cmp_edge;
- uint64x2_t cmp_corner_u64, cmp_edge_u64;
- uint64x2_t cmp_result;
-
- // compare the corner vectors
- cmp_corner = vceqq_u8(c1.corner, c2.corner);
- // compare the edge vectors
- cmp_edge = vceqq_u8(c1.edge, c2.edge);
-
- // convert the comparison vectors to 64-bit vectors
- cmp_corner_u64 = vreinterpretq_u64_u8(cmp_corner);
- cmp_edge_u64 = vreinterpretq_u64_u8(cmp_edge);
-
- // combine the comparison vectors
- cmp_result = vandq_u64(cmp_corner_u64, cmp_edge_u64);
-
- // check if all the bits are set
- return vgetq_lane_u64(cmp_result, 0) == ~0ULL && vgetq_lane_u64(cmp_result, 1) == ~0ULL;
-}
-
-_static_inline cube_t
-invertco(cube_t c)
-{
- cube_t ret;
- uint8x16_t co, shleft, shright, summed, newco, cleanco;
-
- co = vandq_u8(c.corner, _co2_neon);
- shleft = vshlq_n_u8(co, 1);
- shright = vshrq_n_u8(co, 1);
- summed = vorrq_u8(shleft, shright);
- newco = vandq_u8(summed, _co2_neon);
- cleanco = veorq_u8(c.corner, co);
- ret.corner = vorrq_u8(cleanco, newco);
- ret.edge = c.edge;
-
- return ret;
-}
-
-_static_inline cube_t
-compose_edges(cube_t c1, cube_t c2)
-{
- cube_t ret = {0};
- ret.edge = compose_edges_slim(c1.edge, c2.edge);
- return ret;
-}
-
-_static_inline cube_t
-compose_corners(cube_t c1, cube_t c2)
-{
- cube_t ret = {0};
- ret.corner = compose_corners_slim(c1.corner, c2.corner);
- return ret;
-}
-
-_static_inline uint8x16_t
-compose_edges_slim(uint8x16_t edge1, uint8x16_t edge2)
-{
- // Masks
- uint8x16_t p_bits = vdupq_n_u8(_pbits);
- uint8x16_t eo_bit = vdupq_n_u8(_eobit);
-
- // Find the index and permutation
- uint8x16_t p = vandq_u8(edge2, p_bits);
- uint8x16_t piece1 = vqtbl1q_u8(edge1, p);
-
- // Calculate the orientation through XOR
- uint8x16_t orien = vandq_u8(veorq_u8(edge2, piece1), eo_bit);
-
- // Combine the results
- uint8x16_t ret = vorrq_u8(vandq_u8(piece1, p_bits), orien);
-
- // Mask to clear the last 32 bits of the result
- uint8x16_t mask_last_32 = vsetq_lane_u32(0, vreinterpretq_u32_u8(ret), 3);
- ret = vreinterpretq_u8_u32(mask_last_32);
-
- return ret;
-}
-
-_static_inline uint8x16_t
-compose_corners_slim(uint8x16_t corner1, uint8x16_t corner2)
-{
- // Masks
- uint8x16_t p_bits = vdupq_n_u8(_pbits);
- uint8x16_t cobits = vdupq_n_u8(_cobits);
- uint8x16_t cobits2 = vdupq_n_u8(_cobits2);
- uint8x16_t twist_cw = vdupq_n_u8(_ctwist_cw);
-
- // Find the index and permutation
- uint8x16_t p = vandq_u8(corner2, p_bits);
- uint8x16_t piece1 = vqtbl1q_u8(corner1, p);
-
- // Calculate the orientation
- uint8x16_t aux = vaddq_u8(vandq_u8(corner2, cobits), vandq_u8(piece1, cobits));
- uint8x16_t auy = vshrq_n_u8(vaddq_u8(aux, twist_cw), 2);
- uint8x16_t orien = vandq_u8(vaddq_u8(aux, auy), cobits2);
-
- // Combine the results
- uint8x16_t ret = vorrq_u8(vandq_u8(piece1, p_bits), orien);
-
- // Mask to clear the last 64 bits of the result
- uint8x16_t mask_last_64 = vsetq_lane_u64(0, vreinterpretq_u64_u8(ret), 1);
- ret = vreinterpretq_u8_u64(mask_last_64);
-
- return ret;
-}
-
-_static_inline cube_t
-compose(cube_t c1, cube_t c2)
-{
- cube_t ret = {0};
-
- ret.edge = compose_edges_slim(c1.edge, c2.edge);
- ret.corner = compose_corners_slim(c1.corner, c2.corner);
-
- return ret;
-}
-
-_static_inline cube_t
-inverse(cube_t cube)
-{
- uint8_t i, piece, orien;
- cube_t ret;
-
- // Temp arrays to store the NEON vectors
- uint8_t edges[16];
- uint8_t corners[16];
-
- // Copy the NEON vectors to the arrays
- vst1q_u8(edges, cube.edge);
- vst1q_u8(corners, cube.corner);
-
- uint8_t edge_result[16] = {0};
- uint8_t corner_result[16] = {0};
-
- // Process the edges
- for (i = 0; i < 12; i++)
- {
- piece = edges[i];
- orien = piece & _eobit;
- edge_result[piece & _pbits] = i | orien;
- }
-
- // Process the corners
- for (i = 0; i < 8; i++)
- {
- piece = corners[i];
- orien = ((piece << 1) | (piece >> 1)) & _cobits2;
- corner_result[piece & _pbits] = i | orien;
- }
-
- // Copy the results back to the NEON vectors
- ret.edge = vld1q_u8(edge_result);
- ret.corner = vld1q_u8(corner_result);
-
- return ret;
-}
-
-_static_inline int64_t
-coord_co(cube_t c)
-{
- // Temp array to store the NEON vector
- uint8_t mem[16];
- vst1q_u8(mem, c.corner);
-
- int i, p;
- int64_t ret;
-
- for (ret = 0, i = 0, p = 1; i < 7; i++, p *= 3)
- ret += p * (mem[i] >> _coshift);
-
- return ret;
-}
-
-_static_inline int64_t
-coord_csep(cube_t c)
-{
- // Temp array to store the NEON vector
- uint8_t mem[16];
- vst1q_u8(mem, c.corner);
-
- int64_t ret = 0;
- int i, p;
- for (ret = 0, i = 0, p = 1; i < 7; i++, p *= 2)
- ret += p * ((mem[i] & _csepbit) >> 2);
-
- return ret;
- return 0;
-}
-
-_static_inline int64_t
-coord_cocsep(cube_t c)
-{
- return (coord_co(c) << 7) + coord_csep(c);
-}
-
-_static_inline int64_t
-coord_eo(cube_t c)
-{
- int64_t ret = 0;
- int64_t p = 1;
-
- // Temp array to store the NEON vector
- uint8_t mem[16];
- vst1q_u8(mem, c.edge);
-
- for (int i = 1; i < 12; i++, p *= 2)
- {
- ret += p * (mem[i] >> _eoshift);
- }
-
- return ret;
-}
-
-_static_inline int64_t
-coord_esep(cube_t c)
-{
- int64_t i, j, jj, k, l, ret1, ret2, bit1, bit2, is1;
-
- // Temp array to store the NEON vector
- uint8_t mem[16];
- vst1q_u8(mem, c.edge);
-
- for (i = 0, j = 0, k = 4, l = 4, ret1 = 0, ret2 = 0; i < 12; i++)
- {
- bit1 = (mem[i] & _esepbit1) >> 2;
- bit2 = (mem[i] & _esepbit2) >> 3;
- is1 = (1 - bit2) * bit1;
-
- ret1 += bit2 * binomial[11 - i][k];
- k -= bit2;
-
- jj = j < 8;
- ret2 += jj * is1 * binomial[7 - (j * jj)][l];
- l -= is1;
- j += (1 - bit2);
- }
-
- return ret1 * 70 + ret2;
-}
-
-_static_inline void
-copy_corners(cube_t *dst, cube_t src)
-{
- dst->corner = src.corner;
-}
-
-_static_inline void
-copy_edges(cube_t *dst, cube_t src)
-{
- dst->edge = src.edge;
-}
-
-_static_inline void
-set_eo(cube_t *cube, int64_t eo)
-{
- // Temp array to store the NEON vector
- uint8_t mem[16];
- vst1q_u8(mem, cube->edge);
- uint8_t i, sum, flip;
-
- for (sum = 0, i = 1; i < 12; i++, eo >>= 1)
- {
- flip = eo % 2;
- sum += flip;
- mem[i] = (mem[i] & ~_eobit) | (_eobit * flip);
- }
- mem[0] = (mem[0] & ~_eobit) | (_eobit * (sum % 2));
-
- // Copy the results back to the NEON vector
- cube->edge = vld1q_u8(mem);
- return;
-}
-
-_static_inline cube_t
-invcoord_esep(int64_t esep)
-{
- cube_t ret;
- int64_t bit1, bit2, i, j, jj, k, l, s, v, w, is1, set1, set2;
- uint8_t slice[3] = {0};
-
- ret = solved;
- uint8_t mem[16];
- set1 = esep % 70;
- set2 = esep / 70;
-
- for (i = 0, j = 0, k = 4, l = 4; i < 12; i++)
- {
- v = binomial[11 - i][k];
- jj = j < 8;
- w = jj * binomial[7 - (j * jj)][l];
- bit2 = set2 >= v;
- bit1 = set1 >= w;
- is1 = (1 - bit2) * bit1;
-
- set2 -= bit2 * v;
- k -= bit2;
- set1 -= is1 * w;
- l -= is1;
- j += (1 - bit2);
- s = 2 * bit2 + (1 - bit2) * bit1;
-
- mem[i] = (slice[s]++) | (uint8_t)(s << 2);
- }
-
- ret.edge = vld1q_u8(mem);
- return ret;
-}
diff --git a/src/cube_portable.h b/src/cube_portable.h
@@ -1,298 +0,0 @@
-typedef struct {
- uint8_t corner[8];
- uint8_t edge[12];
-} cube_t;
-
-#define static_cube(c_ufr, c_ubl, c_dfl, c_dbr, c_ufl, c_ubr, c_dfr, c_dbl, \
- e_uf, e_ub, e_db, e_df, e_ur, e_ul, e_dl, e_dr, e_fr, e_fl, e_bl, e_br) \
- ((cube_t) { \
- .corner = { c_ufr, c_ubl, c_dfl, c_dbr, c_ufl, c_ubr, c_dfr, c_dbl }, \
- .edge = { e_uf, e_ub, e_db, e_df, e_ur, e_ul, \
- e_dl, e_dr, e_fr, e_fl, e_bl, e_br } })
-#define zero static_cube( \
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
-#define solved static_cube( \
- 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)
-
-_static void pieces(cube_t *, uint8_t [static 8], uint8_t [static 12]);
-_static_inline bool equal(cube_t, cube_t);
-_static_inline cube_t invertco(cube_t);
-_static_inline void compose_edges_inplace(cube_t, cube_t, cube_t *);
-_static_inline void compose_corners_inplace(cube_t, cube_t, cube_t *);
-_static_inline cube_t compose_edges(cube_t, cube_t);
-_static_inline cube_t compose_corners(cube_t, cube_t);
-_static_inline cube_t compose(cube_t, cube_t);
-_static_inline cube_t inverse(cube_t);
-
-_static_inline int64_t coord_co(cube_t);
-_static_inline int64_t coord_csep(cube_t);
-_static_inline int64_t coord_cocsep(cube_t);
-_static_inline int64_t coord_eo(cube_t);
-_static_inline int64_t coord_esep(cube_t);
-
-_static_inline void copy_corners(cube_t *, cube_t);
-_static_inline void copy_edges(cube_t *, cube_t);
-_static_inline void set_eo(cube_t *, int64_t);
-_static_inline cube_t invcoord_esep(int64_t);
-
-_static void
-pieces(cube_t *cube, uint8_t c[static 8], uint8_t e[static 12])
-{
- memcpy(c, cube->corner, 8);
- memcpy(e, cube->edge, 12);
-}
-
-_static_inline bool
-equal(cube_t c1, cube_t c2)
-{
- uint8_t i;
- bool ret;
-
- ret = true;
- for (i = 0; i < 8; i++)
- ret = ret && c1.corner[i] == c2.corner[i];
- for (i = 0; i < 12; i++)
- ret = ret && c1.edge[i] == c2.edge[i];
-
- return ret;
-}
-
-_static_inline cube_t
-invertco(cube_t c)
-{
- uint8_t i, piece, orien;
- cube_t ret;
-
- ret = c;
- for (i = 0; i < 8; i++) {
- piece = c.corner[i];
- orien = ((piece << 1) | (piece >> 1)) & _cobits2;
- ret.corner[i] = (piece & _pbits) | orien;
- }
-
- return ret;
-}
-
-_static_inline void
-compose_edges_inplace(cube_t c1, cube_t c2, cube_t *ret)
-{
- uint8_t i, piece1, piece2, p, orien;
-
- for (i = 0; i < 12; i++) {
- piece2 = c2.edge[i];
- p = piece2 & _pbits;
- piece1 = c1.edge[p];
- orien = (piece2 ^ piece1) & _eobit;
- ret->edge[i] = (piece1 & _pbits) | orien;
- }
-}
-
-_static_inline void
-compose_corners_inplace(cube_t c1, cube_t c2, cube_t *ret)
-{
- uint8_t i, piece1, piece2, p, orien, aux, auy;
-
- for (i = 0; i < 8; i++) {
- piece2 = c2.corner[i];
- p = piece2 & _pbits;
- piece1 = c1.corner[p];
- aux = (piece2 & _cobits) + (piece1 & _cobits);
- auy = (aux + _ctwist_cw) >> 2;
- orien = (aux + auy) & _cobits2;
- ret->corner[i] = (piece1 & _pbits) | orien;
- }
-}
-
-_static_inline cube_t
-compose_edges(cube_t c1, cube_t c2)
-{
- cube_t ret = zero;
-
- compose_edges_inplace(c1, c2, &ret);
-
- return ret;
-}
-
-_static_inline cube_t
-compose_corners(cube_t c1, cube_t c2)
-{
- cube_t ret = zero;
-
- compose_corners_inplace(c1, c2, &ret);
-
- return ret;
-}
-
-_static_inline cube_t
-compose(cube_t c1, cube_t c2)
-{
- cube_t ret = zero;
-
- compose_edges_inplace(c1, c2, &ret);
- compose_corners_inplace(c1, c2, &ret);
-
- return ret;
-}
-
-cube_t
-inverse(cube_t cube)
-{
- uint8_t i, piece, orien;
- cube_t ret;
-
- for (i = 0; i < 12; i++) {
- piece = cube.edge[i];
- orien = piece & _eobit;
- ret.edge[piece & _pbits] = i | orien;
- }
-
- for (i = 0; i < 8; i++) {
- piece = cube.corner[i];
- orien = ((piece << 1) | (piece >> 1)) & _cobits2;
- ret.corner[piece & _pbits] = i | orien;
- }
-
- return ret;
-}
-
-_static_inline int64_t
-coord_co(cube_t c)
-{
- int i, p;
- int64_t ret;
-
- for (ret = 0, i = 0, p = 1; i < 7; i++, p *= 3)
- ret += p * (c.corner[i] >> _coshift);
-
- return ret;
-}
-
-/*
-For corner separation, we consider the axis (a.k.a. tetrad) each
-corner belongs to as 0 or 1 and we translate this sequence into binary.
-Ignoring the last bit, we have a value up to 2^7, but not all values are
-possible. Encoding this as a number from 0 to C(8,4) would save about 40%
-of space, but we are not going to use this coordinate in large tables.
-*/
-_static_inline int64_t
-coord_csep(cube_t c)
-{
- int i, p;
- int64_t ret;
-
- for (ret = 0, i = 0, p = 1; i < 7; i++, p *= 2)
- ret += p * ((c.corner[i] & _csepbit) >> 2);
-
- return ret;
-}
-
-_static_inline int64_t
-coord_cocsep(cube_t c)
-{
- return (coord_co(c) << 7) + coord_csep(c);
-}
-
-_static_inline int64_t
-coord_eo(cube_t c)
-{
- int i, p;
- int64_t ret;
-
- for (ret = 0, i = 1, p = 1; i < 12; i++, p *= 2)
- ret += p * (c.edge[i] >> _eoshift);
-
- return ret;
-}
-
-/*
-We encode the edge separation as a number from 0 to C(12,4)*C(8,4).
-It can be seen as the composition of two "subset index" coordinates.
-*/
-_static_inline int64_t
-coord_esep(cube_t c)
-{
- int64_t i, j, jj, k, l, ret1, ret2, bit1, bit2, is1;
-
- for (i = 0, j = 0, k = 4, l = 4, ret1 = 0, ret2 = 0; i < 12; i++) {
- /* Simple version:
- if (c.edge[i] & _esepbit2) {
- ret1 += binomial[11-i][k--];
- } else {
- if (c.edge[i] & _esepbit1)
- ret2 += binomial[7-j][l--];
- j++;
- }
- */
-
- bit1 = (c.edge[i] & _esepbit1) >> 2;
- bit2 = (c.edge[i] & _esepbit2) >> 3;
- is1 = (1 - bit2) * bit1;
-
- ret1 += bit2 * binomial[11-i][k];
- k -= bit2;
-
- jj = j < 8;
- ret2 += jj * is1 * binomial[7-(j*jj)][l];
- l -= is1;
- j += (1-bit2);
- }
-
- return ret1 * 70 + ret2;
-}
-
-_static_inline void
-copy_corners(cube_t *dest, cube_t src)
-{
- memcpy(&dest->corner, src.corner, sizeof(src.corner));
-}
-
-_static_inline void
-copy_edges(cube_t *dest, cube_t src)
-{
- memcpy(&dest->edge, src.edge, sizeof(src.edge));
-}
-
-_static_inline void
-set_eo(cube_t *cube, int64_t eo)
-{
- uint8_t i, sum, flip;
-
- for (sum = 0, i = 1; i < 12; i++, eo >>= 1) {
- flip = eo % 2;
- sum += flip;
- cube->edge[i] = (cube->edge[i] & ~_eobit) | (_eobit * flip);
- }
- cube->edge[0] = (cube->edge[0] & ~_eobit) | (_eobit * (sum % 2));
-}
-
-_static_inline cube_t
-invcoord_esep(int64_t esep)
-{
- cube_t ret;
- int64_t bit1, bit2, i, j, jj, k, l, s, v, w, is1, set1, set2;
- uint8_t slice[3] = {0};
-
- ret = solved;
- set1 = esep % 70;
- set2 = esep / 70;
-
- for (i = 0, j = 0, k = 4, l = 4; i < 12; i++) {
- v = binomial[11-i][k];
- jj = j < 8;
- w = jj * binomial[7-(j*jj)][l];
- bit2 = set2 >= v;
- bit1 = set1 >= w;
- is1 = (1 - bit2) * bit1;
-
- set2 -= bit2 * v;
- k -= bit2;
- set1 -= is1 * w;
- l -= is1;
- j += (1-bit2);
- s = 2*bit2 + (1-bit2)*bit1;
-
- ret.edge[i] = (slice[s]++) | (uint8_t)(s << 2);
- }
-
- return ret;
-}
diff --git a/src/cube_public.h b/src/cube_public.h
@@ -1,255 +0,0 @@
-#include "cube.h"
-
-_static int64_t write_result(cube_t, char [static 22]);
-
-/* TODO: add option to get DR, maybe C-only, E-only, eo... */
-#define GETCUBE_OPTIONS(S, F) { .option = S, .fix = F }
-struct {
- char *option;
- void (*fix)(int64_t *, int64_t *, int64_t *, int64_t *);
-} getcube_options[] = {
- GETCUBE_OPTIONS("fix", getcube_fix),
- GETCUBE_OPTIONS(NULL, NULL)
-};
-
-_static int64_t
-write_result(cube_t cube, char result[static 22])
-{
- if (!isconsistent(cube)) {
- writecube("B32", zero, result);
- return 2;
- }
-
- writecube("B32", cube, result);
-
- return issolvable(cube) ? 0 : 1;
-}
-
-int64_t
-nissy_compose(
- const char cube[static 22],
- const char permutation[static 22],
- char result[static 22]
-)
-{
- cube_t c, p, res;
-
- c = readcube("B32", cube);
- p = readcube("B32", permutation);
- res = compose(c, p);
-
- return write_result(res, result);
-}
-
-int64_t
-nissy_inverse(
- const char cube[static 22],
- char result[static 22]
-)
-{
- cube_t c, res;
-
- c = readcube("B32", cube);
- res = inverse(c);
-
- return write_result(res, result);
-}
-
-int64_t
-nissy_applymoves(
- const char cube[static 22],
- const char *moves,
- char result[static 22]
-)
-{
- cube_t c, res;
-
- c = readcube("B32", cube);
- res = applymoves(c, moves);
-
- return write_result(res, result);
-}
-
-int64_t
-nissy_applytrans(
- const char cube[static 22],
- const char *transformation,
- char result[static 22]
-)
-{
- cube_t c, res;
-
- c = readcube("B32", cube);
- res = applytrans(c, transformation);
-
- return write_result(res, result);
-}
-
-int64_t
-nissy_frommoves(
- const char *moves,
- char result[static 22]
-)
-{
- cube_t res;
-
- res = applymoves(solved, moves);
-
- return write_result(res, result);
-}
-
-int64_t
-nissy_convert(
- const char *format_in,
- const char *format_out,
- const char *cube_string,
- char *result
-)
-{
- cube_t c;
-
- c = readcube(format_in, cube_string);
- writecube(format_out, c, result);
-
- return isconsistent(c) ? 0 : 2;
-}
-
-int64_t
-nissy_getcube(
- int64_t ep,
- int64_t eo,
- int64_t cp,
- int64_t co,
- const char *options,
- char result[static 22]
-)
-{
- int i;
- cube_t c;
-
- for (i = 0; getcube_options[i].option != NULL; i++)
- if (!strcmp(options, getcube_options[i].option))
- getcube_options[i].fix(&ep, &eo, &cp, &co);
-
- c = getcube(ep, eo, cp, co);
-
- return write_result(c, result);
-}
-
-int64_t
-nissy_datasize(
- const char *solver,
- const char *options
-)
-{
- /* gendata() handles a NULL *data as a "dryrun" request */
- return nissy_gendata(solver, options, NULL);
-}
-
-int64_t
-nissy_gendata(
- const char *solver,
- const char *options,
- void *data
-)
-{
- int64_t ret;
- uint8_t maxdepth, h, i, j;
-
- if (!strcmp(solver, "h48")) {
- /* options are in the form "h;maxdepth" */
- for (i = 0; options[i] != ';'; i++) ;
- for (j = i; options[j]; j++) ;
- h = atoi(options);
- if (h != 0) {
- LOG("Temporarily only h=0 is supported\n");
- ret = -1;
- } else {
- maxdepth = atoi(&options[i+1]);
- ret = gendata_h48h0k4(data, maxdepth);
- }
- } else if (!strcmp(solver, "h48stats")) {
- ret = gendata_h48h0k4(data, 20);
- } else {
- LOG("gendata: implemented only for h48 solver\n");
- ret = -1;
- }
-
- return ret;
-}
-
-int64_t
-nissy_solve(
- const char cube[static 22],
- const char *solver,
- const char *options,
- const char *nisstype,
- int8_t minmoves,
- int8_t maxmoves,
- int64_t maxsolutions,
- int8_t optimal,
- const void *data,
- char *solutions
-)
-{
- cube_t c;
- int64_t ret;
- int h;
-
- c = readcube_B32(cube);
-
- if (!issolvable(c)) {
- LOG("solve: cube is not solvable\n");
- return -1;
- }
-
- if (minmoves < 0) {
- LOG("solve: 'minmoves' is negative, setting it to 0\n");
- minmoves = 0;
- }
-
- if (maxmoves < 0) {
- LOG("solve: 'maxmoves' is negative, setting it to 20\n");
- maxmoves = 20;
- }
-
- if (maxsolutions < 0) {
- LOG("solve: 'maxsols' is negative, stopping\n");
- return -1;
- }
-
- if (maxsolutions == 0) {
- LOG("solve: 'maxsols' is 0, returning no solution\n");
- return 0;
- }
-
- if (solutions == NULL) {
- LOG("solve: return parameter 'solutions' is NULL, stopping\n");
- return -1;
- }
-
- /* TODO define and use solve_options_t */
- if (!strcmp(solver, "h48")) {
- h = atoi(options); /* TODO: better parsing */
- ret = solve_h48(
- c, minmoves, maxmoves, maxsolutions,
- (uint8_t)h, data, solutions);
- ret = -1;
- } else if (!strcmp(solver, "h48stats")) {
- ret = solve_h48stats(c, maxmoves, data, solutions);
- } else if (!strcmp(solver, "simple")) {
- ret = solve_simple(
- c, minmoves, maxmoves, maxsolutions, optimal, solutions);
- } else {
- LOG("solve: unknown solver '%s'\n", solver);
- ret = -1;
- }
-
- return ret;
-}
-
-void
-nissy_setlogger(void (*log)(const char *, ...))
-{
- nissy_log = log;
-}
diff --git a/src/io_move_trans.h b/src/io_move_trans.h
@@ -1,87 +0,0 @@
-_static uint8_t readmove(char);
-_static uint8_t readmodifier(char);
-_static uint8_t readtrans(const char *);
-_static int writemoves(uint8_t *, int, char *);
-_static void writetrans(uint8_t, char *);
-
-_static uint8_t
-readmove(char c)
-{
- switch (c) {
- case 'U':
- return _move_U;
- case 'D':
- return _move_D;
- case 'R':
- return _move_R;
- case 'L':
- return _move_L;
- case 'F':
- return _move_F;
- case 'B':
- return _move_B;
- default:
- return _error;
- }
-}
-
-_static uint8_t
-readmodifier(char c)
-{
- switch (c) {
- case '1': /* Fallthrough */
- case '2': /* Fallthrough */
- case '3':
- return c - '0' - 1;
- case '\'':
- return 2;
- default:
- return 0;
- }
-}
-
-_static uint8_t
-readtrans(const char *buf)
-{
- uint8_t t;
-
- for (t = 0; t < 48; t++)
- if (!strncmp(buf, transstr[t], 11))
- return t;
-
- LOG("readtrans error\n");
- return _error;
-}
-
-_static int
-writemoves(uint8_t *m, int n, char *buf)
-{
- int i;
- size_t len;
- const char *s;
- char *b;
-
- for (i = 0, b = buf; i < n; i++, b++) {
- s = movestr[m[i]];
- len = strlen(s);
- memcpy(b, s, len);
- b += len;
- *b = ' ';
- }
-
- if (b != buf)
- b--; /* Remove last space */
- *b = '\0';
-
- return b - buf;
-}
-
-_static void
-writetrans(uint8_t t, char *buf)
-{
- if (t >= 48)
- memcpy(buf, "error trans", 11);
- else
- memcpy(buf, transstr[t], 11);
- buf[11] = '\0';
-}
diff --git a/src/nissy.c b/src/nissy.c
@@ -0,0 +1,265 @@
+#include <inttypes.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <string.h>
+
+#include "utils/utils.h"
+#include "arch/arch.h"
+#include "core/core.h"
+#include "solvers/solvers.h"
+
+#include "nissy.h"
+
+_static int64_t write_result(cube_t, char [static 22]);
+
+/* TODO: add option to get DR, maybe C-only, E-only, eo... */
+#define GETCUBE_OPTIONS(S, F) { .option = S, .fix = F }
+struct {
+ char *option;
+ void (*fix)(int64_t *, int64_t *, int64_t *, int64_t *);
+} getcube_options[] = {
+ GETCUBE_OPTIONS("fix", getcube_fix),
+ GETCUBE_OPTIONS(NULL, NULL)
+};
+
+_static int64_t
+write_result(cube_t cube, char result[static 22])
+{
+ if (!isconsistent(cube)) {
+ writecube("B32", zero, result);
+ return 2;
+ }
+
+ writecube("B32", cube, result);
+
+ return issolvable(cube) ? 0 : 1;
+}
+
+int64_t
+nissy_compose(
+ const char cube[static 22],
+ const char permutation[static 22],
+ char result[static 22]
+)
+{
+ cube_t c, p, res;
+
+ c = readcube("B32", cube);
+ p = readcube("B32", permutation);
+ res = compose(c, p);
+
+ return write_result(res, result);
+}
+
+int64_t
+nissy_inverse(
+ const char cube[static 22],
+ char result[static 22]
+)
+{
+ cube_t c, res;
+
+ c = readcube("B32", cube);
+ res = inverse(c);
+
+ return write_result(res, result);
+}
+
+int64_t
+nissy_applymoves(
+ const char cube[static 22],
+ const char *moves,
+ char result[static 22]
+)
+{
+ cube_t c, res;
+
+ c = readcube("B32", cube);
+ res = applymoves(c, moves);
+
+ return write_result(res, result);
+}
+
+int64_t
+nissy_applytrans(
+ const char cube[static 22],
+ const char *transformation,
+ char result[static 22]
+)
+{
+ cube_t c, res;
+
+ c = readcube("B32", cube);
+ res = applytrans(c, transformation);
+
+ return write_result(res, result);
+}
+
+int64_t
+nissy_frommoves(
+ const char *moves,
+ char result[static 22]
+)
+{
+ cube_t res;
+
+ res = applymoves(solved, moves);
+
+ return write_result(res, result);
+}
+
+int64_t
+nissy_convert(
+ const char *format_in,
+ const char *format_out,
+ const char *cube_string,
+ char *result
+)
+{
+ cube_t c;
+
+ c = readcube(format_in, cube_string);
+ writecube(format_out, c, result);
+
+ return isconsistent(c) ? 0 : 2;
+}
+
+int64_t
+nissy_getcube(
+ int64_t ep,
+ int64_t eo,
+ int64_t cp,
+ int64_t co,
+ const char *options,
+ char result[static 22]
+)
+{
+ int i;
+ cube_t c;
+
+ for (i = 0; getcube_options[i].option != NULL; i++)
+ if (!strcmp(options, getcube_options[i].option))
+ getcube_options[i].fix(&ep, &eo, &cp, &co);
+
+ c = getcube(ep, eo, cp, co);
+
+ return write_result(c, result);
+}
+
+int64_t
+nissy_datasize(
+ const char *solver,
+ const char *options
+)
+{
+ /* gendata() handles a NULL *data as a "dryrun" request */
+ return nissy_gendata(solver, options, NULL);
+}
+
+int64_t
+nissy_gendata(
+ const char *solver,
+ const char *options,
+ void *data
+)
+{
+ int64_t ret;
+ uint8_t maxdepth, h, i, j;
+
+ if (!strcmp(solver, "h48")) {
+ /* options are in the form "h;maxdepth" */
+		for (i = 0; options[i] != '\0' && options[i] != ';'; i++) ;
+ for (j = i; options[j]; j++) ;
+ h = atoi(options);
+ if (h != 0) {
+ LOG("Temporarily only h=0 is supported\n");
+ ret = -1;
+ } else {
+ maxdepth = atoi(&options[i+1]);
+ ret = gendata_h48h0k4(data, maxdepth);
+ }
+ } else if (!strcmp(solver, "h48stats")) {
+ ret = gendata_h48h0k4(data, 20);
+ } else {
+ LOG("gendata: implemented only for h48 solver\n");
+ ret = -1;
+ }
+
+ return ret;
+}
+
+int64_t
+nissy_solve(
+ const char cube[static 22],
+ const char *solver,
+ const char *options,
+ const char *nisstype,
+ int8_t minmoves,
+ int8_t maxmoves,
+ int64_t maxsolutions,
+ int8_t optimal,
+ const void *data,
+ char *solutions
+)
+{
+ cube_t c;
+ int64_t ret;
+ int h;
+
+ c = readcube_B32(cube);
+
+ if (!issolvable(c)) {
+ LOG("solve: cube is not solvable\n");
+ return -1;
+ }
+
+ if (minmoves < 0) {
+ LOG("solve: 'minmoves' is negative, setting it to 0\n");
+ minmoves = 0;
+ }
+
+ if (maxmoves < 0) {
+ LOG("solve: 'maxmoves' is negative, setting it to 20\n");
+ maxmoves = 20;
+ }
+
+ if (maxsolutions < 0) {
+ LOG("solve: 'maxsols' is negative, stopping\n");
+ return -1;
+ }
+
+ if (maxsolutions == 0) {
+ LOG("solve: 'maxsols' is 0, returning no solution\n");
+ return 0;
+ }
+
+ if (solutions == NULL) {
+ LOG("solve: return parameter 'solutions' is NULL, stopping\n");
+ return -1;
+ }
+
+ /* TODO define and use solve_options_t */
+ if (!strcmp(solver, "h48")) {
+ h = atoi(options); /* TODO: better parsing */
+		ret = solve_h48(
+			c, minmoves, maxmoves,
+			maxsolutions, (uint8_t)h,
+			data, solutions);
+ } else if (!strcmp(solver, "h48stats")) {
+ ret = solve_h48stats(c, maxmoves, data, solutions);
+ } else if (!strcmp(solver, "simple")) {
+ ret = solve_simple(
+ c, minmoves, maxmoves, maxsolutions, optimal, solutions);
+ } else {
+ LOG("solve: unknown solver '%s'\n", solver);
+ ret = -1;
+ }
+
+ return ret;
+}
+
+void
+nissy_setlogger(void (*log)(const char *, ...))
+{
+ nissy_log = log;
+}
diff --git a/src/cube.h b/src/nissy.h
diff --git a/src/solve_h48.h b/src/solve_h48.h
@@ -1,839 +0,0 @@
-#define MAP_UNSET UINT64_C(0xFFFFFFFFFFFFFFFF)
-#define MAP_KEYMASK UINT64_C(0xFFFFFFFFFF)
-#define MAP_KEYSHIFT UINT64_C(40)
-
-#define COCSEP_CLASSES ((size_t)3393)
-#define COCSEP_TABLESIZE ((size_t)_3p7 << (size_t)7)
-#define COCSEP_VISITEDSIZE ((COCSEP_TABLESIZE + (size_t)7) / (size_t)8)
-#define COCSEP_FULLSIZE ((size_t)4 * (COCSEP_TABLESIZE + (size_t)12))
-
-#define ESEP_NOEO (COCSEP_CLASSES * (size_t)_12c4 * (size_t)_8c4)
-#define ESEP_MAX(h) (ESEP_NOEO << (size_t)(h))
-#define ESEP_TABLESIZE(h, k) (ESEP_MAX((h)) / ((size_t)8 / (size_t)(k)))
-
-#define COCLASS_MASK (UINT32_C(0xFFFF) << UINT32_C(16))
-#define COCLASS(x) (((x) & COCLASS_MASK) >> UINT32_C(16))
-#define TTREP_MASK (UINT32_C(0xFF) << UINT32_C(8))
-#define TTREP(x) (((x) & TTREP_MASK) >> UINT32_C(8))
-#define CBOUND_MASK UINT32_C(0xFF)
-#define CBOUND(x) ((x) & CBOUND_MASK)
-#define H48_ESIZE(h) ((_12c4 * _8c4) << (int64_t)(h))
-
-#define ESEP_IND(i) ((uint32_t)(i) / UINT32_C(8))
-#define ESEP_SHIFT(i) (UINT32_C(4) * ((uint32_t)(i) % UINT32_C(8)))
-#define ESEP_MASK(i) ((_bit_u32(4) - (uint32_t)(1)) << ESEP_SHIFT(i))
-#define VISITED_IND(i) ((uint32_t)(i) / UINT32_C(8))
-#define VISITED_MASK(i) (UINT32_C(1) << ((uint32_t)(i) % UINT32_C(8)))
-
-#define MAX_SOLUTION_LENGTH 20
-
-/*
-TODO: This loop other similar h48 coordinates can be improved by only
-transforming edges, but we need to compose transformations (i.e. conjugate
-_t by _ttrep).
-*/
-#define _foreach_h48sim(_cube, _cocsepdata, _selfsim, _h, _action) \
- int64_t _cocsep = coord_cocsep(_cube); \
- uint8_t _ttrep = TTREP(_cocsepdata[_cocsep]); \
- uint8_t _inverse_ttrep = inverse_trans(_ttrep); \
- int64_t _coclass = COCLASS(_cocsepdata[_cocsep]); \
- cube_t _rep = transform(_cube, _ttrep); \
- uint64_t _sim = _selfsim[_coclass]; \
- for (uint8_t _t = 0; _t < 48 && _sim; _t++, _sim >>= 1) { \
- if (!(_sim & 1)) continue; \
- _cube = transform(_rep, _t); \
- _cube = transform(_cube, _inverse_ttrep); \
- _action \
- }
-
-typedef struct {
- uint64_t n;
- uint64_t capacity;
- uint64_t randomizer;
- uint64_t *table;
-} h48map_t;
-
-typedef struct {
- uint64_t key;
- uint64_t val;
-} kvpair_t;
-
-typedef struct {
- cube_t cube;
- uint8_t depth;
- uint8_t maxdepth;
- uint16_t *n;
- uint32_t *buf32;
- uint8_t *visited;
- uint64_t *selfsim;
- cube_t *rep;
-} dfsarg_cocsep_t;
-
-/* TODO keep or not? */
-typedef struct {
- cube_t cube;
- int8_t nmoves;
- int8_t depth;
- uint8_t moves[MAX_SOLUTION_LENGTH];
- uint32_t *cocsepdata;
- h48map_t *visited;
-} dfsarg_genh48set_t;
-
-typedef struct {
- uint8_t depth;
- uint32_t *cocsepdata;
- uint32_t *buf32;
- uint64_t *selfsim;
- int64_t done;
- cube_t *crep;
-} bfsarg_esep_t;
-
-typedef struct {
- cube_t cube;
- cube_t inverse;
- int8_t nmoves;
- int8_t depth;
- uint8_t moves[MAX_SOLUTION_LENGTH];
- int64_t *nsols;
- int64_t maxsolutions;
- uint8_t h;
- uint32_t *cocsepdata;
- uint32_t *h48data;
- char **nextsol;
-} dfsarg_solveh48_t;
-
-typedef struct {
- cube_t cube;
- int8_t nmoves;
- int8_t depth;
- uint8_t moves[MAX_SOLUTION_LENGTH];
- uint32_t *cocsepdata;
- uint32_t *h48data;
- char *s;
-} dfsarg_solveh48stats_t;
-
-_static void h48map_create(h48map_t *, uint64_t, uint64_t);
-_static void h48map_clear(h48map_t *);
-_static void h48map_destroy(h48map_t *);
-_static uint64_t h48map_lookup(h48map_t *, uint64_t);
-_static void h48map_insertmin(h48map_t *, uint64_t, uint64_t);
-_static uint64_t h48map_value(h48map_t *, uint64_t);
-_static kvpair_t h48map_nextkvpair(h48map_t *, uint64_t *);
-
-_static_inline int64_t coord_h48(cube_t, const uint32_t *, uint8_t);
-_static_inline int64_t coord_h48_edges(cube_t, int64_t, uint8_t, uint8_t);
-_static_inline cube_t invcoord_h48(int64_t, const cube_t *, uint8_t);
-
-_static_inline bool get_visited(const uint8_t *, int64_t);
-_static_inline void set_visited(uint8_t *, int64_t);
-_static_inline uint8_t get_esep_pval(const uint32_t *, int64_t);
-_static_inline void set_esep_pval(uint32_t *, int64_t, uint8_t);
-
-_static size_t gendata_cocsep(void *, uint64_t *, cube_t *);
-_static uint32_t gendata_cocsep_dfs(dfsarg_cocsep_t *);
-_static uint64_t gen_h48short(
- uint8_t, const uint32_t *, const cube_t *, const uint64_t *, h48map_t *);
-_static size_t gendata_h48h0k4(void *, uint8_t);
-_static int64_t gendata_h48h0k4_bfs(bfsarg_esep_t *);
-_static int64_t gendata_h48h0k4_bfs_fromdone(bfsarg_esep_t *);
-_static int64_t gendata_h48h0k4_bfs_fromnew(bfsarg_esep_t *);
-
-_static void solve_h48_appendsolution(dfsarg_solveh48_t *);
-_static_inline int8_t get_h48_cdata(cube_t, uint32_t *, uint32_t *);
-_static_inline int8_t get_h48_bound(cube_t, uint32_t, uint8_t, uint32_t *);
-_static_inline bool solve_h48_stop(dfsarg_solveh48_t *);
-_static int64_t solve_h48_dfs(dfsarg_solveh48_t *);
-_static int64_t solve_h48(
- cube_t, int8_t, int8_t, int8_t, uint8_t, const void *, char *);
-
-_static int64_t solve_h48stats_dfs(dfsarg_solveh48stats_t *);
-_static int64_t solve_h48stats(cube_t, int8_t, const void *, char [static 12]);
-
-_static void
-h48map_create(h48map_t *map, uint64_t capacity, uint64_t randomizer)
-{
- map->capacity = capacity;
- map->randomizer = randomizer;
-
- map->table = malloc(map->capacity * sizeof(int64_t));
- h48map_clear(map);
-}
-
-_static void
-h48map_clear(h48map_t *map)
-{
- memset(map->table, 0xFF, map->capacity * sizeof(uint64_t));
- map->n = 0;
-}
-
-_static void
-h48map_destroy(h48map_t *map)
-{
- free(map->table);
-}
-
-_static_inline uint64_t
-h48map_lookup(h48map_t *map, uint64_t x)
-{
- uint64_t hash, i;
-
- hash = ((x % map->capacity) * map->randomizer) % map->capacity;
- for (i = hash;
- map->table[i] != MAP_UNSET && (map->table[i] & MAP_KEYMASK) != x;
- i = (i+1) % map->capacity
- ) ;
-
- return i;
-}
-
-_static_inline void
-h48map_insertmin(h48map_t *map, uint64_t key, uint64_t val)
-{
- uint64_t i, oldval, min;
-
- i = h48map_lookup(map, key);
- oldval = map->table[i] >> MAP_KEYSHIFT;
- min = _min(val, oldval);
-
- map->n += map->table[i] == MAP_UNSET;
- map->table[i] = (key & MAP_KEYMASK) | (min << MAP_KEYSHIFT);
-}
-
-_static_inline uint64_t
-h48map_value(h48map_t *map, uint64_t key)
-{
- return map->table[h48map_lookup(map, key)] >> MAP_KEYSHIFT;
-}
-
-_static kvpair_t
-h48map_nextkvpair(h48map_t *map, uint64_t *p)
-{
- kvpair_t kv;
- uint64_t pair;
-
- kv.key = MAP_UNSET;
- kv.val = MAP_UNSET;
-
- DBG_ASSERT(*p < map->capacity, kv,
- "Error looping over map: given index %" PRIu64 " is out of "
- "range [0,%" PRIu64 "]", *p, map->capacity);
-
- for ( ; *p < map->capacity; (*p)++) {
- if (map->table[*p] != MAP_UNSET) {
- pair = map->table[(*p)++];
- kv.key = pair & MAP_KEYMASK;
- kv.val = pair >> MAP_KEYSHIFT;
- return kv;
- }
- }
-
- return kv;
-}
-
-_static_inline int64_t
-coord_h48(cube_t c, const uint32_t *cocsepdata, uint8_t h)
-{
- int64_t cocsep, coclass;
- uint32_t data;
- uint8_t ttrep;
-
- DBG_ASSERT(h <= 11, -1, "coord_h48: h must be between 0 and 11\n");
-
- cocsep = coord_cocsep(c);
- data = cocsepdata[cocsep];
- coclass = (int64_t)COCLASS(data);
- ttrep = (int64_t)TTREP(data);
-
- return coord_h48_edges(c, coclass, ttrep, h);
-}
-
-_static_inline int64_t
-coord_h48_edges(cube_t c, int64_t coclass, uint8_t ttrep, uint8_t h)
-{
- cube_t d;
- int64_t esep, eo, edges;
-
- d = transform_edges(c, ttrep);
- esep = coord_esep(d);
- eo = coord_eo(d);
- edges = (esep << 11) + eo;
-
- return (coclass * H48_ESIZE(11) + edges) >> (11 - (int64_t)h);
-}
-
-/*
-This function does not necessarily return a cube whose coordinate is
-the given value, because it works up to symmetry. This means that the
-returned cube is a transformed cube of one that gives the correct value.
-*/
-_static_inline cube_t
-invcoord_h48(int64_t i, const cube_t *crep, uint8_t h)
-{
- cube_t ret;
- int64_t hh, coclass, ee, esep, eo;
-
- DBG_ASSERT(h <= 11, zero,
- "invcoord_h48: h must be between 0 and 11\n");
-
- hh = (int64_t)h;
- coclass = i / H48_ESIZE(h);
- ee = i % H48_ESIZE(h);
- esep = ee >> hh;
- eo = (ee & ((1 << hh) - 1)) << (11 - hh);
-
- ret = invcoord_esep(esep);
- copy_corners(&ret, crep[coclass]);
- set_eo(&ret, eo);
-
- return ret;
-}
-
-/*
-Each element of the cocsep table is a uint32_t used as follows:
- - Lowest 8-bit block: pruning value
- - Second-lowest 8-bit block: "ttrep" (transformation to representative)
- - Top 16-bit block: symcoord value
-After the data as described above, more auxiliary information is appended:
- - A uint32_t representing the number of symmetry classes
- - A uint32_t representing the highest value of the pruning table
- - One uint32_t for each "line" of the pruning table, representing the number
- of positions having that pruning value.
-*/
-_static size_t
-gendata_cocsep(void *buf, uint64_t *selfsim, cube_t *rep)
-{
- uint32_t *buf32, *info, cc;
- uint16_t n;
- uint8_t i, j, visited[COCSEP_VISITEDSIZE];
- dfsarg_cocsep_t arg;
-
- if (buf == NULL)
- goto gendata_cocsep_return_size;
-
- buf32 = (uint32_t *)buf;
- info = buf32 + COCSEP_TABLESIZE;
- memset(buf32, 0xFF, sizeof(uint32_t) * COCSEP_TABLESIZE);
- if (selfsim != NULL)
- memset(selfsim, 0, sizeof(uint64_t) * COCSEP_CLASSES);
-
- arg = (dfsarg_cocsep_t) {
- .cube = solved,
- .n = &n,
- .buf32 = buf32,
- .visited = visited,
- .selfsim = selfsim,
- .rep = rep
- };
- for (i = 0, n = 0, cc = 0; i < 10; i++) {
- LOG("cocsep: generating depth %" PRIu8 "\n", i);
- memset(visited, 0, COCSEP_VISITEDSIZE);
- arg.depth = 0;
- arg.maxdepth = i;
- cc = gendata_cocsep_dfs(&arg);
- info[i+2] = cc;
- LOG("found %" PRIu32 "\n", cc);
- }
-
- info[0] = (uint32_t)n;
- info[1] = 9; /* Known max pruning value */
- DBG_ASSERT(n == COCSEP_CLASSES, 0,
- "cocsep: computed %" PRIu16 " symmetry classes, "
- "expected %zu\n", n, COCSEP_CLASSES);
-
- LOG("cocsep data computed\n");
- LOG("Symmetry classes: %" PRIu32 "\n", info[0]);
- LOG("Maximum pruning value: %" PRIu32 "\n", info[1]);
- LOG("Pruning value distribution:\n");
- for (j = 0; j < 10; j++)
- LOG("%" PRIu8 ":\t%" PRIu32 "\n", j, info[j+2]);
-
-gendata_cocsep_return_size:
- return COCSEP_FULLSIZE;
-}
-
-_static uint32_t
-gendata_cocsep_dfs(dfsarg_cocsep_t *arg)
-{
- uint8_t m;
- uint32_t cc, class, ttrep, depth, olddepth, tinv;
- uint64_t t;
- int64_t i, j;
- cube_t d;
- dfsarg_cocsep_t nextarg;
-
- i = coord_cocsep(arg->cube);
- olddepth = (uint8_t)(arg->buf32[i] & 0xFF);
- if (olddepth < arg->depth || get_visited(arg->visited, i))
- return 0;
- set_visited(arg->visited, i);
-
- if (arg->depth == arg->maxdepth) {
- if ((arg->buf32[i] & 0xFF) != 0xFF)
- return 0;
-
- if (arg->rep != NULL)
- arg->rep[*arg->n] = arg->cube;
- for (t = 0, cc = 0; t < 48; t++) {
- d = transform_corners(arg->cube, t);
- j = coord_cocsep(d);
- if (i == j && arg->selfsim != NULL)
- arg->selfsim[*arg->n] |= UINT64_C(1) << t;
- if (COCLASS(arg->buf32[j]) != UINT32_C(0xFFFF))
- continue;
- set_visited(arg->visited, j);
- tinv = inverse_trans(t);
- olddepth = arg->buf32[j] & 0xFF;
- cc += olddepth == 0xFF;
-
- class = (uint32_t)(*arg->n) << UINT32_C(16);
- ttrep = (uint32_t)tinv << UINT32_C(8);
- depth = (uint32_t)arg->depth;
- arg->buf32[j] = class | ttrep | depth;
- }
- (*arg->n)++;
-
- return cc;
- }
-
- memcpy(&nextarg, arg, sizeof(dfsarg_cocsep_t));
- nextarg.depth++;
- for (m = 0, cc = 0; m < 18; m++) {
- nextarg.cube = move(arg->cube, m);
- cc += gendata_cocsep_dfs(&nextarg);
- }
-
- return cc;
-}
-
-_static uint64_t
-gen_h48short(
- uint8_t n,
- const uint32_t *cocsepdata,
- const cube_t *crep,
- const uint64_t *selfsim,
- h48map_t *map
-) {
- uint8_t i, m;
- int64_t coord;
- uint64_t j, oldn;
- kvpair_t kv;
- cube_t cube, d;
-
- cube = solvedcube();
- coord = coord_h48(cube, cocsepdata, 11);
- h48map_insertmin(map, coord, 0);
- oldn = 0;
- LOG("Short h48: generating depth 0\nfound %" PRIu8 "\n", map->n-oldn);
- for (i = 0; i < n; i++) {
- LOG("Short h48: generating depth %" PRIu8 "\n", i+1);
- j = 0;
- oldn = map->n;
- for (kv = h48map_nextkvpair(map, &j);
- j != map->capacity;
- kv = h48map_nextkvpair(map, &j)
- ) {
- if (kv.val != i)
- continue;
- cube = invcoord_h48(kv.key, crep, 11);
- for (m = 0; m < 18; m++) {
- d = move(cube, m);
- _foreach_h48sim(d, cocsepdata, selfsim, 11,
- coord = coord_h48(d, cocsepdata, 11);
- h48map_insertmin(map, coord, i+1);
- )
- }
- }
- LOG("found %" PRIu8 "\n", map->n-oldn);
- }
-
- return map->n;
-}
-
-/*
-TODO description
-generating fixed table with h=0, k=4
-*/
-_static size_t
-gendata_h48h0k4(void *buf, uint8_t maxdepth)
-{
- uint32_t j, *buf32, *info, *cocsepdata;
- bfsarg_esep_t arg;
- int64_t sc, cc, esep_max;
- uint64_t selfsim[COCSEP_CLASSES];
- cube_t crep[COCSEP_CLASSES];
- size_t cocsepsize, infosize;
-
- /* TODO: move info at start of tables (all tables!) */
- infosize = 4 * maxdepth;
- cocsepsize = gendata_cocsep(buf, selfsim, crep);
- infosize = 88;
-
- if (buf == NULL)
- goto gendata_h48h0k4_return_size;
-
- esep_max = (int64_t)ESEP_MAX(0);
- cocsepdata = (uint32_t *)buf;
- buf32 = cocsepdata + cocsepsize / 4;
- info = buf32 + (ESEP_TABLESIZE(0, 4) / sizeof(uint32_t));
- memset(buf32, 0xFF, ESEP_TABLESIZE(0, 4));
-
- sc = coord_h48(solved, cocsepdata, 0);
- set_esep_pval(buf32, sc, 0);
- info[1] = 1;
- arg = (bfsarg_esep_t) {
- .cocsepdata = cocsepdata,
- .buf32 = buf32,
- .selfsim = selfsim,
- .crep = crep
- };
- for (
- arg.done = 1, arg.depth = 1, cc = 0;
- arg.done < esep_max && arg.depth <= maxdepth;
- arg.depth++
- ) {
- LOG("esep: generating depth %" PRIu8 "\n", arg.depth);
- cc = gendata_h48h0k4_bfs(&arg);
- arg.done += cc;
- info[arg.depth+1] = cc;
- LOG("found %" PRId64 "\n", cc);
- }
-
- info[0] = arg.depth-1;
-
- LOG("h48 pruning table computed\n");
- LOG("Maximum pruning value: %" PRIu32 "\n", info[0]);
- LOG("Pruning value distribution:\n");
- for (j = 0; j <= info[0]; j++)
- LOG("%" PRIu8 ":\t%" PRIu32 "\n", j, info[j+1]);
-
-gendata_h48h0k4_return_size:
- return cocsepsize + ESEP_TABLESIZE(0, 4) + infosize;
-}
-
-_static int64_t
-gendata_h48h0k4_bfs(bfsarg_esep_t *arg)
-{
- const uint8_t breakpoint = 10; /* Hand-picked optimal */
-
- if (arg->depth < breakpoint)
- return gendata_h48h0k4_bfs_fromdone(arg);
- else
- return gendata_h48h0k4_bfs_fromnew(arg);
-}
-
-_static int64_t
-gendata_h48h0k4_bfs_fromdone(bfsarg_esep_t *arg)
-{
- uint8_t c, m, x;
- uint32_t cc;
- int64_t i, j, k;
- cube_t cube, moved;
-
- for (i = 0, cc = 0; i < (int64_t)ESEP_MAX(0); i++) {
- c = get_esep_pval(arg->buf32, i);
- if (c != arg->depth - 1)
- continue;
- cube = invcoord_h48(i, arg->crep, 0);
- for (m = 0; m < 18; m++) {
- moved = move(cube, m);
- j = coord_h48(moved, arg->cocsepdata, 0);
- if (get_esep_pval(arg->buf32, j) <= arg->depth)
- continue;
- _foreach_h48sim(moved, arg->cocsepdata, arg->selfsim, 0,
- k = coord_h48(moved, arg->cocsepdata, 0);
- x = get_esep_pval(arg->buf32, k);
- set_esep_pval(arg->buf32, k, arg->depth);
- cc += x != arg->depth;
- )
- }
- }
-
- return cc;
-}
-
-_static int64_t
-gendata_h48h0k4_bfs_fromnew(bfsarg_esep_t *arg)
-{
- uint8_t c, m, x;
- uint32_t cc;
- int64_t i, j;
- cube_t cube, moved;
-
- for (i = 0, cc = 0; i < (int64_t)ESEP_MAX(0); i++) {
- c = get_esep_pval(arg->buf32, i);
- if (c != 0xF)
- continue;
- cube = invcoord_h48(i, arg->crep, 0);
- for (m = 0; m < 18; m++) {
- moved = move(cube, m);
- j = coord_h48(moved, arg->cocsepdata, 0);
- x = get_esep_pval(arg->buf32, j);
- if (x >= arg->depth)
- continue;
- _foreach_h48sim(cube, arg->cocsepdata, arg->selfsim, 0,
- j = coord_h48(cube, arg->cocsepdata, 0);
- x = get_esep_pval(arg->buf32, j);
- set_esep_pval(arg->buf32, j, arg->depth);
- cc += x == 0xF;
- )
- break; /* Enough to find one, skip the rest */
- }
- }
-
- return cc;
-}
-
-_static_inline bool
-get_visited(const uint8_t *a, int64_t i)
-{
- return a[VISITED_IND(i)] & VISITED_MASK(i);
-}
-
-_static_inline void
-set_visited(uint8_t *a, int64_t i)
-{
- a[VISITED_IND(i)] |= VISITED_MASK(i);
-}
-
-_static_inline uint8_t
-get_esep_pval(const uint32_t *buf32, int64_t i)
-{
- return (buf32[ESEP_IND(i)] & ESEP_MASK(i)) >> ESEP_SHIFT(i);
-}
-
-_static_inline void
-set_esep_pval(uint32_t *buf32, int64_t i, uint8_t val)
-{
- buf32[ESEP_IND(i)] =
- (buf32[ESEP_IND(i)] & (~ESEP_MASK(i))) | (val << ESEP_SHIFT(i));
-}
-
-_static void
-solve_h48_appendsolution(dfsarg_solveh48_t *arg)
-{
- int strl;
-
- strl = writemoves(arg->moves, arg->nmoves, *arg->nextsol);
- LOG("Solution found: %s\n", *arg->nextsol);
- *arg->nextsol += strl;
- **arg->nextsol = '\n';
- (*arg->nextsol)++;
- (*arg->nsols)++;
-}
-
-_static_inline int8_t
-get_h48_cdata(cube_t cube, uint32_t *cocsepdata, uint32_t *cdata)
-{
- int64_t coord;
-
- coord = coord_cocsep(cube);
- *cdata = cocsepdata[coord];
-
- return CBOUND(*cdata);
-}
-
-_static_inline int8_t
-get_h48_bound(cube_t cube, uint32_t cdata, uint8_t h, uint32_t *h48data)
-{
- int64_t coord;
-
- coord = coord_h48_edges(cube, COCLASS(cdata), TTREP(cdata), h);
- return get_esep_pval(h48data, coord);
-}
-
-_static_inline bool
-solve_h48_stop(dfsarg_solveh48_t *arg)
-{
- uint32_t data, data_inv;
- int8_t bound;
-
- bound = get_h48_cdata(arg->cube, arg->cocsepdata, &data);
- if (bound + arg->nmoves > arg->depth)
- return true;
-
- bound = get_h48_cdata(arg->inverse, arg->cocsepdata, &data_inv);
- if (bound + arg->nmoves > arg->depth)
- return true;
-
-/*
- bound = get_h48_bound(arg->cube, data, arg->h, arg->h48data);
-LOG("Using pval %" PRId8 "\n", bound);
- if (bound + arg->nmoves > arg->depth)
- return true;
-
- bound = get_h48_bound(arg->inverse, data_inv, arg->h, arg->h48data);
- if (bound + arg->nmoves > arg->depth)
- return true;
-*/
-
- return false;
-}
-
-_static int64_t
-solve_h48_dfs(dfsarg_solveh48_t *arg)
-{
- dfsarg_solveh48_t nextarg;
- int64_t ret;
- uint8_t m;
-
- if (*arg->nsols == arg->maxsolutions)
- return 0;
-
- if (solve_h48_stop(arg))
- return 0;
-
- if (issolved(arg->cube)) {
- if (arg->nmoves != arg->depth)
- return 0;
- solve_h48_appendsolution(arg);
- return 1;
- }
-
- /* TODO: avoid copy, change arg and undo changes after recursion */
- nextarg = *arg;
- nextarg.nmoves = arg->nmoves + 1;
- ret = 0;
- for (m = 0; m < 18; m++) {
- nextarg.moves[arg->nmoves] = m;
- if (!allowednextmove(nextarg.moves, nextarg.nmoves)) {
- /* If a move is not allowed, neither are its 180
- * and 270 degree variations */
- m += 2;
- continue;
- }
- nextarg.cube = move(arg->cube, m);
- nextarg.inverse = inverse(nextarg.cube); /* TODO: use premove */
- ret += solve_h48_dfs(&nextarg);
- }
-
- return ret;
-}
-
-_static int64_t
-solve_h48(
- cube_t cube,
- int8_t minmoves,
- int8_t maxmoves,
- int8_t maxsolutions,
- uint8_t h,
- const void *data,
- char *solutions
-)
-{
- int64_t nsols;
- dfsarg_solveh48_t arg;
-
- arg = (dfsarg_solveh48_t) {
- .cube = cube,
- .inverse = inverse(cube),
- .nsols = &nsols,
- .maxsolutions = maxsolutions,
- .h = h,
- .cocsepdata = (uint32_t *)data,
- .h48data = ((uint32_t *)data) + COCSEP_FULLSIZE / 4,
- .nextsol = &solutions
- };
-
- nsols = 0;
- for (arg.depth = minmoves;
- arg.depth <= maxmoves && nsols < maxsolutions;
- arg.depth++)
- {
- LOG("Found %" PRId64 " solutions, searching at depth %"
- PRId8 "\n", nsols, arg.depth);
- arg.nmoves = 0;
- solve_h48_dfs(&arg);
- }
-
- return nsols;
-}
-
-/*
-The h48stats solver computes how many moves it takes to solve to
-each of the 12 h48 coordinates, one for each value of h from 0 to 11.
-The solutions array is filled with the length of the solutions. The
-solution array is therefore not a printable string.
-*/
-_static int64_t
-solve_h48stats_dfs(dfsarg_solveh48stats_t *arg)
-{
- const int64_t limit = 11;
-
- int8_t bound, u;
- uint8_t m;
- uint32_t d;
- int64_t coord, h;
- dfsarg_solveh48stats_t nextarg;
-
- /* Check cocsep lower bound (corners only) */
- bound = get_h48_cdata(arg->cube, arg->cocsepdata, &d);
- if (bound + arg->nmoves > arg->depth)
- return 0;
-
- /* Check h48 lower bound for h=0 (esep, but no eo) */
- coord = coord_h48_edges(arg->cube, COCLASS(d), TTREP(d), 0);
- bound = get_esep_pval(arg->h48data, coord);
- if (bound + arg->nmoves > arg->depth)
- return 0;
-
- /* Update all other values, if solved */
- coord = coord_h48_edges(arg->cube, COCLASS(d), TTREP(d), 11);
- for (h = 0; h <= limit; h++) {
- u = coord >> (11-h) == 0 && arg->s[h] == 99;
- arg->s[h] = u * arg->nmoves + (1-u) * arg->s[h];
- }
-
- if (arg->s[limit] != 99)
- return 0;
-
- nextarg = *arg;
- nextarg.nmoves = arg->nmoves + 1;
- for (m = 0; m < 18; m++) {
- nextarg.moves[arg->nmoves] = m;
- if (!allowednextmove(nextarg.moves, nextarg.nmoves)) {
- /* If a move is not allowed, neither are its 180
- * and 270 degree variations */
- m += 2;
- continue;
- }
- nextarg.cube = move(arg->cube, m);
- solve_h48stats_dfs(&nextarg);
- }
-
- return 0;
-}
-
-_static int64_t
-solve_h48stats(
- cube_t cube,
- int8_t maxmoves,
- const void *data,
- char solutions[static 12]
-)
-{
- int i;
- size_t cocsepsize;
- dfsarg_solveh48stats_t arg;
-
- cocsepsize = gendata_cocsep(NULL, NULL, NULL);
-
- arg = (dfsarg_solveh48stats_t) {
- .cube = cube,
- .cocsepdata = (uint32_t *)data,
- .h48data = ((uint32_t *)data) + (cocsepsize/4),
- .s = solutions
- };
-
- for (i = 0; i < 12; i++)
- solutions[i] = (char)99;
-
- for (arg.depth = 0;
- arg.depth <= maxmoves && solutions[11] == 99;
- arg.depth++)
- {
- arg.nmoves = 0;
- solve_h48stats_dfs(&arg);
- }
-
- return 0;
-}
diff --git a/src/solve_generic.h b/src/solvers/generic/generic.h
diff --git a/src/solvers/h48/coordinate.h b/src/solvers/h48/coordinate.h
@@ -0,0 +1,68 @@
+#define H48_ESIZE(h) ((_12c4 * _8c4) << (int64_t)(h))
+
+#define COCLASS_MASK (UINT32_C(0xFFFF) << UINT32_C(16))
+#define COCLASS(x) (((x) & COCLASS_MASK) >> UINT32_C(16))
+#define TTREP_MASK (UINT32_C(0xFF) << UINT32_C(8))
+#define TTREP(x) (((x) & TTREP_MASK) >> UINT32_C(8))
+
+_static_inline int64_t coord_h48(cube_t, const uint32_t *, uint8_t);
+_static_inline int64_t coord_h48_edges(cube_t, int64_t, uint8_t, uint8_t);
+_static_inline cube_t invcoord_h48(int64_t, const cube_t *, uint8_t);
+
+_static_inline int64_t
+coord_h48(cube_t c, const uint32_t *cocsepdata, uint8_t h)
+{
+ int64_t cocsep, coclass;
+ uint32_t data;
+ uint8_t ttrep;
+
+ DBG_ASSERT(h <= 11, -1, "coord_h48: h must be between 0 and 11\n");
+
+ cocsep = coord_cocsep(c);
+ data = cocsepdata[cocsep];
+ coclass = (int64_t)COCLASS(data);
+ ttrep = (int64_t)TTREP(data);
+
+ return coord_h48_edges(c, coclass, ttrep, h);
+}
+
+_static_inline int64_t
+coord_h48_edges(cube_t c, int64_t coclass, uint8_t ttrep, uint8_t h)
+{
+ cube_t d;
+ int64_t esep, eo, edges;
+
+ d = transform_edges(c, ttrep);
+ esep = coord_esep(d);
+ eo = coord_eo(d);
+ edges = (esep << 11) + eo;
+
+ return (coclass * H48_ESIZE(11) + edges) >> (11 - (int64_t)h);
+}
+
+/*
+This function does not necessarily return a cube whose coordinate is
+the given value, because it works up to symmetry. This means that the
+returned cube is a transformed cube of one that gives the correct value.
+*/
+_static_inline cube_t
+invcoord_h48(int64_t i, const cube_t *crep, uint8_t h)
+{
+ cube_t ret;
+ int64_t hh, coclass, ee, esep, eo;
+
+ DBG_ASSERT(h <= 11, zero,
+ "invcoord_h48: h must be between 0 and 11\n");
+
+ hh = (int64_t)h;
+ coclass = i / H48_ESIZE(h);
+ ee = i % H48_ESIZE(h);
+ esep = ee >> hh;
+ eo = (ee & ((1 << hh) - 1)) << (11 - hh);
+
+ ret = invcoord_esep(esep);
+ copy_corners(&ret, crep[coclass]);
+ set_eo(&ret, eo);
+
+ return ret;
+}
diff --git a/src/solvers/h48/gendata.h b/src/solvers/h48/gendata.h
@@ -0,0 +1,425 @@
+#define COCSEP_CLASSES ((size_t)3393)
+#define COCSEP_TABLESIZE ((size_t)_3p7 << (size_t)7)
+#define COCSEP_VISITEDSIZE ((COCSEP_TABLESIZE + (size_t)7) / (size_t)8)
+#define COCSEP_FULLSIZE ((size_t)4 * (COCSEP_TABLESIZE + (size_t)12))
+
+#define ESEP_NOEO (COCSEP_CLASSES * (size_t)_12c4 * (size_t)_8c4)
+#define ESEP_MAX(h) (ESEP_NOEO << (size_t)(h))
+#define ESEP_TABLESIZE(h, k) (ESEP_MAX((h)) / ((size_t)8 / (size_t)(k)))
+
+#define ESEP_IND(i) ((uint32_t)(i) / UINT32_C(8))
+#define ESEP_SHIFT(i) (UINT32_C(4) * ((uint32_t)(i) % UINT32_C(8)))
+#define ESEP_MASK(i) ((_bit_u32(4) - (uint32_t)(1)) << ESEP_SHIFT(i))
+#define VISITED_IND(i) ((uint32_t)(i) / UINT32_C(8))
+#define VISITED_MASK(i) (UINT32_C(1) << ((uint32_t)(i) % UINT32_C(8)))
+
+#define CBOUND_MASK UINT32_C(0xFF)
+#define CBOUND(x) ((x) & CBOUND_MASK)
+
+#define MAXLEN 20
+
+/*
+TODO: This loop over similar h48 coordinates can be improved by only
+transforming edges, but we need to compose transformations (i.e. conjugate
+_t by _ttrep).
+*/
+#define _foreach_h48sim(_cube, _cocsepdata, _selfsim, _h, _action) \
+ int64_t _cocsep = coord_cocsep(_cube); \
+ uint8_t _ttrep = TTREP(_cocsepdata[_cocsep]); \
+ uint8_t _inverse_ttrep = inverse_trans(_ttrep); \
+ int64_t _coclass = COCLASS(_cocsepdata[_cocsep]); \
+ cube_t _rep = transform(_cube, _ttrep); \
+ uint64_t _sim = _selfsim[_coclass]; \
+ for (uint8_t _t = 0; _t < 48 && _sim; _t++, _sim >>= 1) { \
+ if (!(_sim & 1)) continue; \
+ _cube = transform(_rep, _t); \
+ _cube = transform(_cube, _inverse_ttrep); \
+ _action \
+ }
+
+typedef struct {
+ cube_t cube;
+ uint8_t depth;
+ uint8_t maxdepth;
+ uint16_t *n;
+ uint32_t *buf32;
+ uint8_t *visited;
+ uint64_t *selfsim;
+ cube_t *rep;
+} dfsarg_cocsep_t;
+
+/* TODO keep or not? */
+typedef struct {
+ cube_t cube;
+ int8_t nmoves;
+ int8_t depth;
+ uint8_t moves[MAXLEN];
+ uint32_t *cocsepdata;
+ h48map_t *visited;
+} dfsarg_genh48set_t;
+
+typedef struct {
+ uint8_t depth;
+ uint32_t *cocsepdata;
+ uint32_t *buf32;
+ uint64_t *selfsim;
+ int64_t done;
+ cube_t *crep;
+} bfsarg_esep_t;
+
+_static_inline bool get_visited(const uint8_t *, int64_t);
+_static_inline void set_visited(uint8_t *, int64_t);
+_static_inline uint8_t get_esep_pval(const uint32_t *, int64_t);
+_static_inline void set_esep_pval(uint32_t *, int64_t, uint8_t);
+
+_static size_t gendata_cocsep(void *, uint64_t *, cube_t *);
+_static uint32_t gendata_cocsep_dfs(dfsarg_cocsep_t *);
+_static uint64_t gen_h48short(
+ uint8_t, const uint32_t *, const cube_t *, const uint64_t *, h48map_t *);
+_static size_t gendata_h48h0k4(void *, uint8_t);
+_static int64_t gendata_h48h0k4_bfs(bfsarg_esep_t *);
+_static int64_t gendata_h48h0k4_bfs_fromdone(bfsarg_esep_t *);
+_static int64_t gendata_h48h0k4_bfs_fromnew(bfsarg_esep_t *);
+
+_static_inline int8_t get_h48_cdata(cube_t, uint32_t *, uint32_t *);
+_static_inline int8_t get_h48_bound(cube_t, uint32_t, uint8_t, uint32_t *);
+
+/*
+Each element of the cocsep table is a uint32_t used as follows:
+ - Lowest 8-bit block: pruning value
+ - Second-lowest 8-bit block: "ttrep" (transformation to representative)
+ - Top 16-bit block: symcoord value
+After the data as described above, more auxiliary information is appended:
+ - A uint32_t representing the number of symmetry classes
+ - A uint32_t representing the highest value of the pruning table
+ - One uint32_t for each "line" of the pruning table, representing the number
+ of positions having that pruning value.
+*/
+_static size_t
+gendata_cocsep(void *buf, uint64_t *selfsim, cube_t *rep)
+{
+ uint32_t *buf32, *info, cc;
+ uint16_t n;
+ uint8_t i, j, visited[COCSEP_VISITEDSIZE];
+ dfsarg_cocsep_t arg;
+
+ if (buf == NULL)
+ goto gendata_cocsep_return_size;
+
+ buf32 = (uint32_t *)buf;
+ info = buf32 + COCSEP_TABLESIZE;
+ memset(buf32, 0xFF, sizeof(uint32_t) * COCSEP_TABLESIZE);
+ if (selfsim != NULL)
+ memset(selfsim, 0, sizeof(uint64_t) * COCSEP_CLASSES);
+
+ arg = (dfsarg_cocsep_t) {
+ .cube = solved,
+ .n = &n,
+ .buf32 = buf32,
+ .visited = visited,
+ .selfsim = selfsim,
+ .rep = rep
+ };
+ for (i = 0, n = 0, cc = 0; i < 10; i++) {
+ LOG("cocsep: generating depth %" PRIu8 "\n", i);
+ memset(visited, 0, COCSEP_VISITEDSIZE);
+ arg.depth = 0;
+ arg.maxdepth = i;
+ cc = gendata_cocsep_dfs(&arg);
+ info[i+2] = cc;
+ LOG("found %" PRIu32 "\n", cc);
+ }
+
+ info[0] = (uint32_t)n;
+ info[1] = 9; /* Known max pruning value */
+ DBG_ASSERT(n == COCSEP_CLASSES, 0,
+ "cocsep: computed %" PRIu16 " symmetry classes, "
+ "expected %zu\n", n, COCSEP_CLASSES);
+
+ LOG("cocsep data computed\n");
+ LOG("Symmetry classes: %" PRIu32 "\n", info[0]);
+ LOG("Maximum pruning value: %" PRIu32 "\n", info[1]);
+ LOG("Pruning value distribution:\n");
+ for (j = 0; j < 10; j++)
+ LOG("%" PRIu8 ":\t%" PRIu32 "\n", j, info[j+2]);
+
+gendata_cocsep_return_size:
+ return COCSEP_FULLSIZE;
+}
+
+_static uint32_t
+gendata_cocsep_dfs(dfsarg_cocsep_t *arg)
+{
+ uint8_t m;
+ uint32_t cc, class, ttrep, depth, olddepth, tinv;
+ uint64_t t;
+ int64_t i, j;
+ cube_t d;
+ dfsarg_cocsep_t nextarg;
+
+ i = coord_cocsep(arg->cube);
+ olddepth = (uint8_t)(arg->buf32[i] & 0xFF);
+ if (olddepth < arg->depth || get_visited(arg->visited, i))
+ return 0;
+ set_visited(arg->visited, i);
+
+ if (arg->depth == arg->maxdepth) {
+ if ((arg->buf32[i] & 0xFF) != 0xFF)
+ return 0;
+
+ if (arg->rep != NULL)
+ arg->rep[*arg->n] = arg->cube;
+ for (t = 0, cc = 0; t < 48; t++) {
+ d = transform_corners(arg->cube, t);
+ j = coord_cocsep(d);
+ if (i == j && arg->selfsim != NULL)
+ arg->selfsim[*arg->n] |= UINT64_C(1) << t;
+ if (COCLASS(arg->buf32[j]) != UINT32_C(0xFFFF))
+ continue;
+ set_visited(arg->visited, j);
+ tinv = inverse_trans(t);
+ olddepth = arg->buf32[j] & 0xFF;
+ cc += olddepth == 0xFF;
+
+ class = (uint32_t)(*arg->n) << UINT32_C(16);
+ ttrep = (uint32_t)tinv << UINT32_C(8);
+ depth = (uint32_t)arg->depth;
+ arg->buf32[j] = class | ttrep | depth;
+ }
+ (*arg->n)++;
+
+ return cc;
+ }
+
+ memcpy(&nextarg, arg, sizeof(dfsarg_cocsep_t));
+ nextarg.depth++;
+ for (m = 0, cc = 0; m < 18; m++) {
+ nextarg.cube = move(arg->cube, m);
+ cc += gendata_cocsep_dfs(&nextarg);
+ }
+
+ return cc;
+}
+
+_static uint64_t
+gen_h48short(
+ uint8_t n,
+ const uint32_t *cocsepdata,
+ const cube_t *crep,
+ const uint64_t *selfsim,
+ h48map_t *map
+) {
+ uint8_t i, m;
+ int64_t coord;
+ uint64_t j, oldn;
+ kvpair_t kv;
+ cube_t cube, d;
+
+ cube = solvedcube();
+ coord = coord_h48(cube, cocsepdata, 11);
+ h48map_insertmin(map, coord, 0);
+ oldn = 0;
+	LOG("Short h48: generating depth 0\nfound %" PRIu64 "\n", map->n-oldn);
+ for (i = 0; i < n; i++) {
+ LOG("Short h48: generating depth %" PRIu8 "\n", i+1);
+ j = 0;
+ oldn = map->n;
+ for (kv = h48map_nextkvpair(map, &j);
+ j != map->capacity;
+ kv = h48map_nextkvpair(map, &j)
+ ) {
+ if (kv.val != i)
+ continue;
+ cube = invcoord_h48(kv.key, crep, 11);
+ for (m = 0; m < 18; m++) {
+ d = move(cube, m);
+ _foreach_h48sim(d, cocsepdata, selfsim, 11,
+ coord = coord_h48(d, cocsepdata, 11);
+ h48map_insertmin(map, coord, i+1);
+ )
+ }
+ }
+		LOG("found %" PRIu64 "\n", map->n-oldn);
+ }
+
+ return map->n;
+}
+
+/*
+TODO description
+generating fixed table with h=0, k=4
+*/
+_static size_t
+gendata_h48h0k4(void *buf, uint8_t maxdepth)
+{
+ uint32_t j, *buf32, *info, *cocsepdata;
+ bfsarg_esep_t arg;
+ int64_t sc, cc, esep_max;
+ uint64_t selfsim[COCSEP_CLASSES];
+ cube_t crep[COCSEP_CLASSES];
+ size_t cocsepsize, infosize;
+
+ /* TODO: move info at start of tables (all tables!) */
+	cocsepsize = gendata_cocsep(buf, selfsim, crep);
+	/* NOTE(review): dead store "infosize = 4 * maxdepth" removed */
+	infosize = 88;
+
+ if (buf == NULL)
+ goto gendata_h48h0k4_return_size;
+
+ esep_max = (int64_t)ESEP_MAX(0);
+ cocsepdata = (uint32_t *)buf;
+ buf32 = cocsepdata + cocsepsize / 4;
+ info = buf32 + (ESEP_TABLESIZE(0, 4) / sizeof(uint32_t));
+ memset(buf32, 0xFF, ESEP_TABLESIZE(0, 4));
+
+ sc = coord_h48(solved, cocsepdata, 0);
+ set_esep_pval(buf32, sc, 0);
+ info[1] = 1;
+ arg = (bfsarg_esep_t) {
+ .cocsepdata = cocsepdata,
+ .buf32 = buf32,
+ .selfsim = selfsim,
+ .crep = crep
+ };
+ for (
+ arg.done = 1, arg.depth = 1, cc = 0;
+ arg.done < esep_max && arg.depth <= maxdepth;
+ arg.depth++
+ ) {
+ LOG("esep: generating depth %" PRIu8 "\n", arg.depth);
+ cc = gendata_h48h0k4_bfs(&arg);
+ arg.done += cc;
+ info[arg.depth+1] = cc;
+ LOG("found %" PRId64 "\n", cc);
+ }
+
+ info[0] = arg.depth-1;
+
+ LOG("h48 pruning table computed\n");
+ LOG("Maximum pruning value: %" PRIu32 "\n", info[0]);
+ LOG("Pruning value distribution:\n");
+ for (j = 0; j <= info[0]; j++)
+		LOG("%" PRIu32 ":\t%" PRIu32 "\n", j, info[j+1]);
+
+gendata_h48h0k4_return_size:
+ return cocsepsize + ESEP_TABLESIZE(0, 4) + infosize;
+}
+
+_static int64_t
+gendata_h48h0k4_bfs(bfsarg_esep_t *arg)
+{
+ const uint8_t breakpoint = 10; /* Hand-picked optimal */
+
+ if (arg->depth < breakpoint)
+ return gendata_h48h0k4_bfs_fromdone(arg);
+ else
+ return gendata_h48h0k4_bfs_fromnew(arg);
+}
+
+_static int64_t
+gendata_h48h0k4_bfs_fromdone(bfsarg_esep_t *arg)
+{
+ uint8_t c, m, x;
+ uint32_t cc;
+ int64_t i, j, k;
+ cube_t cube, moved;
+
+ for (i = 0, cc = 0; i < (int64_t)ESEP_MAX(0); i++) {
+ c = get_esep_pval(arg->buf32, i);
+ if (c != arg->depth - 1)
+ continue;
+ cube = invcoord_h48(i, arg->crep, 0);
+ for (m = 0; m < 18; m++) {
+ moved = move(cube, m);
+ j = coord_h48(moved, arg->cocsepdata, 0);
+ if (get_esep_pval(arg->buf32, j) <= arg->depth)
+ continue;
+ _foreach_h48sim(moved, arg->cocsepdata, arg->selfsim, 0,
+ k = coord_h48(moved, arg->cocsepdata, 0);
+ x = get_esep_pval(arg->buf32, k);
+ set_esep_pval(arg->buf32, k, arg->depth);
+ cc += x != arg->depth;
+ )
+ }
+ }
+
+ return cc;
+}
+
+_static int64_t
+gendata_h48h0k4_bfs_fromnew(bfsarg_esep_t *arg)
+{
+ uint8_t c, m, x;
+ uint32_t cc;
+ int64_t i, j;
+ cube_t cube, moved;
+
+ for (i = 0, cc = 0; i < (int64_t)ESEP_MAX(0); i++) {
+ c = get_esep_pval(arg->buf32, i);
+ if (c != 0xF)
+ continue;
+ cube = invcoord_h48(i, arg->crep, 0);
+ for (m = 0; m < 18; m++) {
+ moved = move(cube, m);
+ j = coord_h48(moved, arg->cocsepdata, 0);
+ x = get_esep_pval(arg->buf32, j);
+ if (x >= arg->depth)
+ continue;
+ _foreach_h48sim(cube, arg->cocsepdata, arg->selfsim, 0,
+ j = coord_h48(cube, arg->cocsepdata, 0);
+ x = get_esep_pval(arg->buf32, j);
+ set_esep_pval(arg->buf32, j, arg->depth);
+ cc += x == 0xF;
+ )
+ break; /* Enough to find one, skip the rest */
+ }
+ }
+
+ return cc;
+}
+
+_static_inline bool
+get_visited(const uint8_t *a, int64_t i)
+{
+ return a[VISITED_IND(i)] & VISITED_MASK(i);
+}
+
+_static_inline void
+set_visited(uint8_t *a, int64_t i)
+{
+ a[VISITED_IND(i)] |= VISITED_MASK(i);
+}
+
+_static_inline uint8_t
+get_esep_pval(const uint32_t *buf32, int64_t i)
+{
+ return (buf32[ESEP_IND(i)] & ESEP_MASK(i)) >> ESEP_SHIFT(i);
+}
+
+_static_inline void
+set_esep_pval(uint32_t *buf32, int64_t i, uint8_t val)
+{
+ buf32[ESEP_IND(i)] =
+	    (buf32[ESEP_IND(i)] & (~ESEP_MASK(i))) | ((uint32_t)val << ESEP_SHIFT(i));
+}
+
+_static_inline int8_t
+get_h48_cdata(cube_t cube, uint32_t *cocsepdata, uint32_t *cdata)
+{
+ int64_t coord;
+
+ coord = coord_cocsep(cube);
+ *cdata = cocsepdata[coord];
+
+ return CBOUND(*cdata);
+}
+
+_static_inline int8_t
+get_h48_bound(cube_t cube, uint32_t cdata, uint8_t h, uint32_t *h48data)
+{
+ int64_t coord;
+
+ coord = coord_h48_edges(cube, COCLASS(cdata), TTREP(cdata), h);
+ return get_esep_pval(h48data, coord);
+}
diff --git a/src/solvers/h48/h48.h b/src/solvers/h48/h48.h
@@ -0,0 +1,4 @@
+#include "coordinate.h"
+#include "map.h"
+#include "gendata.h"
+#include "solve.h"
diff --git a/src/solvers/h48/map.h b/src/solvers/h48/map.h
@@ -0,0 +1,104 @@
+#define MAP_UNSET UINT64_C(0xFFFFFFFFFFFFFFFF)
+#define MAP_KEYMASK UINT64_C(0xFFFFFFFFFF)
+#define MAP_KEYSHIFT UINT64_C(40)
+
+typedef struct {
+ uint64_t n;
+ uint64_t capacity;
+ uint64_t randomizer;
+ uint64_t *table;
+} h48map_t;
+
+typedef struct {
+ uint64_t key;
+ uint64_t val;
+} kvpair_t;
+
+_static void h48map_create(h48map_t *, uint64_t, uint64_t);
+_static void h48map_clear(h48map_t *);
+_static void h48map_destroy(h48map_t *);
+_static uint64_t h48map_lookup(h48map_t *, uint64_t);
+_static void h48map_insertmin(h48map_t *, uint64_t, uint64_t);
+_static uint64_t h48map_value(h48map_t *, uint64_t);
+_static kvpair_t h48map_nextkvpair(h48map_t *, uint64_t *);
+
+_static void
+h48map_create(h48map_t *map, uint64_t capacity, uint64_t randomizer)
+{
+ map->capacity = capacity;
+ map->randomizer = randomizer;
+
+	map->table = malloc(map->capacity * sizeof(uint64_t));
+ h48map_clear(map);
+}
+
+_static void
+h48map_clear(h48map_t *map)
+{
+ memset(map->table, 0xFF, map->capacity * sizeof(uint64_t));
+ map->n = 0;
+}
+
+_static void
+h48map_destroy(h48map_t *map)
+{
+ free(map->table);
+}
+
+_static_inline uint64_t
+h48map_lookup(h48map_t *map, uint64_t x)
+{
+ uint64_t hash, i;
+
+ hash = ((x % map->capacity) * map->randomizer) % map->capacity;
+ for (i = hash;
+ map->table[i] != MAP_UNSET && (map->table[i] & MAP_KEYMASK) != x;
+ i = (i+1) % map->capacity
+ ) ;
+
+ return i;
+}
+
+_static_inline void
+h48map_insertmin(h48map_t *map, uint64_t key, uint64_t val)
+{
+ uint64_t i, oldval, min;
+
+ i = h48map_lookup(map, key);
+ oldval = map->table[i] >> MAP_KEYSHIFT;
+ min = _min(val, oldval);
+
+ map->n += map->table[i] == MAP_UNSET;
+ map->table[i] = (key & MAP_KEYMASK) | (min << MAP_KEYSHIFT);
+}
+
+_static_inline uint64_t
+h48map_value(h48map_t *map, uint64_t key)
+{
+ return map->table[h48map_lookup(map, key)] >> MAP_KEYSHIFT;
+}
+
+_static kvpair_t
+h48map_nextkvpair(h48map_t *map, uint64_t *p)
+{
+ kvpair_t kv;
+ uint64_t pair;
+
+ kv.key = MAP_UNSET;
+ kv.val = MAP_UNSET;
+
+ DBG_ASSERT(*p < map->capacity, kv,
+ "Error looping over map: given index %" PRIu64 " is out of "
+	    "range [0,%" PRIu64 ")\n", *p, map->capacity);
+
+ for ( ; *p < map->capacity; (*p)++) {
+ if (map->table[*p] != MAP_UNSET) {
+ pair = map->table[(*p)++];
+ kv.key = pair & MAP_KEYMASK;
+ kv.val = pair >> MAP_KEYSHIFT;
+ return kv;
+ }
+ }
+
+ return kv;
+}
diff --git a/src/solvers/h48/solve.h b/src/solvers/h48/solve.h
@@ -0,0 +1,242 @@
+typedef struct {
+ cube_t cube;
+ cube_t inverse;
+ int8_t nmoves;
+ int8_t depth;
+ uint8_t moves[MAXLEN];
+ int64_t *nsols;
+ int64_t maxsolutions;
+ uint8_t h;
+ uint32_t *cocsepdata;
+ uint32_t *h48data;
+ char **nextsol;
+} dfsarg_solveh48_t;
+
+typedef struct {
+ cube_t cube;
+ int8_t nmoves;
+ int8_t depth;
+ uint8_t moves[MAXLEN];
+ uint32_t *cocsepdata;
+ uint32_t *h48data;
+ char *s;
+} dfsarg_solveh48stats_t;
+
+_static void solve_h48_appendsolution(dfsarg_solveh48_t *);
+_static_inline bool solve_h48_stop(dfsarg_solveh48_t *);
+_static int64_t solve_h48_dfs(dfsarg_solveh48_t *);
+_static int64_t solve_h48(
+ cube_t, int8_t, int8_t, int8_t, uint8_t, const void *, char *);
+
+_static int64_t solve_h48stats_dfs(dfsarg_solveh48stats_t *);
+_static int64_t solve_h48stats(cube_t, int8_t, const void *, char [static 12]);
+
+_static void
+solve_h48_appendsolution(dfsarg_solveh48_t *arg)
+{
+ int strl;
+
+ strl = writemoves(arg->moves, arg->nmoves, *arg->nextsol);
+ LOG("Solution found: %s\n", *arg->nextsol);
+ *arg->nextsol += strl;
+ **arg->nextsol = '\n';
+ (*arg->nextsol)++;
+ (*arg->nsols)++;
+}
+
+_static_inline bool
+solve_h48_stop(dfsarg_solveh48_t *arg)
+{
+ uint32_t data, data_inv;
+ int8_t bound;
+
+ bound = get_h48_cdata(arg->cube, arg->cocsepdata, &data);
+ if (bound + arg->nmoves > arg->depth)
+ return true;
+
+ bound = get_h48_cdata(arg->inverse, arg->cocsepdata, &data_inv);
+ if (bound + arg->nmoves > arg->depth)
+ return true;
+
+/*
+ bound = get_h48_bound(arg->cube, data, arg->h, arg->h48data);
+LOG("Using pval %" PRId8 "\n", bound);
+ if (bound + arg->nmoves > arg->depth)
+ return true;
+
+ bound = get_h48_bound(arg->inverse, data_inv, arg->h, arg->h48data);
+ if (bound + arg->nmoves > arg->depth)
+ return true;
+*/
+
+ return false;
+}
+
+_static int64_t
+solve_h48_dfs(dfsarg_solveh48_t *arg)
+{
+ dfsarg_solveh48_t nextarg;
+ int64_t ret;
+ uint8_t m;
+
+ if (*arg->nsols == arg->maxsolutions)
+ return 0;
+
+ if (solve_h48_stop(arg))
+ return 0;
+
+ if (issolved(arg->cube)) {
+ if (arg->nmoves != arg->depth)
+ return 0;
+ solve_h48_appendsolution(arg);
+ return 1;
+ }
+
+ /* TODO: avoid copy, change arg and undo changes after recursion */
+ nextarg = *arg;
+ nextarg.nmoves = arg->nmoves + 1;
+ ret = 0;
+ for (m = 0; m < 18; m++) {
+ nextarg.moves[arg->nmoves] = m;
+ if (!allowednextmove(nextarg.moves, nextarg.nmoves)) {
+ /* If a move is not allowed, neither are its 180
+ * and 270 degree variations */
+ m += 2;
+ continue;
+ }
+ nextarg.cube = move(arg->cube, m);
+ nextarg.inverse = inverse(nextarg.cube); /* TODO: use premove */
+ ret += solve_h48_dfs(&nextarg);
+ }
+
+ return ret;
+}
+
+_static int64_t
+solve_h48(
+ cube_t cube,
+ int8_t minmoves,
+ int8_t maxmoves,
+ int8_t maxsolutions,
+ uint8_t h,
+ const void *data,
+ char *solutions
+)
+{
+ int64_t nsols;
+ dfsarg_solveh48_t arg;
+
+ arg = (dfsarg_solveh48_t) {
+ .cube = cube,
+ .inverse = inverse(cube),
+ .nsols = &nsols,
+ .maxsolutions = maxsolutions,
+ .h = h,
+ .cocsepdata = (uint32_t *)data,
+ .h48data = ((uint32_t *)data) + COCSEP_FULLSIZE / 4,
+ .nextsol = &solutions
+ };
+
+ nsols = 0;
+ for (arg.depth = minmoves;
+ arg.depth <= maxmoves && nsols < maxsolutions;
+ arg.depth++)
+ {
+ LOG("Found %" PRId64 " solutions, searching at depth %"
+ PRId8 "\n", nsols, arg.depth);
+ arg.nmoves = 0;
+ solve_h48_dfs(&arg);
+ }
+
+ return nsols;
+}
+
+/*
+The h48stats solver computes how many moves it takes to solve to
+each of the 12 h48 coordinates, one for each value of h from 0 to 11.
+The solutions array is filled with the length of the solutions. The
+solution array is therefore not a printable string.
+*/
+_static int64_t
+solve_h48stats_dfs(dfsarg_solveh48stats_t *arg)
+{
+ const int64_t limit = 11;
+
+ int8_t bound, u;
+ uint8_t m;
+ uint32_t d;
+ int64_t coord, h;
+ dfsarg_solveh48stats_t nextarg;
+
+ /* Check cocsep lower bound (corners only) */
+ bound = get_h48_cdata(arg->cube, arg->cocsepdata, &d);
+ if (bound + arg->nmoves > arg->depth)
+ return 0;
+
+ /* Check h48 lower bound for h=0 (esep, but no eo) */
+ coord = coord_h48_edges(arg->cube, COCLASS(d), TTREP(d), 0);
+ bound = get_esep_pval(arg->h48data, coord);
+ if (bound + arg->nmoves > arg->depth)
+ return 0;
+
+ /* Update all other values, if solved */
+ coord = coord_h48_edges(arg->cube, COCLASS(d), TTREP(d), 11);
+ for (h = 0; h <= limit; h++) {
+ u = coord >> (11-h) == 0 && arg->s[h] == 99;
+ arg->s[h] = u * arg->nmoves + (1-u) * arg->s[h];
+ }
+
+ if (arg->s[limit] != 99)
+ return 0;
+
+ nextarg = *arg;
+ nextarg.nmoves = arg->nmoves + 1;
+ for (m = 0; m < 18; m++) {
+ nextarg.moves[arg->nmoves] = m;
+ if (!allowednextmove(nextarg.moves, nextarg.nmoves)) {
+ /* If a move is not allowed, neither are its 180
+ * and 270 degree variations */
+ m += 2;
+ continue;
+ }
+ nextarg.cube = move(arg->cube, m);
+ solve_h48stats_dfs(&nextarg);
+ }
+
+ return 0;
+}
+
+_static int64_t
+solve_h48stats(
+ cube_t cube,
+ int8_t maxmoves,
+ const void *data,
+ char solutions[static 12]
+)
+{
+ int i;
+ size_t cocsepsize;
+ dfsarg_solveh48stats_t arg;
+
+ cocsepsize = gendata_cocsep(NULL, NULL, NULL);
+
+ arg = (dfsarg_solveh48stats_t) {
+ .cube = cube,
+ .cocsepdata = (uint32_t *)data,
+ .h48data = ((uint32_t *)data) + (cocsepsize/4),
+ .s = solutions
+ };
+
+ for (i = 0; i < 12; i++)
+ solutions[i] = (char)99;
+
+ for (arg.depth = 0;
+ arg.depth <= maxmoves && solutions[11] == 99;
+ arg.depth++)
+ {
+ arg.nmoves = 0;
+ solve_h48stats_dfs(&arg);
+ }
+
+ return 0;
+}
diff --git a/src/solvers/solvers.h b/src/solvers/solvers.h
@@ -0,0 +1,2 @@
+#include "generic/generic.h"
+#include "h48/h48.h"
diff --git a/src/constants.h b/src/utils/constants.h
diff --git a/src/utils/dbg_log.h b/src/utils/dbg_log.h
@@ -0,0 +1,16 @@
+void (*nissy_log)(const char *, ...);
+
+#define LOG(...) if (nissy_log != NULL) nissy_log(__VA_ARGS__);
+
+#ifdef DEBUG
+#define _static
+#define _static_inline
+#define DBG_WARN(condition, ...) if (!(condition)) LOG(__VA_ARGS__);
+#define DBG_ASSERT(condition, retval, ...) \
+ if (!(condition)) { LOG(__VA_ARGS__); return retval; }
+#else
+#define _static static
+#define _static_inline static inline
+#define DBG_WARN(condition, ...)
+#define DBG_ASSERT(condition, retval, ...)
+#endif
diff --git a/src/utils.h b/src/utils/math.h
diff --git a/src/utils/utils.h b/src/utils/utils.h
@@ -0,0 +1,3 @@
+#include "dbg_log.h"
+#include "constants.h"
+#include "math.h"
diff --git a/test/test.h b/test/test.h
@@ -1,3 +1,5 @@
+#define TEST_H
+
#include <inttypes.h>
#include <stdarg.h>
#include <stdbool.h>
@@ -5,24 +7,9 @@
#include <stdlib.h>
#include <string.h>
-#define STRLENMAX 10000
+#include "../src/arch/arch.h"
-#if defined(CUBE_AVX2)
-#include <immintrin.h>
-typedef __m256i cube_t;
-#elif defined(CUBE_NEON)
-#include <stdlib.h>
-#include <arm_neon.h>
-typedef struct {
- uint8x16_t corner;
- uint8x16_t edge;
-} cube_t;
-#else
-typedef struct {
- uint8_t corner[8];
- uint8_t edge[12];
-} cube_t;
-#endif
+#define STRLENMAX 10000
/* Basic functions used in most tests */
cube_t solvedcube(void);
diff --git a/tools/001_gendata_h48/gendata_h48.c b/tools/001_gendata_h48/gendata_h48.c
@@ -1,5 +1,5 @@
#include "../timerun.h"
-#include "../../src/cube.h"
+#include "../../src/nissy.h"
#define MAXDEPTH 20
#define HVALUE 0
diff --git a/tools/002_stats_tables_h48/stats_tables_h48.c b/tools/002_stats_tables_h48/stats_tables_h48.c
@@ -1,7 +1,7 @@
#include <pthread.h>
#include <time.h>
#include "../timerun.h"
-#include "../../src/cube.h"
+#include "../../src/nissy.h"
#define MAXMOVES 20
#define NTHREADS 32
diff --git a/utils/genmovecode.sh b/utils/genmovecode.sh
@@ -1,6 +1,6 @@
#!/bin/sh
-cc -DDEBUG h48_to_lst.c ../src/cube.c -o h48_to_lst
+cc -DDEBUG h48_to_lst.c ../src/nissy.c -o h48_to_lst
gen() {
for f in cubes/move_??_*.txt; do
diff --git a/utils/gentranscode.sh b/utils/gentranscode.sh
@@ -1,7 +1,7 @@
#!/bin/sh
-cc -DDEBUG h48_to_lst.c ../src/cube.c -o h48_to_lst
-cc -DDEBUG invert.c ../src/cube.c -o invert
+cc -DDEBUG h48_to_lst.c ../src/nissy.c -o h48_to_lst
+cc -DDEBUG invert.c ../src/nissy.c -o invert
lineavx() { printf '#define _trans_cube_%s ' "$1"; }
linesrc() { printf '_static cube_fast_t _trans_cube_%s = ' "$1"; }
diff --git a/utils/h48_to_lst.c b/utils/h48_to_lst.c
@@ -2,7 +2,7 @@
#include <inttypes.h>
#include <stdbool.h>
-#include "../src/cube.h"
+#include "../src/nissy.h"
#define STRLENMAX 1000
diff --git a/utils/invert.c b/utils/invert.c
@@ -2,7 +2,7 @@
#include <inttypes.h>
#include <stdbool.h>
-#include "../src/cube.h"
+#include "../src/nissy.h"
#define STRLENMAX 1000