h48

A prototype for an optimal Rubik's cube solver, work in progress.
git clone https://git.tronto.net/h48
Download | Log | Files | Refs | README | LICENSE

commit 18c9a8b8905304cf5f8fc15825769046a3144866
parent f25a10e19eca294c4e6a99e4f80ce5cfd11a0e5f
Author: Sebastiano Tronto <sebastiano@tronto.net>
Date:   Sun, 18 Aug 2024 14:26:45 +0200

Reorganized folder structure

Diffstat:
MLICENSE | 4+++-
MMakefile | 6+++---
Mshell.c | 2+-
Asrc/arch/arch.h | 41+++++++++++++++++++++++++++++++++++++++++
Asrc/arch/avx2.h | 319+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/arch/common.h | 19+++++++++++++++++++
Asrc/arch/neon.h | 348+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/arch/portable.h | 272+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Rsrc/constant_cubes.h -> src/core/constant_cubes.h | 0
Asrc/core/core.h | 13+++++++++++++
Rsrc/cube_generic.h -> src/core/cube.h | 0
Rsrc/io_cube.h -> src/core/io_cube.h | 0
Asrc/core/io_moves.h | 62++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/core/io_trans.h | 25+++++++++++++++++++++++++
Rsrc/moves.h -> src/core/moves.h | 0
Rsrc/cube_transform.h -> src/core/transform.h | 0
Rsrc/cube_transform_with_switch.h -> src/core/transform_with_switch.h | 0
Dsrc/cube.c | 55-------------------------------------------------------
Dsrc/cube_avx2.h | 341-------------------------------------------------------------------------------
Dsrc/cube_neon.h | 377-------------------------------------------------------------------------------
Dsrc/cube_portable.h | 298-------------------------------------------------------------------------------
Dsrc/cube_public.h | 255-------------------------------------------------------------------------------
Dsrc/io_move_trans.h | 87-------------------------------------------------------------------------------
Asrc/nissy.c | 265+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Rsrc/cube.h -> src/nissy.h | 0
Dsrc/solve_h48.h | 839-------------------------------------------------------------------------------
Rsrc/solve_generic.h -> src/solvers/generic/generic.h | 0
Asrc/solvers/h48/coordinate.h | 68++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/solvers/h48/gendata.h | 425+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/solvers/h48/h48.h | 4++++
Asrc/solvers/h48/map.h | 104+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/solvers/h48/solve.h | 242+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/solvers/solvers.h | 2++
Rsrc/constants.h -> src/utils/constants.h | 0
Asrc/utils/dbg_log.h | 16++++++++++++++++
Rsrc/utils.h -> src/utils/math.h | 0
Asrc/utils/utils.h | 3+++
Mtest/test.h | 21++++-----------------
Mtools/001_gendata_h48/gendata_h48.c | 2+-
Mtools/002_stats_tables_h48/stats_tables_h48.c | 2+-
Mutils/genmovecode.sh | 2+-
Mutils/gentranscode.sh | 4++--
Mutils/h48_to_lst.c | 2+-
Mutils/invert.c | 2+-
44 files changed, 2246 insertions(+), 2281 deletions(-)

diff --git a/LICENSE b/LICENSE @@ -2,7 +2,9 @@ The following license applies to every C source code and header file distributed with this LICENSE file. h48 - a prototype for an optimal rubik's cube solver - Copyright (C) 2023 Sebastiano Tronto <sebastiano@tronto.net> + Copyright (C) 2023-2024 + Sebastiano Tronto <sebastiano@tronto.net> + Enrico Tenuti <> This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/Makefile b/Makefile @@ -3,13 +3,13 @@ include config.mk all: cube.o debugcube.o cube.s: clean - ${CC} -D${CUBETYPE} ${CFLAGS} -c -S -o cube.s src/cube.c + ${CC} -D${CUBETYPE} ${CFLAGS} -c -S -o cube.s src/nissy.c cube.o: clean - ${CC} -D${CUBETYPE} ${CFLAGS} -c -o cube.o src/cube.c + ${CC} -D${CUBETYPE} ${CFLAGS} -c -o cube.o src/nissy.c debugcube.o: clean - ${CC} -D${CUBETYPE} ${DBGFLAGS} -c -o debugcube.o src/cube.c + ${CC} -D${CUBETYPE} ${DBGFLAGS} -c -o debugcube.o src/nissy.c clean: rm -rf *.o run diff --git a/shell.c b/shell.c @@ -7,7 +7,7 @@ #include <string.h> #include <time.h> -#include "src/cube.h" +#include "src/nissy.h" #define PRINTCUBE_BUFFER_SIZE 1024 /* Should be enough */ #define SOLUTIONS_BUFFER_SIZE 500000 /* Should be enough */ diff --git a/src/arch/arch.h b/src/arch/arch.h @@ -0,0 +1,41 @@ +#if defined(CUBE_AVX2) + +#include <immintrin.h> + +typedef __m256i cube_t; + +#if !defined(TEST_H) +#include "common.h" +#include "avx2.h" +#endif + +#elif defined(CUBE_NEON) + +#include <stdlib.h> +#include <arm_neon.h> + +typedef struct { + uint8x16_t corner; + uint8x16_t edge; +} cube_t; + +#if !defined(TEST_H) +#include "common.h" +#include "neon.h" +#endif + +#else + +#include <stdlib.h> + +typedef struct { + uint8_t corner[8]; + uint8_t edge[12]; +} cube_t; + +#if !defined(TEST_H) +#include "common.h" +#include "portable.h" +#endif + +#endif diff --git a/src/arch/avx2.h b/src/arch/avx2.h @@ -0,0 +1,319 @@ +#define _co2_avx2 _mm256_set_epi64x(0, 0, 0, INT64_C(0x6060606060606060)) +#define _cocw_avx2 _mm256_set_epi64x(0, 0, 0, INT64_C(0x2020202020202020)) +#define _cp_avx2 _mm256_set_epi64x(0, 0, 0, INT64_C(0x0707070707070707)) +#define _ep_avx2 \ + _mm256_set_epi64x(INT64_C(0x0F0F0F0F), INT64_C(0x0F0F0F0F0F0F0F0F), 0, 0) +#define _eo_avx2 \ + _mm256_set_epi64x(INT64_C(0x10101010), INT64_C(0x1010101010101010), 0, 0) + +#define static_cube(c_ufr, c_ubl, c_dfl, c_dbr, c_ufl, c_ubr, c_dfr, c_dbl, \ + e_uf, e_ub, e_db, e_df, e_ur, e_ul, e_dl, e_dr, e_fr, e_fl, e_bl, e_br) \ + _mm256_set_epi8(0, 0, 0, 0, e_br, e_bl, e_fl, e_fr, \ + e_dr, e_dl, e_ul, e_ur, e_df, e_db, e_ub, e_uf, \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + c_dbl, c_dfr, c_ubr, c_ufl, c_dbr, c_dfl, c_ubl, c_ufr) +#define zero _mm256_set_epi64x(0, 0, 0, 0) +#define solved static_cube( \ + 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) + +_static void +pieces(cube_t *cube, uint8_t c[static 8], uint8_t e[static 12]) +{ + uint8_t aux[32]; + + _mm256_storeu_si256((__m256i_u *)aux, *cube); + memcpy(c, aux, 8); + memcpy(e, aux+16, 12); +} + +_static_inline bool +equal(cube_t c1, cube_t c2) +{ + int32_t mask; + __m256i cmp; + + cmp = _mm256_cmpeq_epi8(c1, c2); + mask = _mm256_movemask_epi8(cmp); + + return mask == ~0; +} + +_static_inline cube_t +invertco(cube_t c) +{ + cube_t co, shleft, shright, summed, newco, cleanco, ret; + + co = _mm256_and_si256(c, _co2_avx2); + shleft = _mm256_slli_epi32(co, 1); + shright = _mm256_srli_epi32(co, 1); + summed = _mm256_or_si256(shleft, shright); + newco = _mm256_and_si256(summed, _co2_avx2); + cleanco = _mm256_xor_si256(c, co); + ret = _mm256_or_si256(cleanco, newco); + + return ret; +} + +_static_inline cube_t +compose_epcpeo(cube_t c1, cube_t c2) +{ + cube_t b, s, eo2; + + /* Permute and clean unused bits */ + s = _mm256_shuffle_epi8(c1, c2); + b = _mm256_set_epi8( + ~0, ~0, ~0, ~0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, 0, 0, 0, 0, 0, 0, 0, 0 + ); + s = _mm256_andnot_si256(b, s); + + /* Change EO */ + eo2 = _mm256_and_si256(c2, _eo_avx2); + s = _mm256_xor_si256(s, eo2); + + return s; +} + +_static_inline cube_t +compose_edges(cube_t c1, cube_t c2) +{ + return compose_epcpeo(c1, c2); +} + +_static_inline cube_t +compose_corners(cube_t c1, cube_t c2) +{ + /* + * We do a full compose. Minor optimizations are possible, like + * saving one instruction by not doing EO, but it should not + * be significant. + */ + return compose(c1, c2); +} + +_static_inline cube_t +compose(cube_t c1, cube_t c2) +{ + cube_t s, co1, co2, aux, auy1, auy2, auz1, auz2; + + s = compose_epcpeo(c1, c2); + + /* Change CO */ + co1 = _mm256_and_si256(s, _co2_avx2); + co2 = _mm256_and_si256(c2, _co2_avx2); + aux = _mm256_add_epi8(co1, co2); + auy1 = _mm256_add_epi8(aux, _cocw_avx2); + auy2 = _mm256_srli_epi32(auy1, 2); + auz1 = _mm256_add_epi8(aux, auy2); + auz2 = _mm256_and_si256(auz1, _co2_avx2); + + /* Put together */ + s = _mm256_andnot_si256(_co2_avx2, s); + s = _mm256_or_si256(s, auz2); + + return s; +} + +_static_inline cube_t +cleanaftershuffle(cube_t c) +{ + __m256i b; + + b = _mm256_set_epi8( + ~0, ~0, ~0, ~0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, 0, 0, 0, 0, 0, 0, 0, 0 + ); + + return _mm256_andnot_si256(b, c); +} + +_static_inline cube_t +inverse(cube_t c) +{ + /* Method taken from Andrew Skalski's vcube[1]. The addition sequence + * was generated using [2]. + * [1] https://github.com/Voltara/vcube + * [2] http://wwwhomes.uni-bielefeld.de/achim/addition_chain.html + */ + cube_t v3, vi, vo, vp, ret; + + v3 = _mm256_shuffle_epi8(c, c); + v3 = _mm256_shuffle_epi8(v3, c); + vi = _mm256_shuffle_epi8(v3, v3); + vi = _mm256_shuffle_epi8(vi, vi); + vi = _mm256_shuffle_epi8(vi, vi); + vi = _mm256_shuffle_epi8(vi, v3); + vi = _mm256_shuffle_epi8(vi, vi); + vi = _mm256_shuffle_epi8(vi, vi); + vi = _mm256_shuffle_epi8(vi, vi); + vi = _mm256_shuffle_epi8(vi, vi); + vi = _mm256_shuffle_epi8(vi, c); + vi = _mm256_shuffle_epi8(vi, vi); + vi = _mm256_shuffle_epi8(vi, vi); + vi = _mm256_shuffle_epi8(vi, vi); + vi = _mm256_shuffle_epi8(vi, vi); + vi = _mm256_shuffle_epi8(vi, vi); + vi = _mm256_shuffle_epi8(vi, v3); + vi = _mm256_shuffle_epi8(vi, vi); + vi = _mm256_shuffle_epi8(vi, c); + + vo = _mm256_and_si256(c, _mm256_or_si256(_eo_avx2, _co2_avx2)); + vo = _mm256_shuffle_epi8(vo, vi); + vp = _mm256_andnot_si256(_mm256_or_si256(_eo_avx2, _co2_avx2), vi); + ret = _mm256_or_si256(vp, vo); + ret = cleanaftershuffle(ret); + + return invertco(ret); +} + +_static_inline int64_t +coord_co(cube_t c) +{ + cube_t co; + int64_t mem[4], ret, i, p; + + co = _mm256_and_si256(c, _co2_avx2); + _mm256_storeu_si256((__m256i *)mem, co); + + mem[0] >>= 5; + for (i = 0, ret = 0, p = 1; i < 7; i++, mem[0] >>= 8, p *= 3) + ret += (mem[0] & 3) * p; + + return ret; +} + +_static_inline int64_t +coord_csep(cube_t c) +{ + cube_t cp, shifted; + int64_t mask; + + cp = _mm256_and_si256(c, _cp_avx2); + shifted = _mm256_slli_epi32(cp, 5); + mask = _mm256_movemask_epi8(shifted); + + return mask & 0x7F; +} + +_static_inline int64_t +coord_cocsep(cube_t c) +{ + return (coord_co(c) << 7) + coord_csep(c); +} + +_static_inline int64_t +coord_eo(cube_t c) +{ + cube_t eo, shifted; + int64_t mask; + + eo = _mm256_and_si256(c, _eo_avx2); + shifted = _mm256_slli_epi32(eo, 3); + mask = _mm256_movemask_epi8(shifted); + + return mask >> 17; +} + +_static_inline int64_t +coord_esep(cube_t c) +{ + cube_t ep; + int64_t e, mem[4], i, j, jj, k, l, ret1, ret2, bit1, bit2, is1; + + ep = _mm256_and_si256(c, _ep_avx2); + _mm256_storeu_si256((__m256i *)mem, ep); + + mem[3] <<= 8; + ret1 = ret2 = 0; + k = l = 4; + for (i = 0, j = 0; i < 12; i++, mem[i/8 + 2] >>= 8) { + e = mem[i/8 + 2]; + + bit1 = (e & _esepbit1) >> 2; + bit2 = (e & _esepbit2) >> 3; + is1 = (1 - bit2) * bit1; + + ret1 += bit2 * binomial[11-i][k]; + k -= bit2; + + jj = j < 8; + ret2 += jj * is1 * binomial[7-(j*jj)][l]; + l -= is1; + j += (1-bit2); + } + + return ret1 * 70 + ret2; +} + +_static_inline void +copy_corners(cube_t *dest, cube_t src) +{ + *dest = _mm256_blend_epi32(*dest, src, 0x0F); +} + +_static_inline void +copy_edges(cube_t *dest, cube_t src) +{ + *dest = _mm256_blend_epi32(*dest, src, 0xF0); +} + +_static_inline void +set_eo(cube_t *cube, int64_t eo) +{ + int64_t eo12, eotop, eobot; + __m256i veo; + + eo12 = (eo << 1) + (_mm_popcnt_u64(eo) % 2); + eotop = (eo12 & (1 << 11)) << 17 | + (eo12 & (1 << 10)) << 10 | + (eo12 & (1 << 9)) << 3 | + (eo12 & (1 << 8)) >> 4; + eobot = (eo12 & (1 << 7)) << 53 | + (eo12 & (1 << 6)) << 46 | + (eo12 & (1 << 5)) << 39 | + (eo12 & (1 << 4)) << 32 | + (eo12 & (1 << 3)) << 25 | + (eo12 & (1 << 2)) << 18 | + (eo12 & (1 << 1)) << 11 | + (eo12 & 1) << 4; + veo = _mm256_set_epi64x(eotop, eobot, 0, 0); + + *cube = _mm256_andnot_si256(_eo_avx2, *cube); + *cube = _mm256_or_si256(*cube, veo); +} + +_static_inline cube_t +invcoord_esep(int64_t esep) +{ + cube_t eee, ret; + int64_t bit1, bit2, i, j, jj, k, l, s, v, w, is1, set1, set2; + uint8_t mem[32]; + uint8_t slice[3] = {0}; + + set1 = esep % 70; + set2 = esep / 70; + + for (i = 0, j = 0, k = 4, l = 4; i < 12; i++) { + v = binomial[11-i][k]; + jj = j < 8; + w = jj * binomial[7-(j*jj)][l]; + bit2 = set2 >= v; + bit1 = set1 >= w; + is1 = (1 - bit2) * bit1; + + set2 -= bit2 * v; + k -= bit2; + set1 -= is1 * w; + l -= is1; + j += (1-bit2); + s = 2*bit2 + (1-bit2)*bit1; + + mem[i+16] = (slice[s]++) | (uint8_t)(s << 2); + } + + ret = solved; + eee = _mm256_loadu_si256((__m256i_u *)&mem); + copy_edges(&ret, eee); + + return ret; +} diff --git a/src/arch/common.h b/src/arch/common.h @@ -0,0 +1,19 @@ +_static void pieces(cube_t *, uint8_t [static 8], uint8_t [static 12]); +_static_inline bool equal(cube_t, cube_t); +_static_inline cube_t invertco(cube_t); +_static_inline cube_t compose_epcpeo(cube_t, cube_t); +_static_inline cube_t compose_edges(cube_t, cube_t); +_static_inline cube_t compose_corners(cube_t, cube_t); +_static_inline cube_t compose(cube_t, cube_t); +_static_inline cube_t inverse(cube_t); + +_static_inline int64_t coord_co(cube_t); +_static_inline int64_t coord_csep(cube_t); +_static_inline int64_t coord_cocsep(cube_t); +_static_inline int64_t coord_eo(cube_t); +_static_inline int64_t coord_esep(cube_t); + +_static_inline void copy_corners(cube_t *, cube_t); +_static_inline void copy_edges(cube_t *, cube_t); +_static_inline void set_eo(cube_t *, int64_t); +_static_inline cube_t invcoord_esep(int64_t); diff --git a/src/arch/neon.h b/src/arch/neon.h @@ -0,0 +1,348 @@ +#define _co2_neon vdupq_n_u8(0x60) +#define _cocw_neon vdupq_n_u8(0x20) +#define _cp_neon vdupq_n_u8(0x07) +#define _ep_neon vcombine_u8(vdupq_n_u8(0x0F), vdupq_n_u8(0x0F)) +#define _eo_neon vcombine_u8(vdupq_n_u8(0x10), vdupq_n_u8(0x10)) + +// static cube +#define static_cube(c_ufr, c_ubl, c_dfl, c_dbr, c_ufl, c_ubr, c_dfr, c_dbl, \ + e_uf, e_ub, e_db, e_df, e_ur, e_ul, e_dl, e_dr, e_fr, e_fl, e_bl, e_br) \ + ((cube_t){ \ + .corner = {c_ufr, c_ubl, c_dfl, c_dbr, c_ufl, c_ubr, c_dfr, c_dbl, 0, 0, 0, 0, 0, 0, 0, 0}, \ + .edge = {e_uf, e_ub, e_db, e_df, e_ur, e_ul, e_dl, e_dr, e_fr, e_fl, e_bl, e_br, 0, 0, 0, 0}}) + +// zero cube +#define zero \ + (cube_t) \ + { \ + .corner = vdupq_n_u8(0), \ + .edge = vdupq_n_u8(0) \ + } + +// solved cube +#define solved static_cube( \ + 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) + +_static void +pieces(cube_t *cube, uint8_t c[static 8], uint8_t e[static 12]) +{ + // First 8 bytes of the corner vector are copied from the c array + vst1_u8(c, vget_low_u8(cube->corner)); + + // 12 bytes of the edge vector are copied from the e array + // First 8 bytes + vst1_u8(e, vget_low_u8(cube->edge)); + // Next 4 bytes + vst1_lane_u32((uint32_t *)(e + 8), vreinterpret_u32_u8(vget_high_u8(cube->edge)), 0); +} + +_static_inline bool +equal(cube_t c1, cube_t c2) +{ + uint8x16_t cmp_corner, cmp_edge; + uint64x2_t cmp_corner_u64, cmp_edge_u64; + uint64x2_t cmp_result; + + // compare the corner vectors + cmp_corner = vceqq_u8(c1.corner, c2.corner); + // compare the edge vectors + cmp_edge = vceqq_u8(c1.edge, c2.edge); + + // convert the comparison vectors to 64-bit vectors + cmp_corner_u64 = vreinterpretq_u64_u8(cmp_corner); + cmp_edge_u64 = vreinterpretq_u64_u8(cmp_edge); + + // combine the comparison vectors + cmp_result = vandq_u64(cmp_corner_u64, cmp_edge_u64); + + // check if all the bits are set + return vgetq_lane_u64(cmp_result, 0) == ~0ULL && vgetq_lane_u64(cmp_result, 1) == ~0ULL; +} + +_static_inline cube_t +invertco(cube_t c) +{ + cube_t ret; + uint8x16_t co, shleft, shright, summed, newco, cleanco; + + co = vandq_u8(c.corner, _co2_neon); + shleft = vshlq_n_u8(co, 1); + shright = vshrq_n_u8(co, 1); + summed = vorrq_u8(shleft, shright); + newco = vandq_u8(summed, _co2_neon); + cleanco = veorq_u8(c.corner, co); + ret.corner = vorrq_u8(cleanco, newco); + ret.edge = c.edge; + + return ret; +} + +_static_inline cube_t +compose_edges(cube_t c1, cube_t c2) +{ + cube_t ret = {0}; + ret.edge = compose_edges_slim(c1.edge, c2.edge); + return ret; +} + +_static_inline cube_t +compose_corners(cube_t c1, cube_t c2) +{ + cube_t ret = {0}; + ret.corner = compose_corners_slim(c1.corner, c2.corner); + return ret; +} + +_static_inline uint8x16_t +compose_edges_slim(uint8x16_t edge1, uint8x16_t edge2) +{ + // Masks + uint8x16_t p_bits = vdupq_n_u8(_pbits); + uint8x16_t eo_bit = vdupq_n_u8(_eobit); + + // Find the index and permutation + uint8x16_t p = vandq_u8(edge2, p_bits); + uint8x16_t piece1 = vqtbl1q_u8(edge1, p); + + // Calculate the orientation through XOR + uint8x16_t orien = vandq_u8(veorq_u8(edge2, piece1), eo_bit); + + // Combine the results + uint8x16_t ret = vorrq_u8(vandq_u8(piece1, p_bits), orien); + + // Mask to clear the last 32 bits of the result + uint8x16_t mask_last_32 = vsetq_lane_u32(0, vreinterpretq_u32_u8(ret), 3); + ret = vreinterpretq_u8_u32(mask_last_32); + + return ret; +} + +_static_inline uint8x16_t +compose_corners_slim(uint8x16_t corner1, uint8x16_t corner2) +{ + // Masks + uint8x16_t p_bits = vdupq_n_u8(_pbits); + uint8x16_t cobits = vdupq_n_u8(_cobits); + uint8x16_t cobits2 = vdupq_n_u8(_cobits2); + uint8x16_t twist_cw = vdupq_n_u8(_ctwist_cw); + + // Find the index and permutation + uint8x16_t p = vandq_u8(corner2, p_bits); + uint8x16_t piece1 = vqtbl1q_u8(corner1, p); + + // Calculate the orientation + uint8x16_t aux = vaddq_u8(vandq_u8(corner2, cobits), vandq_u8(piece1, cobits)); + uint8x16_t auy = vshrq_n_u8(vaddq_u8(aux, twist_cw), 2); + uint8x16_t orien = vandq_u8(vaddq_u8(aux, auy), cobits2); + + // Combine the results + uint8x16_t ret = vorrq_u8(vandq_u8(piece1, p_bits), orien); + + // Mask to clear the last 64 bits of the result + uint8x16_t mask_last_64 = vsetq_lane_u64(0, vreinterpretq_u64_u8(ret), 1); + ret = vreinterpretq_u8_u64(mask_last_64); + + return ret; +} + +_static_inline cube_t +compose(cube_t c1, cube_t c2) +{ + cube_t ret = {0}; + + ret.edge = compose_edges_slim(c1.edge, c2.edge); + ret.corner = compose_corners_slim(c1.corner, c2.corner); + + return ret; +} + +_static_inline cube_t +inverse(cube_t cube) +{ + uint8_t i, piece, orien; + cube_t ret; + + // Temp arrays to store the NEON vectors + uint8_t edges[16]; + uint8_t corners[16]; + + // Copy the NEON vectors to the arrays + vst1q_u8(edges, cube.edge); + vst1q_u8(corners, cube.corner); + + uint8_t edge_result[16] = {0}; + uint8_t corner_result[16] = {0}; + + // Process the edges + for (i = 0; i < 12; i++) + { + piece = edges[i]; + orien = piece & _eobit; + edge_result[piece & _pbits] = i | orien; + } + + // Process the corners + for (i = 0; i < 8; i++) + { + piece = corners[i]; + orien = ((piece << 1) | (piece >> 1)) & _cobits2; + corner_result[piece & _pbits] = i | orien; + } + + // Copy the results back to the NEON vectors + ret.edge = vld1q_u8(edge_result); + ret.corner = vld1q_u8(corner_result); + + return ret; +} + +_static_inline int64_t +coord_co(cube_t c) +{ + // Temp array to store the NEON vector + uint8_t mem[16]; + vst1q_u8(mem, c.corner); + + int i, p; + int64_t ret; + + for (ret = 0, i = 0, p = 1; i < 7; i++, p *= 3) + ret += p * (mem[i] >> _coshift); + + return ret; +} + +_static_inline int64_t +coord_csep(cube_t c) +{ + // Temp array to store the NEON vector + uint8_t mem[16]; + vst1q_u8(mem, c.corner); + + int64_t ret = 0; + int i, p; + for (ret = 0, i = 0, p = 1; i < 7; i++, p *= 2) + ret += p * ((mem[i] & _csepbit) >> 2); + + return ret; + return 0; +} + +_static_inline int64_t +coord_cocsep(cube_t c) +{ + return (coord_co(c) << 7) + coord_csep(c); +} + +_static_inline int64_t +coord_eo(cube_t c) +{ + int64_t ret = 0; + int64_t p = 1; + + // Temp array to store the NEON vector + uint8_t mem[16]; + vst1q_u8(mem, c.edge); + + for (int i = 1; i < 12; i++, p *= 2) + { + ret += p * (mem[i] >> _eoshift); + } + + return ret; +} + +_static_inline int64_t +coord_esep(cube_t c) +{ + int64_t i, j, jj, k, l, ret1, ret2, bit1, bit2, is1; + + // Temp array to store the NEON vector + uint8_t mem[16]; + vst1q_u8(mem, c.edge); + + for (i = 0, j = 0, k = 4, l = 4, ret1 = 0, ret2 = 0; i < 12; i++) + { + bit1 = (mem[i] & _esepbit1) >> 2; + bit2 = (mem[i] & _esepbit2) >> 3; + is1 = (1 - bit2) * bit1; + + ret1 += bit2 * binomial[11 - i][k]; + k -= bit2; + + jj = j < 8; + ret2 += jj * is1 * binomial[7 - (j * jj)][l]; + l -= is1; + j += (1 - bit2); + } + + return ret1 * 70 + ret2; +} + +_static_inline void +copy_corners(cube_t *dst, cube_t src) +{ + dst->corner = src.corner; +} + +_static_inline void +copy_edges(cube_t *dst, cube_t src) +{ + dst->edge = src.edge; +} + +_static_inline void +set_eo(cube_t *cube, int64_t eo) +{ + // Temp array to store the NEON vector + uint8_t mem[16]; + vst1q_u8(mem, cube->edge); + uint8_t i, sum, flip; + + for (sum = 0, i = 1; i < 12; i++, eo >>= 1) + { + flip = eo % 2; + sum += flip; + mem[i] = (mem[i] & ~_eobit) | (_eobit * flip); + } + mem[0] = (mem[0] & ~_eobit) | (_eobit * (sum % 2)); + + // Copy the results back to the NEON vector + cube->edge = vld1q_u8(mem); + return; +} + +_static_inline cube_t +invcoord_esep(int64_t esep) +{ + cube_t ret; + int64_t bit1, bit2, i, j, jj, k, l, s, v, w, is1, set1, set2; + uint8_t slice[3] = {0}; + + ret = solved; + uint8_t mem[16]; + set1 = esep % 70; + set2 = esep / 70; + + for (i = 0, j = 0, k = 4, l = 4; i < 12; i++) + { + v = binomial[11 - i][k]; + jj = j < 8; + w = jj * binomial[7 - (j * jj)][l]; + bit2 = set2 >= v; + bit1 = set1 >= w; + is1 = (1 - bit2) * bit1; + + set2 -= bit2 * v; + k -= bit2; + set1 -= is1 * w; + l -= is1; + j += (1 - bit2); + s = 2 * bit2 + (1 - bit2) * bit1; + + mem[i] = (slice[s]++) | (uint8_t)(s << 2); + } + + ret.edge = vld1q_u8(mem); + return ret; +} diff --git a/src/arch/portable.h b/src/arch/portable.h @@ -0,0 +1,272 @@ +#define static_cube(c_ufr, c_ubl, c_dfl, c_dbr, c_ufl, c_ubr, c_dfr, c_dbl, \ + e_uf, e_ub, e_db, e_df, e_ur, e_ul, e_dl, e_dr, e_fr, e_fl, e_bl, e_br) \ + ((cube_t) { \ + .corner = { c_ufr, c_ubl, c_dfl, c_dbr, c_ufl, c_ubr, c_dfr, c_dbl }, \ + .edge = { e_uf, e_ub, e_db, e_df, e_ur, e_ul, \ + e_dl, e_dr, e_fr, e_fl, e_bl, e_br } }) +#define zero static_cube( \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) +#define solved static_cube( \ + 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) + +_static void +pieces(cube_t *cube, uint8_t c[static 8], uint8_t e[static 12]) +{ + memcpy(c, cube->corner, 8); + memcpy(e, cube->edge, 12); +} + +_static_inline bool +equal(cube_t c1, cube_t c2) +{ + uint8_t i; + bool ret; + + ret = true; + for (i = 0; i < 8; i++) + ret = ret && c1.corner[i] == c2.corner[i]; + for (i = 0; i < 12; i++) + ret = ret && c1.edge[i] == c2.edge[i]; + + return ret; +} + +_static_inline cube_t +invertco(cube_t c) +{ + uint8_t i, piece, orien; + cube_t ret; + + ret = c; + for (i = 0; i < 8; i++) { + piece = c.corner[i]; + orien = ((piece << 1) | (piece >> 1)) & _cobits2; + ret.corner[i] = (piece & _pbits) | orien; + } + + return ret; +} + +_static_inline void +compose_edges_inplace(cube_t c1, cube_t c2, cube_t *ret) +{ + uint8_t i, piece1, piece2, p, orien; + + for (i = 0; i < 12; i++) { + piece2 = c2.edge[i]; + p = piece2 & _pbits; + piece1 = c1.edge[p]; + orien = (piece2 ^ piece1) & _eobit; + ret->edge[i] = (piece1 & _pbits) | orien; + } +} + +_static_inline void +compose_corners_inplace(cube_t c1, cube_t c2, cube_t *ret) +{ + uint8_t i, piece1, piece2, p, orien, aux, auy; + + for (i = 0; i < 8; i++) { + piece2 = c2.corner[i]; + p = piece2 & _pbits; + piece1 = c1.corner[p]; + aux = (piece2 & _cobits) + (piece1 & _cobits); + auy = (aux + _ctwist_cw) >> 2; + orien = (aux + auy) & _cobits2; + ret->corner[i] = (piece1 & _pbits) | orien; + } +} + +_static_inline cube_t +compose_edges(cube_t c1, cube_t c2) +{ + cube_t ret = zero; + + compose_edges_inplace(c1, c2, &ret); + + return ret; +} + +_static_inline cube_t +compose_corners(cube_t c1, cube_t c2) +{ + cube_t ret = zero; + + compose_corners_inplace(c1, c2, &ret); + + return ret; +} + +_static_inline cube_t +compose(cube_t c1, cube_t c2) +{ + cube_t ret = zero; + + compose_edges_inplace(c1, c2, &ret); + compose_corners_inplace(c1, c2, &ret); + + return ret; +} + +cube_t +inverse(cube_t cube) +{ + uint8_t i, piece, orien; + cube_t ret; + + for (i = 0; i < 12; i++) { + piece = cube.edge[i]; + orien = piece & _eobit; + ret.edge[piece & _pbits] = i | orien; + } + + for (i = 0; i < 8; i++) { + piece = cube.corner[i]; + orien = ((piece << 1) | (piece >> 1)) & _cobits2; + ret.corner[piece & _pbits] = i | orien; + } + + return ret; +} + +_static_inline int64_t +coord_co(cube_t c) +{ + int i, p; + int64_t ret; + + for (ret = 0, i = 0, p = 1; i < 7; i++, p *= 3) + ret += p * (c.corner[i] >> _coshift); + + return ret; +} + +/* +For corner separation, we consider the axis (a.k.a. tetrad) each +corner belongs to as 0 or 1 and we translate this sequence into binary. +Ignoring the last bit, we have a value up to 2^7, but not all values are +possible. Encoding this as a number from 0 to C(8,4) would save about 40% +of space, but we are not going to use this coordinate in large tables. +*/ +_static_inline int64_t +coord_csep(cube_t c) +{ + int i, p; + int64_t ret; + + for (ret = 0, i = 0, p = 1; i < 7; i++, p *= 2) + ret += p * ((c.corner[i] & _csepbit) >> 2); + + return ret; +} + +_static_inline int64_t +coord_cocsep(cube_t c) +{ + return (coord_co(c) << 7) + coord_csep(c); +} + +_static_inline int64_t +coord_eo(cube_t c) +{ + int i, p; + int64_t ret; + + for (ret = 0, i = 1, p = 1; i < 12; i++, p *= 2) + ret += p * (c.edge[i] >> _eoshift); + + return ret; +} + +/* +We encode the edge separation as a number from 0 to C(12,4)*C(8,4). +It can be seen as the composition of two "subset index" coordinates. +*/ +_static_inline int64_t +coord_esep(cube_t c) +{ + int64_t i, j, jj, k, l, ret1, ret2, bit1, bit2, is1; + + for (i = 0, j = 0, k = 4, l = 4, ret1 = 0, ret2 = 0; i < 12; i++) { + /* Simple version: + if (c.edge[i] & _esepbit2) { + ret1 += binomial[11-i][k--]; + } else { + if (c.edge[i] & _esepbit1) + ret2 += binomial[7-j][l--]; + j++; + } + */ + + bit1 = (c.edge[i] & _esepbit1) >> 2; + bit2 = (c.edge[i] & _esepbit2) >> 3; + is1 = (1 - bit2) * bit1; + + ret1 += bit2 * binomial[11-i][k]; + k -= bit2; + + jj = j < 8; + ret2 += jj * is1 * binomial[7-(j*jj)][l]; + l -= is1; + j += (1-bit2); + } + + return ret1 * 70 + ret2; +} + +_static_inline void +copy_corners(cube_t *dest, cube_t src) +{ + memcpy(&dest->corner, src.corner, sizeof(src.corner)); +} + +_static_inline void +copy_edges(cube_t *dest, cube_t src) +{ + memcpy(&dest->edge, src.edge, sizeof(src.edge)); +} + +_static_inline void +set_eo(cube_t *cube, int64_t eo) +{ + uint8_t i, sum, flip; + + for (sum = 0, i = 1; i < 12; i++, eo >>= 1) { + flip = eo % 2; + sum += flip; + cube->edge[i] = (cube->edge[i] & ~_eobit) | (_eobit * flip); + } + cube->edge[0] = (cube->edge[0] & ~_eobit) | (_eobit * (sum % 2)); +} + +_static_inline cube_t +invcoord_esep(int64_t esep) +{ + cube_t ret; + int64_t bit1, bit2, i, j, jj, k, l, s, v, w, is1, set1, set2; + uint8_t slice[3] = {0}; + + ret = solved; + set1 = esep % 70; + set2 = esep / 70; + + for (i = 0, j = 0, k = 4, l = 4; i < 12; i++) { + v = binomial[11-i][k]; + jj = j < 8; + w = jj * binomial[7-(j*jj)][l]; + bit2 = set2 >= v; + bit1 = set1 >= w; + is1 = (1 - bit2) * bit1; + + set2 -= bit2 * v; + k -= bit2; + set1 -= is1 * w; + l -= is1; + j += (1-bit2); + s = 2*bit2 + (1-bit2)*bit1; + + ret.edge[i] = (slice[s]++) | (uint8_t)(s << 2); + } + + return ret; +} diff --git a/src/constant_cubes.h b/src/core/constant_cubes.h diff --git a/src/core/core.h b/src/core/core.h @@ -0,0 +1,13 @@ +#include "constant_cubes.h" +#include "io_moves.h" +#include "io_trans.h" +#include "cube.h" +#include "io_cube.h" +#include "moves.h" + +/* TODO: work in progress */ +#if 0 +#include "transform.h" +#else +#include "transform_with_switch.h" +#endif diff --git a/src/cube_generic.h b/src/core/cube.h diff --git a/src/io_cube.h b/src/core/io_cube.h diff --git a/src/core/io_moves.h b/src/core/io_moves.h @@ -0,0 +1,62 @@ +_static uint8_t readmove(char); +_static uint8_t readmodifier(char); +_static int writemoves(uint8_t *, int, char *); + +_static uint8_t +readmove(char c) +{ + switch (c) { + case 'U': + return _move_U; + case 'D': + return _move_D; + case 'R': + return _move_R; + case 'L': + return _move_L; + case 'F': + return _move_F; + case 'B': + return _move_B; + default: + return _error; + } +} + +_static uint8_t +readmodifier(char c) +{ + switch (c) { + case '1': /* Fallthrough */ + case '2': /* Fallthrough */ + case '3': + return c - '0' - 1; + case '\'': + return 2; + default: + return 0; + } +} + +_static int +writemoves(uint8_t *m, int n, char *buf) +{ + int i; + size_t len; + const char *s; + char *b; + + for (i = 0, b = buf; i < n; i++, b++) { + s = movestr[m[i]]; + len = strlen(s); + memcpy(b, s, len); + b += len; + *b = ' '; + } + + if (b != buf) + b--; /* Remove last space */ + *b = '\0'; + + return b - buf; +} diff --git a/src/core/io_trans.h b/src/core/io_trans.h @@ -0,0 +1,25 @@ +_static uint8_t readtrans(const char *); +_static void writetrans(uint8_t, char *); + +_static uint8_t +readtrans(const char *buf) +{ + uint8_t t; + + for (t = 0; t < 48; t++) + if (!strncmp(buf, transstr[t], 11)) + return t; + + LOG("readtrans error\n"); + return _error; +} + +_static void +writetrans(uint8_t t, char *buf) +{ + if (t >= 48) + memcpy(buf, "error trans", 11); + else + memcpy(buf, transstr[t], 11); + buf[11] = '\0'; +} diff --git a/src/moves.h b/src/core/moves.h diff --git a/src/cube_transform.h b/src/core/transform.h diff --git a/src/cube_transform_with_switch.h b/src/core/transform_with_switch.h diff --git a/src/cube.c b/src/cube.c @@ -1,55 +0,0 @@ -#include <inttypes.h> -#include <stdarg.h> -#include <stdbool.h> -#include <string.h> - -void (*nissy_log)(const char *, ...); - -#define LOG(...) if (nissy_log != NULL) nissy_log(__VA_ARGS__); - -#ifdef DEBUG -#define _static -#define _static_inline -#define DBG_WARN(condition, ...) if (!(condition)) LOG(__VA_ARGS__); -#define DBG_ASSERT(condition, retval, ...) \ - if (!(condition)) { LOG(__VA_ARGS__); return retval; } -#else -#define _static static -#define _static_inline static inline -#define DBG_WARN(condition, ...) -#define DBG_ASSERT(condition, retval, ...) -#endif - -#include "constants.h" -#include "utils.h" - -#if defined(CUBE_AVX2) -#include <immintrin.h> -#include "cube_avx2.h" -#elif defined(CUBE_NEON) -#include <stdlib.h> -#include <arm_neon.h> -#include "cube_neon.h" -#else -#include <stdlib.h> /* TODO: check if can be removed */ -#include "cube_portable.h" -#endif - -#include "io_move_trans.h" -#include "constant_cubes.h" -#include "cube_generic.h" -#include "io_cube.h" - -/* TODO: work in progress */ -#if 0 -#include "constant_cubes_transform.h" -#include "cube_transform.h" -#else -#include "cube_transform_with_switch.h" -#endif - -#include "moves.h" -#include "solve_h48.h" -#include "solve_generic.h" - -#include "cube_public.h" diff --git a/src/cube_avx2.h b/src/cube_avx2.h @@ -1,341 +0,0 @@ -typedef __m256i cube_t; - -#define _co2_avx2 _mm256_set_epi64x(0, 0, 0, INT64_C(0x6060606060606060)) -#define _cocw_avx2 _mm256_set_epi64x(0, 0, 0, INT64_C(0x2020202020202020)) -#define _cp_avx2 _mm256_set_epi64x(0, 0, 0, INT64_C(0x0707070707070707)) -#define _ep_avx2 \ - _mm256_set_epi64x(INT64_C(0x0F0F0F0F), INT64_C(0x0F0F0F0F0F0F0F0F), 0, 0) -#define _eo_avx2 \ - _mm256_set_epi64x(INT64_C(0x10101010), INT64_C(0x1010101010101010), 0, 0) - -#define static_cube(c_ufr, c_ubl, c_dfl, c_dbr, c_ufl, c_ubr, c_dfr, c_dbl, \ - e_uf, e_ub, e_db, e_df, e_ur, e_ul, e_dl, e_dr, e_fr, e_fl, e_bl, e_br) \ - _mm256_set_epi8(0, 0, 0, 0, e_br, e_bl, e_fl, e_fr, \ - e_dr, e_dl, e_ul, e_ur, e_df, e_db, e_ub, e_uf, \ - 0, 0, 0, 0, 0, 0, 0, 0, \ - c_dbl, c_dfr, c_ubr, c_ufl, c_dbr, c_dfl, c_ubl, c_ufr) -#define zero _mm256_set_epi64x(0, 0, 0, 0) -#define solved static_cube( \ - 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) - -_static void pieces(cube_t *, uint8_t [static 8], uint8_t [static 12]); -_static_inline bool equal(cube_t, cube_t); -_static_inline cube_t invertco(cube_t); -_static_inline cube_t compose_epcpeo(cube_t, cube_t); -_static_inline cube_t compose_edges(cube_t, cube_t); -_static_inline cube_t compose_corners(cube_t, cube_t); -_static_inline cube_t compose(cube_t, cube_t); -_static_inline cube_t inverse(cube_t); - -_static_inline int64_t coord_co(cube_t); -_static_inline int64_t coord_csep(cube_t); -_static_inline int64_t coord_cocsep(cube_t); -_static_inline int64_t coord_eo(cube_t); -_static_inline int64_t coord_esep(cube_t); - -_static_inline void copy_corners(cube_t *, cube_t); -_static_inline void copy_edges(cube_t *, cube_t); -_static_inline void set_eo(cube_t *, int64_t); -_static_inline cube_t invcoord_esep(int64_t); - -_static void -pieces(cube_t *cube, uint8_t c[static 8], uint8_t e[static 12]) -{ - uint8_t aux[32]; - - _mm256_storeu_si256((__m256i_u *)aux, *cube); - memcpy(c, aux, 8); - memcpy(e, aux+16, 12); -} - -_static_inline bool -equal(cube_t c1, cube_t c2) -{ - int32_t mask; - __m256i cmp; - - cmp = _mm256_cmpeq_epi8(c1, c2); - mask = _mm256_movemask_epi8(cmp); - - return mask == ~0; -} - -_static_inline cube_t -invertco(cube_t c) -{ - cube_t co, shleft, shright, summed, newco, cleanco, ret; - - co = _mm256_and_si256(c, _co2_avx2); - shleft = _mm256_slli_epi32(co, 1); - shright = _mm256_srli_epi32(co, 1); - summed = _mm256_or_si256(shleft, shright); - newco = _mm256_and_si256(summed, _co2_avx2); - cleanco = _mm256_xor_si256(c, co); - ret = _mm256_or_si256(cleanco, newco); - - return ret; -} - -_static_inline cube_t -compose_epcpeo(cube_t c1, cube_t c2) -{ - cube_t b, s, eo2; - - /* Permute and clean unused bits */ - s = _mm256_shuffle_epi8(c1, c2); - b = _mm256_set_epi8( - ~0, ~0, ~0, ~0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, 0, 0, 0, 0, 0, 0, 0, 0 - ); - s = _mm256_andnot_si256(b, s); - - /* Change EO */ - eo2 = _mm256_and_si256(c2, _eo_avx2); - s = _mm256_xor_si256(s, eo2); - - return s; -} - -_static_inline cube_t -compose_edges(cube_t c1, cube_t c2) -{ - return compose_epcpeo(c1, c2); -} - -_static_inline cube_t -compose_corners(cube_t c1, cube_t c2) -{ - /* - * We do a full compose. Minor optimizations are possible, like - * saving one instruction by not doing EO, but it should not - * be significant. - */ - return compose(c1, c2); -} - -_static_inline cube_t -compose(cube_t c1, cube_t c2) -{ - cube_t s, co1, co2, aux, auy1, auy2, auz1, auz2; - - s = compose_epcpeo(c1, c2); - - /* Change CO */ - co1 = _mm256_and_si256(s, _co2_avx2); - co2 = _mm256_and_si256(c2, _co2_avx2); - aux = _mm256_add_epi8(co1, co2); - auy1 = _mm256_add_epi8(aux, _cocw_avx2); - auy2 = _mm256_srli_epi32(auy1, 2); - auz1 = _mm256_add_epi8(aux, auy2); - auz2 = _mm256_and_si256(auz1, _co2_avx2); - - /* Put together */ - s = _mm256_andnot_si256(_co2_avx2, s); - s = _mm256_or_si256(s, auz2); - - return s; -} - -_static_inline cube_t -cleanaftershuffle(cube_t c) -{ - __m256i b; - - b = _mm256_set_epi8( - ~0, ~0, ~0, ~0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, 0, 0, 0, 0, 0, 0, 0, 0 - ); - - return _mm256_andnot_si256(b, c); -} - -_static_inline cube_t -inverse(cube_t c) -{ - /* Method taken from Andrew Skalski's vcube[1]. The addition sequence - * was generated using [2]. - * [1] https://github.com/Voltara/vcube - * [2] http://wwwhomes.uni-bielefeld.de/achim/addition_chain.html - */ - cube_t v3, vi, vo, vp, ret; - - v3 = _mm256_shuffle_epi8(c, c); - v3 = _mm256_shuffle_epi8(v3, c); - vi = _mm256_shuffle_epi8(v3, v3); - vi = _mm256_shuffle_epi8(vi, vi); - vi = _mm256_shuffle_epi8(vi, vi); - vi = _mm256_shuffle_epi8(vi, v3); - vi = _mm256_shuffle_epi8(vi, vi); - vi = _mm256_shuffle_epi8(vi, vi); - vi = _mm256_shuffle_epi8(vi, vi); - vi = _mm256_shuffle_epi8(vi, vi); - vi = _mm256_shuffle_epi8(vi, c); - vi = _mm256_shuffle_epi8(vi, vi); - vi = _mm256_shuffle_epi8(vi, vi); - vi = _mm256_shuffle_epi8(vi, vi); - vi = _mm256_shuffle_epi8(vi, vi); - vi = _mm256_shuffle_epi8(vi, vi); - vi = _mm256_shuffle_epi8(vi, v3); - vi = _mm256_shuffle_epi8(vi, vi); - vi = _mm256_shuffle_epi8(vi, c); - - vo = _mm256_and_si256(c, _mm256_or_si256(_eo_avx2, _co2_avx2)); - vo = _mm256_shuffle_epi8(vo, vi); - vp = _mm256_andnot_si256(_mm256_or_si256(_eo_avx2, _co2_avx2), vi); - ret = _mm256_or_si256(vp, vo); - ret = cleanaftershuffle(ret); - - return invertco(ret); -} - -_static_inline int64_t -coord_co(cube_t c) -{ - cube_t co; - int64_t mem[4], ret, i, p; - - co = _mm256_and_si256(c, _co2_avx2); - _mm256_storeu_si256((__m256i *)mem, co); - - mem[0] >>= 5; - for (i = 0, ret = 0, p = 1; i < 7; i++, mem[0] >>= 8, p *= 3) - ret += (mem[0] & 3) * p; - - return ret; -} - -_static_inline int64_t -coord_csep(cube_t c) -{ - cube_t cp, shifted; - int64_t mask; - - cp = _mm256_and_si256(c, _cp_avx2); - shifted = _mm256_slli_epi32(cp, 5); - mask = _mm256_movemask_epi8(shifted); - - return mask & 0x7F; -} - -_static_inline int64_t -coord_cocsep(cube_t c) -{ - return (coord_co(c) << 7) + coord_csep(c); -} - -_static_inline int64_t -coord_eo(cube_t c) -{ - cube_t eo, shifted; - int64_t mask; - - eo = _mm256_and_si256(c, _eo_avx2); - shifted = _mm256_slli_epi32(eo, 3); - mask = _mm256_movemask_epi8(shifted); - - return mask >> 17; -} - -_static_inline int64_t -coord_esep(cube_t c) -{ - cube_t ep; - int64_t e, mem[4], i, j, jj, k, l, ret1, ret2, bit1, bit2, is1; - - ep = _mm256_and_si256(c, _ep_avx2); - _mm256_storeu_si256((__m256i *)mem, ep); - - mem[3] <<= 8; - ret1 = ret2 = 0; - k = l = 4; - for (i = 0, j = 0; i < 12; i++, mem[i/8 + 2] >>= 8) { - e = mem[i/8 + 2]; - - bit1 = (e & _esepbit1) >> 2; - bit2 = (e & _esepbit2) >> 3; - is1 = (1 - bit2) * bit1; - - ret1 += bit2 * binomial[11-i][k]; - k -= bit2; - - jj = j < 8; - ret2 += jj * is1 * binomial[7-(j*jj)][l]; - l -= is1; - j += (1-bit2); - } - - return ret1 * 70 + ret2; -} - -_static_inline void -copy_corners(cube_t *dest, cube_t src) -{ - *dest = _mm256_blend_epi32(*dest, src, 0x0F); -} - -_static_inline void -copy_edges(cube_t *dest, cube_t src) -{ - *dest = _mm256_blend_epi32(*dest, src, 0xF0); -} - -_static_inline void -set_eo(cube_t *cube, int64_t eo) -{ - int64_t eo12, eotop, eobot; - __m256i veo; - - eo12 = (eo << 1) + (_mm_popcnt_u64(eo) % 2); - eotop = (eo12 & (1 << 11)) << 17 | - (eo12 & (1 << 10)) << 10 | - (eo12 & (1 << 9)) << 3 | - (eo12 & (1 << 8)) >> 4; - eobot = (eo12 & (1 << 7)) << 53 | - (eo12 & (1 << 6)) << 46 | - (eo12 & (1 << 5)) << 39 | - (eo12 & (1 << 4)) << 32 | - (eo12 & (1 << 3)) << 25 | - (eo12 & (1 << 2)) << 18 | - (eo12 & (1 << 1)) << 11 | - (eo12 & 1) << 4; - veo = _mm256_set_epi64x(eotop, eobot, 0, 0); - - *cube = _mm256_andnot_si256(_eo_avx2, *cube); - *cube = _mm256_or_si256(*cube, veo); -} - -_static_inline cube_t -invcoord_esep(int64_t esep) -{ - cube_t eee, ret; - int64_t bit1, bit2, i, j, jj, k, l, s, v, w, is1, set1, set2; - uint8_t mem[32]; - uint8_t slice[3] = {0}; - - set1 = esep % 70; - set2 = esep / 70; - - for (i = 0, j = 0, k = 4, l = 4; i < 12; i++) { - v = binomial[11-i][k]; - jj = j < 8; - w = jj * binomial[7-(j*jj)][l]; - bit2 = set2 >= v; - bit1 = set1 >= w; - is1 = (1 - bit2) * bit1; - - set2 -= bit2 * v; - k -= bit2; - set1 -= is1 * w; - l -= is1; - j += (1-bit2); - s = 2*bit2 + (1-bit2)*bit1; - - mem[i+16] = (slice[s]++) | (uint8_t)(s << 2); - } - - ret = solved; - eee = _mm256_loadu_si256((__m256i_u *)&mem); - copy_edges(&ret, eee); - - return ret; -} diff --git a/src/cube_neon.h b/src/cube_neon.h @@ -1,377 +0,0 @@ -// cube_t -typedef struct -{ - uint8x16_t corner; - uint8x16_t edge; -} cube_t; - -#define _co2_neon vdupq_n_u8(0x60) -#define _cocw_neon vdupq_n_u8(0x20) -#define _cp_neon vdupq_n_u8(0x07) -#define _ep_neon vcombine_u8(vdupq_n_u8(0x0F), vdupq_n_u8(0x0F)) -#define _eo_neon vcombine_u8(vdupq_n_u8(0x10), vdupq_n_u8(0x10)) - -// static cube -#define static_cube(c_ufr, c_ubl, c_dfl, c_dbr, c_ufl, c_ubr, c_dfr, c_dbl, \ - e_uf, e_ub, e_db, e_df, e_ur, e_ul, e_dl, e_dr, e_fr, e_fl, e_bl, e_br) \ - ((cube_t){ \ - .corner = {c_ufr, c_ubl, c_dfl, c_dbr, c_ufl, c_ubr, c_dfr, c_dbl, 0, 0, 0, 0, 0, 0, 0, 0}, \ - .edge = {e_uf, e_ub, e_db, e_df, e_ur, e_ul, e_dl, e_dr, e_fr, e_fl, e_bl, e_br, 0, 0, 0, 0}}) - -// zero cube -#define zero \ - (cube_t) \ - { \ - .corner = vdupq_n_u8(0), \ - .edge = vdupq_n_u8(0) \ - } - -// solved cube -#define solved static_cube( \ - 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) - -// Functions -_static void pieces(cube_t *, uint8_t[static 8], uint8_t[static 12]); -_static_inline bool equal(cube_t, cube_t); -_static_inline cube_t invertco(cube_t); -_static_inline cube_t compose_edges(cube_t, cube_t); -_static_inline cube_t compose_corners(cube_t, cube_t); -_static_inline uint8x16_t compose_edges_slim(uint8x16_t, uint8x16_t); -_static_inline uint8x16_t compose_corners_slim(uint8x16_t, uint8x16_t); -_static_inline cube_t compose(cube_t, cube_t); -_static_inline cube_t inverse(cube_t); - -_static_inline int64_t coord_co(cube_t); -_static_inline int64_t coord_csep(cube_t); -_static_inline int64_t coord_cocsep(cube_t); -_static_inline int64_t coord_eo(cube_t); -_static_inline int64_t coord_esep(cube_t); - -_static_inline void copy_corners(cube_t *, cube_t); -_static_inline void copy_edges(cube_t *, cube_t); -_static_inline void set_eo(cube_t *, int64_t); -_static_inline cube_t invcoord_esep(int64_t); - -_static void -pieces(cube_t *cube, uint8_t c[static 8], uint8_t e[static 12]) -{ - // First 8 bytes of the corner vector are copied from the c array - vst1_u8(c, vget_low_u8(cube->corner)); - - // 12 bytes of the edge vector are copied from the e array - // First 8 bytes - vst1_u8(e, vget_low_u8(cube->edge)); - // Next 4 bytes - vst1_lane_u32((uint32_t *)(e + 8), vreinterpret_u32_u8(vget_high_u8(cube->edge)), 0); -} - -_static_inline bool -equal(cube_t c1, cube_t c2) -{ - uint8x16_t cmp_corner, cmp_edge; - uint64x2_t cmp_corner_u64, cmp_edge_u64; - uint64x2_t cmp_result; - - // compare the corner vectors - cmp_corner = vceqq_u8(c1.corner, c2.corner); - // compare the edge vectors - cmp_edge = vceqq_u8(c1.edge, c2.edge); - - // convert the comparison vectors to 64-bit vectors - cmp_corner_u64 = vreinterpretq_u64_u8(cmp_corner); - cmp_edge_u64 = vreinterpretq_u64_u8(cmp_edge); - - // combine the comparison vectors - cmp_result = vandq_u64(cmp_corner_u64, cmp_edge_u64); - - // check if all the bits are set - return vgetq_lane_u64(cmp_result, 0) == ~0ULL && vgetq_lane_u64(cmp_result, 1) == ~0ULL; -} - -_static_inline cube_t -invertco(cube_t c) -{ - cube_t ret; - uint8x16_t co, shleft, shright, summed, newco, cleanco; - - co = vandq_u8(c.corner, _co2_neon); - shleft = vshlq_n_u8(co, 1); - shright = vshrq_n_u8(co, 1); - summed = vorrq_u8(shleft, shright); - newco = vandq_u8(summed, _co2_neon); - cleanco = veorq_u8(c.corner, co); - ret.corner = vorrq_u8(cleanco, newco); - ret.edge = c.edge; - - return ret; -} - -_static_inline cube_t -compose_edges(cube_t c1, cube_t c2) -{ - cube_t ret = {0}; - ret.edge = compose_edges_slim(c1.edge, c2.edge); - return ret; -} - -_static_inline cube_t -compose_corners(cube_t c1, cube_t c2) -{ - cube_t ret = {0}; - ret.corner = compose_corners_slim(c1.corner, c2.corner); - return ret; -} - -_static_inline uint8x16_t -compose_edges_slim(uint8x16_t edge1, uint8x16_t edge2) -{ - // Masks - uint8x16_t p_bits = vdupq_n_u8(_pbits); - uint8x16_t eo_bit = vdupq_n_u8(_eobit); - - // Find the index and permutation - uint8x16_t p = vandq_u8(edge2, p_bits); - uint8x16_t piece1 = vqtbl1q_u8(edge1, p); - - // Calculate the orientation through XOR - uint8x16_t orien = vandq_u8(veorq_u8(edge2, piece1), eo_bit); - - // Combine the results - uint8x16_t ret = vorrq_u8(vandq_u8(piece1, p_bits), orien); - - // Mask to clear the last 32 bits of the result - uint8x16_t mask_last_32 = vsetq_lane_u32(0, vreinterpretq_u32_u8(ret), 3); - ret = vreinterpretq_u8_u32(mask_last_32); - - return ret; -} - -_static_inline uint8x16_t -compose_corners_slim(uint8x16_t corner1, uint8x16_t corner2) -{ - // Masks - uint8x16_t p_bits = vdupq_n_u8(_pbits); - uint8x16_t cobits = vdupq_n_u8(_cobits); - uint8x16_t cobits2 = vdupq_n_u8(_cobits2); - uint8x16_t twist_cw = vdupq_n_u8(_ctwist_cw); - - // Find the index and permutation - uint8x16_t p = vandq_u8(corner2, p_bits); - uint8x16_t piece1 = vqtbl1q_u8(corner1, p); - - // Calculate the orientation - uint8x16_t aux = vaddq_u8(vandq_u8(corner2, cobits), vandq_u8(piece1, cobits)); - uint8x16_t auy = vshrq_n_u8(vaddq_u8(aux, twist_cw), 2); - uint8x16_t orien = vandq_u8(vaddq_u8(aux, auy), cobits2); - - // Combine the results - uint8x16_t ret = vorrq_u8(vandq_u8(piece1, p_bits), orien); - - // Mask to clear the last 64 bits of the result - uint8x16_t mask_last_64 = vsetq_lane_u64(0, vreinterpretq_u64_u8(ret), 1); - ret = vreinterpretq_u8_u64(mask_last_64); - - return ret; -} - -_static_inline cube_t -compose(cube_t c1, cube_t c2) -{ - cube_t ret = {0}; - - ret.edge = compose_edges_slim(c1.edge, c2.edge); - ret.corner = compose_corners_slim(c1.corner, c2.corner); - - return ret; -} - -_static_inline cube_t -inverse(cube_t cube) -{ - uint8_t i, piece, orien; - cube_t ret; - - // Temp arrays to store the NEON vectors - uint8_t edges[16]; - uint8_t corners[16]; - - // Copy the NEON vectors to the arrays - vst1q_u8(edges, cube.edge); - vst1q_u8(corners, cube.corner); - - uint8_t edge_result[16] = {0}; - uint8_t corner_result[16] = {0}; - - // Process the edges - for (i = 0; i < 12; i++) - { - piece = edges[i]; - orien = piece & _eobit; - edge_result[piece & _pbits] = i | orien; - } - - // Process the corners - for (i = 0; i < 8; i++) - { - piece = corners[i]; - orien = ((piece << 1) | (piece >> 1)) & _cobits2; - corner_result[piece & _pbits] = i | orien; - } - - // Copy the results back to the NEON vectors - ret.edge = vld1q_u8(edge_result); - ret.corner = vld1q_u8(corner_result); - - return ret; -} - -_static_inline int64_t -coord_co(cube_t c) -{ - // Temp array to store the NEON vector - uint8_t mem[16]; - vst1q_u8(mem, c.corner); - - int i, p; - int64_t ret; - - for (ret = 0, i = 0, p = 1; i < 7; i++, p *= 3) - ret += p * (mem[i] >> _coshift); - - return ret; -} - -_static_inline int64_t -coord_csep(cube_t c) -{ - // Temp array to store the NEON vector - uint8_t mem[16]; - vst1q_u8(mem, c.corner); - - int64_t ret = 0; - int i, p; - for (ret = 0, i = 0, p = 1; i < 7; i++, p *= 2) - ret += p * ((mem[i] & _csepbit) >> 2); - - return ret; - return 0; -} - -_static_inline int64_t -coord_cocsep(cube_t c) -{ - return (coord_co(c) << 7) + coord_csep(c); -} - -_static_inline int64_t -coord_eo(cube_t c) -{ - int64_t ret = 0; - int64_t p = 1; - - // Temp array to store the NEON vector - uint8_t mem[16]; - vst1q_u8(mem, c.edge); - - for (int i = 1; i < 12; i++, p *= 2) - { - ret += p * (mem[i] >> _eoshift); - } - - return ret; -} - -_static_inline int64_t -coord_esep(cube_t c) -{ - int64_t i, j, jj, k, l, ret1, ret2, bit1, bit2, is1; - - // Temp array to store the NEON vector - uint8_t mem[16]; - vst1q_u8(mem, c.edge); - - for (i = 0, j = 0, k = 4, l = 4, ret1 = 0, ret2 = 0; i < 12; i++) - { - bit1 = (mem[i] & _esepbit1) >> 2; - bit2 = (mem[i] & _esepbit2) >> 3; - is1 = (1 - bit2) * bit1; - - ret1 += bit2 * binomial[11 - i][k]; - k -= bit2; - - jj = j < 8; - ret2 += jj * is1 * binomial[7 - (j * jj)][l]; - l -= is1; - j += (1 - bit2); - } - - return ret1 * 70 + ret2; -} - -_static_inline void -copy_corners(cube_t *dst, cube_t src) -{ - dst->corner = src.corner; -} - -_static_inline void -copy_edges(cube_t *dst, cube_t src) -{ - dst->edge = src.edge; -} - -_static_inline void -set_eo(cube_t *cube, int64_t eo) -{ - // Temp array to store the NEON vector - uint8_t mem[16]; - vst1q_u8(mem, cube->edge); - uint8_t i, sum, flip; - - for (sum = 0, i = 1; i < 12; i++, eo >>= 1) - { - flip = eo % 2; - sum += flip; - mem[i] = (mem[i] & ~_eobit) | (_eobit * flip); - } - mem[0] = (mem[0] & ~_eobit) | (_eobit * (sum % 2)); - - // Copy the results back to the NEON vector - cube->edge = vld1q_u8(mem); - return; -} - -_static_inline cube_t -invcoord_esep(int64_t esep) -{ - cube_t ret; - int64_t bit1, bit2, i, j, jj, k, l, s, v, w, is1, set1, set2; - uint8_t slice[3] = {0}; - - ret = solved; - uint8_t mem[16]; - set1 = esep % 70; - set2 = esep / 70; - - for (i = 0, j = 0, k = 4, l = 4; i < 12; i++) - { - v = binomial[11 - i][k]; - jj = j < 8; - w = jj * binomial[7 - (j * jj)][l]; - bit2 = set2 >= v; - bit1 = set1 >= w; - is1 = (1 - bit2) * bit1; - - set2 -= bit2 * v; - k -= bit2; - set1 -= is1 * w; - l -= is1; - j += (1 - bit2); - s = 2 * bit2 + (1 - bit2) * bit1; - - mem[i] = (slice[s]++) | (uint8_t)(s << 2); - } - - ret.edge = vld1q_u8(mem); - return ret; -} diff --git a/src/cube_portable.h b/src/cube_portable.h @@ -1,298 +0,0 @@ -typedef struct { - uint8_t corner[8]; - uint8_t edge[12]; -} cube_t; - -#define static_cube(c_ufr, c_ubl, c_dfl, c_dbr, c_ufl, c_ubr, c_dfr, c_dbl, \ - e_uf, e_ub, e_db, e_df, e_ur, e_ul, e_dl, e_dr, e_fr, e_fl, e_bl, e_br) \ - ((cube_t) { \ - .corner = { c_ufr, c_ubl, c_dfl, c_dbr, c_ufl, c_ubr, c_dfr, c_dbl }, \ - .edge = { e_uf, e_ub, e_db, e_df, e_ur, e_ul, \ - e_dl, e_dr, e_fr, e_fl, e_bl, e_br } }) -#define zero static_cube( \ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) -#define solved static_cube( \ - 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) - -_static void pieces(cube_t *, uint8_t [static 8], uint8_t [static 12]); -_static_inline bool equal(cube_t, cube_t); -_static_inline cube_t invertco(cube_t); -_static_inline void compose_edges_inplace(cube_t, cube_t, cube_t *); -_static_inline void compose_corners_inplace(cube_t, cube_t, cube_t *); -_static_inline cube_t compose_edges(cube_t, cube_t); -_static_inline cube_t compose_corners(cube_t, cube_t); -_static_inline cube_t compose(cube_t, cube_t); -_static_inline cube_t inverse(cube_t); - -_static_inline int64_t coord_co(cube_t); -_static_inline int64_t coord_csep(cube_t); -_static_inline int64_t coord_cocsep(cube_t); -_static_inline int64_t coord_eo(cube_t); -_static_inline int64_t coord_esep(cube_t); - -_static_inline void copy_corners(cube_t *, cube_t); -_static_inline void copy_edges(cube_t *, cube_t); -_static_inline void set_eo(cube_t *, int64_t); -_static_inline cube_t invcoord_esep(int64_t); - -_static void -pieces(cube_t *cube, uint8_t c[static 8], uint8_t e[static 12]) -{ - memcpy(c, cube->corner, 8); - memcpy(e, cube->edge, 12); -} - -_static_inline bool -equal(cube_t c1, cube_t c2) -{ - uint8_t i; - bool ret; - - ret = true; - for (i = 0; i < 8; i++) - ret = ret && c1.corner[i] == c2.corner[i]; - for (i = 0; i < 12; i++) - ret = ret && c1.edge[i] == c2.edge[i]; - - return ret; -} - -_static_inline cube_t -invertco(cube_t c) -{ - uint8_t i, piece, orien; - cube_t ret; - - ret = c; - for (i = 0; i < 8; i++) { - piece = c.corner[i]; - orien = ((piece << 1) | (piece >> 1)) & _cobits2; - ret.corner[i] = (piece & _pbits) | orien; - } - - return ret; -} - -_static_inline void -compose_edges_inplace(cube_t c1, cube_t c2, cube_t *ret) -{ - uint8_t i, piece1, piece2, p, orien; - - for (i = 0; i < 12; i++) { - piece2 = c2.edge[i]; - p = piece2 & _pbits; - piece1 = c1.edge[p]; - orien = (piece2 ^ piece1) & _eobit; - ret->edge[i] = (piece1 & _pbits) | orien; - } -} - -_static_inline void -compose_corners_inplace(cube_t c1, cube_t c2, cube_t *ret) -{ - uint8_t i, piece1, piece2, p, orien, aux, auy; - - for (i = 0; i < 8; i++) { - piece2 = c2.corner[i]; - p = piece2 & _pbits; - piece1 = c1.corner[p]; - aux = (piece2 & _cobits) + (piece1 & _cobits); - auy = (aux + _ctwist_cw) >> 2; - orien = (aux + auy) & _cobits2; - ret->corner[i] = (piece1 & _pbits) | orien; - } -} - -_static_inline cube_t -compose_edges(cube_t c1, cube_t c2) -{ - cube_t ret = zero; - - compose_edges_inplace(c1, c2, &ret); - - return ret; -} - -_static_inline cube_t -compose_corners(cube_t c1, cube_t c2) -{ - cube_t ret = zero; - - compose_corners_inplace(c1, c2, &ret); - - return ret; -} - -_static_inline cube_t -compose(cube_t c1, cube_t c2) -{ - cube_t ret = zero; - - compose_edges_inplace(c1, c2, &ret); - compose_corners_inplace(c1, c2, &ret); - - return ret; -} - -cube_t -inverse(cube_t cube) -{ - uint8_t i, piece, orien; - cube_t ret; - - for (i = 0; i < 12; i++) { - piece = cube.edge[i]; - orien = piece & _eobit; - ret.edge[piece & _pbits] = i | orien; - } - - for (i = 0; i < 8; i++) { - piece = cube.corner[i]; - orien = ((piece << 1) | (piece >> 1)) & _cobits2; - ret.corner[piece & _pbits] = i | orien; - } - - return ret; -} - -_static_inline int64_t -coord_co(cube_t c) -{ - int i, p; - int64_t ret; - - for (ret = 0, i = 0, p = 1; i < 7; i++, p *= 3) - ret += p * (c.corner[i] >> _coshift); - - return ret; -} - -/* -For corner separation, we consider the axis (a.k.a. tetrad) each -corner belongs to as 0 or 1 and we translate this sequence into binary. -Ignoring the last bit, we have a value up to 2^7, but not all values are -possible. Encoding this as a number from 0 to C(8,4) would save about 40% -of space, but we are not going to use this coordinate in large tables. -*/ -_static_inline int64_t -coord_csep(cube_t c) -{ - int i, p; - int64_t ret; - - for (ret = 0, i = 0, p = 1; i < 7; i++, p *= 2) - ret += p * ((c.corner[i] & _csepbit) >> 2); - - return ret; -} - -_static_inline int64_t -coord_cocsep(cube_t c) -{ - return (coord_co(c) << 7) + coord_csep(c); -} - -_static_inline int64_t -coord_eo(cube_t c) -{ - int i, p; - int64_t ret; - - for (ret = 0, i = 1, p = 1; i < 12; i++, p *= 2) - ret += p * (c.edge[i] >> _eoshift); - - return ret; -} - -/* -We encode the edge separation as a number from 0 to C(12,4)*C(8,4). -It can be seen as the composition of two "subset index" coordinates. -*/ -_static_inline int64_t -coord_esep(cube_t c) -{ - int64_t i, j, jj, k, l, ret1, ret2, bit1, bit2, is1; - - for (i = 0, j = 0, k = 4, l = 4, ret1 = 0, ret2 = 0; i < 12; i++) { - /* Simple version: - if (c.edge[i] & _esepbit2) { - ret1 += binomial[11-i][k--]; - } else { - if (c.edge[i] & _esepbit1) - ret2 += binomial[7-j][l--]; - j++; - } - */ - - bit1 = (c.edge[i] & _esepbit1) >> 2; - bit2 = (c.edge[i] & _esepbit2) >> 3; - is1 = (1 - bit2) * bit1; - - ret1 += bit2 * binomial[11-i][k]; - k -= bit2; - - jj = j < 8; - ret2 += jj * is1 * binomial[7-(j*jj)][l]; - l -= is1; - j += (1-bit2); - } - - return ret1 * 70 + ret2; -} - -_static_inline void -copy_corners(cube_t *dest, cube_t src) -{ - memcpy(&dest->corner, src.corner, sizeof(src.corner)); -} - -_static_inline void -copy_edges(cube_t *dest, cube_t src) -{ - memcpy(&dest->edge, src.edge, sizeof(src.edge)); -} - -_static_inline void -set_eo(cube_t *cube, int64_t eo) -{ - uint8_t i, sum, flip; - - for (sum = 0, i = 1; i < 12; i++, eo >>= 1) { - flip = eo % 2; - sum += flip; - cube->edge[i] = (cube->edge[i] & ~_eobit) | (_eobit * flip); - } - cube->edge[0] = (cube->edge[0] & ~_eobit) | (_eobit * (sum % 2)); -} - -_static_inline cube_t -invcoord_esep(int64_t esep) -{ - cube_t ret; - int64_t bit1, bit2, i, j, jj, k, l, s, v, w, is1, set1, set2; - uint8_t slice[3] = {0}; - - ret = solved; - set1 = esep % 70; - set2 = esep / 70; - - for (i = 0, j = 0, k = 4, l = 4; i < 12; i++) { - v = binomial[11-i][k]; - jj = j < 8; - w = jj * binomial[7-(j*jj)][l]; - bit2 = set2 >= v; - bit1 = set1 >= w; - is1 = (1 - bit2) * bit1; - - set2 -= bit2 * v; - k -= bit2; - set1 -= is1 * w; - l -= is1; - j += (1-bit2); - s = 2*bit2 + (1-bit2)*bit1; - - ret.edge[i] = (slice[s]++) | (uint8_t)(s << 2); - } - - return ret; -} diff --git a/src/cube_public.h b/src/cube_public.h @@ -1,255 +0,0 @@ -#include "cube.h" - -_static int64_t write_result(cube_t, char [static 22]); - -/* TODO: add option to get DR, maybe C-only, E-only, eo... */ -#define GETCUBE_OPTIONS(S, F) { .option = S, .fix = F } -struct { - char *option; - void (*fix)(int64_t *, int64_t *, int64_t *, int64_t *); -} getcube_options[] = { - GETCUBE_OPTIONS("fix", getcube_fix), - GETCUBE_OPTIONS(NULL, NULL) -}; - -_static int64_t -write_result(cube_t cube, char result[static 22]) -{ - if (!isconsistent(cube)) { - writecube("B32", zero, result); - return 2; - } - - writecube("B32", cube, result); - - return issolvable(cube) ? 0 : 1; -} - -int64_t -nissy_compose( - const char cube[static 22], - const char permutation[static 22], - char result[static 22] -) -{ - cube_t c, p, res; - - c = readcube("B32", cube); - p = readcube("B32", permutation); - res = compose(c, p); - - return write_result(res, result); -} - -int64_t -nissy_inverse( - const char cube[static 22], - char result[static 22] -) -{ - cube_t c, res; - - c = readcube("B32", cube); - res = inverse(c); - - return write_result(res, result); -} - -int64_t -nissy_applymoves( - const char cube[static 22], - const char *moves, - char result[static 22] -) -{ - cube_t c, res; - - c = readcube("B32", cube); - res = applymoves(c, moves); - - return write_result(res, result); -} - -int64_t -nissy_applytrans( - const char cube[static 22], - const char *transformation, - char result[static 22] -) -{ - cube_t c, res; - - c = readcube("B32", cube); - res = applytrans(c, transformation); - - return write_result(res, result); -} - -int64_t -nissy_frommoves( - const char *moves, - char result[static 22] -) -{ - cube_t res; - - res = applymoves(solved, moves); - - return write_result(res, result); -} - -int64_t -nissy_convert( - const char *format_in, - const char *format_out, - const char *cube_string, - char *result -) -{ - cube_t c; - - c = readcube(format_in, cube_string); - writecube(format_out, c, result); - - return isconsistent(c) ? 0 : 2; -} - -int64_t -nissy_getcube( - int64_t ep, - int64_t eo, - int64_t cp, - int64_t co, - const char *options, - char result[static 22] -) -{ - int i; - cube_t c; - - for (i = 0; getcube_options[i].option != NULL; i++) - if (!strcmp(options, getcube_options[i].option)) - getcube_options[i].fix(&ep, &eo, &cp, &co); - - c = getcube(ep, eo, cp, co); - - return write_result(c, result); -} - -int64_t -nissy_datasize( - const char *solver, - const char *options -) -{ - /* gendata() handles a NULL *data as a "dryrun" request */ - return nissy_gendata(solver, options, NULL); -} - -int64_t -nissy_gendata( - const char *solver, - const char *options, - void *data -) -{ - int64_t ret; - uint8_t maxdepth, h, i, j; - - if (!strcmp(solver, "h48")) { - /* options are in the form "h;maxdepth" */ - for (i = 0; options[i] != ';'; i++) ; - for (j = i; options[j]; j++) ; - h = atoi(options); - if (h != 0) { - LOG("Temporarily only h=0 is supported\n"); - ret = -1; - } else { - maxdepth = atoi(&options[i+1]); - ret = gendata_h48h0k4(data, maxdepth); - } - } else if (!strcmp(solver, "h48stats")) { - ret = gendata_h48h0k4(data, 20); - } else { - LOG("gendata: implemented only for h48 solver\n"); - ret = -1; - } - - return ret; -} - -int64_t -nissy_solve( - const char cube[static 22], - const char *solver, - const char *options, - const char *nisstype, - int8_t minmoves, - int8_t maxmoves, - int64_t maxsolutions, - int8_t optimal, - const void *data, - char *solutions -) -{ - cube_t c; - int64_t ret; - int h; - - c = readcube_B32(cube); - - if (!issolvable(c)) { - LOG("solve: cube is not solvable\n"); - return -1; - } - - if (minmoves < 0) { - LOG("solve: 'minmoves' is negative, setting it to 0\n"); - minmoves = 0; - } - - if (maxmoves < 0) { - LOG("solve: 'maxmoves' is negative, setting it to 20\n"); - maxmoves = 20; - } - - if (maxsolutions < 0) { - LOG("solve: 'maxsols' is negative, stopping\n"); - return -1; - } - - if (maxsolutions == 0) { - LOG("solve: 'maxsols' is 0, returning no solution\n"); - return 0; - } - - if (solutions == NULL) { - LOG("solve: return parameter 'solutions' is NULL, stopping\n"); - return -1; - } - - /* TODO define and use solve_options_t */ - if (!strcmp(solver, "h48")) { - h = atoi(options); /* TODO: better parsing */ - ret = solve_h48( - c, minmoves, maxmoves, maxsolutions, - (uint8_t)h, data, solutions); - ret = -1; - } else if (!strcmp(solver, "h48stats")) { - ret = solve_h48stats(c, maxmoves, data, solutions); - } else if (!strcmp(solver, "simple")) { - ret = solve_simple( - c, minmoves, maxmoves, maxsolutions, optimal, solutions); - } else { - LOG("solve: unknown solver '%s'\n", solver); - ret = -1; - } - - return ret; -} - -void -nissy_setlogger(void (*log)(const char *, ...)) -{ - nissy_log = log; -} diff --git a/src/io_move_trans.h b/src/io_move_trans.h @@ -1,87 +0,0 @@ -_static uint8_t readmove(char); -_static uint8_t readmodifier(char); -_static uint8_t readtrans(const char *); -_static int writemoves(uint8_t *, int, char *); -_static void writetrans(uint8_t, char *); - -_static uint8_t -readmove(char c) -{ - switch (c) { - case 'U': - return _move_U; - case 'D': - return _move_D; - case 'R': - return _move_R; - case 'L': - return _move_L; - case 'F': - return _move_F; - case 'B': - return _move_B; - default: - return _error; - } -} - -_static uint8_t -readmodifier(char c) -{ - switch (c) { - case '1': /* Fallthrough */ - case '2': /* Fallthrough */ - case '3': - return c - '0' - 1; - case '\'': - return 2; - default: - return 0; - } -} - -_static uint8_t -readtrans(const char *buf) -{ - uint8_t t; - - for (t = 0; t < 48; t++) - if (!strncmp(buf, transstr[t], 11)) - return t; - - LOG("readtrans error\n"); - return _error; -} - -_static int -writemoves(uint8_t *m, int n, char *buf) -{ - int i; - size_t len; - const char *s; - char *b; - - for (i = 0, b = buf; i < n; i++, b++) { - s = movestr[m[i]]; - len = strlen(s); - memcpy(b, s, len); - b += len; - *b = ' '; - } - - if (b != buf) - b--; /* Remove last space */ - *b = '\0'; - - return b - buf; -} - -_static void -writetrans(uint8_t t, char *buf) -{ - if (t >= 48) - memcpy(buf, "error trans", 11); - else - memcpy(buf, transstr[t], 11); - buf[11] = '\0'; -} diff --git a/src/nissy.c b/src/nissy.c @@ -0,0 +1,265 @@ +#include <inttypes.h> +#include <stdarg.h> +#include <stdbool.h> +#include <string.h> + +#include "utils/utils.h" +#include "arch/arch.h" +#include "core/core.h" +#include "solvers/solvers.h" + +#include "nissy.h" + +_static int64_t write_result(cube_t, char [static 22]); + +/* TODO: add option to get DR, maybe C-only, E-only, eo... */ +#define GETCUBE_OPTIONS(S, F) { .option = S, .fix = F } +struct { + char *option; + void (*fix)(int64_t *, int64_t *, int64_t *, int64_t *); +} getcube_options[] = { + GETCUBE_OPTIONS("fix", getcube_fix), + GETCUBE_OPTIONS(NULL, NULL) +}; + +_static int64_t +write_result(cube_t cube, char result[static 22]) +{ + if (!isconsistent(cube)) { + writecube("B32", zero, result); + return 2; + } + + writecube("B32", cube, result); + + return issolvable(cube) ? 0 : 1; +} + +int64_t +nissy_compose( + const char cube[static 22], + const char permutation[static 22], + char result[static 22] +) +{ + cube_t c, p, res; + + c = readcube("B32", cube); + p = readcube("B32", permutation); + res = compose(c, p); + + return write_result(res, result); +} + +int64_t +nissy_inverse( + const char cube[static 22], + char result[static 22] +) +{ + cube_t c, res; + + c = readcube("B32", cube); + res = inverse(c); + + return write_result(res, result); +} + +int64_t +nissy_applymoves( + const char cube[static 22], + const char *moves, + char result[static 22] +) +{ + cube_t c, res; + + c = readcube("B32", cube); + res = applymoves(c, moves); + + return write_result(res, result); +} + +int64_t +nissy_applytrans( + const char cube[static 22], + const char *transformation, + char result[static 22] +) +{ + cube_t c, res; + + c = readcube("B32", cube); + res = applytrans(c, transformation); + + return write_result(res, result); +} + +int64_t +nissy_frommoves( + const char *moves, + char result[static 22] +) +{ + cube_t res; + + res = applymoves(solved, moves); + + return write_result(res, result); +} + +int64_t +nissy_convert( + const char *format_in, + const char *format_out, + const char *cube_string, + char *result +) +{ + cube_t c; + + c = readcube(format_in, cube_string); + writecube(format_out, c, result); + + return isconsistent(c) ? 0 : 2; +} + +int64_t +nissy_getcube( + int64_t ep, + int64_t eo, + int64_t cp, + int64_t co, + const char *options, + char result[static 22] +) +{ + int i; + cube_t c; + + for (i = 0; getcube_options[i].option != NULL; i++) + if (!strcmp(options, getcube_options[i].option)) + getcube_options[i].fix(&ep, &eo, &cp, &co); + + c = getcube(ep, eo, cp, co); + + return write_result(c, result); +} + +int64_t +nissy_datasize( + const char *solver, + const char *options +) +{ + /* gendata() handles a NULL *data as a "dryrun" request */ + return nissy_gendata(solver, options, NULL); +} + +int64_t +nissy_gendata( + const char *solver, + const char *options, + void *data +) +{ + int64_t ret; + uint8_t maxdepth, h, i, j; + + if (!strcmp(solver, "h48")) { + /* options are in the form "h;maxdepth" */ + for (i = 0; options[i] != ';'; i++) ; + for (j = i; options[j]; j++) ; + h = atoi(options); + if (h != 0) { + LOG("Temporarily only h=0 is supported\n"); + ret = -1; + } else { + maxdepth = atoi(&options[i+1]); + ret = gendata_h48h0k4(data, maxdepth); + } + } else if (!strcmp(solver, "h48stats")) { + ret = gendata_h48h0k4(data, 20); + } else { + LOG("gendata: implemented only for h48 solver\n"); + ret = -1; + } + + return ret; +} + +int64_t +nissy_solve( + const char cube[static 22], + const char *solver, + const char *options, + const char *nisstype, + int8_t minmoves, + int8_t maxmoves, + int64_t maxsolutions, + int8_t optimal, + const void *data, + char *solutions +) +{ + cube_t c; + int64_t ret; + int h; + + c = readcube_B32(cube); + + if (!issolvable(c)) { + LOG("solve: cube is not solvable\n"); + return -1; + } + + if (minmoves < 0) { + LOG("solve: 'minmoves' is negative, setting it to 0\n"); + minmoves = 0; + } + + if (maxmoves < 0) { + LOG("solve: 'maxmoves' is negative, setting it to 20\n"); + maxmoves = 20; + } + + if (maxsolutions < 0) { + LOG("solve: 'maxsols' is negative, stopping\n"); + return -1; + } + + if (maxsolutions == 0) { + LOG("solve: 'maxsols' is 0, returning no solution\n"); + return 0; + } + + if (solutions == NULL) { + LOG("solve: return parameter 'solutions' is NULL, stopping\n"); + return -1; + } + + /* TODO define and use solve_options_t */ + if (!strcmp(solver, "h48")) { + h = atoi(options); /* TODO: better parsing */ + ret = solve_h48( + c, minmoves, maxmoves, maxsolutions, + (uint8_t)h, data, solutions); + ret = -1; + } else if (!strcmp(solver, "h48stats")) { + ret = solve_h48stats(c, maxmoves, data, solutions); + } else if (!strcmp(solver, "simple")) { + ret = solve_simple( + c, minmoves, maxmoves, maxsolutions, optimal, solutions); + } else { + LOG("solve: unknown solver '%s'\n", solver); + ret = -1; + } + + return ret; +} + +void +nissy_setlogger(void (*log)(const char *, ...)) +{ + nissy_log = log; +} diff --git a/src/cube.h b/src/nissy.h diff --git a/src/solve_h48.h b/src/solve_h48.h @@ -1,839 +0,0 @@ -#define MAP_UNSET UINT64_C(0xFFFFFFFFFFFFFFFF) -#define MAP_KEYMASK UINT64_C(0xFFFFFFFFFF) -#define MAP_KEYSHIFT UINT64_C(40) - -#define COCSEP_CLASSES ((size_t)3393) -#define COCSEP_TABLESIZE ((size_t)_3p7 << (size_t)7) -#define COCSEP_VISITEDSIZE ((COCSEP_TABLESIZE + (size_t)7) / (size_t)8) -#define COCSEP_FULLSIZE ((size_t)4 * (COCSEP_TABLESIZE + (size_t)12)) - -#define ESEP_NOEO (COCSEP_CLASSES * (size_t)_12c4 * (size_t)_8c4) -#define ESEP_MAX(h) (ESEP_NOEO << (size_t)(h)) -#define ESEP_TABLESIZE(h, k) (ESEP_MAX((h)) / ((size_t)8 / (size_t)(k))) - -#define COCLASS_MASK (UINT32_C(0xFFFF) << UINT32_C(16)) -#define COCLASS(x) (((x) & COCLASS_MASK) >> UINT32_C(16)) -#define TTREP_MASK (UINT32_C(0xFF) << UINT32_C(8)) -#define TTREP(x) (((x) & TTREP_MASK) >> UINT32_C(8)) -#define CBOUND_MASK UINT32_C(0xFF) -#define CBOUND(x) ((x) & CBOUND_MASK) -#define H48_ESIZE(h) ((_12c4 * _8c4) << (int64_t)(h)) - -#define ESEP_IND(i) ((uint32_t)(i) / UINT32_C(8)) -#define ESEP_SHIFT(i) (UINT32_C(4) * ((uint32_t)(i) % UINT32_C(8))) -#define ESEP_MASK(i) ((_bit_u32(4) - (uint32_t)(1)) << ESEP_SHIFT(i)) -#define VISITED_IND(i) ((uint32_t)(i) / UINT32_C(8)) -#define VISITED_MASK(i) (UINT32_C(1) << ((uint32_t)(i) % UINT32_C(8))) - -#define MAX_SOLUTION_LENGTH 20 - -/* -TODO: This loop other similar h48 coordinates can be improved by only -transforming edges, but we need to compose transformations (i.e. conjugate -_t by _ttrep). -*/ -#define _foreach_h48sim(_cube, _cocsepdata, _selfsim, _h, _action) \ - int64_t _cocsep = coord_cocsep(_cube); \ - uint8_t _ttrep = TTREP(_cocsepdata[_cocsep]); \ - uint8_t _inverse_ttrep = inverse_trans(_ttrep); \ - int64_t _coclass = COCLASS(_cocsepdata[_cocsep]); \ - cube_t _rep = transform(_cube, _ttrep); \ - uint64_t _sim = _selfsim[_coclass]; \ - for (uint8_t _t = 0; _t < 48 && _sim; _t++, _sim >>= 1) { \ - if (!(_sim & 1)) continue; \ - _cube = transform(_rep, _t); \ - _cube = transform(_cube, _inverse_ttrep); \ - _action \ - } - -typedef struct { - uint64_t n; - uint64_t capacity; - uint64_t randomizer; - uint64_t *table; -} h48map_t; - -typedef struct { - uint64_t key; - uint64_t val; -} kvpair_t; - -typedef struct { - cube_t cube; - uint8_t depth; - uint8_t maxdepth; - uint16_t *n; - uint32_t *buf32; - uint8_t *visited; - uint64_t *selfsim; - cube_t *rep; -} dfsarg_cocsep_t; - -/* TODO keep or not? */ -typedef struct { - cube_t cube; - int8_t nmoves; - int8_t depth; - uint8_t moves[MAX_SOLUTION_LENGTH]; - uint32_t *cocsepdata; - h48map_t *visited; -} dfsarg_genh48set_t; - -typedef struct { - uint8_t depth; - uint32_t *cocsepdata; - uint32_t *buf32; - uint64_t *selfsim; - int64_t done; - cube_t *crep; -} bfsarg_esep_t; - -typedef struct { - cube_t cube; - cube_t inverse; - int8_t nmoves; - int8_t depth; - uint8_t moves[MAX_SOLUTION_LENGTH]; - int64_t *nsols; - int64_t maxsolutions; - uint8_t h; - uint32_t *cocsepdata; - uint32_t *h48data; - char **nextsol; -} dfsarg_solveh48_t; - -typedef struct { - cube_t cube; - int8_t nmoves; - int8_t depth; - uint8_t moves[MAX_SOLUTION_LENGTH]; - uint32_t *cocsepdata; - uint32_t *h48data; - char *s; -} dfsarg_solveh48stats_t; - -_static void h48map_create(h48map_t *, uint64_t, uint64_t); -_static void h48map_clear(h48map_t *); -_static void h48map_destroy(h48map_t *); -_static uint64_t h48map_lookup(h48map_t *, uint64_t); -_static void h48map_insertmin(h48map_t *, uint64_t, uint64_t); -_static uint64_t h48map_value(h48map_t *, uint64_t); -_static kvpair_t h48map_nextkvpair(h48map_t *, uint64_t *); - -_static_inline int64_t coord_h48(cube_t, const uint32_t *, uint8_t); -_static_inline int64_t coord_h48_edges(cube_t, int64_t, uint8_t, uint8_t); -_static_inline cube_t invcoord_h48(int64_t, const cube_t *, uint8_t); - -_static_inline bool get_visited(const uint8_t *, int64_t); -_static_inline void set_visited(uint8_t *, int64_t); -_static_inline uint8_t get_esep_pval(const uint32_t *, int64_t); -_static_inline void set_esep_pval(uint32_t *, int64_t, uint8_t); - -_static size_t gendata_cocsep(void *, uint64_t *, cube_t *); -_static uint32_t gendata_cocsep_dfs(dfsarg_cocsep_t *); -_static uint64_t gen_h48short( - uint8_t, const uint32_t *, const cube_t *, const uint64_t *, h48map_t *); -_static size_t gendata_h48h0k4(void *, uint8_t); -_static int64_t gendata_h48h0k4_bfs(bfsarg_esep_t *); -_static int64_t gendata_h48h0k4_bfs_fromdone(bfsarg_esep_t *); -_static int64_t gendata_h48h0k4_bfs_fromnew(bfsarg_esep_t *); - -_static void solve_h48_appendsolution(dfsarg_solveh48_t *); -_static_inline int8_t get_h48_cdata(cube_t, uint32_t *, uint32_t *); -_static_inline int8_t get_h48_bound(cube_t, uint32_t, uint8_t, uint32_t *); -_static_inline bool solve_h48_stop(dfsarg_solveh48_t *); -_static int64_t solve_h48_dfs(dfsarg_solveh48_t *); -_static int64_t solve_h48( - cube_t, int8_t, int8_t, int8_t, uint8_t, const void *, char *); - -_static int64_t solve_h48stats_dfs(dfsarg_solveh48stats_t *); -_static int64_t solve_h48stats(cube_t, int8_t, const void *, char [static 12]); - -_static void -h48map_create(h48map_t *map, uint64_t capacity, uint64_t randomizer) -{ - map->capacity = capacity; - map->randomizer = randomizer; - - map->table = malloc(map->capacity * sizeof(int64_t)); - h48map_clear(map); -} - -_static void -h48map_clear(h48map_t *map) -{ - memset(map->table, 0xFF, map->capacity * sizeof(uint64_t)); - map->n = 0; -} - -_static void -h48map_destroy(h48map_t *map) -{ - free(map->table); -} - -_static_inline uint64_t -h48map_lookup(h48map_t *map, uint64_t x) -{ - uint64_t hash, i; - - hash = ((x % map->capacity) * map->randomizer) % map->capacity; - for (i = hash; - map->table[i] != MAP_UNSET && (map->table[i] & MAP_KEYMASK) != x; - i = (i+1) % map->capacity - ) ; - - return i; -} - -_static_inline void -h48map_insertmin(h48map_t *map, uint64_t key, uint64_t val) -{ - uint64_t i, oldval, min; - - i = h48map_lookup(map, key); - oldval = map->table[i] >> MAP_KEYSHIFT; - min = _min(val, oldval); - - map->n += map->table[i] == MAP_UNSET; - map->table[i] = (key & MAP_KEYMASK) | (min << MAP_KEYSHIFT); -} - -_static_inline uint64_t -h48map_value(h48map_t *map, uint64_t key) -{ - return map->table[h48map_lookup(map, key)] >> MAP_KEYSHIFT; -} - -_static kvpair_t -h48map_nextkvpair(h48map_t *map, uint64_t *p) -{ - kvpair_t kv; - uint64_t pair; - - kv.key = MAP_UNSET; - kv.val = MAP_UNSET; - - DBG_ASSERT(*p < map->capacity, kv, - "Error looping over map: given index %" PRIu64 " is out of " - "range [0,%" PRIu64 "]", *p, map->capacity); - - for ( ; *p < map->capacity; (*p)++) { - if (map->table[*p] != MAP_UNSET) { - pair = map->table[(*p)++]; - kv.key = pair & MAP_KEYMASK; - kv.val = pair >> MAP_KEYSHIFT; - return kv; - } - } - - return kv; -} - -_static_inline int64_t -coord_h48(cube_t c, const uint32_t *cocsepdata, uint8_t h) -{ - int64_t cocsep, coclass; - uint32_t data; - uint8_t ttrep; - - DBG_ASSERT(h <= 11, -1, "coord_h48: h must be between 0 and 11\n"); - - cocsep = coord_cocsep(c); - data = cocsepdata[cocsep]; - coclass = (int64_t)COCLASS(data); - ttrep = (int64_t)TTREP(data); - - return coord_h48_edges(c, coclass, ttrep, h); -} - -_static_inline int64_t -coord_h48_edges(cube_t c, int64_t coclass, uint8_t ttrep, uint8_t h) -{ - cube_t d; - int64_t esep, eo, edges; - - d = transform_edges(c, ttrep); - esep = coord_esep(d); - eo = coord_eo(d); - edges = (esep << 11) + eo; - - return (coclass * H48_ESIZE(11) + edges) >> (11 - (int64_t)h); -} - -/* -This function does not necessarily return a cube whose coordinate is -the given value, because it works up to symmetry. This means that the -returned cube is a transformed cube of one that gives the correct value. -*/ -_static_inline cube_t -invcoord_h48(int64_t i, const cube_t *crep, uint8_t h) -{ - cube_t ret; - int64_t hh, coclass, ee, esep, eo; - - DBG_ASSERT(h <= 11, zero, - "invcoord_h48: h must be between 0 and 11\n"); - - hh = (int64_t)h; - coclass = i / H48_ESIZE(h); - ee = i % H48_ESIZE(h); - esep = ee >> hh; - eo = (ee & ((1 << hh) - 1)) << (11 - hh); - - ret = invcoord_esep(esep); - copy_corners(&ret, crep[coclass]); - set_eo(&ret, eo); - - return ret; -} - -/* -Each element of the cocsep table is a uint32_t used as follows: - - Lowest 8-bit block: pruning value - - Second-lowest 8-bit block: "ttrep" (transformation to representative) - - Top 16-bit block: symcoord value -After the data as described above, more auxiliary information is appended: - - A uint32_t representing the number of symmetry classes - - A uint32_t representing the highest value of the pruning table - - One uint32_t for each "line" of the pruning table, representing the number - of positions having that pruning value. -*/ -_static size_t -gendata_cocsep(void *buf, uint64_t *selfsim, cube_t *rep) -{ - uint32_t *buf32, *info, cc; - uint16_t n; - uint8_t i, j, visited[COCSEP_VISITEDSIZE]; - dfsarg_cocsep_t arg; - - if (buf == NULL) - goto gendata_cocsep_return_size; - - buf32 = (uint32_t *)buf; - info = buf32 + COCSEP_TABLESIZE; - memset(buf32, 0xFF, sizeof(uint32_t) * COCSEP_TABLESIZE); - if (selfsim != NULL) - memset(selfsim, 0, sizeof(uint64_t) * COCSEP_CLASSES); - - arg = (dfsarg_cocsep_t) { - .cube = solved, - .n = &n, - .buf32 = buf32, - .visited = visited, - .selfsim = selfsim, - .rep = rep - }; - for (i = 0, n = 0, cc = 0; i < 10; i++) { - LOG("cocsep: generating depth %" PRIu8 "\n", i); - memset(visited, 0, COCSEP_VISITEDSIZE); - arg.depth = 0; - arg.maxdepth = i; - cc = gendata_cocsep_dfs(&arg); - info[i+2] = cc; - LOG("found %" PRIu32 "\n", cc); - } - - info[0] = (uint32_t)n; - info[1] = 9; /* Known max pruning value */ - DBG_ASSERT(n == COCSEP_CLASSES, 0, - "cocsep: computed %" PRIu16 " symmetry classes, " - "expected %zu\n", n, COCSEP_CLASSES); - - LOG("cocsep data computed\n"); - LOG("Symmetry classes: %" PRIu32 "\n", info[0]); - LOG("Maximum pruning value: %" PRIu32 "\n", info[1]); - LOG("Pruning value distribution:\n"); - for (j = 0; j < 10; j++) - LOG("%" PRIu8 ":\t%" PRIu32 "\n", j, info[j+2]); - -gendata_cocsep_return_size: - return COCSEP_FULLSIZE; -} - -_static uint32_t -gendata_cocsep_dfs(dfsarg_cocsep_t *arg) -{ - uint8_t m; - uint32_t cc, class, ttrep, depth, olddepth, tinv; - uint64_t t; - int64_t i, j; - cube_t d; - dfsarg_cocsep_t nextarg; - - i = coord_cocsep(arg->cube); - olddepth = (uint8_t)(arg->buf32[i] & 0xFF); - if (olddepth < arg->depth || get_visited(arg->visited, i)) - return 0; - set_visited(arg->visited, i); - - if (arg->depth == arg->maxdepth) { - if ((arg->buf32[i] & 0xFF) != 0xFF) - return 0; - - if (arg->rep != NULL) - arg->rep[*arg->n] = arg->cube; - for (t = 0, cc = 0; t < 48; t++) { - d = transform_corners(arg->cube, t); - j = coord_cocsep(d); - if (i == j && arg->selfsim != NULL) - arg->selfsim[*arg->n] |= UINT64_C(1) << t; - if (COCLASS(arg->buf32[j]) != UINT32_C(0xFFFF)) - continue; - set_visited(arg->visited, j); - tinv = inverse_trans(t); - olddepth = arg->buf32[j] & 0xFF; - cc += olddepth == 0xFF; - - class = (uint32_t)(*arg->n) << UINT32_C(16); - ttrep = (uint32_t)tinv << UINT32_C(8); - depth = (uint32_t)arg->depth; - arg->buf32[j] = class | ttrep | depth; - } - (*arg->n)++; - - return cc; - } - - memcpy(&nextarg, arg, sizeof(dfsarg_cocsep_t)); - nextarg.depth++; - for (m = 0, cc = 0; m < 18; m++) { - nextarg.cube = move(arg->cube, m); - cc += gendata_cocsep_dfs(&nextarg); - } - - return cc; -} - -_static uint64_t -gen_h48short( - uint8_t n, - const uint32_t *cocsepdata, - const cube_t *crep, - const uint64_t *selfsim, - h48map_t *map -) { - uint8_t i, m; - int64_t coord; - uint64_t j, oldn; - kvpair_t kv; - cube_t cube, d; - - cube = solvedcube(); - coord = coord_h48(cube, cocsepdata, 11); - h48map_insertmin(map, coord, 0); - oldn = 0; - LOG("Short h48: generating depth 0\nfound %" PRIu8 "\n", map->n-oldn); - for (i = 0; i < n; i++) { - LOG("Short h48: generating depth %" PRIu8 "\n", i+1); - j = 0; - oldn = map->n; - for (kv = h48map_nextkvpair(map, &j); - j != map->capacity; - kv = h48map_nextkvpair(map, &j) - ) { - if (kv.val != i) - continue; - cube = invcoord_h48(kv.key, crep, 11); - for (m = 0; m < 18; m++) { - d = move(cube, m); - _foreach_h48sim(d, cocsepdata, selfsim, 11, - coord = coord_h48(d, cocsepdata, 11); - h48map_insertmin(map, coord, i+1); - ) - } - } - LOG("found %" PRIu8 "\n", map->n-oldn); - } - - return map->n; -} - -/* -TODO description -generating fixed table with h=0, k=4 -*/ -_static size_t -gendata_h48h0k4(void *buf, uint8_t maxdepth) -{ - uint32_t j, *buf32, *info, *cocsepdata; - bfsarg_esep_t arg; - int64_t sc, cc, esep_max; - uint64_t selfsim[COCSEP_CLASSES]; - cube_t crep[COCSEP_CLASSES]; - size_t cocsepsize, infosize; - - /* TODO: move info at start of tables (all tables!) */ - infosize = 4 * maxdepth; - cocsepsize = gendata_cocsep(buf, selfsim, crep); - infosize = 88; - - if (buf == NULL) - goto gendata_h48h0k4_return_size; - - esep_max = (int64_t)ESEP_MAX(0); - cocsepdata = (uint32_t *)buf; - buf32 = cocsepdata + cocsepsize / 4; - info = buf32 + (ESEP_TABLESIZE(0, 4) / sizeof(uint32_t)); - memset(buf32, 0xFF, ESEP_TABLESIZE(0, 4)); - - sc = coord_h48(solved, cocsepdata, 0); - set_esep_pval(buf32, sc, 0); - info[1] = 1; - arg = (bfsarg_esep_t) { - .cocsepdata = cocsepdata, - .buf32 = buf32, - .selfsim = selfsim, - .crep = crep - }; - for ( - arg.done = 1, arg.depth = 1, cc = 0; - arg.done < esep_max && arg.depth <= maxdepth; - arg.depth++ - ) { - LOG("esep: generating depth %" PRIu8 "\n", arg.depth); - cc = gendata_h48h0k4_bfs(&arg); - arg.done += cc; - info[arg.depth+1] = cc; - LOG("found %" PRId64 "\n", cc); - } - - info[0] = arg.depth-1; - - LOG("h48 pruning table computed\n"); - LOG("Maximum pruning value: %" PRIu32 "\n", info[0]); - LOG("Pruning value distribution:\n"); - for (j = 0; j <= info[0]; j++) - LOG("%" PRIu8 ":\t%" PRIu32 "\n", j, info[j+1]); - -gendata_h48h0k4_return_size: - return cocsepsize + ESEP_TABLESIZE(0, 4) + infosize; -} - -_static int64_t -gendata_h48h0k4_bfs(bfsarg_esep_t *arg) -{ - const uint8_t breakpoint = 10; /* Hand-picked optimal */ - - if (arg->depth < breakpoint) - return gendata_h48h0k4_bfs_fromdone(arg); - else - return gendata_h48h0k4_bfs_fromnew(arg); -} - -_static int64_t -gendata_h48h0k4_bfs_fromdone(bfsarg_esep_t *arg) -{ - uint8_t c, m, x; - uint32_t cc; - int64_t i, j, k; - cube_t cube, moved; - - for (i = 0, cc = 0; i < (int64_t)ESEP_MAX(0); i++) { - c = get_esep_pval(arg->buf32, i); - if (c != arg->depth - 1) - continue; - cube = invcoord_h48(i, arg->crep, 0); - for (m = 0; m < 18; m++) { - moved = move(cube, m); - j = coord_h48(moved, arg->cocsepdata, 0); - if (get_esep_pval(arg->buf32, j) <= arg->depth) - continue; - _foreach_h48sim(moved, arg->cocsepdata, arg->selfsim, 0, - k = coord_h48(moved, arg->cocsepdata, 0); - x = get_esep_pval(arg->buf32, k); - set_esep_pval(arg->buf32, k, arg->depth); - cc += x != arg->depth; - ) - } - } - - return cc; -} - -_static int64_t -gendata_h48h0k4_bfs_fromnew(bfsarg_esep_t *arg) -{ - uint8_t c, m, x; - uint32_t cc; - int64_t i, j; - cube_t cube, moved; - - for (i = 0, cc = 0; i < (int64_t)ESEP_MAX(0); i++) { - c = get_esep_pval(arg->buf32, i); - if (c != 0xF) - continue; - cube = invcoord_h48(i, arg->crep, 0); - for (m = 0; m < 18; m++) { - moved = move(cube, m); - j = coord_h48(moved, arg->cocsepdata, 0); - x = get_esep_pval(arg->buf32, j); - if (x >= arg->depth) - continue; - _foreach_h48sim(cube, arg->cocsepdata, arg->selfsim, 0, - j = coord_h48(cube, arg->cocsepdata, 0); - x = get_esep_pval(arg->buf32, j); - set_esep_pval(arg->buf32, j, arg->depth); - cc += x == 0xF; - ) - break; /* Enough to find one, skip the rest */ - } - } - - return cc; -} - -_static_inline bool -get_visited(const uint8_t *a, int64_t i) -{ - return a[VISITED_IND(i)] & VISITED_MASK(i); -} - -_static_inline void -set_visited(uint8_t *a, int64_t i) -{ - a[VISITED_IND(i)] |= VISITED_MASK(i); -} - -_static_inline uint8_t -get_esep_pval(const uint32_t *buf32, int64_t i) -{ - return (buf32[ESEP_IND(i)] & ESEP_MASK(i)) >> ESEP_SHIFT(i); -} - -_static_inline void -set_esep_pval(uint32_t *buf32, int64_t i, uint8_t val) -{ - buf32[ESEP_IND(i)] = - (buf32[ESEP_IND(i)] & (~ESEP_MASK(i))) | (val << ESEP_SHIFT(i)); -} - -_static void -solve_h48_appendsolution(dfsarg_solveh48_t *arg) -{ - int strl; - - strl = writemoves(arg->moves, arg->nmoves, *arg->nextsol); - LOG("Solution found: %s\n", *arg->nextsol); - *arg->nextsol += strl; - **arg->nextsol = '\n'; - (*arg->nextsol)++; - (*arg->nsols)++; -} - -_static_inline int8_t -get_h48_cdata(cube_t cube, uint32_t *cocsepdata, uint32_t *cdata) -{ - int64_t coord; - - coord = coord_cocsep(cube); - *cdata = cocsepdata[coord]; - - return CBOUND(*cdata); -} - -_static_inline int8_t -get_h48_bound(cube_t cube, uint32_t cdata, uint8_t h, uint32_t *h48data) -{ - int64_t coord; - - coord = coord_h48_edges(cube, COCLASS(cdata), TTREP(cdata), h); - return get_esep_pval(h48data, coord); -} - -_static_inline bool -solve_h48_stop(dfsarg_solveh48_t *arg) -{ - uint32_t data, data_inv; - int8_t bound; - - bound = get_h48_cdata(arg->cube, arg->cocsepdata, &data); - if (bound + arg->nmoves > arg->depth) - return true; - - bound = get_h48_cdata(arg->inverse, arg->cocsepdata, &data_inv); - if (bound + arg->nmoves > arg->depth) - return true; - -/* - bound = get_h48_bound(arg->cube, data, arg->h, arg->h48data); -LOG("Using pval %" PRId8 "\n", bound); - if (bound + arg->nmoves > arg->depth) - return true; - - bound = get_h48_bound(arg->inverse, data_inv, arg->h, arg->h48data); - if (bound + arg->nmoves > arg->depth) - return true; -*/ - - return false; -} - -_static int64_t -solve_h48_dfs(dfsarg_solveh48_t *arg) -{ - dfsarg_solveh48_t nextarg; - int64_t ret; - uint8_t m; - - if (*arg->nsols == arg->maxsolutions) - return 0; - - if (solve_h48_stop(arg)) - return 0; - - if (issolved(arg->cube)) { - if (arg->nmoves != arg->depth) - return 0; - solve_h48_appendsolution(arg); - return 1; - } - - /* TODO: avoid copy, change arg and undo changes after recursion */ - nextarg = *arg; - nextarg.nmoves = arg->nmoves + 1; - ret = 0; - for (m = 0; m < 18; m++) { - nextarg.moves[arg->nmoves] = m; - if (!allowednextmove(nextarg.moves, nextarg.nmoves)) { - /* If a move is not allowed, neither are its 180 - * and 270 degree variations */ - m += 2; - continue; - } - nextarg.cube = move(arg->cube, m); - nextarg.inverse = inverse(nextarg.cube); /* TODO: use premove */ - ret += solve_h48_dfs(&nextarg); - } - - return ret; -} - -_static int64_t -solve_h48( - cube_t cube, - int8_t minmoves, - int8_t maxmoves, - int8_t maxsolutions, - uint8_t h, - const void *data, - char *solutions -) -{ - int64_t nsols; - dfsarg_solveh48_t arg; - - arg = (dfsarg_solveh48_t) { - .cube = cube, - .inverse = inverse(cube), - .nsols = &nsols, - .maxsolutions = maxsolutions, - .h = h, - .cocsepdata = (uint32_t *)data, - .h48data = ((uint32_t *)data) + COCSEP_FULLSIZE / 4, - .nextsol = &solutions - }; - - nsols = 0; - for (arg.depth = minmoves; - arg.depth <= maxmoves && nsols < maxsolutions; - arg.depth++) - { - LOG("Found %" PRId64 " solutions, searching at depth %" - PRId8 "\n", nsols, arg.depth); - arg.nmoves = 0; - solve_h48_dfs(&arg); - } - - return nsols; -} - -/* -The h48stats solver computes how many moves it takes to solve to -each of the 12 h48 coordinates, one for each value of h from 0 to 11. -The solutions array is filled with the length of the solutions. The -solution array is therefore not a printable string. -*/ -_static int64_t -solve_h48stats_dfs(dfsarg_solveh48stats_t *arg) -{ - const int64_t limit = 11; - - int8_t bound, u; - uint8_t m; - uint32_t d; - int64_t coord, h; - dfsarg_solveh48stats_t nextarg; - - /* Check cocsep lower bound (corners only) */ - bound = get_h48_cdata(arg->cube, arg->cocsepdata, &d); - if (bound + arg->nmoves > arg->depth) - return 0; - - /* Check h48 lower bound for h=0 (esep, but no eo) */ - coord = coord_h48_edges(arg->cube, COCLASS(d), TTREP(d), 0); - bound = get_esep_pval(arg->h48data, coord); - if (bound + arg->nmoves > arg->depth) - return 0; - - /* Update all other values, if solved */ - coord = coord_h48_edges(arg->cube, COCLASS(d), TTREP(d), 11); - for (h = 0; h <= limit; h++) { - u = coord >> (11-h) == 0 && arg->s[h] == 99; - arg->s[h] = u * arg->nmoves + (1-u) * arg->s[h]; - } - - if (arg->s[limit] != 99) - return 0; - - nextarg = *arg; - nextarg.nmoves = arg->nmoves + 1; - for (m = 0; m < 18; m++) { - nextarg.moves[arg->nmoves] = m; - if (!allowednextmove(nextarg.moves, nextarg.nmoves)) { - /* If a move is not allowed, neither are its 180 - * and 270 degree variations */ - m += 2; - continue; - } - nextarg.cube = move(arg->cube, m); - solve_h48stats_dfs(&nextarg); - } - - return 0; -} - -_static int64_t -solve_h48stats( - cube_t cube, - int8_t maxmoves, - const void *data, - char solutions[static 12] -) -{ - int i; - size_t cocsepsize; - dfsarg_solveh48stats_t arg; - - cocsepsize = gendata_cocsep(NULL, NULL, NULL); - - arg = (dfsarg_solveh48stats_t) { - .cube = cube, - .cocsepdata = (uint32_t *)data, - .h48data = ((uint32_t *)data) + (cocsepsize/4), - .s = solutions - }; - - for (i = 0; i < 12; i++) - solutions[i] = (char)99; - - for (arg.depth = 0; - arg.depth <= maxmoves && solutions[11] == 99; - arg.depth++) - { - arg.nmoves = 0; - solve_h48stats_dfs(&arg); - } - - return 0; -} diff --git a/src/solve_generic.h b/src/solvers/generic/generic.h diff --git a/src/solvers/h48/coordinate.h b/src/solvers/h48/coordinate.h @@ -0,0 +1,68 @@ +#define H48_ESIZE(h) ((_12c4 * _8c4) << (int64_t)(h)) + +#define COCLASS_MASK (UINT32_C(0xFFFF) << UINT32_C(16)) +#define COCLASS(x) (((x) & COCLASS_MASK) >> UINT32_C(16)) +#define TTREP_MASK (UINT32_C(0xFF) << UINT32_C(8)) +#define TTREP(x) (((x) & TTREP_MASK) >> UINT32_C(8)) + +_static_inline int64_t coord_h48(cube_t, const uint32_t *, uint8_t); +_static_inline int64_t coord_h48_edges(cube_t, int64_t, uint8_t, uint8_t); +_static_inline cube_t invcoord_h48(int64_t, const cube_t *, uint8_t); + +_static_inline int64_t +coord_h48(cube_t c, const uint32_t *cocsepdata, uint8_t h) +{ + int64_t cocsep, coclass; + uint32_t data; + uint8_t ttrep; + + DBG_ASSERT(h <= 11, -1, "coord_h48: h must be between 0 and 11\n"); + + cocsep = coord_cocsep(c); + data = cocsepdata[cocsep]; + coclass = (int64_t)COCLASS(data); + ttrep = (int64_t)TTREP(data); + + return coord_h48_edges(c, coclass, ttrep, h); +} + +_static_inline int64_t +coord_h48_edges(cube_t c, int64_t coclass, uint8_t ttrep, uint8_t h) +{ + cube_t d; + int64_t esep, eo, edges; + + d = transform_edges(c, ttrep); + esep = coord_esep(d); + eo = coord_eo(d); + edges = (esep << 11) + eo; + + return (coclass * H48_ESIZE(11) + edges) >> (11 - (int64_t)h); +} + +/* +This function does not necessarily return a cube whose coordinate is +the given value, because it works up to symmetry. This means that the +returned cube is a transformed cube of one that gives the correct value. +*/ +_static_inline cube_t +invcoord_h48(int64_t i, const cube_t *crep, uint8_t h) +{ + cube_t ret; + int64_t hh, coclass, ee, esep, eo; + + DBG_ASSERT(h <= 11, zero, + "invcoord_h48: h must be between 0 and 11\n"); + + hh = (int64_t)h; + coclass = i / H48_ESIZE(h); + ee = i % H48_ESIZE(h); + esep = ee >> hh; + eo = (ee & ((1 << hh) - 1)) << (11 - hh); + + ret = invcoord_esep(esep); + copy_corners(&ret, crep[coclass]); + set_eo(&ret, eo); + + return ret; +} diff --git a/src/solvers/h48/gendata.h b/src/solvers/h48/gendata.h @@ -0,0 +1,425 @@ +#define COCSEP_CLASSES ((size_t)3393) +#define COCSEP_TABLESIZE ((size_t)_3p7 << (size_t)7) +#define COCSEP_VISITEDSIZE ((COCSEP_TABLESIZE + (size_t)7) / (size_t)8) +#define COCSEP_FULLSIZE ((size_t)4 * (COCSEP_TABLESIZE + (size_t)12)) + +#define ESEP_NOEO (COCSEP_CLASSES * (size_t)_12c4 * (size_t)_8c4) +#define ESEP_MAX(h) (ESEP_NOEO << (size_t)(h)) +#define ESEP_TABLESIZE(h, k) (ESEP_MAX((h)) / ((size_t)8 / (size_t)(k))) + +#define ESEP_IND(i) ((uint32_t)(i) / UINT32_C(8)) +#define ESEP_SHIFT(i) (UINT32_C(4) * ((uint32_t)(i) % UINT32_C(8))) +#define ESEP_MASK(i) ((_bit_u32(4) - (uint32_t)(1)) << ESEP_SHIFT(i)) +#define VISITED_IND(i) ((uint32_t)(i) / UINT32_C(8)) +#define VISITED_MASK(i) (UINT32_C(1) << ((uint32_t)(i) % UINT32_C(8))) + +#define CBOUND_MASK UINT32_C(0xFF) +#define CBOUND(x) ((x) & CBOUND_MASK) + +#define MAXLEN 20 + +/* +TODO: This loop over similar h48 coordinates can be improved by only +transforming edges, but we need to compose transformations (i.e. conjugate +_t by _ttrep). +*/ +#define _foreach_h48sim(_cube, _cocsepdata, _selfsim, _h, _action) \ + int64_t _cocsep = coord_cocsep(_cube); \ + uint8_t _ttrep = TTREP(_cocsepdata[_cocsep]); \ + uint8_t _inverse_ttrep = inverse_trans(_ttrep); \ + int64_t _coclass = COCLASS(_cocsepdata[_cocsep]); \ + cube_t _rep = transform(_cube, _ttrep); \ + uint64_t _sim = _selfsim[_coclass]; \ + for (uint8_t _t = 0; _t < 48 && _sim; _t++, _sim >>= 1) { \ + if (!(_sim & 1)) continue; \ + _cube = transform(_rep, _t); \ + _cube = transform(_cube, _inverse_ttrep); \ + _action \ + } + +typedef struct { + cube_t cube; + uint8_t depth; + uint8_t maxdepth; + uint16_t *n; + uint32_t *buf32; + uint8_t *visited; + uint64_t *selfsim; + cube_t *rep; +} dfsarg_cocsep_t; + +/* TODO keep or not? */ +typedef struct { + cube_t cube; + int8_t nmoves; + int8_t depth; + uint8_t moves[MAXLEN]; + uint32_t *cocsepdata; + h48map_t *visited; +} dfsarg_genh48set_t; + +typedef struct { + uint8_t depth; + uint32_t *cocsepdata; + uint32_t *buf32; + uint64_t *selfsim; + int64_t done; + cube_t *crep; +} bfsarg_esep_t; + +_static_inline bool get_visited(const uint8_t *, int64_t); +_static_inline void set_visited(uint8_t *, int64_t); +_static_inline uint8_t get_esep_pval(const uint32_t *, int64_t); +_static_inline void set_esep_pval(uint32_t *, int64_t, uint8_t); + +_static size_t gendata_cocsep(void *, uint64_t *, cube_t *); +_static uint32_t gendata_cocsep_dfs(dfsarg_cocsep_t *); +_static uint64_t gen_h48short( + uint8_t, const uint32_t *, const cube_t *, const uint64_t *, h48map_t *); +_static size_t gendata_h48h0k4(void *, uint8_t); +_static int64_t gendata_h48h0k4_bfs(bfsarg_esep_t *); +_static int64_t gendata_h48h0k4_bfs_fromdone(bfsarg_esep_t *); +_static int64_t gendata_h48h0k4_bfs_fromnew(bfsarg_esep_t *); + +_static_inline int8_t get_h48_cdata(cube_t, uint32_t *, uint32_t *); +_static_inline int8_t get_h48_bound(cube_t, uint32_t, uint8_t, uint32_t *); + +/* +Each element of the cocsep table is a uint32_t used as follows: + - Lowest 8-bit block: pruning value + - Second-lowest 8-bit block: "ttrep" (transformation to representative) + - Top 16-bit block: symcoord value +After the data as described above, more auxiliary information is appended: + - A uint32_t representing the number of symmetry classes + - A uint32_t representing the highest value of the pruning table + - One uint32_t for each "line" of the pruning table, representing the number + of positions having that pruning value. +*/ +_static size_t +gendata_cocsep(void *buf, uint64_t *selfsim, cube_t *rep) +{ + uint32_t *buf32, *info, cc; + uint16_t n; + uint8_t i, j, visited[COCSEP_VISITEDSIZE]; + dfsarg_cocsep_t arg; + + if (buf == NULL) + goto gendata_cocsep_return_size; + + buf32 = (uint32_t *)buf; + info = buf32 + COCSEP_TABLESIZE; + memset(buf32, 0xFF, sizeof(uint32_t) * COCSEP_TABLESIZE); + if (selfsim != NULL) + memset(selfsim, 0, sizeof(uint64_t) * COCSEP_CLASSES); + + arg = (dfsarg_cocsep_t) { + .cube = solved, + .n = &n, + .buf32 = buf32, + .visited = visited, + .selfsim = selfsim, + .rep = rep + }; + for (i = 0, n = 0, cc = 0; i < 10; i++) { + LOG("cocsep: generating depth %" PRIu8 "\n", i); + memset(visited, 0, COCSEP_VISITEDSIZE); + arg.depth = 0; + arg.maxdepth = i; + cc = gendata_cocsep_dfs(&arg); + info[i+2] = cc; + LOG("found %" PRIu32 "\n", cc); + } + + info[0] = (uint32_t)n; + info[1] = 9; /* Known max pruning value */ + DBG_ASSERT(n == COCSEP_CLASSES, 0, + "cocsep: computed %" PRIu16 " symmetry classes, " + "expected %zu\n", n, COCSEP_CLASSES); + + LOG("cocsep data computed\n"); + LOG("Symmetry classes: %" PRIu32 "\n", info[0]); + LOG("Maximum pruning value: %" PRIu32 "\n", info[1]); + LOG("Pruning value distribution:\n"); + for (j = 0; j < 10; j++) + LOG("%" PRIu8 ":\t%" PRIu32 "\n", j, info[j+2]); + +gendata_cocsep_return_size: + return COCSEP_FULLSIZE; +} + +_static uint32_t +gendata_cocsep_dfs(dfsarg_cocsep_t *arg) +{ + uint8_t m; + uint32_t cc, class, ttrep, depth, olddepth, tinv; + uint64_t t; + int64_t i, j; + cube_t d; + dfsarg_cocsep_t nextarg; + + i = coord_cocsep(arg->cube); + olddepth = (uint8_t)(arg->buf32[i] & 0xFF); + if (olddepth < arg->depth || get_visited(arg->visited, i)) + return 0; + set_visited(arg->visited, i); + + if (arg->depth == arg->maxdepth) { + if ((arg->buf32[i] & 0xFF) != 0xFF) + return 0; + + if (arg->rep != NULL) + arg->rep[*arg->n] = arg->cube; + for (t = 0, cc = 0; t < 48; t++) { + d = transform_corners(arg->cube, t); + j = coord_cocsep(d); + if (i == j && arg->selfsim != NULL) + arg->selfsim[*arg->n] |= UINT64_C(1) << t; + if (COCLASS(arg->buf32[j]) != UINT32_C(0xFFFF)) + continue; + set_visited(arg->visited, j); + tinv = inverse_trans(t); + olddepth = arg->buf32[j] & 0xFF; + cc += olddepth == 0xFF; + + class = (uint32_t)(*arg->n) << UINT32_C(16); + ttrep = (uint32_t)tinv << UINT32_C(8); + depth = (uint32_t)arg->depth; + arg->buf32[j] = class | ttrep | depth; + } + (*arg->n)++; + + return cc; + } + + memcpy(&nextarg, arg, sizeof(dfsarg_cocsep_t)); + nextarg.depth++; + for (m = 0, cc = 0; m < 18; m++) { + nextarg.cube = move(arg->cube, m); + cc += gendata_cocsep_dfs(&nextarg); + } + + return cc; +} + +_static uint64_t +gen_h48short( + uint8_t n, + const uint32_t *cocsepdata, + const cube_t *crep, + const uint64_t *selfsim, + h48map_t *map +) { + uint8_t i, m; + int64_t coord; + uint64_t j, oldn; + kvpair_t kv; + cube_t cube, d; + + cube = solvedcube(); + coord = coord_h48(cube, cocsepdata, 11); + h48map_insertmin(map, coord, 0); + oldn = 0; + LOG("Short h48: generating depth 0\nfound %" PRIu8 "\n", map->n-oldn); + for (i = 0; i < n; i++) { + LOG("Short h48: generating depth %" PRIu8 "\n", i+1); + j = 0; + oldn = map->n; + for (kv = h48map_nextkvpair(map, &j); + j != map->capacity; + kv = h48map_nextkvpair(map, &j) + ) { + if (kv.val != i) + continue; + cube = invcoord_h48(kv.key, crep, 11); + for (m = 0; m < 18; m++) { + d = move(cube, m); + _foreach_h48sim(d, cocsepdata, selfsim, 11, + coord = coord_h48(d, cocsepdata, 11); + h48map_insertmin(map, coord, i+1); + ) + } + } + LOG("found %" PRIu8 "\n", map->n-oldn); + } + + return map->n; +} + +/* +TODO description +generating fixed table with h=0, k=4 +*/ +_static size_t +gendata_h48h0k4(void *buf, uint8_t maxdepth) +{ + uint32_t j, *buf32, *info, *cocsepdata; + bfsarg_esep_t arg; + int64_t sc, cc, esep_max; + uint64_t selfsim[COCSEP_CLASSES]; + cube_t crep[COCSEP_CLASSES]; + size_t cocsepsize, infosize; + + /* TODO: move info at start of tables (all tables!) */ + infosize = 4 * maxdepth; + cocsepsize = gendata_cocsep(buf, selfsim, crep); + infosize = 88; + + if (buf == NULL) + goto gendata_h48h0k4_return_size; + + esep_max = (int64_t)ESEP_MAX(0); + cocsepdata = (uint32_t *)buf; + buf32 = cocsepdata + cocsepsize / 4; + info = buf32 + (ESEP_TABLESIZE(0, 4) / sizeof(uint32_t)); + memset(buf32, 0xFF, ESEP_TABLESIZE(0, 4)); + + sc = coord_h48(solved, cocsepdata, 0); + set_esep_pval(buf32, sc, 0); + info[1] = 1; + arg = (bfsarg_esep_t) { + .cocsepdata = cocsepdata, + .buf32 = buf32, + .selfsim = selfsim, + .crep = crep + }; + for ( + arg.done = 1, arg.depth = 1, cc = 0; + arg.done < esep_max && arg.depth <= maxdepth; + arg.depth++ + ) { + LOG("esep: generating depth %" PRIu8 "\n", arg.depth); + cc = gendata_h48h0k4_bfs(&arg); + arg.done += cc; + info[arg.depth+1] = cc; + LOG("found %" PRId64 "\n", cc); + } + + info[0] = arg.depth-1; + + LOG("h48 pruning table computed\n"); + LOG("Maximum pruning value: %" PRIu32 "\n", info[0]); + LOG("Pruning value distribution:\n"); + for (j = 0; j <= info[0]; j++) + LOG("%" PRIu8 ":\t%" PRIu32 "\n", j, info[j+1]); + +gendata_h48h0k4_return_size: + return cocsepsize + ESEP_TABLESIZE(0, 4) + infosize; +} + +_static int64_t +gendata_h48h0k4_bfs(bfsarg_esep_t *arg) +{ + const uint8_t breakpoint = 10; /* Hand-picked optimal */ + + if (arg->depth < breakpoint) + return gendata_h48h0k4_bfs_fromdone(arg); + else + return gendata_h48h0k4_bfs_fromnew(arg); +} + +_static int64_t +gendata_h48h0k4_bfs_fromdone(bfsarg_esep_t *arg) +{ + uint8_t c, m, x; + uint32_t cc; + int64_t i, j, k; + cube_t cube, moved; + + for (i = 0, cc = 0; i < (int64_t)ESEP_MAX(0); i++) { + c = get_esep_pval(arg->buf32, i); + if (c != arg->depth - 1) + continue; + cube = invcoord_h48(i, arg->crep, 0); + for (m = 0; m < 18; m++) { + moved = move(cube, m); + j = coord_h48(moved, arg->cocsepdata, 0); + if (get_esep_pval(arg->buf32, j) <= arg->depth) + continue; + _foreach_h48sim(moved, arg->cocsepdata, arg->selfsim, 0, + k = coord_h48(moved, arg->cocsepdata, 0); + x = get_esep_pval(arg->buf32, k); + set_esep_pval(arg->buf32, k, arg->depth); + cc += x != arg->depth; + ) + } + } + + return cc; +} + +_static int64_t +gendata_h48h0k4_bfs_fromnew(bfsarg_esep_t *arg) +{ + uint8_t c, m, x; + uint32_t cc; + int64_t i, j; + cube_t cube, moved; + + for (i = 0, cc = 0; i < (int64_t)ESEP_MAX(0); i++) { + c = get_esep_pval(arg->buf32, i); + if (c != 0xF) + continue; + cube = invcoord_h48(i, arg->crep, 0); + for (m = 0; m < 18; m++) { + moved = move(cube, m); + j = coord_h48(moved, arg->cocsepdata, 0); + x = get_esep_pval(arg->buf32, j); + if (x >= arg->depth) + continue; + _foreach_h48sim(cube, arg->cocsepdata, arg->selfsim, 0, + j = coord_h48(cube, arg->cocsepdata, 0); + x = get_esep_pval(arg->buf32, j); + set_esep_pval(arg->buf32, j, arg->depth); + cc += x == 0xF; + ) + break; /* Enough to find one, skip the rest */ + } + } + + return cc; +} + +_static_inline bool +get_visited(const uint8_t *a, int64_t i) +{ + return a[VISITED_IND(i)] & VISITED_MASK(i); +} + +_static_inline void +set_visited(uint8_t *a, int64_t i) +{ + a[VISITED_IND(i)] |= VISITED_MASK(i); +} + +_static_inline uint8_t +get_esep_pval(const uint32_t *buf32, int64_t i) +{ + return (buf32[ESEP_IND(i)] & ESEP_MASK(i)) >> ESEP_SHIFT(i); +} + +_static_inline void +set_esep_pval(uint32_t *buf32, int64_t i, uint8_t val) +{ + buf32[ESEP_IND(i)] = + (buf32[ESEP_IND(i)] & (~ESEP_MASK(i))) | (val << ESEP_SHIFT(i)); +} + +_static_inline int8_t +get_h48_cdata(cube_t cube, uint32_t *cocsepdata, uint32_t *cdata) +{ + int64_t coord; + + coord = coord_cocsep(cube); + *cdata = cocsepdata[coord]; + + return CBOUND(*cdata); +} + +_static_inline int8_t +get_h48_bound(cube_t cube, uint32_t cdata, uint8_t h, uint32_t *h48data) +{ + int64_t coord; + + coord = coord_h48_edges(cube, COCLASS(cdata), TTREP(cdata), h); + return get_esep_pval(h48data, coord); +} diff --git a/src/solvers/h48/h48.h b/src/solvers/h48/h48.h @@ -0,0 +1,4 @@ +#include "coordinate.h" +#include "map.h" +#include "gendata.h" +#include "solve.h" diff --git a/src/solvers/h48/map.h b/src/solvers/h48/map.h @@ -0,0 +1,104 @@ +#define MAP_UNSET UINT64_C(0xFFFFFFFFFFFFFFFF) +#define MAP_KEYMASK UINT64_C(0xFFFFFFFFFF) +#define MAP_KEYSHIFT UINT64_C(40) + +typedef struct { + uint64_t n; + uint64_t capacity; + uint64_t randomizer; + uint64_t *table; +} h48map_t; + +typedef struct { + uint64_t key; + uint64_t val; +} kvpair_t; + +_static void h48map_create(h48map_t *, uint64_t, uint64_t); +_static void h48map_clear(h48map_t *); +_static void h48map_destroy(h48map_t *); +_static uint64_t h48map_lookup(h48map_t *, uint64_t); +_static void h48map_insertmin(h48map_t *, uint64_t, uint64_t); +_static uint64_t h48map_value(h48map_t *, uint64_t); +_static kvpair_t h48map_nextkvpair(h48map_t *, uint64_t *); + +_static void +h48map_create(h48map_t *map, uint64_t capacity, uint64_t randomizer) +{ + map->capacity = capacity; + map->randomizer = randomizer; + + map->table = malloc(map->capacity * sizeof(int64_t)); + h48map_clear(map); +} + +_static void +h48map_clear(h48map_t *map) +{ + memset(map->table, 0xFF, map->capacity * sizeof(uint64_t)); + map->n = 0; +} + +_static void +h48map_destroy(h48map_t *map) +{ + free(map->table); +} + +_static_inline uint64_t +h48map_lookup(h48map_t *map, uint64_t x) +{ + uint64_t hash, i; + + hash = ((x % map->capacity) * map->randomizer) % map->capacity; + for (i = hash; + map->table[i] != MAP_UNSET && (map->table[i] & MAP_KEYMASK) != x; + i = (i+1) % map->capacity + ) ; + + return i; +} + +_static_inline void +h48map_insertmin(h48map_t *map, uint64_t key, uint64_t val) +{ + uint64_t i, oldval, min; + + i = h48map_lookup(map, key); + oldval = map->table[i] >> MAP_KEYSHIFT; + min = _min(val, oldval); + + map->n += map->table[i] == MAP_UNSET; + map->table[i] = (key & MAP_KEYMASK) | (min << MAP_KEYSHIFT); +} + +_static_inline uint64_t +h48map_value(h48map_t *map, uint64_t key) +{ + return map->table[h48map_lookup(map, key)] >> MAP_KEYSHIFT; +} + +_static kvpair_t +h48map_nextkvpair(h48map_t *map, uint64_t *p) +{ + kvpair_t kv; + uint64_t pair; + + kv.key = MAP_UNSET; + kv.val = MAP_UNSET; + + DBG_ASSERT(*p < map->capacity, kv, + "Error looping over map: given index %" PRIu64 " is out of " + "range [0,%" PRIu64 "]", *p, map->capacity); + + for ( ; *p < map->capacity; (*p)++) { + if (map->table[*p] != MAP_UNSET) { + pair = map->table[(*p)++]; + kv.key = pair & MAP_KEYMASK; + kv.val = pair >> MAP_KEYSHIFT; + return kv; + } + } + + return kv; +} diff --git a/src/solvers/h48/solve.h b/src/solvers/h48/solve.h @@ -0,0 +1,242 @@ +typedef struct { + cube_t cube; + cube_t inverse; + int8_t nmoves; + int8_t depth; + uint8_t moves[MAXLEN]; + int64_t *nsols; + int64_t maxsolutions; + uint8_t h; + uint32_t *cocsepdata; + uint32_t *h48data; + char **nextsol; +} dfsarg_solveh48_t; + +typedef struct { + cube_t cube; + int8_t nmoves; + int8_t depth; + uint8_t moves[MAXLEN]; + uint32_t *cocsepdata; + uint32_t *h48data; + char *s; +} dfsarg_solveh48stats_t; + +_static void solve_h48_appendsolution(dfsarg_solveh48_t *); +_static_inline bool solve_h48_stop(dfsarg_solveh48_t *); +_static int64_t solve_h48_dfs(dfsarg_solveh48_t *); +_static int64_t solve_h48( + cube_t, int8_t, int8_t, int8_t, uint8_t, const void *, char *); + +_static int64_t solve_h48stats_dfs(dfsarg_solveh48stats_t *); +_static int64_t solve_h48stats(cube_t, int8_t, const void *, char [static 12]); + +_static void +solve_h48_appendsolution(dfsarg_solveh48_t *arg) +{ + int strl; + + strl = writemoves(arg->moves, arg->nmoves, *arg->nextsol); + LOG("Solution found: %s\n", *arg->nextsol); + *arg->nextsol += strl; + **arg->nextsol = '\n'; + (*arg->nextsol)++; + (*arg->nsols)++; +} + +_static_inline bool +solve_h48_stop(dfsarg_solveh48_t *arg) +{ + uint32_t data, data_inv; + int8_t bound; + + bound = get_h48_cdata(arg->cube, arg->cocsepdata, &data); + if (bound + arg->nmoves > arg->depth) + return true; + + bound = get_h48_cdata(arg->inverse, arg->cocsepdata, &data_inv); + if (bound + arg->nmoves > arg->depth) + return true; + +/* + bound = get_h48_bound(arg->cube, data, arg->h, arg->h48data); +LOG("Using pval %" PRId8 "\n", bound); + if (bound + arg->nmoves > arg->depth) + return true; + + bound = get_h48_bound(arg->inverse, data_inv, arg->h, arg->h48data); + if (bound + arg->nmoves > arg->depth) + return true; +*/ + + return false; +} + +_static int64_t +solve_h48_dfs(dfsarg_solveh48_t *arg) +{ + dfsarg_solveh48_t nextarg; + int64_t ret; + uint8_t m; + + if (*arg->nsols == arg->maxsolutions) + return 0; + + if (solve_h48_stop(arg)) + return 0; + + if (issolved(arg->cube)) { + if (arg->nmoves != arg->depth) + return 0; + solve_h48_appendsolution(arg); + return 1; + } + + /* TODO: avoid copy, change arg and undo changes after recursion */ + nextarg = *arg; + nextarg.nmoves = arg->nmoves + 1; + ret = 0; + for (m = 0; m < 18; m++) { + nextarg.moves[arg->nmoves] = m; + if (!allowednextmove(nextarg.moves, nextarg.nmoves)) { + /* If a move is not allowed, neither are its 180 + * and 270 degree variations */ + m += 2; + continue; + } + nextarg.cube = move(arg->cube, m); + nextarg.inverse = inverse(nextarg.cube); /* TODO: use premove */ + ret += solve_h48_dfs(&nextarg); + } + + return ret; +} + +_static int64_t +solve_h48( + cube_t cube, + int8_t minmoves, + int8_t maxmoves, + int8_t maxsolutions, + uint8_t h, + const void *data, + char *solutions +) +{ + int64_t nsols; + dfsarg_solveh48_t arg; + + arg = (dfsarg_solveh48_t) { + .cube = cube, + .inverse = inverse(cube), + .nsols = &nsols, + .maxsolutions = maxsolutions, + .h = h, + .cocsepdata = (uint32_t *)data, + .h48data = ((uint32_t *)data) + COCSEP_FULLSIZE / 4, + .nextsol = &solutions + }; + + nsols = 0; + for (arg.depth = minmoves; + arg.depth <= maxmoves && nsols < maxsolutions; + arg.depth++) + { + LOG("Found %" PRId64 " solutions, searching at depth %" + PRId8 "\n", nsols, arg.depth); + arg.nmoves = 0; + solve_h48_dfs(&arg); + } + + return nsols; +} + +/* +The h48stats solver computes how many moves it takes to solve to +each of the 12 h48 coordinates, one for each value of h from 0 to 11. +The solutions array is filled with the length of the solutions. The +solution array is therefore not a printable string. +*/ +_static int64_t +solve_h48stats_dfs(dfsarg_solveh48stats_t *arg) +{ + const int64_t limit = 11; + + int8_t bound, u; + uint8_t m; + uint32_t d; + int64_t coord, h; + dfsarg_solveh48stats_t nextarg; + + /* Check cocsep lower bound (corners only) */ + bound = get_h48_cdata(arg->cube, arg->cocsepdata, &d); + if (bound + arg->nmoves > arg->depth) + return 0; + + /* Check h48 lower bound for h=0 (esep, but no eo) */ + coord = coord_h48_edges(arg->cube, COCLASS(d), TTREP(d), 0); + bound = get_esep_pval(arg->h48data, coord); + if (bound + arg->nmoves > arg->depth) + return 0; + + /* Update all other values, if solved */ + coord = coord_h48_edges(arg->cube, COCLASS(d), TTREP(d), 11); + for (h = 0; h <= limit; h++) { + u = coord >> (11-h) == 0 && arg->s[h] == 99; + arg->s[h] = u * arg->nmoves + (1-u) * arg->s[h]; + } + + if (arg->s[limit] != 99) + return 0; + + nextarg = *arg; + nextarg.nmoves = arg->nmoves + 1; + for (m = 0; m < 18; m++) { + nextarg.moves[arg->nmoves] = m; + if (!allowednextmove(nextarg.moves, nextarg.nmoves)) { + /* If a move is not allowed, neither are its 180 + * and 270 degree variations */ + m += 2; + continue; + } + nextarg.cube = move(arg->cube, m); + solve_h48stats_dfs(&nextarg); + } + + return 0; +} + +_static int64_t +solve_h48stats( + cube_t cube, + int8_t maxmoves, + const void *data, + char solutions[static 12] +) +{ + int i; + size_t cocsepsize; + dfsarg_solveh48stats_t arg; + + cocsepsize = gendata_cocsep(NULL, NULL, NULL); + + arg = (dfsarg_solveh48stats_t) { + .cube = cube, + .cocsepdata = (uint32_t *)data, + .h48data = ((uint32_t *)data) + (cocsepsize/4), + .s = solutions + }; + + for (i = 0; i < 12; i++) + solutions[i] = (char)99; + + for (arg.depth = 0; + arg.depth <= maxmoves && solutions[11] == 99; + arg.depth++) + { + arg.nmoves = 0; + solve_h48stats_dfs(&arg); + } + + return 0; +} diff --git a/src/solvers/solvers.h b/src/solvers/solvers.h @@ -0,0 +1,2 @@ +#include "generic/generic.h" +#include "h48/h48.h" diff --git a/src/constants.h b/src/utils/constants.h diff --git a/src/utils/dbg_log.h b/src/utils/dbg_log.h @@ -0,0 +1,16 @@ +void (*nissy_log)(const char *, ...); + +#define LOG(...) if (nissy_log != NULL) nissy_log(__VA_ARGS__); + +#ifdef DEBUG +#define _static +#define _static_inline +#define DBG_WARN(condition, ...) if (!(condition)) LOG(__VA_ARGS__); +#define DBG_ASSERT(condition, retval, ...) \ + if (!(condition)) { LOG(__VA_ARGS__); return retval; } +#else +#define _static static +#define _static_inline static inline +#define DBG_WARN(condition, ...) +#define DBG_ASSERT(condition, retval, ...) +#endif diff --git a/src/utils.h b/src/utils/math.h diff --git a/src/utils/utils.h b/src/utils/utils.h @@ -0,0 +1,3 @@ +#include "dbg_log.h" +#include "constants.h" +#include "math.h" diff --git a/test/test.h b/test/test.h @@ -1,3 +1,5 @@ +#define TEST_H + #include <inttypes.h> #include <stdarg.h> #include <stdbool.h> @@ -5,24 +7,9 @@ #include <stdlib.h> #include <string.h> -#define STRLENMAX 10000 +#include "../src/arch/arch.h" -#if defined(CUBE_AVX2) -#include <immintrin.h> -typedef __m256i cube_t; -#elif defined(CUBE_NEON) -#include <stdlib.h> -#include <arm_neon.h> -typedef struct { - uint8x16_t corner; - uint8x16_t edge; -} cube_t; -#else -typedef struct { - uint8_t corner[8]; - uint8_t edge[12]; -} cube_t; -#endif +#define STRLENMAX 10000 /* Basic functions used in most tests */ cube_t solvedcube(void); diff --git a/tools/001_gendata_h48/gendata_h48.c b/tools/001_gendata_h48/gendata_h48.c @@ -1,5 +1,5 @@ #include "../timerun.h" -#include "../../src/cube.h" +#include "../../src/nissy.h" #define MAXDEPTH 20 #define HVALUE 0 diff --git a/tools/002_stats_tables_h48/stats_tables_h48.c b/tools/002_stats_tables_h48/stats_tables_h48.c @@ -1,7 +1,7 @@ #include <pthread.h> #include <time.h> #include "../timerun.h" -#include "../../src/cube.h" +#include "../../src/nissy.h" #define MAXMOVES 20 #define NTHREADS 32 diff --git a/utils/genmovecode.sh b/utils/genmovecode.sh @@ -1,6 +1,6 @@ #!/bin/sh -cc -DDEBUG h48_to_lst.c ../src/cube.c -o h48_to_lst +cc -DDEBUG h48_to_lst.c ../src/nissy.c -o h48_to_lst gen() { for f in cubes/move_??_*.txt; do diff --git a/utils/gentranscode.sh b/utils/gentranscode.sh @@ -1,7 +1,7 @@ #!/bin/sh -cc -DDEBUG h48_to_lst.c ../src/cube.c -o h48_to_lst -cc -DDEBUG invert.c ../src/cube.c -o invert +cc -DDEBUG h48_to_lst.c ../src/nissy.c -o h48_to_lst +cc -DDEBUG invert.c ../src/nissy.c -o invert lineavx() { printf '#define _trans_cube_%s ' "$1"; } linesrc() { printf '_static cube_fast_t _trans_cube_%s = ' "$1"; } diff --git a/utils/h48_to_lst.c b/utils/h48_to_lst.c @@ -2,7 +2,7 @@ #include <inttypes.h> #include <stdbool.h> -#include "../src/cube.h" +#include "../src/nissy.h" #define STRLENMAX 1000 diff --git a/utils/invert.c b/utils/invert.c @@ -2,7 +2,7 @@ #include <inttypes.h> #include <stdbool.h> -#include "../src/cube.h" +#include "../src/nissy.h" #define STRLENMAX 1000