h48

A prototype for an optimal Rubik's cube solver, work in progress.
git clone https://git.tronto.net/h48
Download | Log | Files | Refs | README | LICENSE

commit 49b9c4a516f72e03d53277075f4580e2df8f0e63
parent fde8dfa9df8df1e3d42a9a8840836e33817cd498
Author: Sebastiano Tronto <sebastiano@tronto.net>
Date:   Mon, 27 May 2024 12:04:53 +0200

Some type fixes and some cleanup

Diffstat:
MTODO.txt | 4++--
Msrc/constants.h | 214++++++++++++++++++++++++++++++++++++++++---------------------------------------
Msrc/cube_avx2.h | 32+++++++++++++++++---------------
Msrc/cube_portable.h | 17++++++++---------
Msrc/cube_routines.h | 6++++--
Msrc/solve_h48.h | 120+++++++++++++++++++++++++++++++++++++++++++++----------------------------------
Mtest/075_set_eo/set_eo_tests.c | 6++++--
Mtest/100_gendata_cocsep/gendata_cocsep_tests.c | 2+-
Mtest/101_cocsep_selfsim/cocsep_selfsim_tests.c | 8++++----
Dtest/101_coord_invcoord_h48/coord_invcoord_h48_tests.c | 40----------------------------------------
Rtest/101_coord_invcoord_h48/00_all.in -> test/102_coord_invcoord_h48/00_all.in | 0
Rtest/101_coord_invcoord_h48/00_all.out -> test/102_coord_invcoord_h48/00_all.out | 0
Atest/102_coord_invcoord_h48/coord_invcoord_h48_tests.c | 40++++++++++++++++++++++++++++++++++++++++
Dtest/102_gendata_h48/gendata_h48_tests.c | 35-----------------------------------
Rtest/102_gendata_h48/00_h_0.in -> test/103_gendata_h48/00_h_0.in | 0
Rtest/102_gendata_h48/00_h_0.out -> test/103_gendata_h48/00_h_0.out | 0
Rtest/102_gendata_h48/01_h_1.in -> test/103_gendata_h48/01_h_1.in | 0
Rtest/102_gendata_h48/01_h_1.out -> test/103_gendata_h48/01_h_1.out | 0
Atest/103_gendata_h48/gendata_h48_tests.c | 35+++++++++++++++++++++++++++++++++++
19 files changed, 292 insertions(+), 267 deletions(-)

diff --git a/TODO.txt b/TODO.txt @@ -1,6 +1,6 @@ Correctness - - Check all cases of bitwise operations for type mismatch, - force cast to same type. Change to ULL where appropriate. + - check all bitwise operations specifically + - consider adding more warnings (-pedantic?) or using more static analyzers Solver - write a solver (how many tricks? some, but not all are needed) diff --git a/src/constants.h b/src/constants.h @@ -1,9 +1,13 @@ -#define _2p11 2048U -#define _2p12 4096U -#define _3p7 2187U -#define _3p8 6561U -#define _12c4 495U -#define _8c4 70U +#define _bit_u8(i) (UINT8_C(1) << (uint8_t)(i)) +#define _bit_u32(i) (UINT32_C(1) << (uint32_t)(i)) +#define _bit_u64(i) (UINT64_C(1) << (uint64_t)(i)) + +#define _2p11 INT64_C(2048) +#define _2p12 INT64_C(4096) +#define _3p7 INT64_C(2187) +#define _3p8 INT64_C(6561) +#define _12c4 INT64_C(495) +#define _8c4 INT64_C(70) _static int64_t binomial[12][12] = { {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, @@ -20,111 +24,111 @@ _static int64_t binomial[12][12] = { {1, 11, 55, 165, 330, 462, 462, 330, 165, 55, 11, 1}, }; -#define _move_U 0U -#define _move_U2 1U -#define _move_U3 2U -#define _move_D 3U -#define _move_D2 4U -#define _move_D3 5U -#define _move_R 6U -#define _move_R2 7U -#define _move_R3 8U -#define _move_L 9U -#define _move_L2 10U -#define _move_L3 11U -#define _move_F 12U -#define _move_F2 13U -#define _move_F3 14U -#define _move_B 15U -#define _move_B2 16U -#define _move_B3 17U +#define _move_U UINT8_C(0) +#define _move_U2 UINT8_C(1) +#define _move_U3 UINT8_C(2) +#define _move_D UINT8_C(3) +#define _move_D2 UINT8_C(4) +#define _move_D3 UINT8_C(5) +#define _move_R UINT8_C(6) +#define _move_R2 UINT8_C(7) +#define _move_R3 UINT8_C(8) +#define _move_L UINT8_C(9) +#define _move_L2 UINT8_C(10) +#define _move_L3 UINT8_C(11) +#define _move_F UINT8_C(12) +#define _move_F2 UINT8_C(13) +#define _move_F3 UINT8_C(14) +#define _move_B UINT8_C(15) +#define _move_B2 UINT8_C(16) +#define _move_B3 UINT8_C(17) -#define _trans_UFr 0 -#define _trans_ULr 1 -#define _trans_UBr 2 -#define _trans_URr 3 -#define _trans_DFr 4 -#define _trans_DLr 5 -#define _trans_DBr 6 -#define _trans_DRr 7 -#define _trans_RUr 8 -#define _trans_RFr 9 -#define _trans_RDr 10 -#define _trans_RBr 11 -#define _trans_LUr 12 -#define _trans_LFr 13 -#define _trans_LDr 14 -#define _trans_LBr 15 -#define _trans_FUr 16 -#define _trans_FRr 17 -#define _trans_FDr 18 -#define _trans_FLr 19 -#define _trans_BUr 20 -#define _trans_BRr 21 -#define _trans_BDr 22 -#define _trans_BLr 23 +#define _trans_UFr UINT8_C(0) +#define _trans_ULr UINT8_C(1) +#define _trans_UBr UINT8_C(2) +#define _trans_URr UINT8_C(3) +#define _trans_DFr UINT8_C(4) +#define _trans_DLr UINT8_C(5) +#define _trans_DBr UINT8_C(6) +#define _trans_DRr UINT8_C(7) +#define _trans_RUr UINT8_C(8) +#define _trans_RFr UINT8_C(9) +#define _trans_RDr UINT8_C(10) +#define _trans_RBr UINT8_C(11) +#define _trans_LUr UINT8_C(12) +#define _trans_LFr UINT8_C(13) +#define _trans_LDr UINT8_C(14) +#define _trans_LBr UINT8_C(15) +#define _trans_FUr UINT8_C(16) +#define _trans_FRr UINT8_C(17) +#define _trans_FDr UINT8_C(18) +#define _trans_FLr UINT8_C(19) +#define _trans_BUr UINT8_C(20) +#define _trans_BRr UINT8_C(21) +#define _trans_BDr UINT8_C(22) +#define _trans_BLr UINT8_C(23) -#define _trans_UFm 24 -#define _trans_ULm 25 -#define _trans_UBm 26 -#define _trans_URm 27 -#define _trans_DFm 28 -#define _trans_DLm 29 -#define _trans_DBm 30 -#define _trans_DRm 31 -#define _trans_RUm 32 -#define _trans_RFm 33 -#define _trans_RDm 34 -#define _trans_RBm 35 -#define _trans_LUm 36 -#define _trans_LFm 37 -#define _trans_LDm 38 -#define _trans_LBm 39 -#define _trans_FUm 40 -#define _trans_FRm 41 -#define _trans_FDm 42 -#define _trans_FLm 43 -#define _trans_BUm 44 -#define _trans_BRm 45 -#define _trans_BDm 46 -#define _trans_BLm 47 +#define _trans_UFm UINT8_C(24) +#define _trans_ULm UINT8_C(25) +#define _trans_UBm UINT8_C(26) +#define _trans_URm UINT8_C(27) +#define _trans_DFm UINT8_C(28) +#define _trans_DLm UINT8_C(29) +#define _trans_DBm UINT8_C(30) +#define _trans_DRm UINT8_C(31) +#define _trans_RUm UINT8_C(32) +#define _trans_RFm UINT8_C(33) +#define _trans_RDm UINT8_C(34) +#define _trans_RBm UINT8_C(35) +#define _trans_LUm UINT8_C(36) +#define _trans_LFm UINT8_C(37) +#define _trans_LDm UINT8_C(38) +#define _trans_LBm UINT8_C(39) +#define _trans_FUm UINT8_C(40) +#define _trans_FRm UINT8_C(41) +#define _trans_FDm UINT8_C(42) +#define _trans_FLm UINT8_C(43) +#define _trans_BUm UINT8_C(44) +#define _trans_BRm UINT8_C(45) +#define _trans_BDm UINT8_C(46) +#define _trans_BLm UINT8_C(47) -#define _c_ufr 0U -#define _c_ubl 1U -#define _c_dfl 2U -#define _c_dbr 3U -#define _c_ufl 4U -#define _c_ubr 5U -#define _c_dfr 6U -#define _c_dbl 7U +#define _c_ufr UINT8_C(0) +#define _c_ubl UINT8_C(1) +#define _c_dfl UINT8_C(2) +#define _c_dbr UINT8_C(3) +#define _c_ufl UINT8_C(4) +#define _c_ubr UINT8_C(5) +#define _c_dfr UINT8_C(6) +#define _c_dbl UINT8_C(7) -#define _e_uf 0U -#define _e_ub 1U -#define _e_db 2U -#define _e_df 3U -#define _e_ur 4U -#define _e_ul 5U -#define _e_dl 6U -#define _e_dr 7U -#define _e_fr 8U -#define _e_fl 9U -#define _e_bl 10U -#define _e_br 11U +#define _e_uf UINT8_C(0) +#define _e_ub UINT8_C(1) +#define _e_db UINT8_C(2) +#define _e_df UINT8_C(3) +#define _e_ur UINT8_C(4) +#define _e_ul UINT8_C(5) +#define _e_dl UINT8_C(6) +#define _e_dr UINT8_C(7) +#define _e_fr UINT8_C(8) +#define _e_fl UINT8_C(9) +#define _e_bl UINT8_C(10) +#define _e_br UINT8_C(11) -#define _eoshift 4U -#define _coshift 5U +#define _eoshift UINT8_C(4) +#define _coshift UINT8_C(5) -#define _pbits 0xFU -#define _esepbit1 0x4U -#define _esepbit2 0x8U -#define _csepbit 0x4U -#define _eobit 0x10U -#define _cobits 0xF0U -#define _cobits2 0x60U -#define _ctwist_cw 0x20U -#define _ctwist_ccw 0x40U -#define _eflip 0x10U -#define _error 0xFFU +#define _pbits UINT8_C(0xF) +#define _esepbit1 UINT8_C(0x4) +#define _esepbit2 UINT8_C(0x8) +#define _csepbit UINT8_C(0x4) +#define _eobit UINT8_C(0x10) +#define _cobits UINT8_C(0xF0) +#define _cobits2 UINT8_C(0x60) +#define _ctwist_cw UINT8_C(0x20) +#define _ctwist_ccw UINT8_C(0x40) +#define _eflip UINT8_C(0x10) +#define _error UINT8_C(0xFF) _static cube_t zero = { .corner = {0}, .edge = {0} }; _static cube_t solved = { diff --git a/src/cube_avx2.h b/src/cube_avx2.h @@ -1,10 +1,12 @@ typedef __m256i cube_fast_t; -#define _co2_avx2 _mm256_set_epi64x(0, 0, 0, 0x6060606060606060) -#define _cocw_avx2 _mm256_set_epi64x(0, 0, 0, 0x2020202020202020) -#define _cp_avx2 _mm256_set_epi64x(0, 0, 0, 0x0707070707070707) -#define _ep_avx2 _mm256_set_epi64x(0x0F0F0F0F, 0x0F0F0F0F0F0F0F0F, 0, 0) -#define _eo_avx2 _mm256_set_epi64x(0x10101010, 0x1010101010101010, 0, 0) +#define _co2_avx2 _mm256_set_epi64x(0, 0, 0, INT64_C(0x6060606060606060)) +#define _cocw_avx2 _mm256_set_epi64x(0, 0, 0, INT64_C(0x2020202020202020)) +#define _cp_avx2 _mm256_set_epi64x(0, 0, 0, INT64_C(0x0707070707070707)) +#define _ep_avx2 \ + _mm256_set_epi64x(INT64_C(0x0F0F0F0F), INT64_C(0x0F0F0F0F0F0F0F0F), 0, 0) +#define _eo_avx2 \ + _mm256_set_epi64x(INT64_C(0x10101010), INT64_C(0x1010101010101010), 0, 0) _static_inline cube_fast_t fastcube( uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, @@ -194,9 +196,9 @@ coord_fast_co(cube_fast_t c) co = _mm256_and_si256(c, _co2_avx2); _mm256_storeu_si256((__m256i *)mem, co); - mem[0] >>= 5L; - for (i = 0, ret = 0, p = 1; i < 7; i++, mem[0] >>= 8L, p *= 3) - ret += (mem[0] & 3L) * p; + mem[0] >>= 5; + for (i = 0, ret = 0, p = 1; i < 7; i++, mem[0] >>= 8, p *= 3) + ret += (mem[0] & 3) * p; return ret; } @@ -242,14 +244,14 @@ coord_fast_esep(cube_fast_t c) ep = _mm256_and_si256(c, _ep_avx2); _mm256_storeu_si256((__m256i *)mem, ep); - mem[3] <<= 8L; + mem[3] <<= 8; ret1 = ret2 = 0; k = l = 4; - for (i = 0, j = 0; i < 12; i++, mem[i/8 + 2] >>= 8L) { + for (i = 0, j = 0; i < 12; i++, mem[i/8 + 2] >>= 8) { e = mem[i/8 + 2]; - bit1 = (e & _esepbit1) >> 2L; - bit2 = (e & _esepbit2) >> 3L; + bit1 = (e & _esepbit1) >> 2; + bit2 = (e & _esepbit2) >> 3; is1 = (1 - bit2) * bit1; ret1 += bit2 * binomial[11-i][k]; @@ -305,8 +307,8 @@ _static_inline cube_fast_t invcoord_fast_esep(int64_t esep) { cube_fast_t eee, ret; - int64_t i, j, jj, k, l, s, v, w, is1, set1, set2; - uint8_t bit2, bit1, mem[32]; + int64_t bit1, bit2, i, j, jj, k, l, s, v, w, is1, set1, set2; + uint8_t mem[32]; uint8_t slice[3] = {0}; set1 = esep % 70; @@ -327,7 +329,7 @@ invcoord_fast_esep(int64_t esep) j += (1-bit2); s = 2*bit2 + (1-bit2)*bit1; - mem[i+16] = (slice[s]++) | (s << 2); + mem[i+16] = (slice[s]++) | (uint8_t)(s << 2); } ret = cubetofast(solved); diff --git a/src/cube_portable.h b/src/cube_portable.h @@ -143,7 +143,7 @@ compose_corners_inplace(cube_fast_t c1, cube_fast_t c2, cube_fast_t *ret) p = piece2 & _pbits; piece1 = c1.corner[p]; aux = (piece2 & _cobits) + (piece1 & _cobits); - auy = (aux + _ctwist_cw) >> 2U; + auy = (aux + _ctwist_cw) >> 2; orien = (aux + auy) & _cobits2; ret->corner[i] = (piece1 & _pbits) | orien; } @@ -206,7 +206,7 @@ coord_fast_csep(cube_fast_t c) int64_t ret; for (ret = 0, i = 0, p = 1; i < 7; i++, p *= 2) - ret += p * ((c.corner[i] & _csepbit) >> 2U); + ret += p * ((c.corner[i] & _csepbit) >> 2); return ret; } @@ -249,8 +249,8 @@ coord_fast_esep(cube_fast_t c) } */ - bit1 = (c.edge[i] & _esepbit1) >> 2U; - bit2 = (c.edge[i] & _esepbit2) >> 3U; + bit1 = (c.edge[i] & _esepbit1) >> 2; + bit2 = (c.edge[i] & _esepbit2) >> 3; is1 = (1 - bit2) * bit1; ret1 += bit2 * binomial[11-i][k]; @@ -280,22 +280,21 @@ copy_edges_fast(cube_fast_t *dest, cube_fast_t src) _static_inline void set_eo_fast(cube_fast_t *cube, int64_t eo) { - int i, sum, flip; + uint8_t i, sum, flip; for (sum = 0, i = 1; i < 12; i++, eo >>= 1) { flip = eo % 2; sum += flip; cube->edge[i] = (cube->edge[i] & ~_eobit) | (_eobit * flip); } - cube->edge[0] = (cube->edge[0] & ~_eobit) | (_eobit * (sum%2)); + cube->edge[0] = (cube->edge[0] & ~_eobit) | (_eobit * (sum % 2)); } _static_inline cube_fast_t invcoord_fast_esep(int64_t esep) { cube_fast_t ret; - int64_t i, j, jj, k, l, s, v, w, is1, set1, set2; - uint8_t bit2, bit1; + int64_t bit1, bit2, i, j, jj, k, l, s, v, w, is1, set1, set2; uint8_t slice[3] = {0}; ret = cubetofast(solved); @@ -317,7 +316,7 @@ invcoord_fast_esep(int64_t esep) j += (1-bit2); s = 2*bit2 + (1-bit2)*bit1; - ret.edge[i] = (slice[s]++) | (s << 2); + ret.edge[i] = (slice[s]++) | (uint8_t)(s << 2); } return ret; diff --git a/src/cube_routines.h b/src/cube_routines.h @@ -422,8 +422,9 @@ _static int writepiece_LST(uint8_t piece, char *buf) { char digits[3]; - int i, len = 0; + int i, len; + len = 0; while (piece != 0) { digits[len++] = (piece % 10) + '0'; piece /= 10; @@ -473,7 +474,8 @@ writecube_H48(cube_t cube, char *buf) _static void writecube_LST(cube_t cube, char *buf) { - int i, ptr; + int i; + size_t ptr; uint8_t piece; ptr = 0; diff --git a/src/solve_h48.h b/src/solve_h48.h @@ -1,18 +1,23 @@ -#define COCSEP_CLASSES 3393U -#define COCSEP_TABLESIZE (_3p7 << 7ULL) -#define COCSEP_VISITEDSIZE ((COCSEP_TABLESIZE + 7ULL) / 8ULL) -#define COCSEP_FULLSIZE (4*(COCSEP_TABLESIZE + 12)) - -#define ESEP_MAX(h) ((COCSEP_CLASSES * _12c4 * _8c4) << (h)) -#define ESEP_TABLESIZE(h, k) (ESEP_MAX((h)) / (8U / (k))) - -#define H48_ESIZE(h) ((_12c4 * _8c4) << (h)) - -#define _esep_ind(i) (i / 8ULL) -#define _esep_shift(i) (4ULL * (i % 8ULL)) -#define _esep_mask(i) (((1ULL << 4ULL) - 1ULL) << _esep_shift(i)) -#define _visited_ind(i) (i / 8ULL) -#define _visited_mask(i) (1ULL << (i % 8ULL)) +#define COCSEP_CLASSES ((size_t)3393) +#define COCSEP_TABLESIZE ((size_t)_3p7 << (size_t)7) +#define COCSEP_VISITEDSIZE ((COCSEP_TABLESIZE + (size_t)7) / (size_t)8) +#define COCSEP_FULLSIZE ((size_t)4 * (COCSEP_TABLESIZE + (size_t)12)) + +#define ESEP_NOEO (COCSEP_CLASSES * (size_t)_12c4 * (size_t)_8c4) +#define ESEP_MAX(h) (ESEP_NOEO << (size_t)(h)) +#define ESEP_TABLESIZE(h, k) (ESEP_MAX((h)) / ((size_t)8 / (size_t)(k))) + +#define COCLASS_MASK (UINT32_C(0xFFFF) << UINT32_C(16)) +#define COCLASS(x) (((x) & COCLASS_MASK) >> UINT32_C(16)) +#define TTREP_MASK (UINT32_C(0xFF) << UINT32_C(8)) +#define TTREP(x) (((x) & TTREP_MASK) >> UINT32_C(8)) +#define H48_ESIZE(h) ((_12c4 * _8c4) << (int64_t)(h)) + +#define ESEP_IND(i) ((uint32_t)(i) / UINT32_C(8)) +#define ESEP_SHIFT(i) (UINT32_C(4) * ((uint32_t)(i) % UINT32_C(8))) +#define ESEP_MASK(i) ((_bit_u32(4) - (uint32_t)(1)) << ESEP_SHIFT(i)) +#define VISITED_IND(i) ((uint32_t)(i) / UINT32_C(8)) +#define VISITED_MASK(i) (UINT32_C(1) << ((uint32_t)(i) % UINT32_C(8))) typedef struct { cube_fast_t cube; @@ -59,8 +64,8 @@ coord_h48(cube_fast_t c, const uint32_t *cocsepdata, uint8_t h) cocsep = coord_fast_cocsep(c); data = cocsepdata[cocsep]; - coclass = (data & (0xFFFFU << 16U)) >> 16U; - ttrep = (data & (0xFFU << 8U)) >> 8U; + coclass = (int64_t)COCLASS(data); + ttrep = (int64_t)TTREP(data); return coord_h48_edges(c, coclass, ttrep, h); } @@ -69,13 +74,14 @@ _static_inline int64_t coord_h48_edges(cube_fast_t c, int64_t coclass, uint8_t t, uint8_t h) { cube_fast_t d; - int64_t esep, eo; + int64_t esep, eo, edges; d = transform_edges(c, t); esep = coord_fast_esep(d); eo = coord_fast_eo(d); + edges = (esep << (int64_t)h) + (eo >> (11 - (int64_t)h)); - return (coclass * H48_ESIZE(h)) + (esep << h) + (eo >> (11-h)); + return coclass * H48_ESIZE(h) + edges; } /* @@ -85,15 +91,17 @@ returned cube is a transformed cube of one that gives the correct value. */ _static_inline cube_fast_t invcoord_h48(int64_t i, const cube_fast_t *crep, uint8_t h) { - cube_fast_t ret; int64_t coclass, ee, esep, eo; + cube_fast_t ret; + int64_t hh, coclass, ee, esep, eo; DBG_ASSERT(h <= 11, cubetofast(zero), "invcoord_h48: h must be between 0 and 11\n"); + hh = (int64_t)h; coclass = i / H48_ESIZE(h); ee = i % H48_ESIZE(h); - esep = ee >> h; - eo = (ee & ((1<<h)-1)) << (11-h); + esep = ee >> hh; + eo = (ee & ((1 << hh) - 1)) << (11 - hh); ret = invcoord_fast_esep(esep); copy_corners_fast(&ret, crep[coclass]); @@ -123,7 +131,7 @@ gendata_cocsep(void *buf, uint64_t *selfsim, cube_fast_t *rep) buf32 = (uint32_t *)buf; info = buf32 + COCSEP_TABLESIZE; - memset(buf32, 0xFFU, sizeof(uint32_t) * COCSEP_TABLESIZE); + memset(buf32, 0xFF, sizeof(uint32_t) * COCSEP_TABLESIZE); memset(selfsim, 0, sizeof(uint64_t) * COCSEP_CLASSES); arg = (dfsarg_cocsep_t) { @@ -145,10 +153,10 @@ gendata_cocsep(void *buf, uint64_t *selfsim, cube_fast_t *rep) } info[0] = (uint32_t)n; - info[1] = 9U; /* Known max pruning value */ + info[1] = 9; /* Known max pruning value */ DBG_ASSERT(n == COCSEP_CLASSES, 0, "cocsep: computed %" PRIu16 " symmetry classes, " - "expected %" PRIu16 "\n", n, COCSEP_CLASSES); + "expected %zu\n", n, COCSEP_CLASSES); DBG_LOG("cocsep data computed\n"); DBG_LOG("Symmetry classes: %" PRIu32 "\n", info[0]); @@ -163,33 +171,37 @@ gendata_cocsep(void *buf, uint64_t *selfsim, cube_fast_t *rep) _static uint32_t gendata_cocsep_dfs(dfsarg_cocsep_t *arg) { - uint8_t m, tinv, olddepth; - uint32_t cc; + uint8_t m, tinv; + uint32_t cc, class, ttrep, depth, olddepth; uint64_t t, is; - int64_t i, ii; + int64_t i, j; cube_fast_t d; dfsarg_cocsep_t nextarg; i = coord_fast_cocsep(arg->cube); - olddepth = (uint8_t)(arg->buf32[i] & 0xFFU); + olddepth = (uint8_t)(arg->buf32[i] & 0xFF); if (olddepth < arg->depth || get_visited(arg->visited, i)) return 0; set_visited(arg->visited, i); if (arg->depth == arg->maxdepth) { - if ((arg->buf32[i] & 0xFFU) != 0xFFU) + if ((arg->buf32[i] & 0xFF) != 0xFF) return 0; for (t = 0, cc = 0; t < 48; t++) { d = transform_corners(arg->cube, t); - ii = coord_fast_cocsep(d); - is = (i == ii); + j = coord_fast_cocsep(d); + is = (i == j); arg->selfsim[*arg->n] |= is << t; - set_visited(arg->visited, ii); + set_visited(arg->visited, j); tinv = inverse_trans(t); - cc += (arg->buf32[ii] & 0xFFU) == 0xFFU; - arg->buf32[ii] = - (*arg->n << 16U) | (tinv << 8U) | arg->depth; + olddepth = (uint8_t)(arg->buf32[j] & 0xFF); + cc += olddepth == 0xFF; + + class = (uint32_t)(*arg->n) << 16; + ttrep = (uint32_t)tinv << 8; + depth = (uint32_t)arg->depth; + arg->buf32[j] = class | ttrep | depth; } arg->rep[*arg->n] = arg->cube; (*arg->n)++; @@ -217,16 +229,17 @@ gendata_h48(void *buf, uint8_t h, uint8_t maxdepth) const int k = 4; /* TODO: other cases? */ uint32_t j, *buf32, *info, *cocsepdata; bfsarg_esep_t arg; - int64_t sc, cc, tot; + int64_t sc, cc, tot, esep_max; uint64_t selfsim[COCSEP_CLASSES]; cube_fast_t crep[COCSEP_CLASSES]; - size_t cocsepsize; + size_t cocsepsize, infosize; + esep_max = (int64_t)ESEP_MAX(h); cocsepsize = gendata_cocsep(buf, selfsim, crep); cocsepdata = (uint32_t *)buf; - buf32 = cocsepdata + cocsepsize/4; + buf32 = cocsepdata + cocsepsize / 4; info = buf32 + (ESEP_TABLESIZE(h, k) / sizeof(uint32_t)); - memset(buf32, 0xFFU, ESEP_TABLESIZE(h, k)); + memset(buf32, 0xFF, ESEP_TABLESIZE(h, k)); sc = coord_h48(cubetofast(solved), cocsepdata, h); set_esep_pval(buf32, sc, 0); @@ -240,7 +253,7 @@ gendata_h48(void *buf, uint8_t h, uint8_t maxdepth) }; for ( tot = 1, arg.depth = 1, cc = 0; - tot < ESEP_MAX(h) && arg.depth <= maxdepth; + tot < esep_max && arg.depth <= maxdepth; arg.depth++ ) { DBG_LOG("esep: generating depth %" PRIu8 "\n", arg.depth); @@ -251,6 +264,7 @@ gendata_h48(void *buf, uint8_t h, uint8_t maxdepth) } info[0] = arg.depth-1; + infosize = 4 * (size_t)(info[0] + 2); DBG_LOG("h48 pruning table computed\n"); DBG_LOG("Maximum pruning value: %" PRIu32 "\n", info[0]); @@ -258,7 +272,7 @@ gendata_h48(void *buf, uint8_t h, uint8_t maxdepth) for (j = 0; j <= info[0]; j++) DBG_LOG("%" PRIu8 ":\t%" PRIu32 "\n", j, info[j+1]); - return COCSEP_FULLSIZE + ESEP_TABLESIZE(h, k) + 4*(info[0]+2); + return cocsepsize + ESEP_TABLESIZE(h, k) + infosize; } _static uint64_t @@ -266,10 +280,12 @@ gendata_esep_bfs(bfsarg_esep_t *arg) { uint8_t c, m, x; uint32_t cc; - uint64_t i, j, k, t, cocsep_coord, sim; + int64_t i, j, k, t, cocsep_coord, sim, esep_max; cube_fast_t cube, moved, transd; - for (i = 0, cc = 0; i < ESEP_MAX(arg->h); i++) { + esep_max = (uint64_t)ESEP_MAX(arg->h); + + for (i = 0, cc = 0; i < esep_max; i++) { c = get_esep_pval(arg->buf32, i); if (c != arg->depth - 1) continue; @@ -288,9 +304,9 @@ gendata_esep_bfs(bfsarg_esep_t *arg) set_esep_pval(arg->buf32, j, arg->depth); cc += x != arg->depth; cocsep_coord = j / H48_ESIZE(arg->h); - sim = arg->selfsim[cocsep_coord] >> 1ULL; - for (t = 1; t < 48 && sim; t++, sim >>= 1ULL) { - if (!(sim & 1ULL)) + sim = arg->selfsim[cocsep_coord] >> 1; + for (t = 1; t < 48 && sim; t++, sim >>= 1) { + if (!(sim & 1)) continue; transd = transform(moved, t); k = coord_h48(transd, arg->cocsepdata, arg->h); @@ -308,23 +324,23 @@ gendata_esep_bfs(bfsarg_esep_t *arg) _static_inline bool get_visited(const uint8_t *a, int64_t i) { - return a[_visited_ind(i)] & _visited_mask(i); + return a[VISITED_IND(i)] & VISITED_MASK(i); } _static_inline void set_visited(uint8_t *a, int64_t i) { - a[_visited_ind(i)] |= _visited_mask(i); + a[VISITED_IND(i)] |= VISITED_MASK(i); } _static_inline uint8_t get_esep_pval(const uint32_t *buf32, int64_t i) { - return (buf32[_esep_ind(i)] & _esep_mask(i)) >> _esep_shift(i); + return (buf32[ESEP_IND(i)] & ESEP_MASK(i)) >> ESEP_SHIFT(i); } _static_inline void set_esep_pval(uint32_t *buf32, int64_t i, uint8_t val) { - buf32[_esep_ind(i)] = - (buf32[_esep_ind(i)] & (~_esep_mask(i))) | (val << _esep_shift(i)); + buf32[ESEP_IND(i)] = + (buf32[ESEP_IND(i)] & (~ESEP_MASK(i))) | (val << ESEP_SHIFT(i)); } diff --git a/test/075_set_eo/set_eo_tests.c b/test/075_set_eo/set_eo_tests.c @@ -22,9 +22,11 @@ int main(void) { printf("Error setting EO\n"); } else if (!isconsistent(cube)) { fprintf(stderr, "edges: "); - for (int i = 0; i < 12; i++) fprintf(stderr, "%d ", cube.edge[i]); + for (int i = 0; i < 12; i++) + fprintf(stderr, "%d ", cube.edge[i]); fprintf(stderr, "\n"); - for (int i = 0; i < 8; i++) fprintf(stderr, "%d ", cube.corner[i]); + for (int i = 0; i < 8; i++) + fprintf(stderr, "%d ", cube.corner[i]); fprintf(stderr, "\n"); printf("Setting EO resulted in inconsistent cube\n"); } else { diff --git a/test/100_gendata_cocsep/gendata_cocsep_tests.c b/test/100_gendata_cocsep/gendata_cocsep_tests.c @@ -1,6 +1,6 @@ #include "../test.h" -#define COCSEP_CLASSES 3393U +#define COCSEP_CLASSES 3393 size_t gendata_cocsep(void *, uint64_t *, cube_fast_t *); diff --git a/test/101_cocsep_selfsim/cocsep_selfsim_tests.c b/test/101_cocsep_selfsim/cocsep_selfsim_tests.c @@ -7,7 +7,7 @@ */ #include "../test.h" -#define COCSEP_CLASSES 3393U +#define COCSEP_CLASSES 3393 size_t gendata_cocsep(void *, uint64_t *, cube_fast_t *); int64_t coord_fast_cocsep(cube_fast_t); @@ -27,10 +27,10 @@ int main(void) { fast = cubetofast(readcube("H48", str)); coord = coord_fast_cocsep(fast); data = buf[coord]; - coclass = (data & (0xFFFU << 16U)) >> 16U; + coclass = (data & (0xFFFU << 16)) >> 16; sim = selfsim[coclass]; - for (t = 0; t < 48 && sim; t++, sim >>= 1ULL) { - if (sim & 1ULL) + for (t = 0; t < 48 && sim; t++, sim >>= 1) { + if (sim & 1) printf("%" PRId64 " ", t); } printf("\n"); diff --git a/test/101_coord_invcoord_h48/coord_invcoord_h48_tests.c b/test/101_coord_invcoord_h48/coord_invcoord_h48_tests.c @@ -1,40 +0,0 @@ -#include "../test.h" - -#define COCSEP_CLASSES 3393U - -size_t gendata_cocsep(void *, uint64_t *, cube_fast_t *); -int64_t coord_h48(cube_fast_t, const uint32_t *, uint8_t); -cube_fast_t invcoord_h48(int64_t, const cube_fast_t *, uint8_t); -cube_fast_t transform(cube_fast_t, uint8_t); - -int main(void) { - char str[STRLENMAX]; - int i; - bool found; - uint8_t h, t; - uint32_t cocsepdata[300000]; - uint64_t selfsim[COCSEP_CLASSES]; - int64_t c, cc; - cube_t cube; - cube_fast_t fast, invc, rep[COCSEP_CLASSES]; - - gendata_cocsep(cocsepdata, selfsim, rep); - - i = 1; - h = 11; - while (fgets(str, STRLENMAX, stdin) != NULL) { - cube = readcube("H48", str); - fast = cubetofast(cube); - c = coord_h48(fast, cocsepdata, h); - invc = invcoord_h48(c, rep, h); - for (t = 0, found = false; t < 48; t++) { - fast = transform(invc, t); - cc = coord_h48(fast, cocsepdata, h); - found = found || cc == c; - } - printf("%d %s\n", i, found ? "ok" : "ERROR"); - i++; - } - - return 0; -} diff --git a/test/101_coord_invcoord_h48/00_all.in b/test/102_coord_invcoord_h48/00_all.in diff --git a/test/101_coord_invcoord_h48/00_all.out b/test/102_coord_invcoord_h48/00_all.out diff --git a/test/102_coord_invcoord_h48/coord_invcoord_h48_tests.c b/test/102_coord_invcoord_h48/coord_invcoord_h48_tests.c @@ -0,0 +1,40 @@ +#include "../test.h" + +#define COCSEP_CLASSES 3393 + +size_t gendata_cocsep(void *, uint64_t *, cube_fast_t *); +int64_t coord_h48(cube_fast_t, const uint32_t *, uint8_t); +cube_fast_t invcoord_h48(int64_t, const cube_fast_t *, uint8_t); +cube_fast_t transform(cube_fast_t, uint8_t); + +int main(void) { + char str[STRLENMAX]; + int i; + bool found; + uint8_t h, t; + uint32_t cocsepdata[300000]; + uint64_t selfsim[COCSEP_CLASSES]; + int64_t c, cc; + cube_t cube; + cube_fast_t fast, invc, rep[COCSEP_CLASSES]; + + gendata_cocsep(cocsepdata, selfsim, rep); + + i = 1; + h = 11; + while (fgets(str, STRLENMAX, stdin) != NULL) { + cube = readcube("H48", str); + fast = cubetofast(cube); + c = coord_h48(fast, cocsepdata, h); + invc = invcoord_h48(c, rep, h); + for (t = 0, found = false; t < 48; t++) { + fast = transform(invc, t); + cc = coord_h48(fast, cocsepdata, h); + found = found || cc == c; + } + printf("%d %s\n", i, found ? "ok" : "ERROR"); + i++; + } + + return 0; +} diff --git a/test/102_gendata_h48/gendata_h48_tests.c b/test/102_gendata_h48/gendata_h48_tests.c @@ -1,35 +0,0 @@ -#include "../test.h" - -#define MAXDEPTH 5 -#define COCSEPSIZE 1119792 -#define ETABLESIZE(h) (((3393 * 495 * 70) >> 1) << (h)) - -size_t gendata_h48(void *, uint8_t, uint8_t); - -int main(void) { - char str[STRLENMAX]; - uint8_t h, i; - uint32_t *buf, *h48info; - size_t result; - - fgets(str, STRLENMAX, stdin); - h = atoi(str); - buf = (uint32_t *)malloc(sizeof(uint32_t) * (60000000 << h)); - result = gendata_h48(buf, h, MAXDEPTH); - h48info = buf + (ETABLESIZE(h) + COCSEPSIZE) / 4; - - printf("%zu\n\n", result); - - printf("cocsepdata:\n"); - printf("Classes: %" PRIu32 "\n", buf[COCSEPSIZE/4-12]); - printf("Max value: %" PRIu32 "\n", buf[COCSEPSIZE/4-11]); - for (i = 0; i < 10; i++) - printf("%" PRIu32 ": %" PRIu32 "\n", i, buf[COCSEPSIZE/4-10+i]); - - printf("\nh48:\n"); - for (i = 0; i < MAXDEPTH+1; i++) - printf("%" PRIu32 ": %" PRIu32 "\n", i, h48info[i+1]); - - free(buf); - return 0; -} diff --git a/test/102_gendata_h48/00_h_0.in b/test/103_gendata_h48/00_h_0.in diff --git a/test/102_gendata_h48/00_h_0.out b/test/103_gendata_h48/00_h_0.out diff --git a/test/102_gendata_h48/01_h_1.in b/test/103_gendata_h48/01_h_1.in diff --git a/test/102_gendata_h48/01_h_1.out b/test/103_gendata_h48/01_h_1.out diff --git a/test/103_gendata_h48/gendata_h48_tests.c b/test/103_gendata_h48/gendata_h48_tests.c @@ -0,0 +1,35 @@ +#include "../test.h" + +#define MAXDEPTH 5 +#define COCSEPSIZE 1119792 +#define ETABLESIZE(h) (((3393 * 495 * 70) >> 1) << (size_t)(h)) + +size_t gendata_h48(void *, uint8_t, uint8_t); + +int main(void) { + char str[STRLENMAX]; + uint8_t h, i; + uint32_t *buf, *h48info; + size_t result; + + fgets(str, STRLENMAX, stdin); + h = atoi(str); + buf = (uint32_t *)malloc(sizeof(uint32_t) * (60000000 << h)); + result = gendata_h48(buf, h, MAXDEPTH); + h48info = buf + (ETABLESIZE(h) + COCSEPSIZE) / 4; + + printf("%zu\n\n", result); + + printf("cocsepdata:\n"); + printf("Classes: %" PRIu32 "\n", buf[COCSEPSIZE/4-12]); + printf("Max value: %" PRIu32 "\n", buf[COCSEPSIZE/4-11]); + for (i = 0; i < 10; i++) + printf("%" PRIu32 ": %" PRIu32 "\n", i, buf[COCSEPSIZE/4-10+i]); + + printf("\nh48:\n"); + for (i = 0; i < MAXDEPTH+1; i++) + printf("%" PRIu32 ": %" PRIu32 "\n", i, h48info[i+1]); + + free(buf); + return 0; +}