Some type fixes and some cleanup - h48 - A prototype for an optimal Rubik's cube solver, work in progress.

commit 49b9c4a516f72e03d53277075f4580e2df8f0e63
parent fde8dfa9df8df1e3d42a9a8840836e33817cd498
Author: Sebastiano Tronto <sebastiano@tronto.net>
Date:   Mon, 27 May 2024 12:04:53 +0200

Some type fixes and some cleanup

Diffstat:
M TODO.txt  | 4 ++--
M src/constants.h  | 214 ++++++++++++++++++++++++++++++++++++++++---------------------------------------
M src/cube_avx2.h  | 32 +++++++++++++++++---------------
M src/cube_portable.h  | 17 ++++++++---------
M src/cube_routines.h  | 6 ++++--
M src/solve_h48.h  | 120 +++++++++++++++++++++++++++++++++++++++++++++----------------------------------
M test/075_set_eo/set_eo_tests.c  | 6 ++++--
M test/100_gendata_cocsep/gendata_cocsep_tests.c  | 2 +-
M test/101_cocsep_selfsim/cocsep_selfsim_tests.c  | 8 ++++----
D test/101_coord_invcoord_h48/coord_invcoord_h48_tests.c  | 40 ----------------------------------------
R test/101_coord_invcoord_h48/00_all.in -> test/102_coord_invcoord_h48/00_all.in  | 0 
R test/101_coord_invcoord_h48/00_all.out -> test/102_coord_invcoord_h48/00_all.out  | 0 
A test/102_coord_invcoord_h48/coord_invcoord_h48_tests.c  | 40 ++++++++++++++++++++++++++++++++++++++++
D test/102_gendata_h48/gendata_h48_tests.c  | 35 -----------------------------------
R test/102_gendata_h48/00_h_0.in -> test/103_gendata_h48/00_h_0.in  | 0 
R test/102_gendata_h48/00_h_0.out -> test/103_gendata_h48/00_h_0.out  | 0 
R test/102_gendata_h48/01_h_1.in -> test/103_gendata_h48/01_h_1.in  | 0 
R test/102_gendata_h48/01_h_1.out -> test/103_gendata_h48/01_h_1.out  | 0 
A test/103_gendata_h48/gendata_h48_tests.c  | 35 +++++++++++++++++++++++++++++++++++

19 files changed, 292 insertions(+), 267 deletions(-)
diff --git a/TODO.txt b/TODO.txt
@@ -1,6 +1,6 @@
 Correctness
-  - Check all cases of bitwise operations for type mismatch,
-    force cast to same type. Change to ULL where appropriate.
+  - check all bitwise operations specifically
+  - consider adding more warnings (-pedantic?) or using more static analyzers
 
 Solver
   - write a solver (how many tricks? some, but not all are needed)
diff --git a/src/constants.h b/src/constants.h
@@ -1,9 +1,13 @@
-#define _2p11  2048U
-#define _2p12  4096U
-#define _3p7   2187U
-#define _3p8   6561U
-#define _12c4  495U
-#define _8c4   70U
+#define _bit_u8(i)  (UINT8_C(1) << (uint8_t)(i))
+#define _bit_u32(i) (UINT32_C(1) << (uint32_t)(i))
+#define _bit_u64(i) (UINT64_C(1) << (uint64_t)(i))
+
+#define _2p11  INT64_C(2048)
+#define _2p12  INT64_C(4096)
+#define _3p7   INT64_C(2187)
+#define _3p8   INT64_C(6561)
+#define _12c4  INT64_C(495)
+#define _8c4   INT64_C(70)
 
 _static int64_t binomial[12][12] = {
 	{1,  0,  0,   0,   0,   0,   0,   0,   0,  0,  0, 0},
@@ -20,111 +24,111 @@ _static int64_t binomial[12][12] = {
 	{1, 11, 55, 165, 330, 462, 462, 330, 165, 55, 11, 1},
 };
 
-#define _move_U  0U
-#define _move_U2 1U
-#define _move_U3 2U
-#define _move_D  3U
-#define _move_D2 4U
-#define _move_D3 5U
-#define _move_R  6U
-#define _move_R2 7U
-#define _move_R3 8U
-#define _move_L  9U
-#define _move_L2 10U
-#define _move_L3 11U
-#define _move_F  12U
-#define _move_F2 13U
-#define _move_F3 14U
-#define _move_B  15U
-#define _move_B2 16U
-#define _move_B3 17U
+#define _move_U  UINT8_C(0)
+#define _move_U2 UINT8_C(1)
+#define _move_U3 UINT8_C(2)
+#define _move_D  UINT8_C(3)
+#define _move_D2 UINT8_C(4)
+#define _move_D3 UINT8_C(5)
+#define _move_R  UINT8_C(6)
+#define _move_R2 UINT8_C(7)
+#define _move_R3 UINT8_C(8)
+#define _move_L  UINT8_C(9)
+#define _move_L2 UINT8_C(10)
+#define _move_L3 UINT8_C(11)
+#define _move_F  UINT8_C(12)
+#define _move_F2 UINT8_C(13)
+#define _move_F3 UINT8_C(14)
+#define _move_B  UINT8_C(15)
+#define _move_B2 UINT8_C(16)
+#define _move_B3 UINT8_C(17)
 
-#define _trans_UFr 0
-#define _trans_ULr 1
-#define _trans_UBr 2
-#define _trans_URr 3
-#define _trans_DFr 4
-#define _trans_DLr 5
-#define _trans_DBr 6
-#define _trans_DRr 7
-#define _trans_RUr 8
-#define _trans_RFr 9
-#define _trans_RDr 10
-#define _trans_RBr 11
-#define _trans_LUr 12
-#define _trans_LFr 13
-#define _trans_LDr 14
-#define _trans_LBr 15
-#define _trans_FUr 16
-#define _trans_FRr 17
-#define _trans_FDr 18
-#define _trans_FLr 19
-#define _trans_BUr 20
-#define _trans_BRr 21
-#define _trans_BDr 22
-#define _trans_BLr 23
+#define _trans_UFr UINT8_C(0)
+#define _trans_ULr UINT8_C(1)
+#define _trans_UBr UINT8_C(2)
+#define _trans_URr UINT8_C(3)
+#define _trans_DFr UINT8_C(4)
+#define _trans_DLr UINT8_C(5)
+#define _trans_DBr UINT8_C(6)
+#define _trans_DRr UINT8_C(7)
+#define _trans_RUr UINT8_C(8)
+#define _trans_RFr UINT8_C(9)
+#define _trans_RDr UINT8_C(10)
+#define _trans_RBr UINT8_C(11)
+#define _trans_LUr UINT8_C(12)
+#define _trans_LFr UINT8_C(13)
+#define _trans_LDr UINT8_C(14)
+#define _trans_LBr UINT8_C(15)
+#define _trans_FUr UINT8_C(16)
+#define _trans_FRr UINT8_C(17)
+#define _trans_FDr UINT8_C(18)
+#define _trans_FLr UINT8_C(19)
+#define _trans_BUr UINT8_C(20)
+#define _trans_BRr UINT8_C(21)
+#define _trans_BDr UINT8_C(22)
+#define _trans_BLr UINT8_C(23)
 
-#define _trans_UFm 24
-#define _trans_ULm 25
-#define _trans_UBm 26
-#define _trans_URm 27
-#define _trans_DFm 28
-#define _trans_DLm 29
-#define _trans_DBm 30
-#define _trans_DRm 31
-#define _trans_RUm 32
-#define _trans_RFm 33
-#define _trans_RDm 34
-#define _trans_RBm 35
-#define _trans_LUm 36
-#define _trans_LFm 37
-#define _trans_LDm 38
-#define _trans_LBm 39
-#define _trans_FUm 40
-#define _trans_FRm 41
-#define _trans_FDm 42
-#define _trans_FLm 43
-#define _trans_BUm 44
-#define _trans_BRm 45
-#define _trans_BDm 46
-#define _trans_BLm 47
+#define _trans_UFm UINT8_C(24)
+#define _trans_ULm UINT8_C(25)
+#define _trans_UBm UINT8_C(26)
+#define _trans_URm UINT8_C(27)
+#define _trans_DFm UINT8_C(28)
+#define _trans_DLm UINT8_C(29)
+#define _trans_DBm UINT8_C(30)
+#define _trans_DRm UINT8_C(31)
+#define _trans_RUm UINT8_C(32)
+#define _trans_RFm UINT8_C(33)
+#define _trans_RDm UINT8_C(34)
+#define _trans_RBm UINT8_C(35)
+#define _trans_LUm UINT8_C(36)
+#define _trans_LFm UINT8_C(37)
+#define _trans_LDm UINT8_C(38)
+#define _trans_LBm UINT8_C(39)
+#define _trans_FUm UINT8_C(40)
+#define _trans_FRm UINT8_C(41)
+#define _trans_FDm UINT8_C(42)
+#define _trans_FLm UINT8_C(43)
+#define _trans_BUm UINT8_C(44)
+#define _trans_BRm UINT8_C(45)
+#define _trans_BDm UINT8_C(46)
+#define _trans_BLm UINT8_C(47)
 
-#define _c_ufr      0U
-#define _c_ubl      1U
-#define _c_dfl      2U
-#define _c_dbr      3U
-#define _c_ufl      4U
-#define _c_ubr      5U
-#define _c_dfr      6U
-#define _c_dbl      7U
+#define _c_ufr      UINT8_C(0)
+#define _c_ubl      UINT8_C(1)
+#define _c_dfl      UINT8_C(2)
+#define _c_dbr      UINT8_C(3)
+#define _c_ufl      UINT8_C(4)
+#define _c_ubr      UINT8_C(5)
+#define _c_dfr      UINT8_C(6)
+#define _c_dbl      UINT8_C(7)
 
-#define _e_uf       0U
-#define _e_ub       1U
-#define _e_db       2U
-#define _e_df       3U
-#define _e_ur       4U
-#define _e_ul       5U
-#define _e_dl       6U
-#define _e_dr       7U
-#define _e_fr       8U
-#define _e_fl       9U
-#define _e_bl       10U
-#define _e_br       11U
+#define _e_uf       UINT8_C(0)
+#define _e_ub       UINT8_C(1)
+#define _e_db       UINT8_C(2)
+#define _e_df       UINT8_C(3)
+#define _e_ur       UINT8_C(4)
+#define _e_ul       UINT8_C(5)
+#define _e_dl       UINT8_C(6)
+#define _e_dr       UINT8_C(7)
+#define _e_fr       UINT8_C(8)
+#define _e_fl       UINT8_C(9)
+#define _e_bl       UINT8_C(10)
+#define _e_br       UINT8_C(11)
 
-#define _eoshift    4U
-#define _coshift    5U
+#define _eoshift    UINT8_C(4)
+#define _coshift    UINT8_C(5)
 
-#define _pbits      0xFU
-#define _esepbit1   0x4U
-#define _esepbit2   0x8U
-#define _csepbit    0x4U
-#define _eobit      0x10U
-#define _cobits     0xF0U
-#define _cobits2    0x60U
-#define _ctwist_cw  0x20U
-#define _ctwist_ccw 0x40U
-#define _eflip      0x10U
-#define _error      0xFFU
+#define _pbits      UINT8_C(0xF)
+#define _esepbit1   UINT8_C(0x4)
+#define _esepbit2   UINT8_C(0x8)
+#define _csepbit    UINT8_C(0x4)
+#define _eobit      UINT8_C(0x10)
+#define _cobits     UINT8_C(0xF0)
+#define _cobits2    UINT8_C(0x60)
+#define _ctwist_cw  UINT8_C(0x20)
+#define _ctwist_ccw UINT8_C(0x40)
+#define _eflip      UINT8_C(0x10)
+#define _error      UINT8_C(0xFF)
 
 _static cube_t zero = { .corner = {0}, .edge = {0} };
 _static cube_t solved = {
diff --git a/src/cube_avx2.h b/src/cube_avx2.h
@@ -1,10 +1,12 @@
 typedef __m256i cube_fast_t;
 
-#define _co2_avx2 _mm256_set_epi64x(0, 0, 0, 0x6060606060606060)
-#define _cocw_avx2 _mm256_set_epi64x(0, 0, 0, 0x2020202020202020)
-#define _cp_avx2 _mm256_set_epi64x(0, 0, 0, 0x0707070707070707)
-#define _ep_avx2 _mm256_set_epi64x(0x0F0F0F0F, 0x0F0F0F0F0F0F0F0F, 0, 0)
-#define _eo_avx2 _mm256_set_epi64x(0x10101010, 0x1010101010101010, 0, 0)
+#define _co2_avx2 _mm256_set_epi64x(0, 0, 0, INT64_C(0x6060606060606060))
+#define _cocw_avx2 _mm256_set_epi64x(0, 0, 0, INT64_C(0x2020202020202020))
+#define _cp_avx2 _mm256_set_epi64x(0, 0, 0, INT64_C(0x0707070707070707))
+#define _ep_avx2 \
+    _mm256_set_epi64x(INT64_C(0x0F0F0F0F), INT64_C(0x0F0F0F0F0F0F0F0F), 0, 0)
+#define _eo_avx2 \
+    _mm256_set_epi64x(INT64_C(0x10101010), INT64_C(0x1010101010101010), 0, 0)
 
 _static_inline cube_fast_t fastcube(
     uint8_t, uint8_t, uint8_t, uint8_t, uint8_t,
@@ -194,9 +196,9 @@ coord_fast_co(cube_fast_t c)
 	co = _mm256_and_si256(c, _co2_avx2);
 	_mm256_storeu_si256((__m256i *)mem, co);
 
-	mem[0] >>= 5L;
-	for (i = 0, ret = 0, p = 1; i < 7; i++, mem[0] >>= 8L, p *= 3)
-		ret += (mem[0] & 3L) * p;
+	mem[0] >>= 5;
+	for (i = 0, ret = 0, p = 1; i < 7; i++, mem[0] >>= 8, p *= 3)
+		ret += (mem[0] & 3) * p;
 
 	return ret;
 }
@@ -242,14 +244,14 @@ coord_fast_esep(cube_fast_t c)
 	ep = _mm256_and_si256(c, _ep_avx2);
 	_mm256_storeu_si256((__m256i *)mem, ep);
 
-	mem[3] <<= 8L;
+	mem[3] <<= 8;
 	ret1 = ret2 = 0;
 	k = l = 4;
-	for (i = 0, j = 0; i < 12; i++, mem[i/8 + 2] >>= 8L) {
+	for (i = 0, j = 0; i < 12; i++, mem[i/8 + 2] >>= 8) {
 		e = mem[i/8 + 2];
 
-		bit1 = (e & _esepbit1) >> 2L;
-		bit2 = (e & _esepbit2) >> 3L;
+		bit1 = (e & _esepbit1) >> 2;
+		bit2 = (e & _esepbit2) >> 3;
 		is1 = (1 - bit2) * bit1;
 
 		ret1 += bit2 * binomial[11-i][k];
@@ -305,8 +307,8 @@ _static_inline cube_fast_t
 invcoord_fast_esep(int64_t esep)
 {
 	cube_fast_t eee, ret;
-	int64_t i, j, jj, k, l, s, v, w, is1, set1, set2;
-	uint8_t bit2, bit1, mem[32];
+	int64_t bit1, bit2, i, j, jj, k, l, s, v, w, is1, set1, set2;
+	uint8_t mem[32];
 	uint8_t slice[3] = {0};
 
 	set1 = esep % 70;
@@ -327,7 +329,7 @@ invcoord_fast_esep(int64_t esep)
 		j += (1-bit2);
 		s = 2*bit2 + (1-bit2)*bit1;
 
-		mem[i+16] = (slice[s]++) | (s << 2);
+		mem[i+16] = (slice[s]++) | (uint8_t)(s << 2);
 	}
 
 	ret = cubetofast(solved);
diff --git a/src/cube_portable.h b/src/cube_portable.h
@@ -143,7 +143,7 @@ compose_corners_inplace(cube_fast_t c1, cube_fast_t c2, cube_fast_t *ret)
 		p = piece2 & _pbits;
 		piece1 = c1.corner[p];
 		aux = (piece2 & _cobits) + (piece1 & _cobits);
-		auy = (aux + _ctwist_cw) >> 2U;
+		auy = (aux + _ctwist_cw) >> 2;
 		orien = (aux + auy) & _cobits2;
 		ret->corner[i] = (piece1 & _pbits) | orien;
 	}
@@ -206,7 +206,7 @@ coord_fast_csep(cube_fast_t c)
 	int64_t ret;
 
 	for (ret = 0, i = 0, p = 1; i < 7; i++, p *= 2)
-		ret += p * ((c.corner[i] & _csepbit) >> 2U);
+		ret += p * ((c.corner[i] & _csepbit) >> 2);
 
 	return ret;
 }
@@ -249,8 +249,8 @@ coord_fast_esep(cube_fast_t c)
 		}
 		*/
 
-		bit1 = (c.edge[i] & _esepbit1) >> 2U;
-		bit2 = (c.edge[i] & _esepbit2) >> 3U;
+		bit1 = (c.edge[i] & _esepbit1) >> 2;
+		bit2 = (c.edge[i] & _esepbit2) >> 3;
 		is1 = (1 - bit2) * bit1;
 
 		ret1 += bit2 * binomial[11-i][k];
@@ -280,22 +280,21 @@ copy_edges_fast(cube_fast_t *dest, cube_fast_t src)
 _static_inline void
 set_eo_fast(cube_fast_t *cube, int64_t eo)
 {
-	int i, sum, flip;
+	uint8_t i, sum, flip;
 
 	for (sum = 0, i = 1; i < 12; i++, eo >>= 1) {
 		flip = eo % 2;
 		sum += flip;
 		cube->edge[i] = (cube->edge[i] & ~_eobit) | (_eobit * flip);
 	}
-	cube->edge[0] = (cube->edge[0] & ~_eobit) | (_eobit * (sum%2));
+	cube->edge[0] = (cube->edge[0] & ~_eobit) | (_eobit * (sum % 2));
 }
 
 _static_inline cube_fast_t
 invcoord_fast_esep(int64_t esep)
 {
 	cube_fast_t ret;
-	int64_t i, j, jj, k, l, s, v, w, is1, set1, set2;
-	uint8_t bit2, bit1;
+	int64_t bit1, bit2, i, j, jj, k, l, s, v, w, is1, set1, set2;
 	uint8_t slice[3] = {0};
 
 	ret = cubetofast(solved);
@@ -317,7 +316,7 @@ invcoord_fast_esep(int64_t esep)
 		j += (1-bit2);
 		s = 2*bit2 + (1-bit2)*bit1;
 
-		ret.edge[i] = (slice[s]++) | (s << 2);
+		ret.edge[i] = (slice[s]++) | (uint8_t)(s << 2);
 	}
 
 	return ret;
diff --git a/src/cube_routines.h b/src/cube_routines.h
@@ -422,8 +422,9 @@ _static int
 writepiece_LST(uint8_t piece, char *buf)
 {
 	char digits[3];
-	int i, len = 0;
+	int i, len;
 
+	len = 0;
 	while (piece != 0) {
 		digits[len++] = (piece % 10) + '0';
 		piece /= 10;
@@ -473,7 +474,8 @@ writecube_H48(cube_t cube, char *buf)
 _static void
 writecube_LST(cube_t cube, char *buf)
 {
-	int i, ptr;
+	int i;
+	size_t ptr;
 	uint8_t piece;
 
 	ptr = 0;
diff --git a/src/solve_h48.h b/src/solve_h48.h
@@ -1,18 +1,23 @@
-#define COCSEP_CLASSES        3393U
-#define COCSEP_TABLESIZE      (_3p7 << 7ULL)
-#define COCSEP_VISITEDSIZE    ((COCSEP_TABLESIZE + 7ULL) / 8ULL)
-#define COCSEP_FULLSIZE       (4*(COCSEP_TABLESIZE + 12))
-
-#define ESEP_MAX(h)           ((COCSEP_CLASSES * _12c4 * _8c4) << (h))
-#define ESEP_TABLESIZE(h, k)  (ESEP_MAX((h)) / (8U / (k)))
-
-#define H48_ESIZE(h)          ((_12c4 * _8c4) << (h))
-
-#define _esep_ind(i)          (i / 8ULL)
-#define _esep_shift(i)        (4ULL * (i % 8ULL))
-#define _esep_mask(i)         (((1ULL << 4ULL) - 1ULL) << _esep_shift(i))
-#define _visited_ind(i)       (i / 8ULL)
-#define _visited_mask(i)      (1ULL << (i % 8ULL))
+#define COCSEP_CLASSES        ((size_t)3393)
+#define COCSEP_TABLESIZE      ((size_t)_3p7 << (size_t)7)
+#define COCSEP_VISITEDSIZE    ((COCSEP_TABLESIZE + (size_t)7) / (size_t)8)
+#define COCSEP_FULLSIZE       ((size_t)4 * (COCSEP_TABLESIZE + (size_t)12))
+
+#define ESEP_NOEO             (COCSEP_CLASSES * (size_t)_12c4 * (size_t)_8c4)
+#define ESEP_MAX(h)           (ESEP_NOEO << (size_t)(h))
+#define ESEP_TABLESIZE(h, k)  (ESEP_MAX((h)) / ((size_t)8 / (size_t)(k)))
+
+#define COCLASS_MASK          (UINT32_C(0xFFFF) << UINT32_C(16))
+#define COCLASS(x)            (((x) & COCLASS_MASK) >> UINT32_C(16))
+#define TTREP_MASK            (UINT32_C(0xFF) << UINT32_C(8))
+#define TTREP(x)              (((x) & TTREP_MASK) >> UINT32_C(8))
+#define H48_ESIZE(h)          ((_12c4 * _8c4) << (int64_t)(h))
+
+#define ESEP_IND(i)           ((uint32_t)(i) / UINT32_C(8))
+#define ESEP_SHIFT(i)         (UINT32_C(4) * ((uint32_t)(i) % UINT32_C(8)))
+#define ESEP_MASK(i)          ((_bit_u32(4) - (uint32_t)(1)) << ESEP_SHIFT(i))
+#define VISITED_IND(i)        ((uint32_t)(i) / UINT32_C(8))
+#define VISITED_MASK(i)       (UINT32_C(1) << ((uint32_t)(i) % UINT32_C(8)))
 
 typedef struct {
 	cube_fast_t cube;
@@ -59,8 +64,8 @@ coord_h48(cube_fast_t c, const uint32_t *cocsepdata, uint8_t h)
 
 	cocsep = coord_fast_cocsep(c);
 	data = cocsepdata[cocsep];
-	coclass = (data & (0xFFFFU << 16U)) >> 16U;
-	ttrep = (data & (0xFFU << 8U)) >> 8U;
+	coclass = (int64_t)COCLASS(data);
+	ttrep = (int64_t)TTREP(data);
 
 	return coord_h48_edges(c, coclass, ttrep, h);
 }
@@ -69,13 +74,14 @@ _static_inline int64_t
 coord_h48_edges(cube_fast_t c, int64_t coclass, uint8_t t, uint8_t h)
 {
 	cube_fast_t d;
-	int64_t esep, eo;
+	int64_t esep, eo, edges;
 
 	d = transform_edges(c, t);
 	esep = coord_fast_esep(d);
 	eo = coord_fast_eo(d);
+	edges = (esep << (int64_t)h) + (eo >> (11 - (int64_t)h));
 
-	return (coclass * H48_ESIZE(h)) + (esep << h) + (eo >> (11-h));
+	return coclass * H48_ESIZE(h) + edges;
 }
 
 /*
@@ -85,15 +91,17 @@ returned cube is a transformed cube of one that gives the correct value.
 */
 _static_inline cube_fast_t
 invcoord_h48(int64_t i, const cube_fast_t *crep, uint8_t h) {
-	cube_fast_t ret; int64_t coclass, ee, esep, eo;
+	cube_fast_t ret;
+	int64_t hh, coclass, ee, esep, eo;
 
 	DBG_ASSERT(h <= 11, cubetofast(zero),
 	    "invcoord_h48: h must be between 0 and 11\n");
 
+	hh = (int64_t)h;
 	coclass = i / H48_ESIZE(h);
 	ee = i % H48_ESIZE(h);
-	esep = ee >> h;
-	eo = (ee & ((1<<h)-1)) << (11-h);
+	esep = ee >> hh;
+	eo = (ee & ((1 << hh) - 1)) << (11 - hh);
 
 	ret = invcoord_fast_esep(esep);
 	copy_corners_fast(&ret, crep[coclass]);
@@ -123,7 +131,7 @@ gendata_cocsep(void *buf, uint64_t *selfsim, cube_fast_t *rep)
 
 	buf32 = (uint32_t *)buf;
 	info = buf32 + COCSEP_TABLESIZE;
-	memset(buf32, 0xFFU, sizeof(uint32_t) * COCSEP_TABLESIZE);
+	memset(buf32, 0xFF, sizeof(uint32_t) * COCSEP_TABLESIZE);
 	memset(selfsim, 0, sizeof(uint64_t) * COCSEP_CLASSES);
 
 	arg = (dfsarg_cocsep_t) {
@@ -145,10 +153,10 @@ gendata_cocsep(void *buf, uint64_t *selfsim, cube_fast_t *rep)
 	}
 
 	info[0] = (uint32_t)n;
-	info[1] = 9U; /* Known max pruning value */
+	info[1] = 9; /* Known max pruning value */
 	DBG_ASSERT(n == COCSEP_CLASSES, 0,
 	    "cocsep: computed %" PRIu16 " symmetry classes, "
-	    "expected %" PRIu16 "\n", n, COCSEP_CLASSES);
+	    "expected %zu\n", n, COCSEP_CLASSES);
 
 	DBG_LOG("cocsep data computed\n");
 	DBG_LOG("Symmetry classes: %" PRIu32 "\n", info[0]);
@@ -163,33 +171,37 @@ gendata_cocsep(void *buf, uint64_t *selfsim, cube_fast_t *rep)
 _static uint32_t
 gendata_cocsep_dfs(dfsarg_cocsep_t *arg)
 {
-	uint8_t m, tinv, olddepth;
-	uint32_t cc;
+	uint8_t m, tinv;
+	uint32_t cc, class, ttrep, depth, olddepth;
 	uint64_t t, is;
-	int64_t i, ii;
+	int64_t i, j;
 	cube_fast_t d;
 	dfsarg_cocsep_t nextarg;
 
 	i = coord_fast_cocsep(arg->cube);
-	olddepth = (uint8_t)(arg->buf32[i] & 0xFFU);
+	olddepth = (uint8_t)(arg->buf32[i] & 0xFF);
 	if (olddepth < arg->depth || get_visited(arg->visited, i))
 		return 0;
 	set_visited(arg->visited, i);
 
 	if (arg->depth == arg->maxdepth) {
-		if ((arg->buf32[i] & 0xFFU) != 0xFFU)
+		if ((arg->buf32[i] & 0xFF) != 0xFF)
 			return 0;
 
 		for (t = 0, cc = 0; t < 48; t++) {
 			d = transform_corners(arg->cube, t);
-			ii = coord_fast_cocsep(d);
-			is = (i == ii);
+			j = coord_fast_cocsep(d);
+			is = (i == j);
 			arg->selfsim[*arg->n] |= is << t;
-			set_visited(arg->visited, ii);
+			set_visited(arg->visited, j);
 			tinv = inverse_trans(t);
-			cc += (arg->buf32[ii] & 0xFFU) == 0xFFU;
-			arg->buf32[ii] =
-			    (*arg->n << 16U) | (tinv << 8U) | arg->depth;
+			olddepth = (uint8_t)(arg->buf32[j] & 0xFF);
+			cc += olddepth == 0xFF;
+
+			class = (uint32_t)(*arg->n) << 16;
+			ttrep = (uint32_t)tinv << 8;
+			depth = (uint32_t)arg->depth;
+			arg->buf32[j] = class | ttrep | depth;
 		}
 		arg->rep[*arg->n] = arg->cube;
 		(*arg->n)++;
@@ -217,16 +229,17 @@ gendata_h48(void *buf, uint8_t h, uint8_t maxdepth)
 	const int k = 4; /* TODO: other cases? */
 	uint32_t j, *buf32, *info, *cocsepdata;
 	bfsarg_esep_t arg;
-	int64_t sc, cc, tot;
+	int64_t sc, cc, tot, esep_max;
 	uint64_t selfsim[COCSEP_CLASSES];
 	cube_fast_t crep[COCSEP_CLASSES];
-	size_t cocsepsize;
+	size_t cocsepsize, infosize;
 
+	esep_max = (int64_t)ESEP_MAX(h);
 	cocsepsize = gendata_cocsep(buf, selfsim, crep);
 	cocsepdata = (uint32_t *)buf;
-	buf32 = cocsepdata + cocsepsize/4;
+	buf32 = cocsepdata + cocsepsize / 4;
 	info = buf32 + (ESEP_TABLESIZE(h, k) / sizeof(uint32_t));
-	memset(buf32, 0xFFU, ESEP_TABLESIZE(h, k));
+	memset(buf32, 0xFF, ESEP_TABLESIZE(h, k));
 
 	sc = coord_h48(cubetofast(solved), cocsepdata, h);
 	set_esep_pval(buf32, sc, 0);
@@ -240,7 +253,7 @@ gendata_h48(void *buf, uint8_t h, uint8_t maxdepth)
 	};
 	for (
 		tot = 1, arg.depth = 1, cc = 0;
-		tot < ESEP_MAX(h) && arg.depth <= maxdepth;
+		tot < esep_max && arg.depth <= maxdepth;
 		arg.depth++
 	) {
 		DBG_LOG("esep: generating depth %" PRIu8 "\n", arg.depth);
@@ -251,6 +264,7 @@ gendata_h48(void *buf, uint8_t h, uint8_t maxdepth)
 	}
 
 	info[0] = arg.depth-1;
+	infosize = 4 * (size_t)(info[0] + 2);
 
 	DBG_LOG("h48 pruning table computed\n");
 	DBG_LOG("Maximum pruning value: %" PRIu32 "\n", info[0]);
@@ -258,7 +272,7 @@ gendata_h48(void *buf, uint8_t h, uint8_t maxdepth)
 	for (j = 0; j <= info[0]; j++)
 		DBG_LOG("%" PRIu8 ":\t%" PRIu32 "\n", j, info[j+1]);
 
-	return COCSEP_FULLSIZE + ESEP_TABLESIZE(h, k) + 4*(info[0]+2);
+	return cocsepsize + ESEP_TABLESIZE(h, k) + infosize;
 }
 
 _static uint64_t
@@ -266,10 +280,12 @@ gendata_esep_bfs(bfsarg_esep_t *arg)
 {
 	uint8_t c, m, x;
 	uint32_t cc;
-	uint64_t i, j, k, t, cocsep_coord, sim;
+	int64_t i, j, k, t, cocsep_coord, sim, esep_max;
 	cube_fast_t cube, moved, transd;
 
-	for (i = 0, cc = 0; i < ESEP_MAX(arg->h); i++) {
+	esep_max = (uint64_t)ESEP_MAX(arg->h);
+
+	for (i = 0, cc = 0; i < esep_max; i++) {
 		c = get_esep_pval(arg->buf32, i);
 		if (c != arg->depth - 1)
 			continue;
@@ -288,9 +304,9 @@ gendata_esep_bfs(bfsarg_esep_t *arg)
 			set_esep_pval(arg->buf32, j, arg->depth);
 			cc += x != arg->depth;
 			cocsep_coord = j / H48_ESIZE(arg->h);
-			sim = arg->selfsim[cocsep_coord] >> 1ULL;
-			for (t = 1; t < 48 && sim; t++, sim >>= 1ULL) {
-				if (!(sim & 1ULL))
+			sim = arg->selfsim[cocsep_coord] >> 1;
+			for (t = 1; t < 48 && sim; t++, sim >>= 1) {
+				if (!(sim & 1))
 					continue;
 				transd = transform(moved, t);
 				k = coord_h48(transd, arg->cocsepdata, arg->h);
@@ -308,23 +324,23 @@ gendata_esep_bfs(bfsarg_esep_t *arg)
 
 _static_inline bool get_visited(const uint8_t *a, int64_t i)
 {
-	return a[_visited_ind(i)] & _visited_mask(i);
+	return a[VISITED_IND(i)] & VISITED_MASK(i);
 }
 
 _static_inline void set_visited(uint8_t *a, int64_t i)
 {
-	a[_visited_ind(i)] |= _visited_mask(i);
+	a[VISITED_IND(i)] |= VISITED_MASK(i);
 }
 
 _static_inline uint8_t
 get_esep_pval(const uint32_t *buf32, int64_t i)
 {
-	return (buf32[_esep_ind(i)] & _esep_mask(i)) >> _esep_shift(i);
+	return (buf32[ESEP_IND(i)] & ESEP_MASK(i)) >> ESEP_SHIFT(i);
 }
 
 _static_inline void
 set_esep_pval(uint32_t *buf32, int64_t i, uint8_t val)
 {
-	buf32[_esep_ind(i)] =
-	    (buf32[_esep_ind(i)] & (~_esep_mask(i))) | (val << _esep_shift(i));
+	buf32[ESEP_IND(i)] =
+	    (buf32[ESEP_IND(i)] & (~ESEP_MASK(i))) | (val << ESEP_SHIFT(i));
 }
diff --git a/test/075_set_eo/set_eo_tests.c b/test/075_set_eo/set_eo_tests.c
@@ -22,9 +22,11 @@ int main(void) {
 		printf("Error setting EO\n");
 	} else if (!isconsistent(cube)) {
 		fprintf(stderr, "edges: ");
-		for (int i = 0; i < 12; i++) fprintf(stderr, "%d ", cube.edge[i]);
+		for (int i = 0; i < 12; i++)
+			fprintf(stderr, "%d ", cube.edge[i]);
 		fprintf(stderr, "\n");
-		for (int i = 0; i < 8; i++) fprintf(stderr, "%d ", cube.corner[i]);
+		for (int i = 0; i < 8; i++)
+			fprintf(stderr, "%d ", cube.corner[i]);
 		fprintf(stderr, "\n");
 		printf("Setting EO resulted in inconsistent cube\n");
 	} else {
diff --git a/test/100_gendata_cocsep/gendata_cocsep_tests.c b/test/100_gendata_cocsep/gendata_cocsep_tests.c
@@ -1,6 +1,6 @@
 #include "../test.h"
 
-#define COCSEP_CLASSES 3393U
+#define COCSEP_CLASSES 3393
 
 size_t gendata_cocsep(void *, uint64_t *, cube_fast_t *);
 
diff --git a/test/101_cocsep_selfsim/cocsep_selfsim_tests.c b/test/101_cocsep_selfsim/cocsep_selfsim_tests.c
@@ -7,7 +7,7 @@
  */
 #include "../test.h"
 
-#define COCSEP_CLASSES 3393U
+#define COCSEP_CLASSES 3393
 
 size_t gendata_cocsep(void *, uint64_t *, cube_fast_t *);
 int64_t coord_fast_cocsep(cube_fast_t);
@@ -27,10 +27,10 @@ int main(void) {
 		fast = cubetofast(readcube("H48", str));
 		coord = coord_fast_cocsep(fast);
 		data = buf[coord];
-		coclass = (data & (0xFFFU << 16U)) >> 16U;
+		coclass = (data & (0xFFFU << 16)) >> 16;
 		sim = selfsim[coclass];
-		for (t = 0; t < 48 && sim; t++, sim >>= 1ULL) {
-			if (sim & 1ULL)
+		for (t = 0; t < 48 && sim; t++, sim >>= 1) {
+			if (sim & 1)
 				printf("%" PRId64 " ", t);
 		}
 		printf("\n");
diff --git a/test/101_coord_invcoord_h48/coord_invcoord_h48_tests.c b/test/101_coord_invcoord_h48/coord_invcoord_h48_tests.c
@@ -1,40 +0,0 @@
-#include "../test.h"
-
-#define COCSEP_CLASSES 3393U
-
-size_t gendata_cocsep(void *, uint64_t *, cube_fast_t *);
-int64_t coord_h48(cube_fast_t, const uint32_t *, uint8_t);
-cube_fast_t invcoord_h48(int64_t, const cube_fast_t *, uint8_t);
-cube_fast_t transform(cube_fast_t, uint8_t);
-
-int main(void) {
-	char str[STRLENMAX];
-	int i;
-	bool found;
-	uint8_t h, t;
-	uint32_t cocsepdata[300000];
-	uint64_t selfsim[COCSEP_CLASSES];
-	int64_t c, cc;
-	cube_t cube;
-	cube_fast_t fast, invc, rep[COCSEP_CLASSES];
-
-	gendata_cocsep(cocsepdata, selfsim, rep);
-
-	i = 1;
-	h = 11;
-	while (fgets(str, STRLENMAX, stdin) != NULL) {
-		cube = readcube("H48", str);
-		fast = cubetofast(cube);
-		c = coord_h48(fast, cocsepdata, h);
-		invc = invcoord_h48(c, rep, h);
-		for (t = 0, found = false; t < 48; t++) {
-			fast = transform(invc, t);
-			cc = coord_h48(fast, cocsepdata, h);
-			found = found || cc == c;
-		}
-		printf("%d %s\n", i, found ? "ok" : "ERROR");
-		i++;
-	}
-
-	return 0;
-}
diff --git a/test/101_coord_invcoord_h48/00_all.in b/test/102_coord_invcoord_h48/00_all.in
diff --git a/test/101_coord_invcoord_h48/00_all.out b/test/102_coord_invcoord_h48/00_all.out
diff --git a/test/102_coord_invcoord_h48/coord_invcoord_h48_tests.c b/test/102_coord_invcoord_h48/coord_invcoord_h48_tests.c
@@ -0,0 +1,40 @@
+#include "../test.h"
+
+#define COCSEP_CLASSES 3393
+
+size_t gendata_cocsep(void *, uint64_t *, cube_fast_t *);
+int64_t coord_h48(cube_fast_t, const uint32_t *, uint8_t);
+cube_fast_t invcoord_h48(int64_t, const cube_fast_t *, uint8_t);
+cube_fast_t transform(cube_fast_t, uint8_t);
+
+int main(void) {
+	char str[STRLENMAX];
+	int i;
+	bool found;
+	uint8_t h, t;
+	uint32_t cocsepdata[300000];
+	uint64_t selfsim[COCSEP_CLASSES];
+	int64_t c, cc;
+	cube_t cube;
+	cube_fast_t fast, invc, rep[COCSEP_CLASSES];
+
+	gendata_cocsep(cocsepdata, selfsim, rep);
+
+	i = 1;
+	h = 11;
+	while (fgets(str, STRLENMAX, stdin) != NULL) {
+		cube = readcube("H48", str);
+		fast = cubetofast(cube);
+		c = coord_h48(fast, cocsepdata, h);
+		invc = invcoord_h48(c, rep, h);
+		for (t = 0, found = false; t < 48; t++) {
+			fast = transform(invc, t);
+			cc = coord_h48(fast, cocsepdata, h);
+			found = found || cc == c;
+		}
+		printf("%d %s\n", i, found ? "ok" : "ERROR");
+		i++;
+	}
+
+	return 0;
+}
diff --git a/test/102_gendata_h48/gendata_h48_tests.c b/test/102_gendata_h48/gendata_h48_tests.c
@@ -1,35 +0,0 @@
-#include "../test.h"
-
-#define MAXDEPTH 5
-#define COCSEPSIZE 1119792
-#define ETABLESIZE(h) (((3393 * 495 * 70) >> 1) << (h))
-
-size_t gendata_h48(void *, uint8_t, uint8_t);
-
-int main(void) {
-	char str[STRLENMAX];
-	uint8_t h, i;
-	uint32_t *buf, *h48info;
-	size_t result;
-
-	fgets(str, STRLENMAX, stdin);
-	h = atoi(str);
-	buf = (uint32_t *)malloc(sizeof(uint32_t) * (60000000 << h));
-	result = gendata_h48(buf, h, MAXDEPTH);
-	h48info = buf + (ETABLESIZE(h) + COCSEPSIZE) / 4;
-
-	printf("%zu\n\n", result);
-
-	printf("cocsepdata:\n");
-	printf("Classes: %" PRIu32 "\n", buf[COCSEPSIZE/4-12]);
-	printf("Max value: %" PRIu32 "\n", buf[COCSEPSIZE/4-11]);
-	for (i = 0; i < 10; i++)
-		printf("%" PRIu32 ": %" PRIu32 "\n", i, buf[COCSEPSIZE/4-10+i]);
-
-	printf("\nh48:\n");
-	for (i = 0; i < MAXDEPTH+1; i++)
-		printf("%" PRIu32 ": %" PRIu32 "\n", i, h48info[i+1]);
-
-	free(buf);
-	return 0;
-}
diff --git a/test/102_gendata_h48/00_h_0.in b/test/103_gendata_h48/00_h_0.in
diff --git a/test/102_gendata_h48/00_h_0.out b/test/103_gendata_h48/00_h_0.out
diff --git a/test/102_gendata_h48/01_h_1.in b/test/103_gendata_h48/01_h_1.in
diff --git a/test/102_gendata_h48/01_h_1.out b/test/103_gendata_h48/01_h_1.out
diff --git a/test/103_gendata_h48/gendata_h48_tests.c b/test/103_gendata_h48/gendata_h48_tests.c
@@ -0,0 +1,35 @@
+#include "../test.h"
+
+#define MAXDEPTH 5
+#define COCSEPSIZE 1119792
+#define ETABLESIZE(h) (((3393 * 495 * 70) >> 1) << (size_t)(h))
+
+size_t gendata_h48(void *, uint8_t, uint8_t);
+
+int main(void) {
+	char str[STRLENMAX];
+	uint8_t h, i;
+	uint32_t *buf, *h48info;
+	size_t result;
+
+	fgets(str, STRLENMAX, stdin);
+	h = atoi(str);
+	buf = (uint32_t *)malloc(sizeof(uint32_t) * (60000000 << h));
+	result = gendata_h48(buf, h, MAXDEPTH);
+	h48info = buf + (ETABLESIZE(h) + COCSEPSIZE) / 4;
+
+	printf("%zu\n\n", result);
+
+	printf("cocsepdata:\n");
+	printf("Classes: %" PRIu32 "\n", buf[COCSEPSIZE/4-12]);
+	printf("Max value: %" PRIu32 "\n", buf[COCSEPSIZE/4-11]);
+	for (i = 0; i < 10; i++)
+		printf("%" PRIu32 ": %" PRIu32 "\n", i, buf[COCSEPSIZE/4-10+i]);
+
+	printf("\nh48:\n");
+	for (i = 0; i < MAXDEPTH+1; i++)
+		printf("%" PRIu32 ": %" PRIu32 "\n", i, h48info[i+1]);
+
+	free(buf);
+	return 0;
+}

	h48 A prototype for an optimal Rubik's cube solver, work in progress.
	git clone https://git.tronto.net/h48
	Download \| Log \| Files \| Refs \| README \| LICENSE

M	TODO.txt	\|	4	++--
M	src/constants.h	\|	214	++++++++++++++++++++++++++++++++++++++++---------------------------------------
M	src/cube_avx2.h	\|	32	+++++++++++++++++---------------
M	src/cube_portable.h	\|	17	++++++++---------
M	src/cube_routines.h	\|	6	++++--
M	src/solve_h48.h	\|	120	+++++++++++++++++++++++++++++++++++++++++++++----------------------------------
M	test/075_set_eo/set_eo_tests.c	\|	6	++++--
M	test/100_gendata_cocsep/gendata_cocsep_tests.c	\|	2	+-
M	test/101_cocsep_selfsim/cocsep_selfsim_tests.c	\|	8	++++----
D	test/101_coord_invcoord_h48/coord_invcoord_h48_tests.c	\|	40	----------------------------------------
R	test/101_coord_invcoord_h48/00_all.in -> test/102_coord_invcoord_h48/00_all.in	\|	0
R	test/101_coord_invcoord_h48/00_all.out -> test/102_coord_invcoord_h48/00_all.out	\|	0
A	test/102_coord_invcoord_h48/coord_invcoord_h48_tests.c	\|	40	++++++++++++++++++++++++++++++++++++++++
D	test/102_gendata_h48/gendata_h48_tests.c	\|	35	-----------------------------------
R	test/102_gendata_h48/00_h_0.in -> test/103_gendata_h48/00_h_0.in	\|	0
R	test/102_gendata_h48/00_h_0.out -> test/103_gendata_h48/00_h_0.out	\|	0
R	test/102_gendata_h48/01_h_1.in -> test/103_gendata_h48/01_h_1.in	\|	0
R	test/102_gendata_h48/01_h_1.out -> test/103_gendata_h48/01_h_1.out	\|	0
A	test/103_gendata_h48/gendata_h48_tests.c	\|	35	+++++++++++++++++++++++++++++++++++