Added esep coordinate - h48 - A prototype for an optimal Rubik's cube solver, work in progress.

commit ec564e62dcfe701b5ee3111b92c7d72ffd0dda6c
parent 9b0be93fb5fe23ae9f1a7a57b9db60561d8673e8
Author: Sebastiano Tronto <sebastiano@tronto.net>
Date:   Fri, 29 Mar 2024 12:11:24 +0100

Added esep coordinate

Diffstat:
M TODO.txt  | 3 ++-
M cube.c  | 95 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
A test/074_coord_esep/00_solved.in  | 1 +
A test/074_coord_esep/00_solved.out  | 1 +
A test/074_coord_esep/01_U.in  | 1 +
A test/074_coord_esep/01_U.out  | 1 +
A test/074_coord_esep/02_U2.in  | 1 +
A test/074_coord_esep/02_U2.out  | 1 +
A test/074_coord_esep/20_scrambled.in  | 3 +++
A test/074_coord_esep/20_scrambled.out  | 1 +
A test/074_coord_esep/21_swapaxis_FB.in  | 1 +
A test/074_coord_esep/21_swapaxis_FB.out  | 1 +
A test/074_coord_esep/22_swapaxis_RL.in  | 1 +
A test/074_coord_esep/22_swapaxis_RL.out  | 1 +
A test/074_coord_esep/23_swapaxis_UD.in  | 1 +
A test/074_coord_esep/23_swapaxis_UD.out  | 1 +
A test/074_coord_esep/coord_esep_tests.c  | 21 +++++++++++++++++++++

17 files changed, 133 insertions(+), 2 deletions(-)
diff --git a/TODO.txt b/TODO.txt
@@ -1,6 +1,7 @@
 ## (find better name) H48 solver, ideas
 
-TODO next: epsep, precise coordinate unlike cpsep which is redundant
+TODO implement lookup tables
+TODO alternative: ARM NEON part
 
 First compute co + csep. Use csep as a binary number (2^7 instead of 70,
 loose a factor of 1.8 but still fits in a few megabytes or less). Use
diff --git a/cube.c b/cube.c
@@ -126,6 +126,9 @@ Section: constants, strings and other stuff
 #define _coshift    5U
 
 #define _pbits      0xFU
+#define _esepbit1   0x4U
+#define _esepbit2   0x8U
+#define _csepbit    0x4U
 #define _eobit      0x10U
 #define _cobits     0xF0U
 #define _cobits2    0x60U
@@ -484,6 +487,21 @@ static char *transstr[] = {
 	[BLm] = "mirrored BL",
 };
 
+static int64_t binomial[12][12] = {
+	{1,  0,  0,   0,   0,   0,   0,   0,   0,  0,  0, 0},
+	{1,  1,  0,   0,   0,   0,   0,   0,   0,  0,  0, 0},
+	{1,  2,  1,   0,   0,   0,   0,   0,   0,  0,  0, 0},
+	{1,  3,  3,   1,   0,   0,   0,   0,   0,  0,  0, 0},
+	{1,  4,  6,   4,   1,   0,   0,   0,   0,  0,  0, 0},
+	{1,  5, 10,  10,   5,   1,   0,   0,   0,  0,  0, 0},
+	{1,  6, 15,  20,  15,   6,   1,   0,   0,  0,  0, 0},
+	{1,  7, 21,  35,  35,  21,   7,   1,   0,  0,  0, 0},
+	{1,  8, 28,  56,  70,  56,  28,   8,   1,  0,  0, 0},
+	{1,  9, 36,  84, 126, 126,  84,  36,   9,  1,  0, 0},
+	{1, 10, 45, 120, 210, 252, 210, 120,  45, 10,  1, 0},
+	{1, 11, 55, 165, 330, 462, 462, 330, 165, 55, 11, 1},
+};
+
 /******************************************************************************
 Section: AVX2 fast methods
 
@@ -500,6 +518,7 @@ typedef __m256i cube_fast_t;
 #define _co2_avx2 _mm256_set_epi64x(0, 0, 0, 0x6060606060606060)
 #define _cocw_avx2 _mm256_set_epi64x(0, 0, 0, 0x2020202020202020)
 #define _cp_avx2 _mm256_set_epi64x(0, 0, 0, 0x0707070707070707)
+#define _ep_avx2 _mm256_set_epi64x(0x0F0F0F0F, 0x0F0F0F0F0F0F0F0F, 0, 0)
 #define _eo_avx2 _mm256_set_epi64x(0x10101010, 0x1010101010101010, 0, 0)
 
 _static_inline cube_fast_t fastcube(
@@ -518,6 +537,7 @@ _static_inline cube_fast_t compose_fast(cube_fast_t, cube_fast_t);
 _static_inline int64_t coord_fast_co(cube_fast_t);
 _static_inline int64_t coord_fast_csep(cube_fast_t);
 _static_inline int64_t coord_fast_eo(cube_fast_t);
+_static_inline int64_t coord_fast_esep(cube_fast_t);
 
 _static_inline cube_fast_t
 fastcube(
@@ -686,6 +706,36 @@ coord_fast_eo(cube_fast_t c)
 	return mask >> 17;
 }
 
+_static_inline int64_t
+coord_fast_esep(cube_fast_t c)
+{
+	cube_fast_t ep;
+	int64_t e, mem[4], i, j, k, l, ret1, ret2, bit1, bit2, is1;
+
+	ep = _mm256_and_si256(c, _ep_avx2);
+	_mm256_storeu_si256((__m256i *)mem, ep);
+
+	mem[3] <<= 8L;
+	ret1 = ret2 = 0;
+	k = l = 4;
+	for (i = 0, j = 0; i < 12; i++, mem[i/8 + 2] >>= 8L) {
+		e = mem[i/8 + 2];
+
+		bit1 = (e & _esepbit1) >> 2L;
+		bit2 = (e & _esepbit2) >> 3L;
+		is1 = (1 - bit2) * bit1;
+
+		ret1 += bit2 * binomial[11-i][k];
+		k -= bit2;
+
+		ret2 += is1 * binomial[7-j][l];
+		l -= is1;
+		j += (1-bit2);
+	}
+
+	return ret1 * 70 + ret2;
+}
+
 /******************************************************************************
 Section: ARM NEON fast methods
 
@@ -730,6 +780,7 @@ _static_inline cube_fast_t compose_fast(cube_fast_t, cube_fast_t);
 _static_inline int64_t coord_fast_co(cube_fast_t);
 _static_inline int64_t coord_fast_csep(cube_fast_t);
 _static_inline int64_t coord_fast_eo(cube_fast_t);
+_static_inline int64_t coord_fast_esep(cube_fast_t);
 
 _static_inline cube_fast_t
 fastcube(
@@ -863,6 +914,13 @@ coord_fast_co(cube_fast_t c)
 	return ret;
 }
 
+/*
+For corner separation, we consider the axis (a.k.a. tetrad) each
+corner belongs to as 0 or 1 and we translate this sequence into binary.
+Ignoring the last bit, we have a value up to 2^7, but not all values are
+possible. Encoding this as a number from 0 to C(8,4) would save about 40%
+of space, but we are not going to use this coordinate in large tables.
+*/
 _static_inline int64_t
 coord_fast_csep(cube_fast_t c)
 {
@@ -870,7 +928,7 @@ coord_fast_csep(cube_fast_t c)
 	int64_t ret;
 
 	for (ret = 0, i = 0, p = 1; i < 7; i++, p *= 2)
-		ret += p * ((c.corner[i] & _pbits) >> 2U);
+		ret += p * ((c.corner[i] & _csepbit) >> 2U);
 
 	return ret;
 }
@@ -887,6 +945,41 @@ coord_fast_eo(cube_fast_t c)
 	return ret;
 }
 
+/*
+We encode the edge separation as a number from 0 to C(12,4)*C(8,4).
+It can be seen as the composition of two "subset index" coordinates.
+*/
+_static_inline int64_t
+coord_fast_esep(cube_fast_t c)
+{
+	int64_t i, j, k, l, ret1, ret2, bit1, bit2, is1;
+
+	for (i = 0, j = 0, k = 4, l = 4, ret1 = 0, ret2 = 0; i < 12; i++) {
+		/* Simple version:
+		if (c.edge[i] & _esepbit2) {
+			ret1 += binomial[11-i][k--];
+		} else {
+			if (c.edge[i] & _esepbit1)
+				ret2 += binomial[7-j][l--];
+			j++;
+		}
+		*/
+
+		bit1 = (c.edge[i] & _esepbit1) >> 2U;
+		bit2 = (c.edge[i] & _esepbit2) >> 3U;
+		is1 = (1 - bit2) * bit1;
+
+		ret1 += bit2 * binomial[11-i][k];
+		k -= bit2;
+
+		ret2 += is1 * binomial[7-j][l];
+		l -= is1;
+		j += (1-bit2);
+	}
+
+	return ret1 * 70 + ret2;
+}
+
 #endif
 
 /******************************************************************************
diff --git a/test/074_coord_esep/00_solved.in b/test/074_coord_esep/00_solved.in
@@ -0,0 +1 @@
+UF0 UB0 DB0 DF0 UR0 UL0 DL0 DR0 FR0 FL0 BL0 BR0 UFR0 UBL0 DFL0 DBR0 UFL0 UBR0 DFR0 DBL0
diff --git a/test/074_coord_esep/00_solved.out b/test/074_coord_esep/00_solved.out
@@ -0,0 +1 @@
+0
diff --git a/test/074_coord_esep/01_U.in b/test/074_coord_esep/01_U.in
@@ -0,0 +1 @@
+UR0 UL0 DB0 DF0 UB0 UF0 DL0 DR0 FR0 FL0 BL0 BR0 UBR0 UFL0 DFL0 DBR0 UFR0 UBL0 DFR0 DBL0
diff --git a/test/074_coord_esep/01_U.out b/test/074_coord_esep/01_U.out
@@ -0,0 +1 @@
+55
diff --git a/test/074_coord_esep/02_U2.in b/test/074_coord_esep/02_U2.in
@@ -0,0 +1 @@
+UB0 UF0 DB0 DF0 UL0 UR0 DL0 DR0 FR0 FL0 BL0 BR0 UBL0 UFR0 DFL0 DBR0 UBR0 UFL0 DFR0 DBL0
diff --git a/test/074_coord_esep/02_U2.out b/test/074_coord_esep/02_U2.out
@@ -0,0 +1 @@
+0
diff --git a/test/074_coord_esep/20_scrambled.in b/test/074_coord_esep/20_scrambled.in
@@ -0,0 +1,3 @@
+UL0 BL0 BR1 DL0 FR0 DF0 DB1 DR1 UB0 FL0 UF0 UR1 DFL0 UFR1 DBR1 UBR2 DBL2 DFR0 UFL1 UBL2
+
+// Scramble: U R D' L D' F L2 D L F B D2 B' L2 F U2 L2 D2 R2 L2 B'
diff --git a/test/074_coord_esep/20_scrambled.out b/test/074_coord_esep/20_scrambled.out
@@ -0,0 +1 @@
+22248
diff --git a/test/074_coord_esep/21_swapaxis_FB.in b/test/074_coord_esep/21_swapaxis_FB.in
@@ -0,0 +1 @@
+FR0 BR0 BL0 FL0 UR0 UL0 DL0 DR0 DF0 UF0 UB0 DB0 UFR0 UBL0 DFL0 DBR0 UFL0 UBR0 DFR0 DBL0
diff --git a/test/074_coord_esep/21_swapaxis_FB.out b/test/074_coord_esep/21_swapaxis_FB.out
@@ -0,0 +1 @@
+34649
diff --git a/test/074_coord_esep/22_swapaxis_RL.in b/test/074_coord_esep/22_swapaxis_RL.in
@@ -0,0 +1 @@
+UF0 UB0 DB0 DF0 FR0 FL0 BL0 BR0 DR0 DL0 UL0 UR0 UFR0 UBL0 DFL0 DBR0 UFL0 UBR0 DFR0 DBL0
diff --git a/test/074_coord_esep/22_swapaxis_RL.out b/test/074_coord_esep/22_swapaxis_RL.out
@@ -0,0 +1 @@
+4830
diff --git a/test/074_coord_esep/23_swapaxis_UD.in b/test/074_coord_esep/23_swapaxis_UD.in
@@ -0,0 +1 @@
+UR0 UL0 DL0 DR0 UB0 UF0 DF0 DB0 FR0 FL0 BL0 BR0 UFR0 UBL0 DFL0 DBR0 UFL0 UBR0 DFR0 DBL0
diff --git a/test/074_coord_esep/23_swapaxis_UD.out b/test/074_coord_esep/23_swapaxis_UD.out
@@ -0,0 +1 @@
+69
diff --git a/test/074_coord_esep/coord_esep_tests.c b/test/074_coord_esep/coord_esep_tests.c
@@ -0,0 +1,21 @@
+#include "../test.h"
+
+int64_t coord_fast_esep(cube_fast_t);
+cube_fast_t cubetofast(cube_t);
+
+int main(void) {
+	char str[STRLENMAX];
+	cube_t cube;
+	cube_fast_t fast;
+	int64_t result;
+
+	fgets(str, STRLENMAX, stdin);
+	cube = readcube("H48", str);
+	fast = cubetofast(cube);
+
+	result = coord_fast_esep(fast);
+
+	printf("%" PRId64 "\n", result);
+
+	return 0;
+}

	h48 A prototype for an optimal Rubik's cube solver, work in progress.
	git clone https://git.tronto.net/h48
	Download \| Log \| Files \| Refs \| README \| LICENSE

M	TODO.txt	\|	3	++-
M	cube.c	\|	95	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
A	test/074_coord_esep/00_solved.in	\|	1	+
A	test/074_coord_esep/00_solved.out	\|	1	+
A	test/074_coord_esep/01_U.in	\|	1	+
A	test/074_coord_esep/01_U.out	\|	1	+
A	test/074_coord_esep/02_U2.in	\|	1	+
A	test/074_coord_esep/02_U2.out	\|	1	+
A	test/074_coord_esep/20_scrambled.in	\|	3	+++
A	test/074_coord_esep/20_scrambled.out	\|	1	+
A	test/074_coord_esep/21_swapaxis_FB.in	\|	1	+
A	test/074_coord_esep/21_swapaxis_FB.out	\|	1	+
A	test/074_coord_esep/22_swapaxis_RL.in	\|	1	+
A	test/074_coord_esep/22_swapaxis_RL.out	\|	1	+
A	test/074_coord_esep/23_swapaxis_UD.in	\|	1	+
A	test/074_coord_esep/23_swapaxis_UD.out	\|	1	+
A	test/074_coord_esep/coord_esep_tests.c	\|	21	+++++++++++++++++++++