commit ec564e62dcfe701b5ee3111b92c7d72ffd0dda6c
parent 9b0be93fb5fe23ae9f1a7a57b9db60561d8673e8
Author: Sebastiano Tronto <sebastiano@tronto.net>
Date: Fri, 29 Mar 2024 12:11:24 +0100
Added esep coordinate
Diffstat:
17 files changed, 133 insertions(+), 2 deletions(-)
diff --git a/TODO.txt b/TODO.txt
@@ -1,6 +1,7 @@
## (find better name) H48 solver, ideas
-TODO next: epsep, precise coordinate unlike cpsep which is redundant
+TODO implement lookup tables
+TODO alternative: ARM NEON part
First compute co + csep. Use csep as a binary number (2^7 instead of 70,
loose a factor of 1.8 but still fits in a few megabytes or less). Use
diff --git a/cube.c b/cube.c
@@ -126,6 +126,9 @@ Section: constants, strings and other stuff
#define _coshift 5U
#define _pbits 0xFU
+#define _esepbit1 0x4U
+#define _esepbit2 0x8U
+#define _csepbit 0x4U
#define _eobit 0x10U
#define _cobits 0xF0U
#define _cobits2 0x60U
@@ -484,6 +487,21 @@ static char *transstr[] = {
[BLm] = "mirrored BL",
};
+static int64_t binomial[12][12] = {
+ {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {1, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {1, 3, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0},
+ {1, 4, 6, 4, 1, 0, 0, 0, 0, 0, 0, 0},
+ {1, 5, 10, 10, 5, 1, 0, 0, 0, 0, 0, 0},
+ {1, 6, 15, 20, 15, 6, 1, 0, 0, 0, 0, 0},
+ {1, 7, 21, 35, 35, 21, 7, 1, 0, 0, 0, 0},
+ {1, 8, 28, 56, 70, 56, 28, 8, 1, 0, 0, 0},
+ {1, 9, 36, 84, 126, 126, 84, 36, 9, 1, 0, 0},
+ {1, 10, 45, 120, 210, 252, 210, 120, 45, 10, 1, 0},
+ {1, 11, 55, 165, 330, 462, 462, 330, 165, 55, 11, 1},
+};
+
/******************************************************************************
Section: AVX2 fast methods
@@ -500,6 +518,7 @@ typedef __m256i cube_fast_t;
#define _co2_avx2 _mm256_set_epi64x(0, 0, 0, 0x6060606060606060)
#define _cocw_avx2 _mm256_set_epi64x(0, 0, 0, 0x2020202020202020)
#define _cp_avx2 _mm256_set_epi64x(0, 0, 0, 0x0707070707070707)
+#define _ep_avx2 _mm256_set_epi64x(0x0F0F0F0F, 0x0F0F0F0F0F0F0F0F, 0, 0)
#define _eo_avx2 _mm256_set_epi64x(0x10101010, 0x1010101010101010, 0, 0)
_static_inline cube_fast_t fastcube(
@@ -518,6 +537,7 @@ _static_inline cube_fast_t compose_fast(cube_fast_t, cube_fast_t);
_static_inline int64_t coord_fast_co(cube_fast_t);
_static_inline int64_t coord_fast_csep(cube_fast_t);
_static_inline int64_t coord_fast_eo(cube_fast_t);
+_static_inline int64_t coord_fast_esep(cube_fast_t);
_static_inline cube_fast_t
fastcube(
@@ -686,6 +706,36 @@ coord_fast_eo(cube_fast_t c)
return mask >> 17;
}
+_static_inline int64_t
+coord_fast_esep(cube_fast_t c)
+{
+ cube_fast_t ep;
+ int64_t e, mem[4], i, j, k, l, ret1, ret2, bit1, bit2, is1;
+
+ ep = _mm256_and_si256(c, _ep_avx2);
+ _mm256_storeu_si256((__m256i *)mem, ep);
+
+ mem[3] <<= 8L;
+ ret1 = ret2 = 0;
+ k = l = 4;
+ for (i = 0, j = 0; i < 12; i++, mem[i/8 + 2] >>= 8L) {
+ e = mem[i/8 + 2];
+
+ bit1 = (e & _esepbit1) >> 2L;
+ bit2 = (e & _esepbit2) >> 3L;
+ is1 = (1 - bit2) * bit1;
+
+ ret1 += bit2 * binomial[11-i][k];
+ k -= bit2;
+
+ ret2 += is1 * binomial[7-j][l];
+ l -= is1;
+ j += (1-bit2);
+ }
+
+ return ret1 * 70 + ret2;
+}
+
/******************************************************************************
Section: ARM NEON fast methods
@@ -730,6 +780,7 @@ _static_inline cube_fast_t compose_fast(cube_fast_t, cube_fast_t);
_static_inline int64_t coord_fast_co(cube_fast_t);
_static_inline int64_t coord_fast_csep(cube_fast_t);
_static_inline int64_t coord_fast_eo(cube_fast_t);
+_static_inline int64_t coord_fast_esep(cube_fast_t);
_static_inline cube_fast_t
fastcube(
@@ -863,6 +914,13 @@ coord_fast_co(cube_fast_t c)
return ret;
}
+/*
+For corner separation, we consider the axis (a.k.a. tetrad) each
+corner belongs to as 0 or 1 and we translate this sequence into binary.
+Ignoring the last bit, we have a value up to 2^7, but not all values are
+possible. Encoding this as a number from 0 to C(8,4) would save about 40%
+of space, but we are not going to use this coordinate in large tables.
+*/
_static_inline int64_t
coord_fast_csep(cube_fast_t c)
{
@@ -870,7 +928,7 @@ coord_fast_csep(cube_fast_t c)
int64_t ret;
for (ret = 0, i = 0, p = 1; i < 7; i++, p *= 2)
- ret += p * ((c.corner[i] & _pbits) >> 2U);
+ ret += p * ((c.corner[i] & _csepbit) >> 2U);
return ret;
}
@@ -887,6 +945,41 @@ coord_fast_eo(cube_fast_t c)
return ret;
}
+/*
+We encode the edge separation as a number from 0 to C(12,4)*C(8,4).
+It can be seen as the composition of two "subset index" coordinates.
+*/
+_static_inline int64_t
+coord_fast_esep(cube_fast_t c)
+{
+ int64_t i, j, k, l, ret1, ret2, bit1, bit2, is1;
+
+ for (i = 0, j = 0, k = 4, l = 4, ret1 = 0, ret2 = 0; i < 12; i++) {
+ /* Simple version:
+ if (c.edge[i] & _esepbit2) {
+ ret1 += binomial[11-i][k--];
+ } else {
+ if (c.edge[i] & _esepbit1)
+ ret2 += binomial[7-j][l--];
+ j++;
+ }
+ */
+
+ bit1 = (c.edge[i] & _esepbit1) >> 2U;
+ bit2 = (c.edge[i] & _esepbit2) >> 3U;
+ is1 = (1 - bit2) * bit1;
+
+ ret1 += bit2 * binomial[11-i][k];
+ k -= bit2;
+
+ ret2 += is1 * binomial[7-j][l];
+ l -= is1;
+ j += (1-bit2);
+ }
+
+ return ret1 * 70 + ret2;
+}
+
#endif
/******************************************************************************
diff --git a/test/074_coord_esep/00_solved.in b/test/074_coord_esep/00_solved.in
@@ -0,0 +1 @@
+UF0 UB0 DB0 DF0 UR0 UL0 DL0 DR0 FR0 FL0 BL0 BR0 UFR0 UBL0 DFL0 DBR0 UFL0 UBR0 DFR0 DBL0
diff --git a/test/074_coord_esep/00_solved.out b/test/074_coord_esep/00_solved.out
@@ -0,0 +1 @@
+0
diff --git a/test/074_coord_esep/01_U.in b/test/074_coord_esep/01_U.in
@@ -0,0 +1 @@
+UR0 UL0 DB0 DF0 UB0 UF0 DL0 DR0 FR0 FL0 BL0 BR0 UBR0 UFL0 DFL0 DBR0 UFR0 UBL0 DFR0 DBL0
diff --git a/test/074_coord_esep/01_U.out b/test/074_coord_esep/01_U.out
@@ -0,0 +1 @@
+55
diff --git a/test/074_coord_esep/02_U2.in b/test/074_coord_esep/02_U2.in
@@ -0,0 +1 @@
+UB0 UF0 DB0 DF0 UL0 UR0 DL0 DR0 FR0 FL0 BL0 BR0 UBL0 UFR0 DFL0 DBR0 UBR0 UFL0 DFR0 DBL0
diff --git a/test/074_coord_esep/02_U2.out b/test/074_coord_esep/02_U2.out
@@ -0,0 +1 @@
+0
diff --git a/test/074_coord_esep/20_scrambled.in b/test/074_coord_esep/20_scrambled.in
@@ -0,0 +1,3 @@
+UL0 BL0 BR1 DL0 FR0 DF0 DB1 DR1 UB0 FL0 UF0 UR1 DFL0 UFR1 DBR1 UBR2 DBL2 DFR0 UFL1 UBL2
+
+// Scramble: U R D' L D' F L2 D L F B D2 B' L2 F U2 L2 D2 R2 L2 B'
diff --git a/test/074_coord_esep/20_scrambled.out b/test/074_coord_esep/20_scrambled.out
@@ -0,0 +1 @@
+22248
diff --git a/test/074_coord_esep/21_swapaxis_FB.in b/test/074_coord_esep/21_swapaxis_FB.in
@@ -0,0 +1 @@
+FR0 BR0 BL0 FL0 UR0 UL0 DL0 DR0 DF0 UF0 UB0 DB0 UFR0 UBL0 DFL0 DBR0 UFL0 UBR0 DFR0 DBL0
diff --git a/test/074_coord_esep/21_swapaxis_FB.out b/test/074_coord_esep/21_swapaxis_FB.out
@@ -0,0 +1 @@
+34649
diff --git a/test/074_coord_esep/22_swapaxis_RL.in b/test/074_coord_esep/22_swapaxis_RL.in
@@ -0,0 +1 @@
+UF0 UB0 DB0 DF0 FR0 FL0 BL0 BR0 DR0 DL0 UL0 UR0 UFR0 UBL0 DFL0 DBR0 UFL0 UBR0 DFR0 DBL0
diff --git a/test/074_coord_esep/22_swapaxis_RL.out b/test/074_coord_esep/22_swapaxis_RL.out
@@ -0,0 +1 @@
+4830
diff --git a/test/074_coord_esep/23_swapaxis_UD.in b/test/074_coord_esep/23_swapaxis_UD.in
@@ -0,0 +1 @@
+UR0 UL0 DL0 DR0 UB0 UF0 DF0 DB0 FR0 FL0 BL0 BR0 UFR0 UBL0 DFL0 DBR0 UFL0 UBR0 DFR0 DBL0
diff --git a/test/074_coord_esep/23_swapaxis_UD.out b/test/074_coord_esep/23_swapaxis_UD.out
@@ -0,0 +1 @@
+69
diff --git a/test/074_coord_esep/coord_esep_tests.c b/test/074_coord_esep/coord_esep_tests.c
@@ -0,0 +1,21 @@
+#include "../test.h"
+
+int64_t coord_fast_esep(cube_fast_t);
+cube_fast_t cubetofast(cube_t);
+
+int main(void) {
+ char str[STRLENMAX];
+ cube_t cube;
+ cube_fast_t fast;
+ int64_t result;
+
+ fgets(str, STRLENMAX, stdin);
+ cube = readcube("H48", str);
+ fast = cubetofast(cube);
+
+ result = coord_fast_esep(fast);
+
+ printf("%" PRId64 "\n", result);
+
+ return 0;
+}