nissy-core

The "engine" of nissy, including the H48 optimal solver.
git clone https://git.tronto.net/nissy-core
Download | Log | Files | Refs | README | LICENSE

commit 1bd4691d9cc46ec75adbe899b289a1ebc5460721
parent 0844b0ecd00bd6d68a7bafaeae0ee3d420db8ff6
Author: Sebastiano Tronto <sebastiano@tronto.net>
Date:   Fri,  1 Aug 2025 16:52:44 +0200

Added CPEPE coordinate in preparation of full DRFIN solver

Diffstat:
Msrc/arch/avx2.h | 73++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
Msrc/arch/common.h | 2++
Msrc/arch/neon.h | 79++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
Msrc/arch/portable.h | 22++++++++++++++++++++++
Asrc/solvers/coord/cpepe.h | 86+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Msrc/utils/constants.h | 1+
Atest/082_invcoord_epe/00_all.in | 0
Atest/082_invcoord_epe/00_all.out | 1+
Atest/082_invcoord_epe/invcoord_epe_tests.c | 32++++++++++++++++++++++++++++++++
9 files changed, 282 insertions(+), 14 deletions(-)

diff --git a/src/arch/avx2.h b/src/arch/avx2.h @@ -24,9 +24,9 @@ #define ZERO_CUBE _mm256_set_epi64x(0, 0, 0, 0) #define SOLVED_CUBE _mm256_set_epi64x(SOLVED_H, SOLVED_L, 0, SOLVED_L) - -STATIC_INLINE uint64_t permtoindex_8x8(int64_t); +STATIC_INLINE uint64_t permtoindex_Nx8(uint64_t, int64_t); STATIC_INLINE int64_t indextoperm_8x8(uint64_t); +STATIC_INLINE int64_t indextoperm_4x8(uint64_t); STATIC_INLINE int popcount_u32(uint32_t x) @@ -295,17 +295,17 @@ set_eo(cube_t cube[static 1], uint64_t eo) } STATIC_INLINE uint64_t -permtoindex_8x8(int64_t a) +permtoindex_Nx8(uint64_t n, int64_t a) { uint64_t i, c, ret; __m64 cmp; - for (i = 0, ret = 0; i < 8; i++) { + for (i = 0, ret = 0; i < n; i++) { cmp = _mm_set1_pi8(a & INT64_C(0xFF)); a = (a >> INT64_C(8)) | INT64_C(0x0F00000000000000); cmp = _mm_cmpgt_pi8(cmp, _mm_cvtsi64_m64(a)); c = _mm_popcnt_u64(_mm_cvtm64_si64(cmp)) >> UINT64_C(3); - ret += c * factorial[7-i]; + ret += c * factorial[n-1-i]; } return ret; @@ -332,6 +332,39 @@ indextoperm_8x8(uint64_t p) return ret; } +STATIC_INLINE int64_t +indextoperm_4x8(uint64_t p) +{ + static const int64_t A[FACT_4] = { + [0] = INT64_C(0x03020100), + [1] = INT64_C(0x02030100), + [2] = INT64_C(0x03010200), + [3] = INT64_C(0x01030200), + [4] = INT64_C(0x02010300), + [5] = INT64_C(0x01020300), + [6] = INT64_C(0x03020001), + [7] = INT64_C(0x02030001), + [8] = INT64_C(0x03000201), + [9] = INT64_C(0x00030201), + [10] = INT64_C(0x02000301), + [11] = INT64_C(0x00020301), + [12] = INT64_C(0x03010002), + [13] = INT64_C(0x01030002), + [14] = INT64_C(0x03000102), + [15] = INT64_C(0x00030102), + [16] = INT64_C(0x01000302), + [17] = INT64_C(0x00010302), + [18] = INT64_C(0x02010003), + [19] = INT64_C(0x01020003), + [20] = INT64_C(0x02000103), + [21] = INT64_C(0x00020103), + [22] = INT64_C(0x01000203), + [23] = INT64_C(0x00010203), + }; + + return A[p]; +} + STATIC_INLINE uint64_t coord_cp(cube_t cube) { @@ -341,7 +374,7 @@ coord_cp(cube_t cube) cp = _mm256_and_si256(cube, CP_AVX2); _mm256_storeu_si256((__m256i_u *)aux, cp); - return permtoindex_8x8(aux[0]); + return permtoindex_Nx8(8, aux[0]); } STATIC_INLINE cube_t @@ -359,7 +392,7 @@ coord_epud(cube_t cube) ep = _mm256_and_si256(cube, EP_AVX2); _mm256_storeu_si256((__m256i_u *)aux, ep); - return permtoindex_8x8(aux[2]); + return permtoindex_Nx8(8, aux[2]); } STATIC_INLINE cube_t @@ -367,3 +400,29 @@ invcoord_epud(uint64_t i) { return _mm256_set_epi64x(SOLVED_H, indextoperm_8x8(i), 0, SOLVED_L); } + +STATIC_INLINE uint64_t +coord_epe(cube_t cube) +{ + cube_t ep; + int64_t aux[4]; + + ep = _mm256_and_si256(cube, EP_AVX2); + ep = _mm256_xor_si256(ep, _mm256_set1_epi8(8)); + _mm256_storeu_si256((__m256i_u *)aux, ep); + + return permtoindex_Nx8(4, aux[3]); +} + +STATIC_INLINE cube_t +invcoord_epe(uint64_t i) +{ + int64_t a; + __m64 a64; + + a = indextoperm_4x8(i); + a64 = _mm_add_pi8(_mm_cvtsi64_m64(a), _mm_set_pi32(0, 0x08080808)); + a = _mm_cvtm64_si64(a64); + + return _mm256_set_epi64x(a, SOLVED_L, 0, SOLVED_L); +} diff --git a/src/arch/common.h b/src/arch/common.h @@ -41,6 +41,8 @@ STATIC_INLINE uint64_t coord_cp(cube_t); STATIC_INLINE cube_t invcoord_cp(uint64_t); STATIC_INLINE uint64_t coord_epud(cube_t); STATIC_INLINE cube_t invcoord_epud(uint64_t); +STATIC_INLINE uint64_t coord_epe(cube_t); +STATIC_INLINE cube_t invcoord_epe(uint64_t); STATIC_INLINE void invcoord_esep_array(uint64_t set1, uint64_t set2, uint8_t mem[static 12]) diff --git a/src/arch/neon.h b/src/arch/neon.h @@ -1,6 +1,6 @@ #define CO2_NEON vdup_n_u8(0x60) #define COCW_NEON vdup_n_u8(0x20) -#define PBITS8_NEON vdup_n_u8(0x07) +#define PBITS8_NEON vdup_n_u8(PBITS) STATIC_INLINE uint8x16_t compose_edges_slim(uint8x16_t, uint8x16_t); STATIC_INLINE uint8x8_t compose_corners_slim(uint8x8_t, uint8x8_t); @@ -29,8 +29,9 @@ STATIC_INLINE uint8x8_t compose_corners_slim(uint8x8_t, uint8x8_t); const uint8_t SOLVED_L[8] = {0, 1, 2, 3, 4, 5, 6, 7}; const uint8_t SOLVED_H[8] = {8, 9, 10, 11, 0, 0, 0}; -STATIC_INLINE uint64_t permtoindex_8x8(uint8x8_t); +STATIC_INLINE uint64_t permtoindex_Nx8(uint64_t, uint8x8_t); STATIC_INLINE uint8x8_t indextoperm_8x8(uint64_t); +STATIC_INLINE uint8x8_t indextoperm_4x8(uint64_t); STATIC_INLINE int popcount_u32(uint32_t x) @@ -364,14 +365,14 @@ invcoord_esep(uint64_t esep) } STATIC_INLINE uint64_t -permtoindex_8x8(uint8x8_t a) +permtoindex_Nx8(uint64_t n, uint8x8_t a) { uint64_t i, c, ret; uint8x8_t cmp; uint64x1_t anum; uint8_t or[8] = {0, 0, 0, 0, 0, 0, 0, 0x0F}; - for (i = 0, ret = 0; i < 8; i++) { + for (i = 0, ret = 0; i < n; i++) { cmp = vdup_lane_u8(a, 0); anum = vreinterpret_u64_u8(a); anum = vshr_n_u64(anum, 8); @@ -379,7 +380,7 @@ permtoindex_8x8(uint8x8_t a) a = vorr_u8(a, vld1_u8(or)); cmp = vcgt_u8(cmp, a); c = vaddv_u8(vshr_n_u8(cmp, 7)); - ret += c * factorial[7-i]; + ret += c * factorial[n-1-i]; } return ret; @@ -407,10 +408,43 @@ indextoperm_8x8(uint64_t p) return vld1_u8(ret); } +STATIC_INLINE uint8x8_t +indextoperm_4x8(uint64_t p) +{ + static const int64_t A[FACT_4] = { + [0] = UINT64_C(0x03020100), + [1] = UINT64_C(0x02030100), + [2] = UINT64_C(0x03010200), + [3] = UINT64_C(0x01030200), + [4] = UINT64_C(0x02010300), + [5] = UINT64_C(0x01020300), + [6] = UINT64_C(0x03020001), + [7] = UINT64_C(0x02030001), + [8] = UINT64_C(0x03000201), + [9] = UINT64_C(0x00030201), + [10] = UINT64_C(0x02000301), + [11] = UINT64_C(0x00020301), + [12] = UINT64_C(0x03010002), + [13] = UINT64_C(0x01030002), + [14] = UINT64_C(0x03000102), + [15] = UINT64_C(0x00030102), + [16] = UINT64_C(0x01000302), + [17] = UINT64_C(0x00010302), + [18] = UINT64_C(0x02010003), + [19] = UINT64_C(0x01020003), + [20] = UINT64_C(0x02000103), + [21] = UINT64_C(0x00020103), + [22] = UINT64_C(0x01000203), + [23] = UINT64_C(0x00010203), + }; + + return vreinterpret_u8_u64(vdup_n_u64(A[p])); +} + STATIC_INLINE uint64_t coord_cp(cube_t cube) { - return permtoindex_8x8(vand_u8(cube.corner, PBITS8_NEON)); + return permtoindex_Nx8(8, vand_u8(cube.corner, PBITS8_NEON)); } STATIC_INLINE cube_t @@ -425,7 +459,12 @@ invcoord_cp(uint64_t i) STATIC_INLINE uint64_t coord_epud(cube_t cube) { - return permtoindex_8x8(vand_u8(vget_low_u8(cube.edge), PBITS8_NEON)); + uint8x8_t a; + + a = vget_low_u8(cube.edge); + a = vand_u8(a, PBITS8_NEON); + + return permtoindex_Nx8(8, a); } STATIC_INLINE cube_t @@ -436,3 +475,29 @@ invcoord_epud(uint64_t i) .edge = vcombine_u8(indextoperm_8x8(i), vld1_u8(SOLVED_H)) }; } + +STATIC_INLINE uint64_t +coord_epe(cube_t cube) +{ + uint8x8_t a; + + a = vget_high_u8(cube.edge); + a = vand_u8(a, PBITS8_NEON); + a = veor_u8(a, vdup_n_u8(8)); + + return permtoindex_Nx8(4, a); +} + +STATIC_INLINE cube_t +invcoord_epe(uint64_t i) +{ + uint8x8_t a; + + a = indextoperm_4x8(i); + a = vadd_u8(a, vreinterpret_u8_u64(vdup_n_u64(UINT64_C(0x08080808)))); + + return (cube_t) { + .corner = vld1_u8(SOLVED_L), + .edge = vcombine_u8(vld1_u8(SOLVED_L), a) + }; +} diff --git a/src/arch/portable.h b/src/arch/portable.h @@ -334,3 +334,25 @@ invcoord_epud(uint64_t i) return STATIC_CUBE(0, 1, 2, 3, 4, 5, 6, 7, e[0], e[1], e[2], e[3], e[4], e[5], e[6], e[7], 8, 9, 10, 11); } + +STATIC_INLINE uint64_t +coord_epe(cube_t cube) +{ + int i; + + for (i = 8; i < 12; i++) + cube.edge[i] = (cube.edge[i] & PBITS) - 8; + + return permtoindex(4, cube.edge+8); +} + +STATIC_INLINE cube_t +invcoord_epe(uint64_t i) +{ + uint8_t e[4]; + + indextoperm(i, 4, e); + + return STATIC_CUBE(0, 1, 2, 3, 4, 5, 6, 7, + 0, 1, 2, 3, 4, 5, 6, 7, e[0]+8, e[1]+8, e[2]+8, e[3]+8); +} diff --git a/src/solvers/coord/cpepe.h b/src/solvers/coord/cpepe.h @@ -0,0 +1,86 @@ +STATIC cube_t coordinate_cpepe_merge(const cube_t, const cube_t); +STATIC uint64_t coordinate_cpepe_coord(const cube_t, const unsigned char *); +STATIC cube_t coordinate_cpepe_cube(uint64_t, const unsigned char *); +STATIC bool coordinate_cpepe_isnasty(uint64_t, const unsigned char *); +STATIC size_t coordinate_cpepe_gendata(unsigned char *); + +STATIC coord_t coordinate_cpepe = { + .name = "CPEPE", + .coord = &coordinate_cpepe_coord, + .cube = &coordinate_cpepe_cube, + .isnasty = &coordinate_cpepe_isnasty, + .gendata = coordinate_cpepe_gendata, + .max = CLASSES_CP_16 * FACT_4, + .trans_mask = TM_UDFIX, + .moves_mask_gendata = MM18_DR, + .moves_mask_solve = MM18_DR, + .is_admissible = &solution_always_valid, + .is_solvable = &is_drfin_solvable, + .is_solved = NULL, + .allow_niss = false, + .pruning_distribution = { + /* TODO */ + [0] = 0, + [1] = 0, + [2] = 0, + [3] = 0, + [4] = 0, + [5] = 0, + [6] = 0, + [7] = 0, + [8] = 0, + [9] = 0, + [10] = 0, + [11] = 0, + [12] = 0, + [13] = 0, + [14] = 0, + [15] = 0, + }, + .pruning_max = 15, /* TODO */ + .sym = { + .classes = CLASSES_CP_16, + .max = FACT_8, + .coord = &coord_cp, + .cube = &invcoord_cp, + .max2 = FACT_4, + .coord2 = &coord_epe, + .cube2 = &invcoord_epe, + .merge = &coordinate_cpepe_merge, + }, +}; + +STATIC cube_t +coordinate_cpepe_merge(const cube_t c1, const cube_t c2) +{ + cube_t merged; + + merged = c1; + copy_edges(&merged, c2); + + return merged; +} + +STATIC uint64_t +coordinate_cpepe_coord(const cube_t cube, const unsigned char *data) +{ + return coord_coord_generic(&coordinate_cpepe, cube, data); +} + +STATIC cube_t +coordinate_cpepe_cube(uint64_t i, const unsigned char *data) +{ + return coord_cube_generic(&coordinate_cpepe, i, data); +} + +STATIC bool +coordinate_cpepe_isnasty(uint64_t i, const unsigned char *data) +{ + return coord_isnasty_generic(&coordinate_cpepe, i, data); +} + +STATIC size_t +coordinate_cpepe_gendata(unsigned char *data) +{ + return coord_gendata_generic(&coordinate_cpepe, data); +} diff --git a/src/utils/constants.h b/src/utils/constants.h @@ -6,6 +6,7 @@ #define POW_3_7 UINT64_C(2187) #define FACT_12 UINT64_C(479001600) #define FACT_8 UINT64_C(40320) +#define FACT_4 UINT64_C(24) #define COMB_12_4 UINT64_C(495) #define COMB_8_4 UINT64_C(70) diff --git a/test/082_invcoord_epe/00_all.in b/test/082_invcoord_epe/00_all.in diff --git a/test/082_invcoord_epe/00_all.out b/test/082_invcoord_epe/00_all.out @@ -0,0 +1 @@ +All good diff --git a/test/082_invcoord_epe/invcoord_epe_tests.c b/test/082_invcoord_epe/invcoord_epe_tests.c @@ -0,0 +1,32 @@ +#include "../test.h" + +#define FACT_4 24 + +uint64_t coord_epe(cube_t); +cube_t invcoord_epe(uint64_t); + +void run(void) { + oriented_cube_t cube; + uint64_t coord, coord2; + + cube.orientation = 0; + + /* Test all possible values for CP coordinate */ + for (coord = 0; coord < FACT_4; coord++) { + cube.cube = invcoord_epe(coord); + + if (!isconsistent(cube)) { + printf("Not consistent\n"); + return; + } + + coord2 = coord_epe(cube.cube); + if (coord != coord2) { + printf("Error: invcoord of %" PRIu64 + " returns %" PRIu64 "\n", coord, coord2); + return; + } + } + + printf("All good\n"); +}