h48

A prototype for an optimal Rubik's cube solver, work in progress.
git clone https://git.tronto.net/h48
Download | Log | Files | Refs | README | LICENSE

commit 2d66a0ffea4b03cd27b0f4363f8c4b8b0a1242e4
parent 18c9a8b8905304cf5f8fc15825769046a3144866
Author: Sebastiano Tronto <sebastiano@tronto.net>
Date:   Sun, 18 Aug 2024 15:31:06 +0200

Removed duplication

Diffstat:
M src/arch/avx2.h | 25 ++-----------------------
M src/arch/common.h | 28 ++++++++++++++++++++++++++++
M src/arch/neon.h | 29 ++++-------------------------
M src/arch/portable.h | 23 +----------------------
4 files changed, 35 insertions(+), 70 deletions(-)

diff --git a/src/arch/avx2.h b/src/arch/avx2.h @@ -286,30 +286,9 @@ _static_inline cube_t invcoord_esep(int64_t esep) { cube_t eee, ret; - int64_t bit1, bit2, i, j, jj, k, l, s, v, w, is1, set1, set2; - uint8_t mem[32]; - uint8_t slice[3] = {0}; + uint8_t mem[32] = {0}; - set1 = esep % 70; - set2 = esep / 70; - - for (i = 0, j = 0, k = 4, l = 4; i < 12; i++) { - v = binomial[11-i][k]; - jj = j < 8; - w = jj * binomial[7-(j*jj)][l]; - bit2 = set2 >= v; - bit1 = set1 >= w; - is1 = (1 - bit2) * bit1; - - set2 -= bit2 * v; - k -= bit2; - set1 -= is1 * w; - l -= is1; - j += (1-bit2); - s = 2*bit2 + (1-bit2)*bit1; - - mem[i+16] = (slice[s]++) | (uint8_t)(s << 2); - } + invcoord_esep_array(esep % 70, esep / 70, mem+16); ret = solved; eee = _mm256_loadu_si256((__m256i_u *)&mem); diff --git a/src/arch/common.h b/src/arch/common.h @@ -17,3 +17,31 @@ _static_inline void copy_corners(cube_t *, cube_t); _static_inline void copy_edges(cube_t *, cube_t); _static_inline void set_eo(cube_t *, int64_t); _static_inline cube_t invcoord_esep(int64_t); + +_static_inline void invcoord_esep_array(int64_t, int64_t, uint8_t[static 12]); + +_static_inline void +invcoord_esep_array(int64_t set1, int64_t set2, uint8_t mem[static 12]) +{ + int64_t bit1, bit2, i, j, jj, k, l, s, v, w, is1; + uint8_t slice[3] = {0}; + + for (i = 0, j = 0, k = 4, l = 4; i < 12; i++) + { + v = binomial[11 - i][k]; + jj = j < 8; + w = jj * binomial[7 - (j * jj)][l]; + bit2 = set2 >= v; + bit1 = set1 >= w; + is1 = (1 - bit2) * bit1; + + set2 -= bit2 * v; + k -= bit2; + set1 -= is1 * w; + l -= is1; + j += (1 - bit2); + s = 2 * bit2 + (1 - bit2) * bit1; + + mem[i] = (slice[s]++) | (uint8_t)(s << 2); + } +} diff --git a/src/arch/neon.h b/src/arch/neon.h @@ -316,33 +316,12 @@ _static_inline cube_t invcoord_esep(int64_t esep) { cube_t ret; - int64_t bit1, bit2, i, j, jj, k, l, s, v, w, is1, set1, set2; - uint8_t slice[3] = {0}; + uint8_t mem[16] = {0}; - ret = solved; - uint8_t mem[16]; - set1 = esep % 70; - set2 = esep / 
70; - - for (i = 0, j = 0, k = 4, l = 4; i < 12; i++) - { - v = binomial[11 - i][k]; - jj = j < 8; - w = jj * binomial[7 - (j * jj)][l]; - bit2 = set2 >= v; - bit1 = set1 >= w; - is1 = (1 - bit2) * bit1; - - set2 -= bit2 * v; - k -= bit2; - set1 -= is1 * w; - l -= is1; - j += (1 - bit2); - s = 2 * bit2 + (1 - bit2) * bit1; - - mem[i] = (slice[s]++) | (uint8_t)(s << 2); - } + invcoord_esep_array(esep % 70, esep / 70, mem); + ret = solved; ret.edge = vld1q_u8(mem); + return ret; } diff --git a/src/arch/portable.h b/src/arch/portable.h @@ -243,30 +243,9 @@ _static_inline cube_t invcoord_esep(int64_t esep) { cube_t ret; - int64_t bit1, bit2, i, j, jj, k, l, s, v, w, is1, set1, set2; - uint8_t slice[3] = {0}; ret = solved; - set1 = esep % 70; - set2 = esep / 70; - - for (i = 0, j = 0, k = 4, l = 4; i < 12; i++) { - v = binomial[11-i][k]; - jj = j < 8; - w = jj * binomial[7-(j*jj)][l]; - bit2 = set2 >= v; - bit1 = set1 >= w; - is1 = (1 - bit2) * bit1; - - set2 -= bit2 * v; - k -= bit2; - set1 -= is1 * w; - l -= is1; - j += (1-bit2); - s = 2*bit2 + (1-bit2)*bit1; - - ret.edge[i] = (slice[s]++) | (uint8_t)(s << 2); - } + invcoord_esep_array(esep % 70, esep / 70, ret.edge); return ret; }