commit 2d66a0ffea4b03cd27b0f4363f8c4b8b0a1242e4
parent 18c9a8b8905304cf5f8fc15825769046a3144866
Author: Sebastiano Tronto <sebastiano@tronto.net>
Date: Sun, 18 Aug 2024 15:31:06 +0200
Removed duplication
Diffstat:
4 files changed, 35 insertions(+), 70 deletions(-)
diff --git a/src/arch/avx2.h b/src/arch/avx2.h
@@ -286,30 +286,9 @@ _static_inline cube_t
invcoord_esep(int64_t esep)
{
cube_t eee, ret;
- int64_t bit1, bit2, i, j, jj, k, l, s, v, w, is1, set1, set2;
- uint8_t mem[32];
- uint8_t slice[3] = {0};
+ uint8_t mem[32] = {0};
- set1 = esep % 70;
- set2 = esep / 70;
-
- for (i = 0, j = 0, k = 4, l = 4; i < 12; i++) {
- v = binomial[11-i][k];
- jj = j < 8;
- w = jj * binomial[7-(j*jj)][l];
- bit2 = set2 >= v;
- bit1 = set1 >= w;
- is1 = (1 - bit2) * bit1;
-
- set2 -= bit2 * v;
- k -= bit2;
- set1 -= is1 * w;
- l -= is1;
- j += (1-bit2);
- s = 2*bit2 + (1-bit2)*bit1;
-
- mem[i+16] = (slice[s]++) | (uint8_t)(s << 2);
- }
+ invcoord_esep_array(esep % 70, esep / 70, mem+16);
ret = solved;
eee = _mm256_loadu_si256((__m256i_u *)&mem);
diff --git a/src/arch/common.h b/src/arch/common.h
@@ -17,3 +17,31 @@ _static_inline void copy_corners(cube_t *, cube_t);
_static_inline void copy_edges(cube_t *, cube_t);
_static_inline void set_eo(cube_t *, int64_t);
_static_inline cube_t invcoord_esep(int64_t);
+
+_static_inline void invcoord_esep_array(int64_t, int64_t, uint8_t[static 12]);
+/* Fill mem[0..11]: byte i is (s << 2) | n, where n counts the earlier bytes that got the same s. */
+_static_inline void
+invcoord_esep_array(int64_t set1, int64_t set2, uint8_t mem[static 12]) /* mem must point to at least 12 bytes */
+{
+ int64_t bit1, bit2, i, j, jj, k, l, s, v, w, is1;
+ uint8_t slice[3] = {0}; /* slice[s]: how many entries have received value s so far */
+
+ for (i = 0, j = 0, k = 4, l = 4; i < 12; i++)
+ {
+ v = binomial[11 - i][k]; /* NOTE(review): binomial is defined outside this hunk; presumably a binomial-coefficient table - confirm */
+ jj = j < 8; /* 0 once j reaches 8, so the row index below stays 7 and w becomes 0 */
+ w = jj * binomial[7 - (j * jj)][l];
+ bit2 = set2 >= v; /* comparison results are used as 0/1 factors instead of branches */
+ bit1 = set1 >= w;
+ is1 = (1 - bit2) * bit1; /* bit1 only takes effect when bit2 is 0 */
+
+ set2 -= bit2 * v; /* subtract v and decrement k only when bit2 is 1 */
+ k -= bit2;
+ set1 -= is1 * w; /* subtract w and decrement l only when is1 is 1 */
+ l -= is1;
+ j += (1 - bit2); /* j counts the iterations in which bit2 was 0 */
+ s = 2 * bit2 + (1 - bit2) * bit1; /* 2 if bit2, else 1 if bit1, else 0 */
+
+ mem[i] = (slice[s]++) | (uint8_t)(s << 2); /* post-increment: stores the count before this entry */
+ }
+}
diff --git a/src/arch/neon.h b/src/arch/neon.h
@@ -316,33 +316,12 @@ _static_inline cube_t
invcoord_esep(int64_t esep)
{
cube_t ret;
- int64_t bit1, bit2, i, j, jj, k, l, s, v, w, is1, set1, set2;
- uint8_t slice[3] = {0};
+ uint8_t mem[16] = {0}; /* 16 bytes for the vld1q_u8 load below; only mem[0..11] get written, the rest stay 0 */
- ret = solved;
- uint8_t mem[16];
- set1 = esep % 70;
- set2 = esep / 70;
-
- for (i = 0, j = 0, k = 4, l = 4; i < 12; i++)
- {
- v = binomial[11 - i][k];
- jj = j < 8;
- w = jj * binomial[7 - (j * jj)][l];
- bit2 = set2 >= v;
- bit1 = set1 >= w;
- is1 = (1 - bit2) * bit1;
-
- set2 -= bit2 * v;
- k -= bit2;
- set1 -= is1 * w;
- l -= is1;
- j += (1 - bit2);
- s = 2 * bit2 + (1 - bit2) * bit1;
-
- mem[i] = (slice[s]++) | (uint8_t)(s << 2);
- }
+ invcoord_esep_array(esep % 70, esep / 70, mem); /* set1 = esep % 70, set2 = esep / 70 */
+ ret = solved; /* start from solved; only the edge field is replaced below */
ret.edge = vld1q_u8(mem);
+
return ret;
}
diff --git a/src/arch/portable.h b/src/arch/portable.h
@@ -243,30 +243,9 @@ _static_inline cube_t
invcoord_esep(int64_t esep)
{
cube_t ret;
- int64_t bit1, bit2, i, j, jj, k, l, s, v, w, is1, set1, set2;
- uint8_t slice[3] = {0};
ret = solved;
- set1 = esep % 70;
- set2 = esep / 70;
-
- for (i = 0, j = 0, k = 4, l = 4; i < 12; i++) {
- v = binomial[11-i][k];
- jj = j < 8;
- w = jj * binomial[7-(j*jj)][l];
- bit2 = set2 >= v;
- bit1 = set1 >= w;
- is1 = (1 - bit2) * bit1;
-
- set2 -= bit2 * v;
- k -= bit2;
- set1 -= is1 * w;
- l -= is1;
- j += (1-bit2);
- s = 2*bit2 + (1-bit2)*bit1;
-
- ret.edge[i] = (slice[s]++) | (uint8_t)(s << 2);
- }
+ invcoord_esep_array(esep % 70, esep / 70, ret.edge); /* set1 = esep % 70, set2 = esep / 70; writes ret.edge[0..11] in place */
return ret;
}