commit b05fa355c49c619cce1226716a395381a944f578
parent fabb1ae495c4404af0f26380c97d0bca7e881a27
Author: Sebastiano Tronto <sebastiano@tronto.net>
Date: Sat, 28 Oct 2023 20:42:24 +0200
Started working on AVX2 (not working yet)
Diffstat:
12 files changed, 359 insertions(+), 223 deletions(-)
diff --git a/Makefile b/Makefile
@@ -1,15 +1,5 @@
-# See LICENSE file for copyright and license details.
-
include config.mk
-CFLAGS = -std=c99 -pthread -pedantic -Wall -Wextra \
- -Wno-unused-parameter -O3
-DBGFLAGS = -DDEBUG -std=c99 -pthread -pedantic -Wall -Wextra \
- -Wno-unused-parameter -Wno-unused-function -g3 \
- -fsanitize=address -fsanitize=undefined
-
-CC = cc
-
all: cube.o debugcube.o
cube.o: clean
diff --git a/README.md b/README.md
@@ -2,18 +2,32 @@
Work in progress.
-TODO:
+## Running tests
+
+```
+$ ./configure.sh # Run 'TYPE=AVX2 ./configure.sh' to use AVX2 instead
+$ make test
+```
+
+## TODO:
+
+### AVX2
+
+* static `solvedcube` and co don't work, turn into functions?
+* implement missing stuff (moves, transform)
+* optimize things that use _base functions
+
+### More features
-* AVX2 compile-time switch
* coordinates: co, eo, epsep, cpsep_sym, cocpsep_sym, cphtr_sym, cocphtr_sym
* pruning tables (1 bit per entry + fallback)
* solve.c
-Optimizations:
+### Optimizations:
* multi-move (up to 4/5 moves at once)
-Things I need to learn:
+### Things I need to learn:
* Use AVX2 instructions, in particular
[_mm256_shuffle_epi8](https://www.intel.com/content/www/us/en/docs/cpp-compiler/developer-guide-reference/2021-10/mm256-shuffle-epi8.html))
diff --git a/config.mk b/config.mk
@@ -1 +1,6 @@
-CUBETYPE = CUBE_$(TYPE)
+CUBETYPE = CUBE_
+
+CFLAGS = -std=c99 -pthread -pedantic -Wall -Wextra -Wno-unused-parameter -O3
+DBGFLAGS = -DDEBUG -std=c99 -pthread -pedantic -Wall -Wextra -Wno-unused-parameter -Wno-unused-function -g3 -fsanitize=address -fsanitize=undefined
+
+CC = cc
diff --git a/configure.sh b/configure.sh
@@ -0,0 +1,18 @@
+#!/bin/sh
+
+CFLAGS="-std=c99 -pthread -pedantic -Wall -Wextra -Wno-unused-parameter -O3"
+DBGFLAGS="-DDEBUG -std=c99 -pthread -pedantic -Wall -Wextra -Wno-unused-parameter -Wno-unused-function -g3 -fsanitize=address -fsanitize=undefined"
+
+if [ "$TYPE" = "AVX2" ]; then
+ CFLAGS="$CFLAGS -mavx2"
+ DBGFLAGS="$DBGFLAGS -mavx2"
+fi
+
+{
+echo "CUBETYPE = CUBE_$TYPE";
+echo "";
+echo "CFLAGS = $CFLAGS";
+echo "DBGFLAGS = $DBGFLAGS";
+echo "";
+echo "CC = cc"
+} > config.mk
diff --git a/src/_base_arr.c b/src/_base_arr.c
@@ -0,0 +1,30 @@
+cube_t solvedcube = {
+ .c = {0, 1, 2, 3, 4, 5, 6, 7},
+ .e = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}
+};
+static cube_t errorcube = { .e = {0}, .c = {0} };
+static cube_t zerocube = { .e = {0}, .c = {0} };
+
+static uint8_t
+get_edge(cube_t c, uint8_t i)
+{
+ return c.e[i];
+}
+
+static uint8_t
+get_corner(cube_t c, uint8_t i)
+{
+ return c.c[i];
+}
+
+static void
+set_edge(cube_t *c, uint8_t i, uint8_t p)
+{
+ c->e[i] = p;
+}
+
+static void
+set_corner(cube_t *c, uint8_t i, uint8_t p)
+{
+ c->c[i] = p;
+}
diff --git a/src/_base_avx2.c b/src/_base_avx2.c
@@ -0,0 +1,28 @@
+#define solvedcube _mm256_set_epi8( \
+ 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0, /* Corners */ \
+ 0, 0, 0, 0, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 /* Edges */ \
+)
+#define errorcube _mm256_setzero_si256()
+#define zerocube _mm256_setzero_si256()
+
+static uint8_t
+get_edge(cube_t c, uint8_t i)
+{
+ return 0;
+}
+
+static uint8_t
+get_corner(cube_t c, uint8_t i)
+{
+ return 0;
+}
+
+static void
+set_edge(cube_t *c, uint8_t i, uint8_t p)
+{
+}
+
+static void
+set_corner(cube_t *c, uint8_t i, uint8_t p)
+{
+}
diff --git a/src/_constants.c b/src/_constants.c
@@ -104,13 +104,6 @@ trans_t inverse_trans[] = {
[BRm] = RDm,
};
-cube_t solvedcube = {
- .c = {0, 1, 2, 3, 4, 5, 6, 7},
- .e = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}
-};
-
-static cube_t errorcube = { .e = {0}, .c = {0} };
-
static char *cornerstr[] = {
[_c_ufr] = "UFR",
[_c_ubl] = "UBL",
diff --git a/src/_move_logic_arr.c b/src/_move_logic_arr.c
@@ -0,0 +1,149 @@
+#define PERM4(r, i, j, k, l) \
+ aux = r[i]; \
+ r[i] = r[l]; \
+ r[l] = r[k]; \
+ r[k] = r[j]; \
+ r[j] = aux;
+#define PERM22(r, i, j, k, l) \
+ aux = r[i]; \
+ r[i] = r[j]; \
+ r[j] = aux; \
+ aux = r[k]; \
+ r[k] = r[l]; \
+ r[l] = aux;
+#define CO(a, b) \
+ aux = (a & _cobits) + (b & _cobits); \
+ auy = (aux + _ctwist_cw) >> 2U; \
+ auz = (aux + auy) & _cobits2; \
+ a = (a & _pbits) | auz;
+#define CO4(r, i, j, k, l) \
+ CO(r[i], _ctwist_cw) \
+ CO(r[j], _ctwist_cw) \
+ CO(r[k], _ctwist_ccw) \
+ CO(r[l], _ctwist_ccw)
+#define EO4(r, i, j, k, l) \
+ r[i] ^= _eobit; \
+ r[j] ^= _eobit; \
+ r[k] ^= _eobit; \
+ r[l] ^= _eobit;
+
+uint8_t aux, auy, auz;
+cube_t ret = c;
+
+switch (m) {
+case U:
+ PERM4(ret.e, _e_uf, _e_ul, _e_ub, _e_ur)
+ PERM4(ret.c, _c_ufr, _c_ufl, _c_ubl, _c_ubr)
+
+ return ret;
+case U2:
+ PERM22(ret.e, _e_uf, _e_ub, _e_ul, _e_ur)
+ PERM22(ret.c, _c_ufr, _c_ubl, _c_ufl, _c_ubr)
+
+ return ret;
+case U3:
+ PERM4(ret.e, _e_uf, _e_ur, _e_ub, _e_ul)
+ PERM4(ret.c, _c_ufr, _c_ubr, _c_ubl, _c_ufl)
+
+ return ret;
+case D:
+ PERM4(ret.e, _e_df, _e_dr, _e_db, _e_dl)
+ PERM4(ret.c, _c_dfr, _c_dbr, _c_dbl, _c_dfl)
+
+ return ret;
+case D2:
+ PERM22(ret.e, _e_df, _e_db, _e_dr, _e_dl)
+ PERM22(ret.c, _c_dfr, _c_dbl, _c_dbr, _c_dfl)
+
+ return ret;
+case D3:
+ PERM4(ret.e, _e_df, _e_dl, _e_db, _e_dr)
+ PERM4(ret.c, _c_dfr, _c_dfl, _c_dbl, _c_dbr)
+
+ return ret;
+case R:
+ PERM4(ret.e, _e_ur, _e_br, _e_dr, _e_fr)
+ PERM4(ret.c, _c_ufr, _c_ubr, _c_dbr, _c_dfr)
+
+ CO4(ret.c, _c_ubr, _c_dfr, _c_ufr, _c_dbr)
+
+ return ret;
+case R2:
+ PERM22(ret.e, _e_ur, _e_dr, _e_fr, _e_br)
+ PERM22(ret.c, _c_ufr, _c_dbr, _c_ubr, _c_dfr)
+
+ return ret;
+case R3:
+ PERM4(ret.e, _e_ur, _e_fr, _e_dr, _e_br)
+ PERM4(ret.c, _c_ufr, _c_dfr, _c_dbr, _c_ubr)
+
+ CO4(ret.c, _c_ubr, _c_dfr, _c_ufr, _c_dbr)
+
+ return ret;
+case L:
+ PERM4(ret.e, _e_ul, _e_fl, _e_dl, _e_bl)
+ PERM4(ret.c, _c_ufl, _c_dfl, _c_dbl, _c_ubl)
+
+ CO4(ret.c, _c_ufl, _c_dbl, _c_dfl, _c_ubl)
+
+ return ret;
+case L2:
+ PERM22(ret.e, _e_ul, _e_dl, _e_fl, _e_bl)
+ PERM22(ret.c, _c_ufl, _c_dbl, _c_ubl, _c_dfl)
+
+ return ret;
+case L3:
+ PERM4(ret.e, _e_ul, _e_bl, _e_dl, _e_fl)
+ PERM4(ret.c, _c_ufl, _c_ubl, _c_dbl, _c_dfl)
+
+ CO4(ret.c, _c_ufl, _c_dbl, _c_dfl, _c_ubl)
+
+ return ret;
+case F:
+ PERM4(ret.e, _e_uf, _e_fr, _e_df, _e_fl)
+ PERM4(ret.c, _c_ufr, _c_dfr, _c_dfl, _c_ufl)
+
+ EO4(ret.e, _e_uf, _e_fr, _e_df, _e_fl)
+ CO4(ret.c, _c_ufr, _c_dfl, _c_dfr, _c_ufl)
+
+ return ret;
+case F2:
+ PERM22(ret.e, _e_uf, _e_df, _e_fr, _e_fl)
+ PERM22(ret.c, _c_ufr, _c_dfl, _c_ufl, _c_dfr)
+
+ return ret;
+case F3:
+ PERM4(ret.e, _e_uf, _e_fl, _e_df, _e_fr)
+ PERM4(ret.c, _c_ufr, _c_ufl, _c_dfl, _c_dfr)
+
+ EO4(ret.e, _e_uf, _e_fr, _e_df, _e_fl)
+ CO4(ret.c, _c_ufr, _c_dfl, _c_dfr, _c_ufl)
+
+ return ret;
+case B:
+ PERM4(ret.e, _e_ub, _e_bl, _e_db, _e_br)
+ PERM4(ret.c, _c_ubr, _c_ubl, _c_dbl, _c_dbr)
+
+ EO4(ret.e, _e_ub, _e_br, _e_db, _e_bl)
+ CO4(ret.c, _c_ubl, _c_dbr, _c_dbl, _c_ubr)
+
+ return ret;
+case B2:
+ PERM22(ret.e, _e_ub, _e_db, _e_br, _e_bl)
+ PERM22(ret.c, _c_ubr, _c_dbl, _c_ubl, _c_dbr)
+
+ return ret;
+case B3:
+ PERM4(ret.e, _e_ub, _e_br, _e_db, _e_bl)
+ PERM4(ret.c, _c_ubr, _c_dbr, _c_dbl, _c_ubl)
+
+ EO4(ret.e, _e_ub, _e_br, _e_db, _e_bl)
+ CO4(ret.c, _c_ubl, _c_dbr, _c_dbl, _c_ubr)
+
+ return ret;
+default:
+#ifdef DEBUG
+ fprintf(stderr, "mover error, unknown move\n");
+#endif
+ goto move_error;
+}
diff --git a/src/_move_logic_avx2.c b/src/_move_logic_avx2.c
@@ -0,0 +1,3 @@
+static cube_t move_cube[18]; /* TODO */
+
+return compose(move_cube[m], c);
diff --git a/src/_trans_move_avx2.c b/src/_trans_move_avx2.c
@@ -0,0 +1,2 @@
+static cube_t trans_move_cube[48];
+static cube_t trans_move_cube_inverse[48];
diff --git a/src/cube.c b/src/cube.c
@@ -2,6 +2,10 @@
#include <stdbool.h>
#include <string.h>
+#ifdef CUBE_AVX2
+#include <immintrin.h>
+#endif
+
#ifdef DEBUG
#include <stdio.h>
#endif
@@ -10,6 +14,12 @@
#include "_constants.c"
+#ifdef CUBE_AVX2
+#include "_base_avx2.c"
+#else
+#include "_base_arr.c"
+#endif
+
static bool isconsistent(cube_t);
static cube_t flipallcorners(cube_t);
static uint8_t readco(char *);
@@ -90,7 +100,7 @@ readcube_H48(char *buf)
{
int i;
uint8_t piece, orient;
- cube_t ret = {0};
+ cube_t ret = zerocube;
char *b = buf;
for (i = 0; i < 12; i++) {
@@ -102,7 +112,7 @@ readcube_H48(char *buf)
if ((orient = readeo(b)) == _error)
return errorcube;
b++;
- ret.e[i] = piece | orient;
+ set_edge(&ret, i, piece | orient);
}
for (i = 0; i < 8; i++) {
while (*b == ' ' || *b == '\t' || *b == '\n')
@@ -113,7 +123,7 @@ readcube_H48(char *buf)
if ((orient = readco(b)) == _error)
return errorcube;
b++;
- ret.c[i] = piece | orient;
+ set_corner(&ret, i, piece | orient);
}
return ret;
@@ -145,23 +155,25 @@ readcube(format_t format, char *buf)
static void
writecube_H48(cube_t cube, char *buf)
{
- uint8_t piece, orient;
+ uint8_t piece, perm, orient;
int i;
for (i = 0; i < 12; i++) {
- piece = cube.e[i] & _pbits;
- orient = (cube.e[i] & _eobit) >> _eoshift;
- buf[4*i ] = edgestr[piece][0];
- buf[4*i + 1] = edgestr[piece][1];
+ piece = get_edge(cube, i);
+ perm = piece & _pbits;
+ orient = (piece & _eobit) >> _eoshift;
+ buf[4*i ] = edgestr[perm][0];
+ buf[4*i + 1] = edgestr[perm][1];
buf[4*i + 2] = orient + '0';
buf[4*i + 3] = ' ';
}
for (i = 0; i < 8; i++) {
- piece = cube.c[i] & _pbits;
- orient = (cube.c[i] & _cobits) >> _coshift;
- buf[48 + 5*i ] = cornerstr[piece][0];
- buf[48 + 5*i + 1] = cornerstr[piece][1];
- buf[48 + 5*i + 2] = cornerstr[piece][2];
+ piece = get_corner(cube, i);
+ perm = piece & _pbits;
+ orient = (piece & _cobits) >> _coshift;
+ buf[48 + 5*i ] = cornerstr[perm][0];
+ buf[48 + 5*i + 1] = cornerstr[perm][1];
+ buf[48 + 5*i + 2] = cornerstr[perm][2];
buf[48 + 5*i + 3] = orient + '0';
buf[48 + 5*i + 4] = ' ';
}
@@ -196,18 +208,23 @@ static void
writecube_SRC(cube_t cube, char *buf)
{
int i, ptr;
+ uint8_t piece;
memcpy(buf, "{\n\t.c = {", 9);
ptr = 9;
- for (i = 0; i < 8; i++)
- ptr += writepiece_SRC(cube.c[i], buf + ptr);
+ for (i = 0; i < 8; i++) {
+ piece = get_corner(cube, i);
+ ptr += writepiece_SRC(piece, buf + ptr);
+ }
memcpy(buf+ptr-2, "},\n\t.e = {", 10);
ptr += 8;
- for (i = 0; i < 12; i++)
- ptr += writepiece_SRC(cube.e[i], buf + ptr);
+ for (i = 0; i < 12; i++) {
+ piece = get_edge(cube, i);
+ ptr += writepiece_SRC(piece, buf + ptr);
+ }
memcpy(buf+ptr-2, "}\n}\0", 4);
}
@@ -364,7 +381,7 @@ permsign(uint8_t *a, int n)
for (i = 0; i < n; i++)
for (j = i+1; j < n; j++)
- ret += (a[i] & _pbits) > (a[j] & _pbits) ? 1 : 0;
+ ret += a[i] > a[j] ? 1 : 0;
return ret % 2;
}
@@ -372,14 +389,15 @@ permsign(uint8_t *a, int n)
static bool
isconsistent(cube_t c)
{
- uint8_t i, p, e;
+ uint8_t i, p, e, piece;
bool found[12];
for (i = 0; i < 12; i++)
found[i] = false;
for (i = 0; i < 12; i++) {
- p = c.e[i] & _pbits;
- e = c.e[i] & ~_pbits;
+ piece = get_edge(c, i);
+ p = piece & _pbits;
+ e = piece & _eobit;
if (p >= 12)
goto inconsistent_ep;
if (e != 0 && e != _eobit)
@@ -393,8 +411,9 @@ isconsistent(cube_t c)
for (i = 0; i < 8; i++)
found[i] = false;
for (i = 0; i < 8; i++) {
- p = c.c[i] & _pbits;
- e = c.c[i] & ~_pbits;
+ piece = get_corner(c, i);
+ p = piece & _pbits;
+ e = piece & _cobits;
if (p >= 8)
goto inconsistent_cp;
if (e != 0 && e != _ctwist_cw && e != _ctwist_ccw)
@@ -432,7 +451,7 @@ inconsistent_co:
bool
issolvable(cube_t cube)
{
- int8_t i, eo, co;
+ uint8_t i, eo, co, piece, e[12], c[8];
#ifdef DEBUG
if (!isconsistent(cube)) {
@@ -441,18 +460,27 @@ issolvable(cube_t cube)
}
#endif
- if (permsign(cube.e, 12) != permsign(cube.c, 8))
+ for (i = 0; i < 12; i++)
+ e[i] = get_edge(cube, i) & _pbits;
+ for (i = 0; i < 8; i++)
+ c[i] = get_corner(cube, i) & _pbits;
+
+ if (permsign(e, 12) != permsign(c, 8))
goto issolvable_parity;
eo = 0;
- for (i = 0; i < 12; i++)
- eo += (cube.e[i] & _eobit) >> _eoshift;
+ for (i = 0; i < 12; i++) {
+ piece = get_edge(cube, i);
+ eo += (piece & _eobit) >> _eoshift;
+ }
if (eo % 2 != 0)
goto issolvable_eo;
co = 0;
- for (i = 0; i < 8; i++)
- co += (cube.c[i] & _cobits) >> _coshift;
+ for (i = 0; i < 8; i++) {
+ piece = get_corner(cube, i);
+ co += (piece & _cobits) >> _coshift;
+ }
if (co % 3 != 0)
goto issolvable_co;
@@ -478,6 +506,15 @@ issolvable_co:
bool
equal(cube_t cube1, cube_t cube2)
{
+#ifdef CUBE_AVX2
+ uint32_t mask;
+ __m256i cmp;
+
+ cmp = _mm256_cmpeq_epi8(cube1, cube2);
+ mask = _mm256_movemask_epi8(cmp);
+
+ return mask == 0xffffffffU;
+#else
uint8_t i;
for (i = 0; i < 12; i++)
@@ -489,6 +526,7 @@ equal(cube_t cube1, cube_t cube2)
return false;
return true;
+#endif
}
bool
@@ -506,9 +544,6 @@ iserror(cube_t cube)
cube_t
move(cube_t c, move_t m)
{
- cube_t ret;
- uint8_t aux, auy, auz;
-
#ifdef DEBUG
if (!isconsistent(c)) {
fprintf(stderr, "move error, inconsistent cube\n");
@@ -516,154 +551,11 @@ move(cube_t c, move_t m)
}
#endif
-#define PERM4(r, i, j, k, l) \
- aux = r[i]; \
- r[i] = r[l]; \
- r[l] = r[k]; \
- r[k] = r[j]; \
- r[j] = aux;
-#define PERM22(r, i, j, k, l) \
- aux = r[i]; \
- r[i] = r[j]; \
- r[j] = aux; \
- aux = r[k]; \
- r[k] = r[l]; \
- r[l] = aux;
-#define CO(a, b) \
- aux = (a & _cobits) + (b & _cobits); \
- auy = (aux + _ctwist_cw) >> 2U; \
- auz = (aux + auy) & _cobits2; \
- a = (a & _pbits) | auz;
-#define CO4(r, i, j, k, l) \
- CO(r[i], _ctwist_cw) \
- CO(r[j], _ctwist_cw) \
- CO(r[k], _ctwist_ccw) \
- CO(r[l], _ctwist_ccw)
-#define EO4(r, i, j, k, l) \
- r[i] ^= _eobit; \
- r[j] ^= _eobit; \
- r[k] ^= _eobit; \
- r[l] ^= _eobit;
-
- ret = c;
-
- switch (m) {
- case U:
- PERM4(ret.e, _e_uf, _e_ul, _e_ub, _e_ur)
- PERM4(ret.c, _c_ufr, _c_ufl, _c_ubl, _c_ubr)
-
- return ret;
- case U2:
- PERM22(ret.e, _e_uf, _e_ub, _e_ul, _e_ur)
- PERM22(ret.c, _c_ufr, _c_ubl, _c_ufl, _c_ubr)
-
- return ret;
- case U3:
- PERM4(ret.e, _e_uf, _e_ur, _e_ub, _e_ul)
- PERM4(ret.c, _c_ufr, _c_ubr, _c_ubl, _c_ufl)
-
- return ret;
- case D:
- PERM4(ret.e, _e_df, _e_dr, _e_db, _e_dl)
- PERM4(ret.c, _c_dfr, _c_dbr, _c_dbl, _c_dfl)
-
- return ret;
- case D2:
- PERM22(ret.e, _e_df, _e_db, _e_dr, _e_dl)
- PERM22(ret.c, _c_dfr, _c_dbl, _c_dbr, _c_dfl)
-
- return ret;
- case D3:
- PERM4(ret.e, _e_df, _e_dl, _e_db, _e_dr)
- PERM4(ret.c, _c_dfr, _c_dfl, _c_dbl, _c_dbr)
-
- return ret;
- case R:
- PERM4(ret.e, _e_ur, _e_br, _e_dr, _e_fr)
- PERM4(ret.c, _c_ufr, _c_ubr, _c_dbr, _c_dfr)
-
- CO4(ret.c, _c_ubr, _c_dfr, _c_ufr, _c_dbr)
-
- return ret;
- case R2:
- PERM22(ret.e, _e_ur, _e_dr, _e_fr, _e_br)
- PERM22(ret.c, _c_ufr, _c_dbr, _c_ubr, _c_dfr)
-
- return ret;
- case R3:
- PERM4(ret.e, _e_ur, _e_fr, _e_dr, _e_br)
- PERM4(ret.c, _c_ufr, _c_dfr, _c_dbr, _c_ubr)
-
- CO4(ret.c, _c_ubr, _c_dfr, _c_ufr, _c_dbr)
-
- return ret;
- case L:
- PERM4(ret.e, _e_ul, _e_fl, _e_dl, _e_bl)
- PERM4(ret.c, _c_ufl, _c_dfl, _c_dbl, _c_ubl)
-
- CO4(ret.c, _c_ufl, _c_dbl, _c_dfl, _c_ubl)
-
- return ret;
- case L2:
- PERM22(ret.e, _e_ul, _e_dl, _e_fl, _e_bl)
- PERM22(ret.c, _c_ufl, _c_dbl, _c_ubl, _c_dfl)
-
- return ret;
- case L3:
- PERM4(ret.e, _e_ul, _e_bl, _e_dl, _e_fl)
- PERM4(ret.c, _c_ufl, _c_ubl, _c_dbl, _c_dfl)
-
- CO4(ret.c, _c_ufl, _c_dbl, _c_dfl, _c_ubl)
-
- return ret;
- case F:
- PERM4(ret.e, _e_uf, _e_fr, _e_df, _e_fl)
- PERM4(ret.c, _c_ufr, _c_dfr, _c_dfl, _c_ufl)
-
- EO4(ret.e, _e_uf, _e_fr, _e_df, _e_fl)
- CO4(ret.c, _c_ufr, _c_dfl, _c_dfr, _c_ufl)
-
- return ret;
- case F2:
- PERM22(ret.e, _e_uf, _e_df, _e_fr, _e_fl)
- PERM22(ret.c, _c_ufr, _c_dfl, _c_ufl, _c_dfr)
-
- return ret;
- case F3:
- PERM4(ret.e, _e_uf, _e_fl, _e_df, _e_fr)
- PERM4(ret.c, _c_ufr, _c_ufl, _c_dfl, _c_dfr)
-
- EO4(ret.e, _e_uf, _e_fr, _e_df, _e_fl)
- CO4(ret.c, _c_ufr, _c_dfl, _c_dfr, _c_ufl)
-
- return ret;
- case B:
- PERM4(ret.e, _e_ub, _e_bl, _e_db, _e_br)
- PERM4(ret.c, _c_ubr, _c_ubl, _c_dbl, _c_dbr)
-
- EO4(ret.e, _e_ub, _e_br, _e_db, _e_bl)
- CO4(ret.c, _c_ubl, _c_dbr, _c_dbl, _c_ubr)
-
- return ret;
- case B2:
- PERM22(ret.e, _e_ub, _e_db, _e_br, _e_bl)
- PERM22(ret.c, _c_ubr, _c_dbl, _c_ubl, _c_dbr)
-
- return ret;
- case B3:
- PERM4(ret.e, _e_ub, _e_br, _e_db, _e_bl)
- PERM4(ret.c, _c_ubr, _c_dbr, _c_dbl, _c_ubl)
-
- EO4(ret.e, _e_ub, _e_br, _e_db, _e_bl)
- CO4(ret.c, _c_ubl, _c_dbr, _c_dbl, _c_ubr)
-
- return ret;
- default:
-#ifdef DEBUG
- fprintf(stderr, "mover error, unknown move\n");
+#ifdef CUBE_AVX2
+#include "_move_logic_avx2.c"
+#else
+#include "_move_logic_arr.c"
#endif
- goto move_error;
- }
move_error:
return errorcube;
@@ -672,8 +564,9 @@ move_error:
cube_t
inverse(cube_t c)
{
+ /* TODO: optimize for avx2 */
uint8_t i, piece, orien;
- cube_t ret = {0};
+ cube_t ret = zerocube;
#ifdef DEBUG
if (!isconsistent(c)) {
@@ -683,15 +576,15 @@ inverse(cube_t c)
#endif
for (i = 0; i < 12; i++) {
- piece = c.e[i & _pbits];
+ piece = get_edge(c, i);
orien = piece & _eobit;
- ret.e[piece & _pbits] = i | orien;
+ set_edge(&ret, piece & _pbits, i | orien);
}
for (i = 0; i < 8; i++) {
- piece = c.c[i & _pbits];
+ piece = get_corner(c, i);
orien = ((piece << 1) | (piece >> 1)) & _cobits2;
- ret.c[piece & _pbits] = i | orien;
+ set_corner(&ret, piece & _pbits, i | orien);
}
return ret;
@@ -700,8 +593,9 @@ inverse(cube_t c)
cube_t
compose(cube_t c1, cube_t c2)
{
- uint8_t i, piece, orien, aux, auy;
- cube_t ret = {0};
+ /* TODO: optimize for avx2 */
+ uint8_t i, piece1, piece2, p, orien, aux, auy;
+ cube_t ret = zerocube;
#ifdef DEBUG
if (!isconsistent(c1) || !isconsistent(c2)) {
@@ -711,17 +605,21 @@ compose(cube_t c1, cube_t c2)
#endif
for (i = 0; i < 12; i++) {
- piece = c2.e[i] & _pbits;
- orien = (c2.e[i] ^ c1.e[piece]) & _eobit;
- ret.e[i] = (c1.e[piece] & _pbits) | orien;
+ piece2 = get_edge(c2, i);
+ p = piece2 & _pbits;
+ piece1 = get_edge(c1, p);
+ orien = (piece2 ^ piece1) & _eobit;
+ set_edge(&ret, i, (piece1 & _pbits) | orien);
}
for (i = 0; i < 8; i++) {
- piece = c2.c[i] & _pbits;
- aux = (c2.c[i] & _cobits) + (c1.c[piece] & _cobits);
+ piece2 = get_corner(c2, i);
+ p = piece2 & _pbits;
+ piece1 = get_corner(c1, p);
+ aux = (piece2 & _cobits) + (piece1 & _cobits);
auy = (aux + _ctwist_cw) >> 2U;
orien = (aux + auy) & _cobits2;
- ret.c[i] = (c1.c[piece] & _pbits) | orien;
+ set_corner(&ret, i, (piece1 & _pbits) | orien);
}
return ret;
@@ -730,14 +628,16 @@ compose(cube_t c1, cube_t c2)
static cube_t
flipallcorners(cube_t c)
{
+/* TODO: optimize for avx2, can be a couple of instructions */
+
uint8_t i, piece, orien;
cube_t ret;
ret = c;
for (i = 0; i < 8; i++) {
- piece = ret.c[i];
+ piece = get_corner(c, i);
orien = ((piece << 1) | (piece >> 1)) & _cobits2;
- ret.c[i] = (piece & _pbits) | orien;
+ set_corner(&ret, i, (piece & _pbits) | orien);
}
return ret;
@@ -746,8 +646,6 @@ flipallcorners(cube_t c)
cube_t
transform(cube_t c, trans_t t)
{
-#include "_trans_move_arr.c"
-
cube_t ret;
#ifdef DEBUG
@@ -761,6 +659,12 @@ transform(cube_t c, trans_t t)
}
#endif
+#ifdef CUBE_AVX2
+#include "_trans_move_avx2.c"
+#else
+#include "_trans_move_arr.c"
+#endif
+
ret = compose(solvedcube, trans_move_cube[t]);
ret = compose(ret, c);
ret = compose(ret, trans_move_cube_inverse[t]);
diff --git a/src/cube.h b/src/cube.h
@@ -1,11 +1,11 @@
/* Types *********************************************************************/
-/* TODO: ifdef for different implementations */
/* See doc/CUBE_INTERNAL.md for a description of the cube format */
-typedef struct {
- uint8_t c[8];
- uint8_t e[12];
-} cube_t;
+#ifdef CUBE_AVX2
+typedef __m256i cube_t;
+#else
+typedef struct { uint8_t c[8]; uint8_t e[12]; } cube_t;
+#endif
typedef uint8_t move_t;
typedef uint8_t trans_t;