Split into .h files - h48 - A prototype for an optimal Rubik's cube solver, work in progress.

commit a660922d738b78b11fc78207daabea031058f710
parent 75966319fd5891c2c1bd35b1f7c93eab172fd28e
Author: Sebastiano Tronto <sebastiano@tronto.net>
Date:   Fri, 10 May 2024 09:10:12 +0200

Split into .h files

Diffstat:
A src/constants.h  | 530 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/cube.c  | 2371 +------------------------------------------------------------------------------
A src/cube_avx2.h  | 229 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/cube_neon.h  | 6 ++++++
A src/cube_portable.h  | 224 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/cube_routines.h  | 730 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/moves.h  | 47 +++++++++++++++++++++++++++++++++++++++++++++++
A src/solve_generic.h  | 259 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/solve_h48.h  | 266 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

9 files changed, 2300 insertions(+), 2362 deletions(-)
diff --git a/src/constants.h b/src/constants.h
@@ -0,0 +1,530 @@
+#define _2p11  2048U
+#define _2p12  4096U
+#define _3p7   2187U
+#define _3p8   6561U
+#define _12c4  495U
+#define _8c4   70U
+
+#define COCSEP_CLASSES 3393U
+
+_static int64_t binomial[12][12] = {
+	{1,  0,  0,   0,   0,   0,   0,   0,   0,  0,  0, 0},
+	{1,  1,  0,   0,   0,   0,   0,   0,   0,  0,  0, 0},
+	{1,  2,  1,   0,   0,   0,   0,   0,   0,  0,  0, 0},
+	{1,  3,  3,   1,   0,   0,   0,   0,   0,  0,  0, 0},
+	{1,  4,  6,   4,   1,   0,   0,   0,   0,  0,  0, 0},
+	{1,  5, 10,  10,   5,   1,   0,   0,   0,  0,  0, 0},
+	{1,  6, 15,  20,  15,   6,   1,   0,   0,  0,  0, 0},
+	{1,  7, 21,  35,  35,  21,   7,   1,   0,  0,  0, 0},
+	{1,  8, 28,  56,  70,  56,  28,   8,   1,  0,  0, 0},
+	{1,  9, 36,  84, 126, 126,  84,  36,   9,  1,  0, 0},
+	{1, 10, 45, 120, 210, 252, 210, 120,  45, 10,  1, 0},
+	{1, 11, 55, 165, 330, 462, 462, 330, 165, 55, 11, 1},
+};
+
+#define _move_U  0U
+#define _move_U2 1U
+#define _move_U3 2U
+#define _move_D  3U
+#define _move_D2 4U
+#define _move_D3 5U
+#define _move_R  6U
+#define _move_R2 7U
+#define _move_R3 8U
+#define _move_L  9U
+#define _move_L2 10U
+#define _move_L3 11U
+#define _move_F  12U
+#define _move_F2 13U
+#define _move_F3 14U
+#define _move_B  15U
+#define _move_B2 16U
+#define _move_B3 17U
+
+#define _trans_UFr 0
+#define _trans_ULr 1
+#define _trans_UBr 2
+#define _trans_URr 3
+#define _trans_DFr 4
+#define _trans_DLr 5
+#define _trans_DBr 6
+#define _trans_DRr 7
+#define _trans_RUr 8
+#define _trans_RFr 9
+#define _trans_RDr 10
+#define _trans_RBr 11
+#define _trans_LUr 12
+#define _trans_LFr 13
+#define _trans_LDr 14
+#define _trans_LBr 15
+#define _trans_FUr 16
+#define _trans_FRr 17
+#define _trans_FDr 18
+#define _trans_FLr 19
+#define _trans_BUr 20
+#define _trans_BRr 21
+#define _trans_BDr 22
+#define _trans_BLr 23
+
+#define _trans_UFm 24
+#define _trans_ULm 25
+#define _trans_UBm 26
+#define _trans_URm 27
+#define _trans_DFm 28
+#define _trans_DLm 29
+#define _trans_DBm 30
+#define _trans_DRm 31
+#define _trans_RUm 32
+#define _trans_RFm 33
+#define _trans_RDm 34
+#define _trans_RBm 35
+#define _trans_LUm 36
+#define _trans_LFm 37
+#define _trans_LDm 38
+#define _trans_LBm 39
+#define _trans_FUm 40
+#define _trans_FRm 41
+#define _trans_FDm 42
+#define _trans_FLm 43
+#define _trans_BUm 44
+#define _trans_BRm 45
+#define _trans_BDm 46
+#define _trans_BLm 47
+
+#define _c_ufr      0U
+#define _c_ubl      1U
+#define _c_dfl      2U
+#define _c_dbr      3U
+#define _c_ufl      4U
+#define _c_ubr      5U
+#define _c_dfr      6U
+#define _c_dbl      7U
+
+#define _e_uf       0U
+#define _e_ub       1U
+#define _e_db       2U
+#define _e_df       3U
+#define _e_ur       4U
+#define _e_ul       5U
+#define _e_dl       6U
+#define _e_dr       7U
+#define _e_fr       8U
+#define _e_fl       9U
+#define _e_bl       10U
+#define _e_br       11U
+
+#define _eoshift    4U
+#define _coshift    5U
+
+#define _pbits      0xFU
+#define _esepbit1   0x4U
+#define _esepbit2   0x8U
+#define _csepbit    0x4U
+#define _eobit      0x10U
+#define _cobits     0xF0U
+#define _cobits2    0x60U
+#define _ctwist_cw  0x20U
+#define _ctwist_ccw 0x40U
+#define _eflip      0x10U
+#define _error      0xFFU
+
+_static cube_t zero = { .corner = {0}, .edge = {0} };
+_static cube_t solved = {
+	.corner = {0, 1, 2, 3, 4, 5, 6, 7},
+	.edge = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}
+};
+
+#define zero_fast fastcube( \
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
+#define solved_fast fastcube( \
+    0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)
+
+#define _move_cube_U fastcube( \
+    5, 4, 2, 3, 0, 1, 6, 7, 4, 5, 2, 3, 1, 0, 6, 7, 8, 9, 10, 11)
+#define _move_cube_U2 fastcube( \
+    1, 0, 2, 3, 5, 4, 6, 7, 1, 0, 2, 3, 5, 4, 6, 7, 8, 9, 10, 11)
+#define _move_cube_U3 fastcube( \
+    4, 5, 2, 3, 1, 0, 6, 7, 5, 4, 2, 3, 0, 1, 6, 7, 8, 9, 10, 11)
+#define _move_cube_D fastcube( \
+    0, 1, 7, 6, 4, 5, 2, 3, 0, 1, 7, 6, 4, 5, 2, 3, 8, 9, 10, 11)
+#define _move_cube_D2 fastcube( \
+    0, 1, 3, 2, 4, 5, 7, 6, 0, 1, 3, 2, 4, 5, 7, 6, 8, 9, 10, 11)
+#define _move_cube_D3 fastcube( \
+    0, 1, 6, 7, 4, 5, 3, 2, 0, 1, 6, 7, 4, 5, 3, 2, 8, 9, 10, 11)
+#define _move_cube_R fastcube( \
+    70, 1, 2, 69, 4, 32, 35, 7, 0, 1, 2, 3, 8, 5, 6, 11, 7, 9, 10, 4)
+#define _move_cube_R2 fastcube( \
+    3, 1, 2, 0, 4, 6, 5, 7, 0, 1, 2, 3, 7, 5, 6, 4, 11, 9, 10, 8)
+#define _move_cube_R3 fastcube( \
+    69, 1, 2, 70, 4, 35, 32, 7, 0, 1, 2, 3, 11, 5, 6, 8, 4, 9, 10, 7)
+#define _move_cube_L fastcube( \
+    0, 71, 68, 3, 33, 5, 6, 34, 0, 1, 2, 3, 4, 10, 9, 7, 8, 5, 6, 11)
+#define _move_cube_L2 fastcube( \
+    0, 2, 1, 3, 7, 5, 6, 4, 0, 1, 2, 3, 4, 6, 5, 7, 8, 10, 9, 11)
+#define _move_cube_L3 fastcube( \
+    0, 68, 71, 3, 34, 5, 6, 33, 0, 1, 2, 3, 4, 9, 10, 7, 8, 6, 5, 11)
+#define _move_cube_F fastcube( \
+    36, 1, 38, 3, 66, 5, 64, 7, 25, 1, 2, 24, 4, 5, 6, 7, 16, 19, 10, 11)
+#define _move_cube_F2 fastcube( \
+    2, 1, 0, 3, 6, 5, 4, 7, 3, 1, 2, 0, 4, 5, 6, 7, 9, 8, 10, 11)
+#define _move_cube_F3 fastcube( \
+    38, 1, 36, 3, 64, 5, 66, 7, 24, 1, 2, 25, 4, 5, 6, 7, 19, 16, 10, 11)
+#define _move_cube_B fastcube( \
+    0, 37, 2, 39, 4, 67, 6, 65, 0, 27, 26, 3, 4, 5, 6, 7, 8, 9, 17, 18)
+#define _move_cube_B2 fastcube( \
+    0, 3, 2, 1, 4, 7, 6, 5, 0, 2, 1, 3, 4, 5, 6, 7, 8, 9, 11, 10)
+#define _move_cube_B3 fastcube( \
+    0, 39, 2, 37, 4, 65, 6, 67, 0, 26, 27, 3, 4, 5, 6, 7, 8, 9, 18, 17)
+
+#define _trans_cube_UFr fastcube( \
+    0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)
+#define _trans_cube_UFr_inverse fastcube( \
+    0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)
+#define _trans_cube_ULr fastcube( \
+    4, 5, 7, 6, 1, 0, 2, 3, 5, 4, 7, 6, 0, 1, 2, 3, 25, 26, 27, 24)
+#define _trans_cube_ULr_inverse fastcube( \
+    5, 4, 6, 7, 0, 1, 3, 2, 4, 5, 6, 7, 1, 0, 3, 2, 27, 24, 25, 26)
+#define _trans_cube_UBr fastcube( \
+    1, 0, 3, 2, 5, 4, 7, 6, 1, 0, 3, 2, 5, 4, 7, 6, 10, 11, 8, 9)
+#define _trans_cube_UBr_inverse fastcube( \
+    1, 0, 3, 2, 5, 4, 7, 6, 1, 0, 3, 2, 5, 4, 7, 6, 10, 11, 8, 9)
+#define _trans_cube_URr fastcube( \
+    5, 4, 6, 7, 0, 1, 3, 2, 4, 5, 6, 7, 1, 0, 3, 2, 27, 24, 25, 26)
+#define _trans_cube_URr_inverse fastcube( \
+    4, 5, 7, 6, 1, 0, 2, 3, 5, 4, 7, 6, 0, 1, 2, 3, 25, 26, 27, 24)
+#define _trans_cube_DFr fastcube( \
+    2, 3, 0, 1, 6, 7, 4, 5, 3, 2, 1, 0, 6, 7, 4, 5, 9, 8, 11, 10)
+#define _trans_cube_DFr_inverse fastcube( \
+    2, 3, 0, 1, 6, 7, 4, 5, 3, 2, 1, 0, 6, 7, 4, 5, 9, 8, 11, 10)
+#define _trans_cube_DLr fastcube( \
+    7, 6, 4, 5, 2, 3, 1, 0, 6, 7, 4, 5, 2, 3, 0, 1, 26, 25, 24, 27)
+#define _trans_cube_DLr_inverse fastcube( \
+    7, 6, 4, 5, 2, 3, 1, 0, 6, 7, 4, 5, 2, 3, 0, 1, 26, 25, 24, 27)
+#define _trans_cube_DBr fastcube( \
+    3, 2, 1, 0, 7, 6, 5, 4, 2, 3, 0, 1, 7, 6, 5, 4, 11, 10, 9, 8)
+#define _trans_cube_DBr_inverse fastcube( \
+    3, 2, 1, 0, 7, 6, 5, 4, 2, 3, 0, 1, 7, 6, 5, 4, 11, 10, 9, 8)
+#define _trans_cube_DRr fastcube( \
+    6, 7, 5, 4, 3, 2, 0, 1, 7, 6, 5, 4, 3, 2, 1, 0, 24, 27, 26, 25)
+#define _trans_cube_DRr_inverse fastcube( \
+    6, 7, 5, 4, 3, 2, 0, 1, 7, 6, 5, 4, 3, 2, 1, 0, 24, 27, 26, 25)
+#define _trans_cube_RUr fastcube( \
+    64, 67, 65, 66, 37, 38, 36, 39, 20, 23, 22, 21, 24, 27, 26, 25, 0, 1, 2, 3)
+#define _trans_cube_RUr_inverse fastcube( \
+    32, 34, 35, 33, 70, 68, 69, 71, 8, 9, 10, 11, 16, 19, 18, 17, 20, 23, 22, 21)
+#define _trans_cube_RFr fastcube( \
+    38, 37, 36, 39, 64, 67, 66, 65, 24, 27, 26, 25, 23, 20, 21, 22, 19, 16, 17, 18)
+#define _trans_cube_RFr_inverse fastcube( \
+    36, 39, 38, 37, 66, 65, 64, 67, 25, 26, 27, 24, 21, 22, 23, 20, 16, 19, 18, 17)
+#define _trans_cube_RDr fastcube( \
+    67, 64, 66, 65, 38, 37, 39, 36, 23, 20, 21, 22, 27, 24, 25, 26, 2, 3, 0, 1)
+#define _trans_cube_RDr_inverse fastcube( \
+    33, 35, 34, 32, 71, 69, 68, 70, 10, 11, 8, 9, 17, 18, 19, 16, 21, 22, 23, 20)
+#define _trans_cube_RBr fastcube( \
+    37, 38, 39, 36, 67, 64, 65, 66, 27, 24, 25, 26, 20, 23, 22, 21, 17, 18, 19, 16)
+#define _trans_cube_RBr_inverse fastcube( \
+    37, 38, 39, 36, 67, 64, 65, 66, 27, 24, 25, 26, 20, 23, 22, 21, 17, 18, 19, 16)
+#define _trans_cube_LUr fastcube( \
+    65, 66, 64, 67, 36, 39, 37, 38, 21, 22, 23, 20, 26, 25, 24, 27, 1, 0, 3, 2)
+#define _trans_cube_LUr_inverse fastcube( \
+    34, 32, 33, 35, 68, 70, 71, 69, 9, 8, 11, 10, 19, 16, 17, 18, 22, 21, 20, 23)
+#define _trans_cube_LFr fastcube( \
+    36, 39, 38, 37, 66, 65, 64, 67, 25, 26, 27, 24, 21, 22, 23, 20, 16, 19, 18, 17)
+#define _trans_cube_LFr_inverse fastcube( \
+    38, 37, 36, 39, 64, 67, 66, 65, 24, 27, 26, 25, 23, 20, 21, 22, 19, 16, 17, 18)
+#define _trans_cube_LDr fastcube( \
+    66, 65, 67, 64, 39, 36, 38, 37, 22, 21, 20, 23, 25, 26, 27, 24, 3, 2, 1, 0)
+#define _trans_cube_LDr_inverse fastcube( \
+    35, 33, 32, 34, 69, 71, 70, 68, 11, 10, 9, 8, 18, 17, 16, 19, 23, 20, 21, 22)
+#define _trans_cube_LBr fastcube( \
+    39, 36, 37, 38, 65, 66, 67, 64, 26, 25, 24, 27, 22, 21, 20, 23, 18, 17, 16, 19)
+#define _trans_cube_LBr_inverse fastcube( \
+    39, 36, 37, 38, 65, 66, 67, 64, 26, 25, 24, 27, 22, 21, 20, 23, 18, 17, 16, 19)
+#define _trans_cube_FUr fastcube( \
+    68, 70, 69, 71, 32, 34, 33, 35, 16, 19, 18, 17, 9, 8, 11, 10, 5, 4, 7, 6)
+#define _trans_cube_FUr_inverse fastcube( \
+    68, 70, 69, 71, 32, 34, 33, 35, 16, 19, 18, 17, 9, 8, 11, 10, 5, 4, 7, 6)
+#define _trans_cube_FRr fastcube( \
+    32, 34, 35, 33, 70, 68, 69, 71, 8, 9, 10, 11, 16, 19, 18, 17, 20, 23, 22, 21)
+#define _trans_cube_FRr_inverse fastcube( \
+    64, 67, 65, 66, 37, 38, 36, 39, 20, 23, 22, 21, 24, 27, 26, 25, 0, 1, 2, 3)
+#define _trans_cube_FDr fastcube( \
+    70, 68, 71, 69, 34, 32, 35, 33, 19, 16, 17, 18, 8, 9, 10, 11, 7, 6, 5, 4)
+#define _trans_cube_FDr_inverse fastcube( \
+    69, 71, 68, 70, 33, 35, 32, 34, 17, 18, 19, 16, 11, 10, 9, 8, 4, 5, 6, 7)
+#define _trans_cube_FLr fastcube( \
+    34, 32, 33, 35, 68, 70, 71, 69, 9, 8, 11, 10, 19, 16, 17, 18, 22, 21, 20, 23)
+#define _trans_cube_FLr_inverse fastcube( \
+    65, 66, 64, 67, 36, 39, 37, 38, 21, 22, 23, 20, 26, 25, 24, 27, 1, 0, 3, 2)
+#define _trans_cube_BUr fastcube( \
+    69, 71, 68, 70, 33, 35, 32, 34, 17, 18, 19, 16, 11, 10, 9, 8, 4, 5, 6, 7)
+#define _trans_cube_BUr_inverse fastcube( \
+    70, 68, 71, 69, 34, 32, 35, 33, 19, 16, 17, 18, 8, 9, 10, 11, 7, 6, 5, 4)
+#define _trans_cube_BRr fastcube( \
+    35, 33, 32, 34, 69, 71, 70, 68, 11, 10, 9, 8, 18, 17, 16, 19, 23, 20, 21, 22)
+#define _trans_cube_BRr_inverse fastcube( \
+    66, 65, 67, 64, 39, 36, 38, 37, 22, 21, 20, 23, 25, 26, 27, 24, 3, 2, 1, 0)
+#define _trans_cube_BDr fastcube( \
+    71, 69, 70, 68, 35, 33, 34, 32, 18, 17, 16, 19, 10, 11, 8, 9, 6, 7, 4, 5)
+#define _trans_cube_BDr_inverse fastcube( \
+    71, 69, 70, 68, 35, 33, 34, 32, 18, 17, 16, 19, 10, 11, 8, 9, 6, 7, 4, 5)
+#define _trans_cube_BLr fastcube( \
+    33, 35, 34, 32, 71, 69, 68, 70, 10, 11, 8, 9, 17, 18, 19, 16, 21, 22, 23, 20)
+#define _trans_cube_BLr_inverse fastcube( \
+    67, 64, 66, 65, 38, 37, 39, 36, 23, 20, 21, 22, 27, 24, 25, 26, 2, 3, 0, 1)
+#define _trans_cube_UFm fastcube( \
+    4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 5, 4, 7, 6, 9, 8, 11, 10)
+#define _trans_cube_UFm_inverse fastcube( \
+    4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 5, 4, 7, 6, 9, 8, 11, 10)
+#define _trans_cube_ULm fastcube( \
+    0, 1, 3, 2, 5, 4, 6, 7, 4, 5, 6, 7, 0, 1, 2, 3, 24, 27, 26, 25)
+#define _trans_cube_ULm_inverse fastcube( \
+    0, 1, 3, 2, 5, 4, 6, 7, 4, 5, 6, 7, 0, 1, 2, 3, 24, 27, 26, 25)
+#define _trans_cube_UBm fastcube( \
+    5, 4, 7, 6, 1, 0, 3, 2, 1, 0, 3, 2, 4, 5, 6, 7, 11, 10, 9, 8)
+#define _trans_cube_UBm_inverse fastcube( \
+    5, 4, 7, 6, 1, 0, 3, 2, 1, 0, 3, 2, 4, 5, 6, 7, 11, 10, 9, 8)
+#define _trans_cube_URm fastcube( \
+    1, 0, 2, 3, 4, 5, 7, 6, 5, 4, 7, 6, 1, 0, 3, 2, 26, 25, 24, 27)
+#define _trans_cube_URm_inverse fastcube( \
+    1, 0, 2, 3, 4, 5, 7, 6, 5, 4, 7, 6, 1, 0, 3, 2, 26, 25, 24, 27)
+#define _trans_cube_DFm fastcube( \
+    6, 7, 4, 5, 2, 3, 0, 1, 3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11)
+#define _trans_cube_DFm_inverse fastcube( \
+    6, 7, 4, 5, 2, 3, 0, 1, 3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11)
+#define _trans_cube_DLm fastcube( \
+    3, 2, 0, 1, 6, 7, 5, 4, 7, 6, 5, 4, 2, 3, 0, 1, 27, 24, 25, 26)
+#define _trans_cube_DLm_inverse fastcube( \
+    2, 3, 1, 0, 7, 6, 4, 5, 6, 7, 4, 5, 3, 2, 1, 0, 25, 26, 27, 24)
+#define _trans_cube_DBm fastcube( \
+    7, 6, 5, 4, 3, 2, 1, 0, 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9)
+#define _trans_cube_DBm_inverse fastcube( \
+    7, 6, 5, 4, 3, 2, 1, 0, 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9)
+#define _trans_cube_DRm fastcube( \
+    2, 3, 1, 0, 7, 6, 4, 5, 6, 7, 4, 5, 3, 2, 1, 0, 25, 26, 27, 24)
+#define _trans_cube_DRm_inverse fastcube( \
+    3, 2, 0, 1, 6, 7, 5, 4, 7, 6, 5, 4, 2, 3, 0, 1, 27, 24, 25, 26)
+#define _trans_cube_RUm fastcube( \
+    68, 71, 69, 70, 33, 34, 32, 35, 21, 22, 23, 20, 25, 26, 27, 24, 0, 1, 2, 3)
+#define _trans_cube_RUm_inverse fastcube( \
+    70, 68, 69, 71, 32, 34, 35, 33, 8, 9, 10, 11, 19, 16, 17, 18, 23, 20, 21, 22)
+#define _trans_cube_RFm fastcube( \
+    34, 33, 32, 35, 68, 71, 70, 69, 25, 26, 27, 24, 22, 21, 20, 23, 19, 16, 17, 18)
+#define _trans_cube_RFm_inverse fastcube( \
+    66, 65, 64, 67, 36, 39, 38, 37, 25, 26, 27, 24, 22, 21, 20, 23, 19, 16, 17, 18)
+#define _trans_cube_RDm fastcube( \
+    71, 68, 70, 69, 34, 33, 35, 32, 22, 21, 20, 23, 26, 25, 24, 27, 2, 3, 0, 1)
+#define _trans_cube_RDm_inverse fastcube( \
+    71, 69, 68, 70, 33, 35, 34, 32, 10, 11, 8, 9, 18, 17, 16, 19, 22, 21, 20, 23)
+#define _trans_cube_RBm fastcube( \
+    33, 34, 35, 32, 71, 68, 69, 70, 26, 25, 24, 27, 21, 22, 23, 20, 17, 18, 19, 16)
+#define _trans_cube_RBm_inverse fastcube( \
+    67, 64, 65, 66, 37, 38, 39, 36, 27, 24, 25, 26, 23, 20, 21, 22, 18, 17, 16, 19)
+#define _trans_cube_LUm fastcube( \
+    69, 70, 68, 71, 32, 35, 33, 34, 20, 23, 22, 21, 27, 24, 25, 26, 1, 0, 3, 2)
+#define _trans_cube_LUm_inverse fastcube( \
+    68, 70, 71, 69, 34, 32, 33, 35, 9, 8, 11, 10, 16, 19, 18, 17, 21, 22, 23, 20)
+#define _trans_cube_LFm fastcube( \
+    32, 35, 34, 33, 70, 69, 68, 71, 24, 27, 26, 25, 20, 23, 22, 21, 16, 19, 18, 17)
+#define _trans_cube_LFm_inverse fastcube( \
+    64, 67, 66, 65, 38, 37, 36, 39, 24, 27, 26, 25, 20, 23, 22, 21, 16, 19, 18, 17)
+#define _trans_cube_LDm fastcube( \
+    70, 69, 71, 68, 35, 32, 34, 33, 23, 20, 21, 22, 24, 27, 26, 25, 3, 2, 1, 0)
+#define _trans_cube_LDm_inverse fastcube( \
+    69, 71, 70, 68, 35, 33, 32, 34, 11, 10, 9, 8, 17, 18, 19, 16, 20, 23, 22, 21)
+#define _trans_cube_LBm fastcube( \
+    35, 32, 33, 34, 69, 70, 71, 68, 27, 24, 25, 26, 23, 20, 21, 22, 18, 17, 16, 19)
+#define _trans_cube_LBm_inverse fastcube( \
+    65, 66, 67, 64, 39, 36, 37, 38, 26, 25, 24, 27, 21, 22, 23, 20, 17, 18, 19, 16)
+#define _trans_cube_FUm fastcube( \
+    64, 66, 65, 67, 36, 38, 37, 39, 16, 19, 18, 17, 8, 9, 10, 11, 4, 5, 6, 7)
+#define _trans_cube_FUm_inverse fastcube( \
+    32, 34, 33, 35, 68, 70, 69, 71, 16, 19, 18, 17, 8, 9, 10, 11, 4, 5, 6, 7)
+#define _trans_cube_FRm fastcube( \
+    36, 38, 39, 37, 66, 64, 65, 67, 9, 8, 11, 10, 16, 19, 18, 17, 21, 22, 23, 20)
+#define _trans_cube_FRm_inverse fastcube( \
+    37, 38, 36, 39, 64, 67, 65, 66, 20, 23, 22, 21, 27, 24, 25, 26, 1, 0, 3, 2)
+#define _trans_cube_FDm fastcube( \
+    66, 64, 67, 65, 38, 36, 39, 37, 19, 16, 17, 18, 9, 8, 11, 10, 6, 7, 4, 5)
+#define _trans_cube_FDm_inverse fastcube( \
+    33, 35, 32, 34, 69, 71, 68, 70, 17, 18, 19, 16, 10, 11, 8, 9, 5, 4, 7, 6)
+#define _trans_cube_FLm fastcube( \
+    38, 36, 37, 39, 64, 66, 67, 65, 8, 9, 10, 11, 19, 16, 17, 18, 23, 20, 21, 22)
+#define _trans_cube_FLm_inverse fastcube( \
+    36, 39, 37, 38, 65, 66, 64, 67, 21, 22, 23, 20, 25, 26, 27, 24, 0, 1, 2, 3)
+#define _trans_cube_BUm fastcube( \
+    65, 67, 64, 66, 37, 39, 36, 38, 17, 18, 19, 16, 10, 11, 8, 9, 5, 4, 7, 6)
+#define _trans_cube_BUm_inverse fastcube( \
+    34, 32, 35, 33, 70, 68, 71, 69, 19, 16, 17, 18, 9, 8, 11, 10, 6, 7, 4, 5)
+#define _trans_cube_BRm fastcube( \
+    39, 37, 36, 38, 65, 67, 66, 64, 10, 11, 8, 9, 18, 17, 16, 19, 22, 21, 20, 23)
+#define _trans_cube_BRm_inverse fastcube( \
+    39, 36, 38, 37, 66, 65, 67, 64, 22, 21, 20, 23, 26, 25, 24, 27, 2, 3, 0, 1)
+#define _trans_cube_BDm fastcube( \
+    67, 65, 66, 64, 39, 37, 38, 36, 18, 17, 16, 19, 11, 10, 9, 8, 7, 6, 5, 4)
+#define _trans_cube_BDm_inverse fastcube( \
+    35, 33, 34, 32, 71, 69, 70, 68, 18, 17, 16, 19, 11, 10, 9, 8, 7, 6, 5, 4)
+#define _trans_cube_BLm fastcube( \
+    37, 39, 38, 36, 67, 65, 64, 66, 11, 10, 9, 8, 17, 18, 19, 16, 20, 23, 22, 21)
+#define _trans_cube_BLm_inverse fastcube( \
+    38, 37, 39, 36, 67, 64, 66, 65, 23, 20, 21, 22, 24, 27, 26, 25, 3, 2, 1, 0)
+
+_static const char *cornerstr[] = {
+	[_c_ufr] = "UFR",
+	[_c_ubl] = "UBL",
+	[_c_dfl] = "DFL",
+	[_c_dbr] = "DBR",
+	[_c_ufl] = "UFL",
+	[_c_ubr] = "UBR",
+	[_c_dfr] = "DFR",
+	[_c_dbl] = "DBL"
+};
+
+_static const char *cornerstralt[] = {
+	[_c_ufr] = "URF",
+	[_c_ubl] = "ULB",
+	[_c_dfl] = "DLF",
+	[_c_dbr] = "DRB",
+	[_c_ufl] = "ULF",
+	[_c_ubr] = "URB",
+	[_c_dfr] = "DRF",
+	[_c_dbl] = "DLB"
+};
+
+_static const char *edgestr[] = {
+	[_e_uf] = "UF",
+	[_e_ub] = "UB",
+	[_e_db] = "DB",
+	[_e_df] = "DF",
+	[_e_ur] = "UR",
+	[_e_ul] = "UL",
+	[_e_dl] = "DL",
+	[_e_dr] = "DR",
+	[_e_fr] = "FR",
+	[_e_fl] = "FL",
+	[_e_bl] = "BL",
+	[_e_br] = "BR"
+};
+
+_static const char *movestr[] = {
+	[_move_U]  = "U",
+	[_move_U2] = "U2",
+	[_move_U3] = "U'",
+	[_move_D]  = "D",
+	[_move_D2] = "D2",
+	[_move_D3] = "D'",
+	[_move_R]  = "R",
+	[_move_R2] = "R2",
+	[_move_R3] = "R'",
+	[_move_L]  = "L",
+	[_move_L2] = "L2",
+	[_move_L3] = "L'",
+	[_move_F]  = "F",
+	[_move_F2] = "F2",
+	[_move_F3] = "F'",
+	[_move_B]  = "B",
+	[_move_B2] = "B2",
+	[_move_B3] = "B'",
+};
+
+_static const char *transstr[] = {
+	[_trans_UFr] = "rotation UF",
+	[_trans_UFm] = "mirrored UF",
+	[_trans_ULr] = "rotation UL",
+	[_trans_ULm] = "mirrored UL",
+	[_trans_UBr] = "rotation UB",
+	[_trans_UBm] = "mirrored UB",
+	[_trans_URr] = "rotation UR",
+	[_trans_URm] = "mirrored UR",
+	[_trans_DFr] = "rotation DF",
+	[_trans_DFm] = "mirrored DF",
+	[_trans_DLr] = "rotation DL",
+	[_trans_DLm] = "mirrored DL",
+	[_trans_DBr] = "rotation DB",
+	[_trans_DBm] = "mirrored DB",
+	[_trans_DRr] = "rotation DR",
+	[_trans_DRm] = "mirrored DR",
+	[_trans_RUr] = "rotation RU",
+	[_trans_RUm] = "mirrored RU",
+	[_trans_RFr] = "rotation RF",
+	[_trans_RFm] = "mirrored RF",
+	[_trans_RDr] = "rotation RD",
+	[_trans_RDm] = "mirrored RD",
+	[_trans_RBr] = "rotation RB",
+	[_trans_RBm] = "mirrored RB",
+	[_trans_LUr] = "rotation LU",
+	[_trans_LUm] = "mirrored LU",
+	[_trans_LFr] = "rotation LF",
+	[_trans_LFm] = "mirrored LF",
+	[_trans_LDr] = "rotation LD",
+	[_trans_LDm] = "mirrored LD",
+	[_trans_LBr] = "rotation LB",
+	[_trans_LBm] = "mirrored LB",
+	[_trans_FUr] = "rotation FU",
+	[_trans_FUm] = "mirrored FU",
+	[_trans_FRr] = "rotation FR",
+	[_trans_FRm] = "mirrored FR",
+	[_trans_FDr] = "rotation FD",
+	[_trans_FDm] = "mirrored FD",
+	[_trans_FLr] = "rotation FL",
+	[_trans_FLm] = "mirrored FL",
+	[_trans_BUr] = "rotation BU",
+	[_trans_BUm] = "mirrored BU",
+	[_trans_BRr] = "rotation BR",
+	[_trans_BRm] = "mirrored BR",
+	[_trans_BDr] = "rotation BD",
+	[_trans_BDm] = "mirrored BD",
+	[_trans_BLr] = "rotation BL",
+	[_trans_BLm] = "mirrored BL",
+};
+
+static uint8_t inverse_trans_table[48] = {
+	[_trans_UFr] = _trans_UFr,
+	[_trans_UFm] = _trans_UFm,
+	[_trans_ULr] = _trans_URr,
+	[_trans_ULm] = _trans_ULm,
+	[_trans_UBr] = _trans_UBr,
+	[_trans_UBm] = _trans_UBm,
+	[_trans_URr] = _trans_ULr,
+	[_trans_URm] = _trans_URm,
+	[_trans_DFr] = _trans_DFr,
+	[_trans_DFm] = _trans_DFm,
+	[_trans_DLr] = _trans_DLr,
+	[_trans_DLm] = _trans_DRm,
+	[_trans_DBr] = _trans_DBr,
+	[_trans_DBm] = _trans_DBm,
+	[_trans_DRr] = _trans_DRr,
+	[_trans_DRm] = _trans_DLm,
+	[_trans_RUr] = _trans_FRr,
+	[_trans_RUm] = _trans_FLm,
+	[_trans_RFr] = _trans_LFr,
+	[_trans_RFm] = _trans_RFm,
+	[_trans_RDr] = _trans_BLr,
+	[_trans_RDm] = _trans_BRm,
+	[_trans_RBr] = _trans_RBr,
+	[_trans_RBm] = _trans_LBm,
+	[_trans_LUr] = _trans_FLr,
+	[_trans_LUm] = _trans_FRm,
+	[_trans_LFr] = _trans_RFr,
+	[_trans_LFm] = _trans_LFm,
+	[_trans_LDr] = _trans_BRr,
+	[_trans_LDm] = _trans_BLm,
+	[_trans_LBr] = _trans_LBr,
+	[_trans_LBm] = _trans_RBm,
+	[_trans_FUr] = _trans_FUr,
+	[_trans_FUm] = _trans_FUm,
+	[_trans_FRr] = _trans_RUr,
+	[_trans_FRm] = _trans_LUm,
+	[_trans_FDr] = _trans_BUr,
+	[_trans_FDm] = _trans_BUm,
+	[_trans_FLr] = _trans_LUr,
+	[_trans_FLm] = _trans_RUm,
+	[_trans_BUr] = _trans_FDr,
+	[_trans_BUm] = _trans_FDm,
+	[_trans_BRr] = _trans_LDr,
+	[_trans_BRm] = _trans_RDm,
+	[_trans_BDr] = _trans_BDr,
+	[_trans_BDm] = _trans_BDm,
+	[_trans_BLr] = _trans_RDr,
+	[_trans_BLm] = _trans_LDm,
+};
diff --git a/src/cube.c b/src/cube.c
@@ -2,10 +2,7 @@
 #include <stdbool.h>
 #include <string.h>
 
-#include "cube.h"
-
 #ifdef DEBUG
-
 #include <stdio.h>
 #define _static
 #define _static_inline
@@ -16,2377 +13,27 @@
 		DBG_LOG(__VA_ARGS__);      \
 		return retval;             \
 	}
-
 #else
-
 #define _static static
 #define _static_inline static inline
 #define DBG_LOG(...)
 #define DBG_WARN(condition, ...)
 #define DBG_ASSERT(condition, retval, ...)
-
 #endif
 
-/******************************************************************************
-Section: mathematical constants
-******************************************************************************/
-
-#define _2p11  2048U
-#define _2p12  4096U
-#define _3p7   2187U
-#define _3p8   6561U
-#define _12c4  495U
-#define _8c4   70U
-
-#define COCSEP_CLASSES 3393U
-
-_static int64_t binomial[12][12] = {
-	{1,  0,  0,   0,   0,   0,   0,   0,   0,  0,  0, 0},
-	{1,  1,  0,   0,   0,   0,   0,   0,   0,  0,  0, 0},
-	{1,  2,  1,   0,   0,   0,   0,   0,   0,  0,  0, 0},
-	{1,  3,  3,   1,   0,   0,   0,   0,   0,  0,  0, 0},
-	{1,  4,  6,   4,   1,   0,   0,   0,   0,  0,  0, 0},
-	{1,  5, 10,  10,   5,   1,   0,   0,   0,  0,  0, 0},
-	{1,  6, 15,  20,  15,   6,   1,   0,   0,  0,  0, 0},
-	{1,  7, 21,  35,  35,  21,   7,   1,   0,  0,  0, 0},
-	{1,  8, 28,  56,  70,  56,  28,   8,   1,  0,  0, 0},
-	{1,  9, 36,  84, 126, 126,  84,  36,   9,  1,  0, 0},
-	{1, 10, 45, 120, 210, 252, 210, 120,  45, 10,  1, 0},
-	{1, 11, 55, 165, 330, 462, 462, 330, 165, 55, 11, 1},
-};
-
-/******************************************************************************
-Section: moves definitions and tables
-******************************************************************************/
-
-#define _move_U  0U
-#define _move_U2 1U
-#define _move_U3 2U
-#define _move_D  3U
-#define _move_D2 4U
-#define _move_D3 5U
-#define _move_R  6U
-#define _move_R2 7U
-#define _move_R3 8U
-#define _move_L  9U
-#define _move_L2 10U
-#define _move_L3 11U
-#define _move_F  12U
-#define _move_F2 13U
-#define _move_F3 14U
-#define _move_B  15U
-#define _move_B2 16U
-#define _move_B3 17U
-
-#define _trans_UFr 0
-#define _trans_ULr 1
-#define _trans_UBr 2
-#define _trans_URr 3
-#define _trans_DFr 4
-#define _trans_DLr 5
-#define _trans_DBr 6
-#define _trans_DRr 7
-#define _trans_RUr 8
-#define _trans_RFr 9
-#define _trans_RDr 10
-#define _trans_RBr 11
-#define _trans_LUr 12
-#define _trans_LFr 13
-#define _trans_LDr 14
-#define _trans_LBr 15
-#define _trans_FUr 16
-#define _trans_FRr 17
-#define _trans_FDr 18
-#define _trans_FLr 19
-#define _trans_BUr 20
-#define _trans_BRr 21
-#define _trans_BDr 22
-#define _trans_BLr 23
-
-#define _trans_UFm 24
-#define _trans_ULm 25
-#define _trans_UBm 26
-#define _trans_URm 27
-#define _trans_DFm 28
-#define _trans_DLm 29
-#define _trans_DBm 30
-#define _trans_DRm 31
-#define _trans_RUm 32
-#define _trans_RFm 33
-#define _trans_RDm 34
-#define _trans_RBm 35
-#define _trans_LUm 36
-#define _trans_LFm 37
-#define _trans_LDm 38
-#define _trans_LBm 39
-#define _trans_FUm 40
-#define _trans_FRm 41
-#define _trans_FDm 42
-#define _trans_FLm 43
-#define _trans_BUm 44
-#define _trans_BRm 45
-#define _trans_BDm 46
-#define _trans_BLm 47
-
-#define _c_ufr      0U
-#define _c_ubl      1U
-#define _c_dfl      2U
-#define _c_dbr      3U
-#define _c_ufl      4U
-#define _c_ubr      5U
-#define _c_dfr      6U
-#define _c_dbl      7U
-
-#define _e_uf       0U
-#define _e_ub       1U
-#define _e_db       2U
-#define _e_df       3U
-#define _e_ur       4U
-#define _e_ul       5U
-#define _e_dl       6U
-#define _e_dr       7U
-#define _e_fr       8U
-#define _e_fl       9U
-#define _e_bl       10U
-#define _e_br       11U
-
-#define _eoshift    4U
-#define _coshift    5U
-
-#define _pbits      0xFU
-#define _esepbit1   0x4U
-#define _esepbit2   0x8U
-#define _csepbit    0x4U
-#define _eobit      0x10U
-#define _cobits     0xF0U
-#define _cobits2    0x60U
-#define _ctwist_cw  0x20U
-#define _ctwist_ccw 0x40U
-#define _eflip      0x10U
-#define _error      0xFFU
-
-_static cube_t zero = { .corner = {0}, .edge = {0} };
-_static cube_t solved = {
-	.corner = {0, 1, 2, 3, 4, 5, 6, 7},
-	.edge = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}
-};
-
-#define zero_fast fastcube( \
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
-#define solved_fast fastcube( \
-    0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)
-
-#define _move_cube_U fastcube( \
-    5, 4, 2, 3, 0, 1, 6, 7, 4, 5, 2, 3, 1, 0, 6, 7, 8, 9, 10, 11)
-#define _move_cube_U2 fastcube( \
-    1, 0, 2, 3, 5, 4, 6, 7, 1, 0, 2, 3, 5, 4, 6, 7, 8, 9, 10, 11)
-#define _move_cube_U3 fastcube( \
-    4, 5, 2, 3, 1, 0, 6, 7, 5, 4, 2, 3, 0, 1, 6, 7, 8, 9, 10, 11)
-#define _move_cube_D fastcube( \
-    0, 1, 7, 6, 4, 5, 2, 3, 0, 1, 7, 6, 4, 5, 2, 3, 8, 9, 10, 11)
-#define _move_cube_D2 fastcube( \
-    0, 1, 3, 2, 4, 5, 7, 6, 0, 1, 3, 2, 4, 5, 7, 6, 8, 9, 10, 11)
-#define _move_cube_D3 fastcube( \
-    0, 1, 6, 7, 4, 5, 3, 2, 0, 1, 6, 7, 4, 5, 3, 2, 8, 9, 10, 11)
-#define _move_cube_R fastcube( \
-    70, 1, 2, 69, 4, 32, 35, 7, 0, 1, 2, 3, 8, 5, 6, 11, 7, 9, 10, 4)
-#define _move_cube_R2 fastcube( \
-    3, 1, 2, 0, 4, 6, 5, 7, 0, 1, 2, 3, 7, 5, 6, 4, 11, 9, 10, 8)
-#define _move_cube_R3 fastcube( \
-    69, 1, 2, 70, 4, 35, 32, 7, 0, 1, 2, 3, 11, 5, 6, 8, 4, 9, 10, 7)
-#define _move_cube_L fastcube( \
-    0, 71, 68, 3, 33, 5, 6, 34, 0, 1, 2, 3, 4, 10, 9, 7, 8, 5, 6, 11)
-#define _move_cube_L2 fastcube( \
-    0, 2, 1, 3, 7, 5, 6, 4, 0, 1, 2, 3, 4, 6, 5, 7, 8, 10, 9, 11)
-#define _move_cube_L3 fastcube( \
-    0, 68, 71, 3, 34, 5, 6, 33, 0, 1, 2, 3, 4, 9, 10, 7, 8, 6, 5, 11)
-#define _move_cube_F fastcube( \
-    36, 1, 38, 3, 66, 5, 64, 7, 25, 1, 2, 24, 4, 5, 6, 7, 16, 19, 10, 11)
-#define _move_cube_F2 fastcube( \
-    2, 1, 0, 3, 6, 5, 4, 7, 3, 1, 2, 0, 4, 5, 6, 7, 9, 8, 10, 11)
-#define _move_cube_F3 fastcube( \
-    38, 1, 36, 3, 64, 5, 66, 7, 24, 1, 2, 25, 4, 5, 6, 7, 19, 16, 10, 11)
-#define _move_cube_B fastcube( \
-    0, 37, 2, 39, 4, 67, 6, 65, 0, 27, 26, 3, 4, 5, 6, 7, 8, 9, 17, 18)
-#define _move_cube_B2 fastcube( \
-    0, 3, 2, 1, 4, 7, 6, 5, 0, 2, 1, 3, 4, 5, 6, 7, 8, 9, 11, 10)
-#define _move_cube_B3 fastcube( \
-    0, 39, 2, 37, 4, 65, 6, 67, 0, 26, 27, 3, 4, 5, 6, 7, 8, 9, 18, 17)
-
-#define _trans_cube_UFr fastcube( \
-    0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)
-#define _trans_cube_UFr_inverse fastcube( \
-    0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)
-#define _trans_cube_ULr fastcube( \
-    4, 5, 7, 6, 1, 0, 2, 3, 5, 4, 7, 6, 0, 1, 2, 3, 25, 26, 27, 24)
-#define _trans_cube_ULr_inverse fastcube( \
-    5, 4, 6, 7, 0, 1, 3, 2, 4, 5, 6, 7, 1, 0, 3, 2, 27, 24, 25, 26)
-#define _trans_cube_UBr fastcube( \
-    1, 0, 3, 2, 5, 4, 7, 6, 1, 0, 3, 2, 5, 4, 7, 6, 10, 11, 8, 9)
-#define _trans_cube_UBr_inverse fastcube( \
-    1, 0, 3, 2, 5, 4, 7, 6, 1, 0, 3, 2, 5, 4, 7, 6, 10, 11, 8, 9)
-#define _trans_cube_URr fastcube( \
-    5, 4, 6, 7, 0, 1, 3, 2, 4, 5, 6, 7, 1, 0, 3, 2, 27, 24, 25, 26)
-#define _trans_cube_URr_inverse fastcube( \
-    4, 5, 7, 6, 1, 0, 2, 3, 5, 4, 7, 6, 0, 1, 2, 3, 25, 26, 27, 24)
-#define _trans_cube_DFr fastcube( \
-    2, 3, 0, 1, 6, 7, 4, 5, 3, 2, 1, 0, 6, 7, 4, 5, 9, 8, 11, 10)
-#define _trans_cube_DFr_inverse fastcube( \
-    2, 3, 0, 1, 6, 7, 4, 5, 3, 2, 1, 0, 6, 7, 4, 5, 9, 8, 11, 10)
-#define _trans_cube_DLr fastcube( \
-    7, 6, 4, 5, 2, 3, 1, 0, 6, 7, 4, 5, 2, 3, 0, 1, 26, 25, 24, 27)
-#define _trans_cube_DLr_inverse fastcube( \
-    7, 6, 4, 5, 2, 3, 1, 0, 6, 7, 4, 5, 2, 3, 0, 1, 26, 25, 24, 27)
-#define _trans_cube_DBr fastcube( \
-    3, 2, 1, 0, 7, 6, 5, 4, 2, 3, 0, 1, 7, 6, 5, 4, 11, 10, 9, 8)
-#define _trans_cube_DBr_inverse fastcube( \
-    3, 2, 1, 0, 7, 6, 5, 4, 2, 3, 0, 1, 7, 6, 5, 4, 11, 10, 9, 8)
-#define _trans_cube_DRr fastcube( \
-    6, 7, 5, 4, 3, 2, 0, 1, 7, 6, 5, 4, 3, 2, 1, 0, 24, 27, 26, 25)
-#define _trans_cube_DRr_inverse fastcube( \
-    6, 7, 5, 4, 3, 2, 0, 1, 7, 6, 5, 4, 3, 2, 1, 0, 24, 27, 26, 25)
-#define _trans_cube_RUr fastcube( \
-    64, 67, 65, 66, 37, 38, 36, 39, 20, 23, 22, 21, 24, 27, 26, 25, 0, 1, 2, 3)
-#define _trans_cube_RUr_inverse fastcube( \
-    32, 34, 35, 33, 70, 68, 69, 71, 8, 9, 10, 11, 16, 19, 18, 17, 20, 23, 22, 21)
-#define _trans_cube_RFr fastcube( \
-    38, 37, 36, 39, 64, 67, 66, 65, 24, 27, 26, 25, 23, 20, 21, 22, 19, 16, 17, 18)
-#define _trans_cube_RFr_inverse fastcube( \
-    36, 39, 38, 37, 66, 65, 64, 67, 25, 26, 27, 24, 21, 22, 23, 20, 16, 19, 18, 17)
-#define _trans_cube_RDr fastcube( \
-    67, 64, 66, 65, 38, 37, 39, 36, 23, 20, 21, 22, 27, 24, 25, 26, 2, 3, 0, 1)
-#define _trans_cube_RDr_inverse fastcube( \
-    33, 35, 34, 32, 71, 69, 68, 70, 10, 11, 8, 9, 17, 18, 19, 16, 21, 22, 23, 20)
-#define _trans_cube_RBr fastcube( \
-    37, 38, 39, 36, 67, 64, 65, 66, 27, 24, 25, 26, 20, 23, 22, 21, 17, 18, 19, 16)
-#define _trans_cube_RBr_inverse fastcube( \
-    37, 38, 39, 36, 67, 64, 65, 66, 27, 24, 25, 26, 20, 23, 22, 21, 17, 18, 19, 16)
-#define _trans_cube_LUr fastcube( \
-    65, 66, 64, 67, 36, 39, 37, 38, 21, 22, 23, 20, 26, 25, 24, 27, 1, 0, 3, 2)
-#define _trans_cube_LUr_inverse fastcube( \
-    34, 32, 33, 35, 68, 70, 71, 69, 9, 8, 11, 10, 19, 16, 17, 18, 22, 21, 20, 23)
-#define _trans_cube_LFr fastcube( \
-    36, 39, 38, 37, 66, 65, 64, 67, 25, 26, 27, 24, 21, 22, 23, 20, 16, 19, 18, 17)
-#define _trans_cube_LFr_inverse fastcube( \
-    38, 37, 36, 39, 64, 67, 66, 65, 24, 27, 26, 25, 23, 20, 21, 22, 19, 16, 17, 18)
-#define _trans_cube_LDr fastcube( \
-    66, 65, 67, 64, 39, 36, 38, 37, 22, 21, 20, 23, 25, 26, 27, 24, 3, 2, 1, 0)
-#define _trans_cube_LDr_inverse fastcube( \
-    35, 33, 32, 34, 69, 71, 70, 68, 11, 10, 9, 8, 18, 17, 16, 19, 23, 20, 21, 22)
-#define _trans_cube_LBr fastcube( \
-    39, 36, 37, 38, 65, 66, 67, 64, 26, 25, 24, 27, 22, 21, 20, 23, 18, 17, 16, 19)
-#define _trans_cube_LBr_inverse fastcube( \
-    39, 36, 37, 38, 65, 66, 67, 64, 26, 25, 24, 27, 22, 21, 20, 23, 18, 17, 16, 19)
-#define _trans_cube_FUr fastcube( \
-    68, 70, 69, 71, 32, 34, 33, 35, 16, 19, 18, 17, 9, 8, 11, 10, 5, 4, 7, 6)
-#define _trans_cube_FUr_inverse fastcube( \
-    68, 70, 69, 71, 32, 34, 33, 35, 16, 19, 18, 17, 9, 8, 11, 10, 5, 4, 7, 6)
-#define _trans_cube_FRr fastcube( \
-    32, 34, 35, 33, 70, 68, 69, 71, 8, 9, 10, 11, 16, 19, 18, 17, 20, 23, 22, 21)
-#define _trans_cube_FRr_inverse fastcube( \
-    64, 67, 65, 66, 37, 38, 36, 39, 20, 23, 22, 21, 24, 27, 26, 25, 0, 1, 2, 3)
-#define _trans_cube_FDr fastcube( \
-    70, 68, 71, 69, 34, 32, 35, 33, 19, 16, 17, 18, 8, 9, 10, 11, 7, 6, 5, 4)
-#define _trans_cube_FDr_inverse fastcube( \
-    69, 71, 68, 70, 33, 35, 32, 34, 17, 18, 19, 16, 11, 10, 9, 8, 4, 5, 6, 7)
-#define _trans_cube_FLr fastcube( \
-    34, 32, 33, 35, 68, 70, 71, 69, 9, 8, 11, 10, 19, 16, 17, 18, 22, 21, 20, 23)
-#define _trans_cube_FLr_inverse fastcube( \
-    65, 66, 64, 67, 36, 39, 37, 38, 21, 22, 23, 20, 26, 25, 24, 27, 1, 0, 3, 2)
-#define _trans_cube_BUr fastcube( \
-    69, 71, 68, 70, 33, 35, 32, 34, 17, 18, 19, 16, 11, 10, 9, 8, 4, 5, 6, 7)
-#define _trans_cube_BUr_inverse fastcube( \
-    70, 68, 71, 69, 34, 32, 35, 33, 19, 16, 17, 18, 8, 9, 10, 11, 7, 6, 5, 4)
-#define _trans_cube_BRr fastcube( \
-    35, 33, 32, 34, 69, 71, 70, 68, 11, 10, 9, 8, 18, 17, 16, 19, 23, 20, 21, 22)
-#define _trans_cube_BRr_inverse fastcube( \
-    66, 65, 67, 64, 39, 36, 38, 37, 22, 21, 20, 23, 25, 26, 27, 24, 3, 2, 1, 0)
-#define _trans_cube_BDr fastcube( \
-    71, 69, 70, 68, 35, 33, 34, 32, 18, 17, 16, 19, 10, 11, 8, 9, 6, 7, 4, 5)
-#define _trans_cube_BDr_inverse fastcube( \
-    71, 69, 70, 68, 35, 33, 34, 32, 18, 17, 16, 19, 10, 11, 8, 9, 6, 7, 4, 5)
-#define _trans_cube_BLr fastcube( \
-    33, 35, 34, 32, 71, 69, 68, 70, 10, 11, 8, 9, 17, 18, 19, 16, 21, 22, 23, 20)
-#define _trans_cube_BLr_inverse fastcube( \
-    67, 64, 66, 65, 38, 37, 39, 36, 23, 20, 21, 22, 27, 24, 25, 26, 2, 3, 0, 1)
-#define _trans_cube_UFm fastcube( \
-    4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 5, 4, 7, 6, 9, 8, 11, 10)
-#define _trans_cube_UFm_inverse fastcube( \
-    4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 5, 4, 7, 6, 9, 8, 11, 10)
-#define _trans_cube_ULm fastcube( \
-    0, 1, 3, 2, 5, 4, 6, 7, 4, 5, 6, 7, 0, 1, 2, 3, 24, 27, 26, 25)
-#define _trans_cube_ULm_inverse fastcube( \
-    0, 1, 3, 2, 5, 4, 6, 7, 4, 5, 6, 7, 0, 1, 2, 3, 24, 27, 26, 25)
-#define _trans_cube_UBm fastcube( \
-    5, 4, 7, 6, 1, 0, 3, 2, 1, 0, 3, 2, 4, 5, 6, 7, 11, 10, 9, 8)
-#define _trans_cube_UBm_inverse fastcube( \
-    5, 4, 7, 6, 1, 0, 3, 2, 1, 0, 3, 2, 4, 5, 6, 7, 11, 10, 9, 8)
-#define _trans_cube_URm fastcube( \
-    1, 0, 2, 3, 4, 5, 7, 6, 5, 4, 7, 6, 1, 0, 3, 2, 26, 25, 24, 27)
-#define _trans_cube_URm_inverse fastcube( \
-    1, 0, 2, 3, 4, 5, 7, 6, 5, 4, 7, 6, 1, 0, 3, 2, 26, 25, 24, 27)
-#define _trans_cube_DFm fastcube( \
-    6, 7, 4, 5, 2, 3, 0, 1, 3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11)
-#define _trans_cube_DFm_inverse fastcube( \
-    6, 7, 4, 5, 2, 3, 0, 1, 3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11)
-#define _trans_cube_DLm fastcube( \
-    3, 2, 0, 1, 6, 7, 5, 4, 7, 6, 5, 4, 2, 3, 0, 1, 27, 24, 25, 26)
-#define _trans_cube_DLm_inverse fastcube( \
-    2, 3, 1, 0, 7, 6, 4, 5, 6, 7, 4, 5, 3, 2, 1, 0, 25, 26, 27, 24)
-#define _trans_cube_DBm fastcube( \
-    7, 6, 5, 4, 3, 2, 1, 0, 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9)
-#define _trans_cube_DBm_inverse fastcube( \
-    7, 6, 5, 4, 3, 2, 1, 0, 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9)
-#define _trans_cube_DRm fastcube( \
-    2, 3, 1, 0, 7, 6, 4, 5, 6, 7, 4, 5, 3, 2, 1, 0, 25, 26, 27, 24)
-#define _trans_cube_DRm_inverse fastcube( \
-    3, 2, 0, 1, 6, 7, 5, 4, 7, 6, 5, 4, 2, 3, 0, 1, 27, 24, 25, 26)
-#define _trans_cube_RUm fastcube( \
-    68, 71, 69, 70, 33, 34, 32, 35, 21, 22, 23, 20, 25, 26, 27, 24, 0, 1, 2, 3)
-#define _trans_cube_RUm_inverse fastcube( \
-    70, 68, 69, 71, 32, 34, 35, 33, 8, 9, 10, 11, 19, 16, 17, 18, 23, 20, 21, 22)
-#define _trans_cube_RFm fastcube( \
-    34, 33, 32, 35, 68, 71, 70, 69, 25, 26, 27, 24, 22, 21, 20, 23, 19, 16, 17, 18)
-#define _trans_cube_RFm_inverse fastcube( \
-    66, 65, 64, 67, 36, 39, 38, 37, 25, 26, 27, 24, 22, 21, 20, 23, 19, 16, 17, 18)
-#define _trans_cube_RDm fastcube( \
-    71, 68, 70, 69, 34, 33, 35, 32, 22, 21, 20, 23, 26, 25, 24, 27, 2, 3, 0, 1)
-#define _trans_cube_RDm_inverse fastcube( \
-    71, 69, 68, 70, 33, 35, 34, 32, 10, 11, 8, 9, 18, 17, 16, 19, 22, 21, 20, 23)
-#define _trans_cube_RBm fastcube( \
-    33, 34, 35, 32, 71, 68, 69, 70, 26, 25, 24, 27, 21, 22, 23, 20, 17, 18, 19, 16)
-#define _trans_cube_RBm_inverse fastcube( \
-    67, 64, 65, 66, 37, 38, 39, 36, 27, 24, 25, 26, 23, 20, 21, 22, 18, 17, 16, 19)
-#define _trans_cube_LUm fastcube( \
-    69, 70, 68, 71, 32, 35, 33, 34, 20, 23, 22, 21, 27, 24, 25, 26, 1, 0, 3, 2)
-#define _trans_cube_LUm_inverse fastcube( \
-    68, 70, 71, 69, 34, 32, 33, 35, 9, 8, 11, 10, 16, 19, 18, 17, 21, 22, 23, 20)
-#define _trans_cube_LFm fastcube( \
-    32, 35, 34, 33, 70, 69, 68, 71, 24, 27, 26, 25, 20, 23, 22, 21, 16, 19, 18, 17)
-#define _trans_cube_LFm_inverse fastcube( \
-    64, 67, 66, 65, 38, 37, 36, 39, 24, 27, 26, 25, 20, 23, 22, 21, 16, 19, 18, 17)
-#define _trans_cube_LDm fastcube( \
-    70, 69, 71, 68, 35, 32, 34, 33, 23, 20, 21, 22, 24, 27, 26, 25, 3, 2, 1, 0)
-#define _trans_cube_LDm_inverse fastcube( \
-    69, 71, 70, 68, 35, 33, 32, 34, 11, 10, 9, 8, 17, 18, 19, 16, 20, 23, 22, 21)
-#define _trans_cube_LBm fastcube( \
-    35, 32, 33, 34, 69, 70, 71, 68, 27, 24, 25, 26, 23, 20, 21, 22, 18, 17, 16, 19)
-#define _trans_cube_LBm_inverse fastcube( \
-    65, 66, 67, 64, 39, 36, 37, 38, 26, 25, 24, 27, 21, 22, 23, 20, 17, 18, 19, 16)
-#define _trans_cube_FUm fastcube( \
-    64, 66, 65, 67, 36, 38, 37, 39, 16, 19, 18, 17, 8, 9, 10, 11, 4, 5, 6, 7)
-#define _trans_cube_FUm_inverse fastcube( \
-    32, 34, 33, 35, 68, 70, 69, 71, 16, 19, 18, 17, 8, 9, 10, 11, 4, 5, 6, 7)
-#define _trans_cube_FRm fastcube( \
-    36, 38, 39, 37, 66, 64, 65, 67, 9, 8, 11, 10, 16, 19, 18, 17, 21, 22, 23, 20)
-#define _trans_cube_FRm_inverse fastcube( \
-    37, 38, 36, 39, 64, 67, 65, 66, 20, 23, 22, 21, 27, 24, 25, 26, 1, 0, 3, 2)
-#define _trans_cube_FDm fastcube( \
-    66, 64, 67, 65, 38, 36, 39, 37, 19, 16, 17, 18, 9, 8, 11, 10, 6, 7, 4, 5)
-#define _trans_cube_FDm_inverse fastcube( \
-    33, 35, 32, 34, 69, 71, 68, 70, 17, 18, 19, 16, 10, 11, 8, 9, 5, 4, 7, 6)
-#define _trans_cube_FLm fastcube( \
-    38, 36, 37, 39, 64, 66, 67, 65, 8, 9, 10, 11, 19, 16, 17, 18, 23, 20, 21, 22)
-#define _trans_cube_FLm_inverse fastcube( \
-    36, 39, 37, 38, 65, 66, 64, 67, 21, 22, 23, 20, 25, 26, 27, 24, 0, 1, 2, 3)
-#define _trans_cube_BUm fastcube( \
-    65, 67, 64, 66, 37, 39, 36, 38, 17, 18, 19, 16, 10, 11, 8, 9, 5, 4, 7, 6)
-#define _trans_cube_BUm_inverse fastcube( \
-    34, 32, 35, 33, 70, 68, 71, 69, 19, 16, 17, 18, 9, 8, 11, 10, 6, 7, 4, 5)
-#define _trans_cube_BRm fastcube( \
-    39, 37, 36, 38, 65, 67, 66, 64, 10, 11, 8, 9, 18, 17, 16, 19, 22, 21, 20, 23)
-#define _trans_cube_BRm_inverse fastcube( \
-    39, 36, 38, 37, 66, 65, 67, 64, 22, 21, 20, 23, 26, 25, 24, 27, 2, 3, 0, 1)
-#define _trans_cube_BDm fastcube( \
-    67, 65, 66, 64, 39, 37, 38, 36, 18, 17, 16, 19, 11, 10, 9, 8, 7, 6, 5, 4)
-#define _trans_cube_BDm_inverse fastcube( \
-    35, 33, 34, 32, 71, 69, 70, 68, 18, 17, 16, 19, 11, 10, 9, 8, 7, 6, 5, 4)
-#define _trans_cube_BLm fastcube( \
-    37, 39, 38, 36, 67, 65, 64, 66, 11, 10, 9, 8, 17, 18, 19, 16, 20, 23, 22, 21)
-#define _trans_cube_BLm_inverse fastcube( \
-    38, 37, 39, 36, 67, 64, 66, 65, 23, 20, 21, 22, 24, 27, 26, 25, 3, 2, 1, 0)
-
-_static const char *cornerstr[] = {
-	[_c_ufr] = "UFR",
-	[_c_ubl] = "UBL",
-	[_c_dfl] = "DFL",
-	[_c_dbr] = "DBR",
-	[_c_ufl] = "UFL",
-	[_c_ubr] = "UBR",
-	[_c_dfr] = "DFR",
-	[_c_dbl] = "DBL"
-};
-
-_static const char *cornerstralt[] = {
-	[_c_ufr] = "URF",
-	[_c_ubl] = "ULB",
-	[_c_dfl] = "DLF",
-	[_c_dbr] = "DRB",
-	[_c_ufl] = "ULF",
-	[_c_ubr] = "URB",
-	[_c_dfr] = "DRF",
-	[_c_dbl] = "DLB"
-};
-
-_static const char *edgestr[] = {
-	[_e_uf] = "UF",
-	[_e_ub] = "UB",
-	[_e_db] = "DB",
-	[_e_df] = "DF",
-	[_e_ur] = "UR",
-	[_e_ul] = "UL",
-	[_e_dl] = "DL",
-	[_e_dr] = "DR",
-	[_e_fr] = "FR",
-	[_e_fl] = "FL",
-	[_e_bl] = "BL",
-	[_e_br] = "BR"
-};
-
-_static const char *movestr[] = {
-	[_move_U]  = "U",
-	[_move_U2] = "U2",
-	[_move_U3] = "U'",
-	[_move_D]  = "D",
-	[_move_D2] = "D2",
-	[_move_D3] = "D'",
-	[_move_R]  = "R",
-	[_move_R2] = "R2",
-	[_move_R3] = "R'",
-	[_move_L]  = "L",
-	[_move_L2] = "L2",
-	[_move_L3] = "L'",
-	[_move_F]  = "F",
-	[_move_F2] = "F2",
-	[_move_F3] = "F'",
-	[_move_B]  = "B",
-	[_move_B2] = "B2",
-	[_move_B3] = "B'",
-};
-
-_static const char *transstr[] = {
-	[_trans_UFr] = "rotation UF",
-	[_trans_UFm] = "mirrored UF",
-	[_trans_ULr] = "rotation UL",
-	[_trans_ULm] = "mirrored UL",
-	[_trans_UBr] = "rotation UB",
-	[_trans_UBm] = "mirrored UB",
-	[_trans_URr] = "rotation UR",
-	[_trans_URm] = "mirrored UR",
-	[_trans_DFr] = "rotation DF",
-	[_trans_DFm] = "mirrored DF",
-	[_trans_DLr] = "rotation DL",
-	[_trans_DLm] = "mirrored DL",
-	[_trans_DBr] = "rotation DB",
-	[_trans_DBm] = "mirrored DB",
-	[_trans_DRr] = "rotation DR",
-	[_trans_DRm] = "mirrored DR",
-	[_trans_RUr] = "rotation RU",
-	[_trans_RUm] = "mirrored RU",
-	[_trans_RFr] = "rotation RF",
-	[_trans_RFm] = "mirrored RF",
-	[_trans_RDr] = "rotation RD",
-	[_trans_RDm] = "mirrored RD",
-	[_trans_RBr] = "rotation RB",
-	[_trans_RBm] = "mirrored RB",
-	[_trans_LUr] = "rotation LU",
-	[_trans_LUm] = "mirrored LU",
-	[_trans_LFr] = "rotation LF",
-	[_trans_LFm] = "mirrored LF",
-	[_trans_LDr] = "rotation LD",
-	[_trans_LDm] = "mirrored LD",
-	[_trans_LBr] = "rotation LB",
-	[_trans_LBm] = "mirrored LB",
-	[_trans_FUr] = "rotation FU",
-	[_trans_FUm] = "mirrored FU",
-	[_trans_FRr] = "rotation FR",
-	[_trans_FRm] = "mirrored FR",
-	[_trans_FDr] = "rotation FD",
-	[_trans_FDm] = "mirrored FD",
-	[_trans_FLr] = "rotation FL",
-	[_trans_FLm] = "mirrored FL",
-	[_trans_BUr] = "rotation BU",
-	[_trans_BUm] = "mirrored BU",
-	[_trans_BRr] = "rotation BR",
-	[_trans_BRm] = "mirrored BR",
-	[_trans_BDr] = "rotation BD",
-	[_trans_BDm] = "mirrored BD",
-	[_trans_BLr] = "rotation BL",
-	[_trans_BLm] = "mirrored BL",
-};
-
-static uint8_t inverse_trans_table[48] = {
-	[_trans_UFr] = _trans_UFr,
-	[_trans_UFm] = _trans_UFm,
-	[_trans_ULr] = _trans_URr,
-	[_trans_ULm] = _trans_ULm,
-	[_trans_UBr] = _trans_UBr,
-	[_trans_UBm] = _trans_UBm,
-	[_trans_URr] = _trans_ULr,
-	[_trans_URm] = _trans_URm,
-	[_trans_DFr] = _trans_DFr,
-	[_trans_DFm] = _trans_DFm,
-	[_trans_DLr] = _trans_DLr,
-	[_trans_DLm] = _trans_DRm,
-	[_trans_DBr] = _trans_DBr,
-	[_trans_DBm] = _trans_DBm,
-	[_trans_DRr] = _trans_DRr,
-	[_trans_DRm] = _trans_DLm,
-	[_trans_RUr] = _trans_FRr,
-	[_trans_RUm] = _trans_FLm,
-	[_trans_RFr] = _trans_LFr,
-	[_trans_RFm] = _trans_RFm,
-	[_trans_RDr] = _trans_BLr,
-	[_trans_RDm] = _trans_BRm,
-	[_trans_RBr] = _trans_RBr,
-	[_trans_RBm] = _trans_LBm,
-	[_trans_LUr] = _trans_FLr,
-	[_trans_LUm] = _trans_FRm,
-	[_trans_LFr] = _trans_RFr,
-	[_trans_LFm] = _trans_LFm,
-	[_trans_LDr] = _trans_BRr,
-	[_trans_LDm] = _trans_BLm,
-	[_trans_LBr] = _trans_LBr,
-	[_trans_LBm] = _trans_RBm,
-	[_trans_FUr] = _trans_FUr,
-	[_trans_FUm] = _trans_FUm,
-	[_trans_FRr] = _trans_RUr,
-	[_trans_FRm] = _trans_LUm,
-	[_trans_FDr] = _trans_BUr,
-	[_trans_FDm] = _trans_BUm,
-	[_trans_FLr] = _trans_LUr,
-	[_trans_FLm] = _trans_RUm,
-	[_trans_BUr] = _trans_FDr,
-	[_trans_BUm] = _trans_FDm,
-	[_trans_BRr] = _trans_LDr,
-	[_trans_BRm] = _trans_RDm,
-	[_trans_BDr] = _trans_BDr,
-	[_trans_BDm] = _trans_BDm,
-	[_trans_BLr] = _trans_RDr,
-	[_trans_BLm] = _trans_LDm,
-};
-
-/******************************************************************************
-Section: AVX2 fast methods
-
-This section contains performance-critical methods that rely on AVX2
-intructions such as routines for moving or transforming the cube.
-******************************************************************************/
+#include "cube.h"
+#include "constants.h"
 
 #if defined(CUBE_AVX2)
-
 #include <immintrin.h>
-
-typedef __m256i cube_fast_t;
-
-#define _co2_avx2 _mm256_set_epi64x(0, 0, 0, 0x6060606060606060)
-#define _cocw_avx2 _mm256_set_epi64x(0, 0, 0, 0x2020202020202020)
-#define _cp_avx2 _mm256_set_epi64x(0, 0, 0, 0x0707070707070707)
-#define _ep_avx2 _mm256_set_epi64x(0x0F0F0F0F, 0x0F0F0F0F0F0F0F0F, 0, 0)
-#define _eo_avx2 _mm256_set_epi64x(0x10101010, 0x1010101010101010, 0, 0)
-
-_static_inline cube_fast_t fastcube(
-    uint8_t, uint8_t, uint8_t, uint8_t, uint8_t,
-    uint8_t, uint8_t, uint8_t, uint8_t, uint8_t,
-    uint8_t, uint8_t, uint8_t, uint8_t, uint8_t,
-    uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
-);
-_static cube_fast_t cubetofast(cube_t);
-_static cube_t fasttocube(cube_fast_t);
-_static_inline bool equal_fast(cube_fast_t, cube_fast_t);
-_static_inline bool issolved_fast(cube_fast_t);
-_static_inline cube_fast_t invertco_fast(cube_fast_t);
-_static_inline cube_fast_t compose_fast(cube_fast_t, cube_fast_t);
-
-_static_inline int64_t coord_fast_co(cube_fast_t);
-_static_inline int64_t coord_fast_csep(cube_fast_t);
-_static_inline int64_t coord_fast_cocsep(cube_fast_t);
-_static_inline int64_t coord_fast_eo(cube_fast_t);
-_static_inline int64_t coord_fast_esep(cube_fast_t);
-
-_static_inline cube_fast_t
-fastcube(
-	uint8_t c_ufr,
-	uint8_t c_ubl,
-	uint8_t c_dfl,
-	uint8_t c_dbr,
-	uint8_t c_ufl,
-	uint8_t c_ubr,
-	uint8_t c_dfr,
-	uint8_t c_dbl,
-
-	uint8_t e_uf,
-	uint8_t e_ub,
-	uint8_t e_db,
-	uint8_t e_df,
-	uint8_t e_ur,
-	uint8_t e_ul,
-	uint8_t e_dl,
-	uint8_t e_dr,
-	uint8_t e_fr,
-	uint8_t e_fl,
-	uint8_t e_bl,
-	uint8_t e_br
-)
-{
-	return _mm256_set_epi8(
-		0, 0, 0, 0, e_br, e_bl, e_fl, e_fr,
-		e_dr, e_dl, e_ul, e_ur, e_df, e_db, e_ub, e_uf,
-		0, 0, 0, 0, 0, 0, 0, 0,
-		c_dbl, c_dfr, c_ubr, c_ufl, c_dbr, c_dfl, c_ubl, c_ufr
-	);
-}
-
-_static cube_fast_t
-cubetofast(cube_t a)
-{
-	uint8_t aux[32];
-
-	memset(aux, 0, 32);
-	memcpy(aux, &a.corner, 8);
-	memcpy(aux + 16, &a.edge, 12);
-
-	return _mm256_loadu_si256((__m256i_u *)&aux);
-}
-
-_static cube_t
-fasttocube(cube_fast_t c)
-{
-	cube_t a;
-	uint8_t aux[32];
-
-	_mm256_storeu_si256((__m256i_u *)aux, c);
-	memcpy(&a.corner, aux, 8);
-	memcpy(&a.edge, aux + 16, 12);
-
-	return a;
-}
-
-_static_inline bool
-equal_fast(cube_fast_t c1, cube_fast_t c2)
-{
-	int32_t mask;
-	__m256i cmp;
-
-	cmp = _mm256_cmpeq_epi8(c1, c2);
-	mask = _mm256_movemask_epi8(cmp);
-
-	return mask == ~0;
-}
-
-_static_inline bool
-issolved_fast(cube_fast_t cube)
-{
-	return equal_fast(cube, solved_fast);
-}
-
-_static_inline cube_fast_t
-invertco_fast(cube_fast_t c)
-{
-        cube_fast_t co, shleft, shright, summed, newco, cleanco, ret;
-
-        co = _mm256_and_si256(c, _co2_avx2);
-        shleft = _mm256_slli_epi32(co, 1);
-        shright = _mm256_srli_epi32(co, 1);
-        summed = _mm256_or_si256(shleft, shright);
-        newco = _mm256_and_si256(summed, _co2_avx2);
-        cleanco = _mm256_xor_si256(c, co);
-        ret = _mm256_or_si256(cleanco, newco);
-
-        return ret;
-}
-
-_static_inline cube_fast_t
-compose_fast(cube_fast_t c1, cube_fast_t c2)
-{
-	cube_fast_t s, b, eo2, co1, co2, aux, auy1, auy2, auz1, auz2;
-
-	/* Permute and clean unused bits */
-	s = _mm256_shuffle_epi8(c1, c2);
-	b = _mm256_set_epi8(
-		~0, ~0, ~0, ~0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-		~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, 0, 0, 0, 0, 0, 0, 0, 0
-	);
-	s = _mm256_andnot_si256(b, s);
-
-	/* Change EO */
-	eo2 = _mm256_and_si256(c2, _eo_avx2);
-	s = _mm256_xor_si256(s, eo2);
-
-	/* Change CO */
-	co1 = _mm256_and_si256(s, _co2_avx2);
-	co2 = _mm256_and_si256(c2, _co2_avx2);
-	aux = _mm256_add_epi8(co1, co2);
-	auy1 = _mm256_add_epi8(aux, _cocw_avx2);
-	auy2 = _mm256_srli_epi32(auy1, 2);
-	auz1 = _mm256_add_epi8(aux, auy2);
-	auz2 = _mm256_and_si256(auz1, _co2_avx2);
-
-	/* Put together */
-	s = _mm256_andnot_si256(_co2_avx2, s);
-	s = _mm256_or_si256(s, auz2);
-
-	return s;
-}
-
-_static_inline int64_t
-coord_fast_co(cube_fast_t c)
-{
-	cube_fast_t co;
-	int64_t mem[4], ret, i, p;
-
-	co = _mm256_and_si256(c, _co2_avx2);
-	_mm256_storeu_si256((__m256i *)mem, co);
-
-	mem[0] >>= 5L;
-	for (i = 0, ret = 0, p = 1; i < 7; i++, mem[0] >>= 8L, p *= 3)
-		ret += (mem[0] & 3L) * p;
-
-	return ret;
-}
-
-_static_inline int64_t
-coord_fast_csep(cube_fast_t c)
-{
-	cube_fast_t cp, shifted;
-	int64_t mask;
-
-	cp = _mm256_and_si256(c, _cp_avx2);
-	shifted = _mm256_slli_epi32(cp, 5);
-	mask = _mm256_movemask_epi8(shifted);
-
-	return mask & 0x7F;
-}
-
-_static_inline int64_t
-coord_fast_cocsep(cube_fast_t c)
-{
-	return (coord_fast_co(c) << 7) + coord_fast_csep(c);
-}
-
-_static_inline int64_t
-coord_fast_eo(cube_fast_t c)
-{
-	cube_fast_t eo, shifted;
-	int64_t mask;
-
-	eo = _mm256_and_si256(c, _eo_avx2);
-	shifted = _mm256_slli_epi32(eo, 3);
-	mask = _mm256_movemask_epi8(shifted);
-
-	return mask >> 17;
-}
-
-_static_inline int64_t
-coord_fast_esep(cube_fast_t c)
-{
-	cube_fast_t ep;
-	int64_t e, mem[4], i, j, k, l, ret1, ret2, bit1, bit2, is1;
-
-	ep = _mm256_and_si256(c, _ep_avx2);
-	_mm256_storeu_si256((__m256i *)mem, ep);
-
-	mem[3] <<= 8L;
-	ret1 = ret2 = 0;
-	k = l = 4;
-	for (i = 0, j = 0; i < 12; i++, mem[i/8 + 2] >>= 8L) {
-		e = mem[i/8 + 2];
-
-		bit1 = (e & _esepbit1) >> 2L;
-		bit2 = (e & _esepbit2) >> 3L;
-		is1 = (1 - bit2) * bit1;
-
-		ret1 += bit2 * binomial[11-i][k];
-		k -= bit2;
-
-		ret2 += is1 * binomial[7-j][l];
-		l -= is1;
-		j += (1-bit2);
-	}
-
-	return ret1 * 70 + ret2;
-}
-
-/******************************************************************************
-Section: ARM NEON fast methods
-
-This section contains performance-critical methods that rely on ARM NEON
-intructions such as routines for moving or transforming the cube.
-******************************************************************************/
-
+#include "cube_avx2.h"
 #elif defined(CUBE_NEON)
-
-typedef struct {
-	uint8x16_t corner;
-	uint8x16_t edge;
-} cube_fast_t;
-
-/* TODO! */
-
-/******************************************************************************
-Section: portable fast methods
-
-This section contains performance-critical methods that do not use
-advanced CPU instructions. They are used as an alternative to the ones
-in the previous section(s) for unsupported architectures.
-******************************************************************************/
-
+#include "cube_neon.h"
 #else
-
-typedef cube_t cube_fast_t;
-
-_static_inline cube_fast_t fastcube(
-    uint8_t, uint8_t, uint8_t, uint8_t, uint8_t,
-    uint8_t, uint8_t, uint8_t, uint8_t, uint8_t,
-    uint8_t, uint8_t, uint8_t, uint8_t, uint8_t,
-    uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
-);
-_static cube_fast_t cubetofast(cube_t);
-_static cube_t fasttocube(cube_fast_t);
-_static_inline bool equal_fast(cube_fast_t, cube_fast_t);
-_static_inline bool issolved_fast(cube_fast_t);
-_static_inline cube_fast_t invertco_fast(cube_fast_t);
-_static_inline cube_fast_t compose_fast(cube_fast_t, cube_fast_t);
-
-_static_inline int64_t coord_fast_co(cube_fast_t);
-_static_inline int64_t coord_fast_csep(cube_fast_t);
-_static_inline int64_t coord_fast_cocsep(cube_fast_t);
-_static_inline int64_t coord_fast_eo(cube_fast_t);
-_static_inline int64_t coord_fast_esep(cube_fast_t);
-
-_static_inline cube_fast_t
-fastcube(
-	uint8_t c_ufr,
-	uint8_t c_ubl,
-	uint8_t c_dfl,
-	uint8_t c_dbr,
-	uint8_t c_ufl,
-	uint8_t c_ubr,
-	uint8_t c_dfr,
-	uint8_t c_dbl,
-
-	uint8_t e_uf,
-	uint8_t e_ub,
-	uint8_t e_db,
-	uint8_t e_df,
-	uint8_t e_ur,
-	uint8_t e_ul,
-	uint8_t e_dl,
-	uint8_t e_dr,
-	uint8_t e_fr,
-	uint8_t e_fl,
-	uint8_t e_bl,
-	uint8_t e_br
-)
-{
-	cube_fast_t cube = {
-		.corner = {
-			c_ufr, c_ubl, c_dfl, c_dbr, c_ufl, c_ubr, c_dfr, c_dbl
-		},
-		.edge = {
-			e_uf, e_ub, e_db, e_df, e_ur, e_ul,
-			e_dl, e_dr, e_fr, e_fl, e_bl, e_br
-		}
-	};
-
-	return cube;
-}
-
-_static cube_fast_t
-cubetofast(cube_t cube)
-{
-	cube_fast_t fast;
-	memcpy(&fast, &cube, sizeof(cube_fast_t));
-	return fast;
-}
-
-_static cube_t
-fasttocube(cube_fast_t fast)
-{
-	cube_t cube;
-	memcpy(&cube, &fast, sizeof(cube_fast_t));
-	return cube;
-}
-
-_static_inline bool
-equal_fast(cube_fast_t c1, cube_fast_t c2)
-{
-	uint8_t i;
-	bool ret;
-
-	ret = true;
-	for (i = 0; i < 8; i++)
-		ret = ret && c1.corner[i] == c2.corner[i];
-	for (i = 0; i < 12; i++)
-		ret = ret && c1.edge[i] == c2.edge[i];
-
-	return ret;
-}
-
-_static_inline bool
-issolved_fast(cube_fast_t cube)
-{
-	return equal_fast(cube, solved_fast);
-}
-
-_static_inline cube_fast_t
-invertco_fast(cube_fast_t c)
-{
-	uint8_t i, piece, orien;
-	cube_fast_t ret;
-
-	ret = c;
-	for (i = 0; i < 8; i++) {
-		piece = c.corner[i];
-		orien = ((piece << 1) | (piece >> 1)) & _cobits2;
-		ret.corner[i] = (piece & _pbits) | orien;
-	}
-
-	return ret;
-}
-
-_static_inline cube_fast_t
-compose_fast(cube_fast_t c1, cube_fast_t c2)
-{
-	cube_fast_t ret;
-	uint8_t i, piece1, piece2, p, orien, aux, auy;
-
-	ret = zero_fast;
-
-	for (i = 0; i < 12; i++) {
-		piece2 = c2.edge[i];
-		p = piece2 & _pbits;
-		piece1 = c1.edge[p];
-		orien = (piece2 ^ piece1) & _eobit;
-		ret.edge[i] = (piece1 & _pbits) | orien;
-	}
-
-	for (i = 0; i < 8; i++) {
-		piece2 = c2.corner[i];
-		p = piece2 & _pbits;
-		piece1 = c1.corner[p];
-		aux = (piece2 & _cobits) + (piece1 & _cobits);
-		auy = (aux + _ctwist_cw) >> 2U;
-		orien = (aux + auy) & _cobits2;
-		ret.corner[i] = (piece1 & _pbits) | orien;
-	}
-
-	return ret;
-}
-
-_static_inline int64_t
-coord_fast_co(cube_fast_t c)
-{
-	int i, p;
-	int64_t ret;
-
-	for (ret = 0, i = 0, p = 1; i < 7; i++, p *= 3)
-		ret += p * (c.corner[i] >> _coshift);
-
-	return ret;
-}
-
-/*
-For corner separation, we consider the axis (a.k.a. tetrad) each
-corner belongs to as 0 or 1 and we translate this sequence into binary.
-Ignoring the last bit, we have a value up to 2^7, but not all values are
-possible. Encoding this as a number from 0 to C(8,4) would save about 40%
-of space, but we are not going to use this coordinate in large tables.
-*/
-_static_inline int64_t
-coord_fast_csep(cube_fast_t c)
-{
-	int i, p;
-	int64_t ret;
-
-	for (ret = 0, i = 0, p = 1; i < 7; i++, p *= 2)
-		ret += p * ((c.corner[i] & _csepbit) >> 2U);
-
-	return ret;
-}
-
-_static_inline int64_t
-coord_fast_cocsep(cube_fast_t c)
-{
-	return (coord_fast_co(c) << 7) + coord_fast_csep(c);
-}
-
-_static_inline int64_t
-coord_fast_eo(cube_fast_t c)
-{
-	int i, p;
-	int64_t ret;
-
-	for (ret = 0, i = 1, p = 1; i < 12; i++, p *= 2)
-		ret += p * (c.edge[i] >> _eoshift);
-
-	return ret;
-}
-
-/*
-We encode the edge separation as a number from 0 to C(12,4)*C(8,4).
-It can be seen as the composition of two "subset index" coordinates.
-*/
-_static_inline int64_t
-coord_fast_esep(cube_fast_t c)
-{
-	int64_t i, j, k, l, ret1, ret2, bit1, bit2, is1;
-
-	for (i = 0, j = 0, k = 4, l = 4, ret1 = 0, ret2 = 0; i < 12; i++) {
-		/* Simple version:
-		if (c.edge[i] & _esepbit2) {
-			ret1 += binomial[11-i][k--];
-		} else {
-			if (c.edge[i] & _esepbit1)
-				ret2 += binomial[7-j][l--];
-			j++;
-		}
-		*/
-
-		bit1 = (c.edge[i] & _esepbit1) >> 2U;
-		bit2 = (c.edge[i] & _esepbit2) >> 3U;
-		is1 = (1 - bit2) * bit1;
-
-		ret1 += bit2 * binomial[11-i][k];
-		k -= bit2;
-
-		ret2 += is1 * binomial[7-j][l];
-		l -= is1;
-		j += (1-bit2);
-	}
-
-	return ret1 * 70 + ret2;
-}
-
+#include "cube_portable.h"
 #endif
 
-/******************************************************************************
-Section: generic methods
-
-This section contains generic functionality, including the public functions.
-Some of these routines depend on the efficient functions implemented in the
-previous sections, while some other operate directly on the cube.
-******************************************************************************/
-
-#define _move(M, c) compose_fast(c, _move_cube_ ## M)
-#define _premove(M, c) compose_fast(_move_cube_ ## M, c)
-#define _trans_rotation(T, c) \
-	compose_fast(compose_fast(_trans_cube_ ## T, c), \
-	_trans_cube_ ## T ## _inverse)
-#define _trans_mirrored(T, c) \
-	invertco_fast(compose_fast(compose_fast(_trans_cube_ ## T, c), \
-	_trans_cube_ ## T ## _inverse))
-
-_static int permsign(uint8_t *, int);
-_static uint8_t readco(const char *);
-_static uint8_t readcp(const char *);
-_static uint8_t readeo(const char *);
-_static uint8_t readep(const char *);
-_static cube_t readcube_H48(const char *);
-_static uint8_t readpiece_LST(const char **);
-_static cube_t readcube_LST(const char *);
-_static int writepiece_LST(uint8_t, char *);
-_static void writecube_H48(cube_t, char *);
-_static void writecube_LST(cube_t, char *);
-_static uint8_t readmove(char);
-_static uint8_t readmodifier(char);
-_static uint8_t readtrans(const char *);
-_static int writemoves(uint8_t *, int, char *);
-_static void writetrans(uint8_t, char *);
-_static cube_fast_t move(cube_fast_t, uint8_t);
-_static cube_fast_t transform(cube_fast_t, uint8_t);
-
-_static_inline int64_t coord_h48(cube_fast_t, uint32_t *, uint8_t);
-
-cube_t
-solvedcube(void)
-{
-	return solved;
-}
-
-bool
-isconsistent(cube_t cube)
-{
-	uint8_t i, p, e, piece;
-	bool found[12];
-
-	for (i = 0; i < 12; i++)
-		found[i] = false;
-	for (i = 0; i < 12; i++) {
-		piece = cube.edge[i];
-		p = piece & _pbits;
-		e = piece & _eobit;
-		if (p >= 12)
-			goto inconsistent_ep;
-		if (e != 0 && e != _eobit)
-			goto inconsistent_eo;
-		found[p] = true;
-	}
-	for (i = 0; i < 12; i++)
-		if (!found[i])
-			goto inconsistent_ep;
-
-	for (i = 0; i < 8; i++)
-		found[i] = false;
-	for (i = 0; i < 8; i++) {
-		piece = cube.corner[i];
-		p = piece & _pbits;
-		e = piece & _cobits;
-		if (p >= 8)
-			goto inconsistent_cp;
-		if (e != 0 && e != _ctwist_cw && e != _ctwist_ccw)
-			goto inconsistent_co;
-		found[p] = true;
-	}
-	for (i = 0; i < 8; i++)
-		if (!found[i])
-			goto inconsistent_co;
-
-	return true;
-
-inconsistent_ep:
-	DBG_LOG("Inconsistent EP\n");
-	return false;
-inconsistent_cp:
-	DBG_LOG("Inconsistent CP\n");
-	return false;
-inconsistent_eo:
-	DBG_LOG("Inconsistent EO\n");
-	return false;
-inconsistent_co:
-	DBG_LOG("Inconsistent CO\n");
-	return false;
-}
-
-bool
-issolvable(cube_t cube)
-{
-	uint8_t i, eo, co, piece, edges[12], corners[8];
-
-	DBG_ASSERT(isconsistent(cube), false,
-	    "issolvable: cube is inconsistent\n");
-
-	for (i = 0; i < 12; i++)
-		edges[i] = cube.edge[i] & _pbits;
-	for (i = 0; i < 8; i++)
-		corners[i] = cube.corner[i] & _pbits;
-
-	if (permsign(edges, 12) != permsign(corners, 8))
-		goto issolvable_parity;
-
-	eo = 0;
-	for (i = 0; i < 12; i++) {
-		piece = cube.edge[i];
-		eo += (piece & _eobit) >> _eoshift;
-	}
-	if (eo % 2 != 0)
-		goto issolvable_eo;
-
-	co = 0;
-	for (i = 0; i < 8; i++) {
-		piece = cube.corner[i];
-		co += (piece & _cobits) >> _coshift;
-	}
-	if (co % 3 != 0)
-		goto issolvable_co;
-
-	return true;
-
-issolvable_parity:
-	DBG_LOG("EP and CP parities are different\n");
-	return false;
-issolvable_eo:
-	DBG_LOG("Odd number of flipped edges\n");
-	return false;
-issolvable_co:
-	DBG_LOG("Sum of corner orientation is not multiple of 3\n");
-	return false;
-}
-
-bool
-issolved(cube_t cube)
-{
-	return equal(cube, solved);
-}
-
-bool
-equal(cube_t c1, cube_t c2)
-{
-	int i;
-	bool ret;
-
-	ret = true;
-	for (i = 0; i < 8; i++)
-		ret = ret && c1.corner[i] == c2.corner[i];
-	for (i = 0; i < 12; i++)
-		ret = ret && c1.edge[i] == c2.edge[i];
-
-	return ret;
-}
-
-bool
-iserror(cube_t cube)
-{
-	return equal(cube, zero);
-}
-
-cube_t
-compose(cube_t c1, cube_t c2)
-{
-	DBG_ASSERT(isconsistent(c1) && isconsistent(c2),
-	    zero, "compose error: inconsistent cube\n")
-
-	return fasttocube(compose_fast(cubetofast(c1), cubetofast(c2)));
-}
-
-cube_t
-inverse(cube_t cube)
-{
-	cube_t ret;
-	uint8_t i, piece, orien;
-
-	DBG_ASSERT(isconsistent(cube), zero,
-	    "inverse error: inconsistent cube\n");
-
-	ret = zero;
-
-	for (i = 0; i < 12; i++) {
-		piece = cube.edge[i];
-		orien = piece & _eobit;
-		ret.edge[piece & _pbits] = i | orien;
-	}
-
-	for (i = 0; i < 8; i++) {
-		piece = cube.corner[i];
-		orien = ((piece << 1) | (piece >> 1)) & _cobits2;
-		ret.corner[piece & _pbits] = i | orien;
-	}
-
-	return ret;
-}
-
-cube_t
-applymoves(cube_t cube, const char *buf)
-{
-	cube_fast_t fast;
-	uint8_t r, m;
-	const char *b;
-
-	DBG_ASSERT(isconsistent(cube), zero,
-	    "move error: inconsistent cube\n");
-
-	fast = cubetofast(cube);
-
-	for (b = buf; *b != '\0'; b++) {
-		while (*b == ' ' || *b == '\t' || *b == '\n')
-			b++;
-		if (*b == '\0')
-			goto applymoves_finish;
-		if ((r = readmove(*b)) == _error)
-			goto applymoves_error;
-		if ((m = readmodifier(*(b+1))) != 0)
-			b++;
-		fast = move(fast, r + m);
-	}
-
-applymoves_finish:
-	return fasttocube(fast);
-
-applymoves_error:
-	DBG_LOG("applymoves error\n");
-	return zero;
-}
-
-cube_t
-applytrans(cube_t cube, const char *buf)
-{
-	cube_fast_t fast;
-	uint8_t t;
-
-	DBG_ASSERT(isconsistent(cube), zero,
-	    "transformation error: inconsistent cube\n");
-
-	t = readtrans(buf);
-	fast = cubetofast(cube);
-	fast = transform(fast, t);
-
-	return fasttocube(fast);
-}
-
-cube_t
-readcube(const char *format, const char *buf)
-{
-	cube_t cube;
-
-	if (!strcmp(format, "H48")) {
-		cube = readcube_H48(buf);
-	} else if (!strcmp(format, "LST")) {
-		cube = readcube_LST(buf);
-	} else {
-		DBG_LOG("Cannot read cube in the given format\n");
-		cube = zero;
-	}
-
-	return cube;
-}
-
-void
-writecube(const char *format, cube_t cube, char *buf)
-{
-	char *errormsg;
-	size_t len;
-
-	if (!isconsistent(cube)) {
-		errormsg = "ERROR: cannot write inconsistent cube";
-		goto writecube_error;
-	}
-
-	if (!strcmp(format, "H48")) {
-		writecube_H48(cube, buf);
-	} else if (!strcmp(format, "LST")) {
-		writecube_LST(cube, buf);
-	} else {
-		errormsg = "ERROR: cannot write cube in the given format";
-		goto writecube_error;
-	}
-
-	return;
-
-writecube_error:
-	DBG_LOG("writecube error, see stdout for details\n");
-	len = strlen(errormsg);
-	memcpy(buf, errormsg, len);
-	buf[len] = '\n';
-	buf[len+1] = '\0';
-}
-
-_static int
-permsign(uint8_t *a, int n)
-{
-	int i, j;
-	uint8_t ret = 0;
-
-	for (i = 0; i < n; i++)
-		for (j = i+1; j < n; j++)
-			ret += a[i] > a[j] ? 1 : 0;
-
-	return ret % 2;
-}
-
-_static uint8_t
-readco(const char *str)
-{
-	if (*str == '0')
-		return 0;
-	if (*str == '1')
-		return _ctwist_cw;
-	if (*str == '2')
-		return _ctwist_ccw;
-
-	DBG_LOG("Error reading CO\n");
-	return _error;
-}
-
-_static uint8_t
-readcp(const char *str)
-{
-	uint8_t c;
-
-	for (c = 0; c < 8; c++)
-		if (!strncmp(str, cornerstr[c], 3) ||
-		    !strncmp(str, cornerstralt[c], 3))
-			return c;
-
-	DBG_LOG("Error reading CP\n");
-	return _error;
-}
-
-_static uint8_t
-readeo(const char *str)
-{
-	if (*str == '0')
-		return 0;
-	if (*str == '1')
-		return _eflip;
-
-	DBG_LOG("Error reading EO\n");
-	return _error;
-}
-
-_static uint8_t
-readep(const char *str)
-{
-	uint8_t e;
-
-	for (e = 0; e < 12; e++)
-		if (!strncmp(str, edgestr[e], 2))
-			return e;
-
-	DBG_LOG("Error reading EP\n");
-	return _error;
-}
-
-_static cube_t
-readcube_H48(const char *buf)
-{
-	int i;
-	uint8_t piece, orient;
-	cube_t ret = {0};
-	const char *b;
-	
-	b = buf;
-
-	for (i = 0; i < 12; i++) {
-		while (*b == ' ' || *b == '\t' || *b == '\n')
-			b++;
-		if ((piece = readep(b)) == _error)
-			return zero;
-		b += 2;
-		if ((orient = readeo(b)) == _error)
-			return zero;
-		b++;
-		ret.edge[i] = piece | orient;
-	}
-	for (i = 0; i < 8; i++) {
-		while (*b == ' ' || *b == '\t' || *b == '\n')
-			b++;
-		if ((piece = readcp(b)) == _error)
-			return zero;
-		b += 3;
-		if ((orient = readco(b)) == _error)
-			return zero;
-		b++;
-		ret.corner[i] = piece | orient;
-	}
-
-	return ret;
-}
-
-_static uint8_t
-readpiece_LST(const char **b)
-{
-	uint8_t ret;
-	bool read;
-
-	while (**b == ',' || **b == ' ' || **b == '\t' || **b == '\n')
-		(*b)++;
-
-	for (ret = 0, read = false; **b >= '0' && **b <= '9'; (*b)++) {
-		read = true;
-		ret = ret * 10 + (**b) - '0';
-	}
-
-	return read ? ret : _error;
-}
-
-_static cube_t
-readcube_LST(const char *buf)
-{
-	int i;
-	cube_t ret = {0};
-
-	for (i = 0; i < 8; i++)
-		ret.corner[i] = readpiece_LST(&buf);
-
-	for (i = 0; i < 12; i++)
-		ret.edge[i] = readpiece_LST(&buf);
-
-	return ret;
-}
-
-_static int
-writepiece_LST(uint8_t piece, char *buf)
-{
-	char digits[3];
-	int i, len = 0;
-
-	while (piece != 0) {
-		digits[len++] = (piece % 10) + '0';
-		piece /= 10;
-	}
-
-	if (len == 0)
-		digits[len++] = '0';
-
-	for (i = 0; i < len; i++)
-		buf[i] = digits[len-i-1];
-
-	buf[len] = ',';
-	buf[len+1] = ' ';
-
-	return len+2;
-}
-
-_static void
-writecube_H48(cube_t cube, char *buf)
-{
-	uint8_t piece, perm, orient;
-	int i;
-
-	for (i = 0; i < 12; i++) {
-		piece = cube.edge[i];
-		perm = piece & _pbits;
-		orient = (piece & _eobit) >> _eoshift;
-		buf[4*i    ] = edgestr[perm][0];
-		buf[4*i + 1] = edgestr[perm][1];
-		buf[4*i + 2] = orient + '0';
-		buf[4*i + 3] = ' ';
-	}
-	for (i = 0; i < 8; i++) {
-		piece = cube.corner[i];
-		perm = piece & _pbits;
-		orient = (piece & _cobits) >> _coshift;
-		buf[48 + 5*i    ] = cornerstr[perm][0];
-		buf[48 + 5*i + 1] = cornerstr[perm][1];
-		buf[48 + 5*i + 2] = cornerstr[perm][2];
-		buf[48 + 5*i + 3] = orient + '0';
-		buf[48 + 5*i + 4] = ' ';
-	}
-
-	buf[48+39] = '\0';
-}
-
-_static void
-writecube_LST(cube_t cube, char *buf)
-{
-	int i, ptr;
-	uint8_t piece;
-
-	ptr = 0;
-
-	for (i = 0; i < 8; i++) {
-		piece = cube.corner[i];
-		ptr += writepiece_LST(piece, buf + ptr);
-	}
-
-	for (i = 0; i < 12; i++) {
-		piece = cube.edge[i];
-		ptr += writepiece_LST(piece, buf + ptr);
-	}
-
-	*(buf+ptr-2) = 0;
-}
-
-_static uint8_t
-readmove(char c)
-{
-	switch (c) {
-	case 'U':
-		return _move_U;
-	case 'D':
-		return _move_D;
-	case 'R':
-		return _move_R;
-	case 'L':
-		return _move_L;
-	case 'F':
-		return _move_F;
-	case 'B':
-		return _move_B;
-	default:
-		return _error;
-	}
-}
-
-_static uint8_t
-readmodifier(char c)
-{
-	switch (c) {
-	case '1': /* Fallthrough */
-	case '2': /* Fallthrough */
-	case '3':
-		return c - '0' - 1;
-	case '\'':
-		return 2;
-	default:
-		return 0;
-	}
-}
-
-_static uint8_t
-readtrans(const char *buf)
-{
-	uint8_t t;
-
-	for (t = 0; t < 48; t++)
-		if (!strncmp(buf, transstr[t], 11))
-			return t;
-
-	DBG_LOG("readtrans error\n");
-	return _error;
-}
-
-_static int
-writemoves(uint8_t *m, int n, char *buf)
-{
-	int i;
-	size_t len;
-	const char *s;
-	char *b;
-
-	for (i = 0, b = buf; i < n; i++, b++) {
-		s = movestr[m[i]];
-		len = strlen(s);
-		memcpy(b, s, len);
-		b += len;	
-		*b = ' ';
-	}
-
-	if (b != buf)
-		b--; /* Remove last space */
-	*b = '\0';
-
-	return b - buf;
-}
-
-_static void
-writetrans(uint8_t t, char *buf)
-{
-	if (t >= 48)
-		memcpy(buf, "error trans", 11);
-	else
-		memcpy(buf, transstr[t], 11);
-	buf[11] = '\0';
-}
-
-_static cube_fast_t
-move(cube_fast_t c, uint8_t m)
-{
-	switch (m) {
-	case _move_U:
-		return _move(U, c);
-	case _move_U2:
-		return _move(U2, c);
-	case _move_U3:
-		return _move(U3, c);
-	case _move_D:
-		return _move(D, c);
-	case _move_D2:
-		return _move(D2, c);
-	case _move_D3:
-		return _move(D3, c);
-	case _move_R:
-		return _move(R, c);
-	case _move_R2:
-		return _move(R2, c);
-	case _move_R3:
-		return _move(R3, c);
-	case _move_L:
-		return _move(L, c);
-	case _move_L2:
-		return _move(L2, c);
-	case _move_L3:
-		return _move(L3, c);
-	case _move_F:
-		return _move(F, c);
-	case _move_F2:
-		return _move(F2, c);
-	case _move_F3:
-		return _move(F3, c);
-	case _move_B:
-		return _move(B, c);
-	case _move_B2:
-		return _move(B2, c);
-	case _move_B3:
-		return _move(B3, c);
-	default:
-		DBG_LOG("move error, unknown move\n");
-		return zero_fast;
-	}
-}
-
-_static cube_fast_t
-transform(cube_fast_t c, uint8_t t)
-{
-	switch (t) {
-	case _trans_UFr:
-		return _trans_rotation(UFr, c);
-	case _trans_ULr:
-		return _trans_rotation(ULr, c);
-	case _trans_UBr:
-		return _trans_rotation(UBr, c);
-	case _trans_URr:
-		return _trans_rotation(URr, c);
-	case _trans_DFr:
-		return _trans_rotation(DFr, c);
-	case _trans_DLr:
-		return _trans_rotation(DLr, c);
-	case _trans_DBr:
-		return _trans_rotation(DBr, c);
-	case _trans_DRr:
-		return _trans_rotation(DRr, c);
-	case _trans_RUr:
-		return _trans_rotation(RUr, c);
-	case _trans_RFr:
-		return _trans_rotation(RFr, c);
-	case _trans_RDr:
-		return _trans_rotation(RDr, c);
-	case _trans_RBr:
-		return _trans_rotation(RBr, c);
-	case _trans_LUr:
-		return _trans_rotation(LUr, c);
-	case _trans_LFr:
-		return _trans_rotation(LFr, c);
-	case _trans_LDr:
-		return _trans_rotation(LDr, c);
-	case _trans_LBr:
-		return _trans_rotation(LBr, c);
-	case _trans_FUr:
-		return _trans_rotation(FUr, c);
-	case _trans_FRr:
-		return _trans_rotation(FRr, c);
-	case _trans_FDr:
-		return _trans_rotation(FDr, c);
-	case _trans_FLr:
-		return _trans_rotation(FLr, c);
-	case _trans_BUr:
-		return _trans_rotation(BUr, c);
-	case _trans_BRr:
-		return _trans_rotation(BRr, c);
-	case _trans_BDr:
-		return _trans_rotation(BDr, c);
-	case _trans_BLr:
-		return _trans_rotation(BLr, c);
-	case _trans_UFm:
-		return _trans_mirrored(UFm, c);
-	case _trans_ULm:
-		return _trans_mirrored(ULm, c);
-	case _trans_UBm:
-		return _trans_mirrored(UBm, c);
-	case _trans_URm:
-		return _trans_mirrored(URm, c);
-	case _trans_DFm:
-		return _trans_mirrored(DFm, c);
-	case _trans_DLm:
-		return _trans_mirrored(DLm, c);
-	case _trans_DBm:
-		return _trans_mirrored(DBm, c);
-	case _trans_DRm:
-		return _trans_mirrored(DRm, c);
-	case _trans_RUm:
-		return _trans_mirrored(RUm, c);
-	case _trans_RFm:
-		return _trans_mirrored(RFm, c);
-	case _trans_RDm:
-		return _trans_mirrored(RDm, c);
-	case _trans_RBm:
-		return _trans_mirrored(RBm, c);
-	case _trans_LUm:
-		return _trans_mirrored(LUm, c);
-	case _trans_LFm:
-		return _trans_mirrored(LFm, c);
-	case _trans_LDm:
-		return _trans_mirrored(LDm, c);
-	case _trans_LBm:
-		return _trans_mirrored(LBm, c);
-	case _trans_FUm:
-		return _trans_mirrored(FUm, c);
-	case _trans_FRm:
-		return _trans_mirrored(FRm, c);
-	case _trans_FDm:
-		return _trans_mirrored(FDm, c);
-	case _trans_FLm:
-		return _trans_mirrored(FLm, c);
-	case _trans_BUm:
-		return _trans_mirrored(BUm, c);
-	case _trans_BRm:
-		return _trans_mirrored(BRm, c);
-	case _trans_BDm:
-		return _trans_mirrored(BDm, c);
-	case _trans_BLm:
-		return _trans_mirrored(BLm, c);
-	default:
-		DBG_LOG("transform error, unknown transformation\n");
-		return zero_fast;
-	}
-}
-
-/* h is the number of eo bits used */
-_static_inline int64_t
-coord_h48(cube_fast_t c, uint32_t *cocsepdata, uint8_t h)
-{
-	cube_fast_t d;
-	int64_t cocsep, coclass, esep, eo, esize, ret;
-	uint32_t data;
-	uint8_t ttrep;
-
-	DBG_ASSERT(h <= 11, -1, "coord_h48: h must be between 0 and 11\n");
-
-	cocsep = coord_fast_cocsep(c);
-	data = cocsepdata[cocsep];
-	coclass = (data & (0xFFFFU << 16U)) >> 16U;
-	ttrep = (data & (0xFFU << 8U)) >> 8U;
-
-	d = transform(c, ttrep); /* TODO: transform only edges */
-	esep = coord_fast_esep(d);
-	eo = coord_fast_eo(d);
-
-	esize = (_12c4 * _8c4) << h;
-	ret = (coclass * esize) + (esep << h) + (eo >> (11-h));
-
-	return ret;
-}
-
-/******************************************************************************
-Section: moves, move sequences and transformations
-
-This section contains methods to work with moves and arrays of moves. They
-do not rely on the cube structure.
-******************************************************************************/
-
-_static_inline bool allowednextmove(uint8_t *, uint8_t);
-_static_inline uint8_t inverse_trans(uint8_t);
-_static_inline uint8_t movebase(uint8_t);
-_static_inline uint8_t moveaxis(uint8_t);
-
-_static bool
-allowednextmove(uint8_t *moves, uint8_t n)
-{
-	uint8_t base[3], axis[3];
-
-	if (n < 2)
-		return true;
-
-	base[0] = movebase(moves[n-1]);
-	axis[0] = moveaxis(moves[n-1]);
-	base[1] = movebase(moves[n-2]);
-	axis[1] = moveaxis(moves[n-2]);
-
-	if (base[0] == base[1] || (axis[0] == axis[1] && base[0] < base[1]))
-		return false;
-
-	if (n == 2)
-		return true;
-
-	base[2] = movebase(moves[n-3]);
-	axis[2] = moveaxis(moves[n-3]);
-
-	return axis[1] != axis[2] || base[0] != base[2];
-}
-
-_static_inline uint8_t
-inverse_trans(uint8_t t)
-{
-	return inverse_trans_table[t];
-}
-
-_static_inline uint8_t
-movebase(uint8_t move)
-{
-	return move / 3;
-}
-
-_static_inline uint8_t
-moveaxis(uint8_t move)
-{
-	return move / 6;
-}
-
-/******************************************************************************
-Section: auxiliary procedures for H48 optimal solver (temporary)
-******************************************************************************/
-
-#define _esep_ind(i)      (i / 8U)
-#define _esep_shift(i)    (4U * (i % 8U))
-#define _esep_mask(i)     (((1U << 4U) - 1U) << _esep_shift(i))
-#define _visited_ind(i)   (i / 8U)
-#define _visited_mask(i)  (1U << (i % 8U))
-
-typedef struct {
-	cube_fast_t cube;
-	uint8_t *visited;
-	uint8_t *moves;
-	uint8_t nmoves;
-	uint8_t depth;
-	uint16_t *nclasses;
-	uint32_t *cocsepdata;
-	uint32_t *buf32;
-} dfsarg_gendata_t;
-
-_static size_t gendata_cocsep(void *);
-_static uint32_t gendata_cocsep_dfs( /* TODO: use dfsarg */
-    cube_fast_t, uint8_t, uint8_t, uint16_t *, uint32_t *, uint8_t *);
-
-_static size_t gendata_esep(const void *, void *);
-_static uint32_t gendata_esep_dfs(dfsarg_gendata_t *);
-
-_static_inline bool get_visited(const uint8_t *, int64_t);
-_static_inline void set_visited(uint8_t *, int64_t);
-_static_inline uint8_t get_esep_pval(const uint32_t *, int64_t);
-_static_inline void set_esep_pval(uint32_t *, int64_t, uint8_t);
-
-/*
-Each element of the cocsep table is a uint32_t used as follows:
-  - Lowest 8-bit block: pruning value
-  - Second-lower 8-bit block: "ttrep" (transformation to representative)
-  - Top 16-bit block: symcoord value
-After the data as described above, more auxiliary information is appended:
-  - A uint32_t representing the number of symmetry classes
-  - A uint32_t representing the highest value of the pruning table
-  - One uint32_t for each "line" of the pruning table, representing the number
-    of positions having that pruning value.
-*/
-_static size_t
-gendata_cocsep(void *buf)
-{
-	size_t tablesize = _3p7 << 7U;
-	size_t visitedsize = (tablesize + 7U) / 8U;
-	size_t infosize = 12;
-
-	cube_fast_t solved;
-	uint32_t *buf32, *info, cc;
-	uint16_t n;
-	uint8_t i, j, visited[visitedsize];
-
-	buf32 = (uint32_t *)buf;
-	info = buf32 + tablesize;
-	memset(buf32, 0xFFU, 4*tablesize);
-	memset(info, 0, 4*infosize);
-
-	solved = cubetofast(solvedcube());
-	for (i = 0, n = 0, cc = 0; i < 10; i++) {
-		memset(visited, 0, visitedsize);
-		DBG_LOG("cocsep: generating depth %" PRIu8 "\n", i);
-		cc = gendata_cocsep_dfs(solved, 0, i, &n, buf32, visited);
-		info[i+2] = cc;
-		DBG_LOG("found %" PRIu32 "\n", cc);
-	}
-
-	info[0] = (uint32_t)n;
-	info[1] = 9U; /* Known max pruning value */
-	DBG_ASSERT(n == COCSEP_CLASSES, 0,
-	    "cocsep: computed %" PRIu16 " symmetry classes, "
-	    "expected %" PRIu16 "\n", n, COCSEP_CLASSES);
-
-	DBG_LOG("cocsep data computed\n");
-	DBG_LOG("Symmetry classes: %" PRIu32 "\n", info[0]);
-	DBG_LOG("Maximum pruning value: %" PRIu32 "\n", info[1]);
-	DBG_LOG("Pruning value distribution:\n");
-	for (j = 0; j < 10; j++)
-		DBG_LOG("%" PRIu8 ":\t%" PRIu32 "\n", j, info[j+2]);
-
-	return 4*(tablesize + infosize);
-}
-
-_static uint32_t
-gendata_cocsep_dfs(
-	cube_fast_t c,
-	uint8_t depth,
-	uint8_t maxdepth,
-	uint16_t *n,
-	uint32_t *buf32,
-	uint8_t *visited
-)
-{
-	uint8_t m, t, tinv, olddepth;
-	uint32_t cc;
-	int64_t i;
-	cube_fast_t d;
-
-	i = coord_fast_cocsep(c);
-	olddepth = (uint8_t)(buf32[i] & 0xFFU);
-	if (olddepth < depth || get_visited(visited, i))
-		return 0;
-	set_visited(visited, i);
-
-	if (depth == maxdepth) {
-		if ((buf32[i] & 0xFFU) != 0xFFU)
-			return 0;
-
-		for (t = 0, cc = 0; t < 48; t++) {
-			d = transform(c, t);
-			i = coord_fast_cocsep(d);
-			set_visited(visited, i);
-			tinv = inverse_trans(t);
-			cc += (buf32[i] & 0xFFU) == 0xFFU;
-			buf32[i] = (*n << 16U) | (tinv << 8U) | depth;
-		}
-		(*n)++;
-
-		return cc;
-	}
-
-	for (m = 0, cc = 0; m < 18; m++) {
-		d = move(c, m);
-		cc += gendata_cocsep_dfs(d, depth+1, maxdepth, n, buf32, visited);
-	}
-
-	return cc;
-}
-
-/*
-TODO description
-generating fixed table with h=0, k=4
-*/
-_static size_t
-gendata_esep(const void *cocsepdata, void *buf)
-{
-	size_t tablesize = (COCSEP_CLASSES * _12c4 * _8c4) / 2U;
-	size_t visitedsize = (tablesize * 2U + 7U) / 8U;
-	size_t infosize = 25; /* TODO unknown yet */
-
-	uint32_t *buf32, *info, cc;
-	uint8_t moves[20];
-	dfsarg_gendata_t arg;
-
-	arg.visited = malloc(visitedsize);
-	buf32 = (uint32_t *)buf;
-	info = buf32 + tablesize;
-	memset(buf32, 0xFFU, 4*tablesize);
-	memset(info, 0, 4*infosize);
-
-	arg.cube = cubetofast(solvedcube());
-	arg.moves = moves;
-	arg.nmoves = 0;
-	arg.cocsepdata = (uint32_t *)cocsepdata;
-	arg.buf32 = buf32;
-	/* TODO loop until no more is done, not until 12! (or hardcode limits)*/
-	for (arg.depth = 0, cc = 0; arg.depth < 12; arg.depth++) {
-		DBG_LOG("esep: generating depth %" PRIu8 "\n", arg.depth);
-		memset(arg.visited, 0, visitedsize);
-		cc = gendata_esep_dfs(&arg);
-		info[arg.depth+1] = cc;
-		DBG_LOG("found %" PRIu32 "\n", cc);
-	}
-	free(arg.visited);
-
-	return cc;
-}
-
-_static uint32_t
-gendata_esep_dfs(dfsarg_gendata_t *arg)
-{
-	uint8_t m, t, olddepth;
-	uint32_t cc;
-	uint64_t i;
-	cube_fast_t d;
-	dfsarg_gendata_t nextarg;
-
-	if (!allowednextmove(arg->moves, arg->nmoves))
-		return 0;
-
-	if (arg->nmoves > 0)
-		arg->cube = move(arg->cube, arg->moves[arg->nmoves-1]);
-
-	i = coord_h48(arg->cube, arg->cocsepdata, 0U);
-	olddepth = get_esep_pval(arg->buf32, i);
-
-	if (olddepth < arg->nmoves || get_visited(arg->visited, i))
-		return 0;
-	set_visited(arg->visited, i);
-
-	if (arg->nmoves == arg->depth) {
-		if (olddepth != 15U)
-			return 0;
-
-		for (t = 0, cc = 0; t < 48; t++) {
-			d = transform(arg->cube, t);
-			i = coord_h48(d, arg->cocsepdata, 0U);
-			set_visited(arg->visited, i);
-			cc += get_esep_pval(arg->buf32, i) == 15U;
-			set_esep_pval(arg->buf32, i, arg->depth);
-		}
-
-		return cc;
-	}
-
-
-	nextarg = *arg;
-	nextarg.nmoves = arg->nmoves + 1;
-	for (m = 0, cc = 0; m < 18; m++) {
-		nextarg.cube = arg->cube;
-		nextarg.moves[arg->nmoves] = m;
-		cc += gendata_esep_dfs(&nextarg);
-	}
-
-	return cc;
-}
-
-_static_inline bool get_visited(const uint8_t *a, int64_t i)
-{
-	return a[_visited_ind(i)] & _visited_mask(i);
-}
-
-_static_inline void set_visited(uint8_t *a, int64_t i)
-{
-	a[_visited_ind(i)] |= _visited_mask(i);
-}
-
-_static_inline uint8_t
-get_esep_pval(const uint32_t *buf32, int64_t i)
-{
-	return (buf32[_esep_ind(i)] & _esep_mask(i)) >> _esep_shift(i);
-}
-
-_static_inline void
-set_esep_pval(uint32_t *buf32, int64_t i, uint8_t val)
-{
-	buf32[_esep_ind(i)] =
-	    (buf32[_esep_ind(i)] & (~_esep_mask(i))) | (val << _esep_shift(i));
-}
-
-/******************************************************************************
-Section: solvers
-
-Here you can find the implementation of all the solving algorithms.
-******************************************************************************/
-
-typedef struct {
-	cube_fast_t cube;
-	uint8_t depth;
-	int64_t maxsols;
-	char **nextsol;
-	int64_t *nsols;
-	uint8_t nmoves;
-	uint8_t moves[20];
-	uint8_t (*estimate)(cube_fast_t);
-} dfsarg_generic_t;
-
-_static void solve_generic_appendsolution(dfsarg_generic_t *);
-_static int solve_generic_dfs(dfsarg_generic_t *);
-_static int64_t solve_generic(cube_t, const char *, int8_t, int8_t, int64_t,
-    int8_t, char *, uint8_t (*)(cube_fast_t));
-_static uint8_t estimate_simple(cube_fast_t);
-_static int64_t solve_simple(cube_t, int8_t, int8_t, int64_t, int8_t, char *);
-
-int64_t
-solve(
-	cube_t cube,
-	const char *solver,
-	const char *options,
-	const char *nisstype,
-	int8_t minmoves,
-	int8_t maxmoves,
-	int64_t maxsols,
-	int8_t optimal,
-	const void *data,
-	char *solutions
-)
-{
-	DBG_WARN(!strcmp(options, ""),
-	     "solve: 'options' not implemented yet, ignoring\n");
-
-	DBG_WARN(!strcmp(nisstype, ""),
-	     "solve: NISS not implemented yet, ignoring 'nisstype'\n");
-
-	DBG_WARN(data == NULL,
-	    "solve: 'data' not implemented yet, ignoring\n");
-
-	if (!strcmp(solver, "optimal") || !strcmp(solver, "simple")) {
-		return solve_simple(
-			cube,
-			minmoves,
-			maxmoves,
-			maxsols,
-			optimal,
-			solutions
-		);
-	} else {
-		DBG_LOG("solve: unknown solver '%s'\n", solver);
-		return -1;
-	}
-
-	DBG_LOG("solve: error\n");
-	return -1;
-}
-
-void
-multisolve(
-	int n,
-	cube_t *cube,
-	const char *solver,
-	const void *data,
-	char *sols
-)
-{
-	char *s;
-	int i;
-
-	s = sols;
-	for (i = 0; i < n; i++) {
-		solve(cube[i], solver, "", "normal", 0, -1, 1, 0, NULL, s);
-		while (s++);
-	}
-}
-
-int64_t
-gendata(const char *solver, void *data)
-{
-	DBG_LOG("gendata: not implemented yet\n");
-
-	return -1;
-}
-
-_static void
-solve_generic_appendsolution(dfsarg_generic_t *arg)
-{
-	int strl;
-
-	strl = writemoves(arg->moves, arg->depth, *arg->nextsol);
-	DBG_LOG("Solution found: %s\n", *arg->nextsol);
-	*arg->nextsol += strl;
-	**arg->nextsol = '\n';
-	(*arg->nextsol)++;
-	(*arg->nsols)++;
-}
-
-_static int
-solve_generic_dfs(dfsarg_generic_t *arg)
-{
-	dfsarg_generic_t nextarg;
-	uint8_t m, bound;
-	int64_t ret;
-
-	if (!allowednextmove(arg->moves, arg->nmoves))
-		return 0;
-
-	if (arg->nmoves > 0)
-		arg->cube = move(arg->cube, arg->moves[arg->nmoves-1]);
-
-	bound = arg->estimate(arg->cube);
-	if (*arg->nsols == arg->maxsols || bound + arg->nmoves > arg->depth)
-		return 0;
-
-	if (bound == 0) {
-		if (arg->nmoves != arg->depth)
-			return 0;
-		solve_generic_appendsolution(arg);
-		return 1;
-	}
-
-	/* memcpy(&nextarg, arg, sizeof(dfsarg_generic_t)); */
-	nextarg = *arg;
-	nextarg.nmoves = arg->nmoves + 1;
-	for (m = 0, ret = 0; m < 18; m++) {
-		nextarg.cube = arg->cube;
-		nextarg.moves[arg->nmoves] = m;
-		ret += solve_generic_dfs(&nextarg);
-	}
-
-	return ret;
-}
-
-_static int64_t
-solve_generic(
-	cube_t cube,
-	const char *nisstype,
-	/* TODO: handle NISS */
-	int8_t minmoves,
-	int8_t maxmoves,
-	int64_t maxsols,
-	int8_t optimal,
-	char *sols,
-	uint8_t (*estimate)(cube_fast_t)
-	/* TODO: add validator */
-	/* TODO: maybe add data for estimate */
-	/* TODO: add moveset (and allowednext?) */
-)
-{
-	dfsarg_generic_t arg;
-	int64_t ret, tmp, first;
-
-	if (!issolvable(cube)) {
-		DBG_LOG("solve: cube is not solvable\n");
-		return -1;
-	}
-
-	if (issolved(cube)) {
-		DBG_LOG("solve: cube is already solved\n");
-		sols[0] = '\n';
-		sols[1] = 0;
-		return 1;
-	}
-
-	DBG_WARN(!strcmp(nisstype, ""),
-	    "solve: NISS not implemented yet, 'nisstype' ignored\n");
-
-	if (minmoves < 0) {
-		DBG_LOG("solve: 'minmoves' is negative, setting to 0\n");
-		minmoves = 0;
-	}
-
-	if (maxmoves < 0) {
-		DBG_LOG("solve: invalid 'maxmoves', setting to 20\n");
-		maxmoves = 20;
-	}
-
-	if (maxsols < 0) {
-		DBG_LOG("solve: 'maxsols' is negative\n");
-		return -1;
-	}
-
-	if (maxsols == 0) {
-		DBG_LOG("solve: 'maxsols' is 0\n");
-		return 0;
-	}
-
-	if (sols == NULL) {
-		DBG_LOG("solve: return parameter 'sols' is NULL\n");
-		return -1;
-	}
-
-	if (estimate == NULL) {
-		DBG_LOG("solve: 'estimate' is NULL\n");
-		return -1;
-	}
-
-	arg = (dfsarg_generic_t) {
-		.cube = cubetofast(cube),
-		.maxsols = maxsols,
-		.nextsol = &sols,
-		.nsols = &ret,
-		.nmoves = 0,
-		.moves = {0},
-		.estimate = estimate,
-	};
-
-	ret = 0;
-	first = -1;
-	for (arg.depth = minmoves; arg.depth <= maxmoves; arg.depth++) {
-		tmp = solve_generic_dfs(&arg);
-		if (tmp != 0)
-			first = arg.depth;
-
-		DBG_LOG("Found %" PRId64 " solution%s at depth %" PRIu8 "\n",
-		        tmp, tmp == 1 ? "" : "s", arg.depth);
-
-		if (ret >= maxsols)
-			break;
-
-		if (optimal >= 0 && first >= 0 && arg.depth - first == optimal)
-			break;
-	}
-
-	DBG_ASSERT(ret <= maxsols, ret,
-	    "solve: found more than 'maxsols' solutions\n");
-
-	return ret;
-}
-
-_static uint8_t
-estimate_simple(cube_fast_t cube)
-{
-	return issolved_fast(cube) ? 0 : 1;
-}
-
-_static int64_t
-solve_simple(
-	cube_t cube,
-	int8_t minmoves,
-	int8_t maxmoves,
-	int64_t maxsols,
-	int8_t optimal,
-	char *solutions
-)
-{
-	return solve_generic(
-		cube,
-		"",
-		minmoves,
-		maxmoves,
-		maxsols,
-		optimal,
-		solutions,
-		&estimate_simple
-	);
-}
+#include "cube_routines.h"
+#include "moves.h"
+#include "solve_h48.h"
+#include "solve_generic.h"
diff --git a/src/cube_avx2.h b/src/cube_avx2.h
@@ -0,0 +1,229 @@
+typedef __m256i cube_fast_t;
+
+#define _co2_avx2 _mm256_set_epi64x(0, 0, 0, 0x6060606060606060)
+#define _cocw_avx2 _mm256_set_epi64x(0, 0, 0, 0x2020202020202020)
+#define _cp_avx2 _mm256_set_epi64x(0, 0, 0, 0x0707070707070707)
+#define _ep_avx2 _mm256_set_epi64x(0x0F0F0F0F, 0x0F0F0F0F0F0F0F0F, 0, 0)
+#define _eo_avx2 _mm256_set_epi64x(0x10101010, 0x1010101010101010, 0, 0)
+
+_static_inline cube_fast_t fastcube(
+    uint8_t, uint8_t, uint8_t, uint8_t, uint8_t,
+    uint8_t, uint8_t, uint8_t, uint8_t, uint8_t,
+    uint8_t, uint8_t, uint8_t, uint8_t, uint8_t,
+    uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
+);
+_static cube_fast_t cubetofast(cube_t);
+_static cube_t fasttocube(cube_fast_t);
+_static_inline bool equal_fast(cube_fast_t, cube_fast_t);
+_static_inline bool issolved_fast(cube_fast_t);
+_static_inline cube_fast_t invertco_fast(cube_fast_t);
+_static_inline cube_fast_t compose_fast(cube_fast_t, cube_fast_t);
+
+_static_inline int64_t coord_fast_co(cube_fast_t);
+_static_inline int64_t coord_fast_csep(cube_fast_t);
+_static_inline int64_t coord_fast_cocsep(cube_fast_t);
+_static_inline int64_t coord_fast_eo(cube_fast_t);
+_static_inline int64_t coord_fast_esep(cube_fast_t);
+
+_static_inline cube_fast_t
+fastcube(
+	uint8_t c_ufr,
+	uint8_t c_ubl,
+	uint8_t c_dfl,
+	uint8_t c_dbr,
+	uint8_t c_ufl,
+	uint8_t c_ubr,
+	uint8_t c_dfr,
+	uint8_t c_dbl,
+
+	uint8_t e_uf,
+	uint8_t e_ub,
+	uint8_t e_db,
+	uint8_t e_df,
+	uint8_t e_ur,
+	uint8_t e_ul,
+	uint8_t e_dl,
+	uint8_t e_dr,
+	uint8_t e_fr,
+	uint8_t e_fl,
+	uint8_t e_bl,
+	uint8_t e_br
+)
+{
+	return _mm256_set_epi8(
+		0, 0, 0, 0, e_br, e_bl, e_fl, e_fr,
+		e_dr, e_dl, e_ul, e_ur, e_df, e_db, e_ub, e_uf,
+		0, 0, 0, 0, 0, 0, 0, 0,
+		c_dbl, c_dfr, c_ubr, c_ufl, c_dbr, c_dfl, c_ubl, c_ufr
+	);
+}
+
+_static cube_fast_t
+cubetofast(cube_t a)
+{
+	uint8_t aux[32];
+
+	memset(aux, 0, 32);
+	memcpy(aux, &a.corner, 8);
+	memcpy(aux + 16, &a.edge, 12);
+
+	return _mm256_loadu_si256((__m256i_u *)&aux);
+}
+
+_static cube_t
+fasttocube(cube_fast_t c)
+{
+	cube_t a;
+	uint8_t aux[32];
+
+	_mm256_storeu_si256((__m256i_u *)aux, c);
+	memcpy(&a.corner, aux, 8);
+	memcpy(&a.edge, aux + 16, 12);
+
+	return a;
+}
+
+_static_inline bool
+equal_fast(cube_fast_t c1, cube_fast_t c2)
+{
+	int32_t mask;
+	__m256i cmp;
+
+	cmp = _mm256_cmpeq_epi8(c1, c2);
+	mask = _mm256_movemask_epi8(cmp);
+
+	return mask == ~0;
+}
+
+_static_inline bool
+issolved_fast(cube_fast_t cube)
+{
+	return equal_fast(cube, solved_fast);
+}
+
+_static_inline cube_fast_t
+invertco_fast(cube_fast_t c)
+{
+        cube_fast_t co, shleft, shright, summed, newco, cleanco, ret;
+
+        co = _mm256_and_si256(c, _co2_avx2);
+        shleft = _mm256_slli_epi32(co, 1);
+        shright = _mm256_srli_epi32(co, 1);
+        summed = _mm256_or_si256(shleft, shright);
+        newco = _mm256_and_si256(summed, _co2_avx2);
+        cleanco = _mm256_xor_si256(c, co);
+        ret = _mm256_or_si256(cleanco, newco);
+
+        return ret;
+}
+
+_static_inline cube_fast_t
+compose_fast(cube_fast_t c1, cube_fast_t c2)
+{
+	cube_fast_t s, b, eo2, co1, co2, aux, auy1, auy2, auz1, auz2;
+
+	/* Permute and clean unused bits */
+	s = _mm256_shuffle_epi8(c1, c2);
+	b = _mm256_set_epi8(
+		~0, ~0, ~0, ~0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, 0, 0, 0, 0, 0, 0, 0, 0
+	);
+	s = _mm256_andnot_si256(b, s);
+
+	/* Change EO */
+	eo2 = _mm256_and_si256(c2, _eo_avx2);
+	s = _mm256_xor_si256(s, eo2);
+
+	/* Change CO */
+	co1 = _mm256_and_si256(s, _co2_avx2);
+	co2 = _mm256_and_si256(c2, _co2_avx2);
+	aux = _mm256_add_epi8(co1, co2);
+	auy1 = _mm256_add_epi8(aux, _cocw_avx2);
+	auy2 = _mm256_srli_epi32(auy1, 2);
+	auz1 = _mm256_add_epi8(aux, auy2);
+	auz2 = _mm256_and_si256(auz1, _co2_avx2);
+
+	/* Put together */
+	s = _mm256_andnot_si256(_co2_avx2, s);
+	s = _mm256_or_si256(s, auz2);
+
+	return s;
+}
+
+_static_inline int64_t
+coord_fast_co(cube_fast_t c)
+{
+	cube_fast_t co;
+	int64_t mem[4], ret, i, p;
+
+	co = _mm256_and_si256(c, _co2_avx2);
+	_mm256_storeu_si256((__m256i *)mem, co);
+
+	mem[0] >>= 5L;
+	for (i = 0, ret = 0, p = 1; i < 7; i++, mem[0] >>= 8L, p *= 3)
+		ret += (mem[0] & 3L) * p;
+
+	return ret;
+}
+
+_static_inline int64_t
+coord_fast_csep(cube_fast_t c)
+{
+	cube_fast_t cp, shifted;
+	int64_t mask;
+
+	cp = _mm256_and_si256(c, _cp_avx2);
+	shifted = _mm256_slli_epi32(cp, 5);
+	mask = _mm256_movemask_epi8(shifted);
+
+	return mask & 0x7F;
+}
+
+_static_inline int64_t
+coord_fast_cocsep(cube_fast_t c)
+{
+	return (coord_fast_co(c) << 7) + coord_fast_csep(c);
+}
+
+_static_inline int64_t
+coord_fast_eo(cube_fast_t c)
+{
+	cube_fast_t eo, shifted;
+	int64_t mask;
+
+	eo = _mm256_and_si256(c, _eo_avx2);
+	shifted = _mm256_slli_epi32(eo, 3);
+	mask = _mm256_movemask_epi8(shifted);
+
+	return mask >> 17;
+}
+
+_static_inline int64_t
+coord_fast_esep(cube_fast_t c)
+{
+	cube_fast_t ep;
+	int64_t e, mem[4], i, j, k, l, ret1, ret2, bit1, bit2, is1;
+
+	ep = _mm256_and_si256(c, _ep_avx2);
+	_mm256_storeu_si256((__m256i *)mem, ep);
+
+	mem[3] <<= 8L;
+	ret1 = ret2 = 0;
+	k = l = 4;
+	for (i = 0, j = 0; i < 12; i++, mem[i/8 + 2] >>= 8L) {
+		e = mem[i/8 + 2];
+
+		bit1 = (e & _esepbit1) >> 2L;
+		bit2 = (e & _esepbit2) >> 3L;
+		is1 = (1 - bit2) * bit1;
+
+		ret1 += bit2 * binomial[11-i][k];
+		k -= bit2;
+
+		ret2 += is1 * binomial[7-j][l];
+		l -= is1;
+		j += (1-bit2);
+	}
+
+	return ret1 * 70 + ret2;
+}
diff --git a/src/cube_neon.h b/src/cube_neon.h
@@ -0,0 +1,6 @@
+/* TODO! */
+
+typedef struct {
+	uint8x16_t corner;
+	uint8x16_t edge;
+} cube_fast_t;
diff --git a/src/cube_portable.h b/src/cube_portable.h
@@ -0,0 +1,224 @@
+typedef cube_t cube_fast_t;
+
+_static_inline cube_fast_t fastcube(
+    uint8_t, uint8_t, uint8_t, uint8_t, uint8_t,
+    uint8_t, uint8_t, uint8_t, uint8_t, uint8_t,
+    uint8_t, uint8_t, uint8_t, uint8_t, uint8_t,
+    uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
+);
+_static cube_fast_t cubetofast(cube_t);
+_static cube_t fasttocube(cube_fast_t);
+_static_inline bool equal_fast(cube_fast_t, cube_fast_t);
+_static_inline bool issolved_fast(cube_fast_t);
+_static_inline cube_fast_t invertco_fast(cube_fast_t);
+_static_inline cube_fast_t compose_fast(cube_fast_t, cube_fast_t);
+
+_static_inline int64_t coord_fast_co(cube_fast_t);
+_static_inline int64_t coord_fast_csep(cube_fast_t);
+_static_inline int64_t coord_fast_cocsep(cube_fast_t);
+_static_inline int64_t coord_fast_eo(cube_fast_t);
+_static_inline int64_t coord_fast_esep(cube_fast_t);
+
+_static_inline cube_fast_t
+fastcube(
+	uint8_t c_ufr,
+	uint8_t c_ubl,
+	uint8_t c_dfl,
+	uint8_t c_dbr,
+	uint8_t c_ufl,
+	uint8_t c_ubr,
+	uint8_t c_dfr,
+	uint8_t c_dbl,
+
+	uint8_t e_uf,
+	uint8_t e_ub,
+	uint8_t e_db,
+	uint8_t e_df,
+	uint8_t e_ur,
+	uint8_t e_ul,
+	uint8_t e_dl,
+	uint8_t e_dr,
+	uint8_t e_fr,
+	uint8_t e_fl,
+	uint8_t e_bl,
+	uint8_t e_br
+)
+{
+	cube_fast_t cube = {
+		.corner = {
+			c_ufr, c_ubl, c_dfl, c_dbr, c_ufl, c_ubr, c_dfr, c_dbl
+		},
+		.edge = {
+			e_uf, e_ub, e_db, e_df, e_ur, e_ul,
+			e_dl, e_dr, e_fr, e_fl, e_bl, e_br
+		}
+	};
+
+	return cube;
+}
+
+_static cube_fast_t
+cubetofast(cube_t cube)
+{
+	cube_fast_t fast;
+	memcpy(&fast, &cube, sizeof(cube_fast_t));
+	return fast;
+}
+
+_static cube_t
+fasttocube(cube_fast_t fast)
+{
+	cube_t cube;
+	memcpy(&cube, &fast, sizeof(cube_fast_t));
+	return cube;
+}
+
+_static_inline bool
+equal_fast(cube_fast_t c1, cube_fast_t c2)
+{
+	uint8_t i;
+	bool ret;
+
+	ret = true;
+	for (i = 0; i < 8; i++)
+		ret = ret && c1.corner[i] == c2.corner[i];
+	for (i = 0; i < 12; i++)
+		ret = ret && c1.edge[i] == c2.edge[i];
+
+	return ret;
+}
+
+_static_inline bool
+issolved_fast(cube_fast_t cube)
+{
+	return equal_fast(cube, solved_fast);
+}
+
+_static_inline cube_fast_t
+invertco_fast(cube_fast_t c)
+{
+	uint8_t i, piece, orien;
+	cube_fast_t ret;
+
+	ret = c;
+	for (i = 0; i < 8; i++) {
+		piece = c.corner[i];
+		orien = ((piece << 1) | (piece >> 1)) & _cobits2;
+		ret.corner[i] = (piece & _pbits) | orien;
+	}
+
+	return ret;
+}
+
+_static_inline cube_fast_t
+compose_fast(cube_fast_t c1, cube_fast_t c2)
+{
+	cube_fast_t ret;
+	uint8_t i, piece1, piece2, p, orien, aux, auy;
+
+	ret = zero_fast;
+
+	for (i = 0; i < 12; i++) {
+		piece2 = c2.edge[i];
+		p = piece2 & _pbits;
+		piece1 = c1.edge[p];
+		orien = (piece2 ^ piece1) & _eobit;
+		ret.edge[i] = (piece1 & _pbits) | orien;
+	}
+
+	for (i = 0; i < 8; i++) {
+		piece2 = c2.corner[i];
+		p = piece2 & _pbits;
+		piece1 = c1.corner[p];
+		aux = (piece2 & _cobits) + (piece1 & _cobits);
+		auy = (aux + _ctwist_cw) >> 2U;
+		orien = (aux + auy) & _cobits2;
+		ret.corner[i] = (piece1 & _pbits) | orien;
+	}
+
+	return ret;
+}
+
+_static_inline int64_t
+coord_fast_co(cube_fast_t c)
+{
+	int i, p;
+	int64_t ret;
+
+	for (ret = 0, i = 0, p = 1; i < 7; i++, p *= 3)
+		ret += p * (c.corner[i] >> _coshift);
+
+	return ret;
+}
+
+/*
+For corner separation, we consider the axis (a.k.a. tetrad) each
+corner belongs to as 0 or 1 and we translate this sequence into binary.
+Ignoring the last bit, we have a value up to 2^7, but not all values are
+possible. Encoding this as a number from 0 to C(8,4) would save about 40%
+of space, but we are not going to use this coordinate in large tables.
+*/
+_static_inline int64_t
+coord_fast_csep(cube_fast_t c)
+{
+	int i, p;
+	int64_t ret;
+
+	for (ret = 0, i = 0, p = 1; i < 7; i++, p *= 2)
+		ret += p * ((c.corner[i] & _csepbit) >> 2U);
+
+	return ret;
+}
+
+_static_inline int64_t
+coord_fast_cocsep(cube_fast_t c)
+{
+	return (coord_fast_co(c) << 7) + coord_fast_csep(c);
+}
+
+_static_inline int64_t
+coord_fast_eo(cube_fast_t c)
+{
+	int i, p;
+	int64_t ret;
+
+	for (ret = 0, i = 1, p = 1; i < 12; i++, p *= 2)
+		ret += p * (c.edge[i] >> _eoshift);
+
+	return ret;
+}
+
+/*
+We encode the edge separation as a number from 0 to C(12,4)*C(8,4).
+It can be seen as the composition of two "subset index" coordinates.
+*/
+_static_inline int64_t
+coord_fast_esep(cube_fast_t c)
+{
+	int64_t i, j, k, l, ret1, ret2, bit1, bit2, is1;
+
+	for (i = 0, j = 0, k = 4, l = 4, ret1 = 0, ret2 = 0; i < 12; i++) {
+		/* Simple version:
+		if (c.edge[i] & _esepbit2) {
+			ret1 += binomial[11-i][k--];
+		} else {
+			if (c.edge[i] & _esepbit1)
+				ret2 += binomial[7-j][l--];
+			j++;
+		}
+		*/
+
+		bit1 = (c.edge[i] & _esepbit1) >> 2U;
+		bit2 = (c.edge[i] & _esepbit2) >> 3U;
+		is1 = (1 - bit2) * bit1;
+
+		ret1 += bit2 * binomial[11-i][k];
+		k -= bit2;
+
+		ret2 += is1 * binomial[7-j][l];
+		l -= is1;
+		j += (1-bit2);
+	}
+
+	return ret1 * 70 + ret2;
+}
diff --git a/src/cube_routines.h b/src/cube_routines.h
@@ -0,0 +1,730 @@
+#define _move(M, c) compose_fast(c, _move_cube_ ## M)
+#define _premove(M, c) compose_fast(_move_cube_ ## M, c)
+#define _trans_rotation(T, c) \
+	compose_fast(compose_fast(_trans_cube_ ## T, c), \
+	_trans_cube_ ## T ## _inverse)
+#define _trans_mirrored(T, c) \
+	invertco_fast(compose_fast(compose_fast(_trans_cube_ ## T, c), \
+	_trans_cube_ ## T ## _inverse))
+
+_static int permsign(uint8_t *, int);
+_static uint8_t readco(const char *);
+_static uint8_t readcp(const char *);
+_static uint8_t readeo(const char *);
+_static uint8_t readep(const char *);
+_static cube_t readcube_H48(const char *);
+_static uint8_t readpiece_LST(const char **);
+_static cube_t readcube_LST(const char *);
+_static int writepiece_LST(uint8_t, char *);
+_static void writecube_H48(cube_t, char *);
+_static void writecube_LST(cube_t, char *);
+_static uint8_t readmove(char);
+_static uint8_t readmodifier(char);
+_static uint8_t readtrans(const char *);
+_static int writemoves(uint8_t *, int, char *);
+_static void writetrans(uint8_t, char *);
+_static cube_fast_t move(cube_fast_t, uint8_t);
+_static cube_fast_t transform(cube_fast_t, uint8_t);
+
+cube_t
+solvedcube(void)
+{
+	return solved;
+}
+
+bool
+isconsistent(cube_t cube)
+{
+	uint8_t i, p, e, piece;
+	bool found[12];
+
+	for (i = 0; i < 12; i++)
+		found[i] = false;
+	for (i = 0; i < 12; i++) {
+		piece = cube.edge[i];
+		p = piece & _pbits;
+		e = piece & _eobit;
+		if (p >= 12)
+			goto inconsistent_ep;
+		if (e != 0 && e != _eobit)
+			goto inconsistent_eo;
+		found[p] = true;
+	}
+	for (i = 0; i < 12; i++)
+		if (!found[i])
+			goto inconsistent_ep;
+
+	for (i = 0; i < 8; i++)
+		found[i] = false;
+	for (i = 0; i < 8; i++) {
+		piece = cube.corner[i];
+		p = piece & _pbits;
+		e = piece & _cobits;
+		if (p >= 8)
+			goto inconsistent_cp;
+		if (e != 0 && e != _ctwist_cw && e != _ctwist_ccw)
+			goto inconsistent_co;
+		found[p] = true;
+	}
+	for (i = 0; i < 8; i++)
+		if (!found[i])
+			goto inconsistent_co;
+
+	return true;
+
+inconsistent_ep:
+	DBG_LOG("Inconsistent EP\n");
+	return false;
+inconsistent_cp:
+	DBG_LOG("Inconsistent CP\n");
+	return false;
+inconsistent_eo:
+	DBG_LOG("Inconsistent EO\n");
+	return false;
+inconsistent_co:
+	DBG_LOG("Inconsistent CO\n");
+	return false;
+}
+
+bool
+issolvable(cube_t cube)
+{
+	uint8_t i, eo, co, piece, edges[12], corners[8];
+
+	DBG_ASSERT(isconsistent(cube), false,
+	    "issolvable: cube is inconsistent\n");
+
+	for (i = 0; i < 12; i++)
+		edges[i] = cube.edge[i] & _pbits;
+	for (i = 0; i < 8; i++)
+		corners[i] = cube.corner[i] & _pbits;
+
+	if (permsign(edges, 12) != permsign(corners, 8))
+		goto issolvable_parity;
+
+	eo = 0;
+	for (i = 0; i < 12; i++) {
+		piece = cube.edge[i];
+		eo += (piece & _eobit) >> _eoshift;
+	}
+	if (eo % 2 != 0)
+		goto issolvable_eo;
+
+	co = 0;
+	for (i = 0; i < 8; i++) {
+		piece = cube.corner[i];
+		co += (piece & _cobits) >> _coshift;
+	}
+	if (co % 3 != 0)
+		goto issolvable_co;
+
+	return true;
+
+issolvable_parity:
+	DBG_LOG("EP and CP parities are different\n");
+	return false;
+issolvable_eo:
+	DBG_LOG("Odd number of flipped edges\n");
+	return false;
+issolvable_co:
+	DBG_LOG("Sum of corner orientation is not multiple of 3\n");
+	return false;
+}
+
+bool
+issolved(cube_t cube)
+{
+	return equal(cube, solved);
+}
+
+bool
+equal(cube_t c1, cube_t c2)
+{
+	int i;
+	bool ret;
+
+	ret = true;
+	for (i = 0; i < 8; i++)
+		ret = ret && c1.corner[i] == c2.corner[i];
+	for (i = 0; i < 12; i++)
+		ret = ret && c1.edge[i] == c2.edge[i];
+
+	return ret;
+}
+
+bool
+iserror(cube_t cube)
+{
+	return equal(cube, zero);
+}
+
+cube_t
+compose(cube_t c1, cube_t c2)
+{
+	DBG_ASSERT(isconsistent(c1) && isconsistent(c2),
+	    zero, "compose error: inconsistent cube\n")
+
+	return fasttocube(compose_fast(cubetofast(c1), cubetofast(c2)));
+}
+
+cube_t
+inverse(cube_t cube)
+{
+	cube_t ret;
+	uint8_t i, piece, orien;
+
+	DBG_ASSERT(isconsistent(cube), zero,
+	    "inverse error: inconsistent cube\n");
+
+	ret = zero;
+
+	for (i = 0; i < 12; i++) {
+		piece = cube.edge[i];
+		orien = piece & _eobit;
+		ret.edge[piece & _pbits] = i | orien;
+	}
+
+	for (i = 0; i < 8; i++) {
+		piece = cube.corner[i];
+		orien = ((piece << 1) | (piece >> 1)) & _cobits2;
+		ret.corner[piece & _pbits] = i | orien;
+	}
+
+	return ret;
+}
+
+cube_t
+applymoves(cube_t cube, const char *buf)
+{
+	cube_fast_t fast;
+	uint8_t r, m;
+	const char *b;
+
+	DBG_ASSERT(isconsistent(cube), zero,
+	    "move error: inconsistent cube\n");
+
+	fast = cubetofast(cube);
+
+	for (b = buf; *b != '\0'; b++) {
+		while (*b == ' ' || *b == '\t' || *b == '\n')
+			b++;
+		if (*b == '\0')
+			goto applymoves_finish;
+		if ((r = readmove(*b)) == _error)
+			goto applymoves_error;
+		if ((m = readmodifier(*(b+1))) != 0)
+			b++;
+		fast = move(fast, r + m);
+	}
+
+applymoves_finish:
+	return fasttocube(fast);
+
+applymoves_error:
+	DBG_LOG("applymoves error\n");
+	return zero;
+}
+
+cube_t
+applytrans(cube_t cube, const char *buf)
+{
+	cube_fast_t fast;
+	uint8_t t;
+
+	DBG_ASSERT(isconsistent(cube), zero,
+	    "transformation error: inconsistent cube\n");
+
+	t = readtrans(buf);
+	fast = cubetofast(cube);
+	fast = transform(fast, t);
+
+	return fasttocube(fast);
+}
+
+cube_t
+readcube(const char *format, const char *buf)
+{
+	cube_t cube;
+
+	if (!strcmp(format, "H48")) {
+		cube = readcube_H48(buf);
+	} else if (!strcmp(format, "LST")) {
+		cube = readcube_LST(buf);
+	} else {
+		DBG_LOG("Cannot read cube in the given format\n");
+		cube = zero;
+	}
+
+	return cube;
+}
+
+void
+writecube(const char *format, cube_t cube, char *buf)
+{
+	char *errormsg;
+	size_t len;
+
+	if (!isconsistent(cube)) {
+		errormsg = "ERROR: cannot write inconsistent cube";
+		goto writecube_error;
+	}
+
+	if (!strcmp(format, "H48")) {
+		writecube_H48(cube, buf);
+	} else if (!strcmp(format, "LST")) {
+		writecube_LST(cube, buf);
+	} else {
+		errormsg = "ERROR: cannot write cube in the given format";
+		goto writecube_error;
+	}
+
+	return;
+
+writecube_error:
+	DBG_LOG("writecube error, see stdout for details\n");
+	len = strlen(errormsg);
+	memcpy(buf, errormsg, len);
+	buf[len] = '\n';
+	buf[len+1] = '\0';
+}
+
+_static int
+permsign(uint8_t *a, int n)
+{
+	int i, j;
+	uint8_t ret = 0;
+
+	for (i = 0; i < n; i++)
+		for (j = i+1; j < n; j++)
+			ret += a[i] > a[j] ? 1 : 0;
+
+	return ret % 2;
+}
+
+_static uint8_t
+readco(const char *str)
+{
+	if (*str == '0')
+		return 0;
+	if (*str == '1')
+		return _ctwist_cw;
+	if (*str == '2')
+		return _ctwist_ccw;
+
+	DBG_LOG("Error reading CO\n");
+	return _error;
+}
+
+_static uint8_t
+readcp(const char *str)
+{
+	uint8_t c;
+
+	for (c = 0; c < 8; c++)
+		if (!strncmp(str, cornerstr[c], 3) ||
+		    !strncmp(str, cornerstralt[c], 3))
+			return c;
+
+	DBG_LOG("Error reading CP\n");
+	return _error;
+}
+
+_static uint8_t
+readeo(const char *str)
+{
+	if (*str == '0')
+		return 0;
+	if (*str == '1')
+		return _eflip;
+
+	DBG_LOG("Error reading EO\n");
+	return _error;
+}
+
+_static uint8_t
+readep(const char *str)
+{
+	uint8_t e;
+
+	for (e = 0; e < 12; e++)
+		if (!strncmp(str, edgestr[e], 2))
+			return e;
+
+	DBG_LOG("Error reading EP\n");
+	return _error;
+}
+
+_static cube_t
+readcube_H48(const char *buf)
+{
+	int i;
+	uint8_t piece, orient;
+	cube_t ret = {0};
+	const char *b;
+	
+	b = buf;
+
+	for (i = 0; i < 12; i++) {
+		while (*b == ' ' || *b == '\t' || *b == '\n')
+			b++;
+		if ((piece = readep(b)) == _error)
+			return zero;
+		b += 2;
+		if ((orient = readeo(b)) == _error)
+			return zero;
+		b++;
+		ret.edge[i] = piece | orient;
+	}
+	for (i = 0; i < 8; i++) {
+		while (*b == ' ' || *b == '\t' || *b == '\n')
+			b++;
+		if ((piece = readcp(b)) == _error)
+			return zero;
+		b += 3;
+		if ((orient = readco(b)) == _error)
+			return zero;
+		b++;
+		ret.corner[i] = piece | orient;
+	}
+
+	return ret;
+}
+
+_static uint8_t
+readpiece_LST(const char **b)
+{
+	uint8_t ret;
+	bool read;
+
+	while (**b == ',' || **b == ' ' || **b == '\t' || **b == '\n')
+		(*b)++;
+
+	for (ret = 0, read = false; **b >= '0' && **b <= '9'; (*b)++) {
+		read = true;
+		ret = ret * 10 + (**b) - '0';
+	}
+
+	return read ? ret : _error;
+}
+
+_static cube_t
+readcube_LST(const char *buf)
+{
+	int i;
+	cube_t ret = {0};
+
+	for (i = 0; i < 8; i++)
+		ret.corner[i] = readpiece_LST(&buf);
+
+	for (i = 0; i < 12; i++)
+		ret.edge[i] = readpiece_LST(&buf);
+
+	return ret;
+}
+
+_static int
+writepiece_LST(uint8_t piece, char *buf)
+{
+	char digits[3];
+	int i, len = 0;
+
+	while (piece != 0) {
+		digits[len++] = (piece % 10) + '0';
+		piece /= 10;
+	}
+
+	if (len == 0)
+		digits[len++] = '0';
+
+	for (i = 0; i < len; i++)
+		buf[i] = digits[len-i-1];
+
+	buf[len] = ',';
+	buf[len+1] = ' ';
+
+	return len+2;
+}
+
+_static void
+writecube_H48(cube_t cube, char *buf)
+{
+	uint8_t piece, perm, orient;
+	int i;
+
+	for (i = 0; i < 12; i++) {
+		piece = cube.edge[i];
+		perm = piece & _pbits;
+		orient = (piece & _eobit) >> _eoshift;
+		buf[4*i    ] = edgestr[perm][0];
+		buf[4*i + 1] = edgestr[perm][1];
+		buf[4*i + 2] = orient + '0';
+		buf[4*i + 3] = ' ';
+	}
+	for (i = 0; i < 8; i++) {
+		piece = cube.corner[i];
+		perm = piece & _pbits;
+		orient = (piece & _cobits) >> _coshift;
+		buf[48 + 5*i    ] = cornerstr[perm][0];
+		buf[48 + 5*i + 1] = cornerstr[perm][1];
+		buf[48 + 5*i + 2] = cornerstr[perm][2];
+		buf[48 + 5*i + 3] = orient + '0';
+		buf[48 + 5*i + 4] = ' ';
+	}
+
+	buf[48+39] = '\0';
+}
+
+_static void
+writecube_LST(cube_t cube, char *buf)
+{
+	int i, ptr;
+	uint8_t piece;
+
+	ptr = 0;
+
+	for (i = 0; i < 8; i++) {
+		piece = cube.corner[i];
+		ptr += writepiece_LST(piece, buf + ptr);
+	}
+
+	for (i = 0; i < 12; i++) {
+		piece = cube.edge[i];
+		ptr += writepiece_LST(piece, buf + ptr);
+	}
+
+	*(buf+ptr-2) = 0;
+}
+
+_static uint8_t
+readmove(char c)
+{
+	switch (c) {
+	case 'U':
+		return _move_U;
+	case 'D':
+		return _move_D;
+	case 'R':
+		return _move_R;
+	case 'L':
+		return _move_L;
+	case 'F':
+		return _move_F;
+	case 'B':
+		return _move_B;
+	default:
+		return _error;
+	}
+}
+
+_static uint8_t
+readmodifier(char c)
+{
+	switch (c) {
+	case '1': /* Fallthrough */
+	case '2': /* Fallthrough */
+	case '3':
+		return c - '0' - 1;
+	case '\'':
+		return 2;
+	default:
+		return 0;
+	}
+}
+
+_static uint8_t
+readtrans(const char *buf)
+{
+	uint8_t t;
+
+	for (t = 0; t < 48; t++)
+		if (!strncmp(buf, transstr[t], 11))
+			return t;
+
+	DBG_LOG("readtrans error\n");
+	return _error;
+}
+
+_static int
+writemoves(uint8_t *m, int n, char *buf)
+{
+	int i;
+	size_t len;
+	const char *s;
+	char *b;
+
+	for (i = 0, b = buf; i < n; i++, b++) {
+		s = movestr[m[i]];
+		len = strlen(s);
+		memcpy(b, s, len);
+		b += len;	
+		*b = ' ';
+	}
+
+	if (b != buf)
+		b--; /* Remove last space */
+	*b = '\0';
+
+	return b - buf;
+}
+
+_static void
+writetrans(uint8_t t, char *buf)
+{
+	if (t >= 48)
+		memcpy(buf, "error trans", 11);
+	else
+		memcpy(buf, transstr[t], 11);
+	buf[11] = '\0';
+}
+
+_static cube_fast_t
+move(cube_fast_t c, uint8_t m)
+{
+	switch (m) {
+	case _move_U:
+		return _move(U, c);
+	case _move_U2:
+		return _move(U2, c);
+	case _move_U3:
+		return _move(U3, c);
+	case _move_D:
+		return _move(D, c);
+	case _move_D2:
+		return _move(D2, c);
+	case _move_D3:
+		return _move(D3, c);
+	case _move_R:
+		return _move(R, c);
+	case _move_R2:
+		return _move(R2, c);
+	case _move_R3:
+		return _move(R3, c);
+	case _move_L:
+		return _move(L, c);
+	case _move_L2:
+		return _move(L2, c);
+	case _move_L3:
+		return _move(L3, c);
+	case _move_F:
+		return _move(F, c);
+	case _move_F2:
+		return _move(F2, c);
+	case _move_F3:
+		return _move(F3, c);
+	case _move_B:
+		return _move(B, c);
+	case _move_B2:
+		return _move(B2, c);
+	case _move_B3:
+		return _move(B3, c);
+	default:
+		DBG_LOG("move error, unknown move\n");
+		return zero_fast;
+	}
+}
+
+_static cube_fast_t
+transform(cube_fast_t c, uint8_t t)
+{
+	switch (t) {
+	case _trans_UFr:
+		return _trans_rotation(UFr, c);
+	case _trans_ULr:
+		return _trans_rotation(ULr, c);
+	case _trans_UBr:
+		return _trans_rotation(UBr, c);
+	case _trans_URr:
+		return _trans_rotation(URr, c);
+	case _trans_DFr:
+		return _trans_rotation(DFr, c);
+	case _trans_DLr:
+		return _trans_rotation(DLr, c);
+	case _trans_DBr:
+		return _trans_rotation(DBr, c);
+	case _trans_DRr:
+		return _trans_rotation(DRr, c);
+	case _trans_RUr:
+		return _trans_rotation(RUr, c);
+	case _trans_RFr:
+		return _trans_rotation(RFr, c);
+	case _trans_RDr:
+		return _trans_rotation(RDr, c);
+	case _trans_RBr:
+		return _trans_rotation(RBr, c);
+	case _trans_LUr:
+		return _trans_rotation(LUr, c);
+	case _trans_LFr:
+		return _trans_rotation(LFr, c);
+	case _trans_LDr:
+		return _trans_rotation(LDr, c);
+	case _trans_LBr:
+		return _trans_rotation(LBr, c);
+	case _trans_FUr:
+		return _trans_rotation(FUr, c);
+	case _trans_FRr:
+		return _trans_rotation(FRr, c);
+	case _trans_FDr:
+		return _trans_rotation(FDr, c);
+	case _trans_FLr:
+		return _trans_rotation(FLr, c);
+	case _trans_BUr:
+		return _trans_rotation(BUr, c);
+	case _trans_BRr:
+		return _trans_rotation(BRr, c);
+	case _trans_BDr:
+		return _trans_rotation(BDr, c);
+	case _trans_BLr:
+		return _trans_rotation(BLr, c);
+	case _trans_UFm:
+		return _trans_mirrored(UFm, c);
+	case _trans_ULm:
+		return _trans_mirrored(ULm, c);
+	case _trans_UBm:
+		return _trans_mirrored(UBm, c);
+	case _trans_URm:
+		return _trans_mirrored(URm, c);
+	case _trans_DFm:
+		return _trans_mirrored(DFm, c);
+	case _trans_DLm:
+		return _trans_mirrored(DLm, c);
+	case _trans_DBm:
+		return _trans_mirrored(DBm, c);
+	case _trans_DRm:
+		return _trans_mirrored(DRm, c);
+	case _trans_RUm:
+		return _trans_mirrored(RUm, c);
+	case _trans_RFm:
+		return _trans_mirrored(RFm, c);
+	case _trans_RDm:
+		return _trans_mirrored(RDm, c);
+	case _trans_RBm:
+		return _trans_mirrored(RBm, c);
+	case _trans_LUm:
+		return _trans_mirrored(LUm, c);
+	case _trans_LFm:
+		return _trans_mirrored(LFm, c);
+	case _trans_LDm:
+		return _trans_mirrored(LDm, c);
+	case _trans_LBm:
+		return _trans_mirrored(LBm, c);
+	case _trans_FUm:
+		return _trans_mirrored(FUm, c);
+	case _trans_FRm:
+		return _trans_mirrored(FRm, c);
+	case _trans_FDm:
+		return _trans_mirrored(FDm, c);
+	case _trans_FLm:
+		return _trans_mirrored(FLm, c);
+	case _trans_BUm:
+		return _trans_mirrored(BUm, c);
+	case _trans_BRm:
+		return _trans_mirrored(BRm, c);
+	case _trans_BDm:
+		return _trans_mirrored(BDm, c);
+	case _trans_BLm:
+		return _trans_mirrored(BLm, c);
+	default:
+		DBG_LOG("transform error, unknown transformation\n");
+		return zero_fast;
+	}
+}
diff --git a/src/moves.h b/src/moves.h
@@ -0,0 +1,47 @@
+_static_inline bool allowednextmove(uint8_t *, uint8_t);
+_static_inline uint8_t inverse_trans(uint8_t);
+_static_inline uint8_t movebase(uint8_t);
+_static_inline uint8_t moveaxis(uint8_t);
+
+_static bool
+allowednextmove(uint8_t *moves, uint8_t n)
+{
+	uint8_t base[3], axis[3];
+
+	if (n < 2)
+		return true;
+
+	base[0] = movebase(moves[n-1]);
+	axis[0] = moveaxis(moves[n-1]);
+	base[1] = movebase(moves[n-2]);
+	axis[1] = moveaxis(moves[n-2]);
+
+	if (base[0] == base[1] || (axis[0] == axis[1] && base[0] < base[1]))
+		return false;
+
+	if (n == 2)
+		return true;
+
+	base[2] = movebase(moves[n-3]);
+	axis[2] = moveaxis(moves[n-3]);
+
+	return axis[1] != axis[2] || base[0] != base[2];
+}
+
+_static_inline uint8_t
+inverse_trans(uint8_t t)
+{
+	return inverse_trans_table[t];
+}
+
+_static_inline uint8_t
+movebase(uint8_t move)
+{
+	return move / 3;
+}
+
+_static_inline uint8_t
+moveaxis(uint8_t move)
+{
+	return move / 6;
+}
diff --git a/src/solve_generic.h b/src/solve_generic.h
@@ -0,0 +1,259 @@
+typedef struct {
+	cube_fast_t cube;
+	uint8_t depth;
+	int64_t maxsols;
+	char **nextsol;
+	int64_t *nsols;
+	uint8_t nmoves;
+	uint8_t moves[20];
+	uint8_t (*estimate)(cube_fast_t);
+} dfsarg_generic_t;
+
+_static void solve_generic_appendsolution(dfsarg_generic_t *);
+_static int solve_generic_dfs(dfsarg_generic_t *);
+_static int64_t solve_generic(cube_t, const char *, int8_t, int8_t, int64_t,
+    int8_t, char *, uint8_t (*)(cube_fast_t));
+_static uint8_t estimate_simple(cube_fast_t);
+_static int64_t solve_simple(cube_t, int8_t, int8_t, int64_t, int8_t, char *);
+
+int64_t
+solve(
+	cube_t cube,
+	const char *solver,
+	const char *options,
+	const char *nisstype,
+	int8_t minmoves,
+	int8_t maxmoves,
+	int64_t maxsols,
+	int8_t optimal,
+	const void *data,
+	char *solutions
+)
+{
+	DBG_WARN(!strcmp(options, ""),
+	     "solve: 'options' not implemented yet, ignoring\n");
+
+	DBG_WARN(!strcmp(nisstype, ""),
+	     "solve: NISS not implemented yet, ignoring 'nisstype'\n");
+
+	DBG_WARN(data == NULL,
+	    "solve: 'data' not implemented yet, ignoring\n");
+
+	if (!strcmp(solver, "optimal") || !strcmp(solver, "simple")) {
+		return solve_simple(
+			cube,
+			minmoves,
+			maxmoves,
+			maxsols,
+			optimal,
+			solutions
+		);
+	} else {
+		DBG_LOG("solve: unknown solver '%s'\n", solver);
+		return -1;
+	}
+
+	DBG_LOG("solve: error\n");
+	return -1;
+}
+
+void
+multisolve(
+	int n,
+	cube_t *cube,
+	const char *solver,
+	const void *data,
+	char *sols
+)
+{
+	char *s;
+	int i;
+
+	s = sols;
+	for (i = 0; i < n; i++) {
+		solve(cube[i], solver, "", "normal", 0, -1, 1, 0, NULL, s);
+		while (s++);
+	}
+}
+
+int64_t
+gendata(const char *solver, void *data)
+{
+	DBG_LOG("gendata: not implemented yet\n");
+
+	return -1;
+}
+
+_static void
+solve_generic_appendsolution(dfsarg_generic_t *arg)
+{
+	int strl;
+
+	strl = writemoves(arg->moves, arg->depth, *arg->nextsol);
+	DBG_LOG("Solution found: %s\n", *arg->nextsol);
+	*arg->nextsol += strl;
+	**arg->nextsol = '\n';
+	(*arg->nextsol)++;
+	(*arg->nsols)++;
+}
+
+_static int
+solve_generic_dfs(dfsarg_generic_t *arg)
+{
+	dfsarg_generic_t nextarg;
+	uint8_t m, bound;
+	int64_t ret;
+
+	if (!allowednextmove(arg->moves, arg->nmoves))
+		return 0;
+
+	if (arg->nmoves > 0)
+		arg->cube = move(arg->cube, arg->moves[arg->nmoves-1]);
+
+	bound = arg->estimate(arg->cube);
+	if (*arg->nsols == arg->maxsols || bound + arg->nmoves > arg->depth)
+		return 0;
+
+	if (bound == 0) {
+		if (arg->nmoves != arg->depth)
+			return 0;
+		solve_generic_appendsolution(arg);
+		return 1;
+	}
+
+	/* memcpy(&nextarg, arg, sizeof(dfsarg_generic_t)); */
+	nextarg = *arg;
+	nextarg.nmoves = arg->nmoves + 1;
+	for (m = 0, ret = 0; m < 18; m++) {
+		nextarg.cube = arg->cube;
+		nextarg.moves[arg->nmoves] = m;
+		ret += solve_generic_dfs(&nextarg);
+	}
+
+	return ret;
+}
+
+_static int64_t
+solve_generic(
+	cube_t cube,
+	const char *nisstype,
+	/* TODO: handle NISS */
+	int8_t minmoves,
+	int8_t maxmoves,
+	int64_t maxsols,
+	int8_t optimal,
+	char *sols,
+	uint8_t (*estimate)(cube_fast_t)
+	/* TODO: add validator */
+	/* TODO: maybe add data for estimate */
+	/* TODO: add moveset (and allowednext?) */
+)
+{
+	dfsarg_generic_t arg;
+	int64_t ret, tmp, first;
+
+	if (!issolvable(cube)) {
+		DBG_LOG("solve: cube is not solvable\n");
+		return -1;
+	}
+
+	if (issolved(cube)) {
+		DBG_LOG("solve: cube is already solved\n");
+		sols[0] = '\n';
+		sols[1] = 0;
+		return 1;
+	}
+
+	DBG_WARN(!strcmp(nisstype, ""),
+	    "solve: NISS not implemented yet, 'nisstype' ignored\n");
+
+	if (minmoves < 0) {
+		DBG_LOG("solve: 'minmoves' is negative, setting to 0\n");
+		minmoves = 0;
+	}
+
+	if (maxmoves < 0) {
+		DBG_LOG("solve: invalid 'maxmoves', setting to 20\n");
+		maxmoves = 20;
+	}
+
+	if (maxsols < 0) {
+		DBG_LOG("solve: 'maxsols' is negative\n");
+		return -1;
+	}
+
+	if (maxsols == 0) {
+		DBG_LOG("solve: 'maxsols' is 0\n");
+		return 0;
+	}
+
+	if (sols == NULL) {
+		DBG_LOG("solve: return parameter 'sols' is NULL\n");
+		return -1;
+	}
+
+	if (estimate == NULL) {
+		DBG_LOG("solve: 'estimate' is NULL\n");
+		return -1;
+	}
+
+	arg = (dfsarg_generic_t) {
+		.cube = cubetofast(cube),
+		.maxsols = maxsols,
+		.nextsol = &sols,
+		.nsols = &ret,
+		.nmoves = 0,
+		.moves = {0},
+		.estimate = estimate,
+	};
+
+	ret = 0;
+	first = -1;
+	for (arg.depth = minmoves; arg.depth <= maxmoves; arg.depth++) {
+		tmp = solve_generic_dfs(&arg);
+		if (tmp != 0)
+			first = arg.depth;
+
+		DBG_LOG("Found %" PRId64 " solution%s at depth %" PRIu8 "\n",
+		        tmp, tmp == 1 ? "" : "s", arg.depth);
+
+		if (ret >= maxsols)
+			break;
+
+		if (optimal >= 0 && first >= 0 && arg.depth - first == optimal)
+			break;
+	}
+
+	DBG_ASSERT(ret <= maxsols, ret,
+	    "solve: found more than 'maxsols' solutions\n");
+
+	return ret;
+}
+
+_static uint8_t
+estimate_simple(cube_fast_t cube)
+{
+	return issolved_fast(cube) ? 0 : 1;
+}
+
+_static int64_t
+solve_simple(
+	cube_t cube,
+	int8_t minmoves,
+	int8_t maxmoves,
+	int64_t maxsols,
+	int8_t optimal,
+	char *solutions
+)
+{
+	return solve_generic(
+		cube,
+		"",
+		minmoves,
+		maxmoves,
+		maxsols,
+		optimal,
+		solutions,
+		&estimate_simple
+	);
+}
diff --git a/src/solve_h48.h b/src/solve_h48.h
@@ -0,0 +1,266 @@
+#define _esep_ind(i)      (i / 8U)
+#define _esep_shift(i)    (4U * (i % 8U))
+#define _esep_mask(i)     (((1U << 4U) - 1U) << _esep_shift(i))
+#define _visited_ind(i)   (i / 8U)
+#define _visited_mask(i)  (1U << (i % 8U))
+
+typedef struct {
+	cube_fast_t cube;
+	uint8_t *visited;
+	uint8_t *moves;
+	uint8_t nmoves;
+	uint8_t depth;
+	uint16_t *nclasses;
+	uint32_t *cocsepdata;
+	uint32_t *buf32;
+} dfsarg_gendata_t;
+
+_static_inline int64_t coord_h48(cube_fast_t, uint32_t *, uint8_t);
+
+_static size_t gendata_cocsep(void *);
+_static uint32_t gendata_cocsep_dfs( /* TODO: use dfsarg */
+    cube_fast_t, uint8_t, uint8_t, uint16_t *, uint32_t *, uint8_t *);
+
+_static size_t gendata_esep(const void *, void *);
+_static uint32_t gendata_esep_dfs(dfsarg_gendata_t *);
+
+_static_inline bool get_visited(const uint8_t *, int64_t);
+_static_inline void set_visited(uint8_t *, int64_t);
+_static_inline uint8_t get_esep_pval(const uint32_t *, int64_t);
+_static_inline void set_esep_pval(uint32_t *, int64_t, uint8_t);
+
+/* h is the number of eo bits used */
+_static_inline int64_t
+coord_h48(cube_fast_t c, uint32_t *cocsepdata, uint8_t h)
+{
+	cube_fast_t d;
+	int64_t cocsep, coclass, esep, eo, esize, ret;
+	uint32_t data;
+	uint8_t ttrep;
+
+	DBG_ASSERT(h <= 11, -1, "coord_h48: h must be between 0 and 11\n");
+
+	cocsep = coord_fast_cocsep(c);
+	data = cocsepdata[cocsep];
+	coclass = (data & (0xFFFFU << 16U)) >> 16U;
+	ttrep = (data & (0xFFU << 8U)) >> 8U;
+
+	d = transform(c, ttrep); /* TODO: transform only edges */
+	esep = coord_fast_esep(d);
+	eo = coord_fast_eo(d);
+
+	esize = (_12c4 * _8c4) << h;
+	ret = (coclass * esize) + (esep << h) + (eo >> (11-h));
+
+	return ret;
+}
+
+/*
+Each element of the cocsep table is a uint32_t used as follows:
+  - Lowest 8-bit block: pruning value
+  - Second-lower 8-bit block: "ttrep" (transformation to representative)
+  - Top 16-bit block: symcoord value
+After the data as described above, more auxiliary information is appended:
+  - A uint32_t representing the number of symmetry classes
+  - A uint32_t representing the highest value of the pruning table
+  - One uint32_t for each "line" of the pruning table, representing the number
+    of positions having that pruning value.
+*/
+_static size_t
+gendata_cocsep(void *buf)
+{
+	size_t tablesize = _3p7 << 7U;
+	size_t visitedsize = (tablesize + 7U) / 8U;
+	size_t infosize = 12;
+
+	cube_fast_t solved;
+	uint32_t *buf32, *info, cc;
+	uint16_t n;
+	uint8_t i, j, visited[visitedsize];
+
+	buf32 = (uint32_t *)buf;
+	info = buf32 + tablesize;
+	memset(buf32, 0xFFU, 4*tablesize);
+	memset(info, 0, 4*infosize);
+
+	solved = cubetofast(solvedcube());
+	for (i = 0, n = 0, cc = 0; i < 10; i++) {
+		memset(visited, 0, visitedsize);
+		DBG_LOG("cocsep: generating depth %" PRIu8 "\n", i);
+		cc = gendata_cocsep_dfs(solved, 0, i, &n, buf32, visited);
+		info[i+2] = cc;
+		DBG_LOG("found %" PRIu32 "\n", cc);
+	}
+
+	info[0] = (uint32_t)n;
+	info[1] = 9U; /* Known max pruning value */
+	DBG_ASSERT(n == COCSEP_CLASSES, 0,
+	    "cocsep: computed %" PRIu16 " symmetry classes, "
+	    "expected %" PRIu16 "\n", n, COCSEP_CLASSES);
+
+	DBG_LOG("cocsep data computed\n");
+	DBG_LOG("Symmetry classes: %" PRIu32 "\n", info[0]);
+	DBG_LOG("Maximum pruning value: %" PRIu32 "\n", info[1]);
+	DBG_LOG("Pruning value distribution:\n");
+	for (j = 0; j < 10; j++)
+		DBG_LOG("%" PRIu8 ":\t%" PRIu32 "\n", j, info[j+2]);
+
+	return 4*(tablesize + infosize);
+}
+
+_static uint32_t
+gendata_cocsep_dfs(
+	cube_fast_t c,
+	uint8_t depth,
+	uint8_t maxdepth,
+	uint16_t *n,
+	uint32_t *buf32,
+	uint8_t *visited
+)
+{
+	uint8_t m, t, tinv, olddepth;
+	uint32_t cc;
+	int64_t i;
+	cube_fast_t d;
+
+	i = coord_fast_cocsep(c);
+	olddepth = (uint8_t)(buf32[i] & 0xFFU);
+	if (olddepth < depth || get_visited(visited, i))
+		return 0;
+	set_visited(visited, i);
+
+	if (depth == maxdepth) {
+		if ((buf32[i] & 0xFFU) != 0xFFU)
+			return 0;
+
+		for (t = 0, cc = 0; t < 48; t++) {
+			d = transform(c, t);
+			i = coord_fast_cocsep(d);
+			set_visited(visited, i);
+			tinv = inverse_trans(t);
+			cc += (buf32[i] & 0xFFU) == 0xFFU;
+			buf32[i] = (*n << 16U) | (tinv << 8U) | depth;
+		}
+		(*n)++;
+
+		return cc;
+	}
+
+	for (m = 0, cc = 0; m < 18; m++) {
+		d = move(c, m);
+		cc += gendata_cocsep_dfs(d, depth+1, maxdepth, n, buf32, visited);
+	}
+
+	return cc;
+}
+
+/*
+TODO description
+generating fixed table with h=0, k=4
+*/
+_static size_t
+gendata_esep(const void *cocsepdata, void *buf)
+{
+	size_t tablesize = (COCSEP_CLASSES * _12c4 * _8c4) / 2U;
+	size_t visitedsize = (tablesize * 2U + 7U) / 8U;
+	size_t infosize = 25; /* TODO unknown yet */
+
+	uint32_t *buf32, *info, cc;
+	uint8_t moves[20];
+	dfsarg_gendata_t arg;
+
+	arg.visited = malloc(visitedsize);
+	buf32 = (uint32_t *)buf;
+	info = buf32 + tablesize;
+	memset(buf32, 0xFFU, 4*tablesize);
+	memset(info, 0, 4*infosize);
+
+	arg.cube = cubetofast(solvedcube());
+	arg.moves = moves;
+	arg.nmoves = 0;
+	arg.cocsepdata = (uint32_t *)cocsepdata;
+	arg.buf32 = buf32;
+	/* TODO loop until no more is done, not until 12! (or hardcode limits)*/
+	for (arg.depth = 0, cc = 0; arg.depth < 12; arg.depth++) {
+		DBG_LOG("esep: generating depth %" PRIu8 "\n", arg.depth);
+		memset(arg.visited, 0, visitedsize);
+		cc = gendata_esep_dfs(&arg);
+		info[arg.depth+1] = cc;
+		DBG_LOG("found %" PRIu32 "\n", cc);
+	}
+	free(arg.visited);
+
+	return cc;
+}
+
+_static uint32_t
+gendata_esep_dfs(dfsarg_gendata_t *arg)
+{
+	uint8_t m, t, olddepth;
+	uint32_t cc;
+	uint64_t i;
+	cube_fast_t d;
+	dfsarg_gendata_t nextarg;
+
+	if (!allowednextmove(arg->moves, arg->nmoves))
+		return 0;
+
+	if (arg->nmoves > 0)
+		arg->cube = move(arg->cube, arg->moves[arg->nmoves-1]);
+
+	i = coord_h48(arg->cube, arg->cocsepdata, 0U);
+	olddepth = get_esep_pval(arg->buf32, i);
+
+	if (olddepth < arg->nmoves || get_visited(arg->visited, i))
+		return 0;
+	set_visited(arg->visited, i);
+
+	if (arg->nmoves == arg->depth) {
+		if (olddepth != 15U)
+			return 0;
+
+		for (t = 0, cc = 0; t < 48; t++) {
+			d = transform(arg->cube, t);
+			i = coord_h48(d, arg->cocsepdata, 0U);
+			set_visited(arg->visited, i);
+			cc += get_esep_pval(arg->buf32, i) == 15U;
+			set_esep_pval(arg->buf32, i, arg->depth);
+		}
+
+		return cc;
+	}
+
+
+	nextarg = *arg;
+	nextarg.nmoves = arg->nmoves + 1;
+	for (m = 0, cc = 0; m < 18; m++) {
+		nextarg.cube = arg->cube;
+		nextarg.moves[arg->nmoves] = m;
+		cc += gendata_esep_dfs(&nextarg);
+	}
+
+	return cc;
+}
+
+_static_inline bool get_visited(const uint8_t *a, int64_t i)
+{
+	return a[_visited_ind(i)] & _visited_mask(i);
+}
+
+_static_inline void set_visited(uint8_t *a, int64_t i)
+{
+	a[_visited_ind(i)] |= _visited_mask(i);
+}
+
+_static_inline uint8_t
+get_esep_pval(const uint32_t *buf32, int64_t i)
+{
+	return (buf32[_esep_ind(i)] & _esep_mask(i)) >> _esep_shift(i);
+}
+
+_static_inline void
+set_esep_pval(uint32_t *buf32, int64_t i, uint8_t val)
+{
+	buf32[_esep_ind(i)] =
+	    (buf32[_esep_ind(i)] & (~_esep_mask(i))) | (val << _esep_shift(i));
+}

	h48 A prototype for an optimal Rubik's cube solver, work in progress.
	git clone https://git.tronto.net/h48
	Download \| Log \| Files \| Refs \| README \| LICENSE

A	src/constants.h	\|	530	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M	src/cube.c	\|	2371	+------------------------------------------------------------------------------
A	src/cube_avx2.h	\|	229	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	src/cube_neon.h	\|	6	++++++
A	src/cube_portable.h	\|	224	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	src/cube_routines.h	\|	730	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	src/moves.h	\|	47	+++++++++++++++++++++++++++++++++++++++++++++++
A	src/solve_generic.h	\|	259	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	src/solve_h48.h	\|	266	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++