Big changes to the interface - h48 - A prototype for an optimal Rubik's cube solver, work in progress.

commit fb9ae9e41eaf01b3651395fdd450ac1a4743e592
parent 04c3ee1f5acac47650be8d0ffbf90e238df0d12b
Author: Sebastiano Tronto <sebastiano@tronto.net>
Date:   Fri, 10 Nov 2023 14:44:48 +0100

Big changes to the interface

Diffstat:
M TODO.txt  | 40 +++++++++++++++++++---------------------
M cube.c  | 3725 +++++++++++++++++++++++++++++++++++++++----------------------------------------
M cube.h  | 161 ++++++++++++++++++++++++++-----------------------------------------------------
M test/010_io_H48_read_write/io_H48_tests.c  | 4 ++--
M test/011_io_SRC_write/01_solved.out  | 4 ++--
M test/011_io_SRC_write/02_scrambled.out  | 4 ++--
M test/011_io_SRC_write/io_SRC_tests.c  | 4 ++--
M test/012_io_AVX_write/io_AVX_tests.c  | 4 ++--
M test/020_move/move_tests.c  | 24 +++++++-----------------
M test/030_inverse_cube/inverse_tests.c  | 4 ++--
M test/040_compose/compose_tests.c  | 6 +++---
M test/050_transform/transform_tests.c  | 22 +++++++---------------
M test/061_coord_eo/coord_eo_tests.c  | 2 +-
M utils/genmovecode.sh  | 8 ++++----
M utils/gentranscode.sh  | 16 ++++++++--------
M utils/gentransswitch.sh  | 2 +-

16 files changed, 1949 insertions(+), 2081 deletions(-)
diff --git a/TODO.txt b/TODO.txt
@@ -1,30 +1,11 @@
-## Big changes
+## Big change
 
-### cube type changess
-
-* rename cube_t to cube_internal_t and cube_array_t to cube_t
-* include only cube_t typedef in cube.h, remove ifdef from cube.h
-* rework public functions: for many the simple implementation
-  in the first section of cube.c is fine, other should first
-  convert and then call the internal function
-* for CO: move to bits 5 and 6, no need for padding bit
-
-### Remove stuff from API, use more strings
-
-* Remove move_t and trans_t
-* Remove all functions related to trans, not useful for users
-  (or maybe keep and let use transform? can see some use
-  for it, in strange cases)
-* Removes functions that read or write moves
-* All functions should take strings instead of moves
-* Performance is worse, more stuff must be done internally,
-  expose only stuf that users are likely to use
+* Add tests for multiple moves
 * Benchmark: add some simple benchmarking functions to nissy.h,
   bench.c becomes very short
 
 ### More for moves
 
-* keep move(cube_t, move), but prefer direct inline moves over it
 * define macro to loop over moves e.g. #define FOREACHMOVE(action)
 
 ### API goals:
@@ -131,6 +112,8 @@ What about symcoord?
 
 ## Improvements and other things
 
+* add centers (and moves...)
+* for CO: move to bits 5 and 6, no need for padding bit
 * NISS: Add mask to moves (e.g. U | NISS where NISS = 32 or something);
   adapt readmoves and writemoves.
 * Consider adding centers and other moves (for avx2: centers in the
@@ -150,3 +133,18 @@ What about symcoord?
   dart ffi, js
   java
 * add also example code (e.g. an optimal solver) in examples/
+
+## More documentation?
+
+* Add documentation comments inside cube.c?
+* Copy this to cube.c
+
+Transformations can be either simple rotations or a rotation composed
+with a mirroring.  A composed rotation + mirror is obtained by applying
+the corresponding rotation to the solved cube mirrored along the M plane.
+
+For example, to apply the transformation RBm (mirrored RB) to a cube C:
+	1. Apply a mirror along the M plane to the solved cube
+	2. Rotate the mirrored cube with z' y2
+	3. Apply the cube C to the transformed solved cube
+	4. Apply the transformations of steps 1 and 2 in reverse
diff --git a/cube.c b/cube.c
@@ -22,7 +22,7 @@
 #include "cube.h"
 
 /******************************************************************************
-Section: constants and strings
+Section: constants, strings and other stuff
 ******************************************************************************/
 
 #define U  0U
@@ -238,528 +238,6 @@ static char *transstr[] = {
 };
 
 /******************************************************************************
-Section: cube_array
-
-This section contains non-optimized functions that operate on the cube in
-array format. These utilities are not used in performance-critical parts;
-for example, all I/O related stuff is here, as well as some checks on the
-state of the cube that are used in debugging.
-******************************************************************************/
-
-typedef struct {
-	uint8_t c[8];
-	uint8_t e[12];
-} cube_array_t;
-
-static bool equal_array(cube_array_t, cube_array_t);
-static bool iserror_array(cube_array_t);
-static bool isconsistent_array(cube_array_t);
-static bool issolvable_array(cube_array_t);
-static uint8_t readco(char *);
-static uint8_t readcp(char *);
-static uint8_t readeo(char *);
-static uint8_t readep(char *);
-static cube_array_t readcube_array(format_t, char *);
-static int permsign(uint8_t *, int);
-static cube_array_t readcube_array_H48(char *);
-static void writecube_array_AVX(cube_array_t, char *);
-static void writecube_array_H48(cube_array_t, char *);
-static int writepiece_SRC(uint8_t, char *);
-static void writecube_array_SRC(cube_array_t, char *);
-static uint8_t readmove(char);
-static uint8_t readmodifier(char);
-
-cube_array_t _solvedcube_array = {
-	.c = {0, 1, 2, 3, 4, 5, 6, 7},
-	.e = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}
-};
-cube_array_t _zerocube_array = { .e = {0}, .c = {0} };
-
-static bool
-equal_array(cube_array_t c1, cube_array_t c2)
-{
-	int i;
-	bool ret;
-
-	ret = true;
-	for (i = 0; i < 8; i++)
-		ret = ret && c1.c[i] == c2.c[i];
-	for (i = 0; i < 12; i++)
-		ret = ret && c1.e[i] == c2.e[i];
-
-	return ret;
-}
-
-static bool
-iserror_array(cube_array_t arr)
-{
-	return equal_array(arr, _zerocube_array);
-}
-
-static uint8_t
-readco(char *str)
-{
-	if (*str == '0')
-		return 0;
-	if (*str == '1')
-		return _ctwist_cw;
-	if (*str == '2')
-		return _ctwist_ccw;
-
-	DBG_LOG("Error reading CO\n");
-	return _error;
-}
-
-static uint8_t
-readcp(char *str)
-{
-	uint8_t c;
-
-	for (c = 0; c < 8; c++)
-		if (!strncmp(str, cornerstr[c], 3) ||
-		    !strncmp(str, cornerstralt[c], 3))
-			return c;
-
-	DBG_LOG("Error reading CP\n");
-	return _error;
-}
-
-static uint8_t
-readeo(char *str)
-{
-	if (*str == '0')
-		return 0;
-	if (*str == '1')
-		return _eflip;
-
-	DBG_LOG("Error reading EO\n");
-	return _error;
-}
-
-static uint8_t
-readep(char *str)
-{
-	uint8_t e;
-
-	for (e = 0; e < 12; e++)
-		if (!strncmp(str, edgestr[e], 2))
-			return e;
-
-	DBG_LOG("Error reading EP\n");
-	return _error;
-}
-
-static cube_array_t
-readcube_array_H48(char *buf)
-{
-	int i;
-	uint8_t piece, orient;
-	cube_array_t ret = {0};
-	char *b;
-	
-	b = buf;
-
-	for (i = 0; i < 12; i++) {
-		while (*b == ' ' || *b == '\t' || *b == '\n')
-			b++;
-		if ((piece = readep(b)) == _error)
-			return _zerocube_array;
-		b += 2;
-		if ((orient = readeo(b)) == _error)
-			return _zerocube_array;
-		b++;
-		ret.e[i] = piece | orient;
-	}
-	for (i = 0; i < 8; i++) {
-		while (*b == ' ' || *b == '\t' || *b == '\n')
-			b++;
-		if ((piece = readcp(b)) == _error)
-			return _zerocube_array;
-		b += 3;
-		if ((orient = readco(b)) == _error)
-			return _zerocube_array;
-		b++;
-		ret.c[i] = piece | orient;
-	}
-
-	return ret;
-}
-
-cube_array_t
-readcube_array(format_t format, char *buf)
-{
-	cube_array_t arr;
-
-	switch (format) {
-	case H48:
-		arr = readcube_array_H48(buf);
-		break;
-	default:
-		DBG_LOG("Cannot read cube in the given format\n");
-		return _zerocube_array;
-	}
-
-	DBG_ASSERT(!iserror_array(arr), arr, "readcube error\n");
-	return arr;
-}
-
-
-static int
-writepiece_SRC(uint8_t piece, char *buf)
-{
-	char digits[3];
-	int i, len = 0;
-
-	while (piece != 0) {
-		digits[len++] = (piece % 10) + '0';
-		piece /= 10;
-	}
-
-	if (len == 0)
-		digits[len++] = '0';
-
-	for (i = 0; i < len; i++)
-		buf[i] = digits[len-i-1];
-
-	buf[len] = ',';
-	buf[len+1] = ' ';
-
-	return len+2;
-}
-
-static void
-writecube_array_AVX(cube_array_t cube, char *buf)
-{
-	int i, ptr;
-	uint8_t piece;
-
-	memcpy(buf, "_mm256_set_epi8(\n\t0, 0, 0, 0, ", 30);
-	ptr = 30;
-
-	for (i = 11; i >= 0; i--) {
-		piece = cube.e[i];
-		ptr += writepiece_SRC(piece, buf + ptr);
-	}
-
-	memcpy(buf+ptr-2, ",\n\t0, 0, 0, 0, 0, 0, 0, 0, ", 27);
-	ptr += 25;
-
-	for (i = 7; i >= 0; i--) {
-		piece = cube.c[i];
-		ptr += writepiece_SRC(piece, buf + ptr);
-	}
-
-	memcpy(buf+ptr-2, "\n)\0", 3);
-}
-
-static void
-writecube_array_H48(cube_array_t cube, char *buf)
-{
-	uint8_t piece, perm, orient;
-	int i;
-
-	for (i = 0; i < 12; i++) {
-		piece = cube.e[i];
-		perm = piece & _pbits;
-		orient = (piece & _eobit) >> _eoshift;
-		buf[4*i    ] = edgestr[perm][0];
-		buf[4*i + 1] = edgestr[perm][1];
-		buf[4*i + 2] = orient + '0';
-		buf[4*i + 3] = ' ';
-	}
-	for (i = 0; i < 8; i++) {
-		piece = cube.c[i];
-		perm = piece & _pbits;
-		orient = (piece & _cobits) >> _coshift;
-		buf[48 + 5*i    ] = cornerstr[perm][0];
-		buf[48 + 5*i + 1] = cornerstr[perm][1];
-		buf[48 + 5*i + 2] = cornerstr[perm][2];
-		buf[48 + 5*i + 3] = orient + '0';
-		buf[48 + 5*i + 4] = ' ';
-	}
-
-	buf[48+39] = '\0';
-}
-
-static void
-writecube_array_SRC(cube_array_t cube, char *buf)
-{
-	int i, ptr;
-	uint8_t piece;
-
-	memcpy(buf, "{\n\t.c = {", 9);
-	ptr = 9;
-
-	for (i = 0; i < 8; i++) {
-		piece = cube.c[i];
-		ptr += writepiece_SRC(piece, buf + ptr);
-	}
-
-	memcpy(buf+ptr-2, "},\n\t.e = {", 10);
-	ptr += 8;
-
-	for (i = 0; i < 12; i++) {
-		piece = cube.e[i];
-		ptr += writepiece_SRC(piece, buf + ptr);
-	}
-
-	memcpy(buf+ptr-2, "}\n}\0", 4);
-}
-
-void
-writecube_array(format_t format, cube_array_t a, char *buf)
-{
-	char *errormsg;
-	size_t len;
-
-	if (!isconsistent_array(a)) {
-		errormsg = "ERROR: cannot write inconsistent cube";
-		goto writecube_error;
-	}
-
-	switch (format) {
-	case AVX:
-		writecube_array_AVX(a, buf);
-		break;
-	case H48:
-		writecube_array_H48(a, buf);
-		break;
-	case SRC:
-		writecube_array_SRC(a, buf);
-		break;
-	default:
-		errormsg = "ERROR: cannot write cube in the given format";
-		goto writecube_error;
-	}
-
-	return;
-
-writecube_error:
-	DBG_LOG("writecube error, see stdout for details\n");
-	len = strlen(errormsg);
-	memcpy(buf, errormsg, len);
-	buf[len] = '\n';
-	buf[len+1] = '\0';
-}
-
-static uint8_t
-readmove(char c)
-{
-	switch (c) {
-	case 'U':
-		return U;
-	case 'D':
-		return D;
-	case 'R':
-		return R;
-	case 'L':
-		return L;
-	case 'F':
-		return F;
-	case 'B':
-		return B;
-	default:
-		return _error;
-	}
-}
-
-static uint8_t
-readmodifier(char c)
-{
-	switch (c) {
-	case '1': /* Fallthrough */
-	case '2': /* Fallthrough */
-	case '3':
-		return c - '0' - 1;
-	case '\'':
-		return 2;
-	default:
-		return 0;
-	}
-}
-
-int
-readmoves(char *buf, move_t *m)
-{
-	int n;
-	uint64_t r;
-	char *b;
-
-	for (b = buf, n = 0; *b != '\0'; b++) {
-		while (*b == ' ' || *b == '\t' || *b == '\n')
-			b++;
-		if (*b == '\0')
-			return n;
-		if ((r = readmove(*b)) == _error)
-			goto readmoves_error;
-		m[n] = (move_t)r;
-		if ((r = readmodifier(*(b+1))) != 0) {
-			b++;
-			m[n] += r;
-		}
-		n++;
-	}
-
-	return n;
-
-readmoves_error:
-	DBG_LOG("readmoves error\n");
-	return -1;
-}
-
-trans_t
-readtrans(char *buf)
-{
-	uint8_t t;
-
-	for (t = 0; t < 48; t++)
-		if (!strncmp(buf, transstr[t], 11))
-			return t;
-
-	DBG_LOG("readtrans error\n");
-	return _error;
-}
-
-void
-writemoves(move_t *m, int n, char *buf)
-{
-	int i;
-	size_t len;
-	char *b, *s;
-
-	for (i = 0, b = buf; i < n; i++, b++) {
-		s = movestr[m[i]];
-		len = strlen(s);
-		memcpy(b, s, len);
-		b += len;	
-		*b = ' ';
-	}
-	*b = '\0';
-}
-
-void
-writetrans(trans_t t, char *buf)
-{
-	if (t >= 48)
-		memcpy(buf, "error trans", 11);
-	else
-		memcpy(buf, transstr[t], 11);
-	buf[11] = '\0';
-}
-
-static int
-permsign(uint8_t *a, int n)
-{
-	int i, j;
-	uint8_t ret = 0;
-
-	for (i = 0; i < n; i++)
-		for (j = i+1; j < n; j++)
-			ret += a[i] > a[j] ? 1 : 0;
-
-	return ret % 2;
-}
-
-static bool
-isconsistent_array(cube_array_t c)
-{
-	uint8_t i, p, e, piece;
-	bool found[12];
-
-	for (i = 0; i < 12; i++)
-		found[i] = false;
-	for (i = 0; i < 12; i++) {
-		piece = c.e[i];
-		p = piece & _pbits;
-		e = piece & _eobit;
-		if (p >= 12)
-			goto inconsistent_ep;
-		if (e != 0 && e != _eobit)
-			goto inconsistent_eo;
-		found[p] = true;
-	}
-	for (i = 0; i < 12; i++)
-		if (!found[i])
-			goto inconsistent_ep;
-
-	for (i = 0; i < 8; i++)
-		found[i] = false;
-	for (i = 0; i < 8; i++) {
-		piece = c.c[i];
-		p = piece & _pbits;
-		e = piece & _cobits;
-		if (p >= 8)
-			goto inconsistent_cp;
-		if (e != 0 && e != _ctwist_cw && e != _ctwist_ccw)
-			goto inconsistent_co;
-		found[p] = true;
-	}
-	for (i = 0; i < 8; i++)
-		if (!found[i])
-			goto inconsistent_co;
-
-	return true;
-
-inconsistent_ep:
-	DBG_LOG("Inconsistent EP\n");
-	return false;
-inconsistent_cp:
-	DBG_LOG("Inconsistent CP\n");
-	return false;
-inconsistent_eo:
-	DBG_LOG("Inconsistent EO\n");
-	return false;
-inconsistent_co:
-	DBG_LOG("Inconsistent CO\n");
-	return false;
-}
-
-bool
-issolvable_array(cube_array_t c)
-{
-	uint8_t i, eo, co, piece, edges[12], corners[8];
-
-	DBG_ASSERT(isconsistent_array(c), false,
-	    "issolvable: cube is inconsistent\n");
-
-	for (i = 0; i < 12; i++)
-		edges[i] = c.e[i] & _pbits;
-	for (i = 0; i < 8; i++)
-		corners[i] = c.c[i] & _pbits;
-
-	if (permsign(edges, 12) != permsign(corners, 8))
-		goto issolvable_parity;
-
-	eo = 0;
-	for (i = 0; i < 12; i++) {
-		piece = c.e[i];
-		eo += (piece & _eobit) >> _eoshift;
-	}
-	if (eo % 2 != 0)
-		goto issolvable_eo;
-
-	co = 0;
-	for (i = 0; i < 8; i++) {
-		piece = c.c[i];
-		co += (piece & _cobits) >> _coshift;
-	}
-	if (co % 3 != 0)
-		goto issolvable_co;
-
-	return true;
-
-issolvable_parity:
-	DBG_LOG("EP and CP parities are different\n");
-	return false;
-issolvable_eo:
-	DBG_LOG("Odd number of flipped edges\n");
-	return false;
-issolvable_co:
-	DBG_LOG("Sum of corner orientation is not multiple of 3\n");
-	return false;
-}
-
-/******************************************************************************
 Section: AVX2 fast methods
 
 This section contains performance-critical methods that rely on AVX2
@@ -770,28 +248,25 @@ Note: the #ifdef below is closed in the next section.
 
 #ifdef CUBE_AVX2
 
+typedef __m256i cube_fast_t;
+
 #define _co_avx2 _mm256_set_epi64x(0, 0, 0, 0xF0F0F0F0F0F0F0F0)
 #define _co2_avx2 _mm256_set_epi64x(0, 0, 0, 0x6060606060606060)
 #define _cocw_avx2 _mm256_set_epi64x(0, 0, 0, 0x2020202020202020)
 #define _eo_avx2 _mm256_set_epi64x(0x10101010, 0x1010101010101010, 0, 0)
-#define _zerocube _mm256_set_epi64x(0, 0, 0, 0);
-#define _solvedcube _mm256_set_epi8(                      \
-	0, 0, 0, 0, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, \
-	0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0    \
-)
-	
+#define zero_fast _mm256_set_epi64x(0, 0, 0, 0);
 
-static cube_t _arraytocube(cube_array_t);
-static void _cubetoarray(cube_t, cube_array_t *);
-static inline bool _equal(cube_t, cube_t);
-static inline cube_t _invertco(cube_t);
-static inline cube_t _inverse(cube_t);
-static inline cube_t _compose(cube_t, cube_t);
+static cube_fast_t cubetofast(cube_t);
+static cube_t fasttocube(cube_fast_t);
+static inline bool equal_fast(cube_fast_t, cube_fast_t);
+static inline cube_fast_t invertco_fast(cube_fast_t);
+static inline cube_fast_t inverse_fast(cube_fast_t);
+static inline cube_fast_t compose_fast(cube_fast_t, cube_fast_t);
 
-static inline cube_t
-_move_U(cube_t c)
+static inline cube_fast_t
+_move_U(cube_fast_t c)
 {
-	cube_t m = _mm256_set_epi8(
+	cube_fast_t m = _mm256_set_epi8(
 		0, 0, 0, 0, 11, 10, 9, 8, 7, 6, 0, 1, 3, 2, 5, 4,
 		0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 1, 0, 3, 2, 4, 5
 	);
@@ -799,10 +274,10 @@ _move_U(cube_t c)
 	return _mm256_shuffle_epi8(c, m);
 }
 
-static inline cube_t
-_move_U2(cube_t c)
+static inline cube_fast_t
+_move_U2(cube_fast_t c)
 {
-	cube_t m = _mm256_set_epi8(
+	cube_fast_t m = _mm256_set_epi8(
 		0, 0, 0, 0, 11, 10, 9, 8, 7, 6, 4, 5, 3, 2, 0, 1,
 		0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 4, 5, 3, 2, 0, 1
 	);
@@ -810,10 +285,10 @@ _move_U2(cube_t c)
 	return _mm256_shuffle_epi8(c, m);
 }
 
-static inline cube_t
-_move_U3(cube_t c)
+static inline cube_fast_t
+_move_U3(cube_fast_t c)
 {
-	cube_t m = _mm256_set_epi8(
+	cube_fast_t m = _mm256_set_epi8(
 		0, 0, 0, 0, 11, 10, 9, 8, 7, 6, 1, 0, 3, 2, 4, 5,
 		0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 0, 1, 3, 2, 5, 4
 	);
@@ -821,10 +296,10 @@ _move_U3(cube_t c)
 	return _mm256_shuffle_epi8(c, m);
 }
 
-static inline cube_t
-_move_D(cube_t c)
+static inline cube_fast_t
+_move_D(cube_fast_t c)
 {
-	cube_t m = _mm256_set_epi8(
+	cube_fast_t m = _mm256_set_epi8(
 		0, 0, 0, 0, 11, 10, 9, 8, 3, 2, 5, 4, 6, 7, 1, 0,
 		0, 0, 0, 0, 0, 0, 0, 0, 3, 2, 5, 4, 6, 7, 1, 0
 	);
@@ -832,10 +307,10 @@ _move_D(cube_t c)
 	return _mm256_shuffle_epi8(c, m);
 }
 
-static inline cube_t
-_move_D2(cube_t c)
+static inline cube_fast_t
+_move_D2(cube_fast_t c)
 {
-	cube_t m = _mm256_set_epi8(
+	cube_fast_t m = _mm256_set_epi8(
 		0, 0, 0, 0, 11, 10, 9, 8, 6, 7, 5, 4, 2, 3, 1, 0,
 		0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 5, 4, 2, 3, 1, 0
 	);
@@ -843,10 +318,10 @@ _move_D2(cube_t c)
 	return _mm256_shuffle_epi8(c, m);
 }
 
-static inline cube_t
-_move_D3(cube_t c)
+static inline cube_fast_t
+_move_D3(cube_fast_t c)
 {
-	cube_t m = _mm256_set_epi8(
+	cube_fast_t m = _mm256_set_epi8(
 		0, 0, 0, 0, 11, 10, 9, 8, 2, 3, 5, 4, 7, 6, 1, 0,
 		0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 5, 4, 7, 6, 1, 0
 	);
@@ -854,21 +329,21 @@ _move_D3(cube_t c)
 	return _mm256_shuffle_epi8(c, m);
 }
 
-static inline cube_t
-_move_R(cube_t c)
+static inline cube_fast_t
+_move_R(cube_fast_t c)
 {
-	cube_t m = _mm256_set_epi8(
+	cube_fast_t m = _mm256_set_epi8(
 		0, 0, 0, 0, 4, 10, 9, 7, 11, 6, 5, 8, 3, 2, 1, 0,
 		0, 0, 0, 0, 0, 0, 0, 0, 7, 35, 32, 4, 69, 2, 1, 70
 	);
 
-	return _compose(c, m);
+	return compose_fast(c, m);
 }
 
-static inline cube_t
-_move_R2(cube_t c)
+static inline cube_fast_t
+_move_R2(cube_fast_t c)
 {
-	cube_t m = _mm256_set_epi8(
+	cube_fast_t m = _mm256_set_epi8(
 		0, 0, 0, 0, 8, 10, 9, 11, 4, 6, 5, 7, 3, 2, 1, 0,
 		0, 0, 0, 0, 0, 0, 0, 0, 7, 5, 6, 4, 0, 2, 1, 3
 	);
@@ -876,32 +351,32 @@ _move_R2(cube_t c)
 	return _mm256_shuffle_epi8(c, m);
 }
 
-static inline cube_t
-_move_R3(cube_t c)
+static inline cube_fast_t
+_move_R3(cube_fast_t c)
 {
-	cube_t m = _mm256_set_epi8(
+	cube_fast_t m = _mm256_set_epi8(
 		0, 0, 0, 0, 7, 10, 9, 4, 8, 6, 5, 11, 3, 2, 1, 0,
 		0, 0, 0, 0, 0, 0, 0, 0, 7, 32, 35, 4, 70, 2, 1, 69
 	);
 
-	return _compose(c, m);
+	return compose_fast(c, m);
 }
 
-static inline cube_t
-_move_L(cube_t c)
+static inline cube_fast_t
+_move_L(cube_fast_t c)
 {
-	cube_t m = _mm256_set_epi8(
+	cube_fast_t m = _mm256_set_epi8(
 		0, 0, 0, 0, 11, 6, 5, 8, 7, 9, 10, 4, 3, 2, 1, 0,
 		0, 0, 0, 0, 0, 0, 0, 0, 34, 6, 5, 33, 3, 68, 71, 0
 	);
 
-	return _compose(c, m);
+	return compose_fast(c, m);
 }
 
-static inline cube_t
-_move_L2(cube_t c)
+static inline cube_fast_t
+_move_L2(cube_fast_t c)
 {
-	cube_t m = _mm256_set_epi8(
+	cube_fast_t m = _mm256_set_epi8(
 		0, 0, 0, 0, 11, 9, 10, 8, 7, 5, 6, 4, 3, 2, 1, 0,
 		0, 0, 0, 0, 0, 0, 0, 0, 4, 6, 5, 7, 3, 1, 2, 0
 	);
@@ -909,32 +384,32 @@ _move_L2(cube_t c)
 	return _mm256_shuffle_epi8(c, m);
 }
 
-static inline cube_t
-_move_L3(cube_t c)
+static inline cube_fast_t
+_move_L3(cube_fast_t c)
 {
-	cube_t m = _mm256_set_epi8(
+	cube_fast_t m = _mm256_set_epi8(
 		0, 0, 0, 0, 11, 5, 6, 8, 7, 10, 9, 4, 3, 2, 1, 0,
 		0, 0, 0, 0, 0, 0, 0, 0, 33, 6, 5, 34, 3, 71, 68, 0
 	);
 
-	return _compose(c, m);
+	return compose_fast(c, m);
 }
 
-static inline cube_t
-_move_F(cube_t c)
+static inline cube_fast_t
+_move_F(cube_fast_t c)
 {
-	cube_t m = _mm256_set_epi8(
+	cube_fast_t m = _mm256_set_epi8(
 		0, 0, 0, 0, 11, 10, 19, 16, 7, 6, 5, 4, 24, 2, 1, 25,
 		0, 0, 0, 0, 0, 0, 0, 0, 7, 64, 5, 66, 3, 38, 1, 36
 	);
 
-	return _compose(c, m);
+	return compose_fast(c, m);
 }
 
-static inline cube_t
-_move_F2(cube_t c)
+static inline cube_fast_t
+_move_F2(cube_fast_t c)
 {
-	cube_t m = _mm256_set_epi8(
+	cube_fast_t m = _mm256_set_epi8(
 		0, 0, 0, 0, 11, 10, 8, 9, 7, 6, 5, 4, 0, 2, 1, 3,
 		0, 0, 0, 0, 0, 0, 0, 0, 7, 4, 5, 6, 3, 0, 1, 2
 	);
@@ -942,32 +417,32 @@ _move_F2(cube_t c)
 	return _mm256_shuffle_epi8(c, m);
 }
 
-static inline cube_t
-_move_F3(cube_t c)
+static inline cube_fast_t
+_move_F3(cube_fast_t c)
 {
-	cube_t m = _mm256_set_epi8(
+	cube_fast_t m = _mm256_set_epi8(
 		0, 0, 0, 0, 11, 10, 16, 19, 7, 6, 5, 4, 25, 2, 1, 24,
 		0, 0, 0, 0, 0, 0, 0, 0, 7, 66, 5, 64, 3, 36, 1, 38
 	);
 
-	return _compose(c, m);
+	return compose_fast(c, m);
 }
 
-static inline cube_t
-_move_B(cube_t c)
+static inline cube_fast_t
+_move_B(cube_fast_t c)
 {
-	cube_t m = _mm256_set_epi8(
+	cube_fast_t m = _mm256_set_epi8(
 		0, 0, 0, 0, 18, 17, 9, 8, 7, 6, 5, 4, 3, 26, 27, 0,
 		0, 0, 0, 0, 0, 0, 0, 0, 65, 6, 67, 4, 39, 2, 37, 0
 	);
 
-	return _compose(c, m);
+	return compose_fast(c, m);
 }
 
-static inline cube_t
-_move_B2(cube_t c)
+static inline cube_fast_t
+_move_B2(cube_fast_t c)
 {
-	cube_t m = _mm256_set_epi8(
+	cube_fast_t m = _mm256_set_epi8(
 		0, 0, 0, 0, 10, 11, 9, 8, 7, 6, 5, 4, 3, 1, 2, 0,
 		0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, 4, 1, 2, 3, 0
 	);
@@ -975,1025 +450,1028 @@ _move_B2(cube_t c)
 	return _mm256_shuffle_epi8(c, m);
 }
 
-static inline cube_t
-_move_B3(cube_t c)
+static inline cube_fast_t
+_move_B3(cube_fast_t c)
 {
-	cube_t m = _mm256_set_epi8(
+	cube_fast_t m = _mm256_set_epi8(
 		0, 0, 0, 0, 17, 18, 9, 8, 7, 6, 5, 4, 3, 27, 26, 0,
 		0, 0, 0, 0, 0, 0, 0, 0, 67, 6, 65, 4, 37, 2, 39, 0
 	);
 
-	return _compose(c, m);
+	return compose_fast(c, m);
 }
 
-static inline cube_t
-_trans_UFr(cube_t c)
+static inline cube_fast_t
+_trans_UFr(cube_fast_t c)
 {
-	cube_t ret;
+	cube_fast_t ret;
 
-	cube_t tn = _mm256_set_epi8(
+	cube_fast_t tn = _mm256_set_epi8(
 		0, 0, 0, 0, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
 		0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0
 	);
-	cube_t ti = _mm256_set_epi8(
+	cube_fast_t ti = _mm256_set_epi8(
 		0, 0, 0, 0, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
 		0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0
 	);
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
 
 	return ret;
 }
 
-static inline cube_t
-_trans_ULr(cube_t c)
+static inline cube_fast_t
+_trans_ULr(cube_fast_t c)
 {
-	cube_t ret;
+	cube_fast_t ret;
 
-	cube_t tn = _mm256_set_epi8(
+	cube_fast_t tn = _mm256_set_epi8(
 		0, 0, 0, 0, 24, 27, 26, 25, 3, 2, 1, 0, 6, 7, 4, 5,
 		0, 0, 0, 0, 0, 0, 0, 0, 3, 2, 0, 1, 6, 7, 5, 4
 	);
-	cube_t ti = _mm256_set_epi8(
+	cube_fast_t ti = _mm256_set_epi8(
 		0, 0, 0, 0, 26, 25, 24, 27, 2, 3, 0, 1, 7, 6, 5, 4,
 		0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 1, 0, 7, 6, 4, 5
 	);
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
 
 	return ret;
 }
 
-static inline cube_t
-_trans_UBr(cube_t c)
+static inline cube_fast_t
+_trans_UBr(cube_fast_t c)
 {
-	cube_t ret;
+	cube_fast_t ret;
 
-	cube_t tn = _mm256_set_epi8(
+	cube_fast_t tn = _mm256_set_epi8(
 		0, 0, 0, 0, 9, 8, 11, 10, 6, 7, 4, 5, 2, 3, 0, 1,
 		0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 4, 5, 2, 3, 0, 1
 	);
-	cube_t ti = _mm256_set_epi8(
+	cube_fast_t ti = _mm256_set_epi8(
 		0, 0, 0, 0, 9, 8, 11, 10, 6, 7, 4, 5, 2, 3, 0, 1,
 		0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 4, 5, 2, 3, 0, 1
 	);
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
 
 	return ret;
 }
 
-static inline cube_t
-_trans_URr(cube_t c)
+static inline cube_fast_t
+_trans_URr(cube_fast_t c)
 {
-	cube_t ret;
+	cube_fast_t ret;
 
-	cube_t tn = _mm256_set_epi8(
+	cube_fast_t tn = _mm256_set_epi8(
 		0, 0, 0, 0, 26, 25, 24, 27, 2, 3, 0, 1, 7, 6, 5, 4,
 		0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 1, 0, 7, 6, 4, 5
 	);
-	cube_t ti = _mm256_set_epi8(
+	cube_fast_t ti = _mm256_set_epi8(
 		0, 0, 0, 0, 24, 27, 26, 25, 3, 2, 1, 0, 6, 7, 4, 5,
 		0, 0, 0, 0, 0, 0, 0, 0, 3, 2, 0, 1, 6, 7, 5, 4
 	);
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
 
 	return ret;
 }
 
-static inline cube_t
-_trans_DFr(cube_t c)
+static inline cube_fast_t
+_trans_DFr(cube_fast_t c)
 {
-	cube_t ret;
+	cube_fast_t ret;
 
-	cube_t tn = _mm256_set_epi8(
+	cube_fast_t tn = _mm256_set_epi8(
 		0, 0, 0, 0, 10, 11, 8, 9, 5, 4, 7, 6, 0, 1, 2, 3,
 		0, 0, 0, 0, 0, 0, 0, 0, 5, 4, 7, 6, 1, 0, 3, 2
 	);
-	cube_t ti = _mm256_set_epi8(
+	cube_fast_t ti = _mm256_set_epi8(
 		0, 0, 0, 0, 10, 11, 8, 9, 5, 4, 7, 6, 0, 1, 2, 3,
 		0, 0, 0, 0, 0, 0, 0, 0, 5, 4, 7, 6, 1, 0, 3, 2
 	);
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
 
 	return ret;
 }
 
-static inline cube_t
-_trans_DLr(cube_t c)
+static inline cube_fast_t
+_trans_DLr(cube_fast_t c)
 {
-	cube_t ret;
+	cube_fast_t ret;
 
-	cube_t tn = _mm256_set_epi8(
+	cube_fast_t tn = _mm256_set_epi8(
 		0, 0, 0, 0, 27, 24, 25, 26, 1, 0, 3, 2, 5, 4, 7, 6,
 		0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 2, 5, 4, 6, 7
 	);
-	cube_t ti = _mm256_set_epi8(
+	cube_fast_t ti = _mm256_set_epi8(
 		0, 0, 0, 0, 27, 24, 25, 26, 1, 0, 3, 2, 5, 4, 7, 6,
 		0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 2, 5, 4, 6, 7
 	);
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
 
 	return ret;
 }
 
-static inline cube_t
-_trans_DBr(cube_t c)
+static inline cube_fast_t
+_trans_DBr(cube_fast_t c)
 {
-	cube_t ret;
+	cube_fast_t ret;
 
-	cube_t tn = _mm256_set_epi8(
+	cube_fast_t tn = _mm256_set_epi8(
 		0, 0, 0, 0, 8, 9, 10, 11, 4, 5, 6, 7, 1, 0, 3, 2,
 		0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7, 0, 1, 2, 3
 	);
-	cube_t ti = _mm256_set_epi8(
+	cube_fast_t ti = _mm256_set_epi8(
 		0, 0, 0, 0, 8, 9, 10, 11, 4, 5, 6, 7, 1, 0, 3, 2,
 		0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7, 0, 1, 2, 3
 	);
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
 
 	return ret;
 }
 
-static inline cube_t
-_trans_DRr(cube_t c)
+static inline cube_fast_t
+_trans_DRr(cube_fast_t c)
 {
-	cube_t ret;
+	cube_fast_t ret;
 
-	cube_t tn = _mm256_set_epi8(
+	cube_fast_t tn = _mm256_set_epi8(
 		0, 0, 0, 0, 25, 26, 27, 24, 0, 1, 2, 3, 4, 5, 6, 7,
 		0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 3, 4, 5, 7, 6
 	);
-	cube_t ti = _mm256_set_epi8(
+	cube_fast_t ti = _mm256_set_epi8(
 		0, 0, 0, 0, 25, 26, 27, 24, 0, 1, 2, 3, 4, 5, 6, 7,
 		0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 3, 4, 5, 7, 6
 	);
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
 
 	return ret;
 }
 
-static inline cube_t
-_trans_RUr(cube_t c)
+static inline cube_fast_t
+_trans_RUr(cube_fast_t c)
 {
-	cube_t ret;
+	cube_fast_t ret;
 
-	cube_t tn = _mm256_set_epi8(
+	cube_fast_t tn = _mm256_set_epi8(
 		0, 0, 0, 0, 3, 2, 1, 0, 25, 26, 27, 24, 21, 22, 23, 20,
 		0, 0, 0, 0, 0, 0, 0, 0, 39, 36, 38, 37, 66, 65, 67, 64
 	);
-	cube_t ti = _mm256_set_epi8(
+	cube_fast_t ti = _mm256_set_epi8(
 		0, 0, 0, 0, 21, 22, 23, 20, 17, 18, 19, 16, 11, 10, 9, 8,
 		0, 0, 0, 0, 0, 0, 0, 0, 71, 69, 68, 70, 33, 35, 34, 32
 	);
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
 
 	return ret;
 }
 
-static inline cube_t
-_trans_RFr(cube_t c)
+static inline cube_fast_t
+_trans_RFr(cube_fast_t c)
 {
-	cube_t ret;
+	cube_fast_t ret;
 
-	cube_t tn = _mm256_set_epi8(
+	cube_fast_t tn = _mm256_set_epi8(
 		0, 0, 0, 0, 18, 17, 16, 19, 22, 21, 20, 23, 25, 26, 27, 24,
 		0, 0, 0, 0, 0, 0, 0, 0, 65, 66, 67, 64, 39, 36, 37, 38
 	);
-	cube_t ti = _mm256_set_epi8(
+	cube_fast_t ti = _mm256_set_epi8(
 		0, 0, 0, 0, 17, 18, 19, 16, 20, 23, 22, 21, 24, 27, 26, 25,
 		0, 0, 0, 0, 0, 0, 0, 0, 67, 64, 65, 66, 37, 38, 39, 36
 	);
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
 
 	return ret;
 }
 
-static inline cube_t
-_trans_RDr(cube_t c)
+static inline cube_fast_t
+_trans_RDr(cube_fast_t c)
 {
-	cube_t ret;
+	cube_fast_t ret;
 
-	cube_t tn = _mm256_set_epi8(
+	cube_fast_t tn = _mm256_set_epi8(
 		0, 0, 0, 0, 1, 0, 3, 2, 26, 25, 24, 27, 22, 21, 20, 23,
 		0, 0, 0, 0, 0, 0, 0, 0, 36, 39, 37, 38, 65, 66, 64, 67
 	);
-	cube_t ti = _mm256_set_epi8(
+	cube_fast_t ti = _mm256_set_epi8(
 		0, 0, 0, 0, 20, 23, 22, 21, 16, 19, 18, 17, 9, 8, 11, 10,
 		0, 0, 0, 0, 0, 0, 0, 0, 70, 68, 69, 71, 32, 34, 35, 33
 	);
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
 
 	return ret;
 }
 
-static inline cube_t
-_trans_RBr(cube_t c)
+static inline cube_fast_t
+_trans_RBr(cube_fast_t c)
 {
-	cube_t ret;
+	cube_fast_t ret;
 
-	cube_t tn = _mm256_set_epi8(
+	cube_fast_t tn = _mm256_set_epi8(
 		0, 0, 0, 0, 16, 19, 18, 17, 21, 22, 23, 20, 26, 25, 24, 27,
 		0, 0, 0, 0, 0, 0, 0, 0, 66, 65, 64, 67, 36, 39, 38, 37
 	);
-	cube_t ti = _mm256_set_epi8(
+	cube_fast_t ti = _mm256_set_epi8(
 		0, 0, 0, 0, 16, 19, 18, 17, 21, 22, 23, 20, 26, 25, 24, 27,
 		0, 0, 0, 0, 0, 0, 0, 0, 66, 65, 64, 67, 36, 39, 38, 37
 	);
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
 
 	return ret;
 }
 
-static inline cube_t
-_trans_LUr(cube_t c)
+static inline cube_fast_t
+_trans_LUr(cube_fast_t c)
 {
-	cube_t ret;
+	cube_fast_t ret;
 
-	cube_t tn = _mm256_set_epi8(
+	cube_fast_t tn = _mm256_set_epi8(
 		0, 0, 0, 0, 2, 3, 0, 1, 27, 24, 25, 26, 20, 23, 22, 21,
 		0, 0, 0, 0, 0, 0, 0, 0, 38, 37, 39, 36, 67, 64, 66, 65
 	);
-	cube_t ti = _mm256_set_epi8(
+	cube_fast_t ti = _mm256_set_epi8(
 		0, 0, 0, 0, 23, 20, 21, 22, 18, 17, 16, 19, 10, 11, 8, 9,
 		0, 0, 0, 0, 0, 0, 0, 0, 69, 71, 70, 68, 35, 33, 32, 34
 	);
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
 
 	return ret;
 }
 
-static inline cube_t
-_trans_LFr(cube_t c)
+static inline cube_fast_t
+_trans_LFr(cube_fast_t c)
 {
-	cube_t ret;
+	cube_fast_t ret;
 
-	cube_t tn = _mm256_set_epi8(
+	cube_fast_t tn = _mm256_set_epi8(
 		0, 0, 0, 0, 17, 18, 19, 16, 20, 23, 22, 21, 24, 27, 26, 25,
 		0, 0, 0, 0, 0, 0, 0, 0, 67, 64, 65, 66, 37, 38, 39, 36
 	);
-	cube_t ti = _mm256_set_epi8(
+	cube_fast_t ti = _mm256_set_epi8(
 		0, 0, 0, 0, 18, 17, 16, 19, 22, 21, 20, 23, 25, 26, 27, 24,
 		0, 0, 0, 0, 0, 0, 0, 0, 65, 66, 67, 64, 39, 36, 37, 38
 	);
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
 
 	return ret;
 }
 
-static inline cube_t
-_trans_LDr(cube_t c)
+static inline cube_fast_t
+_trans_LDr(cube_fast_t c)
 {
-	cube_t ret;
+	cube_fast_t ret;
 
-	cube_t tn = _mm256_set_epi8(
+	cube_fast_t tn = _mm256_set_epi8(
 		0, 0, 0, 0, 0, 1, 2, 3, 24, 27, 26, 25, 23, 20, 21, 22,
 		0, 0, 0, 0, 0, 0, 0, 0, 37, 38, 36, 39, 64, 67, 65, 66
 	);
-	cube_t ti = _mm256_set_epi8(
+	cube_fast_t ti = _mm256_set_epi8(
 		0, 0, 0, 0, 22, 21, 20, 23, 19, 16, 17, 18, 8, 9, 10, 11,
 		0, 0, 0, 0, 0, 0, 0, 0, 68, 70, 71, 69, 34, 32, 33, 35
 	);
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
 
 	return ret;
 }
 
-static inline cube_t
-_trans_LBr(cube_t c)
+static inline cube_fast_t
+_trans_LBr(cube_fast_t c)
 {
-	cube_t ret;
+	cube_fast_t ret;
 
-	cube_t tn = _mm256_set_epi8(
+	cube_fast_t tn = _mm256_set_epi8(
 		0, 0, 0, 0, 19, 16, 17, 18, 23, 20, 21, 22, 27, 24, 25, 26,
 		0, 0, 0, 0, 0, 0, 0, 0, 64, 67, 66, 65, 38, 37, 36, 39
 	);
-	cube_t ti = _mm256_set_epi8(
+	cube_fast_t ti = _mm256_set_epi8(
 		0, 0, 0, 0, 19, 16, 17, 18, 23, 20, 21, 22, 27, 24, 25, 26,
 		0, 0, 0, 0, 0, 0, 0, 0, 64, 67, 66, 65, 38, 37, 36, 39
 	);
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
 
 	return ret;
 }
 
-static inline cube_t
-_trans_FUr(cube_t c)
+static inline cube_fast_t /* AVX2 build: returns compose_fast(compose_fast(tn, c), ti). */
+_trans_FUr(cube_fast_t c)
 {
-	cube_t ret;
+	cube_fast_t ret;
 
-	cube_t tn = _mm256_set_epi8(
+	cube_fast_t tn = _mm256_set_epi8( /* left factor; bytes listed 31..0: edge bytes 27..16 on the first row, corner bytes 7..0 on the second */
 		0, 0, 0, 0, 6, 7, 4, 5, 10, 11, 8, 9, 17, 18, 19, 16,
 		0, 0, 0, 0, 0, 0, 0, 0, 35, 33, 34, 32, 71, 69, 70, 68
 	);
-	cube_t ti = _mm256_set_epi8(
+	cube_fast_t ti = _mm256_set_epi8( /* right factor; byte-for-byte identical to tn */
 		0, 0, 0, 0, 6, 7, 4, 5, 10, 11, 8, 9, 17, 18, 19, 16,
 		0, 0, 0, 0, 0, 0, 0, 0, 35, 33, 34, 32, 71, 69, 70, 68
 	);
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
 
 	return ret;
 }
 
-static inline cube_t
-_trans_FRr(cube_t c)
+static inline cube_fast_t /* AVX2 build: returns compose_fast(compose_fast(tn, c), ti). */
+_trans_FRr(cube_fast_t c)
 {
-	cube_t ret;
+	cube_fast_t ret;
 
-	cube_t tn = _mm256_set_epi8(
+	cube_fast_t tn = _mm256_set_epi8( /* left factor; bytes listed 31..0: edge bytes 27..16 on the first row, corner bytes 7..0 on the second */
 		0, 0, 0, 0, 21, 22, 23, 20, 17, 18, 19, 16, 11, 10, 9, 8,
 		0, 0, 0, 0, 0, 0, 0, 0, 71, 69, 68, 70, 33, 35, 34, 32
 	);
-	cube_t ti = _mm256_set_epi8(
+	cube_fast_t ti = _mm256_set_epi8( /* right factor; presumably the inverse of tn -- TODO confirm */
 		0, 0, 0, 0, 3, 2, 1, 0, 25, 26, 27, 24, 21, 22, 23, 20,
 		0, 0, 0, 0, 0, 0, 0, 0, 39, 36, 38, 37, 66, 65, 67, 64
 	);
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
 
 	return ret;
 }
 
-static inline cube_t
-_trans_FDr(cube_t c)
+static inline cube_fast_t /* AVX2 build: returns compose_fast(compose_fast(tn, c), ti). */
+_trans_FDr(cube_fast_t c)
 {
-	cube_t ret;
+	cube_fast_t ret;
 
-	cube_t tn = _mm256_set_epi8(
+	cube_fast_t tn = _mm256_set_epi8( /* left factor; bytes listed 31..0: edge bytes 27..16 on the first row, corner bytes 7..0 on the second */
 		0, 0, 0, 0, 4, 5, 6, 7, 11, 10, 9, 8, 18, 17, 16, 19,
 		0, 0, 0, 0, 0, 0, 0, 0, 33, 35, 32, 34, 69, 71, 68, 70
 	);
-	cube_t ti = _mm256_set_epi8(
+	cube_fast_t ti = _mm256_set_epi8( /* right factor; presumably the inverse of tn -- TODO confirm */
 		0, 0, 0, 0, 7, 6, 5, 4, 8, 9, 10, 11, 16, 19, 18, 17,
 		0, 0, 0, 0, 0, 0, 0, 0, 34, 32, 35, 33, 70, 68, 71, 69
 	);
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
 
 	return ret;
 }
 
-static inline cube_t
-_trans_FLr(cube_t c)
+static inline cube_fast_t /* AVX2 build: returns compose_fast(compose_fast(tn, c), ti). */
+_trans_FLr(cube_fast_t c)
 {
-	cube_t ret;
+	cube_fast_t ret;
 
-	cube_t tn = _mm256_set_epi8(
+	cube_fast_t tn = _mm256_set_epi8( /* left factor; bytes listed 31..0: edge bytes 27..16 on the first row, corner bytes 7..0 on the second */
 		0, 0, 0, 0, 23, 20, 21, 22, 18, 17, 16, 19, 10, 11, 8, 9,
 		0, 0, 0, 0, 0, 0, 0, 0, 69, 71, 70, 68, 35, 33, 32, 34
 	);
-	cube_t ti = _mm256_set_epi8(
+	cube_fast_t ti = _mm256_set_epi8( /* right factor; presumably the inverse of tn -- TODO confirm */
 		0, 0, 0, 0, 2, 3, 0, 1, 27, 24, 25, 26, 20, 23, 22, 21,
 		0, 0, 0, 0, 0, 0, 0, 0, 38, 37, 39, 36, 67, 64, 66, 65
 	);
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
 
 	return ret;
 }
 
-static inline cube_t
-_trans_BUr(cube_t c)
+static inline cube_fast_t /* AVX2 build: returns compose_fast(compose_fast(tn, c), ti). */
+_trans_BUr(cube_fast_t c)
 {
-	cube_t ret;
+	cube_fast_t ret;
 
-	cube_t tn = _mm256_set_epi8(
+	cube_fast_t tn = _mm256_set_epi8( /* left factor; bytes listed 31..0: edge bytes 27..16 on the first row, corner bytes 7..0 on the second */
 		0, 0, 0, 0, 7, 6, 5, 4, 8, 9, 10, 11, 16, 19, 18, 17,
 		0, 0, 0, 0, 0, 0, 0, 0, 34, 32, 35, 33, 70, 68, 71, 69
 	);
-	cube_t ti = _mm256_set_epi8(
+	cube_fast_t ti = _mm256_set_epi8( /* right factor; presumably the inverse of tn -- TODO confirm */
 		0, 0, 0, 0, 4, 5, 6, 7, 11, 10, 9, 8, 18, 17, 16, 19,
 		0, 0, 0, 0, 0, 0, 0, 0, 33, 35, 32, 34, 69, 71, 68, 70
 	);
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
 
 	return ret;
 }
 
-static inline cube_t
-_trans_BRr(cube_t c)
+static inline cube_fast_t /* AVX2 build: returns compose_fast(compose_fast(tn, c), ti). */
+_trans_BRr(cube_fast_t c)
 {
-	cube_t ret;
+	cube_fast_t ret;
 
-	cube_t tn = _mm256_set_epi8(
+	cube_fast_t tn = _mm256_set_epi8( /* left factor; bytes listed 31..0: edge bytes 27..16 on the first row, corner bytes 7..0 on the second */
 		0, 0, 0, 0, 22, 21, 20, 23, 19, 16, 17, 18, 8, 9, 10, 11,
 		0, 0, 0, 0, 0, 0, 0, 0, 68, 70, 71, 69, 34, 32, 33, 35
 	);
-	cube_t ti = _mm256_set_epi8(
+	cube_fast_t ti = _mm256_set_epi8( /* right factor; presumably the inverse of tn -- TODO confirm */
 		0, 0, 0, 0, 0, 1, 2, 3, 24, 27, 26, 25, 23, 20, 21, 22,
 		0, 0, 0, 0, 0, 0, 0, 0, 37, 38, 36, 39, 64, 67, 65, 66
 	);
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
 
 	return ret;
 }
 
-static inline cube_t
-_trans_BDr(cube_t c)
+static inline cube_fast_t /* AVX2 build: returns compose_fast(compose_fast(tn, c), ti). */
+_trans_BDr(cube_fast_t c)
 {
-	cube_t ret;
+	cube_fast_t ret;
 
-	cube_t tn = _mm256_set_epi8(
+	cube_fast_t tn = _mm256_set_epi8( /* left factor; bytes listed 31..0: edge bytes 27..16 on the first row, corner bytes 7..0 on the second */
 		0, 0, 0, 0, 5, 4, 7, 6, 9, 8, 11, 10, 19, 16, 17, 18,
 		0, 0, 0, 0, 0, 0, 0, 0, 32, 34, 33, 35, 68, 70, 69, 71
 	);
-	cube_t ti = _mm256_set_epi8(
+	cube_fast_t ti = _mm256_set_epi8( /* right factor; byte-for-byte identical to tn */
 		0, 0, 0, 0, 5, 4, 7, 6, 9, 8, 11, 10, 19, 16, 17, 18,
 		0, 0, 0, 0, 0, 0, 0, 0, 32, 34, 33, 35, 68, 70, 69, 71
 	);
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
 
 	return ret;
 }
 
-static inline cube_t
-_trans_BLr(cube_t c)
+static inline cube_fast_t /* AVX2 build: returns compose_fast(compose_fast(tn, c), ti). */
+_trans_BLr(cube_fast_t c)
 {
-	cube_t ret;
+	cube_fast_t ret;
 
-	cube_t tn = _mm256_set_epi8(
+	cube_fast_t tn = _mm256_set_epi8( /* left factor; bytes listed 31..0: edge bytes 27..16 on the first row, corner bytes 7..0 on the second */
 		0, 0, 0, 0, 20, 23, 22, 21, 16, 19, 18, 17, 9, 8, 11, 10,
 		0, 0, 0, 0, 0, 0, 0, 0, 70, 68, 69, 71, 32, 34, 35, 33
 	);
-	cube_t ti = _mm256_set_epi8(
+	cube_fast_t ti = _mm256_set_epi8( /* right factor; presumably the inverse of tn -- TODO confirm */
 		0, 0, 0, 0, 1, 0, 3, 2, 26, 25, 24, 27, 22, 21, 20, 23,
 		0, 0, 0, 0, 0, 0, 0, 0, 36, 39, 37, 38, 65, 66, 64, 67
 	);
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
 
 	return ret;
 }
 
-static inline cube_t
-_trans_UFm(cube_t c)
+static inline cube_fast_t /* AVX2 build: returns invertco_fast(compose_fast(compose_fast(tn, c), ti)). */
+_trans_UFm(cube_fast_t c)
 {
-	cube_t ret;
+	cube_fast_t ret;
 
-	cube_t tn = _mm256_set_epi8(
+	cube_fast_t tn = _mm256_set_epi8( /* left factor; bytes listed 31..0: edge bytes 27..16 on the first row, corner bytes 7..0 on the second */
 		0, 0, 0, 0, 10, 11, 8, 9, 6, 7, 4, 5, 3, 2, 1, 0,
 		0, 0, 0, 0, 0, 0, 0, 0, 3, 2, 1, 0, 7, 6, 5, 4
 	);
-	cube_t ti = _mm256_set_epi8(
+	cube_fast_t ti = _mm256_set_epi8( /* right factor; byte-for-byte identical to tn */
 		0, 0, 0, 0, 10, 11, 8, 9, 6, 7, 4, 5, 3, 2, 1, 0,
 		0, 0, 0, 0, 0, 0, 0, 0, 3, 2, 1, 0, 7, 6, 5, 4
 	);
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
-	ret = _invertco(ret);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+	ret = invertco_fast(ret); /* extra step; the r-suffixed variants end after the second compose_fast */
 
 	return ret;
 }
 
-static inline cube_t
-_trans_ULm(cube_t c)
+static inline cube_fast_t /* AVX2 build: returns invertco_fast(compose_fast(compose_fast(tn, c), ti)). */
+_trans_ULm(cube_fast_t c)
 {
-	cube_t ret;
+	cube_fast_t ret;
 
-	cube_t tn = _mm256_set_epi8(
+	cube_fast_t tn = _mm256_set_epi8( /* left factor; bytes listed 31..0: edge bytes 27..16 on the first row, corner bytes 7..0 on the second */
 		0, 0, 0, 0, 25, 26, 27, 24, 3, 2, 1, 0, 7, 6, 5, 4,
 		0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 4, 5, 2, 3, 1, 0
 	);
-	cube_t ti = _mm256_set_epi8(
+	cube_fast_t ti = _mm256_set_epi8( /* right factor; byte-for-byte identical to tn */
 		0, 0, 0, 0, 25, 26, 27, 24, 3, 2, 1, 0, 7, 6, 5, 4,
 		0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 4, 5, 2, 3, 1, 0
 	);
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
-	ret = _invertco(ret);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+	ret = invertco_fast(ret); /* extra step; the r-suffixed variants end after the second compose_fast */
 
 	return ret;
 }
 
-static inline cube_t
-_trans_UBm(cube_t c)
+static inline cube_fast_t /* AVX2 build: returns invertco_fast(compose_fast(compose_fast(tn, c), ti)). */
+_trans_UBm(cube_fast_t c)
 {
-	cube_t ret;
+	cube_fast_t ret;
 
-	cube_t tn = _mm256_set_epi8(
+	cube_fast_t tn = _mm256_set_epi8( /* left factor; bytes listed 31..0: edge bytes 27..16 on the first row, corner bytes 7..0 on the second */
 		0, 0, 0, 0, 8, 9, 10, 11, 7, 6, 5, 4, 2, 3, 0, 1,
 		0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 0, 1, 6, 7, 4, 5
 	);
-	cube_t ti = _mm256_set_epi8(
+	cube_fast_t ti = _mm256_set_epi8( /* right factor; byte-for-byte identical to tn */
 		0, 0, 0, 0, 8, 9, 10, 11, 7, 6, 5, 4, 2, 3, 0, 1,
 		0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 0, 1, 6, 7, 4, 5
 	);
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
-	ret = _invertco(ret);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+	ret = invertco_fast(ret); /* extra step; the r-suffixed variants end after the second compose_fast */
 
 	return ret;
 }
 
-static inline cube_t
-_trans_URm(cube_t c)
+static inline cube_fast_t /* AVX2 build: returns invertco_fast(compose_fast(compose_fast(tn, c), ti)). */
+_trans_URm(cube_fast_t c)
 {
-	cube_t ret;
+	cube_fast_t ret;
 
-	cube_t tn = _mm256_set_epi8(
+	cube_fast_t tn = _mm256_set_epi8( /* left factor; bytes listed 31..0: edge bytes 27..16 on the first row, corner bytes 7..0 on the second */
 		0, 0, 0, 0, 27, 24, 25, 26, 2, 3, 0, 1, 6, 7, 4, 5,
 		0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 5, 4, 3, 2, 0, 1
 	);
-	cube_t ti = _mm256_set_epi8(
+	cube_fast_t ti = _mm256_set_epi8( /* right factor; byte-for-byte identical to tn */
 		0, 0, 0, 0, 27, 24, 25, 26, 2, 3, 0, 1, 6, 7, 4, 5,
 		0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 5, 4, 3, 2, 0, 1
 	);
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
-	ret = _invertco(ret);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+	ret = invertco_fast(ret); /* extra step; the r-suffixed variants end after the second compose_fast */
 
 	return ret;
 }
 
-static inline cube_t
-_trans_DFm(cube_t c)
+static inline cube_fast_t /* AVX2 build: returns invertco_fast(compose_fast(compose_fast(tn, c), ti)). */
+_trans_DFm(cube_fast_t c)
 {
-	cube_t ret;
+	cube_fast_t ret;
 
-	cube_t tn = _mm256_set_epi8(
+	cube_fast_t tn = _mm256_set_epi8( /* left factor; bytes listed 31..0: edge bytes 27..16 on the first row, corner bytes 7..0 on the second */
 		0, 0, 0, 0, 11, 10, 9, 8, 4, 5, 6, 7, 0, 1, 2, 3,
 		0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 2, 5, 4, 7, 6
 	);
-	cube_t ti = _mm256_set_epi8(
+	cube_fast_t ti = _mm256_set_epi8( /* right factor; byte-for-byte identical to tn */
 		0, 0, 0, 0, 11, 10, 9, 8, 4, 5, 6, 7, 0, 1, 2, 3,
 		0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 2, 5, 4, 7, 6
 	);
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
-	ret = _invertco(ret);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+	ret = invertco_fast(ret); /* extra step; the r-suffixed variants end after the second compose_fast */
 
 	return ret;
 }
 
-static inline cube_t
-_trans_DLm(cube_t c)
+static inline cube_fast_t /* AVX2 build: returns invertco_fast(compose_fast(compose_fast(tn, c), ti)). */
+_trans_DLm(cube_fast_t c)
 {
-	cube_t ret;
+	cube_fast_t ret;
 
-	cube_t tn = _mm256_set_epi8(
+	cube_fast_t tn = _mm256_set_epi8( /* left factor; bytes listed 31..0: edge bytes 27..16 on the first row, corner bytes 7..0 on the second */
 		0, 0, 0, 0, 26, 25, 24, 27, 1, 0, 3, 2, 4, 5, 6, 7,
 		0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 7, 6, 1, 0, 2, 3
 	);
-	cube_t ti = _mm256_set_epi8(
+	cube_fast_t ti = _mm256_set_epi8( /* right factor; presumably the inverse of tn -- TODO confirm */
 		0, 0, 0, 0, 24, 27, 26, 25, 0, 1, 2, 3, 5, 4, 7, 6,
 		0, 0, 0, 0, 0, 0, 0, 0, 5, 4, 6, 7, 0, 1, 3, 2
 	);
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
-	ret = _invertco(ret);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+	ret = invertco_fast(ret); /* extra step; the r-suffixed variants end after the second compose_fast */
 
 	return ret;
 }
 
-static inline cube_t
-_trans_DBm(cube_t c)
+static inline cube_fast_t /* AVX2 build: returns invertco_fast(compose_fast(compose_fast(tn, c), ti)). */
+_trans_DBm(cube_fast_t c)
 {
-	cube_t ret;
+	cube_fast_t ret;
 
-	cube_t tn = _mm256_set_epi8(
+	cube_fast_t tn = _mm256_set_epi8( /* left factor; bytes listed 31..0: edge bytes 27..16 on the first row, corner bytes 7..0 on the second */
 		0, 0, 0, 0, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2,
 		0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7
 	);
-	cube_t ti = _mm256_set_epi8(
+	cube_fast_t ti = _mm256_set_epi8( /* right factor; byte-for-byte identical to tn */
 		0, 0, 0, 0, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2,
 		0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7
 	);
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
-	ret = _invertco(ret);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+	ret = invertco_fast(ret); /* extra step; the r-suffixed variants end after the second compose_fast */
 
 	return ret;
 }
 
-static inline cube_t
-_trans_DRm(cube_t c)
+static inline cube_fast_t /* AVX2 build: returns invertco_fast(compose_fast(compose_fast(tn, c), ti)). */
+_trans_DRm(cube_fast_t c)
 {
-	cube_t ret;
+	cube_fast_t ret;
 
-	cube_t tn = _mm256_set_epi8(
+	cube_fast_t tn = _mm256_set_epi8( /* left factor; bytes listed 31..0: edge bytes 27..16 on the first row, corner bytes 7..0 on the second */
 		0, 0, 0, 0, 24, 27, 26, 25, 0, 1, 2, 3, 5, 4, 7, 6,
 		0, 0, 0, 0, 0, 0, 0, 0, 5, 4, 6, 7, 0, 1, 3, 2
 	);
-	cube_t ti = _mm256_set_epi8(
+	cube_fast_t ti = _mm256_set_epi8( /* right factor; presumably the inverse of tn -- TODO confirm */
 		0, 0, 0, 0, 26, 25, 24, 27, 1, 0, 3, 2, 4, 5, 6, 7,
 		0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 7, 6, 1, 0, 2, 3
 	);
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
-	ret = _invertco(ret);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+	ret = invertco_fast(ret); /* extra step; the r-suffixed variants end after the second compose_fast */
 
 	return ret;
 }
 
-static inline cube_t
-_trans_RUm(cube_t c)
+static inline cube_fast_t /* AVX2 build: returns invertco_fast(compose_fast(compose_fast(tn, c), ti)). */
+_trans_RUm(cube_fast_t c)
 {
-	cube_t ret;
+	cube_fast_t ret;
 
-	cube_t tn = _mm256_set_epi8(
+	cube_fast_t tn = _mm256_set_epi8( /* left factor; bytes listed 31..0: edge bytes 27..16 on the first row, corner bytes 7..0 on the second */
 		0, 0, 0, 0, 3, 2, 1, 0, 24, 27, 26, 25, 20, 23, 22, 21,
 		0, 0, 0, 0, 0, 0, 0, 0, 35, 32, 34, 33, 70, 69, 71, 68
 	);
-	cube_t ti = _mm256_set_epi8(
+	cube_fast_t ti = _mm256_set_epi8( /* right factor; presumably the inverse of tn -- TODO confirm */
 		0, 0, 0, 0, 22, 21, 20, 23, 18, 17, 16, 19, 11, 10, 9, 8,
 		0, 0, 0, 0, 0, 0, 0, 0, 33, 35, 34, 32, 71, 69, 68, 70
 	);
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
-	ret = _invertco(ret);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+	ret = invertco_fast(ret); /* extra step; the r-suffixed variants end after the second compose_fast */
 
 	return ret;
 }
 
-static inline cube_t
-_trans_RFm(cube_t c)
+static inline cube_fast_t /* AVX2 build: returns invertco_fast(compose_fast(compose_fast(tn, c), ti)). */
+_trans_RFm(cube_fast_t c)
 {
-	cube_t ret;
+	cube_fast_t ret;
 
-	cube_t tn = _mm256_set_epi8(
+	cube_fast_t tn = _mm256_set_epi8( /* left factor; bytes listed 31..0: edge bytes 27..16 on the first row, corner bytes 7..0 on the second */
 		0, 0, 0, 0, 18, 17, 16, 19, 23, 20, 21, 22, 24, 27, 26, 25,
 		0, 0, 0, 0, 0, 0, 0, 0, 69, 70, 71, 68, 35, 32, 33, 34
 	);
-	cube_t ti = _mm256_set_epi8(
+	cube_fast_t ti = _mm256_set_epi8( /* right factor; edge row equals tn's, corner row differs */
 		0, 0, 0, 0, 18, 17, 16, 19, 23, 20, 21, 22, 24, 27, 26, 25,
 		0, 0, 0, 0, 0, 0, 0, 0, 37, 38, 39, 36, 67, 64, 65, 66
 	);
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
-	ret = _invertco(ret);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+	ret = invertco_fast(ret); /* extra step; the r-suffixed variants end after the second compose_fast */
 
 	return ret;
 }
 
-static inline cube_t
-_trans_RDm(cube_t c)
+static inline cube_fast_t /* AVX2 build: returns invertco_fast(compose_fast(compose_fast(tn, c), ti)). */
+_trans_RDm(cube_fast_t c)
 {
-	cube_t ret;
+	cube_fast_t ret;
 
-	cube_t tn = _mm256_set_epi8(
+	cube_fast_t tn = _mm256_set_epi8( /* left factor; bytes listed 31..0: edge bytes 27..16 on the first row, corner bytes 7..0 on the second */
 		0, 0, 0, 0, 1, 0, 3, 2, 27, 24, 25, 26, 23, 20, 21, 22,
 		0, 0, 0, 0, 0, 0, 0, 0, 32, 35, 33, 34, 69, 70, 68, 71
 	);
-	cube_t ti = _mm256_set_epi8(
+	cube_fast_t ti = _mm256_set_epi8( /* right factor; presumably the inverse of tn -- TODO confirm */
 		0, 0, 0, 0, 23, 20, 21, 22, 19, 16, 17, 18, 9, 8, 11, 10,
 		0, 0, 0, 0, 0, 0, 0, 0, 32, 34, 35, 33, 70, 68, 69, 71
 	);
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
-	ret = _invertco(ret);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+	ret = invertco_fast(ret); /* extra step; the r-suffixed variants end after the second compose_fast */
 
 	return ret;
 }
 
-static inline cube_t
-_trans_RBm(cube_t c)
+static inline cube_fast_t /* AVX2 build: returns invertco_fast(compose_fast(compose_fast(tn, c), ti)). */
+_trans_RBm(cube_fast_t c)
 {
-	cube_t ret;
+	cube_fast_t ret;
 
-	cube_t tn = _mm256_set_epi8(
+	cube_fast_t tn = _mm256_set_epi8( /* left factor; bytes listed 31..0: edge bytes 27..16 on the first row, corner bytes 7..0 on the second */
 		0, 0, 0, 0, 16, 19, 18, 17, 20, 23, 22, 21, 27, 24, 25, 26,
 		0, 0, 0, 0, 0, 0, 0, 0, 70, 69, 68, 71, 32, 35, 34, 33
 	);
-	cube_t ti = _mm256_set_epi8(
+	cube_fast_t ti = _mm256_set_epi8( /* right factor; presumably the inverse of tn -- TODO confirm */
 		0, 0, 0, 0, 19, 16, 17, 18, 22, 21, 20, 23, 26, 25, 24, 27,
 		0, 0, 0, 0, 0, 0, 0, 0, 36, 39, 38, 37, 66, 65, 64, 67
 	);
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
-	ret = _invertco(ret);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+	ret = invertco_fast(ret); /* extra step; the r-suffixed variants end after the second compose_fast */
 
 	return ret;
 }
 
-static inline cube_t
-_trans_LUm(cube_t c)
+static inline cube_fast_t /* AVX2 build: returns invertco_fast(compose_fast(compose_fast(tn, c), ti)). */
+_trans_LUm(cube_fast_t c)
 {
-	cube_t ret;
+	cube_fast_t ret;
 
-	cube_t tn = _mm256_set_epi8(
+	cube_fast_t tn = _mm256_set_epi8( /* left factor; bytes listed 31..0: edge bytes 27..16 on the first row, corner bytes 7..0 on the second */
 		0, 0, 0, 0, 2, 3, 0, 1, 26, 25, 24, 27, 21, 22, 23, 20,
 		0, 0, 0, 0, 0, 0, 0, 0, 34, 33, 35, 32, 71, 68, 70, 69
 	);
-	cube_t ti = _mm256_set_epi8(
+	cube_fast_t ti = _mm256_set_epi8( /* right factor; presumably the inverse of tn -- TODO confirm */
 		0, 0, 0, 0, 20, 23, 22, 21, 17, 18, 19, 16, 10, 11, 8, 9,
 		0, 0, 0, 0, 0, 0, 0, 0, 35, 33, 32, 34, 69, 71, 70, 68
 	);
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
-	ret = _invertco(ret);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+	ret = invertco_fast(ret); /* extra step; the r-suffixed variants end after the second compose_fast */
 
 	return ret;
 }
 
-static inline cube_t
-_trans_LFm(cube_t c)
+static inline cube_fast_t /* AVX2 build: returns invertco_fast(compose_fast(compose_fast(tn, c), ti)). */
+_trans_LFm(cube_fast_t c)
 {
-	cube_t ret;
+	cube_fast_t ret;
 
-	cube_t tn = _mm256_set_epi8(
+	cube_fast_t tn = _mm256_set_epi8( /* left factor; bytes listed 31..0: edge bytes 27..16 on the first row, corner bytes 7..0 on the second */
 		0, 0, 0, 0, 17, 18, 19, 16, 21, 22, 23, 20, 25, 26, 27, 24,
 		0, 0, 0, 0, 0, 0, 0, 0, 71, 68, 69, 70, 33, 34, 35, 32
 	);
-	cube_t ti = _mm256_set_epi8(
+	cube_fast_t ti = _mm256_set_epi8( /* right factor; edge row equals tn's, corner row differs */
 		0, 0, 0, 0, 17, 18, 19, 16, 21, 22, 23, 20, 25, 26, 27, 24,
 		0, 0, 0, 0, 0, 0, 0, 0, 39, 36, 37, 38, 65, 66, 67, 64
 	);
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
-	ret = _invertco(ret);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+	ret = invertco_fast(ret); /* extra step; the r-suffixed variants end after the second compose_fast */
 
 	return ret;
 }
 
-static inline cube_t
-_trans_LDm(cube_t c)
+static inline cube_fast_t /* AVX2 build: returns invertco_fast(compose_fast(compose_fast(tn, c), ti)). */
+_trans_LDm(cube_fast_t c)
 {
-	cube_t ret;
+	cube_fast_t ret;
 
-	cube_t tn = _mm256_set_epi8(
+	cube_fast_t tn = _mm256_set_epi8( /* left factor; bytes listed 31..0: edge bytes 27..16 on the first row, corner bytes 7..0 on the second */
 		0, 0, 0, 0, 0, 1, 2, 3, 25, 26, 27, 24, 22, 21, 20, 23,
 		0, 0, 0, 0, 0, 0, 0, 0, 33, 34, 32, 35, 68, 71, 69, 70
 	);
-	cube_t ti = _mm256_set_epi8(
+	cube_fast_t ti = _mm256_set_epi8( /* right factor; presumably the inverse of tn -- TODO confirm */
 		0, 0, 0, 0, 21, 22, 23, 20, 16, 19, 18, 17, 8, 9, 10, 11,
 		0, 0, 0, 0, 0, 0, 0, 0, 34, 32, 33, 35, 68, 70, 71, 69
 	);
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
-	ret = _invertco(ret);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+	ret = invertco_fast(ret); /* extra step; the r-suffixed variants end after the second compose_fast */
 
 	return ret;
 }
 
-static inline cube_t
-_trans_LBm(cube_t c)
+static inline cube_fast_t /* AVX2 build: returns invertco_fast(compose_fast(compose_fast(tn, c), ti)). */
+_trans_LBm(cube_fast_t c)
 {
-	cube_t ret;
+	cube_fast_t ret;
 
-	cube_t tn = _mm256_set_epi8(
+	cube_fast_t tn = _mm256_set_epi8( /* left factor; bytes listed 31..0: edge bytes 27..16 on the first row, corner bytes 7..0 on the second */
 		0, 0, 0, 0, 19, 16, 17, 18, 22, 21, 20, 23, 26, 25, 24, 27,
 		0, 0, 0, 0, 0, 0, 0, 0, 68, 71, 70, 69, 34, 33, 32, 35
 	);
-	cube_t ti = _mm256_set_epi8(
+	cube_fast_t ti = _mm256_set_epi8( /* right factor; presumably the inverse of tn -- TODO confirm */
 		0, 0, 0, 0, 16, 19, 18, 17, 20, 23, 22, 21, 27, 24, 25, 26,
 		0, 0, 0, 0, 0, 0, 0, 0, 38, 37, 36, 39, 64, 67, 66, 65
 	);
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
-	ret = _invertco(ret);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+	ret = invertco_fast(ret); /* extra step; the r-suffixed variants end after the second compose_fast */
 
 	return ret;
 }
 
-static inline cube_t
-_trans_FUm(cube_t c)
+static inline cube_fast_t /* AVX2 build: returns invertco_fast(compose_fast(compose_fast(tn, c), ti)). */
+_trans_FUm(cube_fast_t c)
 {
-	cube_t ret;
+	cube_fast_t ret;
 
-	cube_t tn = _mm256_set_epi8(
+	cube_fast_t tn = _mm256_set_epi8( /* left factor; bytes listed 31..0: edge bytes 27..16 on the first row, corner bytes 7..0 on the second */
 		0, 0, 0, 0, 7, 6, 5, 4, 11, 10, 9, 8, 17, 18, 19, 16,
 		0, 0, 0, 0, 0, 0, 0, 0, 39, 37, 38, 36, 67, 65, 66, 64
 	);
-	cube_t ti = _mm256_set_epi8(
+	cube_fast_t ti = _mm256_set_epi8( /* right factor; edge row equals tn's, corner row differs */
 		0, 0, 0, 0, 7, 6, 5, 4, 11, 10, 9, 8, 17, 18, 19, 16,
 		0, 0, 0, 0, 0, 0, 0, 0, 71, 69, 70, 68, 35, 33, 34, 32
 	);
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
-	ret = _invertco(ret);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+	ret = invertco_fast(ret); /* extra step; the r-suffixed variants end after the second compose_fast */
 
 	return ret;
 }
 
-static inline cube_t
-_trans_FRm(cube_t c)
+static inline cube_fast_t /* AVX2 build: returns invertco_fast(compose_fast(compose_fast(tn, c), ti)). */
+_trans_FRm(cube_fast_t c)
 {
-	cube_t ret;
+	cube_fast_t ret;
 
-	cube_t tn = _mm256_set_epi8(
+	cube_fast_t tn = _mm256_set_epi8( /* left factor; bytes listed 31..0: edge bytes 27..16 on the first row, corner bytes 7..0 on the second */
 		0, 0, 0, 0, 20, 23, 22, 21, 17, 18, 19, 16, 10, 11, 8, 9,
 		0, 0, 0, 0, 0, 0, 0, 0, 67, 65, 64, 66, 37, 39, 38, 36
 	);
-	cube_t ti = _mm256_set_epi8(
+	cube_fast_t ti = _mm256_set_epi8( /* right factor; presumably the inverse of tn -- TODO confirm */
 		0, 0, 0, 0, 2, 3, 0, 1, 26, 25, 24, 27, 21, 22, 23, 20,
 		0, 0, 0, 0, 0, 0, 0, 0, 66, 65, 67, 64, 39, 36, 38, 37
 	);
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
-	ret = _invertco(ret);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+	ret = invertco_fast(ret); /* extra step; the r-suffixed variants end after the second compose_fast */
 
 	return ret;
 }
 
-static inline cube_t
-_trans_FDm(cube_t c)
+static inline cube_fast_t /* AVX2 build: returns invertco_fast(compose_fast(compose_fast(tn, c), ti)). */
+_trans_FDm(cube_fast_t c)
 {
-	cube_t ret;
+	cube_fast_t ret;
 
-	cube_t tn = _mm256_set_epi8(
+	cube_fast_t tn = _mm256_set_epi8( /* left factor; bytes listed 31..0: edge bytes 27..16 on the first row, corner bytes 7..0 on the second */
 		0, 0, 0, 0, 5, 4, 7, 6, 10, 11, 8, 9, 18, 17, 16, 19,
 		0, 0, 0, 0, 0, 0, 0, 0, 37, 39, 36, 38, 65, 67, 64, 66
 	);
-	cube_t ti = _mm256_set_epi8(
+	cube_fast_t ti = _mm256_set_epi8( /* right factor; presumably the inverse of tn -- TODO confirm */
 		0, 0, 0, 0, 6, 7, 4, 5, 9, 8, 11, 10, 16, 19, 18, 17,
 		0, 0, 0, 0, 0, 0, 0, 0, 70, 68, 71, 69, 34, 32, 35, 33
 	);
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
-	ret = _invertco(ret);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+	ret = invertco_fast(ret); /* extra step; the r-suffixed variants end after the second compose_fast */
 
 	return ret;
 }
 
-static inline cube_t
-_trans_FLm(cube_t c)
+static inline cube_fast_t /* AVX2 build: returns invertco_fast(compose_fast(compose_fast(tn, c), ti)). */
+_trans_FLm(cube_fast_t c)
 {
-	cube_t ret;
+	cube_fast_t ret;
 
-	cube_t tn = _mm256_set_epi8(
+	cube_fast_t tn = _mm256_set_epi8( /* left factor; bytes listed 31..0: edge bytes 27..16 on the first row, corner bytes 7..0 on the second */
 		0, 0, 0, 0, 22, 21, 20, 23, 18, 17, 16, 19, 11, 10, 9, 8,
 		0, 0, 0, 0, 0, 0, 0, 0, 65, 67, 66, 64, 39, 37, 36, 38
 	);
-	cube_t ti = _mm256_set_epi8(
+	cube_fast_t ti = _mm256_set_epi8( /* right factor; presumably the inverse of tn -- TODO confirm */
 		0, 0, 0, 0, 3, 2, 1, 0, 24, 27, 26, 25, 20, 23, 22, 21,
 		0, 0, 0, 0, 0, 0, 0, 0, 67, 64, 66, 65, 38, 37, 39, 36
 	);
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
-	ret = _invertco(ret);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+	ret = invertco_fast(ret); /* extra step; the r-suffixed variants end after the second compose_fast */
 
 	return ret;
 }
 
-static inline cube_t
-_trans_BUm(cube_t c)
+static inline cube_fast_t /* AVX2 build: returns invertco_fast(compose_fast(compose_fast(tn, c), ti)). */
+_trans_BUm(cube_fast_t c)
 {
-	cube_t ret;
+	cube_fast_t ret;
 
-	cube_t tn = _mm256_set_epi8(
+	cube_fast_t tn = _mm256_set_epi8( /* left factor; bytes listed 31..0: edge bytes 27..16 on the first row, corner bytes 7..0 on the second */
 		0, 0, 0, 0, 6, 7, 4, 5, 9, 8, 11, 10, 16, 19, 18, 17,
 		0, 0, 0, 0, 0, 0, 0, 0, 38, 36, 39, 37, 66, 64, 67, 65
 	);
-	cube_t ti = _mm256_set_epi8(
+	cube_fast_t ti = _mm256_set_epi8( /* right factor; presumably the inverse of tn -- TODO confirm */
 		0, 0, 0, 0, 5, 4, 7, 6, 10, 11, 8, 9, 18, 17, 16, 19,
 		0, 0, 0, 0, 0, 0, 0, 0, 69, 71, 68, 70, 33, 35, 32, 34
 	);
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
-	ret = _invertco(ret);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+	ret = invertco_fast(ret); /* extra step; the r-suffixed variants end after the second compose_fast */
 
 	return ret;
 }
 
-static inline cube_t
-_trans_BRm(cube_t c)
+static inline cube_fast_t /* AVX2 build: returns invertco_fast(compose_fast(compose_fast(tn, c), ti)). */
+_trans_BRm(cube_fast_t c)
 {
-	cube_t ret;
+	cube_fast_t ret;
 
-	cube_t tn = _mm256_set_epi8(
+	cube_fast_t tn = _mm256_set_epi8( /* left factor; bytes listed 31..0: edge bytes 27..16 on the first row, corner bytes 7..0 on the second */
 		0, 0, 0, 0, 23, 20, 21, 22, 19, 16, 17, 18, 9, 8, 11, 10,
 		0, 0, 0, 0, 0, 0, 0, 0, 64, 66, 67, 65, 38, 36, 37, 39
 	);
-	cube_t ti = _mm256_set_epi8(
+	cube_fast_t ti = _mm256_set_epi8( /* right factor; presumably the inverse of tn -- TODO confirm */
 		0, 0, 0, 0, 1, 0, 3, 2, 27, 24, 25, 26, 23, 20, 21, 22,
 		0, 0, 0, 0, 0, 0, 0, 0, 64, 67, 65, 66, 37, 38, 36, 39
 	);
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
-	ret = _invertco(ret);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+	ret = invertco_fast(ret); /* extra step; the r-suffixed variants end after the second compose_fast */
 
 	return ret;
 }
 
-static inline cube_t
-_trans_BDm(cube_t c)
+static inline cube_fast_t /* AVX2 build: returns invertco_fast(compose_fast(compose_fast(tn, c), ti)). */
+_trans_BDm(cube_fast_t c)
 {
-	cube_t ret;
+	cube_fast_t ret;
 
-	cube_t tn = _mm256_set_epi8(
+	cube_fast_t tn = _mm256_set_epi8( /* left factor; bytes listed 31..0: edge bytes 27..16 on the first row, corner bytes 7..0 on the second */
 		0, 0, 0, 0, 4, 5, 6, 7, 8, 9, 10, 11, 19, 16, 17, 18,
 		0, 0, 0, 0, 0, 0, 0, 0, 36, 38, 37, 39, 64, 66, 65, 67
 	);
-	cube_t ti = _mm256_set_epi8(
+	cube_fast_t ti = _mm256_set_epi8( /* right factor; edge row equals tn's, corner row differs */
 		0, 0, 0, 0, 4, 5, 6, 7, 8, 9, 10, 11, 19, 16, 17, 18,
 		0, 0, 0, 0, 0, 0, 0, 0, 68, 70, 69, 71, 32, 34, 33, 35
 	);
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
-	ret = _invertco(ret);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+	ret = invertco_fast(ret); /* extra step; the r-suffixed variants end after the second compose_fast */
 
 	return ret;
 }
 
-static inline cube_t
-_trans_BLm(cube_t c)
+static inline cube_fast_t /* AVX2 build: returns invertco_fast(compose_fast(compose_fast(tn, c), ti)). */
+_trans_BLm(cube_fast_t c)
 {
-	cube_t ret;
+	cube_fast_t ret;
 
-	cube_t tn = _mm256_set_epi8(
+	cube_fast_t tn = _mm256_set_epi8( /* left factor; bytes listed 31..0: edge bytes 27..16 on the first row, corner bytes 7..0 on the second */
 		0, 0, 0, 0, 21, 22, 23, 20, 16, 19, 18, 17, 8, 9, 10, 11,
 		0, 0, 0, 0, 0, 0, 0, 0, 66, 64, 65, 67, 36, 38, 39, 37
 	);
-	cube_t ti = _mm256_set_epi8(
+	cube_fast_t ti = _mm256_set_epi8( /* right factor; presumably the inverse of tn -- TODO confirm */
 		0, 0, 0, 0, 0, 1, 2, 3, 25, 26, 27, 24, 22, 21, 20, 23,
 		0, 0, 0, 0, 0, 0, 0, 0, 65, 66, 64, 67, 36, 39, 37, 38
 	);
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
-	ret = _invertco(ret);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+	ret = invertco_fast(ret); /* extra step; the r-suffixed variants end after the second compose_fast */
 
 	return ret;
 }
 
-static cube_t
-_arraytocube(cube_array_t a)
+static cube_fast_t /* AVX2 build: packs the array-based cube_t into one 256-bit value. */
+cubetofast(cube_t a) /* a is taken by value and only read */
 {
 	uint8_t aux[32];
 
 	memset(aux, 0, 32);
-	memcpy(aux, &a.c, 8);
-	memcpy(aux + 16, &a.e, 12);
+	memcpy(aux, &a.corner, 8); /* 8 corner bytes go to bytes 0..7; bytes 8..15 stay zero */
+	memcpy(aux + 16, &a.edge, 12); /* 12 edge bytes go to bytes 16..27; bytes 28..31 stay zero */
 
 	return _mm256_loadu_si256((__m256i_u *)&aux);
 }
 
-static void
-_cubetoarray(cube_t c, cube_array_t *a)
+static cube_t /* AVX2 build: unpacks a 256-bit cube into cube_t and returns it by value (the old version wrote through a pointer). */
+fasttocube(cube_fast_t c)
 {
+	cube_t a; /* only the corner and edge members are written below */
 	uint8_t aux[32];
 
 	_mm256_storeu_si256((__m256i_u *)aux, c);
-	memcpy(&a->c, aux, 8);
-	memcpy(&a->e, aux + 16, 12);
+	memcpy(&a.corner, aux, 8); /* bytes 0..7 hold the corners */
+	memcpy(&a.edge, aux + 16, 12); /* bytes 16..27 hold the edges; the remaining bytes are not copied out */
+
+	return a;
 }
 
 static inline bool
-_equal(cube_t c1, cube_t c2)
+equal_fast(cube_fast_t c1, cube_fast_t c2)
 {
 	uint32_t mask;
 	__m256i cmp;
@@ -2004,10 +1482,10 @@ _equal(cube_t c1, cube_t c2)
 	return mask == 0xffffffffU;
 }
 
-static inline cube_t
-_invertco(cube_t c)
+static inline cube_fast_t
+invertco_fast(cube_fast_t c)
 {
-        cube_t co, shleft, shright, summed, newco, cleanco, ret;
+        cube_fast_t co, shleft, shright, summed, newco, cleanco, ret;
 
         co = _mm256_and_si256(c, _co2_avx2);
         shleft = _mm256_slli_epi32(co, 1);
@@ -2020,15 +1498,15 @@ _invertco(cube_t c)
         return ret;
 }
 
-static inline cube_t
-_inverse(cube_t c)
+static inline cube_fast_t
+inverse_fast(cube_fast_t c)
 {
 	/* Method taken from Andrew Skalski's vcube[1]. The addition sequence
 	 * was generated using [2].
 	 * [1] https://github.com/Voltara/vcube
 	 * [2] http://wwwhomes.uni-bielefeld.de/achim/addition_chain.html
 	 */
-	cube_t v3, vi, vo, vp, ret;
+	cube_fast_t v3, vi, vo, vp, ret;
 
 	v3 = _mm256_shuffle_epi8(c, c);
 	v3 = _mm256_shuffle_epi8(v3, c);
@@ -2055,15 +1533,15 @@ _inverse(cube_t c)
 	vp = _mm256_andnot_si256(_mm256_or_si256(_eo_avx2, _co2_avx2), vi);
 	ret = _mm256_or_si256(vp, vo);
 	
-	return _invertco(ret);
+	return invertco_fast(ret);
 }
 
-static inline cube_t
-_compose(cube_t c1, cube_t c2)
+static inline cube_fast_t
+compose_fast(cube_fast_t c1, cube_fast_t c2)
 {
-	cube_t ret;
+	cube_fast_t ret;
 
-	cube_t s, eo2, ed, co1, co2, aux, auy1, auy2, auz1, auz2, coclean;
+	cube_fast_t s, eo2, ed, co1, co2, aux, auy1, auy2, auz1, auz2, coclean;
 
 	eo2 = _mm256_and_si256(c2, _eo_avx2);
 	s = _mm256_shuffle_epi8(c1, c2);
@@ -2081,17 +1559,17 @@ _compose(cube_t c1, cube_t c2)
 	return ret;
 }
 
-static inline int16_t
-_coord_eo(cube_t c)
+static inline int64_t /* AVX2 build: gathers one bit per byte from the bits of c selected by _eo_avx2 and returns them as an integer. */
+coord_fast_eo(cube_fast_t c)
 {
-	cube_t eo, shifted;
-	int mask;
+	cube_fast_t eo, shifted;
+	int64_t mask; /* widened from int; holds the 32-bit result of _mm256_movemask_epi8 (top bit of each byte) */
 
 	eo = _mm256_and_si256(c, _eo_avx2);
 	shifted = _mm256_slli_epi32(eo, 3);
 	mask = _mm256_movemask_epi8(shifted);
 
-	return (int16_t)(mask >> 17);
+	return mask >> 17; /* discards the bits of bytes 0..16, i.e. the low 128-bit half plus the first edge byte */
 }
 
 
@@ -2105,6 +1583,8 @@ in the previous section(s) for unsupported architectures.
 
 #else
 
+typedef cube_t cube_fast_t; /* fallback (#else) build: the fast type is simply the array-based cube_t */
+
 #define PERM4(r, i, j, k, l) \
 	aux = r[i];          \
 	r[i] = r[l];         \
@@ -2134,1375 +1614,1814 @@ in the previous section(s) for unsupported architectures.
 	r[k] ^= _eobit;    \
 	r[l] ^= _eobit;
 
-static const cube_t _solvedcube = {
-	.c = {0, 1, 2, 3, 4, 5, 6, 7},
-	.e = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}
-};
-static const cube_t _zerocube = { .e = {0}, .c = {0} };
+static const cube_fast_t zero_fast = { .corner = {0}, .edge = {0} }; /* all-zero cube value; replaces _zerocube */
 
-static cube_t _arraytocube(cube_array_t);
-static void _cubetoarray(cube_t, cube_array_t *);
-static inline bool _equal(cube_t, cube_t);
-static inline cube_t _invertco(cube_t);
-static inline cube_t _inverse(cube_t);
-static inline cube_t _compose(cube_t, cube_t);
+static cube_fast_t cubetofast(cube_t); /* forward declarations for the fallback build start here */
+static cube_t fasttocube(cube_fast_t); /* now returns by value instead of writing through a pointer */
+static inline bool equal_fast(cube_fast_t, cube_fast_t);
+static inline cube_fast_t invertco_fast(cube_fast_t);
+static inline cube_fast_t inverse_fast(cube_fast_t);
+static inline cube_fast_t compose_fast(cube_fast_t, cube_fast_t);
 
-static inline cube_t
-_move_U(cube_t c)
+static inline cube_fast_t /* Fallback build: returns a copy of c with PERM4 applied to the four U-layer edges and corners. */
+_move_U(cube_fast_t c)
 {
 	uint8_t aux;
-	cube_t ret = c;
+	cube_fast_t ret = c; /* c arrives by value; all changes go to this copy */
 
-	PERM4(ret.e, _e_uf, _e_ul, _e_ub, _e_ur)
-	PERM4(ret.c, _c_ufr, _c_ufl, _c_ubl, _c_ubr)
+	PERM4(ret.edge, _e_uf, _e_ul, _e_ub, _e_ur) /* edges; the macro uses aux as scratch */
+	PERM4(ret.corner, _c_ufr, _c_ufl, _c_ubl, _c_ubr) /* corners */
 
 	return ret;
 }
 
-static inline cube_t
-_move_U2(cube_t c)
+static inline cube_fast_t /* Fallback build: returns a copy of c with PERM22 applied to the four U-layer edges and corners. */
+_move_U2(cube_fast_t c)
 {
 	uint8_t aux;
-	cube_t ret = c;
+	cube_fast_t ret = c; /* c arrives by value; all changes go to this copy */
 
-	PERM22(ret.e, _e_uf, _e_ub, _e_ul, _e_ur)
-	PERM22(ret.c, _c_ufr, _c_ubl, _c_ufl, _c_ubr)
+	PERM22(ret.edge, _e_uf, _e_ub, _e_ul, _e_ur) /* edges; presumably two pairwise swaps -- TODO confirm against the macro */
+	PERM22(ret.corner, _c_ufr, _c_ubl, _c_ufl, _c_ubr) /* corners */
 
 	return ret;
 }
 
-static inline cube_t
-_move_U3(cube_t c)
+static inline cube_fast_t /* Fallback build: like _move_U but with the last three PERM4 arguments in reverse order. */
+_move_U3(cube_fast_t c)
 {
 	uint8_t aux;
-	cube_t ret = c;
+	cube_fast_t ret = c; /* c arrives by value; all changes go to this copy */
 
-	PERM4(ret.e, _e_uf, _e_ur, _e_ub, _e_ul)
-	PERM4(ret.c, _c_ufr, _c_ubr, _c_ubl, _c_ufl)
+	PERM4(ret.edge, _e_uf, _e_ur, _e_ub, _e_ul) /* edges; the macro uses aux as scratch */
+	PERM4(ret.corner, _c_ufr, _c_ubr, _c_ubl, _c_ufl) /* corners */
 
 	return ret;
 }
 
-static inline cube_t
-_move_D(cube_t c)
+static inline cube_fast_t /* Fallback build: returns a copy of c with PERM4 applied to the four D-layer edges and corners. */
+_move_D(cube_fast_t c)
 {
 	uint8_t aux;
-	cube_t ret = c;
+	cube_fast_t ret = c; /* c arrives by value; all changes go to this copy */
 
-	PERM4(ret.e, _e_df, _e_dr, _e_db, _e_dl)
-	PERM4(ret.c, _c_dfr, _c_dbr, _c_dbl, _c_dfl)
+	PERM4(ret.edge, _e_df, _e_dr, _e_db, _e_dl) /* edges; the macro uses aux as scratch */
+	PERM4(ret.corner, _c_dfr, _c_dbr, _c_dbl, _c_dfl) /* corners */
 
 	return ret;
 }
 
-static inline cube_t
-_move_D2(cube_t c)
+static inline cube_fast_t /* Fallback build: returns a copy of c with PERM22 applied to the four D-layer edges and corners. */
+_move_D2(cube_fast_t c)
 {
 	uint8_t aux;
-	cube_t ret = c;
+	cube_fast_t ret = c; /* c arrives by value; all changes go to this copy */
 
-	PERM22(ret.e, _e_df, _e_db, _e_dr, _e_dl)
-	PERM22(ret.c, _c_dfr, _c_dbl, _c_dbr, _c_dfl)
+	PERM22(ret.edge, _e_df, _e_db, _e_dr, _e_dl) /* edges; presumably two pairwise swaps -- TODO confirm against the macro */
+	PERM22(ret.corner, _c_dfr, _c_dbl, _c_dbr, _c_dfl) /* corners */
 
 	return ret;
 }
 
-static inline cube_t
-_move_D3(cube_t c)
+static inline cube_fast_t /* Fallback build: like _move_D but with the last three PERM4 arguments in reverse order. */
+_move_D3(cube_fast_t c)
 {
 	uint8_t aux;
-	cube_t ret = c;
+	cube_fast_t ret = c; /* c arrives by value; all changes go to this copy */
 
-	PERM4(ret.e, _e_df, _e_dl, _e_db, _e_dr)
-	PERM4(ret.c, _c_dfr, _c_dfl, _c_dbl, _c_dbr)
+	PERM4(ret.edge, _e_df, _e_dl, _e_db, _e_dr) /* edges; the macro uses aux as scratch */
+	PERM4(ret.corner, _c_dfr, _c_dfl, _c_dbl, _c_dbr) /* corners */
 
 	return ret;
 }
 
-static inline cube_t
-_move_R(cube_t c)
+static inline cube_fast_t /* Fallback build: returns a copy of c with PERM4 on the R-layer edges and corners, then CO4 on those corners. */
+_move_R(cube_fast_t c)
 {
 	uint8_t aux, auy, auz;
-	cube_t ret = c;
+	cube_fast_t ret = c; /* c arrives by value; all changes go to this copy */
 
-	PERM4(ret.e, _e_ur, _e_br, _e_dr, _e_fr)
-	PERM4(ret.c, _c_ufr, _c_ubr, _c_dbr, _c_dfr)
+	PERM4(ret.edge, _e_ur, _e_br, _e_dr, _e_fr) /* edges; the macro uses aux as scratch */
+	PERM4(ret.corner, _c_ufr, _c_ubr, _c_dbr, _c_dfr) /* corners */
 
-	CO4(ret.c, _c_ubr, _c_dfr, _c_ufr, _c_dbr)
+	CO4(ret.corner, _c_ubr, _c_dfr, _c_ufr, _c_dbr) /* same four corners as the PERM4 above, in a different argument order */
 
 	return ret;
 }
 
-static inline cube_t
-_move_R2(cube_t c)
+static inline cube_fast_t /* Fallback build: returns a copy of c with PERM22 applied to the four R-layer edges and corners. */
+_move_R2(cube_fast_t c)
 {
 	uint8_t aux;
-	cube_t ret = c;
+	cube_fast_t ret = c; /* c arrives by value; all changes go to this copy */
 
-	PERM22(ret.e, _e_ur, _e_dr, _e_fr, _e_br)
-	PERM22(ret.c, _c_ufr, _c_dbr, _c_ubr, _c_dfr)
+	PERM22(ret.edge, _e_ur, _e_dr, _e_fr, _e_br) /* edges; presumably two pairwise swaps -- TODO confirm against the macro */
+	PERM22(ret.corner, _c_ufr, _c_dbr, _c_ubr, _c_dfr) /* corners; no CO4 step here */
 
 	return ret;
 }
 
-static inline cube_t
-_move_R3(cube_t c)
+static inline cube_fast_t /* Fallback build: like _move_R but with the last three PERM4 arguments reversed; the CO4 arguments are unchanged. */
+_move_R3(cube_fast_t c)
 {
 	uint8_t aux, auy, auz;
-	cube_t ret = c;
+	cube_fast_t ret = c; /* c arrives by value; all changes go to this copy */
 
-	PERM4(ret.e, _e_ur, _e_fr, _e_dr, _e_br)
-	PERM4(ret.c, _c_ufr, _c_dfr, _c_dbr, _c_ubr)
+	PERM4(ret.edge, _e_ur, _e_fr, _e_dr, _e_br) /* edges; the macro uses aux as scratch */
+	PERM4(ret.corner, _c_ufr, _c_dfr, _c_dbr, _c_ubr) /* corners */
 
-	CO4(ret.c, _c_ubr, _c_dfr, _c_ufr, _c_dbr)
+	CO4(ret.corner, _c_ubr, _c_dfr, _c_ufr, _c_dbr) /* same argument list as in _move_R */
 
 	return ret;
 }
 
-static inline cube_t
-_move_L(cube_t c)
+static inline cube_fast_t /* Fallback build: returns a copy of c with PERM4 on the L-layer edges and corners, then CO4 on those corners. */
+_move_L(cube_fast_t c)
 {
 	uint8_t aux, auy, auz;
-	cube_t ret = c;
+	cube_fast_t ret = c; /* c arrives by value; all changes go to this copy */
 
-	PERM4(ret.e, _e_ul, _e_fl, _e_dl, _e_bl)
-	PERM4(ret.c, _c_ufl, _c_dfl, _c_dbl, _c_ubl)
+	PERM4(ret.edge, _e_ul, _e_fl, _e_dl, _e_bl) /* edges; the macro uses aux as scratch */
+	PERM4(ret.corner, _c_ufl, _c_dfl, _c_dbl, _c_ubl) /* corners */
 
-	CO4(ret.c, _c_ufl, _c_dbl, _c_dfl, _c_ubl)
+	CO4(ret.corner, _c_ufl, _c_dbl, _c_dfl, _c_ubl) /* same four corners as the PERM4 above, in a different argument order */
 
 	return ret;
 }
 
-static inline cube_t
-_move_L2(cube_t c)
+static inline cube_fast_t /* Fallback build: returns a copy of c with PERM22 applied to the four L-layer edges and corners. */
+_move_L2(cube_fast_t c)
 {
 	uint8_t aux;
-	cube_t ret = c;
+	cube_fast_t ret = c; /* c arrives by value; all changes go to this copy */
 
-	PERM22(ret.e, _e_ul, _e_dl, _e_fl, _e_bl)
-	PERM22(ret.c, _c_ufl, _c_dbl, _c_ubl, _c_dfl)
+	PERM22(ret.edge, _e_ul, _e_dl, _e_fl, _e_bl) /* edges; presumably two pairwise swaps -- TODO confirm against the macro */
+	PERM22(ret.corner, _c_ufl, _c_dbl, _c_ubl, _c_dfl) /* corners; no CO4 step here */
 
 	return ret;
 }
 
-static inline cube_t
-_move_L3(cube_t c)
+static inline cube_fast_t /* Fallback build: like _move_L but with the last three PERM4 arguments reversed; the CO4 arguments are unchanged. */
+_move_L3(cube_fast_t c)
 {
 	uint8_t aux, auy, auz;
-	cube_t ret = c;
+	cube_fast_t ret = c; /* c arrives by value; all changes go to this copy */
 
-	PERM4(ret.e, _e_ul, _e_bl, _e_dl, _e_fl)
-	PERM4(ret.c, _c_ufl, _c_ubl, _c_dbl, _c_dfl)
+	PERM4(ret.edge, _e_ul, _e_bl, _e_dl, _e_fl) /* edges; the macro uses aux as scratch */
+	PERM4(ret.corner, _c_ufl, _c_ubl, _c_dbl, _c_dfl) /* corners */
 
-	CO4(ret.c, _c_ufl, _c_dbl, _c_dfl, _c_ubl)
+	CO4(ret.corner, _c_ufl, _c_dbl, _c_dfl, _c_ubl) /* same argument list as in _move_L */
 
 	return ret;
 }
 
-static inline cube_t
-_move_F(cube_t c)
+static inline cube_fast_t /* Fallback build: returns a copy of c with PERM4 on the F-layer edges and corners, then EO4 and CO4 on the same pieces. */
+_move_F(cube_fast_t c)
 {
 	uint8_t aux, auy, auz;
-	cube_t ret = c;
+	cube_fast_t ret = c; /* c arrives by value; all changes go to this copy */
 
-	PERM4(ret.e, _e_uf, _e_fr, _e_df, _e_fl)
-	PERM4(ret.c, _c_ufr, _c_dfr, _c_dfl, _c_ufl)
+	PERM4(ret.edge, _e_uf, _e_fr, _e_df, _e_fl) /* edges; the macro uses aux as scratch */
+	PERM4(ret.corner, _c_ufr, _c_dfr, _c_dfl, _c_ufl) /* corners */
 
-	EO4(ret.e, _e_uf, _e_fr, _e_df, _e_fl)
-	CO4(ret.c, _c_ufr, _c_dfl, _c_dfr, _c_ufl)
+	EO4(ret.edge, _e_uf, _e_fr, _e_df, _e_fl) /* same four edges as the PERM4 above */
+	CO4(ret.corner, _c_ufr, _c_dfl, _c_dfr, _c_ufl) /* same four corners, in a different argument order */
 
 	return ret;
 }
 
-static inline cube_t
-_move_F2(cube_t c)
+static inline cube_fast_t /* Fallback build: returns a copy of c with PERM22 applied to the four F-layer edges and corners. */
+_move_F2(cube_fast_t c)
 {
 	uint8_t aux;
-	cube_t ret = c;
+	cube_fast_t ret = c; /* c arrives by value; all changes go to this copy */
 
-	PERM22(ret.e, _e_uf, _e_df, _e_fr, _e_fl)
-	PERM22(ret.c, _c_ufr, _c_dfl, _c_ufl, _c_dfr)
+	PERM22(ret.edge, _e_uf, _e_df, _e_fr, _e_fl) /* edges; presumably two pairwise swaps -- TODO confirm against the macro */
+	PERM22(ret.corner, _c_ufr, _c_dfl, _c_ufl, _c_dfr) /* corners; no EO4/CO4 step here */
 
 	return ret;
 }
 
-static inline cube_t
-_move_F3(cube_t c)
+static inline cube_fast_t /* Fallback build: like _move_F but with the last three PERM4 arguments reversed; EO4 and CO4 arguments are unchanged. */
+_move_F3(cube_fast_t c)
 {
 	uint8_t aux, auy, auz;
-	cube_t ret = c;
+	cube_fast_t ret = c; /* c arrives by value; all changes go to this copy */
 
-	PERM4(ret.e, _e_uf, _e_fl, _e_df, _e_fr)
-	PERM4(ret.c, _c_ufr, _c_ufl, _c_dfl, _c_dfr)
+	PERM4(ret.edge, _e_uf, _e_fl, _e_df, _e_fr) /* edges; the macro uses aux as scratch */
+	PERM4(ret.corner, _c_ufr, _c_ufl, _c_dfl, _c_dfr) /* corners */
 
-	EO4(ret.e, _e_uf, _e_fr, _e_df, _e_fl)
-	CO4(ret.c, _c_ufr, _c_dfl, _c_dfr, _c_ufl)
+	EO4(ret.edge, _e_uf, _e_fr, _e_df, _e_fl) /* same argument list as in _move_F */
+	CO4(ret.corner, _c_ufr, _c_dfl, _c_dfr, _c_ufl) /* same argument list as in _move_F */
 
 	return ret;
 }
 
-static inline cube_t
-_move_B(cube_t c)
+static inline cube_fast_t /* Fallback build: returns a copy of c with PERM4 on the B-layer edges and corners, then EO4 and CO4 on the same pieces. */
+_move_B(cube_fast_t c)
 {
 	uint8_t aux, auy, auz;
-	cube_t ret = c;
+	cube_fast_t ret = c; /* c arrives by value; all changes go to this copy */
 
-	PERM4(ret.e, _e_ub, _e_bl, _e_db, _e_br)
-	PERM4(ret.c, _c_ubr, _c_ubl, _c_dbl, _c_dbr)
+	PERM4(ret.edge, _e_ub, _e_bl, _e_db, _e_br) /* edges; the macro uses aux as scratch */
+	PERM4(ret.corner, _c_ubr, _c_ubl, _c_dbl, _c_dbr) /* corners */
 
-	EO4(ret.e, _e_ub, _e_br, _e_db, _e_bl)
-	CO4(ret.c, _c_ubl, _c_dbr, _c_dbl, _c_ubr)
+	EO4(ret.edge, _e_ub, _e_br, _e_db, _e_bl) /* same four edges as the PERM4 above */
+	CO4(ret.corner, _c_ubl, _c_dbr, _c_dbl, _c_ubr) /* same four corners, in a different argument order */
 
 	return ret;
 }
 
-static inline cube_t
-_move_B2(cube_t c)
+static inline cube_fast_t
+_move_B2(cube_fast_t c)
 {
 	uint8_t aux;
-	cube_t ret = c;
+	cube_fast_t ret = c;
 
-	PERM22(ret.e, _e_ub, _e_db, _e_br, _e_bl)
-	PERM22(ret.c, _c_ubr, _c_dbl, _c_ubl, _c_dbr)
+	PERM22(ret.edge, _e_ub, _e_db, _e_br, _e_bl)
+	PERM22(ret.corner, _c_ubr, _c_dbl, _c_ubl, _c_dbr)
 
 	return ret;
 }
 
-static inline cube_t
-_move_B3(cube_t c)
+static inline cube_fast_t
+_move_B3(cube_fast_t c)
 {
 	uint8_t aux, auy, auz;
-	cube_t ret = c;
+	cube_fast_t ret = c;
 
-	PERM4(ret.e, _e_ub, _e_br, _e_db, _e_bl)
-	PERM4(ret.c, _c_ubr, _c_dbr, _c_dbl, _c_ubl)
+	PERM4(ret.edge, _e_ub, _e_br, _e_db, _e_bl)
+	PERM4(ret.corner, _c_ubr, _c_dbr, _c_dbl, _c_ubl)
 
-	EO4(ret.e, _e_ub, _e_br, _e_db, _e_bl)
-	CO4(ret.c, _c_ubl, _c_dbr, _c_dbl, _c_ubr)
+	EO4(ret.edge, _e_ub, _e_br, _e_db, _e_bl)
+	CO4(ret.corner, _c_ubl, _c_dbr, _c_dbl, _c_ubr)
 
 	return ret;
 }
 
-static inline cube_t
-_invertco(cube_t c)
+static inline cube_fast_t
+invertco_fast(cube_fast_t c)
 {
 	uint8_t i, piece, orien;
-	cube_t ret;
+	cube_fast_t ret;
 
 	ret = c;
 	for (i = 0; i < 8; i++) {
-		piece = c.c[i];
+		piece = c.corner[i];
 		orien = ((piece << 1) | (piece >> 1)) & _cobits2;
-		ret.c[i] = (piece & _pbits) | orien;
+		ret.corner[i] = (piece & _pbits) | orien;
 	}
 
 	return ret;
 }
 
-static inline cube_t
-_trans_UFr(cube_t c)
+static inline cube_fast_t
+_trans_UFr(cube_fast_t c)
+{
+	cube_fast_t ret;
+	cube_fast_t tn = {
+		.corner = {0, 1, 2, 3, 4, 5, 6, 7},
+		.edge = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}
+	};
+	cube_fast_t ti = {
+		.corner = {0, 1, 2, 3, 4, 5, 6, 7},
+		.edge = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}
+	};
+
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+
+	return ret;
+}
+
+static inline cube_fast_t
+_trans_ULr(cube_fast_t c)
+{
+	cube_fast_t ret;
+	cube_fast_t tn = {
+		.corner = {4, 5, 7, 6, 1, 0, 2, 3},
+		.edge = {5, 4, 7, 6, 0, 1, 2, 3, 25, 26, 27, 24}
+	};
+	cube_fast_t ti = {
+		.corner = {5, 4, 6, 7, 0, 1, 3, 2},
+		.edge = {4, 5, 6, 7, 1, 0, 3, 2, 27, 24, 25, 26}
+	};
+
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+
+	return ret;
+}
+
+static inline cube_fast_t
+_trans_UBr(cube_fast_t c)
+{
+	cube_fast_t ret;
+	cube_fast_t tn = {
+		.corner = {1, 0, 3, 2, 5, 4, 7, 6},
+		.edge = {1, 0, 3, 2, 5, 4, 7, 6, 10, 11, 8, 9}
+	};
+	cube_fast_t ti = {
+		.corner = {1, 0, 3, 2, 5, 4, 7, 6},
+		.edge = {1, 0, 3, 2, 5, 4, 7, 6, 10, 11, 8, 9}
+	};
+
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+
+	return ret;
+}
+
+static inline cube_fast_t
+_trans_URr(cube_fast_t c)
+{
+	cube_fast_t ret;
+	cube_fast_t tn = {
+		.corner = {5, 4, 6, 7, 0, 1, 3, 2},
+		.edge = {4, 5, 6, 7, 1, 0, 3, 2, 27, 24, 25, 26}
+	};
+	cube_fast_t ti = {
+		.corner = {4, 5, 7, 6, 1, 0, 2, 3},
+		.edge = {5, 4, 7, 6, 0, 1, 2, 3, 25, 26, 27, 24}
+	};
+
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+
+	return ret;
+}
+
+static inline cube_fast_t
+_trans_DFr(cube_fast_t c)
+{
+	cube_fast_t ret;
+	cube_fast_t tn = {
+		.corner = {2, 3, 0, 1, 6, 7, 4, 5},
+		.edge = {3, 2, 1, 0, 6, 7, 4, 5, 9, 8, 11, 10}
+	};
+	cube_fast_t ti = {
+		.corner = {2, 3, 0, 1, 6, 7, 4, 5},
+		.edge = {3, 2, 1, 0, 6, 7, 4, 5, 9, 8, 11, 10}
+	};
+
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+
+	return ret;
+}
+
+static inline cube_fast_t
+_trans_DLr(cube_fast_t c)
+{
+	cube_fast_t ret;
+	cube_fast_t tn = {
+		.corner = {7, 6, 4, 5, 2, 3, 1, 0},
+		.edge = {6, 7, 4, 5, 2, 3, 0, 1, 26, 25, 24, 27}
+	};
+	cube_fast_t ti = {
+		.corner = {7, 6, 4, 5, 2, 3, 1, 0},
+		.edge = {6, 7, 4, 5, 2, 3, 0, 1, 26, 25, 24, 27}
+	};
+
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+
+	return ret;
+}
+
+static inline cube_fast_t
+_trans_DBr(cube_fast_t c)
+{
+	cube_fast_t ret;
+	cube_fast_t tn = {
+		.corner = {3, 2, 1, 0, 7, 6, 5, 4},
+		.edge = {2, 3, 0, 1, 7, 6, 5, 4, 11, 10, 9, 8}
+	};
+	cube_fast_t ti = {
+		.corner = {3, 2, 1, 0, 7, 6, 5, 4},
+		.edge = {2, 3, 0, 1, 7, 6, 5, 4, 11, 10, 9, 8}
+	};
+
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+
+	return ret;
+}
+
+static inline cube_fast_t
+_trans_DRr(cube_fast_t c)
+{
+	cube_fast_t ret;
+	cube_fast_t tn = {
+		.corner = {6, 7, 5, 4, 3, 2, 0, 1},
+		.edge = {7, 6, 5, 4, 3, 2, 1, 0, 24, 27, 26, 25}
+	};
+	cube_fast_t ti = {
+		.corner = {6, 7, 5, 4, 3, 2, 0, 1},
+		.edge = {7, 6, 5, 4, 3, 2, 1, 0, 24, 27, 26, 25}
+	};
+
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+
+	return ret;
+}
+
+static inline cube_fast_t
+_trans_RUr(cube_fast_t c)
+{
+	cube_fast_t ret;
+	cube_fast_t tn = {
+		.corner = {64, 67, 65, 66, 37, 38, 36, 39},
+		.edge = {20, 23, 22, 21, 24, 27, 26, 25, 0, 1, 2, 3}
+	};
+	cube_fast_t ti = {
+		.corner = {32, 34, 35, 33, 70, 68, 69, 71},
+		.edge = {8, 9, 10, 11, 16, 19, 18, 17, 20, 23, 22, 21}
+	};
+
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+
+	return ret;
+}
+
+static inline cube_fast_t
+_trans_RFr(cube_fast_t c)
+{
+	cube_fast_t ret;
+	cube_fast_t tn = {
+		.corner = {38, 37, 36, 39, 64, 67, 66, 65},
+		.edge = {24, 27, 26, 25, 23, 20, 21, 22, 19, 16, 17, 18}
+	};
+	cube_fast_t ti = {
+		.corner = {36, 39, 38, 37, 66, 65, 64, 67},
+		.edge = {25, 26, 27, 24, 21, 22, 23, 20, 16, 19, 18, 17}
+	};
+
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+
+	return ret;
+}
+
+static inline cube_fast_t
+_trans_RDr(cube_fast_t c)
+{
+	cube_fast_t ret;
+	cube_fast_t tn = {
+		.corner = {67, 64, 66, 65, 38, 37, 39, 36},
+		.edge = {23, 20, 21, 22, 27, 24, 25, 26, 2, 3, 0, 1}
+	};
+	cube_fast_t ti = {
+		.corner = {33, 35, 34, 32, 71, 69, 68, 70},
+		.edge = {10, 11, 8, 9, 17, 18, 19, 16, 21, 22, 23, 20}
+	};
+
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+
+	return ret;
+}
+
+static inline cube_fast_t
+_trans_RBr(cube_fast_t c)
+{
+	cube_fast_t ret;
+	cube_fast_t tn = {
+		.corner = {37, 38, 39, 36, 67, 64, 65, 66},
+		.edge = {27, 24, 25, 26, 20, 23, 22, 21, 17, 18, 19, 16}
+	};
+	cube_fast_t ti = {
+		.corner = {37, 38, 39, 36, 67, 64, 65, 66},
+		.edge = {27, 24, 25, 26, 20, 23, 22, 21, 17, 18, 19, 16}
+	};
+
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+
+	return ret;
+}
+
+static inline cube_fast_t
+_trans_LUr(cube_fast_t c)
+{
+	cube_fast_t ret;
+	cube_fast_t tn = {
+		.corner = {65, 66, 64, 67, 36, 39, 37, 38},
+		.edge = {21, 22, 23, 20, 26, 25, 24, 27, 1, 0, 3, 2}
+	};
+	cube_fast_t ti = {
+		.corner = {34, 32, 33, 35, 68, 70, 71, 69},
+		.edge = {9, 8, 11, 10, 19, 16, 17, 18, 22, 21, 20, 23}
+	};
+
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+
+	return ret;
+}
+
+static inline cube_fast_t
+_trans_LFr(cube_fast_t c)
+{
+	cube_fast_t ret;
+	cube_fast_t tn = {
+		.corner = {36, 39, 38, 37, 66, 65, 64, 67},
+		.edge = {25, 26, 27, 24, 21, 22, 23, 20, 16, 19, 18, 17}
+	};
+	cube_fast_t ti = {
+		.corner = {38, 37, 36, 39, 64, 67, 66, 65},
+		.edge = {24, 27, 26, 25, 23, 20, 21, 22, 19, 16, 17, 18}
+	};
+
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+
+	return ret;
+}
+
+static inline cube_fast_t
+_trans_LDr(cube_fast_t c)
+{
+	cube_fast_t ret;
+	cube_fast_t tn = {
+		.corner = {66, 65, 67, 64, 39, 36, 38, 37},
+		.edge = {22, 21, 20, 23, 25, 26, 27, 24, 3, 2, 1, 0}
+	};
+	cube_fast_t ti = {
+		.corner = {35, 33, 32, 34, 69, 71, 70, 68},
+		.edge = {11, 10, 9, 8, 18, 17, 16, 19, 23, 20, 21, 22}
+	};
+
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+
+	return ret;
+}
+
+static inline cube_fast_t
+_trans_LBr(cube_fast_t c)
+{
+	cube_fast_t ret;
+	cube_fast_t tn = {
+		.corner = {39, 36, 37, 38, 65, 66, 67, 64},
+		.edge = {26, 25, 24, 27, 22, 21, 20, 23, 18, 17, 16, 19}
+	};
+	cube_fast_t ti = {
+		.corner = {39, 36, 37, 38, 65, 66, 67, 64},
+		.edge = {26, 25, 24, 27, 22, 21, 20, 23, 18, 17, 16, 19}
+	};
+
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+
+	return ret;
+}
+
+static inline cube_fast_t
+_trans_FUr(cube_fast_t c)
+{
+	cube_fast_t ret;
+	cube_fast_t tn = {
+		.corner = {68, 70, 69, 71, 32, 34, 33, 35},
+		.edge = {16, 19, 18, 17, 9, 8, 11, 10, 5, 4, 7, 6}
+	};
+	cube_fast_t ti = {
+		.corner = {68, 70, 69, 71, 32, 34, 33, 35},
+		.edge = {16, 19, 18, 17, 9, 8, 11, 10, 5, 4, 7, 6}
+	};
+
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+
+	return ret;
+}
+
+static inline cube_fast_t
+_trans_FRr(cube_fast_t c)
+{
+	cube_fast_t ret;
+	cube_fast_t tn = {
+		.corner = {32, 34, 35, 33, 70, 68, 69, 71},
+		.edge = {8, 9, 10, 11, 16, 19, 18, 17, 20, 23, 22, 21}
+	};
+	cube_fast_t ti = {
+		.corner = {64, 67, 65, 66, 37, 38, 36, 39},
+		.edge = {20, 23, 22, 21, 24, 27, 26, 25, 0, 1, 2, 3}
+	};
+
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+
+	return ret;
+}
+
+static inline cube_fast_t
+_trans_FDr(cube_fast_t c)
+{
+	cube_fast_t ret;
+	cube_fast_t tn = {
+		.corner = {70, 68, 71, 69, 34, 32, 35, 33},
+		.edge = {19, 16, 17, 18, 8, 9, 10, 11, 7, 6, 5, 4}
+	};
+	cube_fast_t ti = {
+		.corner = {69, 71, 68, 70, 33, 35, 32, 34},
+		.edge = {17, 18, 19, 16, 11, 10, 9, 8, 4, 5, 6, 7}
+	};
+
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+
+	return ret;
+}
+
+static inline cube_fast_t
+_trans_FLr(cube_fast_t c)
+{
+	cube_fast_t ret;
+	cube_fast_t tn = {
+		.corner = {34, 32, 33, 35, 68, 70, 71, 69},
+		.edge = {9, 8, 11, 10, 19, 16, 17, 18, 22, 21, 20, 23}
+	};
+	cube_fast_t ti = {
+		.corner = {65, 66, 64, 67, 36, 39, 37, 38},
+		.edge = {21, 22, 23, 20, 26, 25, 24, 27, 1, 0, 3, 2}
+	};
+
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+
+	return ret;
+}
+
+static inline cube_fast_t
+_trans_BUr(cube_fast_t c)
+{
+	cube_fast_t ret;
+	cube_fast_t tn = {
+		.corner = {69, 71, 68, 70, 33, 35, 32, 34},
+		.edge = {17, 18, 19, 16, 11, 10, 9, 8, 4, 5, 6, 7}
+	};
+	cube_fast_t ti = {
+		.corner = {70, 68, 71, 69, 34, 32, 35, 33},
+		.edge = {19, 16, 17, 18, 8, 9, 10, 11, 7, 6, 5, 4}
+	};
+
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+
+	return ret;
+}
+
+static inline cube_fast_t
+_trans_BRr(cube_fast_t c)
+{
+	cube_fast_t ret;
+	cube_fast_t tn = {
+		.corner = {35, 33, 32, 34, 69, 71, 70, 68},
+		.edge = {11, 10, 9, 8, 18, 17, 16, 19, 23, 20, 21, 22}
+	};
+	cube_fast_t ti = {
+		.corner = {66, 65, 67, 64, 39, 36, 38, 37},
+		.edge = {22, 21, 20, 23, 25, 26, 27, 24, 3, 2, 1, 0}
+	};
+
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+
+	return ret;
+}
+
+static inline cube_fast_t
+_trans_BDr(cube_fast_t c)
 {
-	cube_t ret;
-	cube_t tn = {
-		.c = {0, 1, 2, 3, 4, 5, 6, 7},
-		.e = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}
+	cube_fast_t ret;
+	cube_fast_t tn = {
+		.corner = {71, 69, 70, 68, 35, 33, 34, 32},
+		.edge = {18, 17, 16, 19, 10, 11, 8, 9, 6, 7, 4, 5}
 	};
-	cube_t ti = {
-		.c = {0, 1, 2, 3, 4, 5, 6, 7},
-		.e = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}
+	cube_fast_t ti = {
+		.corner = {71, 69, 70, 68, 35, 33, 34, 32},
+		.edge = {18, 17, 16, 19, 10, 11, 8, 9, 6, 7, 4, 5}
 	};
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
 
 	return ret;
 }
 
-static inline cube_t
-_trans_ULr(cube_t c)
+static inline cube_fast_t
+_trans_BLr(cube_fast_t c)
 {
-	cube_t ret;
-	cube_t tn = {
-		.c = {4, 5, 7, 6, 1, 0, 2, 3},
-		.e = {5, 4, 7, 6, 0, 1, 2, 3, 25, 26, 27, 24}
+	cube_fast_t ret;
+	cube_fast_t tn = {
+		.corner = {33, 35, 34, 32, 71, 69, 68, 70},
+		.edge = {10, 11, 8, 9, 17, 18, 19, 16, 21, 22, 23, 20}
 	};
-	cube_t ti = {
-		.c = {5, 4, 6, 7, 0, 1, 3, 2},
-		.e = {4, 5, 6, 7, 1, 0, 3, 2, 27, 24, 25, 26}
+	cube_fast_t ti = {
+		.corner = {67, 64, 66, 65, 38, 37, 39, 36},
+		.edge = {23, 20, 21, 22, 27, 24, 25, 26, 2, 3, 0, 1}
 	};
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
 
 	return ret;
 }
 
-static inline cube_t
-_trans_UBr(cube_t c)
+static inline cube_fast_t
+_trans_UFm(cube_fast_t c)
 {
-	cube_t ret;
-	cube_t tn = {
-		.c = {1, 0, 3, 2, 5, 4, 7, 6},
-		.e = {1, 0, 3, 2, 5, 4, 7, 6, 10, 11, 8, 9}
+	cube_fast_t ret;
+	cube_fast_t tn = {
+		.corner = {4, 5, 6, 7, 0, 1, 2, 3},
+		.edge = {0, 1, 2, 3, 5, 4, 7, 6, 9, 8, 11, 10}
 	};
-	cube_t ti = {
-		.c = {1, 0, 3, 2, 5, 4, 7, 6},
-		.e = {1, 0, 3, 2, 5, 4, 7, 6, 10, 11, 8, 9}
+	cube_fast_t ti = {
+		.corner = {4, 5, 6, 7, 0, 1, 2, 3},
+		.edge = {0, 1, 2, 3, 5, 4, 7, 6, 9, 8, 11, 10}
 	};
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+	ret = invertco_fast(ret);
 
 	return ret;
 }
 
-static inline cube_t
-_trans_URr(cube_t c)
+static inline cube_fast_t
+_trans_ULm(cube_fast_t c)
 {
-	cube_t ret;
-	cube_t tn = {
-		.c = {5, 4, 6, 7, 0, 1, 3, 2},
-		.e = {4, 5, 6, 7, 1, 0, 3, 2, 27, 24, 25, 26}
+	cube_fast_t ret;
+	cube_fast_t tn = {
+		.corner = {0, 1, 3, 2, 5, 4, 6, 7},
+		.edge = {4, 5, 6, 7, 0, 1, 2, 3, 24, 27, 26, 25}
 	};
-	cube_t ti = {
-		.c = {4, 5, 7, 6, 1, 0, 2, 3},
-		.e = {5, 4, 7, 6, 0, 1, 2, 3, 25, 26, 27, 24}
+	cube_fast_t ti = {
+		.corner = {0, 1, 3, 2, 5, 4, 6, 7},
+		.edge = {4, 5, 6, 7, 0, 1, 2, 3, 24, 27, 26, 25}
 	};
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+	ret = invertco_fast(ret);
 
 	return ret;
 }
 
-static inline cube_t
-_trans_DFr(cube_t c)
+static inline cube_fast_t
+_trans_UBm(cube_fast_t c)
 {
-	cube_t ret;
-	cube_t tn = {
-		.c = {2, 3, 0, 1, 6, 7, 4, 5},
-		.e = {3, 2, 1, 0, 6, 7, 4, 5, 9, 8, 11, 10}
+	cube_fast_t ret;
+	cube_fast_t tn = {
+		.corner = {5, 4, 7, 6, 1, 0, 3, 2},
+		.edge = {1, 0, 3, 2, 4, 5, 6, 7, 11, 10, 9, 8}
 	};
-	cube_t ti = {
-		.c = {2, 3, 0, 1, 6, 7, 4, 5},
-		.e = {3, 2, 1, 0, 6, 7, 4, 5, 9, 8, 11, 10}
+	cube_fast_t ti = {
+		.corner = {5, 4, 7, 6, 1, 0, 3, 2},
+		.edge = {1, 0, 3, 2, 4, 5, 6, 7, 11, 10, 9, 8}
 	};
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+	ret = invertco_fast(ret);
 
 	return ret;
 }
 
-static inline cube_t
-_trans_DLr(cube_t c)
+static inline cube_fast_t
+_trans_URm(cube_fast_t c)
 {
-	cube_t ret;
-	cube_t tn = {
-		.c = {7, 6, 4, 5, 2, 3, 1, 0},
-		.e = {6, 7, 4, 5, 2, 3, 0, 1, 26, 25, 24, 27}
+	cube_fast_t ret;
+	cube_fast_t tn = {
+		.corner = {1, 0, 2, 3, 4, 5, 7, 6},
+		.edge = {5, 4, 7, 6, 1, 0, 3, 2, 26, 25, 24, 27}
 	};
-	cube_t ti = {
-		.c = {7, 6, 4, 5, 2, 3, 1, 0},
-		.e = {6, 7, 4, 5, 2, 3, 0, 1, 26, 25, 24, 27}
+	cube_fast_t ti = {
+		.corner = {1, 0, 2, 3, 4, 5, 7, 6},
+		.edge = {5, 4, 7, 6, 1, 0, 3, 2, 26, 25, 24, 27}
 	};
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+	ret = invertco_fast(ret);
 
 	return ret;
 }
 
-static inline cube_t
-_trans_DBr(cube_t c)
+static inline cube_fast_t
+_trans_DFm(cube_fast_t c)
 {
-	cube_t ret;
-	cube_t tn = {
-		.c = {3, 2, 1, 0, 7, 6, 5, 4},
-		.e = {2, 3, 0, 1, 7, 6, 5, 4, 11, 10, 9, 8}
+	cube_fast_t ret;
+	cube_fast_t tn = {
+		.corner = {6, 7, 4, 5, 2, 3, 0, 1},
+		.edge = {3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11}
 	};
-	cube_t ti = {
-		.c = {3, 2, 1, 0, 7, 6, 5, 4},
-		.e = {2, 3, 0, 1, 7, 6, 5, 4, 11, 10, 9, 8}
+	cube_fast_t ti = {
+		.corner = {6, 7, 4, 5, 2, 3, 0, 1},
+		.edge = {3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11}
 	};
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+	ret = invertco_fast(ret);
 
 	return ret;
 }
 
-static inline cube_t
-_trans_DRr(cube_t c)
+static inline cube_fast_t
+_trans_DLm(cube_fast_t c)
 {
-	cube_t ret;
-	cube_t tn = {
-		.c = {6, 7, 5, 4, 3, 2, 0, 1},
-		.e = {7, 6, 5, 4, 3, 2, 1, 0, 24, 27, 26, 25}
+	cube_fast_t ret;
+	cube_fast_t tn = {
+		.corner = {3, 2, 0, 1, 6, 7, 5, 4},
+		.edge = {7, 6, 5, 4, 2, 3, 0, 1, 27, 24, 25, 26}
 	};
-	cube_t ti = {
-		.c = {6, 7, 5, 4, 3, 2, 0, 1},
-		.e = {7, 6, 5, 4, 3, 2, 1, 0, 24, 27, 26, 25}
+	cube_fast_t ti = {
+		.corner = {2, 3, 1, 0, 7, 6, 4, 5},
+		.edge = {6, 7, 4, 5, 3, 2, 1, 0, 25, 26, 27, 24}
 	};
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+	ret = invertco_fast(ret);
 
 	return ret;
 }
 
-static inline cube_t
-_trans_RUr(cube_t c)
+static inline cube_fast_t
+_trans_DBm(cube_fast_t c)
 {
-	cube_t ret;
-	cube_t tn = {
-		.c = {64, 67, 65, 66, 37, 38, 36, 39},
-		.e = {20, 23, 22, 21, 24, 27, 26, 25, 0, 1, 2, 3}
+	cube_fast_t ret;
+	cube_fast_t tn = {
+		.corner = {7, 6, 5, 4, 3, 2, 1, 0},
+		.edge = {2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9}
 	};
-	cube_t ti = {
-		.c = {32, 34, 35, 33, 70, 68, 69, 71},
-		.e = {8, 9, 10, 11, 16, 19, 18, 17, 20, 23, 22, 21}
+	cube_fast_t ti = {
+		.corner = {7, 6, 5, 4, 3, 2, 1, 0},
+		.edge = {2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9}
 	};
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+	ret = invertco_fast(ret);
 
 	return ret;
 }
 
-static inline cube_t
-_trans_RFr(cube_t c)
+static inline cube_fast_t
+_trans_DRm(cube_fast_t c)
 {
-	cube_t ret;
-	cube_t tn = {
-		.c = {38, 37, 36, 39, 64, 67, 66, 65},
-		.e = {24, 27, 26, 25, 23, 20, 21, 22, 19, 16, 17, 18}
+	cube_fast_t ret;
+	cube_fast_t tn = {
+		.corner = {2, 3, 1, 0, 7, 6, 4, 5},
+		.edge = {6, 7, 4, 5, 3, 2, 1, 0, 25, 26, 27, 24}
 	};
-	cube_t ti = {
-		.c = {36, 39, 38, 37, 66, 65, 64, 67},
-		.e = {25, 26, 27, 24, 21, 22, 23, 20, 16, 19, 18, 17}
+	cube_fast_t ti = {
+		.corner = {3, 2, 0, 1, 6, 7, 5, 4},
+		.edge = {7, 6, 5, 4, 2, 3, 0, 1, 27, 24, 25, 26}
 	};
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+	ret = invertco_fast(ret);
 
 	return ret;
 }
 
-static inline cube_t
-_trans_RDr(cube_t c)
+static inline cube_fast_t
+_trans_RUm(cube_fast_t c)
 {
-	cube_t ret;
-	cube_t tn = {
-		.c = {67, 64, 66, 65, 38, 37, 39, 36},
-		.e = {23, 20, 21, 22, 27, 24, 25, 26, 2, 3, 0, 1}
+	cube_fast_t ret;
+	cube_fast_t tn = {
+		.corner = {68, 71, 69, 70, 33, 34, 32, 35},
+		.edge = {21, 22, 23, 20, 25, 26, 27, 24, 0, 1, 2, 3}
 	};
-	cube_t ti = {
-		.c = {33, 35, 34, 32, 71, 69, 68, 70},
-		.e = {10, 11, 8, 9, 17, 18, 19, 16, 21, 22, 23, 20}
+	cube_fast_t ti = {
+		.corner = {70, 68, 69, 71, 32, 34, 35, 33},
+		.edge = {8, 9, 10, 11, 19, 16, 17, 18, 23, 20, 21, 22}
 	};
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+	ret = invertco_fast(ret);
 
 	return ret;
 }
 
-static inline cube_t
-_trans_RBr(cube_t c)
+static inline cube_fast_t
+_trans_RFm(cube_fast_t c)
 {
-	cube_t ret;
-	cube_t tn = {
-		.c = {37, 38, 39, 36, 67, 64, 65, 66},
-		.e = {27, 24, 25, 26, 20, 23, 22, 21, 17, 18, 19, 16}
+	cube_fast_t ret;
+	cube_fast_t tn = {
+		.corner = {34, 33, 32, 35, 68, 71, 70, 69},
+		.edge = {25, 26, 27, 24, 22, 21, 20, 23, 19, 16, 17, 18}
 	};
-	cube_t ti = {
-		.c = {37, 38, 39, 36, 67, 64, 65, 66},
-		.e = {27, 24, 25, 26, 20, 23, 22, 21, 17, 18, 19, 16}
+	cube_fast_t ti = {
+		.corner = {66, 65, 64, 67, 36, 39, 38, 37},
+		.edge = {25, 26, 27, 24, 22, 21, 20, 23, 19, 16, 17, 18}
 	};
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+	ret = invertco_fast(ret);
 
 	return ret;
 }
 
-static inline cube_t
-_trans_LUr(cube_t c)
+static inline cube_fast_t
+_trans_RDm(cube_fast_t c)
 {
-	cube_t ret;
-	cube_t tn = {
-		.c = {65, 66, 64, 67, 36, 39, 37, 38},
-		.e = {21, 22, 23, 20, 26, 25, 24, 27, 1, 0, 3, 2}
+	cube_fast_t ret;
+	cube_fast_t tn = {
+		.corner = {71, 68, 70, 69, 34, 33, 35, 32},
+		.edge = {22, 21, 20, 23, 26, 25, 24, 27, 2, 3, 0, 1}
 	};
-	cube_t ti = {
-		.c = {34, 32, 33, 35, 68, 70, 71, 69},
-		.e = {9, 8, 11, 10, 19, 16, 17, 18, 22, 21, 20, 23}
+	cube_fast_t ti = {
+		.corner = {71, 69, 68, 70, 33, 35, 34, 32},
+		.edge = {10, 11, 8, 9, 18, 17, 16, 19, 22, 21, 20, 23}
 	};
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+	ret = invertco_fast(ret);
 
 	return ret;
 }
 
-static inline cube_t
-_trans_LFr(cube_t c)
+static inline cube_fast_t
+_trans_RBm(cube_fast_t c)
 {
-	cube_t ret;
-	cube_t tn = {
-		.c = {36, 39, 38, 37, 66, 65, 64, 67},
-		.e = {25, 26, 27, 24, 21, 22, 23, 20, 16, 19, 18, 17}
+	cube_fast_t ret;
+	cube_fast_t tn = {
+		.corner = {33, 34, 35, 32, 71, 68, 69, 70},
+		.edge = {26, 25, 24, 27, 21, 22, 23, 20, 17, 18, 19, 16}
 	};
-	cube_t ti = {
-		.c = {38, 37, 36, 39, 64, 67, 66, 65},
-		.e = {24, 27, 26, 25, 23, 20, 21, 22, 19, 16, 17, 18}
+	cube_fast_t ti = {
+		.corner = {67, 64, 65, 66, 37, 38, 39, 36},
+		.edge = {27, 24, 25, 26, 23, 20, 21, 22, 18, 17, 16, 19}
 	};
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+	ret = invertco_fast(ret);
 
 	return ret;
 }
 
-static inline cube_t
-_trans_LDr(cube_t c)
+static inline cube_fast_t
+_trans_LUm(cube_fast_t c)
 {
-	cube_t ret;
-	cube_t tn = {
-		.c = {66, 65, 67, 64, 39, 36, 38, 37},
-		.e = {22, 21, 20, 23, 25, 26, 27, 24, 3, 2, 1, 0}
+	cube_fast_t ret;
+	cube_fast_t tn = {
+		.corner = {69, 70, 68, 71, 32, 35, 33, 34},
+		.edge = {20, 23, 22, 21, 27, 24, 25, 26, 1, 0, 3, 2}
 	};
-	cube_t ti = {
-		.c = {35, 33, 32, 34, 69, 71, 70, 68},
-		.e = {11, 10, 9, 8, 18, 17, 16, 19, 23, 20, 21, 22}
+	cube_fast_t ti = {
+		.corner = {68, 70, 71, 69, 34, 32, 33, 35},
+		.edge = {9, 8, 11, 10, 16, 19, 18, 17, 21, 22, 23, 20}
 	};
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+	ret = invertco_fast(ret);
 
 	return ret;
 }
 
-static inline cube_t
-_trans_LBr(cube_t c)
+static inline cube_fast_t
+_trans_LFm(cube_fast_t c)
 {
-	cube_t ret;
-	cube_t tn = {
-		.c = {39, 36, 37, 38, 65, 66, 67, 64},
-		.e = {26, 25, 24, 27, 22, 21, 20, 23, 18, 17, 16, 19}
+	cube_fast_t ret;
+	cube_fast_t tn = {
+		.corner = {32, 35, 34, 33, 70, 69, 68, 71},
+		.edge = {24, 27, 26, 25, 20, 23, 22, 21, 16, 19, 18, 17}
 	};
-	cube_t ti = {
-		.c = {39, 36, 37, 38, 65, 66, 67, 64},
-		.e = {26, 25, 24, 27, 22, 21, 20, 23, 18, 17, 16, 19}
+	cube_fast_t ti = {
+		.corner = {64, 67, 66, 65, 38, 37, 36, 39},
+		.edge = {24, 27, 26, 25, 20, 23, 22, 21, 16, 19, 18, 17}
 	};
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+	ret = invertco_fast(ret);
 
 	return ret;
 }
 
-static inline cube_t
-_trans_FUr(cube_t c)
+static inline cube_fast_t
+_trans_LDm(cube_fast_t c)
 {
-	cube_t ret;
-	cube_t tn = {
-		.c = {68, 70, 69, 71, 32, 34, 33, 35},
-		.e = {16, 19, 18, 17, 9, 8, 11, 10, 5, 4, 7, 6}
+	cube_fast_t ret;
+	cube_fast_t tn = {
+		.corner = {70, 69, 71, 68, 35, 32, 34, 33},
+		.edge = {23, 20, 21, 22, 24, 27, 26, 25, 3, 2, 1, 0}
 	};
-	cube_t ti = {
-		.c = {68, 70, 69, 71, 32, 34, 33, 35},
-		.e = {16, 19, 18, 17, 9, 8, 11, 10, 5, 4, 7, 6}
+	cube_fast_t ti = {
+		.corner = {69, 71, 70, 68, 35, 33, 32, 34},
+		.edge = {11, 10, 9, 8, 17, 18, 19, 16, 20, 23, 22, 21}
 	};
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+	ret = invertco_fast(ret);
 
 	return ret;
 }
 
-static inline cube_t
-_trans_FRr(cube_t c)
+static inline cube_fast_t
+_trans_LBm(cube_fast_t c)
 {
-	cube_t ret;
-	cube_t tn = {
-		.c = {32, 34, 35, 33, 70, 68, 69, 71},
-		.e = {8, 9, 10, 11, 16, 19, 18, 17, 20, 23, 22, 21}
+	cube_fast_t ret;
+	cube_fast_t tn = {
+		.corner = {35, 32, 33, 34, 69, 70, 71, 68},
+		.edge = {27, 24, 25, 26, 23, 20, 21, 22, 18, 17, 16, 19}
 	};
-	cube_t ti = {
-		.c = {64, 67, 65, 66, 37, 38, 36, 39},
-		.e = {20, 23, 22, 21, 24, 27, 26, 25, 0, 1, 2, 3}
+	cube_fast_t ti = {
+		.corner = {65, 66, 67, 64, 39, 36, 37, 38},
+		.edge = {26, 25, 24, 27, 21, 22, 23, 20, 17, 18, 19, 16}
 	};
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+	ret = invertco_fast(ret);
 
 	return ret;
 }
 
-static inline cube_t
-_trans_FDr(cube_t c)
+static inline cube_fast_t
+_trans_FUm(cube_fast_t c)
 {
-	cube_t ret;
-	cube_t tn = {
-		.c = {70, 68, 71, 69, 34, 32, 35, 33},
-		.e = {19, 16, 17, 18, 8, 9, 10, 11, 7, 6, 5, 4}
+	cube_fast_t ret;
+	cube_fast_t tn = {
+		.corner = {64, 66, 65, 67, 36, 38, 37, 39},
+		.edge = {16, 19, 18, 17, 8, 9, 10, 11, 4, 5, 6, 7}
 	};
-	cube_t ti = {
-		.c = {69, 71, 68, 70, 33, 35, 32, 34},
-		.e = {17, 18, 19, 16, 11, 10, 9, 8, 4, 5, 6, 7}
+	cube_fast_t ti = {
+		.corner = {32, 34, 33, 35, 68, 70, 69, 71},
+		.edge = {16, 19, 18, 17, 8, 9, 10, 11, 4, 5, 6, 7}
 	};
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+	ret = invertco_fast(ret);
 
 	return ret;
 }
 
-static inline cube_t
-_trans_FLr(cube_t c)
+static inline cube_fast_t
+_trans_FRm(cube_fast_t c)
 {
-	cube_t ret;
-	cube_t tn = {
-		.c = {34, 32, 33, 35, 68, 70, 71, 69},
-		.e = {9, 8, 11, 10, 19, 16, 17, 18, 22, 21, 20, 23}
+	cube_fast_t ret;
+	cube_fast_t tn = {
+		.corner = {36, 38, 39, 37, 66, 64, 65, 67},
+		.edge = {9, 8, 11, 10, 16, 19, 18, 17, 21, 22, 23, 20}
 	};
-	cube_t ti = {
-		.c = {65, 66, 64, 67, 36, 39, 37, 38},
-		.e = {21, 22, 23, 20, 26, 25, 24, 27, 1, 0, 3, 2}
+	cube_fast_t ti = {
+		.corner = {37, 38, 36, 39, 64, 67, 65, 66},
+		.edge = {20, 23, 22, 21, 27, 24, 25, 26, 1, 0, 3, 2}
 	};
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+	ret = invertco_fast(ret);
 
 	return ret;
 }
 
-static inline cube_t
-_trans_BUr(cube_t c)
+static inline cube_fast_t
+_trans_FDm(cube_fast_t c)
 {
-	cube_t ret;
-	cube_t tn = {
-		.c = {69, 71, 68, 70, 33, 35, 32, 34},
-		.e = {17, 18, 19, 16, 11, 10, 9, 8, 4, 5, 6, 7}
+	cube_fast_t ret;
+	cube_fast_t tn = {
+		.corner = {66, 64, 67, 65, 38, 36, 39, 37},
+		.edge = {19, 16, 17, 18, 9, 8, 11, 10, 6, 7, 4, 5}
 	};
-	cube_t ti = {
-		.c = {70, 68, 71, 69, 34, 32, 35, 33},
-		.e = {19, 16, 17, 18, 8, 9, 10, 11, 7, 6, 5, 4}
+	cube_fast_t ti = {
+		.corner = {33, 35, 32, 34, 69, 71, 68, 70},
+		.edge = {17, 18, 19, 16, 10, 11, 8, 9, 5, 4, 7, 6}
 	};
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+	ret = invertco_fast(ret);
 
 	return ret;
 }
 
-static inline cube_t
-_trans_BRr(cube_t c)
+static inline cube_fast_t
+_trans_FLm(cube_fast_t c)
 {
-	cube_t ret;
-	cube_t tn = {
-		.c = {35, 33, 32, 34, 69, 71, 70, 68},
-		.e = {11, 10, 9, 8, 18, 17, 16, 19, 23, 20, 21, 22}
+	cube_fast_t ret;
+	cube_fast_t tn = {
+		.corner = {38, 36, 37, 39, 64, 66, 67, 65},
+		.edge = {8, 9, 10, 11, 19, 16, 17, 18, 23, 20, 21, 22}
 	};
-	cube_t ti = {
-		.c = {66, 65, 67, 64, 39, 36, 38, 37},
-		.e = {22, 21, 20, 23, 25, 26, 27, 24, 3, 2, 1, 0}
+	cube_fast_t ti = {
+		.corner = {36, 39, 37, 38, 65, 66, 64, 67},
+		.edge = {21, 22, 23, 20, 25, 26, 27, 24, 0, 1, 2, 3}
 	};
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+	ret = invertco_fast(ret);
 
 	return ret;
 }
 
-static inline cube_t
-_trans_BDr(cube_t c)
+static inline cube_fast_t
+_trans_BUm(cube_fast_t c)
 {
-	cube_t ret;
-	cube_t tn = {
-		.c = {71, 69, 70, 68, 35, 33, 34, 32},
-		.e = {18, 17, 16, 19, 10, 11, 8, 9, 6, 7, 4, 5}
+	cube_fast_t ret;
+	cube_fast_t tn = {
+		.corner = {65, 67, 64, 66, 37, 39, 36, 38},
+		.edge = {17, 18, 19, 16, 10, 11, 8, 9, 5, 4, 7, 6}
 	};
-	cube_t ti = {
-		.c = {71, 69, 70, 68, 35, 33, 34, 32},
-		.e = {18, 17, 16, 19, 10, 11, 8, 9, 6, 7, 4, 5}
+	cube_fast_t ti = {
+		.corner = {34, 32, 35, 33, 70, 68, 71, 69},
+		.edge = {19, 16, 17, 18, 9, 8, 11, 10, 6, 7, 4, 5}
 	};
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+	ret = invertco_fast(ret);
 
 	return ret;
 }
 
-static inline cube_t
-_trans_BLr(cube_t c)
+static inline cube_fast_t
+_trans_BRm(cube_fast_t c)
 {
-	cube_t ret;
-	cube_t tn = {
-		.c = {33, 35, 34, 32, 71, 69, 68, 70},
-		.e = {10, 11, 8, 9, 17, 18, 19, 16, 21, 22, 23, 20}
+	cube_fast_t ret;
+	cube_fast_t tn = {
+		.corner = {39, 37, 36, 38, 65, 67, 66, 64},
+		.edge = {10, 11, 8, 9, 18, 17, 16, 19, 22, 21, 20, 23}
 	};
-	cube_t ti = {
-		.c = {67, 64, 66, 65, 38, 37, 39, 36},
-		.e = {23, 20, 21, 22, 27, 24, 25, 26, 2, 3, 0, 1}
+	cube_fast_t ti = {
+		.corner = {39, 36, 38, 37, 66, 65, 67, 64},
+		.edge = {22, 21, 20, 23, 26, 25, 24, 27, 2, 3, 0, 1}
 	};
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+	ret = invertco_fast(ret);
 
 	return ret;
 }
 
-static inline cube_t
-_trans_UFm(cube_t c)
+static inline cube_fast_t
+_trans_BDm(cube_fast_t c)
 {
-	cube_t ret;
-	cube_t tn = {
-		.c = {4, 5, 6, 7, 0, 1, 2, 3},
-		.e = {0, 1, 2, 3, 5, 4, 7, 6, 9, 8, 11, 10}
+	cube_fast_t ret;
+	cube_fast_t tn = {
+		.corner = {67, 65, 66, 64, 39, 37, 38, 36},
+		.edge = {18, 17, 16, 19, 11, 10, 9, 8, 7, 6, 5, 4}
 	};
-	cube_t ti = {
-		.c = {4, 5, 6, 7, 0, 1, 2, 3},
-		.e = {0, 1, 2, 3, 5, 4, 7, 6, 9, 8, 11, 10}
+	cube_fast_t ti = {
+		.corner = {35, 33, 34, 32, 71, 69, 70, 68},
+		.edge = {18, 17, 16, 19, 11, 10, 9, 8, 7, 6, 5, 4}
 	};
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
-	ret = _invertco(ret);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+	ret = invertco_fast(ret);
 
 	return ret;
 }
 
-static inline cube_t
-_trans_ULm(cube_t c)
+static inline cube_fast_t
+_trans_BLm(cube_fast_t c)
 {
-	cube_t ret;
-	cube_t tn = {
-		.c = {0, 1, 3, 2, 5, 4, 6, 7},
-		.e = {4, 5, 6, 7, 0, 1, 2, 3, 24, 27, 26, 25}
+	cube_fast_t ret;
+	cube_fast_t tn = {
+		.corner = {37, 39, 38, 36, 67, 65, 64, 66},
+		.edge = {11, 10, 9, 8, 17, 18, 19, 16, 20, 23, 22, 21}
 	};
-	cube_t ti = {
-		.c = {0, 1, 3, 2, 5, 4, 6, 7},
-		.e = {4, 5, 6, 7, 0, 1, 2, 3, 24, 27, 26, 25}
+	cube_fast_t ti = {
+		.corner = {38, 37, 39, 36, 67, 64, 66, 65},
+		.edge = {23, 20, 21, 22, 24, 27, 26, 25, 3, 2, 1, 0}
 	};
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
-	ret = _invertco(ret);
+	ret = compose_fast(tn, c);
+	ret = compose_fast(ret, ti);
+	ret = invertco_fast(ret);
+
+	return ret;
+}
+
+static cube_fast_t
+cubetofast(cube_t cube)
+{
+	cube_fast_t fast;
+	memcpy(&fast, &cube, sizeof(cube_fast_t));
+	return fast;
+}
+
+static cube_t
+fasttocube(cube_fast_t fast)
+{
+	cube_t cube;
+	memcpy(&cube, &fast, sizeof(cube_fast_t));
+	return cube;
+}
+
+static inline bool
+equal_fast(cube_fast_t c1, cube_fast_t c2)
+{
+	uint8_t i;
+	bool ret;
+
+	ret = true;
+	for (i = 0; i < 8; i++)
+		ret = ret && c1.corner[i] == c2.corner[i];
+	for (i = 0; i < 12; i++)
+		ret = ret && c1.edge[i] == c2.edge[i];
 
 	return ret;
 }
 
-static inline cube_t
-_trans_UBm(cube_t c)
+static inline cube_fast_t
+inverse_fast(cube_fast_t cube)
 {
-	cube_t ret;
-	cube_t tn = {
-		.c = {5, 4, 7, 6, 1, 0, 3, 2},
-		.e = {1, 0, 3, 2, 4, 5, 6, 7, 11, 10, 9, 8}
-	};
-	cube_t ti = {
-		.c = {5, 4, 7, 6, 1, 0, 3, 2},
-		.e = {1, 0, 3, 2, 4, 5, 6, 7, 11, 10, 9, 8}
-	};
-
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
-	ret = _invertco(ret);
+	cube_fast_t ret;
+	uint8_t i, piece, orien;
 
-	return ret;
-}
+	ret = zero_fast;
 
-static inline cube_t
-_trans_URm(cube_t c)
-{
-	cube_t ret;
-	cube_t tn = {
-		.c = {1, 0, 2, 3, 4, 5, 7, 6},
-		.e = {5, 4, 7, 6, 1, 0, 3, 2, 26, 25, 24, 27}
-	};
-	cube_t ti = {
-		.c = {1, 0, 2, 3, 4, 5, 7, 6},
-		.e = {5, 4, 7, 6, 1, 0, 3, 2, 26, 25, 24, 27}
-	};
+	for (i = 0; i < 12; i++) {
+		piece = cube.edge[i];
+		orien = piece & _eobit;
+		ret.edge[piece & _pbits] = i | orien;
+	}
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
-	ret = _invertco(ret);
+	for (i = 0; i < 8; i++) {
+		piece = cube.corner[i];
+		orien = ((piece << 1) | (piece >> 1)) & _cobits2;
+		ret.corner[piece & _pbits] = i | orien;
+	}
 
 	return ret;
 }
 
-static inline cube_t
-_trans_DFm(cube_t c)
+static inline cube_fast_t
+compose_fast(cube_fast_t c1, cube_fast_t c2)
 {
-	cube_t ret;
-	cube_t tn = {
-		.c = {6, 7, 4, 5, 2, 3, 0, 1},
-		.e = {3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11}
-	};
-	cube_t ti = {
-		.c = {6, 7, 4, 5, 2, 3, 0, 1},
-		.e = {3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11}
-	};
+	cube_fast_t ret;
+	uint8_t i, piece1, piece2, p, orien, aux, auy;
+
+	ret = zero_fast;
+
+	for (i = 0; i < 12; i++) {
+		piece2 = c2.edge[i];
+		p = piece2 & _pbits;
+		piece1 = c1.edge[p];
+		orien = (piece2 ^ piece1) & _eobit;
+		ret.edge[i] = (piece1 & _pbits) | orien;
+	}
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
-	ret = _invertco(ret);
+	for (i = 0; i < 8; i++) {
+		piece2 = c2.corner[i];
+		p = piece2 & _pbits;
+		piece1 = c1.corner[p];
+		aux = (piece2 & _cobits) + (piece1 & _cobits);
+		auy = (aux + _ctwist_cw) >> 2U;
+		orien = (aux + auy) & _cobits2;
+		ret.corner[i] = (piece1 & _pbits) | orien;
+	}
 
 	return ret;
 }
 
-static inline cube_t
-_trans_DLm(cube_t c)
+static inline int64_t
+coord_fast_eo(cube_fast_t cube)
 {
-	cube_t ret;
-	cube_t tn = {
-		.c = {3, 2, 0, 1, 6, 7, 5, 4},
-		.e = {7, 6, 5, 4, 2, 3, 0, 1, 27, 24, 25, 26}
-	};
-	cube_t ti = {
-		.c = {2, 3, 1, 0, 7, 6, 4, 5},
-		.e = {6, 7, 4, 5, 3, 2, 1, 0, 25, 26, 27, 24}
-	};
+	int i, p;
+	int64_t ret;
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
-	ret = _invertco(ret);
+	ret = 0;
+	for (i = 1, p = 1; i < 12; i++, p *= 2)
+		ret += p * (cube.edge[i] >> 4);
 
 	return ret;
 }
 
-static inline cube_t
-_trans_DBm(cube_t c)
-{
-	cube_t ret;
-	cube_t tn = {
-		.c = {7, 6, 5, 4, 3, 2, 1, 0},
-		.e = {2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9}
-	};
-	cube_t ti = {
-		.c = {7, 6, 5, 4, 3, 2, 1, 0},
-		.e = {2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9}
-	};
+#endif
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
-	ret = _invertco(ret);
+/******************************************************************************
+Section: generic methods
 
-	return ret;
+This section contains generic functionality, including the public functions.
+Some of these routines depend on the efficient functions implemented in the
+previous sections, while some other operate directly on the cube.
+******************************************************************************/
+
+static uint8_t readco(char *);
+static uint8_t readcp(char *);
+static uint8_t readeo(char *);
+static uint8_t readep(char *);
+static int permsign(uint8_t *, int);
+static cube_t readcube_H48(char *);
+static void writecube_AVX(cube_t, char *);
+static void writecube_H48(cube_t, char *);
+static int writepiece_SRC(uint8_t, char *);
+static void writecube_SRC(cube_t, char *);
+static uint8_t readmove(char);
+static uint8_t readmodifier(char);
+static uint8_t readtrans(char *);
+static void writemoves(uint8_t *, int, char *);
+static void writetrans(uint8_t, char *);
+static cube_fast_t transform(cube_fast_t, uint8_t);
+static cube_fast_t move(cube_fast_t, uint8_t);
+
+static cube_t zero = { .corner = {0}, .edge = {0} };
+static cube_t solved = {
+	.corner = {0, 1, 2, 3, 4, 5, 6, 7},
+	.edge = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}
+};
+
+cube_t
+solvedcube(void)
+{
+	return solved;
 }
 
-static inline cube_t
-_trans_DRm(cube_t c)
+bool
+equal(cube_t c1, cube_t c2)
 {
-	cube_t ret;
-	cube_t tn = {
-		.c = {2, 3, 1, 0, 7, 6, 4, 5},
-		.e = {6, 7, 4, 5, 3, 2, 1, 0, 25, 26, 27, 24}
-	};
-	cube_t ti = {
-		.c = {3, 2, 0, 1, 6, 7, 5, 4},
-		.e = {7, 6, 5, 4, 2, 3, 0, 1, 27, 24, 25, 26}
-	};
+	int i;
+	bool ret;
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
-	ret = _invertco(ret);
+	ret = true;
+	for (i = 0; i < 8; i++)
+		ret = ret && c1.corner[i] == c2.corner[i];
+	for (i = 0; i < 12; i++)
+		ret = ret && c1.edge[i] == c2.edge[i];
 
 	return ret;
 }
 
-static inline cube_t
-_trans_RUm(cube_t c)
+bool
+iserror(cube_t cube)
 {
-	cube_t ret;
-	cube_t tn = {
-		.c = {68, 71, 69, 70, 33, 34, 32, 35},
-		.e = {21, 22, 23, 20, 25, 26, 27, 24, 0, 1, 2, 3}
-	};
-	cube_t ti = {
-		.c = {70, 68, 69, 71, 32, 34, 35, 33},
-		.e = {8, 9, 10, 11, 19, 16, 17, 18, 23, 20, 21, 22}
-	};
+	return equal(cube, zero);
+}
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
-	ret = _invertco(ret);
+static uint8_t
+readco(char *str)
+{
+	if (*str == '0')
+		return 0;
+	if (*str == '1')
+		return _ctwist_cw;
+	if (*str == '2')
+		return _ctwist_ccw;
 
-	return ret;
+	DBG_LOG("Error reading CO\n");
+	return _error;
 }
 
-static inline cube_t
-_trans_RFm(cube_t c)
+static uint8_t
+readcp(char *str)
 {
-	cube_t ret;
-	cube_t tn = {
-		.c = {34, 33, 32, 35, 68, 71, 70, 69},
-		.e = {25, 26, 27, 24, 22, 21, 20, 23, 19, 16, 17, 18}
-	};
-	cube_t ti = {
-		.c = {66, 65, 64, 67, 36, 39, 38, 37},
-		.e = {25, 26, 27, 24, 22, 21, 20, 23, 19, 16, 17, 18}
-	};
+	uint8_t c;
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
-	ret = _invertco(ret);
+	for (c = 0; c < 8; c++)
+		if (!strncmp(str, cornerstr[c], 3) ||
+		    !strncmp(str, cornerstralt[c], 3))
+			return c;
 
-	return ret;
+	DBG_LOG("Error reading CP\n");
+	return _error;
 }
 
-static inline cube_t
-_trans_RDm(cube_t c)
+static uint8_t
+readeo(char *str)
 {
-	cube_t ret;
-	cube_t tn = {
-		.c = {71, 68, 70, 69, 34, 33, 35, 32},
-		.e = {22, 21, 20, 23, 26, 25, 24, 27, 2, 3, 0, 1}
-	};
-	cube_t ti = {
-		.c = {71, 69, 68, 70, 33, 35, 34, 32},
-		.e = {10, 11, 8, 9, 18, 17, 16, 19, 22, 21, 20, 23}
-	};
-
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
-	ret = _invertco(ret);
+	if (*str == '0')
+		return 0;
+	if (*str == '1')
+		return _eflip;
 
-	return ret;
+	DBG_LOG("Error reading EO\n");
+	return _error;
 }
 
-static inline cube_t
-_trans_RBm(cube_t c)
+static uint8_t
+readep(char *str)
 {
-	cube_t ret;
-	cube_t tn = {
-		.c = {33, 34, 35, 32, 71, 68, 69, 70},
-		.e = {26, 25, 24, 27, 21, 22, 23, 20, 17, 18, 19, 16}
-	};
-	cube_t ti = {
-		.c = {67, 64, 65, 66, 37, 38, 39, 36},
-		.e = {27, 24, 25, 26, 23, 20, 21, 22, 18, 17, 16, 19}
-	};
+	uint8_t e;
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
-	ret = _invertco(ret);
+	for (e = 0; e < 12; e++)
+		if (!strncmp(str, edgestr[e], 2))
+			return e;
 
-	return ret;
+	DBG_LOG("Error reading EP\n");
+	return _error;
 }
 
-static inline cube_t
-_trans_LUm(cube_t c)
+static cube_t
+readcube_H48(char *buf)
 {
-	cube_t ret;
-	cube_t tn = {
-		.c = {69, 70, 68, 71, 32, 35, 33, 34},
-		.e = {20, 23, 22, 21, 27, 24, 25, 26, 1, 0, 3, 2}
-	};
-	cube_t ti = {
-		.c = {68, 70, 71, 69, 34, 32, 33, 35},
-		.e = {9, 8, 11, 10, 16, 19, 18, 17, 21, 22, 23, 20}
-	};
+	int i;
+	uint8_t piece, orient;
+	cube_t ret = {0};
+	char *b;
+	
+	b = buf;
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
-	ret = _invertco(ret);
+	for (i = 0; i < 12; i++) {
+		while (*b == ' ' || *b == '\t' || *b == '\n')
+			b++;
+		if ((piece = readep(b)) == _error)
+			return zero;
+		b += 2;
+		if ((orient = readeo(b)) == _error)
+			return zero;
+		b++;
+		ret.edge[i] = piece | orient;
+	}
+	for (i = 0; i < 8; i++) {
+		while (*b == ' ' || *b == '\t' || *b == '\n')
+			b++;
+		if ((piece = readcp(b)) == _error)
+			return zero;
+		b += 3;
+		if ((orient = readco(b)) == _error)
+			return zero;
+		b++;
+		ret.corner[i] = piece | orient;
+	}
 
 	return ret;
 }
 
-static inline cube_t
-_trans_LFm(cube_t c)
+cube_t
+readcube(char *format, char *buf)
 {
-	cube_t ret;
-	cube_t tn = {
-		.c = {32, 35, 34, 33, 70, 69, 68, 71},
-		.e = {24, 27, 26, 25, 20, 23, 22, 21, 16, 19, 18, 17}
-	};
-	cube_t ti = {
-		.c = {64, 67, 66, 65, 38, 37, 36, 39},
-		.e = {24, 27, 26, 25, 20, 23, 22, 21, 16, 19, 18, 17}
-	};
+	cube_t cube;
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
-	ret = _invertco(ret);
+	if (!strcmp(format, "H48")) {
+		cube = readcube_H48(buf);
+	} else {
+		DBG_LOG("Cannot read cube in the given format\n");
+		cube = zero;
+	}
 
-	return ret;
+	return cube;
 }
 
-static inline cube_t
-_trans_LDm(cube_t c)
+
+static int
+writepiece_SRC(uint8_t piece, char *buf)
 {
-	cube_t ret;
-	cube_t tn = {
-		.c = {70, 69, 71, 68, 35, 32, 34, 33},
-		.e = {23, 20, 21, 22, 24, 27, 26, 25, 3, 2, 1, 0}
-	};
-	cube_t ti = {
-		.c = {69, 71, 70, 68, 35, 33, 32, 34},
-		.e = {11, 10, 9, 8, 17, 18, 19, 16, 20, 23, 22, 21}
-	};
+	char digits[3];
+	int i, len = 0;
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
-	ret = _invertco(ret);
+	while (piece != 0) {
+		digits[len++] = (piece % 10) + '0';
+		piece /= 10;
+	}
 
-	return ret;
-}
+	if (len == 0)
+		digits[len++] = '0';
 
-static inline cube_t
-_trans_LBm(cube_t c)
-{
-	cube_t ret;
-	cube_t tn = {
-		.c = {35, 32, 33, 34, 69, 70, 71, 68},
-		.e = {27, 24, 25, 26, 23, 20, 21, 22, 18, 17, 16, 19}
-	};
-	cube_t ti = {
-		.c = {65, 66, 67, 64, 39, 36, 37, 38},
-		.e = {26, 25, 24, 27, 21, 22, 23, 20, 17, 18, 19, 16}
-	};
+	for (i = 0; i < len; i++)
+		buf[i] = digits[len-i-1];
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
-	ret = _invertco(ret);
+	buf[len] = ',';
+	buf[len+1] = ' ';
 
-	return ret;
+	return len+2;
 }
 
-static inline cube_t
-_trans_FUm(cube_t c)
+static void
+writecube_AVX(cube_t cube, char *buf)
 {
-	cube_t ret;
-	cube_t tn = {
-		.c = {64, 66, 65, 67, 36, 38, 37, 39},
-		.e = {16, 19, 18, 17, 8, 9, 10, 11, 4, 5, 6, 7}
-	};
-	cube_t ti = {
-		.c = {32, 34, 33, 35, 68, 70, 69, 71},
-		.e = {16, 19, 18, 17, 8, 9, 10, 11, 4, 5, 6, 7}
-	};
+	int i, ptr;
+	uint8_t piece;
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
-	ret = _invertco(ret);
+	memcpy(buf, "_mm256_set_epi8(\n\t0, 0, 0, 0, ", 30);
+	ptr = 30;
 
-	return ret;
-}
+	for (i = 11; i >= 0; i--) {
+		piece = cube.edge[i];
+		ptr += writepiece_SRC(piece, buf + ptr);
+	}
 
-static inline cube_t
-_trans_FRm(cube_t c)
-{
-	cube_t ret;
-	cube_t tn = {
-		.c = {36, 38, 39, 37, 66, 64, 65, 67},
-		.e = {9, 8, 11, 10, 16, 19, 18, 17, 21, 22, 23, 20}
-	};
-	cube_t ti = {
-		.c = {37, 38, 36, 39, 64, 67, 65, 66},
-		.e = {20, 23, 22, 21, 27, 24, 25, 26, 1, 0, 3, 2}
-	};
+	memcpy(buf+ptr-2, ",\n\t0, 0, 0, 0, 0, 0, 0, 0, ", 27);
+	ptr += 25;
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
-	ret = _invertco(ret);
+	for (i = 7; i >= 0; i--) {
+		piece = cube.corner[i];
+		ptr += writepiece_SRC(piece, buf + ptr);
+	}
 
-	return ret;
+	memcpy(buf+ptr-2, "\n)\0", 3);
 }
 
-static inline cube_t
-_trans_FDm(cube_t c)
+static void
+writecube_H48(cube_t cube, char *buf)
 {
-	cube_t ret;
-	cube_t tn = {
-		.c = {66, 64, 67, 65, 38, 36, 39, 37},
-		.e = {19, 16, 17, 18, 9, 8, 11, 10, 6, 7, 4, 5}
-	};
-	cube_t ti = {
-		.c = {33, 35, 32, 34, 69, 71, 68, 70},
-		.e = {17, 18, 19, 16, 10, 11, 8, 9, 5, 4, 7, 6}
-	};
+	uint8_t piece, perm, orient;
+	int i;
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
-	ret = _invertco(ret);
+	for (i = 0; i < 12; i++) {
+		piece = cube.edge[i];
+		perm = piece & _pbits;
+		orient = (piece & _eobit) >> _eoshift;
+		buf[4*i    ] = edgestr[perm][0];
+		buf[4*i + 1] = edgestr[perm][1];
+		buf[4*i + 2] = orient + '0';
+		buf[4*i + 3] = ' ';
+	}
+	for (i = 0; i < 8; i++) {
+		piece = cube.corner[i];
+		perm = piece & _pbits;
+		orient = (piece & _cobits) >> _coshift;
+		buf[48 + 5*i    ] = cornerstr[perm][0];
+		buf[48 + 5*i + 1] = cornerstr[perm][1];
+		buf[48 + 5*i + 2] = cornerstr[perm][2];
+		buf[48 + 5*i + 3] = orient + '0';
+		buf[48 + 5*i + 4] = ' ';
+	}
 
-	return ret;
+	buf[48+39] = '\0';
 }
 
-static inline cube_t
-_trans_FLm(cube_t c)
+static void
+writecube_SRC(cube_t cube, char *buf)
 {
-	cube_t ret;
-	cube_t tn = {
-		.c = {38, 36, 37, 39, 64, 66, 67, 65},
-		.e = {8, 9, 10, 11, 19, 16, 17, 18, 23, 20, 21, 22}
-	};
-	cube_t ti = {
-		.c = {36, 39, 37, 38, 65, 66, 64, 67},
-		.e = {21, 22, 23, 20, 25, 26, 27, 24, 0, 1, 2, 3}
-	};
+	int i, ptr;
+	uint8_t piece;
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
-	ret = _invertco(ret);
+	memcpy(buf, "{\n\t.corner = {", 14);
+	ptr = 14;
 
-	return ret;
-}
+	for (i = 0; i < 8; i++) {
+		piece = cube.corner[i];
+		ptr += writepiece_SRC(piece, buf + ptr);
+	}
 
-static inline cube_t
-_trans_BUm(cube_t c)
-{
-	cube_t ret;
-	cube_t tn = {
-		.c = {65, 67, 64, 66, 37, 39, 36, 38},
-		.e = {17, 18, 19, 16, 10, 11, 8, 9, 5, 4, 7, 6}
-	};
-	cube_t ti = {
-		.c = {34, 32, 35, 33, 70, 68, 71, 69},
-		.e = {19, 16, 17, 18, 9, 8, 11, 10, 6, 7, 4, 5}
-	};
+	memcpy(buf+ptr-2, "},\n\t.edge = {", 13);
+	ptr += 11;
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
-	ret = _invertco(ret);
+	for (i = 0; i < 12; i++) {
+		piece = cube.edge[i];
+		ptr += writepiece_SRC(piece, buf + ptr);
+	}
 
-	return ret;
+	memcpy(buf+ptr-2, "}\n}\0", 4);
 }
 
-static inline cube_t
-_trans_BRm(cube_t c)
+void
+writecube(char *format, cube_t cube, char *buf)
 {
-	cube_t ret;
-	cube_t tn = {
-		.c = {39, 37, 36, 38, 65, 67, 66, 64},
-		.e = {10, 11, 8, 9, 18, 17, 16, 19, 22, 21, 20, 23}
-	};
-	cube_t ti = {
-		.c = {39, 36, 38, 37, 66, 65, 67, 64},
-		.e = {22, 21, 20, 23, 26, 25, 24, 27, 2, 3, 0, 1}
-	};
+	char *errormsg;
+	size_t len;
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
-	ret = _invertco(ret);
+	if (!isconsistent(cube)) {
+		errormsg = "ERROR: cannot write inconsistent cube";
+		goto writecube_error;
+	}
 
-	return ret;
+	if (!strcmp(format, "H48")) {
+		writecube_H48(cube, buf);
+	} else if (!strcmp(format, "SRC")) {
+		writecube_SRC(cube, buf);
+	} else if (!strcmp(format, "AVX")) {
+		writecube_AVX(cube, buf);
+	} else {
+		errormsg = "ERROR: cannot write cube in the given format";
+		goto writecube_error;
+	}
+
+	return;
+
+writecube_error:
+	DBG_LOG("writecube error, see stdout for details\n");
+	len = strlen(errormsg);
+	memcpy(buf, errormsg, len);
+	buf[len] = '\n';
+	buf[len+1] = '\0';
 }
 
-static inline cube_t
-_trans_BDm(cube_t c)
+static uint8_t
+readmove(char c)
 {
-	cube_t ret;
-	cube_t tn = {
-		.c = {67, 65, 66, 64, 39, 37, 38, 36},
-		.e = {18, 17, 16, 19, 11, 10, 9, 8, 7, 6, 5, 4}
-	};
-	cube_t ti = {
-		.c = {35, 33, 34, 32, 71, 69, 70, 68},
-		.e = {18, 17, 16, 19, 11, 10, 9, 8, 7, 6, 5, 4}
-	};
-
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
-	ret = _invertco(ret);
+	switch (c) {
+	case 'U':
+		return U;
+	case 'D':
+		return D;
+	case 'R':
+		return R;
+	case 'L':
+		return L;
+	case 'F':
+		return F;
+	case 'B':
+		return B;
+	default:
+		return _error;
+	}
+}
 
-	return ret;
+/*
+ * Translate a move modifier character into the offset that is added to the
+ * base move: '1' -> 0, '2' -> 1, '3' -> 2, '\'' -> 2. Any other character
+ * also yields 0, so from the return value alone a caller cannot tell an
+ * explicit '1' apart from "no modifier present".
+ */
+static uint8_t
+readmodifier(char c)
+{
+	switch (c) {
+	case '1': /* Fallthrough */
+	case '2': /* Fallthrough */
+	case '3':
+		return c - '0' - 1;
+	case '\'':
+		return 2;
+	default:
+		return 0;
+	}
 }
 
-static inline cube_t
-_trans_BLm(cube_t c)
+static uint8_t
+readtrans(char *buf)
 {
-	cube_t ret;
-	cube_t tn = {
-		.c = {37, 39, 38, 36, 67, 65, 64, 66},
-		.e = {11, 10, 9, 8, 17, 18, 19, 16, 20, 23, 22, 21}
-	};
-	cube_t ti = {
-		.c = {38, 37, 39, 36, 67, 64, 66, 65},
-		.e = {23, 20, 21, 22, 24, 27, 26, 25, 3, 2, 1, 0}
-	};
+	uint8_t t;
 
-	ret = compose(tn, c);
-	ret = compose(ret, ti);
-	ret = _invertco(ret);
+	for (t = 0; t < 48; t++)
+		if (!strncmp(buf, transstr[t], 11))
+			return t;
 
-	return ret;
+	DBG_LOG("readtrans error\n");
+	return _error;
 }
 
-static cube_t
-_arraytocube(cube_array_t a)
+static void
+writemoves(uint8_t *m, int n, char *buf)
 {
-	cube_t c;
-	memcpy(&c, &a, sizeof(cube_t));
-	return c;
+	int i;
+	size_t len;
+	char *b, *s;
+
+	for (i = 0, b = buf; i < n; i++, b++) {
+		s = movestr[m[i]];
+		len = strlen(s);
+		memcpy(b, s, len);
+		b += len;	
+		*b = ' ';
+	}
+	*b = '\0';
 }
 
 static void
-_cubetoarray(cube_t c, cube_array_t *a)
+writetrans(uint8_t t, char *buf)
 {
-	memcpy(a, &c, sizeof(cube_t));
+	if (t >= 48)
+		memcpy(buf, "error trans", 11);
+	else
+		memcpy(buf, transstr[t], 11);
+	buf[11] = '\0';
 }
 
-static inline bool
-_equal(cube_t c1, cube_t c2)
+static int
+permsign(uint8_t *a, int n)
 {
-	uint8_t i;
-	bool ret;
+	int i, j;
+	uint8_t ret = 0;
 
-	ret = true;
-	for (i = 0; i < 8; i++)
-		ret = ret && c1.c[i] == c2.c[i];
-	for (i = 0; i < 12; i++)
-		ret = ret && c1.e[i] == c2.e[i];
+	for (i = 0; i < n; i++)
+		for (j = i+1; j < n; j++)
+			ret += a[i] > a[j] ? 1 : 0;
 
-	return ret;
+	return ret % 2;
 }
 
-static inline cube_t
-_inverse(cube_t c)
+/*
+ * Check that the cube is a well-formed (possibly unsolvable) state:
+ * every edge index is below 12 and every corner index is below 8, each
+ * piece appears exactly once, and the orientation bits hold a known value.
+ * Returns true if all checks pass; otherwise logs which aspect (EP, EO,
+ * CP or CO) failed and returns false.
+ */
+bool
+isconsistent(cube_t cube)
 {
-	cube_t ret;
-	uint8_t i, piece, orien;
-
-	ret = _zerocube;
+	uint8_t i, p, e, piece;
+	bool found[12];
 
+	for (i = 0; i < 12; i++)
+		found[i] = false;
 	for (i = 0; i < 12; i++) {
-		piece = c.e[i];
-		orien = piece & _eobit;
-		ret.e[piece & _pbits] = i | orien;
+		piece = cube.edge[i];
+		p = piece & _pbits;
+		e = piece & _eobit;
+		if (p >= 12)
+			goto inconsistent_ep;
+		/* NOTE(review): e is masked with _eobit; if that is a single
+		 * bit this test cannot fire - confirm */
+		if (e != 0 && e != _eobit)
+			goto inconsistent_eo;
+		found[p] = true;
 	}
+	/* A missing edge means some other edge appears twice */
+	for (i = 0; i < 12; i++)
+		if (!found[i])
+			goto inconsistent_ep;
 
+	for (i = 0; i < 8; i++)
+		found[i] = false;
 	for (i = 0; i < 8; i++) {
-		piece = c.c[i];
-		orien = ((piece << 1) | (piece >> 1)) & _cobits2;
-		ret.c[piece & _pbits] = i | orien;
+		piece = cube.corner[i];
+		p = piece & _pbits;
+		e = piece & _cobits;
+		if (p >= 8)
+			goto inconsistent_cp;
+		if (e != 0 && e != _ctwist_cw && e != _ctwist_ccw)
+			goto inconsistent_co;
+		found[p] = true;
 	}
+	/* A missing corner is a permutation (CP) problem, not a CO one */
+	for (i = 0; i < 8; i++)
+		if (!found[i])
+			goto inconsistent_cp;
 
-	return ret;
+	return true;
+
+inconsistent_ep:
+	DBG_LOG("Inconsistent EP\n");
+	return false;
+inconsistent_cp:
+	DBG_LOG("Inconsistent CP\n");
+	return false;
+inconsistent_eo:
+	DBG_LOG("Inconsistent EO\n");
+	return false;
+inconsistent_co:
+	DBG_LOG("Inconsistent CO\n");
+	return false;
 }
 
-static inline cube_t
-_compose(cube_t c1, cube_t c2)
+bool
+issolvable(cube_t cube)
 {
-	cube_t ret;
-	uint8_t i, piece1, piece2, p, orien, aux, auy;
+	uint8_t i, eo, co, piece, edges[12], corners[8];
+
+	DBG_ASSERT(isconsistent(cube), false,
+	    "issolvable: cube is inconsistent\n");
+
+	for (i = 0; i < 12; i++)
+		edges[i] = cube.edge[i] & _pbits;
+	for (i = 0; i < 8; i++)
+		corners[i] = cube.corner[i] & _pbits;
 
-	ret = _zerocube;
+	if (permsign(edges, 12) != permsign(corners, 8))
+		goto issolvable_parity;
 
+	eo = 0;
 	for (i = 0; i < 12; i++) {
-		piece2 = c2.e[i];
-		p = piece2 & _pbits;
-		piece1 = c1.e[p];
-		orien = (piece2 ^ piece1) & _eobit;
-		ret.e[i] = (piece1 & _pbits) | orien;
+		piece = cube.edge[i];
+		eo += (piece & _eobit) >> _eoshift;
 	}
+	if (eo % 2 != 0)
+		goto issolvable_eo;
 
+	co = 0;
 	for (i = 0; i < 8; i++) {
-		piece2 = c2.c[i];
-		p = piece2 & _pbits;
-		piece1 = c1.c[p];
-		aux = (piece2 & _cobits) + (piece1 & _cobits);
-		auy = (aux + _ctwist_cw) >> 2U;
-		orien = (aux + auy) & _cobits2;
-		ret.c[i] = (piece1 & _pbits) | orien;
+		piece = cube.corner[i];
+		co += (piece & _cobits) >> _coshift;
 	}
+	if (co % 3 != 0)
+		goto issolvable_co;
 
-	return ret;
-}
-
-static inline int16_t
-_coord_eo(cube_t c)
-{
-	int i, p;
-	int16_t ret;
-
-	ret = 0;
-	for (i = 1, p = 1; i < 12; i++, p *= 2)
-		ret += p * (c.e[i] >> 4);
-
-	return ret;
-}
-
-
-#endif
-
-/******************************************************************************
-Section: generic methods.
-
-This section contains functions that are based (directly or indirectly)
-on the per-architecture functions defined in the previous sections. Many
-of them are public functions from cube.h
-******************************************************************************/
-
-cube_t
-solvedcube(void)
-{
-	return _solvedcube;
-}
-
-cube_t
-readcube(format_t format, char *buf)
-{
-	cube_array_t arr = readcube_array(format, buf);
-	return _arraytocube(arr);
-}
+	return true;
 
-void
-writecube(format_t format, cube_t cube, char *buf)
-{
-	cube_array_t arr;
-	_cubetoarray(cube, &arr);
-	writecube_array(format, arr, buf);
+issolvable_parity:
+	DBG_LOG("EP and CP parities are different\n");
+	return false;
+issolvable_eo:
+	DBG_LOG("Odd number of flipped edges\n");
+	return false;
+issolvable_co:
+	DBG_LOG("Sum of corner orientation is not multiple of 3\n");
+	return false;
 }
 
 bool
-isconsistent(cube_t c)
+issolved(cube_t cube)
 {
-	cube_array_t arr;
-	_cubetoarray(c, &arr);
-	return isconsistent_array(arr);
+	return equal(cube, solved);
 }
 
-bool
-issolvable(cube_t c)
+cube_t
+inverse(cube_t cube)
 {
-	cube_array_t arr;
-	_cubetoarray(c, &arr);
-	return issolvable_array(arr);
-}
+	DBG_ASSERT(isconsistent(cube), zero,
+	    "inverse error: inconsistent cube\n");
 
-bool
-iserror(cube_t c)
-{
-	cube_array_t arr;
-	_cubetoarray(c, &arr);
-	return iserror_array(arr);
+	return fasttocube(inverse_fast(cubetofast(cube)));
 }
 
-bool
-equal(cube_t c1, cube_t c2)
+cube_t
+compose(cube_t c1, cube_t c2)
 {
-	return _equal(c1, c2);
-}
+	DBG_ASSERT(isconsistent(c1) && isconsistent(c2),
+	    zero, "compose error: inconsistent cube\n")
 
-bool
-issolved(cube_t cube)
-{
-	return equal(cube, _solvedcube);
+	return fasttocube(compose_fast(cubetofast(c1), cubetofast(c2)));
 }
 
-cube_t
-move(cube_t c, move_t m)
+static cube_fast_t
+move(cube_fast_t c, uint8_t m)
 {
-	DBG_ASSERT(isconsistent(c), _zerocube,
-	    "move error: inconsistent cube\n");
-
 	switch (m) {
 	case U:
 		return _move_U(c);
@@ -3542,34 +3461,13 @@ move(cube_t c, move_t m)
 		return _move_B3(c);
 	default:
 		DBG_LOG("mover error, unknown move\n");
-		return _zerocube;
+		return zero_fast;
 	}
 }
 
-cube_t
-inverse(cube_t c)
-{
-	DBG_ASSERT(isconsistent(c), _zerocube,
-	    "inverse error: inconsistent cube\n");
-
-	return _inverse(c);
-}
-
-cube_t
-compose(cube_t c1, cube_t c2)
-{
-	DBG_ASSERT(isconsistent(c1) && isconsistent(c2),
-	    _zerocube, "compose error: inconsistent cube\n")
-
-	return _compose(c1, c2);
-}
-
-cube_t
-transform(cube_t c, trans_t t)
+static cube_fast_t
+transform(cube_fast_t c, uint8_t t)
 {
-	DBG_ASSERT(isconsistent(c), _zerocube,
-	    "transform error: inconsistent cube\n");
-
 	switch (t) {
 	case UFr:
 		return _trans_UFr(c);
@@ -3669,39 +3567,86 @@ transform(cube_t c, trans_t t)
 		return _trans_BLm(c);
 	default:
 		DBG_LOG("transform error, unknown transformation\n");
-		return _zerocube;
+		return zero_fast;
+	}
+}
+
+/*
+ * Apply to the given cube the sequence of moves written in buf and return
+ * the resulting cube. Each move is a face letter accepted by readmove(),
+ * optionally followed by one modifier character (1, 2, 3 or '). Spaces,
+ * tabs and newlines between moves are skipped. Returns the zero (error)
+ * cube if a character that is not a valid move is found.
+ */
+cube_t
+applymoves(cube_t cube, char *buf)
+{
+	cube_fast_t fast;
+	uint8_t r, m;
+	char *b;
+
+	DBG_ASSERT(isconsistent(cube), zero,
+	    "move error: inconsistent cube\n");
+
+	fast = cubetofast(cube);
+
+	for (b = buf; *b != '\0'; b++) {
+		while (*b == ' ' || *b == '\t' || *b == '\n')
+			b++;
+		if (*b == '\0')
+			goto readmoves_finish;
+		if ((r = readmove(*b)) == _error)
+			goto readmoves_error;
+		m = readmodifier(*(b+1));
+		/*
+		 * readmodifier() returns 0 both for '1' and for "no modifier",
+		 * so an explicit '1' must be consumed here; otherwise it would
+		 * be parsed as a move on the next iteration and rejected.
+		 */
+		if (m != 0 || *(b+1) == '1')
+			b++;
+		fast = move(fast, r + m);
 	}
+
+readmoves_finish:
+	return fasttocube(fast);
+
+readmoves_error:
+	DBG_LOG("readmoves error\n");
+	return zero;
+}
+
+cube_t
+applytrans(cube_t cube, char *buf)
+{
+	cube_fast_t fast;
+	uint8_t t;
+
+	DBG_ASSERT(isconsistent(cube), zero,
+	    "transformation error: inconsistent cube\n");
+
+	t = readtrans(buf);
+	fast = cubetofast(cube);
+	fast = transform(fast, t);
+
+	return fasttocube(fast);
 }
 
-int16_t
-coord_eo(cube_t c)
+int64_t
+coord_eo(cube_t cube)
 {
-	return _coord_eo(c);
+	return coord_fast_eo(cubetofast(cube));
 }
 
 /******************************************************************************
 Section: solvers
 
-This is a continuation of the generic methods section. Here you can find the
-implementation of all the solving algorithms.
+Here you can find the implementation of all the solving algorithms.
 ******************************************************************************/
 
 typedef struct {
-	cube_t cube;
+	cube_fast_t cube;
 	uint8_t depth;
 	int maxsols;
-	move_t *sols;
+	uint8_t *sols;
 	int nsols;
 	int nmoves;
-	move_t moves[20];
-	int (*estimate)(cube_t);
+	uint8_t moves[20];
+	int (*estimate)(cube_fast_t);
 } dfs_arg_t;
 
 static bool
-allowednextmove(dfs_arg_t arg, move_t m)
+allowednextmove(dfs_arg_t arg, uint8_t m)
 {
 	int n;
-	move_t mbase, l1base, l2base, maxis, l1axis, l2axis;
+	uint8_t mbase, l1base, l2base, maxis, l1axis, l2axis;
 
 	n = arg.nmoves;
 
@@ -3730,7 +3675,7 @@ solve_generic_dfs(dfs_arg_t arg)
 {
 	dfs_arg_t nextarg;
 	int bound, ret;
-	move_t m;
+	uint8_t m;
 
 	bound = arg.estimate(arg.cube);
 
@@ -3740,9 +3685,7 @@ solve_generic_dfs(dfs_arg_t arg)
 	if (bound == 0) {
 		if (arg.nmoves != arg.depth)
 			return 0;
-		memcpy(&arg.sols[arg.depth * arg.nsols],
-		       arg.moves,
-		       arg.depth * sizeof(move_t));
+		memcpy(&arg.sols[arg.depth * arg.nsols], arg.moves, arg.depth);
 		return 1;
 	}
 
@@ -3759,13 +3702,14 @@ solve_generic_dfs(dfs_arg_t arg)
 	return ret;
 }
 
+/* TODO
 int
 solve_generic(
-	cube_t cube,
+	cube_fast_t cube,
 	uint8_t depth,
 	int maxsols,
-	move_t *sols
-	int (*estimate)(cube_t),
+	uint8_t *sols,
+	int (*estimate)(cube_fast_t)
 )
 {
 	dfs_arg_t arg;
@@ -3780,9 +3724,10 @@ solve_generic(
 		.sols = sols,
 		.nsols = 0,
 		.nmoves = 0,
-		.moves = {0}
+		.moves = {0},
 		.estimate = estimate,
 	};
 
 	return solve_generic_dfs(arg);
 }
+*/
diff --git a/cube.h b/cube.h
@@ -17,6 +17,7 @@ corners is with respect to U/D.
 The permutation of the center pieces is not stored. This means that the
 cube is assumed to be in a fixed orientation.
 
+TODO: define EO and CO better, explain how to use them
 TODO: encode centers?
 
 The exact cube type structure depends on your system's configuration. If
@@ -24,22 +25,22 @@ you operate on the cube only via the functions provided below, you don't
 need to worry about this.
 ******************************************************************************/
 
-#ifdef CUBE_AVX2
-typedef __m256i cube_t;
-#else
 typedef struct {
-	uint8_t c[8];   /* Corners */
-	uint8_t e[12];  /* Edges   */
+	uint8_t corner[8];
+	uint8_t edge[12];
 } cube_t;
-#endif
 
 /* Returns a copy of the solved cube */
 cube_t solvedcube(void);
 
 /* Basic checks on the cube */
+bool isconsistent(cube_t);
 bool issolvable(cube_t);
-bool equal(cube_t, cube_t);
 bool issolved(cube_t);
+bool equal(cube_t, cube_t);
+
+/* All functions can return an error value, use iserror() to check this */
+bool iserror(cube_t);
 
 /* Apply the second cube on the first as a move sequence */
 cube_t compose(cube_t, cube_t);
@@ -47,134 +48,75 @@ cube_t compose(cube_t, cube_t);
 /* Invert the cube */
 cube_t inverse(cube_t);
 
-/* All functions can return an error value, use iserror() to check this */
-bool iserror(cube_t);
-
-/******************************************************************************
-Moves and transformations
-
-Moves and transformations are represented each as an (unsigned) 8 bit integer.
+/* Check if a cube represents a valid state (possibly unsolvable) */
 
-Moves are numbered as follows:
-U=0  U2=1  U'=2  D=3  D2=4  D'=5
-R=6  R2=7  R'=8  L=9  L2=10 L'=11
-F=12 F2=13 F'=14 B=15 B2=16 B'=17
+/* TODO comment on these and the format for moves and trans */
+/* For trans, only one trans is supported */
+cube_t applymoves(cube_t, char *);
+cube_t applytrans(cube_t, char *);
 
-TODO: NISS
+/******************************************************************************
+Read / write utilities
 
-TODO: Extend the moveset?
+Reading and writing is not done directly via stdin / stdout, but via an
+array of char (called buf in the prototypes below).
 
-Transformations can be either simple rotations or a rotation composed
-with a mirroring.  A composed rotation + mirror is obtained by applying
-the corresponding rotation to the solved cube mirrored along the M plane.
+Multiple representations of the cube as text are supported:
 
-For example, to apply the transformation RBm (mirrored RB) to a cube C:
-	1. Apply a mirror along the M plane to the solved cube
-	2. Rotate the mirrored cube with z' y2
-	3. Apply the cube C to the transformed solved cube
-	4. Apply the transformations of step 1a and 1b in reverse
+- H48: a human-readable format.
+  Each edge is represented by two letters denoting the sides it
+  belongs to and one number denoting its orientation (0 oriented, 1
+  mis-oriented). Similarly, each corner is represented by three letters and
+  a number (0 oriented, 1 twisted clockwise, 2 twisted counter-clockwise).
 
-See cube.c for a full list of transformations.
-******************************************************************************/
+  The solved cube looks like this:
 
-typedef uint8_t move_t;
-typedef uint8_t trans_t;
+  UF0 UB0 DB0 DF0 UR0 UL0 DL0 DR0 FR0 FL0 BL0 BR0
+  UFR0 UBL0 DFL0 DBR0 UFL0 UBR0 DFR0 DBL0
 
-/* Apply a move or a transformation on the cube */
-cube_t move(cube_t, move_t);
-cube_t transform(cube_t, trans_t);
+  The cube after the moves R'U'F looks like this:
 
-/******************************************************************************
-Read / write utilities
+  FL1 BR0 DB0 UR1 UF0 UB0 DL0 FR0 UL1 DF1 BL0 DR0
+  UBL1 DBR1 UFR2 DFR2 DFL2 UBL2 UFL2 DBL0
 
-Reading and writing is not done directly via stdin / stdout, but via an
-array of char (called buf in the prototypes below).
+  Whitespace (including newlines) between pieces is ignored when reading the
+  cube. A single whitespace character is added between pieces when writing.
 
-Multiple representations of the cube as text are supported, although
-not all of them are supported for both reading and writing. See below
-for details. More formats may be supported in the future.
+- SRC: format used to generate code for internal use.
+  In cube.c, a type called cube_array_t is defined and used for basic,
+  non-performance-critical methods. If OUT is the output in SRC format,
+  the following line can be used to declare a new cube object:
 
-Moves are read using the standard notation. Each move (U, D, R, L, F,
-B) can be followed by a modifier (1, 2, 3, '). Whitespace (spaces, tabs,
-newlines) are ignored. Parantheses and other notation is not supported.
-TODO: parantheses for NISS
+cube_array_t cube = OUT
+      
+- AVX: analogous to SRC, but for the AVX2 internal representation of the cube.
 
-For how transformations are read or written, see cube.c.
+Not all formats are supported for both reading and writing. More formats
+may be supported in the future.
 ******************************************************************************/
 
-/* The different formats for reading or writing the cube */
-typedef enum {
-	H48, /* H48 is a human-readable format.
-	      *
-	      * Each edge is represented by two letters denoting the sides it
-	      * belongs to and one number denoting its orientation (0 oriented,
-	      * 1 mis-oriented). Similarly, each corner is represented by three
-	      * letters and a number (0 oriented, 1 twisted clockwise, 2
-	      * twisted counter-clockwise).
-	      *
-	      * The solved cube looks like this:
-	      *
-	      * UF0 UB0 DB0 DF0 UR0 UL0 DL0 DR0 FR0 FL0 BL0 BR0
-	      * UFR0 UBL0 DFL0 DBR0 UFL0 UBR0 DFR0 DBL0
-	      *
-	      * The cube after the moves R'U'F looks like this:
-	      *
-	      * FL1 BR0 DB0 UR1 UF0 UB0 DL0 FR0 UL1 DF1 BL0 DR0
-	      * UBL1 DBR1 UFR2 DFR2 DFL2 UBL2 UFL2 DBL0
-	      *
-	      * Whitespace (including newlines) between pieces is ignored when
-	      * reading the cube. A single whitespace character is added
-	      * between pieces when writing.
-	      */
-	SRC, /* The SRC format can be used to generate code for internal use.
-	      *
-	      * In cube.c, a type called cube_array_t is defined and used for
-	      * basic, non-performance-critical methods. If OUT is the output
-	      * in SRC format, the following line can be used to declare a new
-	      * cube object:
-	      *
-	      * cube_array_t cube = OUT
-	      */
-	AVX, /* The AVX format is analogous to SRC, but for the AVX2 internal
-	      * representation of the cube.
-	      */
-} format_t;
-
 /* Reads a cube from buf in the specified format, and return it.
- * Supported formats: H48.
+ * Supported formats: "H48".
  */
-cube_t readcube(format_t format, char *buf);
+cube_t readcube(char *format, char *buf);
 
 /* Write the given cube to buf in the specified format.
- * Supported formats: H48, SRC, AVX.
+ * Supported formats: "H48", "SRC", "AVX".
  */
-void writecube(format_t format, cube_t cube, char *buf);
-
-/* Utilities for reading and writing moves */
-int readmoves(char *buf, move_t *moves);
-void writemoves(move_t *moves, int n, char *buf);
-trans_t readtrans(char *buf);
-void writetrans(trans_t trans, char *buf);
+void writecube(char *format, cube_t cube, char *buf);
 
 /******************************************************************************
 Coordinates
 
-The coordinate functions compute one aspect of the cube (for example,
-the edge orientation) and they return it as an integer. They are used
-for example to build pruning tables for various solving methods.
+TODO description
 ******************************************************************************/
 
-int16_t coord_eo(cube_t); /* Edge orientation */
+int64_t coord_eo(cube_t);
 
 /******************************************************************************
 Solvers
 
-All solvers work at fixed depth, i.e. they will only find solutions of the
-specified length. Iterating over the possible lengths, if desired, is left as
-an implementation detail for the user of this library.
-
-The solutions are returned as a list of moves, which can then be converted to
-a string using writemoves().
+The solutions are returned as a newline-separated list of characters.
 
 Unless specified otherwise, all the solutions are not trivially simplifiable.
 This means that sequences like U U2 or R L R will not appear in any solution.
@@ -188,6 +130,7 @@ TODO NISS / INVERSE / LINEAR as a mask?
 
 All solvers take at least the following parameters, satisfying the conditions
 in square brackets:
+TODO more!
 	- cube_t cube [issolvable(cube)]: The cube to solve.
 	- uint8_t depth [depth <= 20]: The length of the solution.
 	- int maxsols: The maximum number of solutions to find. The solver
@@ -201,12 +144,12 @@ in square brackets:
 Some solvers take other parameters. See below for details.
 ******************************************************************************/
 
+/* TODO
 int solve_generic(
 	cube_t cube,
 	uint8_t depth,
 	int maxsols,
-	move_t *ret
-	int (*estimate)(cube_t),
+	uint8_t *ret, // TODO change to char
+	int (*estimate)(cube_t)
 );
-
-int solve_light(cube_t, int
+*/
diff --git a/test/010_io_H48_read_write/io_H48_tests.c b/test/010_io_H48_read_write/io_H48_tests.c
@@ -17,14 +17,14 @@ int main() {
 	for (c = str; (*c = getchar()) != EOF; c++) ;
 	*c = '\0';
 
-	cube = readcube(H48, str);
+	cube = readcube("H48", str);
 
 	if (iserror(cube)) {
 		printf("Error reading cube\n");
 	} else if (!issolvable(cube)) {
 		printf("Cube is not solvable\n");
 	} else {
-		writecube(H48, cube, str);
+		writecube("H48", cube, str);
 		printf("%s\n", str);
 	}
 
diff --git a/test/011_io_SRC_write/01_solved.out b/test/011_io_SRC_write/01_solved.out
@@ -1,4 +1,4 @@
 {
-	.c = {0, 1, 2, 3, 4, 5, 6, 7},
-	.e = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}
+	.corner = {0, 1, 2, 3, 4, 5, 6, 7},
+	.edge = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}
 }
diff --git a/test/011_io_SRC_write/02_scrambled.out b/test/011_io_SRC_write/02_scrambled.out
@@ -1,4 +1,4 @@
 {
-	.c = {38, 32, 37, 68, 67, 2, 71, 1},
-	.e = {9, 2, 17, 8, 4, 3, 0, 27, 21, 26, 6, 7}
+	.corner = {38, 32, 37, 68, 67, 2, 71, 1},
+	.edge = {9, 2, 17, 8, 4, 3, 0, 27, 21, 26, 6, 7}
 }
diff --git a/test/011_io_SRC_write/io_SRC_tests.c b/test/011_io_SRC_write/io_SRC_tests.c
@@ -17,14 +17,14 @@ int main() {
 	for (c = str; (*c = getchar()) != EOF; c++) ;
 	*c = '\0';
 
-	cube = readcube(H48, str);
+	cube = readcube("H48", str);
 
 	if (iserror(cube)) {
 		printf("Error reading cube\n");
 	} else if (!issolvable(cube)) {
 		printf("Cube is not solvable\n");
 	} else {
-		writecube(SRC, cube, str);
+		writecube("SRC", cube, str);
 		printf("%s\n", str);
 	}
 
diff --git a/test/012_io_AVX_write/io_AVX_tests.c b/test/012_io_AVX_write/io_AVX_tests.c
@@ -17,14 +17,14 @@ int main() {
 	for (c = str; (*c = getchar()) != EOF; c++) ;
 	*c = '\0';
 
-	cube = readcube(H48, str);
+	cube = readcube("H48", str);
 
 	if (iserror(cube)) {
 		printf("Error reading cube\n");
 	} else if (!issolvable(cube)) {
 		printf("Cube is not solvable\n");
 	} else {
-		writecube(AVX, cube, str);
+		writecube("AVX", cube, str);
 		printf("%s\n", str);
 	}
 
diff --git a/test/020_move/move_tests.c b/test/020_move/move_tests.c
@@ -12,32 +12,22 @@
 #define MOVESMAX 1000
 
 int main() {
-	char str[STRLENMAX];
-	int i, n;
-	move_t moves[MOVESMAX];
+	char movestr[STRLENMAX], cubestr[STRLENMAX];
 	cube_t cube;
 
-	fgets(str, STRLENMAX, stdin);
-	n = readmoves(str, moves);
+	fgets(movestr, STRLENMAX, stdin);
+	fgets(cubestr, STRLENMAX, stdin);
+	cube = readcube("H48", cubestr);
 
-	if (n == -1) {
-		printf("Error reading moves\n");
-		return 1;
-	}
-
-	fgets(str, STRLENMAX, stdin);
-	cube = readcube(H48, str);
-
-	for (i = 0; i < n; i++)
-		cube = move(cube, moves[i]);
+	cube = applymoves(cube, movestr);
 
 	if (iserror(cube)) {
 		printf("Error moving cube\n");
 	} else if (!issolvable(cube)) {
 		printf("Moved cube is not solvable\n");
 	} else {
-		writecube(H48, cube, str);
-		printf("%s\n", str);
+		writecube("H48", cube, cubestr);
+		printf("%s\n", cubestr);
 	}
 
 	return 0;
diff --git a/test/030_inverse_cube/inverse_tests.c b/test/030_inverse_cube/inverse_tests.c
@@ -15,7 +15,7 @@ int main() {
 	cube_t cube, inv;
 
 	fgets(str, STRLENMAX, stdin);
-	cube = readcube(H48, str);
+	cube = readcube("H48", str);
 	inv = inverse(cube);
 
 	if (iserror(inv)) {
@@ -23,7 +23,7 @@ int main() {
 	} else if (!issolvable(inv)) {
 		printf("Inverted cube is not solvable\n");
 	} else {
-		writecube(H48, inv, str);
+		writecube("H48", inv, str);
 		printf("%s\n", str);
 	}
 
diff --git a/test/040_compose/compose_tests.c b/test/040_compose/compose_tests.c
@@ -15,9 +15,9 @@ int main() {
 	cube_t c1, c2, c3;
 
 	fgets(str, STRLENMAX, stdin);
-	c1 = readcube(H48, str);
+	c1 = readcube("H48", str);
 	fgets(str, STRLENMAX, stdin);
-	c2 = readcube(H48, str);
+	c2 = readcube("H48", str);
 
 	c3 = compose(c1, c2);
 
@@ -26,7 +26,7 @@ int main() {
 	} else if (!issolvable(c3)) {
 		printf("Composed cube is not solvable\n");
 	} else {
-		writecube(H48, c3, str);
+		writecube("H48", c3, str);
 		printf("%s\n", str);
 	}
 
diff --git a/test/050_transform/transform_tests.c b/test/050_transform/transform_tests.c
@@ -11,30 +11,22 @@
 #define STRLENMAX 10000
 
 int main() {
-	char str[STRLENMAX];
-	trans_t t;
+	char cubestr[STRLENMAX], transtr[STRLENMAX];
 	cube_t cube;
 
-	fgets(str, STRLENMAX, stdin);
-	t = readtrans(str);
+	fgets(transtr, STRLENMAX, stdin);
+	fgets(cubestr, STRLENMAX, stdin);
+	cube = readcube("H48", cubestr);
 
-	if (t >= 48) {
-		printf("Error reading trans\n");
-		return 1;
-	}
-
-	fgets(str, STRLENMAX, stdin);
-	cube = readcube(H48, str);
-
-	cube = transform(cube, t);
+	cube = applytrans(cube, transtr);
 
 	if (iserror(cube)) {
 		printf("Error transforming cube\n");
 	} else if (!issolvable(cube)) {
 		printf("Transformed cube is not solvable\n");
 	} else {
-		writecube(H48, cube, str);
-		printf("%s\n", str);
+		writecube("H48", cube, cubestr);
+		printf("%s\n", cubestr);
 	}
 
 	return 0;
diff --git a/test/061_coord_eo/coord_eo_tests.c b/test/061_coord_eo/coord_eo_tests.c
@@ -16,7 +16,7 @@ int main() {
 	int16_t result;
 
 	fgets(str, STRLENMAX, stdin);
-	cube = readcube(H48, str);
+	cube = readcube("H48", str);
 
 	result = coord_eo(cube);
 
diff --git a/utils/genmovecode.sh b/utils/genmovecode.sh
@@ -7,11 +7,11 @@ gcc -DDEBUG h48_to_"$type".c ../cube.c -o h48_to_"$type"
 genfuncs() {
 	for f in move_??_*.txt; do
 		move="$(echo $f | sed 's/.*_// ; s/\.txt//')"
-		printf 'static inline cube_t\n_move_%s' "$move"
-		printf '(cube_t c)\n{\n'
-		printf '\tcube_t m = '
+		printf 'static inline cube_fast_t\n_move_%s' "$move"
+		printf '(cube_fast_t c)\n{\n'
+		printf '\tcube_fast_t m = '
 		./h48_to_"$type" <"$f" | sed '2,4s/^/\t/'
-		printf ';\n\n\treturn _compose(c, m);\n}\n\n'
+		printf ';\n\n\treturn compose_fast(c, m);\n}\n\n'
 	done
 }
 
diff --git a/utils/gentranscode.sh b/utils/gentranscode.sh
@@ -22,17 +22,17 @@ genarray() {
 genfuncs() {
 	for f in transform_??_???.txt; do
 		trans="$(echo $f | sed 's/.*_// ; s/\.txt//')"
-		printf 'static inline cube_t\n_trans_%s' "$trans"
-		printf '(cube_t c)\n{\n'
-		printf '\tcube_t ret;\n\n'
-		printf '\tcube_t tn = '
+		printf 'static inline cube_fast_t\n_trans_%s' "$trans"
+		printf '(cube_fast_t c)\n{\n'
+		printf '\tcube_fast_t ret;\n\n'
+		printf '\tcube_fast_t tn = '
 		./h48_to_"$type" <"$f" | sed '2,4s/^/\t/'
-		printf ';\n\tcube_t ti = '
+		printf ';\n\tcube_fast_t ti = '
 		./invert <"$f" | ./h48_to_"$type" | sed '2,4 s/^/\t/'
-		printf ';\n\n\tret = compose(tn, c);\n'
-		printf '\tret = compose(ret, ti);\n'
+		printf ';\n\n\tret = compose_fast(tn, c);\n'
+		printf '\tret = compose_fast(ret, ti);\n'
 		if [ -n "$(echo "$trans" | grep "m")" ]; then
-			printf '\tret = _invertco(ret);\n'
+			printf '\tret = invertco_fast(ret);\n'
 		fi
 		printf '\n\treturn ret;\n}\n\n'
 	done
diff --git a/utils/gentransswitch.sh b/utils/gentransswitch.sh
@@ -3,6 +3,6 @@
 printf '\tswitch (t) {\n'
 for f in transform_??_???.txt; do
 	t="$(echo $f | sed 's/.*_// ; s/\.txt//')"
-	printf '\tcase %s:\n\t\treturn inline_trans_%s(c);\n' "$t" "$t"
+	printf '\tcase %s:\n\t\treturn _trans_%s(c);\n' "$t" "$t"
 done
 printf '\t}\n'

	h48 A prototype for an optimal Rubik's cube solver, work in progress.
	git clone https://git.tronto.net/h48
	Download \| Log \| Files \| Refs \| README \| LICENSE

M	TODO.txt	\|	40	+++++++++++++++++++---------------------
M	cube.c	\|	3725	+++++++++++++++++++++++++++++++++++++++----------------------------------------
M	cube.h	\|	161	++++++++++++++++++++++++++-----------------------------------------------------
M	test/010_io_H48_read_write/io_H48_tests.c	\|	4	++--
M	test/011_io_SRC_write/01_solved.out	\|	4	++--
M	test/011_io_SRC_write/02_scrambled.out	\|	4	++--
M	test/011_io_SRC_write/io_SRC_tests.c	\|	4	++--
M	test/012_io_AVX_write/io_AVX_tests.c	\|	4	++--
M	test/020_move/move_tests.c	\|	24	+++++++-----------------
M	test/030_inverse_cube/inverse_tests.c	\|	4	++--
M	test/040_compose/compose_tests.c	\|	6	+++---
M	test/050_transform/transform_tests.c	\|	22	+++++++---------------
M	test/061_coord_eo/coord_eo_tests.c	\|	2	+-
M	utils/genmovecode.sh	\|	8	++++----
M	utils/gentranscode.sh	\|	16	++++++++--------
M	utils/gentransswitch.sh	\|	2	+-