h48

A prototype for an optimal Rubik's cube solver, work in progress.
git clone https://git.tronto.net/h48
Download | Log | Files | Refs | README | LICENSE

neon.h (7232B)


      1 #define CO2_NEON vdup_n_u8(0x60)
      2 #define COCW_NEON vdup_n_u8(0x20)
      3 #define CP_NEON vdup_n_u8(0x07)
      4 #define EP_NEON vcombine_u8(vdupq_n_u8(0x0F), vdupq_n_u8(0x0F))
      5 #define EO_NEON vcombine_u8(vdupq_n_u8(0x10), vdupq_n_u8(0x10))
      6 
      7 STATIC_INLINE uint8x16_t compose_edges_slim(uint8x16_t, uint8x16_t);
      8 STATIC_INLINE uint8x8_t compose_corners_slim(uint8x8_t, uint8x8_t);
      9 
     10 // static cube
     11 #define STATIC_CUBE(c_ufr, c_ubl, c_dfl, c_dbr, c_ufl, c_ubr, c_dfr, c_dbl, \
     12 					e_uf, e_ub, e_db, e_df, e_ur, e_ul, e_dl, e_dr, e_fr, e_fl, e_bl, e_br) \
     13 	((cube_t){ \
     14 		.corner = {c_ufr, c_ubl, c_dfl, c_dbr, c_ufl, c_ubr, c_dfr, c_dbl}, \
     15 		.edge = {e_uf, e_ub, e_db, e_df, e_ur, e_ul, e_dl, e_dr, e_fr, e_fl, e_bl, e_br, 0, 0, 0, 0}})
     16 
     17 // zero cube
     18 #define ZERO_CUBE \
     19 	(cube_t) \
     20 	{ \
     21 		.corner = vdup_n_u8(0), \
     22 		.edge = vdupq_n_u8(0) \
     23 	}
     24 
     25 // solved cube
     26 #define SOLVED_CUBE STATIC_CUBE(	\
     27 	0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)
     28 
     29 STATIC void
     30 pieces(cube_t *cube, uint8_t c[static 8], uint8_t e[static 12])
     31 {
     32 	// First 8 bytes of the corner vector are copied from the c array
     33 	vst1_u8(c, cube->corner);
     34 
     35 	// 12 bytes of the edge vector are copied from the e array
     36 	// First 8 bytes
     37 	vst1_u8(e, vget_low_u8(cube->edge));
     38 	// Next 4 bytes
     39 	vst1_lane_u32((uint32_t *)(e + 8), vreinterpret_u32_u8(vget_high_u8(cube->edge)), 0);
     40 }
     41 
     42 STATIC_INLINE bool
     43 equal(cube_t c1, cube_t c2)
     44 {
     45 	uint8x8_t cmp_corner;
     46 	uint8x16_t cmp_edge;
     47 	uint64x2_t cmp_corner_u64, cmp_edge_u64, cmp_result;
     48 
     49 	// compare the corner vectors and the edge vectors
     50 	cmp_corner = vceq_u8(c1.corner, c2.corner);
     51 	cmp_edge = vceqq_u8(c1.edge, c2.edge);
     52 
     53 	// convert the comparison vectors to 64-bit vectors and combine them
     54 	cmp_corner_u64 = vreinterpretq_u64_u8(vcombine_u64(cmp_corner, cmp_corner));
     55 	cmp_edge_u64 = vreinterpretq_u64_u8(cmp_edge);
     56 	cmp_result = vandq_u64(cmp_corner_u64, cmp_edge_u64);
     57 
     58 	// check if all the bits are set
     59 	return vgetq_lane_u64(cmp_result, 0) == ~0ULL && vgetq_lane_u64(cmp_result, 1) == ~0ULL;
     60 }
     61 
     62 STATIC_INLINE cube_t
     63 invertco(cube_t c)
     64 {
     65 	cube_t ret;
     66 	uint8x8_t co, shleft, shright, summed, newco, cleanco;
     67 
     68 	co = vand_u8(c.corner, CO2_NEON);
     69 	shleft = vshl_n_u8(co, 1);
     70 	shright = vshr_n_u8(co, 1);
     71 	summed = vorr_u8(shleft, shright);
     72 	newco = vand_u8(summed, CO2_NEON);
     73 	cleanco = veor_u8(c.corner, co);
     74 	ret.corner = vorr_u8(cleanco, newco);
     75 	ret.edge = c.edge;
     76 	
     77 	return ret;
     78 }
     79 
     80 STATIC_INLINE cube_t
     81 compose_edges(cube_t c1, cube_t c2)
     82 {
     83 	cube_t ret = {0};
     84 	ret.edge = compose_edges_slim(c1.edge, c2.edge);
     85 	return ret;
     86 }
     87 
     88 STATIC_INLINE cube_t
     89 compose_corners(cube_t c1, cube_t c2)
     90 {
     91 	cube_t ret = {0};
     92 	ret.corner = compose_corners_slim(c1.corner, c2.corner);
     93 	return ret;
     94 }
     95 
     96 STATIC_INLINE uint8x16_t
     97 compose_edges_slim(uint8x16_t edge1, uint8x16_t edge2)
     98 {
     99 	// Masks
    100 	uint8x16_t p_bits = vdupq_n_u8(PBITS);
    101 	uint8x16_t eo_bit = vdupq_n_u8(EOBIT);
    102 
    103 	// Find the index and permutation
    104 	uint8x16_t p = vandq_u8(edge2, p_bits);
    105 	uint8x16_t piece1 = vqtbl1q_u8(edge1, p);
    106 
    107 	// Calculate the orientation through XOR
    108 	uint8x16_t orien = vandq_u8(veorq_u8(edge2, piece1), eo_bit);
    109 
    110 	// Combine the results
    111 	uint8x16_t ret = vorrq_u8(vandq_u8(piece1, p_bits), orien);
    112 
    113 	// Mask to clear the last 32 bits of the result
    114 	uint8x16_t mask_last_32 = vsetq_lane_u32(0, vreinterpretq_u32_u8(ret), 3);
    115 	ret = vreinterpretq_u8_u32(mask_last_32);
    116 
    117 	return ret;
    118 }
    119 
    120 STATIC_INLINE uint8x8_t
    121 compose_corners_slim(uint8x8_t corner1, uint8x8_t corner2)
    122 {
    123 	// Masks
    124 	uint8x8_t p_bits = vdup_n_u8(PBITS);
    125 	uint8x8_t cobits = vdup_n_u8(COBITS);
    126 	uint8x8_t cobits2 = vdup_n_u8(COBITS_2);
    127 	uint8x8_t twist_cw = vdup_n_u8(CTWIST_CW);
    128 
    129 	// Find the index and permutation
    130 	uint8x8_t p = vand_u8(corner2, p_bits);
    131 	uint8x8_t piece1 = vtbl1_u8(corner1, p);
    132 
    133 	// Calculate the orientation
    134 	uint8x8_t aux = vadd_u8(vand_u8(corner2, cobits), vand_u8(piece1, cobits));
    135 	uint8x8_t auy = vshr_n_u8(vadd_u8(aux, twist_cw), 2);
    136 	uint8x8_t orien = vand_u8(vadd_u8(aux, auy), cobits2);
    137 
    138 	uint8x8_t ret = vorr_u8(vand_u8(piece1, p_bits), orien);
    139 
    140 	return ret;
    141 }
    142 
    143 STATIC_INLINE cube_t
    144 compose(cube_t c1, cube_t c2)
    145 {
    146 	cube_t ret = {0};
    147 
    148 	ret.edge = compose_edges_slim(c1.edge, c2.edge);
    149 	ret.corner = compose_corners_slim(c1.corner, c2.corner);
    150 
    151 	return ret;
    152 }
    153 
    154 STATIC_INLINE cube_t
    155 inverse(cube_t cube)
    156 {
    157 	uint8_t i, piece, orien;
    158 	cube_t ret;
    159 
    160 	// Temp arrays to store the NEON vectors
    161 	uint8_t edges[16];
    162 	uint8_t corners[8];
    163 
    164 	// Copy the NEON vectors to the arrays
    165 	vst1q_u8(edges, cube.edge);
    166 	vst1_u8(corners, cube.corner);
    167 
    168 	uint8_t edge_result[16] = {0};
    169 	uint8_t corner_result[8] = {0};
    170 
    171 	// Process the edges
    172 	for (i = 0; i < 12; i++)
    173 	{
    174 		piece = edges[i];
    175 		orien = piece & EOBIT;
    176 		edge_result[piece & PBITS] = i | orien;
    177 	}
    178 
    179 	// Process the corners
    180 	for (i = 0; i < 8; i++)
    181 	{
    182 		piece = corners[i];
    183 		orien = ((piece << 1) | (piece >> 1)) & COBITS_2;
    184 		corner_result[piece & PBITS] = i | orien;
    185 	}
    186 
    187 	// Copy the results back to the NEON vectors
    188 	ret.edge = vld1q_u8(edge_result);
    189 	ret.corner = vld1_u8(corner_result);
    190 
    191 	return ret;
    192 }
    193 
    194 STATIC_INLINE int64_t
    195 coord_co(cube_t c)
    196 {
    197 	// Temp array to store the NEON vector
    198 	uint8_t mem[8];
    199 	vst1_u8(mem, c.corner);
    200 
    201 	int i, p;
    202 	int64_t ret;
    203 
    204 	for (ret = 0, i = 0, p = 1; i < 7; i++, p *= 3)
    205 		ret += p * (mem[i] >> COSHIFT);
    206 
    207 	return ret;
    208 }
    209 
    210 STATIC_INLINE int64_t
    211 coord_csep(cube_t c)
    212 {
    213 	// Temp array to store the NEON vector
    214 	uint8_t mem[8];
    215 	vst1_u8(mem, c.corner);
    216 
    217 	int64_t ret = 0;
    218 	int i, p;
    219 	for (ret = 0, i = 0, p = 1; i < 7; i++, p *= 2)
    220 		ret += p * ((mem[i] & CSEPBIT) >> 2);
    221 
    222 	return ret;
    223 	return 0;
    224 }
    225 
    226 STATIC_INLINE int64_t
    227 coord_cocsep(cube_t c)
    228 {
    229 	return (coord_co(c) << 7) + coord_csep(c);
    230 }
    231 
    232 STATIC_INLINE int64_t
    233 coord_eo(cube_t c)
    234 {
    235 	int64_t ret = 0;
    236 	int64_t p = 1;
    237 
    238 	// Temp array to store the NEON vector
    239 	uint8_t mem[16];
    240 	vst1q_u8(mem, c.edge);
    241 
    242 	for (int i = 1; i < 12; i++, p *= 2)
    243 	{
    244 		ret += p * (mem[i] >> EOSHIFT);
    245 	}
    246 
    247 	return ret;
    248 }
    249 
    250 STATIC_INLINE int64_t
    251 coord_esep(cube_t c)
    252 {
    253 	int64_t i, j, jj, k, l, ret1, ret2, bit1, bit2, is1;
    254 
    255 	// Temp array to store the NEON vector
    256 	uint8_t mem[16];
    257 	vst1q_u8(mem, c.edge);
    258 
    259 	for (i = 0, j = 0, k = 4, l = 4, ret1 = 0, ret2 = 0; i < 12; i++)
    260 	{
    261 		bit1 = (mem[i] & ESEPBIT_1) >> 2;
    262 		bit2 = (mem[i] & ESEPBIT_2) >> 3;
    263 		is1 = (1 - bit2) * bit1;
    264 
    265 		ret1 += bit2 * binomial[11 - i][k];
    266 		k -= bit2;
    267 
    268 		jj = j < 8;
    269 		ret2 += jj * is1 * binomial[7 - (j * jj)][l];
    270 		l -= is1;
    271 		j += (1 - bit2);
    272 	}
    273 
    274 	return ret1 * 70 + ret2;
    275 }
    276 
    277 STATIC_INLINE void
    278 copy_corners(cube_t *dst, cube_t src)
    279 {
    280 	dst->corner = src.corner;
    281 }
    282 
    283 STATIC_INLINE void
    284 copy_edges(cube_t *dst, cube_t src)
    285 {
    286 	dst->edge = src.edge;
    287 }
    288 
    289 STATIC_INLINE void
    290 set_eo(cube_t *cube, int64_t eo)
    291 {
    292 	// Temp array to store the NEON vector
    293 	uint8_t mem[16];
    294 	vst1q_u8(mem, cube->edge);
    295 	uint8_t i, sum, flip;
    296 
    297 	for (sum = 0, i = 1; i < 12; i++, eo >>= 1)
    298 	{
    299 		flip = eo % 2;
    300 		sum += flip;
    301 		mem[i] = (mem[i] & ~EOBIT) | (EOBIT * flip);
    302 	}
    303 	mem[0] = (mem[0] & ~EOBIT) | (EOBIT * (sum % 2));
    304 
    305 	// Copy the results back to the NEON vector
    306 	cube->edge = vld1q_u8(mem);
    307 	return;
    308 }
    309 
    310 STATIC_INLINE cube_t
    311 invcoord_esep(int64_t esep)
    312 {
    313 	cube_t ret;
    314 	uint8_t mem[16] = {0};
    315 
    316 	invcoord_esep_array(esep % 70, esep / 70, mem);
    317 
    318 	ret = SOLVED_CUBE;
    319 	ret.edge = vld1q_u8(mem);
    320 
    321 	return ret;
    322 }