nissy-core

The "engine" of nissy, including the H48 optimal solver.
git clone https://git.tronto.net/nissy-core
Download | Log | Files | Refs | README | LICENSE

gendata_h48.h (19456B)


      1 STATIC long long gendata_h48_dispatch(
      2     const char *, unsigned long long, unsigned char *);
      3 STATIC uint64_t gendata_h48short(gendata_h48short_arg_t [static 1]);
      4 STATIC int64_t gendata_h48(gendata_h48_arg_t [static 1]);
      5 STATIC void gendata_h48h0k4(gendata_h48_arg_t [static 1]);
      6 STATIC void gendata_h48k2(gendata_h48_arg_t [static 1]);
      7 
      8 STATIC void *gendata_h48h0k4_runthread(void *);
      9 STATIC void *gendata_h48k2_runthread(void *);
     10 
     11 STATIC_INLINE void gendata_h48_mark_atomic(gendata_h48_mark_t [static 1]);
     12 STATIC_INLINE void gendata_h48_mark(gendata_h48_mark_t [static 1]);
     13 STATIC_INLINE bool gendata_h48k2_dfs_stop(
     14     cube_t, int8_t, h48k2_dfs_arg_t [static 1]);
     15 STATIC void gendata_h48k2_dfs(h48k2_dfs_arg_t [static 1]);
     16 STATIC tableinfo_t makeinfo_h48k2(gendata_h48_arg_t [static 1]);
     17 
     18 STATIC const uint32_t *get_cocsepdata_constptr(const unsigned char *);
     19 STATIC const unsigned char *get_h48data_constptr(const unsigned char *);
     20 
     21 STATIC_INLINE uint8_t get_h48_pval(const unsigned char *, int64_t, uint8_t);
     22 STATIC_INLINE void set_h48_pval(unsigned char *, int64_t, uint8_t, uint8_t);
     23 STATIC_INLINE uint8_t get_h48_pval_atomic(
     24     _Atomic const unsigned char *, int64_t, uint8_t);
     25 STATIC_INLINE void set_h48_pval_atomic(
     26     _Atomic unsigned char *, int64_t, uint8_t, uint8_t);
     27 
     28 STATIC long long
     29 gendata_h48_dispatch(
     30 	const char *solver,
     31 	unsigned long long data_size,
     32 	unsigned char *data
     33 )
     34 {
     35 	long long err;
     36 	gendata_h48_arg_t arg;
     37 
     38 	err = parse_h48_hk(solver, &arg.h, &arg.k);
     39 	if (err != NISSY_OK)
     40 		return err;
     41 
     42 	arg.buf_size = data_size;
     43 	arg.buf = data;
     44 	arg.maxdepth = 20;
     45 
     46 	return gendata_h48(&arg);
     47 }
     48 
     49 STATIC uint64_t
     50 gendata_h48short(gendata_h48short_arg_t arg[static 1])
     51 {
     52 	uint8_t i, m;
     53 	int64_t coord;
     54 	uint64_t j;
     55 	kvpair_t kv;
     56 	cube_t cube, d;
     57 
     58 	cube = SOLVED_CUBE;
     59 	coord = coord_h48(cube, arg->cocsepdata, 11);
     60 	h48map_insertmin(arg->map, coord, 0);
     61 	for (i = 0; i < arg->maxdepth; i++) {
     62 		j = 0;
     63 		for (kv = h48map_nextkvpair(arg->map, &j);
     64 		     j != arg->map->capacity;
     65 		     kv = h48map_nextkvpair(arg->map, &j)
     66 		) {
     67 			if (kv.val != i)
     68 				continue;
     69 			cube = invcoord_h48(kv.key, arg->crep, 11);
     70 			for (m = 0; m < 18; m++) {
     71 				d = move(cube, m);
     72 				FOREACH_H48SIM(d, arg->cocsepdata, arg->selfsim,
     73 					coord = coord_h48(d, arg->cocsepdata, 11);
     74 					h48map_insertmin(arg->map, coord, i+1);
     75 				)
     76 			}
     77 		}
     78 	}
     79 
     80 	return arg->map->n;
     81 }
     82 
     83 STATIC int64_t
     84 gendata_h48(gendata_h48_arg_t arg[static 1])
     85 {
     86 	uint64_t size, cocsepsize, h48size, fallbacksize, fallback2size, of;
     87 	long long r;
     88 	unsigned char *cocsepdata_offset;
     89 	tableinfo_t cocsepinfo, h48info, fallbackinfo;
     90 	gendata_h48_arg_t arg_h0k4;
     91 
     92 	cocsepsize = COCSEP_FULLSIZE;
     93 	h48size = INFOSIZE + H48_TABLESIZE(arg->h, arg->k);
     94 	fallbacksize = arg->k == 2 ? INFOSIZE + H48_TABLESIZE(0, 4) : 0;
     95 	fallback2size = EOESEP_FULLSIZE;
     96 
     97 	/* Add padding for 8-bit alignment */
     98 	h48size = 8 * DIV_ROUND_UP(h48size, 8);
     99 	fallbacksize = 8 * DIV_ROUND_UP(fallbacksize, 8);
    100 	fallback2size = 8 * DIV_ROUND_UP(fallback2size, 8);
    101 
    102 	size = cocsepsize + h48size + fallbacksize + fallback2size;
    103 
    104 	if (arg->buf == NULL)
    105 		return size; /* Dry-run */
    106 
    107 	if (arg->buf_size < size) {
    108 		LOG("[H48 gendata] Error: buffer is too small "
    109 		    "(needed %" PRId64 " bytes but received %" PRId64 ")\n",
    110 		    size, arg->buf_size);
    111 		return NISSY_ERROR_BUFFER_SIZE;
    112 	}
    113 
    114 	gendata_cocsep(arg->buf, arg->selfsim, arg->crep);
    115 
    116 	cocsepdata_offset = arg->buf + INFOSIZE;
    117 	arg->cocsepdata = (uint32_t *)cocsepdata_offset;
    118 	arg->h48buf = (_Atomic unsigned char*)arg->buf + cocsepsize;
    119 
    120 	arg->base = 99; /* TODO: set this somewhere else */
    121 
    122 	if (arg->h == 0 && arg->k == 4) {
    123 		gendata_h48h0k4(arg);
    124 	} else if (arg->k == 2) {
    125 		gendata_h48k2(arg);
    126 	} else {
    127 		LOG("[H48 gendata] Error: cannot generate data for h = %" PRIu8
    128 		    " and k = %" PRIu8 " (not implemented yet)\n",
    129 		    arg->h, arg->k);
    130 		return NISSY_ERROR_INVALID_SOLVER;
    131 	}
    132 
    133 	r = readtableinfo(arg->buf_size, arg->buf, &cocsepinfo);
    134 	if (r != NISSY_OK) {
    135 		LOG("[H48 gendata] Error: could not read info "
    136 		    "for cocsep table\n");
    137 		return NISSY_ERROR_UNKNOWN;
    138 	}
    139 
    140 	cocsepinfo.next = cocsepsize;
    141 	r = writetableinfo(&cocsepinfo, arg->buf_size, arg->buf);
    142 	if (r != NISSY_OK) {
    143 		LOG("[H48 gendata] Error: could not write info for "
    144 		    "cocsep table with updated 'next' value\n");
    145 		return NISSY_ERROR_UNKNOWN;
    146 	}
    147 
    148 	/* Add h0k4 fallback table */
    149 
    150 	if (arg->k == 2) {
    151 		arg_h0k4 = *arg;
    152 		arg_h0k4.h = 0;
    153 		arg_h0k4.k = 4;
    154 		arg_h0k4.base = 0;
    155 		arg_h0k4.maxdepth = 20;
    156 		arg_h0k4.buf_size = arg->buf_size - h48size;
    157 		arg_h0k4.buf = arg->buf + cocsepsize + h48size;
    158 		arg_h0k4.h48buf = arg->h48buf + h48size;
    159 
    160 		gendata_h48h0k4(&arg_h0k4);
    161 
    162 	}
    163 
    164 	/* Add eoesep fallback table */
    165 
    166 	gendata_eoesep(arg->buf + (size - fallback2size), 20);
    167 
    168 	/* Update tableinfo with correct next values */
    169 
    170 	r = readtableinfo_n(arg->buf_size, arg->buf, 2, &h48info);
    171 	if (r != NISSY_OK) {
    172 		LOG("[H48 gendata] Error: could not read info "
    173 		    "for h48 table\n");
    174 		return NISSY_ERROR_UNKNOWN;
    175 	}
    176 	h48info.next = h48size;
    177 	r = writetableinfo(&h48info,
    178 	     arg->buf_size - cocsepsize, arg->buf + cocsepsize);
    179 	if (r != NISSY_OK) {
    180 		LOG("[H48 gendata] Error: could not write info "
    181 		    "for h48 table\n");
    182 		return NISSY_ERROR_UNKNOWN;
    183 	}
    184 
    185 	if (arg->k == 2) {
    186 		r = readtableinfo_n(arg->buf_size, arg->buf, 3, &fallbackinfo);
    187 		if (r != NISSY_OK) {
    188 			LOG("[H48 gendata] Error: could not read info for h48 "
    189 			    "fallback table\n");
    190 			return NISSY_ERROR_UNKNOWN;
    191 		}
    192 
    193 		of = cocsepsize + h48size;
    194 		fallbackinfo.next = fallbacksize;
    195 		r = writetableinfo(
    196 		    &fallbackinfo, arg->buf_size - of, arg->buf + of);
    197 		if (r != NISSY_OK) {
    198 			LOG("[H48 gendata] Error: could not write info for "
    199 			    "h48 fallback table\n");
    200 			return NISSY_ERROR_UNKNOWN;
    201 		}
    202 	}
    203 
    204 	return size;
    205 }
    206 
    207 STATIC void
    208 gendata_h48h0k4(gendata_h48_arg_t arg[static 1])
    209 {
    210 	_Atomic unsigned char *table;
    211 	uint8_t val;
    212 	int64_t i, sc, done, d, h48max;
    213 	uint64_t t, tt, isize, cc, bufsize;
    214 	h48h0k4_bfs_arg_t bfsarg[THREADS];
    215 	pthread_t thread[THREADS];
    216 	pthread_mutex_t table_mutex[CHUNKS];
    217 
    218 	arg->info = (tableinfo_t) {
    219 		.solver = "h48 solver h = 0, k = 4",
    220 		.type = TABLETYPE_PRUNING,
    221 		.infosize = INFOSIZE,
    222 		.fullsize = H48_TABLESIZE(0, 4) + INFOSIZE,
    223 		.hash = 0, /* TODO */
    224 		.entries = H48_COORDMAX(0),
    225 		.classes = 0,
    226 		.h48h = 0,
    227 		.bits = 4,
    228 		.base = 0,
    229 		.maxvalue = 0,
    230 		.next = 0,
    231 	};
    232 
    233 	table = arg->h48buf + INFOSIZE;
    234 	memset(table, 0xFF, H48_TABLESIZE(0, 4));
    235 
    236 	h48max = (int64_t)H48_COORDMAX(0);
    237 	sc = coord_h48(SOLVED_CUBE, arg->cocsepdata, 0);
    238 	set_h48_pval_atomic(table, sc, 4, 0);
    239 	arg->info.distribution[0] = 1;
    240 
    241 	isize = h48max / THREADS;
    242 	isize = (isize / H48_COEFF(arg->k)) * H48_COEFF(arg->k);
    243 	for (t = 0; t < CHUNKS; t++)
    244 		pthread_mutex_init(&table_mutex[t], NULL);
    245 	for (t = 0; t < THREADS; t++) {
    246 		bfsarg[t] = (h48h0k4_bfs_arg_t) {
    247 			.cocsepdata = arg->cocsepdata,
    248 			.table = table,
    249 			.selfsim = arg->selfsim,
    250 			.crep = arg->crep,
    251 			.start = isize * t,
    252 			.end = t == THREADS-1 ? (uint64_t)h48max : isize * (t+1),
    253 		};
    254 		for (tt = 0; tt < CHUNKS; tt++)
    255 			bfsarg[t].table_mutex[tt] = &table_mutex[tt];
    256 	}
    257 	for (done = 1, d = 1; done < h48max && d <= arg->maxdepth; d++) {
    258 		LOG("[H48 gendata] Generating depth %" PRId64 "\n", d);
    259 
    260 		for (t = 0; t < THREADS; t++) {
    261 			bfsarg[t].depth = d;
    262 			pthread_create(&thread[t], NULL,
    263 			    gendata_h48h0k4_runthread, &bfsarg[t]);
    264 		}
    265 
    266 		for (t = 0; t < THREADS; t++)
    267 			pthread_join(thread[t], NULL);
    268 
    269 		for (i = 0, cc = 0; i < h48max; i++) {
    270 			val = get_h48_pval_atomic(table, i, 4);
    271 			cc += val == d;
    272 		}
    273 
    274 		done += cc;
    275 		arg->info.distribution[d] = cc;
    276 
    277 		LOG("[H48 gendata] Found %" PRId64 "\n", cc);
    278 	}
    279 
    280 	arg->info.maxvalue = d - 1;
    281 	bufsize = arg->buf_size - COCSEP_FULLSIZE;
    282 	writetableinfo(&arg->info, bufsize, (unsigned char *)arg->h48buf);
    283 }
    284 
    285 STATIC void *
    286 gendata_h48h0k4_runthread(void *arg)
    287 {
    288 	static const uint8_t breakpoint = 10; /* Hand-picked optimal */
    289 
    290 	uint8_t c, m;
    291 	uint64_t i;
    292 	int64_t j;
    293 	cube_t cube, moved;
    294 	gendata_h48_mark_t markarg;
    295 	h48h0k4_bfs_arg_t *bfsarg;
    296 
    297 	bfsarg = (h48h0k4_bfs_arg_t *)arg;
    298 
    299 	markarg = (gendata_h48_mark_t) {
    300 		.depth = bfsarg->depth,
    301 		.h = 0,
    302 		.k = 4,
    303 		.cocsepdata = bfsarg->cocsepdata,
    304 		.selfsim = bfsarg->selfsim,
    305 		.table_atomic = bfsarg->table,
    306 		.table_mutex = bfsarg->table_mutex,
    307 	};
    308 
    309 	/*
    310          * If depth < breakpoint, scan all neighbors of coordinates at depth-1.
    311          * Otherwise, scan all neighbors of unvisited coordinates.
    312 	 */
    313 	for (i = bfsarg->start; i < bfsarg->end; i++) {
    314 		c = get_h48_pval_atomic(bfsarg->table, i, 4);
    315 
    316 		if ((bfsarg->depth < breakpoint && c != bfsarg->depth - 1) ||
    317 		    (bfsarg->depth >= breakpoint && c != 0xF))
    318 			continue;
    319 
    320 		cube = invcoord_h48(i, bfsarg->crep, 0);
    321 		for (m = 0; m < 18; m++) {
    322 			moved = move(cube, m);
    323 			j = coord_h48(moved, bfsarg->cocsepdata, 0);
    324 			c = get_h48_pval_atomic(bfsarg->table, j, 4);
    325 			if (bfsarg->depth < breakpoint) {
    326 				if (c <= bfsarg->depth)
    327 					continue;
    328 				markarg.cube = moved;
    329 				gendata_h48_mark_atomic(&markarg);
    330 			} else {
    331 				if (c >= bfsarg->depth)
    332 					continue;
    333 				markarg.cube = cube;
    334 				gendata_h48_mark_atomic(&markarg);
    335 				break; /* Enough to find one, skip the rest */
    336 			}
    337 		}
    338 	}
    339 
    340 	return NULL;
    341 }
    342 
    343 STATIC void
    344 gendata_h48k2(gendata_h48_arg_t arg[static 1])
    345 {
    346 	static const uint8_t shortdepth = 8;
    347 	static const uint64_t capacity = 10000019;
    348 	static const uint64_t randomizer = 10000079;
    349 
    350 	/*
    351 	 * A good base value for the k=2 tables have few positions with value
    352 	 * 0, because those are treated as lower bound 0 and require a second
    353 	 * lookup in another table, and at the same time not too many positions
    354 	 * with value 3, because some of those are under-estimates.
    355 	 *
    356 	 * The following values for the base have been hand-picked. I first
    357 	 * performed some statistics on the frequency of these values, but
    358 	 * they turned out to be unreliable. In the end I generated the same
    359 	 * table with multiple base value and see what was best.
    360 	 *
    361 	 * A curious case is h3, which has this distribution for base 8:
    362 	 *   [0] = 6686828
    363 	 *   [1] = 63867852
    364 	 *   [2] = 392789689
    365 	 *   [3] = 477195231
    366 	 *
    367 	 * and this for base 9:
    368 	 *   [0] = 70554680
    369 	 *   [1] = 392789689
    370 	 *   [2] = 462294676
    371 	 *   [3] = 14900555
    372 	 *
    373 	 * I ended up picking base 8 to have a much lower count of elements
    374 	 * with value 0, at the cost of a less precise estimate for the higher
    375 	 * values. But I am not 100% confident this is the optimal choice,
    376 	 * so I'll leave it here for future considerations.
    377 	 */
    378 	 
    379 	static const uint8_t base[] = {
    380 		[0]  = 8,
    381 		[1]  = 8,
    382 		[2]  = 8,
    383 		[3]  = 8,
    384 		[4]  = 9,
    385 		[5]  = 9,
    386 		[6]  = 9,
    387 		[7]  = 9,
    388 		[8]  = 10,
    389 		[9]  = 10,
    390 		[10] = 10,
    391 		[11] = 10
    392 	};
    393 
    394 	uint8_t t;
    395 	int sleeptime;
    396 	unsigned char *table;
    397 	int64_t j;
    398 	_Atomic uint64_t count;
    399 	uint64_t i, ii, inext, bufsize, done, nshort, velocity;
    400 	h48map_t shortcubes;
    401 	gendata_h48short_arg_t shortarg;
    402 	h48k2_dfs_arg_t dfsarg[THREADS];
    403 	pthread_t thread[THREADS];
    404 	pthread_mutex_t shortcubes_mutex, table_mutex[CHUNKS];
    405 
    406 	table = (unsigned char *)arg->h48buf + INFOSIZE;
    407 	memset(table, 0xFF, H48_TABLESIZE(arg->h, arg->k));
    408 
    409 	LOG("[H48 gendata] Computing depth <=%" PRIu8 "\n", shortdepth)
    410 	h48map_create(&shortcubes, capacity, randomizer);
    411 	shortarg = (gendata_h48short_arg_t) {
    412 		.maxdepth = shortdepth,
    413 		.cocsepdata = arg->cocsepdata,
    414 		.crep = arg->crep,
    415 		.selfsim = arg->selfsim,
    416 		.map = &shortcubes
    417 	};
    418 	gendata_h48short(&shortarg);
    419 	nshort = shortarg.map->n;
    420 	LOG("[H48 gendata] Computed %" PRIu64 " positions\n", nshort);
    421 
    422 	if (arg->base >= 20)
    423 		arg->base = base[arg->h];
    424 	arg->info = makeinfo_h48k2(arg);
    425 
    426 	inext = 0;
    427 	count = 0;
    428 	pthread_mutex_init(&shortcubes_mutex, NULL);
    429 	for (i = 0; i < CHUNKS; i++)
    430 		pthread_mutex_init(&table_mutex[i], NULL);
    431 	for (i = 0; i < THREADS; i++) {
    432 		dfsarg[i] = (h48k2_dfs_arg_t){
    433 			.h = arg->h,
    434 			.k = arg->k,
    435 			.base = arg->base,
    436 			.shortdepth = shortdepth,
    437 			.cocsepdata = arg->cocsepdata,
    438 			.table = table,
    439 			.selfsim = arg->selfsim,
    440 			.crep = arg->crep,
    441 			.shortcubes = &shortcubes,
    442 			.shortcubes_mutex = &shortcubes_mutex,
    443 			.next = &inext,
    444 			.count = &count,
    445 		};
    446 		for (ii = 0; ii < CHUNKS; ii++)
    447 			dfsarg[i].table_mutex[ii] = &table_mutex[ii];
    448 
    449 		pthread_create(
    450 		    &thread[i], NULL, gendata_h48k2_runthread, &dfsarg[i]);
    451 	}
    452 
    453 	if (NISSY_CANSLEEP) {
    454 		/* Log the progress periodically */
    455 		LOG("Processing 'short cubes'. This will take a while.\n");
    456 
    457 		/* Estimate velocity by checking how much is done after 1s */
    458 		msleep(1000);
    459 		velocity = count;
    460 
    461 		/* We plan to log 10 times */
    462 		sleeptime = (100*(nshort-velocity)) / velocity;
    463 
    464 		done = count;
    465 		while (nshort - done > (velocity * sleeptime) / 1000) {
    466 			msleep(sleeptime);
    467 			pthread_mutex_lock(&shortcubes_mutex);
    468 			done = count;
    469 			pthread_mutex_unlock(&shortcubes_mutex);
    470 			LOG("Processed %" PRIu64 " / %" PRIu64 " cubes\n",
    471 			    (done / 1000) * 1000, nshort);
    472 		}
    473 	} else {
    474 		LOG("Status updates won't be available because the sleep() "
    475 		    "functionality is not available on this platform.\n");
    476 	}
    477 
    478 	for (i = 0; i < THREADS; i++)
    479 		pthread_join(thread[i], NULL);
    480 
    481 	h48map_destroy(&shortcubes);
    482 
    483 	for (j = 0; j < H48_COORDMAX(arg->h); j++) {
    484 		t = get_h48_pval(table, j, 2);
    485 		arg->info.distribution[t]++;
    486 	}
    487 
    488 	bufsize = arg->buf_size - COCSEP_FULLSIZE;
    489 	writetableinfo(&arg->info, bufsize, (unsigned char *)arg->h48buf);
    490 }
    491 
    492 STATIC void *
    493 gendata_h48k2_runthread(void *arg)
    494 {
    495 	uint64_t coord, mutex;
    496 	kvpair_t kv;
    497 	h48k2_dfs_arg_t *dfsarg;
    498 
    499 	dfsarg = (h48k2_dfs_arg_t *)arg;
    500 
    501 	while (true) {
    502 		pthread_mutex_lock(dfsarg->shortcubes_mutex);
    503 
    504 		kv = h48map_nextkvpair(dfsarg->shortcubes, dfsarg->next);
    505 		if (*dfsarg->next == dfsarg->shortcubes->capacity) {
    506 			pthread_mutex_unlock(dfsarg->shortcubes_mutex);
    507 			break;
    508 		}
    509 		(*dfsarg->count)++;
    510 		pthread_mutex_unlock(dfsarg->shortcubes_mutex);
    511 
    512 		if (kv.val < dfsarg->shortdepth) {
    513 			coord = kv.key >> (int64_t)(11 - dfsarg->h);
    514 			mutex = H48_INDEX(coord, dfsarg->k) % CHUNKS;
    515 			pthread_mutex_lock(dfsarg->table_mutex[mutex]);
    516 			set_h48_pval(dfsarg->table, coord, dfsarg->k, 0);
    517 			pthread_mutex_unlock(dfsarg->table_mutex[mutex]);
    518 		} else {
    519 			dfsarg->cube = invcoord_h48(kv.key, dfsarg->crep, 11);
    520 			gendata_h48k2_dfs(dfsarg);
    521 		}
    522 	}
    523 
    524 	return NULL;
    525 }
    526 
    527 STATIC void
    528 gendata_h48k2_dfs(h48k2_dfs_arg_t arg[static 1])
    529 {
    530 	int8_t d;
    531 	uint8_t m[4];
    532 	cube_t cube[4];
    533 	gendata_h48_mark_t markarg;
    534 
    535 	markarg = (gendata_h48_mark_t) {
    536 		.h = arg->h,
    537 		.k = arg->k,
    538 		.cocsepdata = arg->cocsepdata,
    539 		.selfsim = arg->selfsim,
    540 		.table = arg->table,
    541 		.table_mutex = arg->table_mutex,
    542 	};
    543 
    544 	d = (int8_t)arg->shortdepth - (int8_t)arg->base;
    545 
    546 	/* Depth d+0 (shortcubes) */
    547 	markarg.depth = d;
    548 	markarg.cube = arg->cube;
    549 	gendata_h48_mark(&markarg);
    550 
    551 	/* Depth d+1 */
    552 	for (m[0] = 0; m[0] < 18; m[0]++) {
    553 		markarg.depth = d+1;
    554 		cube[0] = move(arg->cube, m[0]);
    555 		if (gendata_h48k2_dfs_stop(cube[0], d+1, arg))
    556 			continue;
    557 		markarg.cube = cube[0];
    558 		gendata_h48_mark(&markarg);
    559 
    560 		/* Depth d+2 */
    561 		for (m[1] = 0; m[1] < 18; m[1]++) {
    562 			markarg.depth = d+2;
    563 			if (m[0] / 3 == m[1] / 3) {
    564 				m[1] += 2;
    565 				continue;
    566 			}
    567 			cube[1] = move(cube[0], m[1]);
    568 			if (gendata_h48k2_dfs_stop(cube[1], d+2, arg))
    569 				continue;
    570 			markarg.cube = cube[1];
    571 			gendata_h48_mark(&markarg);
    572 			if (d >= 0)
    573 				continue;
    574 
    575 			/* Depth d+3 */
    576 			for (m[2] = 0; m[2] < 18; m[2]++) {
    577 				markarg.depth = d+3;
    578 				if (!allowednextmove(m[1], m[2])) {
    579 					m[2] += 2;
    580 					continue;
    581 				}
    582 				cube[2] = move(cube[1], m[2]);
    583 				if (gendata_h48k2_dfs_stop(cube[2], d+3, arg))
    584 					continue;
    585 				markarg.cube = cube[2];
    586 				gendata_h48_mark(&markarg);
    587 				if (d >= -1)
    588 					continue;
    589 
    590 				/* Depth d+4 */
    591 				for (m[3] = 0; m[3] < 18; m[3]++) {
    592 					markarg.depth = d+4;
    593 					if (!allowednextmove(m[2], m[3])) {
    594 						m[3] += 2;
    595 						continue;
    596 					}
    597 					cube[3] = move(cube[2], m[3]);
    598 					markarg.cube = cube[3];
    599 					gendata_h48_mark(&markarg);
    600 				}
    601 			}
    602 		}
    603 	}
    604 }
    605 
    606 STATIC_INLINE void
    607 gendata_h48_mark_atomic(gendata_h48_mark_t arg[static 1])
    608 {
    609 	uint8_t oldval, newval;
    610 	int64_t coord, mutex;
    611 
    612 	FOREACH_H48SIM(arg->cube, arg->cocsepdata, arg->selfsim,
    613 		coord = coord_h48(arg->cube, arg->cocsepdata, arg->h);
    614 		oldval = get_h48_pval_atomic(arg->table_atomic, coord, arg->k);
    615 		newval = (uint8_t)MAX(arg->depth, 0);
    616 		if (newval < oldval) {
    617 			mutex = H48_INDEX(coord, arg->k) % CHUNKS;
    618 			pthread_mutex_lock(arg->table_mutex[mutex]);
    619 			set_h48_pval_atomic(
    620 			    arg->table_atomic, coord, arg->k, newval);
    621 			pthread_mutex_unlock(arg->table_mutex[mutex]);
    622 		}
    623 	)
    624 }
    625 
    626 STATIC_INLINE void
    627 gendata_h48_mark(gendata_h48_mark_t arg[static 1])
    628 {
    629 	uint8_t oldval, newval;
    630 	int64_t coord, mutex;
    631 
    632 	FOREACH_H48SIM(arg->cube, arg->cocsepdata, arg->selfsim,
    633 		coord = coord_h48(arg->cube, arg->cocsepdata, arg->h);
    634 		mutex = H48_INDEX(coord, arg->k) % CHUNKS;
    635 		pthread_mutex_lock(arg->table_mutex[mutex]);
    636 		oldval = get_h48_pval(arg->table, coord, arg->k);
    637 		newval = (uint8_t)MAX(arg->depth, 0);
    638 		set_h48_pval(arg->table, coord, arg->k, MIN(newval, oldval));
    639 		pthread_mutex_unlock(arg->table_mutex[mutex]);
    640 	)
    641 }
    642 
    643 STATIC_INLINE bool
    644 gendata_h48k2_dfs_stop(cube_t cube, int8_t d, h48k2_dfs_arg_t arg[static 1])
    645 {
    646 	uint64_t val;
    647 	int64_t coord, mutex;
    648 	int8_t oldval;
    649 
    650 	if (arg->h == 0 || arg->h == 11) {
    651 		/* We are in the "real coordinate" case, we can stop
    652 		   if this coordinate has already been visited */
    653 		coord = coord_h48(cube, arg->cocsepdata, arg->h);
    654 		mutex = H48_INDEX(coord, arg->k) % CHUNKS;
    655 		pthread_mutex_lock(arg->table_mutex[mutex]);
    656 		oldval = get_h48_pval(arg->table, coord, arg->k);
    657 		pthread_mutex_unlock(arg->table_mutex[mutex]);
    658 		return oldval <= d;
    659 	} else {
    660 		/* With 0 < k < 11 we do not have a "real coordinate".
    661 		   The best we can do is checking if we backtracked to
    662 		   one of the "short cubes". */
    663 		coord = coord_h48(cube, arg->cocsepdata, 11);
    664 		val = h48map_value(arg->shortcubes, coord);
    665 		return val <= arg->shortdepth;
    666 	}
    667 }
    668 
    669 STATIC tableinfo_t
    670 makeinfo_h48k2(gendata_h48_arg_t arg[static 1])
    671 {
    672 	tableinfo_t info;
    673 
    674 	info = (tableinfo_t) {
    675 		.solver = "h48 solver h =  , k = 2",
    676 		.type = TABLETYPE_PRUNING,
    677 		.infosize = INFOSIZE,
    678 		.fullsize = H48_TABLESIZE(arg->h, 2) + INFOSIZE,
    679 		.hash = 0, /* TODO */
    680 		.entries = H48_COORDMAX(arg->h),
    681 		.classes = 0,
    682 		.h48h = arg->h,
    683 		.bits = 2,
    684 		.base = arg->base,
    685 		.maxvalue = 3,
    686 		.next = 0,
    687 	};
    688 	info.solver[15] = (arg->h % 10) + '0';
    689 	if (arg->h >= 10)
    690 		info.solver[14] = (arg->h / 10) + '0';
    691 
    692 	return info;
    693 }
    694 
    695 STATIC const uint32_t *
    696 get_cocsepdata_constptr(const unsigned char *data)
    697 {
    698 	return (uint32_t *)(data + INFOSIZE);
    699 }
    700 
    701 STATIC const unsigned char *
    702 get_h48data_constptr(const unsigned char *data)
    703 {
    704 	return data + COCSEP_FULLSIZE + INFOSIZE;
    705 }
    706 
    707 STATIC_INLINE uint8_t
    708 get_h48_pval(const unsigned char *table, int64_t i, uint8_t k)
    709 {
    710 	return (table[H48_INDEX(i, k)] & H48_MASK(i, k)) >> H48_SHIFT(i, k);
    711 }
    712 
    713 STATIC_INLINE uint8_t
    714 get_h48_pval_atomic(_Atomic const unsigned char *table, int64_t i, uint8_t k)
    715 {
    716 	return (table[H48_INDEX(i, k)] & H48_MASK(i, k)) >> H48_SHIFT(i, k);
    717 }
    718 
    719 STATIC_INLINE void
    720 set_h48_pval(unsigned char *table, int64_t i, uint8_t k, uint8_t val)
    721 {
    722 	table[H48_INDEX(i, k)] = (table[H48_INDEX(i, k)] & (~H48_MASK(i, k)))
    723 	    | (val << H48_SHIFT(i, k));
    724 }
    725 
    726 STATIC_INLINE void
    727 set_h48_pval_atomic(
    728 	_Atomic unsigned char *table,
    729 	int64_t i,
    730 	uint8_t k,
    731 	uint8_t val
    732 )
    733 {
    734 	table[H48_INDEX(i, k)] = (table[H48_INDEX(i, k)] & (~H48_MASK(i, k)))
    735 	    | (val << H48_SHIFT(i, k));
    736 }