nissy-core

The "engine" of nissy, including the H48 optimal solver.
git clone https://git.tronto.net/nissy-core
Download | Log | Files | Refs | README | LICENSE

gendata_h48.h (22702B)


      1 STATIC uint64_t genddfggta_h48short(gendata_h48short_arg_t [static 1]);
      2 STATIC int64_t gendata_h48(gendata_h48_arg_t [static 1]);
      3 STATIC void gendata_h48h0k4(gendata_h48_arg_t [static 1]);
      4 STATIC void gendata_h48k2(gendata_h48_arg_t [static 1]);
      5 
      6 STATIC void *gendata_h48h0k4_runthread(void *);
      7 STATIC void *gendata_h48k2_runthread(void *);
      8 
      9 STATIC_INLINE void gendata_h48_mark_atomic(gendata_h48_mark_t [static 1]);
     10 STATIC_INLINE void gendata_h48_mark(gendata_h48_mark_t [static 1]);
     11 STATIC_INLINE bool gendata_h48k2_dfs_stop(
     12     cube_t, int8_t, h48k2_dfs_arg_t [static 1]);
     13 STATIC void gendata_h48k2_dfs(h48k2_dfs_arg_t [static 1]);
     14 STATIC tableinfo_t makeinfo_h48k2(gendata_h48_arg_t [static 1]);
     15 STATIC void *getdistribution_h48_runthread(void *);
     16 STATIC void getdistribution_h48(const unsigned char *,
     17     uint64_t [static INFO_DISTRIBUTION_LEN], uint8_t, uint8_t);
     18 
     19 STATIC const uint32_t *get_cocsepdata_constptr(const unsigned char *);
     20 STATIC const unsigned char *get_h48data_constptr(const unsigned char *);
     21 
     22 STATIC_INLINE uint8_t get_h48_pval(const unsigned char *, int64_t, uint8_t);
     23 STATIC_INLINE void set_h48_pval(unsigned char *, int64_t, uint8_t, uint8_t);
     24 STATIC_INLINE uint8_t get_h48_pval_atomic(
     25     _Atomic const unsigned char *, int64_t, uint8_t);
     26 STATIC_INLINE void set_h48_pval_atomic(
     27     _Atomic unsigned char *, int64_t, uint8_t, uint8_t);
     28 
     29 size_t gendata_h48_derive(uint8_t, const unsigned char *, unsigned char *);
     30 
     31 STATIC uint64_t
     32 gendata_h48short(gendata_h48short_arg_t arg[static 1])
     33 {
     34 	uint8_t i, m;
     35 	int64_t coord;
     36 	uint64_t j;
     37 	kvpair_t kv;
     38 	cube_t cube, d;
     39 
     40 	cube = SOLVED_CUBE;
     41 	coord = coord_h48(cube, arg->cocsepdata, 11);
     42 	h48map_insertmin(arg->map, coord, 0);
     43 	for (i = 0; i < arg->maxdepth; i++) {
     44 		j = 0;
     45 		for (kv = h48map_nextkvpair(arg->map, &j);
     46 		     j != arg->map->capacity;
     47 		     kv = h48map_nextkvpair(arg->map, &j)
     48 		) {
     49 			if (kv.val != i)
     50 				continue;
     51 			cube = invcoord_h48(kv.key, arg->crep, 11);
     52 			for (m = 0; m < 18; m++) {
     53 				d = move(cube, m);
     54 				FOREACH_H48SIM(d, arg->cocsepdata, arg->selfsim,
     55 					coord = coord_h48(d, arg->cocsepdata, 11);
     56 					h48map_insertmin(arg->map, coord, i+1);
     57 				)
     58 			}
     59 		}
     60 	}
     61 
     62 	return arg->map->n;
     63 }
     64 
     65 STATIC int64_t
     66 gendata_h48(gendata_h48_arg_t arg[static 1])
     67 {
     68 	uint64_t size, cocsepsize, h48size, fallbacksize, fallback2size, of;
     69 	long long r;
     70 	unsigned char *cocsepdata_offset;
     71 	tableinfo_t cocsepinfo, h48info, fallbackinfo;
     72 	gendata_h48_arg_t arg_h0k4;
     73 
     74 	cocsepsize = COCSEP_FULLSIZE;
     75 	h48size = INFOSIZE + H48_TABLESIZE(arg->h, arg->k);
     76 	fallbacksize = arg->k == 2 ? INFOSIZE + H48_TABLESIZE(0, 4) : 0;
     77 	fallback2size = EOESEP_FULLSIZE;
     78 
     79 	/* Add padding for 8-bit alignment */
     80 	h48size = 8 * DIV_ROUND_UP(h48size, 8);
     81 	fallbacksize = 8 * DIV_ROUND_UP(fallbacksize, 8);
     82 	fallback2size = 8 * DIV_ROUND_UP(fallback2size, 8);
     83 
     84 	size = cocsepsize + h48size + fallbacksize + fallback2size;
     85 
     86 	if (arg->buf == NULL)
     87 		return size; /* Dry-run */
     88 
     89 	if (arg->buf_size < size) {
     90 		LOG("[H48 gendata] Error data: buffer is too small "
     91 		    "(needed %" PRId64 " bytes but received %" PRId64 ")\n",
     92 		    size, arg->buf_size);
     93 		return NISSY_ERROR_BUFFER_SIZE;
     94 	}
     95 
     96 	gendata_cocsep(arg->buf, arg->selfsim, arg->crep);
     97 
     98 	cocsepdata_offset = arg->buf + INFOSIZE;
     99 	arg->cocsepdata = (uint32_t *)cocsepdata_offset;
    100 	arg->h48buf = (_Atomic unsigned char*)arg->buf + cocsepsize;
    101 
    102 	arg->base = 99; /* TODO: set this somewhere else */
    103 
    104 	if (arg->h == 0 && arg->k == 4) {
    105 		gendata_h48h0k4(arg);
    106 	} else if (arg->k == 2) {
    107 		gendata_h48k2(arg);
    108 	} else {
    109 		LOG("[H48 gendata] Error: cannot generate data for h = %" PRIu8
    110 		    " and k = %" PRIu8 " (not implemented yet)\n",
    111 		    arg->h, arg->k);
    112 		return NISSY_ERROR_INVALID_SOLVER;
    113 	}
    114 
    115 	r = readtableinfo(arg->buf_size, arg->buf, &cocsepinfo);
    116 	if (r != NISSY_OK) {
    117 		LOG("[H48 gendata] Error: could not read info "
    118 		    "for cocsep table\n");
    119 		return NISSY_ERROR_UNKNOWN;
    120 	}
    121 
    122 	cocsepinfo.next = cocsepsize;
    123 	r = writetableinfo(&cocsepinfo, arg->buf_size, arg->buf);
    124 	if (r != NISSY_OK) {
    125 		LOG("[H48 gendata] Error: could not write info for "
    126 		    "cocsep table with updated 'next' value\n");
    127 		return NISSY_ERROR_UNKNOWN;
    128 	}
    129 
    130 	/* Add h0k4 fallback table */
    131 
    132 	if (arg->k == 2) {
    133 		arg_h0k4 = *arg;
    134 		arg_h0k4.h = 0;
    135 		arg_h0k4.k = 4;
    136 		arg_h0k4.base = 0;
    137 		arg_h0k4.maxdepth = 20;
    138 		arg_h0k4.buf_size = arg->buf_size - h48size;
    139 		arg_h0k4.buf = arg->buf + cocsepsize + h48size;
    140 		arg_h0k4.h48buf = arg->h48buf + h48size;
    141 
    142 		gendata_h48h0k4(&arg_h0k4);
    143 
    144 	}
    145 
    146 	/* Add eoesep fallback table */
    147 
    148 	gendata_eoesep(arg->buf + (size - fallback2size), 20);
    149 
    150 	/* Update tableinfo with correct next values */
    151 
    152 	r = readtableinfo_n(arg->buf_size, arg->buf, 2, &h48info);
    153 	if (r != NISSY_OK) {
    154 		LOG("[H48 gendata] Error: could not read info "
    155 		    "for h48 table\n");
    156 		return NISSY_ERROR_UNKNOWN;
    157 	}
    158 	h48info.next = h48size;
    159 	r = writetableinfo(&h48info,
    160 	     arg->buf_size - cocsepsize, arg->buf + cocsepsize);
    161 	if (r != NISSY_OK) {
    162 		LOG("[H48 gendata] Error: could not write info "
    163 		    "for h48 table\n");
    164 		return NISSY_ERROR_UNKNOWN;
    165 	}
    166 
    167 	if (arg->k == 2) {
    168 		r = readtableinfo_n(arg->buf_size, arg->buf, 3, &fallbackinfo);
    169 		if (r != NISSY_OK) {
    170 			LOG("[H48 gendata] Error: could not read info for h48 "
    171 			    "fallback table\n");
    172 			return NISSY_ERROR_UNKNOWN;
    173 		}
    174 
    175 		of = cocsepsize + h48size;
    176 		fallbackinfo.next = fallbacksize;
    177 		r = writetableinfo(
    178 		    &fallbackinfo, arg->buf_size - of, arg->buf + of);
    179 		if (r != NISSY_OK) {
    180 			LOG("[H48 gendata] Error: could not write info for "
    181 			    "h48 fallback table\n");
    182 			return NISSY_ERROR_UNKNOWN;
    183 		}
    184 	}
    185 
    186 	return size;
    187 }
    188 
    189 STATIC void
    190 gendata_h48h0k4(gendata_h48_arg_t arg[static 1])
    191 {
    192 	_Atomic unsigned char *table;
    193 	uint8_t val;
    194 	int64_t i, sc, done, d, h48max;
    195 	uint64_t t, tt, isize, cc, bufsize;
    196 	h48h0k4_bfs_arg_t bfsarg[THREADS];
    197 	pthread_t thread[THREADS];
    198 	pthread_mutex_t table_mutex[CHUNKS];
    199 
    200 	arg->info = (tableinfo_t) {
    201 		.solver = "h48 solver h = 0, k = 4",
    202 		.type = TABLETYPE_PRUNING,
    203 		.infosize = INFOSIZE,
    204 		.fullsize = H48_TABLESIZE(0, 4) + INFOSIZE,
    205 		.hash = 0, /* TODO */
    206 		.entries = H48_COORDMAX(0),
    207 		.classes = 0,
    208 		.h48h = 0,
    209 		.bits = 4,
    210 		.base = 0,
    211 		.maxvalue = 0,
    212 		.next = 0,
    213 	};
    214 
    215 	table = arg->h48buf + INFOSIZE;
    216 	memset(table, 0xFF, H48_TABLESIZE(0, 4));
    217 
    218 	h48max = (int64_t)H48_COORDMAX(0);
    219 	sc = coord_h48(SOLVED_CUBE, arg->cocsepdata, 0);
    220 	set_h48_pval_atomic(table, sc, 4, 0);
    221 	arg->info.distribution[0] = 1;
    222 
    223 	isize = h48max / THREADS;
    224 	isize = (isize / H48_COEFF(arg->k)) * H48_COEFF(arg->k);
    225 	for (t = 0; t < CHUNKS; t++)
    226 		pthread_mutex_init(&table_mutex[t], NULL);
    227 	for (t = 0; t < THREADS; t++) {
    228 		bfsarg[t] = (h48h0k4_bfs_arg_t) {
    229 			.cocsepdata = arg->cocsepdata,
    230 			.table = table,
    231 			.selfsim = arg->selfsim,
    232 			.crep = arg->crep,
    233 			.start = isize * t,
    234 			.end = t == THREADS-1 ? (uint64_t)h48max : isize * (t+1),
    235 		};
    236 		for (tt = 0; tt < CHUNKS; tt++)
    237 			bfsarg[t].table_mutex[tt] = &table_mutex[tt];
    238 	}
    239 	for (done = 1, d = 1; done < h48max && d <= arg->maxdepth; d++) {
    240 		LOG("[H48 gendata] Generating depth %" PRId64 "\n", d);
    241 
    242 		for (t = 0; t < THREADS; t++) {
    243 			bfsarg[t].depth = d;
    244 			pthread_create(&thread[t], NULL,
    245 			    gendata_h48h0k4_runthread, &bfsarg[t]);
    246 		}
    247 
    248 		for (t = 0; t < THREADS; t++)
    249 			pthread_join(thread[t], NULL);
    250 
    251 		for (i = 0, cc = 0; i < h48max; i++) {
    252 			val = get_h48_pval_atomic(table, i, 4);
    253 			cc += val == d;
    254 		}
    255 
    256 		done += cc;
    257 		arg->info.distribution[d] = cc;
    258 
    259 		LOG("[H48 gendata] Found %" PRId64 "\n", cc);
    260 	}
    261 
    262 	arg->info.maxvalue = d - 1;
    263 	bufsize = arg->buf_size - COCSEP_FULLSIZE;
    264 	writetableinfo(&arg->info, bufsize, (unsigned char *)arg->h48buf);
    265 }
    266 
    267 STATIC void *
    268 gendata_h48h0k4_runthread(void *arg)
    269 {
    270 	static const uint8_t breakpoint = 10; /* Hand-picked optimal */
    271 
    272 	uint8_t c, m;
    273 	uint64_t i;
    274 	int64_t j;
    275 	cube_t cube, moved;
    276 	gendata_h48_mark_t markarg;
    277 	h48h0k4_bfs_arg_t *bfsarg;
    278 
    279 	bfsarg = (h48h0k4_bfs_arg_t *)arg;
    280 
    281 	markarg = (gendata_h48_mark_t) {
    282 		.depth = bfsarg->depth,
    283 		.h = 0,
    284 		.k = 4,
    285 		.cocsepdata = bfsarg->cocsepdata,
    286 		.selfsim = bfsarg->selfsim,
    287 		.table_atomic = bfsarg->table,
    288 		.table_mutex = bfsarg->table_mutex,
    289 	};
    290 
    291 	/*
    292          * If depth < breakpoint, scan all neighbors of coordinates at depth-1.
    293          * Otherwise, scan all neighbors of unvisited coordinates.
    294 	 */
    295 	for (i = bfsarg->start; i < bfsarg->end; i++) {
    296 		c = get_h48_pval_atomic(bfsarg->table, i, 4);
    297 
    298 		if ((bfsarg->depth < breakpoint && c != bfsarg->depth - 1) ||
    299 		    (bfsarg->depth >= breakpoint && c != 0xF))
    300 			continue;
    301 
    302 		cube = invcoord_h48(i, bfsarg->crep, 0);
    303 		for (m = 0; m < 18; m++) {
    304 			moved = move(cube, m);
    305 			j = coord_h48(moved, bfsarg->cocsepdata, 0);
    306 			c = get_h48_pval_atomic(bfsarg->table, j, 4);
    307 			if (bfsarg->depth < breakpoint) {
    308 				if (c <= bfsarg->depth)
    309 					continue;
    310 				markarg.cube = moved;
    311 				gendata_h48_mark_atomic(&markarg);
    312 			} else {
    313 				if (c >= bfsarg->depth)
    314 					continue;
    315 				markarg.cube = cube;
    316 				gendata_h48_mark_atomic(&markarg);
    317 				break; /* Enough to find one, skip the rest */
    318 			}
    319 		}
    320 	}
    321 
    322 	return NULL;
    323 }
    324 
    325 STATIC void
    326 gendata_h48k2(gendata_h48_arg_t arg[static 1])
    327 {
    328 	static const uint8_t shortdepth = 8;
    329 	static const uint64_t capacity = 10000019;
    330 	static const uint64_t randomizer = 10000079;
    331 
    332 	/*
    333 	 * A good base value for the k=2 tables have few positions with value
    334 	 * 0, because those are treated as lower bound 0 and require a second
    335 	 * lookup in another table, and at the same time not too many positions
    336 	 * with value 3, because some of those are under-estimates.
    337 	 *
    338 	 * The following values for the base have been hand-picked. I first
    339 	 * performed some statistics on the frequency of these values, but
    340 	 * they turned out to be unreliable. In the end I generated the same
    341 	 * table with multiple base value and see what was best.
    342 	 *
    343 	 * A curious case is h3, which has this distribution for base 8:
    344 	 *   [0] = 6686828
    345 	 *   [1] = 63867852
    346 	 *   [2] = 392789689
    347 	 *   [3] = 477195231
    348 	 *
    349 	 * and this for base 9:
    350 	 *   [0] = 70554680
    351 	 *   [1] = 392789689
    352 	 *   [2] = 462294676
    353 	 *   [3] = 14900555
    354 	 *
    355 	 * I ended up picking base 8 to have a much lower count of elements
    356 	 * with value 0, at the cost of a less precise estimate for the higher
    357 	 * values. But I am not 100% confident this is the optimal choice,
    358 	 * so I'll leave it here for future considerations.
    359 	 */
    360 	 
    361 	static const uint8_t base[] = {
    362 		[0]  = 8,
    363 		[1]  = 8,
    364 		[2]  = 8,
    365 		[3]  = 8,
    366 		[4]  = 9,
    367 		[5]  = 9,
    368 		[6]  = 9,
    369 		[7]  = 9,
    370 		[8]  = 10,
    371 		[9]  = 10,
    372 		[10] = 10,
    373 		[11] = 10
    374 	};
    375 
    376 	uint8_t t;
    377 	unsigned char *table;
    378 	int64_t j;
    379 	uint64_t i, ii, inext, count, bufsize;
    380 	h48map_t shortcubes;
    381 	gendata_h48short_arg_t shortarg;
    382 	h48k2_dfs_arg_t dfsarg[THREADS];
    383 	pthread_t thread[THREADS];
    384 	pthread_mutex_t shortcubes_mutex, table_mutex[CHUNKS];
    385 
    386 	table = (unsigned char *)arg->h48buf + INFOSIZE;
    387 	memset(table, 0xFF, H48_TABLESIZE(arg->h, arg->k));
    388 
    389 	LOG("[H48 gendata] Computing depth <=%" PRIu8 "\n", shortdepth)
    390 	h48map_create(&shortcubes, capacity, randomizer);
    391 	shortarg = (gendata_h48short_arg_t) {
    392 		.maxdepth = shortdepth,
    393 		.cocsepdata = arg->cocsepdata,
    394 		.crep = arg->crep,
    395 		.selfsim = arg->selfsim,
    396 		.map = &shortcubes
    397 	};
    398 	gendata_h48short(&shortarg);
    399 	LOG("[H48 gendata] Computed %" PRIu64 " positions\n", shortarg.map->n);
    400 
    401 	if (arg->base >= 20)
    402 		arg->base = base[arg->h];
    403 	arg->info = makeinfo_h48k2(arg);
    404 
    405 	inext = count = 0;
    406 	pthread_mutex_init(&shortcubes_mutex, NULL);
    407 	for (i = 0; i < CHUNKS; i++)
    408 		pthread_mutex_init(&table_mutex[i], NULL);
    409 	for (i = 0; i < THREADS; i++) {
    410 		dfsarg[i] = (h48k2_dfs_arg_t){
    411 			.h = arg->h,
    412 			.k = arg->k,
    413 			.base = arg->base,
    414 			.shortdepth = shortdepth,
    415 			.cocsepdata = arg->cocsepdata,
    416 			.table = table,
    417 			.selfsim = arg->selfsim,
    418 			.crep = arg->crep,
    419 			.shortcubes = &shortcubes,
    420 			.shortcubes_mutex = &shortcubes_mutex,
    421 			.next = &inext,
    422 			.count = &count,
    423 		};
    424 		for (ii = 0; ii < CHUNKS; ii++)
    425 			dfsarg[i].table_mutex[ii] = &table_mutex[ii];
    426 
    427 		pthread_create(
    428 		    &thread[i], NULL, gendata_h48k2_runthread, &dfsarg[i]);
    429 	}
    430 
    431 	for (i = 0; i < THREADS; i++)
    432 		pthread_join(thread[i], NULL);
    433 
    434 	h48map_destroy(&shortcubes);
    435 
    436 	for (j = 0; j < H48_COORDMAX(arg->h); j++) {
    437 		t = get_h48_pval(table, j, 2);
    438 		arg->info.distribution[t]++;
    439 	}
    440 
    441 	bufsize = arg->buf_size - COCSEP_FULLSIZE;
    442 	writetableinfo(&arg->info, bufsize, (unsigned char *)arg->h48buf);
    443 }
    444 
    445 STATIC void *
    446 gendata_h48k2_runthread(void *arg)
    447 {
    448 	uint64_t count, coord, mutex;
    449 	kvpair_t kv;
    450 	h48k2_dfs_arg_t *dfsarg;
    451 
    452 	dfsarg = (h48k2_dfs_arg_t *)arg;
    453 
    454 	while (true) {
    455 		pthread_mutex_lock(dfsarg->shortcubes_mutex);
    456 
    457 		kv = h48map_nextkvpair(dfsarg->shortcubes, dfsarg->next);
    458 		if (*dfsarg->next == dfsarg->shortcubes->capacity) {
    459 			pthread_mutex_unlock(dfsarg->shortcubes_mutex);
    460 			break;
    461 		}
    462 		count = ++(*dfsarg->count);
    463 		pthread_mutex_unlock(dfsarg->shortcubes_mutex);
    464 
    465 		if (count % UINT64_C(1000000) == 0)
    466 			LOG("[H48 gendata] Processing %" PRIu64
    467 			    "th short cube\n", count);
    468 
    469 		if (kv.val < dfsarg->shortdepth) {
    470 			coord = kv.key >> (int64_t)(11 - dfsarg->h);
    471 			mutex = H48_INDEX(coord, dfsarg->k) % CHUNKS;
    472 			pthread_mutex_lock(dfsarg->table_mutex[mutex]);
    473 			set_h48_pval(dfsarg->table, coord, dfsarg->k, 0);
    474 			pthread_mutex_unlock(dfsarg->table_mutex[mutex]);
    475 		} else {
    476 			dfsarg->cube = invcoord_h48(kv.key, dfsarg->crep, 11);
    477 			gendata_h48k2_dfs(dfsarg);
    478 		}
    479 	}
    480 
    481 	return NULL;
    482 }
    483 
    484 STATIC void
    485 gendata_h48k2_dfs(h48k2_dfs_arg_t arg[static 1])
    486 {
    487 	int8_t d;
    488 	uint8_t m[4];
    489 	cube_t cube[4];
    490 	gendata_h48_mark_t markarg;
    491 
    492 	markarg = (gendata_h48_mark_t) {
    493 		.h = arg->h,
    494 		.k = arg->k,
    495 		.cocsepdata = arg->cocsepdata,
    496 		.selfsim = arg->selfsim,
    497 		.table = arg->table,
    498 		.table_mutex = arg->table_mutex,
    499 	};
    500 
    501 	d = (int8_t)arg->shortdepth - (int8_t)arg->base;
    502 
    503 	/* Depth d+0 (shortcubes) */
    504 	markarg.depth = d;
    505 	markarg.cube = arg->cube;
    506 	gendata_h48_mark(&markarg);
    507 
    508 	/* Depth d+1 */
    509 	for (m[0] = 0; m[0] < 18; m[0]++) {
    510 		markarg.depth = d+1;
    511 		cube[0] = move(arg->cube, m[0]);
    512 		if (gendata_h48k2_dfs_stop(cube[0], d+1, arg))
    513 			continue;
    514 		markarg.cube = cube[0];
    515 		gendata_h48_mark(&markarg);
    516 
    517 		/* Depth d+2 */
    518 		for (m[1] = 0; m[1] < 18; m[1]++) {
    519 			markarg.depth = d+2;
    520 			if (m[0] / 3 == m[1] / 3) {
    521 				m[1] += 2;
    522 				continue;
    523 			}
    524 			cube[1] = move(cube[0], m[1]);
    525 			if (gendata_h48k2_dfs_stop(cube[1], d+2, arg))
    526 				continue;
    527 			markarg.cube = cube[1];
    528 			gendata_h48_mark(&markarg);
    529 			if (d >= 0)
    530 				continue;
    531 
    532 			/* Depth d+3 */
    533 			for (m[2] = 0; m[2] < 18; m[2]++) {
    534 				markarg.depth = d+3;
    535 				if (!allowednextmove(m[1], m[2])) {
    536 					m[2] += 2;
    537 					continue;
    538 				}
    539 				cube[2] = move(cube[1], m[2]);
    540 				if (gendata_h48k2_dfs_stop(cube[2], d+3, arg))
    541 					continue;
    542 				markarg.cube = cube[2];
    543 				gendata_h48_mark(&markarg);
    544 				if (d >= -1)
    545 					continue;
    546 
    547 				/* Depth d+4 */
    548 				for (m[3] = 0; m[3] < 18; m[3]++) {
    549 					markarg.depth = d+4;
    550 					if (!allowednextmove(m[2], m[3])) {
    551 						m[3] += 2;
    552 						continue;
    553 					}
    554 					cube[3] = move(cube[2], m[3]);
    555 					markarg.cube = cube[3];
    556 					gendata_h48_mark(&markarg);
    557 				}
    558 			}
    559 		}
    560 	}
    561 }
    562 
    563 STATIC_INLINE void
    564 gendata_h48_mark_atomic(gendata_h48_mark_t arg[static 1])
    565 {
    566 	uint8_t oldval, newval;
    567 	int64_t coord, mutex;
    568 
    569 	FOREACH_H48SIM(arg->cube, arg->cocsepdata, arg->selfsim,
    570 		coord = coord_h48(arg->cube, arg->cocsepdata, arg->h);
    571 		oldval = get_h48_pval_atomic(arg->table_atomic, coord, arg->k);
    572 		newval = (uint8_t)MAX(arg->depth, 0);
    573 		if (newval < oldval) {
    574 			mutex = H48_INDEX(coord, arg->k) % CHUNKS;
    575 			pthread_mutex_lock(arg->table_mutex[mutex]);
    576 			set_h48_pval_atomic(
    577 			    arg->table_atomic, coord, arg->k, newval);
    578 			pthread_mutex_unlock(arg->table_mutex[mutex]);
    579 		}
    580 	)
    581 }
    582 
    583 STATIC_INLINE void
    584 gendata_h48_mark(gendata_h48_mark_t arg[static 1])
    585 {
    586 	uint8_t oldval, newval;
    587 	int64_t coord, mutex;
    588 
    589 	FOREACH_H48SIM(arg->cube, arg->cocsepdata, arg->selfsim,
    590 		coord = coord_h48(arg->cube, arg->cocsepdata, arg->h);
    591 		mutex = H48_INDEX(coord, arg->k) % CHUNKS;
    592 		pthread_mutex_lock(arg->table_mutex[mutex]);
    593 		oldval = get_h48_pval(arg->table, coord, arg->k);
    594 		newval = (uint8_t)MAX(arg->depth, 0);
    595 		set_h48_pval(arg->table, coord, arg->k, MIN(newval, oldval));
    596 		pthread_mutex_unlock(arg->table_mutex[mutex]);
    597 	)
    598 }
    599 
    600 STATIC_INLINE bool
    601 gendata_h48k2_dfs_stop(cube_t cube, int8_t d, h48k2_dfs_arg_t arg[static 1])
    602 {
    603 	uint64_t val;
    604 	int64_t coord, mutex;
    605 	int8_t oldval;
    606 
    607 	if (arg->h == 0 || arg->h == 11) {
    608 		/* We are in the "real coordinate" case, we can stop
    609 		   if this coordinate has already been visited */
    610 		coord = coord_h48(cube, arg->cocsepdata, arg->h);
    611 		mutex = H48_INDEX(coord, arg->k) % CHUNKS;
    612 		pthread_mutex_lock(arg->table_mutex[mutex]);
    613 		oldval = get_h48_pval(arg->table, coord, arg->k);
    614 		pthread_mutex_unlock(arg->table_mutex[mutex]);
    615 		return oldval <= d;
    616 	} else {
    617 		/* With 0 < k < 11 we do not have a "real coordinate".
    618 		   The best we can do is checking if we backtracked to
    619 		   one of the "short cubes". */
    620 		coord = coord_h48(cube, arg->cocsepdata, 11);
    621 		val = h48map_value(arg->shortcubes, coord);
    622 		return val <= arg->shortdepth;
    623 	}
    624 }
    625 
    626 STATIC tableinfo_t
    627 makeinfo_h48k2(gendata_h48_arg_t arg[static 1])
    628 {
    629 	tableinfo_t info;
    630 
    631 	info = (tableinfo_t) {
    632 		.solver = "h48 solver h =  , k = 2",
    633 		.type = TABLETYPE_PRUNING,
    634 		.infosize = INFOSIZE,
    635 		.fullsize = H48_TABLESIZE(arg->h, 2) + INFOSIZE,
    636 		.hash = 0, /* TODO */
    637 		.entries = H48_COORDMAX(arg->h),
    638 		.classes = 0,
    639 		.h48h = arg->h,
    640 		.bits = 2,
    641 		.base = arg->base,
    642 		.maxvalue = 3,
    643 		.next = 0,
    644 	};
    645 	info.solver[15] = (arg->h % 10) + '0';
    646 	if (arg->h >= 10)
    647 		info.solver[14] = (arg->h / 10) + '0';
    648 
    649 	return info;
    650 }
    651 
    652 STATIC void *
    653 getdistribution_h48_runthread(void *arg)
    654 {
    655 	getdistribution_h48_data_t *data = (getdistribution_h48_data_t *)arg;
    656 	const unsigned char *table;
    657 	uint8_t j, k, m;
    658 	int64_t i;
    659 
    660 	memset(data->distr, 0, INFO_DISTRIBUTION_LEN * sizeof(uint64_t));
    661 
    662 	k = data->k;
    663 	table = data->table;
    664 	m = H48_MASK(0, k);
    665 	for (i = data->min; i < data->max; i++)
    666 		for (j = 0; j < H48_DIV(k); j++)
    667 			data->distr[(table[i] & (m << (j*k))) >> (j*k)]++;
    668 
    669 	return NULL;
    670 }
    671 
    672 STATIC void
    673 getdistribution_h48(
    674 	const unsigned char *table,
    675 	uint64_t distr[static INFO_DISTRIBUTION_LEN],
    676 	uint8_t h,
    677 	uint8_t k
    678 ) {
    679 	getdistribution_h48_data_t targ[THREADS];
    680 	pthread_t thread[THREADS];
    681 	uint64_t local_distr[THREADS][INFO_DISTRIBUTION_LEN];
    682 	int64_t i, j, nbytes, sz;
    683 
    684 	nbytes = H48_COORDMAX(h) / H48_DIV(k);
    685 	sz = nbytes / THREADS;
    686 	for (i = 0; i < THREADS; i++) {
    687 		targ[i] = (getdistribution_h48_data_t) {
    688 			.min = i * sz,
    689 			.max = i == THREADS - 1 ? nbytes : (i+1) * sz,
    690 			.k = k,
    691 			.distr = local_distr[i],
    692 			.table = table,
    693 		};
    694 		pthread_create(&thread[i], NULL,
    695 		    getdistribution_h48_runthread, &targ[i]);
    696 	}
    697 
    698 	for (i = 0; i < THREADS; i++)
    699 		pthread_join(thread[i], NULL);
    700 
    701 	memset(distr, 0, INFO_DISTRIBUTION_LEN * sizeof(uint64_t));
    702 	for (i = 0; i < THREADS; i++)
    703 		for (j = 0; j < INFO_DISTRIBUTION_LEN; j++)
    704 			distr[j] += local_distr[i][j];
    705 
    706 	for (i = nbytes * H48_DIV(k); i < H48_COORDMAX(h); i++)
    707 		distr[get_h48_pval(table, i, k)]++;
    708 }
    709 
    710 STATIC const uint32_t *
    711 get_cocsepdata_constptr(const unsigned char *data)
    712 {
    713 	return (uint32_t *)(data + INFOSIZE);
    714 }
    715 
    716 STATIC const unsigned char *
    717 get_h48data_constptr(const unsigned char *data)
    718 {
    719 	return data + COCSEP_FULLSIZE + INFOSIZE;
    720 }
    721 
    722 STATIC_INLINE uint8_t
    723 get_h48_pval(const unsigned char *table, int64_t i, uint8_t k)
    724 {
    725 	return (table[H48_INDEX(i, k)] & H48_MASK(i, k)) >> H48_SHIFT(i, k);
    726 }
    727 
    728 STATIC_INLINE uint8_t
    729 get_h48_pval_atomic(_Atomic const unsigned char *table, int64_t i, uint8_t k)
    730 {
    731 	return (table[H48_INDEX(i, k)] & H48_MASK(i, k)) >> H48_SHIFT(i, k);
    732 }
    733 
    734 STATIC_INLINE void
    735 set_h48_pval(unsigned char *table, int64_t i, uint8_t k, uint8_t val)
    736 {
    737 	table[H48_INDEX(i, k)] = (table[H48_INDEX(i, k)] & (~H48_MASK(i, k)))
    738 	    | (val << H48_SHIFT(i, k));
    739 }
    740 
    741 STATIC_INLINE void
    742 set_h48_pval_atomic(
    743 	_Atomic unsigned char *table,
    744 	int64_t i,
    745 	uint8_t k,
    746 	uint8_t val
    747 )
    748 {
    749 	table[H48_INDEX(i, k)] = (table[H48_INDEX(i, k)] & (~H48_MASK(i, k)))
    750 	    | (val << H48_SHIFT(i, k));
    751 }
    752 
    753 size_t
    754 gendata_h48_derive(uint8_t h, const unsigned char *fulltable, unsigned char *buf)
    755 {
    756 	size_t cocsepsize, h48size;
    757 	uint8_t val_full, val_derive;
    758 	const unsigned char *h48full;
    759 	unsigned char *h48derive;
    760 	int64_t i, j, h48max;
    761 	uint64_t bufsize;
    762 	gendata_h48_arg_t arg;
    763 	tableinfo_t cocsepinfo, fulltableinfo;
    764 
    765 	/* Initializing values in case of error */
    766 	/* TODO cleanup this */
    767 	fulltableinfo.h48h = 11;
    768 	fulltableinfo.bits = 2;
    769 	fulltableinfo.base = 8;
    770 
    771 	int64_t TODOlarge = 999999999999; /* TODO: cleanup here */
    772 
    773 	readtableinfo_n(TODOlarge, fulltable, 2, &fulltableinfo);
    774 	arg.h = h;
    775 	arg.k = fulltableinfo.bits;
    776 	arg.maxdepth = 20;
    777 	arg.buf = buf;
    778 	arg.cocsepdata = (uint32_t *)(buf + INFOSIZE);
    779 	arg.base = fulltableinfo.base;
    780 	arg.info = makeinfo_h48k2(&arg);
    781 
    782 	/* Technically this step is redundant, except that we
    783 	   need selfsim and crep */
    784 	cocsepsize = gendata_cocsep(buf, arg.selfsim, arg.crep);
    785 	arg.h48buf = (_Atomic unsigned char *)buf + cocsepsize;
    786 	h48size = H48_TABLESIZE(h, arg.k) + INFOSIZE;
    787 
    788 	if (buf == NULL)
    789 		goto gendata_h48_derive_return_size;
    790 
    791 	bufsize = COCSEP_FULLSIZE + INFOSIZE;
    792 	if (readtableinfo(bufsize, buf, &cocsepinfo) != NISSY_OK) {
    793 		LOG("[H48 derive gendata] Error: could not read info for "
    794 		    "cocsep table\n");
    795 		goto gendata_h48_derive_error;
    796 	}
    797 
    798 	cocsepinfo.next = cocsepsize;
    799 	bufsize = COCSEP_FULLSIZE + INFOSIZE;
    800 	if (writetableinfo(&cocsepinfo, bufsize, buf) != NISSY_OK) {
    801 		LOG("[H48 derive gendata] Error: could not write info for "
    802 		    "cocsep table with updated 'next' value\n");
    803 		goto gendata_h48_derive_error;
    804 	}
    805 
    806 	h48full = fulltable + cocsepsize + INFOSIZE;
    807 	h48derive = (unsigned char *)arg.h48buf + INFOSIZE;
    808 	memset(h48derive, 0xFF, H48_TABLESIZE(h, arg.k));
    809 	memset(arg.info.distribution, 0,
    810 	    INFO_DISTRIBUTION_LEN * sizeof(uint64_t));
    811 
    812 	h48max = H48_COORDMAX(fulltableinfo.h48h);
    813 	for (i = 0; i < h48max; i++) {
    814 		if (i % INT64_C(1000000000) == 0 && i > 0)
    815 			LOG("[H48 derive gendata] Processing %" PRId64
    816 			    "th coordinate\n", i);
    817 		j = i >> (int64_t)(fulltableinfo.h48h - h);
    818 		val_full = get_h48_pval(h48full, i, arg.k);
    819 		val_derive = get_h48_pval(h48derive, j, arg.k);
    820 		set_h48_pval(
    821 		    h48derive, j, arg.k, MIN(val_full, val_derive));
    822 	}
    823 
    824 	getdistribution_h48(h48derive, arg.info.distribution, h, arg.k);
    825 
    826 	bufsize = arg.buf_size - COCSEP_FULLSIZE - INFOSIZE;
    827 	if (writetableinfo(&arg.info, bufsize, (unsigned char *)arg.h48buf)
    828 	    != NISSY_OK) {
    829 		LOG("H48 derive gendata] Error: could not write info "
    830 		    "for table\n");
    831 		goto gendata_h48_derive_error;
    832 	}
    833 
    834 gendata_h48_derive_return_size:
    835 	return cocsepsize + h48size;
    836 
    837 gendata_h48_derive_error:
    838 	return 0;
    839 }