nissy-core

The "engine" of nissy, including the H48 optimal solver.
git clone https://git.tronto.net/nissy-core
Download | Log | Files | Refs | README | LICENSE

gendata_h48.h (14519B)


      1 STATIC long long gendata_h48_dispatch(
      2     const char *, unsigned long long, unsigned char *);
      3 STATIC uint64_t gendata_h48short(gendata_h48short_arg_t [static 1]);
      4 STATIC int64_t gendata_h48(gendata_h48_arg_t [static 1]);
      5 STATIC void gendata_h48_maintable(gendata_h48_arg_t [static 1]);
      6 STATIC void *gendata_h48_runthread(void *);
      7 
      8 STATIC_INLINE void gendata_h48_mark(gendata_h48_mark_t [static 1]);
      9 STATIC_INLINE bool gendata_h48_dfs_stop(
     10     cube_t, int8_t, h48_dfs_arg_t [static 1]);
     11 STATIC void gendata_h48_dfs(h48_dfs_arg_t [static 1]);
     12 STATIC tableinfo_t makeinfo_h48(gendata_h48_arg_t [static 1]);
     13 
     14 STATIC const uint32_t *get_cocsepdata_constptr(const unsigned char *);
     15 STATIC const unsigned char *get_h48data_constptr(const unsigned char *);
     16 
     17 STATIC_INLINE uint8_t get_h48_pval(const unsigned char *, uint64_t);
     18 STATIC_INLINE void set_h48_pval(unsigned char *, uint64_t, uint8_t);
     19 STATIC_INLINE uint8_t get_h48_pvalmin(const unsigned char *, uint64_t);
     20 STATIC_INLINE void set_h48_pvalmin(unsigned char *, uint64_t, uint8_t);
     21 STATIC_INLINE uint8_t get_h48_pval_and_min(
     22     const unsigned char *, uint64_t, uint8_t [static 1]);
     23 
     24 STATIC long long
     25 gendata_h48_dispatch(
     26 	const char *solver,
     27 	unsigned long long data_size,
     28 	unsigned char *data
     29 )
     30 {
     31 	long long err;
     32 	gendata_h48_arg_t arg;
     33 
     34 	err = parse_h48h(solver, &arg.h);
     35 	if (err != NISSY_OK)
     36 		return err;
     37 
     38 	arg.buf_size = data_size;
     39 	arg.buf = data;
     40 	arg.maxdepth = 20;
     41 
     42 	return gendata_h48(&arg);
     43 }
     44 
     45 STATIC uint64_t
     46 gendata_h48short(gendata_h48short_arg_t arg[static 1])
     47 {
     48 	uint8_t i, m;
     49 	uint64_t coord;
     50 	uint64_t j;
     51 	kvpair_t kv;
     52 	cube_t cube, d;
     53 
     54 	cube = SOLVED_CUBE;
     55 	coord = coord_h48(cube, arg->cocsepdata, 11);
     56 	h48map_insertmin(arg->map, coord, 0);
     57 	for (i = 0; i < arg->maxdepth; i++) {
     58 		j = 0;
     59 		for (kv = h48map_nextkvpair(arg->map, &j);
     60 		     j != arg->map->capacity;
     61 		     kv = h48map_nextkvpair(arg->map, &j)
     62 		) {
     63 			if (kv.val != i)
     64 				continue;
     65 			cube = invcoord_h48(kv.key, arg->crep, 11);
     66 			for (m = 0; m < 18; m++) {
     67 				d = move(cube, m);
     68 				FOREACH_H48SIM(d, arg->cocsepdata, arg->selfsim,
     69 					coord = coord_h48(d, arg->cocsepdata, 11);
     70 					h48map_insertmin(arg->map, coord, i+1);
     71 				)
     72 			}
     73 		}
     74 	}
     75 
     76 	return arg->map->n;
     77 }
     78 
     79 STATIC int64_t
     80 gendata_h48(gendata_h48_arg_t arg[static 1])
     81 {
     82 	uint64_t size, cocsepsize, h48size, eoesepsize;
     83 	long long r;
     84 	unsigned char *cocsepdata_offset;
     85 	tableinfo_t cocsepinfo, h48info;
     86 
     87 	cocsepsize = COCSEP_FULLSIZE;
     88 	h48size = INFOSIZE + H48_TABLESIZE(arg->h);
     89 	eoesepsize = EOESEP_FULLSIZE;
     90 
     91 	size = cocsepsize + h48size + eoesepsize;
     92 
     93 	if (arg->buf == NULL)
     94 		return size; /* Dry-run */
     95 
     96 	if (arg->buf_size < size) {
     97 		LOG("[H48 gendata] Error: buffer is too small "
     98 		    "(needed %" PRId64 " bytes but received %" PRIu64 ")\n",
     99 		    size, arg->buf_size);
    100 		return NISSY_ERROR_BUFFER_SIZE;
    101 	}
    102 
    103 	gendata_cocsep(arg->buf, arg->selfsim, arg->crep);
    104 
    105 	cocsepdata_offset = arg->buf + INFOSIZE;
    106 	arg->cocsepdata = (uint32_t *)cocsepdata_offset;
    107 	arg->h48buf = (wrapthread_atomic unsigned char*)arg->buf + cocsepsize;
    108 
    109 	/* Compute the main H48 table with two bits per entry */
    110 	gendata_h48_maintable(arg);
    111 
    112 	r = readtableinfo(arg->buf_size, arg->buf, &cocsepinfo);
    113 	if (r != NISSY_OK) {
    114 		LOG("[H48 gendata] Error: could not read info "
    115 		    "for cocsep table\n");
    116 		return NISSY_ERROR_UNKNOWN;
    117 	}
    118 
    119 	cocsepinfo.next = cocsepsize;
    120 	r = writetableinfo(&cocsepinfo, arg->buf_size, arg->buf);
    121 	if (r != NISSY_OK) {
    122 		LOG("[H48 gendata] Error: could not write info for "
    123 		    "cocsep table with updated 'next' value\n");
    124 		return NISSY_ERROR_UNKNOWN;
    125 	}
    126 
    127 	/* Add eoesep fallback table */
    128 
    129 	gendata_eoesep(arg->buf + (size - eoesepsize), 20);
    130 
    131 	/* Update tableinfo with correct next values */
    132 
    133 	r = readtableinfo_n(arg->buf_size, arg->buf, 2, &h48info);
    134 	if (r != NISSY_OK) {
    135 		LOG("[H48 gendata] Error: could not read info "
    136 		    "for h48 table\n");
    137 		return NISSY_ERROR_UNKNOWN;
    138 	}
    139 	h48info.next = h48size;
    140 	r = writetableinfo(&h48info,
    141 	     arg->buf_size - cocsepsize, arg->buf + cocsepsize);
    142 	if (r != NISSY_OK) {
    143 		LOG("[H48 gendata] Error: could not write info "
    144 		    "for h48 table\n");
    145 		return NISSY_ERROR_UNKNOWN;
    146 	}
    147 
    148 	return size;
    149 }
    150 
    151 STATIC void
    152 gendata_h48_maintable(gendata_h48_arg_t arg[static 1])
    153 {
    154 	/*
    155 	 * A good base value for the h48 tables have few positions with value
    156 	 * 0, because those are treated as lower bound 0 and require a second
    157 	 * lookup in another table, and at the same time not too many positions
    158 	 * with value 3, because some of those are under-estimates.
    159 	 *
    160 	 * The following values for the base have been hand-picked. I first
    161 	 * performed some statistics on the frequency of these values, but
    162 	 * they turned out to be unreliable. In the end I generated the same
    163 	 * table with multiple base value and see what was best.
    164 	 *
    165 	 * A curious case is h3, which has this distribution for base 8:
    166 	 *   [0] = 6686828
    167 	 *   [1] = 63867852
    168 	 *   [2] = 392789689
    169 	 *   [3] = 477195231
    170 	 *
    171 	 * and this for base 9:
    172 	 *   [0] = 70554680
    173 	 *   [1] = 392789689
    174 	 *   [2] = 462294676
    175 	 *   [3] = 14900555
    176 	 *
    177 	 * Experimentally, the table with base 8 is much faster. Intuitively,
    178 	 * this is because it has a much lower count of elements  with value 0,
    179          * at the cost of a less precise estimate for the higher values.
    180 	 */
    181 	 
    182 	static const uint8_t base[] = {
    183 		[0]  = 8,
    184 		[1]  = 8,
    185 		[2]  = 8,
    186 		[3]  = 8,
    187 		[4]  = 9,
    188 		[5]  = 9,
    189 		[6]  = 9,
    190 		[7]  = 9,
    191 		[8]  = 10,
    192 		[9]  = 10,
    193 		[10] = 10,
    194 		[11] = 10
    195 	};
    196 
    197 	static const uint8_t shortdepth = 8;
    198 	static const uint64_t capacity = 10000019;
    199 	static const uint64_t randomizer = 10000079;
    200 
    201 	int sleeptime;
    202 	unsigned char *table;
    203 	wrapthread_atomic uint64_t count;
    204 	uint64_t i, ii, inext, bufsize, done, nshort, velocity;
    205 	h48map_t shortcubes;
    206 	gendata_h48short_arg_t shortarg;
    207 	h48_dfs_arg_t dfsarg[THREADS];
    208 	wrapthread_define_var_thread_t(thread[THREADS]);
    209 	wrapthread_define_var_mutex_t(shortcubes_mutex);
    210 	wrapthread_define_var_mutex_t(table_mutex[CHUNKS]);
    211 
    212 	table = (unsigned char *)arg->h48buf + INFOSIZE;
    213 	memset(table, 0xFF, H48_TABLESIZE(arg->h));
    214 
    215 	LOG("[H48 gendata] Computing depth <=%" PRIu8 "\n", shortdepth)
    216 	h48map_create(&shortcubes, capacity, randomizer);
    217 	shortarg = (gendata_h48short_arg_t) {
    218 		.maxdepth = shortdepth,
    219 		.cocsepdata = arg->cocsepdata,
    220 		.crep = arg->crep,
    221 		.selfsim = arg->selfsim,
    222 		.map = &shortcubes
    223 	};
    224 	gendata_h48short(&shortarg);
    225 	nshort = shortarg.map->n;
    226 	LOG("[H48 gendata] Computed %" PRIu64 " positions\n", nshort);
    227 
    228 	arg->base = base[arg->h];
    229 	arg->info = makeinfo_h48(arg);
    230 
    231 	inext = 0;
    232 	count = 0;
    233 	wrapthread_mutex_init(&shortcubes_mutex, NULL);
    234 	for (i = 0; i < CHUNKS; i++)
    235 		wrapthread_mutex_init(&table_mutex[i], NULL);
    236 	for (i = 0; i < THREADS; i++) {
    237 		dfsarg[i] = (h48_dfs_arg_t){
    238 			.h = arg->h,
    239 			.base = arg->base,
    240 			.shortdepth = shortdepth,
    241 			.cocsepdata = arg->cocsepdata,
    242 			.table = table,
    243 			.selfsim = arg->selfsim,
    244 			.crep = arg->crep,
    245 			.shortcubes = &shortcubes,
    246 			.shortcubes_mutex = &shortcubes_mutex,
    247 			.next = &inext,
    248 			.count = &count,
    249 		};
    250 		for (ii = 0; ii < CHUNKS; ii++)
    251 			dfsarg[i].table_mutex[ii] = &table_mutex[ii];
    252 
    253 		wrapthread_create(
    254 		    &thread[i], NULL, gendata_h48_runthread, &dfsarg[i]);
    255 	}
    256 
    257 	if (NISSY_CANSLEEP) {
    258 		/* Log the progress periodically */
    259 		LOG("[H48 gendata] Processing 'short cubes'. "
    260 		    "This will take a while.\n");
    261 
    262 		/* Estimate velocity by checking how much is done after 1s */
    263 		msleep(1000);
    264 		velocity = count;
    265 
    266 		/* We plan to log 10 times */
    267 		sleeptime = (100*(nshort-velocity)) / velocity;
    268 
    269 		done = count;
    270 		while (nshort - done > (velocity * sleeptime) / 1000) {
    271 			msleep(sleeptime);
    272 			wrapthread_mutex_lock(&shortcubes_mutex);
    273 			done = count;
    274 			wrapthread_mutex_unlock(&shortcubes_mutex);
    275 			LOG("[H48 gendata] Processed %" PRIu64 " / %" PRIu64
    276 			    " cubes\n", (done / 1000) * 1000, nshort);
    277 		}
    278 	} else {
    279 		LOG("Status updates won't be available because the sleep() "
    280 		    "functionality is not available on this platform.\n");
    281 	}
    282 
    283 	for (i = 0; i < THREADS; i++)
    284 		wrapthread_join(thread[i], NULL);
    285 
    286 	h48map_destroy(&shortcubes);
    287 
    288 	getdistribution_h48(table, arg->info.distribution, &arg->info);
    289 
    290 	bufsize = arg->buf_size - COCSEP_FULLSIZE;
    291 	writetableinfo(&arg->info, bufsize, (unsigned char *)arg->h48buf);
    292 }
    293 
    294 STATIC void *
    295 gendata_h48_runthread(void *arg)
    296 {
    297 	uint64_t coord, coordext, coordmin;
    298 	kvpair_t kv;
    299 	h48_dfs_arg_t *dfsarg;
    300 	wrapthread_define_if_threads(uint64_t, mutex);
    301 
    302 	dfsarg = (h48_dfs_arg_t *)arg;
    303 
    304 	while (true) {
    305 		wrapthread_mutex_lock(dfsarg->shortcubes_mutex);
    306 
    307 		kv = h48map_nextkvpair(dfsarg->shortcubes, dfsarg->next);
    308 		if (*dfsarg->next == dfsarg->shortcubes->capacity) {
    309 			wrapthread_mutex_unlock(dfsarg->shortcubes_mutex);
    310 			break;
    311 		}
    312 		(*dfsarg->count)++;
    313 		wrapthread_mutex_unlock(dfsarg->shortcubes_mutex);
    314 
    315 		if (kv.val < dfsarg->shortdepth) {
    316 			coord = kv.key >> (uint64_t)(11 - dfsarg->h);
    317 			coordext = H48_LINE_EXT(coord);
    318 			coordmin = H48_LINE_MIN(coord);
    319 
    320 			mutex = H48_LINE(coord) % CHUNKS;
    321 			wrapthread_mutex_lock(dfsarg->table_mutex[mutex]);
    322 			set_h48_pval(dfsarg->table, coordext, 0);
    323 			set_h48_pvalmin(dfsarg->table, coordmin, kv.val);
    324 			wrapthread_mutex_unlock(dfsarg->table_mutex[mutex]);
    325 		} else {
    326 			dfsarg->cube = invcoord_h48(kv.key, dfsarg->crep, 11);
    327 			gendata_h48_dfs(dfsarg);
    328 		}
    329 	}
    330 
    331 	return NULL;
    332 }
    333 
    334 STATIC void
    335 gendata_h48_dfs(h48_dfs_arg_t arg[static 1])
    336 {
    337 	int8_t d;
    338 	uint8_t m[4];
    339 	cube_t cube[4];
    340 	gendata_h48_mark_t markarg;
    341 
    342 	markarg = (gendata_h48_mark_t) {
    343 		.h = arg->h,
    344 		.base = arg->base,
    345 		.cocsepdata = arg->cocsepdata,
    346 		.selfsim = arg->selfsim,
    347 		.table = arg->table,
    348 		.table_mutex = arg->table_mutex,
    349 	};
    350 
    351 	d = (int8_t)arg->shortdepth - (int8_t)arg->base;
    352 
    353 	/* Depth d+0 (shortcubes) */
    354 	markarg.depth = d;
    355 	markarg.cube = arg->cube;
    356 	gendata_h48_mark(&markarg);
    357 
    358 	/* Depth d+1 */
    359 	for (m[0] = 0; m[0] < 18; m[0]++) {
    360 		markarg.depth = d+1;
    361 		cube[0] = move(arg->cube, m[0]);
    362 		if (gendata_h48_dfs_stop(cube[0], d+1, arg))
    363 			continue;
    364 		markarg.cube = cube[0];
    365 		gendata_h48_mark(&markarg);
    366 
    367 		/* Depth d+2 */
    368 		for (m[1] = 0; m[1] < 18; m[1]++) {
    369 			markarg.depth = d+2;
    370 			if (m[0] / 3 == m[1] / 3) {
    371 				m[1] += 2;
    372 				continue;
    373 			}
    374 			cube[1] = move(cube[0], m[1]);
    375 			if (gendata_h48_dfs_stop(cube[1], d+2, arg))
    376 				continue;
    377 			markarg.cube = cube[1];
    378 			gendata_h48_mark(&markarg);
    379 			if (d >= 0)
    380 				continue;
    381 
    382 			/* Depth d+3 */
    383 			for (m[2] = 0; m[2] < 18; m[2]++) {
    384 				markarg.depth = d+3;
    385 				if (!allowednextmove(m[1], m[2])) {
    386 					m[2] += 2;
    387 					continue;
    388 				}
    389 				cube[2] = move(cube[1], m[2]);
    390 				if (gendata_h48_dfs_stop(cube[2], d+3, arg))
    391 					continue;
    392 				markarg.cube = cube[2];
    393 				gendata_h48_mark(&markarg);
    394 				if (d >= -1)
    395 					continue;
    396 
    397 				/* Depth d+4 */
    398 				for (m[3] = 0; m[3] < 18; m[3]++) {
    399 					markarg.depth = d+4;
    400 					if (!allowednextmove(m[2], m[3])) {
    401 						m[3] += 2;
    402 						continue;
    403 					}
    404 					cube[3] = move(cube[2], m[3]);
    405 					markarg.cube = cube[3];
    406 					gendata_h48_mark(&markarg);
    407 				}
    408 			}
    409 		}
    410 	}
    411 }
    412 
    413 STATIC_INLINE void
    414 gendata_h48_mark(gendata_h48_mark_t arg[static 1])
    415 {
    416 	uint8_t oldval, newval, v;
    417 	uint64_t coord, coordext, coordmin;
    418 	wrapthread_define_if_threads(uint64_t, mutex);
    419 
    420 	FOREACH_H48SIM(arg->cube, arg->cocsepdata, arg->selfsim,
    421 		coord = coord_h48(arg->cube, arg->cocsepdata, arg->h);
    422 		coordext = H48_LINE_EXT(coord);
    423 		coordmin = H48_LINE_MIN(coord);
    424 
    425 		mutex = H48_LINE(coord) % CHUNKS;
    426 		wrapthread_mutex_lock(arg->table_mutex[mutex]);
    427 		oldval = get_h48_pval(arg->table, coordext);
    428 		newval = (uint8_t)MAX(arg->depth, 0);
    429 		v = MIN(newval, oldval);
    430 		set_h48_pval(arg->table, coordext, v);
    431 		v = arg->depth + arg->base;
    432 		set_h48_pvalmin(arg->table, coordmin, v);
    433 		wrapthread_mutex_unlock(arg->table_mutex[mutex]);
    434 	)
    435 }
    436 
    437 STATIC_INLINE bool
    438 gendata_h48_dfs_stop(cube_t cube, int8_t d, h48_dfs_arg_t arg[static 1])
    439 {
    440 	uint64_t val;
    441 	uint64_t coord, coordext;
    442 	wrapthread_define_if_threads(uint64_t, mutex);
    443 	int8_t oldval;
    444 
    445 	if (arg->h == 0 || arg->h == 11) {
    446 		/* We are in the "real coordinate" case, we can stop
    447 		   if this coordinate has already been visited */
    448 		coord = coord_h48(cube, arg->cocsepdata, arg->h);
    449 		coordext = H48_LINE_EXT(coord);
    450 		mutex = H48_LINE(coord) % CHUNKS;
    451 		wrapthread_mutex_lock(arg->table_mutex[mutex]);
    452 		oldval = get_h48_pval(arg->table, coordext);
    453 		wrapthread_mutex_unlock(arg->table_mutex[mutex]);
    454 		return oldval <= d;
    455 	} else {
    456 		/* With 0 < h < 11 we do not have a "real coordinate".
    457 		   The best we can do is checking if we backtracked to
    458 		   one of the "short cubes". */
    459 		coord = coord_h48(cube, arg->cocsepdata, 11);
    460 		val = h48map_value(arg->shortcubes, coord);
    461 		return val <= arg->shortdepth;
    462 	}
    463 }
    464 
    465 STATIC tableinfo_t
    466 makeinfo_h48(gendata_h48_arg_t arg[static 1])
    467 {
    468 	tableinfo_t info;
    469 
    470 	info = (tableinfo_t) {
    471 		.solver = "h48 solver h =  ",
    472 		.type = TABLETYPE_PRUNING,
    473 		.infosize = INFOSIZE,
    474 		.fullsize = H48_TABLESIZE(arg->h) + INFOSIZE,
    475 		.hash = 0,
    476 		.entries = H48_COORDMAX(arg->h) + 2 * H48_LINES(arg->h),
    477 		.classes = 0,
    478 		.h48h = arg->h,
    479 		.bits = 2,
    480 		.base = arg->base,
    481 		.maxvalue = 3,
    482 		.next = 0,
    483 	};
    484 	info.solver[15] = (arg->h % 10) + '0';
    485 	if (arg->h >= 10)
    486 		info.solver[14] = (arg->h / 10) + '0';
    487 
    488 	return info;
    489 }
    490 
    491 STATIC const uint32_t *
    492 get_cocsepdata_constptr(const unsigned char *data)
    493 {
    494 	return (uint32_t *)(data + INFOSIZE);
    495 }
    496 
    497 STATIC const unsigned char *
    498 get_h48data_constptr(const unsigned char *data)
    499 {
    500 	return data + COCSEP_FULLSIZE + INFOSIZE;
    501 }
    502 
    503 STATIC_INLINE uint8_t
    504 get_h48_pval(const unsigned char *table, uint64_t i)
    505 {
    506 	return (table[H48_INDEX(i)] & H48_MASK(i)) >> H48_SHIFT(i);
    507 }
    508 
    509 STATIC_INLINE uint8_t
    510 get_h48_pvalmin(const unsigned char *table, uint64_t i)
    511 {
    512 	return table[H48_INDEX(i)] >> UINT8_C(4);
    513 }
    514 
    515 STATIC_INLINE void
    516 set_h48_pval(unsigned char *table, uint64_t i, uint8_t val)
    517 {
    518 	table[H48_INDEX(i)] = (table[H48_INDEX(i)] & (~H48_MASK(i)))
    519 	    | (val << H48_SHIFT(i));
    520 }
    521 
    522 STATIC_INLINE void
    523 set_h48_pvalmin(unsigned char *table, uint64_t i, uint8_t val)
    524 {
    525 	uint8_t t, v;
    526 
    527 	v = MIN(val, get_h48_pvalmin(table, i));
    528 	t = table[H48_INDEX(i)];
    529 	table[H48_INDEX(i)] = (t & UINT8_C(0xF)) | (v << UINT8_C(4));
    530 }
    531 
    532 STATIC_INLINE uint8_t
    533 get_h48_pval_and_min(
    534 	const unsigned char *table,
    535 	uint64_t coord_noext,
    536 	uint8_t pval_min[static 1]
    537 )
    538 {
    539 	uint64_t iext, imin;
    540 	uint8_t t, tmin;
    541 
    542 	iext = H48_LINE_EXT(coord_noext);
    543 	imin = H48_LINE_MIN(coord_noext);
    544 	t = table[H48_INDEX(iext)];
    545 	tmin = table[H48_INDEX(imin)];
    546 
    547 	*pval_min = tmin >> UINT8_C(4);
    548 	return (t & H48_MASK(iext)) >> H48_SHIFT(iext);
    549 }