gendata_h48.h (14519B)
1 STATIC long long gendata_h48_dispatch( 2 const char *, unsigned long long, unsigned char *); 3 STATIC uint64_t gendata_h48short(gendata_h48short_arg_t [static 1]); 4 STATIC int64_t gendata_h48(gendata_h48_arg_t [static 1]); 5 STATIC void gendata_h48_maintable(gendata_h48_arg_t [static 1]); 6 STATIC void *gendata_h48_runthread(void *); 7 8 STATIC_INLINE void gendata_h48_mark(gendata_h48_mark_t [static 1]); 9 STATIC_INLINE bool gendata_h48_dfs_stop( 10 cube_t, int8_t, h48_dfs_arg_t [static 1]); 11 STATIC void gendata_h48_dfs(h48_dfs_arg_t [static 1]); 12 STATIC tableinfo_t makeinfo_h48(gendata_h48_arg_t [static 1]); 13 14 STATIC const uint32_t *get_cocsepdata_constptr(const unsigned char *); 15 STATIC const unsigned char *get_h48data_constptr(const unsigned char *); 16 17 STATIC_INLINE uint8_t get_h48_pval(const unsigned char *, uint64_t); 18 STATIC_INLINE void set_h48_pval(unsigned char *, uint64_t, uint8_t); 19 STATIC_INLINE uint8_t get_h48_pvalmin(const unsigned char *, uint64_t); 20 STATIC_INLINE void set_h48_pvalmin(unsigned char *, uint64_t, uint8_t); 21 STATIC_INLINE uint8_t get_h48_pval_and_min( 22 const unsigned char *, uint64_t, uint8_t [static 1]); 23 24 STATIC long long 25 gendata_h48_dispatch( 26 const char *solver, 27 unsigned long long data_size, 28 unsigned char *data 29 ) 30 { 31 long long err; 32 gendata_h48_arg_t arg; 33 34 err = parse_h48h(solver, &arg.h); 35 if (err != NISSY_OK) 36 return err; 37 38 arg.buf_size = data_size; 39 arg.buf = data; 40 arg.maxdepth = 20; 41 42 return gendata_h48(&arg); 43 } 44 45 STATIC uint64_t 46 gendata_h48short(gendata_h48short_arg_t arg[static 1]) 47 { 48 uint8_t i, m; 49 uint64_t coord; 50 uint64_t j; 51 kvpair_t kv; 52 cube_t cube, d; 53 54 cube = SOLVED_CUBE; 55 coord = coord_h48(cube, arg->cocsepdata, 11); 56 h48map_insertmin(arg->map, coord, 0); 57 for (i = 0; i < arg->maxdepth; i++) { 58 j = 0; 59 for (kv = h48map_nextkvpair(arg->map, &j); 60 j != arg->map->capacity; 61 kv = h48map_nextkvpair(arg->map, &j) 62 ) { 63 if (kv.val != i) 64 continue; 65 cube = invcoord_h48(kv.key, arg->crep, 11); 66 for (m = 0; m < 18; m++) { 67 d = move(cube, m); 68 FOREACH_H48SIM(d, arg->cocsepdata, arg->selfsim, 69 coord = coord_h48(d, arg->cocsepdata, 11); 70 h48map_insertmin(arg->map, coord, i+1); 71 ) 72 } 73 } 74 } 75 76 return arg->map->n; 77 } 78 79 STATIC int64_t 80 gendata_h48(gendata_h48_arg_t arg[static 1]) 81 { 82 uint64_t size, cocsepsize, h48size, eoesepsize; 83 long long r; 84 unsigned char *cocsepdata_offset; 85 tableinfo_t cocsepinfo, h48info; 86 87 cocsepsize = COCSEP_FULLSIZE; 88 h48size = INFOSIZE + H48_TABLESIZE(arg->h); 89 eoesepsize = EOESEP_FULLSIZE; 90 91 size = cocsepsize + h48size + eoesepsize; 92 93 if (arg->buf == NULL) 94 return size; /* Dry-run */ 95 96 if (arg->buf_size < size) { 97 LOG("[H48 gendata] Error: buffer is too small " 98 "(needed %" PRId64 " bytes but received %" PRIu64 ")\n", 99 size, arg->buf_size); 100 return NISSY_ERROR_BUFFER_SIZE; 101 } 102 103 gendata_cocsep(arg->buf, arg->selfsim, arg->crep); 104 105 cocsepdata_offset = arg->buf + INFOSIZE; 106 arg->cocsepdata = (uint32_t *)cocsepdata_offset; 107 arg->h48buf = (wrapthread_atomic unsigned char*)arg->buf + cocsepsize; 108 109 /* Compute the main H48 table with two bits per entry */ 110 gendata_h48_maintable(arg); 111 112 r = readtableinfo(arg->buf_size, arg->buf, &cocsepinfo); 113 if (r != NISSY_OK) { 114 LOG("[H48 gendata] Error: could not read info " 115 "for cocsep table\n"); 116 return NISSY_ERROR_UNKNOWN; 117 } 118 119 cocsepinfo.next = cocsepsize; 120 r = writetableinfo(&cocsepinfo, arg->buf_size, arg->buf); 121 if (r != NISSY_OK) { 122 LOG("[H48 gendata] Error: could not write info for " 123 "cocsep table with updated 'next' value\n"); 124 return NISSY_ERROR_UNKNOWN; 125 } 126 127 /* Add eoesep fallback table */ 128 129 gendata_eoesep(arg->buf + (size - eoesepsize), 20); 130 131 /* Update tableinfo with correct next values */ 132 133 r = readtableinfo_n(arg->buf_size, arg->buf, 2, &h48info); 134 if (r != NISSY_OK) { 135 LOG("[H48 gendata] Error: could not read info " 136 "for h48 table\n"); 137 return NISSY_ERROR_UNKNOWN; 138 } 139 h48info.next = h48size; 140 r = writetableinfo(&h48info, 141 arg->buf_size - cocsepsize, arg->buf + cocsepsize); 142 if (r != NISSY_OK) { 143 LOG("[H48 gendata] Error: could not write info " 144 "for h48 table\n"); 145 return NISSY_ERROR_UNKNOWN; 146 } 147 148 return size; 149 } 150 151 STATIC void 152 gendata_h48_maintable(gendata_h48_arg_t arg[static 1]) 153 { 154 /* 155 * A good base value for the h48 tables have few positions with value 156 * 0, because those are treated as lower bound 0 and require a second 157 * lookup in another table, and at the same time not too many positions 158 * with value 3, because some of those are under-estimates. 159 * 160 * The following values for the base have been hand-picked. I first 161 * performed some statistics on the frequency of these values, but 162 * they turned out to be unreliable. In the end I generated the same 163 * table with multiple base value and see what was best. 164 * 165 * A curious case is h3, which has this distribution for base 8: 166 * [0] = 6686828 167 * [1] = 63867852 168 * [2] = 392789689 169 * [3] = 477195231 170 * 171 * and this for base 9: 172 * [0] = 70554680 173 * [1] = 392789689 174 * [2] = 462294676 175 * [3] = 14900555 176 * 177 * Experimentally, the table with base 8 is much faster. Intuitively, 178 * this is because it has a much lower count of elements with value 0, 179 * at the cost of a less precise estimate for the higher values. 180 */ 181 182 static const uint8_t base[] = { 183 [0] = 8, 184 [1] = 8, 185 [2] = 8, 186 [3] = 8, 187 [4] = 9, 188 [5] = 9, 189 [6] = 9, 190 [7] = 9, 191 [8] = 10, 192 [9] = 10, 193 [10] = 10, 194 [11] = 10 195 }; 196 197 static const uint8_t shortdepth = 8; 198 static const uint64_t capacity = 10000019; 199 static const uint64_t randomizer = 10000079; 200 201 int sleeptime; 202 unsigned char *table; 203 wrapthread_atomic uint64_t count; 204 uint64_t i, ii, inext, bufsize, done, nshort, velocity; 205 h48map_t shortcubes; 206 gendata_h48short_arg_t shortarg; 207 h48_dfs_arg_t dfsarg[THREADS]; 208 wrapthread_define_var_thread_t(thread[THREADS]); 209 wrapthread_define_var_mutex_t(shortcubes_mutex); 210 wrapthread_define_var_mutex_t(table_mutex[CHUNKS]); 211 212 table = (unsigned char *)arg->h48buf + INFOSIZE; 213 memset(table, 0xFF, H48_TABLESIZE(arg->h)); 214 215 LOG("[H48 gendata] Computing depth <=%" PRIu8 "\n", shortdepth) 216 h48map_create(&shortcubes, capacity, randomizer); 217 shortarg = (gendata_h48short_arg_t) { 218 .maxdepth = shortdepth, 219 .cocsepdata = arg->cocsepdata, 220 .crep = arg->crep, 221 .selfsim = arg->selfsim, 222 .map = &shortcubes 223 }; 224 gendata_h48short(&shortarg); 225 nshort = shortarg.map->n; 226 LOG("[H48 gendata] Computed %" PRIu64 " positions\n", nshort); 227 228 arg->base = base[arg->h]; 229 arg->info = makeinfo_h48(arg); 230 231 inext = 0; 232 count = 0; 233 wrapthread_mutex_init(&shortcubes_mutex, NULL); 234 for (i = 0; i < CHUNKS; i++) 235 wrapthread_mutex_init(&table_mutex[i], NULL); 236 for (i = 0; i < THREADS; i++) { 237 dfsarg[i] = (h48_dfs_arg_t){ 238 .h = arg->h, 239 .base = arg->base, 240 .shortdepth = shortdepth, 241 .cocsepdata = arg->cocsepdata, 242 .table = table, 243 .selfsim = arg->selfsim, 244 .crep = arg->crep, 245 .shortcubes = &shortcubes, 246 .shortcubes_mutex = &shortcubes_mutex, 247 .next = &inext, 248 .count = &count, 249 }; 250 for (ii = 0; ii < CHUNKS; ii++) 251 dfsarg[i].table_mutex[ii] = &table_mutex[ii]; 252 253 wrapthread_create( 254 &thread[i], NULL, gendata_h48_runthread, &dfsarg[i]); 255 } 256 257 if (NISSY_CANSLEEP) { 258 /* Log the progress periodically */ 259 LOG("[H48 gendata] Processing 'short cubes'. " 260 "This will take a while.\n"); 261 262 /* Estimate velocity by checking how much is done after 1s */ 263 msleep(1000); 264 velocity = count; 265 266 /* We plan to log 10 times */ 267 sleeptime = (100*(nshort-velocity)) / velocity; 268 269 done = count; 270 while (nshort - done > (velocity * sleeptime) / 1000) { 271 msleep(sleeptime); 272 wrapthread_mutex_lock(&shortcubes_mutex); 273 done = count; 274 wrapthread_mutex_unlock(&shortcubes_mutex); 275 LOG("[H48 gendata] Processed %" PRIu64 " / %" PRIu64 276 " cubes\n", (done / 1000) * 1000, nshort); 277 } 278 } else { 279 LOG("Status updates won't be available because the sleep() " 280 "functionality is not available on this platform.\n"); 281 } 282 283 for (i = 0; i < THREADS; i++) 284 wrapthread_join(thread[i], NULL); 285 286 h48map_destroy(&shortcubes); 287 288 getdistribution_h48(table, arg->info.distribution, &arg->info); 289 290 bufsize = arg->buf_size - COCSEP_FULLSIZE; 291 writetableinfo(&arg->info, bufsize, (unsigned char *)arg->h48buf); 292 } 293 294 STATIC void * 295 gendata_h48_runthread(void *arg) 296 { 297 uint64_t coord, coordext, coordmin; 298 kvpair_t kv; 299 h48_dfs_arg_t *dfsarg; 300 wrapthread_define_if_threads(uint64_t, mutex); 301 302 dfsarg = (h48_dfs_arg_t *)arg; 303 304 while (true) { 305 wrapthread_mutex_lock(dfsarg->shortcubes_mutex); 306 307 kv = h48map_nextkvpair(dfsarg->shortcubes, dfsarg->next); 308 if (*dfsarg->next == dfsarg->shortcubes->capacity) { 309 wrapthread_mutex_unlock(dfsarg->shortcubes_mutex); 310 break; 311 } 312 (*dfsarg->count)++; 313 wrapthread_mutex_unlock(dfsarg->shortcubes_mutex); 314 315 if (kv.val < dfsarg->shortdepth) { 316 coord = kv.key >> (uint64_t)(11 - dfsarg->h); 317 coordext = H48_LINE_EXT(coord); 318 coordmin = H48_LINE_MIN(coord); 319 320 mutex = H48_LINE(coord) % CHUNKS; 321 wrapthread_mutex_lock(dfsarg->table_mutex[mutex]); 322 set_h48_pval(dfsarg->table, coordext, 0); 323 set_h48_pvalmin(dfsarg->table, coordmin, kv.val); 324 wrapthread_mutex_unlock(dfsarg->table_mutex[mutex]); 325 } else { 326 dfsarg->cube = invcoord_h48(kv.key, dfsarg->crep, 11); 327 gendata_h48_dfs(dfsarg); 328 } 329 } 330 331 return NULL; 332 } 333 334 STATIC void 335 gendata_h48_dfs(h48_dfs_arg_t arg[static 1]) 336 { 337 int8_t d; 338 uint8_t m[4]; 339 cube_t cube[4]; 340 gendata_h48_mark_t markarg; 341 342 markarg = (gendata_h48_mark_t) { 343 .h = arg->h, 344 .base = arg->base, 345 .cocsepdata = arg->cocsepdata, 346 .selfsim = arg->selfsim, 347 .table = arg->table, 348 .table_mutex = arg->table_mutex, 349 }; 350 351 d = (int8_t)arg->shortdepth - (int8_t)arg->base; 352 353 /* Depth d+0 (shortcubes) */ 354 markarg.depth = d; 355 markarg.cube = arg->cube; 356 gendata_h48_mark(&markarg); 357 358 /* Depth d+1 */ 359 for (m[0] = 0; m[0] < 18; m[0]++) { 360 markarg.depth = d+1; 361 cube[0] = move(arg->cube, m[0]); 362 if (gendata_h48_dfs_stop(cube[0], d+1, arg)) 363 continue; 364 markarg.cube = cube[0]; 365 gendata_h48_mark(&markarg); 366 367 /* Depth d+2 */ 368 for (m[1] = 0; m[1] < 18; m[1]++) { 369 markarg.depth = d+2; 370 if (m[0] / 3 == m[1] / 3) { 371 m[1] += 2; 372 continue; 373 } 374 cube[1] = move(cube[0], m[1]); 375 if (gendata_h48_dfs_stop(cube[1], d+2, arg)) 376 continue; 377 markarg.cube = cube[1]; 378 gendata_h48_mark(&markarg); 379 if (d >= 0) 380 continue; 381 382 /* Depth d+3 */ 383 for (m[2] = 0; m[2] < 18; m[2]++) { 384 markarg.depth = d+3; 385 if (!allowednextmove(m[1], m[2])) { 386 m[2] += 2; 387 continue; 388 } 389 cube[2] = move(cube[1], m[2]); 390 if (gendata_h48_dfs_stop(cube[2], d+3, arg)) 391 continue; 392 markarg.cube = cube[2]; 393 gendata_h48_mark(&markarg); 394 if (d >= -1) 395 continue; 396 397 /* Depth d+4 */ 398 for (m[3] = 0; m[3] < 18; m[3]++) { 399 markarg.depth = d+4; 400 if (!allowednextmove(m[2], m[3])) { 401 m[3] += 2; 402 continue; 403 } 404 cube[3] = move(cube[2], m[3]); 405 markarg.cube = cube[3]; 406 gendata_h48_mark(&markarg); 407 } 408 } 409 } 410 } 411 } 412 413 STATIC_INLINE void 414 gendata_h48_mark(gendata_h48_mark_t arg[static 1]) 415 { 416 uint8_t oldval, newval, v; 417 uint64_t coord, coordext, coordmin; 418 wrapthread_define_if_threads(uint64_t, mutex); 419 420 FOREACH_H48SIM(arg->cube, arg->cocsepdata, arg->selfsim, 421 coord = coord_h48(arg->cube, arg->cocsepdata, arg->h); 422 coordext = H48_LINE_EXT(coord); 423 coordmin = H48_LINE_MIN(coord); 424 425 mutex = H48_LINE(coord) % CHUNKS; 426 wrapthread_mutex_lock(arg->table_mutex[mutex]); 427 oldval = get_h48_pval(arg->table, coordext); 428 newval = (uint8_t)MAX(arg->depth, 0); 429 v = MIN(newval, oldval); 430 set_h48_pval(arg->table, coordext, v); 431 v = arg->depth + arg->base; 432 set_h48_pvalmin(arg->table, coordmin, v); 433 wrapthread_mutex_unlock(arg->table_mutex[mutex]); 434 ) 435 } 436 437 STATIC_INLINE bool 438 gendata_h48_dfs_stop(cube_t cube, int8_t d, h48_dfs_arg_t arg[static 1]) 439 { 440 uint64_t val; 441 uint64_t coord, coordext; 442 wrapthread_define_if_threads(uint64_t, mutex); 443 int8_t oldval; 444 445 if (arg->h == 0 || arg->h == 11) { 446 /* We are in the "real coordinate" case, we can stop 447 if this coordinate has already been visited */ 448 coord = coord_h48(cube, arg->cocsepdata, arg->h); 449 coordext = H48_LINE_EXT(coord); 450 mutex = H48_LINE(coord) % CHUNKS; 451 wrapthread_mutex_lock(arg->table_mutex[mutex]); 452 oldval = get_h48_pval(arg->table, coordext); 453 wrapthread_mutex_unlock(arg->table_mutex[mutex]); 454 return oldval <= d; 455 } else { 456 /* With 0 < h < 11 we do not have a "real coordinate". 457 The best we can do is checking if we backtracked to 458 one of the "short cubes". */ 459 coord = coord_h48(cube, arg->cocsepdata, 11); 460 val = h48map_value(arg->shortcubes, coord); 461 return val <= arg->shortdepth; 462 } 463 } 464 465 STATIC tableinfo_t 466 makeinfo_h48(gendata_h48_arg_t arg[static 1]) 467 { 468 tableinfo_t info; 469 470 info = (tableinfo_t) { 471 .solver = "h48 solver h = ", 472 .type = TABLETYPE_PRUNING, 473 .infosize = INFOSIZE, 474 .fullsize = H48_TABLESIZE(arg->h) + INFOSIZE, 475 .hash = 0, 476 .entries = H48_COORDMAX(arg->h) + 2 * H48_LINES(arg->h), 477 .classes = 0, 478 .h48h = arg->h, 479 .bits = 2, 480 .base = arg->base, 481 .maxvalue = 3, 482 .next = 0, 483 }; 484 info.solver[15] = (arg->h % 10) + '0'; 485 if (arg->h >= 10) 486 info.solver[14] = (arg->h / 10) + '0'; 487 488 return info; 489 } 490 491 STATIC const uint32_t * 492 get_cocsepdata_constptr(const unsigned char *data) 493 { 494 return (uint32_t *)(data + INFOSIZE); 495 } 496 497 STATIC const unsigned char * 498 get_h48data_constptr(const unsigned char *data) 499 { 500 return data + COCSEP_FULLSIZE + INFOSIZE; 501 } 502 503 STATIC_INLINE uint8_t 504 get_h48_pval(const unsigned char *table, uint64_t i) 505 { 506 return (table[H48_INDEX(i)] & H48_MASK(i)) >> H48_SHIFT(i); 507 } 508 509 STATIC_INLINE uint8_t 510 get_h48_pvalmin(const unsigned char *table, uint64_t i) 511 { 512 return table[H48_INDEX(i)] >> UINT8_C(4); 513 } 514 515 STATIC_INLINE void 516 set_h48_pval(unsigned char *table, uint64_t i, uint8_t val) 517 { 518 table[H48_INDEX(i)] = (table[H48_INDEX(i)] & (~H48_MASK(i))) 519 | (val << H48_SHIFT(i)); 520 } 521 522 STATIC_INLINE void 523 set_h48_pvalmin(unsigned char *table, uint64_t i, uint8_t val) 524 { 525 uint8_t t, v; 526 527 v = MIN(val, get_h48_pvalmin(table, i)); 528 t = table[H48_INDEX(i)]; 529 table[H48_INDEX(i)] = (t & UINT8_C(0xF)) | (v << UINT8_C(4)); 530 } 531 532 STATIC_INLINE uint8_t 533 get_h48_pval_and_min( 534 const unsigned char *table, 535 uint64_t coord_noext, 536 uint8_t pval_min[static 1] 537 ) 538 { 539 uint64_t iext, imin; 540 uint8_t t, tmin; 541 542 iext = H48_LINE_EXT(coord_noext); 543 imin = H48_LINE_MIN(coord_noext); 544 t = table[H48_INDEX(iext)]; 545 tmin = table[H48_INDEX(imin)]; 546 547 *pval_min = tmin >> UINT8_C(4); 548 return (t & H48_MASK(iext)) >> H48_SHIFT(iext); 549 }