gendata_h48.h (19800B)
1 STATIC long long gendata_h48_dispatch( 2 const char *, unsigned long long, unsigned char *); 3 STATIC uint64_t gendata_h48short(gendata_h48short_arg_t [static 1]); 4 STATIC int64_t gendata_h48(gendata_h48_arg_t [static 1]); 5 STATIC void gendata_h48h0k4(gendata_h48_arg_t [static 1]); 6 STATIC void gendata_h48k2(gendata_h48_arg_t [static 1]); 7 8 STATIC void *gendata_h48h0k4_runthread(void *); 9 STATIC void *gendata_h48k2_runthread(void *); 10 11 STATIC_INLINE void gendata_h48_mark_atomic(gendata_h48_mark_t [static 1]); 12 STATIC_INLINE void gendata_h48_mark(gendata_h48_mark_t [static 1]); 13 STATIC_INLINE bool gendata_h48k2_dfs_stop( 14 cube_t, int8_t, h48k2_dfs_arg_t [static 1]); 15 STATIC void gendata_h48k2_dfs(h48k2_dfs_arg_t [static 1]); 16 STATIC tableinfo_t makeinfo_h48k2(gendata_h48_arg_t [static 1]); 17 18 STATIC const uint32_t *get_cocsepdata_constptr(const unsigned char *); 19 STATIC const unsigned char *get_h48data_constptr(const unsigned char *); 20 21 STATIC_INLINE uint8_t get_h48_pval(const unsigned char *, uint64_t, uint8_t); 22 STATIC_INLINE void set_h48_pval(unsigned char *, uint64_t, uint8_t, uint8_t); 23 STATIC_INLINE uint8_t get_h48_pval_atomic( 24 wrapthread_atomic const unsigned char *, uint64_t, uint8_t); 25 STATIC_INLINE void set_h48_pval_atomic( 26 wrapthread_atomic unsigned char *, uint64_t, uint8_t, uint8_t); 27 28 STATIC long long 29 gendata_h48_dispatch( 30 const char *solver, 31 unsigned long long data_size, 32 unsigned char *data 33 ) 34 { 35 long long err; 36 gendata_h48_arg_t arg; 37 38 err = parse_h48_hk(solver, &arg.h, &arg.k); 39 if (err != NISSY_OK) 40 return err; 41 42 arg.buf_size = data_size; 43 arg.buf = data; 44 arg.maxdepth = 20; 45 46 return gendata_h48(&arg); 47 } 48 49 STATIC uint64_t 50 gendata_h48short(gendata_h48short_arg_t arg[static 1]) 51 { 52 uint8_t i, m; 53 uint64_t coord; 54 uint64_t j; 55 kvpair_t kv; 56 cube_t cube, d; 57 58 cube = SOLVED_CUBE; 59 coord = coord_h48(cube, arg->cocsepdata, 11); 60 h48map_insertmin(arg->map, coord, 0); 61 for (i = 0; i < arg->maxdepth; i++) { 62 j = 0; 63 for (kv = h48map_nextkvpair(arg->map, &j); 64 j != arg->map->capacity; 65 kv = h48map_nextkvpair(arg->map, &j) 66 ) { 67 if (kv.val != i) 68 continue; 69 cube = invcoord_h48(kv.key, arg->crep, 11); 70 for (m = 0; m < 18; m++) { 71 d = move(cube, m); 72 FOREACH_H48SIM(d, arg->cocsepdata, arg->selfsim, 73 coord = coord_h48(d, arg->cocsepdata, 11); 74 h48map_insertmin(arg->map, coord, i+1); 75 ) 76 } 77 } 78 } 79 80 return arg->map->n; 81 } 82 83 STATIC int64_t 84 gendata_h48(gendata_h48_arg_t arg[static 1]) 85 { 86 uint64_t size, cocsepsize, h48size, fallbacksize, fallback2size, of; 87 long long r; 88 unsigned char *cocsepdata_offset; 89 tableinfo_t cocsepinfo, h48info, fallbackinfo; 90 gendata_h48_arg_t arg_h0k4; 91 92 cocsepsize = COCSEP_FULLSIZE; 93 h48size = INFOSIZE + H48_TABLESIZE(arg->h, arg->k); 94 fallbacksize = arg->k == 2 ? INFOSIZE + H48_TABLESIZE(0, 4) : 0; 95 fallback2size = EOESEP_FULLSIZE; 96 97 /* Add padding for 8-bit alignment */ 98 h48size = 8 * DIV_ROUND_UP(h48size, 8); 99 fallbacksize = 8 * DIV_ROUND_UP(fallbacksize, 8); 100 fallback2size = 8 * DIV_ROUND_UP(fallback2size, 8); 101 102 size = cocsepsize + h48size + fallbacksize + fallback2size; 103 104 if (arg->buf == NULL) 105 return size; /* Dry-run */ 106 107 if (arg->buf_size < size) { 108 LOG("[H48 gendata] Error: buffer is too small " 109 "(needed %" PRId64 " bytes but received %" PRIu64 ")\n", 110 size, arg->buf_size); 111 return NISSY_ERROR_BUFFER_SIZE; 112 } 113 114 gendata_cocsep(arg->buf, arg->selfsim, arg->crep); 115 116 cocsepdata_offset = arg->buf + INFOSIZE; 117 arg->cocsepdata = (uint32_t *)cocsepdata_offset; 118 arg->h48buf = (wrapthread_atomic unsigned char*)arg->buf + cocsepsize; 119 120 arg->base = 99; 121 122 if (arg->h == 0 && arg->k == 4) { 123 gendata_h48h0k4(arg); 124 } else if (arg->k == 2) { 125 gendata_h48k2(arg); 126 } else { 127 LOG("[H48 gendata] Error: cannot generate data for h = %" PRIu8 128 " and k = %" PRIu8 " (not implemented yet)\n", 129 arg->h, arg->k); 130 return NISSY_ERROR_INVALID_SOLVER; 131 } 132 133 r = readtableinfo(arg->buf_size, arg->buf, &cocsepinfo); 134 if (r != NISSY_OK) { 135 LOG("[H48 gendata] Error: could not read info " 136 "for cocsep table\n"); 137 return NISSY_ERROR_UNKNOWN; 138 } 139 140 cocsepinfo.next = cocsepsize; 141 r = writetableinfo(&cocsepinfo, arg->buf_size, arg->buf); 142 if (r != NISSY_OK) { 143 LOG("[H48 gendata] Error: could not write info for " 144 "cocsep table with updated 'next' value\n"); 145 return NISSY_ERROR_UNKNOWN; 146 } 147 148 /* Add h0k4 fallback table */ 149 150 if (arg->k == 2) { 151 arg_h0k4 = *arg; 152 arg_h0k4.h = 0; 153 arg_h0k4.k = 4; 154 arg_h0k4.base = 0; 155 arg_h0k4.maxdepth = 20; 156 arg_h0k4.buf_size = arg->buf_size - h48size; 157 arg_h0k4.buf = arg->buf + cocsepsize + h48size; 158 arg_h0k4.h48buf = arg->h48buf + h48size; 159 160 gendata_h48h0k4(&arg_h0k4); 161 162 } 163 164 /* Add eoesep fallback table */ 165 166 gendata_eoesep(arg->buf + (size - fallback2size), 20); 167 168 /* Update tableinfo with correct next values */ 169 170 r = readtableinfo_n(arg->buf_size, arg->buf, 2, &h48info); 171 if (r != NISSY_OK) { 172 LOG("[H48 gendata] Error: could not read info " 173 "for h48 table\n"); 174 return NISSY_ERROR_UNKNOWN; 175 } 176 h48info.next = h48size; 177 r = writetableinfo(&h48info, 178 arg->buf_size - cocsepsize, arg->buf + cocsepsize); 179 if (r != NISSY_OK) { 180 LOG("[H48 gendata] Error: could not write info " 181 "for h48 table\n"); 182 return NISSY_ERROR_UNKNOWN; 183 } 184 185 if (arg->k == 2) { 186 r = readtableinfo_n(arg->buf_size, arg->buf, 3, &fallbackinfo); 187 if (r != NISSY_OK) { 188 LOG("[H48 gendata] Error: could not read info for h48 " 189 "fallback table\n"); 190 return NISSY_ERROR_UNKNOWN; 191 } 192 193 of = cocsepsize + h48size; 194 fallbackinfo.next = fallbacksize; 195 r = writetableinfo( 196 &fallbackinfo, arg->buf_size - of, arg->buf + of); 197 if (r != NISSY_OK) { 198 LOG("[H48 gendata] Error: could not write info for " 199 "h48 fallback table\n"); 200 return NISSY_ERROR_UNKNOWN; 201 } 202 } 203 204 return size; 205 } 206 207 STATIC void 208 gendata_h48h0k4(gendata_h48_arg_t arg[static 1]) 209 { 210 wrapthread_atomic unsigned char *table; 211 uint8_t val; 212 uint64_t i, sc, done, d, h48max; 213 uint64_t t, tt, isize, cc, bufsize; 214 h48h0k4_bfs_arg_t bfsarg[THREADS]; 215 wrapthread_define_var_thread_t(thread[THREADS]); 216 wrapthread_define_var_mutex_t(table_mutex[CHUNKS]); 217 218 arg->info = (tableinfo_t) { 219 .solver = "h48 solver h = 0, k = 4", 220 .type = TABLETYPE_PRUNING, 221 .infosize = INFOSIZE, 222 .fullsize = H48_TABLESIZE(0, 4) + INFOSIZE, 223 .hash = 0, 224 .entries = H48_COORDMAX(0), 225 .classes = 0, 226 .h48h = 0, 227 .bits = 4, 228 .base = 0, 229 .maxvalue = 0, 230 .next = 0, 231 }; 232 233 table = arg->h48buf + INFOSIZE; 234 memset(table, 0xFF, H48_TABLESIZE(0, 4)); 235 236 h48max = H48_COORDMAX(0); 237 sc = coord_h48(SOLVED_CUBE, arg->cocsepdata, 0); 238 set_h48_pval_atomic(table, sc, 4, 0); 239 arg->info.distribution[0] = 1; 240 241 isize = h48max / THREADS; 242 isize = (isize / H48_COEFF(arg->k)) * H48_COEFF(arg->k); 243 for (t = 0; t < CHUNKS; t++) 244 wrapthread_mutex_init(&table_mutex[t], NULL); 245 for (t = 0; t < THREADS; t++) { 246 bfsarg[t] = (h48h0k4_bfs_arg_t) { 247 .cocsepdata = arg->cocsepdata, 248 .table = table, 249 .selfsim = arg->selfsim, 250 .crep = arg->crep, 251 .start = isize * t, 252 .end = t == THREADS-1 ? h48max : isize * (t+1), 253 }; 254 for (tt = 0; tt < CHUNKS; tt++) 255 bfsarg[t].table_mutex[tt] = &table_mutex[tt]; 256 } 257 for (done = 1, d = 1; done < h48max && d <= arg->maxdepth; d++) { 258 LOG("[H48 gendata] Generating depth %" PRIu64 "\n", d); 259 260 for (t = 0; t < THREADS; t++) { 261 bfsarg[t].depth = d; 262 wrapthread_create(&thread[t], NULL, 263 gendata_h48h0k4_runthread, &bfsarg[t]); 264 } 265 266 for (t = 0; t < THREADS; t++) 267 wrapthread_join(thread[t], NULL); 268 269 for (i = 0, cc = 0; i < h48max; i++) { 270 val = get_h48_pval_atomic(table, i, 4); 271 cc += val == d; 272 } 273 274 done += cc; 275 arg->info.distribution[d] = cc; 276 277 LOG("[H48 gendata] Found %" PRIu64 "\n", cc); 278 } 279 280 arg->info.maxvalue = d - 1; 281 bufsize = arg->buf_size - COCSEP_FULLSIZE; 282 writetableinfo(&arg->info, bufsize, (unsigned char *)arg->h48buf); 283 } 284 285 STATIC void * 286 gendata_h48h0k4_runthread(void *arg) 287 { 288 static const uint8_t breakpoint = 10; /* Hand-picked optimal */ 289 290 uint8_t c, m; 291 uint64_t i; 292 uint64_t j; 293 cube_t cube, moved; 294 gendata_h48_mark_t markarg; 295 h48h0k4_bfs_arg_t *bfsarg; 296 297 bfsarg = (h48h0k4_bfs_arg_t *)arg; 298 299 markarg = (gendata_h48_mark_t) { 300 .depth = bfsarg->depth, 301 .h = 0, 302 .k = 4, 303 .cocsepdata = bfsarg->cocsepdata, 304 .selfsim = bfsarg->selfsim, 305 .table_atomic = bfsarg->table, 306 .table_mutex = bfsarg->table_mutex, 307 }; 308 309 /* 310 * If depth < breakpoint, scan all neighbors of coordinates at depth-1. 311 * Otherwise, scan all neighbors of unvisited coordinates. 312 */ 313 for (i = bfsarg->start; i < bfsarg->end; i++) { 314 c = get_h48_pval_atomic(bfsarg->table, i, 4); 315 316 if ((bfsarg->depth < breakpoint && c != bfsarg->depth - 1) || 317 (bfsarg->depth >= breakpoint && c != 0xF)) 318 continue; 319 320 cube = invcoord_h48(i, bfsarg->crep, 0); 321 for (m = 0; m < 18; m++) { 322 moved = move(cube, m); 323 j = coord_h48(moved, bfsarg->cocsepdata, 0); 324 c = get_h48_pval_atomic(bfsarg->table, j, 4); 325 if (bfsarg->depth < breakpoint) { 326 if (c <= bfsarg->depth) 327 continue; 328 markarg.cube = moved; 329 gendata_h48_mark_atomic(&markarg); 330 } else { 331 if (c >= bfsarg->depth) 332 continue; 333 markarg.cube = cube; 334 gendata_h48_mark_atomic(&markarg); 335 break; /* Enough to find one, skip the rest */ 336 } 337 } 338 } 339 340 return NULL; 341 } 342 343 STATIC void 344 gendata_h48k2(gendata_h48_arg_t arg[static 1]) 345 { 346 static const uint8_t shortdepth = 8; 347 static const uint64_t capacity = 10000019; 348 static const uint64_t randomizer = 10000079; 349 350 /* 351 * A good base value for the k=2 tables have few positions with value 352 * 0, because those are treated as lower bound 0 and require a second 353 * lookup in another table, and at the same time not too many positions 354 * with value 3, because some of those are under-estimates. 355 * 356 * The following values for the base have been hand-picked. I first 357 * performed some statistics on the frequency of these values, but 358 * they turned out to be unreliable. In the end I generated the same 359 * table with multiple base value and see what was best. 360 * 361 * A curious case is h3, which has this distribution for base 8: 362 * [0] = 6686828 363 * [1] = 63867852 364 * [2] = 392789689 365 * [3] = 477195231 366 * 367 * and this for base 9: 368 * [0] = 70554680 369 * [1] = 392789689 370 * [2] = 462294676 371 * [3] = 14900555 372 * 373 * I ended up picking base 8 to have a much lower count of elements 374 * with value 0, at the cost of a less precise estimate for the higher 375 * values. But I am not 100% confident this is the optimal choice, 376 * so I'll leave it here for future considerations. 377 */ 378 379 static const uint8_t base[] = { 380 [0] = 8, 381 [1] = 8, 382 [2] = 8, 383 [3] = 8, 384 [4] = 9, 385 [5] = 9, 386 [6] = 9, 387 [7] = 9, 388 [8] = 10, 389 [9] = 10, 390 [10] = 10, 391 [11] = 10 392 }; 393 394 uint8_t t; 395 int sleeptime; 396 unsigned char *table; 397 uint64_t j; 398 wrapthread_atomic uint64_t count; 399 uint64_t i, ii, inext, bufsize, done, nshort, velocity; 400 h48map_t shortcubes; 401 gendata_h48short_arg_t shortarg; 402 h48k2_dfs_arg_t dfsarg[THREADS]; 403 wrapthread_define_var_thread_t(thread[THREADS]); 404 wrapthread_define_var_mutex_t(shortcubes_mutex); 405 wrapthread_define_var_mutex_t(table_mutex[CHUNKS]); 406 407 table = (unsigned char *)arg->h48buf + INFOSIZE; 408 memset(table, 0xFF, H48_TABLESIZE(arg->h, arg->k)); 409 410 LOG("[H48 gendata] Computing depth <=%" PRIu8 "\n", shortdepth) 411 h48map_create(&shortcubes, capacity, randomizer); 412 shortarg = (gendata_h48short_arg_t) { 413 .maxdepth = shortdepth, 414 .cocsepdata = arg->cocsepdata, 415 .crep = arg->crep, 416 .selfsim = arg->selfsim, 417 .map = &shortcubes 418 }; 419 gendata_h48short(&shortarg); 420 nshort = shortarg.map->n; 421 LOG("[H48 gendata] Computed %" PRIu64 " positions\n", nshort); 422 423 if (arg->base >= 20) 424 arg->base = base[arg->h]; 425 arg->info = makeinfo_h48k2(arg); 426 427 inext = 0; 428 count = 0; 429 wrapthread_mutex_init(&shortcubes_mutex, NULL); 430 for (i = 0; i < CHUNKS; i++) 431 wrapthread_mutex_init(&table_mutex[i], NULL); 432 for (i = 0; i < THREADS; i++) { 433 dfsarg[i] = (h48k2_dfs_arg_t){ 434 .h = arg->h, 435 .k = arg->k, 436 .base = arg->base, 437 .shortdepth = shortdepth, 438 .cocsepdata = arg->cocsepdata, 439 .table = table, 440 .selfsim = arg->selfsim, 441 .crep = arg->crep, 442 .shortcubes = &shortcubes, 443 .shortcubes_mutex = &shortcubes_mutex, 444 .next = &inext, 445 .count = &count, 446 }; 447 for (ii = 0; ii < CHUNKS; ii++) 448 dfsarg[i].table_mutex[ii] = &table_mutex[ii]; 449 450 wrapthread_create( 451 &thread[i], NULL, gendata_h48k2_runthread, &dfsarg[i]); 452 } 453 454 if (NISSY_CANSLEEP) { 455 /* Log the progress periodically */ 456 LOG("Processing 'short cubes'. This will take a while.\n"); 457 458 /* Estimate velocity by checking how much is done after 1s */ 459 msleep(1000); 460 velocity = count; 461 462 /* We plan to log 10 times */ 463 sleeptime = (100*(nshort-velocity)) / velocity; 464 465 done = count; 466 while (nshort - done > (velocity * sleeptime) / 1000) { 467 msleep(sleeptime); 468 wrapthread_mutex_lock(&shortcubes_mutex); 469 done = count; 470 wrapthread_mutex_unlock(&shortcubes_mutex); 471 LOG("Processed %" PRIu64 " / %" PRIu64 " cubes\n", 472 (done / 1000) * 1000, nshort); 473 } 474 } else { 475 LOG("Status updates won't be available because the sleep() " 476 "functionality is not available on this platform.\n"); 477 } 478 479 for (i = 0; i < THREADS; i++) 480 wrapthread_join(thread[i], NULL); 481 482 h48map_destroy(&shortcubes); 483 484 for (j = 0; j < H48_COORDMAX(arg->h); j++) { 485 t = get_h48_pval(table, j, 2); 486 arg->info.distribution[t]++; 487 } 488 489 bufsize = arg->buf_size - COCSEP_FULLSIZE; 490 writetableinfo(&arg->info, bufsize, (unsigned char *)arg->h48buf); 491 } 492 493 STATIC void * 494 gendata_h48k2_runthread(void *arg) 495 { 496 uint64_t coord; 497 kvpair_t kv; 498 h48k2_dfs_arg_t *dfsarg; 499 wrapthread_define_if_threads(uint64_t, mutex); 500 501 dfsarg = (h48k2_dfs_arg_t *)arg; 502 503 while (true) { 504 wrapthread_mutex_lock(dfsarg->shortcubes_mutex); 505 506 kv = h48map_nextkvpair(dfsarg->shortcubes, dfsarg->next); 507 if (*dfsarg->next == dfsarg->shortcubes->capacity) { 508 wrapthread_mutex_unlock(dfsarg->shortcubes_mutex); 509 break; 510 } 511 (*dfsarg->count)++; 512 wrapthread_mutex_unlock(dfsarg->shortcubes_mutex); 513 514 if (kv.val < dfsarg->shortdepth) { 515 coord = kv.key >> (uint64_t)(11 - dfsarg->h); 516 mutex = H48_INDEX(coord, dfsarg->k) % CHUNKS; 517 wrapthread_mutex_lock(dfsarg->table_mutex[mutex]); 518 set_h48_pval(dfsarg->table, coord, dfsarg->k, 0); 519 wrapthread_mutex_unlock(dfsarg->table_mutex[mutex]); 520 } else { 521 dfsarg->cube = invcoord_h48(kv.key, dfsarg->crep, 11); 522 gendata_h48k2_dfs(dfsarg); 523 } 524 } 525 526 return NULL; 527 } 528 529 STATIC void 530 gendata_h48k2_dfs(h48k2_dfs_arg_t arg[static 1]) 531 { 532 int8_t d; 533 uint8_t m[4]; 534 cube_t cube[4]; 535 gendata_h48_mark_t markarg; 536 537 markarg = (gendata_h48_mark_t) { 538 .h = arg->h, 539 .k = arg->k, 540 .cocsepdata = arg->cocsepdata, 541 .selfsim = arg->selfsim, 542 .table = arg->table, 543 .table_mutex = arg->table_mutex, 544 }; 545 546 d = (int8_t)arg->shortdepth - (int8_t)arg->base; 547 548 /* Depth d+0 (shortcubes) */ 549 markarg.depth = d; 550 markarg.cube = arg->cube; 551 gendata_h48_mark(&markarg); 552 553 /* Depth d+1 */ 554 for (m[0] = 0; m[0] < 18; m[0]++) { 555 markarg.depth = d+1; 556 cube[0] = move(arg->cube, m[0]); 557 if (gendata_h48k2_dfs_stop(cube[0], d+1, arg)) 558 continue; 559 markarg.cube = cube[0]; 560 gendata_h48_mark(&markarg); 561 562 /* Depth d+2 */ 563 for (m[1] = 0; m[1] < 18; m[1]++) { 564 markarg.depth = d+2; 565 if (m[0] / 3 == m[1] / 3) { 566 m[1] += 2; 567 continue; 568 } 569 cube[1] = move(cube[0], m[1]); 570 if (gendata_h48k2_dfs_stop(cube[1], d+2, arg)) 571 continue; 572 markarg.cube = cube[1]; 573 gendata_h48_mark(&markarg); 574 if (d >= 0) 575 continue; 576 577 /* Depth d+3 */ 578 for (m[2] = 0; m[2] < 18; m[2]++) { 579 markarg.depth = d+3; 580 if (!allowednextmove(m[1], m[2])) { 581 m[2] += 2; 582 continue; 583 } 584 cube[2] = move(cube[1], m[2]); 585 if (gendata_h48k2_dfs_stop(cube[2], d+3, arg)) 586 continue; 587 markarg.cube = cube[2]; 588 gendata_h48_mark(&markarg); 589 if (d >= -1) 590 continue; 591 592 /* Depth d+4 */ 593 for (m[3] = 0; m[3] < 18; m[3]++) { 594 markarg.depth = d+4; 595 if (!allowednextmove(m[2], m[3])) { 596 m[3] += 2; 597 continue; 598 } 599 cube[3] = move(cube[2], m[3]); 600 markarg.cube = cube[3]; 601 gendata_h48_mark(&markarg); 602 } 603 } 604 } 605 } 606 } 607 608 STATIC_INLINE void 609 gendata_h48_mark_atomic(gendata_h48_mark_t arg[static 1]) 610 { 611 uint8_t oldval, newval; 612 uint64_t coord; 613 wrapthread_define_if_threads(uint64_t, mutex); 614 615 FOREACH_H48SIM(arg->cube, arg->cocsepdata, arg->selfsim, 616 coord = coord_h48(arg->cube, arg->cocsepdata, arg->h); 617 oldval = get_h48_pval_atomic(arg->table_atomic, coord, arg->k); 618 newval = (uint8_t)MAX(arg->depth, 0); 619 if (newval < oldval) { 620 mutex = H48_INDEX(coord, arg->k) % CHUNKS; 621 wrapthread_mutex_lock(arg->table_mutex[mutex]); 622 set_h48_pval_atomic( 623 arg->table_atomic, coord, arg->k, newval); 624 wrapthread_mutex_unlock(arg->table_mutex[mutex]); 625 } 626 ) 627 } 628 629 STATIC_INLINE void 630 gendata_h48_mark(gendata_h48_mark_t arg[static 1]) 631 { 632 uint8_t oldval, newval; 633 uint64_t coord; 634 wrapthread_define_if_threads(uint64_t, mutex); 635 636 FOREACH_H48SIM(arg->cube, arg->cocsepdata, arg->selfsim, 637 coord = coord_h48(arg->cube, arg->cocsepdata, arg->h); 638 mutex = H48_INDEX(coord, arg->k) % CHUNKS; 639 wrapthread_mutex_lock(arg->table_mutex[mutex]); 640 oldval = get_h48_pval(arg->table, coord, arg->k); 641 newval = (uint8_t)MAX(arg->depth, 0); 642 set_h48_pval(arg->table, coord, arg->k, MIN(newval, oldval)); 643 wrapthread_mutex_unlock(arg->table_mutex[mutex]); 644 ) 645 } 646 647 STATIC_INLINE bool 648 gendata_h48k2_dfs_stop(cube_t cube, int8_t d, h48k2_dfs_arg_t arg[static 1]) 649 { 650 uint64_t val; 651 uint64_t coord; 652 wrapthread_define_if_threads(uint64_t, mutex); 653 int8_t oldval; 654 655 if (arg->h == 0 || arg->h == 11) { 656 /* We are in the "real coordinate" case, we can stop 657 if this coordinate has already been visited */ 658 coord = coord_h48(cube, arg->cocsepdata, arg->h); 659 mutex = H48_INDEX(coord, arg->k) % CHUNKS; 660 wrapthread_mutex_lock(arg->table_mutex[mutex]); 661 oldval = get_h48_pval(arg->table, coord, arg->k); 662 wrapthread_mutex_unlock(arg->table_mutex[mutex]); 663 return oldval <= d; 664 } else { 665 /* With 0 < k < 11 we do not have a "real coordinate". 666 The best we can do is checking if we backtracked to 667 one of the "short cubes". */ 668 coord = coord_h48(cube, arg->cocsepdata, 11); 669 val = h48map_value(arg->shortcubes, coord); 670 return val <= arg->shortdepth; 671 } 672 } 673 674 STATIC tableinfo_t 675 makeinfo_h48k2(gendata_h48_arg_t arg[static 1]) 676 { 677 tableinfo_t info; 678 679 info = (tableinfo_t) { 680 .solver = "h48 solver h = , k = 2", 681 .type = TABLETYPE_PRUNING, 682 .infosize = INFOSIZE, 683 .fullsize = H48_TABLESIZE(arg->h, 2) + INFOSIZE, 684 .hash = 0, 685 .entries = H48_COORDMAX(arg->h), 686 .classes = 0, 687 .h48h = arg->h, 688 .bits = 2, 689 .base = arg->base, 690 .maxvalue = 3, 691 .next = 0, 692 }; 693 info.solver[15] = (arg->h % 10) + '0'; 694 if (arg->h >= 10) 695 info.solver[14] = (arg->h / 10) + '0'; 696 697 return info; 698 } 699 700 STATIC const uint32_t * 701 get_cocsepdata_constptr(const unsigned char *data) 702 { 703 return (uint32_t *)(data + INFOSIZE); 704 } 705 706 STATIC const unsigned char * 707 get_h48data_constptr(const unsigned char *data) 708 { 709 return data + COCSEP_FULLSIZE + INFOSIZE; 710 } 711 712 STATIC_INLINE uint8_t 713 get_h48_pval(const unsigned char *table, uint64_t i, uint8_t k) 714 { 715 return (table[H48_INDEX(i, k)] & H48_MASK(i, k)) >> H48_SHIFT(i, k); 716 } 717 718 STATIC_INLINE uint8_t 719 get_h48_pval_atomic( 720 wrapthread_atomic const unsigned char *table, 721 uint64_t i, 722 uint8_t k 723 ) 724 { 725 return (table[H48_INDEX(i, k)] & H48_MASK(i, k)) >> H48_SHIFT(i, k); 726 } 727 728 STATIC_INLINE void 729 set_h48_pval(unsigned char *table, uint64_t i, uint8_t k, uint8_t val) 730 { 731 table[H48_INDEX(i, k)] = (table[H48_INDEX(i, k)] & (~H48_MASK(i, k))) 732 | (val << H48_SHIFT(i, k)); 733 } 734 735 STATIC_INLINE void 736 set_h48_pval_atomic( 737 wrapthread_atomic unsigned char *table, 738 uint64_t i, 739 uint8_t k, 740 uint8_t val 741 ) 742 { 743 table[H48_INDEX(i, k)] = (table[H48_INDEX(i, k)] & (~H48_MASK(i, k))) 744 | (val << H48_SHIFT(i, k)); 745 }