gendata_h48.h (19456B)
1 STATIC long long gendata_h48_dispatch( 2 const char *, unsigned long long, unsigned char *); 3 STATIC uint64_t gendata_h48short(gendata_h48short_arg_t [static 1]); 4 STATIC int64_t gendata_h48(gendata_h48_arg_t [static 1]); 5 STATIC void gendata_h48h0k4(gendata_h48_arg_t [static 1]); 6 STATIC void gendata_h48k2(gendata_h48_arg_t [static 1]); 7 8 STATIC void *gendata_h48h0k4_runthread(void *); 9 STATIC void *gendata_h48k2_runthread(void *); 10 11 STATIC_INLINE void gendata_h48_mark_atomic(gendata_h48_mark_t [static 1]); 12 STATIC_INLINE void gendata_h48_mark(gendata_h48_mark_t [static 1]); 13 STATIC_INLINE bool gendata_h48k2_dfs_stop( 14 cube_t, int8_t, h48k2_dfs_arg_t [static 1]); 15 STATIC void gendata_h48k2_dfs(h48k2_dfs_arg_t [static 1]); 16 STATIC tableinfo_t makeinfo_h48k2(gendata_h48_arg_t [static 1]); 17 18 STATIC const uint32_t *get_cocsepdata_constptr(const unsigned char *); 19 STATIC const unsigned char *get_h48data_constptr(const unsigned char *); 20 21 STATIC_INLINE uint8_t get_h48_pval(const unsigned char *, int64_t, uint8_t); 22 STATIC_INLINE void set_h48_pval(unsigned char *, int64_t, uint8_t, uint8_t); 23 STATIC_INLINE uint8_t get_h48_pval_atomic( 24 _Atomic const unsigned char *, int64_t, uint8_t); 25 STATIC_INLINE void set_h48_pval_atomic( 26 _Atomic unsigned char *, int64_t, uint8_t, uint8_t); 27 28 STATIC long long 29 gendata_h48_dispatch( 30 const char *solver, 31 unsigned long long data_size, 32 unsigned char *data 33 ) 34 { 35 long long err; 36 gendata_h48_arg_t arg; 37 38 err = parse_h48_hk(solver, &arg.h, &arg.k); 39 if (err != NISSY_OK) 40 return err; 41 42 arg.buf_size = data_size; 43 arg.buf = data; 44 arg.maxdepth = 20; 45 46 return gendata_h48(&arg); 47 } 48 49 STATIC uint64_t 50 gendata_h48short(gendata_h48short_arg_t arg[static 1]) 51 { 52 uint8_t i, m; 53 int64_t coord; 54 uint64_t j; 55 kvpair_t kv; 56 cube_t cube, d; 57 58 cube = SOLVED_CUBE; 59 coord = coord_h48(cube, arg->cocsepdata, 11); 60 h48map_insertmin(arg->map, coord, 0); 61 for (i = 0; i < arg->maxdepth; i++) { 62 j = 0; 63 for (kv = h48map_nextkvpair(arg->map, &j); 64 j != arg->map->capacity; 65 kv = h48map_nextkvpair(arg->map, &j) 66 ) { 67 if (kv.val != i) 68 continue; 69 cube = invcoord_h48(kv.key, arg->crep, 11); 70 for (m = 0; m < 18; m++) { 71 d = move(cube, m); 72 FOREACH_H48SIM(d, arg->cocsepdata, arg->selfsim, 73 coord = coord_h48(d, arg->cocsepdata, 11); 74 h48map_insertmin(arg->map, coord, i+1); 75 ) 76 } 77 } 78 } 79 80 return arg->map->n; 81 } 82 83 STATIC int64_t 84 gendata_h48(gendata_h48_arg_t arg[static 1]) 85 { 86 uint64_t size, cocsepsize, h48size, fallbacksize, fallback2size, of; 87 long long r; 88 unsigned char *cocsepdata_offset; 89 tableinfo_t cocsepinfo, h48info, fallbackinfo; 90 gendata_h48_arg_t arg_h0k4; 91 92 cocsepsize = COCSEP_FULLSIZE; 93 h48size = INFOSIZE + H48_TABLESIZE(arg->h, arg->k); 94 fallbacksize = arg->k == 2 ? INFOSIZE + H48_TABLESIZE(0, 4) : 0; 95 fallback2size = EOESEP_FULLSIZE; 96 97 /* Add padding for 8-bit alignment */ 98 h48size = 8 * DIV_ROUND_UP(h48size, 8); 99 fallbacksize = 8 * DIV_ROUND_UP(fallbacksize, 8); 100 fallback2size = 8 * DIV_ROUND_UP(fallback2size, 8); 101 102 size = cocsepsize + h48size + fallbacksize + fallback2size; 103 104 if (arg->buf == NULL) 105 return size; /* Dry-run */ 106 107 if (arg->buf_size < size) { 108 LOG("[H48 gendata] Error: buffer is too small " 109 "(needed %" PRId64 " bytes but received %" PRId64 ")\n", 110 size, arg->buf_size); 111 return NISSY_ERROR_BUFFER_SIZE; 112 } 113 114 gendata_cocsep(arg->buf, arg->selfsim, arg->crep); 115 116 cocsepdata_offset = arg->buf + INFOSIZE; 117 arg->cocsepdata = (uint32_t *)cocsepdata_offset; 118 arg->h48buf = (_Atomic unsigned char*)arg->buf + cocsepsize; 119 120 arg->base = 99; /* TODO: set this somewhere else */ 121 122 if (arg->h == 0 && arg->k == 4) { 123 gendata_h48h0k4(arg); 124 } else if (arg->k == 2) { 125 gendata_h48k2(arg); 126 } else { 127 LOG("[H48 gendata] Error: cannot generate data for h = %" PRIu8 128 " and k = %" PRIu8 " (not implemented yet)\n", 129 arg->h, arg->k); 130 return NISSY_ERROR_INVALID_SOLVER; 131 } 132 133 r = readtableinfo(arg->buf_size, arg->buf, &cocsepinfo); 134 if (r != NISSY_OK) { 135 LOG("[H48 gendata] Error: could not read info " 136 "for cocsep table\n"); 137 return NISSY_ERROR_UNKNOWN; 138 } 139 140 cocsepinfo.next = cocsepsize; 141 r = writetableinfo(&cocsepinfo, arg->buf_size, arg->buf); 142 if (r != NISSY_OK) { 143 LOG("[H48 gendata] Error: could not write info for " 144 "cocsep table with updated 'next' value\n"); 145 return NISSY_ERROR_UNKNOWN; 146 } 147 148 /* Add h0k4 fallback table */ 149 150 if (arg->k == 2) { 151 arg_h0k4 = *arg; 152 arg_h0k4.h = 0; 153 arg_h0k4.k = 4; 154 arg_h0k4.base = 0; 155 arg_h0k4.maxdepth = 20; 156 arg_h0k4.buf_size = arg->buf_size - h48size; 157 arg_h0k4.buf = arg->buf + cocsepsize + h48size; 158 arg_h0k4.h48buf = arg->h48buf + h48size; 159 160 gendata_h48h0k4(&arg_h0k4); 161 162 } 163 164 /* Add eoesep fallback table */ 165 166 gendata_eoesep(arg->buf + (size - fallback2size), 20); 167 168 /* Update tableinfo with correct next values */ 169 170 r = readtableinfo_n(arg->buf_size, arg->buf, 2, &h48info); 171 if (r != NISSY_OK) { 172 LOG("[H48 gendata] Error: could not read info " 173 "for h48 table\n"); 174 return NISSY_ERROR_UNKNOWN; 175 } 176 h48info.next = h48size; 177 r = writetableinfo(&h48info, 178 arg->buf_size - cocsepsize, arg->buf + cocsepsize); 179 if (r != NISSY_OK) { 180 LOG("[H48 gendata] Error: could not write info " 181 "for h48 table\n"); 182 return NISSY_ERROR_UNKNOWN; 183 } 184 185 if (arg->k == 2) { 186 r = readtableinfo_n(arg->buf_size, arg->buf, 3, &fallbackinfo); 187 if (r != NISSY_OK) { 188 LOG("[H48 gendata] Error: could not read info for h48 " 189 "fallback table\n"); 190 return NISSY_ERROR_UNKNOWN; 191 } 192 193 of = cocsepsize + h48size; 194 fallbackinfo.next = fallbacksize; 195 r = writetableinfo( 196 &fallbackinfo, arg->buf_size - of, arg->buf + of); 197 if (r != NISSY_OK) { 198 LOG("[H48 gendata] Error: could not write info for " 199 "h48 fallback table\n"); 200 return NISSY_ERROR_UNKNOWN; 201 } 202 } 203 204 return size; 205 } 206 207 STATIC void 208 gendata_h48h0k4(gendata_h48_arg_t arg[static 1]) 209 { 210 _Atomic unsigned char *table; 211 uint8_t val; 212 int64_t i, sc, done, d, h48max; 213 uint64_t t, tt, isize, cc, bufsize; 214 h48h0k4_bfs_arg_t bfsarg[THREADS]; 215 pthread_t thread[THREADS]; 216 pthread_mutex_t table_mutex[CHUNKS]; 217 218 arg->info = (tableinfo_t) { 219 .solver = "h48 solver h = 0, k = 4", 220 .type = TABLETYPE_PRUNING, 221 .infosize = INFOSIZE, 222 .fullsize = H48_TABLESIZE(0, 4) + INFOSIZE, 223 .hash = 0, /* TODO */ 224 .entries = H48_COORDMAX(0), 225 .classes = 0, 226 .h48h = 0, 227 .bits = 4, 228 .base = 0, 229 .maxvalue = 0, 230 .next = 0, 231 }; 232 233 table = arg->h48buf + INFOSIZE; 234 memset(table, 0xFF, H48_TABLESIZE(0, 4)); 235 236 h48max = (int64_t)H48_COORDMAX(0); 237 sc = coord_h48(SOLVED_CUBE, arg->cocsepdata, 0); 238 set_h48_pval_atomic(table, sc, 4, 0); 239 arg->info.distribution[0] = 1; 240 241 isize = h48max / THREADS; 242 isize = (isize / H48_COEFF(arg->k)) * H48_COEFF(arg->k); 243 for (t = 0; t < CHUNKS; t++) 244 pthread_mutex_init(&table_mutex[t], NULL); 245 for (t = 0; t < THREADS; t++) { 246 bfsarg[t] = (h48h0k4_bfs_arg_t) { 247 .cocsepdata = arg->cocsepdata, 248 .table = table, 249 .selfsim = arg->selfsim, 250 .crep = arg->crep, 251 .start = isize * t, 252 .end = t == THREADS-1 ? (uint64_t)h48max : isize * (t+1), 253 }; 254 for (tt = 0; tt < CHUNKS; tt++) 255 bfsarg[t].table_mutex[tt] = &table_mutex[tt]; 256 } 257 for (done = 1, d = 1; done < h48max && d <= arg->maxdepth; d++) { 258 LOG("[H48 gendata] Generating depth %" PRId64 "\n", d); 259 260 for (t = 0; t < THREADS; t++) { 261 bfsarg[t].depth = d; 262 pthread_create(&thread[t], NULL, 263 gendata_h48h0k4_runthread, &bfsarg[t]); 264 } 265 266 for (t = 0; t < THREADS; t++) 267 pthread_join(thread[t], NULL); 268 269 for (i = 0, cc = 0; i < h48max; i++) { 270 val = get_h48_pval_atomic(table, i, 4); 271 cc += val == d; 272 } 273 274 done += cc; 275 arg->info.distribution[d] = cc; 276 277 LOG("[H48 gendata] Found %" PRId64 "\n", cc); 278 } 279 280 arg->info.maxvalue = d - 1; 281 bufsize = arg->buf_size - COCSEP_FULLSIZE; 282 writetableinfo(&arg->info, bufsize, (unsigned char *)arg->h48buf); 283 } 284 285 STATIC void * 286 gendata_h48h0k4_runthread(void *arg) 287 { 288 static const uint8_t breakpoint = 10; /* Hand-picked optimal */ 289 290 uint8_t c, m; 291 uint64_t i; 292 int64_t j; 293 cube_t cube, moved; 294 gendata_h48_mark_t markarg; 295 h48h0k4_bfs_arg_t *bfsarg; 296 297 bfsarg = (h48h0k4_bfs_arg_t *)arg; 298 299 markarg = (gendata_h48_mark_t) { 300 .depth = bfsarg->depth, 301 .h = 0, 302 .k = 4, 303 .cocsepdata = bfsarg->cocsepdata, 304 .selfsim = bfsarg->selfsim, 305 .table_atomic = bfsarg->table, 306 .table_mutex = bfsarg->table_mutex, 307 }; 308 309 /* 310 * If depth < breakpoint, scan all neighbors of coordinates at depth-1. 311 * Otherwise, scan all neighbors of unvisited coordinates. 312 */ 313 for (i = bfsarg->start; i < bfsarg->end; i++) { 314 c = get_h48_pval_atomic(bfsarg->table, i, 4); 315 316 if ((bfsarg->depth < breakpoint && c != bfsarg->depth - 1) || 317 (bfsarg->depth >= breakpoint && c != 0xF)) 318 continue; 319 320 cube = invcoord_h48(i, bfsarg->crep, 0); 321 for (m = 0; m < 18; m++) { 322 moved = move(cube, m); 323 j = coord_h48(moved, bfsarg->cocsepdata, 0); 324 c = get_h48_pval_atomic(bfsarg->table, j, 4); 325 if (bfsarg->depth < breakpoint) { 326 if (c <= bfsarg->depth) 327 continue; 328 markarg.cube = moved; 329 gendata_h48_mark_atomic(&markarg); 330 } else { 331 if (c >= bfsarg->depth) 332 continue; 333 markarg.cube = cube; 334 gendata_h48_mark_atomic(&markarg); 335 break; /* Enough to find one, skip the rest */ 336 } 337 } 338 } 339 340 return NULL; 341 } 342 343 STATIC void 344 gendata_h48k2(gendata_h48_arg_t arg[static 1]) 345 { 346 static const uint8_t shortdepth = 8; 347 static const uint64_t capacity = 10000019; 348 static const uint64_t randomizer = 10000079; 349 350 /* 351 * A good base value for the k=2 tables have few positions with value 352 * 0, because those are treated as lower bound 0 and require a second 353 * lookup in another table, and at the same time not too many positions 354 * with value 3, because some of those are under-estimates. 355 * 356 * The following values for the base have been hand-picked. I first 357 * performed some statistics on the frequency of these values, but 358 * they turned out to be unreliable. In the end I generated the same 359 * table with multiple base value and see what was best. 360 * 361 * A curious case is h3, which has this distribution for base 8: 362 * [0] = 6686828 363 * [1] = 63867852 364 * [2] = 392789689 365 * [3] = 477195231 366 * 367 * and this for base 9: 368 * [0] = 70554680 369 * [1] = 392789689 370 * [2] = 462294676 371 * [3] = 14900555 372 * 373 * I ended up picking base 8 to have a much lower count of elements 374 * with value 0, at the cost of a less precise estimate for the higher 375 * values. But I am not 100% confident this is the optimal choice, 376 * so I'll leave it here for future considerations. 377 */ 378 379 static const uint8_t base[] = { 380 [0] = 8, 381 [1] = 8, 382 [2] = 8, 383 [3] = 8, 384 [4] = 9, 385 [5] = 9, 386 [6] = 9, 387 [7] = 9, 388 [8] = 10, 389 [9] = 10, 390 [10] = 10, 391 [11] = 10 392 }; 393 394 uint8_t t; 395 int sleeptime; 396 unsigned char *table; 397 int64_t j; 398 _Atomic uint64_t count; 399 uint64_t i, ii, inext, bufsize, done, nshort, velocity; 400 h48map_t shortcubes; 401 gendata_h48short_arg_t shortarg; 402 h48k2_dfs_arg_t dfsarg[THREADS]; 403 pthread_t thread[THREADS]; 404 pthread_mutex_t shortcubes_mutex, table_mutex[CHUNKS]; 405 406 table = (unsigned char *)arg->h48buf + INFOSIZE; 407 memset(table, 0xFF, H48_TABLESIZE(arg->h, arg->k)); 408 409 LOG("[H48 gendata] Computing depth <=%" PRIu8 "\n", shortdepth) 410 h48map_create(&shortcubes, capacity, randomizer); 411 shortarg = (gendata_h48short_arg_t) { 412 .maxdepth = shortdepth, 413 .cocsepdata = arg->cocsepdata, 414 .crep = arg->crep, 415 .selfsim = arg->selfsim, 416 .map = &shortcubes 417 }; 418 gendata_h48short(&shortarg); 419 nshort = shortarg.map->n; 420 LOG("[H48 gendata] Computed %" PRIu64 " positions\n", nshort); 421 422 if (arg->base >= 20) 423 arg->base = base[arg->h]; 424 arg->info = makeinfo_h48k2(arg); 425 426 inext = 0; 427 count = 0; 428 pthread_mutex_init(&shortcubes_mutex, NULL); 429 for (i = 0; i < CHUNKS; i++) 430 pthread_mutex_init(&table_mutex[i], NULL); 431 for (i = 0; i < THREADS; i++) { 432 dfsarg[i] = (h48k2_dfs_arg_t){ 433 .h = arg->h, 434 .k = arg->k, 435 .base = arg->base, 436 .shortdepth = shortdepth, 437 .cocsepdata = arg->cocsepdata, 438 .table = table, 439 .selfsim = arg->selfsim, 440 .crep = arg->crep, 441 .shortcubes = &shortcubes, 442 .shortcubes_mutex = &shortcubes_mutex, 443 .next = &inext, 444 .count = &count, 445 }; 446 for (ii = 0; ii < CHUNKS; ii++) 447 dfsarg[i].table_mutex[ii] = &table_mutex[ii]; 448 449 pthread_create( 450 &thread[i], NULL, gendata_h48k2_runthread, &dfsarg[i]); 451 } 452 453 if (NISSY_CANSLEEP) { 454 /* Log the progress periodically */ 455 LOG("Processing 'short cubes'. This will take a while.\n"); 456 457 /* Estimate velocity by checking how much is done after 1s */ 458 msleep(1000); 459 velocity = count; 460 461 /* We plan to log 10 times */ 462 sleeptime = (100*(nshort-velocity)) / velocity; 463 464 done = count; 465 while (nshort - done > (velocity * sleeptime) / 1000) { 466 msleep(sleeptime); 467 pthread_mutex_lock(&shortcubes_mutex); 468 done = count; 469 pthread_mutex_unlock(&shortcubes_mutex); 470 LOG("Processed %" PRIu64 " / %" PRIu64 " cubes\n", 471 (done / 1000) * 1000, nshort); 472 } 473 } else { 474 LOG("Status updates won't be available because the sleep() " 475 "functionality is not available on this platform.\n"); 476 } 477 478 for (i = 0; i < THREADS; i++) 479 pthread_join(thread[i], NULL); 480 481 h48map_destroy(&shortcubes); 482 483 for (j = 0; j < H48_COORDMAX(arg->h); j++) { 484 t = get_h48_pval(table, j, 2); 485 arg->info.distribution[t]++; 486 } 487 488 bufsize = arg->buf_size - COCSEP_FULLSIZE; 489 writetableinfo(&arg->info, bufsize, (unsigned char *)arg->h48buf); 490 } 491 492 STATIC void * 493 gendata_h48k2_runthread(void *arg) 494 { 495 uint64_t coord, mutex; 496 kvpair_t kv; 497 h48k2_dfs_arg_t *dfsarg; 498 499 dfsarg = (h48k2_dfs_arg_t *)arg; 500 501 while (true) { 502 pthread_mutex_lock(dfsarg->shortcubes_mutex); 503 504 kv = h48map_nextkvpair(dfsarg->shortcubes, dfsarg->next); 505 if (*dfsarg->next == dfsarg->shortcubes->capacity) { 506 pthread_mutex_unlock(dfsarg->shortcubes_mutex); 507 break; 508 } 509 (*dfsarg->count)++; 510 pthread_mutex_unlock(dfsarg->shortcubes_mutex); 511 512 if (kv.val < dfsarg->shortdepth) { 513 coord = kv.key >> (int64_t)(11 - dfsarg->h); 514 mutex = H48_INDEX(coord, dfsarg->k) % CHUNKS; 515 pthread_mutex_lock(dfsarg->table_mutex[mutex]); 516 set_h48_pval(dfsarg->table, coord, dfsarg->k, 0); 517 pthread_mutex_unlock(dfsarg->table_mutex[mutex]); 518 } else { 519 dfsarg->cube = invcoord_h48(kv.key, dfsarg->crep, 11); 520 gendata_h48k2_dfs(dfsarg); 521 } 522 } 523 524 return NULL; 525 } 526 527 STATIC void 528 gendata_h48k2_dfs(h48k2_dfs_arg_t arg[static 1]) 529 { 530 int8_t d; 531 uint8_t m[4]; 532 cube_t cube[4]; 533 gendata_h48_mark_t markarg; 534 535 markarg = (gendata_h48_mark_t) { 536 .h = arg->h, 537 .k = arg->k, 538 .cocsepdata = arg->cocsepdata, 539 .selfsim = arg->selfsim, 540 .table = arg->table, 541 .table_mutex = arg->table_mutex, 542 }; 543 544 d = (int8_t)arg->shortdepth - (int8_t)arg->base; 545 546 /* Depth d+0 (shortcubes) */ 547 markarg.depth = d; 548 markarg.cube = arg->cube; 549 gendata_h48_mark(&markarg); 550 551 /* Depth d+1 */ 552 for (m[0] = 0; m[0] < 18; m[0]++) { 553 markarg.depth = d+1; 554 cube[0] = move(arg->cube, m[0]); 555 if (gendata_h48k2_dfs_stop(cube[0], d+1, arg)) 556 continue; 557 markarg.cube = cube[0]; 558 gendata_h48_mark(&markarg); 559 560 /* Depth d+2 */ 561 for (m[1] = 0; m[1] < 18; m[1]++) { 562 markarg.depth = d+2; 563 if (m[0] / 3 == m[1] / 3) { 564 m[1] += 2; 565 continue; 566 } 567 cube[1] = move(cube[0], m[1]); 568 if (gendata_h48k2_dfs_stop(cube[1], d+2, arg)) 569 continue; 570 markarg.cube = cube[1]; 571 gendata_h48_mark(&markarg); 572 if (d >= 0) 573 continue; 574 575 /* Depth d+3 */ 576 for (m[2] = 0; m[2] < 18; m[2]++) { 577 markarg.depth = d+3; 578 if (!allowednextmove(m[1], m[2])) { 579 m[2] += 2; 580 continue; 581 } 582 cube[2] = move(cube[1], m[2]); 583 if (gendata_h48k2_dfs_stop(cube[2], d+3, arg)) 584 continue; 585 markarg.cube = cube[2]; 586 gendata_h48_mark(&markarg); 587 if (d >= -1) 588 continue; 589 590 /* Depth d+4 */ 591 for (m[3] = 0; m[3] < 18; m[3]++) { 592 markarg.depth = d+4; 593 if (!allowednextmove(m[2], m[3])) { 594 m[3] += 2; 595 continue; 596 } 597 cube[3] = move(cube[2], m[3]); 598 markarg.cube = cube[3]; 599 gendata_h48_mark(&markarg); 600 } 601 } 602 } 603 } 604 } 605 606 STATIC_INLINE void 607 gendata_h48_mark_atomic(gendata_h48_mark_t arg[static 1]) 608 { 609 uint8_t oldval, newval; 610 int64_t coord, mutex; 611 612 FOREACH_H48SIM(arg->cube, arg->cocsepdata, arg->selfsim, 613 coord = coord_h48(arg->cube, arg->cocsepdata, arg->h); 614 oldval = get_h48_pval_atomic(arg->table_atomic, coord, arg->k); 615 newval = (uint8_t)MAX(arg->depth, 0); 616 if (newval < oldval) { 617 mutex = H48_INDEX(coord, arg->k) % CHUNKS; 618 pthread_mutex_lock(arg->table_mutex[mutex]); 619 set_h48_pval_atomic( 620 arg->table_atomic, coord, arg->k, newval); 621 pthread_mutex_unlock(arg->table_mutex[mutex]); 622 } 623 ) 624 } 625 626 STATIC_INLINE void 627 gendata_h48_mark(gendata_h48_mark_t arg[static 1]) 628 { 629 uint8_t oldval, newval; 630 int64_t coord, mutex; 631 632 FOREACH_H48SIM(arg->cube, arg->cocsepdata, arg->selfsim, 633 coord = coord_h48(arg->cube, arg->cocsepdata, arg->h); 634 mutex = H48_INDEX(coord, arg->k) % CHUNKS; 635 pthread_mutex_lock(arg->table_mutex[mutex]); 636 oldval = get_h48_pval(arg->table, coord, arg->k); 637 newval = (uint8_t)MAX(arg->depth, 0); 638 set_h48_pval(arg->table, coord, arg->k, MIN(newval, oldval)); 639 pthread_mutex_unlock(arg->table_mutex[mutex]); 640 ) 641 } 642 643 STATIC_INLINE bool 644 gendata_h48k2_dfs_stop(cube_t cube, int8_t d, h48k2_dfs_arg_t arg[static 1]) 645 { 646 uint64_t val; 647 int64_t coord, mutex; 648 int8_t oldval; 649 650 if (arg->h == 0 || arg->h == 11) { 651 /* We are in the "real coordinate" case, we can stop 652 if this coordinate has already been visited */ 653 coord = coord_h48(cube, arg->cocsepdata, arg->h); 654 mutex = H48_INDEX(coord, arg->k) % CHUNKS; 655 pthread_mutex_lock(arg->table_mutex[mutex]); 656 oldval = get_h48_pval(arg->table, coord, arg->k); 657 pthread_mutex_unlock(arg->table_mutex[mutex]); 658 return oldval <= d; 659 } else { 660 /* With 0 < k < 11 we do not have a "real coordinate". 661 The best we can do is checking if we backtracked to 662 one of the "short cubes". */ 663 coord = coord_h48(cube, arg->cocsepdata, 11); 664 val = h48map_value(arg->shortcubes, coord); 665 return val <= arg->shortdepth; 666 } 667 } 668 669 STATIC tableinfo_t 670 makeinfo_h48k2(gendata_h48_arg_t arg[static 1]) 671 { 672 tableinfo_t info; 673 674 info = (tableinfo_t) { 675 .solver = "h48 solver h = , k = 2", 676 .type = TABLETYPE_PRUNING, 677 .infosize = INFOSIZE, 678 .fullsize = H48_TABLESIZE(arg->h, 2) + INFOSIZE, 679 .hash = 0, /* TODO */ 680 .entries = H48_COORDMAX(arg->h), 681 .classes = 0, 682 .h48h = arg->h, 683 .bits = 2, 684 .base = arg->base, 685 .maxvalue = 3, 686 .next = 0, 687 }; 688 info.solver[15] = (arg->h % 10) + '0'; 689 if (arg->h >= 10) 690 info.solver[14] = (arg->h / 10) + '0'; 691 692 return info; 693 } 694 695 STATIC const uint32_t * 696 get_cocsepdata_constptr(const unsigned char *data) 697 { 698 return (uint32_t *)(data + INFOSIZE); 699 } 700 701 STATIC const unsigned char * 702 get_h48data_constptr(const unsigned char *data) 703 { 704 return data + COCSEP_FULLSIZE + INFOSIZE; 705 } 706 707 STATIC_INLINE uint8_t 708 get_h48_pval(const unsigned char *table, int64_t i, uint8_t k) 709 { 710 return (table[H48_INDEX(i, k)] & H48_MASK(i, k)) >> H48_SHIFT(i, k); 711 } 712 713 STATIC_INLINE uint8_t 714 get_h48_pval_atomic(_Atomic const unsigned char *table, int64_t i, uint8_t k) 715 { 716 return (table[H48_INDEX(i, k)] & H48_MASK(i, k)) >> H48_SHIFT(i, k); 717 } 718 719 STATIC_INLINE void 720 set_h48_pval(unsigned char *table, int64_t i, uint8_t k, uint8_t val) 721 { 722 table[H48_INDEX(i, k)] = (table[H48_INDEX(i, k)] & (~H48_MASK(i, k))) 723 | (val << H48_SHIFT(i, k)); 724 } 725 726 STATIC_INLINE void 727 set_h48_pval_atomic( 728 _Atomic unsigned char *table, 729 int64_t i, 730 uint8_t k, 731 uint8_t val 732 ) 733 { 734 table[H48_INDEX(i, k)] = (table[H48_INDEX(i, k)] & (~H48_MASK(i, k))) 735 | (val << H48_SHIFT(i, k)); 736 }