gendata_h48.h (22702B)
1 STATIC uint64_t genddfggta_h48short(gendata_h48short_arg_t [static 1]); 2 STATIC int64_t gendata_h48(gendata_h48_arg_t [static 1]); 3 STATIC void gendata_h48h0k4(gendata_h48_arg_t [static 1]); 4 STATIC void gendata_h48k2(gendata_h48_arg_t [static 1]); 5 6 STATIC void *gendata_h48h0k4_runthread(void *); 7 STATIC void *gendata_h48k2_runthread(void *); 8 9 STATIC_INLINE void gendata_h48_mark_atomic(gendata_h48_mark_t [static 1]); 10 STATIC_INLINE void gendata_h48_mark(gendata_h48_mark_t [static 1]); 11 STATIC_INLINE bool gendata_h48k2_dfs_stop( 12 cube_t, int8_t, h48k2_dfs_arg_t [static 1]); 13 STATIC void gendata_h48k2_dfs(h48k2_dfs_arg_t [static 1]); 14 STATIC tableinfo_t makeinfo_h48k2(gendata_h48_arg_t [static 1]); 15 STATIC void *getdistribution_h48_runthread(void *); 16 STATIC void getdistribution_h48(const unsigned char *, 17 uint64_t [static INFO_DISTRIBUTION_LEN], uint8_t, uint8_t); 18 19 STATIC const uint32_t *get_cocsepdata_constptr(const unsigned char *); 20 STATIC const unsigned char *get_h48data_constptr(const unsigned char *); 21 22 STATIC_INLINE uint8_t get_h48_pval(const unsigned char *, int64_t, uint8_t); 23 STATIC_INLINE void set_h48_pval(unsigned char *, int64_t, uint8_t, uint8_t); 24 STATIC_INLINE uint8_t get_h48_pval_atomic( 25 _Atomic const unsigned char *, int64_t, uint8_t); 26 STATIC_INLINE void set_h48_pval_atomic( 27 _Atomic unsigned char *, int64_t, uint8_t, uint8_t); 28 29 size_t gendata_h48_derive(uint8_t, const unsigned char *, unsigned char *); 30 31 STATIC uint64_t 32 gendata_h48short(gendata_h48short_arg_t arg[static 1]) 33 { 34 uint8_t i, m; 35 int64_t coord; 36 uint64_t j; 37 kvpair_t kv; 38 cube_t cube, d; 39 40 cube = SOLVED_CUBE; 41 coord = coord_h48(cube, arg->cocsepdata, 11); 42 h48map_insertmin(arg->map, coord, 0); 43 for (i = 0; i < arg->maxdepth; i++) { 44 j = 0; 45 for (kv = h48map_nextkvpair(arg->map, &j); 46 j != arg->map->capacity; 47 kv = h48map_nextkvpair(arg->map, &j) 48 ) { 49 if (kv.val != i) 50 continue; 51 cube = invcoord_h48(kv.key, arg->crep, 11); 52 for (m = 0; m < 18; m++) { 53 d = move(cube, m); 54 FOREACH_H48SIM(d, arg->cocsepdata, arg->selfsim, 55 coord = coord_h48(d, arg->cocsepdata, 11); 56 h48map_insertmin(arg->map, coord, i+1); 57 ) 58 } 59 } 60 } 61 62 return arg->map->n; 63 } 64 65 STATIC int64_t 66 gendata_h48(gendata_h48_arg_t arg[static 1]) 67 { 68 uint64_t size, cocsepsize, h48size, fallbacksize, fallback2size, of; 69 long long r; 70 unsigned char *cocsepdata_offset; 71 tableinfo_t cocsepinfo, h48info, fallbackinfo; 72 gendata_h48_arg_t arg_h0k4; 73 74 cocsepsize = COCSEP_FULLSIZE; 75 h48size = INFOSIZE + H48_TABLESIZE(arg->h, arg->k); 76 fallbacksize = arg->k == 2 ? INFOSIZE + H48_TABLESIZE(0, 4) : 0; 77 fallback2size = EOESEP_FULLSIZE; 78 79 /* Add padding for 8-bit alignment */ 80 h48size = 8 * DIV_ROUND_UP(h48size, 8); 81 fallbacksize = 8 * DIV_ROUND_UP(fallbacksize, 8); 82 fallback2size = 8 * DIV_ROUND_UP(fallback2size, 8); 83 84 size = cocsepsize + h48size + fallbacksize + fallback2size; 85 86 if (arg->buf == NULL) 87 return size; /* Dry-run */ 88 89 if (arg->buf_size < size) { 90 LOG("[H48 gendata] Error data: buffer is too small " 91 "(needed %" PRId64 " bytes but received %" PRId64 ")\n", 92 size, arg->buf_size); 93 return NISSY_ERROR_BUFFER_SIZE; 94 } 95 96 gendata_cocsep(arg->buf, arg->selfsim, arg->crep); 97 98 cocsepdata_offset = arg->buf + INFOSIZE; 99 arg->cocsepdata = (uint32_t *)cocsepdata_offset; 100 arg->h48buf = (_Atomic unsigned char*)arg->buf + cocsepsize; 101 102 arg->base = 99; /* TODO: set this somewhere else */ 103 104 if (arg->h == 0 && arg->k == 4) { 105 gendata_h48h0k4(arg); 106 } else if (arg->k == 2) { 107 gendata_h48k2(arg); 108 } else { 109 LOG("[H48 gendata] Error: cannot generate data for h = %" PRIu8 110 " and k = %" PRIu8 " (not implemented yet)\n", 111 arg->h, arg->k); 112 return NISSY_ERROR_INVALID_SOLVER; 113 } 114 115 r = readtableinfo(arg->buf_size, arg->buf, &cocsepinfo); 116 if (r != NISSY_OK) { 117 LOG("[H48 gendata] Error: could not read info " 118 "for cocsep table\n"); 119 return NISSY_ERROR_UNKNOWN; 120 } 121 122 cocsepinfo.next = cocsepsize; 123 r = writetableinfo(&cocsepinfo, arg->buf_size, arg->buf); 124 if (r != NISSY_OK) { 125 LOG("[H48 gendata] Error: could not write info for " 126 "cocsep table with updated 'next' value\n"); 127 return NISSY_ERROR_UNKNOWN; 128 } 129 130 /* Add h0k4 fallback table */ 131 132 if (arg->k == 2) { 133 arg_h0k4 = *arg; 134 arg_h0k4.h = 0; 135 arg_h0k4.k = 4; 136 arg_h0k4.base = 0; 137 arg_h0k4.maxdepth = 20; 138 arg_h0k4.buf_size = arg->buf_size - h48size; 139 arg_h0k4.buf = arg->buf + cocsepsize + h48size; 140 arg_h0k4.h48buf = arg->h48buf + h48size; 141 142 gendata_h48h0k4(&arg_h0k4); 143 144 } 145 146 /* Add eoesep fallback table */ 147 148 gendata_eoesep(arg->buf + (size - fallback2size), 20); 149 150 /* Update tableinfo with correct next values */ 151 152 r = readtableinfo_n(arg->buf_size, arg->buf, 2, &h48info); 153 if (r != NISSY_OK) { 154 LOG("[H48 gendata] Error: could not read info " 155 "for h48 table\n"); 156 return NISSY_ERROR_UNKNOWN; 157 } 158 h48info.next = h48size; 159 r = writetableinfo(&h48info, 160 arg->buf_size - cocsepsize, arg->buf + cocsepsize); 161 if (r != NISSY_OK) { 162 LOG("[H48 gendata] Error: could not write info " 163 "for h48 table\n"); 164 return NISSY_ERROR_UNKNOWN; 165 } 166 167 if (arg->k == 2) { 168 r = readtableinfo_n(arg->buf_size, arg->buf, 3, &fallbackinfo); 169 if (r != NISSY_OK) { 170 LOG("[H48 gendata] Error: could not read info for h48 " 171 "fallback table\n"); 172 return NISSY_ERROR_UNKNOWN; 173 } 174 175 of = cocsepsize + h48size; 176 fallbackinfo.next = fallbacksize; 177 r = writetableinfo( 178 &fallbackinfo, arg->buf_size - of, arg->buf + of); 179 if (r != NISSY_OK) { 180 LOG("[H48 gendata] Error: could not write info for " 181 "h48 fallback table\n"); 182 return NISSY_ERROR_UNKNOWN; 183 } 184 } 185 186 return size; 187 } 188 189 STATIC void 190 gendata_h48h0k4(gendata_h48_arg_t arg[static 1]) 191 { 192 _Atomic unsigned char *table; 193 uint8_t val; 194 int64_t i, sc, done, d, h48max; 195 uint64_t t, tt, isize, cc, bufsize; 196 h48h0k4_bfs_arg_t bfsarg[THREADS]; 197 pthread_t thread[THREADS]; 198 pthread_mutex_t table_mutex[CHUNKS]; 199 200 arg->info = (tableinfo_t) { 201 .solver = "h48 solver h = 0, k = 4", 202 .type = TABLETYPE_PRUNING, 203 .infosize = INFOSIZE, 204 .fullsize = H48_TABLESIZE(0, 4) + INFOSIZE, 205 .hash = 0, /* TODO */ 206 .entries = H48_COORDMAX(0), 207 .classes = 0, 208 .h48h = 0, 209 .bits = 4, 210 .base = 0, 211 .maxvalue = 0, 212 .next = 0, 213 }; 214 215 table = arg->h48buf + INFOSIZE; 216 memset(table, 0xFF, H48_TABLESIZE(0, 4)); 217 218 h48max = (int64_t)H48_COORDMAX(0); 219 sc = coord_h48(SOLVED_CUBE, arg->cocsepdata, 0); 220 set_h48_pval_atomic(table, sc, 4, 0); 221 arg->info.distribution[0] = 1; 222 223 isize = h48max / THREADS; 224 isize = (isize / H48_COEFF(arg->k)) * H48_COEFF(arg->k); 225 for (t = 0; t < CHUNKS; t++) 226 pthread_mutex_init(&table_mutex[t], NULL); 227 for (t = 0; t < THREADS; t++) { 228 bfsarg[t] = (h48h0k4_bfs_arg_t) { 229 .cocsepdata = arg->cocsepdata, 230 .table = table, 231 .selfsim = arg->selfsim, 232 .crep = arg->crep, 233 .start = isize * t, 234 .end = t == THREADS-1 ? (uint64_t)h48max : isize * (t+1), 235 }; 236 for (tt = 0; tt < CHUNKS; tt++) 237 bfsarg[t].table_mutex[tt] = &table_mutex[tt]; 238 } 239 for (done = 1, d = 1; done < h48max && d <= arg->maxdepth; d++) { 240 LOG("[H48 gendata] Generating depth %" PRId64 "\n", d); 241 242 for (t = 0; t < THREADS; t++) { 243 bfsarg[t].depth = d; 244 pthread_create(&thread[t], NULL, 245 gendata_h48h0k4_runthread, &bfsarg[t]); 246 } 247 248 for (t = 0; t < THREADS; t++) 249 pthread_join(thread[t], NULL); 250 251 for (i = 0, cc = 0; i < h48max; i++) { 252 val = get_h48_pval_atomic(table, i, 4); 253 cc += val == d; 254 } 255 256 done += cc; 257 arg->info.distribution[d] = cc; 258 259 LOG("[H48 gendata] Found %" PRId64 "\n", cc); 260 } 261 262 arg->info.maxvalue = d - 1; 263 bufsize = arg->buf_size - COCSEP_FULLSIZE; 264 writetableinfo(&arg->info, bufsize, (unsigned char *)arg->h48buf); 265 } 266 267 STATIC void * 268 gendata_h48h0k4_runthread(void *arg) 269 { 270 static const uint8_t breakpoint = 10; /* Hand-picked optimal */ 271 272 uint8_t c, m; 273 uint64_t i; 274 int64_t j; 275 cube_t cube, moved; 276 gendata_h48_mark_t markarg; 277 h48h0k4_bfs_arg_t *bfsarg; 278 279 bfsarg = (h48h0k4_bfs_arg_t *)arg; 280 281 markarg = (gendata_h48_mark_t) { 282 .depth = bfsarg->depth, 283 .h = 0, 284 .k = 4, 285 .cocsepdata = bfsarg->cocsepdata, 286 .selfsim = bfsarg->selfsim, 287 .table_atomic = bfsarg->table, 288 .table_mutex = bfsarg->table_mutex, 289 }; 290 291 /* 292 * If depth < breakpoint, scan all neighbors of coordinates at depth-1. 293 * Otherwise, scan all neighbors of unvisited coordinates. 294 */ 295 for (i = bfsarg->start; i < bfsarg->end; i++) { 296 c = get_h48_pval_atomic(bfsarg->table, i, 4); 297 298 if ((bfsarg->depth < breakpoint && c != bfsarg->depth - 1) || 299 (bfsarg->depth >= breakpoint && c != 0xF)) 300 continue; 301 302 cube = invcoord_h48(i, bfsarg->crep, 0); 303 for (m = 0; m < 18; m++) { 304 moved = move(cube, m); 305 j = coord_h48(moved, bfsarg->cocsepdata, 0); 306 c = get_h48_pval_atomic(bfsarg->table, j, 4); 307 if (bfsarg->depth < breakpoint) { 308 if (c <= bfsarg->depth) 309 continue; 310 markarg.cube = moved; 311 gendata_h48_mark_atomic(&markarg); 312 } else { 313 if (c >= bfsarg->depth) 314 continue; 315 markarg.cube = cube; 316 gendata_h48_mark_atomic(&markarg); 317 break; /* Enough to find one, skip the rest */ 318 } 319 } 320 } 321 322 return NULL; 323 } 324 325 STATIC void 326 gendata_h48k2(gendata_h48_arg_t arg[static 1]) 327 { 328 static const uint8_t shortdepth = 8; 329 static const uint64_t capacity = 10000019; 330 static const uint64_t randomizer = 10000079; 331 332 /* 333 * A good base value for the k=2 tables have few positions with value 334 * 0, because those are treated as lower bound 0 and require a second 335 * lookup in another table, and at the same time not too many positions 336 * with value 3, because some of those are under-estimates. 337 * 338 * The following values for the base have been hand-picked. I first 339 * performed some statistics on the frequency of these values, but 340 * they turned out to be unreliable. In the end I generated the same 341 * table with multiple base value and see what was best. 342 * 343 * A curious case is h3, which has this distribution for base 8: 344 * [0] = 6686828 345 * [1] = 63867852 346 * [2] = 392789689 347 * [3] = 477195231 348 * 349 * and this for base 9: 350 * [0] = 70554680 351 * [1] = 392789689 352 * [2] = 462294676 353 * [3] = 14900555 354 * 355 * I ended up picking base 8 to have a much lower count of elements 356 * with value 0, at the cost of a less precise estimate for the higher 357 * values. But I am not 100% confident this is the optimal choice, 358 * so I'll leave it here for future considerations. 359 */ 360 361 static const uint8_t base[] = { 362 [0] = 8, 363 [1] = 8, 364 [2] = 8, 365 [3] = 8, 366 [4] = 9, 367 [5] = 9, 368 [6] = 9, 369 [7] = 9, 370 [8] = 10, 371 [9] = 10, 372 [10] = 10, 373 [11] = 10 374 }; 375 376 uint8_t t; 377 unsigned char *table; 378 int64_t j; 379 uint64_t i, ii, inext, count, bufsize; 380 h48map_t shortcubes; 381 gendata_h48short_arg_t shortarg; 382 h48k2_dfs_arg_t dfsarg[THREADS]; 383 pthread_t thread[THREADS]; 384 pthread_mutex_t shortcubes_mutex, table_mutex[CHUNKS]; 385 386 table = (unsigned char *)arg->h48buf + INFOSIZE; 387 memset(table, 0xFF, H48_TABLESIZE(arg->h, arg->k)); 388 389 LOG("[H48 gendata] Computing depth <=%" PRIu8 "\n", shortdepth) 390 h48map_create(&shortcubes, capacity, randomizer); 391 shortarg = (gendata_h48short_arg_t) { 392 .maxdepth = shortdepth, 393 .cocsepdata = arg->cocsepdata, 394 .crep = arg->crep, 395 .selfsim = arg->selfsim, 396 .map = &shortcubes 397 }; 398 gendata_h48short(&shortarg); 399 LOG("[H48 gendata] Computed %" PRIu64 " positions\n", shortarg.map->n); 400 401 if (arg->base >= 20) 402 arg->base = base[arg->h]; 403 arg->info = makeinfo_h48k2(arg); 404 405 inext = count = 0; 406 pthread_mutex_init(&shortcubes_mutex, NULL); 407 for (i = 0; i < CHUNKS; i++) 408 pthread_mutex_init(&table_mutex[i], NULL); 409 for (i = 0; i < THREADS; i++) { 410 dfsarg[i] = (h48k2_dfs_arg_t){ 411 .h = arg->h, 412 .k = arg->k, 413 .base = arg->base, 414 .shortdepth = shortdepth, 415 .cocsepdata = arg->cocsepdata, 416 .table = table, 417 .selfsim = arg->selfsim, 418 .crep = arg->crep, 419 .shortcubes = &shortcubes, 420 .shortcubes_mutex = &shortcubes_mutex, 421 .next = &inext, 422 .count = &count, 423 }; 424 for (ii = 0; ii < CHUNKS; ii++) 425 dfsarg[i].table_mutex[ii] = &table_mutex[ii]; 426 427 pthread_create( 428 &thread[i], NULL, gendata_h48k2_runthread, &dfsarg[i]); 429 } 430 431 for (i = 0; i < THREADS; i++) 432 pthread_join(thread[i], NULL); 433 434 h48map_destroy(&shortcubes); 435 436 for (j = 0; j < H48_COORDMAX(arg->h); j++) { 437 t = get_h48_pval(table, j, 2); 438 arg->info.distribution[t]++; 439 } 440 441 bufsize = arg->buf_size - COCSEP_FULLSIZE; 442 writetableinfo(&arg->info, bufsize, (unsigned char *)arg->h48buf); 443 } 444 445 STATIC void * 446 gendata_h48k2_runthread(void *arg) 447 { 448 uint64_t count, coord, mutex; 449 kvpair_t kv; 450 h48k2_dfs_arg_t *dfsarg; 451 452 dfsarg = (h48k2_dfs_arg_t *)arg; 453 454 while (true) { 455 pthread_mutex_lock(dfsarg->shortcubes_mutex); 456 457 kv = h48map_nextkvpair(dfsarg->shortcubes, dfsarg->next); 458 if (*dfsarg->next == dfsarg->shortcubes->capacity) { 459 pthread_mutex_unlock(dfsarg->shortcubes_mutex); 460 break; 461 } 462 count = ++(*dfsarg->count); 463 pthread_mutex_unlock(dfsarg->shortcubes_mutex); 464 465 if (count % UINT64_C(1000000) == 0) 466 LOG("[H48 gendata] Processing %" PRIu64 467 "th short cube\n", count); 468 469 if (kv.val < dfsarg->shortdepth) { 470 coord = kv.key >> (int64_t)(11 - dfsarg->h); 471 mutex = H48_INDEX(coord, dfsarg->k) % CHUNKS; 472 pthread_mutex_lock(dfsarg->table_mutex[mutex]); 473 set_h48_pval(dfsarg->table, coord, dfsarg->k, 0); 474 pthread_mutex_unlock(dfsarg->table_mutex[mutex]); 475 } else { 476 dfsarg->cube = invcoord_h48(kv.key, dfsarg->crep, 11); 477 gendata_h48k2_dfs(dfsarg); 478 } 479 } 480 481 return NULL; 482 } 483 484 STATIC void 485 gendata_h48k2_dfs(h48k2_dfs_arg_t arg[static 1]) 486 { 487 int8_t d; 488 uint8_t m[4]; 489 cube_t cube[4]; 490 gendata_h48_mark_t markarg; 491 492 markarg = (gendata_h48_mark_t) { 493 .h = arg->h, 494 .k = arg->k, 495 .cocsepdata = arg->cocsepdata, 496 .selfsim = arg->selfsim, 497 .table = arg->table, 498 .table_mutex = arg->table_mutex, 499 }; 500 501 d = (int8_t)arg->shortdepth - (int8_t)arg->base; 502 503 /* Depth d+0 (shortcubes) */ 504 markarg.depth = d; 505 markarg.cube = arg->cube; 506 gendata_h48_mark(&markarg); 507 508 /* Depth d+1 */ 509 for (m[0] = 0; m[0] < 18; m[0]++) { 510 markarg.depth = d+1; 511 cube[0] = move(arg->cube, m[0]); 512 if (gendata_h48k2_dfs_stop(cube[0], d+1, arg)) 513 continue; 514 markarg.cube = cube[0]; 515 gendata_h48_mark(&markarg); 516 517 /* Depth d+2 */ 518 for (m[1] = 0; m[1] < 18; m[1]++) { 519 markarg.depth = d+2; 520 if (m[0] / 3 == m[1] / 3) { 521 m[1] += 2; 522 continue; 523 } 524 cube[1] = move(cube[0], m[1]); 525 if (gendata_h48k2_dfs_stop(cube[1], d+2, arg)) 526 continue; 527 markarg.cube = cube[1]; 528 gendata_h48_mark(&markarg); 529 if (d >= 0) 530 continue; 531 532 /* Depth d+3 */ 533 for (m[2] = 0; m[2] < 18; m[2]++) { 534 markarg.depth = d+3; 535 if (!allowednextmove(m[1], m[2])) { 536 m[2] += 2; 537 continue; 538 } 539 cube[2] = move(cube[1], m[2]); 540 if (gendata_h48k2_dfs_stop(cube[2], d+3, arg)) 541 continue; 542 markarg.cube = cube[2]; 543 gendata_h48_mark(&markarg); 544 if (d >= -1) 545 continue; 546 547 /* Depth d+4 */ 548 for (m[3] = 0; m[3] < 18; m[3]++) { 549 markarg.depth = d+4; 550 if (!allowednextmove(m[2], m[3])) { 551 m[3] += 2; 552 continue; 553 } 554 cube[3] = move(cube[2], m[3]); 555 markarg.cube = cube[3]; 556 gendata_h48_mark(&markarg); 557 } 558 } 559 } 560 } 561 } 562 563 STATIC_INLINE void 564 gendata_h48_mark_atomic(gendata_h48_mark_t arg[static 1]) 565 { 566 uint8_t oldval, newval; 567 int64_t coord, mutex; 568 569 FOREACH_H48SIM(arg->cube, arg->cocsepdata, arg->selfsim, 570 coord = coord_h48(arg->cube, arg->cocsepdata, arg->h); 571 oldval = get_h48_pval_atomic(arg->table_atomic, coord, arg->k); 572 newval = (uint8_t)MAX(arg->depth, 0); 573 if (newval < oldval) { 574 mutex = H48_INDEX(coord, arg->k) % CHUNKS; 575 pthread_mutex_lock(arg->table_mutex[mutex]); 576 set_h48_pval_atomic( 577 arg->table_atomic, coord, arg->k, newval); 578 pthread_mutex_unlock(arg->table_mutex[mutex]); 579 } 580 ) 581 } 582 583 STATIC_INLINE void 584 gendata_h48_mark(gendata_h48_mark_t arg[static 1]) 585 { 586 uint8_t oldval, newval; 587 int64_t coord, mutex; 588 589 FOREACH_H48SIM(arg->cube, arg->cocsepdata, arg->selfsim, 590 coord = coord_h48(arg->cube, arg->cocsepdata, arg->h); 591 mutex = H48_INDEX(coord, arg->k) % CHUNKS; 592 pthread_mutex_lock(arg->table_mutex[mutex]); 593 oldval = get_h48_pval(arg->table, coord, arg->k); 594 newval = (uint8_t)MAX(arg->depth, 0); 595 set_h48_pval(arg->table, coord, arg->k, MIN(newval, oldval)); 596 pthread_mutex_unlock(arg->table_mutex[mutex]); 597 ) 598 } 599 600 STATIC_INLINE bool 601 gendata_h48k2_dfs_stop(cube_t cube, int8_t d, h48k2_dfs_arg_t arg[static 1]) 602 { 603 uint64_t val; 604 int64_t coord, mutex; 605 int8_t oldval; 606 607 if (arg->h == 0 || arg->h == 11) { 608 /* We are in the "real coordinate" case, we can stop 609 if this coordinate has already been visited */ 610 coord = coord_h48(cube, arg->cocsepdata, arg->h); 611 mutex = H48_INDEX(coord, arg->k) % CHUNKS; 612 pthread_mutex_lock(arg->table_mutex[mutex]); 613 oldval = get_h48_pval(arg->table, coord, arg->k); 614 pthread_mutex_unlock(arg->table_mutex[mutex]); 615 return oldval <= d; 616 } else { 617 /* With 0 < k < 11 we do not have a "real coordinate". 618 The best we can do is checking if we backtracked to 619 one of the "short cubes". */ 620 coord = coord_h48(cube, arg->cocsepdata, 11); 621 val = h48map_value(arg->shortcubes, coord); 622 return val <= arg->shortdepth; 623 } 624 } 625 626 STATIC tableinfo_t 627 makeinfo_h48k2(gendata_h48_arg_t arg[static 1]) 628 { 629 tableinfo_t info; 630 631 info = (tableinfo_t) { 632 .solver = "h48 solver h = , k = 2", 633 .type = TABLETYPE_PRUNING, 634 .infosize = INFOSIZE, 635 .fullsize = H48_TABLESIZE(arg->h, 2) + INFOSIZE, 636 .hash = 0, /* TODO */ 637 .entries = H48_COORDMAX(arg->h), 638 .classes = 0, 639 .h48h = arg->h, 640 .bits = 2, 641 .base = arg->base, 642 .maxvalue = 3, 643 .next = 0, 644 }; 645 info.solver[15] = (arg->h % 10) + '0'; 646 if (arg->h >= 10) 647 info.solver[14] = (arg->h / 10) + '0'; 648 649 return info; 650 } 651 652 STATIC void * 653 getdistribution_h48_runthread(void *arg) 654 { 655 getdistribution_h48_data_t *data = (getdistribution_h48_data_t *)arg; 656 const unsigned char *table; 657 uint8_t j, k, m; 658 int64_t i; 659 660 memset(data->distr, 0, INFO_DISTRIBUTION_LEN * sizeof(uint64_t)); 661 662 k = data->k; 663 table = data->table; 664 m = H48_MASK(0, k); 665 for (i = data->min; i < data->max; i++) 666 for (j = 0; j < H48_DIV(k); j++) 667 data->distr[(table[i] & (m << (j*k))) >> (j*k)]++; 668 669 return NULL; 670 } 671 672 STATIC void 673 getdistribution_h48( 674 const unsigned char *table, 675 uint64_t distr[static INFO_DISTRIBUTION_LEN], 676 uint8_t h, 677 uint8_t k 678 ) { 679 getdistribution_h48_data_t targ[THREADS]; 680 pthread_t thread[THREADS]; 681 uint64_t local_distr[THREADS][INFO_DISTRIBUTION_LEN]; 682 int64_t i, j, nbytes, sz; 683 684 nbytes = H48_COORDMAX(h) / H48_DIV(k); 685 sz = nbytes / THREADS; 686 for (i = 0; i < THREADS; i++) { 687 targ[i] = (getdistribution_h48_data_t) { 688 .min = i * sz, 689 .max = i == THREADS - 1 ? nbytes : (i+1) * sz, 690 .k = k, 691 .distr = local_distr[i], 692 .table = table, 693 }; 694 pthread_create(&thread[i], NULL, 695 getdistribution_h48_runthread, &targ[i]); 696 } 697 698 for (i = 0; i < THREADS; i++) 699 pthread_join(thread[i], NULL); 700 701 memset(distr, 0, INFO_DISTRIBUTION_LEN * sizeof(uint64_t)); 702 for (i = 0; i < THREADS; i++) 703 for (j = 0; j < INFO_DISTRIBUTION_LEN; j++) 704 distr[j] += local_distr[i][j]; 705 706 for (i = nbytes * H48_DIV(k); i < H48_COORDMAX(h); i++) 707 distr[get_h48_pval(table, i, k)]++; 708 } 709 710 STATIC const uint32_t * 711 get_cocsepdata_constptr(const unsigned char *data) 712 { 713 return (uint32_t *)(data + INFOSIZE); 714 } 715 716 STATIC const unsigned char * 717 get_h48data_constptr(const unsigned char *data) 718 { 719 return data + COCSEP_FULLSIZE + INFOSIZE; 720 } 721 722 STATIC_INLINE uint8_t 723 get_h48_pval(const unsigned char *table, int64_t i, uint8_t k) 724 { 725 return (table[H48_INDEX(i, k)] & H48_MASK(i, k)) >> H48_SHIFT(i, k); 726 } 727 728 STATIC_INLINE uint8_t 729 get_h48_pval_atomic(_Atomic const unsigned char *table, int64_t i, uint8_t k) 730 { 731 return (table[H48_INDEX(i, k)] & H48_MASK(i, k)) >> H48_SHIFT(i, k); 732 } 733 734 STATIC_INLINE void 735 set_h48_pval(unsigned char *table, int64_t i, uint8_t k, uint8_t val) 736 { 737 table[H48_INDEX(i, k)] = (table[H48_INDEX(i, k)] & (~H48_MASK(i, k))) 738 | (val << H48_SHIFT(i, k)); 739 } 740 741 STATIC_INLINE void 742 set_h48_pval_atomic( 743 _Atomic unsigned char *table, 744 int64_t i, 745 uint8_t k, 746 uint8_t val 747 ) 748 { 749 table[H48_INDEX(i, k)] = (table[H48_INDEX(i, k)] & (~H48_MASK(i, k))) 750 | (val << H48_SHIFT(i, k)); 751 } 752 753 size_t 754 gendata_h48_derive(uint8_t h, const unsigned char *fulltable, unsigned char *buf) 755 { 756 size_t cocsepsize, h48size; 757 uint8_t val_full, val_derive; 758 const unsigned char *h48full; 759 unsigned char *h48derive; 760 int64_t i, j, h48max; 761 uint64_t bufsize; 762 gendata_h48_arg_t arg; 763 tableinfo_t cocsepinfo, fulltableinfo; 764 765 /* Initializing values in case of error */ 766 /* TODO cleanup this */ 767 fulltableinfo.h48h = 11; 768 fulltableinfo.bits = 2; 769 fulltableinfo.base = 8; 770 771 int64_t TODOlarge = 999999999999; /* TODO: cleanup here */ 772 773 readtableinfo_n(TODOlarge, fulltable, 2, &fulltableinfo); 774 arg.h = h; 775 arg.k = fulltableinfo.bits; 776 arg.maxdepth = 20; 777 arg.buf = buf; 778 arg.cocsepdata = (uint32_t *)(buf + INFOSIZE); 779 arg.base = fulltableinfo.base; 780 arg.info = makeinfo_h48k2(&arg); 781 782 /* Technically this step is redundant, except that we 783 need selfsim and crep */ 784 cocsepsize = gendata_cocsep(buf, arg.selfsim, arg.crep); 785 arg.h48buf = (_Atomic unsigned char *)buf + cocsepsize; 786 h48size = H48_TABLESIZE(h, arg.k) + INFOSIZE; 787 788 if (buf == NULL) 789 goto gendata_h48_derive_return_size; 790 791 bufsize = COCSEP_FULLSIZE + INFOSIZE; 792 if (readtableinfo(bufsize, buf, &cocsepinfo) != NISSY_OK) { 793 LOG("[H48 derive gendata] Error: could not read info for " 794 "cocsep table\n"); 795 goto gendata_h48_derive_error; 796 } 797 798 cocsepinfo.next = cocsepsize; 799 bufsize = COCSEP_FULLSIZE + INFOSIZE; 800 if (writetableinfo(&cocsepinfo, bufsize, buf) != NISSY_OK) { 801 LOG("[H48 derive gendata] Error: could not write info for " 802 "cocsep table with updated 'next' value\n"); 803 goto gendata_h48_derive_error; 804 } 805 806 h48full = fulltable + cocsepsize + INFOSIZE; 807 h48derive = (unsigned char *)arg.h48buf + INFOSIZE; 808 memset(h48derive, 0xFF, H48_TABLESIZE(h, arg.k)); 809 memset(arg.info.distribution, 0, 810 INFO_DISTRIBUTION_LEN * sizeof(uint64_t)); 811 812 h48max = H48_COORDMAX(fulltableinfo.h48h); 813 for (i = 0; i < h48max; i++) { 814 if (i % INT64_C(1000000000) == 0 && i > 0) 815 LOG("[H48 derive gendata] Processing %" PRId64 816 "th coordinate\n", i); 817 j = i >> (int64_t)(fulltableinfo.h48h - h); 818 val_full = get_h48_pval(h48full, i, arg.k); 819 val_derive = get_h48_pval(h48derive, j, arg.k); 820 set_h48_pval( 821 h48derive, j, arg.k, MIN(val_full, val_derive)); 822 } 823 824 getdistribution_h48(h48derive, arg.info.distribution, h, arg.k); 825 826 bufsize = arg.buf_size - COCSEP_FULLSIZE - INFOSIZE; 827 if (writetableinfo(&arg.info, bufsize, (unsigned char *)arg.h48buf) 828 != NISSY_OK) { 829 LOG("H48 derive gendata] Error: could not write info " 830 "for table\n"); 831 goto gendata_h48_derive_error; 832 } 833 834 gendata_h48_derive_return_size: 835 return cocsepsize + h48size; 836 837 gendata_h48_derive_error: 838 return 0; 839 }