*** _/tf_f0_31.h 2012-09-23 00:42:46.000000000 -0700 --- tf_f0_31.h 2012-09-25 00:42:56.259187546 -0700 *************** *** 206,211 **** --- 206,212 ---- // Could write optimized div_192_96 with so many tmp192 elements known to be zero div_192_96(&u,tmp192,f,ff); // u = floor(2^(95 + bits_in_f) / f), giving 96 bits of precision + #if 0 // b_preinit = 2^128 // a = b_preinit / 2 ^ (bits_in_f - 1) // tmp192 = a * u = (b_preinit / 2 ^ (bits_in_f - 1)) * (2 ^ (95 + bits_in_f) / f) (ignore the floor functions for now) *************** *** 218,225 **** a.d0 = __sub_cc (0, tmp96.d0); // Compute the remainder a.d1 = __subc_cc(0, tmp96.d1); // we do not need the upper digits of b_preinit and tmp96 because the result is 0 after subtraction! a.d2 = __subc (0, tmp96.d2); ! for (shifter = 0; shifter < exp - 2 - 7; shifter++) { // On input a is at most 91.807 bits (see end of this loop) --- 219,229 ---- a.d0 = __sub_cc (0, tmp96.d0); // Compute the remainder a.d1 = __subc_cc(0, tmp96.d1); // we do not need the upper digits of b_preinit and tmp96 because the result is 0 after subtraction! a.d2 = __subc (0, tmp96.d2); + #endif + + BASE_preinit; ! for (shifter = 0; shifter < exp - 1 - BASE_n1; shifter++) { // On input a is at most 91.807 bits (see end of this loop) *************** *** 446,451 **** --- 450,456 ---- // Could write optimized div_224_96 with so many tmp224 elements known to be zero div_224_96(&u,tmp224,f,ff); // u = floor(2^208 / f). This requires f >= 81 bits. + #if 0 // b_preinit = 2^128 // a = b_preinit / 2^80 = 2^48 // tmp256 = a * u = (b_preinit / 2^80) * (2^208 / f) (ignore the floor functions for now) *************** *** 458,465 **** a.d1 = __subc_cc(0, tmp128.d1); // we do not need the upper digits of b_preinit and tmp192 because the result is 0 after subtraction! a.d2 = __subc_cc(0, tmp128.d2); a.d3 = __subc (0, tmp128.d3); ! 
for (shifter = 0; shifter < exp - 2 - 7; shifter++) { // On input a is at most 99.17 bits (see end of this loop) --- 463,473 ---- a.d1 = __subc_cc(0, tmp128.d1); // we do not need the upper digits of b_preinit and tmp192 because the result is 0 after subtraction! a.d2 = __subc_cc(0, tmp128.d2); a.d3 = __subc (0, tmp128.d3); + #endif + + a.d3 = 0; BASE_preinit; ! for (shifter = 0; shifter < exp - 1 - BASE_n1; shifter++) { // On input a is at most 99.17 bits (see end of this loop) *** _/tf_f128_159.h 2012-09-09 09:46:09.000000000 -0700 --- tf_f128_159.h 2012-09-25 00:23:51.199250328 -0700 *************** *** 206,211 **** --- 206,212 ---- // Could write optimized div_352_192 with so many tmp352 elements known to be zero div_352_192(&u,tmp352,f,ff); // u = floor(2^352 / f). This requires f >= 161 bits. + #if 0 // b_preinit = 2^256 // a = b_preinit / 2^160 = 2^96 // tmp352 = a * u = (b_preinit / 2^160) * (2^352 / f) (ignore the floor functions for now) *************** *** 221,228 **** a.d3 = __subc_cc(0, tmp192.d3); a.d4 = __subc_cc(0, tmp192.d4); a.d5 = __subc (0, tmp192.d5); ! for (shifter = 0; shifter < exp - 2 - 8; shifter++) { // On input a is at most 175.700 bits (see end of this loop) --- 222,231 ---- a.d3 = __subc_cc(0, tmp192.d3); a.d4 = __subc_cc(0, tmp192.d4); a.d5 = __subc (0, tmp192.d5); + #endif + a.d5 = 0; BASE_preinit2; ! 
for (shifter = 0; shifter < exp - 1 - BASE_n2; shifter++) { // On input a is at most 175.700 bits (see end of this loop) *************** *** 461,466 **** --- 464,470 ---- // Could write optimized div_384_192 with so many tmp384 elements known to be zero div_384_192(&u,tmp384,f,ff); // u = floor(2^(191 + bits_in_f) / f), giving 192 bits of precision, requires f >= 161 bits + #if 0 // b_preinit = 2^256 // a = b_preinit / 2 ^ (bits_in_f - 1) // tmp384 = a * u = (b_preinit / 2 ^ (bits_in_f - 1)) * (2 ^ (191 + bits_in_f) / f) (ignore the floor functions for now) *************** *** 477,484 **** a.d3 = __subc_cc(0, tmp192.d3); a.d4 = __subc_cc(0, tmp192.d4); a.d5 = __subc (0, tmp192.d5); ! for (shifter = 0; shifter < exp - 2 - 8; shifter++) { // On input a is at most 186.700 bits (see end of this loop) --- 481,490 ---- a.d3 = __subc_cc(0, tmp192.d3); a.d4 = __subc_cc(0, tmp192.d4); a.d5 = __subc (0, tmp192.d5); + #endif + a.d5 = 0; BASE_preinit2; ! for (shifter = 0; shifter < exp - 1 - BASE_n2; shifter++) { // On input a is at most 186.700 bits (see end of this loop) *************** *** 718,723 **** --- 724,730 ---- // Could write optimized div_384_192 with so many tmp384 elements known to be zero div_384_192(&u,tmp384,f,ff); // u = floor(2^(191 + bits_in_f) / f), giving 192 bits of precision, requires f >= 161 bits + #if 0 // b_preinit = 2^256 // a = b_preinit / 2 ^ (bits_in_f - 1) // tmp384 = a * u = (b_preinit / 2 ^ (bits_in_f - 1)) * (2 ^ (191 + bits_in_f) / f) (ignore the floor functions for now) *************** *** 734,741 **** a.d3 = __subc_cc(0, tmp192.d3); a.d4 = __subc_cc(0, tmp192.d4); a.d5 = __subc (0, tmp192.d5); ! for (shifter = 0; shifter < exp - 2 - 8; shifter++) { // On input a is at most 185.858 bits (see end of this loop) --- 741,750 ---- a.d3 = __subc_cc(0, tmp192.d3); a.d4 = __subc_cc(0, tmp192.d4); a.d5 = __subc (0, tmp192.d5); + #endif + a.d5 = 0; BASE_preinit2; ! 
for (shifter = 0; shifter < exp - 1 - BASE_n2; shifter++) { // On input a is at most 185.858 bits (see end of this loop) *************** *** 984,989 **** --- 993,999 ---- // Could write optimized div_384_192 with so many tmp384 elements known to be zero div_384_192(&u,tmp384,f,ff); // u = floor(2^(191 + bits_in_f) / f), giving 192 bits of precision, requires f >= 161 bits + #if 0 // b_preinit = 2^256 // a = b_preinit / 2 ^ (bits_in_f - 1) // tmp384 = a * u = (b_preinit / 2 ^ (bits_in_f - 1)) * (2 ^ (191 + bits_in_f) / f) (ignore the floor functions for now) *************** *** 1000,1007 **** a.d3 = __subc_cc(0, tmp192.d3); a.d4 = __subc_cc(0, tmp192.d4); a.d5 = __subc (0, tmp192.d5); ! for (shifter = 0; shifter < exp - 2 - 8; shifter++) { // On input a is at most 188 bits (see end of this loop) --- 1010,1019 ---- a.d3 = __subc_cc(0, tmp192.d3); a.d4 = __subc_cc(0, tmp192.d4); a.d5 = __subc (0, tmp192.d5); + #endif + a.d5 = 0; BASE_preinit2; ! for (shifter = 0; shifter < exp - 1 - BASE_n2; shifter++) { // On input a is at most 188 bits (see end of this loop) *** _/tf_f32_63.h 2012-09-23 00:43:01.000000000 -0700 --- tf_f32_63.h 2012-09-25 00:43:32.182185576 -0700 *************** *** 313,318 **** --- 313,319 ---- // Could write optimized div_192_96 with so many tmp192 elements known to be zero div_192_96(&u,tmp192,f,ff); // u = floor(2^(95 + bits_in_f) / f), giving 96 bits of precision + #if 0 // b_preinit = 2^128 // a = b_preinit / 2 ^ (bits_in_f - 1) // tmp192 = a * u = (b_preinit / 2 ^ (bits_in_f - 1)) * (2 ^ (95 + bits_in_f) / f) (ignore the floor functions for now) *************** *** 325,332 **** a.d0 = __sub_cc (0, tmp96.d0); // Compute the remainder a.d1 = __subc_cc(0, tmp96.d1); // we do not need the upper digits of b_preinit and tmp96 because the result is 0 after subtraction! a.d2 = __subc (0, tmp96.d2); ! 
for (shifter = 0; shifter < exp - 2 - 7; shifter++) { // On input a is at most 91.807 bits (see end of this loop) --- 326,335 ---- a.d0 = __sub_cc (0, tmp96.d0); // Compute the remainder a.d1 = __subc_cc(0, tmp96.d1); // we do not need the upper digits of b_preinit and tmp96 because the result is 0 after subtraction! a.d2 = __subc (0, tmp96.d2); + #endif + BASE_preinit; ! for (shifter = 0; shifter < exp - 1 - BASE_n1; shifter++) { // On input a is at most 91.807 bits (see end of this loop) *************** *** 553,558 **** --- 556,562 ---- // Could write optimized div_224_96 with so many tmp224 elements known to be zero div_224_96(&u,tmp224,f,ff); // u = floor(2^208 / f). This requires f >= 81 bits. + #if 0 // b_preinit = 2^128 // a = b_preinit / 2^80 = 2^48 // tmp256 = a * u = (b_preinit / 2^80) * (2^208 / f) (ignore the floor functions for now) *************** *** 565,572 **** a.d1 = __subc_cc(0, tmp128.d1); // we do not need the upper digits of b_preinit and tmp192 because the result is 0 after subtraction! a.d2 = __subc_cc(0, tmp128.d2); a.d3 = __subc (0, tmp128.d3); ! for (shifter = 0; shifter < exp - 2 - 7; shifter++) { // On input a is at most 99.17 bits (see end of this loop) --- 569,578 ---- a.d1 = __subc_cc(0, tmp128.d1); // we do not need the upper digits of b_preinit and tmp192 because the result is 0 after subtraction! a.d2 = __subc_cc(0, tmp128.d2); a.d3 = __subc (0, tmp128.d3); + #endif + a.d3 = 0; BASE_preinit; ! for (shifter = 0; shifter < exp - 1 - BASE_n1; shifter++) { // On input a is at most 99.17 bits (see end of this loop) *************** *** 804,809 **** --- 810,816 ---- // Could write optimized div_224_128 with so many tmp224 elements known to be zero div_224_128(&u,tmp224,f,ff); // u = floor(2^224 / f). This requires f >= 97 bits. 
+ #if 0 // b_preinit = 2^128 // a = b_preinit / 2^96 = 2^32 // tmp256 = a * u = (b_preinit / 2^96) * (2^224 / f) (ignore the floor functions for now) *************** *** 815,822 **** a.d1 = __subc_cc(0, tmp128.d1); // we do not need the upper digits of b_preinit and tmp128 because the result is 0 after subtraction! a.d2 = __subc_cc(0, tmp128.d2); a.d3 = __subc (0, tmp128.d3); ! for (shifter = 0; shifter < exp - 2 - 7; shifter++) { // On input a is at most 111.17 bits (see end of this loop) --- 822,831 ---- a.d1 = __subc_cc(0, tmp128.d1); // we do not need the upper digits of b_preinit and tmp128 because the result is 0 after subtraction! a.d2 = __subc_cc(0, tmp128.d2); a.d3 = __subc (0, tmp128.d3); + #endif + a.d3 = 0; BASE_preinit; ! for (shifter = 0; shifter < exp - 1 - BASE_n1; shifter++) { // On input a is at most 111.17 bits (see end of this loop) *************** *** 1048,1053 **** --- 1057,1063 ---- // Could write optimized div_256_128 with so many tmp256 elements known to be zero div_256_128(&u,tmp256,f,ff); // u = floor(2^(127 + bits_in_f) / f), giving 128 bits of precision + #if 0 // b_preinit = 2^128 // a = b_preinit / 2 ^ (bits_in_f - 1) // tmp256 = a * u = (b_preinit / 2 ^ (bits_in_f - 1)) * (2 ^ (127 + bits_in_f) / f) (ignore the floor functions for now) *************** *** 1060,1067 **** a.d1 = __subc_cc(0, tmp128.d1); // we do not need the upper digits of b_preinit and tmp128 because the result is 0 after subtraction! a.d2 = __subc_cc(0, tmp128.d2); a.d3 = __subc (0, tmp128.d3); ! for (shifter = 0; shifter < exp - 2 - 7; shifter++) { // On input a is at most 123.17 bits (see end of this loop) --- 1070,1079 ---- a.d1 = __subc_cc(0, tmp128.d1); // we do not need the upper digits of b_preinit and tmp128 because the result is 0 after subtraction! a.d2 = __subc_cc(0, tmp128.d2); a.d3 = __subc (0, tmp128.d3); + #endif + a.d3 = 0; BASE_preinit; ! 
for (shifter = 0; shifter < exp - 1 - BASE_n1; shifter++) { // On input a is at most 123.17 bits (see end of this loop) *************** *** 1293,1298 **** --- 1305,1311 ---- // Could write optimized div_288_128 with so many tmp288 elements known to be zero div_288_128(&u,tmp288,f,ff); // u = floor(2^272 / f). This requires f >= 113 bits. + #if 0 // b_preinit = 2^256 // a = b_preinit / 2^112 = 2^144 // tmp320 = a * u = (b_preinit / 2^112) * (2^272 / f) (ignore the floor functions for now) *************** *** 1309,1316 **** a.d2 = __subc_cc(0, tmp160.d2); a.d3 = __subc_cc(0, tmp160.d3); a.d4 = __subc (0, tmp160.d4); ! for (shifter = 0; shifter < exp - 2 - 8; shifter++) { // On input a is at most 131.459 bits (see end of this loop) --- 1322,1331 ---- a.d2 = __subc_cc(0, tmp160.d2); a.d3 = __subc_cc(0, tmp160.d3); a.d4 = __subc (0, tmp160.d4); + #endif + a.d4 = a.d3 = 0; BASE_preinit; ! for (shifter = 0; shifter < exp - 1 - BASE_n1; shifter++) { // On input a is at most 131.459 bits (see end of this loop) *** _/tf_f64_95.h 2012-09-09 09:45:19.000000000 -0700 --- tf_f64_95.h 2012-09-25 00:24:10.345249278 -0700 *************** *** 340,345 **** --- 340,346 ---- // Could write optimized div_224_128 with so many tmp224 elements known to be zero div_224_128(&u,tmp224,f,ff); // u = floor(2^224 / f). This requires f >= 97 bits. + #if 0 // b_preinit = 2^128 // a = b_preinit / 2^96 = 2^32 // tmp256 = a * u = (b_preinit / 2^96) * (2^224 / f) (ignore the floor functions for now) *************** *** 351,358 **** a.d1 = __subc_cc(0, tmp128.d1); // we do not need the upper digits of b_preinit and tmp128 because the result is 0 after subtraction! a.d2 = __subc_cc(0, tmp128.d2); a.d3 = __subc (0, tmp128.d3); ! for (shifter = 0; shifter < exp - 2 - 7; shifter++) { // On input a is at most 111.17 bits (see end of this loop) --- 352,361 ---- a.d1 = __subc_cc(0, tmp128.d1); // we do not need the upper digits of b_preinit and tmp128 because the result is 0 after subtraction! 
a.d2 = __subc_cc(0, tmp128.d2); a.d3 = __subc (0, tmp128.d3); + #endif + a.d3 = 0; BASE_preinit; ! for (shifter = 0; shifter < exp - 1 - BASE_n1; shifter++) { // On input a is at most 111.17 bits (see end of this loop) *************** *** 585,590 **** --- 588,594 ---- // Could write optimized div_256_128 with so many tmp256 elements known to be zero div_256_128(&u,tmp256,f,ff); // u = floor(2^(127 + bits_in_f) / f), giving 128 bits of precision + #if 0 // b_preinit = 2^128 // a = b_preinit / 2 ^ (bits_in_f - 1) // tmp256 = a * u = (b_preinit / 2 ^ (bits_in_f - 1)) * (2 ^ (127 + bits_in_f) / f) (ignore the floor functions for now) *************** *** 597,604 **** a.d1 = __subc_cc(0, tmp128.d1); // we do not need the upper digits of b_preinit and tmp128 because the result is 0 after subtraction! a.d2 = __subc_cc(0, tmp128.d2); a.d3 = __subc (0, tmp128.d3); ! for (shifter = 0; shifter < exp - 2 - 7; shifter++) { // On input a is at most 123.17 bits (see end of this loop) --- 601,610 ---- a.d1 = __subc_cc(0, tmp128.d1); // we do not need the upper digits of b_preinit and tmp128 because the result is 0 after subtraction! a.d2 = __subc_cc(0, tmp128.d2); a.d3 = __subc (0, tmp128.d3); + #endif + a.d3 = 0; BASE_preinit; ! for (shifter = 0; shifter < exp - 1 - BASE_n1; shifter++) { // On input a is at most 123.17 bits (see end of this loop) *************** *** 831,836 **** --- 837,843 ---- // Could write optimized div_288_128 with so many tmp288 elements known to be zero div_288_128(&u,tmp288,f,ff); // u = floor(2^272 / f). This requires f >= 113 bits. + #if 0 // b_preinit = 2^256 // a = b_preinit / 2^112 = 2^144 // tmp320 = a * u = (b_preinit / 2^112) * (2^272 / f) (ignore the floor functions for now) *************** *** 847,854 **** a.d2 = __subc_cc(0, tmp160.d2); a.d3 = __subc_cc(0, tmp160.d3); a.d4 = __subc (0, tmp160.d4); ! 
for (shifter = 0; shifter < exp - 2 - 8; shifter++) { // On input a is at most 131.459 bits (see end of this loop) --- 854,863 ---- a.d2 = __subc_cc(0, tmp160.d2); a.d3 = __subc_cc(0, tmp160.d3); a.d4 = __subc (0, tmp160.d4); + #endif + a.d4 = a.d3 = 0; BASE_preinit; ! for (shifter = 0; shifter < exp - 1 - BASE_n1; shifter++) { // On input a is at most 131.459 bits (see end of this loop) *************** *** 1089,1094 **** --- 1098,1104 ---- // Could write optimized div_288_160 with so many tmp288 elements known to be zero div_288_160(&u,tmp288,f,ff); // u = floor(2^288 / f). This requires f >= 129 bits. + #if 0 // b_preinit = 2^256 // a = b_preinit / 2^128 = 2^144 // tmp320 = a * u = (b_preinit / 2^128) * (2^288 / f) (ignore the floor functions for now) *************** *** 1104,1111 **** a.d2 = __subc_cc(0, tmp160.d2); a.d3 = __subc_cc(0, tmp160.d3); a.d4 = __subc (0, tmp160.d4); ! for (shifter = 0; shifter < exp - 2 - 8; shifter++) { // On input a is at most 143.459 bits (see end of this loop) --- 1114,1123 ---- a.d2 = __subc_cc(0, tmp160.d2); a.d3 = __subc_cc(0, tmp160.d3); a.d4 = __subc (0, tmp160.d4); + #endif + a.d4 = a.d3 = 0; BASE_preinit; ! for (shifter = 0; shifter < exp - 1 - BASE_n1; shifter++) { // On input a is at most 143.459 bits (see end of this loop) *************** *** 1341,1346 **** --- 1353,1359 ---- // Could write optimized div_320_160 with so many tmp320 elements known to be zero div_320_160(&u,tmp320,f,ff); // u = floor(2^(159 + bits_in_f) / f), giving 160 bits of precision + #if 0 // b_preinit = 2^256 // a = b_preinit / 2 ^ (bits_in_f - 1) // tmp320 = a * u = (b_preinit / 2 ^ (bits_in_f - 1)) * (2 ^ (159 + bits_in_f) / f) (ignore the floor functions for now) *************** *** 1357,1364 **** a.d2 = __subc_cc(0, tmp160.d2); a.d3 = __subc_cc(0, tmp160.d3); a.d4 = __subc (0, tmp160.d4); ! 
for (shifter = 0; shifter < exp - 2 - 8; shifter++) { // On input a is at most 155.459 bits (see end of this loop) --- 1370,1379 ---- a.d2 = __subc_cc(0, tmp160.d2); a.d3 = __subc_cc(0, tmp160.d3); a.d4 = __subc (0, tmp160.d4); + #endif + BASE_preinit2; ! for (shifter = 0; shifter < exp - 1 - BASE_n2; shifter++) { // On input a is at most 155.459 bits (see end of this loop) *************** *** 1594,1599 **** --- 1609,1615 ---- // Could write optimized div_352_160 with so many tmp352 elements known to be zero div_352_160(&u,tmp352,f,ff); // u = floor(2^336 / f). This requires f >= 145 bits. + #if 0 // b_preinit = 2^256 // a = b_preinit / 2^144 = 2^112 // tmp352 = a * u = (b_preinit / 2^144) * (2^336 / f) (ignore the floor functions for now) *************** *** 1610,1617 **** a.d3 = __subc_cc(0, tmp192.d3); a.d4 = __subc_cc(0, tmp192.d4); a.d5 = __subc (0, tmp192.d5); ! for (shifter = 0; shifter < exp - 2 - 8; shifter++) { // On input a is at most 163.700 bits (see end of this loop) --- 1626,1635 ---- a.d3 = __subc_cc(0, tmp192.d3); a.d4 = __subc_cc(0, tmp192.d4); a.d5 = __subc (0, tmp192.d5); + #endif + a.d5 = 0; BASE_preinit2; ! for (shifter = 0; shifter < exp - 1 - BASE_n2; shifter++) { // On input a is at most 163.700 bits (see end of this loop) *** _/tf_f96_127.h 2012-09-09 09:45:47.000000000 -0700 --- tf_f96_127.h 2012-09-25 00:24:20.441248725 -0700 *************** *** 307,312 **** --- 307,313 ---- // Could write optimized div_288_160 with so many tmp288 elements known to be zero div_288_160(&u,tmp288,f,ff); // u = floor(2^288 / f). This requires f >= 129 bits. + #if 0 // b_preinit = 2^256 // a = b_preinit / 2^128 = 2^144 // tmp320 = a * u = (b_preinit / 2^128) * (2^288 / f) (ignore the floor functions for now) *************** *** 322,329 **** a.d2 = __subc_cc(0, tmp160.d2); a.d3 = __subc_cc(0, tmp160.d3); a.d4 = __subc (0, tmp160.d4); ! 
for (shifter = 0; shifter < exp - 2 - 8; shifter++) { // On input a is at most 143.459 bits (see end of this loop) --- 323,332 ---- a.d2 = __subc_cc(0, tmp160.d2); a.d3 = __subc_cc(0, tmp160.d3); a.d4 = __subc (0, tmp160.d4); + #endif + a.d4 = a.d3 = 0; BASE_preinit; ! for (shifter = 0; shifter < exp - 1 - BASE_n1; shifter++) { // On input a is at most 143.459 bits (see end of this loop) *************** *** 559,564 **** --- 562,568 ---- // Could write optimized div_320_160 with so many tmp320 elements known to be zero div_320_160(&u,tmp320,f,ff); // u = floor(2^(159 + bits_in_f) / f), giving 160 bits of precision + #if 0 // b_preinit = 2^256 // a = b_preinit / 2 ^ (bits_in_f - 1) // tmp320 = a * u = (b_preinit / 2 ^ (bits_in_f - 1)) * (2 ^ (159 + bits_in_f) / f) (ignore the floor functions for now) *************** *** 575,582 **** a.d2 = __subc_cc(0, tmp160.d2); a.d3 = __subc_cc(0, tmp160.d3); a.d4 = __subc (0, tmp160.d4); ! for (shifter = 0; shifter < exp - 2 - 8; shifter++) { // On input a is at most 155.459 bits (see end of this loop) --- 579,588 ---- a.d2 = __subc_cc(0, tmp160.d2); a.d3 = __subc_cc(0, tmp160.d3); a.d4 = __subc (0, tmp160.d4); + #endif + BASE_preinit2; ! for (shifter = 0; shifter < exp - 1 - BASE_n2; shifter++) { // On input a is at most 155.459 bits (see end of this loop) *************** *** 812,817 **** --- 818,824 ---- // Could write optimized div_352_160 with so many tmp352 elements known to be zero div_352_160(&u,tmp352,f,ff); // u = floor(2^336 / f). This requires f >= 145 bits. + #if 0 // b_preinit = 2^256 // a = b_preinit / 2^144 = 2^112 // tmp352 = a * u = (b_preinit / 2^144) * (2^336 / f) (ignore the floor functions for now) *************** *** 828,835 **** a.d3 = __subc_cc(0, tmp192.d3); a.d4 = __subc_cc(0, tmp192.d4); a.d5 = __subc (0, tmp192.d5); ! 
for (shifter = 0; shifter < exp - 2 - 8; shifter++) { // On input a is at most 163.700 bits (see end of this loop) --- 835,844 ---- a.d3 = __subc_cc(0, tmp192.d3); a.d4 = __subc_cc(0, tmp192.d4); a.d5 = __subc (0, tmp192.d5); + #endif + a.d5 = 0; BASE_preinit2; ! for (shifter = 0; shifter < exp - 1 - BASE_n2; shifter++) { // On input a is at most 163.700 bits (see end of this loop) *************** *** 1075,1080 **** --- 1084,1090 ---- // Could write optimized div_352_192 with so many tmp352 elements known to be zero div_352_192(&u,tmp352,f,ff); // u = floor(2^352 / f). This requires f >= 161 bits. + #if 0 // b_preinit = 2^256 // a = b_preinit / 2^160 = 2^96 // tmp352 = a * u = (b_preinit / 2^160) * (2^352 / f) (ignore the floor functions for now) *************** *** 1090,1097 **** a.d3 = __subc_cc(0, tmp192.d3); a.d4 = __subc_cc(0, tmp192.d4); a.d5 = __subc (0, tmp192.d5); ! for (shifter = 0; shifter < exp - 2 - 8; shifter++) { // On input a is at most 175.700 bits (see end of this loop) --- 1100,1109 ---- a.d3 = __subc_cc(0, tmp192.d3); a.d4 = __subc_cc(0, tmp192.d4); a.d5 = __subc (0, tmp192.d5); + #endif + a.d5 = 0; BASE_preinit2; ! for (shifter = 0; shifter < exp - 1 - BASE_n2; shifter++) { // On input a is at most 175.700 bits (see end of this loop) *************** *** 1330,1335 **** --- 1342,1348 ---- // Could write optimized div_384_192 with so many tmp384 elements known to be zero div_384_192(&u,tmp384,f,ff); // u = floor(2^(191 + bits_in_f) / f), giving 192 bits of precision, requires f >= 161 bits + #if 0 // b_preinit = 2^256 // a = b_preinit / 2 ^ (bits_in_f - 1) // tmp384 = a * u = (b_preinit / 2 ^ (bits_in_f - 1)) * (2 ^ (191 + bits_in_f) / f) (ignore the floor functions for now) *************** *** 1346,1353 **** a.d3 = __subc_cc(0, tmp192.d3); a.d4 = __subc_cc(0, tmp192.d4); a.d5 = __subc (0, tmp192.d5); ! 
for (shifter = 0; shifter < exp - 2 - 8; shifter++) { // On input a is at most 186.700 bits (see end of this loop) --- 1359,1368 ---- a.d3 = __subc_cc(0, tmp192.d3); a.d4 = __subc_cc(0, tmp192.d4); a.d5 = __subc (0, tmp192.d5); + #endif + a.d5 = 0; BASE_preinit2; ! for (shifter = 0; shifter < exp - 1 - BASE_n2; shifter++) { // On input a is at most 186.700 bits (see end of this loop) *** _/tf_validate.h 2012-09-23 13:26:13.000000000 -0700 --- tf_validate.h 2012-09-25 03:53:55.917559230 -0700 *************** *** 16,21 **** --- 16,23 ---- along with mfaktc. If not, see <http://www.gnu.org/licenses/>. */ + #include "tf_gfn.h" + // Welcome to the "dumb num" routines // remove leading zeroes from a *************** *** 249,255 **** remainder.d0=mystuff->h_RES[99]; print_dez192(remainder,remainder_string); ! printf("Verifying (2^(2^%d)) %% %s = %s\n", (int) mystuff->exponent, factor_string, remainder_string); } // make sure the factor has no really small factors - this would indicate calculating the factor or GPU sieving or class_needed is broken --- 251,257 ---- remainder.d0=mystuff->h_RES[99]; print_dez192(remainder,remainder_string); ! printf("Verifying (%d^(2^%d)) %% %s = %s\n", BASE, (int) mystuff->exponent, factor_string, remainder_string); } // make sure the factor has no really small factors - this would indicate calculating the factor or GPU sieving or class_needed is broken *************** *** 273,285 **** // validate the exponentiation ! tmp[0] = 1; tmp[1] = 65536; ! for (i = 4; i < (mystuff->fermat_factoring ? mystuff->exponent - 2 : mystuff->exponent); i++) { dn_square (tmp, tmp); dn_mod (tmp, fac, tmp); } if (! dn_equal (tmp, rem)) ! 
printf ("ERROR: Exponentiation failure\n"); //, exit(1); } *************** *** 302,311 **** // Look for the exponentiation that returns fac-1 rather than 1 ! for (exp = mystuff->exponent-2; exp > 5; exp--) { ! tmp[0] = 1; tmp[1] = 65536; ! for (i = 4; i < exp; i++) { dn_square (tmp, tmp); dn_mod (tmp, fac, tmp); } --- 304,313 ---- // Look for the exponentiation that returns fac-1 rather than 1 ! for (exp = mystuff->exponent-1; exp > 5; exp--) { ! tmp[0] = 1; tmp[1] = BASE_v0; ! for (i = BASE_n0; i < exp; i++) { dn_square (tmp, tmp); dn_mod (tmp, fac, tmp); } *** _/mfaktc.c 2012-09-23 08:15:26.000000000 -0700 --- mfaktc.c 2012-09-25 03:50:40.361569952 -0700 *************** *** 628,634 **** i++; } ! printf("mmff v%s (%dbit built)\n\n", MFAKTC_VERSION, (int)(sizeof(void*)*8)); /* print current configuration */ --- 628,634 ---- i++; } ! printf("mmff-gfn v%s (%dbit built)\n\n", MFAKTC_VERSION, (int)(sizeof(void*)*8)); /* print current configuration */ *** _/output.c 2012-09-23 12:57:20.000000000 -0700 --- output.c 2012-09-25 03:53:55.907559229 -0700 *************** *** 17,22 **** --- 17,23 ---- along with mfaktc. If not, see <http://www.gnu.org/licenses/>. */ + #include "tf_gfn.h" #include #include *************** *** 35,41 **** void print_help(char *string) { ! printf("mmff v%s Copyright (C) 2009, 2010, 2011, 2012 George Woltman, Oliver Weihe\n", MFAKTC_VERSION); printf("This program comes with ABSOLUTELY NO WARRANTY; for details see COPYING.\n"); printf("This is free software, and you are welcome to redistribute it\n"); printf("under certain conditions; see COPYING for details.\n\n\n"); --- 36,42 ---- void print_help(char *string) { ! 
printf("mmff-gfn v%s Copyright (C) 2009, 2010, 2011, 2012 George Woltman, Oliver Weihe, Serge Batalov (GFN)\n", MFAKTC_VERSION); printf("This program comes with ABSOLUTELY NO WARRANTY; for details see COPYING.\n"); printf("This is free software, and you are welcome to redistribute it\n"); printf("under certain conditions; see COPYING for details.\n\n\n"); *************** *** 349,364 **** if((mystuff->mode == MODE_NORMAL) && (mystuff->stats.class_counter < 960)) #endif { ! sprintf(string, "found %d factor%s for %s in %s (partially tested) [mmff %s %s]", factorsfound, (factorsfound > 1) ? "s" : "", mystuff->exponent_string, krange, MFAKTC_VERSION, mystuff->stats.kernelname); } else { ! sprintf(string, "found %d factor%s for %s in %s [mmff %s %s]", factorsfound, (factorsfound > 1) ? "s" : "", mystuff->exponent_string, krange, MFAKTC_VERSION, mystuff->stats.kernelname); } } else { ! sprintf(string, "no factor for %s in %s [mmff %s %s]", mystuff->exponent_string, krange, MFAKTC_VERSION, mystuff->stats.kernelname); } if(mystuff->mode != MODE_SELFTEST_SHORT) --- 350,365 ---- if((mystuff->mode == MODE_NORMAL) && (mystuff->stats.class_counter < 960)) #endif { ! sprintf(string, "found %d factor%s for %s in %s (partially tested) [mmff-gfn %s %s]", factorsfound, (factorsfound > 1) ? "s" : "", mystuff->exponent_string, krange, MFAKTC_VERSION, mystuff->stats.kernelname); } else { ! sprintf(string, "found %d factor%s for %s in %s [mmff-gfn %s %s]", factorsfound, (factorsfound > 1) ? "s" : "", mystuff->exponent_string, krange, MFAKTC_VERSION, mystuff->stats.kernelname); } } else { ! sprintf(string, "no factor for %s in %s [mmff-gfn %s %s]", mystuff->exponent_string, krange, MFAKTC_VERSION, mystuff->stats.kernelname); } if(mystuff->mode != MODE_SELFTEST_SHORT) *************** *** 386,392 **** if (mystuff->fermat_factoring) // Figure out which Fermat number this factor divides { ! 
sprintf(exponent_string, "F%d", which_fermat_number(mystuff, factor_number)); } else sprintf(exponent_string, "%s", mystuff->exponent_string); --- 387,393 ---- if (mystuff->fermat_factoring) // Figure out which Fermat number this factor divides { ! sprintf(exponent_string, "GF(%d,%d)", which_fermat_number(mystuff, factor_number), BASE); } else sprintf(exponent_string, "%s", mystuff->exponent_string); *************** *** 407,415 **** if(mystuff->mode == MODE_NORMAL) { #ifndef MORE_CLASSES ! fprintf(resultfile, "%s%s has a factor: %s [TF:%d:%d%s:mmff %s %s]\n", UID, exponent_string, factor, mystuff->bit_min, mystuff->bit_max_stage, ((mystuff->stopafterfactor == 2) && (mystuff->stats.class_counter < 96)) ? "*" : "" , MFAKTC_VERSION, mystuff->stats.kernelname); #else ! fprintf(resultfile, "%s%s has a factor: %s [TF:%d:%d%s:mmff %s %s]\n", UID, exponent_string, factor, mystuff->bit_min, mystuff->bit_max_stage, ((mystuff->stopafterfactor == 2) && (mystuff->stats.class_counter < 960)) ? "*" : "" , MFAKTC_VERSION, mystuff->stats.kernelname); #endif } } --- 408,416 ---- if(mystuff->mode == MODE_NORMAL) { #ifndef MORE_CLASSES ! fprintf(resultfile, "%s%s has a factor: %s [TF:%d:%d%s:mmff-gfn %s %s]\n", UID, exponent_string, factor, mystuff->bit_min, mystuff->bit_max_stage, ((mystuff->stopafterfactor == 2) && (mystuff->stats.class_counter < 96)) ? "*" : "" , MFAKTC_VERSION, mystuff->stats.kernelname); #else ! fprintf(resultfile, "%s%s has a factor: %s [TF:%d:%d%s:mmff-gfn %s %s]\n", UID, exponent_string, factor, mystuff->bit_min, mystuff->bit_max_stage, ((mystuff->stopafterfactor == 2) && (mystuff->stats.class_counter < 960)) ? "*" : "" , MFAKTC_VERSION, mystuff->stats.kernelname); #endif } } *** _/parse.c 2012-09-23 13:48:55.000000000 -0700 --- parse.c 2012-09-24 19:21:49.692243906 -0700 *************** *** 93,99 **** int ret = 1; if (fermat) { ! 
if(exp <= 27) {ret = 0; if(verbosity >= 1)printf("WARNING: Exponents <= 27 are not supported in Fermat factoring!\n");} else if(exp >= 160) {ret = 0; if(verbosity >= 1)printf("WARNING: Exponents >= 160 are not supported in Fermat factoring!\n");} else if (exp <= 31) { if (bit_min < 64 || bit_max > 96 || /*bit_min - exp < 13 ||*/ bit_max - exp > 64) {ret = 0; if(verbosity >= 1)printf("WARNING: bit range isn't supported!\n");} } else if (exp <= 63) { if (bit_min < 64 || bit_max > 128 || /*bit_min - exp < 13 ||*/ bit_max - exp > 64) {ret = 0; if(verbosity >= 1)printf("WARNING: bit range isn't supported!\n");} } --- 93,99 ---- int ret = 1; if (fermat) { ! if(exp <= 24) {ret = 0; if(verbosity >= 1)printf("WARNING: Exponents <= 24 are not supported in Fermat factoring!\n");} else if(exp >= 160) {ret = 0; if(verbosity >= 1)printf("WARNING: Exponents >= 160 are not supported in Fermat factoring!\n");} else if (exp <= 31) { if (bit_min < 64 || bit_max > 96 || /*bit_min - exp < 13 ||*/ bit_max - exp > 64) {ret = 0; if(verbosity >= 1)printf("WARNING: bit range isn't supported!\n");} } else if (exp <= 63) { if (bit_min < 64 || bit_max > 128 || /*bit_min - exp < 13 ||*/ bit_max - exp > 64) {ret = 0; if(verbosity >= 1)printf("WARNING: bit range isn't supported!\n");} } *** _/tf_barrett96_gs.cu 2012-09-23 00:52:17.000000000 -0700 --- tf_barrett96_gs.cu 2012-09-25 00:23:02.420253003 -0700 *************** *** 73,78 **** --- 73,79 ---- #undef KERNEL_MIN_BLOCKS #define KERNEL_MIN_BLOCKS 3 + #include "tf_gfn.h" #include "tf_192.h" #include "tf_160.h" #include "tf_128.h"