*** _/tf_f0_31.h 2012-09-23 00:42:46.000000000 -0700
--- tf_f0_31.h 2012-09-25 00:42:56.259187546 -0700
***************
*** 206,211 ****
--- 206,212 ----
// Could write optimized div_192_96 with so many tmp192 elements known to be zero
div_192_96(&u,tmp192,f,ff); // u = floor(2^(95 + bits_in_f) / f), giving 96 bits of precision
+ #if 0
// b_preinit = 2^128
// a = b_preinit / 2 ^ (bits_in_f - 1)
// tmp192 = a * u = (b_preinit / 2 ^ (bits_in_f - 1)) * (2 ^ (95 + bits_in_f) / f) (ignore the floor functions for now)
***************
*** 218,225 ****
a.d0 = __sub_cc (0, tmp96.d0); // Compute the remainder
a.d1 = __subc_cc(0, tmp96.d1); // we do not need the upper digits of b_preinit and tmp96 because the result is 0 after subtraction!
a.d2 = __subc (0, tmp96.d2);
! for (shifter = 0; shifter < exp - 2 - 7; shifter++)
{
// On input a is at most 91.807 bits (see end of this loop)
--- 219,229 ----
a.d0 = __sub_cc (0, tmp96.d0); // Compute the remainder
a.d1 = __subc_cc(0, tmp96.d1); // we do not need the upper digits of b_preinit and tmp96 because the result is 0 after subtraction!
a.d2 = __subc (0, tmp96.d2);
+ #endif
+
+ BASE_preinit;
! for (shifter = 0; shifter < exp - 1 - BASE_n1; shifter++)
{
// On input a is at most 91.807 bits (see end of this loop)
***************
*** 446,451 ****
--- 450,456 ----
// Could write optimized div_224_96 with so many tmp224 elements known to be zero
div_224_96(&u,tmp224,f,ff); // u = floor(2^208 / f). This requires f >= 81 bits.
+ #if 0
// b_preinit = 2^128
// a = b_preinit / 2^80 = 2^48
// tmp256 = a * u = (b_preinit / 2^80) * (2^208 / f) (ignore the floor functions for now)
***************
*** 458,465 ****
a.d1 = __subc_cc(0, tmp128.d1); // we do not need the upper digits of b_preinit and tmp192 because the result is 0 after subtraction!
a.d2 = __subc_cc(0, tmp128.d2);
a.d3 = __subc (0, tmp128.d3);
! for (shifter = 0; shifter < exp - 2 - 7; shifter++)
{
// On input a is at most 99.17 bits (see end of this loop)
--- 463,473 ----
a.d1 = __subc_cc(0, tmp128.d1); // we do not need the upper digits of b_preinit and tmp192 because the result is 0 after subtraction!
a.d2 = __subc_cc(0, tmp128.d2);
a.d3 = __subc (0, tmp128.d3);
+ #endif
+
+ a.d3 = 0; BASE_preinit;
! for (shifter = 0; shifter < exp - 1 - BASE_n1; shifter++)
{
// On input a is at most 99.17 bits (see end of this loop)
*** _/tf_f128_159.h 2012-09-09 09:46:09.000000000 -0700
--- tf_f128_159.h 2012-09-25 00:23:51.199250328 -0700
***************
*** 206,211 ****
--- 206,212 ----
// Could write optimized div_352_192 with so many tmp352 elements known to be zero
div_352_192(&u,tmp352,f,ff); // u = floor(2^352 / f). This requires f >= 161 bits.
+ #if 0
// b_preinit = 2^256
// a = b_preinit / 2^160 = 2^96
// tmp352 = a * u = (b_preinit / 2^160) * (2^352 / f) (ignore the floor functions for now)
***************
*** 221,228 ****
a.d3 = __subc_cc(0, tmp192.d3);
a.d4 = __subc_cc(0, tmp192.d4);
a.d5 = __subc (0, tmp192.d5);
! for (shifter = 0; shifter < exp - 2 - 8; shifter++)
{
// On input a is at most 175.700 bits (see end of this loop)
--- 222,231 ----
a.d3 = __subc_cc(0, tmp192.d3);
a.d4 = __subc_cc(0, tmp192.d4);
a.d5 = __subc (0, tmp192.d5);
+ #endif
+ a.d5 = 0; BASE_preinit2;
! for (shifter = 0; shifter < exp - 1 - BASE_n2; shifter++)
{
// On input a is at most 175.700 bits (see end of this loop)
***************
*** 461,466 ****
--- 464,470 ----
// Could write optimized div_384_192 with so many tmp384 elements known to be zero
div_384_192(&u,tmp384,f,ff); // u = floor(2^(191 + bits_in_f) / f), giving 192 bits of precision, requires f >= 161 bits
+ #if 0
// b_preinit = 2^256
// a = b_preinit / 2 ^ (bits_in_f - 1)
// tmp384 = a * u = (b_preinit / 2 ^ (bits_in_f - 1)) * (2 ^ (191 + bits_in_f) / f) (ignore the floor functions for now)
***************
*** 477,484 ****
a.d3 = __subc_cc(0, tmp192.d3);
a.d4 = __subc_cc(0, tmp192.d4);
a.d5 = __subc (0, tmp192.d5);
! for (shifter = 0; shifter < exp - 2 - 8; shifter++)
{
// On input a is at most 186.700 bits (see end of this loop)
--- 481,490 ----
a.d3 = __subc_cc(0, tmp192.d3);
a.d4 = __subc_cc(0, tmp192.d4);
a.d5 = __subc (0, tmp192.d5);
+ #endif
+ a.d5 = 0; BASE_preinit2;
! for (shifter = 0; shifter < exp - 1 - BASE_n2; shifter++)
{
// On input a is at most 186.700 bits (see end of this loop)
***************
*** 718,723 ****
--- 724,730 ----
// Could write optimized div_384_192 with so many tmp384 elements known to be zero
div_384_192(&u,tmp384,f,ff); // u = floor(2^(191 + bits_in_f) / f), giving 192 bits of precision, requires f >= 161 bits
+ #if 0
// b_preinit = 2^256
// a = b_preinit / 2 ^ (bits_in_f - 1)
// tmp384 = a * u = (b_preinit / 2 ^ (bits_in_f - 1)) * (2 ^ (191 + bits_in_f) / f) (ignore the floor functions for now)
***************
*** 734,741 ****
a.d3 = __subc_cc(0, tmp192.d3);
a.d4 = __subc_cc(0, tmp192.d4);
a.d5 = __subc (0, tmp192.d5);
! for (shifter = 0; shifter < exp - 2 - 8; shifter++)
{
// On input a is at most 185.858 bits (see end of this loop)
--- 741,750 ----
a.d3 = __subc_cc(0, tmp192.d3);
a.d4 = __subc_cc(0, tmp192.d4);
a.d5 = __subc (0, tmp192.d5);
+ #endif
+ a.d5 = 0; BASE_preinit2;
! for (shifter = 0; shifter < exp - 1 - BASE_n2; shifter++)
{
// On input a is at most 185.858 bits (see end of this loop)
***************
*** 984,989 ****
--- 993,999 ----
// Could write optimized div_384_192 with so many tmp384 elements known to be zero
div_384_192(&u,tmp384,f,ff); // u = floor(2^(191 + bits_in_f) / f), giving 192 bits of precision, requires f >= 161 bits
+ #if 0
// b_preinit = 2^256
// a = b_preinit / 2 ^ (bits_in_f - 1)
// tmp384 = a * u = (b_preinit / 2 ^ (bits_in_f - 1)) * (2 ^ (191 + bits_in_f) / f) (ignore the floor functions for now)
***************
*** 1000,1007 ****
a.d3 = __subc_cc(0, tmp192.d3);
a.d4 = __subc_cc(0, tmp192.d4);
a.d5 = __subc (0, tmp192.d5);
! for (shifter = 0; shifter < exp - 2 - 8; shifter++)
{
// On input a is at most 188 bits (see end of this loop)
--- 1010,1019 ----
a.d3 = __subc_cc(0, tmp192.d3);
a.d4 = __subc_cc(0, tmp192.d4);
a.d5 = __subc (0, tmp192.d5);
+ #endif
+ a.d5 = 0; BASE_preinit2;
! for (shifter = 0; shifter < exp - 1 - BASE_n2; shifter++)
{
// On input a is at most 188 bits (see end of this loop)
*** _/tf_f32_63.h 2012-09-23 00:43:01.000000000 -0700
--- tf_f32_63.h 2012-09-25 00:43:32.182185576 -0700
***************
*** 313,318 ****
--- 313,319 ----
// Could write optimized div_192_96 with so many tmp192 elements known to be zero
div_192_96(&u,tmp192,f,ff); // u = floor(2^(95 + bits_in_f) / f), giving 96 bits of precision
+ #if 0
// b_preinit = 2^128
// a = b_preinit / 2 ^ (bits_in_f - 1)
// tmp192 = a * u = (b_preinit / 2 ^ (bits_in_f - 1)) * (2 ^ (95 + bits_in_f) / f) (ignore the floor functions for now)
***************
*** 325,332 ****
a.d0 = __sub_cc (0, tmp96.d0); // Compute the remainder
a.d1 = __subc_cc(0, tmp96.d1); // we do not need the upper digits of b_preinit and tmp96 because the result is 0 after subtraction!
a.d2 = __subc (0, tmp96.d2);
! for (shifter = 0; shifter < exp - 2 - 7; shifter++)
{
// On input a is at most 91.807 bits (see end of this loop)
--- 326,335 ----
a.d0 = __sub_cc (0, tmp96.d0); // Compute the remainder
a.d1 = __subc_cc(0, tmp96.d1); // we do not need the upper digits of b_preinit and tmp96 because the result is 0 after subtraction!
a.d2 = __subc (0, tmp96.d2);
+ #endif
+ BASE_preinit;
! for (shifter = 0; shifter < exp - 1 - BASE_n1; shifter++)
{
// On input a is at most 91.807 bits (see end of this loop)
***************
*** 553,558 ****
--- 556,562 ----
// Could write optimized div_224_96 with so many tmp224 elements known to be zero
div_224_96(&u,tmp224,f,ff); // u = floor(2^208 / f). This requires f >= 81 bits.
+ #if 0
// b_preinit = 2^128
// a = b_preinit / 2^80 = 2^48
// tmp256 = a * u = (b_preinit / 2^80) * (2^208 / f) (ignore the floor functions for now)
***************
*** 565,572 ****
a.d1 = __subc_cc(0, tmp128.d1); // we do not need the upper digits of b_preinit and tmp192 because the result is 0 after subtraction!
a.d2 = __subc_cc(0, tmp128.d2);
a.d3 = __subc (0, tmp128.d3);
! for (shifter = 0; shifter < exp - 2 - 7; shifter++)
{
// On input a is at most 99.17 bits (see end of this loop)
--- 569,578 ----
a.d1 = __subc_cc(0, tmp128.d1); // we do not need the upper digits of b_preinit and tmp192 because the result is 0 after subtraction!
a.d2 = __subc_cc(0, tmp128.d2);
a.d3 = __subc (0, tmp128.d3);
+ #endif
+ a.d3 = 0; BASE_preinit;
! for (shifter = 0; shifter < exp - 1 - BASE_n1; shifter++)
{
// On input a is at most 99.17 bits (see end of this loop)
***************
*** 804,809 ****
--- 810,816 ----
// Could write optimized div_224_128 with so many tmp224 elements known to be zero
div_224_128(&u,tmp224,f,ff); // u = floor(2^224 / f). This requires f >= 97 bits.
+ #if 0
// b_preinit = 2^128
// a = b_preinit / 2^96 = 2^32
// tmp256 = a * u = (b_preinit / 2^96) * (2^224 / f) (ignore the floor functions for now)
***************
*** 815,822 ****
a.d1 = __subc_cc(0, tmp128.d1); // we do not need the upper digits of b_preinit and tmp128 because the result is 0 after subtraction!
a.d2 = __subc_cc(0, tmp128.d2);
a.d3 = __subc (0, tmp128.d3);
! for (shifter = 0; shifter < exp - 2 - 7; shifter++)
{
// On input a is at most 111.17 bits (see end of this loop)
--- 822,831 ----
a.d1 = __subc_cc(0, tmp128.d1); // we do not need the upper digits of b_preinit and tmp128 because the result is 0 after subtraction!
a.d2 = __subc_cc(0, tmp128.d2);
a.d3 = __subc (0, tmp128.d3);
+ #endif
+ a.d3 = 0; BASE_preinit;
! for (shifter = 0; shifter < exp - 1 - BASE_n1; shifter++)
{
// On input a is at most 111.17 bits (see end of this loop)
***************
*** 1048,1053 ****
--- 1057,1063 ----
// Could write optimized div_256_128 with so many tmp256 elements known to be zero
div_256_128(&u,tmp256,f,ff); // u = floor(2^(127 + bits_in_f) / f), giving 128 bits of precision
+ #if 0
// b_preinit = 2^128
// a = b_preinit / 2 ^ (bits_in_f - 1)
// tmp256 = a * u = (b_preinit / 2 ^ (bits_in_f - 1)) * (2 ^ (127 + bits_in_f) / f) (ignore the floor functions for now)
***************
*** 1060,1067 ****
a.d1 = __subc_cc(0, tmp128.d1); // we do not need the upper digits of b_preinit and tmp128 because the result is 0 after subtraction!
a.d2 = __subc_cc(0, tmp128.d2);
a.d3 = __subc (0, tmp128.d3);
! for (shifter = 0; shifter < exp - 2 - 7; shifter++)
{
// On input a is at most 123.17 bits (see end of this loop)
--- 1070,1079 ----
a.d1 = __subc_cc(0, tmp128.d1); // we do not need the upper digits of b_preinit and tmp128 because the result is 0 after subtraction!
a.d2 = __subc_cc(0, tmp128.d2);
a.d3 = __subc (0, tmp128.d3);
+ #endif
+ a.d3 = 0; BASE_preinit;
! for (shifter = 0; shifter < exp - 1 - BASE_n1; shifter++)
{
// On input a is at most 123.17 bits (see end of this loop)
***************
*** 1293,1298 ****
--- 1305,1311 ----
// Could write optimized div_288_128 with so many tmp288 elements known to be zero
div_288_128(&u,tmp288,f,ff); // u = floor(2^272 / f). This requires f >= 113 bits.
+ #if 0
// b_preinit = 2^256
// a = b_preinit / 2^112 = 2^144
// tmp320 = a * u = (b_preinit / 2^112) * (2^272 / f) (ignore the floor functions for now)
***************
*** 1309,1316 ****
a.d2 = __subc_cc(0, tmp160.d2);
a.d3 = __subc_cc(0, tmp160.d3);
a.d4 = __subc (0, tmp160.d4);
! for (shifter = 0; shifter < exp - 2 - 8; shifter++)
{
// On input a is at most 131.459 bits (see end of this loop)
--- 1322,1331 ----
a.d2 = __subc_cc(0, tmp160.d2);
a.d3 = __subc_cc(0, tmp160.d3);
a.d4 = __subc (0, tmp160.d4);
+ #endif
+ a.d4 = a.d3 = 0; BASE_preinit;
! for (shifter = 0; shifter < exp - 1 - BASE_n1; shifter++)
{
// On input a is at most 131.459 bits (see end of this loop)
*** _/tf_f64_95.h 2012-09-09 09:45:19.000000000 -0700
--- tf_f64_95.h 2012-09-25 00:24:10.345249278 -0700
***************
*** 340,345 ****
--- 340,346 ----
// Could write optimized div_224_128 with so many tmp224 elements known to be zero
div_224_128(&u,tmp224,f,ff); // u = floor(2^224 / f). This requires f >= 97 bits.
+ #if 0
// b_preinit = 2^128
// a = b_preinit / 2^96 = 2^32
// tmp256 = a * u = (b_preinit / 2^96) * (2^224 / f) (ignore the floor functions for now)
***************
*** 351,358 ****
a.d1 = __subc_cc(0, tmp128.d1); // we do not need the upper digits of b_preinit and tmp128 because the result is 0 after subtraction!
a.d2 = __subc_cc(0, tmp128.d2);
a.d3 = __subc (0, tmp128.d3);
! for (shifter = 0; shifter < exp - 2 - 7; shifter++)
{
// On input a is at most 111.17 bits (see end of this loop)
--- 352,361 ----
a.d1 = __subc_cc(0, tmp128.d1); // we do not need the upper digits of b_preinit and tmp128 because the result is 0 after subtraction!
a.d2 = __subc_cc(0, tmp128.d2);
a.d3 = __subc (0, tmp128.d3);
+ #endif
+ a.d3 = 0; BASE_preinit;
! for (shifter = 0; shifter < exp - 1 - BASE_n1; shifter++)
{
// On input a is at most 111.17 bits (see end of this loop)
***************
*** 585,590 ****
--- 588,594 ----
// Could write optimized div_256_128 with so many tmp256 elements known to be zero
div_256_128(&u,tmp256,f,ff); // u = floor(2^(127 + bits_in_f) / f), giving 128 bits of precision
+ #if 0
// b_preinit = 2^128
// a = b_preinit / 2 ^ (bits_in_f - 1)
// tmp256 = a * u = (b_preinit / 2 ^ (bits_in_f - 1)) * (2 ^ (127 + bits_in_f) / f) (ignore the floor functions for now)
***************
*** 597,604 ****
a.d1 = __subc_cc(0, tmp128.d1); // we do not need the upper digits of b_preinit and tmp128 because the result is 0 after subtraction!
a.d2 = __subc_cc(0, tmp128.d2);
a.d3 = __subc (0, tmp128.d3);
! for (shifter = 0; shifter < exp - 2 - 7; shifter++)
{
// On input a is at most 123.17 bits (see end of this loop)
--- 601,610 ----
a.d1 = __subc_cc(0, tmp128.d1); // we do not need the upper digits of b_preinit and tmp128 because the result is 0 after subtraction!
a.d2 = __subc_cc(0, tmp128.d2);
a.d3 = __subc (0, tmp128.d3);
+ #endif
+ a.d3 = 0; BASE_preinit;
! for (shifter = 0; shifter < exp - 1 - BASE_n1; shifter++)
{
// On input a is at most 123.17 bits (see end of this loop)
***************
*** 831,836 ****
--- 837,843 ----
// Could write optimized div_288_128 with so many tmp288 elements known to be zero
div_288_128(&u,tmp288,f,ff); // u = floor(2^272 / f). This requires f >= 113 bits.
+ #if 0
// b_preinit = 2^256
// a = b_preinit / 2^112 = 2^144
// tmp320 = a * u = (b_preinit / 2^112) * (2^272 / f) (ignore the floor functions for now)
***************
*** 847,854 ****
a.d2 = __subc_cc(0, tmp160.d2);
a.d3 = __subc_cc(0, tmp160.d3);
a.d4 = __subc (0, tmp160.d4);
! for (shifter = 0; shifter < exp - 2 - 8; shifter++)
{
// On input a is at most 131.459 bits (see end of this loop)
--- 854,863 ----
a.d2 = __subc_cc(0, tmp160.d2);
a.d3 = __subc_cc(0, tmp160.d3);
a.d4 = __subc (0, tmp160.d4);
+ #endif
+ a.d4 = a.d3 = 0; BASE_preinit;
! for (shifter = 0; shifter < exp - 1 - BASE_n1; shifter++)
{
// On input a is at most 131.459 bits (see end of this loop)
***************
*** 1089,1094 ****
--- 1098,1104 ----
// Could write optimized div_288_160 with so many tmp288 elements known to be zero
div_288_160(&u,tmp288,f,ff); // u = floor(2^288 / f). This requires f >= 129 bits.
+ #if 0
// b_preinit = 2^256
// a = b_preinit / 2^128 = 2^144
// tmp320 = a * u = (b_preinit / 2^128) * (2^288 / f) (ignore the floor functions for now)
***************
*** 1104,1111 ****
a.d2 = __subc_cc(0, tmp160.d2);
a.d3 = __subc_cc(0, tmp160.d3);
a.d4 = __subc (0, tmp160.d4);
! for (shifter = 0; shifter < exp - 2 - 8; shifter++)
{
// On input a is at most 143.459 bits (see end of this loop)
--- 1114,1123 ----
a.d2 = __subc_cc(0, tmp160.d2);
a.d3 = __subc_cc(0, tmp160.d3);
a.d4 = __subc (0, tmp160.d4);
+ #endif
+ a.d4 = a.d3 = 0; BASE_preinit;
! for (shifter = 0; shifter < exp - 1 - BASE_n1; shifter++)
{
// On input a is at most 143.459 bits (see end of this loop)
***************
*** 1341,1346 ****
--- 1353,1359 ----
// Could write optimized div_320_160 with so many tmp320 elements known to be zero
div_320_160(&u,tmp320,f,ff); // u = floor(2^(159 + bits_in_f) / f), giving 160 bits of precision
+ #if 0
// b_preinit = 2^256
// a = b_preinit / 2 ^ (bits_in_f - 1)
// tmp320 = a * u = (b_preinit / 2 ^ (bits_in_f - 1)) * (2 ^ (159 + bits_in_f) / f) (ignore the floor functions for now)
***************
*** 1357,1364 ****
a.d2 = __subc_cc(0, tmp160.d2);
a.d3 = __subc_cc(0, tmp160.d3);
a.d4 = __subc (0, tmp160.d4);
! for (shifter = 0; shifter < exp - 2 - 8; shifter++)
{
// On input a is at most 155.459 bits (see end of this loop)
--- 1370,1379 ----
a.d2 = __subc_cc(0, tmp160.d2);
a.d3 = __subc_cc(0, tmp160.d3);
a.d4 = __subc (0, tmp160.d4);
+ #endif
+ BASE_preinit2;
! for (shifter = 0; shifter < exp - 1 - BASE_n2; shifter++)
{
// On input a is at most 155.459 bits (see end of this loop)
***************
*** 1594,1599 ****
--- 1609,1615 ----
// Could write optimized div_352_160 with so many tmp352 elements known to be zero
div_352_160(&u,tmp352,f,ff); // u = floor(2^336 / f). This requires f >= 145 bits.
+ #if 0
// b_preinit = 2^256
// a = b_preinit / 2^144 = 2^112
// tmp352 = a * u = (b_preinit / 2^144) * (2^336 / f) (ignore the floor functions for now)
***************
*** 1610,1617 ****
a.d3 = __subc_cc(0, tmp192.d3);
a.d4 = __subc_cc(0, tmp192.d4);
a.d5 = __subc (0, tmp192.d5);
! for (shifter = 0; shifter < exp - 2 - 8; shifter++)
{
// On input a is at most 163.700 bits (see end of this loop)
--- 1626,1635 ----
a.d3 = __subc_cc(0, tmp192.d3);
a.d4 = __subc_cc(0, tmp192.d4);
a.d5 = __subc (0, tmp192.d5);
+ #endif
+ a.d5 = 0; BASE_preinit2;
! for (shifter = 0; shifter < exp - 1 - BASE_n2; shifter++)
{
// On input a is at most 163.700 bits (see end of this loop)
*** _/tf_f96_127.h 2012-09-09 09:45:47.000000000 -0700
--- tf_f96_127.h 2012-09-25 00:24:20.441248725 -0700
***************
*** 307,312 ****
--- 307,313 ----
// Could write optimized div_288_160 with so many tmp288 elements known to be zero
div_288_160(&u,tmp288,f,ff); // u = floor(2^288 / f). This requires f >= 129 bits.
+ #if 0
// b_preinit = 2^256
// a = b_preinit / 2^128 = 2^144
// tmp320 = a * u = (b_preinit / 2^128) * (2^288 / f) (ignore the floor functions for now)
***************
*** 322,329 ****
a.d2 = __subc_cc(0, tmp160.d2);
a.d3 = __subc_cc(0, tmp160.d3);
a.d4 = __subc (0, tmp160.d4);
! for (shifter = 0; shifter < exp - 2 - 8; shifter++)
{
// On input a is at most 143.459 bits (see end of this loop)
--- 323,332 ----
a.d2 = __subc_cc(0, tmp160.d2);
a.d3 = __subc_cc(0, tmp160.d3);
a.d4 = __subc (0, tmp160.d4);
+ #endif
+ a.d4 = a.d3 = 0; BASE_preinit;
! for (shifter = 0; shifter < exp - 1 - BASE_n1; shifter++)
{
// On input a is at most 143.459 bits (see end of this loop)
***************
*** 559,564 ****
--- 562,568 ----
// Could write optimized div_320_160 with so many tmp320 elements known to be zero
div_320_160(&u,tmp320,f,ff); // u = floor(2^(159 + bits_in_f) / f), giving 160 bits of precision
+ #if 0
// b_preinit = 2^256
// a = b_preinit / 2 ^ (bits_in_f - 1)
// tmp320 = a * u = (b_preinit / 2 ^ (bits_in_f - 1)) * (2 ^ (159 + bits_in_f) / f) (ignore the floor functions for now)
***************
*** 575,582 ****
a.d2 = __subc_cc(0, tmp160.d2);
a.d3 = __subc_cc(0, tmp160.d3);
a.d4 = __subc (0, tmp160.d4);
! for (shifter = 0; shifter < exp - 2 - 8; shifter++)
{
// On input a is at most 155.459 bits (see end of this loop)
--- 579,588 ----
a.d2 = __subc_cc(0, tmp160.d2);
a.d3 = __subc_cc(0, tmp160.d3);
a.d4 = __subc (0, tmp160.d4);
+ #endif
+ BASE_preinit2;
! for (shifter = 0; shifter < exp - 1 - BASE_n2; shifter++)
{
// On input a is at most 155.459 bits (see end of this loop)
***************
*** 812,817 ****
--- 818,824 ----
// Could write optimized div_352_160 with so many tmp352 elements known to be zero
div_352_160(&u,tmp352,f,ff); // u = floor(2^336 / f). This requires f >= 145 bits.
+ #if 0
// b_preinit = 2^256
// a = b_preinit / 2^144 = 2^112
// tmp352 = a * u = (b_preinit / 2^144) * (2^336 / f) (ignore the floor functions for now)
***************
*** 828,835 ****
a.d3 = __subc_cc(0, tmp192.d3);
a.d4 = __subc_cc(0, tmp192.d4);
a.d5 = __subc (0, tmp192.d5);
! for (shifter = 0; shifter < exp - 2 - 8; shifter++)
{
// On input a is at most 163.700 bits (see end of this loop)
--- 835,844 ----
a.d3 = __subc_cc(0, tmp192.d3);
a.d4 = __subc_cc(0, tmp192.d4);
a.d5 = __subc (0, tmp192.d5);
+ #endif
+ a.d5 = 0; BASE_preinit2;
! for (shifter = 0; shifter < exp - 1 - BASE_n2; shifter++)
{
// On input a is at most 163.700 bits (see end of this loop)
***************
*** 1075,1080 ****
--- 1084,1090 ----
// Could write optimized div_352_192 with so many tmp352 elements known to be zero
div_352_192(&u,tmp352,f,ff); // u = floor(2^352 / f). This requires f >= 161 bits.
+ #if 0
// b_preinit = 2^256
// a = b_preinit / 2^160 = 2^96
// tmp352 = a * u = (b_preinit / 2^160) * (2^352 / f) (ignore the floor functions for now)
***************
*** 1090,1097 ****
a.d3 = __subc_cc(0, tmp192.d3);
a.d4 = __subc_cc(0, tmp192.d4);
a.d5 = __subc (0, tmp192.d5);
! for (shifter = 0; shifter < exp - 2 - 8; shifter++)
{
// On input a is at most 175.700 bits (see end of this loop)
--- 1100,1109 ----
a.d3 = __subc_cc(0, tmp192.d3);
a.d4 = __subc_cc(0, tmp192.d4);
a.d5 = __subc (0, tmp192.d5);
+ #endif
+ a.d5 = 0; BASE_preinit2;
! for (shifter = 0; shifter < exp - 1 - BASE_n2; shifter++)
{
// On input a is at most 175.700 bits (see end of this loop)
***************
*** 1330,1335 ****
--- 1342,1348 ----
// Could write optimized div_384_192 with so many tmp384 elements known to be zero
div_384_192(&u,tmp384,f,ff); // u = floor(2^(191 + bits_in_f) / f), giving 192 bits of precision, requires f >= 161 bits
+ #if 0
// b_preinit = 2^256
// a = b_preinit / 2 ^ (bits_in_f - 1)
// tmp384 = a * u = (b_preinit / 2 ^ (bits_in_f - 1)) * (2 ^ (191 + bits_in_f) / f) (ignore the floor functions for now)
***************
*** 1346,1353 ****
a.d3 = __subc_cc(0, tmp192.d3);
a.d4 = __subc_cc(0, tmp192.d4);
a.d5 = __subc (0, tmp192.d5);
! for (shifter = 0; shifter < exp - 2 - 8; shifter++)
{
// On input a is at most 186.700 bits (see end of this loop)
--- 1359,1368 ----
a.d3 = __subc_cc(0, tmp192.d3);
a.d4 = __subc_cc(0, tmp192.d4);
a.d5 = __subc (0, tmp192.d5);
+ #endif
+ a.d5 = 0; BASE_preinit2;
! for (shifter = 0; shifter < exp - 1 - BASE_n2; shifter++)
{
// On input a is at most 186.700 bits (see end of this loop)
*** _/tf_validate.h 2012-09-23 13:26:13.000000000 -0700
--- tf_validate.h 2012-09-25 03:53:55.917559230 -0700
***************
*** 16,21 ****
--- 16,23 ----
along with mfaktc. If not, see <http://www.gnu.org/licenses/>.
*/
+ #include "tf_gfn.h"
+
// Welcome to the "dumb num" routines
// remove leading zeroes from a
***************
*** 249,255 ****
remainder.d0=mystuff->h_RES[99];
print_dez192(remainder,remainder_string);
! printf("Verifying (2^(2^%d)) %% %s = %s\n", (int) mystuff->exponent, factor_string, remainder_string);
}
// make sure the factor has no really small factors - this would indicate calculating the factor or GPU sieving or class_needed is broken
--- 251,257 ----
remainder.d0=mystuff->h_RES[99];
print_dez192(remainder,remainder_string);
! printf("Verifying (%d^(2^%d)) %% %s = %s\n", BASE, (int) mystuff->exponent, factor_string, remainder_string);
}
// make sure the factor has no really small factors - this would indicate calculating the factor or GPU sieving or class_needed is broken
***************
*** 273,285 ****
// validate the exponentiation
! tmp[0] = 1; tmp[1] = 65536;
! for (i = 4; i < (mystuff->fermat_factoring ? mystuff->exponent - 2 : mystuff->exponent); i++) {
dn_square (tmp, tmp);
dn_mod (tmp, fac, tmp);
}
if (! dn_equal (tmp, rem))
! printf ("ERROR: Exponentiation failure\n"), exit(1);
}
--- 275,287 ----
// validate the exponentiation
! tmp[0] = 1; tmp[1] = BASE_v0;
! for (i = BASE_n0; i < (mystuff->fermat_factoring ? mystuff->exponent - 1 : mystuff->exponent); i++) {
dn_square (tmp, tmp);
dn_mod (tmp, fac, tmp);
}
if (! dn_equal (tmp, rem))
! printf ("ERROR: Exponentiation failure\n"); //, exit(1);
}
***************
*** 302,311 ****
// Look for the exponentiation that returns fac-1 rather than 1
! for (exp = mystuff->exponent-2; exp > 5; exp--)
{
! tmp[0] = 1; tmp[1] = 65536;
! for (i = 4; i < exp; i++) {
dn_square (tmp, tmp);
dn_mod (tmp, fac, tmp);
}
--- 304,313 ----
// Look for the exponentiation that returns fac-1 rather than 1
! for (exp = mystuff->exponent-1; exp > 5; exp--)
{
! tmp[0] = 1; tmp[1] = BASE_v0;
! for (i = BASE_n0; i < exp; i++) {
dn_square (tmp, tmp);
dn_mod (tmp, fac, tmp);
}
*** _/mfaktc.c 2012-09-23 08:15:26.000000000 -0700
--- mfaktc.c 2012-09-25 03:50:40.361569952 -0700
***************
*** 628,634 ****
i++;
}
! printf("mmff v%s (%dbit built)\n\n", MFAKTC_VERSION, (int)(sizeof(void*)*8));
/* print current configuration */
--- 628,634 ----
i++;
}
! printf("mmff-gfn v%s (%dbit built)\n\n", MFAKTC_VERSION, (int)(sizeof(void*)*8));
/* print current configuration */
*** _/output.c 2012-09-23 12:57:20.000000000 -0700
--- output.c 2012-09-25 03:53:55.907559229 -0700
***************
*** 17,22 ****
--- 17,23 ----
along with mfaktc. If not, see .
*/
+ #include "tf_gfn.h"
#include
#include
***************
*** 35,41 ****
void print_help(char *string)
{
! printf("mmff v%s Copyright (C) 2009, 2010, 2011, 2012 George Woltman, Oliver Weihe\n", MFAKTC_VERSION);
printf("This program comes with ABSOLUTELY NO WARRANTY; for details see COPYING.\n");
printf("This is free software, and you are welcome to redistribute it\n");
printf("under certain conditions; see COPYING for details.\n\n\n");
--- 36,42 ----
void print_help(char *string)
{
! printf("mmff-gfn v%s Copyright (C) 2009, 2010, 2011, 2012 George Woltman, Oliver Weihe, Serge Batalov (GFN)\n", MFAKTC_VERSION);
printf("This program comes with ABSOLUTELY NO WARRANTY; for details see COPYING.\n");
printf("This is free software, and you are welcome to redistribute it\n");
printf("under certain conditions; see COPYING for details.\n\n\n");
***************
*** 349,364 ****
if((mystuff->mode == MODE_NORMAL) && (mystuff->stats.class_counter < 960))
#endif
{
! sprintf(string, "found %d factor%s for %s in %s (partially tested) [mmff %s %s]", factorsfound, (factorsfound > 1) ? "s" : "", mystuff->exponent_string, krange, MFAKTC_VERSION, mystuff->stats.kernelname);
}
else
{
! sprintf(string, "found %d factor%s for %s in %s [mmff %s %s]", factorsfound, (factorsfound > 1) ? "s" : "", mystuff->exponent_string, krange, MFAKTC_VERSION, mystuff->stats.kernelname);
}
}
else
{
! sprintf(string, "no factor for %s in %s [mmff %s %s]", mystuff->exponent_string, krange, MFAKTC_VERSION, mystuff->stats.kernelname);
}
if(mystuff->mode != MODE_SELFTEST_SHORT)
--- 350,365 ----
if((mystuff->mode == MODE_NORMAL) && (mystuff->stats.class_counter < 960))
#endif
{
! sprintf(string, "found %d factor%s for %s in %s (partially tested) [mmff-gfn %s %s]", factorsfound, (factorsfound > 1) ? "s" : "", mystuff->exponent_string, krange, MFAKTC_VERSION, mystuff->stats.kernelname);
}
else
{
! sprintf(string, "found %d factor%s for %s in %s [mmff-gfn %s %s]", factorsfound, (factorsfound > 1) ? "s" : "", mystuff->exponent_string, krange, MFAKTC_VERSION, mystuff->stats.kernelname);
}
}
else
{
! sprintf(string, "no factor for %s in %s [mmff-gfn %s %s]", mystuff->exponent_string, krange, MFAKTC_VERSION, mystuff->stats.kernelname);
}
if(mystuff->mode != MODE_SELFTEST_SHORT)
***************
*** 386,392 ****
if (mystuff->fermat_factoring) // Figure out which Fermat number this factor divides
{
! sprintf(exponent_string, "F%d", which_fermat_number(mystuff, factor_number));
}
else
sprintf(exponent_string, "%s", mystuff->exponent_string);
--- 387,393 ----
if (mystuff->fermat_factoring) // Figure out which Fermat number this factor divides
{
! sprintf(exponent_string, "GF(%d,%d)", which_fermat_number(mystuff, factor_number), BASE);
}
else
sprintf(exponent_string, "%s", mystuff->exponent_string);
***************
*** 407,415 ****
if(mystuff->mode == MODE_NORMAL)
{
#ifndef MORE_CLASSES
! fprintf(resultfile, "%s%s has a factor: %s [TF:%d:%d%s:mmff %s %s]\n", UID, exponent_string, factor, mystuff->bit_min, mystuff->bit_max_stage, ((mystuff->stopafterfactor == 2) && (mystuff->stats.class_counter < 96)) ? "*" : "" , MFAKTC_VERSION, mystuff->stats.kernelname);
#else
! fprintf(resultfile, "%s%s has a factor: %s [TF:%d:%d%s:mmff %s %s]\n", UID, exponent_string, factor, mystuff->bit_min, mystuff->bit_max_stage, ((mystuff->stopafterfactor == 2) && (mystuff->stats.class_counter < 960)) ? "*" : "" , MFAKTC_VERSION, mystuff->stats.kernelname);
#endif
}
}
--- 408,416 ----
if(mystuff->mode == MODE_NORMAL)
{
#ifndef MORE_CLASSES
! fprintf(resultfile, "%s%s has a factor: %s [TF:%d:%d%s:mmff-gfn %s %s]\n", UID, exponent_string, factor, mystuff->bit_min, mystuff->bit_max_stage, ((mystuff->stopafterfactor == 2) && (mystuff->stats.class_counter < 96)) ? "*" : "" , MFAKTC_VERSION, mystuff->stats.kernelname);
#else
! fprintf(resultfile, "%s%s has a factor: %s [TF:%d:%d%s:mmff-gfn %s %s]\n", UID, exponent_string, factor, mystuff->bit_min, mystuff->bit_max_stage, ((mystuff->stopafterfactor == 2) && (mystuff->stats.class_counter < 960)) ? "*" : "" , MFAKTC_VERSION, mystuff->stats.kernelname);
#endif
}
}
*** _/parse.c 2012-09-23 13:48:55.000000000 -0700
--- parse.c 2012-09-24 19:21:49.692243906 -0700
***************
*** 93,99 ****
int ret = 1;
if (fermat) {
! if(exp <= 27) {ret = 0; if(verbosity >= 1)printf("WARNING: Exponents <= 27 are not supported in Fermat factoring!\n");}
else if(exp >= 160) {ret = 0; if(verbosity >= 1)printf("WARNING: Exponents >= 160 are not supported in Fermat factoring!\n");}
else if (exp <= 31) { if (bit_min < 64 || bit_max > 96 || /*bit_min - exp < 13 ||*/ bit_max - exp > 64) {ret = 0; if(verbosity >= 1)printf("WARNING: bit range isn't supported!\n");} }
else if (exp <= 63) { if (bit_min < 64 || bit_max > 128 || /*bit_min - exp < 13 ||*/ bit_max - exp > 64) {ret = 0; if(verbosity >= 1)printf("WARNING: bit range isn't supported!\n");} }
--- 93,99 ----
int ret = 1;
if (fermat) {
! if(exp <= 24) {ret = 0; if(verbosity >= 1)printf("WARNING: Exponents <= 24 are not supported in Fermat factoring!\n");}
else if(exp >= 160) {ret = 0; if(verbosity >= 1)printf("WARNING: Exponents >= 160 are not supported in Fermat factoring!\n");}
else if (exp <= 31) { if (bit_min < 64 || bit_max > 96 || /*bit_min - exp < 13 ||*/ bit_max - exp > 64) {ret = 0; if(verbosity >= 1)printf("WARNING: bit range isn't supported!\n");} }
else if (exp <= 63) { if (bit_min < 64 || bit_max > 128 || /*bit_min - exp < 13 ||*/ bit_max - exp > 64) {ret = 0; if(verbosity >= 1)printf("WARNING: bit range isn't supported!\n");} }
*** _/tf_barrett96_gs.cu 2012-09-23 00:52:17.000000000 -0700
--- tf_barrett96_gs.cu 2012-09-25 00:23:02.420253003 -0700
***************
*** 73,78 ****
--- 73,79 ----
#undef KERNEL_MIN_BLOCKS
#define KERNEL_MIN_BLOCKS 3
+ #include "tf_gfn.h"
#include "tf_192.h"
#include "tf_160.h"
#include "tf_128.h"