1 ------------------------------------------------------------------------------ 2 ------------------------------------------------------------------------------ 3 -- This file is part of 'Finite Field Arithmetic', aka 'FFA'. -- 4 -- -- 5 -- (C) 2019 Stanislav Datskovskiy ( www.loper-os.org ) -- 6 -- http://wot.deedbot.org/17215D118B7239507FAFED98B98228A001ABFFC7.html -- 7 -- -- 8 -- You do not have, nor can you ever acquire the right to use, copy or -- 9 -- distribute this software ; Should you use this software for any purpose, -- 10 -- or copy and distribute it to anyone or in any manner, you are breaking -- 11 -- the laws of whatever soi-disant jurisdiction, and you promise to -- 12 -- continue doing so for the indefinite future. In any case, please -- 13 -- always : read and understand any software ; verify any PGP signatures -- 14 -- that you use - for any purpose. -- 15 -- -- 16 -- See also http://trilema.com/2015/a-new-software-licensing-paradigm . -- 17 ------------------------------------------------------------------------------ 18 ------------------------------------------------------------------------------ 19 20 with Words; use Words; 21 with Word_Ops; use Word_Ops; 22 with W_Mul; use W_Mul; 23 with FZ_Arith; use FZ_Arith; 24 25 26 package body FZ_Sqr is 27 28 -- Square case of Comba's multiplier. (CAUTION: UNBUFFERED) 29 procedure FZ_Sqr_Comba(X : in FZ; 30 XX : out FZ) is 31 32 -- Words in each multiplicand 33 L : constant Word_Index := X'Length; 34 35 -- Length of Product, i.e. double the length of X 36 LP : constant Word_Index := 2 * L; 37 38 -- 3-word Accumulator 39 A2, A1, A0 : Word := 0; 40 41 Lo, Hi : Word; -- Output of WxW multiply/square 42 43 -- Type for referring to a column of XX 44 subtype ColXX is Word_Index range 0 .. LP - 1; 45 46 procedure Accum is 47 C : WBool; -- Carry for the Accumulator addition 48 Sum : Word; -- Sum for Accumulator addition 49 begin 50 -- Now add add double-Word Hi:Lo to accumulator A2:A1:A0: 51 -- A0 += Lo; C := Carry 52 Sum := A0 + Lo; 53 C := W_Carry(A0, Lo, Sum); 54 A0 := Sum; 55 -- A1 += Hi + C; C := Carry 56 Sum := A1 + Hi + C; 57 C := W_Carry(A1, Hi, Sum); 58 A1 := Sum; 59 -- A2 += A2 + C 60 A2 := A2 + C; 61 end Accum; 62 pragma Inline_Always(Accum); 63 64 procedure SymmDigits(N : in ColXX; From : in ColXX; To : in ColXX) is 65 begin 66 for j in From .. To loop 67 -- Hi:Lo := j-th * (N - j)-th Word, and then, 68 Mul_Word(X(X'First + j), 69 X(X'First - j + N), 70 Lo, Hi); 71 Accum; 72 Accum; -- Accum += 2 * (Hi:Lo) 73 end loop; 74 end SymmDigits; 75 pragma Inline_Always(SymmDigits); 76 77 procedure SqrDigit(N : in ColXX) is 78 begin 79 Sqr_Word(X(X'First + N), Lo, Hi); -- Hi:Lo := Square(N-th digit) 80 Accum; -- Accum += Hi:Lo 81 end SqrDigit; 82 pragma Inline_Always(SqrDigit); 83 84 procedure HaveDigit(N : in ColXX) is 85 begin 86 -- Save the Nth (indexed from zero) word of XX: 87 XX(XX'First + N) := A0; 88 -- Right-Shift the Accumulator by one Word width: 89 A0 := A1; 90 A1 := A2; 91 A2 := 0; 92 end HaveDigit; 93 pragma Inline_Always(HaveDigit); 94 95 -- Compute the Nth (indexed from zero) column of the Product 96 procedure Col(N : in ColXX; U : in ColXX; V : in ColXX) is 97 begin 98 -- The branch pattern depends only on FZ wordness 99 if N mod 2 = 0 then -- If we're doing an EVEN-numbered column: 100 SymmDigits(N, U, V - 1); -- Stop before V: it is the square case 101 SqrDigit(V); -- Compute the square case at V 102 else -- If we're doing an ODD-numbered column: 103 SymmDigits(N, U, V); -- All of them are the symmetric case 104 end if; -- After either even or odd column: 105 HaveDigit(N); -- We have the N-th digit of the result. 106 end Col; 107 pragma Inline_Always(Col); 108 109 begin 110 -- First col always exists: 111 SqrDigit(ColXX'First); 112 HaveDigit(ColXX'First); 113 114 -- Compute the lower half of the Product: 115 for i in 1 .. L - 1 loop 116 Col(i, 0, i / 2); 117 end loop; 118 119 -- Compute the upper half (sans last Word) of the Product: 120 for i in L .. LP - 2 loop 121 Col(i, i - L + 1, i / 2); 122 end loop; 123 124 -- The very last Word of the Product: 125 XX(XX'Last) := A0; -- Equiv. of XX(XX'First + 2*L - 1) := A0; 126 end FZ_Sqr_Comba; 127 128 129 -- Karatsuba's Squaring. (CAUTION: UNBUFFERED) 130 procedure Sqr_Karatsuba(X : in FZ; 131 XX : out FZ) is 132 133 -- L is the wordness of X. Guaranteed to be a power of two. 134 L : constant Word_Count := X'Length; 135 136 -- An 'LSeg' is the same length as X. 137 subtype LSeg is FZ(1 .. L); 138 139 -- K is HALF of the length of X. 140 K : constant Word_Index := L / 2; 141 142 -- A 'KSeg' is the same length as HALF of X. 143 subtype KSeg is FZ(1 .. K); 144 145 -- The three L-sized variables of the product equation, i.e.: 146 -- XX = LL + 2^(K*Bitness)(LL + HH - DD) + 2^(2*K*Bitness)HH 147 LL, DD, HH : LSeg; 148 149 -- K-sized term Dx, Dx := abs(XLo - XHi) to then get DD := Dx^2 150 Dx : KSeg; 151 152 -- IGNORED Subtraction borrow, sign of (XL - XH) 153 Cx : WBool; -- given as DD := Dx^2, DD is always positive 154 pragma Unreferenced(Cx); 155 156 -- Bottom and Top K-sized halves of X. 157 XLo : KSeg renames X( X'First .. X'Last - K ); 158 XHi : KSeg renames X( X'First + K .. X'Last ); 159 160 -- L-sized middle segment of the product XX (+/- K from the midpoint). 161 XXMid : LSeg renames XX( XX'First + K .. XX'Last - K ); 162 163 -- Bottom and Top L-sized halves of the product XX. 164 XXLo : LSeg renames XX( XX'First .. XX'Last - L ); 165 XXHi : LSeg renames XX( XX'First + L .. XX'Last ); 166 167 -- Topmost K-sized quarter segment of the product XX, or 'tail' 168 XXHiHi : KSeg renames XXHi( XXHi'First + K .. XXHi'Last ); 169 170 -- Carry from addition of HH and LL terms; borrow from subtraction of DD 171 C : WBool; 172 173 -- Barring a cosmic ray, 0 <= TC <= 2 174 subtype TailCarry is Word range 0 .. 2; 175 176 -- Tail-Carry accumulator, for the final ripple-out into XXHiHi 177 TC : TailCarry := 0; 178 179 -- Barring a cosmic ray, the tail ripple will NOT overflow. 180 FinalCarry : WZeroOrDie := 0; 181 182 begin 183 184 -- Recurse: LL := XLo^2 185 FZ_Square_Unbuffered(XLo, LL); 186 187 -- Recurse: HH := XHi^2 188 FZ_Square_Unbuffered(XHi, HH); 189 190 -- Dx := |XLo - XHi| , and we don't care about the borrow 191 FZ_Sub_Abs(X => XLo, Y => XHi, Difference => Dx, Underflow => Cx); 192 193 -- Recurse: DD := Dx^2 (always positive, which is why we don't need Cx) 194 FZ_Square_Unbuffered(Dx, DD); 195 196 -- XX := LL + 2^(2 * K * Bitness) * HH 197 XXLo := LL; 198 XXHi := HH; 199 200 -- XX += 2^(K * Bitness) * HH, carry goes into Tail Carry accumulator. 201 FZ_Add_D(X => XXMid, Y => HH, Overflow => TC); 202 203 -- XX += 2^(K * Bitness) * LL, ... 204 FZ_Add_D(X => XXMid, Y => LL, Overflow => C); 205 206 -- ... add the carry from LL addition into the Tail Carry accumulator. 207 TC := TC + C; 208 209 -- XX -= 2^(K * Bitness) * DD 210 FZ_Sub_D(X => XXMid, -- Because DD is always positive, 211 Y => DD, -- this term is always subtractive. 212 Underflow => C); -- C is the borrow from DD term subtraction 213 214 -- Get final Tail Carry for the ripple by subtracting DD term's borrow 215 TC := TC - C; 216 217 -- Ripple the Tail Carry into the tail. 218 FZ_Add_D_W(X => XXHiHi, W => TC, Overflow => FinalCarry); 219 220 end Sqr_Karatsuba; 221 -- CAUTION: Inlining prohibited for Sqr_Karatsuba ! 222 223 224 -- Squaring. (CAUTION: UNBUFFERED) 225 procedure FZ_Square_Unbuffered(X : in FZ; 226 XX : out FZ) is 227 begin 228 229 if X'Length <= Sqr_Karatsuba_Thresh then 230 231 -- Base case: 232 FZ_Sqr_Comba(X, XX); 233 234 else 235 236 -- Recursive case: 237 Sqr_Karatsuba(X, XX); 238 239 end if; 240 241 end FZ_Square_Unbuffered; 242 243 244 -- Squaring. Preserves the inputs. 245 procedure FZ_Square_Buffered(X : in FZ; 246 XX_Lo : out FZ; 247 XX_Hi : out FZ) is 248 249 -- Product buffer. 250 P : FZ(1 .. 2 * X'Length); 251 252 begin 253 254 FZ_Square_Unbuffered(X, P); 255 256 XX_Lo := P(P'First .. P'First + X'Length - 1); 257 XX_Hi := P(P'First + X'Length .. P'Last); 258 259 end FZ_Square_Buffered; 260 261 end FZ_Sqr;