1 ------------------------------------------------------------------------------ 2 ------------------------------------------------------------------------------ 3 -- This file is part of 'Finite Field Arithmetic', aka 'FFA'. -- 4 -- -- 5 -- (C) 2019 Stanislav Datskovskiy ( www.loper-os.org ) -- 6 -- http://wot.deedbot.org/17215D118B7239507FAFED98B98228A001ABFFC7.html -- 7 -- -- 8 -- You do not have, nor can you ever acquire the right to use, copy or -- 9 -- distribute this software ; Should you use this software for any purpose, -- 10 -- or copy and distribute it to anyone or in any manner, you are breaking -- 11 -- the laws of whatever soi-disant jurisdiction, and you promise to -- 12 -- continue doing so for the indefinite future. In any case, please -- 13 -- always : read and understand any software ; verify any PGP signatures -- 14 -- that you use - for any purpose. -- 15 -- -- 16 -- See also http://trilema.com/2015/a-new-software-licensing-paradigm . -- 17 ------------------------------------------------------------------------------ 18 ------------------------------------------------------------------------------ 19 20 with Words; use Words; 21 with Word_Ops; use Word_Ops; 22 with W_Mul; use W_Mul; 23 with FZ_Arith; use FZ_Arith; 24 25 26 package body FZ_Mul is 27 28 -- Comba's multiplier. (CAUTION: UNBUFFERED) 29 procedure FZ_Mul_Comba(X : in FZ; 30 Y : in FZ; 31 XY : out FZ) is 32 33 -- Words in each multiplicand 34 L : constant Word_Index := X'Length; 35 36 -- Length of Product, i.e. double the length of either multiplicand 37 LP : constant Word_Index := 2 * L; 38 39 -- 3-word Accumulator 40 A2, A1, A0 : Word := 0; 41 42 -- Type for referring to a column of XY 43 subtype ColXY is Word_Index range 0 .. LP - 1; 44 45 -- Compute the Nth (indexed from zero) column of the Product 46 procedure Col(N : in ColXY; U : in ColXY; V : in ColXY) is 47 48 -- The outputs of a Word multiplication 49 Lo, Hi : Word; 50 51 -- Carry for the Accumulator addition 52 C : WBool; 53 54 -- Sum for Accumulator addition 55 Sum : Word; 56 57 begin 58 59 -- For lower half of XY, will go from 0 to N 60 -- For upper half of XY, will go from N - L + 1 to L - 1 61 for j in U .. V loop 62 63 -- Hi:Lo := j-th Word of X * (N - j)-th Word of Y 64 Mul_Word(X(X'First + j), 65 Y(Y'First - j + N), 66 Lo, Hi); 67 68 -- Now add Hi:Lo into the Accumulator: 69 70 -- A0 += Lo; C := Carry 71 Sum := A0 + Lo; 72 C := W_Carry(A0, Lo, Sum); 73 A0 := Sum; 74 75 -- A1 += Hi + C; C := Carry 76 Sum := A1 + Hi + C; 77 C := W_Carry(A1, Hi, Sum); 78 A1 := Sum; 79 80 -- A2 += A2 + C 81 A2 := A2 + C; 82 83 end loop; 84 85 -- We now have the Nth (indexed from zero) word of XY 86 XY(XY'First + N) := A0; 87 88 -- Right-Shift the Accumulator by one Word width: 89 A0 := A1; 90 A1 := A2; 91 A2 := 0; 92 93 end Col; 94 pragma Inline_Always(Col); 95 96 begin 97 98 -- Compute the lower half of the Product: 99 for i in 0 .. L - 1 loop 100 101 Col(i, 0, i); 102 103 end loop; 104 105 -- Compute the upper half (sans last Word) of the Product: 106 for i in L .. LP - 2 loop 107 108 Col(i, i - L + 1, L - 1); 109 110 end loop; 111 112 -- The very last Word of the Product: 113 XY(XY'Last) := A0; 114 115 end FZ_Mul_Comba; 116 117 118 -- Karatsuba's Multiplier. (CAUTION: UNBUFFERED) 119 procedure Mul_Karatsuba(X : in FZ; 120 Y : in FZ; 121 XY : out FZ) is 122 123 -- L is the wordness of a multiplicand. Guaranteed to be a power of two. 124 L : constant Word_Count := X'Length; 125 126 -- An 'LSeg' is the same length as either multiplicand. 127 subtype LSeg is FZ(1 .. L); 128 129 -- K is HALF of the length of a multiplicand. 130 K : constant Word_Index := L / 2; 131 132 -- A 'KSeg' is the same length as HALF of a multiplicand. 133 subtype KSeg is FZ(1 .. K); 134 135 -- The three L-sized variables of the product equation, i.e.: 136 -- XY = LL + 2^(K*Bitness)(LL + HH + (-1^DD_Sub)*DD) + 2^(2*K*Bitness)HH 137 LL, DD, HH : LSeg; 138 139 -- K-sized terms of Dx * Dy = DD 140 Dx, Dy : KSeg; -- Dx = abs(XLo - XHi) , Dy = abs(YLo - YHi) 141 142 -- Subtraction borrows, signs of (XL - XH) and (YL - YH), 143 Cx, Cy : WBool; -- so that we can calculate (-1^DD_Sub) 144 145 -- Bottom and Top K-sized halves of the multiplicand X. 146 XLo : KSeg renames X( X'First .. X'Last - K ); 147 XHi : KSeg renames X( X'First + K .. X'Last ); 148 149 -- Bottom and Top K-sized halves of the multiplicand Y. 150 YLo : KSeg renames Y( Y'First .. Y'Last - K ); 151 YHi : KSeg renames Y( Y'First + K .. Y'Last ); 152 153 -- L-sized middle segment of the product XY (+/- K from the midpoint). 154 XYMid : LSeg renames XY( XY'First + K .. XY'Last - K ); 155 156 -- Bottom and Top L-sized halves of the product XY. 157 XYLo : LSeg renames XY( XY'First .. XY'Last - L ); 158 XYHi : LSeg renames XY( XY'First + L .. XY'Last ); 159 160 -- Topmost K-sized quarter segment of the product XY, or 'tail' 161 XYHiHi : KSeg renames XYHi( XYHi'First + K .. XYHi'Last ); 162 163 -- Whether the DD term is being subtracted. 164 DD_Sub : WBool; 165 166 -- Carry from individual term additions. 167 C : WBool; 168 169 -- Barring a cosmic ray, 0 <= TC <= 2 170 subtype TailCarry is Word range 0 .. 2; 171 172 -- Tail-Carry accumulator, for the final ripple-out into XXHiHi 173 TC : TailCarry := 0; 174 175 -- Barring a cosmic ray, the tail ripple will NOT overflow. 176 FinalCarry : WZeroOrDie := 0; 177 178 begin 179 180 -- Recurse: LL := XL * YL 181 FZ_Multiply_Unbuffered(XLo, YLo, LL); 182 183 -- Recurse: HH := XH * YH 184 FZ_Multiply_Unbuffered(XHi, YHi, HH); 185 186 -- Dx := |XL - XH| , Cx := Borrow (i.e. 1 iff XL < XH) 187 FZ_Sub_Abs(X => XLo, Y => XHi, Difference => Dx, Underflow => Cx); 188 189 -- Dy := |YL - YH| , Cy := Borrow (i.e. 1 iff YL < YH) 190 FZ_Sub_Abs(X => YLo, Y => YHi, Difference => Dy, Underflow => Cy); 191 192 -- Recurse: DD := Dx * Dy 193 FZ_Multiply_Unbuffered(Dx, Dy, DD); 194 195 -- Whether (XL - XH)(YL - YH) is positive, and so DD must be subtracted: 196 DD_Sub := 1 - (Cx xor Cy); 197 198 -- XY := LL + 2^(2 * K * Bitness) * HH 199 XYLo := LL; 200 XYHi := HH; 201 202 -- XY += 2^(K * Bitness) * HH, but carry goes in Tail Carry accum. 203 FZ_Add_D(X => XYMid, Y => HH, Overflow => TC); 204 205 -- XY += 2^(K * Bitness) * LL, ... 206 FZ_Add_D(X => XYMid, Y => LL, Overflow => C); 207 208 -- ... but the carry goes into the Tail Carry accumulator. 209 TC := TC + C; 210 211 -- XY += 2^(K * Bitness) * (-1^DD_Sub) * DD 212 FZ_Not_Cond_D(N => DD, Cond => DD_Sub); -- invert DD if 2s-complementing 213 FZ_Add_D(OF_In => DD_Sub, -- ... and then must increment 214 X => XYMid, 215 Y => DD, 216 Overflow => C); -- carry will go in Tail Carry accumulator 217 218 -- Compute the final Tail Carry for the ripple 219 TC := TC + C - DD_Sub; 220 221 -- Ripple the Tail Carry into the tail. 222 FZ_Add_D_W(X => XYHiHi, W => TC, Overflow => FinalCarry); 223 224 end Mul_Karatsuba; 225 -- CAUTION: Inlining prohibited for Mul_Karatsuba ! 226 227 228 -- Multiplier. (CAUTION: UNBUFFERED) 229 procedure FZ_Multiply_Unbuffered(X : in FZ; 230 Y : in FZ; 231 XY : out FZ) is 232 233 -- The length of either multiplicand 234 L : constant Word_Count := X'Length; 235 236 begin 237 238 if L <= Karatsuba_Thresh then 239 240 -- Base case: 241 FZ_Mul_Comba(X, Y, XY); 242 243 else 244 245 -- Recursive case: 246 Mul_Karatsuba(X, Y, XY); 247 248 end if; 249 250 end FZ_Multiply_Unbuffered; 251 252 253 -- Multiplier. Preserves the inputs. 254 procedure FZ_Multiply_Buffered(X : in FZ; 255 Y : in FZ; 256 XY_Lo : out FZ; 257 XY_Hi : out FZ) is 258 259 -- Product buffer. 260 P : FZ(1 .. 2 * X'Length); 261 262 begin 263 264 FZ_Multiply_Unbuffered(X, Y, P); 265 266 XY_Lo := P(P'First .. P'First + X'Length - 1); 267 XY_Hi := P(P'First + X'Length .. P'Last); 268 269 end FZ_Multiply_Buffered; 270 271 end FZ_Mul;