File : fz_mul.adb
1 ------------------------------------------------------------------------------
2 ------------------------------------------------------------------------------
3 -- This file is part of 'Finite Field Arithmetic', aka 'FFA'. --
4 -- --
5 -- (C) 2019 Stanislav Datskovskiy ( www.loper-os.org ) --
6 -- http://wot.deedbot.org/17215D118B7239507FAFED98B98228A001ABFFC7.html --
7 -- --
8 -- You do not have, nor can you ever acquire the right to use, copy or --
9 -- distribute this software ; Should you use this software for any purpose, --
10 -- or copy and distribute it to anyone or in any manner, you are breaking --
11 -- the laws of whatever soi-disant jurisdiction, and you promise to --
12 -- continue doing so for the indefinite future. In any case, please --
13 -- always : read and understand any software ; verify any PGP signatures --
14 -- that you use - for any purpose. --
15 -- --
16 -- See also http://trilema.com/2015/a-new-software-licensing-paradigm . --
17 ------------------------------------------------------------------------------
18 ------------------------------------------------------------------------------
19
20 with Words; use Words;
21 with Word_Ops; use Word_Ops;
22 with W_Mul; use W_Mul;
23 with FZ_Arith; use FZ_Arith;
24
25
26 package body FZ_Mul is
27
28 -- Comba's multiplier. (CAUTION: UNBUFFERED)
29 procedure FZ_Mul_Comba(X : in FZ;
30 Y : in FZ;
31 XY : out FZ) is
32
33 -- Words in each multiplicand
34 L : constant Word_Index := X'Length;
35
36 -- Length of Product, i.e. double the length of either multiplicand
37 LP : constant Word_Index := 2 * L;
38
39 -- 3-word Accumulator
40 A2, A1, A0 : Word := 0;
41
42 -- Type for referring to a column of XY
43 subtype ColXY is Word_Index range 0 .. LP - 1;
44
45 -- Compute the Nth (indexed from zero) column of the Product
46 procedure Col(N : in ColXY; U : in ColXY; V : in ColXY) is
47
48 -- The outputs of a Word multiplication
49 Lo, Hi : Word;
50
51 -- Carry for the Accumulator addition
52 C : WBool;
53
54 -- Sum for Accumulator addition
55 Sum : Word;
56
57 begin
58
59 -- For lower half of XY, will go from 0 to N
60 -- For upper half of XY, will go from N - L + 1 to L - 1
61 for j in U .. V loop
62
63 -- Hi:Lo := j-th Word of X * (N - j)-th Word of Y
64 Mul_Word(X(X'First + j),
65 Y(Y'First - j + N),
66 Lo, Hi);
67
68 -- Now add Hi:Lo into the Accumulator:
69
70 -- A0 += Lo; C := Carry
71 Sum := A0 + Lo;
72 C := W_Carry(A0, Lo, Sum);
73 A0 := Sum;
74
75 -- A1 += Hi + C; C := Carry
76 Sum := A1 + Hi + C;
77 C := W_Carry(A1, Hi, Sum);
78 A1 := Sum;
79
80 -- A2 += A2 + C
81 A2 := A2 + C;
82
83 end loop;
84
85 -- We now have the Nth (indexed from zero) word of XY
86 XY(XY'First + N) := A0;
87
88 -- Right-Shift the Accumulator by one Word width:
89 A0 := A1;
90 A1 := A2;
91 A2 := 0;
92
93 end Col;
94 pragma Inline_Always(Col);
95
96 begin
97
98 -- Compute the lower half of the Product:
99 for i in 0 .. L - 1 loop
100
101 Col(i, 0, i);
102
103 end loop;
104
105 -- Compute the upper half (sans last Word) of the Product:
106 for i in L .. LP - 2 loop
107
108 Col(i, i - L + 1, L - 1);
109
110 end loop;
111
112 -- The very last Word of the Product:
113 XY(XY'Last) := A0;
114
115 end FZ_Mul_Comba;
116
117
118 -- Karatsuba's Multiplier. (CAUTION: UNBUFFERED)
119 procedure Mul_Karatsuba(X : in FZ;
120 Y : in FZ;
121 XY : out FZ) is
122
123 -- L is the wordness of a multiplicand. Guaranteed to be a power of two.
124 L : constant Word_Count := X'Length;
125
126 -- An 'LSeg' is the same length as either multiplicand.
127 subtype LSeg is FZ(1 .. L);
128
129 -- K is HALF of the length of a multiplicand.
130 K : constant Word_Index := L / 2;
131
132 -- A 'KSeg' is the same length as HALF of a multiplicand.
133 subtype KSeg is FZ(1 .. K);
134
135 -- The three L-sized variables of the product equation, i.e.:
136 -- XY = LL + 2^(K*Bitness)(LL + HH + (-1^DD_Sub)*DD) + 2^(2*K*Bitness)HH
137 LL, DD, HH : LSeg;
138
139 -- K-sized terms of Dx * Dy = DD
140 Dx, Dy : KSeg; -- Dx = abs(XLo - XHi) , Dy = abs(YLo - YHi)
141
142 -- Subtraction borrows, signs of (XL - XH) and (YL - YH),
143 Cx, Cy : WBool; -- so that we can calculate (-1^DD_Sub)
144
145 -- Bottom and Top K-sized halves of the multiplicand X.
146 XLo : KSeg renames X( X'First .. X'Last - K );
147 XHi : KSeg renames X( X'First + K .. X'Last );
148
149 -- Bottom and Top K-sized halves of the multiplicand Y.
150 YLo : KSeg renames Y( Y'First .. Y'Last - K );
151 YHi : KSeg renames Y( Y'First + K .. Y'Last );
152
153 -- L-sized middle segment of the product XY (+/- K from the midpoint).
154 XYMid : LSeg renames XY( XY'First + K .. XY'Last - K );
155
156 -- Bottom and Top L-sized halves of the product XY.
157 XYLo : LSeg renames XY( XY'First .. XY'Last - L );
158 XYHi : LSeg renames XY( XY'First + L .. XY'Last );
159
160 -- Topmost K-sized quarter segment of the product XY, or 'tail'
161 XYHiHi : KSeg renames XYHi( XYHi'First + K .. XYHi'Last );
162
163 -- Whether the DD term is being subtracted.
164 DD_Sub : WBool;
165
166 -- Carry from individual term additions.
167 C : WBool;
168
169 -- Barring a cosmic ray, 0 <= TC <= 2
170 subtype TailCarry is Word range 0 .. 2;
171
172 -- Tail-Carry accumulator, for the final ripple-out into XXHiHi
173 TC : TailCarry := 0;
174
175 -- Barring a cosmic ray, the tail ripple will NOT overflow.
176 FinalCarry : WZeroOrDie := 0;
177
178 begin
179
180 -- Recurse: LL := XL * YL
181 FZ_Multiply_Unbuffered(XLo, YLo, LL);
182
183 -- Recurse: HH := XH * YH
184 FZ_Multiply_Unbuffered(XHi, YHi, HH);
185
186 -- Dx := |XL - XH| , Cx := Borrow (i.e. 1 iff XL < XH)
187 FZ_Sub_Abs(X => XLo, Y => XHi, Difference => Dx, Underflow => Cx);
188
189 -- Dy := |YL - YH| , Cy := Borrow (i.e. 1 iff YL < YH)
190 FZ_Sub_Abs(X => YLo, Y => YHi, Difference => Dy, Underflow => Cy);
191
192 -- Recurse: DD := Dx * Dy
193 FZ_Multiply_Unbuffered(Dx, Dy, DD);
194
195 -- Whether (XL - XH)(YL - YH) is positive, and so DD must be subtracted:
196 DD_Sub := 1 - (Cx xor Cy);
197
198 -- XY := LL + 2^(2 * K * Bitness) * HH
199 XYLo := LL;
200 XYHi := HH;
201
202 -- XY += 2^(K * Bitness) * HH, but carry goes in Tail Carry accum.
203 FZ_Add_D(X => XYMid, Y => HH, Overflow => TC);
204
205 -- XY += 2^(K * Bitness) * LL, ...
206 FZ_Add_D(X => XYMid, Y => LL, Overflow => C);
207
208 -- ... but the carry goes into the Tail Carry accumulator.
209 TC := TC + C;
210
211 -- XY += 2^(K * Bitness) * (-1^DD_Sub) * DD
212 FZ_Not_Cond_D(N => DD, Cond => DD_Sub); -- invert DD if 2s-complementing
213 FZ_Add_D(OF_In => DD_Sub, -- ... and then must increment
214 X => XYMid,
215 Y => DD,
216 Overflow => C); -- carry will go in Tail Carry accumulator
217
218 -- Compute the final Tail Carry for the ripple
219 TC := TC + C - DD_Sub;
220
221 -- Ripple the Tail Carry into the tail.
222 FZ_Add_D_W(X => XYHiHi, W => TC, Overflow => FinalCarry);
223
224 end Mul_Karatsuba;
225 -- CAUTION: Inlining prohibited for Mul_Karatsuba !
226
227
228 -- Multiplier. (CAUTION: UNBUFFERED)
229 procedure FZ_Multiply_Unbuffered(X : in FZ;
230 Y : in FZ;
231 XY : out FZ) is
232
233 -- The length of either multiplicand
234 L : constant Word_Count := X'Length;
235
236 begin
237
238 if L <= Karatsuba_Thresh then
239
240 -- Base case:
241 FZ_Mul_Comba(X, Y, XY);
242
243 else
244
245 -- Recursive case:
246 Mul_Karatsuba(X, Y, XY);
247
248 end if;
249
250 end FZ_Multiply_Unbuffered;
251
252
253 -- Multiplier. Preserves the inputs.
254 procedure FZ_Multiply_Buffered(X : in FZ;
255 Y : in FZ;
256 XY_Lo : out FZ;
257 XY_Hi : out FZ) is
258
259 -- Product buffer.
260 P : FZ(1 .. 2 * X'Length);
261
262 begin
263
264 FZ_Multiply_Unbuffered(X, Y, P);
265
266 XY_Lo := P(P'First .. P'First + X'Length - 1);
267 XY_Hi := P(P'First + X'Length .. P'Last);
268
269 end FZ_Multiply_Buffered;
270
271 end FZ_Mul;