File : fz_sqr.adb
1 ------------------------------------------------------------------------------
2 ------------------------------------------------------------------------------
3 -- This file is part of 'Finite Field Arithmetic', aka 'FFA'. --
4 -- --
5 -- (C) 2019 Stanislav Datskovskiy ( www.loper-os.org ) --
6 -- http://wot.deedbot.org/17215D118B7239507FAFED98B98228A001ABFFC7.html --
7 -- --
8 -- You do not have, nor can you ever acquire the right to use, copy or --
9 -- distribute this software ; Should you use this software for any purpose, --
10 -- or copy and distribute it to anyone or in any manner, you are breaking --
11 -- the laws of whatever soi-disant jurisdiction, and you promise to --
12 -- continue doing so for the indefinite future. In any case, please --
13 -- always : read and understand any software ; verify any PGP signatures --
14 -- that you use - for any purpose. --
15 -- --
16 -- See also http://trilema.com/2015/a-new-software-licensing-paradigm . --
17 ------------------------------------------------------------------------------
18 ------------------------------------------------------------------------------
19
20 with Words; use Words;
21 with Word_Ops; use Word_Ops;
22 with W_Mul; use W_Mul;
23 with FZ_Arith; use FZ_Arith;
24
25
26 package body FZ_Sqr is
27
28 -- Square case of Comba's multiplier. (CAUTION: UNBUFFERED)
29 procedure FZ_Sqr_Comba(X : in FZ;
30 XX : out FZ) is
31
32 -- Words in each multiplicand
33 L : constant Word_Index := X'Length;
34
35 -- Length of Product, i.e. double the length of X
36 LP : constant Word_Index := 2 * L;
37
38 -- 3-word Accumulator
39 A2, A1, A0 : Word := 0;
40
41 Lo, Hi : Word; -- Output of WxW multiply/square
42
43 -- Type for referring to a column of XX
44 subtype ColXX is Word_Index range 0 .. LP - 1;
45
46 procedure Accum is
47 C : WBool; -- Carry for the Accumulator addition
48 Sum : Word; -- Sum for Accumulator addition
49 begin
50 -- Now add add double-Word Hi:Lo to accumulator A2:A1:A0:
51 -- A0 += Lo; C := Carry
52 Sum := A0 + Lo;
53 C := W_Carry(A0, Lo, Sum);
54 A0 := Sum;
55 -- A1 += Hi + C; C := Carry
56 Sum := A1 + Hi + C;
57 C := W_Carry(A1, Hi, Sum);
58 A1 := Sum;
59 -- A2 += A2 + C
60 A2 := A2 + C;
61 end Accum;
62 pragma Inline_Always(Accum);
63
64 procedure SymmDigits(N : in ColXX; From : in ColXX; To : in ColXX) is
65 begin
66 for j in From .. To loop
67 -- Hi:Lo := j-th * (N - j)-th Word, and then,
68 Mul_Word(X(X'First + j),
69 X(X'First - j + N),
70 Lo, Hi);
71 Accum;
72 Accum; -- Accum += 2 * (Hi:Lo)
73 end loop;
74 end SymmDigits;
75 pragma Inline_Always(SymmDigits);
76
77 procedure SqrDigit(N : in ColXX) is
78 begin
79 Sqr_Word(X(X'First + N), Lo, Hi); -- Hi:Lo := Square(N-th digit)
80 Accum; -- Accum += Hi:Lo
81 end SqrDigit;
82 pragma Inline_Always(SqrDigit);
83
84 procedure HaveDigit(N : in ColXX) is
85 begin
86 -- Save the Nth (indexed from zero) word of XX:
87 XX(XX'First + N) := A0;
88 -- Right-Shift the Accumulator by one Word width:
89 A0 := A1;
90 A1 := A2;
91 A2 := 0;
92 end HaveDigit;
93 pragma Inline_Always(HaveDigit);
94
95 -- Compute the Nth (indexed from zero) column of the Product
96 procedure Col(N : in ColXX; U : in ColXX; V : in ColXX) is
97 begin
98 -- The branch pattern depends only on FZ wordness
99 if N mod 2 = 0 then -- If we're doing an EVEN-numbered column:
100 SymmDigits(N, U, V - 1); -- Stop before V: it is the square case
101 SqrDigit(V); -- Compute the square case at V
102 else -- If we're doing an ODD-numbered column:
103 SymmDigits(N, U, V); -- All of them are the symmetric case
104 end if; -- After either even or odd column:
105 HaveDigit(N); -- We have the N-th digit of the result.
106 end Col;
107 pragma Inline_Always(Col);
108
109 begin
110 -- First col always exists:
111 SqrDigit(ColXX'First);
112 HaveDigit(ColXX'First);
113
114 -- Compute the lower half of the Product:
115 for i in 1 .. L - 1 loop
116 Col(i, 0, i / 2);
117 end loop;
118
119 -- Compute the upper half (sans last Word) of the Product:
120 for i in L .. LP - 2 loop
121 Col(i, i - L + 1, i / 2);
122 end loop;
123
124 -- The very last Word of the Product:
125 XX(XX'Last) := A0; -- Equiv. of XX(XX'First + 2*L - 1) := A0;
126 end FZ_Sqr_Comba;
127
128
129 -- Karatsuba's Squaring. (CAUTION: UNBUFFERED)
130 procedure Sqr_Karatsuba(X : in FZ;
131 XX : out FZ) is
132
133 -- L is the wordness of X. Guaranteed to be a power of two.
134 L : constant Word_Count := X'Length;
135
136 -- An 'LSeg' is the same length as X.
137 subtype LSeg is FZ(1 .. L);
138
139 -- K is HALF of the length of X.
140 K : constant Word_Index := L / 2;
141
142 -- A 'KSeg' is the same length as HALF of X.
143 subtype KSeg is FZ(1 .. K);
144
145 -- The three L-sized variables of the product equation, i.e.:
146 -- XX = LL + 2^(K*Bitness)(LL + HH - DD) + 2^(2*K*Bitness)HH
147 LL, DD, HH : LSeg;
148
149 -- K-sized term Dx, Dx := abs(XLo - XHi) to then get DD := Dx^2
150 Dx : KSeg;
151
152 -- IGNORED Subtraction borrow, sign of (XL - XH)
153 Cx : WBool; -- given as DD := Dx^2, DD is always positive
154 pragma Unreferenced(Cx);
155
156 -- Bottom and Top K-sized halves of X.
157 XLo : KSeg renames X( X'First .. X'Last - K );
158 XHi : KSeg renames X( X'First + K .. X'Last );
159
160 -- L-sized middle segment of the product XX (+/- K from the midpoint).
161 XXMid : LSeg renames XX( XX'First + K .. XX'Last - K );
162
163 -- Bottom and Top L-sized halves of the product XX.
164 XXLo : LSeg renames XX( XX'First .. XX'Last - L );
165 XXHi : LSeg renames XX( XX'First + L .. XX'Last );
166
167 -- Topmost K-sized quarter segment of the product XX, or 'tail'
168 XXHiHi : KSeg renames XXHi( XXHi'First + K .. XXHi'Last );
169
170 -- Carry from addition of HH and LL terms; borrow from subtraction of DD
171 C : WBool;
172
173 -- Barring a cosmic ray, 0 <= TC <= 2
174 subtype TailCarry is Word range 0 .. 2;
175
176 -- Tail-Carry accumulator, for the final ripple-out into XXHiHi
177 TC : TailCarry := 0;
178
179 -- Barring a cosmic ray, the tail ripple will NOT overflow.
180 FinalCarry : WZeroOrDie := 0;
181
182 begin
183
184 -- Recurse: LL := XLo^2
185 FZ_Square_Unbuffered(XLo, LL);
186
187 -- Recurse: HH := XHi^2
188 FZ_Square_Unbuffered(XHi, HH);
189
190 -- Dx := |XLo - XHi| , and we don't care about the borrow
191 FZ_Sub_Abs(X => XLo, Y => XHi, Difference => Dx, Underflow => Cx);
192
193 -- Recurse: DD := Dx^2 (always positive, which is why we don't need Cx)
194 FZ_Square_Unbuffered(Dx, DD);
195
196 -- XX := LL + 2^(2 * K * Bitness) * HH
197 XXLo := LL;
198 XXHi := HH;
199
200 -- XX += 2^(K * Bitness) * HH, carry goes into Tail Carry accumulator.
201 FZ_Add_D(X => XXMid, Y => HH, Overflow => TC);
202
203 -- XX += 2^(K * Bitness) * LL, ...
204 FZ_Add_D(X => XXMid, Y => LL, Overflow => C);
205
206 -- ... add the carry from LL addition into the Tail Carry accumulator.
207 TC := TC + C;
208
209 -- XX -= 2^(K * Bitness) * DD
210 FZ_Sub_D(X => XXMid, -- Because DD is always positive,
211 Y => DD, -- this term is always subtractive.
212 Underflow => C); -- C is the borrow from DD term subtraction
213
214 -- Get final Tail Carry for the ripple by subtracting DD term's borrow
215 TC := TC - C;
216
217 -- Ripple the Tail Carry into the tail.
218 FZ_Add_D_W(X => XXHiHi, W => TC, Overflow => FinalCarry);
219
220 end Sqr_Karatsuba;
221 -- CAUTION: Inlining prohibited for Sqr_Karatsuba !
222
223
224 -- Squaring. (CAUTION: UNBUFFERED)
225 procedure FZ_Square_Unbuffered(X : in FZ;
226 XX : out FZ) is
227 begin
228
229 if X'Length <= Sqr_Karatsuba_Thresh then
230
231 -- Base case:
232 FZ_Sqr_Comba(X, XX);
233
234 else
235
236 -- Recursive case:
237 Sqr_Karatsuba(X, XX);
238
239 end if;
240
241 end FZ_Square_Unbuffered;
242
243
244 -- Squaring. Preserves the inputs.
245 procedure FZ_Square_Buffered(X : in FZ;
246 XX_Lo : out FZ;
247 XX_Hi : out FZ) is
248
249 -- Product buffer.
250 P : FZ(1 .. 2 * X'Length);
251
252 begin
253
254 FZ_Square_Unbuffered(X, P);
255
256 XX_Lo := P(P'First .. P'First + X'Length - 1);
257 XX_Hi := P(P'First + X'Length .. P'Last);
258
259 end FZ_Square_Buffered;
260
261 end FZ_Sqr;