File : fz_mul.adb


   1 ------------------------------------------------------------------------------
   2 ------------------------------------------------------------------------------
   3 -- This file is part of 'Finite Field Arithmetic', aka 'FFA'.               --
   4 --                                                                          --
   5 -- (C) 2017 Stanislav Datskovskiy ( www.loper-os.org )                      --
   6 -- http://wot.deedbot.org/17215D118B7239507FAFED98B98228A001ABFFC7.html     --
   7 --                                                                          --
   8 -- You do not have, nor can you ever acquire the right to use, copy or      --
   9 -- distribute this software ; Should you use this software for any purpose, --
  10 -- or copy and distribute it to anyone or in any manner, you are breaking   --
  11 -- the laws of whatever soi-disant jurisdiction, and you promise to         --
  12 -- continue doing so for the indefinite future. In any case, please         --
  13 -- always : read and understand any software ; verify any PGP signatures    --
  14 -- that you use - for any purpose.                                          --
  15 --                                                                          --
  16 -- See also http://trilema.com/2015/a-new-software-licensing-paradigm .     --
  17 ------------------------------------------------------------------------------
  18 ------------------------------------------------------------------------------
  19 
  20 with Words;    use Words;
  21 with Word_Ops; use Word_Ops;
  22 with W_Mul;    use W_Mul;
  23 with FZ_Arith; use FZ_Arith;
  24 
  25 
  26 package body FZ_Mul is
  27       
  28    -- Comba's multiplier. (CAUTION: UNBUFFERED)
  29    procedure FZ_Mul_Comba(X     : in  FZ;
  30                           Y     : in  FZ;
  31                           XY    : out FZ) is
  32       
  33       -- Words in each multiplicand
  34       L : constant Word_Index := X'Length;
  35       
  36       -- Length of Product, i.e. double the length of either multiplicand
  37       LP : constant Word_Index := 2 * L;
  38       
  39       -- 3-word Accumulator
  40       A2, A1, A0 : Word := 0;
  41       
  42       -- Type for referring to a column of XY
  43       subtype ColXY is Word_Index range 0 .. LP - 1;
  44       
  45       -- Compute the Nth (indexed from zero) column of the Product
  46       procedure Col(N : in ColXY; U : in ColXY; V : in ColXY) is
  47          
  48          -- The outputs of a Word multiplication
  49          Lo, Hi : Word;
  50          
  51          -- Carry for the Accumulator addition
  52          C      : WBool;
  53          
  54          -- Sum for Accumulator addition
  55          Sum    : Word;
  56          
  57       begin
  58          
  59          -- For lower half of XY, will go from 0 to N
  60          -- For upper half of XY, will go from N - L + 1 to L - 1
  61          for j in U .. V loop
  62             
  63             -- Hi:Lo := j-th Word of X  *  (N - j)-th Word of Y
  64             Mul_Word(X(X'First + j),
  65                      Y(Y'First - j + N),
  66                      Lo, Hi);
  67             
  68             -- Now add Hi:Lo into the Accumulator:
  69             
  70             -- A0 += Lo; C := Carry
  71             Sum := A0 + Lo;
  72             C   := W_Carry(A0, Lo, Sum);
  73             A0  := Sum;
  74             
  75             -- A1 += Hi + C; C := Carry
  76             Sum := A1 + Hi + C;
  77             C   := W_Carry(A1, Hi, Sum);
  78             A1  := Sum;
  79             
  80             -- A2 += A2 + C
  81             A2  := A2 + C;
  82             
  83          end loop;
  84          
  85          -- We now have the Nth (indexed from zero) word of XY
  86          XY(XY'First + N) := A0;
  87          
  88          -- Right-Shift the Accumulator by one Word width:
  89          A0 := A1;
  90          A1 := A2;
  91          A2 := 0;
  92          
  93       end Col;
  94       pragma Inline_Always(Col);
  95       
  96    begin
  97       
  98       -- Compute the lower half of the Product:
  99       for i in 0 .. L - 1 loop
 100          
 101          Col(i, 0, i);
 102          
 103       end loop;
 104       
 105       -- Compute the upper half (sans last Word) of the Product:
 106       for i in L .. LP - 2 loop
 107          
 108          Col(i, i - L + 1, L - 1);
 109          
 110       end loop;
 111       
 112       -- The very last Word of the Product:
 113       XY(XY'Last) := A0;
 114       
 115    end FZ_Mul_Comba;
 116    
 117    
 118    -- Karatsuba's Multiplier. (CAUTION: UNBUFFERED)
 119    procedure Mul_Karatsuba(X  : in  FZ;
 120                            Y  : in  FZ;
 121                            XY : out FZ) is
 122       
 123       -- L is the wordness of a multiplicand. Guaranteed to be a power of two.
 124       L : constant Word_Count := X'Length;
 125       
 126       -- An 'LSeg' is the same length as either multiplicand.
 127       subtype LSeg is FZ(1 .. L);
 128       
 129       -- K is HALF of the length of a multiplicand.
 130       K : constant Word_Index := L / 2;
 131       
 132       -- A 'KSeg' is the same length as HALF of a multiplicand.
 133       subtype KSeg is FZ(1 .. K);
 134       
 135       -- The three L-sized variables of the product equation, i.e.:
 136       -- XY = LL + 2^(K*Bitness)(LL + HH + (-1^DD_Sub)*DD) + 2^(2*K*Bitness)HH
 137       LL, DD, HH : LSeg;
 138       
 139       -- K-sized terms of Dx * Dy = DD
 140       Dx, Dy     : KSeg;  -- Dx = abs(XLo - XHi) , Dy = abs(YLo - YHi)
 141       
 142       -- Subtraction borrows, signs of (XL - XH) and (YL - YH),
 143       Cx, Cy     : WBool; -- so that we can calculate (-1^DD_Sub)
 144       
 145       -- Bottom and Top K-sized halves of the multiplicand X.
 146       XLo        : KSeg  renames    X(  X'First       ..   X'Last - K );
 147       XHi        : KSeg  renames    X(  X'First + K   ..   X'Last     );
 148       
 149       -- Bottom and Top K-sized halves of the multiplicand Y.
 150       YLo        : KSeg  renames    Y(  Y'First       ..   Y'Last - K );
 151       YHi        : KSeg  renames    Y(  Y'First + K   ..   Y'Last     );
 152       
 153       -- L-sized middle segment of the product XY (+/- K from the midpoint).
 154       XYMid      : LSeg  renames   XY( XY'First + K   ..  XY'Last - K );
 155       
 156       -- Bottom and Top L-sized halves of the product XY.
 157       XYLo       : LSeg  renames   XY( XY'First       ..  XY'Last - L );
 158       XYHi       : LSeg  renames   XY( XY'First + L   ..  XY'Last     );
 159       
 160       -- Topmost K-sized quarter segment of the product XY, or 'tail'
 161       XYHiHi     : KSeg  renames XYHi( XYHi'First + K .. XYHi'Last    );
 162       
 163       -- Whether the DD term is being subtracted.
 164       DD_Sub     : WBool;
 165       
 166       -- Carry from individual term additions.
 167       C          : WBool;
 168       
 169       -- Tail-Carry accumulator, for the final ripple
 170       TC         : Word;
 171       
 172    begin
 173       
 174       -- Recurse: LL := XL * YL
 175       FZ_Multiply_Unbuffered(XLo, YLo, LL);
 176       
 177       -- Recurse: HH := XH * YH
 178       FZ_Multiply_Unbuffered(XHi, YHi, HH);
 179       
 180       -- Dx := |XL - XH| , Cx := Borrow (i.e. 1 iff XL < XH)
 181       FZ_Sub_Abs(X => XLo, Y => XHi, Difference => Dx, Underflow => Cx);
 182       
 183       -- Dy := |YL - YH| , Cy := Borrow (i.e. 1 iff YL < YH)
 184       FZ_Sub_Abs(X => YLo, Y => YHi, Difference => Dy, Underflow => Cy);
 185       
 186       -- Recurse: DD := Dx * Dy
 187       FZ_Multiply_Unbuffered(Dx, Dy, DD);
 188       
 189       -- Whether (XL - XH)(YL - YH) is positive, and so DD must be subtracted:
 190       DD_Sub := 1 - (Cx xor Cy);
 191       
 192       -- XY := LL + 2^(2 * K * Bitness) * HH
 193       XYLo := LL;
 194       XYHi := HH;
 195       
 196       -- XY += 2^(K * Bitness) * HH, but carry goes in Tail Carry accum.
 197       FZ_Add_D(X => XYMid, Y => HH, Overflow => TC);
 198       
 199       -- XY += 2^(K * Bitness) * LL, ...
 200       FZ_Add_D(X => XYMid, Y => LL, Overflow => C);
 201       
 202       -- ... but the carry goes into the Tail Carry accumulator.
 203       TC := TC + C;
 204       
 205       -- XY += 2^(K * Bitness) * (-1^DD_Sub) * DD
 206       FZ_Not_Cond_D(N => DD, Cond => DD_Sub); -- invert DD if 2s-complementing
 207       FZ_Add_D(OF_In    => DD_Sub,            -- ... and then must increment
 208                X        => XYMid,
 209                Y        => DD,
 210                Overflow => C); -- carry will go in Tail Carry accumulator
 211       
 212       -- Compute the final Tail Carry for the ripple
 213       TC := TC + C - DD_Sub;
 214       
 215       -- Barring a cosmic ray, 0 <= TC <= 2 .
 216       pragma Assert(TC <= 2);
 217       
 218       -- Ripple the Tail Carry into the tail.
 219       FZ_Add_D_W(X => XYHiHi, W => TC, Overflow => C);
 220       
 221       -- Barring a cosmic ray, the tail ripple will NOT overflow.
 222       pragma Assert(C = 0);
 223 
 224    end Mul_Karatsuba;
 225    -- CAUTION: Inlining prohibited for Mul_Karatsuba !
 226    
 227    
 228    -- Multiplier. (CAUTION: UNBUFFERED)
 229    procedure FZ_Multiply_Unbuffered(X     : in  FZ;
 230                                     Y     : in  FZ;
 231                                     XY    : out FZ) is
 232       
 233       -- The length of either multiplicand
 234       L : constant Word_Count := X'Length;
 235       
 236    begin
 237       
 238       if L <= Karatsuba_Thresh then
 239          
 240          -- Base case:
 241          FZ_Mul_Comba(X, Y, XY);
 242          
 243       else
 244          
 245          -- Recursive case:
 246          Mul_Karatsuba(X, Y, XY);
 247          
 248       end if;
 249       
 250    end FZ_Multiply_Unbuffered;
 251    
 252    
 253    -- Multiplier. Preserves the inputs.
 254    procedure FZ_Multiply_Buffered(X     : in  FZ;
 255                                   Y     : in  FZ;
 256                                   XY_Lo : out FZ;
 257                                   XY_Hi : out FZ) is
 258       
 259       -- Product buffer.
 260       P : FZ(1 .. 2 * X'Length);
 261       
 262    begin
 263       
 264       FZ_Multiply_Unbuffered(X, Y, P);
 265       
 266       XY_Lo := P(P'First            .. P'First + X'Length - 1);
 267       XY_Hi := P(P'First + X'Length .. P'Last);
 268       
 269    end FZ_Multiply_Buffered;
 270    
 271 end FZ_Mul;