Main Page   Reference Manual   Compound List   File List  

libecc/polynomial.h

Go to the documentation of this file.
00001 //
00012 //
00013 // This file is part of the libecc package.
00014 // Copyright (C) 2002 - 2004 by
00015 //
00016 // Carlo Wood, Run on IRC <carlo@alinoe.com>
00017 // RSA-1024 0x624ACAD5 1997-01-26                    Sign & Encrypt
00018 // Fingerprint16 = 32 EC A7 B6 AC DB 65 A6  F6 F6 55 DD 1C DC FF 61
00019 //
00020 // This program is free software; you can redistribute it and/or
00021 // modify it under the terms of the GNU General Public License
00022 // as published by the Free Software Foundation; either version 2
00023 // of the License, or (at your option) any later version.
00024 //
00025 // This program is distributed in the hope that it will be useful,
00026 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00027 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00028 // GNU General Public License for more details.
00029 //
00030 // You should have received a copy of the GNU General Public License
00031 // along with this program; if not, write to the Free Software
00032 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
00033 //
00034 
00035 #ifndef LIBECC_POLYNOMIAL_H
00036 #define LIBECC_POLYNOMIAL_H
00037 
00038 #include <stdexcept>
00039 #include <libecc/bitset.h>
00040 #include <libecc/debug.h>
00041 #if ECC_DEBUGOUTPUT
00042 #include <libcwd/cwprint.h>
00043 #endif
00044 
// Compile-time switches.  The only use visible in this header is
// LIBECC_AUGMENTED, which selects the matrix type that
// polynomial::print_matrix() takes (bitset<2 * m> versus bitset<m>).
#if ECC_DEBUG
// Debug build: the augmented form is enabled.  The leading `1 ||' makes the
// two dependent switches evaluate to 1 whatever LIBECC_AUGMENTED is.
#define LIBECC_AUGMENTED 1
#define LIBECC_INPLACE (1 || !LIBECC_AUGMENTED)
#define LIBECC_SWAPCOLUMNS (1 || LIBECC_INPLACE)
#else
// Don't change these.
#define LIBECC_AUGMENTED 0
#define LIBECC_INPLACE 1
#define LIBECC_SWAPCOLUMNS 1
#endif
00055 
00056 namespace libecc {
00057 
// Forward declarations.
//
// The class template and its non-member operator templates are declared here
// so that the class body can befriend the matching specializations with the
// `operator@ <>' syntax.
template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
  class polynomial;

// Field multiplication and division; both return a new polynomial.
template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
  polynomial<m, k, k1, k2> operator*(polynomial<m, k, k1, k2> const&, polynomial<m, k, k1, k2> const&);
template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
  polynomial<m, k, k1, k2> operator/(polynomial<m, k, k1, k2> const&, polynomial<m, k, k1, k2> const&);

// Comparison.
template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
  bool operator==(polynomial<m, k, k1, k2> const&, polynomial<m, k, k1, k2> const&);
template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
  bool operator!=(polynomial<m, k, k1, k2> const&, polynomial<m, k, k1, k2> const&);

// Stream output of a polynomial and of an xor_type expression.
// NOTE(review): in the second overload the template parameters appear only
// inside a nested-name-specifier, which is a non-deduced context -- confirm
// how callers are expected to reach it.
template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
  std::ostream& operator<<(std::ostream&, polynomial<m, k, k1, k2> const&);
template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
  std::ostream& operator<<(std::ostream&, typename polynomial<m, k, k1, k2>::xor_type const&);

// Addition and subtraction return an xor_type expression object rather than
// a polynomial.
template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
  typename polynomial<m, k, k1, k2>::xor_type operator+(polynomial<m, k, k1, k2> const&, polynomial<m, k, k1, k2> const&);
template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
  typename polynomial<m, k, k1, k2>::xor_type operator-(polynomial<m, k, k1, k2> const&, polynomial<m, k, k1, k2> const&);
00077 
00091 template<unsigned int m, unsigned int k, unsigned int k1 = 0, unsigned int k2 = 0>
00092   classpolynomial {
00093     public:
00097       typedef Operator::bitsetExpression<m, false, false, Operator::bitsetXOR> xor_type;
00098 
00099       // Fix this if you add members in front of M_coefficients.
00100       static size_t const offsetof_vector = bitset<m>::offsetof_vector;
00101 
00102     private:
00103       bitset<m> M_coefficients;
00104       static polynomial<m, k, k1, k2> const one;
00105       static bool S_normal_initialized;
00106       static bitset<m> S_normal;
00107 
00108     public:
00112       static polynomial const& unity(void) { return one; }
00113 
00114     public:
00118       polynomial(void) { }
00119 
00123       explicit polynomial(bitset_digit_t coefficients) : M_coefficients(coefficients) { }
00124 
00128       polynomial(polynomial const& p) : M_coefficients(p.M_coefficients) { }
00129 
00133       explicit polynomial(bitset<m> const& coefficients) : M_coefficients(coefficients) { }
00134 
00138       polynomial(std::string const& coefficients) : M_coefficients(coefficients) { }
00139 
00180       polynomial(xor_type const& expression) : M_coefficients(expression) { }
00181 
00185       polynomial& operator=(polynomial const& p) { M_coefficients = p.M_coefficients; return *this; }
00186 
00190       polynomial& operator=(bitset<m> const& coefficients) { M_coefficients = coefficients; return *this; }
00191 
00196       polynomial& operator=(xor_type const& expression);
00197 
00201       polynomial(polynomial const& b, polynomial const& c);
00202 
00206       static unsigned int const square_digits = 2 * bitset_base<m>::digits + 4;
00207 
00223       polynomial& square(bitset_digit_t* tmpbuf) const; // tmpbuf must be an array of `square_digits' bitset_digit_t.
00224 
00232       bool sqrt(void);
00233 
00234       // The field arithmetic is implemented in terms of operations on the bits.
00238       polynomial& operator+=(polynomial const& p) { M_coefficients ^= p.M_coefficients; return *this; }
00239 
00243       polynomial& operator-=(polynomial const& p) { M_coefficients ^= p.M_coefficients; return *this; }
00244 
00248       polynomial& operator*=(polynomial const& p);
00249 #ifdef LIBECC_DOXYGEN
00250       // Stupid doxygen.
00262       polynomial& operator*=(typename polynomial<m, k, k1, k2>::xor_type const& expr);
00263 #else
00264       // The real prototype.
00265       polynomial& operator*=(xor_type const& expr);
00266 #endif
00267 
00271       polynomial& operator/=(polynomial const& p);
00272 #ifdef LIBECC_DOXYGEN
00273       // Stupid doxygen.
00285       polynomial& operator/=(typename polynomial<m, k, k1, k2>::xor_type const& expr);
00286 #else
00287       // The real prototype.
00288       polynomial& operator/=(xor_type const& expr);
00289 #endif
00290 
00299       static bitset<m> const& normal(void) { if (!S_normal_initialized) calculate_normal(); return S_normal; }
00300 
00312       int trace(void) const
00313      {
00314         // This method was invented by me, so give me credit for it when you use it somewhere. Thank you.
00315         // Carlo Wood <carlo@alinoe.com> -- 4 December 2004.
00316         int tr = 0;
00317         if ((m & 1))
00318           tr = M_coefficients.template test<0>();
00319         if (((m - k) & 1))
00320           tr ^= M_coefficients.template test<m - k>();
00321         if (k1)
00322         {
00323           if (((m - k1) & 1))
00324             tr ^= M_coefficients.template test<m - k1>();
00325           if (((m - k2) & 1))
00326             tr ^= M_coefficients.template test<m - k2>();
00327         }
00328         return tr;
00329       }
00330 
00363       friend xor_type operator+ <>(polynomial const& p1, polynomial const& p2);
00364 
00373       friend xor_type operator- <>(polynomial const& p1, polynomial const& p2);
00374 
00378       friend polynomial operator* <>(polynomial const& p1, polynomial const& p2);
00379 #ifdef LIBECC_DOXYGEN
00380       // Only added for documentational reasons.
00386       friend bool operator*(polynomial<m, k, k1, k2>::xor_type const& expr, polynomial<m, k, k1, k2> const& p2);
00392       friend bool operator*(polynomial<m, k, k1, k2> const& p1, polynomial<m, k, k1, k2>::xor_type const& expr);
00393 #endif
00394 
00398       friend polynomial operator/ <>(polynomial const& p1, polynomial const& p2);
00399 #ifdef LIBECC_DOXYGEN
00400       // Only added for documentational reasons.
00406       friend bool operator/(polynomial<m, k, k1, k2>::xor_type const& expr, polynomial<m, k, k1, k2> const& p2);
00412       friend bool operator/(polynomial<m, k, k1, k2> const& p1, polynomial<m, k, k1, k2>::xor_type const& expr);
00413 #endif
00414 
00418       friend bool operator== <>(polynomial const& p1, polynomial const& p2);
00419 #ifdef LIBECC_DOXYGEN
00420       // Only added for documentational reasons.
00428       friend bool operator==(polynomial<m, k, k1, k2>::xor_type const& expr, polynomial<m, k, k1, k2> const& p2);
00436       friend bool operator==(polynomial<m, k, k1, k2> const& p1, polynomial<m, k, k1, k2>::xor_type const& expr);
00437 #endif
00438 
00442       friend bool operator!= <>(polynomial const& p1, polynomial const& p2);
00443 #ifdef LIBECC_DOXYGEN
00444       // Only added for documentational reasons.
00452       friend bool operator!=(polynomial<m, k, k1, k2>::xor_type const& expr, polynomial<m, k, k1, k2> const& p2);
00460       friend bool operator!=(polynomial<m, k, k1, k2> const& p1, polynomial<m, k, k1, k2>::xor_type const& expr);
00461 #endif
00462 
00468       friend std::ostream& operator<< <>(std::ostream& os, polynomial const& p);
00469 #ifdef LIBECC_DOXYGEN
00470       // Only added for documentational reasons.
00476       friend std::ostream& operator<<(std::ostream& os, polynomial<m, k, k1, k2>::xor_type const& expr);
00477 #endif
00478 
00482       bitset<m> const& get_bitset(void) const{ return M_coefficients; }
00483 
00487       bitset<m>& get_bitset(void) { return M_coefficients; }
00488 
00489     private:
00490       static void reduce(bitset_digit_t* buf);
00491       static bitset_digit_t reducea(bitset_digit_t* a);
00492       static void calculate_normal(void);
00493 
00494       void multiply_with(polynomial const& p1, bitset<m>& result) const;
00495 #if ECC_DEBUG
00496 #if LIBECC_AUGMENTED
00497       void print_matrix(bitset<2 * m> const* matrix, bitset<m> const& pivotted);
00498 #else
00499       void print_matrix(bitset<m> const* matrix, bitset<m> const& pivotted);
00500 #endif
00501 #endif
00502   };
00503 
00504 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
00505   polynomial<m, k, k1, k2> const polynomial<m, k, k1, k2>::one(1);
00506 
00507 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
00508   bool polynomial<m, k, k1, k2>::sqrt(void)
00509   {
00510     if (!k1)
00511     {
00512       bitset<m> highbits;
00513       highbits.reset();
00514 
00515       // First convert all odd powers into even powers
00516       if ((m & 1) == 1)
00517       {
00518         if ((k & 1) == 1)               // m and k are odd?
00519         {
00520           for(unsigned int bit = 1; bit < m; bit += 2)
00521           {
00522             if (M_coefficients.test(bit))
00523             {
00524               if (bit >= m - k)
00525                 highbits.flip(bit + k - m);
00526               else
00527                 M_coefficients.flip(bit + k);
00528               highbits.flip(bit);
00529             }
00530           }
00531         }
00532         else                    // m is odd and k is even
00533         {
00534           for(unsigned int bit = 1; bit < m; bit += 2)
00535           {
00536             if (M_coefficients.test(bit))
00537             {
00538               if (bit >= m - k)
00539               {
00540                 M_coefficients.flip(bit + 2 * k - m);
00541                 M_coefficients.flip(bit + k - m);
00542               }
00543               else
00544                 M_coefficients.flip(bit + k);
00545               highbits.flip(bit);
00546             }
00547           }
00548         }
00549       }
00550       else if ((k & 1) == 1)    // m is even and k is odd
00551       {
00552         for(unsigned int bit = 1; bit < m; bit += 2)
00553         {
00554           if (M_coefficients.test(bit))
00555           {
00556             if (bit < k)
00557             {
00558               M_coefficients.flip(bit + k);
00559               M_coefficients.flip(bit + m - k);
00560               highbits.flip(bit + m - k);
00561             }
00562             else
00563             {
00564               M_coefficients.flip(bit - k);
00565               highbits.flip(bit - k);
00566             }
00567           }
00568         }
00569       }
00570       else                      // m and k are both even (actually, this should never be used as reduction polynomial).
00571       {
00572         for(unsigned int bit = 1; bit < m; bit += 2)
00573           if (M_coefficients.test(bit))
00574             return false;               // This can't be a square
00575       }
00576 
00577       // Next handle the remaining even powers
00578       unsigned int bit_to = 1;
00579       for(unsigned int bit = 2; bit < m; bit += 2)
00580       {
00581         if (M_coefficients.test(bit))
00582           M_coefficients.set(bit_to);
00583         else
00584           M_coefficients.clear(bit_to);
00585         ++bit_to;
00586       }
00587       for(unsigned int bit = m % 2; bit < m; bit += 2)
00588       {
00589         if (highbits.test(bit))
00590           M_coefficients.set(bit_to);
00591         else
00592           M_coefficients.clear(bit_to);
00593         ++bit_to;
00594       }
00595     }
00596     else
00597     {
00598       structRoot {
00599         polynomial<m, k, k1, k2> value;
00600         Root(polynomial<m, k, k1, k2> const& p) : value(p)
00601         {
00602           bitset_digit_t p2buf[libecc::polynomial<m, k, k1, k2>::square_digits];
00603           polynomial<m, k, k1, k2>& p2 = value.square(p2buf);
00604           bitset_digit_t p4buf[libecc::polynomial<m, k, k1, k2>::square_digits];
00605           polynomial<m, k, k1, k2>& p4 = p2.square(p4buf);
00606           for (unsigned int i = 1; i < m / 2; ++i)
00607           {
00608             p4.square(p2buf);
00609             p2.square(p4buf);
00610           }
00611           value = (m % 2 == 0) ? p2 : p4;
00612         }
00613       };
00614       static Root const root_of_t(polynomial<m, k, k1, k2>(2));
00615       polynomial<m, k, k1, k2> tmp(0);
00616       bitset<m> tmp2;
00617       tmp2.reset();
00618       for(unsigned int bit = 0; bit < m / 2; ++bit)
00619       {
00620         if (M_coefficients.test(2 * bit))
00621           tmp2.set(bit);
00622         if (M_coefficients.test(2 * bit + 1))
00623           tmp.get_bitset().set(bit);
00624       }
00625       if (m % 2 == 1 && M_coefficients.test(m - 1))
00626         tmp2.set(m / 2);
00627       M_coefficients = tmp2;
00628       *this += tmp * root_of_t.value;
00629     }
00630     return true;
00631   }
00632 
00633 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
00634   inline polynomial<m, k, k1, k2>&
00635   polynomial<m, k, k1, k2>::operator*=(polynomial const& p)
00636   {
00637     multiply_with(p, M_coefficients);
00638     return *this;
00639   }
00640 
00641 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
00642   inline polynomial<m, k, k1, k2>&
00643   polynomial<m, k, k1, k2>::operator*=(typename polynomial<m, k, k1, k2>::xor_type const& expr)
00644   {
00645     return (*this *= polynomial<m, k, k1, k2>(expr));
00646   }
00647 
00648 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
00649   inline polynomial<m, k, k1, k2>&
00650   polynomial<m, k, k1, k2>::operator=(xor_type const& expression)
00651   {
00652     M_coefficients = expression;
00653     return *this;
00654   }
00655 
00656 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
00657   void
00658   polynomial<m, k, k1, k2>::multiply_with(polynomial const& p1, bitset<m>& result) const
00659  {
00660     bitset_digit_t output[bitset<m>::digits * 2] __attribute__ ((aligned (8)));
00661 
00662     // Find the first non-zero digit in the input polynomial of this object.
00663     unsigned int digit = 0;
00664     while(M_coefficients.digit(digit) == 0)             // Still zero?
00665     {
00666       output[digit] = 0;                                // That means that the output will end on zero too.
00667       if (++digit == bitset<m>::digits)
00668       {
00669         result.reset();                                 // The whole polynomial is zero, the result will be zero too.
00670         return;
00671       }
00672     }
00673     unsigned int uninitialized_digit = digit;           // The next digit of `output' that has not yet been initialized.
00674     // Find the first digit in the input polynomial of this object whose first bit is set.
00675     for(; digit < bitset<m>::digits; ++digit)
00676     {
00677       if ((M_coefficients.digit(digit) & 1))            // Is the first bit set?
00678       {
00679         // Set the output to p1 times this bit.
00680         for (unsigned int d = 0; d < bitset<m>::digits; ++d)
00681           output[d + digit] = p1.get_bitset().digit(d);
00682         uninitialized_digit = bitset<m>::digits + digit;
00683         ++digit;                                        // Set to the next input digit.
00684         break;
00685       }
00686       output[digit] = 0;                                // Initialize this digit of the output to 0.
00687       ++uninitialized_digit;
00688     }
00689     // Set the remaining digits to zero, if any.
00690     for(unsigned int remaining_digit = uninitialized_digit; remaining_digit < sizeof(output) / sizeof(bitset_digit_t); ++remaining_digit)
00691       output[remaining_digit] = 0;
00692     // Find for the remaining input digits the ones that have their first bit set.
00693     for(; digit < bitset<m>::digits; ++digit)
00694       if ((M_coefficients.digit(digit) & 1))            // Is the first bit set?
00695       {
00696         // Add p1 times this bit to the output.
00697         for (unsigned int d = 0; d < bitset<m>::digits; ++d)
00698           output[d + digit] ^= p1.get_bitset().digit(d);
00699       }
00700     // Create a bitset that will contain p1, shifted at most bitset_digit_bits - 1 to the left.
00701     bitset<m + bitset_digit_bits - 1> shifted_p1;
00702     // Start with having it shifted 1 bit to the left.
00703     bitset_digit_t carry = 0;
00704     unsigned int d = 0;
00705     for(bitset_digit_t const* ptr = p1.get_bitset().digits_ptr(); ptr < p1.get_bitset().digits_ptr() + bitset<m>::digits; ++ptr, ++d)
00706     {
00707       shifted_p1.rawdigit(d) = (*ptr << 1) | carry;
00708       carry = *ptr >> (8 * sizeof(bitset_digit_t) - 1);
00709     }
00710     if (d < bitset<m + bitset_digit_bits - 1>::digits)
00711       shifted_p1.rawdigit(d) = carry;
00712     for(bitset_digit_t bitmask = 2;;)
00713     {
00714       for(unsigned int digit = 0; digit < bitset<m>::digits; ++digit)
00715         if ((M_coefficients.digit(digit) & bitmask))
00716         {
00717           for (unsigned int d = 0; d < shifted_p1.digits; ++d)
00718             output[d + digit] ^= shifted_p1.digit(d);
00719         }
00720       bitmask <<= 1;            // Next bit.
00721       if (bitmask == 0)         // Done?
00722         break;
00723       // Shift p1 one bit further to the left.
00724       shifted_p1.template shift_op<1, left, assign>(shifted_p1);
00725     }
00726     // Reduce the resulting output of the multiplication.
00727     reduce(output);
00728     // Copy the reduced output to `result'.
00729     std::memcpy(result.digits_ptr(), output, bitset<m>::digits * sizeof(bitset_digit_t));
00730   }
00731 
#if ECC_DEBUG
// Debug-only helper that prints a range of bits of a digit array as '0' and
// '1' characters, highest bit first.  It is used in operator/=() through
// cwprint() to show the intermediate values A, B, U and V.
//
// The bits from M_deg down to M_low are wrapped in the ANSI sequences
// ESC[31m ... ESC[0m, and a '.' is printed after bit 0.
template<unsigned int m>
struct div_tct {
  bitset_digit_t const* M_p;    // Digit that contains bit 0.
  int M_deg;                    // Bit index at which the coloured range starts.
  int M_low;                    // Bit index at which the coloured range ends; may be negative.
  div_tct(bitset<m> const& b, int deg, int low) : M_p(b.digits_ptr()), M_deg(deg), M_low(low) { }
  void print_on(std::ostream& os) const
  {
    // Round M_low down to a digit boundary, but never stop above bit 0.
    int lowbit = (M_low >> bitset_digit_bits_log2) * bitset_digit_bits;
    if (lowbit > 0)
      lowbit = 0;
    // Negative values of b give a negative digitoffset, so digits in front of
    // M_p are read; the caller must have those available.
    for (int b = 2 * m - 1; b >= lowbit; --b)
    {
      if (b == M_deg)
        os << "\033[31m";       // "\033" is ESC; "\e" is a compiler extension.
      int digitoffset = (b >> bitset_digit_bits_log2);
      bitset_digit_t mask = static_cast<bitset_digit_t>(1) << (b & (bitset_digit_bits - 1));
      if (M_p[digitoffset] & mask)
        os << '1';
      else
        os << '0';
      if (b == M_low)
        os << "\033[0m";
      if (b == 0)
        os << '.';
    }
  }
};
#endif
00762 
00763 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
00764   polynomial<m, k, k1, k2>&
00765   polynomial<m, k, k1, k2>::operator/=(polynomial const& p)
00766   {
00767 #if ECC_DEBUG
00768     LibEccDout(dc::polynomial|noprefix_cf, "");
00769     LibEccDout(dc::polynomial, "Entering polynomial<" << m << ", " << k << ", " << k1 << ", " << k2 << ">::operator/=()");
00770     polynomial<m, k, k1, k2> x(p.get_bitset());
00771     polynomial<m, k, k1, k2> y(M_coefficients);
00772     LibEccDout(dc::polynomial, "x(t) = " << x);
00773     LibEccDout(dc::polynomial|flush_cf, "y(t) = " << y);
00774 #endif
00775 
00776     // The following algorithm is based on the algorithm described on
00777     // page 7 of http://research.sun.com/techrep/2001/smli_tr-2001-95.ps
00778     // with significant optimization changes by Carlo Wood.
00779     // The basis of the algorithm is to keep two invariants valid:
00780     // A * y = U * x  and  B * y = V * x
00781     // while reducing the size of A and B steadily to 0, until either is 1.
00782     // The size of A and B is defined as the distance between the lowest
00783     // and highest 1 in the bitset.
00784 
00785     // Make sure that there is enough space for a full bitset object
00786     // and align the bitsets on a multiple of bitset_digit_t.
00787     static unsigned int const digit_offset_UV = ((sizeof(bitset<m>) * 8 - 1) / bitset_digit_bits + 1);
00788     static unsigned int const offset_UV = digit_offset_UV * bitset_digit_bits;
00789     // Make room for exponents from at least t^-m till t^2m.
00790     static unsigned int const digit_size_UV = 3 * digit_offset_UV;
00791     // Variables A and B do not need this much space.
00792     static unsigned int const digit_size_AB = bitset<m>::digits;
00793     // One digit of padding, needed for xor_with_zero_padded.
00794     static unsigned int const padding_digit_size = 1;
00795 
00796     // Declare stack space for four variables.
00797     bitset_digit_t bitpool [5 * padding_digit_size + 2 * digit_size_AB + 2 * digit_size_UV] __attribute__ ((__aligned__ (ECC_BITS)));
00798     std::memset((char*)bitpool, 0, sizeof(bitpool));
00799 
00800     bitset<m>& A(*(bitset<m>*)&bitpool[padding_digit_size]);
00801     bitset<m>& B(*(bitset<m>*)&bitpool[2 * padding_digit_size + digit_size_AB]);
00802     bitset<m>& U(*(bitset<m>*)&bitpool[3 * padding_digit_size + 2 * digit_size_AB + digit_offset_UV]);
00803     bitset<m>& V(*(bitset<m>*)&bitpool[4 * padding_digit_size + 2 * digit_size_AB + digit_size_UV + digit_offset_UV]);
00804 
00805     // The representation of U and V will be done with bitsets of size `digit_size_UV * bitset_digit_bits'.
00806     // This means that they contain powers of t with a negative exponent.
00807     // That is not a problem as those are well defined: t^(-n) = 1 / t^n.
00808 
00809     // Let rp = M(t) = t^m + t^k [+ t^k1 + t^k2] + 1.
00810 #if ECC_DEBUG
00811     bitset<m + 1> rp("1");
00812     rp.template set<m>();
00813     rp.template set<k>();
00814     if (k1)
00815     {
00816       rp.template set<k1>();
00817       rp.template set<k2>();
00818     }
00819 #endif
00820 
00821     // Let U(t) = y(t) (= M_coefficients).
00822     LibEccDout(dc::polynomial|flush_cf, "U <- y");
00823     U = M_coefficients;
00824 
00825     // Guess the maximum and minimum powers to be the possible limits.
00826     int degU = m - 1;
00827     int lowU = 0;
00828 
00829     // Let A(t) = x(t).
00830     LibEccDout(dc::polynomial|flush_cf, "A <- x");
00831     A = p.get_bitset();
00832 
00833     // Then
00834     //
00835     // A(t) * y(t) = U(t) * x(t)  [mod M(t)].
00836 
00837     // Let V(t) = 0
00838     // Let B = M(t)
00839     //
00840     // Then
00841     //
00842     // B(t) * y(t) = V(t) * x(t)  [mod M(t)].
00843     //
00844     // Let degA be the highest power of t in A.
00845     typename bitset<m>::const_reverse_iterator degA = A.rbegin();
00846     degA.find1();
00847     LibEccDout(dc::polynomial|flush_cf, "deg(A) == " << degA);
00848 
00849     // Let lowA be the lowest power of t in A.
00850     typename bitset<m>::const_iterator lowA = A.begin();
00851     lowA.find1();
00852     LibEccDout(dc::polynomial|flush_cf, "low(A) == " << lowA);
00853 
00854     unsigned int sizeA = degA.get_index() - lowA.get_index();
00855 
00856     // Let n = m - deg(A).
00857     unsigned int n = m - degA.get_index();
00858     //
00859     // Then B'(t) = B(t) - A(t) * t^n will have a degree less than m.
00860     // And
00861     //
00862     // B'(t) * y(t) = B(t) * y(t) - A(t) * y(t) * t^n =
00863     //              = V(t) * x(t) - U(t) * x(t) * t^n =
00864     //              = (V(t) - U(t) * t^n) * x(t) =
00865     //              = V'(t) * x(t)                      [mod M(t)].
00866     //
00867     // B <- B'
00868     LibEccDout(dc::polynomial|flush_cf, "B <- A * t^" << n << " + " << cwprint_using(rp, &bitset<m+1>::base2_print_on));
00869     B.xor_with_zero_padded(A, lowA.get_index(), degA.get_index(), n);
00870     B.template flip<m>();
00871     B.template flip<k>();
00872     if (k1)
00873     {
00874       B.template flip<k1>();
00875       B.template flip<k2>();
00876     }
00877     B.template flip<0>();
00878 
00879     // Let degB be the highest power of t in B.
00880     typename bitset<m>::const_reverse_iterator degB = B.rbegin();
00881     degB.find1();
00882     LibEccDout(dc::polynomial|flush_cf, "deg(B) == " << degB);
00883 
00884     // Let lowB be the lowest power of t in B.
00885     typename bitset<m>::const_iterator lowB = B.begin();
00886     lowB.find1();
00887     LibEccDout(dc::polynomial|flush_cf, "low(B) == " << lowB);
00888 
00889     // V <- V'
00890     LibEccDout(dc::polynomial|flush_cf, "V <- U * t^" << n <<
00891         "  [mod " << cwprint_using(rp, &bitset<m + 1>::base2_print_on) << "]");
00892     V.xor_with_zero_padded(U, 0, m - 1, n);
00893 
00894     int degV = degU + n;
00895     int lowV = lowU + n;
00896     
00897     unsigned int sizeB = degB.get_index() - lowB.get_index();
00898 
00899     if (sizeA > 0 && sizeB > 0)
00900       for(;;)
00901       {
00902         LibEccDout(dc::polynomial|flush_cf, "A = " << cwprint(div_tct<m>(A, degA.get_index(), lowA.get_index())));
00903         LibEccDout(dc::polynomial|flush_cf, "B = " << cwprint(div_tct<m>(B, degB.get_index(), lowB.get_index())));
00904         LibEccDout(dc::polynomial|flush_cf, "U = " << cwprint(div_tct<m>(U, degU, lowU)));
00905         LibEccDout(dc::polynomial|flush_cf, "V = " << cwprint(div_tct<m>(V, degV, lowV)));
00906         if (sizeA < sizeB)
00907         {
00908           int left_shift = lowB.get_index() - lowA.get_index();
00909           LibEccDout(dc::polynomial|flush_cf, "B <- B + A * t^" << left_shift);
00910           B.xor_with_zero_padded(A, lowA.get_index(), degA.get_index(), left_shift);
00911           degB.find1();
00912           lowB.find1();
00913           sizeB = degB.get_index() - lowB.get_index();
00914           LibEccDout(dc::polynomial|flush_cf, "V <- V + U * t^" << left_shift);
00915           V.xor_with_zero_padded(U, lowU, degU, left_shift);
00916           degV = std::max(degV, degU + left_shift);
00917           lowV = std::min(lowV, lowU + left_shift);
00918           if (sizeB == 0)
00919             break;
00920         }
00921         else
00922         {
00923           int left_shift = lowA.get_index() - lowB.get_index();
00924           LibEccDout(dc::polynomial|flush_cf, "A <- A + B * t^" << left_shift);
00925           A.xor_with_zero_padded(B, lowB.get_index(), degB.get_index(), left_shift);
00926           degA.find1();
00927           lowA.find1();
00928           sizeA = degA.get_index() - lowA.get_index();
00929           LibEccDout(dc::polynomial|flush_cf, "U <- U + V * t^" << left_shift);
00930           U.xor_with_zero_padded(V, lowV, degV, left_shift);
00931           degU = std::max(degU, degV + left_shift);
00932           lowU = std::min(lowU, lowV + left_shift);
00933           if (sizeA == 0)
00934             break;
00935         }
00936       }
00937 
00938     LibEccDout(dc::polynomial|flush_cf, "A = " << cwprint(div_tct<m>(A, degA.get_index(), lowA.get_index())));
00939     LibEccDout(dc::polynomial|flush_cf, "B = " << cwprint(div_tct<m>(B, degB.get_index(), lowB.get_index())));
00940     LibEccDout(dc::polynomial|flush_cf, "U = " << cwprint(div_tct<m>(U, degU, lowU)));
00941     LibEccDout(dc::polynomial|flush_cf, "V = " << cwprint(div_tct<m>(V, degV, lowV)));
00942 
00943     bitset<m>* R;
00944     // 'F' (Floating-point polynomial) will be shifted to the right and
00945     // is therefore defined to run from t^-2m till t^2m.  This means it will
00946     // be shifted OVER the other bitsets, but we don't need those anymore anyway.
00947     static unsigned int const offset_F = 2 * offset_UV;
00948     static unsigned int const size_F = 2 * m + offset_F;
00949     bitset<size_F>* F;
00950     int low1, lowR;
00951 #if ECC_DEBUG
00952     int degR;
00953 #endif
00954     if (sizeA == 0)
00955     {
00956       LibEccDout(dc::polynomial|flush_cf, "R = U");
00957       R = &U;
00958       // tmp to avoid 'warning: type-punning to incomplete type might break strict-aliasing rules'
00959       bitset_digit_t* tmp = &bitpool[3 * padding_digit_size + 2 * digit_size_AB - digit_offset_UV];
00960       F = reinterpret_cast<bitset<size_F>*>(tmp);
00961       low1 = lowA.get_index();
00962       lowR = lowU;
00963 #if ECC_DEBUG
00964       degR = degU;
00965 #endif
00966     }
00967     else // sizeB == 0
00968     {
00969       LibEccDout(dc::polynomial|flush_cf, "R = V");
00970       R = &V;
00971       // tmp to avoid 'warning: type-punning to incomplete type might break strict-aliasing rules'
00972       bitset_digit_t* tmp = &bitpool[4 * padding_digit_size + 2 * digit_size_AB + digit_size_UV - digit_offset_UV];
00973       F = reinterpret_cast<bitset<size_F>*>(tmp);
00974       low1 = lowB.get_index();
00975       lowR = lowV;
00976 #if ECC_DEBUG
00977       degR = degV;
00978 #endif
00979     }
00980 
00981     *F >>= low1;
00982     lowR -= low1;
00983 #if ECC_DEBUG
00984     degR -= low1;
00985 #endif
00986     // Get rid of negative exponents.
00987     LibEccDout(dc::polynomial|flush_cf, "lowR = " << lowR);
00988     LibEccDout(dc::polynomial|flush_cf, "R = " << cwprint(div_tct<m>(*R, degR, lowR)));
00989     if ((!k1 && k >= bitset_digit_bits) || k2 >= bitset_digit_bits)
00990     {
00991       static int const digit_shift_k2 = k2 >> bitset_digit_bits_log2;
00992       static int const bit_shift_k2 = k2 & (bitset_digit_bits  - 1);
00993       static int const digit_shift_k1 = k1 >> bitset_digit_bits_log2;
00994       static int const bit_shift_k1 = k1 & (bitset_digit_bits  - 1);
00995       static int const digit_shift_k = k >> bitset_digit_bits_log2;
00996       static int const bit_shift_k = k & (bitset_digit_bits  - 1);
00997       static int const digit_shift_m = m >> bitset_digit_bits_log2;
00998       static int const bit_shift_m = m & (bitset_digit_bits  - 1);
00999       static int const DS_minus_bit_shift_k2_with_compile_warning_evasion = (bitset_digit_bits - bit_shift_k2) & (bitset_digit_bits  - 1);
01000       static int const DS_minus_bit_shift_k1_with_compile_warning_evasion = (bitset_digit_bits - bit_shift_k1) & (bitset_digit_bits  - 1);
01001       static int const DS_minus_bit_shift_k_with_compile_warning_evasion = (bitset_digit_bits - bit_shift_k) & (bitset_digit_bits  - 1);
01002       static int const DS_minus_bit_shift_m_with_compile_warning_evasion = (bitset_digit_bits - bit_shift_m) & (bitset_digit_bits  - 1);
01003       int first_digit = (lowR + offset_F) >> bitset_digit_bits_log2;
01004       bitset_digit_t* ptr = F->digits_ptr() + first_digit;
01005       bitset_digit_t* ptr1 = R->digits_ptr();
01006       while(ptr < ptr1)
01007       {
01008         if (k1)
01009         {
01010           ptr[digit_shift_k2] ^= (*ptr) << bit_shift_k2;
01011           if (bit_shift_k2 != 0)
01012             ptr[digit_shift_k2 + 1] ^= (*ptr) >> DS_minus_bit_shift_k2_with_compile_warning_evasion;
01013           ptr[digit_shift_k1] ^= (*ptr) << bit_shift_k1;
01014           if (bit_shift_k1 != 0)
01015             ptr[digit_shift_k1 + 1] ^= (*ptr) >> DS_minus_bit_shift_k1_with_compile_warning_evasion;
01016         }
01017         ptr[digit_shift_k] ^= (*ptr) << bit_shift_k;
01018         if (bit_shift_k != 0)
01019           ptr[digit_shift_k + 1] ^= (*ptr) >> DS_minus_bit_shift_k_with_compile_warning_evasion;
01020         ptr[digit_shift_m] ^= (*ptr) << bit_shift_m;
01021         if (bit_shift_m != 0)
01022           ptr[digit_shift_m + 1] ^= (*ptr) >> DS_minus_bit_shift_m_with_compile_warning_evasion;
01023         ++ptr;
01024       }
01025     }
01026     else
01027     {
01028       for (unsigned int i = lowR + offset_F; i < offset_F; ++i)
01029       {
01030         if (F->test(i))
01031         {
01032 #if ECC_DEBUG
01033           F->flip(i);           // This is not really needed, but prints nicer output below.
01034 #endif
01035           if (k1)
01036           {
01037             F->flip(i + k2);
01038             F->flip(i + k1);
01039           }
01040           F->flip(i + k);
01041           F->flip(i + m);
01042         }
01043       }
01044     }
01045 #if ECC_DEBUG
01046     lowR = 0;
01047     degR = 2 * m - 1;
01048 #endif
01049     LibEccDout(dc::polynomial|flush_cf, "R = " << cwprint(div_tct<m>(*R, degR, lowR)));
01050     reduce(R->digits_ptr());
01051 #if ECC_DEBUG
01052     degR = m - 1;
01053 #endif
01054     LibEccDout(dc::polynomial|flush_cf, "R = " << cwprint(div_tct<m>(*R, degR, lowR)));
01055     M_coefficients = *R;
01056 
01057     return *this;
01058   }
01059 
      // Divide this polynomial by the value of an xor expression.
      //
      // `expr` is first converted to a polynomial<m, k, k1, k2>; the division itself is
      // delegated to the operator/= overload that accepts a polynomial.  The reference
      // returned by that call is returned unchanged.
01060 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01061   inline polynomial<m, k, k1, k2>&
01062   polynomial<m, k, k1, k2>::operator/=(typename polynomial<m, k, k1, k2>::xor_type const& expr)
01063   {
01064     return (*this /= polynomial<m, k, k1, k2>(expr));
01065   }
01066 
01067 // Solve x^2 + b x = c.
01068 // Assuming that b != 0, there are 2 solutions: x1 and x1 + b.
01069 // This means that during the 'wiping' of the matrix in order
01070 // to solve x, one bit of x will stay undetermined.  We need
01071 // to take special care to make sure that this will be a bit
01072 // for which a bit of 'b' is set, otherwise we'd return a wrong
01073 // value.
01074 //
01075 // If b equals zero, then the solution is the sqrt(c).  Otherwise
01076 // we can divide both sides of the equation by b^2 and solve
01077 // y^2 + y = c/b^2, and set x = b * y.
01078 //
01079 // There will only be a solution to this equation iff 0 = Tr(c/b^2).
01080 // (simply square the equation m-1 times and add them all up).
01081 //
01082 // Note that if y1 is a solution, then so is y1 + 1, hence we
01083 // cannot determine the least significant bit of y.
01084 //
01085 // It is possible to compose a matrix A such that Ax = x^2 + x
01086 // because squaring is an automorphism of the field:
01087 // x is a sum of basis elements, i.e. x = b1 + b2 + b3 and
01088 // x^2 = b1^2 + b2^2 + b3^2.  Therefore, if there exists a
01089 // matrix S such that Sb_i = b_i^2 for any basis element then
01090 // A = (S + I).  Moreover, such a matrix S must exist because
01091 // there are exactly m basis elements, and an m x m matrix
01092 // will always be able to satisfy that.
01093 
01094 #if ECC_DEBUG
01095 // Debug function: dump `matrix` (m rows) to the dc::gaussj debug channel.
      //
      // Parameters:
      //   matrix   - pointer to the first of m rows; rows are bitset<2 * m> when
      //              LIBECC_AUGMENTED is set and bitset<m> otherwise.
      //   pivotted - one bit per row/column index; set bits are marked in the output.
      //
      // Output layout:
      //   * Header lines, one per power of ten below m, showing one decimal digit of the
      //     column index for columns whose bit is set in `pivotted`, "+ " for columns
      //     1 .. (m + 1) / 2 - 1, and blanks for all others.
      //   * One line per row, prefixed with "+ " (rows 1 .. (m + 1) / 2 - 1), "* " (rows
      //     set in `pivotted`) or "  ", followed by the row's bits as '0' / '1'.
      //   * When LIBECC_INPLACE is set and a row has more than m bits, bits in the marked
      //     columns are colored: green if the bit equals the bit m positions away
      //     (modulo 2 * m), red if it differs.
      //
      // The color sequences are spelled with the standard octal escape \033 (ESC) rather
      // than the non-standard \e extension, so the literals do not depend on the compiler.
01096 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01097   void polynomial<m, k, k1, k2>::print_matrix(
01098 #if LIBECC_AUGMENTED
01099        bitset<2 * m> const* matrix,
01100 #else
01101        bitset<m> const* matrix,
01102 #endif
01103        bitset<m> const& pivotted)
01104   {
01105     // Print the column header, one line per decimal digit place.
01106     for (unsigned int n = 1; n < m; n *= 10)
01107     {
01108       LibEccDout(dc::gaussj|continued_cf, "  ");
01109       for (unsigned int bit = 0; bit < matrix->number_of_bits; ++bit)
01110       {
01111         if (bit == m)                           // Gap between the two halves of an augmented row.
01112           LibEccDout(dc::continued, ' ');
01113         if ((bit % m) >= 1 && (bit % m) < (m + 1) / 2)
01114           LibEccDout(dc::continued, "+ ");
01115         else if (pivotted.test(bit % m))
01116           LibEccDout(dc::continued, (((bit % m) / n) % 10) << ' ');
01117         else
01118           LibEccDout(dc::continued, "  ");
01119       }
01120       LibEccDout(dc::finish, "");
01121     }
01122     for (unsigned int row = 0; row < m; ++row)
01123     {
01124       std::string line;
01125       if (row >= 1 && row < (m + 1) / 2)
01126         line = "+ ";
01127       else if (pivotted.test(row))
01128         line = "* ";
01129       else
01130         line = "  ";
01131       for (unsigned int bit = 0; bit < matrix->number_of_bits; ++bit)
01132       {
01133         if (bit == m)
01134           line += ' ';
01135         bool isset = matrix[row].test(bit);
01136         bool need_color = LIBECC_INPLACE && (matrix->number_of_bits > m) &&
01137             (((bit % m) >= 1 && (bit % m) < (m + 1) / 2) || pivotted.test(bit % m));
01138         if (need_color)
01139         {
01140           unsigned int corresponding_bit = (bit + m) % (2 * m);
01141           if (isset == matrix[row].test(corresponding_bit))
01142             line += "\033[32m";                 // Green: both halves agree.
01143           else
01144             line += "\033[31m";                 // Red: the halves differ.
01145         }
01146         line += (isset ? '1' : '0');
01147         if (need_color)
01148           line += "\033[0m";                    // Reset attributes.
01149         line += ' ';
01150       }
01151       LibEccDout(dc::gaussj, line);
01152     }
01153     LibEccDout(dc::gaussj|noprefix_cf, "");
01154   }
01155 #endif
01156 
      // Quadratic solver constructor: builds the polynomial x that satisfies x^2 + b x = c.
      //
      // Parameters:
      //   b - the coefficient of the linear term.
      //   c - the right-hand side.
      //
      // Throws std::domain_error when b is non-zero and (c / b^2).trace() == 1.
      //
      // Behavior, as implemented below:
      //   * If b has no bits set, the result is c after calling sqrt().
      //   * Otherwise cdb2 = c / b^2 is computed and its trace is checked (see above).
      //   * The first call that gets this far fills and reduces a function-local static
      //     matrix (optionally augmented, in-place and with column swaps, depending on
      //     LIBECC_AUGMENTED / LIBECC_INPLACE / LIBECC_SWAPCOLUMNS); later calls reuse it.
      //   * Bit `row` of the result is set when (matrix[row] & cdb2.get_bitset()).odd()
      //     is true, and the result is finally multiplied by b.
      //
      // NOTE(review): the static matrix and its matrix_initialized flag are written without
      // any synchronization visible here; confirm that the first call cannot run concurrently.
01157 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01158   polynomial<m, k, k1, k2>::polynomial(polynomial<m, k, k1, k2> const& b, polynomial<m, k, k1, k2> const& c) :
01159       M_coefficients(0)
01160   {
01161     // If b == 0, then x = sqrt(c).
01162     if (!b.M_coefficients.any())
01163     {
01164       M_coefficients = c.M_coefficients;
01165       sqrt();
01166       return;
01167     }
01168 
01169     // Calculate c/b^2.
01170     bitset_digit_t b2buf[square_digits];
01171     polynomial<m, k, k1, k2>& b2 = b.square(b2buf);
01172     polynomial<m, k, k1, k2> cdb2(c);
01173     cdb2 /= b2;
01174     if (cdb2.trace() == 1)
01175       throw std::domain_error("x^2 + bx = c has no solution");
01176 
01177 #if LIBECC_AUGMENTED
01178     typedef bitset<2 * m> matrixrow_type;
01179 #else
01180     typedef bitset<m> matrixrow_type;
01181 #endif
01182     static matrixrow_type matrix[m];            // A mx2m or mxm matrix.
01183     static bool matrix_initialized;
01184     if (!matrix_initialized)
01185     {
          // NOTE(review): std::memset is declared in <cstring>, which the include list at the top
          // of this header does not name directly; presumably it arrives through another include.
01186       std::memset(matrix, 0, sizeof(matrix));
01187       // Fill this matrix with either the augmented matrix (A|I) or with just A,
01188       // where A is the matrix such that Ax = x^2 + x.
01189       for (unsigned int bit = 0; bit < m; ++bit)
01190       {
01191         matrix[bit].set(bit);           // The I of A = (S + I).
01192 #if LIBECC_AUGMENTED
01193         matrix[bit].set(bit + m);               // The I of (A|I).
01194 #endif
01195       }
01196       for (unsigned int bit = 0; bit < (m + 1) / 2; ++bit)
01197         matrix[2 * bit].flip(bit);      // The square of low exponents.
01198       for (unsigned int bit = (m + 1) / 2; bit < m; ++bit)
01199         matrix[2 * bit - m].set(bit);   // Reduction with m.
01200       for (unsigned int bit = (m + 1) / 2; bit < m - k / 2; ++bit)
01201         matrix[2 * bit - m + k].flip(bit);      // Reduction with m - k.
01202       if (k1)
01203       {
01204         for (unsigned int bit = (m + 1) / 2; bit < m - k1 / 2; ++bit)
01205           matrix[2 * bit - m + k1].flip(bit);   // Reduction with m - k1.
01206         for (unsigned int bit = (m + 1) / 2; bit < m - k2 / 2; ++bit)
01207           matrix[2 * bit - m + k2].flip(bit);   // Reduction with m - k2.
01208       }
          // Exponents whose first reduction step still lands at or above m are reduced a second
          // time; each of the loops below handles the overflow of one of the terms k, k1, k2.
01209       for (unsigned int bit = m - k / 2; bit < m; ++bit)
01210       {
01211         matrix[2 * bit - m + k - m].flip(bit);
01212         matrix[2 * bit - m + k - m + k].flip(bit);
01213         if (k1)
01214         {
01215           matrix[2 * bit - m + k - m + k1].flip(bit);
01216           matrix[2 * bit - m + k - m + k2].flip(bit);
01217         }
01218       }
01219       if (k1)
01220       {
01221         for (unsigned int bit = m - k1 / 2; bit < m; ++bit)
01222         {
01223           matrix[2 * bit - m + k1 - m].flip(bit);
01224           matrix[2 * bit - m + k1 - m + k].flip(bit);
01225           matrix[2 * bit - m + k1 - m + k1].flip(bit);
01226           matrix[2 * bit - m + k1 - m + k2].flip(bit);
01227         }
01228         for (unsigned int bit = m - k2 / 2; bit < m; ++bit)
01229         {
01230           matrix[2 * bit - m + k2 - m].flip(bit);
01231           matrix[2 * bit - m + k2 - m + k].flip(bit);
01232           matrix[2 * bit - m + k2 - m + k1].flip(bit);
01233           matrix[2 * bit - m + k2 - m + k2].flip(bit);
01234         }
01235       }
01236 
          // Bookkeeping of the rows that have already been used as pivot row.
01237       bitset<m> pivotted;
01238       pivotted.reset();
01239 
01240       LibEccDebug(if (dc::gaussj.is_on()) print_matrix(matrix, pivotted));
01241 
01242       // Next, wipe it, so that the left half becomes I.
01243       // The first half is easy.
01244       for (unsigned int wipecol = 1; wipecol < (m + 1) / 2; ++wipecol)
01245       {
01246         matrix[2 * wipecol] ^= matrix[wipecol];
01247 #if LIBECC_INPLACE
01248         matrix[2 * wipecol].set(wipecol);               // Store the inverse in-place, destroying the original.
01249 #endif
01250       }
01251 
01252       // The second half is not.  Use Gauss-Jordan here.
01253       // Note that pivoting is hardly necessary because our arithmetic is infinitely accurate,
01254       // but we still need to find a '1' when we encounter a '0' on the main diagonal of course.
01255       // There will always be at least one '1' in every column, so that partial pivoting suffices
01256       // (speeding up things obviously), with the exception of the case where that '1' is only
01257       // found in row 0 (which is our 'singular' row and needs some special attention).
01258       // The row swapping is done because it is needed if we want to do our work "in-place",
01259       // reducing the amount of memory needed by a factor of two.
01260 
01261       LibEccDebug(if (dc::gaussj.is_on()) print_matrix(matrix, pivotted));
01262 
          // rowswaps[i] records which row was used as pivot for entry i; only rowswaps[0] is
          // given a value here, the remaining entries are assigned inside the main loop.
          // colswaps / colswaps_inverse start out as the identity permutation.
01263       unsigned int rowswaps[m];
01264       rowswaps[0] = 0;
01265       unsigned int colswaps[m], colswaps_inverse[m];
01266       for (unsigned int row = 0; row < m; ++row)
01267       {
01268         colswaps[row] = row;
01269         colswaps_inverse[row] = row;
01270       }
01271 
01272       // Run over all remaining columns and wipe them, immediately replacing them with the result
01273       // since once a column is wiped we don't need its contents anymore.  Moreover, while wiping
01274       // the column it is optionally swapped with another column at the same time.  This, of course,
01275       // is only done to make the code not understandable anymore for you.
01276 #if LIBECC_SWAPCOLUMNS
01277       for (unsigned int colcnt = (m + 1) / 2; colcnt < m; ++colcnt)
01278 #else
01279       for (unsigned int wipecol = (m + 1) / 2; wipecol < m; ++wipecol)
01280 #endif
01281       {
01282 #if LIBECC_SWAPCOLUMNS
01283         // Find the next row that wasn't already wiped.
01284         unsigned int wipecol = colswaps[colcnt];
01285 #if ECC_DEBUG
01286         LibEccDout(dc::gaussj, "colcnt = " << colcnt);
01287         for (unsigned int row = 0; row < m; ++row)
01288         {
01289           LibEccDout(dc::gaussj, "colswaps[" << row << "] = " << colswaps[row] << "\t\tcolswaps_inverse[" << row << "] = " << colswaps_inverse[row]);
01290           assert(colswaps[colswaps_inverse[row]] == row);
01291           assert(colswaps_inverse[colswaps[row]] == row);
01292         }
01293         LibEccDout(dc::polynomial|noprefix_cf, "");
01294 #endif
01295 #endif
01296 
01297         // First find a suitable row to wipe with.
01298         // This searching is called 'pivoting'.
01299         LibEccDout(dc::gaussj, "Searching for suitable row to wipe with in column " << wipecol);
01300         unsigned int pivotrow;
01301         if (!matrix[wipecol].test(wipecol) || pivotted.test(wipecol))
01302         {
              // The diagonal element cannot be used.  Scan the rows below wipecol first; once
              // the end is reached, try row 0 and then the rows from (m + 1) / 2 up to wipecol.
01303           for (pivotrow = wipecol;;)
01304           {
01305             if (++pivotrow == m)
01306             {
01307               if (matrix[0].test(wipecol) && !pivotted.template test<0>())
01308                 pivotrow = 0;
01309               else
01310               {
01311                 for (pivotrow = (m + 1) / 2; pivotrow < wipecol; ++pivotrow)
01312                   if (matrix[pivotrow].test(wipecol) && !pivotted.test(pivotrow))
01313                     break;
01314               }
01315               if (pivotrow == wipecol)
01316               {
01317                 // This happens when we swapped with column 0 (which is all zeroes), for example when m == 14.
01318                 // Just ignore this column.
                    // NOTE(review): this path skips the rowswaps[] assignment further down, and
                    // rowswaps has no initializer apart from rowswaps[0]; the rotation loop after
                    // the main loop therefore appears to read an unset entry — confirm.
01319                 pivotrow = m;                   // Flag that we need to continue the main loop.
01320                 pivotted.set(wipecol);
01321                 matrix[wipecol].set(wipecol);   // Copy identity matrix over.
01322                 break;
01323               }
01324             }
01325             if (matrix[pivotrow].test(wipecol) && !pivotted.test(pivotrow))
01326               break;
01327           }
01328           if (pivotrow == m)
01329             continue;
01330         }
01331         else
01332           pivotrow = wipecol;
01333         LibEccDout(dc::gaussj, "Using row " << pivotrow << " to wipe column " << wipecol);
01334         LibEccDout(dc::gaussj, "Before:");
01335         LibEccDebug(if (dc::gaussj.is_on()) print_matrix(matrix, pivotted));
01336         pivotted.set(pivotrow);
01337 #if LIBECC_SWAPCOLUMNS
01338         rowswaps[colcnt] = pivotrow;
01339         LibEccDout(dc::gaussj, "Setting rowswaps[" << colcnt << "] to " << pivotrow);
01340 #else
01341         rowswaps[wipecol] = pivotrow;                   // We temporarily use row 'pivotrow' to store row 'wipecol'.
01342         LibEccDout(dc::gaussj, "Setting rowswaps[" << wipecol << "] to " << pivotrow);
01343 #endif
01344         if (pivotrow == wipecol)
01345         {
              // Plain case: the pivot sits on the diagonal, no column swap is involved.
01346 #if LIBECC_INPLACE
01347           matrix[pivotrow].set(wipecol);                // Store the inverse in-place, destroying the original.
01348 #endif
01349           for (unsigned int row = 0; row < m; ++row)
01350           {
01351             if (row == pivotrow)
01352               continue;
01353             if (matrix[row].test(wipecol))
01354             {
01355 #if LIBECC_INPLACE
01356               matrix[row].clear(wipecol);               // Store the inverse in-place, destroying the original.
01357 #endif
01358               matrix[row] ^= matrix[pivotrow]; 
01359             }
01360           }
01361         }
01362         else
01363         {
01364           // This block contains the main magic.  It's hard to understand I am afraid.
01365           // Basically this does the same as the code block above, but at the same time
01366           // swaps the columns 'wipecol' and 'pivotrow'.
01367 
01368 #if LIBECC_SWAPCOLUMNS
01369           // Swap pivot row bits, and set the bit in pivotrow (that's the identity matrix bit).
01370           if (matrix[pivotrow].test(pivotrow) != matrix[pivotrow].test(wipecol))
01371           {
01372             matrix[pivotrow].flip(wipecol);
01373 #if !LIBECC_INPLACE
01374             matrix[pivotrow].flip(pivotrow);    // No need to flip the 'pivotrow' column when we set it in the next line.
01375 #endif
01376           }
01377 #endif
01378 #if LIBECC_INPLACE
01379           matrix[pivotrow].set(pivotrow);               // Store the inverse in-place, destroying the original.
01380 #endif
01381           for (unsigned int row = 0; row < m; ++row)
01382           {
01383             if (row == pivotrow)                        // Don't wipe the row that we use to wipe.
01384               continue;
01385             matrixrow_type& mrow = matrix[row];
01386             if (mrow.test(wipecol))
01387             {
01388 #if LIBECC_SWAPCOLUMNS
01389               if (!mrow.test(pivotrow))         // If the value in the two columns differ,
01390               {
01391                 mrow.clear(wipecol);            // swap the two values.
01392 #if !LIBECC_INPLACE
01393                 mrow.set(pivotrow);             // No need to set pivotrow when it is overwritten in the next line.
01394 #endif
01395               }
01396 #endif
01397 #if LIBECC_INPLACE
01398               mrow.clear(pivotrow);             // Store the inverse in-place, destroying the original.
01399                                                 // This represents a 0 from the identity matrix.
01400 #endif
01401               mrow ^= matrix[pivotrow];         // Ok, now the columns have been swapped and to-be-wiped column
01402                                                 // has been replaced with a clean identity matrix bit. Perform
01403                                                 // the actual wiping.
01404             }
01405 #if LIBECC_SWAPCOLUMNS
01406             else if (mrow.test(pivotrow))       // Are the pivotrow and wipecol different?
01407             {
01408               mrow.set(wipecol);                // Then flip both, exchanging them effectively. If they
01409               mrow.clear(pivotrow);             //   were the same, consider them exchanged anyway.
01410             }
01411 #endif
01412           }
01413 #if LIBECC_SWAPCOLUMNS
01414           LibEccDout(dc::gaussj, "Also swapped columns " << pivotrow << " and " << wipecol);
01415           // Keep colswaps up to date.  We need colswaps_inverse to do that, therefore
01416           // we need to keep colswaps_inverse up to date too.
01417           std::swap(colswaps[colswaps_inverse[wipecol]], colswaps[colswaps_inverse[pivotrow]]);
01418           std::swap(colswaps_inverse[wipecol], colswaps_inverse[pivotrow]);
01419 #endif
01420         }
01421         LibEccDout(dc::gaussj, "After:");
01422         LibEccDebug(if (dc::gaussj.is_on()) print_matrix(matrix, pivotted));
01423       }
01424 
01425 #if ECC_DEBUG
          // Debug only: list every recorded row swap (entry 0 and entries from (m + 1) / 2 on).
01426       for (unsigned int i = 0; i < m; ++i)
01427       {
01428         if (rowswaps[i] != i)
01429           LibEccDout(dc::gaussj, i << " : " << rowswaps[i]);
01430         // Skip the first half of the matrix.
01431         if (i == 0)
01432           i = (m + 1) / 2 - 1;
01433       }
01434       LibEccDout(dc::gaussj|noprefix_cf, "");
01435 #endif
01436 
          // If row 0 was used as a pivot, pair it with the first row at or above (m + 1) / 2
          // that was never used as one.
          // NOTE(review): the scan below has no upper bound on row0; it relies on such an
          // unused row existing — confirm.
01437       if (pivotted.test(0))
01438       {
01439         int row0 = (m + 1) / 2;
01440         while (pivotted.test(row0))
01441           ++row0;
01442         rowswaps[0] = row0;
01443         pivotted.set(row0);
01444       }
01445 
01446       // Next perform some row rotations, in order to get all rows on their correct places again.
01447       for (unsigned int i = 0; i < m; ++i)
01448       {
01449         if (rowswaps[i] != i)
01450         {
              // Follow the cycle that starts at i, moving each row into its place.
              // NOTE(review): temp is always bitset<2 * m>, whereas the rows are matrixrow_type
              // (bitset<m> when LIBECC_AUGMENTED is 0); matrixrow_type looks like the intended
              // type here — confirm.
01451           unsigned int j = i;
01452           bitset<2 * m> temp = matrix[j];
01453           LibEccDout(dc::gaussj|continued_cf, j);
01454           do
01455           {
01456             matrix[j] = matrix[rowswaps[j]];
01457             LibEccDout(dc::continued, " <-- " << rowswaps[j]);
01458             j = rowswaps[j];
01459           }
01460           while (rowswaps[j] != i);
01461           matrix[j] = temp;
01462           LibEccDout(dc::finish, " <-- " << i);
01463           j = i;
01464           do
01465           {
01466             int pj = j;
01467             j = rowswaps[pj];
01468             // Update the administration so that we won't try to rotate them again.
01469             rowswaps[pj] = pj;
01470           }
01471           while (j != i);
01472         }
01473         // Skip the first half of the matrix.
01474         if (i == 0)
01475           i = (m + 1) / 2 - 1;
01476       }
01477 
01478       LibEccDebug(if (dc::gaussj.is_on()) print_matrix(matrix, pivotted));
01479       matrix_initialized = true;
01480     }
01481 
01482     // Multiply the matrix with cdb2.
        // Bit 'row' of the result is the parity of the bitwise AND of matrix row 'row' and cdb2.
01483     for (unsigned int row = 0; row < m; ++row)
01484     {
01485 #if LIBECC_AUGMENTED
01486 #if LIBECC_INPLACE
01487       bitset<m> tmp = matrix[row];
01488 #else
01489       bitset<2 * m> tmp2;
01490       matrix[row].template shift_op<m, right, assign>(tmp2);
01491       bitset<m> tmp = tmp2;
01492 #endif
01493       tmp &= cdb2.get_bitset();
01494 #else
01495       bitset<m> tmp = matrix[row] & cdb2.get_bitset();
01496 #endif
01497       if (tmp.odd())
01498         M_coefficients.set(row);
01499     }
01500 
01501     // Finally, multiply with b to get x.
01502     *this *= b;
01503   }
01504 
      // Equality of two polynomials of the same field: true when their M_coefficients
      // members compare equal.
01505 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01506   inline bool
01507   operator==(polynomial<m, k, k1, k2> const& p1, polynomial<m, k, k1, k2> const& p2)
01508   {
01509     return p1.M_coefficients == p2.M_coefficients;
01510   }
01511 
      // Equality with an xor expression on the left-hand side: `expr` is converted to a
      // polynomial first, then compared with p2 using the polynomial/polynomial operator==.
01512 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01513   inline bool
01514   operator==(typename polynomial<m, k, k1, k2>::xor_type const& expr, polynomial<m, k, k1, k2> const& p2)
01515   {
01516     return polynomial<m, k, k1, k2>(expr) == p2;
01517   }
01518 
      // Equality with an xor expression on the right-hand side: `expr` is converted to a
      // polynomial first, then p1 is compared with it using the polynomial/polynomial operator==.
01519 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01520   inline bool
01521   operator==(polynomial<m, k, k1, k2> const& p1, typename polynomial<m, k, k1, k2>::xor_type const& expr)
01522   {
01523     return p1 == polynomial<m, k, k1, k2>(expr);
01524   }
01525 
      // Inequality of two polynomials of the same field: true when their M_coefficients
      // members compare unequal.
01526 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01527   inline bool
01528   operator!=(polynomial<m, k, k1, k2> const& p1, polynomial<m, k, k1, k2> const& p2)
01529   {
01530     return p1.M_coefficients != p2.M_coefficients;
01531   }
01532 
      // Inequality with an xor expression on the left-hand side: `expr` is converted to a
      // polynomial first, then compared with p2 using the polynomial/polynomial operator!=.
01533 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01534   inline bool
01535   operator!=(typename polynomial<m, k, k1, k2>::xor_type const& expr, polynomial<m, k, k1, k2> const& p2)
01536   {
01537     return polynomial<m, k, k1, k2>(expr) != p2;
01538   }
01539 
      // Inequality with an xor expression on the right-hand side: `expr` is converted to a
      // polynomial first, then p1 is compared with it using the polynomial/polynomial operator!=.
01540 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01541   inline bool
01542   operator!=(polynomial<m, k, k1, k2> const& p1, typename polynomial<m, k, k1, k2>::xor_type const& expr)
01543   {
01544     return p1 != polynomial<m, k, k1, k2>(expr);
01545   }
01546 
      // Sum of two polynomials.  No polynomial is computed here: the function returns an
      // xor_type object constructed from the M_coefficients of both operands.
01547 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01548   inline typename polynomial<m, k, k1, k2>::xor_type
01549   operator+(polynomial<m, k, k1, k2> const& p1, polynomial<m, k, k1, k2> const& p2)
01550   {
01551     return typename polynomial<m, k, k1, k2>::xor_type(p1.M_coefficients, p2.M_coefficients);
01552   }
01553 
      // Difference of two polynomials.  Returns exactly the same xor_type construction as
      // operator+ — presumably because the coefficients are single bits, so that subtracting
      // and adding are the same XOR.
01554 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01555   inline typename polynomial<m, k, k1, k2>::xor_type
01556   operator-(polynomial<m, k, k1, k2> const& p1, polynomial<m, k, k1, k2> const& p2)
01557   {
01558     return typename polynomial<m, k, k1, k2>::xor_type(p1.M_coefficients, p2.M_coefficients);
01559   }
01560 
      // Product of two polynomials.  A default-constructed result object is filled by
      // p1.multiply_with(p2, ...), which writes into the result's M_coefficients, and is
      // then returned by value.
01561 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01562   inline polynomial<m, k, k1, k2>
01563   operator*(polynomial<m, k, k1, k2> const& p1, polynomial<m, k, k1, k2> const& p2)
01564   {
01565     polynomial<m, k, k1, k2> result;
01566     p1.multiply_with(p2, result.M_coefficients);
01567     return result;
01568   }
01569 
      // Product with an xor expression on the left-hand side: `expr` is converted to a
      // polynomial first, then multiplied with p2 using the polynomial/polynomial operator*.
01570 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01571   inline polynomial<m, k, k1, k2>
01572   operator*(typename polynomial<m, k, k1, k2>::xor_type const& expr, polynomial<m, k, k1, k2> const& p2)
01573   {
01574     return polynomial<m, k, k1, k2>(expr) * p2;
01575   }
01576 
      // Product with an xor expression on the right-hand side: `expr` is converted to a
      // polynomial first, then p1 is multiplied with it using the polynomial/polynomial operator*.
01577 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01578   inline polynomial<m, k, k1, k2>
01579   operator*(polynomial<m, k, k1, k2> const& p1, typename polynomial<m, k, k1, k2>::xor_type const& expr)
01580   {
01581     return p1 * polynomial<m, k, k1, k2>(expr);
01582   }
01583 
01584 
      // Quotient of two polynomials.  Works on a copy of e1, to which operator/= with e2 is
      // applied; the copy is returned by value and neither operand is modified.
01585 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01586   inline polynomial<m, k, k1, k2>
01587   operator/(polynomial<m, k, k1, k2> const& e1, polynomial<m, k, k1, k2> const& e2)
01588   {
01589     polynomial<m, k, k1, k2> tmp(e1);
01590     tmp /= e2;
01591     return tmp;
01592   }
01593 
      // Quotient with an xor expression as dividend: `expr` is converted to a polynomial
      // first, then divided by p2 using the polynomial/polynomial operator/.
01594 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01595   inline polynomial<m, k, k1, k2>
01596   operator/(typename polynomial<m, k, k1, k2>::xor_type const& expr, polynomial<m, k, k1, k2> const& p2)
01597   {
01598     return polynomial<m, k, k1, k2>(expr) / p2;
01599   }
01600 
      // Quotient with an xor expression as divisor: `expr` is converted to a polynomial
      // first, then p1 is divided by it using the polynomial/polynomial operator/.
01601 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01602   inline polynomial<m, k, k1, k2>
01603   operator/(polynomial<m, k, k1, k2> const& p1, typename polynomial<m, k, k1, k2>::xor_type const& expr)
01604   {
01605     return p1 / polynomial<m, k, k1, k2>(expr);
01606   }
01607 
      // Stream insertion for a polynomial: delegates to base2_print_on() of the polynomial's
      // M_coefficients and returns the stream to allow chaining.
01608 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01609   std::ostream& operator<<(std::ostream& os, polynomial<m, k, k1, k2> const& p)
01610   {
01611     p.M_coefficients.base2_print_on(os);
01612     return os;
01613   }
01614 
      // Stream insertion for an xor expression: the expression is converted to a polynomial,
      // whose M_coefficients are then printed with base2_print_on().  Returns the stream.
      //
      // NOTE(review): m, k, k1 and k2 occur here only inside
      // `typename polynomial<m, k, k1, k2>::xor_type`, which is a non-deduced context, so a
      // plain `os << expr` cannot deduce them; this overload looks reachable only with
      // explicitly specified template arguments — confirm how it is meant to be used.
01615 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01616   std::ostream& operator<<(std::ostream& os, typename polynomial<m, k, k1, k2>::xor_type const& expr)
01617   {
01618     polynomial<m, k, k1, k2> p(expr);
01619     p.M_coefficients.base2_print_on(os);
01620     return os;
01621   }
01622 
      // Out-of-class definitions of the static data members used by calculate_normal():
      // S_normal_initialized is the flag that calculate_normal() sets to true when it is done,
      // S_normal is the bitset in which calculate_normal() sets bits.
01623 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01624   bool polynomial<m, k, k1, k2>::S_normal_initialized;
01625 
01626 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01627   bitset<m> polynomial<m, k, k1, k2>::S_normal;
01628  
      // Fill the static bitset S_normal and mark it as initialized.
      //
      // Two implementations are present:
      //   * The `#if 0` branch is compiled out.  For every single-bit polynomial it adds up
      //     repeated squares of that polynomial and sets the corresponding bit of S_normal
      //     when bit 0 of the sum is set.
      //   * The active `#else` branch sets bit 0 when m is odd, bit m - k when m - k is odd
      //     and, if k1 is non-zero, bits m - k1 and m - k2 under the same oddness test.
      //
      // Bits are only ever set here, never cleared.  S_normal_initialized is set to true at
      // the end in both variants.
01629 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01630   void polynomial<m, k, k1, k2>::calculate_normal(void)
01631   {
01632 #if 0
        // Reference implementation (disabled): evaluate each basis element separately.
01633     bitset<m> single_bit(1);
01634     polynomial trace;
01635     bitset_digit_t nextfrob1_buf[square_digits];
01636     bitset_digit_t nextfrob2_buf[square_digits];
01637     polynomial* nextfrob1;
01638     polynomial* nextfrob2;
01639     for (int bit = 0; bit < m; ++bit)
01640     {
01641       trace = single_bit;
01642       nextfrob1 = &trace.square(nextfrob1_buf);
          // Each iteration adds two further squarings; the two buffers are used alternately.
01643       for (int i = 0; i < (m - 1) / 2; ++i)
01644       {
01645         nextfrob2 = &nextfrob1->square(nextfrob2_buf);
01646         trace += *nextfrob1 + *nextfrob2;
01647         if ((m & 1) && i == (m - 3) / 2)
01648           break;
01649         nextfrob1 = &nextfrob2->square(nextfrob1_buf);
01650       }
01651       if (!(m & 1))
01652         trace += *nextfrob1;
01653       if (trace.get_bitset().template test<0>())
01654         S_normal.set(bit);
01655       single_bit.template shift_op<1, libecc::left, libecc::assign>(single_bit);
01656     }
01657 #else
01658     // We can do that faster... I didn't prove this yet, but it works.
01659     if ((m & 1))
01660       S_normal.template set<0>();
01661     if (((m - k) & 1))
01662       S_normal.template set<m - k>();
01663     if (k1)
01664     {
01665       if (((m - k1) & 1))
01666         S_normal.template set<m - k1>();
01667       if (((m - k2) & 1))
01668         S_normal.template set<m - k2>();
01669     }
01670 #endif
01671     S_normal_initialized = true;
01672   }
01673 
01674 } // namespace libecc
01675 
01676 #include <libecc/square.hcc>    // File with different copyright.
01677 
01678 #endif // LIBECC_POLYNOMIAL_H
Copyright © 2002-2008 Carlo Wood.  All rights reserved.