Mercurial > hg > vamp-build-and-test
comparison DEPENDENCIES/generic/include/boost/random/detail/polynomial.hpp @ 102:f46d142149f5
Whoops, finish that update
author | Chris Cannam |
---|---|
date | Mon, 07 Sep 2015 11:13:41 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
101:c530137014c0 | 102:f46d142149f5 |
---|---|
1 /* boost random/detail/polynomial.hpp header file | |
2 * | |
3 * Copyright Steven Watanabe 2014 | |
4 * Distributed under the Boost Software License, Version 1.0. (See | |
5 * accompanying file LICENSE_1_0.txt or copy at | |
6 * http://www.boost.org/LICENSE_1_0.txt) | |
7 * | |
8 * See http://www.boost.org for most recent version including documentation. | |
9 * | |
10 * $Id$ | |
11 */ | |
12 | |
13 #ifndef BOOST_RANDOM_DETAIL_POLYNOMIAL_HPP | |
14 #define BOOST_RANDOM_DETAIL_POLYNOMIAL_HPP | |
15 | |
16 #include <cstddef> | |
17 #include <limits> | |
18 #include <vector> | |
19 #include <algorithm> | |
20 #include <boost/assert.hpp> | |
21 #include <boost/cstdint.hpp> | |
22 | |
23 namespace boost { | |
24 namespace random { | |
25 namespace detail { | |
26 | |
27 class polynomial_ops { | |
28 public: | |
29 typedef unsigned long digit_t; | |
30 | |
31 static void add(std::size_t size, const digit_t * lhs, | |
32 const digit_t * rhs, digit_t * output) | |
33 { | |
34 for(std::size_t i = 0; i < size; ++i) { | |
35 output[i] = lhs[i] ^ rhs[i]; | |
36 } | |
37 } | |
38 | |
39 static void add_shifted_inplace(std::size_t size, const digit_t * lhs, | |
40 digit_t * output, std::size_t shift) | |
41 { | |
42 if(shift == 0) { | |
43 add(size, lhs, output, output); | |
44 return; | |
45 } | |
46 std::size_t bits = std::numeric_limits<digit_t>::digits; | |
47 digit_t prev = 0; | |
48 for(std::size_t i = 0; i < size; ++i) { | |
49 digit_t tmp = lhs[i]; | |
50 output[i] ^= (tmp << shift) | (prev >> (bits-shift)); | |
51 prev = tmp; | |
52 } | |
53 output[size] ^= (prev >> (bits-shift)); | |
54 } | |
55 | |
56 static void multiply_simple(std::size_t size, const digit_t * lhs, | |
57 const digit_t * rhs, digit_t * output) | |
58 { | |
59 std::size_t bits = std::numeric_limits<digit_t>::digits; | |
60 for(std::size_t i = 0; i < 2*size; ++i) { | |
61 output[i] = 0; | |
62 } | |
63 for(std::size_t i = 0; i < size; ++i) { | |
64 for(std::size_t j = 0; j < bits; ++j) { | |
65 if((lhs[i] & (digit_t(1) << j)) != 0) { | |
66 add_shifted_inplace(size, rhs, output + i, j); | |
67 } | |
68 } | |
69 } | |
70 } | |
71 | |
72 // memory requirements: (size - cutoff) * 4 + next_smaller | |
73 static void multiply_karatsuba(std::size_t size, | |
74 const digit_t * lhs, const digit_t * rhs, | |
75 digit_t * output) | |
76 { | |
77 if(size < 64) { | |
78 multiply_simple(size, lhs, rhs, output); | |
79 return; | |
80 } | |
81 // split in half | |
82 std::size_t cutoff = size/2; | |
83 multiply_karatsuba(cutoff, lhs, rhs, output); | |
84 multiply_karatsuba(size - cutoff, lhs + cutoff, rhs + cutoff, | |
85 output + cutoff*2); | |
86 std::vector<digit_t> local1(size - cutoff); | |
87 std::vector<digit_t> local2(size - cutoff); | |
88 // combine the digits for the inner multiply | |
89 add(cutoff, lhs, lhs + cutoff, &local1[0]); | |
90 if(size & 1) local1[cutoff] = lhs[size - 1]; | |
91 add(cutoff, rhs + cutoff, rhs, &local2[0]); | |
92 if(size & 1) local2[cutoff] = rhs[size - 1]; | |
93 std::vector<digit_t> local3((size - cutoff) * 2); | |
94 multiply_karatsuba(size - cutoff, &local1[0], &local2[0], &local3[0]); | |
95 add(cutoff * 2, output, &local3[0], &local3[0]); | |
96 add((size - cutoff) * 2, output + cutoff*2, &local3[0], &local3[0]); | |
97 // Finally, add the inner result | |
98 add((size - cutoff) * 2, output + cutoff, &local3[0], output + cutoff); | |
99 } | |
100 | |
101 static void multiply_add_karatsuba(std::size_t size, | |
102 const digit_t * lhs, const digit_t * rhs, | |
103 digit_t * output) | |
104 { | |
105 std::vector<digit_t> buf(size * 2); | |
106 multiply_karatsuba(size, lhs, rhs, &buf[0]); | |
107 add(size * 2, &buf[0], output, output); | |
108 } | |
109 | |
110 static void multiply(const digit_t * lhs, std::size_t lhs_size, | |
111 const digit_t * rhs, std::size_t rhs_size, | |
112 digit_t * output) | |
113 { | |
114 std::fill_n(output, lhs_size + rhs_size, digit_t(0)); | |
115 multiply_add(lhs, lhs_size, rhs, rhs_size, output); | |
116 } | |
117 | |
118 static void multiply_add(const digit_t * lhs, std::size_t lhs_size, | |
119 const digit_t * rhs, std::size_t rhs_size, | |
120 digit_t * output) | |
121 { | |
122 // split into pieces that can be passed to | |
123 // karatsuba multiply. | |
124 while(lhs_size != 0) { | |
125 if(lhs_size < rhs_size) { | |
126 std::swap(lhs, rhs); | |
127 std::swap(lhs_size, rhs_size); | |
128 } | |
129 | |
130 multiply_add_karatsuba(rhs_size, lhs, rhs, output); | |
131 | |
132 lhs += rhs_size; | |
133 lhs_size -= rhs_size; | |
134 output += rhs_size; | |
135 } | |
136 } | |
137 | |
138 static void copy_bits(const digit_t * x, std::size_t low, std::size_t high, | |
139 digit_t * out) | |
140 { | |
141 const std::size_t bits = std::numeric_limits<digit_t>::digits; | |
142 std::size_t offset = low/bits; | |
143 x += offset; | |
144 low -= offset*bits; | |
145 high -= offset*bits; | |
146 std::size_t n = (high-low)/bits; | |
147 if(low == 0) { | |
148 for(std::size_t i = 0; i < n; ++i) { | |
149 out[i] = x[i]; | |
150 } | |
151 } else { | |
152 for(std::size_t i = 0; i < n; ++i) { | |
153 out[i] = (x[i] >> low) | (x[i+1] << (bits-low)); | |
154 } | |
155 } | |
156 if((high-low)%bits) { | |
157 digit_t low_mask = (digit_t(1) << ((high-low)%bits)) - 1; | |
158 digit_t result = (x[n] >> low); | |
159 if(low != 0 && (n+1)*bits < high) { | |
160 result |= (x[n+1] << (bits-low)); | |
161 } | |
162 out[n] = (result & low_mask); | |
163 } | |
164 } | |
165 | |
166 static void shift_left(digit_t * val, std::size_t size, std::size_t shift) | |
167 { | |
168 const std::size_t bits = std::numeric_limits<digit_t>::digits; | |
169 BOOST_ASSERT(shift > 0); | |
170 BOOST_ASSERT(shift < bits); | |
171 digit_t prev = 0; | |
172 for(std::size_t i = 0; i < size; ++i) { | |
173 digit_t tmp = val[i]; | |
174 val[i] = (prev >> (bits - shift)) | (val[i] << shift); | |
175 prev = tmp; | |
176 } | |
177 } | |
178 | |
179 static digit_t sqr(digit_t val) { | |
180 const std::size_t bits = std::numeric_limits<digit_t>::digits; | |
181 digit_t mask = (digit_t(1) << bits/2) - 1; | |
182 for(std::size_t i = bits; i > 1; i /= 2) { | |
183 val = ((val & ~mask) << i/2) | (val & mask); | |
184 mask = mask & (mask >> i/4); | |
185 mask = mask | (mask << i/2); | |
186 } | |
187 return val; | |
188 } | |
189 | |
190 static void sqr(digit_t * val, std::size_t size) | |
191 { | |
192 const std::size_t bits = std::numeric_limits<digit_t>::digits; | |
193 digit_t mask = (digit_t(1) << bits/2) - 1; | |
194 for(std::size_t i = 0; i < size; ++i) { | |
195 digit_t x = val[size - i - 1]; | |
196 val[(size - i - 1) * 2] = sqr(x & mask); | |
197 val[(size - i - 1) * 2 + 1] = sqr(x >> bits/2); | |
198 } | |
199 } | |
200 | |
201 // optimized for the case when the modulus has few bits set. | |
202 struct sparse_mod { | |
203 sparse_mod(const digit_t * divisor, std::size_t divisor_bits) | |
204 { | |
205 const std::size_t bits = std::numeric_limits<digit_t>::digits; | |
206 _remainder_bits = divisor_bits - 1; | |
207 for(std::size_t i = 0; i < divisor_bits; ++i) { | |
208 if(divisor[i/bits] & (digit_t(1) << i%bits)) { | |
209 _bit_indices.push_back(i); | |
210 } | |
211 } | |
212 BOOST_ASSERT(_bit_indices.back() == divisor_bits - 1); | |
213 _bit_indices.pop_back(); | |
214 if(_bit_indices.empty()) { | |
215 _block_bits = divisor_bits; | |
216 _lower_bits = 0; | |
217 } else { | |
218 _block_bits = divisor_bits - _bit_indices.back() - 1; | |
219 _lower_bits = _bit_indices.back() + 1; | |
220 } | |
221 | |
222 _partial_quotient.resize((_block_bits + bits - 1)/bits); | |
223 } | |
224 void operator()(digit_t * dividend, std::size_t dividend_bits) | |
225 { | |
226 const std::size_t bits = std::numeric_limits<digit_t>::digits; | |
227 while(dividend_bits > _remainder_bits) { | |
228 std::size_t block_start = (std::max)(dividend_bits - _block_bits, _remainder_bits); | |
229 std::size_t block_size = (dividend_bits - block_start + bits - 1) / bits; | |
230 copy_bits(dividend, block_start, dividend_bits, &_partial_quotient[0]); | |
231 for(std::size_t i = 0; i < _bit_indices.size(); ++i) { | |
232 std::size_t pos = _bit_indices[i] + block_start - _remainder_bits; | |
233 add_shifted_inplace(block_size, &_partial_quotient[0], dividend + pos/bits, pos%bits); | |
234 } | |
235 add_shifted_inplace(block_size, &_partial_quotient[0], dividend + block_start/bits, block_start%bits); | |
236 dividend_bits = block_start; | |
237 } | |
238 } | |
239 std::vector<digit_t> _partial_quotient; | |
240 std::size_t _remainder_bits; | |
241 std::size_t _block_bits; | |
242 std::size_t _lower_bits; | |
243 std::vector<std::size_t> _bit_indices; | |
244 }; | |
245 | |
246 // base should have the same number of bits as mod | |
247 // base, and mod should both be able to hold a power | |
248 // of 2 >= mod_bits. out needs to be twice as large. | |
249 static void mod_pow_x(boost::uintmax_t exponent, const digit_t * mod, std::size_t mod_bits, digit_t * out) | |
250 { | |
251 const std::size_t bits = std::numeric_limits<digit_t>::digits; | |
252 const std::size_t n = (mod_bits + bits - 1) / bits; | |
253 const std::size_t highbit = mod_bits - 1; | |
254 if(exponent == 0) { | |
255 out[0] = 1; | |
256 std::fill_n(out + 1, n - 1, digit_t(0)); | |
257 return; | |
258 } | |
259 boost::uintmax_t i = std::numeric_limits<boost::uintmax_t>::digits - 1; | |
260 while(((boost::uintmax_t(1) << i) & exponent) == 0) { | |
261 --i; | |
262 } | |
263 out[0] = 2; | |
264 std::fill_n(out + 1, n - 1, digit_t(0)); | |
265 sparse_mod m(mod, mod_bits); | |
266 while(i--) { | |
267 sqr(out, n); | |
268 m(out, 2 * mod_bits - 1); | |
269 if((boost::uintmax_t(1) << i) & exponent) { | |
270 shift_left(out, n, 1); | |
271 if(out[highbit / bits] & (digit_t(1) << highbit%bits)) | |
272 add(n, out, mod, out); | |
273 } | |
274 } | |
275 } | |
276 }; | |
277 | |
278 class polynomial | |
279 { | |
280 typedef polynomial_ops::digit_t digit_t; | |
281 public: | |
282 polynomial() : _size(0) {} | |
283 class reference { | |
284 public: | |
285 reference(digit_t &value, int idx) | |
286 : _value(value), _idx(idx) {} | |
287 operator bool() const { return (_value & (digit_t(1) << _idx)) != 0; } | |
288 reference& operator=(bool b) | |
289 { | |
290 if(b) { | |
291 _value |= (digit_t(1) << _idx); | |
292 } else { | |
293 _value &= ~(digit_t(1) << _idx); | |
294 } | |
295 return *this; | |
296 } | |
297 reference &operator^=(bool b) | |
298 { | |
299 _value ^= (digit_t(b) << _idx); | |
300 return *this; | |
301 } | |
302 | |
303 reference &operator=(const reference &other) | |
304 { | |
305 return *this = static_cast<bool>(other); | |
306 } | |
307 private: | |
308 digit_t &_value; | |
309 int _idx; | |
310 }; | |
311 reference operator[](std::size_t i) | |
312 { | |
313 static const std::size_t bits = std::numeric_limits<digit_t>::digits; | |
314 ensure_bit(i); | |
315 return reference(_storage[i/bits], i%bits); | |
316 } | |
317 bool operator[](std::size_t i) const | |
318 { | |
319 static const std::size_t bits = std::numeric_limits<digit_t>::digits; | |
320 if(i < size()) | |
321 return (_storage[i/bits] & (digit_t(1) << (i%bits))) != 0; | |
322 else | |
323 return false; | |
324 } | |
325 std::size_t size() const | |
326 { | |
327 return _size; | |
328 } | |
329 void resize(std::size_t n) | |
330 { | |
331 static const std::size_t bits = std::numeric_limits<digit_t>::digits; | |
332 _storage.resize((n + bits - 1)/bits); | |
333 // clear the high order bits in case we're shrinking. | |
334 if(n%bits) { | |
335 _storage.back() &= ((digit_t(1) << (n%bits)) - 1); | |
336 } | |
337 _size = n; | |
338 } | |
339 friend polynomial operator*(const polynomial &lhs, const polynomial &rhs); | |
340 friend polynomial mod_pow_x(boost::uintmax_t exponent, polynomial mod); | |
341 private: | |
342 std::vector<polynomial_ops::digit_t> _storage; | |
343 std::size_t _size; | |
344 void ensure_bit(std::size_t i) | |
345 { | |
346 if(i >= size()) { | |
347 resize(i + 1); | |
348 } | |
349 } | |
350 void normalize() | |
351 { | |
352 while(size() && (*this)[size() - 1] == 0) | |
353 resize(size() - 1); | |
354 } | |
355 }; | |
356 | |
357 inline polynomial operator*(const polynomial &lhs, const polynomial &rhs) | |
358 { | |
359 polynomial result; | |
360 result._storage.resize(lhs._storage.size() + rhs._storage.size()); | |
361 polynomial_ops::multiply(&lhs._storage[0], lhs._storage.size(), | |
362 &rhs._storage[0], rhs._storage.size(), | |
363 &result._storage[0]); | |
364 result._size = lhs._size + rhs._size; | |
365 return result; | |
366 } | |
367 | |
368 inline polynomial mod_pow_x(boost::uintmax_t exponent, polynomial mod) | |
369 { | |
370 polynomial result; | |
371 mod.normalize(); | |
372 std::size_t mod_size = mod.size(); | |
373 result._storage.resize(mod._storage.size() * 2); | |
374 result._size = mod.size() * 2; | |
375 polynomial_ops::mod_pow_x(exponent, &mod._storage[0], mod_size, &result._storage[0]); | |
376 result.resize(mod.size() - 1); | |
377 return result; | |
378 } | |
379 | |
380 } | |
381 } | |
382 } | |
383 | |
384 #endif // BOOST_RANDOM_DETAIL_POLYNOMIAL_HPP |