Research on fast algorithm of factorial

Keywords: Algorithm linear algebra

solve the problem

Find a factorial n !   m o d P n! \ mod P n! modP.

executive summary

In this paper, a fast factorial algorithm is proposed, which has no requirement of prime or composite number for modulus P P P P for prime numbers p p The maximum decomposition of p is p k p^k pk, the algorithm can achieve in O ( m a x p k ∣ P k 2 p l o g p n ) O(max_{p^k|P}k^2plog_pn) O(maxpk ∣ P ∣ k2plogp ∣ n) r ∗ p t r*p^t The form of r * pt, where r r r and p p p coprime.
At the end of this paper, the optimization direction is pointed out, which can optimize the algorithm to O ( m a x p k ∣ P p l o g 2 p + k 2 l o g p n ) O(max_{p^k|P}\sqrt plog_2p+k^2log_pn) O(maxpk∣P​p The time complexity of log2 (p+k2logp (n)).

Dependency algorithm

  1. Inverse of extended Euclidean algorithm: obtain the inverse element of a number under a module through Euclidean algorithm.
  2. The first kind of Stirling number is to sum the powers of natural numbers: it can be used in the case of no requirements for modulus O ( k 2 ) O(k^2) To obtain the power of natural number within the time complexity of O(k2) 1 1 1 to k k k-power sum.
  3. Extended Lucas theorem (exLucas): can O ( m a x p k ∣ P p k ) O(max_{p^k|P}p^k) The factorial is obtained within the time complexity of O(maxpk ∣ P ∣ pk).

Algorithm overview

First, decompose the modulus P quality factor and consider n ! % p k n!\%p^k n!% How to solve PK.
We should first put n! All p factors in are eliminated. This step can be solved recursively, so that f(n,p,k) is represented n ! % p k n!\%p^k n!%pk and all factors of p are eliminated.
Then there f ( n , p , k ) = { ∏ i = 1 n i , n < p f ( n / p , p , k ) ∗ ∏ i = 1 , i ∤ p n i , n > = p f(n,p,k)= \left\{ \begin{array} {lc} \prod_{i=1}^ni, n<p \\ f(n/p,p,k)*\prod_{i=1,i\nmid p}^n i ,n>=p \end{array} \right. f(n,p,k)={∏i=1n​i,n<pf(n/p,p,k)∗∏i=1,i∤pn​i,n>=p​
consider ∏ i = 1 , i ∤ p n i \prod_{i=1,i\nmid p}^n i Π i = 1, how to calculate i W (pn i, deal with the numbers of modular p congruences together.
order g ( t , d , p , k ) = ∏ i = 0 t ( i p + d ) g(t,d,p,k)=\prod_{i=0}^t(ip+d) g(t,d,p,k)=∏i=0t​(ip+d). The expression can be expanded in the form of polynomial of P. when the degree is greater than or equal to K, in p k p^k pk can be ignored.
At this time f ( n , p , k ) = { ∏ i = 1 n i , n < p f ( n / p , p , k ) ∗ ∏ i = 1 p − 1 g ( t , i , p , k ) , n > = p f(n,p,k)= \left\{ \begin{array} {lc} \prod_{i=1}^ni, n<p \\ f(n/p,p,k)*\prod_{i=1}^{p-1}g(t,i,p,k) ,n>=p \end{array} \right. f(n,p,k) = {Π i=1n I, n < PF (n / P, P, K) * Π i=1p − 1 g (T, I, P, K), n > = P, where t = n / p + ( n % p > = i ? 0 : − 1 ) t=n/p+(n\%p>=i ? 0 : -1) t=n/p+(n%p>=i?0:−1).
For functions g ( t , d , p , k ) = ∏ i = 0 t ( i p + d ) g(t,d,p,k)=\prod_{i=0}^t(ip+d) g(t,d,p,k) = Π i=0t (ip+d), which can be written as ∑ i = 0 k − 1 a i p i \sum_{i=0}^{k-1}a_ip^i Σ i=0k − 1 − ai pi, for p c , c > = k p^c,c>=k The item of PC, C > = k will be replaced by the module p k p^k We find that we can use the power of natural number and the principle of compatibility and exclusion to obtain each coefficient, and finally obtain the function g g The value of g.

Subsequent optimization direction

1. We find that the function f f f takes a factorial at line 103 of code, which is only n < p n<p When n < p, it is solved once. For the fast solution of factorial less than modulus, in Luogu P5282 Place has O ( p l o g 2 p ) O(\sqrt plog_2p) O(p log2 (p).
2. Function f f f called the function at line 108 g g g. And every time you call a function g g Parameters of g t t t has only two values, and the parameter d d d although different every time, but d d d parameter only in function g g g is called at line 96. Therefore, the function g g The rest of g is the same and can be reused.
Combining the two optimizations, the fast factorial algorithm can be optimized to O ( m a x p k ∣ P p l o g 2 p + k 2 l o g p n ) O(max_{p^k|P}\sqrt plog_2p+k^2log_pn) O(maxpk∣P​p ​log2​p+k2logp​n).

Examples

51nod1151N! Non-zero minimum 18 bits

code

#include <bits/stdc++.h>
using namespace std;

typedef long long ll;
typedef __int128 Int;

const int N = 1e5 + 100;

Int add(Int a, Int b, Int mod) { return a + b >= mod ? a + b - mod : a + b; }
Int mul(Int a, Int b, Int mod) { return a * b % mod; }
Int qpow(Int x, Int n, Int mod) {
	Int r = 1;
	while (n > 0) {
		if (n & 1) r = mul(r, x, mod);
		x = mul(x, x, mod); n /= 2;
	}
	return r;
}

namespace Fac {
	Int mod;
	Int add(Int a, Int b) { return a + b >= mod ? a + b - mod : a + b; }
	Int mul(Int a, Int b) { return a * b % mod; }
	Int qpow(Int x, Int n) {
		Int r = 1;
		while (n > 0) {
			if (n & 1) r = mul(r, x);
			x = mul(x, x); n /= 2;
		}
		return r;
	}

	Int s1[110][110];
	void init() {
		s1[0][0] = 1;
		for (int i = 1; i <= 50; i++) {
			for (int j = 1; j <= i; j++) {
				s1[i][j] = add(s1[i - 1][j - 1], mul(i - 1, s1[i - 1][j]));
			}
		}
	}
	Int sum[110];
	void get(Int n, Int k) {
		sum[0] = n % mod; sum[1] = n * (n + 1) / 2 % mod;
		for (int i = 2; i <= k; i++) {
			sum[i] = 1;
			for (int j = 0; j < i + 1; j++) {
				if ((n - j + 1) % (i + 1) == 0) sum[i] = mul(sum[i], (n - j + 1) / (i + 1));
				else sum[i] = mul(sum[i], n - j + 1);
			}
			for (int j = 1; j < i; j++) {
				if ((i - j) % 2 == 0) sum[i] = add(sum[i], mod - mul(s1[i][j], sum[j]));
				else sum[i] = add(sum[i], mul(s1[i][j], sum[j]));
			}
		}
	}
	void exgcd(Int a, Int b, Int &x, Int &y, Int &d) {
		if (b == 0) { x = 1; y = 0; d = a; }
		else {
			exgcd(b, a % b, y, x, d);
			y -= a / b * x;
		}
	}
	Int rev(Int a, Int p) {
		Int b, c, d; exgcd(a, p, b, c, d);
		return d == 1 ? (b + p) % p : -1;
	}
	Int g(Int t, Int d, Int p, Int k) {
		if (t <= k) {
			Int res = 1;
			for (int i = 0; i <= t; i++) res = mul(res, add(mul(i, p), d));
			return res;
		}
		else {
			static Int dp[110], cd[110], cp[110], cnt[110];
			cd[k - 1] = qpow(d, t - k + 2);
			for (int i = k - 2; i >= 0; i--) cd[i] = mul(d, cd[i + 1]);
			get(t, k); sum[0] = 1;
			cp[0] = 1;
			for (int i = 1; i < k; i++) cp[i] = mul(cp[i - 1], p);
			cnt[0] = 0; dp[0] = 1;
			Int ma = 0;
			for (int i = 1; i < k; i++) {
				dp[i] = 0;
				for (int j = 1, f = 1; j <= i; j++, f = -f) {
					Int r = mul(sum[j], mul(dp[i - j], cp[ma - cnt[i - j]]));
					if (f > 0) dp[i] = add(dp[i], r);
					else dp[i] = add(dp[i], mod - r);
				}
				Int x = i; cnt[i] = ma;
				while (x % p == 0) x /= p, cnt[i]++, ma++;
				dp[i] = mul(dp[i], rev(x, mod));
			}
			Int res = 0;
			for (int i = 0; i < k; i++)
				res = add(res, mul(cp[i - cnt[i]], mul(cd[i], dp[i])));
			return res;
		}
	}
	Int f(Int n, Int p, Int k) {
		if (n < p) {
			Int res = 1;
			for (int i = 2; i <= n; i++)  res = mul(res, i);
			return res;
		}
		else {
			Int res = f(n / p, p, k);
			for (int i = 1; i < p; i++) res = mul(res, g(n / p + (n % p >= i ? 0 : -1), i, p, k));
			return res;
		}
	}


	void solve(Int n, Int p, Int k, Int mod, Int &res, Int &cnt) {
		Fac::mod = mod; init();
		res = f(n, p, k);
		cnt = 0;
		while (n > 0) cnt += n / p, n /= p;
	}
}
using Fac::rev;

int main() {
	ll n; scanf("%lld", &n);
	ll n5 = qpow(5, 18, 1e18), n2 = qpow(2, 18, 1e18), nn = n2 * n5;
	Int r2, c2, r5, c5;
	Fac::solve(n, 2, 18, n2, r2, c2); Fac::solve(n, 5, 18, n5, r5, c5);
	r2 = mul(r2, qpow(rev(5, n2), c5, n2), n2);
	r5 = mul(r5, qpow(rev(2, n5), c2, n5), n5);
	Int t5 = mul(mul(rev(n2, n5), n2, nn), r5, nn);
	Int t2 = mul(mul(rev(n5, n2), n5, nn), r2, nn);
	ll r = add(t2, t5, nn);
	r = mul(r, qpow(2, c2 - c5, nn), nn);
	printf("%lld\n", r);
	return 0;
}

Posted by yapyapayap on Mon, 20 Sep 2021 08:27:33 -0700