0%

NTT模板

从多项式乘法到快速傅里叶变换,Miskcoo
FFT用到的各种素数,Miskcoo

直接上三模数NTT

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#pragma GCC optimize(2)
#include<iostream>
#include<cstdio>
#include<algorithm>
#include<cmath>
using namespace std;
typedef long long LL;
const int MAXN=2100000;//两倍空间
int n,m,GP,r[MAXN],nn,mm;
LL a[MAXN],b[MAXN],c[MAXN],d[MAXN],ans[MAXN][4];
inline int read() {
char ch=getchar();int sum=0;
while (ch>'9' || ch<'0') ch=getchar();
while (ch>='0' && ch<='9') sum=sum*10+ch-'0',ch=getchar();
return sum;
}
void print(LL x) {
if (x>9) print(x/10);
putchar(x%10+'0');
}
//2009国家集训队论文:骆可强:《论程序底层优化的一些方法与技巧》
inline LL fast_mul(LL a,LL b,LL P) {
a%=P;b%=P;
LL d=(LL)floor(a*(long double)b/P+0.5);
LL res=a*b-d*P;
if (res<0) res+=P;
return res;
//return ((a*b-(LL)((LL)((long double)a/P*b+1e-3)*P))%P+P)%P;
}
inline LL fast_pow(LL b,LL x,LL P) {
LL res=1;
while (x) {
if (x&1) res=res*b%P;
b=b*b%P;
x>>=1;
}
return res;
}
void DFT(LL c[],LL g,LL M,int opt) {
for (int i=0;i<nn;i++) {
if (i<r[i]) swap(c[i],c[r[i]]);
}
for (int i=1;i<nn;i<<=1) {
LL x=fast_pow(g,(M-1)/(i<<1),M);
for (int j=0;j<nn;j+=(i<<1)) {
LL y=1;
for (int k=0;k<i;k++,y=y*x%M) {
LL z=y*c[i+j+k]%M;
c[i+j+k]=(c[j+k]-z+M)%M;
c[j+k]=(c[j+k]+z)%M;
}
}
}
}
void NTT(LL g,LL M,int p) {
copy(a,a+nn+1,c);copy(b,b+nn+1,d);//防止上次残余影响
DFT(c,g,M,1);DFT(d,g,M,1);
for (int i=0;i<nn;i++) c[i]=(c[i]*d[i])%M;
DFT(c,fast_pow(g,M-2,M),M,-1);
LL inv=fast_pow(nn,M-2,M);
for (int i=0;i<=mm;i++) ans[i][p]=c[i]*inv%M;
}
void Arbitrary_Modulo_NTT() {
LL m1=167772161,m2=998244353,m3=1004535809,g=3;
mm=n+m;nn=1;int step=0;
while (nn<=mm) nn<<=1,step++;
for (int i=0;i<nn;i++) r[i]=(r[i>>1]>>1)|((i&1)<<(step-1));
NTT(g,m1,0);
NTT(g,m2,1);
NTT(g,m3,2);
for (int i=0;i<=mm;i++) {//中国剩余定理合并
LL M=m1*m2,A=(fast_mul(ans[i][0]*m2,fast_pow(m2,m1-2,m1),M)+fast_mul(ans[i][1]*m1,fast_pow(m1,m2-2,m2),M))%M;
LL k=(((ans[i][2]-A)%m3+m3)%m3)*fast_pow(M%m3,m3-2,m3)%m3;
ans[i][3]=(M%GP*k%GP+A%GP)%GP;
}
}
int main() {
n=read(),m=read(),GP=read();
for (int i=0;i<=n;i++) a[i]=read();
for (int i=0;i<=m;i++) b[i]=read();
Arbitrary_Modulo_NTT();
for (int i=0;i<=mm;i++) printf("%lld ",ans[i][3]);
return 0;
}
速度奇慢无比 qaq