好久都没敲过KMP和AC自动机了。以前只会敲个kuangbin牌板子套题。现在重新写了自己的板子加深了印象。并且刷了一些题来增加自己的理解。
KMP网上教程很多,但我的建议还是先看AC自动机(Trie图)的构造后再去理解。板子的话大家大同小异。
而AC自动机的构造则是推荐王贇的《Trie图的构建、活用与改进》。
前面的备用知识则是字典树。推荐董华星的《浅析字母树在信息学竞赛中的应用》。董聚聚不仅仅是介绍了字典树,包括一些常见的应用也有论述,介绍的挺详细的。
接下来就是刷题的部分了。
hdu 5880
Time Limit: 3000/1000 MS (Java/Others) Memory Limit: 65536/65536 K (Java/Others)
Total Submission(s): 2660 Accepted Submission(s): 577
Problem Description
Steam is a digital distribution platform developed by Valve Corporation offering digital rights management (DRM), multiplayer gaming and social networking services. A family view can help you to prevent your children access to some content which are not suitable for them.
Take an MMORPG game as an example: given a sentence T and a list of forbidden words {P}, your job is to use '*' to substitute every character that is part of a substring matching at least one forbidden word in the list (case-insensitive).
For example, T is: "I love Beijing's Tiananmen, the sun rises over Tiananmen. Our great leader Chairman Mao, he leades us marching on."
And {P} is: {"tiananmen", "eat"}
The result should be: "I love Beijing's *********, the sun rises over *********. Our gr*** leader Chairman Mao, he leades us marching on."
Take an MMORPG game as an example: given a sentence T and a list of forbidden words {P}, your job is to use '*' to substitute every character that is part of a substring matching at least one forbidden word in the list (case-insensitive).
For example, T is: "I love Beijing's Tiananmen, the sun rises over Tiananmen. Our great leader Chairman Mao, he leades us marching on."
And {P} is: {"tiananmen", "eat"}
The result should be: "I love Beijing's *********, the sun rises over *********. Our gr*** leader Chairman Mao, he leades us marching on."
Input
The first line contains the number of test cases. For each test case:
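The first line contains an integer $n$ ($1 \le n \le 1000000$), representing the size of the forbidden words list $P$. Each of the next $n$ lines contains a forbidden word $P_i$ ($1 \le |P_i| \le 1000000$, $\sum |P_i| \le 1000000$), where $P_i$ contains only lowercase letters.
The last line contains a string $T$ ($|T| \le 1000000$).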
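The first line contains an integer $n$ ($1 \le n \le 1000000$), representing the size of the forbidden words list $P$. Each of the next $n$ lines contains a forbidden word $P_i$ ($1 \le |P_i| \le 1000000$, $\sum |P_i| \le 1000000$), where $P_i$ contains only lowercase letters.
The last line contains a string $T$ ($|T| \le 1000000$).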
Output
For each case output the sentence in a line.
Sample Input
1
3
trump
ri
o
Donald John Trump (born June 14, 1946) is an American businessman, television personality, author, politician, and the Republican Party nominee for President of the United States in the 2016 election. He is chairman of The Trump Organization, which is the principal holding company for his real estate ventures and other business interests.
Sample Output
D*nald J*hn ***** (b*rn June 14, 1946) is an Ame**can businessman, televisi*n pers*nality, auth*r, p*litician, and the Republican Party n*minee f*r President *f the United States in the 2016 electi*n. He is chairman *f The ***** *rganizati*n, which is the p**ncipal h*lding c*mpany f*r his real estate ventures and *ther business interests.
Source
裸题,把模式串构成Trie图,然后在主串中匹配。Trie图中每个节点设置标记tag:若该节点本身就是词尾节点,则tag指向自己;否则指向离它最近的词尾后缀节点(也就是沿失败指针能走到的、最长的那个模式串后缀);都没有则指向根节点。接下来就转化为线段覆盖问题(差分)了:扫主串的时候,每匹配到一个模式串,就在它词头对应的位置pos++,在词尾的后一个位置pos--。接着对pos求前缀和,前缀和>0的位置赋'*',然后输出字符串。
有两个版本,一个是刚开始按KMP的想法写的,稍微改改就能改成孩子兄弟链法:
// HDU 5880 "Family View" -- Aho-Corasick, failure links followed at match
// time (KMP style).
//
// Every occurrence of a forbidden word inside the text is replaced by '*'.
// Matching is case-insensitive; any non-letter character in the text resets
// the automaton to the root.
#include<cstdio>
#include<iostream>
#include<cstring>
#include<queue>

const int N=1e6+100;   // capacity of the node pool and of the line buffer
const int type=26;     // alphabet size: 'a'..'z'

struct node
{
    int pre;           // failure link (index of the longest proper suffix node)
    int dep;           // depth of the node == length of the prefix it spells
    int tag;           // index of the longest pattern-end node that is a suffix
                       // of this node (the node itself if it ends a pattern);
                       // 0 means "no pattern ends here"
    int child[type];   // trie edges; 0 means "no edge" (node 0 is the root)
}trie[N];

int tot;               // index of the last allocated node
int pos[N];            // difference array over text positions

// Allocates a zero-initialised node and returns its index.
int newnode()
{
    trie[++tot]=node();
    return tot;
}

// Inserts pattern `s` below `root`.
// Precondition: `s` consists of lowercase letters only.
void add(int root,const char *s)
{
    int now=root;
    for(int i=0;s[i];i++)
    {
        const int p=s[i]-'a';
        if(!trie[now].child[p])
            trie[now].child[p]=newnode();
        now=trie[now].child[p];
        trie[now].dep=i+1;
    }
    trie[now].tag=now;
}

// Resets the trie to a lone root. `pos` is cleared by acm() for exactly the
// range it uses, so the whole array is not wiped on every test case.
void init()
{
    tot=0;
    trie[0]=node();
}

// Computes failure links breadth-first and propagates `tag` along them.
void getfail()
{
    std::queue<int> que;
    for(int i=0;i<type;i++)
        if(trie[0].child[i])
            que.push(trie[0].child[i]);   // depth-1 nodes keep pre == 0
    while(!que.empty())
    {
        const int now=que.front();
        que.pop();
        for(int i=0;i<type;i++)
        {
            const int nowto=trie[now].child[i];
            if(!nowto)
                continue;
            que.push(nowto);
            // Walk the parent's failure chain until a node with an i-edge
            // (or the root) is found.
            int j=trie[now].pre;
            while(j && !trie[j].child[i])
                j=trie[j].pre;
            const int fail=trie[j].child[i];
            trie[nowto].pre=fail;
            // A node that ends a pattern itself keeps its own tag.
            if(trie[fail].tag && !trie[nowto].tag)
                trie[nowto].tag=trie[fail].tag;
        }
    }
}

// Replaces, in place, every character of `s` covered by a pattern occurrence
// with '*'.
void acm(char *s)
{
    const int len=static_cast<int>(strlen(s));
    memset(pos,0,sizeof(int)*(static_cast<size_t>(len)+1));
    int j=0;
    for(int i=0;i<len;i++)
    {
        const char c=s[i];
        int p;
        if(c>='a' && c<='z')
            p=c-'a';
        else if(c>='A' && c<='Z')
            p=c-'A';
        else
        {
            j=0;   // a non-letter can never be part of a match
            continue;
        }
        while(j && !trie[j].child[p])
            j=trie[j].pre;
        j=trie[j].child[p];
        if(trie[j].tag)
        {
            // Longest pattern ending at i covers [i+1-dep, i]; shorter ones
            // ending here lie inside that range.
            pos[i+1]--;
            pos[i+1-trie[trie[j].tag].dep]++;
        }
    }
    int cover=0;
    for(int i=0;i<len;i++)
    {
        cover+=pos[i];
        if(cover>0)
            s[i]='*';
    }
}

// Reads one line into `buf` (capacity `cap`) and strips the trailing line
// terminator. Replaces gets(), which was removed in C++14 and cannot bound
// its write. On EOF `buf` becomes the empty string.
static void read_line(char *buf,int cap)
{
    if(!fgets(buf,cap,stdin))
    {
        buf[0]='\0';
        return;
    }
    size_t len=strlen(buf);
    while(len && (buf[len-1]=='\n' || buf[len-1]=='\r'))
        buf[--len]='\0';
}

int T,n;
char s[N];

int main()
{
    if(scanf("%d",&T)!=1)
        return 0;
    while(T--)
    {
        init();
        if(scanf("%d",&n)!=1)
            break;
        read_line(s,N);   // discard the rest of the line that held n
        for(int i=1;i<=n;i++)
        {
            read_line(s,N);
            add(0,s);
        }
        getfail();
        read_line(s,N);
        acm(s);
        printf("%s\n",s);
    }
    return 0;
}
然后是按照论文里面bfs的构造法写的:
// HDU 5880 "Family View" -- Aho-Corasick built as a Trie graph: build() fills
// in every missing edge, so matching needs no failure-link loop.
//
// Every occurrence of a forbidden word inside the text is replaced by '*'.
// Matching is case-insensitive; any non-letter character in the text resets
// the automaton to the root.
#include<cstdio>
#include<iostream>
#include<cstring>
#include<queue>

const int N=1e6+100;   // capacity of the node pool and of the line buffer
const int type=26;     // alphabet size: 'a'..'z'

struct node
{
    int pre;           // failure link (index of the longest proper suffix node)
    int dep;           // depth of the node == length of the prefix it spells
    int tag;           // index of the longest pattern-end node that is a suffix
                       // of this node (the node itself if it ends a pattern);
                       // 0 means "no pattern ends here"
    int child[type];   // before build(): trie edges, 0 == no edge;
                       // after build(): full transition table
}trie[N];

int tot;               // index of the last allocated node
int pos[N];            // difference array over text positions

// Allocates a zero-initialised node and returns its index.
int newnode()
{
    trie[++tot]=node();
    return tot;
}

// Inserts pattern `s` below `root`. Must be called before build().
// Precondition: `s` consists of lowercase letters only.
void add(int root,const char *s)
{
    int now=root;
    for(int i=0;s[i];i++)
    {
        const int p=s[i]-'a';
        if(!trie[now].child[p])
            trie[now].child[p]=newnode();
        now=trie[now].child[p];
        trie[now].dep=i+1;
    }
    trie[now].tag=now;
}

// Resets the trie to a lone root. `pos` is cleared by acm() for exactly the
// range it uses, so the whole array is not wiped on every test case.
void init()
{
    tot=0;
    trie[0]=node();
}

// Breadth-first construction of the Trie graph: sets failure links,
// propagates `tag`, and redirects every missing edge to the target the
// failure node already has for that letter.
void build()
{
    std::queue<int> que;
    for(int i=0;i<type;i++)
        if(trie[0].child[i])
            que.push(trie[0].child[i]);   // depth-1 nodes keep pre == 0
    while(!que.empty())
    {
        const int now=que.front();
        que.pop();
        const int fail=trie[now].pre;
        for(int i=0;i<type;i++)
        {
            const int nowto=trie[now].child[i];
            if(nowto)
            {
                que.push(nowto);
                // `fail` is shallower than `now`, so its row is complete.
                trie[nowto].pre=trie[fail].child[i];
                // A node that ends a pattern itself keeps its own tag.
                if(trie[trie[nowto].pre].tag && !trie[nowto].tag)
                    trie[nowto].tag=trie[trie[nowto].pre].tag;
            }
            else
                trie[now].child[i]=trie[fail].child[i];
        }
    }
}

// Replaces, in place, every character of `s` covered by a pattern occurrence
// with '*'. Requires build() to have been called.
void acm(char *s)
{
    const int len=static_cast<int>(strlen(s));
    memset(pos,0,sizeof(int)*(static_cast<size_t>(len)+1));
    int now=0;
    for(int i=0;i<len;i++)
    {
        const char c=s[i];
        int p;
        if(c>='a' && c<='z')
            p=c-'a';
        else if(c>='A' && c<='Z')
            p=c-'A';
        else
        {
            now=0;   // a non-letter can never be part of a match
            continue;
        }
        now=trie[now].child[p];
        if(trie[now].tag)
        {
            // Longest pattern ending at i covers [i+1-dep, i]; shorter ones
            // ending here lie inside that range.
            pos[i+1]--;
            pos[i+1-trie[trie[now].tag].dep]++;
        }
    }
    int cover=0;
    for(int i=0;i<len;i++)
    {
        cover+=pos[i];
        if(cover>0)
            s[i]='*';
    }
}

// Reads one line into `buf` (capacity `cap`) and strips the trailing line
// terminator. Replaces gets(), which was removed in C++14 and cannot bound
// its write. On EOF `buf` becomes the empty string.
static void read_line(char *buf,int cap)
{
    if(!fgets(buf,cap,stdin))
    {
        buf[0]='\0';
        return;
    }
    size_t len=strlen(buf);
    while(len && (buf[len-1]=='\n' || buf[len-1]=='\r'))
        buf[--len]='\0';
}

int T,n;
char s[N];

int main()
{
    if(scanf("%d",&T)!=1)
        return 0;
    while(T--)
    {
        init();
        if(scanf("%d",&n)!=1)
            break;
        read_line(s,N);   // discard the rest of the line that held n
        for(int i=1;i<=n;i++)
        {
            read_line(s,N);
            add(0,s);
        }
        build();
        read_line(s,N);
        acm(s);
        printf("%s\n",s);
    }
    return 0;
}