UVAlive 4670 Dominating Patterns

 

 

题目:

 

 
Time Limit: 3000MS   Memory Limit: Unknown   64bit IO Format: %lld & %llu

 

 

Status

Description

【暑假】[实用数据结构]UVAlive 4670 Dominating Patterns

The archaeologists are going to decipher a very mysterious ``language". Now, they know many language patterns; each pattern can be treated as a string on English letters (only lower case). As a sub string, these patterns may appear more than one times in a large text string (also only lower case English letters).

What matters most is that which patterns are the dominating patterns. Dominating pattern is the pattern whose appearing times is not less than other patterns.

It is your job to find the dominating pattern(s) and their appearing times.

Input

The entire input contains multi cases. The first line of each case is an integer, which is the number of patterns N, 1 ≤ N ≤ 150. Each of the following N lines contains one pattern, whose length is in range [1, 70]. The rest of the case is one line contains a large string as the text to lookup, whose length is up to 10^6.

At the end of the input file, number `0' indicates the end of input file.

Output

For each of the input cases, output the appearing times of the dominating pattern(s). If there are more than one dominating pattern, output them in separate lines; and keep their input order to the output.

Sample Input

2 
aba 
bab 
ababababac 
6 
beta 
alpha 
haha 
delta 
dede 
tata 
dedeltalphahahahototatalpha 
0

Sample Output

4 
aba 
2 
alpha 
haha

思路:

 题目给出一个文本串多个模板串,要求出现最多的模板串。这恰好可以用AC自动机解决,只不过需要将print修改为cnt[val]++ 统计标号为val的模板串出现的次数。

 原理:在文本串不同位置出现的模板都可以通过自动机匹配找到。

 注意:为什么模板要开始从1标号? : 因为调用了insert(word[i],i)语句,如果给模板标号0的话相当于舍弃了这个模板串(val==0代表非单词结点),因此调用AhoCorasickaotomata的时候一定要注意不能把单词结点的val设为0。

 

代码:

  这里给出三份AC代码:

  无去重

  1 #include<cstdio>
  2 #include<cstring>
  3 #include<queue>
  4 #include<map>
  5 #include<string>
  6 using namespace std;
  7 
  8 const int maxl = 1000000 + 10;  // capacity of the text buffer T; also used as the automaton's node capacity
  9 const int maxw = 150 + 5;       // capacity for pattern ids: sizes cnt[] and the first dimension of word[][]
 10 const int maxwl = 70 + 5;       // capacity of one pattern buffer (second dimension of word[][])
 11 const int sigma_size = 26;      // alphabet size: one trie branch per letter 'a'..'z'
 12 
 13 struct AhoCorasickaotomata{
 14 int ch[maxl][sigma_size];
 15 int val[maxl];
 16 int cnt[maxw];  //计数
 17 int f[maxl];
 18 int last[maxl];
 19 int sz;
 20 
 21    void clear(){ 
 22      sz=1; 
 23      memset(ch[0],0,sizeof(ch[0]));
 24      memset(cnt,0,sizeof(cnt));
 25     }
 26    int ID(char c) { return c-'a'; }
 27    
 28    void insert(char* s,int v){
 29          int u=0 , n=strlen(s);
 30          for(int i=0;i<n;i++){
 31              int c=ID(s[i]);
 32              if(!ch[u][c]) {    //if ! 初始化结点 
 33                   memset(ch[sz],0,sizeof(ch[sz]));
 34              val[sz]=0;
 35              ch[u][c]= sz++;
 36              }
 37              u=ch[u][c];
 38          }
 39          val[u]=v;
 40    }
 41    
 42   void print(int j){
 43       if(j){                 //递归结尾 
 44           cnt[val[j]] ++;
 45           print(last[j]);
 46       }
 47   }
 48   void find(char* s){
 49       int n=strlen(s);
 50       int j=0;
 51     for(int i=0;i<n;i++){
 52           int c=ID(s[i]);
 53           while(j && !ch[j][c]) j=f[j];  
 54         //沿着失配边寻找与接下来一个字符可以匹配的字串 
 55           j=ch[j][c]; 
 56           if(val[j]) print(j);
 57           else if(last[j]) print(last[j]);
 58       }
 59   }
 60   
 61   void getFail() {
 62       queue<int> q;
 63       f[0]=0;
 64       for(int i=0;i<sigma_size;i++){  //以0结点拓展入队 
 65           int u=ch[0][i];
 66           if(u) {  //u存在 
 67               q.push(u); f[u]=0; last[u]=0;
 68           }
 69       }
 70       //按照BFS熟悉构造失配 f & last 
 71       while(!q.empty()){
 72           int r=q.front(); q.pop();
 73           for(int i=0;i<sigma_size;i++){
 74               int u=ch[r][i];
 75               if(!u) continue;    //本字符不存在 
 76               q.push(u);
 77               int v=f[r];
 78               while(v && !ch[v][i]) v=f[v];  //与该字符匹配 
 79               v=ch[v][i];         //相同字符的序号 
 80               f[u]=v;
 81               last[u] = val[v]? v : last[v];
 82               //递推 last 
 83               //保证作为短后缀的字串可以匹配 
 84           }
 85       }
 86   } 
 87 };
 88 
 89 AhoCorasickaotomata ac;  // single global automaton; main() calls clear() on it for every test case
 90 char T[maxl];            // buffer that receives the text string of the current test case
 91 
 92 int main(){
 93 int n; 
 94   while(scanf("%d",&n)==1 && n){
 95       char word[maxw][maxwl];
 96       ac.clear();          //operation 1 //init
 97     int x=n; 
 98     for(int i=1;i<=n;i++){  //i 从 1 开始到 n 
 99         scanf("%s",word[i]);
100         ac.insert(word[i],i);
101       }
102       ac.getFail();       //operation 2
103       scanf("%s",T);
104       int L=strlen(T);
105       ac.find(T);        //operation 3
106       int best = -1;
107       for(int i=1;i<=n;i++) best=max(best,ac.cnt[i]);
108       printf("%d\n",best);
109       for(int i=1;i<=n;i++)
110        if(ac.cnt[i] == best)  printf("%s\n",word[i]);
111   }
112   return 0;
113 }

  时间:46 ms

 

+map处理 

 

我的代码: 

  1 #include<cstdio>
  2 #include<cstring>
  3 #include<queue>
  4 #include<map>
  5 #include<string>
  6 using namespace std;
  7 
  8 const int maxl = 1000000 + 10;  // capacity of the text buffer T; also used as the automaton's node capacity
  9 const int maxw = 150 + 5;       // capacity for pattern ids: sizes cnt[] and the first dimension of word[][]
 10 const int maxwl = 70 + 5;       // capacity of one pattern buffer (second dimension of word[][])
 11 const int sigma_size = 26;      // alphabet size: one trie branch per letter 'a'..'z'
 12 
 13 struct AhoCorasickaotomata{
 14 int ch[maxl][sigma_size];
 15 int val[maxl];
 16 int cnt[maxw];  //计数
 17 int f[maxl];
 18 int last[maxl];
 19 int sz;
 20 map<string,int> ms;   //对string打标记 避免重复 
 21 
 22    void clear(){ 
 23      sz=1; 
 24      memset(ch[0],0,sizeof(ch[0]));
 25      memset(cnt,0,sizeof(cnt));
 26      ms.clear();  
 27     }
 28    int ID(char c) { return c-'a'; }
 29    
 30    void insert(char* s,int v){
 31          int u=0 , n=strlen(s);
 32          for(int i=0;i<n;i++){
 33              int c=ID(s[i]);
 34              if(!ch[u][c]) {    //if ! 初始化结点 
 35                   memset(ch[sz],0,sizeof(ch[sz]));
 36              val[sz]=0;
 37              ch[u][c]= sz++;
 38              }
 39              u=ch[u][c];
 40          }
 41          val[u]=v;
 42    }
 43    
 44   void print(int j){
 45       if(j){                 //递归结尾 
 46           cnt[val[j]] ++;
 47           print(last[j]);
 48       }
 49   }
 50   void find(char* s){
 51       int n=strlen(s);
 52       int j=0;
 53     for(int i=0;i<n;i++){
 54           int c=ID(s[i]);
 55           while(j && !ch[j][c]) j=f[j];  
 56         //沿着失配边寻找与接下来一个字符可以匹配的字串 
 57           j=ch[j][c]; 
 58           if(val[j]) print(j);
 59           else if(last[j]) print(last[j]);
 60       }
 61   }
 62   
 63   void getFail() {
 64       queue<int> q;
 65       f[0]=0;
 66       for(int i=0;i<sigma_size;i++){  //以0结点拓展入队 
 67           int u=ch[0][i];
 68           if(u) {  //u存在 
 69               q.push(u); f[u]=0; last[u]=0;
 70           }
 71       }
 72       //按照BFS熟悉构造失配 f & last 
 73       while(!q.empty()){
 74           int r=q.front(); q.pop();
 75           for(int i=0;i<sigma_size;i++){
 76               int u=ch[r][i];
 77               if(!u) continue;    //本字符不存在 
 78               q.push(u);
 79               int v=f[r];
 80               while(v && !ch[v][i]) v=f[v];  //与该字符匹配 
 81               v=ch[v][i];         //相同字符的序号 
 82               f[u]=v;
 83               last[u] = val[v]? v : last[v];
 84               //递推 last 
 85               //保证作为短后缀的字串可以匹配 
 86           }
 87       }
 88   } 
 89 };
 90 
 91 AhoCorasickaotomata ac;  // single global automaton; main() calls clear() on it for every test case
 92 char T[maxl];            // buffer that receives the text string of the current test case
 93 
 94 int main(){
 95 int n; 
 96   while(scanf("%d",&n)==1 && n){
 97       char word[maxw][maxwl];
 98       ac.clear();          //operation 1 //init
 99     int x=n; 
100     for(int i=1;i<=n;i++){  //i 从 1 开始到 n 
101         scanf("%s",word[i]);
102       if(!ac.ms.count(word[i])){
103             ac.insert(word[i],i);
104             ac.ms[string(word[i])] =i;  //string(char[])=>string
105       }
106       else x--;      //改变长度 
107       }
108       n=x;         //n为去重之后的长 
109       ac.getFail();       //operation 2
110       scanf("%s",T);
111       int L=strlen(T);
112       ac.find(T);        //operation 3
113       int best = -1;
114       for(int i=1;i<=n;i++) best=max(best,ac.cnt[i]);
115       printf("%d\n",best);
116       for(int i=1;i<=n;i++)
117        if(ac.cnt[i] == best)  printf("%s\n",word[i]);
118   }
119   return 0;
120 }
Code 1:我的代码

相关文章: