一、初始化敏感词库

  1 import java.io.BufferedReader;
  2 import java.io.File;
  3 import java.io.FileInputStream;
  4 import java.io.InputStreamReader;
  5 import java.util.HashMap;
  6 import java.util.HashSet;
  7 import java.util.Iterator;
  8 import java.util.Map;
  9 import java.util.Set;
 10 
 11 /**
 12  * 初始化敏感词库,将敏感词加入到HashMap中,构建DFA算法模型
 13  */
 14 public class SensitiveWordInit {
 15     private String ENCODING = "utf-8";    //字符编码
 16     public HashMap sensitiveWordMap;
 17     public SensitiveWordInit(){
 18         super();
 19     }
 20 
 21     /**
 22      * 初始化
 23      */
 24     public Map initKeyWord(){
 25         try {
 26             //读取敏感词库
 27             Set<String> keyWordSet = readSensitiveWordFile();
 28             //将敏感词库加入到HashMap中
 29             addSensitiveWordToHashMap(keyWordSet);
 30             //spring获取application,然后application.setAttribute("sensitiveWordMap",sensitiveWordMap);
 31         } catch (Exception e) {
 32             e.printStackTrace();
 33         }
 34         return sensitiveWordMap;
 35     }
 36 
 37     /**
 38      * 读取敏感词库,将敏感词放入HashSet中,构建一个DFA算法模型:<br>
 39      * 中 = {
 40      *      isEnd = 0
 41      *      国 = {<br>
 42      *           isEnd = 1
 43      *           人 = {isEnd = 0
 44      *                民 = {isEnd = 1}
 45      *                }
 46      *           男  = {
 47      *                  isEnd = 0
 48      *                   人 = {
 49      *                        isEnd = 1
 50      *                       }
 51      *               }
 52      *           }
 53      *      }
 54      *  五 = {
 55      *      isEnd = 0
 56      *      星 = {
 57      *          isEnd = 0
 58      *          红 = {
 59      *              isEnd = 0
 60      *              旗 = {
 61      *                   isEnd = 1
 62      *                  }
 63      *              }
 64      *          }
 65      *      }
 66      */
 67     private void addSensitiveWordToHashMap(Set<String> keyWordSet) {
 68         sensitiveWordMap = new HashMap(keyWordSet.size());     //初始化敏感词容器,减少扩容操作
 69         String key = null;
 70         Map nowMap = null;
 71         Map<String, String> newWorMap = null;
 72         //迭代keyWordSet
 73         Iterator<String> iterator = keyWordSet.iterator();
 74         while(iterator.hasNext()){
 75             key = iterator.next();    //关键字
 76             nowMap = sensitiveWordMap;
 77             for(int i = 0 ; i < key.length() ; i++){
 78                 char keyChar = key.charAt(i);       //转换成char型
 79                 Object wordMap = nowMap.get(keyChar);       //获取
 80 
 81                 if(wordMap != null){        //如果存在该key,直接赋值
 82                     nowMap = (Map) wordMap;
 83                 }
 84                 else{     //不存在则,则构建一个map,同时将isEnd设置为0,因为他不是最后一个
 85                     newWorMap = new HashMap<String,String>();
 86                     newWorMap.put("isEnd", "0");     //不是最后一个
 87                     nowMap.put(keyChar, newWorMap);
 88                     nowMap = newWorMap;
 89                 }
 90 
 91                 if(i == key.length() - 1){
 92                     nowMap.put("isEnd", "1");    //最后一个
 93                 }
 94             }
 95         }
 96     }
 97 
 98     /**
 99      * 读取敏感词库中的内容,将内容添加到set集合中
100      */
101     @SuppressWarnings("resource")
102     private Set<String> readSensitiveWordFile() throws Exception{
103         Set<String> set = null;
104         //https://github.com/heqiyoujing/config_file 词库地址
105         File file = new File("D:\\SensitiveWord.txt");    //读取文件
106         InputStreamReader read = new InputStreamReader(new FileInputStream(file),ENCODING);
107         try {
108             if(file.isFile() && file.exists()){      //文件流是否存在
109                 set = new HashSet<String>();
110                 BufferedReader bufferedReader = new BufferedReader(read);
111                 String txt = null;
112                 while((txt = bufferedReader.readLine()) != null){    //读取文件,将文件内容放入到set中
113                     set.add(txt);
114                 }
115             }
116             else{         //不存在抛出异常信息
117                 throw new Exception("敏感词库文件不存在");
118             }
119         } catch (Exception e) {
120             throw e;
121         }finally{
122             read.close();     //关闭文件流
123         }
124         return set;
125     }
126 }
View Code

相关文章: