一、初始化敏感词库
![]()
1 import java.io.BufferedReader;
2 import java.io.File;
3 import java.io.FileInputStream;
4 import java.io.InputStreamReader;
5 import java.util.HashMap;
6 import java.util.HashSet;
7 import java.util.Iterator;
8 import java.util.Map;
9 import java.util.Set;
10
11 /**
12 * 初始化敏感词库,将敏感词加入到HashMap中,构建DFA算法模型
13 */
14 public class SensitiveWordInit {
15 private String ENCODING = "utf-8"; //字符编码
16 public HashMap sensitiveWordMap;
17 public SensitiveWordInit(){
18 super();
19 }
20
21 /**
22 * 初始化
23 */
24 public Map initKeyWord(){
25 try {
26 //读取敏感词库
27 Set<String> keyWordSet = readSensitiveWordFile();
28 //将敏感词库加入到HashMap中
29 addSensitiveWordToHashMap(keyWordSet);
30 //spring获取application,然后application.setAttribute("sensitiveWordMap",sensitiveWordMap);
31 } catch (Exception e) {
32 e.printStackTrace();
33 }
34 return sensitiveWordMap;
35 }
36
37 /**
38 * 读取敏感词库,将敏感词放入HashSet中,构建一个DFA算法模型:<br>
39 * 中 = {
40 * isEnd = 0
41 * 国 = {<br>
42 * isEnd = 1
43 * 人 = {isEnd = 0
44 * 民 = {isEnd = 1}
45 * }
46 * 男 = {
47 * isEnd = 0
48 * 人 = {
49 * isEnd = 1
50 * }
51 * }
52 * }
53 * }
54 * 五 = {
55 * isEnd = 0
56 * 星 = {
57 * isEnd = 0
58 * 红 = {
59 * isEnd = 0
60 * 旗 = {
61 * isEnd = 1
62 * }
63 * }
64 * }
65 * }
66 */
67 private void addSensitiveWordToHashMap(Set<String> keyWordSet) {
68 sensitiveWordMap = new HashMap(keyWordSet.size()); //初始化敏感词容器,减少扩容操作
69 String key = null;
70 Map nowMap = null;
71 Map<String, String> newWorMap = null;
72 //迭代keyWordSet
73 Iterator<String> iterator = keyWordSet.iterator();
74 while(iterator.hasNext()){
75 key = iterator.next(); //关键字
76 nowMap = sensitiveWordMap;
77 for(int i = 0 ; i < key.length() ; i++){
78 char keyChar = key.charAt(i); //转换成char型
79 Object wordMap = nowMap.get(keyChar); //获取
80
81 if(wordMap != null){ //如果存在该key,直接赋值
82 nowMap = (Map) wordMap;
83 }
84 else{ //不存在则,则构建一个map,同时将isEnd设置为0,因为他不是最后一个
85 newWorMap = new HashMap<String,String>();
86 newWorMap.put("isEnd", "0"); //不是最后一个
87 nowMap.put(keyChar, newWorMap);
88 nowMap = newWorMap;
89 }
90
91 if(i == key.length() - 1){
92 nowMap.put("isEnd", "1"); //最后一个
93 }
94 }
95 }
96 }
97
98 /**
99 * 读取敏感词库中的内容,将内容添加到set集合中
100 */
101 @SuppressWarnings("resource")
102 private Set<String> readSensitiveWordFile() throws Exception{
103 Set<String> set = null;
104 //https://github.com/heqiyoujing/config_file 词库地址
105 File file = new File("D:\\SensitiveWord.txt"); //读取文件
106 InputStreamReader read = new InputStreamReader(new FileInputStream(file),ENCODING);
107 try {
108 if(file.isFile() && file.exists()){ //文件流是否存在
109 set = new HashSet<String>();
110 BufferedReader bufferedReader = new BufferedReader(read);
111 String txt = null;
112 while((txt = bufferedReader.readLine()) != null){ //读取文件,将文件内容放入到set中
113 set.add(txt);
114 }
115 }
116 else{ //不存在抛出异常信息
117 throw new Exception("敏感词库文件不存在");
118 }
119 } catch (Exception e) {
120 throw e;
121 }finally{
122 read.close(); //关闭文件流
123 }
124 return set;
125 }
126 }
View Code