// Trie (dictionary tree) DFA algorithm
@Service
public class SensitiveWordUtil extends TimerTask {
private static final Log log = Log(SensitiveWordUtil.class);
public static int minMatchTYpe = 1;//最⼩匹配规则:以重复词汇的最少词来匹配
public static int maxMatchType = 2;//最⼤匹配规则:以重复词汇的最多词来匹配
private static String isEnd = "isEnd";
private static String isEnd_0 = "0";//没有结束
private static String isEnd_1 = "1";//结束
private String configKey;//校验key是否⼀样的
@Resource
private ConfigService configService;恭喜发财 英文
private String nsitiveWorld = "饿了";
private static ConcurrentHashMap nsitiveWordMap = new ConcurrentHashMap();//⽤于本地查询
/**
* 使⽤内部定时器匹配敏感词
* 1.初始化敏感词
*/sleepwalker
@Override
public void run() {
String keyWord = Config(Type(), StoreConstants.ConfigKey.SENSITIVE_WORD, nsitiveWorld);
if (keyWord.equals(configKey)) {
if (log.isDebugEnabled()) {
log.debug("=nsitiveWord=" + configKey);
}
ox是什么意思return;
} el {
configKey = keyWord;
}
String[] strs = keyWord.split(",");
Set<String> keyWordSet = new HashSet<String>();
for (String str : strs) {
keyWordSet.add(str);
}
nsitiveWordMap = addSensitiveWordToHashMap(keyWordSet);
log.info("=nsitiveWord=" + JSONString(nsitiveWordMap));
}
/**
* 读取敏感词库,将敏感词放⼊HashSet中,构建⼀个DFA算法模型:<br>
* {天={'isEnd':0,'猫'={'isEnd:1'}},习={isEnd:0,⼤={isEnd:0,⼤={isEnd:1}}}}
* 待整个map⽣成好最后直接赋值
*
* @param keyWordSet 敏感词库
*/
private ConcurrentHashMap addSensitiveWordToHashMap(Set<String> keyWordSet) {
ConcurrentHashMap nsitiveWordMapOrg = new ConcurrentHashMap(keyWordSet.size());//初始化敏感词容器,减少扩容操作//迭代keyWordSet英语四级单词
Iterator<String> iterator = keyWordSet.iterator();
while (iterator.hasNext()) {
String key = ();//关键字
Map nowMap = nsitiveWordMapOrg;
for (int i = 0; i < key.length(); i++) {
char keyChar = key.charAt(i);//转换成char型
Object wordMap = (keyChar); //获取
if (wordMap != null) { //如果存在该key,直接赋值
nowMap = (Map) wordMap;
} el { //不存在则,则构建⼀个map,同时将isEnd设置为0,因为他不是最后⼀个
Map<String, String> newWorMap = new HashMap<String, String>();
newWorMap.put(isEnd, isEnd_0); //不是最后⼀个
nowMap.put(keyChar, newWorMap);
nowMap = newWorMap;
nowMap = newWorMap;
}
if (i == key.length() - 1) {
nowMap.put(isEnd, isEnd_1); //最后⼀个
}
}
}
return nsitiveWordMapOrg;
}
/**
* 判断⽂字是否包含敏感字符
*
* @param txt ⽂字
* @param matchType 匹配规则 1:最⼩匹配规则,2:最⼤匹配规则
* @return若包含返回true,否则返回fal
*/
public static boolean isContaintSensitiveWord(String txt, int matchType) {
somebody是什么意思boolean flag = fal;
if (StringUtils.isBlank(txt)) {
return flag;
}
for (int i = 0; i < txt.length(); i++) {
int matchFlag = checkSensitiveWord(txt, i, matchType); //判断是否包含敏感字符if (matchFlag > 0) { //⼤于0存在,返回true干涸的意思
flag = true;
}
}
return flag;
}
/**
 * Collects the distinct sensitive words that occur in the text.
 *
 * @param txt the text to scan; text that {@code StringUtils.isBlank} considers blank yields an empty set
 * @param matchType matching rule: 1 = minimum match, 2 = maximum match
 * @return the set of matched substrings (empty when nothing matches)
 */
public static Set<String> getSensitiveWord(String txt, int matchType) {
    Set<String> found = new HashSet<String>();
    if (StringUtils.isBlank(txt)) {
        return found;
    }
    int pos = 0;
    while (pos < txt.length()) {
        int hitLength = checkSensitiveWord(txt, pos, matchType);
        if (hitLength > 0) {
            found.add(txt.substring(pos, pos + hitLength));
            // Resume scanning right after the matched word.
            pos += hitLength;
        } else {
            pos++;
        }
    }
    return found;
}
/**
 * Masks the sensitive words in the text, using the maximum-match rule and the default mask
 * character of the three-argument overload.
 *
 * @param txt the text to filter
 * @return {@code txt} unchanged when {@code StringUtils.isBlank} considers it blank,
 *         otherwise the result of the three-argument overload
 */
public static String replaceSensitiveWord(String txt) {
    return StringUtils.isBlank(txt)
            ? txt
            : replaceSensitiveWord(txt, maxMatchType, null);
}
/
**
* 替换敏感字字符
*
* @param txt
* @param matchType
* @param replaceChar 替换字符,默认*
*/
public static String replaceSensitiveWord(String txt, int matchType, String replaceChar) {
if (StringUtils.isBlank(replaceChar)) {
replaceChar = "*";
}
String resultTxt = txt;
Set<String> t = getSensitiveWord(txt, matchType);//获取所有的敏感词
Iterator<String> iterator = t.iterator();
while (iterator.hasNext()) {
String word = ();
String replaceString = getReplaceChars(replaceChar, word.length());
resultTxt = placeAll(word, replaceString);
}
return resultTxt;
}
/**
 * Builds the mask for a word by repeating the mask string.
 *
 * @param replaceChar the mask string to repeat; must not be {@code null}
 * @param length the number of repetitions; values below 1 still produce one copy
 * @return {@code replaceChar} repeated {@code max(length, 1)} times
 */
private static String getReplaceChars(String replaceChar, int length) {
    // StringBuilder avoids re-copying the accumulated string on every iteration.
    StringBuilder mask = new StringBuilder(replaceChar);
    for (int i = 1; i < length; i++) {
        mask.append(replaceChar);
    }
    return mask.toString();
}
/**
* 检查⽂字中是否包含敏感字符,检查规则如下:<br>
*
* @param txt
* @param beginIndex
* @param matchType
* @return如果存在,则返回敏感词字符的长度,不存在返回0
*/
@SuppressWarnings({"rawtypes"})
public static int checkSensitiveWord(String txt, int beginIndex, int matchType) {
boolean flag = fal; //敏感词结束标识位:⽤于敏感词只有1位的情况
int matchFlag = 0; //匹配标识数默认为0
Map nowMap = nsitiveWordMap;
if (nowMap == null) {
nowMap = new HashMap();
}
for (int i = beginIndex; i < txt.length(); i++) {
char word = txt.charAt(i);
nowMap = (Map) (word); //获取指定key
if (nowMap != null) { //存在,则判断是否为最后⼀个
matchFlag++; //找到相应key,匹配标识+1
英语一对一家教南京新东方官网if (isEnd_1.(isEnd))) { //如果为最后⼀个匹配规则,结束循环,返回匹配标识数 flag = true; //结束标志位为true
if (minMatchTYpe == matchType) { //最⼩规则,直接返回,最⼤规则还需继续查找
圣诞节歌break;
}
}
} el {//不存在,直接返回
break;
}
}
if (matchFlag < 1 || !flag) { //长度必须⼤于等于2,为词 matchFlag = 0;
}with的用法
return matchFlag;
}