初始化敏感词
敏感词存放在 Set 集合中,每个元素的格式为"名称+编码"(例如"河南0.3300"),编码必须以"0."开头,构建树时据此区分名称和编码;这里为硬编码示例,实际使用时也可以改为从数据库查询后填充。
/**
 * Supplies the seed entries from which the lookup tree is built.
 *
 * <p>Each entry is a name immediately followed by its code; for instance the
 * first entry below is a two-character name followed by the code 0.3300.
 *
 * @return a newly created set holding the three hard-coded entries
 */
private static Set<String> getKeySet() {
    final String[] entries = {"河南0.3300", "河北0.3200", "北京0.1000"};
    HashSet<String> seeds = new HashSet<>(3);
    for (String entry : entries) {
        seeds.add(entry);
    }
    return seeds;
}
构建树
将 set 集合中的数据逐字符构建成一棵树(嵌套的 Map),方便查询;树如图所示(以上面三条数据为例:河 →(南 → id 0.3300;北 → id 0.3200),北 →(京 → id 0.1000)):
/**
 * Builds the lookup tree from the seed entries.
 *
 * <p>Every entry is walked character by character starting at the root map.
 * Each character of the name becomes (or reuses) a child map keyed by that
 * character. As soon as the remainder of the entry starts with {@code "0."}
 * the current node is marked as a terminal ({@code "isEnd" = "1"}), the
 * remainder is stored under {@code "id"}, and the entry is finished.
 *
 * <p>An entry that contains no {@code "0."} is inserted as a plain path with
 * no terminal node.
 */
@SuppressWarnings({"rawtypes", "unchecked"})
public static void add() {
    keySet = getKeySet();
    for (String key : keySet) {
        // e.g. key = "北京0.1000"
        // Nodes mix Character keys (children) with String keys ("isEnd", "id"),
        // so the traversal reference has to stay a raw Map.
        Map nowMap = cityMap;
        for (int i = 0; i < key.length(); i++) {
            // startsWith(prefix, offset) is bounds-safe: an entry ending in '0'
            // no longer reads past the end of the string.
            if (key.startsWith("0.", i)) {
                nowMap.put("isEnd", "1");
                nowMap.put("id", key.substring(i));
                break;
            }
            char keyChar = key.charAt(i);
            Object wordMap = nowMap.get(keyChar);
            if (wordMap == null) {
                // First time this character is seen at this depth: create its node.
                Map<Object, Object> newWordMap = new HashMap<>();
                newWordMap.put("isEnd", "0");
                nowMap.put(keyChar, newWordMap);
                wordMap = newWordMap;
            }
            nowMap = (Map) wordMap;
        }
    }
}
查询
传入待匹配的字符串,从左到右逐字符扫描;一旦匹配到某个敏感词就立即返回它的 id(只返回最先匹配到的那一个敏感词,不会继续查找其余敏感词)
/**
 * Scans {@code txt} for the first name stored in the tree and returns its id.
 *
 * <p>The tree is built on first use, when the root map is still empty. The
 * text is scanned left to right; when a partial match fails, scanning resumes
 * one character after the position where that partial match began.
 *
 * @param txt the text to scan; {@code null} is treated as containing no match
 * @return the id stored on the first terminal node reached, or an empty
 *         string when nothing matches
 */
@SuppressWarnings("rawtypes")
public static String check(String txt) {
    if (txt == null) {
        return "";
    }
    if (cityMap.size() == 0) {
        add();
    }
    // Number of characters matched so far in the current attempt.
    int matchFlag = 0;
    Map nowMap = cityMap;
    for (int i = 0; i < txt.length(); i++) {
        Object child = nowMap.get(txt.charAt(i));
        if (child == null) {
            // Rewind to the start of the failed attempt; the loop's i++ then
            // moves to the character right after it.
            i -= matchFlag;
            matchFlag = 0;
            nowMap = cityMap;
            continue;
        }
        nowMap = (Map) child;
        matchFlag++;
        if ("1".equals(nowMap.get("isEnd"))) {
            return nowMap.get("id").toString();
        }
    }
    return "";
}
}
完整代码如下
/**
* @projectName XXXX
* @package com.XXXX.system
* @className com.XXXX.system.CityTree
* @copyright Copyright 2021 XXXX, Inc All rights reserved.
*/
package com.XXXX.system;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
/**
 * CityTree
 * @description Builds a lookup tree of province/city names, used to find the
 *              province a company belongs to from the company's name.
 * @author XXX
 * @date 2022/10/17 17:38
 * @version 1.0
 */
public class CityTree {

    /** Every id starts with this prefix; inside a seed entry it marks where the name ends. */
    private static final String ID_PREFIX = "0.";

    /** Value returned by {@link #check(String)} when no stored name occurs in the text. */
    private static final String NO_MATCH = "";

    /** Root of the tree; its children are keyed by the first character of each name. */
    private static final Node ROOT = new Node();

    /**
     * One node of the tree. A node whose {@code id} is non-null terminates a
     * stored name.
     */
    private static final class Node {
        private final Map<Character, Node> children = new HashMap<>();
        private String id;
    }

    /**
     * Builds the tree from the seed entries returned by {@link #getKeySet()}.
     *
     * <p>Calling it again re-inserts the same entries into the existing tree.
     */
    public static void add() {
        for (String entry : getKeySet()) {
            insert(entry);
        }
    }

    /**
     * Inserts one seed entry, e.g. a name followed by {@code 0.1000}.
     *
     * <p>Characters are consumed as name characters until the remainder of the
     * entry starts with {@link #ID_PREFIX}; that remainder becomes the id of
     * the node reached so far. An entry without the prefix creates a path with
     * no id.
     */
    private static void insert(String entry) {
        Node node = ROOT;
        for (int i = 0; i < entry.length(); i++) {
            // startsWith(prefix, offset) never reads past the end of the entry,
            // so an entry ending in '0' is handled as an ordinary character.
            if (entry.startsWith(ID_PREFIX, i)) {
                node.id = entry.substring(i);
                return;
            }
            Character ch = entry.charAt(i);
            Node child = node.children.get(ch);
            if (child == null) {
                child = new Node();
                node.children.put(ch, child);
            }
            node = child;
        }
    }

    /**
     * Supplies the seed entries; each is a name immediately followed by its id.
     *
     * @return a newly created set holding the three hard-coded entries
     */
    private static Set<String> getKeySet() {
        Set<String> set = new HashSet<>(3);
        // name followed by id, e.g. id 0.3300
        set.add("河南0.3300");
        set.add("河北0.3200");
        set.add("北京0.1000");
        return set;
    }

    /**
     * Scans {@code txt} for the first stored name and returns its id.
     *
     * <p>The tree is built on first use, when the root has no children yet;
     * no synchronization is performed around that step. The text is scanned
     * left to right; when a partial match fails, scanning resumes one
     * character after the position where that partial match began.
     *
     * @param txt the text to scan; {@code null} is treated as containing no match
     * @return the id of the first name found, or an empty string when nothing matches
     */
    public static String check(String txt) {
        if (txt == null) {
            return NO_MATCH;
        }
        if (ROOT.children.isEmpty()) {
            add();
        }
        // Number of characters matched so far in the current attempt.
        int matched = 0;
        Node node = ROOT;
        for (int i = 0; i < txt.length(); i++) {
            Node next = node.children.get(txt.charAt(i));
            if (next == null) {
                // Rewind to the start of the failed attempt; the loop's i++
                // then moves to the character right after it.
                i -= matched;
                matched = 0;
                node = ROOT;
                continue;
            }
            node = next;
            matched++;
            if (node.id != null) {
                return node.id;
            }
        }
        return NO_MATCH;
    }
}
后记
1.查询方法可根据不同的需求修改为匹配多个敏感词、修改敏感词等
2.构建树方法可修改为不要id,仅构建敏感词树
版权声明:本文为tjk12345原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接和本声明。