C#过滤敏感词DFA算法
作者:互联网
今天做游戏正好需要过滤敏感词,并将出现的敏感词替换成*。在网上找了许久,找到了一篇可用的Java版本的DFA算法,最后费了一番功夫将其思路用C#实现,里面的注释甚至都没改动。这里直接上代码,不借助任何第三方工具,复制粘贴就能用。
当然想看原博客的点击这里
using UnityEngine;
using System.Collections.Generic;
/// <summary>
/// Demo component for a DFA (trie) based sensitive-word filter.
/// Pressing the "x" key loads the word list from <see cref="txt"/>, builds the
/// trie and prints a sample sentence with every sensitive word replaced by '*'.
/// </summary>
/// <remarks>
/// Trie layout: every node is a <c>Dictionary&lt;object, object&gt;</c> whose keys are
/// single-character strings mapping to child nodes, plus the reserved key
/// <c>"isEnd"</c> whose value is <c>"1"</c> when a word ends at that node and
/// <c>"0"</c> otherwise. The root node carries no <c>"isEnd"</c> entry.
/// </remarks>
public class Test : MonoBehaviour
{
    // Reserved node key holding the end-of-word flag. It can never collide with a
    // character key because character keys are always exactly one char long.
    private const string IsEndKey = "isEnd";
    private const string IsEndTrue = "1";
    private const string IsEndFalse = "0";

    // matchType value selecting the shortest match; any other value selects the longest match.
    private const int MinMatchType = 1;

    /// <summary>Word list asset; expected to contain one sensitive word per line.</summary>
    public TextAsset txt;

    private void Start()
    {
    }

    private void Update()
    {
        if (Input.GetKeyDown("x"))
        {
            string str = "后来的啊微信哈哈嘀嘀嘀微信qqwx呼呼呵呵";
            HashSet<string> set = ReadTxtByLine(txt);
            Dictionary<object, object> map = AddBadWordToHashMap(set);
            // 2 => longest-match rule.
            string set1 = GetBadWord(str, 2, map);
            print(set1);
        }
    }

    /// <summary>
    /// Reads the sensitive words from a text asset, one word per line.
    /// </summary>
    /// <param name="file">Text asset holding the word list; may be null.</param>
    /// <returns>
    /// The set of distinct, trimmed, non-empty lines. Empty when <paramref name="file"/> is null.
    /// </returns>
    private HashSet<string> ReadTxtByLine(TextAsset file)
    {
        HashSet<string> words = new HashSet<string>();
        if (file == null)
        {
            return words;
        }

        // Split on both '\n' and '\r' so that LF, CR and CRLF files all work.
        // CRLF and blank lines yield empty entries, which are skipped below.
        string[] lines = file.text.Split('\n', '\r');
        foreach (string line in lines)
        {
            string word = line.Trim();
            if (word.Length > 0)
            {
                words.Add(word);
            }
        }
        return words;
    }

    /// <summary>
    /// Builds the DFA (trie) used for matching from the given set of sensitive words.
    /// </summary>
    /// <param name="keyWordSet">Sensitive words; null or empty entries are ignored.</param>
    /// <returns>The root node of the trie (see the class remarks for the node layout).</returns>
    private Dictionary<object, object> AddBadWordToHashMap(HashSet<string> keyWordSet)
    {
        if (keyWordSet == null)
        {
            return new Dictionary<object, object>();
        }

        // Pre-size the root to reduce rehashing while the words are inserted.
        Dictionary<object, object> wordMap = new Dictionary<object, object>(keyWordSet.Count);

        foreach (string key in keyWordSet)
        {
            if (string.IsNullOrEmpty(key))
            {
                continue;
            }

            // Every word is inserted starting from the root.
            Dictionary<object, object> nowMap = wordMap;
            for (int i = 0; i < key.Length; i++)
            {
                string keyChar = key[i].ToString();
                object child;
                if (nowMap.TryGetValue(keyChar, out child))
                {
                    // The character already exists: descend into its node so that
                    // words sharing a prefix share the same path.
                    nowMap = (Dictionary<object, object>)child;
                }
                else
                {
                    // Unknown character: create a node that is not (yet) the end of a word.
                    Dictionary<object, object> newWordMap = new Dictionary<object, object>();
                    newWordMap.Add(IsEndKey, IsEndFalse);
                    nowMap.Add(keyChar, newWordMap);
                    nowMap = newWordMap;
                }

                if (i == key.Length - 1)
                {
                    // Last character of the word: mark this node as a word end.
                    nowMap[IsEndKey] = IsEndTrue;
                }
            }
        }
        return wordMap;
    }

    /// <summary>
    /// Returns a copy of <paramref name="txt"/> in which every character of every
    /// matched sensitive word is replaced by '*'.
    /// </summary>
    /// <param name="txt">Text to filter.</param>
    /// <param name="matchType">1 = shortest match; any other value = longest match.</param>
    /// <param name="wordMap">Trie root as produced by <c>AddBadWordToHashMap</c>.</param>
    /// <returns>
    /// The masked text. <paramref name="txt"/> is returned unchanged when it is null or
    /// empty, or when <paramref name="wordMap"/> is null.
    /// </returns>
    public string GetBadWord(string txt, int matchType, Dictionary<object, object> wordMap)
    {
        if (string.IsNullOrEmpty(txt) || wordMap == null)
        {
            return txt;
        }

        char[] c = txt.ToCharArray();
        for (int i = 0; i < c.Length; i++)
        {
            // Length of the sensitive word starting at i, or 0 if there is none.
            int length = CheckBadWord(txt, i, matchType, wordMap);
            if (length > 0)
            {
                for (int j = 0; j < length; j++)
                {
                    c[i + j] = '*';
                }
                // Skip the masked word; minus 1 because the for loop increments i itself.
                i += length - 1;
            }
        }
        return new string(c);
    }

    /// <summary>
    /// Checks whether a sensitive word starts at <paramref name="beginIndex"/>.
    /// </summary>
    /// <param name="txt">Text being scanned.</param>
    /// <param name="beginIndex">Index in <paramref name="txt"/> at which matching starts.</param>
    /// <param name="matchType">1 = stop at the first (shortest) word; otherwise keep the longest word.</param>
    /// <param name="wordMap">Trie root.</param>
    /// <returns>The length of the matched word, or 0 when no word starts at that index.</returns>
    private int CheckBadWord(string txt, int beginIndex, int matchType, Dictionary<object, object> wordMap)
    {
        // Length of the last COMPLETE word seen on the path. Characters walked past
        // that point belong to a longer word that may never complete, so they must
        // not be counted as part of the match.
        int matchedLength = 0;
        Dictionary<object, object> nowMap = wordMap;

        for (int i = beginIndex; i < txt.Length; i++)
        {
            object next;
            if (!nowMap.TryGetValue(txt[i].ToString(), out next))
            {
                // No transition for this character: stop scanning.
                break;
            }
            nowMap = (Dictionary<object, object>)next;

            object isEnd;
            if (nowMap.TryGetValue(IsEndKey, out isEnd) && IsEndTrue.Equals(isEnd))
            {
                matchedLength = i - beginIndex + 1;
                if (matchType == MinMatchType)
                {
                    // Shortest-match rule: the first complete word is enough.
                    break;
                }
            }
        }
        return matchedLength;
    }
}
下面是敏感词库,其实就是一个.txt文件(每行一个敏感词),大家自己创建就行;我这里内容也不多,上个截图就好。
好了今天就这么多
标签:string,Dictionary,C#,DFA,int,算法,txt,nowMap,wordMap 来源: https://blog.csdn.net/qq_32169479/article/details/100634103