基于PHP实现高性能敏感词过滤算法
生活随笔
收集整理的這篇文章主要介紹了
基于PHP实现高性能敏感词过滤算法
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
- 公司新項目素材編輯功能需要提供敏感詞過濾功能,于是上網查了下,很多都是基于trie算法的,但基于PHP寫的卻少有,或者部分存在bug。所以,自己在別人的基礎上進行了完善。
敏感詞過濾算法實現
/**
 * A single node of the sensitive-word trie.
 */
class TreeMap
{
    /**
     * Character stored at this node.
     *
     * @var string
     */
    public $data;

    /**
     * Child nodes, keyed by their character (a node may have any number of children).
     *
     * @var TreeMap[]
     */
    public $children = [];

    /**
     * True when the path from the root to this node spells a complete word.
     *
     * @var bool
     */
    public $isEndingChar = false;

    /**
     * @param string $data Character represented by this node.
     */
    public function __construct($data)
    {
        $this->data = $data;
    }
}

/**
 * Trie-based sensitive-word matcher.
 *
 * Words are stored one multibyte character per trie level. Matching is
 * "shortest match": scanning stops at the first complete word found from a
 * given start position, so a word that has another registered word as its
 * prefix is never reported on its own.
 */
class TrieTree
{
    /**
     * Root node of the trie (a placeholder node holding '/').
     *
     * @var TreeMap
     * @author qpf
     */
    public $trieTreeMap;

    public function __construct()
    {
        $this->trieTreeMap = new TreeMap('/');
    }

    /**
     * Returns the root node of the trie.
     *
     * @return TreeMap
     * @author qpf
     */
    public function getTreeMap()
    {
        return $this->trieTreeMap;
    }

    /**
     * Inserts every word of the list into the trie.
     *
     * Empty strings are ignored; otherwise the root itself would be flagged
     * as the end of a word.
     *
     * @param array $wordsList List of sensitive words (strings).
     * @author qpf
     */
    public function addWords(array $wordsList)
    {
        foreach ($wordsList as $words) {
            $len = mb_strlen($words);
            if ($len === 0) {
                continue;
            }

            $node = $this->trieTreeMap;
            for ($i = 0; $i < $len; $i++) {
                $word = mb_substr($words, $i, 1);
                if (!isset($node->children[$word])) {
                    $node->children[$word] = new TreeMap($word);
                }
                $node = $node->children[$word];
            }
            $node->isEndingChar = true;
        }
    }

    /**
     * Finds all sensitive words occurring in the text, in order of appearance.
     *
     * A word occurring several times is returned several times. After a hit
     * the scan resumes right behind the matched word, so hits never overlap.
     *
     * @param string $txt Text to scan.
     * @return array List of matched words (strings); empty when nothing matched.
     * @author qpf
     */
    public function search($txt)
    {
        $wordsList = [];
        $txtLength = mb_strlen($txt);

        for ($i = 0; $i < $txtLength; $i++) {
            $wordLength = $this->checkWord($txt, $i, $txtLength);
            if ($wordLength > 0) {
                $wordsList[] = mb_substr($txt, $i, $wordLength);
                // Skip the matched characters; the loop's own $i++ accounts for the last one.
                $i += $wordLength - 1;
            }
        }

        return $wordsList;
    }

    /**
     * Checks whether a sensitive word starts at the given position.
     *
     * @param string $txt        Text being scanned.
     * @param int    $beginIndex Character offset at which matching starts.
     * @param int    $length     Length of $txt in characters.
     * @return int Length in characters of the shortest word starting at
     *             $beginIndex, or 0 when no complete word starts there.
     */
    private function checkWord($txt, $beginIndex, $length)
    {
        $node = $this->trieTreeMap;
        $wordLength = 0;

        for ($i = $beginIndex; $i < $length; $i++) {
            $word = mb_substr($txt, $i, 1);
            if (!isset($node->children[$word])) {
                // The path broke before reaching the end of a word: a partial
                // match is not a match, whatever the start position.
                return 0;
            }

            $node = $node->children[$word];
            $wordLength++;

            if ($node->isEndingChar === true) {
                return $wordLength;
            }
        }

        // Text ended in the middle of a word.
        return 0;
    }
}

// Demo. Because matching is shortest-match, '白粉人' and '白粉人嫩' are shadowed
// by '白粉'; the search result here is ['白粉', '白粉', '不該大'].
$data = ['白粉', '白粉人', '白粉人嫩','不該大'];
$wordObj = new TrieTree();
$wordObj->addWords($data);
$txt = "白粉啊,白粉人,我不該大啊";
$words = $wordObj->search($txt);
var_dump($words);die;

總結
以上是生活随笔為你收集整理的基于PHP实现高性能敏感词过滤算法的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: [导入]设计模式初学者系列-工厂方法
- 下一篇: javascript之嵌套函数