基于 Laravel 的敏感词过滤

敏感词过滤:不能让用户作死,进而带动平台一起作死。

  • PhraseFilter
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
<?php

namespace App\Support;

use App\Concerns\Singleton;

/**
 * Sensitive-word filter.
 *
 * Reads a word list from a text file (entries separated by line breaks or
 * "|"), compiles it into alternation regexes of at most MATCH_SIZE words each,
 * and tests normalised input text against those regexes.
 *
 * Class PhraseFilter
 * @package App\Support
 */
class PhraseFilter
{
    use Singleton;

    /** Maximum number of words joined into a single alternation pattern. */
    const MATCH_SIZE = 256;

    /**
     * Match array written by the most recent preg_match() call in match();
     * null when no pattern has been tried yet.
     *
     * @var array|null
     */
    public $matched;

    /** @var string Path of the word-list file. */
    protected $source_txt;

    /**
     * Compiled patterns, cached per word-list path so that instances created
     * with different files never see each other's patterns.
     *
     * @var array
     */
    private static $pattern_cache = [];

    /**
     * PhraseFilter constructor.
     *
     * @param string|null $source_txt Path of the word-list file; when null,
     *                                database_path() . '/app/filter_word.txt' is used.
     */
    public function __construct(string $source_txt = null)
    {
        if (null === $source_txt) {
            $source_txt = database_path() . '/app/filter_word.txt';
        }

        // Raise the PCRE backtrack limit.
        ini_set('pcre.backtrack_limit', '23001337');
        // Raise the PCRE recursion limit.
        ini_set('pcre.recursion_limit', '23001337');

        $this->source_txt = $source_txt;
    }

    /**
     * Lower-case ASCII letters only, leaving every other byte untouched.
     *
     * Used instead of strtolower(), which is byte-wise and locale-dependent
     * before PHP 8.2 and can therefore damage multi-byte UTF-8 sequences.
     *
     * @param string $text
     * @return string
     */
    private static function asciiLower(string $text): string
    {
        return strtr($text, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz');
    }

    /**
     * Normalise input before matching: lower-case it and keep only ASCII
     * letters, digits and characters in U+4E00..U+9FA5, concatenated.
     *
     * @param string $target
     * @return string
     */
    private function prepare(string $target)
    {
        $target = self::asciiLower($target);

        // With the "u" modifier preg_match_all() returns false for input that
        // is not valid UTF-8. Fall back to the lower-cased raw text in that
        // case so that a single malformed byte cannot switch the filter off.
        if (false === preg_match_all('/[0-9a-z\x{4e00}-\x{9fa5}]+/u', $target, $found)) {
            return $target;
        }

        return implode('', $found[0]);
    }

    /**
     * Check whether the text contains a sensitive word.
     *
     * @param string $target Text to inspect.
     * @param callable|null $callback Optional; called with the preg_match()
     *                                match array when a word is detected.
     * @return bool true:detected, false:not detected.
     */
    public function match(string $target, callable $callback = null)
    {
        $key = (string) $this->source_txt;
        if (!isset(self::$pattern_cache[$key])) {
            self::$pattern_cache[$key] = $this->getPatterns();
        }

        $this->matched = null;
        $target = $this->prepare($target);

        foreach (self::$pattern_cache[$key] as $pattern) {
            // A zero-length match is not a detection.
            if (preg_match($pattern, $target, $this->matched) && '' !== $this->matched[0]) {
                if (null !== $callback) {
                    $callback($this->matched);
                }

                return true;
            }
        }

        return false;
    }

    /**
     * Build the matching rules from the word-list file.
     *
     * Entries are split on line breaks and "|", trimmed, lower-cased (the
     * input text is lower-cased too), escaped with preg_quote() and grouped
     * into patterns of at most MATCH_SIZE alternatives. Empty entries are
     * dropped: an empty alternative would match the empty string and hide the
     * remaining words of its pattern.
     *
     * @return array List of regex pattern strings.
     * @throws \RuntimeException When the word-list file cannot be read.
     */
    private function getPatterns()
    {
        $src_txt = file_get_contents($this->source_txt);
        if (false === $src_txt) {
            throw new \RuntimeException('Unable to read sensitive word list: ' . $this->source_txt);
        }

        // Drop a leading UTF-8 byte-order mark so it does not become part of the first word.
        if (0 === strncmp($src_txt, "\xEF\xBB\xBF", 3)) {
            $src_txt = substr($src_txt, 3);
        }

        $escaped = [];
        foreach (preg_split('/[\r\n|]+/', $src_txt, -1, PREG_SPLIT_NO_EMPTY) as $word) {
            $word = self::asciiLower(trim($word));
            if ('' === $word) {
                continue;
            }
            // NOTE(review): entries are treated as literal words, not regexes - confirm
            // no existing list relies on regex syntax.
            $escaped[] = preg_quote($word, '/');
        }

        $result = [];
        foreach (array_chunk($escaped, self::MATCH_SIZE) as $chunk) {
            $result[] = '/' . implode('|', $chunk) . '/';
        }

        return $result;
    }
}
  • 使用test_filter
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
/**
 * Demo endpoint: reports whether the "word" request value contains a sensitive word.
 *
 * Responds with code 4003 when PhraseFilter::match() detects a word,
 * otherwise with code 200.
 *
 * @param Request $request
 * @param PhraseFilter $filter
 * @return mixed
 * @Get("/test_filter")
 */
public function test_filter(Request $request, PhraseFilter $filter)
{
    $word = $request->get('word');

    // PhraseFilter::match() declares a string parameter. A non-scalar value
    // (presumably null when "word" is absent, or an array for "word[]=..." -
    // verify against the Request implementation) is treated as empty text
    // instead of being passed through.
    if (!is_scalar($word)) {
        $word = '';
    }

    if ($filter->match((string) $word)) {
        return System::apiResponse(4003, '含有敏感词');
    }

    return System::apiResponse(200, 'Success');
}

Powered by Hexo and Hexo-theme-hiker

Copyright © 2017 - 2023 Keep It Simple And Stupid All Rights Reserved.

访客数 : | 访问量 :