-
Notifications
You must be signed in to change notification settings - Fork 6
/
StopWords.php
68 lines (61 loc) · 2.02 KB
/
StopWords.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
<?php
/*
You may not change or alter any portion of this comment or credits
of supporting developers from this source code or any supporting source code
which is considered copyrighted (c) material of the original comment or credit authors.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*/
namespace Xmf;
/**
* StopWords - facilitate filtering of common or purely connective words for natural language processing
*
* @category Xmf\StopWords
* @package Xmf
* @author Richard Griffith <richard@geekwright.com>
* @author trabis <lusopoemas@gmail.com>
* @copyright 2011-2018 XOOPS Project (https://xoops.org)
* @license GNU GPL 2.0 or later (https://www.gnu.org/licenses/gpl-2.0.html)
* @link https://xoops.org
* @see https://en.wikipedia.org/wiki/Stop_words
*/
class StopWords
{
    /**
     * mbstring encoding
     */
    const ENCODING = 'UTF-8';

    /**
     * Lookup table of stop words, keyed by the lower-cased word. Every value is true;
     * only the keys matter, so membership can be tested with isset().
     *
     * @var bool[]
     */
    protected $stopwordList = array();

    /**
     * StopWords constructor - load stop words for current locale
     *
     * The words are read from the _XMF_STOPWORDS constant, a whitespace separated list.
     * If the constant is not yet defined, Language::load('stopwords') is called first,
     * which is expected to define it. If it is still undefined afterwards, the list
     * stays empty and every word is reported as significant.
     *
     * @todo specify locale to constructor, will require shift away from defined constant
     */
    public function __construct()
    {
        if (!defined('_XMF_STOPWORDS')) {
            Language::load('stopwords');
        }
        if (defined('_XMF_STOPWORDS')) {
            // Split on runs of ASCII whitespace so doubled spaces, tabs or line breaks
            // in the definition neither create an empty entry nor glue words together.
            // The explicit character class keeps the split independent of locale.
            $words = preg_split('/[ \t\r\n]+/', _XMF_STOPWORDS, -1, PREG_SPLIT_NO_EMPTY);
            if (is_array($words)) {
                foreach ($words as $word) {
                    // Store with the same case folding check() applies to its argument,
                    // otherwise a stop word defined with capitals could never match.
                    $this->stopwordList[$this->normalize($word)] = true;
                }
            }
        }
    }

    /**
     * check - look up a word in a list of stop words and
     * classify it as a significant word or a stop word.
     *
     * The comparison is case-insensitive.
     *
     * @param string $key the word to check
     *
     * @return bool True if word is significant, false if it is a stop word
     */
    public function check($key)
    {
        return !isset($this->stopwordList[$this->normalize($key)]);
    }

    /**
     * Fold a word to lower case, the canonical form used for list keys and lookups.
     *
     * Uses mb_strtolower() with the class ENCODING when the mbstring extension is
     * available, and falls back to the byte-wise strtolower() otherwise.
     *
     * @param string $word word to fold
     *
     * @return string lower-cased word
     */
    private function normalize($word)
    {
        return function_exists('mb_strtolower')
            ? mb_strtolower($word, static::ENCODING)
            : strtolower($word);
    }
}