/**
 * Builds a TextSanitizer backed by a fresh SanitizerFactory and configures it
 * from the application settings.
 *
 * The language detection option is read from `smwgFulltextLanguageDetection`
 * and the minimum token size from `smwgFulltextSearchMinTokenSize`.
 *
 * @since 2.5
 *
 * @return TextSanitizer
 */
public function newTextSanitizer()
{
    $applicationSettings = ApplicationFactory::getInstance()->getSettings();

    $sanitizerFactory = new SanitizerFactory();
    $sanitizer = new TextSanitizer($sanitizerFactory);

    // Each setting is looked up right before it is applied.
    $languageDetection = $applicationSettings->get('smwgFulltextLanguageDetection');
    $sanitizer->setLanguageDetection($languageDetection);

    $minTokenSize = $applicationSettings->get('smwgFulltextSearchMinTokenSize');
    $sanitizer->setMinTokenSize($minTokenSize);

    return $sanitizer;
}
/**
 * Checks that TextSanitizer::sanitize() returns the expected value when every
 * collaborator handed out by the sanitizer factory is a mock.
 *
 * The mocked Sanitizer returns the input text unchanged from `sanitizeWith`,
 * and the factory is required to be asked at least once for each of the
 * sanitizer, tokenizer, stopword analyzer and synonymizer.
 *
 * NOTE(review): the method name looks like a typo of "testSanitize"; it is
 * kept as-is here.
 *
 * @dataProvider textOnMockProvider
 *
 * @param string $text     text passed to sanitize() and returned by the mocked sanitizeWith()
 * @param string $expected value sanitize() is expected to produce
 */
public function testSanitizs($text, $expected)
{
    // Create the collaborator mocks, all with their constructors disabled.
    $mockedClasses = array(
        'sanitizer' => '\\Onoi\\Tesa\\Sanitizer',
        'stopwordAnalyzer' => '\\Onoi\\Tesa\\StopwordAnalyzer\\StopwordAnalyzer',
        'synonymizer' => '\\Onoi\\Tesa\\Synonymizer\\Synonymizer',
        'tokenizer' => '\\Onoi\\Tesa\\Tokenizer\\Tokenizer',
    );

    $mocks = array();

    foreach ($mockedClasses as $role => $className) {
        $mocks[$role] = $this->getMockBuilder($className)
            ->disableOriginalConstructor()
            ->getMock();
    }

    // The sanitizer mock simply echoes the input text.
    $mocks['sanitizer']->expects($this->atLeastOnce())
        ->method('sanitizeWith')
        ->will($this->returnValue($text));

    // Factory method => mock it has to hand out (each at least once).
    $factoryExpectations = array(
        'newSanitizer' => $mocks['sanitizer'],
        'newPreferredTokenizerByLanguage' => $mocks['tokenizer'],
        'newStopwordAnalyzerByLanguage' => $mocks['stopwordAnalyzer'],
        'newSynonymizerByLanguage' => $mocks['synonymizer'],
    );

    foreach ($factoryExpectations as $factoryMethod => $product) {
        $this->sanitizerFactory->expects($this->atLeastOnce())
            ->method($factoryMethod)
            ->will($this->returnValue($product));
    }

    $instance = new TextSanitizer($this->sanitizerFactory);
    $actual = $instance->sanitize($text);

    $this->assertEquals($expected, $actual);
}
/**
 * Folds the text carried by a single field change operation into $aggregate.
 *
 * Entries are keyed by "<s_id>:<p_id>". For TableChangeOp::OP_INSERT the
 * trimmed text is appended (space separated) to one string per key; for
 * TableChangeOp::OP_DELETE each sanitized text is pushed onto a list per key
 * so that deletes can be handled individually.
 *
 * Nothing is aggregated when the operation has no `p_id`, when the search
 * table reports the property as exempted, or when none of `o_blob`, `o_hash`
 * and `o_serialized` is present.
 *
 * @param mixed $type          compared strictly against TableChangeOp::OP_INSERT / OP_DELETE
 * @param object $fieldChangeOp change operation queried via has()/get() and updated via set()
 * @param array &$aggregate    accumulator, modified in place
 */
private function doAggregateFromFieldChangeOp($type, $fieldChangeOp, &$aggregate)
{
    $searchTable = $this->searchTableUpdater->getSearchTable();

    // Without a property id there is nothing to index
    if (!$fieldChangeOp->has('p_id')) {
        return;
    }

    // Exempted property -> out
    if ($searchTable->isExemptedPropertyById($fieldChangeOp->get('p_id'))) {
        return;
    }

    // Only text components
    $hasTextComponent = $fieldChangeOp->has('o_blob')
        || $fieldChangeOp->has('o_hash')
        || $fieldChangeOp->has('o_serialized');

    if (!$hasTextComponent) {
        return;
    }

    // Re-map (url type)
    if ($fieldChangeOp->has('o_serialized')) {
        $serialized = $fieldChangeOp->get('o_serialized');
        $fieldChangeOp->set('o_blob', $serialized);
    }

    // Build a temporary stable key for the diff match
    $subjectId = $fieldChangeOp->get('s_id');
    $propertyId = $fieldChangeOp->get('p_id');
    $key = $subjectId . ':' . $propertyId;

    // If the blob value is empty then the DIHandler has put any text < 72
    // into the hash field
    $text = $fieldChangeOp->get('o_blob');

    if ($text === null || $text === '') {
        $text = $fieldChangeOp->get('o_hash');
    }

    // First encounter of the key: deletes collect into a list, anything else
    // starts from an empty string
    if (!isset($aggregate[$key])) {
        if ($type === TableChangeOp::OP_DELETE) {
            $aggregate[$key] = array();
        } else {
            $aggregate[$key] = '';
        }
    }

    // Concatenate the inserts but keep the deletes separate to allow
    // for them to be removed individually
    if ($type === TableChangeOp::OP_INSERT) {
        $combined = $aggregate[$key] . ' ' . trim($text);
        $aggregate[$key] = trim($combined);
        return;
    }

    if ($type === TableChangeOp::OP_DELETE) {
        $aggregate[$key][] = $this->textSanitizer->sanitize($text);
    }
}
/**
 * Writes the search text for a subject/property pair to the search table.
 *
 * When $text is blank after trimming, the call is forwarded to delete()
 * instead. Otherwise the row matching (s_id, p_id) gets the sanitized text
 * in `o_text` and the first 32 characters of the raw text in `o_sort`.
 *
 * @since 2.5
 *
 * @param integer $sid
 * @param integer $pid
 * @param string $text
 *
 * @return mixed result of delete() when $text is blank; otherwise no value is returned
 */
public function update($sid, $pid, $text)
{
    $isBlank = trim($text) === '';

    if ($isBlank) {
        return $this->delete($sid, $pid);
    }

    $tableName = $this->searchTable->getTableName();

    $values = array(
        'o_text' => $this->textSanitizer->sanitize($text),
        'o_sort' => mb_substr($text, 0, 32),
    );

    // Ids are cast to integers before being used as row conditions.
    $conditions = array(
        's_id' => (int) $sid,
        'p_id' => (int) $pid,
    );

    $this->connection->update($tableName, $values, $conditions, __METHOD__);
}