/**
  * Builds a TextSanitizer and configures it from the application settings.
  *
  * Language detection is read from `smwgFulltextLanguageDetection` and the
  * minimum token size from `smwgFulltextSearchMinTokenSize`.
  *
  * @since 2.5
  *
  * @return TextSanitizer
  */
 public function newTextSanitizer()
 {
     $config = ApplicationFactory::getInstance()->getSettings();

     $sanitizer = new TextSanitizer(new SanitizerFactory());

     // Apply each setting right after it has been read
     $languageDetection = $config->get('smwgFulltextLanguageDetection');
     $sanitizer->setLanguageDetection($languageDetection);

     $minTokenSize = $config->get('smwgFulltextSearchMinTokenSize');
     $sanitizer->setMinTokenSize($minTokenSize);

     return $sanitizer;
 }
 /**
  * Verifies that sanitize() returns the expected text when every collaborator
  * handed out by the sanitizer factory is a mock.
  *
  * @dataProvider textOnMockProvider
  */
 public function testSanitizs($text, $expected)
 {
     // One constructor-less mock per collaborator, addressed by role
     $collaborators = array(
         'sanitizer' => '\\Onoi\\Tesa\\Sanitizer',
         'stopwordAnalyzer' => '\\Onoi\\Tesa\\StopwordAnalyzer\\StopwordAnalyzer',
         'synonymizer' => '\\Onoi\\Tesa\\Synonymizer\\Synonymizer',
         'tokenizer' => '\\Onoi\\Tesa\\Tokenizer\\Tokenizer'
     );

     $doubles = array();

     foreach ($collaborators as $role => $className) {
         $doubles[$role] = $this->getMockBuilder($className)
             ->disableOriginalConstructor()
             ->getMock();
     }

     // The sanitizer mock hands back the input text unchanged
     $doubles['sanitizer']->expects($this->atLeastOnce())
         ->method('sanitizeWith')
         ->will($this->returnValue($text));

     // Every factory method has to be called at least once and returns the
     // mock that belongs to its role
     $factoryMethods = array(
         'newSanitizer' => 'sanitizer',
         'newPreferredTokenizerByLanguage' => 'tokenizer',
         'newStopwordAnalyzerByLanguage' => 'stopwordAnalyzer',
         'newSynonymizerByLanguage' => 'synonymizer'
     );

     foreach ($factoryMethods as $factoryMethod => $role) {
         $this->sanitizerFactory->expects($this->atLeastOnce())
             ->method($factoryMethod)
             ->will($this->returnValue($doubles[$role]));
     }

     $instance = new TextSanitizer($this->sanitizerFactory);

     $this->assertEquals($expected, $instance->sanitize($text));
 }
 /**
  * Adds the text carried by a single field change to the aggregate, using
  * the "s_id:p_id" pair of the change as key.
  *
  * Insert text is trimmed and appended to one space separated string per
  * key, delete text is sanitized and collected as individual list entries.
  * A change is ignored when it has no `p_id`, when its property is exempted
  * by the search table, or when it has none of the `o_blob`, `o_hash` and
  * `o_serialized` fields.
  *
  * @param mixed $type compared against TableChangeOp::OP_INSERT and TableChangeOp::OP_DELETE
  * @param object $fieldChangeOp change object providing has(), get() and set()
  * @param array &$aggregate collected text per key, modified in place
  */
 private function doAggregateFromFieldChangeOp($type, $fieldChangeOp, &$aggregate)
 {
     $table = $this->searchTableUpdater->getSearchTable();

     // No property, nothing to index
     if (!$fieldChangeOp->has('p_id')) {
         return;
     }

     // Exempted property -> out
     if ($table->isExemptedPropertyById($fieldChangeOp->get('p_id'))) {
         return;
     }

     // Only text components
     $carriesText = $fieldChangeOp->has('o_blob')
         || $fieldChangeOp->has('o_hash')
         || $fieldChangeOp->has('o_serialized');

     if (!$carriesText) {
         return;
     }

     // Re-map (url type): the serialized value takes the place of the blob
     if ($fieldChangeOp->has('o_serialized')) {
         $serialized = $fieldChangeOp->get('o_serialized');
         $fieldChangeOp->set('o_blob', $serialized);
     }

     // Temporary key that stays stable for the diff match
     $subjectId = $fieldChangeOp->get('s_id');
     $propertyId = $fieldChangeOp->get('p_id');
     $key = $subjectId . ':' . $propertyId;

     // An empty blob means the DIHandler has put any text < 72 into the
     // hash field, so fall back to it
     $text = $fieldChangeOp->get('o_blob');

     if (in_array($text, array(null, ''), true)) {
         $text = $fieldChangeOp->get('o_hash');
     }

     $isDelete = $type === TableChangeOp::OP_DELETE;

     // First occurrence of the key: deletes start as a list, anything else
     // as an empty string
     if (!isset($aggregate[$key])) {
         if ($isDelete) {
             $aggregate[$key] = array();
         } else {
             $aggregate[$key] = '';
         }
     }

     // Inserts are concatenated into a single string
     if ($type === TableChangeOp::OP_INSERT) {
         $aggregate[$key] = trim($aggregate[$key] . ' ' . trim($text));
         return;
     }

     // Deletes are kept separate so that they can be removed individually
     if ($isDelete) {
         $aggregate[$key][] = $this->textSanitizer->sanitize($text);
     }
 }
 /**
  * Updates the index row identified by the subject/property pair with new
  * text.
  *
  * When the text is blank, or the sanitizer reduces it to an empty string,
  * there is nothing left to index and the row is removed via delete()
  * instead of being kept with an empty `o_text`.
  *
  * @since 2.5
  *
  * @param integer $sid
  * @param integer $pid
  * @param string $text
  *
  * @return mixed the result of delete() when the row is removed, otherwise nothing
  */
 public function update($sid, $pid, $text)
 {
     if (trim($text) === '') {
         return $this->delete($sid, $pid);
     }

     // Sanitize once so the result can be checked before it is stored
     $indexableText = $this->textSanitizer->sanitize($text);

     // Non-blank text can still end up empty after sanitization; treat it
     // like blank text. The strict comparison leaves any other result
     // untouched.
     if ($indexableText === '') {
         return $this->delete($sid, $pid);
     }

     $this->connection->update(
         $this->searchTable->getTableName(),
         array(
             'o_text' => $indexableText,
             // Sort key is the first 32 characters of the unsanitized text
             'o_sort' => mb_substr($text, 0, 32)
         ),
         array(
             's_id' => (int) $sid,
             'p_id' => (int) $pid
         ),
         __METHOD__
     );
 }