コード例 #1
0
ファイル: Highlighter.php プロジェクト: ngangchill/po
 /**
  * {@inheritdoc}
  *
  * Forwards the words to the document's highlightExtended(), using this
  * object's wrapWords() method as the callback and an empty array as the
  * third argument.
  */
 public function highlight($words)
 {
     $wrapCallback = [$this, 'wrapWords'];
     $callbackParams = [];

     $this->_doc->highlightExtended($words, $wrapCallback, $callbackParams);
 }
コード例 #2
0
 /**
  * Highlight matches inside an HTML fragment and return the resulting body markup.
  *
  * The fragment is transcoded to UTF-8 with iconv(), wrapped into a minimal
  * HTML document that carries a UTF-8 content-type META tag, and loaded as an
  * HTML document. The return value is whatever getHTMLBody() yields for that
  * document after highlighting.
  *
  * @param string $inputHTMLFragment HTML fragment to process
  * @param string $encoding Encoding of $inputHTMLFragment
  * @param Highlighter|null $highlighter Highlighter to use; a DefaultHighlighter is created when null
  * @return string
  */
 public function htmlFragmentHighlightMatches($inputHTMLFragment, $encoding = 'UTF-8', $highlighter = null)
 {
     $activeHighlighter = (null !== $highlighter) ? $highlighter : new DefaultHighlighter();

     // "//IGNORE" is handed to iconv() as part of the target charset.
     $utf8Fragment = iconv($encoding, 'UTF-8//IGNORE', $inputHTMLFragment);
     $wrappedHTML = '<html><head><META HTTP-EQUIV="Content-type" CONTENT="text/html; charset=UTF-8"/></head><body>'
         . $utf8Fragment
         . '</body></html>';

     $htmlDocument = Document\HTML::loadHTML($wrappedHTML);
     $activeHighlighter->setDocument($htmlDocument);
     $this->_highlightMatches($activeHighlighter);

     return $htmlDocument->getHTMLBody();
 }
コード例 #3
0
ファイル: indexer.php プロジェクト: ntvis/search_lucene
 /**
  * Build a Lucene document for a file and pass it to the index.
  *
  * Content is extracted only when the file's storage is local. The loader is
  * chosen by mime type first and by the lower-cased file extension second.
  * For non-local storages only the metadata fields below are stored.
  *
  * @param File $file the file to be indexed
  * @param bool $commit forwarded unchanged to the index's updateFile()
  *
  * @return bool true once the document has been handed to the index; no path in this method returns false
  * @throws NotIndexedException when a local file matches none of the handled mime types or extensions
  */
 public function indexFile(File $file, $commit = true)
 {
     // The mime type and the file extension together decide how content is extracted.
     $mimeType = $file->getMimeType();
     $extension = strtolower(pathinfo($file->getName(), PATHINFO_EXTENSION));

     // Start with a plain document; a specialised loader may replace it below.
     $document = new Document();

     $storage = $file->getStorage();
     if ($storage->isLocal()) {
         $localPath = $storage->getLocalFile($file->getInternalPath());

         if ('text/html' === $mimeType) {
             // TODO could be indexed, even if not local
             $document = HTML::loadHTML($file->getContent());
         } elseif ('text/' === substr($mimeType, 0, 5) || 'application/x-tex' === $mimeType) {
             $content = $file->getContent();
             // Loose comparison kept on purpose: matches the original emptiness check.
             if ($content != '') {
                 $document->addField(Document\Field::UnStored('body', $content));
             }
         } elseif ('application/pdf' === $mimeType) {
             $document = Pdf::loadPdf($file->getContent());
         } elseif ('docx' === $extension) {
             // the zend classes only understand docx and not doc files
             $document = Document\Docx::loadDocxFile($localPath);
         } elseif ('xlsx' === $extension) {
             $document = Document\Xlsx::loadXlsxFile($localPath);
         } elseif ('pptx' === $extension) {
             $document = Document\Pptx::loadPptxFile($localPath);
         } elseif ('odt' === $extension) {
             $document = Odt::loadOdtFile($localPath);
         } elseif ('ods' === $extension) {
             $document = Ods::loadOdsFile($localPath);
         } else {
             throw new NotIndexedException();
         }
     }

     // The filecache id is the unique key used to look the document up on delete.
     $document->addField(Document\Field::Keyword('fileId', $file->getId()));
     // The path is stored so it can be shown in search results.
     $document->addField(Document\Field::Text('path', $file->getPath(), 'UTF-8'));
     $document->addField(Document\Field::unIndexed('mtime', $file->getMTime()));
     $document->addField(Document\Field::unIndexed('size', $file->getSize()));
     $document->addField(Document\Field::unIndexed('mimetype', $mimeType));

     $this->index->updateFile($document, $file->getId(), $commit);

     return true;
 }
コード例 #4
0
 /**
  * Asserts that a field of the parsed "document split.xhtml" fixture contains
  * the given term (case-insensitive match via stristr()).
  *
  * Data sets flagged as skipped are reported as skipped tests instead.
  *
  * @dataProvider dataProviderXHTML
  */
 function testParseXHTMLSplit($term, $field, $descriptiveLocation, $skipped)
 {
     if ($skipped) {
         $this->markTestSkipped('TODO search ' . $descriptiveLocation . ' in ' . $field);
     }

     $fixturePath = __DIR__ . '/data/libreoffice/document split.xhtml';
     $parsedDocument = HTML::loadHTML(file_get_contents($fixturePath), true);
     $fieldValue = $parsedDocument->getFieldValue($field);

     // stristr() yields a string only when the term was found.
     $termFound = is_string(stristr($fieldValue, $term));
     $failureMessage = $field . '/' . $descriptiveLocation . ' does not contain "' . $term . '" in ' . $fieldValue;

     $this->assertTrue($termFound, $failureMessage);
 }
コード例 #5
0
ファイル: DocumentTest.php プロジェクト: tonylow/skillslink
 /**
  * Verifies that getLinks() includes or omits rel="nofollow" links depending
  * on the static exclude-nofollow setting of Document\HTML.
  *
  * The setting is static, so the value found on entry is restored in a
  * finally block. This keeps a failed assertion from leaking the changed
  * setting into later tests.
  */
 public function testHtmlNoFollowLinks()
 {
     $html = '<HTML>' . '<HEAD><TITLE>Page title</TITLE></HEAD>' . '<BODY>' . 'Document body.' . '<a href="link1.html">Link 1</a>.' . '<a href="link2.html" rel="nofollow">Link 1</a>.' . '</BODY>' . '</HTML>';
     $oldNoFollowValue = Document\HTML::getExcludeNoFollowLinks();
     try {
         // With exclusion off, both links must be reported.
         Document\HTML::setExcludeNoFollowLinks(false);
         $doc1 = Document\HTML::loadHTML($html);
         $this->assertTrue($doc1 instanceof Document\HTML);
         $this->assertTrue(array_values($doc1->getLinks()) == array('link1.html', 'link2.html'));

         // With exclusion on, the rel="nofollow" link must be dropped.
         Document\HTML::setExcludeNoFollowLinks(true);
         $doc2 = Document\HTML::loadHTML($html);
         $this->assertTrue($doc2 instanceof Document\HTML);
         $this->assertTrue(array_values($doc2->getLinks()) == array('link1.html'));
     } finally {
         // Put the static setting back to the value captured on entry.
         Document\HTML::setExcludeNoFollowLinks($oldNoFollowValue);
     }
 }
コード例 #6
0
 /**
  * Highlight the given words using the current colour of the palette, then
  * advance the palette position (wrapping around at the end).
  *
  * @param string|array $words  Words to highlight. They could be organized using the array or string.
  */
 public function highlight($words)
 {
     $position = $this->_currentColorIndex;
     $palette = $this->_highlightColors;

     // Read the colour before moving on, so this call uses the current entry.
     $currentColor = $palette[$position];
     $this->_currentColorIndex = ($position + 1) % count($palette);

     $this->_doc->highlight($words, $currentColor);
 }
コード例 #7
0
ファイル: Highlighter.php プロジェクト: nqxcode/search-engine
 /**
  * {@inheritdoc}
  *
  * Forwards the words to the document's highlightExtended(), using this
  * object's applyColour() method as the callback and an empty array as the
  * third argument.
  */
 public function highlight($words)
 {
     $colourCallback = array($this, 'applyColour');
     $callbackParams = array();

     $this->doc->highlightExtended($words, $colourCallback, $callbackParams);
 }