/**
 * Show details for metaphone value
 *
 * Renders a heading with the metaphone string followed by a table of every
 * word in index_words whose metaphone hash equals the given value, and
 * appends the go-back link list.
 *
 * @param int $metaphone Metaphone integer hash
 * @return string HTML content; an empty string when no word matches the hash
 * @todo Define visibility
 */
public function showDetailsForMetaphone($metaphone)
{
    // Always start from a defined string so the method never appends to, or
    // returns, an undefined variable.
    $content = '';
    // Find all words sharing this metaphone hash, ordered by baseword:
    $ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows('index_words.*', 'index_words', 'index_words.metaphone = ' . (int)$metaphone, '', 'index_words.baseword', '');
    // Guard the result type before counting; the original only checked
    // is_array() after count() and after reading $ftrows[0].
    if (!is_array($ftrows) || count($ftrows) === 0) {
        return $content;
    }
    // The heading is kept outside the table: an <h4> is not valid as a direct
    // child of <table>. The metaphone string is escaped for HTML output.
    $heading = '<h4>Metaphone: ' . htmlspecialchars($this->indexerObj->metaphone($ftrows[0]['baseword'], 1)) . '</h4>';
    $tableRows = ' <tr class="tableheader bgColor5"> <td>Word</td> <td>Is stopword?</td> </tr>';
    foreach ($ftrows as $wDat) {
        $tableRows .= ' <tr class="bgColor4"> <td>' . $this->linkWordDetails(htmlspecialchars($wDat['baseword']), $wDat['wid']) . '</td> <td>' . htmlspecialchars($wDat['is_stopword'] ? 'YES' : 'No') . '</td> </tr>';
    }
    $content = $heading . ' <table border="0" cellspacing="1" cellpadding="2" class="c-list">' . $tableRows . ' </table>';
    // Sanity check: recomputing the hash from the first word should give the
    // requested hash. Loose comparison is kept on purpose because $metaphone
    // may arrive as a numeric string.
    if ($this->indexerObj->metaphone($ftrows[0]['baseword']) != $metaphone) {
        $content .= 'ERROR: Metaphone string and hash did not match for some reason!?';
    }
    // Add go-back link:
    $content .= $this->linkList();
    return $content;
}
/**
 * Asserts that extractBaseHref() yields the href value of a base tag that is
 * written with mixed-case tag and attribute names.
 *
 * @test
 */
public function extractBaseHrefExtractsBaseHref()
{
    $expectedHref = 'http://example.com/';
    $markup = sprintf('<html><head><Base Href="%s" /></head></html>', $expectedHref);
    $this->assertEquals($expectedHref, $this->fixture->extractBaseHref($markup));
}
/**
 * Checks that base HREF is extracted correctly
 *
 * The method name does not start with "test", so the @test annotation is
 * what makes PHPUnit pick it up; without it this check was never executed.
 *
 * @test
 * @return void
 */
public function textExtractBaseHref()
{
    $baseHref = 'http://example.com/';
    $html = '<html><head><Base Href="' . $baseHref . '" /></head></html>';
    // The subject under test is the base href extraction, not the hyperlink
    // extraction the original called by mistake.
    $result = $this->indexer->extractBaseHref($html);
    $this->assertEquals($baseHref, $result, 'Incorrect base href was extracted');
}
/**
 * Statistics for a given page hash
 *
 * Adds a back button to the doc header, loads the index_phash row for the
 * hash (redirecting to the "statistic" action when there is none), then
 * collects debug info, the page record and its keywords, the indexed words,
 * grouped metaphone collisions, sections and the top-20 words by count and
 * by frequency, and assigns all of it to the view.
 *
 * @param int $pageHash
 * @return void
 */
public function statisticDetailsAction($pageHash = 0)
{
    $pageHash = (int)$pageHash;

    // Set back button
    $icon = $this->view->getModuleTemplate()->getIconFactory()->getIcon('actions-view-go-up', Icon::SIZE_SMALL);
    $backButton = $this->view->getModuleTemplate()->getDocHeaderComponent()->getButtonBar()
        ->makeLinkButton()
        ->setTitle($this->getLanguageService()->sL('LLL:EXT:indexed_search/Resources/Private/Language/locallang.xml:administration.back'))
        ->setIcon($icon)
        ->setHref($this->getHref('Administration', 'statistic'));
    $this->view->getModuleTemplate()->getDocHeaderComponent()->getButtonBar()->addButton($backButton);

    $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable('index_phash');
    $pageHashRow = $queryBuilder
        ->select('*')
        ->from('index_phash')
        ->where(
            $queryBuilder->expr()->eq('phash', $queryBuilder->createNamedParameter($pageHash, \PDO::PARAM_INT))
        )
        ->execute()
        ->fetch();
    if (!is_array($pageHashRow)) {
        $this->redirect('statistic');
    }

    $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable('index_debug');
    $debugRows = $queryBuilder
        ->select('*')
        ->from('index_debug')
        ->where(
            $queryBuilder->expr()->eq('phash', $queryBuilder->createNamedParameter($pageHash, \PDO::PARAM_INT))
        )
        ->execute()
        ->fetchAll();
    $debugInfo = [];
    $lexer = '';
    // An empty result set is still an array, so check for the actual row and
    // payload instead of is_array() to avoid reading a non-existent $debugRows[0].
    if (!empty($debugRows[0]['debuginfo'])) {
        // Only plain data is expected here; object instantiation is disabled.
        // NOTE(review): assumes debuginfo never holds serialized objects - confirm against the writer.
        $unserialized = unserialize($debugRows[0]['debuginfo'], ['allowed_classes' => false]);
        // unserialize() yields false on corrupt data; keep the empty defaults then.
        if (is_array($unserialized)) {
            $debugInfo = $unserialized;
            $lexer = isset($debugInfo['lexer']) ? $debugInfo['lexer'] : '';
            unset($debugInfo['lexer']);
        }
    }

    $pageRecord = BackendUtility::getRecord('pages', $pageHashRow['data_page_id']);
    // Flip so keywords become array keys and membership can be tested with isset().
    $keywords = is_array($pageRecord)
        ? array_flip(GeneralUtility::trimExplode(',', $pageRecord['keywords'], true))
        : [];

    $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable('index_words');
    $wordRecords = $queryBuilder
        ->select('index_words.*', 'index_rel.*')
        ->from('index_words')
        ->from('index_rel')
        ->where(
            $queryBuilder->expr()->eq('index_rel.phash', $queryBuilder->createNamedParameter($pageHash, \PDO::PARAM_INT)),
            $queryBuilder->expr()->eq('index_words.wid', $queryBuilder->quoteIdentifier('index_rel.wid'))
        )
        ->orderBy('index_words.baseword')
        ->execute()
        ->fetchAll();
    // Flag every indexed word that is also listed in the page's keywords field.
    foreach ($wordRecords as $id => $row) {
        if (isset($keywords[$row['baseword']])) {
            $wordRecords[$id]['is_keyword'] = true;
        }
    }

    $metaphoneRows = $metaphone = [];
    if ($this->enableMetaphoneSearch && is_array($wordRecords)) {
        // Group metaphone hash
        foreach ($wordRecords as $row) {
            $metaphoneRows[$row['metaphone']][] = $row['baseword'];
        }
        // Only hashes shared by more than one word are reported.
        foreach ($metaphoneRows as $hash => $words) {
            if (count($words) > 1) {
                $metaphone[] = [
                    'metaphone' => $this->indexer->metaphone($words[0], 1),
                    // NOTE(review): this positional entry duplicates 'hash'; kept because a template may read index 0 - confirm.
                    $hash,
                    'words' => $words,
                    'hash' => $hash
                ];
            }
        }
    }

    // sections
    $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable('index_section');
    $sections = $queryBuilder
        ->select('*')
        ->from('index_section')
        ->where(
            $queryBuilder->expr()->eq('phash', $queryBuilder->createNamedParameter($pageHash, \PDO::PARAM_INT))
        )
        ->execute()
        ->fetchAll();

    // top words
    $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable('index_words');
    $topCountWords = $queryBuilder
        ->select('index_words.baseword', 'index_words.metaphone', 'index_rel.*')
        ->from('index_words')
        ->from('index_rel')
        ->setMaxResults(20)
        ->where(
            $queryBuilder->expr()->eq('index_rel.phash', $queryBuilder->createNamedParameter($pageHash, \PDO::PARAM_INT)),
            $queryBuilder->expr()->eq('index_words.is_stopword', $queryBuilder->createNamedParameter(0, \PDO::PARAM_INT)),
            $queryBuilder->expr()->eq('index_words.wid', $queryBuilder->quoteIdentifier('index_rel.wid'))
        )
        ->orderBy('index_rel.count', 'DESC')
        ->execute()
        ->fetchAll();

    // top frequency
    $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable('index_words');
    $topFrequency = $queryBuilder
        ->select('index_words.baseword', 'index_words.metaphone', 'index_rel.*')
        ->from('index_words')
        ->from('index_rel')
        ->setMaxResults(20)
        ->where(
            $queryBuilder->expr()->eq('index_rel.phash', $queryBuilder->createNamedParameter($pageHash, \PDO::PARAM_INT)),
            $queryBuilder->expr()->eq('index_words.is_stopword', $queryBuilder->createNamedParameter(0, \PDO::PARAM_INT)),
            $queryBuilder->expr()->eq('index_words.wid', $queryBuilder->quoteIdentifier('index_rel.wid'))
        )
        ->orderBy('index_rel.freq', 'DESC')
        ->execute()
        ->fetchAll();

    $this->view->assignMultiple([
        'phash' => $pageHash,
        'phashRow' => $pageHashRow,
        'words' => $wordRecords,
        'sections' => $sections,
        'topCount' => $topCountWords,
        'topFrequency' => $topFrequency,
        'debug' => $debugInfo,
        'lexer' => $lexer,
        'metaphone' => $metaphone,
        'page' => $pageRecord,
        'keywords' => $keywords
    ]);
}
/**
 * Tests whether indexer can extract content between multiple pairs of "TYPO3SEARCH" markers
 *
 * Builds an HTML document containing two TYPO3SEARCH_begin/TYPO3SEARCH_end
 * marker pairs surrounded by non-searchable content, passes it to
 * typoSearchTags() and asserts that the call returns TRUE and that $body
 * afterwards equals the two searchable sections only.
 *
 * The whitespace inside both heredocs is part of the compared strings, so it
 * must not be reformatted.
 *
 * @test
 */
public function typoSearchTagsHandlesMultipleMarkerPairs() { $body = <<<EOT <html> <head> <meta http-equiv="Content-Type" content="text/html;charset=utf-8"/> <title>Some Title</title> <link href='css/normalize.css' rel='stylesheet' type='text/css'/> </head> <body> <div> <div class="non_searchable"> not searchable content </div> <!--TYPO3SEARCH_begin--> <div class="searchable"> lorem ipsum </div> <!--TYPO3SEARCH_end--> <div class="non_searchable"> not searchable content </div> <!--TYPO3SEARCH_begin--> <div class="searchable"> lorem ipsum2 </div> <!--TYPO3SEARCH_end--> <div class="non_searchable"> not searchable content </div> </body> </html> EOT; $expected = <<<EOT <div class="searchable"> lorem ipsum </div> <div class="searchable"> lorem ipsum2 </div> EOT; $result = $this->subject->typoSearchTags($body); $this->assertTrue($result); $this->assertEquals($expected, $body); }
/**
 * Statistics for a given page hash
 *
 * Adds a back button to the doc header, loads the index_phash row for the
 * hash (redirecting to the "statistic" action when there is none), then
 * collects debug info, the page record and its keywords, the indexed words,
 * grouped metaphone collisions, sections and the top-20 words by count and
 * by frequency, and assigns all of it to the view.
 *
 * @param int $pageHash
 * @return void
 */
public function statisticDetailsAction($pageHash = 0)
{
    // Cast once; the integer is then safe to concatenate into the WHERE clauses below.
    $pageHash = (int)$pageHash;

    // Set back button
    $icon = $this->view->getModuleTemplate()->getIconFactory()->getIcon('actions-view-go-up', Icon::SIZE_SMALL);
    $backButton = $this->view->getModuleTemplate()->getDocHeaderComponent()->getButtonBar()
        ->makeLinkButton()
        ->setTitle($this->getLanguageService()->sL('LLL:EXT:indexed_search/Resources/Private/Language/locallang.xml:administration.back'))
        ->setIcon($icon)
        ->setHref($this->getHref('Administration', 'statistic'));
    $this->view->getModuleTemplate()->getDocHeaderComponent()->getButtonBar()->addButton($backButton);

    $db = $this->getDatabaseConnection();
    $pageHashRow = $db->exec_SELECTgetSingleRow('*', 'index_phash', 'phash = ' . $pageHash);
    if (!is_array($pageHashRow)) {
        $this->redirect('statistic');
    }

    $debugRows = $db->exec_SELECTgetRows('*', 'index_debug', 'phash = ' . $pageHash);
    $debugInfo = array();
    $lexer = '';
    // An empty result is still an array, so also require the first row and its
    // payload before reading $debugRows[0].
    if (is_array($debugRows) && !empty($debugRows[0]['debuginfo'])) {
        // NOTE(review): unserialize() on database content; assumed to be written only by the indexer - confirm.
        $unserialized = unserialize($debugRows[0]['debuginfo']);
        // unserialize() yields false on corrupt data; keep the empty defaults then.
        if (is_array($unserialized)) {
            $debugInfo = $unserialized;
            $lexer = isset($debugInfo['lexer']) ? $debugInfo['lexer'] : '';
            unset($debugInfo['lexer']);
        }
    }

    $pageRecord = BackendUtility::getRecord('pages', $pageHashRow['data_page_id']);
    // Flip so keywords become array keys and membership can be tested with isset().
    $keywords = is_array($pageRecord)
        ? array_flip(GeneralUtility::trimExplode(',', $pageRecord['keywords'], true))
        : array();

    $wordRecords = $db->exec_SELECTgetRows(
        'index_words.*, index_rel.*',
        'index_rel, index_words',
        'index_rel.phash = ' . $pageHash . ' AND index_words.wid = index_rel.wid',
        '',
        'index_words.baseword'
    );
    // Normalise before iterating: the original looped over the result first
    // and only afterwards checked that it is an array.
    if (!is_array($wordRecords)) {
        $wordRecords = array();
    }
    // Flag every indexed word that is also listed in the page's keywords field.
    foreach ($wordRecords as $id => $row) {
        if (isset($keywords[$row['baseword']])) {
            $wordRecords[$id]['is_keyword'] = true;
        }
    }

    $metaphoneRows = $metaphone = array();
    if ($this->enableMetaphoneSearch) {
        // Group metaphone hash
        foreach ($wordRecords as $row) {
            $metaphoneRows[$row['metaphone']][] = $row['baseword'];
        }
        // Only hashes shared by more than one word are reported.
        foreach ($metaphoneRows as $hash => $words) {
            if (count($words) > 1) {
                $metaphone[] = array(
                    'metaphone' => $this->indexer->metaphone($words[0], 1),
                    // NOTE(review): this positional entry duplicates 'hash'; kept because a template may read index 0 - confirm.
                    $hash,
                    'words' => $words,
                    'hash' => $hash
                );
            }
        }
    }

    $this->view->assignMultiple(array(
        'phash' => $pageHash,
        'phashRow' => $pageHashRow,
        'words' => $wordRecords,
        'sections' => $db->exec_SELECTgetRows(
            '*',
            'index_section',
            'index_section.phash = ' . $pageHash
        ),
        'topCount' => $db->exec_SELECTgetRows(
            'index_words.baseword, index_words.metaphone, index_rel.*',
            'index_rel, index_words',
            'index_rel.phash = ' . $pageHash . ' AND index_words.wid = index_rel.wid AND index_words.is_stopword=0',
            '',
            'index_rel.count DESC',
            '20'
        ),
        'topFrequency' => $db->exec_SELECTgetRows(
            'index_words.baseword, index_words.metaphone, index_rel.*',
            'index_rel, index_words',
            'index_rel.phash = ' . $pageHash . ' AND index_words.wid = index_rel.wid AND index_words.is_stopword=0',
            '',
            'index_rel.freq DESC',
            '20'
        ),
        'debug' => $debugInfo,
        'lexer' => $lexer,
        'metaphone' => $metaphone,
        'page' => $pageRecord,
        'keywords' => $keywords
    ));
}
/**
 * Check if the tables provided are configured for usage.
 * This becomes necessary for extensions that provide additional database
 * functionality like indexed_search_mysql.
 *
 * Thin wrapper that forwards the list unchanged to the static
 * Indexer::isTableUsed() and hands back its result.
 *
 * @param string $table_list Comma-separated list of tables
 * @return bool TRUE if given tables are enabled
 */
protected function isTableUsed($table_list)
{
    $tablesAreUsed = \TYPO3\CMS\IndexedSearch\Indexer::isTableUsed($table_list);

    return $tablesAreUsed;
}
/**
 * Statistics for a given page hash
 *
 * Loads the index_phash row for the hash (redirecting to the "statistic"
 * action when there is none), then collects debug info, the page record and
 * its keywords, the indexed words, grouped metaphone collisions, sections
 * and the top-20 words by count and by frequency, and assigns all of it to
 * the view.
 *
 * @param int $pageHash
 * @return void
 */
public function statisticDetailsAction($pageHash = 0)
{
    // Cast once; the integer is then safe to concatenate into the WHERE clauses below.
    $pageHash = (int)$pageHash;
    $db = $this->getDatabaseConnection();

    $pageHashRow = $db->exec_SELECTgetSingleRow('*', 'index_phash', 'phash = ' . $pageHash);
    if (!is_array($pageHashRow)) {
        $this->redirect('statistic');
    }

    $debugRows = $db->exec_SELECTgetRows('*', 'index_debug', 'phash = ' . $pageHash);
    $debugInfo = array();
    $lexer = '';
    // An empty result is still an array, so also require the first row and its
    // payload before reading $debugRows[0].
    if (is_array($debugRows) && !empty($debugRows[0]['debuginfo'])) {
        // NOTE(review): unserialize() on database content; assumed to be written only by the indexer - confirm.
        $unserialized = unserialize($debugRows[0]['debuginfo']);
        // unserialize() yields false on corrupt data; keep the empty defaults then.
        if (is_array($unserialized)) {
            $debugInfo = $unserialized;
            $lexer = isset($debugInfo['lexer']) ? $debugInfo['lexer'] : '';
            unset($debugInfo['lexer']);
        }
    }

    $pageRecord = BackendUtility::getRecord('pages', $pageHashRow['data_page_id']);
    // Flip so keywords become array keys and membership can be tested with isset().
    $keywords = is_array($pageRecord)
        ? array_flip(GeneralUtility::trimExplode(',', $pageRecord['keywords'], TRUE))
        : array();

    $wordRecords = $db->exec_SELECTgetRows(
        'index_words.*, index_rel.*',
        'index_rel, index_words',
        'index_rel.phash = ' . $pageHash . ' AND index_words.wid = index_rel.wid',
        '',
        'index_words.baseword'
    );
    // Normalise before iterating: the original looped over the result first
    // and only afterwards checked that it is an array.
    if (!is_array($wordRecords)) {
        $wordRecords = array();
    }
    // Flag every indexed word that is also listed in the page's keywords field.
    foreach ($wordRecords as $id => $row) {
        if (isset($keywords[$row['baseword']])) {
            $wordRecords[$id]['is_keyword'] = TRUE;
        }
    }

    $metaphoneRows = $metaphone = array();
    if ($this->enableMetaphoneSearch) {
        // Group metaphone hash
        foreach ($wordRecords as $row) {
            $metaphoneRows[$row['metaphone']][] = $row['baseword'];
        }
        // Only hashes shared by more than one word are reported.
        foreach ($metaphoneRows as $hash => $words) {
            if (count($words) > 1) {
                $metaphone[] = array(
                    'metaphone' => $this->indexer->metaphone($words[0], 1),
                    // NOTE(review): this positional entry duplicates 'hash'; kept because a template may read index 0 - confirm.
                    $hash,
                    'words' => $words,
                    'hash' => $hash
                );
            }
        }
    }

    $this->view->assignMultiple(array(
        'phash' => $pageHash,
        'phashRow' => $pageHashRow,
        'words' => $wordRecords,
        'sections' => $db->exec_SELECTgetRows(
            '*',
            'index_section',
            'index_section.phash = ' . $pageHash
        ),
        'topCount' => $db->exec_SELECTgetRows(
            'index_words.baseword, index_words.metaphone, index_rel.*',
            'index_rel, index_words',
            'index_rel.phash = ' . $pageHash . ' AND index_words.wid = index_rel.wid AND index_words.is_stopword=0',
            '',
            'index_rel.count DESC',
            '20'
        ),
        'topFrequency' => $db->exec_SELECTgetRows(
            'index_words.baseword, index_words.metaphone, index_rel.*',
            'index_rel, index_words',
            'index_rel.phash = ' . $pageHash . ' AND index_words.wid = index_rel.wid AND index_words.is_stopword=0',
            '',
            'index_rel.freq DESC',
            '20'
        ),
        'debug' => $debugInfo,
        'lexer' => $lexer,
        'metaphone' => $metaphone,
        'page' => $pageRecord,
        'keywords' => $keywords
    ));
}
/**
 * Reads the content of an external file being indexed.
 *
 * Depending on the extension the file is converted with an external tool
 * configured in $this->app (pdfinfo/pdftotext, catdoc, ppthtml, xlhtml,
 * unzip, unrtf), read directly (txt, csv, html, htm, xml) or inspected for
 * EXIF data (jpg, jpeg, tif). The extracted text is then split into the
 * content array by the parent indexer object.
 *
 * @param string $ext File extension, eg. "pdf", "doc" etc.
 * @param string $absFile Absolute filename of file (must exist and be validated OK before calling function)
 * @param string $cPKey Pointer to section (zero for all other than PDF which will have an indication of pages into which the document should be split.)
 * @return array|bool|null Standard content array (title, description, keywords, body keys); FALSE if the extension is not supported or unknown; NULL if the tool required for the extension is not configured or, for PDF, no content could be extracted
 */
public function readFileContent($ext, $absFile, $cPKey)
{
    $contentArr = null;
    // Return immediately if initialization didn't set support up
    // (empty() also covers extensions that were never registered):
    if (empty($this->supportedExtensions[$ext])) {
        return false;
    }
    // Switch by file extension
    switch ($ext) {
        case 'pdf':
            if ($this->app['pdfinfo']) {
                $this->setLocaleForServerFileSystem();
                // Getting pdf-info:
                $cmd = $this->app['pdfinfo'] . ' ' . escapeshellarg($absFile);
                CommandUtility::exec($cmd, $res);
                $pdfInfo = $this->splitPdfInfo($res);
                unset($res);
                if ((int)$pdfInfo['pages']) {
                    // $cPKey has the form "<first>-<last>"; pad so a missing part does not
                    // raise an undefined offset.
                    list($low, $high) = array_pad(explode('-', (string)$cPKey), 2, 0);
                    // Get pdf content:
                    // Create temporary name
                    $tempFileName = GeneralUtility::tempnam('Typo3_indexer');
                    // Delete if exists, just to be safe.
                    @unlink($tempFileName);
                    // Page numbers are cast to int and the output file is escaped, so no
                    // part of the command line is passed to the shell unquoted.
                    $cmd = $this->app['pdftotext'] . ' -f ' . (int)$low . ' -l ' . (int)$high . ' -enc UTF-8 -q ' . escapeshellarg($absFile) . ' ' . escapeshellarg($tempFileName);
                    CommandUtility::exec($cmd);
                    if (@is_file($tempFileName)) {
                        $content = GeneralUtility::getUrl($tempFileName);
                        unlink($tempFileName);
                    } else {
                        $content = '';
                        $this->pObj->log_setTSlogMessage(sprintf($this->sL('LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:pdfToolsFailed'), $absFile), 2);
                    }
                    if ((string)$content !== '') {
                        $contentArr = $this->pObj->splitRegularContent($this->removeEndJunk($content));
                    }
                }
                if (!empty($pdfInfo['title'])) {
                    $contentArr['title'] = $pdfInfo['title'];
                }
                $this->setLocaleForServerFileSystem(true);
            }
            break;
        case 'doc':
            if ($this->app['catdoc']) {
                $this->setLocaleForServerFileSystem();
                $cmd = $this->app['catdoc'] . ' -d utf-8 ' . escapeshellarg($absFile);
                CommandUtility::exec($cmd, $res);
                $content = implode(LF, $res);
                unset($res);
                $contentArr = $this->pObj->splitRegularContent($this->removeEndJunk($content));
                $this->setLocaleForServerFileSystem(true);
            }
            break;
        case 'pps':
        case 'ppt':
            if ($this->app['ppthtml']) {
                $this->setLocaleForServerFileSystem();
                $cmd = $this->app['ppthtml'] . ' ' . escapeshellarg($absFile);
                CommandUtility::exec($cmd, $res);
                $content = implode(LF, $res);
                unset($res);
                $content = $this->pObj->convertHTMLToUtf8($content);
                $contentArr = $this->pObj->splitHTMLContent($this->removeEndJunk($content));
                $contentArr['title'] = basename($absFile);
                $this->setLocaleForServerFileSystem(true);
            }
            break;
        case 'xls':
            if ($this->app['xlhtml']) {
                $this->setLocaleForServerFileSystem();
                $cmd = $this->app['xlhtml'] . ' -nc -te ' . escapeshellarg($absFile);
                CommandUtility::exec($cmd, $res);
                $content = implode(LF, $res);
                unset($res);
                $content = $this->pObj->convertHTMLToUtf8($content);
                $contentArr = $this->pObj->splitHTMLContent($this->removeEndJunk($content));
                $contentArr['title'] = basename($absFile);
                $this->setLocaleForServerFileSystem(true);
            }
            break;
        case 'docx':
        case 'dotx':
        case 'pptx':
        case 'ppsx':
        case 'potx':
        case 'xlsx':
        case 'xltx':
            if ($this->app['unzip']) {
                $this->setLocaleForServerFileSystem();
                switch ($ext) {
                    case 'docx':
                    case 'dotx':
                        // Read document.xml:
                        $cmd = $this->app['unzip'] . ' -p ' . escapeshellarg($absFile) . ' word/document.xml';
                        break;
                    case 'ppsx':
                    case 'pptx':
                    case 'potx':
                        // Read slide1.xml:
                        $cmd = $this->app['unzip'] . ' -p ' . escapeshellarg($absFile) . ' ppt/slides/slide1.xml';
                        break;
                    case 'xlsx':
                    case 'xltx':
                        // Read sheet1.xml:
                        $cmd = $this->app['unzip'] . ' -p ' . escapeshellarg($absFile) . ' xl/worksheets/sheet1.xml';
                        break;
                }
                CommandUtility::exec($cmd, $res);
                $content_xml = implode(LF, $res);
                unset($res);
                // Put a space in front of every tag so that words from adjacent
                // elements do not run together once the tags are stripped.
                $utf8_content = trim(strip_tags(str_replace('<', ' <', $content_xml)));
                $contentArr = $this->pObj->splitRegularContent($utf8_content);
                // Make sure the title doesn't expose the absolute path!
                $contentArr['title'] = basename($absFile);
                // Meta information
                $cmd = $this->app['unzip'] . ' -p ' . escapeshellarg($absFile) . ' docProps/core.xml';
                CommandUtility::exec($cmd, $res);
                $meta_xml = implode(LF, $res);
                unset($res);
                $metaContent = GeneralUtility::xml2tree($meta_xml);
                if (is_array($metaContent)) {
                    $contentArr['title'] .= ' ' . $metaContent['cp:coreProperties'][0]['ch']['dc:title'][0]['values'][0];
                    $contentArr['description'] = $metaContent['cp:coreProperties'][0]['ch']['dc:subject'][0]['values'][0];
                    $contentArr['description'] .= ' ' . $metaContent['cp:coreProperties'][0]['ch']['dc:description'][0]['values'][0];
                    $contentArr['keywords'] = $metaContent['cp:coreProperties'][0]['ch']['cp:keywords'][0]['values'][0];
                }
                $this->setLocaleForServerFileSystem(true);
            }
            break;
        case 'sxi':
        case 'sxc':
        case 'sxw':
        case 'ods':
        case 'odp':
        case 'odt':
            if ($this->app['unzip']) {
                $this->setLocaleForServerFileSystem();
                // Read content.xml:
                $cmd = $this->app['unzip'] . ' -p ' . escapeshellarg($absFile) . ' content.xml';
                CommandUtility::exec($cmd, $res);
                $content_xml = implode(LF, $res);
                unset($res);
                // Read meta.xml:
                $cmd = $this->app['unzip'] . ' -p ' . escapeshellarg($absFile) . ' meta.xml';
                CommandUtility::exec($cmd, $res);
                $meta_xml = implode(LF, $res);
                unset($res);
                $utf8_content = trim(strip_tags(str_replace('<', ' <', $content_xml)));
                $contentArr = $this->pObj->splitRegularContent($utf8_content);
                // Make sure the title doesn't expose the absolute path!
                $contentArr['title'] = basename($absFile);
                // Meta information. Only descend into the tree when parsing
                // produced an array holding the expected path; the other branch
                // in this method already treats a non-array xml2tree() result
                // as "no metadata".
                $metaTree = GeneralUtility::xml2tree($meta_xml);
                $metaContent = null;
                if (is_array($metaTree) && isset($metaTree['office:document-meta'][0]['ch']['office:meta'][0]['ch'])) {
                    $metaContent = $metaTree['office:document-meta'][0]['ch']['office:meta'][0]['ch'];
                }
                if (is_array($metaContent)) {
                    $contentArr['title'] = !empty($metaContent['dc:title'][0]['values'][0]) ? $metaContent['dc:title'][0]['values'][0] : $contentArr['title'];
                    $contentArr['description'] = $metaContent['dc:subject'][0]['values'][0] . ' ' . $metaContent['dc:description'][0]['values'][0];
                    // Keywords collected:
                    if (isset($metaContent['meta:keywords'][0]['ch']['meta:keyword']) && is_array($metaContent['meta:keywords'][0]['ch']['meta:keyword'])) {
                        // Make sure the key exists before appending to it.
                        if (!isset($contentArr['keywords'])) {
                            $contentArr['keywords'] = '';
                        }
                        foreach ($metaContent['meta:keywords'][0]['ch']['meta:keyword'] as $kwDat) {
                            $contentArr['keywords'] .= $kwDat['values'][0] . ' ';
                        }
                    }
                }
                $this->setLocaleForServerFileSystem(true);
            }
            break;
        case 'rtf':
            if ($this->app['unrtf']) {
                $this->setLocaleForServerFileSystem();
                $cmd = $this->app['unrtf'] . ' ' . escapeshellarg($absFile);
                CommandUtility::exec($cmd, $res);
                $fileContent = implode(LF, $res);
                unset($res);
                $fileContent = $this->pObj->convertHTMLToUtf8($fileContent);
                $contentArr = $this->pObj->splitHTMLContent($fileContent);
                $this->setLocaleForServerFileSystem(true);
            }
            break;
        case 'txt':
        case 'csv':
            $this->setLocaleForServerFileSystem();
            // Raw text
            $content = GeneralUtility::getUrl($absFile);
            // @todo Implement auto detection of charset (currently assuming utf-8)
            $contentCharset = 'utf-8';
            $content = $this->pObj->convertHTMLToUtf8($content, $contentCharset);
            $contentArr = $this->pObj->splitRegularContent($content);
            // Make sure the title doesn't expose the absolute path!
            $contentArr['title'] = basename($absFile);
            $this->setLocaleForServerFileSystem(true);
            break;
        case 'html':
        case 'htm':
            $fileContent = GeneralUtility::getUrl($absFile);
            $fileContent = $this->pObj->convertHTMLToUtf8($fileContent);
            $contentArr = $this->pObj->splitHTMLContent($fileContent);
            break;
        case 'xml':
            $this->setLocaleForServerFileSystem();
            // PHP strip-tags()
            $fileContent = GeneralUtility::getUrl($absFile);
            // Finding charset from the XML declaration within the first 200 bytes:
            preg_match('/^[[:space:]]*<\\?xml[^>]+encoding[[:space:]]*=[[:space:]]*["\'][[:space:]]*([[:alnum:]_-]+)[[:space:]]*["\']/i', substr($fileContent, 0, 200), $reg);
            // Fall back to utf-8 when the declaration carries no encoding.
            $charset = !empty($reg[1]) ? $this->pObj->csObj->parse_charset($reg[1]) : 'utf-8';
            // Converting content:
            $fileContent = $this->pObj->convertHTMLToUtf8(strip_tags(str_replace('<', ' <', $fileContent)), $charset);
            $contentArr = $this->pObj->splitRegularContent($fileContent);
            // Make sure the title doesn't expose the absolute path!
            $contentArr['title'] = basename($absFile);
            $this->setLocaleForServerFileSystem(true);
            break;
        case 'jpg':
        case 'jpeg':
        case 'tif':
            $this->setLocaleForServerFileSystem();
            // PHP EXIF
            if (function_exists('exif_read_data')) {
                $exif = @exif_read_data($absFile, 'IFD0');
            } else {
                $exif = false;
            }
            if ($exif) {
                // Either field may be absent from the EXIF data.
                $exifComment = isset($exif['COMMENT'][0]) ? $exif['COMMENT'][0] : '';
                $imageDescription = isset($exif['ImageDescription']) ? $exif['ImageDescription'] : '';
                $comment = trim($exifComment . ' ' . $imageDescription);
            } else {
                $comment = '';
            }
            $contentArr = $this->pObj->splitRegularContent($comment);
            // Make sure the title doesn't expose the absolute path!
            $contentArr['title'] = basename($absFile);
            $this->setLocaleForServerFileSystem(true);
            break;
        default:
            return false;
    }
    // If no title (and why should there be...) then the file-name is set as title. This will raise the hits considerably if the search matches the document name.
    if (is_array($contentArr) && empty($contentArr['title'])) {
        // Substituting "_" for " " because many filenames may have this instead of a space char.
        $contentArr['title'] = str_replace('_', ' ', basename($absFile));
    }
    return $contentArr;
}