/**
     * Show details for metaphone value
     *
     * @param int $metaphone Metaphone integer hash
     * @return 	string		HTML content
     * @todo Define visibility
     */
    public function showDetailsForMetaphone($metaphone)
    {
        // Start from an empty string so the method always returns a string,
        // also when no word matches the given metaphone hash.
        $content = '';
        // Fetch all indexed words that share this metaphone hash, ordered by base word:
        $ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows('index_words.*', 'index_words', 'index_words.metaphone = ' . (int) $metaphone, '', 'index_words.baseword', '');
        // The query result is only usable when it is a non-empty array
        // (the legacy DB API may hand back a non-array value when the query fails).
        if (!is_array($ftrows) || !count($ftrows)) {
            return $content;
        }
        // The heading shows the raw metaphone string of the first word; escape it because it ends up in HTML.
        $content .= '<h4>Metaphone: ' . htmlspecialchars($this->indexerObj->metaphone($ftrows[0]['baseword'], 1)) . '</h4>';
        $content .= '
				<tr class="tableheader bgColor5">
					<td>Word</td>
					<td>Is stopword?</td>
				</tr>';
        // One table row per word, linking to the word detail view:
        foreach ($ftrows as $wDat) {
            $content .= '
						<tr class="bgColor4">
							<td>' . $this->linkWordDetails(htmlspecialchars($wDat['baseword']), $wDat['wid']) . '</td>
							<td>' . htmlspecialchars($wDat['is_stopword'] ? 'YES' : 'No') . '</td>
						</tr>';
        }
        $content = '
				<table border="0" cellspacing="1" cellpadding="2" class="c-list">' . $content . '
				</table>';
        // Sanity check: re-hashing the first word must give the requested hash again.
        if ($this->indexerObj->metaphone($ftrows[0]['baseword']) != $metaphone) {
            $content .= 'ERROR: Metaphone string and hash did not match for some reason!?';
        }
        // Add go-back link:
        $content .= $this->linkList();
        return $content;
    }
Example #2
0
 /**
  * @test
  */
 public function extractBaseHrefExtractsBaseHref()
 {
     // The tag and attribute are written in mixed case ("Base Href") on purpose.
     $expectedBaseHref = 'http://example.com/';
     $markup = sprintf('<html><head><Base Href="%s" /></head></html>', $expectedBaseHref);
     $this->assertEquals($expectedBaseHref, $this->fixture->extractBaseHref($markup));
 }
Example #3
0
 /**
  * Checks that base HREF is extracted correctly
  *
  * @return void
  */
 public function textExtractBaseHref()
 {
     // NOTE(review): the method name starts with "text", not "test", and the docblock
     // has no @test annotation - presumably the test runner never picks it up; confirm and rename.
     $baseHref = 'http://example.com/';
     $html = '<html><head><Base Href="' . $baseHref . '" /></head></html>';
     // The expectation is the base HREF string itself, so the base HREF extractor has to be
     // exercised here (the hyperlink extractor was called before, which does not match the
     // assertion). NOTE(review): assumes $this->indexer offers extractBaseHref() - confirm.
     $result = $this->indexer->extractBaseHref($html);
     $this->assertEquals($baseHref, $result, 'Incorrect base href was extracted');
 }
Example #4
0
 /**
  * Statistics for a given page hash
  *
  * @param int $pageHash
  * @return void
  */
 public function statisticDetailsAction($pageHash = 0)
 {
     $pageHash = (int) $pageHash;
     $connectionPool = GeneralUtility::makeInstance(ConnectionPool::class);

     // Set back button
     $icon = $this->view->getModuleTemplate()->getIconFactory()->getIcon('actions-view-go-up', Icon::SIZE_SMALL);
     $backButton = $this->view->getModuleTemplate()->getDocHeaderComponent()->getButtonBar()->makeLinkButton()->setTitle($this->getLanguageService()->sL('LLL:EXT:indexed_search/Resources/Private/Language/locallang.xml:administration.back'))->setIcon($icon)->setHref($this->getHref('Administration', 'statistic'));
     $this->view->getModuleTemplate()->getDocHeaderComponent()->getButtonBar()->addButton($backButton);

     // The indexed page itself; without it there is nothing to show, so go back to the overview.
     $queryBuilder = $connectionPool->getQueryBuilderForTable('index_phash');
     $pageHashRow = $queryBuilder->select('*')->from('index_phash')->where($queryBuilder->expr()->eq('phash', $queryBuilder->createNamedParameter($pageHash, \PDO::PARAM_INT)))->execute()->fetch();
     if (!is_array($pageHashRow)) {
         $this->redirect('statistic');
     }

     // Debug information is optional: only decode it when a row with content exists.
     // (Checking is_array() alone is not enough, an empty result is an array as well.)
     $queryBuilder = $connectionPool->getQueryBuilderForTable('index_debug');
     $debugRows = $queryBuilder->select('*')->from('index_debug')->where($queryBuilder->expr()->eq('phash', $queryBuilder->createNamedParameter($pageHash, \PDO::PARAM_INT)))->execute()->fetchAll();
     $debugInfo = [];
     $lexer = '';
     if (!empty($debugRows[0]['debuginfo'])) {
         // Stored debug data is decoded without instantiating any objects.
         $decodedDebugInfo = unserialize($debugRows[0]['debuginfo'], ['allowed_classes' => false]);
         if (is_array($decodedDebugInfo)) {
             // The lexer output is handed to the view separately from the remaining debug data.
             if (isset($decodedDebugInfo['lexer'])) {
                 $lexer = $decodedDebugInfo['lexer'];
                 unset($decodedDebugInfo['lexer']);
             }
             $debugInfo = $decodedDebugInfo;
         }
     }

     // Keywords of the page record, flipped so that isset() can be used for the lookup below.
     $pageRecord = BackendUtility::getRecord('pages', $pageHashRow['data_page_id']);
     $keywords = is_array($pageRecord) ? array_flip(GeneralUtility::trimExplode(',', $pageRecord['keywords'], true)) : [];

     // All words indexed for this page, flagged when they are also page keywords.
     $queryBuilder = $connectionPool->getQueryBuilderForTable('index_words');
     $wordRecords = $queryBuilder->select('index_words.*', 'index_rel.*')->from('index_words')->from('index_rel')->where($queryBuilder->expr()->eq('index_rel.phash', $queryBuilder->createNamedParameter($pageHash, \PDO::PARAM_INT)), $queryBuilder->expr()->eq('index_words.wid', $queryBuilder->quoteIdentifier('index_rel.wid')))->orderBy('index_words.baseword')->execute()->fetchAll();
     foreach ($wordRecords as $id => $row) {
         if (isset($keywords[$row['baseword']])) {
             $wordRecords[$id]['is_keyword'] = true;
         }
     }

     $metaphoneRows = $metaphone = [];
     if ($this->enableMetaphoneSearch) {
         // Group base words by metaphone hash
         foreach ($wordRecords as $row) {
             $metaphoneRows[$row['metaphone']][] = $row['baseword'];
         }
         // Only hashes shared by more than one word are reported.
         foreach ($metaphoneRows as $hash => $words) {
             if (count($words) > 1) {
                 // The positional copy of $hash (index 0) is kept so the array shape stays unchanged.
                 $metaphone[] = ['metaphone' => $this->indexer->metaphone($words[0], 1), $hash, 'words' => $words, 'hash' => $hash];
             }
         }
     }

     // sections
     $queryBuilder = $connectionPool->getQueryBuilderForTable('index_section');
     $sections = $queryBuilder->select('*')->from('index_section')->where($queryBuilder->expr()->eq('phash', $queryBuilder->createNamedParameter($pageHash, \PDO::PARAM_INT)))->execute()->fetchAll();

     // The "top words" and "top frequency" lists are the same query, differing only in the
     // index_rel column used for the descending sort: top 20 non-stopwords of this page.
     $fetchTopWords = function ($orderByField) use ($connectionPool, $pageHash) {
         $queryBuilder = $connectionPool->getQueryBuilderForTable('index_words');
         return $queryBuilder->select('index_words.baseword', 'index_words.metaphone', 'index_rel.*')->from('index_words')->from('index_rel')->setMaxResults(20)->where($queryBuilder->expr()->eq('index_rel.phash', $queryBuilder->createNamedParameter($pageHash, \PDO::PARAM_INT)), $queryBuilder->expr()->eq('index_words.is_stopword', $queryBuilder->createNamedParameter(0, \PDO::PARAM_INT)), $queryBuilder->expr()->eq('index_words.wid', $queryBuilder->quoteIdentifier('index_rel.wid')))->orderBy($orderByField, 'DESC')->execute()->fetchAll();
     };
     $topCountWords = $fetchTopWords('index_rel.count');
     $topFrequency = $fetchTopWords('index_rel.freq');

     $this->view->assignMultiple(['phash' => $pageHash, 'phashRow' => $pageHashRow, 'words' => $wordRecords, 'sections' => $sections, 'topCount' => $topCountWords, 'topFrequency' => $topFrequency, 'debug' => $debugInfo, 'lexer' => $lexer, 'metaphone' => $metaphone, 'page' => $pageRecord, 'keywords' => $keywords]);
 }
    /**
     * Tests whether indexer can extract content between multiple pairs of "TYPO3SEARCH" markers
     *
     * @test
     */
    public function typoSearchTagsHandlesMultipleMarkerPairs()
    {
        // Page markup with two marker-delimited regions, each surrounded by content
        // that must not end up in the result.
        $markup = <<<'EOT'
<html>
<head>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8"/>
<title>Some Title</title>
<link href='css/normalize.css' rel='stylesheet' type='text/css'/>
</head>
<body>
<div>
<div class="non_searchable">
    not searchable content
</div>
<!--TYPO3SEARCH_begin-->
<div class="searchable">
    lorem ipsum
</div>
<!--TYPO3SEARCH_end-->
<div class="non_searchable">
    not searchable content
</div>
<!--TYPO3SEARCH_begin-->
<div class="searchable">
    lorem ipsum2
</div>
<!--TYPO3SEARCH_end-->
<div class="non_searchable">
    not searchable content
</div>
</body>
</html>
EOT;
        // Only what sits between the marker pairs is expected to remain.
        $searchableOnly = <<<'EOT'

<div class="searchable">
    lorem ipsum
</div>

<div class="searchable">
    lorem ipsum2
</div>

EOT;
        // The call is expected to report success and to rewrite $markup in place.
        $markersFound = $this->subject->typoSearchTags($markup);
        $this->assertTrue($markersFound);
        $this->assertEquals($searchableOnly, $markup);
    }
    /**
     * Statistics for a given page hash
     *
     * @param int $pageHash
     * @return void
     */
    public function statisticDetailsAction($pageHash = 0)
    {
        // Set back button
        $icon = $this->view->getModuleTemplate()->getIconFactory()->getIcon('actions-view-go-up', Icon::SIZE_SMALL);
        $backButton = $this->view->getModuleTemplate()->getDocHeaderComponent()->getButtonBar()->makeLinkButton()->setTitle($this->getLanguageService()->sL('LLL:EXT:indexed_search/Resources/Private/Language/locallang.xml:administration.back'))->setIcon($icon)->setHref($this->getHref('Administration', 'statistic'));
        $this->view->getModuleTemplate()->getDocHeaderComponent()->getButtonBar()->addButton($backButton);

        // From here on $pageHash is an integer and therefore safe to concatenate into WHERE clauses.
        $pageHash = (int) $pageHash;
        $db = $this->getDatabaseConnection();

        // The indexed page itself; without it there is nothing to show, so go back to the overview.
        $pageHashRow = $db->exec_SELECTgetSingleRow('*', 'index_phash', 'phash = ' . $pageHash);
        if (!is_array($pageHashRow)) {
            $this->redirect('statistic');
        }

        // Debug information is optional: only decode it when a row with content exists.
        // (An is_array() check alone also lets an empty result through.)
        $debugRows = $db->exec_SELECTgetRows('*', 'index_debug', 'phash = ' . $pageHash);
        $debugInfo = array();
        $lexer = '';
        if (!empty($debugRows[0]['debuginfo'])) {
            $decodedDebugInfo = unserialize($debugRows[0]['debuginfo']);
            if (is_array($decodedDebugInfo)) {
                // The lexer output is handed to the view separately from the remaining debug data.
                if (isset($decodedDebugInfo['lexer'])) {
                    $lexer = $decodedDebugInfo['lexer'];
                    unset($decodedDebugInfo['lexer']);
                }
                $debugInfo = $decodedDebugInfo;
            }
        }

        // Keywords of the page record, flipped so that isset() can be used for the lookup below.
        $pageRecord = BackendUtility::getRecord('pages', $pageHashRow['data_page_id']);
        $keywords = is_array($pageRecord) ? array_flip(GeneralUtility::trimExplode(',', $pageRecord['keywords'], true)) : array();

        // All words indexed for this page, flagged when they are also page keywords.
        $wordRecords = $db->exec_SELECTgetRows('index_words.*, index_rel.*', 'index_rel, index_words', 'index_rel.phash = ' . $pageHash . ' AND index_words.wid = index_rel.wid', '', 'index_words.baseword');
        if (!is_array($wordRecords)) {
            // Keep the loops below and the view working when the query yields no usable result.
            $wordRecords = array();
        }
        foreach ($wordRecords as $id => $row) {
            if (isset($keywords[$row['baseword']])) {
                $wordRecords[$id]['is_keyword'] = true;
            }
        }

        $metaphoneRows = $metaphone = array();
        if ($this->enableMetaphoneSearch) {
            // Group base words by metaphone hash
            foreach ($wordRecords as $row) {
                $metaphoneRows[$row['metaphone']][] = $row['baseword'];
            }
            // Only hashes shared by more than one word are reported.
            foreach ($metaphoneRows as $hash => $words) {
                if (count($words) > 1) {
                    // The positional copy of $hash (index 0) is kept so the array shape stays unchanged.
                    $metaphone[] = array('metaphone' => $this->indexer->metaphone($words[0], 1), $hash, 'words' => $words, 'hash' => $hash);
                }
            }
        }

        // The "top count" and "top frequency" lists are the same query, differing only in the
        // ORDER BY clause: top 20 non-stopwords of this page.
        $fetchTopWords = function ($orderBy) use ($db, $pageHash) {
            return $db->exec_SELECTgetRows('index_words.baseword, index_words.metaphone, index_rel.*', 'index_rel, index_words', 'index_rel.phash = ' . $pageHash . ' AND index_words.wid = index_rel.wid
					 AND index_words.is_stopword=0', '', $orderBy, '20');
        };

        $this->view->assignMultiple(array(
            'phash' => $pageHash,
            'phashRow' => $pageHashRow,
            'words' => $wordRecords,
            'sections' => $db->exec_SELECTgetRows('*', 'index_section', 'index_section.phash = ' . $pageHash),
            'topCount' => $fetchTopWords('index_rel.count DESC'),
            'topFrequency' => $fetchTopWords('index_rel.freq DESC'),
            'debug' => $debugInfo,
            'lexer' => $lexer,
            'metaphone' => $metaphone,
            'page' => $pageRecord,
            'keywords' => $keywords
        ));
    }
 /**
  * Check if the tables provided are configured for usage.
  * This becomes necessary for extensions that provide additional database
  * functionality like indexed_search_mysql.
  *
  * @param string $table_list Comma-separated list of tables
  * @return boolean TRUE if given tables are enabled
  */
 protected function isTableUsed($table_list)
 {
     // Pure delegation: the indexer class decides whether the listed tables are in use.
     $tablesAreUsed = \TYPO3\CMS\IndexedSearch\Indexer::isTableUsed($table_list);

     return $tablesAreUsed;
 }
Example #8
0
	/**
	 * Statistics for a given page hash
	 *
	 * @param int $pageHash
	 * @return void
	 */
	public function statisticDetailsAction($pageHash = 0) {
		// From here on $pageHash is an integer and therefore safe to concatenate into WHERE clauses.
		$pageHash = (int)$pageHash;
		$db = $this->getDatabaseConnection();

		// The indexed page itself; without it there is nothing to show, so go back to the overview.
		$pageHashRow = $db->exec_SELECTgetSingleRow('*', 'index_phash', 'phash = ' . $pageHash);
		if (!is_array($pageHashRow)) {
			$this->redirect('statistic');
		}

		// Debug information is optional: only decode it when a row with content exists.
		// (An is_array() check alone also lets an empty result through.)
		$debugRows = $db->exec_SELECTgetRows('*', 'index_debug', 'phash = ' . $pageHash);
		$debugInfo = array();
		$lexer = '';
		if (!empty($debugRows[0]['debuginfo'])) {
			$decodedDebugInfo = unserialize($debugRows[0]['debuginfo']);
			if (is_array($decodedDebugInfo)) {
				// The lexer output is handed to the view separately from the remaining debug data.
				if (isset($decodedDebugInfo['lexer'])) {
					$lexer = $decodedDebugInfo['lexer'];
					unset($decodedDebugInfo['lexer']);
				}
				$debugInfo = $decodedDebugInfo;
			}
		}

		// Keywords of the page record, flipped so that isset() can be used for the lookup below.
		$pageRecord = BackendUtility::getRecord('pages', $pageHashRow['data_page_id']);
		$keywords = is_array($pageRecord) ? array_flip(GeneralUtility::trimExplode(',', $pageRecord['keywords'], TRUE)) : array();

		// All words indexed for this page, flagged when they are also page keywords.
		$wordRecords = $db->exec_SELECTgetRows(
			'index_words.*, index_rel.*',
			'index_rel, index_words',
			'index_rel.phash = ' . $pageHash . ' AND index_words.wid = index_rel.wid',
			'',
			'index_words.baseword'
		);
		if (!is_array($wordRecords)) {
			// Keep the loops below and the view working when the query yields no usable result.
			$wordRecords = array();
		}
		foreach ($wordRecords as $id => $row) {
			if (isset($keywords[$row['baseword']])) {
				$wordRecords[$id]['is_keyword'] = TRUE;
			}
		}

		$metaphoneRows = $metaphone = array();
		if ($this->enableMetaphoneSearch) {
			// Group base words by metaphone hash
			foreach ($wordRecords as $row) {
				$metaphoneRows[$row['metaphone']][] = $row['baseword'];
			}

			// Only hashes shared by more than one word are reported.
			foreach ($metaphoneRows as $hash => $words) {
				if (count($words) > 1) {
					// The positional copy of $hash (index 0) is kept so the array shape stays unchanged.
					$metaphone[] = array(
						'metaphone' => $this->indexer->metaphone($words[0], 1), $hash,
						'words' => $words,
						'hash' => $hash
					);
				}
			}
		}

		// The "top count" and "top frequency" lists are the same query, differing only in the
		// ORDER BY clause: top 20 non-stopwords of this page.
		$fetchTopWords = function ($orderBy) use ($db, $pageHash) {
			return $db->exec_SELECTgetRows(
				'index_words.baseword, index_words.metaphone, index_rel.*',
				'index_rel, index_words',
				'index_rel.phash = ' . $pageHash . ' AND index_words.wid = index_rel.wid
					 AND index_words.is_stopword=0',
				'',
				$orderBy,
				'20'
			);
		};

		$this->view->assignMultiple(array(
			'phash' => $pageHash,
			'phashRow' => $pageHashRow,
			'words' => $wordRecords,
			'sections' => $db->exec_SELECTgetRows(
				'*',
				'index_section',
				'index_section.phash = ' . $pageHash
			),
			'topCount' => $fetchTopWords('index_rel.count DESC'),
			'topFrequency' => $fetchTopWords('index_rel.freq DESC'),
			'debug' => $debugInfo,
			'lexer' => $lexer,
			'metaphone' => $metaphone,
			'page' => $pageRecord,
			'keywords' => $keywords
		));
	}
 /**
  * Reads the content of an external file being indexed.
  *
  * @param string $ext File extension, eg. "pdf", "doc" etc.
  * @param string $absFile Absolute filename of file (must exist and be validated OK before calling function)
  * @param string $cPKey Pointer to section (zero for all other than PDF which will have an indication of pages into which the document should be split.)
  * @return array Standard content array (title, description, keywords, body keys)
  */
 public function readFileContent($ext, $absFile, $cPKey)
 {
     // Stays NULL when the matching external tool is not configured or no content could be read.
     $contentArr = null;
     // Return immediately if initialization didn't set support up:
     if (empty($this->supportedExtensions[$ext])) {
         return false;
     }
     // Switch by file extension
     switch ($ext) {
         case 'pdf':
             if ($this->app['pdfinfo']) {
                 $this->setLocaleForServerFileSystem();
                 // Getting pdf-info:
                 $cmd = $this->app['pdfinfo'] . ' ' . escapeshellarg($absFile);
                 CommandUtility::exec($cmd, $res);
                 $pdfInfo = $this->splitPdfInfo($res);
                 unset($res);
                 if (isset($pdfInfo['pages']) && (int) $pdfInfo['pages']) {
                     // $cPKey carries the page range as "<first>-<last>". Both bounds end up on a
                     // shell command line, so they are forced to integers first.
                     $pageRange = explode('-', (string) $cPKey, 2);
                     $low = (int) $pageRange[0];
                     $high = isset($pageRange[1]) ? (int) $pageRange[1] : 0;
                     // Get pdf content:
                     $tempFileName = GeneralUtility::tempnam('Typo3_indexer');
                     // Create temporary name
                     @unlink($tempFileName);
                     // Delete if exists, just to be safe.
                     // Without a usable upper bound the "-l" option is left out instead of being
                     // emitted without a value (which would swallow the following option).
                     $cmd = $this->app['pdftotext'] . ' -f ' . $low . ($high > 0 ? ' -l ' . $high : '') . ' -enc UTF-8 -q ' . escapeshellarg($absFile) . ' ' . escapeshellarg($tempFileName);
                     CommandUtility::exec($cmd);
                     if (@is_file($tempFileName)) {
                         $content = GeneralUtility::getUrl($tempFileName);
                         unlink($tempFileName);
                     } else {
                         $content = '';
                         $this->pObj->log_setTSlogMessage(sprintf($this->sL('LLL:EXT:indexed_search/Resources/Private/Language/locallang_main.xlf:pdfToolsFailed'), $absFile), 2);
                     }
                     if ((string) $content !== '') {
                         $contentArr = $this->pObj->splitRegularContent($this->removeEndJunk($content));
                     }
                 }
                 // A title from the PDF meta data wins over whatever was derived from the text.
                 if (!empty($pdfInfo['title'])) {
                     $contentArr['title'] = $pdfInfo['title'];
                 }
                 $this->setLocaleForServerFileSystem(true);
             }
             break;
         case 'doc':
             if ($this->app['catdoc']) {
                 $this->setLocaleForServerFileSystem();
                 $cmd = $this->app['catdoc'] . ' -d utf-8 ' . escapeshellarg($absFile);
                 CommandUtility::exec($cmd, $res);
                 $content = implode(LF, $res);
                 unset($res);
                 $contentArr = $this->pObj->splitRegularContent($this->removeEndJunk($content));
                 $this->setLocaleForServerFileSystem(true);
             }
             break;
         case 'pps':
         case 'ppt':
             if ($this->app['ppthtml']) {
                 $this->setLocaleForServerFileSystem();
                 $cmd = $this->app['ppthtml'] . ' ' . escapeshellarg($absFile);
                 CommandUtility::exec($cmd, $res);
                 $content = implode(LF, $res);
                 unset($res);
                 $content = $this->pObj->convertHTMLToUtf8($content);
                 $contentArr = $this->pObj->splitHTMLContent($this->removeEndJunk($content));
                 $contentArr['title'] = basename($absFile);
                 $this->setLocaleForServerFileSystem(true);
             }
             break;
         case 'xls':
             if ($this->app['xlhtml']) {
                 $this->setLocaleForServerFileSystem();
                 $cmd = $this->app['xlhtml'] . ' -nc -te ' . escapeshellarg($absFile);
                 CommandUtility::exec($cmd, $res);
                 $content = implode(LF, $res);
                 unset($res);
                 $content = $this->pObj->convertHTMLToUtf8($content);
                 $contentArr = $this->pObj->splitHTMLContent($this->removeEndJunk($content));
                 $contentArr['title'] = basename($absFile);
                 $this->setLocaleForServerFileSystem(true);
             }
             break;
         case 'docx':
         case 'dotx':
         case 'pptx':
         case 'ppsx':
         case 'potx':
         case 'xlsx':
         case 'xltx':
             if ($this->app['unzip']) {
                 $this->setLocaleForServerFileSystem();
                 // Pick the archive member that holds the main content for the respective format.
                 switch ($ext) {
                     case 'docx':
                     case 'dotx':
                         // Read document.xml:
                         $cmd = $this->app['unzip'] . ' -p ' . escapeshellarg($absFile) . ' word/document.xml';
                         break;
                     case 'ppsx':
                     case 'pptx':
                     case 'potx':
                         // Read slide1.xml:
                         $cmd = $this->app['unzip'] . ' -p ' . escapeshellarg($absFile) . ' ppt/slides/slide1.xml';
                         break;
                     case 'xlsx':
                     case 'xltx':
                         // Read sheet1.xml:
                         $cmd = $this->app['unzip'] . ' -p ' . escapeshellarg($absFile) . ' xl/worksheets/sheet1.xml';
                         break;
                 }
                 CommandUtility::exec($cmd, $res);
                 $content_xml = implode(LF, $res);
                 unset($res);
                 // A space is put in front of every tag so that words from adjacent elements
                 // do not run together once the tags are stripped.
                 $utf8_content = trim(strip_tags(str_replace('<', ' <', $content_xml)));
                 $contentArr = $this->pObj->splitRegularContent($utf8_content);
                 // Make sure the title doesn't expose the absolute path!
                 $contentArr['title'] = basename($absFile);
                 // Meta information
                 $cmd = $this->app['unzip'] . ' -p ' . escapeshellarg($absFile) . ' docProps/core.xml';
                 CommandUtility::exec($cmd, $res);
                 $meta_xml = implode(LF, $res);
                 unset($res);
                 $metaContent = GeneralUtility::xml2tree($meta_xml);
                 if (is_array($metaContent)) {
                     $contentArr['title'] .= ' ' . $metaContent['cp:coreProperties'][0]['ch']['dc:title'][0]['values'][0];
                     $contentArr['description'] = $metaContent['cp:coreProperties'][0]['ch']['dc:subject'][0]['values'][0];
                     $contentArr['description'] .= ' ' . $metaContent['cp:coreProperties'][0]['ch']['dc:description'][0]['values'][0];
                     $contentArr['keywords'] = $metaContent['cp:coreProperties'][0]['ch']['cp:keywords'][0]['values'][0];
                 }
                 $this->setLocaleForServerFileSystem(true);
             }
             break;
         case 'sxi':
         case 'sxc':
         case 'sxw':
         case 'ods':
         case 'odp':
         case 'odt':
             if ($this->app['unzip']) {
                 $this->setLocaleForServerFileSystem();
                 // Read content.xml:
                 $cmd = $this->app['unzip'] . ' -p ' . escapeshellarg($absFile) . ' content.xml';
                 CommandUtility::exec($cmd, $res);
                 $content_xml = implode(LF, $res);
                 unset($res);
                 // Read meta.xml:
                 $cmd = $this->app['unzip'] . ' -p ' . escapeshellarg($absFile) . ' meta.xml';
                 CommandUtility::exec($cmd, $res);
                 $meta_xml = implode(LF, $res);
                 unset($res);
                 $utf8_content = trim(strip_tags(str_replace('<', ' <', $content_xml)));
                 $contentArr = $this->pObj->splitRegularContent($utf8_content);
                 // Make sure the title doesn't expose the absolute path!
                 $contentArr['title'] = basename($absFile);
                 // Meta information
                 $metaContent = GeneralUtility::xml2tree($meta_xml);
                 $metaContent = $metaContent['office:document-meta'][0]['ch']['office:meta'][0]['ch'];
                 if (is_array($metaContent)) {
                     // The document's own title is preferred, the file name is the fallback.
                     $contentArr['title'] = $metaContent['dc:title'][0]['values'][0] ? $metaContent['dc:title'][0]['values'][0] : $contentArr['title'];
                     $contentArr['description'] = $metaContent['dc:subject'][0]['values'][0] . ' ' . $metaContent['dc:description'][0]['values'][0];
                     // Keywords collected:
                     if (is_array($metaContent['meta:keywords'][0]['ch']['meta:keyword'])) {
                         foreach ($metaContent['meta:keywords'][0]['ch']['meta:keyword'] as $kwDat) {
                             $contentArr['keywords'] .= $kwDat['values'][0] . ' ';
                         }
                     }
                 }
                 $this->setLocaleForServerFileSystem(true);
             }
             break;
         case 'rtf':
             if ($this->app['unrtf']) {
                 $this->setLocaleForServerFileSystem();
                 $cmd = $this->app['unrtf'] . ' ' . escapeshellarg($absFile);
                 CommandUtility::exec($cmd, $res);
                 $fileContent = implode(LF, $res);
                 unset($res);
                 $fileContent = $this->pObj->convertHTMLToUtf8($fileContent);
                 $contentArr = $this->pObj->splitHTMLContent($fileContent);
                 $this->setLocaleForServerFileSystem(true);
             }
             break;
         case 'txt':
         case 'csv':
             $this->setLocaleForServerFileSystem();
             // Raw text
             $content = GeneralUtility::getUrl($absFile);
             // @todo Implement auto detection of charset (currently assuming utf-8)
             $contentCharset = 'utf-8';
             $content = $this->pObj->convertHTMLToUtf8($content, $contentCharset);
             $contentArr = $this->pObj->splitRegularContent($content);
             // Make sure the title doesn't expose the absolute path!
             $contentArr['title'] = basename($absFile);
             $this->setLocaleForServerFileSystem(true);
             break;
         case 'html':
         case 'htm':
             $fileContent = GeneralUtility::getUrl($absFile);
             $fileContent = $this->pObj->convertHTMLToUtf8($fileContent);
             $contentArr = $this->pObj->splitHTMLContent($fileContent);
             break;
         case 'xml':
             $this->setLocaleForServerFileSystem();
             // PHP strip-tags()
             $fileContent = GeneralUtility::getUrl($absFile);
             // Finding charset: look for an encoding="..." declaration within the first 200 bytes,
             // fall back to utf-8 when there is none.
             preg_match('/^[[:space:]]*<\\?xml[^>]+encoding[[:space:]]*=[[:space:]]*["\'][[:space:]]*([[:alnum:]_-]+)[[:space:]]*["\']/i', substr($fileContent, 0, 200), $reg);
             $charset = !empty($reg[1]) ? $this->pObj->csObj->parse_charset($reg[1]) : 'utf-8';
             // Converting content:
             $fileContent = $this->pObj->convertHTMLToUtf8(strip_tags(str_replace('<', ' <', $fileContent)), $charset);
             $contentArr = $this->pObj->splitRegularContent($fileContent);
             // Make sure the title doesn't expose the absolute path!
             $contentArr['title'] = basename($absFile);
             $this->setLocaleForServerFileSystem(true);
             break;
         case 'jpg':
         case 'jpeg':
         case 'tif':
             $this->setLocaleForServerFileSystem();
             // PHP EXIF
             if (function_exists('exif_read_data')) {
                 $exif = @exif_read_data($absFile, 'IFD0');
             } else {
                 $exif = false;
             }
             // Comment and image description are both optional in the EXIF data.
             $comment = '';
             if ($exif) {
                 $exifComment = isset($exif['COMMENT'][0]) ? $exif['COMMENT'][0] : '';
                 $exifDescription = isset($exif['ImageDescription']) ? $exif['ImageDescription'] : '';
                 $comment = trim($exifComment . ' ' . $exifDescription);
             }
             $contentArr = $this->pObj->splitRegularContent($comment);
             // Make sure the title doesn't expose the absolute path!
             $contentArr['title'] = basename($absFile);
             $this->setLocaleForServerFileSystem(true);
             break;
         default:
             return false;
     }
     // If no title (and why should there be...) then the file-name is set as title. This will raise the hits considerably if the search matches the document name.
     if (is_array($contentArr) && empty($contentArr['title'])) {
         // Substituting "_" for " " because many filenames may have this instead of a space char.
         $contentArr['title'] = str_replace('_', ' ', basename($absFile));
     }
     return $contentArr;
 }