/**
 * Creates an index entry for a given file.
 *
 * Collects the index record values from $this->fileInfo and the indexer
 * configuration, enriches content, abstract and tags with FAL metadata if a
 * FAL storage is configured, runs the "modifyFileIndexEntry" hooks and
 * finally hands the record over to $this->pObj->storeInIndex().
 *
 * @param \TYPO3\CMS\Core\Resource\File|string $file FAL file object; any other value is treated as a non-FAL file (no metadata, orig_uid 0)
 * @param string $content text content of the file
 * @return void
 */
public function storeToIndex($file, $content)
{
    $tags = '';
    tx_kesearch_helper::makeTags($tags, array('file'));

    // get data from FAL; without a FAL file object there is no metadata
    if ($file instanceof \TYPO3\CMS\Core\Resource\File) {
        $metadata = $file->_getMetaData();
        $orig_uid = $file->getUid();
    } else {
        $metadata = array();
        $orig_uid = 0;
    }

    $indexRecordValues = array(
        'storagepid' => $this->indexerConfig['storagepid'],
        'title' => $this->fileInfo->getName(),
        'type' => 'file:' . $this->fileInfo->getExtension(),
        'targetpid' => 1,
        'tags' => $tags,
        'params' => '',
        'abstract' => '',
        'language_uid' => -1,
        'starttime' => 0,
        'endtime' => 0,
        'fe_group' => 0,
        'debug' => false
    );

    $additionalFields = array(
        'sortdate' => $this->fileInfo->getModificationTime(),
        'orig_uid' => $orig_uid,
        'orig_pid' => 0,
        'directory' => $this->fileInfo->getRelativePath(),
        'hash' => $this->getUniqueHashForFile()
    );

    // add additional content if FAL is used
    if ($this->pObj->indexerConfig['fal_storage'] > 0) {
        // index meta data from FAL: title, description, alternative.
        // The title has to be prepended to $content, because $content is
        // what gets passed to storeInIndex() below.
        if (!empty($metadata['title'])) {
            $content = $metadata['title'] . "\n" . $content;
        }

        if (!empty($metadata['description'])) {
            $indexRecordValues['abstract'] = $metadata['description'];
            $content = $metadata['description'] . "\n" . $content;
        }

        if (!empty($metadata['alternative'])) {
            $content .= "\n" . $metadata['alternative'];
        }

        // null if there is no metadata record for this file
        $metadataUid = isset($metadata['uid']) ? $metadata['uid'] : null;

        // make tags from assigned categories
        $categories = tx_kesearch_helper::getCategories($metadataUid, 'sys_file_metadata');
        tx_kesearch_helper::makeTags($indexRecordValues['tags'], $categories['title_list']);

        // assign categories as generic tags (eg. "syscat123")
        tx_kesearch_helper::makeSystemCategoryTags($indexRecordValues['tags'], $metadataUid, 'sys_file_metadata');
    }

    // hook for custom modifications of the indexed data, e. g. the tags
    if (is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['ke_search']['modifyFileIndexEntry'])) {
        foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['ke_search']['modifyFileIndexEntry'] as $_classRef) {
            $_procObj = GeneralUtility::getUserObj($_classRef);
            $_procObj->modifyFileIndexEntry($file, $content, $additionalFields, $indexRecordValues, $this);
        }
    }

    // store record in index table
    $this->pObj->storeInIndex(
        $indexRecordValues['storagepid'],
        $indexRecordValues['title'],
        $indexRecordValues['type'],
        $indexRecordValues['targetpid'],
        $content,
        $indexRecordValues['tags'],
        $indexRecordValues['params'],
        $indexRecordValues['abstract'],
        $indexRecordValues['language_uid'],
        $indexRecordValues['starttime'],
        $indexRecordValues['endtime'],
        $indexRecordValues['fe_group'],
        $indexRecordValues['debug'],
        $additionalFields
    );
}
/**
 * Store the file content and additional information to the index.
 *
 * Metadata (title, description, alternative) of the file is added to the
 * content, the categories of the metadata record are added as tags. The
 * access and time restrictions are taken from the given tt_content row.
 *
 * @param $fileObject file reference object or file object
 * @param string $content file text content
 * @param tx_kesearch_indexer_types_file $fileIndexerObject
 * @param string $feGroups comma list of groups to assign
 * @param string $tags tags collected so far; category tags are appended to it
 * @param array $ttContentRow tt_content element the file was assigned to
 * @return void
 * @author Christian Bülter <*****@*****.**>
 * @since 25.09.13
 */
public function storeFileContentToIndex($fileObject, $content, $fileIndexerObject, $feGroups, $tags, $ttContentRow)
{
    // get metadata; a file reference is resolved to its original file first
    if ($fileObject instanceof \TYPO3\CMS\Core\Resource\FileReference) {
        $originalFile = $fileObject->getOriginalFile();
        $orig_uid = $originalFile->getUid();
        $metadata = $originalFile->_getMetaData();
    } else {
        $orig_uid = $fileObject->getUid();
        $metadata = $fileObject->_getMetaData();
    }

    // assign categories as tags (as cleartext, eg. "colorblue")
    $categories = tx_kesearch_helper::getCategories($metadata['uid'], 'sys_file_metadata');
    tx_kesearch_helper::makeTags($tags, $categories['title_list']);

    // assign categories as generic tags (eg. "syscat123")
    tx_kesearch_helper::makeSystemCategoryTags($tags, $metadata['uid'], 'sys_file_metadata');

    // always define the abstract, it is handed to the hook and to
    // storeInIndex() even if the file has no description
    $abstract = '';

    if (!empty($metadata['title'])) {
        $content = $metadata['title'] . "\n" . $content;
    }

    if (!empty($metadata['description'])) {
        $abstract = $metadata['description'];
        $content = $metadata['description'] . "\n" . $content;
    }

    if (!empty($metadata['alternative'])) {
        $content .= "\n" . $metadata['alternative'];
    }

    $title = $fileIndexerObject->fileInfo->getName();
    $storagePid = $this->indexerConfig['storagepid'];
    $type = 'file:' . $fileObject->getExtension();

    $additionalFields = array(
        'sortdate' => $fileIndexerObject->fileInfo->getModificationTime(),
        'orig_uid' => $orig_uid,
        'orig_pid' => 0,
        'directory' => $fileIndexerObject->fileInfo->getRelativePath(),
        'hash' => $fileIndexerObject->getUniqueHashForFile()
    );

    // hook for custom modifications of the indexed data, e. g. the tags
    if (is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['ke_search']['modifyFileIndexEntryFromContentIndexer'])) {
        foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['ke_search']['modifyFileIndexEntryFromContentIndexer'] as $_classRef) {
            $_procObj = \TYPO3\CMS\Core\Utility\GeneralUtility::getUserObj($_classRef);
            $_procObj->modifyFileIndexEntryFromContentIndexer(
                $fileObject,
                $content,
                $fileIndexerObject,
                $feGroups,
                $ttContentRow,
                $storagePid,
                $title,
                $tags,
                $abstract,
                $additionalFields
            );
        }
    }

    // Store record in index table:
    // Add usergroup restrictions of the page and the
    // content element to the index data.
    // Add time restrictions to the index data.
    $this->pObj->storeInIndex(
        $storagePid,
        $title,
        $type,
        $ttContentRow['pid'],
        $content,
        $tags,
        '',
        $abstract,
        $ttContentRow['sys_language_uid'],
        $ttContentRow['starttime'],
        $ttContentRow['endtime'],
        $feGroups,
        false,
        $additionalFields
    );
}
/**
 * This function is called from the indexer object and saves the news
 * records (table tx_news_domain_model_news) to the index table.
 *
 * For each news record title, teaser, bodytext, author data, keywords and
 * the attached content elements are compiled into the index content; tags
 * are built from page tags, news keywords, news tags and categories. The
 * "modifyExtNewsIndexEntry" hooks may change the data before it is stored.
 *
 * @return string content which will be displayed in backend; empty if no
 *                news record was found
 */
public function startIndexing()
{
    $content = '';
    $table = 'tx_news_domain_model_news';

    // get the pages from where to index the news
    $indexPids = $this->getPidList(
        $this->indexerConfig['startingpoints_recursive'],
        $this->indexerConfig['sysfolder'],
        $table
    );

    // add the tags of each page to the global page array
    if ($this->indexerConfig['index_use_page_tags']) {
        $this->pageRecords = $this->getPageRecords($indexPids);
        $this->addTagsToRecords($indexPids);
    }

    // without any page to look at there is nothing to index; building the
    // query anyway would result in invalid SQL ("pid IN ()")
    if (empty($indexPids)) {
        return $content;
    }

    // get all the news entries to index, don't index hidden or
    // deleted news, BUT get the news with frontend user group
    // access restrictions or time (start / stop) restrictions.
    // Copy those restrictions to the index.
    $fields = '*';
    $where = 'pid IN (' . implode(',', array_map('intval', $indexPids)) . ') ';

    // index archived news
    // 0: index all news
    // 1: index only active (not archived) news
    // 2: index only archived news
    if ($this->indexerConfig['index_news_archived'] == 1) {
        $where .= 'AND ( archive = 0 OR archive > ' . time() . ') ';
    } elseif ($this->indexerConfig['index_news_archived'] == 2) {
        $where .= 'AND ( archive > 0 AND archive < ' . time() . ') ';
    }

    $where .= BackendUtility::BEenableFields($table);
    $where .= BackendUtility::deleteClause($table);

    $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery($fields, $table, $where);
    $indexedNewsCounter = 0;
    $resCount = $GLOBALS['TYPO3_DB']->sql_num_rows($res);

    if ($resCount) {
        while ($newsRecord = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) {
            // get category data for this news record (list of
            // assigned categories and single view from category, if it exists)
            $categoryData = $this->getCategoryData($newsRecord);

            // If mode equals 2 ('choose categories for indexing')
            // check if the current news record has one of the categories
            // assigned that should be indexed.
            // mode 1 means 'index all news no matter what category
            // they have'
            if ($this->indexerConfig['index_news_category_mode'] == '2') {
                $isInList = false;
                foreach ($categoryData['uid_list'] as $catUid) {
                    // if category was found in list, set isInList
                    // to true and break further processing.
                    if (GeneralUtility::inList($this->indexerConfig['index_extnews_category_selection'], $catUid)) {
                        $isInList = true;
                        break;
                    }
                }

                // if category was not found stop further processing
                // and continue with next news record
                if (!$isInList) {
                    continue;
                }
            }

            // compile the information which should go into the index:
            // title, teaser, bodytext
            $type = 'news';
            $title = strip_tags($newsRecord['title']);
            $abstract = strip_tags($newsRecord['teaser']);
            $newsContent = strip_tags($newsRecord['bodytext']);

            // add additional fields to the content:
            // author, author_email, keywords
            if (isset($newsRecord['author'])) {
                $newsContent .= "\n" . strip_tags($newsRecord['author']);
            }
            if (isset($newsRecord['author_email'])) {
                $newsContent .= "\n" . strip_tags($newsRecord['author_email']);
            }
            if (!empty($newsRecord['keywords'])) {
                $newsContent .= "\n" . $newsRecord['keywords'];
            }

            // index attached content elements
            $contentElements = $this->getAttachedContentElements($newsRecord);
            $newsContent .= $this->getContentFromContentElements($contentElements);

            // create content; only prepend the teaser if there is one,
            // otherwise the content would start with an empty line
            $fullContent = '';
            if ($abstract !== '') {
                $fullContent .= $abstract . "\n";
            }
            $fullContent .= $newsContent;

            // make it possible to modify the indexerConfig via hook
            $indexerConfig = $this->indexerConfig;

            // create params and custom single view page:
            // if it is a default news (type = 0), add params
            // if it is an internal page (type = 1), put that into the "targetpid" field
            // if it is an external url (type = 2), put that into the "params" field
            if ($newsRecord['type'] == 1) {
                $indexerConfig['targetpid'] = $newsRecord['internalurl'];
                $params = '';
            } elseif ($newsRecord['type'] == 2) {
                $type = 'external:news';
                $params = $newsRecord['externalurl'];
            } else {
                // overwrite the targetpid if there is a category assigned
                // which has its own single view page
                if ($categoryData['single_pid']) {
                    $indexerConfig['targetpid'] = $categoryData['single_pid'];
                }

                // create params for news single view, example:
                // index.php?id=123&tx_news_pi1[news]=9&tx_news_pi1[controller]=News&tx_news_pi1[action]=detail
                $paramsSingleView = array(
                    'tx_news_pi1' => array(
                        'news' => $newsRecord['uid'],
                        'controller' => 'News',
                        'action' => 'detail'
                    )
                );
                $params = '&' . http_build_query($paramsSingleView, '', '&');
                $params = rawurldecode($params);
            }

            // add tags from pages
            if ($indexerConfig['index_use_page_tags']) {
                $tags = $this->pageRecords[intval($newsRecord['pid'])]['tags'];
            } else {
                $tags = '';
            }

            // add keywords from ext:news as tags
            $tags = $this->addTagsFromNewsKeywords($tags, $newsRecord);

            // add tags from ext:news as tags
            $tags = $this->addTagsFromNewsTags($tags, $newsRecord);

            // add categories from ext:news as tags
            $tags = $this->addTagsFromNewsCategories($tags, $categoryData);

            // add system categories as tags
            tx_kesearch_helper::makeSystemCategoryTags($tags, $newsRecord['uid'], $table);

            // set additional fields; the news date wins over the creation
            // date as sort date if it is set
            $additionalFields = array();
            $additionalFields['orig_uid'] = $newsRecord['uid'];
            $additionalFields['orig_pid'] = $newsRecord['pid'];
            $additionalFields['sortdate'] = $newsRecord['crdate'];
            if (isset($newsRecord['datetime']) && $newsRecord['datetime'] > 0) {
                $additionalFields['sortdate'] = $newsRecord['datetime'];
            }

            // hook for custom modifications of the indexed data, e.g. the tags
            if (is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['ke_search']['modifyExtNewsIndexEntry'])) {
                foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['ke_search']['modifyExtNewsIndexEntry'] as $_classRef) {
                    $_procObj = \TYPO3\CMS\Core\Utility\GeneralUtility::getUserObj($_classRef);
                    $_procObj->modifyExtNewsIndexEntry(
                        $title,
                        $abstract,
                        $fullContent,
                        $params,
                        $tags,
                        $newsRecord,
                        $additionalFields,
                        $indexerConfig,
                        $categoryData,
                        $this
                    );
                }
            }

            // store this record to the index
            $this->pObj->storeInIndex(
                $indexerConfig['storagepid'],
                $title,
                $type,
                $indexerConfig['targetpid'],
                $fullContent,
                $tags,
                $params,
                $abstract,
                $newsRecord['sys_language_uid'],
                $newsRecord['starttime'],
                $newsRecord['endtime'],
                $newsRecord['fe_group'],
                false,
                $additionalFields
            );
            $indexedNewsCounter++;
        }

        $content = '<p><b>Indexer "' . $this->indexerConfig['title'] . '":</b><br />' . "\n"
            . $indexedNewsCounter . ' News have been indexed.</p>' . "\n";
        $content .= $this->showErrors();
        $content .= $this->showTime();
    }

    // release the result set
    if ($res) {
        $GLOBALS['TYPO3_DB']->sql_free_result($res);
    }

    return $content;
}
/**
 * Get the content elements of the given page and save them to the index.
 *
 * Every content element matching $this->whereClauseForCType is stored as a
 * separate index record of type "content". Access restrictions (hidden,
 * fe_group, starttime, endtime) are combined from the page (including
 * inherited restrictions), the page language overlay and the content
 * element itself. Files attached to or linked in a content element are
 * indexed via $this->indexFiles().
 *
 * @param integer $uid page-UID that has to be indexed
 * @return void
 */
public function getPageContent($uid)
{
    // get content elements for this page
    $fields = '*';
    $table = 'tt_content';
    $where = 'pid = ' . intval($uid);
    $where .= ' AND (' . $this->whereClauseForCType . ')';

    // add condition for not indexing gridelement columns with colPos = -2 (= invalid)
    if (\TYPO3\CMS\Core\Utility\ExtensionManagementUtility::isLoaded('gridelements')) {
        $where .= ' AND colPos <> -2 ';
    }

    // don't index elements which are hidden or deleted, but do index
    // those with time restrictions, the time restrictions will be
    // copied to the index
    $where .= ' AND hidden=0';
    $where .= \TYPO3\CMS\Backend\Utility\BackendUtility::deleteClause($table);

    // Get access restrictions for this page
    $pageAccessRestrictions = $this->getInheritedAccessRestrictions($uid);

    // skip all content elements if the page itself is hidden or a
    // parent page with "extendToSubpages" set is hidden
    if ($pageAccessRestrictions['hidden']) {
        return;
    }

    // exec_SELECTgetRows() does not return an array if the query failed
    $rows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows($fields, $table, $where);
    if (!is_array($rows) || !count($rows)) {
        return;
    }

    foreach ($rows as $row) {
        // skip this content element if the page language overlay is hidden
        if ($row['sys_language_uid'] > 0 && $this->cachedPageRecords[$row['sys_language_uid']][$row['pid']]['hidden']) {
            continue;
        }

        // combine group access restrictions from page(s) and content element
        $feGroups = $this->getCombinedFeGroupsForContentElement($pageAccessRestrictions['fe_group'], $row['fe_group']);

        // skip this content element if either the page or the content
        // element is set to "hide at login"
        // and the other one has a frontend group attached to it
        if ($feGroups == DONOTINDEX) {
            continue;
        }

        // get content for this content element
        $content = '';

        // get tags from page
        $tags = $this->pageRecords[$uid]['tags'];

        // assign categories as tags (as cleartext, eg. "colorblue")
        $categories = tx_kesearch_helper::getCategories($row['uid'], $table);
        tx_kesearch_helper::makeTags($tags, $categories['title_list']);

        // assign categories as generic tags (eg. "syscat123")
        tx_kesearch_helper::makeSystemCategoryTags($tags, $row['uid'], $table);

        // index header
        // add header only if not set to "hidden"
        if ($row['header_layout'] != 100) {
            $content .= strip_tags($row['header']) . "\n";
        }

        // index content of this content element and find attached or linked files.
        // Attached files are saved as file references, the RTE links directly to
        // a file, thus we get file objects.
        if (in_array($row['CType'], $this->fileCTypes)) {
            $fileObjects = $this->findAttachedFiles($row);
        } else {
            $fileObjects = $this->findLinkedFilesInRte($row);
            $content .= $this->getContentFromContentElement($row) . "\n";
        }

        // index the files found; pass the group restrictions of the page
        // so they are applied to the files as well
        $this->indexFiles($fileObjects, $row, $pageAccessRestrictions['fe_group'], $tags);

        // Combine starttime and endtime from page, page language overlay
        // and content element.
        // TODO:
        // If current content element is a localized content
        // element, fetch startdate and enddate from original content
        // element as the localized content element cannot have its
        // own start- and enddate
        $starttime = $pageAccessRestrictions['starttime'];

        // the latest starttime wins
        if ($this->cachedPageRecords[$row['sys_language_uid']][$row['pid']]['starttime'] > $starttime) {
            $starttime = $this->cachedPageRecords[$row['sys_language_uid']][$row['pid']]['starttime'];
        }

        if ($row['starttime'] > $starttime) {
            $starttime = $row['starttime'];
        }

        $endtime = $pageAccessRestrictions['endtime'];

        // the earliest endtime which is set (not 0) wins
        if ($endtime == 0
            || (
                $this->cachedPageRecords[$row['sys_language_uid']][$row['pid']]['endtime']
                && $this->cachedPageRecords[$row['sys_language_uid']][$row['pid']]['endtime'] < $endtime
            )
        ) {
            $endtime = $this->cachedPageRecords[$row['sys_language_uid']][$row['pid']]['endtime'];
        }

        if ($endtime == 0 || ($row['endtime'] && $row['endtime'] < $endtime)) {
            $endtime = $row['endtime'];
        }

        // prepare additionalFields (to be added via hook)
        $additionalFields = array();

        // make it possible to modify the indexerConfig via hook
        $indexerConfig = $this->indexerConfig;

        // hook for custom modifications of the indexed data, e. g. the tags
        if (is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['ke_search']['modifyContentIndexEntry'])) {
            foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['ke_search']['modifyContentIndexEntry'] as $_classRef) {
                $_procObj = \TYPO3\CMS\Core\Utility\GeneralUtility::getUserObj($_classRef);
                $_procObj->modifyContentIndexEntry(
                    $row['header'],
                    $row,
                    $tags,
                    $row['uid'],
                    $additionalFields,
                    $indexerConfig
                );
            }
        }

        // compile title from page title and content element title
        // TODO: make changeable via hook
        $title = $this->cachedPageRecords[$row['sys_language_uid']][$row['pid']]['title'];
        if ($row['header'] && $row['header_layout'] != 100) {
            $title = $title . ' - ' . $row['header'];
        }

        // save record to index
        $this->pObj->storeInIndex(
            $indexerConfig['storagepid'],
            $title,
            'content',
            $row['pid'] . '#c' . $row['uid'],
            $content,
            $tags,
            '',
            '',
            $row['sys_language_uid'],
            $starttime,
            $endtime,
            $feGroups,
            false,
            $additionalFields
        );

        // count elements written to the index
        $this->counter++;
    }
}
/**
 * Add tags to the page records in $this->pageRecords.
 *
 * Tags come from three sources:
 * - filter options assigned in the page properties (pages.tx_kesearch_tags)
 * - system categories assigned to the page
 * - filter options with "automated tagging" set, which tag all pages
 *   below the configured page(s)
 *
 * Nothing is returned, the tags are written to
 * $this->pageRecords[<page uid>]['tags'].
 *
 * @param array $uids Simple array with uids of pages
 * @param string $pageWhere additional where-clause
 * @return void
 */
public function addTagsToRecords($uids, $pageWhere = '1=1')
{
    $tagChar = $this->pObj->extConf['prePostTagChar'];

    // add tags which are defined by page properties; skip the query if
    // there are no pages, "IN ()" would be invalid SQL
    if (!empty($uids)) {
        $fields = 'pages.*, GROUP_CONCAT(CONCAT("' . $tagChar . '", tx_kesearch_filteroptions.tag, "' . $tagChar . '")) as tags';
        $table = 'pages, tx_kesearch_filteroptions';
        $where = 'pages.uid IN (' . implode(',', array_map('intval', $uids)) . ')';
        $where .= ' AND pages.tx_kesearch_tags <> "" ';
        $where .= ' AND FIND_IN_SET(tx_kesearch_filteroptions.uid, pages.tx_kesearch_tags)';
        $where .= \TYPO3\CMS\Backend\Utility\BackendUtility::BEenableFields('tx_kesearch_filteroptions');
        $where .= \TYPO3\CMS\Backend\Utility\BackendUtility::deleteClause('tx_kesearch_filteroptions');

        $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery($fields, $table, $where, 'pages.uid', '', '');
        if ($res) {
            while ($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) {
                $this->pageRecords[$row['uid']]['tags'] = $row['tags'];
            }
            $GLOBALS['TYPO3_DB']->sql_free_result($res);
        }

        // add system categories as tags
        foreach ($uids as $page_uid) {
            tx_kesearch_helper::makeSystemCategoryTags($this->pageRecords[$page_uid]['tags'], $page_uid, 'pages');
        }
    }

    // add tags which are defined by filteroption records
    $fields = 'automated_tagging, automated_tagging_exclude, tag';
    $table = 'tx_kesearch_filteroptions';
    $where = 'automated_tagging <> "" ';
    $where .= \TYPO3\CMS\Backend\Utility\BackendUtility::BEenableFields('tx_kesearch_filteroptions');
    $where .= \TYPO3\CMS\Backend\Utility\BackendUtility::deleteClause('tx_kesearch_filteroptions');

    // exec_SELECTgetRows() does not return an array if the query failed
    $rows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows($fields, $table, $where);
    if (!is_array($rows)) {
        return;
    }

    $where = $pageWhere . ' AND no_search <> 1 ';

    foreach ($rows as $row) {
        // exclude the pages below the configured "exclude" pages
        if ((string) $row['automated_tagging_exclude'] !== '') {
            $whereRow = $where . 'AND FIND_IN_SET(pages.pid, "' . $row['automated_tagging_exclude'] . '") = 0';
        } else {
            $whereRow = $where;
        }

        // get all pages below the page(s) configured for automated
        // tagging; empty list entries are dropped
        $pageList = \TYPO3\CMS\Core\Utility\GeneralUtility::trimExplode(
            ',',
            $this->queryGen->getTreeList($row['automated_tagging'], 99, 0, $whereRow),
            true
        );

        $newTag = $tagChar . $row['tag'] . $tagChar;
        foreach ($pageList as $uid) {
            if ($this->pageRecords[$uid]['tags']) {
                $this->pageRecords[$uid]['tags'] .= ',' . $newTag;
            } else {
                $this->pageRecords[$uid]['tags'] = $newTag;
            }
        }
    }
}