/** * @param AJXP_Node $ajxpNode * @param Zend_Search_Lucene_Interface $index * @throws Exception * @return Zend_Search_Lucene_Document */ public function createIndexedDocument($ajxpNode, &$index) { if (!empty($this->metaFields)) { $ajxpNode->loadNodeInfo(false, false, "all"); } else { $ajxpNode->loadNodeInfo(); } $ext = strtolower(pathinfo($ajxpNode->getLabel(), PATHINFO_EXTENSION)); $parseContent = $this->indexContent; if ($parseContent && $ajxpNode->bytesize > $this->getFilteredOption("PARSE_CONTENT_MAX_SIZE")) { $parseContent = false; } if ($parseContent && in_array($ext, explode(",", $this->getFilteredOption("PARSE_CONTENT_HTML")))) { $doc = @Zend_Search_Lucene_Document_Html::loadHTMLFile($ajxpNode->getUrl()); } elseif ($parseContent && $ext == "docx" && class_exists("Zend_Search_Lucene_Document_Docx")) { $realFile = call_user_func(array($ajxpNode->wrapperClassName, "getRealFSReference"), $ajxpNode->getUrl()); $doc = @Zend_Search_Lucene_Document_Docx::loadDocxFile($realFile); } elseif ($parseContent && $ext == "docx" && class_exists("Zend_Search_Lucene_Document_Pptx")) { $realFile = call_user_func(array($ajxpNode->wrapperClassName, "getRealFSReference"), $ajxpNode->getUrl()); $doc = @Zend_Search_Lucene_Document_Pptx::loadPptxFile($realFile); } elseif ($parseContent && $ext == "xlsx" && class_exists("Zend_Search_Lucene_Document_Xlsx")) { $realFile = call_user_func(array($ajxpNode->wrapperClassName, "getRealFSReference"), $ajxpNode->getUrl()); $doc = @Zend_Search_Lucene_Document_Xlsx::loadXlsxFile($realFile); } else { $doc = new Zend_Search_Lucene_Document(); } if ($doc == null) { throw new Exception("Could not load document"); } $doc->addField(Zend_Search_Lucene_Field::Keyword("node_url", $ajxpNode->getUrl()), SystemTextEncoding::getEncoding()); $doc->addField(Zend_Search_Lucene_Field::Keyword("node_path", str_replace("/", "AJXPFAKESEP", $ajxpNode->getPath())), SystemTextEncoding::getEncoding()); $doc->addField(Zend_Search_Lucene_Field::Text("basename", basename($ajxpNode->getPath())), SystemTextEncoding::getEncoding()); $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_node", "yes"), SystemTextEncoding::getEncoding()); $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_scope", "shared")); $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_modiftime", date("Ymd", $ajxpNode->ajxp_modiftime))); $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_bytesize", $ajxpNode->bytesize)); $ajxpMime = $ajxpNode->ajxp_mime; if (empty($ajxpMime)) { $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_mime", pathinfo($ajxpNode->getLabel(), PATHINFO_EXTENSION))); } else { $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_mime", $ajxpNode->ajxp_mime)); } // Store a cached copy of the metadata $serializedMeta = base64_encode(serialize($ajxpNode->metadata)); $doc->addField(Zend_Search_Lucene_Field::Binary("serialized_metadata", $serializedMeta)); if (isset($ajxpNode->indexableMetaKeys["shared"])) { foreach ($ajxpNode->indexableMetaKeys["shared"] as $sharedField) { if ($ajxpNode->{$sharedField}) { $doc->addField(Zend_search_Lucene_Field::keyword($sharedField, $ajxpNode->{$sharedField})); } } } foreach ($this->metaFields as $field) { if ($ajxpNode->{$field} != null) { $doc->addField(Zend_Search_Lucene_Field::Text("ajxp_meta_{$field}", $ajxpNode->{$field}), SystemTextEncoding::getEncoding()); } } if (isset($ajxpNode->indexableMetaKeys["user"]) && count($ajxpNode->indexableMetaKeys["user"]) && AuthService::usersEnabled() && AuthService::getLoggedUser() != null) { $privateDoc = new Zend_Search_Lucene_Document(); $privateDoc->addField(Zend_Search_Lucene_Field::Keyword("node_url", $ajxpNode->getUrl(), SystemTextEncoding::getEncoding())); $privateDoc->addField(Zend_Search_Lucene_Field::Keyword("node_path", str_replace("/", "AJXPFAKESEP", $ajxpNode->getPath()), SystemTextEncoding::getEncoding())); $privateDoc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_scope", "user")); $privateDoc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_user", AuthService::getLoggedUser()->getId())); foreach ($ajxpNode->indexableMetaKeys["user"] as $userField) { if ($ajxpNode->{$userField}) { $privateDoc->addField(Zend_search_Lucene_Field::keyword($userField, $ajxpNode->{$userField})); } } $privateDoc->addField(Zend_Search_Lucene_Field::Binary("serialized_metadata", $serializedMeta)); $index->addDocument($privateDoc); } if ($parseContent) { $body = $this->extractIndexableContent($ajxpNode); if (!empty($body)) { $doc->addField(Zend_Search_Lucene_Field::unStored("body", $body)); } } $index->addDocument($doc); return $doc; }
/** * @param AJXP_Node $ajxpNode * @param Zend_Search_Lucene_Interface $index * @throws Exception * @return Zend_Search_Lucene_Document */ public function createIndexedDocument($ajxpNode, &$index) { $ajxpNode->loadNodeInfo(); $ext = strtolower(pathinfo($ajxpNode->getLabel(), PATHINFO_EXTENSION)); $parseContent = $this->indexContent; if ($parseContent && $ajxpNode->bytesize > $this->getFilteredOption("PARSE_CONTENT_MAX_SIZE")) { $parseContent = false; } if ($parseContent && in_array($ext, explode(",", $this->getFilteredOption("PARSE_CONTENT_HTML")))) { $doc = @Zend_Search_Lucene_Document_Html::loadHTMLFile($ajxpNode->getUrl()); } elseif ($parseContent && $ext == "docx" && class_exists("Zend_Search_Lucene_Document_Docx")) { $realFile = call_user_func(array($ajxpNode->wrapperClassName, "getRealFSReference"), $ajxpNode->getUrl()); $doc = @Zend_Search_Lucene_Document_Docx::loadDocxFile($realFile); } elseif ($parseContent && $ext == "docx" && class_exists("Zend_Search_Lucene_Document_Pptx")) { $realFile = call_user_func(array($ajxpNode->wrapperClassName, "getRealFSReference"), $ajxpNode->getUrl()); $doc = @Zend_Search_Lucene_Document_Pptx::loadPptxFile($realFile); } elseif ($parseContent && $ext == "xlsx" && class_exists("Zend_Search_Lucene_Document_Xlsx")) { $realFile = call_user_func(array($ajxpNode->wrapperClassName, "getRealFSReference"), $ajxpNode->getUrl()); $doc = @Zend_Search_Lucene_Document_Xlsx::loadXlsxFile($realFile); } else { $doc = new Zend_Search_Lucene_Document(); } if ($doc == null) { throw new Exception("Could not load document"); } $doc->addField(Zend_Search_Lucene_Field::Keyword("node_url", $ajxpNode->getUrl()), SystemTextEncoding::getEncoding()); $doc->addField(Zend_Search_Lucene_Field::Keyword("node_path", str_replace("/", "AJXPFAKESEP", $ajxpNode->getPath())), SystemTextEncoding::getEncoding()); $doc->addField(Zend_Search_Lucene_Field::Text("basename", basename($ajxpNode->getPath())), SystemTextEncoding::getEncoding()); $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_node", "yes"), SystemTextEncoding::getEncoding()); $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_scope", "shared")); $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_modiftime", date("Ymd", $ajxpNode->ajxp_modiftime))); $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_bytesize", $ajxpNode->bytesize)); $ajxpMime = $ajxpNode->ajxp_mime; if (empty($ajxpMime)) { $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_mime", pathinfo($ajxpNode->getLabel(), PATHINFO_EXTENSION))); } else { $doc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_mime", $ajxpNode->ajxp_mime)); } // Store a cached copy of the metadata $serializedMeta = base64_encode(serialize($ajxpNode->metadata)); $doc->addField(Zend_Search_Lucene_Field::Binary("serialized_metadata", $serializedMeta)); if (isset($ajxpNode->indexableMetaKeys["shared"])) { foreach ($ajxpNode->indexableMetaKeys["shared"] as $sharedField) { if ($ajxpNode->{$sharedField}) { $doc->addField(Zend_search_Lucene_Field::keyword($sharedField, $ajxpNode->{$sharedField})); } } } foreach ($this->metaFields as $field) { if ($ajxpNode->{$field} != null) { $doc->addField(Zend_Search_Lucene_Field::Text("ajxp_meta_{$field}", $ajxpNode->{$field}), SystemTextEncoding::getEncoding()); } } if (isset($ajxpNode->indexableMetaKeys["user"]) && count($ajxpNode->indexableMetaKeys["user"]) && AuthService::usersEnabled() && AuthService::getLoggedUser() != null) { $privateDoc = new Zend_Search_Lucene_Document(); $privateDoc->addField(Zend_Search_Lucene_Field::Keyword("node_url", $ajxpNode->getUrl(), SystemTextEncoding::getEncoding())); $privateDoc->addField(Zend_Search_Lucene_Field::Keyword("node_path", str_replace("/", "AJXPFAKESEP", $ajxpNode->getPath()), SystemTextEncoding::getEncoding())); $privateDoc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_scope", "user")); $privateDoc->addField(Zend_Search_Lucene_Field::Keyword("ajxp_user", AuthService::getLoggedUser()->getId())); foreach ($ajxpNode->indexableMetaKeys["user"] as $userField) { if ($ajxpNode->{$userField}) { $privateDoc->addField(Zend_search_Lucene_Field::keyword($userField, $ajxpNode->{$userField})); } } $privateDoc->addField(Zend_Search_Lucene_Field::Binary("serialized_metadata", $serializedMeta)); $index->addDocument($privateDoc); } if ($parseContent && in_array($ext, explode(",", $this->getFilteredOption("PARSE_CONTENT_TXT")))) { $doc->addField(Zend_Search_Lucene_Field::unStored("body", file_get_contents($ajxpNode->getUrl()))); } $unoconv = $this->getFilteredOption("UNOCONV"); $pipe = false; if ($parseContent && !empty($unoconv) && in_array($ext, array("doc", "odt", "xls", "ods"))) { $targetExt = "txt"; if (in_array($ext, array("xls", "ods"))) { $targetExt = "csv"; } else { if (in_array($ext, array("odp", "ppt"))) { $targetExt = "pdf"; $pipe = true; } } $realFile = call_user_func(array($ajxpNode->wrapperClassName, "getRealFSReference"), $ajxpNode->getUrl()); $unoconv = "HOME=" . AJXP_Utils::getAjxpTmpDir() . " " . $unoconv . " --stdout -f {$targetExt} " . escapeshellarg($realFile); if ($pipe) { $newTarget = str_replace(".{$ext}", ".pdf", $realFile); $unoconv .= " > {$newTarget}"; register_shutdown_function("unlink", $newTarget); } $output = array(); exec($unoconv, $output, $return); if (!$pipe) { $out = implode("\n", $output); $enc = 'ISO-8859-1'; $asciiString = iconv($enc, 'ASCII//TRANSLIT//IGNORE', $out); $doc->addField(Zend_Search_Lucene_Field::unStored("body", $asciiString)); } else { $ext = "pdf"; } } $pdftotext = $this->getFilteredOption("PDFTOTEXT"); if ($parseContent && !empty($pdftotext) && in_array($ext, array("pdf"))) { $realFile = call_user_func(array($ajxpNode->wrapperClassName, "getRealFSReference"), $ajxpNode->getUrl()); if ($pipe && isset($newTarget) && is_file($newTarget)) { $realFile = $newTarget; } $cmd = $pdftotext . " " . escapeshellarg($realFile) . " -"; $output = array(); exec($cmd, $output, $return); $out = implode("\n", $output); $enc = 'UTF8'; $asciiString = iconv($enc, 'ASCII//TRANSLIT//IGNORE', $out); $doc->addField(Zend_Search_Lucene_Field::unStored("body", $asciiString)); } $index->addDocument($doc); return $doc; }