/**
 * Create a search document from the configured fields within extracted page data.
 *
 * The document receives, in this order:
 *  - a keyword field 'url' holding the URL,
 *  - a text field Page::URIS_KEY listing the URL path and its ancestor paths
 *    (only when the path is longer than one character),
 *  - title, description and body as text fields, each preceded by an
 *    ASCII-transliterated "<name>_translit" twin,
 *  - the image as an unindexed field,
 *  - one text field plus "<name>_translit" twin per configured title tag,
 *  - unindexed page ID / route name fields when the matching parameter is set.
 *
 * @param string $url  URL of the page; stored as-is and parsed for its path
 * @param array  $page extracted page data keyed by the Page::*_KEY constants;
 *                     missing keys are indexed as empty strings
 * @return Document
 */
protected function createDocument($url, $page)
{
    $document = new Document();

    // Marked "tmp" by the original author. Not read in this method, but $page
    // is handed on to the Page headline helpers below, so the default is kept.
    if (!isset($page['status_code'])) {
        $page['status_code'] = 0;
    }

    // Presumably required so that iconv //TRANSLIT yields Czech-appropriate ASCII.
    // NOTE(review): LC_ALL changes the locale process-wide (including LC_NUMERIC);
    // LC_CTYPE alone is likely sufficient - confirm before narrowing.
    setlocale(LC_ALL, "cs_CZ.UTF-8");

    $document->addField(Field::keyword('url', $url));

    // ancestor URLs to search by URL
    $urlParts = parse_url($url);
    if (isset($urlParts['path']) && strlen($urlParts['path']) > 1) {
        $uris = $this->collectAncestorUris($urlParts['path']);
        $document->addField(Field::text(Page::URIS_KEY, implode(' ', $uris)));
    }

    // searchable text fields, each accompanied by a transliterated twin
    foreach (array(Page::TITLE_KEY, Page::DESCRIPTION_KEY, Page::BODY_KEY) as $fieldName) {
        $fieldValue = isset($page[$fieldName]) ? $page[$fieldName] : '';
        $boost = $this->resolveFieldBoost($fieldName, 1.25);

        // the transliterated twin is added before the original field (original order kept)
        $fieldTranslit = Field::text($fieldName . '_translit', $this->transliterateToAscii($fieldValue));
        $fieldTranslit->boost = $boost;
        $document->addField($fieldTranslit);

        $field = Field::text($fieldName, $fieldValue);
        $field->boost = $boost;
        $document->addField($field);
    }

    // image is stored with the document but not indexed
    $imageValue = isset($page[Page::IMAGE_KEY]) ? $page[Page::IMAGE_KEY] : '';
    $imageField = Field::unIndexed(Page::IMAGE_KEY, $imageValue);
    $imageField->boost = $this->resolveFieldBoost(Page::IMAGE_KEY, 1.25);
    $document->addField($imageField);

    // title tags as configured i.e. h1, h2, ...
    $titleTags = isset($this->parameters[self::TITLE_TAGS_PARAM])
        ? $this->parameters[self::TITLE_TAGS_PARAM]
        : array();
    foreach ($titleTags as $fieldName) {
        $fieldValue = Page::hasHeadlineType($page, $fieldName)
            ? Page::getHeadline($page, $fieldName)
            : '';

        // NOTE(review): the plain headline field defaults to boost 1 while its
        // transliterated twin defaults to 1.25. Kept as in the original; this
        // looks like a copy-paste mismatch - confirm which default is intended.
        $field = Field::text($fieldName, $fieldValue);
        $field->boost = $this->resolveFieldBoost($fieldName, 1);
        $document->addField($field);

        $fieldTranslit = Field::text($fieldName . '_translit', $this->transliterateToAscii($fieldValue));
        $fieldTranslit->boost = $this->resolveFieldBoost($fieldName, 1.25);
        $document->addField($fieldTranslit);
    }

    // page ID if selector defined
    if (!empty($this->parameters[self::PAGE_ID_PARAM])) {
        $fieldValue = isset($page[Page::PAGE_ID_KEY]) ? $page[Page::PAGE_ID_KEY] : '';
        $document->addField(Field::unIndexed(Page::PAGE_ID_KEY, $fieldValue));
    }

    // route name if selector defined
    if (!empty($this->parameters[self::ROUTE_NAME_PARAM])) {
        $fieldValue = isset($page[Page::ROUTE_NAME_KEY]) ? $page[Page::ROUTE_NAME_KEY] : '';
        $document->addField(Field::unIndexed(Page::ROUTE_NAME_KEY, $fieldValue));
    }

    return $document;
}

/**
 * List a URL path followed by its ancestor paths, longest first.
 *
 * Example: '/a/b/c' gives array('/a/b/c', '/a/b', '/a'); '/foo' gives array('/foo').
 * Empty ancestors are skipped, so a single-segment path no longer contributes
 * an empty entry.
 *
 * @param string $path non-empty URL path
 * @return array
 */
private function collectAncestorUris($path)
{
    $uris = array($path);
    $current = $path;

    do {
        // strrpos() yields false when no slash is left; cast so substr() gets an int
        $current = (string) substr($current, 0, (int) strrpos($current, '/'));
        if ($current !== '') {
            $uris[] = $current;
        }
    } while (strrpos($current, '/') > 1);

    return $uris;
}

/**
 * Transliterate UTF-8 text to ASCII and strip the apostrophes left in the output.
 *
 * @param string $value UTF-8 text
 * @return string transliterated text, or '' when iconv() fails
 */
private function transliterateToAscii($value)
{
    $ascii = iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $value);
    if ($ascii === false) {
        // iconv() signals failure with false; index an empty value instead of passing false on
        return '';
    }

    return str_replace("'", '', $ascii);
}

/**
 * Look up the configured boost for a field, falling back to a default.
 *
 * @param string    $fieldName field whose boost is looked up under self::BOOST_PARAM
 * @param int|float $default   value used when no boost is configured for the field
 * @return mixed configured boost value or $default
 */
private function resolveFieldBoost($fieldName, $default)
{
    return isset($this->parameters[self::BOOST_PARAM][$fieldName])
        ? $this->parameters[self::BOOST_PARAM][$fieldName]
        : $default;
}