/**
 * Helper for raw Solr searches.
 *
 * Optionally appends the default filter queries (installation id, offline date,
 * section and, on request, the per-cluster "is invisible" flag) to $params['fq'],
 * then selects the Solr endpoint:
 *  - multi-core language search enabled: the shard mapped to the current content locale;
 *  - otherwise: the configured search server, in which case a language filter
 *    (current locale + LocaleTool::languageList()) is appended when default filters are used.
 * The request is finally sent to "/select".
 *
 * @param array  $params             Raw Solr request parameters (q, fq, rows, ...)
 * @param string $requestType        Passed through to eZSolrBase::rawSolrRequest()
 * @param bool   $useDefaultFilters  Whether the default filter queries are appended to $params['fq']
 * @param bool   $includeIsInvisible Whether the default filters include the cluster "is invisible" flag
 * @return array Result of eZSolrBase::rawSolrRequest(); when it is falsy, no-cache headers are added
 *               and the falsy value is returned unchanged
 */
public static function rawSearch( $params, $requestType = 'php', $useDefaultFilters = true, $includeIsInvisible = true )
{
    eZDebug::accumulatorStart( __CLASS__ . '::' . __FUNCTION__, 'Merck' );

    $findINI = eZINI::instance( 'ezfind.ini' );
    $solrINI = eZINI::instance( 'solr.ini' );
    $siteINI = eZINI::instance();
    $currentLanguage = $siteINI->variable( 'RegionalSettings', 'ContentObjectLocale' );

    // always use extended Dismax query handler when available
    if ( isset( $params['qt'] ) && $params['qt'] == 'ezpublish' )
    {
        $params['defType'] = 'edismax';
    }

    if ( $useDefaultFilters )
    {
        $defaultFilters = array(
            'meta_installation_id_ms:' . eZSolr::installationID(),
            '(attr_offline_date_dt:"1970-01-01T01:00:00Z" OR attr_offline_date_dt:[NOW TO *])',
            '( meta_section_id_si:1 OR meta_section_id_si:3 )',
        );
        if ( $includeIsInvisible )
        {
            $defaultFilters[] = 'attr_is_invisible_' . ClusterTool::clusterIdentifier() . '_b:false';
        }

        // A missing *or blank* caller filter must not leave a dangling leading " AND "
        // in front of the default filters (Solr would reject such a query).
        $hasCallerFilter = isset( $params['fq'] )
            && !( is_string( $params['fq'] ) && trim( $params['fq'] ) === '' );

        $params['fq'] = ( $hasCallerFilter ? $params['fq'] . ' AND ' : '' )
            . implode( ' AND ', $defaultFilters );
    }

    if ( $findINI->variable( 'LanguageSearch', 'MultiCore' ) == 'enabled' )
    {
        // One core per language: pick the shard mapped to the current locale
        $languageMapping = $findINI->variable( 'LanguageSearch', 'LanguagesCoresMap' );
        $shardMapping = $solrINI->variable( 'SolrBase', 'Shards' );
        $fullSolrURI = $shardMapping[$languageMapping[$currentLanguage]];
    }
    else
    {
        $fullSolrURI = $solrINI->variable( 'SolrBase', 'SearchServerURI' );

        // Autocomplete search should be done in current language and fallback languages
        $validLanguages = array_unique( array_merge( LocaleTool::languageList(), array( $currentLanguage ) ) );
        if ( $useDefaultFilters )
        {
            $params['fq'] .= ' AND meta_language_code_ms:(' . implode( ' OR ', $validLanguages ) . ')';
        }
    }

    solrTool::solrStopWordsFilter( $params ); // excluding stopwords
    self::parseBooleanOperators( $params );   // translations for boolean operators

    $solrBase = new eZSolrBase( $fullSolrURI );
    $result = $solrBase->rawSolrRequest( '/select', $params, $requestType );
    if ( !$result )
    {
        self::addNoCacheHeaders();
    }

    eZDebug::accumulatorStop( __CLASS__ . '::' . __FUNCTION__ );

    return $result;
}
/**
 * Create policy limitation query.
 *
 * Turns the "content/read" access information of the current user (or the given
 * override) into a Solr filter query:
 *  - policies are concatenated with OR;
 *  - inside one policy, limitation types are concatenated with AND, except the
 *    location based ones (Node, Subtree, User_Subtree) which are grouped and
 *    concatenated with OR;
 *  - values of one limitation type are concatenated with OR.
 * The result is scoped to the current installation id (optionally OR-ed with
 * anonymous access when searching other installations is enabled) and, unless
 * visibility is ignored, restricted to documents flagged as not invisible.
 *
 * Node IDs used for Node/Subtree limitations are also recorded in
 * $this->searchPluginInstance->postSearchProcessingData['subtree_limitations'].
 *
 * @param array $limitation Override the limitation of the user. Same format as the return of eZUser::hasAccessTo()
 * @param boolean $ignoreVisibility Set to true for the visibility to be ignored
 * @return string|false Lucene/Solr query string which can be used as filter query for Solr,
 *                      ' NOT *:* ' when access is denied, false for an unknown access word
 */
protected function policyLimitationFilterQuery($limitation = null, $ignoreVisibility = null)
{
    $eZFindIni = eZINI::instance('ezfind.ini');
    $filterQuery = false;
    $policies = array();

    $pathFieldName = $ignoreVisibility
        ? eZSolr::getMetaFieldName('path')
        : eZSolr::getMetaFieldName('visible_path');

    if (is_array($limitation)) {
        // An explicitly empty override means "no restriction"
        if (empty($limitation)) {
            $limitation['accessWord'] = 'yes';
        }
    } else {
        $limitation = eZUser::currentUser()->hasAccessTo('content', 'read');
    }

    if (isset($limitation['accessWord'])) {
        switch ($limitation['accessWord']) {
            case 'limited':
                if (isset($limitation['policies'])) {
                    $policies = $limitation['policies'];
                    break;
                }
                // break omitted, "limited" without policies == "no"

            case 'no':
                return ' NOT *:* ';

            case 'yes':
                break;

            default:
                return false;
        }
    }

    // Add limitations for filter query based on local permissions.
    $limitationHash = array(
        'Class' => eZSolr::getMetaFieldName('contentclass_id'),
        'Section' => eZSolr::getMetaFieldName('section_id'),
        'User_Section' => eZSolr::getMetaFieldName('section_id'),
        'Subtree' => eZSolr::getMetaFieldName('path_string'),
        'User_Subtree' => eZSolr::getMetaFieldName('path_string'),
        'Node' => eZSolr::getMetaFieldName('main_node_id'),
        'Owner' => eZSolr::getMetaFieldName('owner_id'),
        'Group' => eZSolr::getMetaFieldName('owner_group_id'),
        'ObjectStates' => eZSolr::getMetaFieldName('object_states'),
    );

    $filterQueryPolicies = array();

    // policies are concatenated with OR
    foreach ($policies as $limitationList) {
        // policy limitations are concatenated with AND
        // except for locations policy limitations, concatenated with OR
        $filterQueryPolicyLimitations = array();
        $policyLimitationsOnLocations = array();

        foreach ($limitationList as $limitationType => $limitationValues) {
            // limitation values of one type in a policy are concatenated with OR
            $filterQueryPolicyLimitationParts = array();

            switch ($limitationType) {
                case 'User_Subtree':
                case 'Subtree':
                    foreach ($limitationValues as $limitationValue) {
                        $pathString = trim($limitationValue, '/');
                        $pathArray = explode('/', $pathString);
                        // we only take the last node ID in the path identification string
                        $subtreeNodeID = array_pop($pathArray);
                        $policyLimitationsOnLocations[] = $pathFieldName . ':' . $subtreeNodeID;
                        if (isset($this->searchPluginInstance->postSearchProcessingData['subtree_limitations'])) {
                            $this->searchPluginInstance->postSearchProcessingData['subtree_limitations'][] = $subtreeNodeID;
                        } else {
                            $this->searchPluginInstance->postSearchProcessingData['subtree_limitations'] = array($subtreeNodeID);
                        }
                    }
                    break;

                case 'Node':
                    foreach ($limitationValues as $limitationValue) {
                        $pathString = trim($limitationValue, '/');
                        $pathArray = explode('/', $pathString);
                        // we only take the last node ID in the path identification string
                        $nodeID = array_pop($pathArray);
                        $policyLimitationsOnLocations[] = $limitationHash[$limitationType] . ':' . $nodeID;
                        if (isset($this->searchPluginInstance->postSearchProcessingData['subtree_limitations'])) {
                            $this->searchPluginInstance->postSearchProcessingData['subtree_limitations'][] = $nodeID;
                        } else {
                            $this->searchPluginInstance->postSearchProcessingData['subtree_limitations'] = array($nodeID);
                        }
                    }
                    break;

                case 'Group':
                    // Matches content owned by any group the current user belongs to
                    foreach (eZUser::currentUser()->attribute('contentobject')->attribute('parent_nodes') as $groupID) {
                        $filterQueryPolicyLimitationParts[] = $limitationHash[$limitationType] . ':' . $groupID;
                    }
                    break;

                case 'Owner':
                    $filterQueryPolicyLimitationParts[] = $limitationHash[$limitationType] . ':' . eZUser::currentUser()->attribute('contentobject_id');
                    break;

                case 'Class':
                case 'Section':
                case 'User_Section':
                    foreach ($limitationValues as $limitationValue) {
                        $filterQueryPolicyLimitationParts[] = $limitationHash[$limitationType] . ':' . $limitationValue;
                    }
                    break;

                default:
                    // hacky, object state limitations reference the state group name in their
                    // limitation, hence the following match on substring
                    if (strpos($limitationType, 'StateGroup') !== false) {
                        foreach ($limitationValues as $limitationValue) {
                            $filterQueryPolicyLimitationParts[] = $limitationHash['ObjectStates'] . ':' . $limitationValue;
                        }
                    } else {
                        eZDebug::writeDebug($limitationType, __METHOD__ . ' unknown limitation type: ' . $limitationType);
                        // "continue 2" targets the enclosing foreach. A bare "continue" inside a
                        // switch behaves like "break" and raises a warning on PHP >= 7.3.
                        continue 2;
                    }
            }

            if (!empty($filterQueryPolicyLimitationParts)) {
                $filterQueryPolicyLimitations[] = '( ' . implode(' OR ', $filterQueryPolicyLimitationParts) . ' )';
            }
        }

        // Policy limitations on locations (node and/or subtree) need to be concatenated with OR
        // unlike the other types of limitation
        if (!empty($policyLimitationsOnLocations)) {
            $filterQueryPolicyLimitations[] = '( ' . implode(' OR ', $policyLimitationsOnLocations) . ')';
        }

        if (!empty($filterQueryPolicyLimitations)) {
            $filterQueryPolicies[] = '( ' . implode(' AND ', $filterQueryPolicyLimitations) . ')';
        }
    }

    if (!empty($filterQueryPolicies)) {
        $filterQuery = implode(' OR ', $filterQueryPolicies);
    }

    // Add limitations for allowing search of other installations.
    $anonymousPart = '';
    if ($eZFindIni->variable('SiteSettings', 'SearchOtherInstallations') == 'enabled') {
        $anonymousPart = ' OR ' . eZSolr::getMetaFieldName('anon_access') . ':true ';
    }

    if (!empty($filterQuery)) {
        $filterQuery = '((' . eZSolr::getMetaFieldName('installation_id') . ':' . eZSolr::installationID() . ' AND (' . $filterQuery . ')) ' . $anonymousPart . ' )';
    } else {
        $filterQuery = '(' . eZSolr::getMetaFieldName('installation_id') . ':' . eZSolr::installationID() . $anonymousPart . ')';
    }

    // Add ignore visibility condition, either explicitly set to boolean false or not specified
    if ($ignoreVisibility === false || $ignoreVisibility === null) {
        $filterQuery .= ' AND ' . eZSolr::getMetaFieldName('is_invisible') . ':false';
    }

    eZDebugSetting::writeDebug('extension-ezfind-query', $filterQuery, __METHOD__);

    return $filterQuery;
}
$cli->error('Please provide a top node id'); $script->shutdown(1); } $script->initialize(); $cli->output('Getting solr results for ' . $top_node_id); /* @var $solr eZSolr */ $solr = new eZSolr(); $solrBase = new eZSolrBase(); $params = array( 'indent' => 'on', 'q' => '', 'fq' => 'meta_installation_id_ms:' . eZSolr::installationID() . ' AND meta_path_si:' . $top_node_id, 'start' => 0, 'rows' => 0, 'fl' => 'meta_main_url_alias_ms,meta_main_node_id_si,meta_name_t,meta_guid_ms,meta_language_code_ms', 'qt' => 'ezpublish', 'explainOther' => '', 'hl.fl' => '', ); $r = $solrBase->rawSolrRequest('/select', $params); $num_found = $r['response']['numFound']; $offset = 0; $params['rows'] = 100; while ( $offset < $num_found ) {
/**
 * Builds the Solr filter query for the articles of a publisher node.
 *
 * The filter keeps documents of class "article" from the current installation
 * whose main node path string starts with the publisher node's path string.
 *
 * @param object $publisherNode Node exposing a 'path_string' attribute
 * @return string Filter clauses joined with ' AND '
 */
function solrFilter ($publisherNode)
{
    $subtreePath = $publisherNode->attribute('path_string');

    $clauses   = array('meta_class_identifier_ms:article');
    $clauses[] = 'main_node_meta_path_string_ms:' . $subtreePath . '*';
    $clauses[] = 'meta_installation_id_ms:' . eZSolr::installationID();

    return implode(' AND ', $clauses);
}
/**
 * Searches Solr for the keyword and, when a returned article headline equals it,
 * stamps the current article with the "date_front" timestamp and stores it.
 *
 * When the article also has a "date_newsletter" value, it is flagged with
 * "new_relic_report" and a metric is reported through reportToNewRelic().
 * Only the first matching document is considered.
 *
 * @return void
 */
protected function searchInFront()
{
    $filters = array(
        '(attr_archive_date_dt:"1970-01-01T01:00:00Z" OR attr_archive_date_dt:[NOW TO *])',
        'meta_class_identifier_ms:article',
        'meta_installation_id_ms:' . eZSolr::installationID(),
        'attr_is_invisible_' . ClusterTool::clusterIdentifier() . '_b:false'
    );

    $results = SolrTool::rawSearch(
        array(
            'indent' => 'on',
            'q' => $this->keyword,
            'fq' => implode( ' AND ', $filters ),
            'start' => 0,
            'rows' => 10,
            'fl' => 'attr_headline_s',
            'qt' => 'ezpublish',
            'explainOther' => '',
            'hl.fl' => '',
            'sort' => "score desc"
        )
    );

    // rawSearch() returns whatever the Solr request produced; without a usable
    // document list there is nothing to compare against.
    if ( !isset( $results['response']['docs'] ) || !is_array( $results['response']['docs'] ) )
    {
        return;
    }

    // Strict string comparison: a loose "==" between two numeric-looking strings
    // compares them as numbers ("1e3" == "1000"), which would be a false match.
    $keyword = (string) $this->keyword;

    foreach ( $results['response']['docs'] as $doc )
    {
        if ( !isset( $doc['attr_headline_s'] ) || trim( $doc['attr_headline_s'] ) !== $keyword )
        {
            continue;
        }

        $this->article->setAttribute( 'date_front', time() );
        if ( !is_null( $this->article->attribute( 'date_newsletter' ) ) )
        {
            $this->article->setAttribute( 'new_relic_report', 1 );
            $this->reportToNewRelic( $this->newRelicMetricName, 0 );
        }
        $this->article->store();
        break;
    }
}
<?php /* @type $cli eZCli */ /* @type $script eZScript */ $clusters = ClusterTool::globCluster(); $solrLimit = 200; $globalObjectIds = array(); $solrStart = 0; $orFilters = array(); $filters = array( 'meta_installation_id_ms:' . eZSolr::installationID(), "meta_class_identifier_ms:article", ); $rawSolrParams = array( 'indent' => 'on', 'q' => '', 'fl' => "meta_id_si", 'rows' => $solrLimit, 'qt' => 'ezpublish', ); foreach ( $clusters as $cluster ) { $orFilters[] = "attr_{$cluster}_url_s:full/*"; } $filters[] = "( " . implode(' OR ', $orFilters ) . " )"; $rawSolrParams['fq'] = implode(' AND ', $filters); while (true)
/**
 * Fetches from Solr the articles of every application returned by prepareConfig().
 *
 * For each application row the method builds a common filter query (class, archive
 * date, installation, cluster visibility, cluster languages), adds the optional
 * taxonomy / publisher / customer type / specialty / newsletter style filters, then
 * applies the sort, date range and row count dictated by the row's "mechanism":
 *  - 1: configured sort, limited to a date range or to the last configured days;
 *  - 2: hourly pseudo-random sort;
 *  - 3: no dedicated setup; the main query asks for 0 rows and the random
 *       fallback query below provides the articles;
 *  - 4: latest articles of the last 7 days, completed with random articles of the
 *       week before;
 *  - 5: configured sort, with a featured/most-recent fallback when nothing is found.
 *
 * Results are appended to $this->_applicationsData[<application id>].
 *
 * @return array $this->_applicationsData
 */
public function getResponseSolr()
{
    $applicationDictionaryRows = $this->prepareConfig();
    $forbiddenWords = NodeVisibilityCheck::getForbiddenWordsArray( $this->_cluster_identifier );
    $queryTerm = !empty( $forbiddenWords ) ? implode( ' ', $forbiddenWords ) : '*:*';

    foreach ( $applicationDictionaryRows as $applicationDictionaryRow )
    {
        // Get application node_id
        $applicationId = $applicationDictionaryRow['application_id'];

        /** @var ApplicationLocalized[] $_localApplication */
        $this->_localApplications[$applicationId] = CacheApplicationTool::buildLocalizedApplicationByApplication( $applicationId );

        if ( !( $this->_localApplications[$applicationId] instanceof ApplicationLocalized ) )
        {
            eZDebug::writeError(
                sprintf( 'Cannot fetch localized application %s for cluster %s', $applicationId, $this->_cluster_identifier ),
                __FILE__ . '::' . __LINE__
            );
            continue;
        }

        $newsletterStyle = $applicationDictionaryRow['newsletter_style'];

        /* @type $validLanguages array */
        $clusterSiteIni = eZINI::fetchFromFile( "extension/{$this->_cluster_identifier}/settings/site.ini.append.php" );
        $validLanguages = $clusterSiteIni->variable( 'RegionalSettings', 'SiteLanguageList' );

        // Common
        $fq = array(
            'meta_class_identifier_ms:"article"',
            '(attr_archive_date_dt:"1970-01-01T01:00:00Z" OR attr_archive_date_dt:[NOW TO *])',
            'meta_installation_id_ms:'.eZSolr::installationID(),
            'attr_is_invisible_' . $this->_cluster_identifier . '_b:false',
            'meta_language_code_ms:(' . implode( ' OR ', $validLanguages ) . ')',
        );

        // json_decode() yields null for an empty or invalid value; only iterate real lists
        $taxonomyList = json_decode( $applicationDictionaryRow['taxonomy_filter'], true );
        if ( is_array( $taxonomyList ) )
        {
            foreach ( $taxonomyList as $row )
            {
                foreach ( $row as $taxonomyCategory => $taxonomies )
                {
                    $taxonomies = array_map( function( $value ) { return '"' . $value . '"'; }, $taxonomies );
                    $fq[] = "subattr_{$taxonomyCategory}___source_id____s: (" . implode( ',', $taxonomies ) . ')';
                }
            }
        }

        // NO SDK
        $publisherNodeIds = $this->_localApplications[$applicationId]->publisherNodeIds();
        if ( count( $publisherNodeIds ) == 1 )
        {
            $fq[] = 'meta_path_si:' . $publisherNodeIds[0];
        }
        elseif ( count( $publisherNodeIds ) > 1 )
        {
            $publisherFilter = implode( ' OR ', $publisherNodeIds );
            $fq[] = "meta_path_si:($publisherFilter)";
        }

        // SDK specific treatment is disabled (dead code for now). It used to add, for
        // SDKApplication instances, the filters
        //   'subattr_local_application___source_id____s:' . $applicationId
        //   'is_sdk_b:true AND is_newsletter_b:true'

        // Solr query parameters
        $rows = 100000;
        $queryParams = array(
            'indent' => 'on',
            'q' => $queryTerm,
            'start' => 0,
            'rows' => $rows,
            'fq' => $fq,
            'fl' => array(
                'attr_has_image_' . $this->_cluster_identifier . '_bst',
                'meta_remote_id_ms',
                'meta_node_id_si',
                'meta_main_node_id_si',
                'attr_featured_content_b',
                'attr_date_dt',
                'meta_path_string_ms',
                'meta_language_code_ms',
                'attr_view_counter_' . $this->_cluster_identifier . '_i',
                'subattr_speciality___source_id____s',
                'subattr_customer_type___source_id____s',
                'meta_current_version_si',
                'attr_promo_description_t',
                'attr_author_t',
                'attr_source_t',
                'attr_online_date_dt',
                'attr_headline_s',
                'subattr_publisher_folder___source_id____s',
                'attr_promo_headline_s',
                'attr_'.$this->_cluster_identifier.'_remote_s',
                'attr_'.$this->_cluster_identifier.'_node_remote_s',
                //'attr_media_content_image_'.$this->_cluster_identifier.'____ms',
                //'attr_promo_image_'.$this->_cluster_identifier.'_s',
                'is_sdk_b',
                'meta_url_alias_ms',
                'attr_promo_headline_t',
                'subattr_publisher_folder___source_id____s',
                'meta_id_si',
                'attr_media_content_types_' . ClusterTool::clusterIdentifier() . '_bst',
                'attr_' . ClusterTool::clusterIdentifier() . '_url_s',
                'attr_core_content_t',
                'subattr_download_ressource___expiration_date____dt',
                'attr_node_remote_s',
                'attr_media_content_quiz_replies_' . $this->_cluster_identifier . '____ms',
                'attr_media_content_quiz_points_' . $this->_cluster_identifier . '_i',
                'attr_media_content_quiz_question_' . $this->_cluster_identifier . '_ms',
            ),
            'qt' => '',
            'explainOther' => '',
            'hl.fl' => '',
            'sort' => $this->_configuration['sort']
        );

        $publisherFilters = $this->_localApplications[$applicationId]->getPublishersFilter();
        if ( $publisherFilters )
        {
            $queryParams['fq'][] = $publisherFilters;
        }

        if ( !empty( $this->_customerType ) )
        {
            $customerTypeCondition = implode( ',', $this->stringArrayToFilterQueryParam( $this->_customerType ) );
            $queryParams['fq'][] = sprintf( 'subattr_customer_type___source_id____s:(%s)', $customerTypeCondition );
        }

        if ( !empty( $this->_specialty ) )
        {
            $specialtyCondition = implode( ',', $this->stringArrayToFilterQueryParam( $this->_specialty ) );
            $queryParams['fq'][] = sprintf( 'subattr_speciality___source_id____s:(%s)', $specialtyCondition );
        }

        if ( $applicationDictionaryRow["publisher"] )
        {
            $queryParams['fq'][] = sprintf( 'subattr_publisher_folder___source_id____s:(%s)', $applicationDictionaryRow["publisher"] );
        }

        if ( $newsletterStyle == 'PICL' )
        {
            $queryParams['fq'][] = 'subattr_media_type___source_id____s:107.2';
        }

        // taxonomies: one clause per category, joined with AND.
        // (The previous reset(array_keys(...)) passed a temporary by reference.)
        $taxonomies = $this->getApplicationTaxonomies( $applicationDictionaryRow['feed_id'] );
        $taxonomyClauses = array();
        if ( is_array( $taxonomies ) )
        {
            foreach ( $taxonomies as $categorie => $taxonomie )
            {
                $taxonomyClauses[] = 'subattr_' . $categorie . '___source_id____s:(' . $taxonomie . ')';
            }
        }
        $queryTaxonomies = implode( ' AND ', $taxonomyClauses );
        if ( $queryTaxonomies != '' )
        {
            $queryParams['fq'][] = $queryTaxonomies;
        }

        // Filters shared by the main query and the fallback queries (still an array here)
        $queryParamsFqFallback = $queryParams['fq'];
        $rowValueKey = '';
        $hasDateRange = !empty( $this->_beginDate ) && !empty( $this->_endDate );
        $dateRangeFilter = $hasDateRange
            ? 'attr_online_date_dt:[' . $this->_beginDate . ' TO ' . $this->_endDate . ']'
            : '';

        // different sort and filters by mechanism
        switch ( $applicationDictionaryRow["mechanism"] )
        {
            case 1:
                $rowValueKey = 'number_article_list';
                $queryParams['sort'] = implode( ', ', $queryParams['sort'] );
                // if begin/end date is set
                if ( $hasDateRange )
                {
                    $queryParams['fq'][] = $dateRangeFilter;
                }
                else
                {
                    $queryParams['fq'][] = sprintf( 'attr_online_date_dt:[NOW-%sDAY TO *]', $this->_configuration['days'] );
                }
                break;

            case 2:
                $rowValueKey = 'number_article_random';
                // The random field name changes every hour
                $oneHourRandom = floor( time() / 3600 );
                $queryParams['sort'] = 'attr_' . $oneHourRandom . '_random asc';
                break;

            case 4:
                $rowValueKey = 'number_article_last_x';
                $queryParams['sort'] = "attr_online_date_dt desc";
                $queryParams['fq'][] = 'attr_online_date_dt:[NOW-7DAY TO *]';
                // if begin/end date is set
                if ( $hasDateRange )
                {
                    $queryParams['fq'][] = $dateRangeFilter;
                }
                else
                {
                    $queryParams['fq'][] = sprintf( 'attr_online_date_dt:[NOW-%sDAY TO *]', $this->_configuration['days'] );
                }
                break;

            case 5:
                $rowValueKey = 'number_article_list';
                if ( $hasDateRange )
                {
                    $queryParams['fq'][] = $dateRangeFilter;
                }
                $queryParams['sort'] = implode( ', ', $queryParams['sort'] );
                if ( $applicationDictionaryRow['number_article_ns'] )
                {
                    $queryParams['rows'] = (int)$applicationDictionaryRow['number_article_ns'];
                }
                else
                {
                    $queryParams['rows'] = 100;
                }
                break;
        }

        // A missing row value counts as 0, exactly like the (int) cast of an undefined
        // index did. For a mechanism without a case above (e.g. 3) the main query
        // therefore asks for 0 rows and the dedicated fallback below fetches the articles.
        // NOTE(review): for mechanism 5 this overrides the 'number_article_ns' row count
        // set above whenever 'number_article_list' is >= 0 - confirm this is intended.
        $configuredRows = ( $rowValueKey !== '' && isset( $applicationDictionaryRow[$rowValueKey] ) )
            ? (int) $applicationDictionaryRow[$rowValueKey]
            : 0;
        if ( $configuredRows >= 0 )
        {
            $queryParams['rows'] = $configuredRows;
        }

        $queryParams['fl'] = implode( ',', $queryParams['fl'] );
        $queryParams['fq'] = implode( ' AND ', $queryParams['fq'] );

        // main fetch solr
        $result = SolrTool::rawSearch( $queryParams, 'php', false );
        if ( !isset( $result['response']['docs'] ) )
        {
            $this->abortOnSolrError( $queryParams['fq'] );
        }
        $articles = isset( $result['response']['docs'] ) ? $result['response']['docs'] : array();

        // if no result fallback for mechanism 5: featured first, then most recent
        if ( count( $articles ) == 0 && $applicationDictionaryRow['mechanism'] == 5 )
        {
            $queryParams['fq'] = $queryParamsFqFallback;
            $queryParams['sort'] = implode( ', ', array(
                'attr_featured_content_b desc',
                'attr_online_date_dt desc',
            ) );
            // if begin/end date is set
            if ( $hasDateRange )
            {
                $queryParams['fq'][] = $dateRangeFilter;
            }
            $queryParams['fq'] = implode( ' AND ', $queryParams['fq'] );

            $result = SolrTool::rawSearch( $queryParams, 'php', false );
            $articles = isset( $result['response']['docs'] ) ? $result['response']['docs'] : array();
        }

        // if no result fallback for mechanism 3
        if ( count( $articles ) == 0 && $applicationDictionaryRow["mechanism"] == 3 )
        {
            $queryParams['fq'] = implode( ' AND ', $queryParamsFqFallback );
            if ( (int) $applicationDictionaryRow['number_article_random'] >= 0 )
            {
                $queryParams['rows'] = (int) $applicationDictionaryRow['number_article_random'];
            }
            else
            {
                $queryParams['rows'] = $rows;
            }
            $oneHourRandom = floor( time() / 3600 );
            $queryParams['sort'] = 'attr_' . $oneHourRandom . '_random asc';

            $result = SolrTool::rawSearch( $queryParams, 'php', false );
            if ( !isset( $result['response']['docs'] ) )
            {
                $this->abortOnSolrError( $queryParams['fq'] );
            }
            $articles = isset( $result['response']['docs'] ) ? $result['response']['docs'] : array();
        }

        // if mechanism 4, we need to have articles also from previous week
        if ( $applicationDictionaryRow["mechanism"] == 4 )
        {
            $queryParams['fq'] = $queryParamsFqFallback;
            $queryParams['fq'][] = 'attr_online_date_dt:[NOW-14DAY TO NOW-7DAY]';
            // NOTE(review): unlike the other random sorts this seed is not divided by
            // 3600, so the random field changes every second - confirm this is intended.
            $oneHourRandom = floor( time() );
            $queryParams['sort'] = 'attr_' . $oneHourRandom . '_random asc';
            $queryParams['fq'] = implode( ' AND ', $queryParams['fq'] );
            $queryParams['rows'] = (int) $applicationDictionaryRow['number_article_random_y'];

            $resultRandom = SolrTool::rawSearch( $queryParams, 'php', false );
            if ( !isset( $resultRandom['response']['docs'] ) )
            {
                // Report the query that actually failed (the previous-week one)
                $this->abortOnSolrError( $queryParams['fq'] );
            }
            elseif ( count( $resultRandom['response']['docs'] ) )
            {
                $articles = array_merge( $articles, $resultRandom['response']['docs'] );
            }
        }

        $this->_applicationsData[$applicationId][] = array(
            'articles' => $articles,
            'applicationDictionaryRow' => $applicationDictionaryRow
        );
    }

    return $this->_applicationsData;
}

/**
 * Logs a failed Solr query and, outside of CLI runs, answers with an HTTP 500
 * and stops the request. In CLI mode the method simply returns after logging.
 *
 * @param string $filterQuery The "fq" string of the query that failed
 * @return void
 */
private function abortOnSolrError( $filterQuery )
{
    eZDebug::writeError( 'Error from Solr for query : ' . $filterQuery, __FILE__ . '::' . __LINE__ );
    if ( php_sapi_name() != 'cli' )
    {
        header( 'HTTP/1.x 500 Internal Server Error' );
        eZExecution::cleanExit();
    }
}
/**
 * Exports to the CSV file every article modified since the given date.
 *
 * The language filter is built from 'eng-' plus the 4-character locale prefixes
 * found in the SiteLanguageList of every cluster site.ini. Results are paged by
 * 2000 documents, ordered by modification date; each document is written as
 * (meta_id_si, meta_name_t with line breaks removed). When at least one document
 * was exported, the most recent modification date minus the configured overlap is
 * saved through saveNewLastDate().
 *
 * @param string $date Lower bound used in the Solr range "meta_modified_dt:[$date TO NOW]"
 * @return mixed numFound of the last Solr response, 0 when that response is unusable
 */
private function getLastObjects($date)
{
    $locales = array('eng-');
    $iniFiles = glob('extension/cluster_*/settings/site.ini*');
    // glob() returns false on error: treat it as "no cluster settings found"
    foreach( ( $iniFiles ? $iniFiles : array() ) as $f )
    {
        $ini = eZINI::fetchFromFile( $f );
        foreach( $ini->variable('RegionalSettings', 'SiteLanguageList') as $locale )
        {
            $prefix = substr($locale, 0, 4);
            if( !in_array($prefix, $locales) )
            {
                $locales[] = $prefix;
            }
        }
    }

    $filters = array(
        "meta_modified_dt:[$date TO NOW]",
        '('.implode(' OR ', SolrTool::solrLanguageFilter($locales)).')',
        'meta_class_identifier_ms:article',
        'meta_installation_id_ms:'.eZSolr::installationID()
    );

    $pageSize = 2000;
    $offset = 0;
    $lastDate = null;

    // Page through the results until Solr hands back an empty (or unusable) page
    do
    {
        $params = array(
            'indent' => 'on',
            'start' => $offset,
            'rows' => $pageSize,
            'q' => '',
            'fq' => implode( ' AND ', $filters ),
            'fl' => 'meta_id_si, meta_name_t, meta_modified_dt',
            'qt' => 'ezpublish',
            'explainOther' => '',
            'hl.fl' => '',
            'sort' => 'meta_modified_dt asc'
        );
        $raw = SolrTool::rawSearch($params, 'php', false);
        $docs = ( isset($raw['response']['docs']) && is_array($raw['response']['docs']) )
            ? $raw['response']['docs']
            : array();
        $offset += $pageSize;

        foreach($docs as $result)
        {
            fputcsv(
                $this->csvFile(),
                array(
                    $result['meta_id_si'],
                    str_replace( array("\n", "\r"), array(' ', ''), $result['meta_name_t'] )
                )
            );
            if( $lastDate === null || $result['meta_modified_dt'] > $lastDate )
            {
                $lastDate = $result['meta_modified_dt'];
            }
        }
    }
    while( count($docs) > 0 );

    // Only move the stored date when something was exported: without any document
    // there is no modification date to derive it from, and saving a date computed
    // from "nothing" would reset it to the epoch.
    if( $lastDate !== null )
    {
        // security overlap to avoid delayed indexing gap
        $overlap = eZINI::instance('merck.ini')->variable( 'AnalyticsExportSettings', 'LastDateOverlap' );
        $d = gmdate( 'Y-m-d\TH:i:s\Z', strtotime( $lastDate ) - $overlap );
        $this->saveNewLastDate($d);
    }

    return isset($raw['response']['numFound']) ? $raw['response']['numFound'] : 0;
}
<?php
/**
 * CLI script: resets the eZ Find installation id.
 *
 * Removes the 'ezfind_site_id' row from ezsite_data, clears the value cached in
 * eZSolr::$InstallationID and asks eZSolr for the installation id again, then
 * prints the old and the new value.
 */
require 'autoload.php';

$script = eZScript::instance(array(
    'description' => "Reset ezfind id\n\n",
    'use-session' => false,
    'use-modules' => true,
    'use-extensions' => true,
));
$script->startup();
$options = $script->getOptions();
$script->initialize();
$script->setUseDebugAccumulators(true);

try {
    $old = eZSolr::installationID();

    $db = eZDB::instance();
    // DELETE is a write statement: send it through query(). arrayQuery() is the
    // row-fetching API and is not meant for statements without a result set.
    $db->query('DELETE FROM ezsite_data WHERE name=\'ezfind_site_id\'');

    // Drop the cached id so that installationID() has to resolve it again
    $solr = new eZSolr();
    $solr::$InstallationID = null;
    $id = eZSolr::installationID();

    eZCLI::instance()->output("Old: {$old}, New: {$id}");
    $script->shutdown();
} catch (Exception $e) {
    $errCode = $e->getCode();
    // If an error has occured, script must terminate with a status other than 0
    $errCode = $errCode != 0 ? $errCode : 1;
    $script->shutdown($errCode, $e->getMessage());
}
/**
 * Builds, without sending anything to Solr, the list of Solr documents that
 * indexing the given content object would produce: one eZSolrDoc per translation
 * of the object's current version.
 *
 * Each document receives the installation / object / node meta fields, the path
 * fields, the searchable attributes (with their index-time boost) and, when
 * enabled, the stored attribute data. The configured index plugins are applied
 * to the document list after each translation has been added.
 *
 * @param object $contentObject Content object to build the documents for
 * @return array|false Documents indexed by language code; false when the object's
 *                     class is excluded from indexing or it has no main node
 */
function fakeAddObject($contentObject)
{
    $eZSolr = new eZSolr();
    $findIni = $eZSolr->FindINI;

    // Shortcut for the meta field naming helper
    $metaField = function ($name) {
        return ezfSolrDocumentFieldBase::generateMetaFieldName($name);
    };

    // Documents for all translations end up in this list
    $documents = array();

    // Objects whose class identifier is on the exclude list are not indexed
    $excludedClasses = $findIni->variable('IndexExclude', 'ClassIdentifierList');
    if ($excludedClasses && in_array($contentObject->attribute('class_identifier'), $excludedClasses)) {
        return false;
    }

    // Global object values
    $mainNode = $contentObject->attribute('main_node');
    if (!$mainNode) {
        eZDebug::writeError('Unable to fetch main node for object: ' . $contentObject->attribute('id'), __METHOD__);

        return false;
    }
    $mainNodePathArray = $mainNode->attribute('path_array');

    // Path arrays of every assigned node, needed for the multivalued path field and subtree filters
    $nodePathArray = array();

    $currentVersion = $contentObject->currentVersion();

    // Object meta attributes
    $metaAttributeValues = eZSolr::getMetaAttributesForObject($contentObject);

    // Node attributes of every location of the object
    $nodeAttributeValues = array();
    foreach ($contentObject->attribute('assigned_nodes') as $contentNode) {
        foreach (eZSolr::nodeAttributes() as $attributeName => $fieldType) {
            $nodeAttributeValues[] = array(
                'name' => $attributeName,
                'value' => $contentNode->attribute($attributeName),
                'fieldType' => $fieldType,
            );
        }
        $nodePathArray[] = $contentNode->attribute('path_array');
    }

    // Anonymous user access: evaluated by temporarily switching to the anonymous user
    $anonymousAccess = 'false';
    if ($findIni->variable('SiteSettings', 'IndexPubliclyAvailable') == 'enabled') {
        $anonymousUserID = $eZSolr->SiteINI->variable('UserSettings', 'AnonymousUserID');
        $currentUserID = eZUser::currentUserID();

        $user = eZUser::instance($anonymousUserID);
        eZUser::setCurrentlyLoggedInUser($user, $anonymousUserID);
        $canAnonymousRead = $contentObject->attribute('can_read');

        // Switch back to the user that was logged in before the check
        $user = eZUser::instance($currentUserID);
        eZUser::setCurrentlyLoggedInUser($user, $currentUserID);

        if ($canAnonymousRead) {
            $anonymousAccess = 'true';
        }
    }

    // Index time boost factors, if any
    $boostClasses = $findIni->variable('IndexBoost', 'Class');
    $boostAttributes = $findIni->variable('IndexBoost', 'Attribute');
    $boostDatatypes = $findIni->variable('IndexBoost', 'Datatype');
    $reverseRelatedScale = $findIni->variable('IndexBoost', 'ReverseRelatedScale');

    // Default document boost, raised by the class boost when it has a numeric value
    $docBoost = 1.0;
    $contentClassIdentifier = $contentObject->attribute('class_identifier');
    if (isset($boostClasses[$contentClassIdentifier]) && is_numeric($boostClasses[$contentClassIdentifier])) {
        $docBoost += $boostClasses[$contentClassIdentifier];
    }

    // Google like boosting, using eZ Publish reverseRelatedObjectCount
    $reverseRelatedObjectCount = $contentObject->reverseRelatedObjectCount();
    $docBoost += $reverseRelatedScale * $reverseRelatedObjectCount;

    // Languages available for this version
    $availableLanguages = $currentVersion->translationList(false, false);

    // One eZSolrDoc per language version
    foreach ($availableLanguages as $languageCode) {
        $doc = new eZSolrDoc($docBoost);

        // Global unique object ID
        $doc->addField($metaField('guid'), $eZSolr->guid($contentObject, $languageCode));

        // Installation identifier and URL
        $doc->addField($metaField('installation_id'), eZSolr::installationID());
        $doc->addField(
            $metaField('installation_url'),
            $findIni->variable('SiteSettings', 'URLProtocol') . $eZSolr->SiteINI->variable('SiteSettings', 'SiteURL') . '/'
        );

        // Object attributes; "sort_name" duplicates "name" because the tokenized
        // "name" field cannot be sorted on (Solr limitation)
        $doc->addField($metaField('name'), $contentObject->name(false, $languageCode));
        $doc->addField($metaField('sort_name'), $contentObject->name(false, $languageCode));
        $doc->addField($metaField('anon_access'), $anonymousAccess);
        $doc->addField($metaField('language_code'), $languageCode);
        $doc->addField($metaField('available_language_codes'), $availableLanguages);

        $owner = $contentObject->attribute('owner');
        if ($owner) {
            // Owner name and the IDs of the groups the owner belongs to
            $doc->addField($metaField('owner_name'), $owner->name(false, $languageCode));
            foreach ($owner->attribute('parent_nodes') as $groupID) {
                $doc->addField($metaField('owner_group_id'), $groupID);
            }
        }

        // Object states exist from eZ Publish 4.1 only, so check before using them
        if (method_exists($contentObject, 'stateIDArray')) {
            $doc->addField($metaField('object_states'), $contentObject->stateIDArray());
        }

        // Content object meta attribute values
        foreach ($metaAttributeValues as $objectMeta) {
            $doc->addField(
                $metaField($objectMeta['name']),
                ezfSolrDocumentFieldBase::preProcessValue($objectMeta['value'], $objectMeta['fieldType'])
            );
        }

        // Content node meta attribute values
        foreach ($nodeAttributeValues as $nodeMeta) {
            $doc->addField(
                $metaField($nodeMeta['name']),
                ezfSolrDocumentFieldBase::preProcessValue($nodeMeta['value'], $nodeMeta['fieldType'])
            );
        }

        // Main url_alias and main path_string
        $doc->addField($metaField('main_url_alias'), $mainNode->attribute('url_alias'));
        $doc->addField($metaField('main_path_string'), $mainNode->attribute('path_string'));

        // Node IDs of all parent path elements, for every location
        foreach ($nodePathArray as $locationPath) {
            foreach ($locationPath as $pathNodeID) {
                $doc->addField($metaField('path'), $pathNodeID);
            }
        }

        // Since eZ Find 2.3: one field per main path element. The meta field name
        // helper cannot be used here as several distinct fields are created.
        foreach ($mainNodePathArray as $depth => $pathNodeID) {
            $doc->addField('meta_main_path_element_' . $depth . '_si', $pathNodeID);
        }

        eZContentObject::recursionProtectionStart();

        // Since eZ Find 2.3: look for the attribute storage setting
        $doAttributeStorage = ($findIni->variable('IndexOptions', 'EnableSolrAttributeStorage') === 'true');
        if ($doAttributeStorage) {
            $allAttributeData = array();
        }

        // Add every content object attribute of this translation to the document
        foreach ($currentVersion->contentObjectAttributes($languageCode) as $attribute) {
            $metaDataText = '';
            $classAttribute = $attribute->contentClassAttribute();
            $attributeIdentifier = $classAttribute->attribute('identifier');
            $combinedIdentifier = $contentClassIdentifier . '/' . $attributeIdentifier;

            // Attribute boost: "<attribute>" setting, plus "<class>/<attribute>" setting
            $boostAttribute = false;
            if (isset($boostAttributes[$attributeIdentifier]) && is_numeric($boostAttributes[$attributeIdentifier])) {
                $boostAttribute = $boostAttributes[$attributeIdentifier];
            }
            if (isset($boostAttributes[$combinedIdentifier]) && is_numeric($boostAttributes[$combinedIdentifier])) {
                $boostAttribute += $boostAttributes[$combinedIdentifier];
            }

            if ($classAttribute->attribute('is_searchable') == 1) {
                $documentFieldBase = ezfSolrDocumentFieldBase::getInstance($attribute);
                $eZSolr->addFieldBaseToDoc($documentFieldBase, $doc, $boostAttribute);
            }

            if ($doAttributeStorage) {
                $storageFieldName = ezfSolrStorage::getSolrStorageFieldName($attributeIdentifier);
                $attributeData = ezfSolrStorage::getAttributeData($attribute);
                $allAttributeData['data_map'][$attributeIdentifier] = $attributeData;
                $doc->addField($storageFieldName, ezfSolrStorage::serializeData($attributeData));
            }
        }

        eZContentObject::recursionProtectionEnd();

        if ($doAttributeStorage) {
            $doc->addField('as_all_bst', ezfSolrStorage::serializeData($allAttributeData));
        }

        $documents[$languageCode] = $doc;

        // Index time plugins: general ones first, then the one bound to the object's class
        $generalPlugins = (array) eZINI::instance('ezfind.ini')->variable('IndexPlugins', 'General');
        $classPlugins = (array) eZINI::instance('ezfind.ini')->variable('IndexPlugins', 'Class');

        foreach ($generalPlugins as $pluginClassString) {
            if (!class_exists($pluginClassString)) {
                eZDebug::writeError("Unable to find the PHP class '{$pluginClassString}' defined for index time plugins for eZ Find", __METHOD__);
                continue;
            }
            $plugin = new $pluginClassString();
            if ($plugin instanceof ezfIndexPlugin) {
                $plugin->modify($contentObject, $documents);
            }
        }

        if (!array_key_exists($contentObject->attribute('class_identifier'), $classPlugins)) {
            continue;
        }
        $pluginClassString = $classPlugins[$contentObject->attribute('class_identifier')];
        if (!class_exists($pluginClassString)) {
            continue;
        }
        $plugin = new $pluginClassString();
        if ($plugin instanceof ezfIndexPlugin) {
            $plugin->modify($contentObject, $documents);
        }
    }

    return $documents;
}
/**
 * Computes, for every cluster listed in merck.ini [PublishSettings] clustersToHide,
 * a verdict telling whether the node must be hidden on that cluster.
 *
 * The languages of each cluster are read from the cluster's own site.ini and are
 * checked one after another; the first locale producing a verdict that ends with
 * "break" closes the check for that cluster:
 *  - the publisher set "forbidden_article" to 1      => toHide = true,  toDelete = true
 *  - the cluster has no forbidden words              => toHide = false, toDelete = true
 *  - node absent from solr and older than 48 hours   => toHide = true,  toDelete = true (logged)
 *  - node absent from solr and older than 4 hours    => reindex requested, toHide = true, toDelete = false
 *  - the forbidden words query returns no document   => toHide = true,  toDelete = true
 *  - none of the above                               => toHide = false, toDelete = true
 *
 * @param eZContentObjectTreenode $node Node to check
 * @param array $row Only $row['created'] is read; it is compared against time()
 * @return array cluster identifier => array( 'toHide' => bool, 'toDelete' => bool, 'comment' => string )
 */
protected static function nodeHasForbiddenWords( &$node, &$row )
{
    /* @type $clustersToHide array */
    $clustersToHide = eZINI::instance( 'merck.ini' )->variable( 'PublishSettings', 'clustersToHide' );
    $returnArray = array();
    $now = time();

    foreach ( $clustersToHide as $cluster )
    {
        $clusterIni = eZINI::fetchFromFile( "./extension/$cluster/settings/site.ini" );
        /* @type $languageList array */
        $languageList = $clusterIni->variable( 'RegionalSettings', 'SiteLanguageList' );

        // The word list only depends on the cluster: fetch it lazily, at most once per cluster.
        $forbiddenWordsArray = null;

        foreach ( $languageList as $locale )
        {
            /* @type $nodeDatamap eZContentObjectAttribute[] */
            $nodeDatamap = $node->object()->fetchDataMap( false, $locale );
            if ( !$nodeDatamap )
            {
                continue;
            }

            // Content classes without a "forbidden_article" attribute must not trigger a fatal error.
            if ( isset( $nodeDatamap['forbidden_article'] )
                 && $nodeDatamap['forbidden_article']->attribute( 'data_int' ) == 1 )
            {
                // node is marked from publisher as containing some forbidden words = we hide
                $returnArray[$cluster] = array(
                    'toHide'   => true,
                    'toDelete' => true,
                    'comment'  => 'marked by publisher',
                );
                break;
            }

            if ( $forbiddenWordsArray === null )
            {
                $forbiddenWordsArray = self::getForbiddenWordsArray( $cluster );
            }
            if ( empty( $forbiddenWordsArray ) )
            {
                $returnArray[$cluster] = array(
                    'toHide'   => false,
                    'toDelete' => true,
                    'comment'  => 'no forbidden words on cluster',
                );
                continue;
            }

            // Match every regional variant of the language, e.g. "fre-FR" => "fre-*"
            $lgExplode = explode( '-', $locale );
            $languageFilter = $lgExplode[0] . '-*';

            // NOTE(review): 'stop' does not look like a Solr parameter ('rows' is the usual
            // companion of 'start'); left untouched, only numFound is read - confirm.
            $params = array(
                'indent' => 'on',
                'qt'     => 'standard',
                'q'      => '*:*',
                'start'  => 0,
                'stop'   => 0,
                'fq'     => implode( ' AND ', array(
                    'meta_node_id_si:' . $node->attribute( 'node_id' ),
                    'meta_language_code_ms:' . $languageFilter,
                    'meta_installation_id_ms:' . eZSolr::installationID()
                ) ),
            );

            // empty() also covers a failed request (rawSearch() returning a falsy value)
            // without raising notices.
            $isInSolrResult = SolrTool::rawSearch( $params, 'php', false );
            if ( empty( $isInSolrResult['response']['numFound'] ) )
            {
                // The node is not in solr. The 48h limit is tested first: it is a subset of
                // the 4h case and would otherwise never be reached.
                if ( $row['created'] < $now - 3600 * 48 )
                {
                    eZLog::write(
                        sprintf( "%s\t Node %s still not in solr after 48h", date( 'Y-m-d H:i:s' ), $node->attribute( 'node_id' ) ),
                        'updatevisibility.log'
                    );
                    $returnArray[$cluster] = array(
                        'toHide'   => true,
                        'toDelete' => true,
                        'comment'  => 'node is taking too long to be indexed',
                    );
                    break;
                }

                if ( $row['created'] < $now - 3600 * 4 )
                {
                    // the node was added more than 4 hours ago. It should be in solr.
                    // We ask for a reindex and postpone its check
                    eZSearch::addObject( $node->object() );
                    $returnArray[$cluster] = array(
                        'toHide'   => true,
                        'toDelete' => false,
                        'comment'  => 'not indexed in solr yet',
                    );
                    break;
                }
                // Nodes younger than 4 hours fall through to the forbidden words query below.
            }

            $params['q'] = implode( ' ', $forbiddenWordsArray );
            $solrResults = SolrTool::rawSearch( $params, 'php', false );

            // NOTE(review): the node is hidden when the query matches NO document. This looks
            // inverted unless getForbiddenWordsArray() returns negated clauses - behaviour kept
            // as is, confirm against that helper.
            if ( empty( $solrResults['response']['numFound'] ) )
            {
                // content has forbidden words => we hide
                $returnArray[$cluster] = array(
                    'toHide'   => true,
                    'toDelete' => true,
                    'comment'  => 'has forbidden words',
                );
                break;
            }
        }

        if ( !isset( $returnArray[$cluster] ) )
        {
            $returnArray[$cluster] = array(
                'toHide'   => false,
                'toDelete' => true,
                'comment'  => 'default case'
            );
        }
    }

    return $returnArray;
}