/**
 * Converts the XML body of the Trip news feed into JSON-encoded Solr documents.
 *
 * Each <NewsArticle> element becomes one JSON string. The number of articles
 * handled is echoed and added to self::$totalProcessed.
 *
 * @param string $curlXMLResponse Raw XML, passed to simplexml_load_string()
 * @return array List of JSON strings, one per article (empty when nothing usable was found)
 */
private function processXML($curlXMLResponse)
{
    $xml = simplexml_load_string($curlXMLResponse, null, LIBXML_NOCDATA);
    // Round-trip through JSON to turn the SimpleXMLElement tree into plain arrays
    $xmlArray = json_decode(json_encode($xml), true);

    $i = 0;
    $formattedArticles = array();

    if (!empty($xmlArray) && isset($xmlArray['NewsArticle']) && !empty($xmlArray['NewsArticle'])) {
        $articles = $xmlArray['NewsArticle'];

        // A feed holding a single <NewsArticle> decodes to one associative array
        // instead of a list of articles; wrap it so the loop below always
        // iterates over articles and never over the fields of one article.
        if (!isset($articles[0])) {
            $articles = array($articles);
        }

        foreach ($articles as $article) {
            // The same hash is used both as GUID and as cluster URL
            $articleHash = md5('trip' . $article['URL']);

            $formattedArticle = array(
                'meta_installation_id_ms' => keZSolr::installationID(),
                //'meta_path_id_si' => self::getMetaPathId(),
                'meta_guid_ms' => $articleHash,
                'meta_section_id_si' => 1,
                'attr_cluster_uk_url_s' => $articleHash,
                'attr_date_dt' => self::formatDate(),
                'attr_offline_date_dt' => '1970-01-01T01:00:00Z',
                'attr_archived_date_dt' => '1970-01-01T01:00:00Z',
                'attr_headline_t' => $article['title'],
                'attr_headline_lc_s' => iconv('UTF-8', 'ASCII//TRANSLIT', strtolower($article['title'])),
                'attr_promo_description_t' => empty($article['Snippet']) ? '' : $article['Snippet'],
                'attr_is_invisible_cluster_uk_b' => false,
                'attr_depth_cluster_uk_i' => 0,
                'attr_relative_depth_i' => 0,
                'subattr_customer_type___source_id____s' => self::$customerTypeIds,
                'subattr_language___source_id____s' => 'en',
                'subattr_language_cluster_uk____s' => 'en',
                'subattr_local_application___source_id____s' => self::$tripApplication->application_id,
                'subattr_local_application___source_mixed____s' => "cluster_uk##unused##" . self::$tripApplication->application_id,
                'subattr_publisher_folder___source_id____s' => "tripdb_uk",
                // Feed categories use underscores where the taxonomy lookup is given spaces
                'attr_trip_speciality_s' => self::getTaxonomyCodeFromArticleCategory(str_replace('_', ' ', $article['Category'])),
                'attr_trip_external_url_s' => $article['URL'],
                'attr_trip_publication_t' => $article['Publication'],
                'meta_language_code_ms' => 'eng-GB',
                'meta_class_identifier_ms' => 'article',
                'attr_exclude_from_search_cluster_uk_b' => false,
                'attr_hide_in_search_b' => false,
            );

            $formattedArticles[] = json_encode($formattedArticle);
            $i++;
        }
    }

    echo "|-- $i articles found.\r\n";
    self::$totalProcessed += $i;

    return $formattedArticles;
}
/**
 * Builds the JSON-encoded Solr document for one BBC feed item.
 *
 * @param array $article Feed item; the 'link', 'pubDate' and 'title' keys are read,
 *                       'description' is optional
 * @return string Result of json_encode() on the assembled document
 */
protected function processArticle($article)
{
    $link = $article['link'];
    $headline = $article['title'];
    // One hash serves as both GUID and cluster URL
    $linkHash = md5('bbc_feed' . $link);
    $neverDate = '1970-01-01T01:00:00Z';

    $document = array();
    $document['meta_installation_id_ms'] = keZSolr::installationID();
    //$document['meta_path_id_si'] = self::getMetaPathId();
    $document['meta_guid_ms'] = $linkHash;
    $document['meta_section_id_si'] = 1;
    $document['attr_cluster_uk_url_s'] = $linkHash;
    $document['attr_date_dt'] = $this->formatDate($article['pubDate']);
    $document['attr_offline_date_dt'] = $neverDate;
    $document['attr_archived_date_dt'] = $neverDate;
    $document['attr_headline_t'] = $headline;
    $document['attr_headline_lc_s'] = iconv('UTF-8', 'ASCII//TRANSLIT', strtolower($headline));

    if (empty($article['description'])) {
        $document['attr_promo_description_t'] = '';
    } else {
        $document['attr_promo_description_t'] = $article['description'];
    }

    $document['attr_is_invisible_cluster_uk_b'] = false;
    $document['attr_depth_cluster_uk_i'] = 0;
    $document['attr_relative_depth_i'] = 0;
    $document['subattr_customer_type___source_id____s'] = self::getCustomerTypes();
    $document['subattr_language___source_id____s'] = 'en';
    $document['subattr_language_cluster_uk____s'] = 'en';

    $applicationId = $this->application->application_id;
    $document['subattr_local_application___source_id____s'] = $applicationId;
    $document['subattr_local_application___source_mixed____s'] = 'cluster_uk##unused##' . $applicationId;

    $document['subattr_publisher_folder___source_id____s'] = 'bbc_feed';
    $document['attr_trip_external_url_s'] = $link;
    $document['meta_language_code_ms'] = 'eng-GB';
    $document['meta_class_identifier_ms'] = 'article';
    $document['attr_exclude_from_search_cluster_uk_b'] = false;
    $document['attr_hide_in_search_b'] = false;

    return json_encode($document);
}
/**
 * Deletes the previously imported articles, then converts the first items of
 * an RSS feed into JSON-encoded Solr documents.
 *
 * The number of articles handled is echoed and added to $this->totalProcessed.
 *
 * @param string $curlXMLResponse Raw RSS/XML, passed to simplexml_load_string()
 * @return array|null List of JSON strings (at most three), or null when the
 *                    previous articles could not be deleted
 */
protected function processXML($curlXMLResponse)
{
    // Only the first items of the feed are imported
    $maxArticles = 3;

    // Delete existing nodes; the import is aborted when that fails.
    // NOTE(review): the deletion happens before the feed is parsed, so an
    // unparsable feed leaves no articles at all - confirm this is intended.
    try {
        echo "Deleting Previous articles\n";
        $this->deletePreviousArticles();
    } catch (Exception $e) {
        // Report the reason instead of failing silently
        echo "\n Error deleting previous articles : " , $e->getMessage() , "\n";
        return null;
    }

    $xml = simplexml_load_string($curlXMLResponse, null, LIBXML_NOCDATA);
    // Round-trip through JSON to turn the SimpleXMLElement tree into plain arrays
    $xmlArray = json_decode(json_encode($xml), true);

    $i = 0;
    $formattedArticles = array();

    $items = empty($xmlArray['channel']['item']) ? array() : $xmlArray['channel']['item'];
    // A channel with a single <item> decodes to one associative array; wrap it in a list
    if (!empty($items) && !isset($items[0])) {
        $items = array($items);
    }

    // The channel language is optional in the feed; fall back to null without a notice
    $language = isset($xmlArray['channel']['language']) ? $xmlArray['channel']['language'] : null;

    foreach ($items as $article) {
        if ($i >= $maxArticles) {
            break;
        }

        try {
            $clusterIdentifier = ClusterTool::clusterIdentifier();
            // The category is optional; keep the GUID input identical to a plain
            // concatenation while avoiding an undefined-index notice
            $guidCategory = isset($article['category']) ? $article['category'] : '';

            $formattedArticle = array(
                'meta_installation_id_ms' => keZSolr::installationID(),
                'meta_guid_ms' => md5($article['link'] . $guidCategory),
                'meta_section_id_si' => 1,
                'attr_' . $clusterIdentifier . '_url_s' => $article['link'],
                'attr_date_dt' => $this->formatDate(DATE_RFC2822, $article['pubDate'], 'Y-m-d\TH:i:s\Z'),
                'attr_offline_date_dt' => '1970-01-01T01:00:00Z',
                'attr_archive_date_dt' => '1970-01-01T01:00:00Z',
                'attr_headline_t' => $article['title'],
                'attr_headline_lc_s' => strtolower($article['title']),
                'attr_promo_description_t' => empty($article['description']) ? '' : $article['description'],
                'attr_category_t' => empty($article['category']) ? '' : $article['category'],
                'attr_is_invisible_' . $clusterIdentifier . '_b' => false,
                'subattr_language___source_id____s' => $language,
                'subattr_language_' . $clusterIdentifier . '____s' => $language,
                'subattr_publisher_folder___source_id____s' => 'congress_report_pt',
                'meta_language_code_ms' => LocaleTool::mainLanguage(),
                'meta_class_identifier_ms' => 'article',
                'attr_relative_depth_i' => 0,
            );

            $formattedArticles[] = json_encode($formattedArticle);
        } catch (Exception $e) {
            // A failing item is reported and skipped; it does not count towards the limit
            echo "\n Error with item : " , $article['title'] , ' : ' , $e->getMessage() , "\n";
            continue;
        }

        $i++;
    }

    echo "|-- $i articles found.\n";
    $this->totalProcessed += $i;

    return $formattedArticles;
}
/**
 * Fetches the details of one clinical trial and stores them as a Solr indexation job.
 *
 * @param mixed $trialId Trial identifier sent as 'TrialIDnum' to the 'GetTrialDetails' call
 * @return bool|null true once the job has been stored, null when buildTrialData()
 *                   returned nothing usable
 */
public function importTrial($trialId)
{
    $requestParameters = array(
        'TrialIDnum' => $trialId,
        'username' => self::TRIAL_DETAILS_USERNAME,
        'password' => self::TRIAL_DETAILS_PASSWORD,
    );
    $detailsUrl = $this->buildUrl('GetTrialDetails', $requestParameters);
    $response = $this->executeCurl($detailsUrl);

    $trialData = $this->buildTrialData($trialId, $response);
    if ($trialData == null) {
        return null;
    }

    $details = $trialData['details'];
    $title = $trialData['title'];

    // Pieces concatenated into the 'attr_core_content_t' field
    $coreContentParts = array(
        $title,
        $details['scientific_title'],
        $details['sponsor'],
        $trialData['conditions'],
        $details['interventions'],
        implode(',', $trialData['country']),
        $trialData['id'],
        implode(',', $details['secondary_id']),
    );

    // The same hash serves as GUID and as remote id
    $trialHash = md5('clinical_trials' . $trialData['id']);
    $neverDate = '1970-01-01T01:00:00Z';

    $document = array();
    $document['meta_installation_id_ms'] = keZSolr::installationID();
    //$document['meta_path_id_si'] = self::getMetaPathId();
    $document['meta_guid_ms'] = $trialHash;
    $document['meta_remote_id_ms'] = $trialHash;
    $document['meta_section_id_si'] = 1;
    $document['attr_cluster_uk_url_s'] = md5('clinical_trials' . $trialData['link']);
    $document['attr_date_dt'] = $details['update_date'];
    $document['attr_offline_date_dt'] = $neverDate;
    $document['attr_archived_date_dt'] = $neverDate;
    $document['attr_headline_t'] = $title;
    $document['attr_headline_s'] = $title;
    $document['attr_headline_lc_s'] = iconv('UTF-8', 'ASCII//TRANSLIT', strtolower($title));
    $document['attr_is_invisible_cluster_uk_b'] = false;
    $document['attr_depth_cluster_uk_i'] = 0;
    $document['attr_relative_depth_i'] = 0;
    $document['subattr_customer_type___source_id____s'] = self::getCustomerTypes();
    $document['subattr_language___source_id____s'] = 'en';
    $document['subattr_local_application___source_id____s'] = $this->application->id;
    $document['subattr_publisher_folder___source_id____s'] = 'clinical_trials_oncology';
    $document['meta_language_code_ms'] = 'eng-GB';
    $document['meta_class_identifier_ms'] = 'article';
    $document['attr_exclude_from_search_cluster_uk_b'] = false;
    $document['attr_hide_in_search_b'] = false;
    $document['attr_clinical_trials_details_s'] = json_encode($details);
    $document['attr_core_content_t'] = implode(',', $coreContentParts);
    $document['subattr_recruitment_status___source_id____s'] = $trialData['status'];
    $document['attr_clinical_trials_conditions_s'] = $trialData['conditions'];
    $document['subattr_gender___source_id____s'] = $trialData['sex'];
    $document['attr_clinical_trials_min_age_i'] = $trialData['min_age'];
    $document['attr_clinical_trials_max_age_i'] = $trialData['max_age'];
    $document['attr_clinical_trials_country____s'] = $trialData['country'];

    // Add the per-cluster fields for every cluster returned by ClusterTool
    foreach (ClusterTool::getAllClusters() as $clusterName) {
        $document['attr_' . $clusterName . '_url_s'] = $trialData['id'];
        $document['subattr_language_' . $clusterName . '____s'] = 'en';
        $document['subattr_local_application___source_mixed____s'][] = $clusterName . '##unused##' . $this->application->id;
        $document['attr_is_invisible_' . $clusterName . '_b'] = false;
    }

    $job = new SolrIndexationJob();
    $job->setAttribute('data', json_encode($document));
    $job->store();

    return true;
}
/**
 * Converts the items of an RSS feed into JSON-encoded Solr documents.
 *
 * Items may carry 'dateStart' / 'dateEnd' values; when one is missing or empty
 * the item's 'pubDate' is used for it instead. The number of articles handled
 * is echoed and added to $this->totalProcessed.
 *
 * @param string $curlXMLResponse Raw RSS/XML, passed to simplexml_load_string()
 * @return array List of JSON strings, one per successfully formatted item
 */
public function processXML($curlXMLResponse)
{
    // Input formats handed to formatDate(); pubDate carries a timezone offset,
    // the start/end dates do not
    $pubDateFormat = 'd-m-Y H:i:s O';
    $eventDateFormat = 'd-m-Y H:i:s';
    $solrDateFormat = 'Y-m-d\TH:i:s\Z';

    $xml = simplexml_load_string($curlXMLResponse, null, LIBXML_NOCDATA);
    // Round-trip through JSON to turn the SimpleXMLElement tree into plain arrays
    $xmlArray = json_decode(json_encode($xml), true);

    $i = 0;
    $formattedArticles = array();

    $items = empty($xmlArray['channel']['item']) ? array() : $xmlArray['channel']['item'];
    // A channel with a single <item> decodes to one associative array; wrap it in a list
    if (!empty($items) && !isset($items[0])) {
        $items = array($items);
    }

    // The channel language is optional in the feed; fall back to null without a notice
    $language = isset($xmlArray['channel']['language']) ? $xmlArray['channel']['language'] : null;

    foreach ($items as $article) {
        try {
            $clusterIdentifier = ClusterTool::clusterIdentifier();

            // When a start/end date is missing, fall back to pubDate and keep
            // pubDate's own input format next to it. Storing an already
            // formatted date and re-parsing it with the start/end input format
            // would not match (assumes formatDate() takes input format, value,
            // output format, as its call sites suggest - TODO confirm).
            $hasStart = isset($article['dateStart']) && $article['dateStart'];
            $startValue = $hasStart ? $article['dateStart'] : $article['pubDate'];
            $startFormat = $hasStart ? $eventDateFormat : $pubDateFormat;

            $hasEnd = isset($article['dateEnd']) && $article['dateEnd'];
            $endValue = $hasEnd ? $article['dateEnd'] : $article['pubDate'];
            $endFormat = $hasEnd ? $eventDateFormat : $pubDateFormat;

            // One hash serves as GUID, remote id and cluster URL
            $articleHash = md5($this->applicationIdentifier . $article['guid'] . $article['link']);

            // Empty XML elements decode to empty arrays, so only trim real strings
            $city = (isset($article['address']) && is_string($article['address']))
                ? trim($article['address'])
                : '';

            // Split the ';' separated specialties, dropping blank entries. The
            // result is built as a sequential list so json_encode() always
            // emits a JSON array, even when a blank entry sits in the middle.
            $specialties = array();
            if (isset($article['specialties']) && is_string($article['specialties'])) {
                foreach (explode(';', $article['specialties']) as $specialty) {
                    $specialty = trim($specialty);
                    if ($specialty !== '') {
                        $specialties[] = $specialty;
                    }
                }
            }

            $formattedArticle = array(
                'meta_installation_id_ms' => keZSolr::installationID(),
                'meta_path_si' => $this->getMetaPathId(),
                'meta_guid_ms' => $articleHash,
                'meta_remote_id_ms' => $articleHash,
                'meta_section_id_si' => 1,
                'attr_' . $clusterIdentifier . '_url_s' => $articleHash,
                'attr_date_dt' => $this->formatDate($pubDateFormat, $article['pubDate'], $solrDateFormat),
                'attr_date_start_dt' => $this->formatDate($startFormat, $startValue, $solrDateFormat),
                'attr_article_year_start_i' => $this->formatDate($startFormat, $startValue, "Y"),
                'attr_article_month_start_i' => $this->formatDate($startFormat, $startValue, "m"),
                'attr_date_end_dt' => $this->formatDate($endFormat, $endValue, $solrDateFormat),
                'attr_article_year_end_i' => $this->formatDate($endFormat, $endValue, "Y"),
                'attr_article_month_end_i' => $this->formatDate($endFormat, $endValue, "m"),
                'attr_offline_date_dt' => '1970-01-01T01:00:00Z',
                'attr_archive_date_dt' => '1970-01-01T01:00:00Z',
                'attr_headline_t' => $article['title'],
                'attr_headline_s' => $article['title'],
                'attr_headline_lc_s' => strtolower($article['title']),
                'attr_promo_description_t' => empty($article['description']) ? '' : $article['description'],
                'attr_is_invisible_' . $clusterIdentifier . '_b' => false,
                'subattr_speciality___source_id____s' => $this->specialityIds,
                'subattr_language___source_id____s' => $language,
                'subattr_language_' . $clusterIdentifier . '____s' => $language,
                'subattr_local_application___source_id____s' => $this->application->application_id,
                'subattr_local_application___source_mixed____s' => $this->getAllApplicationLocalizedIds(),
                'subattr_publisher_folder___source_id____s' => $this->getPublisherFolderPath(),
                'meta_language_code_ms' => LocaleTool::mainLanguage(),
                'meta_class_identifier_ms' => 'article',
                'attr_content_rating_' . $clusterIdentifier . '_f' => 0,
                'attr_view_counter_' . $clusterIdentifier . '_i' => 0,
                'attr_evrika_address_city_s' => $city,
                'subattr_evrika_specialty____s' => $specialties,
                'attr_evrika_address_city_t_ru' => $city,
            );

            $formattedArticles[] = json_encode($formattedArticle);
            $i++;
        } catch (Exception $e) {
            // A failing item is reported and skipped
            echo "\n Error with item guid : " , $article['guid'] , ' : ' , $e->getMessage() , "\n";
            continue;
        }
    }

    echo "|-- $i articles found.\n";
    $this->totalProcessed += $i;

    return $formattedArticles;
}