/**
 * Converts a TRIP XML response into JSON-encoded Solr documents, one per
 * <NewsArticle> element, and adds the number of converted articles to
 * self::$totalProcessed.
 *
 * @param string $curlXMLResponse Raw XML body (handed to simplexml_load_string()).
 * @return array List of JSON strings; empty when no article is found.
 */
private function processXML($curlXMLResponse)
    {
        $xml = simplexml_load_string($curlXMLResponse,  null, LIBXML_NOCDATA);
        // Round-trip through JSON to turn the SimpleXMLElement tree into plain arrays.
        $xmlArray = json_decode(json_encode($xml), true);

        $i = 0;
        $formattedArticles = array();
        if(!empty($xmlArray) && isset($xmlArray['NewsArticle']) && is_array($xmlArray['NewsArticle']) && !empty($xmlArray['NewsArticle']))
        {
            // A response holding a single <NewsArticle> decodes to an associative
            // array instead of a list; wrap it so the loop always receives articles
            // rather than the fields of one article.
            if(!isset($xmlArray['NewsArticle'][0]))
            {
                $xmlArray['NewsArticle'] = array($xmlArray['NewsArticle']);
            }

            foreach($xmlArray['NewsArticle'] as $article)
            {
                // The same hash serves as guid and as cluster URL.
                $urlHash = md5('trip' . $article['URL']);

                $formattedArticle = array(
                    'meta_installation_id_ms'           => keZSolr::installationID(),
                    'meta_guid_ms'                      => $urlHash,
                    'meta_section_id_si'                => 1,
                    'attr_cluster_uk_url_s'             => $urlHash,
                    'attr_date_dt'                      => self::formatDate(),
                    'attr_offline_date_dt'              => '1970-01-01T01:00:00Z',
                    'attr_archived_date_dt'             => '1970-01-01T01:00:00Z',
                    'attr_headline_t'                   => $article['title'],
                    'attr_headline_lc_s'                => iconv('UTF-8', 'ASCII//TRANSLIT', strtolower($article['title'])),
                    'attr_promo_description_t'          => empty($article['Snippet']) ? '' : $article['Snippet'],
                    'attr_is_invisible_cluster_uk_b'    => false,
                    'attr_depth_cluster_uk_i'           => 0,
                    'attr_relative_depth_i'             => 0,
                    'subattr_customer_type___source_id____s'        => self::$customerTypeIds,
                    'subattr_language___source_id____s'             => 'en',
                    'subattr_language_cluster_uk____s'              => 'en',
                    'subattr_local_application___source_id____s'    => self::$tripApplication->application_id,
                    'subattr_local_application___source_mixed____s' => "cluster_uk##unused##".self::$tripApplication->application_id,
                    'subattr_publisher_folder___source_id____s'     => "tripdb_uk",
                    // Category names use underscores in the feed; the taxonomy lookup is given spaces.
                    'attr_trip_speciality_s'    => self::getTaxonomyCodeFromArticleCategory(str_replace('_', ' ', $article['Category'])),
                    'attr_trip_external_url_s'  => $article['URL'],
                    'attr_trip_publication_t'   => $article['Publication'],
                    'meta_language_code_ms'     => 'eng-GB',
                    'meta_class_identifier_ms'  => 'article',
                    'attr_exclude_from_search_cluster_uk_b' => false,
                    'attr_hide_in_search_b'     => false,
                );
                $formattedArticles[] = json_encode($formattedArticle);
                $i++;
            }
        }
        echo "|-- $i articles found.\r\n";
        self::$totalProcessed += $i;

        return $formattedArticles;
    }
    /**
     * Builds the Solr document for one feed item and returns it JSON-encoded.
     *
     * @param array $article Feed item; 'link', 'pubDate' and 'title' are read,
     *                       'description' is used when it is not empty.
     * @return string Result of json_encode() on the assembled document.
     */
    protected function processArticle($article)
    {
        $headline = $article['title'];
        $externalUrl = $article['link'];

        // One hash is used both as guid and as cluster URL.
        $urlHash = md5('bbc_feed' . $externalUrl);

        $promoDescription = '';
        if (!empty($article['description']))
        {
            $promoDescription = $article['description'];
        }

        $applicationId = $this->application->application_id;

        $document = array();
        $document['meta_installation_id_ms'] = keZSolr::installationID();
        $document['meta_guid_ms'] = $urlHash;
        $document['meta_section_id_si'] = 1;
        $document['attr_cluster_uk_url_s'] = $urlHash;
        $document['attr_date_dt'] = $this->formatDate($article['pubDate']);
        $document['attr_offline_date_dt'] = '1970-01-01T01:00:00Z';
        $document['attr_archived_date_dt'] = '1970-01-01T01:00:00Z';
        $document['attr_headline_t'] = $headline;
        // Lower-cased, ASCII-transliterated copy of the headline.
        $document['attr_headline_lc_s'] = iconv('UTF-8', 'ASCII//TRANSLIT', strtolower($headline));
        $document['attr_promo_description_t'] = $promoDescription;
        $document['attr_is_invisible_cluster_uk_b'] = false;
        $document['attr_depth_cluster_uk_i'] = 0;
        $document['attr_relative_depth_i'] = 0;
        $document['subattr_customer_type___source_id____s'] = self::getCustomerTypes();
        $document['subattr_language___source_id____s'] = 'en';
        $document['subattr_language_cluster_uk____s'] = 'en';
        $document['subattr_local_application___source_id____s'] = $applicationId;
        $document['subattr_local_application___source_mixed____s'] = "cluster_uk##unused##" . $applicationId;
        $document['subattr_publisher_folder___source_id____s'] = "bbc_feed";
        $document['attr_trip_external_url_s'] = $externalUrl;
        $document['meta_language_code_ms'] = 'eng-GB';
        $document['meta_class_identifier_ms'] = 'article';
        $document['attr_exclude_from_search_cluster_uk_b'] = false;
        $document['attr_hide_in_search_b'] = false;

        return json_encode($document);
    }
    /**
     * Rebuilds the congress-report article set from an RSS response.
     *
     * The previously stored articles are deleted first; the feed is then parsed
     * and at most three items are converted into JSON-encoded Solr documents.
     * The number of converted items is added to $this->totalProcessed.
     *
     * NOTE(review): deletion happens before the response is parsed, so an
     * unparsable or empty feed leaves no articles behind — confirm this is intended.
     *
     * @param string $curlXMLResponse Raw XML body of the feed (handed to simplexml_load_string()).
     * @return array|null JSON-encoded documents, or null when the previous
     *                    articles could not be deleted.
     */
    protected function processXML($curlXMLResponse)
    {
        //delete existing nodes
        try
        {
            echo "Deleting Previous articles\n";
            $this->deletePreviousArticles();
        }
        catch (Exception $e)
        {
            // Report why the import is aborted instead of failing silently.
            echo "|-- Unable to delete previous articles : " , $e->getMessage() , "\n";
            return null;
        }

        $xml = simplexml_load_string($curlXMLResponse,  null, LIBXML_NOCDATA);
        // Round-trip through JSON to turn the SimpleXMLElement tree into plain arrays.
        $xmlArray = json_decode(json_encode($xml), true);

        // Only the first few items of the feed are imported.
        $maxArticles = 3;
        $i = 0;
        $formattedArticles = array();

        $items = array();
        if(isset($xmlArray['channel']['item']) && is_array($xmlArray['channel']['item']) && !empty($xmlArray['channel']['item']))
        {
            $items = $xmlArray['channel']['item'];
            // A feed with a single <item> decodes to an associative array; wrap it in a list.
            if(!isset($items[0]))
            {
                $items = array($items);
            }
        }

        $language = isset($xmlArray['channel']['language']) ? $xmlArray['channel']['language'] : null;

        foreach($items as $article)
        {
            if($i >= $maxArticles)
            {
                break;
            }

            try
            {
                $cluster = ClusterTool::clusterIdentifier();

                // <category> is optional: read it once with a default so the guid
                // computation does not touch an undefined key.
                $category = isset($article['category']) ? $article['category'] : '';

                $formattedArticle = array(
                    'meta_installation_id_ms'           => keZSolr::installationID(),
                    'meta_guid_ms'                      => md5($article['link'] . $category),
                    'meta_section_id_si'                => 1,
                    'attr_' . $cluster . '_url_s'       => $article['link'],
                    'attr_date_dt'                      => $this->formatDate(DATE_RFC2822, $article['pubDate'], "Y-m-d\TH:i:s\Z"),
                    'attr_offline_date_dt'              => '1970-01-01T01:00:00Z',
                    'attr_archive_date_dt'              => '1970-01-01T01:00:00Z',
                    'attr_headline_t'                   => $article['title'],
                    'attr_headline_lc_s'                => strtolower($article['title']),
                    'attr_promo_description_t'          => empty($article['description']) ? '' : $article['description'],
                    'attr_category_t'                   => empty($category) ? '' : $category,
                    'attr_is_invisible_' . $cluster . '_b' => false,
                    'subattr_language___source_id____s' => $language,
                    'subattr_language_' . $cluster . '____s' => $language,
                    'subattr_publisher_folder___source_id____s' => 'congress_report_pt',
                    'meta_language_code_ms'             => LocaleTool::mainLanguage(),
                    'meta_class_identifier_ms'          => 'article',
                    'attr_relative_depth_i'             => 0,
                );
                $formattedArticles[] = json_encode($formattedArticle);

                // Only successfully formatted items count towards the limit.
                $i++;
            }
            catch(Exception $e)
            {
                echo "\n Error with item : " , $article['title'] , ' : ' , $e->getMessage() , "\n";
            }
        }

        echo "|-- $i articles found.\n";
        $this->totalProcessed += $i;

        return $formattedArticles;
    }
    /**
     * Fetches the details of one clinical trial and stores a Solr indexation
     * job holding the resulting document.
     *
     * @param mixed $trialId Identifier sent as 'TrialIDnum' to the 'GetTrialDetails' call.
     * @return bool|null True once the job has been stored, null when
     *                   buildTrialData() yields no data.
     */
    public function importTrial($trialId)
    {
        $requestParameters = array(
            'TrialIDnum' => $trialId,
            'username' => self::TRIAL_DETAILS_USERNAME,
            'password' => self::TRIAL_DETAILS_PASSWORD,
        );
        $response = $this->executeCurl($this->buildUrl('GetTrialDetails', $requestParameters));

        $trial = $this->buildTrialData($trialId, $response);
        if (null == $trial)
        {
            return null;
        }

        $details = $trial['details'];
        $applicationId = $this->application->id;

        // The same hash is used as guid and as remote id.
        $trialHash = md5('clinical_trials' . $trial['id']);

        // Comma-joined text stored in 'attr_core_content_t'.
        $coreContent = implode(',', array(
            $trial['title'],
            $details['scientific_title'],
            $details['sponsor'],
            $trial['conditions'],
            $details['interventions'],
            implode(',', $trial['country']),
            $trial['id'],
            implode(',', $details['secondary_id']),
        ));

        $document = array(
            'meta_installation_id_ms' => keZSolr::installationID(),
            'meta_guid_ms' => $trialHash,
            'meta_remote_id_ms' => $trialHash,
            'meta_section_id_si' => 1,
            'attr_cluster_uk_url_s' => md5('clinical_trials' . $trial['link']),
            'attr_date_dt' => $details['update_date'],
            'attr_offline_date_dt' => '1970-01-01T01:00:00Z',
            'attr_archived_date_dt' => '1970-01-01T01:00:00Z',
            'attr_headline_t' => $trial['title'],
            'attr_headline_s' => $trial['title'],
            'attr_headline_lc_s' => iconv('UTF-8', 'ASCII//TRANSLIT', strtolower($trial['title'])),
            'attr_is_invisible_cluster_uk_b' => false,
            'attr_depth_cluster_uk_i' => 0,
            'attr_relative_depth_i' => 0,
            'subattr_customer_type___source_id____s' => self::getCustomerTypes(),
            'subattr_language___source_id____s' => 'en',
            'subattr_local_application___source_id____s' => $applicationId,
            'subattr_publisher_folder___source_id____s' => "clinical_trials_oncology",
            'meta_language_code_ms' => 'eng-GB',
            'meta_class_identifier_ms' => 'article',
            'attr_exclude_from_search_cluster_uk_b' => false,
            'attr_hide_in_search_b' => false,
            "attr_clinical_trials_details_s" => json_encode($details),
            'attr_core_content_t' => $coreContent,
            'subattr_recruitment_status___source_id____s' => $trial['status'],
            'attr_clinical_trials_conditions_s' => $trial['conditions'],
            'subattr_gender___source_id____s' => $trial['sex'],
            'attr_clinical_trials_min_age_i' => $trial['min_age'],
            'attr_clinical_trials_max_age_i' => $trial['max_age'],
            "attr_clinical_trials_country____s" => $trial['country'],
        );

        // Add the per-cluster fields for every cluster returned by ClusterTool.
        foreach (ClusterTool::getAllClusters() as $clusterId)
        {
            $document['attr_' . $clusterId . '_url_s'] = $trial['id'];
            $document['subattr_language_' . $clusterId . '____s'] = 'en';
            $document['subattr_local_application___source_mixed____s'][] = $clusterId . '##unused##' . $applicationId;
            $document['attr_is_invisible_' . $clusterId . '_b'] = false;
        }

        $job = new SolrIndexationJob();
        $job->setAttribute('data', json_encode($document));
        $job->store();

        return true;
    }
예제 #5
0
    /**
     * Converts an RSS response into JSON-encoded Solr documents, one per <item>,
     * and adds the number of converted items to $this->totalProcessed.
     *
     * Items whose formatting throws an Exception are reported on stdout and skipped.
     *
     * @param string $curlXMLResponse Raw XML body of the feed (handed to simplexml_load_string()).
     * @return array List of JSON strings; empty when the feed holds no item.
     */
    public function processXML($curlXMLResponse)
    {
        $xml = simplexml_load_string($curlXMLResponse,  null, LIBXML_NOCDATA);
        // Round-trip through JSON to turn the SimpleXMLElement tree into plain arrays.
        $xmlArray = json_decode(json_encode($xml), true);

        $i = 0;
        $formattedArticles = array();

        $items = array();
        if(isset($xmlArray['channel']['item']) && is_array($xmlArray['channel']['item']) && !empty($xmlArray['channel']['item']))
        {
            $items = $xmlArray['channel']['item'];
            // A feed with a single <item> decodes to an associative array; wrap it in a list.
            if(!isset($items[0]))
            {
                $items = array($items);
            }
        }

        $language = isset($xmlArray['channel']['language']) ? $xmlArray['channel']['language'] : null;

        // NOTE(review): formatDate() is assumed to take (input format, value,
        // output format), as its other visible calls suggest — confirm.
        $pubDateFormat = 'd-m-Y H:i:s O';
        $eventDateFormat = 'd-m-Y H:i:s';
        $solrDateFormat = "Y-m-d\TH:i:s\Z";

        foreach($items as $article)
        {
            try
            {
                // When dateStart / dateEnd is missing or empty, use pubDate together
                // with pubDate's own input format. The previous code formatted pubDate
                // for Solr first and then re-parsed that string as 'd-m-Y H:i:s'.
                if(empty($article['dateStart']))
                {
                    $startValue = $article['pubDate'];
                    $startFormat = $pubDateFormat;
                }
                else
                {
                    $startValue = $article['dateStart'];
                    $startFormat = $eventDateFormat;
                }

                if(empty($article['dateEnd']))
                {
                    $endValue = $article['pubDate'];
                    $endFormat = $pubDateFormat;
                }
                else
                {
                    $endValue = $article['dateEnd'];
                    $endFormat = $eventDateFormat;
                }

                $cluster = ClusterTool::clusterIdentifier();

                // One hash serves as guid, remote id and cluster URL.
                $itemHash = md5($this->applicationIdentifier . $article['guid']. $article['link']);

                // An empty XML element decodes to array(), which trim() and explode()
                // reject; anything that is not a string is treated as absent.
                $city = '';
                if(isset($article['address']) && is_string($article['address']))
                {
                    $city = trim($article['address']);
                }

                // Build a sequential list so json_encode() always emits a JSON array,
                // even when empty entries are dropped from the middle or the start.
                $specialties = array();
                if(isset($article['specialties']) && is_string($article['specialties']))
                {
                    foreach(explode(';', $article['specialties']) as $specialty)
                    {
                        $specialty = trim($specialty);
                        if(strlen($specialty) > 0)
                        {
                            $specialties[] = $specialty;
                        }
                    }
                }

                $formattedArticle = array(
                    'meta_installation_id_ms'           => keZSolr::installationID(),
                    'meta_path_si'                      => $this->getMetaPathId(),
                    'meta_guid_ms'                      => $itemHash,
                    'meta_remote_id_ms'                 => $itemHash,
                    'meta_section_id_si'                => 1,
                    'attr_' . $cluster . '_url_s'       => $itemHash,
                    'attr_date_dt'                      => $this->formatDate($pubDateFormat, $article['pubDate'], $solrDateFormat),
                    'attr_date_start_dt'                => $this->formatDate($startFormat, $startValue, $solrDateFormat),
                    'attr_article_year_start_i'         => $this->formatDate($startFormat, $startValue, "Y"),
                    'attr_article_month_start_i'        => $this->formatDate($startFormat, $startValue, "m"),
                    'attr_date_end_dt'                  => $this->formatDate($endFormat, $endValue, $solrDateFormat),
                    'attr_article_year_end_i'           => $this->formatDate($endFormat, $endValue, "Y"),
                    'attr_article_month_end_i'          => $this->formatDate($endFormat, $endValue, "m"),
                    'attr_offline_date_dt'              => '1970-01-01T01:00:00Z',
                    'attr_archive_date_dt'              => '1970-01-01T01:00:00Z',
                    'attr_headline_t'                   => $article['title'],
                    'attr_headline_s'                   => $article['title'],
                    'attr_headline_lc_s'                => strtolower($article['title']),
                    'attr_promo_description_t'          => empty($article['description']) ? '' : $article['description'],
                    'attr_is_invisible_' . $cluster . '_b' => false,
                    'subattr_speciality___source_id____s'           => $this->specialityIds,
                    'subattr_language___source_id____s'             => $language,
                    'subattr_language_' . $cluster . '____s'        => $language,
                    'subattr_local_application___source_id____s'    => $this->application->application_id,
                    'subattr_local_application___source_mixed____s' => $this->getAllApplicationLocalizedIds(),
                    'subattr_publisher_folder___source_id____s'     => $this->getPublisherFolderPath(),
                    'meta_language_code_ms'             => LocaleTool::mainLanguage(),
                    'meta_class_identifier_ms'          => 'article',
                    'attr_content_rating_'. $cluster .'_f' => 0,
                    'attr_view_counter_'. $cluster .'_i'   => 0,
                    'attr_evrika_address_city_s'        => $city,
                    'subattr_evrika_specialty____s'     => $specialties,
                    'attr_evrika_address_city_t_ru'     => $city,
                );
                $formattedArticles[] = json_encode($formattedArticle);
                $i++;
            }
            catch(Exception $e)
            {
                echo "\n Error with item guid : " , $article['guid'] , ' : ' , $e->getMessage() , "\n";
            }
        }

        echo "|-- $i articles found.\n";
        $this->totalProcessed += $i;

        return $formattedArticles;
    }