Пример #1
0
 /**
  * Loads one listing page, extracts every configured property from it and,
  * unless disabled, hands the extracted values to save_listing().
  *
  * @param string    $a_sListingURL URL of the listing page to fetch.
  * @param array     $a_aDOM        Property definitions. Each entry is read for 'id',
  *                                 'source', 'group', 'container_dom', 'property_dom'
  *                                 and 'property_key_dom'.
  * @param array|int $a_sListingDOM Listing settings; 'add_href' and 'container' are read
  *                                 when this is an array. The legacy default 0 is
  *                                 treated as "no settings".
  * @param bool      $a_bSave       When true, the results are passed to save_listing().
  * @param bool      $a_bIsPreview  When true, return the decoded text of the first
  *                                 property that is not a static 'parent' property
  *                                 instead of processing the whole list.
  * @return bool|string false when the page cannot be loaded or saving throws,
  *                     true otherwise; in preview mode the extracted text.
  */
 public function process_listing($a_sListingURL, $a_aDOM, $a_sListingDOM = 0, $a_bSave = true, $a_bIsPreview = false)
 {
     $this->m_oDOM->clear();
     $l_sHtmlStr = '';
     if (!$this->load_content($a_sListingURL, $l_sHtmlStr)) {
         $this->m_bPageLoadOK = false;
         $this->m_oTheLogger->write_log("**WARNING: cURL cant load page " . $a_sListingURL);
         return false;
     }

     // $a_sListingDOM defaults to 0, so only index it when it really is an array.
     $l_bHasListingDOM = is_array($a_sListingDOM);
     $l_sAddHref = ($l_bHasListingDOM && isset($a_sListingDOM['add_href'])) ? $a_sListingDOM['add_href'] : '';
     $l_mListingContainer = ($l_bHasListingDOM && isset($a_sListingDOM['container'])) ? $a_sListingDOM['container'] : null;

     $l_sListingNoHref = str_replace($l_sAddHref, '', $a_sListingURL);
     $this->m_oDOM->load($l_sHtmlStr);
     $this->m_bPageLoadOK = true;

     $l_aPropResults = array();
     foreach ($a_aDOM as $l_aProperty) {
         $l_aDetail = array();
         $l_iPropertyID = $l_aProperty['id'];
         // Same key the results were always stored under (the id cast to string).
         $l_sResultKey = (string) $l_iPropertyID;
         $l_sContainerDOM = $l_aProperty['container_dom'];
         $l_sKeyDomET = $l_aProperty['property_key_dom']['extract_type'];
         $l_sValDomET = $l_aProperty['property_dom']['extract_type'];

         // Round-trip both extract-type strings through CExtractor and keep
         // whatever get_extract_type() reports back for the DOM lookup.
         $l_oExtractor = new CExtractor();
         $l_oExtractor->reset_values($l_sKeyDomET, '');
         $l_aProperty['property_key_dom']['extract_type'] = $l_oExtractor->get_extract_type();
         $l_oExtractor->reset_values($l_sValDomET, '');
         $l_aProperty['property_dom']['extract_type'] = $l_oExtractor->get_extract_type();

         $l_bIsParent = ($l_aProperty['source'] == 'parent');
         $l_bIsStatic = ($l_aProperty['property_key_dom']['main_dom'] == _PROP_VAL_STATIC);

         if ($l_bIsStatic) {
             $l_aDetail[0] = $l_aProperty['property_key_dom']['element_id'];
         } elseif ($l_bIsParent) {
             // Extract from the listing container instead of the full page:
             // swap the container into the DOM, extract, then restore the page.
             // NOTE(review): the container is cut from $this->m_sListingHTML, not
             // from the page source loaded above — confirm that is intended.
             $l_sTmpContainer = $this->extract_container($this->m_sListingHTML, $l_mListingContainer, $l_sListingNoHref);
             $this->m_oDOM->clear();
             $this->m_oDOM->load($l_sTmpContainer);
             $l_aDetail = $this->extract_detail_dom($l_aProperty['property_dom'], $l_sContainerDOM, $l_aProperty['property_key_dom']);
             $this->m_oDOM->clear();
             // The page source must stay available for every later property, so
             // it is deliberately not unset here.
             $this->m_oDOM->load($l_sHtmlStr);
         } else {
             $l_aDetail = $this->extract_detail_dom($l_aProperty['property_dom'], $l_sContainerDOM, $l_aProperty['property_key_dom']);
         }

         // NOTE(review): static values of 'parent' properties are neither stored
         // nor previewed (unchanged behaviour) — confirm this is intended.
         if (!($l_bIsParent && $l_bIsStatic)) {
             $l_sDetail = isset($l_aDetail[0]) ? trim($l_aDetail[0]) : '';
             if (strlen($l_sDetail) >= 1) {
                 $l_sDetail = htmlspecialchars_decode($l_sDetail);
                 $l_oExtractor->reset_values($l_sValDomET, $l_sDetail);
                 if ($l_aProperty['group']) {
                     // Grouped properties accumulate as "a, b, c, ".
                     $l_sPrevious = isset($l_aPropResults[$l_sResultKey]) ? $l_aPropResults[$l_sResultKey] : '';
                     $l_aPropResults[$l_sResultKey] = $l_sPrevious . $l_oExtractor->get_result() . ', ';
                 } else {
                     $l_aPropResults[$l_sResultKey] = $l_oExtractor->get_result();
                 }
             }
             if ($a_bIsPreview) {
                 // NOTE(review): a non-empty value was already decoded once above,
                 // so entities are decoded twice here — kept as written.
                 return htmlspecialchars_decode($l_sDetail);
             }
         }

         // Expand the {%PRICE%} placeholder, but only when a result exists.
         if (isset($l_aPropResults[$l_sResultKey]) && stristr($l_aPropResults[$l_sResultKey], '{%PRICE%}') !== false) {
             $l_aPropResults[$l_sResultKey] = $this->process_price($l_iPropertyID, $l_aPropResults[$l_sResultKey]);
         }
     }

     if ($a_bSave) {
         try {
             $this->save_listing($a_sListingURL, $l_aPropResults);
         } catch (Exception $e) {
             $this->m_oTheLogger->write_log("**EXCEPTION: Saving - " . $e->getMessage());
             $this->m_bHasFails = true;
             return false;
         }
     }
     return true;
 }
Пример #2
0
 /**
  * Runs a raw value through a CExtractor configured by an extract-type string.
  *
  * @param string $extractType   {%}-delimited extract type string,
  *                              e.g. left(x){%}numerics{%}price{%}plaintext
  * @param string $originalValue Raw value to extract from. Received by reference
  *                              but not modified here; it is trimmed and has its
  *                              HTML special-character entities decoded before
  *                              being handed to the extractor.
  * @return mixed Whatever CExtractor::get_result() yields for the prepared value.
  */
 protected function extract($extractType, &$originalValue)
 {
     $l_oWorker = new CExtractor();

     // Clean the input first: strip surrounding whitespace, then decode entities.
     $l_sPrepared = htmlspecialchars_decode(trim($originalValue));
     $l_oWorker->reset_values($extractType, $l_sPrepared);

     return $l_oWorker->get_result();
 }