/**
 * Loads one listing page and extracts every configured property from it.
 *
 * The page is fetched through load_content() and parsed into $this->m_oDOM.
 * Each entry of $a_aDOM is then resolved to a raw value, run through a
 * CExtractor, and collected in a result map keyed by property id. Results
 * containing the {%PRICE%} marker are handed to process_price(). Finally the
 * map is passed to save_listing() unless $a_bSave is false.
 *
 * Side effects: sets $this->m_bPageLoadOK, may set $this->m_bHasFails, writes
 * to $this->m_oTheLogger, and replaces the document held by $this->m_oDOM.
 *
 * @param string    $a_sListingURL URL of the listing page to load.
 * @param array     $a_aDOM        Property definitions. Each entry is read for the keys
 *                                 'id', 'source', 'group', 'container_dom',
 *                                 'property_dom' and 'property_key_dom'.
 * @param array|int $a_sListingDOM Listing-level settings; the keys 'add_href' and
 *                                 'container' are read when present. The default 0
 *                                 means "no settings".
 * @param bool      $a_bSave       When true, the extracted values are saved via save_listing().
 *
 * @return bool false when the page cannot be loaded or saving throws; true otherwise.
 */
public function process_listing($a_sListingURL, $a_aDOM, $a_sListingDOM = 0, $a_bSave = true)
{
    $this->m_oDOM->clear();

    $l_sHtmlStr = '';
    if (!$this->load_content($a_sListingURL, $l_sHtmlStr)) {
        $this->m_bPageLoadOK = false;
        $this->m_oTheLogger->write_log("**WARNING: cURL cant load page " . $a_sListingURL);
        return false;
    }

    // $a_sListingDOM defaults to 0, so only index it when the key really exists.
    $l_mAddHref = isset($a_sListingDOM['add_href']) ? $a_sListingDOM['add_href'] : '';
    $l_mListingContainer = isset($a_sListingDOM['container']) ? $a_sListingDOM['container'] : null;

    $l_sListingNoHref = str_replace($l_mAddHref, '', $a_sListingURL);
    $this->m_oDOM->load($l_sHtmlStr);
    $this->m_bPageLoadOK = true;

    $l_aPropResults = array();

    foreach ($a_aDOM as $l_aProperty) {
        $l_aDetail = array();
        $l_iPropertyID = $l_aProperty['id'];
        $l_sResultKey = "{$l_iPropertyID}";
        $l_sContainerDOM = $l_aProperty['container_dom'];
        $l_sKeyDomET = $l_aProperty['property_key_dom']['extract_type'];
        $l_sValDomET = $l_aProperty['property_dom']['extract_type'];

        // Replace both extract-type strings with whatever the extractor reports
        // back for them before they are handed to extract_detail_dom().
        $l_oExtractor = new CExtractor();
        $l_oExtractor->reset_values($l_sKeyDomET, '');
        $l_aProperty['property_key_dom']['extract_type'] = $l_oExtractor->get_extract_type();
        $l_oExtractor->reset_values($l_sValDomET, '');
        $l_aProperty['property_dom']['extract_type'] = $l_oExtractor->get_extract_type();

        $l_bIsParent = ($l_aProperty['source'] == 'parent');
        $l_bIsStatic = ($l_aProperty['property_key_dom']['main_dom'] == _PROP_VAL_STATIC);
        $l_bStoreResult = true;

        if ($l_bIsStatic) {
            // Static properties carry their value in the definition itself.
            $l_aDetail[0] = $l_aProperty['property_key_dom']['element_id'];

            // NOTE(review): a static value on a 'parent' property has never been
            // stored in the results; that behaviour is kept here. It looks like a
            // misplaced brace rather than intent - confirm before changing.
            $l_bStoreResult = !$l_bIsParent;
        } elseif ($l_bIsParent) {
            // Parent-sourced values come from the listing container, so swap that
            // fragment into the DOM for the lookup and restore the page afterwards.
            $l_sTmpContainer = $this->extract_container(
                $this->m_sListingHTML,
                $l_mListingContainer,
                $l_sListingNoHref
            );
            $this->m_oDOM->clear();
            $this->m_oDOM->load($l_sTmpContainer);

            $l_aDetail = $this->extract_detail_dom(
                $l_aProperty['property_dom'],
                $l_sContainerDOM,
                $l_aProperty['property_key_dom']
            );

            // $l_sHtmlStr must stay alive for the whole loop: every later property
            // relies on the full page being restored here.
            $this->m_oDOM->clear();
            $this->m_oDOM->load($l_sHtmlStr);
        } else {
            $l_aDetail = $this->extract_detail_dom(
                $l_aProperty['property_dom'],
                $l_sContainerDOM,
                $l_aProperty['property_key_dom']
            );
        }

        if ($l_bStoreResult) {
            $l_sRawValue = isset($l_aDetail[0]) ? trim($l_aDetail[0]) : '';

            if (strlen($l_sRawValue) >= 1) {
                $l_oExtractor->reset_values($l_sValDomET, htmlspecialchars_decode($l_sRawValue));

                if (!empty($l_aProperty['group'])) {
                    // Grouped properties accumulate into one ', '-terminated list.
                    $l_sPrevious = isset($l_aPropResults[$l_sResultKey]) ? $l_aPropResults[$l_sResultKey] : '';
                    $l_aPropResults[$l_sResultKey] = $l_sPrevious . $l_oExtractor->get_result() . ', ';
                } else {
                    $l_aPropResults[$l_sResultKey] = $l_oExtractor->get_result();
                }
            }
        }

        // check prices
        if (isset($l_aPropResults[$l_sResultKey])
            && stristr($l_aPropResults[$l_sResultKey], '{%PRICE%}') !== false
        ) {
            $l_aPropResults[$l_sResultKey] = $this->process_price($l_iPropertyID, $l_aPropResults[$l_sResultKey]);
        }
    }

    if ($a_bSave) {
        try {
            $this->save_listing($a_sListingURL, $l_aPropResults);
        } catch (Exception $e) {
            $this->m_oTheLogger->write_log("**EXCEPTION: Saving - " . $e->getMessage());
            $this->m_bHasFails = true;
            return false;
        }
    }

    return true;
}
/**
 * Complex string extractor.
 *
 * Passes the value through trim() and htmlspecialchars_decode(), then feeds it
 * to a fresh CExtractor together with the extract type.
 *
 * @param string $extractType   {%} delimited extract type string,
 *                              example: left(x){%}numerics{%}price{%}plaintext
 * @param string $originalValue Value to run the extraction on. It is taken by
 *                              reference but is not modified by this method.
 *
 * @return mixed Whatever CExtractor::get_result() returns for the given input.
 */
protected function extract($extractType, &$originalValue)
{
    $l_sPreparedValue = htmlspecialchars_decode(trim($originalValue));

    $l_oExtractor = new CExtractor();
    $l_oExtractor->reset_values($extractType, $l_sPreparedValue);

    return $l_oExtractor->get_result();
}