/**
 * Extracts the search result entries from the HTML of one Google result page.
 *
 * Every <li> element is converted with dom2array_full(); entries whose array
 * has fewer than two members (advertisements) or whose 'class' is not 'g'
 * (non-result items) are skipped. Progress markers are emitted via verbose().
 *
 * Side effect: the global $process_result is set to "PROCESS_SUCCESS_MORE"
 * when a following page appears to exist, otherwise "PROCESS_SUCCESS_LAST".
 *
 * @param string $htmdata Raw HTML of the result page.
 * @param int    $page    Index of the page being parsed; the following page is
 *                        looked up as "start=" . (($page + 1) * page size).
 *
 * @return array List of results, each an array with the keys:
 *               'title' (string|null), 'url' (string, or false when the link
 *               contains no "http"), 'host' (string|null) and 'desc' (string).
 */
function process_raw($htmdata, $page)
{
    global $process_result; // receives the paging status determined below
    global $test_100_resultpage;
    global $NL;

    $html = (string) $htmdata;

    $dom = new DOMDocument();
    $dom->strictErrorChecking = false;
    $dom->preserveWhiteSpace = true;

    // loadHTML() refuses an empty string (a ValueError on PHP 8), so an empty
    // page is simply treated as a document without any result entries.
    if ($html !== '') {
        // Real-world markup is rarely valid; collect parser warnings quietly
        // instead of hiding them with the @ operator.
        $previousLibxmlMode = libxml_use_internal_errors(true);
        $dom->loadHTML($html);
        if (!$previousLibxmlMode) {
            libxml_clear_errors();
        }
        libxml_use_internal_errors($previousLibxmlMode);
    }

    $results = array();
    foreach ($dom->getElementsByTagName('li') as $list) {
        $ar = dom2array_full($list);
        if (count($ar) < 2) {
            verbose("s");
            continue; // skipping advertisements
        }
        if (!isset($ar['class']) || $ar['class'] != 'g') {
            verbose("x");
            continue; // skipping non-search result entries
        }

        // The snippet is taken from the second <div> of the entry.
        $div = $list->getElementsByTagName('div')->item(1);
        $cont = null;
        getContent($cont, $div);

        $title = isset($ar['h3']['a']['textContent']) ? $ar['h3']['a']['textContent'] : null;
        $href = isset($ar['h3']['a']['@attributes']['href']) ? $ar['h3']['a']['@attributes']['href'] : null;

        // Keep only the part of the link that starts at "http"; false when absent.
        $url = is_string($href) ? strstr($href, "http") : false;
        if (is_string($href) && strpos($href, "interstitial") !== false) {
            echo "!";
        }

        $host = null;
        if ($url !== false) {
            $urlParts = parse_url($url);
            if (is_array($urlParts) && isset($urlParts['host'])) {
                $host = $urlParts['host'];
            }
        }

        // Instead of using DOM the description is parsed as a plain string,
        // due to frequent layout changes by Google.
        $desc = '';
        $snippet = strstr((string) $cont, "<span class='st'>");
        if ($snippet !== false) {
            // 17 is the length of the "<span class='st'>" marker itself.
            $desc = strip_tags((string) substr($snippet, 17));
        }

        // 2012-2013 addon: an entry carrying a table but (almost) no title is
        // an embedded object rather than a regular link.
        if (isset($ar['table']) && strlen((string) $title) < 2) {
            $title = "embedded object";
            $url = "embedded object";
        }

        verbose("r");
        flush();

        $results[] = array(
            'title' => $title,
            'url'   => $url,
            'host'  => $host,
            'desc'  => $desc,
        );
    }
    verbose(" !{$NL}");

    // Analyze if more results are available (next page): either an explicit
    // "Next" link or a link pointing at the following page's start offset.
    if (strpos($html, "Next</a>") !== false) {
        $next = true;
    } else {
        $pageSize = $test_100_resultpage ? 100 : 10;
        $needstart = ($page + 1) * $pageSize;
        $next = strpos($html, "start={$needstart}") !== false;
    }

    $process_result = $next ? "PROCESS_SUCCESS_MORE" : "PROCESS_SUCCESS_LAST";

    return $results;
}
$dom = new domDocument(); $dom->strictErrorChecking = false; $dom->preserveWhiteSpace = true; @$dom->loadHTML($htmdata); $lists = $dom->getElementsByTagName('li'); $num = 0; foreach ($lists as $list) { unset($ar); unset($divs); unset($div); unset($cont); unset($result); unset($tmp); $result['main_keyword'] = $main_keyword; $result['sub_keyword'] = $keyword; $ar = dom2array_full($list); if (count($ar) < 2) { echo "S"; continue; // skipping advertisement and similar spam } if (!isset($ar['class']) || $ar['class'] != 'g') { echo "?"; continue; // skipping non-search results } // adaption to new google layout //if ($num==2)var_dump($ar); //if ($num==3)var_dump($ar); if (isset($ar['div'][1])) { $ar['div'] =& $ar['div'][0];
/**
 * Loads the 'action_content_detail' record for this action from the remote
 * service and copies its fields into $this->mInfo.
 *
 * When the object is not valid or has no 'action_content_detail_key', an
 * error is stored in $this->mErrors['diasalsa_load'] and nothing is requested.
 */
function loadSalsaActionContentDetail() {
	if (!$this->isValid() || empty($this->mInfo['action_content_detail_key'])) {
		$this->mErrors['diasalsa_load'] = tra('Error attemped to load data from DIA for an invalid Action.');
		return;
	}

	$serviceUrl = $this->getServiceUrl('load');
	$request = array(
		'object' => 'action_content_detail',
		'key' => $this->mInfo['action_content_detail_key'],
	);
	$response = $this->curlExec($serviceUrl, $request);
	$doc = $this->string2XML($response);

	if (!$this->validateXMLRequest($doc)) {
		return;
	}

	$fields = dom2array_full($doc->getElementsByTagName('item')->item(0));

	// Suggested texts: fall back to an empty string when the field is absent.
	$textFields = array(
		'recommended_subject' => 'Recommended_Subject',
		'recommended_content' => 'Recommended_Content',
	);
	foreach ($textFields as $infoKey => $fieldName) {
		$this->mInfo[$infoKey] = empty($fields[$fieldName]) ? "" : $fields[$fieldName]['#text'];
	}

	// The service reports "fixed" flags; they are inverted into "editable"
	// flags because affirming checkboxes are easier to understand than
	// negating ones.
	$lockFields = array(
		'subject_editable' => 'Fixed_Subject',
		'content_editable' => 'Fixed_Content',
	);
	foreach ($lockFields as $infoKey => $fieldName) {
		$isFixed = !empty($fields[$fieldName]) && $fields[$fieldName]['#text'] == 'true';
		$this->mInfo[$infoKey] = !$isFixed;
	}
}