/**
 * Load the page addressed by buildUrl() and parse it as HTML.
 *
 * While the document is being loaded, libxml errors are collected internally
 * instead of being reported, and are discarded afterwards. The libxml error
 * mode that was active before the call is put back before this method
 * finishes, including when the load is interrupted by an exception.
 *
 * @return DOMDocument The document object the page was loaded into. The
 *                     result of the load call itself is not inspected, so the
 *                     document is returned whether or not loading succeeded.
 */
protected function loadPage()
{
    // Remember the caller's libxml error mode so it can be restored later.
    $bOldSetting = libxml_use_internal_errors(true);
    libxml_clear_errors();

    $oHtml = new DOMDocument();

    try {
        $oHtml->loadHTMLFile($this->buildUrl());
    } finally {
        // libxml_use_internal_errors() changes process-wide state; restore it
        // even if buildUrl() or the load throws, so later code is unaffected.
        libxml_clear_errors();
        libxml_use_internal_errors($bOldSetting);
    }

    return $oHtml;
}
/**
 * Collect the Open Graph meta tags of an HTML document.
 *
 * Every <meta> element whose "property" attribute starts with "og:" is added
 * to the result, keyed by the property name with the "og:" prefix removed and
 * holding the element's "content" attribute as value. When the same property
 * occurs more than once, the last occurrence wins.
 *
 * @param string $url Location of the HTML document to load.
 *
 * @return array Map of Open Graph property name (without prefix) => content.
 */
private function get_graph($url)
{
    $prefixPattern = '/^og:/';

    // Diagnostics raised while loading are silenced on purpose; the lookup
    // below simply runs against whatever ended up in the document.
    $document = new DOMDocument();
    @$document->loadHTMLFile($url);

    $openGraph = [];

    foreach ($document->getElementsByTagName('meta') as $metaTag) {
        $property = $metaTag->getAttribute('property');

        // Skip meta tags that are not Open Graph properties.
        if (!preg_match($prefixPattern, $property)) {
            continue;
        }

        $name = preg_replace($prefixPattern, '', $property);
        $openGraph[$name] = $metaTag->getAttribute('content');
    }

    return $openGraph;
}
$pages = $pages0 + 1; } //USERS $file_string2 = file_get_contents($url); preg_match('#<b>(.*?)</b>#i', $file_string2, $users); $nbusers = str_replace(",", "", $users[1]); //RATIO $ratio = $nbreviews / $nbusers; //INITIALIZATION $add = 0; //LOOP REVIEWS SCRAPING for ($i = 1; $i <= $pages; $i++) { $oldSetting = libxml_use_internal_errors(true); libxml_clear_errors(); $html = new DOMDocument(); $html->loadHtmlFile($url . '/reviews/?page=' . $i); $xpath = new DOMXPath($html); $links = $xpath->query("//div[contains(@class, 'review')and \n not(contains(@class,'reply'))]"); //Do not include the comments written by the addon's developer $return = array(); foreach ($links as $item) { $newDom = new DOMDocument(); $newDom->appendChild($newDom->importNode($item, true)); $xpath = new DOMXPath($newDom); $review = str_replace("\"", "", trim($xpath->query("//p[@class='review-body']")->item(0)->nodeValue)); //$review = "\"".$review."\","; $return[] = array($review); } // REVIEWS ARRAY $return = print_r($return, true); $return = htmlspecialchars($return);