/**
 * Fetch a page through scrapePage() and re-emit its lists as sectioned HTML.
 *
 * The fetched markup is cleaned up (runs of closing </ul> tags collapsed,
 * "files//" links rewritten to the resources21.org file store, document
 * wrapper tags and "</td><td>" cell boundaries stripped) and parsed with
 * DOMDocument. Every <ul> found is written out as
 *
 *     <div id="{id of the ul}">{heading}<ul><li>...</li>...</ul></div>
 *
 * where {heading} is an <h3> holding the text of the N-th element whose
 * class attribute is exactly "secHed" (N = position of the <ul> in document
 * order). A <ul> with no matching heading is emitted without one.
 *
 * @param string $url Address handed to scrapePage().
 * @return string HTML fragment, always wrapped in a balanced
 *                <div id="lessonplan-content">...</div>.
 */
function outputScrape($url)
{
    $wrapperOpen = '<div id="lessonplan-content">';
    $wrapperClose = '</div>';

    // Cast defensively: a null/false result from scrapePage() or a PCRE
    // failure in preg_replace() must not propagate into the next call.
    $markup = (string) preg_replace('#(<\\/ul>\\s*)+#i', '</ul>', (string) scrapePage($url));
    $markup = str_replace('files//', 'http://resources21.org/cl/files/', $markup);
    $markup = (string) preg_replace(
        array('/<head>(.*)<\\/head>/iUs', '/<html>/', '/<\\/html>/', '/<body>/', '/<\\/body>/', '/<\\/td><td>/'),
        "",
        $markup
    );

    // DOMDocument::loadHTML() rejects an empty string (ValueError on PHP 8),
    // and there is nothing to extract anyway.
    if (trim($markup) === '') {
        return $wrapperOpen . $wrapperClose;
    }

    // Scraped HTML is rarely well-formed. Collect libxml diagnostics
    // internally while parsing instead of emitting a warning per problem,
    // then put the caller's error-handling mode back.
    $previousMode = libxml_use_internal_errors(true);
    $doc = new DOMDocument();
    $doc->loadHTML($markup);
    libxml_clear_errors();
    libxml_use_internal_errors($previousMode);

    $xpath = new DOMXpath($doc);
    $lists = $xpath->query('//ul');
    $headingNodes = $xpath->query("//*[@class='secHed']");

    // DOMXPath::query() yields false (not null) on failure, so test the type.
    $headings = array();
    if ($headingNodes instanceof DOMNodeList) {
        foreach ($headingNodes as $headingNode) {
            // nodeValue is decoded text; re-escape it before embedding in HTML.
            $headings[] = '<h3>'
                . htmlspecialchars($headingNode->nodeValue, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8')
                . '</h3>';
        }
    }

    $returnvalue = $wrapperOpen;

    if ($lists instanceof DOMNodeList) {
        $position = 0;
        foreach ($lists as $list) {
            // There may be more lists than headings on the page.
            $heading = isset($headings[$position]) ? $headings[$position] : '';
            $listId = htmlspecialchars($list->getAttribute('id'), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

            $returnvalue .= '<div id="' . $listId . '">' . $heading . "<ul>";
            // Every direct child node of the <ul> becomes one <li>; the
            // serialisation of the child is delegated to innerXML().
            foreach ($list->childNodes as $child) {
                $returnvalue .= '<li>' . innerXML($child) . "</li>\n";
            }
            $returnvalue .= "</ul></div>";
            $position++;
        }
    }

    // Close the wrapper unconditionally so the fragment is always balanced.
    return $returnvalue . $wrapperClose;
}
scraperWiki::save_sqlite(array('name'), $payload); } } }

scrapePage("http://surname.sofeminine.co.uk/w/surnames/most-common-surnames-in-great-britain.html");
//scrapePage("http://surname.sofeminine.co.uk/w/surnames/most-common-surnames-in-great-britain-2.html");
//scrapePage("http://surname.sofeminine.co.uk/w/surnames/most-common-surnames-in-great-britain-3.html");
//scrapePage("http://surname.sofeminine.co.uk/w/surnames/most-common-surnames-in-great-britain-4.html");
//scrapePage("http://surname.sofeminine.co.uk/w/surnames/most-common-surnames-in-great-britain-5.html");

require 'scraperwiki/simple_html_dom.php';

/**
 * Scrape one surname listing page and store each surname with its count.
 *
 * For every <td class="nom"> cell the text of its first link is taken as the
 * name, and the text of the first <td class="compte"> under the same parent
 * element as the count. Each pair with a non-empty count is saved through
 * scraperWiki::save_sqlite() with array('name') as its first argument
 * (presumably the unique-key column list - confirm against the ScraperWiki
 * API).
 *
 * Rows that lack the link or the count cell are skipped instead of aborting
 * the whole run.
 *
 * @param string $url Address of the listing page to fetch.
 * @return void
 */
function scrapePage($url)
{
    $html = scraperWiki::scrape($url);
    $dom = new simple_html_dom();
    $dom->load($html);

    foreach ($dom->find('td.nom') as $cell) {
        // find(..., 0) and parent() return null when nothing matches;
        // dereferencing that null would be a fatal error.
        $link = $cell->find('a', 0);
        $row = $cell->parent();
        if ($link === null || $row === null) {
            continue;
        }

        $countCell = $row->find('td.compte', 0);
        if ($countCell === null) {
            continue;
        }

        $name = $link->plaintext;
        $count = $countCell->plaintext;
        if ($count) {
            $payload = array('name' => $name, 'count' => $count);
            scraperWiki::save_sqlite(array('name'), $payload);
        }
    }
}

scrapePage("http://surname.sofeminine.co.uk/w/surnames/most-common-surnames-in-great-britain.html");
//scrapePage("http://surname.sofeminine.co.uk/w/surnames/most-common-surnames-in-great-britain-2.html");
//scrapePage("http://surname.sofeminine.co.uk/w/surnames/most-common-surnames-in-great-britain-3.html");
//scrapePage("http://surname.sofeminine.co.uk/w/surnames/most-common-surnames-in-great-britain-4.html");
//scrapePage("http://surname.sofeminine.co.uk/w/surnames/most-common-surnames-in-great-britain-5.html");