}
/*
 * The brace above closes a definition that begins outside this view.
 *
 * NOTE(review): this source line appears to have lost its line breaks.
 * As written, the hash banner that follows oneline() starts a single-line
 * comment that runs to the end of the physical line, so every statement
 * after it is inert. Restore the original line breaks before relying on
 * this code.
 *
 * Reading the remainder as the statements it was presumably meant to be:
 *  - loops $i from 1 to $max (159710);
 *  - both scraperwiki::scrape() calls are prefixed with a double slash, so
 *    no visible statement assigns $html before oneline($html) is called.
 *    TODO confirm which of the two was meant to be active;
 *  - takes the first h1 capture and splits it on " - " into two parts that
 *    are saved as artist and song;
 *  - takes the first capture of the paragraph with id sarkisozu and removes
 *    the source-attribution span that embeds $i, giving the lyrics text;
 *  - saves a record keyed by id only when both parts are non-empty after
 *    trim(); the lyrics are passed through addslashes();
 *  - exit sits inside the loop body, so the script would stop after the
 *    first iteration. Presumably a debugging leftover, TODO confirm;
 *  - $arr[1][0] is read without checking that the pattern matched.
 *
 * clean() starts at the end of this line and continues past the visible
 * source, so it is not documented here. As displayed, its first two
 * str_replace() calls have search and replacement strings that look
 * identical. Possibly HTML entities were decoded during extraction,
 * TODO confirm against the original file.
 */
/**
 * Collapses text onto a single line.
 *
 * Removes every line feed and carriage return from the input so that
 * patterns whose dot does not match newlines can span what used to be
 * several lines.
 *
 * @param string $code Text to flatten.
 * @return string The input with all LF and CR characters removed.
 */
function oneline($code) { $code = str_replace("\n", '', $code); $code = str_replace("\r", '', $code); return $code; } ###################################### # Basic PHP scraper ###################################### $max = 159710; for ($i = 1; $i <= $max; $i++) { // $html = scraperwiki::scrape('http://sarki.alternatifim.com/data.asp?ID=13914'); // $html = scraperwiki::scrape('http://sarki.alternatifim.com/data.asp?ID='.$i); $html = oneline($html); preg_match_all('|<h1>(.*?)</h1>|', $html, $arr); $parts = explode(' - ', $arr[1][0]); preg_match_all('|<p id="sarkisozu">(.*?)</p>|', $html, $arr); $text = str_replace("<span style='color:#888888;font-size:0.75em'>[ kaynak: http://sarki.alternatifim.com/goster.asp?ac=" . $i . " ]</span>", '', $arr[1][0]); if (trim($parts[0]) != '' && trim($parts[1]) != '') { scraperwiki::save(array('id'), array('id' => "" . $i, 'artist' => clean($parts[0]), 'song' => clean($parts[1]), 'lyrics' => addslashes($text))); } exit; } function clean($val) { $val = str_replace(' ', ' ', $val); $val = str_replace('&', '&', $val); $val = html_entity_decode($val); $val = strip_tags($val);
/*
 * Fragment: this line starts inside an enclosing block whose opening is
 * outside this view (the third closing brace after the page loop belongs
 * to it). The listing URL uses substr($l, $i, 1), so the enclosing block
 * presumably iterates over the characters of $l. TODO confirm.
 * $centers, $counter, $l and the initial $max are set outside this view.
 *
 * Phase 1, listing pages:
 *  - $max falls back to 1 when it compares equal to the empty string;
 *  - for each page $p from 1 to $max the A-Z listing page is fetched and
 *    every dt/a entry contributes its href to $centers[url] and its title
 *    to $centers[name].
 *
 * Phase 2, detail pages:
 *  - for every non-empty URL the detail page is fetched and flattened with
 *    oneline();
 *  - the first h3 capture becomes the name; the paragraph capture becomes
 *    the address, with both br spellings replaced by a comma and a space;
 *  - a record keyed by id (taken from $counter) is saved with the car,
 *    taxi, motorcycle and lgv flags set to NO and theory set to YES;
 *  - $i is reset to 0 and incremented alongside $counter, but only
 *    $counter is used in the saved record.
 *
 * NOTE(review): $a[1][0] and $a[2][0] are read without checking that the
 * pattern matched.
 */
if ($max == '') { $max = 1; } for ($p = 1; $p <= $max; $p++) { $html = scraperwiki::scrape('http://www.dft.gov.uk/dsa/dsa_theory_test_az.asp?letter=' . substr($l, $i, 1) . '&CAT=-1&page=' . $p . '&TypeID=18&TestType='); preg_match_all('|<dt><a href="(.*?)" title="(.*?)">.*?</a></dt>|', $html, $arr); if (isset($arr[1][0])) { $centers['name'] = array_merge($centers['name'], $arr[2]); $centers['url'] = array_merge($centers['url'], $arr[1]); } } } $i = 0; foreach ($centers['url'] as $url) { if ($url != '') { $html = oneline(scraperwiki::scrape('http://www.dft.gov.uk/dsa/' . $url)); preg_match_all('|<h3>(.*?)</h3>.*?<p>(.*?)</p><br />.*?<h3>|', $html, $a); $name = trim($a[1][0]); $address = trim(str_replace('<br />', ', ', $a[2][0])); $address = trim(str_replace('<br>', ', ', $address)); scraperwiki::save(array('id'), array('id' => '' . $counter, 'name' => $name, 'address' => $address, 'car' => 'NO', 'taxi' => 'NO', 'motorcycle' => 'NO', 'lgv' => 'NO', 'theory' => 'YES')); $i++; $counter++; } }
/**
 * Collapses text onto a single line.
 *
 * Removes every line feed and carriage return from the input so that
 * patterns whose dot does not match newlines can span what used to be
 * several lines.
 *
 * @param string $code Text to flatten.
 * @return string The input with all LF and CR characters removed.
 */
function oneline($code) { $code = str_replace("\n", '', $code); $code = str_replace("\r", '', $code); return $code; }
/*
 * Fragment: this line starts inside the true branch of an if whose
 * condition is outside this view, and it ends inside a while loop that
 * continues past the visible source. $query, $DBParams, $element_id and
 * $version are set outside this view.
 *
 * The oneline() called here takes four arguments and is defined outside
 * this view; presumably it prints one table row. TODO confirm.
 *
 * Visible flow:
 *  - true branch: runs $query and calls oneline() with typ, subtype,
 *    content and a running index for every row, then sets $imax to
 *    $i + 5;
 *  - else branch: starts $i at 0, calls oneline() once for each of the
 *    labels source, type, original and translation, and sets $imax to 8;
 *  - afterwards oneline() is called with empty strings for every index
 *    from $i up to but not including $imax;
 *  - the table is closed and a new cell with rowspan 7 is opened;
 *  - when $version is greater than 0, a query selects rows of the prefixed
 *    data table for the element whose version is greater than
 *    $version - 2, newest version first, and a header row is printed.
 *
 * NOTE(review): this source line appears to have lost its line breaks. As
 * written, the double-slash comment in the first while loop runs to the
 * end of the physical line and comments out everything after it.
 *
 * NOTE(review): the mysql_ functions used here were removed in PHP 7.0.
 * $element_id and $version are concatenated straight into the SQL text;
 * confirm that both are validated as integers by the caller, or move to a
 * prepared statement.
 */
$result = mysql_query($query); while ($row = mysql_fetch_array($result, MYSQL_ASSOC)) { // $datetime=getdatetime($row["lastchange"]); oneline($row["typ"], $row["subtype"], $row["content"], $i++); } $imax = $i + 5; } else { $i = 0; oneline("source", "", "", $i++); oneline("type", "", "", $i++); oneline("original", "", "", $i++); oneline("translation", "", "", $i++); $imax = 8; } for ($j = $i; $j < $imax; $j++) { oneline("", "", "", $j); } print "</table>"; print "</td><td valign=\"top\" rowspan=\"7\">"; // show version-1 if ($version > 0) { $query = "SELECT fk_element,d.subtype,d.version,d.typ,d.content,d.author,d.lastchange FROM " . $DBParams["mysql_prefix"] . "data d where d.fk_element=" . $element_id . " and d.version>" . $version . "-2 order by d.version desc,d.typ='translation', d.typ='original', d.typ='type',d.typ='source';"; $result = mysql_query($query); print "<table>"; print "<tr><td><em>version</em></td>"; print "<td><em>typ</em></td>"; print "<td><em>subtype</em></td>"; print "<td><em>content</em></td>"; print "<td><em>author</em></td><td><em>date/time</em></td></tr>"; while ($row = mysql_fetch_array($result, MYSQL_ASSOC)) { $datetime = getdatetime($row["lastchange"]);
<?php
/*
 * Scraper script; the visible part ends inside an else branch and
 * continues past this view. oneline() is called here but is not defined
 * in the visible part.
 *
 * NOTE(review): this source line appears to have lost its line breaks. As
 * written, the hash banner after the require statement starts a
 * single-line comment that runs to the end of the physical line, so every
 * statement after it is inert. Restore the original line breaks before
 * relying on this code.
 *
 * Reading the remainder as the statements it was presumably meant to be:
 *  - two CREATE TABLE calls and a return are disabled with a hash prefix;
 *  - $max is set to 10045263 and is not used in the visible part;
 *  - $counter is loaded with scraperwiki::get_var() using a default of
 *    10000000 and is raised to 10000000 when the stored value is lower;
 *  - the loop runs 1000 times; each pass fetches the provider details page
 *    for the id in $counter and flattens it with oneline();
 *  - the markup between the pod_main_body div and the searchleft div is
 *    captured into $code, or $code is the empty string when nothing
 *    matched;
 *  - when $code is non-empty, the digits after the UKPRN label are
 *    captured and trimmed into $num, or $num is the empty string when
 *    nothing matched.
 */
require 'scraperwiki.php'; ###################################### # Basic PHP scraper ###################################### #scraperwiki::sqliteexecute("CREATE TABLE `swvariables` (`valueblob` blob, `type` text, `name` text)"); #scraperwiki::sqliteexecute("CREATE TABLE `swdata` (`datescraped` text, `primaryweb` text, `name` text, `primaryphone` text, `legalemail` text, `legalweb` text, `legalfax` text, `num` text, `trading` text, `legalphone` text, `primaryemail` text, `primaryfax` text, `primaryaddress` text, `legaladdress` text, `primarycourses` text)"); #return; $max = 10045263; $counter = scraperwiki::get_var('counter', 10000000); if ($counter < 10000000) { $counter = 10000000; } for ($i = 0; $i < 1000; $i++) { $html = oneline(scraperwiki::scrape("http://www.ukrlp.co.uk/ukrlp/ukrlp_provider.page_pls_provDetails?x=&pn_p_id=" . $counter . "&pv_status=VERIFIED&pv_vis_code=L")); preg_match_all('|<div class="pod_main_body">(.*?<div )class="searchleft">|', $html, $arr); if (isset($arr[1][0])) { $code = $arr[1][0]; } else { $code = ''; } if ($code != '') { #echo "code \n"; #echo json_encode($code); #echo "\n"; preg_match_all('|<div class="provhead">UKPRN: ([0-9]*?)</div>|', $code, $num); if (isset($num[1][0])) { $num = trim($num[1][0]); } else { $num = '';
/*
 * NOTE(review): this source line appears to have lost its line breaks. As
 * written, the hash banner after the reopened PHP tag starts a single-line
 * comment that runs to the end of the physical line, so every statement
 * after it is inert. Restore the original line breaks before relying on
 * this code.
 *
 * Reading the remainder as the statements it was presumably meant to be:
 *  - the currency code list page is fetched and flattened with oneline();
 *  - one pattern captures four top-aligned table cells per row, which are
 *    saved as country, currency, alpha_code and num_code, keyed by
 *    country, after each value is passed through clean();
 *  - $z becomes true once a country starting with the letter z is seen;
 *    the script exits at the first later row that does not start with z.
 *    Presumably this stops at the end of the alphabetical list,
 *    TODO confirm;
 *  - $last is assigned but not read in the visible part.
 *
 * clean() starts at the end of this line and continues past the visible
 * source, so it is not documented here.
 */
/**
 * Collapses text onto a single line.
 *
 * Removes every line feed and carriage return from the input so that
 * patterns whose dot does not match newlines can span what used to be
 * several lines.
 *
 * @param string $code Text to flatten.
 * @return string The input with all LF and CR characters removed.
 */
function oneline($code) { $code = str_replace("\n", '', $code); $code = str_replace("\r", '', $code); return $code; } ?> <?php ###################################### # Basic PHP scraper ###################################### require 'scraperwiki/simple_html_dom.php'; $html = oneline(scraperwiki::scrape("http://www.iso.org/iso/support/faqs/faqs_widely_used_standards/widely_used_standards_other/currency_codes/currency_codes_list-1.htm")); preg_match_all('|<tr.*?>.*?<td valign="top">(.*?)</td>.*?<td valign="top">(.*?)</td>.*?<td valign="top">(.*?)</td>.*?<td valign="top">(.*?)</td>.*?</tr>|', $html, $arr); $last = ''; $z = false; foreach ($arr[1] as $key => $val) { if (strtolower(substr(clean($arr[1][$key]), 0, 1)) == 'z') { $z = true; } if ($z == true && strtolower(substr(clean($arr[1][$key]), 0, 1)) != 'z') { exit; } scraperwiki::save(array('country'), array('country' => clean($arr[1][$key]), 'currency' => clean($arr[2][$key]), 'alpha_code' => clean($arr[3][$key]), 'num_code' => clean($arr[4][$key]))); } function clean($val) { $val = str_replace(' ', '', $val);