/**
 * Print a summary of a schema's tables and debug-dump each one.
 *
 * The function is a no-op unless the global $DEBUG_TABLES flag is truthy.
 * When enabled it asks scraperwiki::show_tables() for the schema's tables,
 * prints how many were returned, and then calls debug_table() once per key
 * of that result (the keys are presumably table names -- confirm against
 * the scraperwiki library).
 *
 * @param mixed $schema Schema identifier; forwarded unchanged to
 *                      scraperwiki::show_tables() and debug_table(), and
 *                      concatenated into the printed summary line.
 * @return void
 */
function debug_tables($schema)
{
    global $DEBUG_TABLES;

    // Only do any work (and any output) when table debugging is enabled.
    if ($DEBUG_TABLES) {
        $schemaTables = scraperwiki::show_tables($schema);
        $tableTotal = count($schemaTables);

        print "Schema '" . $schema . "' contains " . $tableTotal . " tables\n";

        // Only the keys matter here; the associated values are not used.
        foreach ($schemaTables as $name => $unusedInfo) {
            // The meaning of the third argument is defined by debug_table(),
            // which is not visible in this block.
            debug_table($schema, $name, TRUE);
        }
    }
}
foreach ($nextPage->find('a') as $element) { $MyString = $element->href; $MyString = htmlspecialchars_decode($MyString); // print "Próxima página: " . $MyString . "\n"; } } } //************************programa principal************************// require 'scraperwiki/simple_html_dom.php'; $dom = new simple_html_dom(); //*Pegar os memberId dos avaliadores pelo sql e passar para a função*// avaliaVendedor("blowitoutahere"); print "MyString = " . $MyString . "\n"; //limitando em percorrer 3 páginas for ($pag = 0; $pag < 3; $pag++) { $html = scraperWiki::scrape($MyString); $dom->load($html); foreach ($dom->find("table.FbOuterYukon") as $data) { $tds = $data->find("td"); for ($i = 4; $i <= 197; $i += 8) { $record = array('data_autocount' => $i, 'data_Feedback' => $tds[$i + 1]->plaintext, 'data_MemberID_AND_FeedbackScore' => $tds[$i + 2]->plaintext, 'data_Date-Time' => $tds[$i + 3]->plaintext, 'data_Item_Weight_Price_ItemNumber' => $tds[$i + 5]->plaintext, 'data_Price' => $tds[$i + 6]->plaintext); // Salva o record na tabela // Salvar o $MyStringVendedor também. saveData(array("Data_autocount", "data_Feedback", "data_MemberID_AND_FeedbackScore"), $record); } getLinks($data); } proxPaginaVendedor($dom); } print_r(scraperwiki::show_tables()); print_r(scraperwiki::sqliteexecute("select * from membersLinks")); //print_r(scraperwiki::sqliteexecute("select * from allFeedBacks"));
$a = substr($aa->innertext, 0, strpos($aa->innertext, ' ')); $b = substr($bb->innertext, 0, strpos($bb->innertext, ' ')); if ($a == $b) { return 0; } return $a < $b ? -1 : 1; } $tablename = "miway20121004"; require 'scraperwiki/simple_html_dom.php'; $html = scraperWiki::scrape("http://m.miway.ca/routes.jsp"); $dom = new simple_html_dom(); $dom->load($html); $count = 0; $toplinks = $dom->find('table tr td a[href^=routeStops.jsp]'); usort($toplinks, "toplinks_sort"); $tables = scraperwiki::show_tables(); if (count($tables) > 0 && $tables[$tablename]) { $lastruninfo = scraperwiki::select("* FROM " . $tablename . " ORDER BY routeid DESC LIMIT 1"); $lastrouteid = $lastruninfo[0]["routeid"]; } else { //this is a fresh run $lastrouteid = -1; } foreach ($toplinks as $data) { list($routeid, $nothing, $routename) = explode(" ", $data->innertext, 3); // if ($routeid >= $lastrouteid) { if ($routeid > 91) { $cleanlink = preg_replace('/;jsessionid=[A-Z0-9]*/i', '', $data->href); print "VISITING http://m.miway.ca/" . $cleanlink . "\n"; $route_html_content = scraperwiki::scrape("http://m.miway.ca/" . $cleanlink); $route_html = str_get_html($route_html_content);
//current club ids $url = "http://www.nrsr.sk/web/default.aspx?sid=poslanci/kluby/zoznam"; $html = scraperwiki::scrape($url); //get dom $dom = new simple_html_dom(); $dom->load($html); $uls = $dom->find('ul[class=longlist]'); preg_match_all('/ID=([0-9]{1,})/', $uls[0], $matches); $ids = $matches[1]; //current term $divs = $dom->find('select[id=_sectionLayoutContainer_ctl01__currentTerm]'); $options = $divs[0]->find('option[selected=selected]'); $term = $options[0]->value; //compare saved term $saved_term = scraperwiki::get_var('current_term'); $info = scraperwiki::show_tables(); if ($term != $saved_term) { if (isset($info['club'])) { scraperwiki::sqliteexecute("delete from club"); scraperwiki::sqlitecommit(); } if (isset($info['membership'])) { scraperwiki::sqliteexecute("delete from membership"); scraperwiki::sqlitecommit(); } } scraperwiki::save_var('current_term', $term); //current clubs foreach ($ids as $i) { $url = "http://www.nrsr.sk/web/Default.aspx?sid=poslanci/kluby/klub&ID=" . $i; $html = scraperwiki::scrape($url);
$header = $detail->prev_sibling()->plaintext; $header = strtolower(preg_replace('/ /', '_', $header)); $header = preg_replace('/:/', '', $header); $detail_record[$header] = preg_replace('/\\s*$/', '', preg_replace('/ˆ\\s*/', '', $detail->plaintext)); } if ($DEBUG) { print_r($datail_record); } else { if ($detail_record['camera_type']) { scraperwiki::save_sqlite(array('url'), $detail_record, $table_name = "cameras"); } } } require 'scraperwiki/simple_html_dom.php'; $DEBUG = false; $init = count(scraperwiki::show_tables()) == 0; $lastbrandmodelcrawl = scraperwiki::get_var('lastcrawl'); if (time() > $lastbrandmodelcrawl + 3600 * 24 * 7) { //Find the brands/Makers $allbrands = scraperWiki::scrape("http://www.flickr.com/cameras/brands/"); $allbrands_dom = new simple_html_dom(); $allbrands_dom->load($allbrands); $brands_node = $allbrands_dom->find("td.clTxt h4 a"); if (!$init) { $known_brands = scraperwiki::select("count(url) as cnt from brands"); } if ($init || $known_brands[0]['cnt'] != count($brands_node)) { foreach ($brands_node as $data) { $record = array('name' => $data->plaintext, 'url' => $data->href); if ($DEBUG) { print_r($record);