} } else { $synthManufacturer = $manufacturerQuery; //Search for a specific manufacturer $tManName = preg_replace("/\\s/", "_", $synthManufacturer); //Build a temp name for the array key $synthManufacturer = urlencode($synthManufacturer); echo "Searching manufacturer: " . $synthManufacturer . "\n"; $foundSynths[$state][$tManName] = findSynthsOnCraigslist($state, $jsonManufacturerScraper, $synthManufacturer, $ignoreWords, $cityDepth, $synthDepth, $depthOverride); if (!empty($foundSynths[$state][$tManName])) { //scraperwiki::save_var($state.'-'.$synthManufacturer.'-'.'synths', json_encode($foundSynths[$state][$synthManufacturer])); $saveMessage = scraperWiki::save_sqlite(array('manufacturer', 'synth_name', 'post_item_date', 'post_item_name', 'post_item_price', 'post_item_link', 'post_item_state', 'query', 'link', 'post_item_description', 'post_item_images'), $foundSynths[$state][$tManName]); print strval($saveMessage); } else { //Delete existing data $info = scraperwiki::table_info($name = "swdata"); if (!empty($info)) { scraperwiki::sqliteexecute("DELETE FROM swdata"); //Truncate the table before adding new results } } } } } /** * Parses craigslist and returns synths * @param String state * @param String synthManufacturer * @param ignoreWords * @param cityDepth * @param synthDepth
$OBJ['linkedIn'] = $LinkedIn; } } // Clean certifications $certifications = array_unique(json_decode($row['certifications'])); $OBJ['certifications'] = json_encode($certifications); // Geo scraperwiki::save_sqlite(array('id', 'name', 'company', 'location', 'date', 'url', 'profile', 'twitter', 'klout', 'profile_url', 'linkedIn', 'certifications'), $OBJ); scraperwiki::save_var('last_page', $counter); $counter = $counter + 1; print_r($counter); } print_r("start"); scraperwiki::attach("appcelerator_devlink"); // Bootstrap variables if (!scraperwiki::table_info($name = "swvariables")) { scraperwiki::save_var('last_page', 0); } $lastPage = scraperwiki::get_var('last_page'); if ($lastPage > 0) { $offset = " OFFSET " . $lastPage; $counter = $lastPage; } else { $offset = ""; $counter = 0; } print_r($offset); $data = scraperwiki::select("* from appcelerator_devlink.swdata LIMIT 1500" . $offset); foreach ($data as $row) { $OBJ = array('id' => $row['id'], 'name' => $row['name'], 'company' => $row['company'], 'location' => $row['location'], 'date' => $row['date'], 'url' => $row["url"], 'profile' => $row["profile"], 'twitter' => '', 'klout' => '', 'linkedIn' => '', 'certifications' => ''); // Clean Links
if (!empty($total_opp_days)) { $bug_rate = $total_bugs / $total_opp_days; } // Build data record $uniquekeys = array("name", "url"); $data = array("name" => $data->plaintext, "url" => $data->href, "author" => $author, "maintenance" => $maintenance_status, "dev_status" => $dev_status, "rec_release_version" => $rec_release['version'], "rec_release_type" => $rec_release['type'], "rec_release_timestamp" => $rec_release['timestamp'], "downloads" => $downloads, "installs" => $installs, "bugs" => $open_bugs, "total_bugs" => $total_bugs, "age_years" => round($age / 86400 / 365.25, 2), "mbug_rate" => round($bug_rate * 1000, 2), "created" => $created, "modified" => $lastmodified, "last_commit" => $last_commit); scraperwiki::save($uniquekeys, $data); } } } $page = array("http://drupal.org/search/site/commerce?f[0]=drupal_core%3A103&f[1]=bs_project_sandbox%3A0&f[2]=ss_meta_type%3Amodule", "http://drupal.org/search/site/commerce?page=1&f[0]=drupal_core%3A103&f[1]=bs_project_sandbox%3A0&f[2]=ss_meta_type%3Amodule", "http://drupal.org/search/site/commerce?page=2&f[0]=drupal_core%3A103&f[1]=bs_project_sandbox%3A0&f[2]=ss_meta_type%3Amodule", "http://drupal.org/search/site/commerce?page=3&f[0]=drupal_core%3A103&f[1]=bs_project_sandbox%3A0&f[2]=ss_meta_type%3Amodule", "http://drupal.org/search/site/commerce?page=4&f[0]=drupal_core%3A103&f[1]=bs_project_sandbox%3A0&f[2]=ss_meta_type%3Amodule", "http://drupal.org/search/site/commerce?page=5&f[0]=drupal_core%3A103&f[1]=bs_project_sandbox%3A0&f[2]=ss_meta_type%3Amodule", "http://drupal.org/search/site/commerce?page=6&f[0]=drupal_core%3A103&f[1]=bs_project_sandbox%3A0&f[2]=ss_meta_type%3Amodule", "http://drupal.org/search/site/commerce?page=7&f[0]=drupal_core%3A103&f[1]=bs_project_sandbox%3A0&f[2]=ss_meta_type%3Amodule", "http://drupal.org/search/site/commerce?page=8&f[0]=drupal_core%3A103&f[1]=bs_project_sandbox%3A0&f[2]=ss_meta_type%3Amodule", 
"http://drupal.org/search/site/commerce?page=9&f[0]=drupal_core%3A103&f[1]=bs_project_sandbox%3A0&f[2]=ss_meta_type%3Amodule", "http://drupal.org/search/site/commerce?page=10&f[0]=drupal_core%3A103&f[1]=bs_project_sandbox%3A0&f[2]=ss_meta_type%3Amodule", "http://drupal.org/search/site/commerce?page=11&f[0]=drupal_core%3A103&f[1]=bs_project_sandbox%3A0&f[2]=ss_meta_type%3Amodule", "http://drupal.org/search/site/commerce?page=12&f[0]=drupal_core%3A103&f[1]=bs_project_sandbox%3A0&f[2]=ss_meta_type%3Amodule"); require 'scraperwiki/simple_html_dom.php'; // To change schema: // - Clear data // - Run if (!scraperwiki::table_info('swdata')) { scraperwiki::sqliteexecute("create table swdata (`name` string, `url` string, `author` string, `maintenance` string, `dev_status` string, `rec_release_version` string, `rec_release_type` string, `rec_release_timestamp` datetime, `downloads` int, `installs` int, `bugs` int, `total_bugs` int, `age_years` float, `mbug_rate` float, `created` datetime, `modified` datetime, `last_commit` datetime)"); scraperwiki::sqlitecommit(); } $now = time(); $i = 0; foreach ($page as $id => $url) { $html = scraperWiki::scrape($url); $dom = new simple_html_dom(); $dom->load($html); // Grab urls for each module foreach ($dom->find("dt[@class='title'] a") as $data) { $i++; if (stristr($data->href, "commerce_")) { // Load Module Data $html2 = scraperWiki::scrape($data->href);
/**
 * Prints the row count (and optionally the column schema) of a table, for debugging.
 *
 * Gated on the global $DEBUG_TABLES flag: when the flag is falsy this is a
 * no-op, so call sites can be left in place for production runs.
 *
 * @param string $schema     Schema / attached-database prefix (e.g. "main").
 * @param string $tname      Bare table name within $schema.
 * @param bool   $showSchema When true, also prints each column as "name(type); ".
 * @return void Output goes to stdout via print.
 */
function debug_table($schema, $tname, $showSchema = false)
{
    global $DEBUG_TABLES;

    if (!$DEBUG_TABLES) {
        return; // Debug output disabled.
    }

    $tablename = $schema . "." . $tname;

    // NOTE(review): $tablename is interpolated directly into SQL; this is only
    // safe while callers pass trusted, hard-coded schema/table names.
    $num = scraperwiki::select("count(*) AS n FROM " . $tablename);
    print "{$tablename} size: " . $num[0]['n'] . " rows.\n";

    if ($showSchema) {
        print "{$tablename} schema: ";
        foreach (scraperwiki::table_info($tablename) as $column) {
            print $column['name'] . "(" . $column['type'] . "); ";
        }
        print "\n";
    }
}
/**
 * Runs a SELECT against the local SQLite store, guarding against the
 * "swdata" table not existing yet (e.g. on the scraper's first run).
 *
 * @param string $query SQL select body passed straight to scraperwiki::select().
 * @return array|false  Result rows, or false when "swdata" does not exist.
 */
function getData($query)
{
    // table_info() yields an empty result when the table is missing; bail out
    // early so select() does not fail on a nonexistent table.
    if (!count(scraperwiki::table_info("swdata"))) {
        return false;
    }

    return scraperwiki::select($query);
}