// Fragment: tail of login(). The opening of the function (where $ch is
// created and the login request is configured) lies above this view.
    // Have curl_exec() hand the response back as a string instead of printing it.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    // NOTE(review): this turns off TLS peer-certificate verification, so the
    // remote host is not authenticated -- confirm this is intentional.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE);
    $store = curl_exec($ch);

    // Debug output. Note that "Executing Login..." is printed after the
    // request has already been executed.
    echo "Executing Login...";
    echo "<br>login-Store:";
    echo $store;
    echo "<br>login-Ch:";
    // NOTE(review): echoing a cURL handle relies on resource-to-string
    // conversion; on PHP 8+ the handle is an object and this would raise an
    // Error -- confirm the target PHP version.
    echo $ch;
    var_dump($ch);

    // The handle is returned so that later requests can reuse it.
    return $ch;
}

// ---- Script entry point ----------------------------------------------------

// Show errors in the output and report everything except notices.
ini_set('display_errors', 1);
error_reporting(E_ALL ^ E_NOTICE);

echo "Starting Scraper";

// Log in first, then fetch the profile edit page with the same cURL handle.
// login() and downloadUrl() are called here before their definitions appear
// in the file.
$ch = login();
$html = downloadUrl('https://scraperwiki.com/profiles/edit/', $ch);
echo $html;

/**
 * Fetches a URL with a GET request on an existing cURL handle.
 *
 * The rest of this function (including whatever it returns) lies below this
 * view; presumably it returns $output -- TODO confirm.
 *
 * @param string $Url URL to request.
 * @param mixed  $ch  cURL handle to reuse (the caller above passes the value
 *                    returned by login()).
 */
function downloadUrl($Url, $ch) {
    curl_setopt($ch, CURLOPT_URL, $Url);
    // Switch the reused handle back from POST to a regular request.
    curl_setopt($ch, CURLOPT_POST, 0);
    curl_setopt($ch, CURLOPT_REFERER, "https://scraperwiki.com/login/");
    curl_setopt($ch, CURLOPT_USERAGENT, "MozillaXYZ/1.0");
    // Do not include response headers in the returned body.
    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    // Give up after 10 seconds.
    curl_setopt($ch, CURLOPT_TIMEOUT, 10);
    // NOTE(review): TLS peer verification is disabled here as well -- confirm.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE);
    $output = curl_exec($ch);

    // Debug output, printed after the request has completed.
    echo "Executing download of page...";
    // NOTE(review): the label says "login-Ch" although this is the download
    // step; looks copy-pasted from login() -- confirm before relying on it.
    echo "<br>login-Ch:";
    // NOTE(review): same handle-to-string concern as in login().
    echo $ch;
// Fragment: the statement that opens this first block lies above this view
// (presumably the matching `if (!is_dir($downloadDir)) {` guard -- TODO confirm).
    mkdir($downloadDir, 0777, true);
}

// Create the extraction directory (recursively) if it does not exist yet.
if (!is_dir($extractedDir)) {
    mkdir($extractedDir, 0777, true);
}

// Drop the primary-key, foreign-key and identifier constraints on the curve
// table, then ask the database to defer constraint checking.
// NOTE(review): presumably done to speed up the bulk load below; the code that
// re-creates these constraints is not visible in this fragment -- confirm it
// exists further down.
$db->exec('ALTER TABLE curve DROP CONSTRAINT curve_pkey');
$db->exec('ALTER TABLE curve DROP CONSTRAINT curve_datasetid_fkey');
$db->exec('ALTER TABLE curve DROP CONSTRAINT curve_identifier');
$db->exec('SET CONSTRAINTS ALL DEFERRED');

// Download, extract and load every data file. A failure on one file is
// reported and the loop moves on to the next file.
foreach ($dataFiles as $dataFile) {
    // Get the file locally
    $downloadFile = $ftpRoot . '/' . $dataFile;
    $tgzFile = $downloadDir . DIRECTORY_SEPARATOR . $dataFile;
    // The archive name with ".tar.gz" swapped for ".txt" is where the
    // extracted data file is expected to appear.
    $txtFile = $extractedDir . DIRECTORY_SEPARATOR . str_replace('.tar.gz', '.txt', $dataFile);

    try {
        // A falsy return from downloadUrl() is reported as "already downloaded".
        if (!downloadUrl($downloadFile, $tgzFile)) {
            echo ' already downloaded.' . PHP_EOL;
        }

        extractTarGz($tgzFile, $extractedDir); // Should create $txtFile

        echo 'Parsing and loading data file into database...';
        $dataLoader->load($txtFile, null, true);
        echo '...done.' . PHP_EOL;
    } catch (Exception $e) {
        // Report the failure for this file only.
        echo PHP_EOL . 'Failed loading data file [' . $dataFile . '].' . PHP_EOL . $e->getMessage() . PHP_EOL;
    } finally {
        // Always remove the downloaded archive and the extracted text file,
        // whether or not the load succeeded.
        if (file_exists($tgzFile)) {
            unlink($tgzFile);
        }
        if (file_exists($txtFile)) {
            unlink($txtFile);
// Fragment: tail of a download routine. Its opening (where $fd, $aUrl and
// $aUrlPattern are set up, and the enclosing block starts) lies above this view.
        // When the URL matches the pattern, download it into a local file
        // named after the first capture group.
        if (preg_match($aUrlPattern, $aUrl, $regs)) {
            // NOTE(review): fopen() can return false (e.g. unwritable path); the
            // result is handed to cURL unchecked -- consider guarding it.
            $fdsource = fopen($regs[1], "w");
            echo $aUrl;
            $ch = curl_init($aUrl);
            // Stream the response body straight into the opened file,
            // without response headers.
            curl_setopt($ch, CURLOPT_FILE, $fdsource);
            curl_setopt($ch, CURLOPT_HEADER, 0);
            curl_exec($ch);
            curl_close($ch);
            fclose($fdsource);
        }
    }
    fclose($fd);
}
// }}}

// {{{ main
# This script expects a list of Debian packages
// Exactly one command-line argument is required: either the path of a
// readable file (handed to listFilename()) or the literal flag "--download"
// (which runs downloadUrl()). Anything else prints the usage text and exits
// with status 1. The readability check runs first, so a readable file that
// happens to be named "--download" is treated as a list file.
if ($argc !== 2) {
    syntax();
    exit(1);
}

if (is_readable($argv[1])) {
    listFilename($argv[1]);
} elseif ($argv[1] === "--download") {
    downloadUrl();
} else {
    syntax();
    exit(1);
}
// }}}