curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE);
    $store = curl_exec($ch);
    echo "Executing Login...";
    echo "<br>login-Store:";
    echo $store;
    echo "<br>login-Ch:";
    echo $ch;
    var_dump($ch);
    return $ch;
}
// Report everything except notices, and print errors straight to the output.
error_reporting(E_ALL & ~E_NOTICE);
ini_set('display_errors', '1');

echo 'Starting Scraper';

// login() hands back the cURL handle it configured; that same handle is
// reused to fetch the profile edit page.
$ch = login();
$html = downloadUrl('https://scraperwiki.com/profiles/edit/', $ch);
print $html;
/**
 * Fetch a URL with a non-POST request over an existing cURL handle and return
 * the response body.
 *
 * The handle is configured in place (URL, referer, user agent, 10 second
 * timeout, body returned instead of printed) and is not closed here, so the
 * caller can keep using it. Progress text is echoed as a side effect.
 *
 * @param string               $Url URL to request.
 * @param resource|\CurlHandle $ch  Initialised cURL handle (the script obtains it from login()).
 *
 * @return string|false Response body, or false when curl_exec() fails.
 */
function downloadUrl($Url, $ch)
{
    curl_setopt($ch, CURLOPT_URL, $Url);
    curl_setopt($ch, CURLOPT_POST, 0);
    curl_setopt($ch, CURLOPT_REFERER, "https://scraperwiki.com/login/");
    curl_setopt($ch, CURLOPT_USERAGENT, "MozillaXYZ/1.0");
    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_TIMEOUT, 10);
    // NOTE(review): peer certificate verification is switched off, so the
    // HTTPS connection is not authenticated. Kept as-is to preserve behaviour;
    // confirm whether this is still required.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE);

    $output = curl_exec($ch);

    echo "Executing download of page...";
    echo "<br>login-Ch:";
    // Since PHP 8.0 a cURL handle is a CurlHandle object, which cannot be
    // converted to a string; describe it by class and object id instead.
    // Older resource handles still print as "Resource id #N".
    echo is_object($ch) ? get_class($ch) . ' #' . spl_object_id($ch) : (string) $ch;

    // The caller echoes the downloaded page, so the body has to be returned.
    return $output;
}
// Create the extraction directory (recursively, mode 0777) when it is missing.
is_dir($extractedDir) || mkdir($extractedDir, 0777, true);

// Drop the three constraints on the curve table, then defer all constraints.
// The statements are executed one by one in exactly this order.
foreach (
    [
        'ALTER TABLE curve DROP CONSTRAINT curve_pkey',
        'ALTER TABLE curve DROP CONSTRAINT curve_datasetid_fkey',
        'ALTER TABLE curve DROP CONSTRAINT curve_identifier',
        'SET CONSTRAINTS ALL DEFERRED',
    ] as $constraintStatement
) {
    $db->exec($constraintStatement);
}
unset($constraintStatement);
foreach ($dataFiles as $dataFile) {
    // Get the file locally
    $downloadFile = $ftpRoot . '/' . $dataFile;
    $tgzFile = $downloadDir . DIRECTORY_SEPARATOR . $dataFile;
    $txtFile = $extractedDir . DIRECTORY_SEPARATOR . str_replace('.tar.gz', '.txt', $dataFile);
    try {
        if (!downloadUrl($downloadFile, $tgzFile)) {
            echo ' already downloaded.' . PHP_EOL;
        }
        extractTarGz($tgzFile, $extractedDir);
        // Should create $txtFile
        echo 'Parsing and loading data file into database...';
        $dataLoader->load($txtFile, null, true);
        echo '...done.' . PHP_EOL;
    } catch (Exception $e) {
        echo PHP_EOL . 'Failed loading data file [' . $dataFile . '].' . PHP_EOL . $e->getMessage() . PHP_EOL;
    } finally {
        if (file_exists($tgzFile)) {
            unlink($tgzFile);
        }
        if (file_exists($txtFile)) {
            unlink($txtFile);
// Example #3
0
        if (preg_match($aUrlPattern, $aUrl, $regs)) {
            $fdsource = fopen($regs[1], "w");
            echo $aUrl;
            $ch = curl_init($aUrl);
            curl_setopt($ch, CURLOPT_FILE, $fdsource);
            curl_setopt($ch, CURLOPT_HEADER, 0);
            curl_exec($ch);
            curl_close($ch);
            fclose($fdsource);
        }
    }
    fclose($fd);
}
// }}}
// {{{ main
# Entry point: exactly one command-line argument is required. It is either a
# readable file (the list of Debian packages) or the literal flag --download.
if ($argc !== 2) {
    syntax();
    exit(1);
}

if (is_readable($argv[1])) {
    // A readable path takes precedence over the flag check.
    listFilename($argv[1]);
} elseif ($argv[1] === '--download') {
    downloadUrl();
} else {
    // Neither a readable file nor the download flag: show usage and fail.
    syntax();
    exit(1);
}
// }}}