コード例 #1
0
/**
 * Rewrites ("spins") a text by submitting it through the form at paraphrasing-tool.com.
 *
 * The form page carries a math captcha in the `value` attribute of the
 * `#math_captcha_equation` element. This function expects it to have the shape
 * "<operand> <operator> <operand>", where operands may be spelled as English
 * number words (zero..ten) or as digits and the operator is `+` or `-`.
 *
 * @param string $text_to_spin        Text sent in the `formNameLabelTextBefore` field.
 * @param bool   $include_capitalized Value sent in the `formNameLabelSpinCapWords` field.
 *
 * @return string|false Text of the `#formNameLabelTextAfter` element of the response,
 *                      or false when the captcha equation cannot be parsed or solved.
 */
function spin_article($text_to_spin, $include_capitalized = false)
{
    $client = new \Goutte\Client();
    $spinner_url = 'http://paraphrasing-tool.com/';
    $crawler = $client->request('GET', $spinner_url);

    // attr() yields null when the attribute is absent; normalise to a string before parsing.
    $equation = (string) $crawler->filter('#math_captcha_equation')->first()->attr('value');

    // Turn spelled-out numbers into digits so the operands can be used arithmetically.
    $number_words = [
        'zero' => '0', 'one' => '1', 'two' => '2', 'three' => '3', 'four' => '4', 'five' => '5',
        'six' => '6', 'seven' => '7', 'eight' => '8', 'nine' => '9', 'ten' => '10',
    ];
    $equation = str_replace(array_keys($number_words), array_values($number_words), $equation);

    // Split on any run of whitespace so doubled or leading spaces do not shift the token positions.
    $tokens = preg_split('/\s+/', trim($equation));
    if ($tokens === false || count($tokens) < 3) {
        return false;
    }

    // Non-numeric operands would raise a TypeError on PHP 8 (warning on PHP 7); treat them as unsolvable.
    if (!is_numeric($tokens[0]) || !is_numeric($tokens[2])) {
        return false;
    }

    $math_captcha_result = null;
    switch ($tokens[1]) {
        case '+':
            $math_captcha_result = $tokens[0] + $tokens[2];
            break;
        case '-':
            $math_captcha_result = $tokens[0] - $tokens[2];
            break;
    }
    // Any operator other than + or - leaves the result unset.
    if (is_null($math_captcha_result)) {
        return false;
    }

    $form = $crawler->selectButton('Go!')->form();
    $crawler = $client->submit($form, [
        'math_captcha_answer' => $math_captcha_result,
        'formNameLabelTextBefore' => $text_to_spin,
        'formNameLabelSpinCapWords' => $include_capitalized,
    ]);

    return $crawler->filter('#formNameLabelTextAfter')->first()->text();
}
コード例 #2
0
ファイル: lashowroom.php プロジェクト: GeekAb/Crawlers
<?php

// Bootstrap: load the shared configuration, then call includeMyFiles()
// (defined outside this file; presumably it pulls in the project's helper files — confirm).
include_once __DIR__ . '/../config/default_config.php';
includeMyFiles();
// HTTP client used for every request below; it is reused so the login session carries over.
$goutte = new Goutte\Client();
// Database handle.
$db = new Db();
// Reset status to 0 for every stored product whose source is 'lashowroom' before crawling.
$db->query("UPDATE products_data SET status=0 WHERE source='lashowroom'");
// Login page URL; the `previous` query parameter names a browse page on the site.
$url = 'https://www.lashowroom.com/login?previous=/wholesalefashionistas/browse/category/3/srd/small/70/1';
$crawler = $goutte->request('GET', $url);
// Locate the form that owns the 'Log In' button.
$form = $crawler->selectButton('Log In')->form();
// Submit the login form with the account credentials.
// NOTE(review): credentials are hard-coded in source — consider loading them from configuration.
$crawler = $goutte->submit($form, array('login_id' => '*****@*****.**', 'login_key' => 'uplp'));
/* Base listing URL; the page number ($count) is appended to it for each request. */
$crawlUrl = 'https://www.lashowroom.com/wholesalefashionistas/browse/all/1/srd/large/70/';
// Page counter appended to $crawlUrl, starting at page 1.
$count = 1;
// Loop flag: the crawl loop that follows runs while this equals 1.
$status = 1;
// Starts empty; presumably collects product URLs found during the crawl — confirm against the loop body.
$urls = array();
while ($status == 1) {
    // Selector string
    $domSelector = '//*[@class="store-front-browse-gallery"]/ul/li/div/a';
    /*Count selector for page number*/
    $countSelector = '//*[@class="store-front-browse-title"]/text()';
    /*Main crawl , adding count to master*/
    $crawler = $goutte->request('GET', $crawlUrl . $count);
    /*Getting total products string*/
    $pcount = $crawler->filterXPath($countSelector)->each(function ($node) {
        return $node->text();
    });
    $tProducts = '';
コード例 #3
0
// HTTP client shared by the login request and every product request below.
$goutte = new Goutte\Client();
// Get Database
$db = new Db();
// Reset status to 0 for every stored product whose source is 'numberonewholesales'.
$db->query("UPDATE products_data SET status=0 WHERE source='numberonewholesales'");
$loginUrl = 'https://numberonewholesales.com/login.php';
$crawler = $goutte->request('GET', $loginUrl);
// Take the raw login-page HTML so it can be rewritten before the form is extracted.
$html = $crawler->html();
// Matches "osCsid=" followed by any run of word characters.
// NOTE(review): presumably a session id embedded in the page's URLs — confirm.
$re = "/(osCsid)=\\w*/";
$subst = "";
// Remove every match from the HTML.
$newHtml = preg_replace($re, $subst, $html);
// Replace the crawler's content with the cleaned HTML so the form is built from it.
$crawler->clear();
$crawler->addHtmlContent($newHtml);
// Locate the form that owns the 'Sign In' button and fill in the credentials.
// NOTE(review): credentials are hard-coded in source — consider loading them from configuration.
$form = $crawler->selectButton('Sign In')->form();
$form['email_address'] = '*****@*****.**';
$form['password'] = '******';
$crawler = $goutte->submit($form);
// Default Status code
// NOTE(review): $status_code and $count are not read in the visible part of this script.
$status_code = 200;
$count = 1;
// Load the stored product URLs for this source.
$data = $db->query("SELECT url from product_urls WHERE source='numberonewholesales'");
$url = '';
// Fetch each stored product URL through getProductData().
foreach ($data as $value) {
    $url = $value['url'];
    // NOTE(review): this overwrites $data, the variable being iterated; a separate name would be clearer.
    $data = getProductData($goutte, $url, $db);
}
function getProductData($goutte, $url, $db)
{
    $crawler = $goutte->request('GET', $url);
    $status_code = $goutte->getResponse()->getStatus();
    $data = array();
    if ($status_code == 200) {
コード例 #4
0
ファイル: generate_url.php プロジェクト: GeekAb/Crawlers
// Get Database
$db = new Db();
// Reset status to 0 for every stored product whose source is 'numberonewholesales'.
$db->query("UPDATE products_data SET status=0 WHERE source='numberonewholesales'");
// URL and EndPoints: one category listing page per product category.
$baseURL = 'http://numberonewholesales.com/';
$endPoints['leggings'] = 'http://numberonewholesales.com/leggings-c-1067.html';
$endPoints['kids'] = 'http://numberonewholesales.com/kids-wear-c-1113.html';
$endPoints['palazzo'] = 'http://numberonewholesales.com/palazzo-pants-c-1112.html';
$endPoints['skirts'] = 'http://numberonewholesales.com/skirts-c-1080.html';
$endPoints['shorts'] = 'http://numberonewholesales.com/shorts-c-1079.html';
$endPoints['pants'] = 'http://numberonewholesales.com/pants-c-1084.html';
// Log in first so the category requests below reuse the client's session.
$loginUrl = 'https://numberonewholesales.com/login.php';
$crawler = $goutte->request('GET', $loginUrl);
$form = $crawler->selectButton('Sign In')->form();
// NOTE(review): credentials are hard-coded in source — consider loading them from configuration.
$crawler = $goutte->submit($form, array('email_address' => '*****@*****.**', 'password' => 'abhishek'));
// Default Status code
$status_code = 200;
$count = 1;
$productUrls = array();
foreach ($endPoints as $key => $value) {
    // The endpoints above are already absolute URLs, so prefixing $baseURL would yield a
    // malformed address ("http://host/http://host/..."). Prefix only relative endpoints.
    $categoryUrl = preg_match('#^https?://#i', $value) === 1 ? $value : $baseURL . $value;
    $productUrls[] = getLink($goutte, $categoryUrl);
    // Pause 1-2 seconds (in microseconds) between category requests.
    $sleep_time = rand(1 * 1000000, 2 * 1000000);
    echo "\tSleeping for " . number_format($sleep_time / 1000000, 2) . " sec\n";
    usleep($sleep_time);
}
// Reduce the per-category results to the list of URLs to store (helper defined outside this file).
$urls = getUrlFromArray($productUrls);
foreach ($urls as $key => $value) {
    // Insert Product URLs; INSERT IGNORE skips rows that would violate a unique key.
    $db->query("INSERT IGNORE INTO product_urls(url, url_hash, source, status) \n\t\t\tVALUES(:url,:url_hash,:source,:status)", array("url" => $value, "url_hash" => hash('ripemd160', $value), "source" => "numberonewholesales", "status" => 1));
}