/**
 * Rewrites ("spins") a text by submitting it to the paraphrasing-tool.com web form.
 *
 * The page is fetched, its arithmetic captcha is solved, the form behind the
 * "Go!" button is submitted, and the text of the result field is returned.
 * Captcha operands may be spelled as the English words "zero" through "ten";
 * only the "+" and "-" operators are handled.
 *
 * @param string $text_to_spin        Text sent as the form's 'formNameLabelTextBefore' field.
 * @param bool   $include_capitalized Value sent as the form's 'formNameLabelSpinCapWords' field.
 *
 * @return string|false Text of the '#formNameLabelTextAfter' element, or false
 *                      when the captcha equation could not be parsed or solved.
 */
function spin_article($text_to_spin, $include_capitalized = false)
{
    $client = new \Goutte\Client();
    $spinner_url = 'http://paraphrasing-tool.com/';
    $crawler = $client->request('GET', $spinner_url);

    // Read the captcha text and turn number words into digits.
    $math_captcha_equation = $crawler->filter('#math_captcha_equation')->first()->attr('value');
    $math_captcha_equation = str_replace(
        ['zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten'],
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
        (string) $math_captcha_equation
    );

    // Split on any run of whitespace so stray or doubled spaces do not shift
    // the "<left> <operator> <right>" positions.
    $math_captcha_equation = preg_split('/\s+/', trim($math_captcha_equation));

    // Bail out (same failure value as an unknown operator) instead of reading
    // missing offsets or doing arithmetic on non-numeric strings.
    if (
        !is_array($math_captcha_equation)
        || count($math_captcha_equation) < 3
        || !is_numeric($math_captcha_equation[0])
        || !is_numeric($math_captcha_equation[2])
    ) {
        return false;
    }

    $math_captcha_result = null;
    switch ($math_captcha_equation[1]) {
        case '+':
            $math_captcha_result = $math_captcha_equation[0] + $math_captcha_equation[2];
            break;
        case '-':
            $math_captcha_result = $math_captcha_equation[0] - $math_captcha_equation[2];
            break;
    }

    // Any other operator leaves the result unset: report failure.
    if (is_null($math_captcha_result)) {
        return false;
    }

    $crawler = $client->submit(
        $crawler->selectButton('Go!')->form(),
        [
            'math_captcha_answer' => $math_captcha_result,
            'formNameLabelTextBefore' => $text_to_spin,
            'formNameLabelSpinCapWords' => $include_capitalized,
        ]
    );

    return $crawler->filter('#formNameLabelTextAfter')->first()->text();
}
<?php
// Crawl script for the 'lashowroom' source: resets product status, logs in,
// then walks the paginated browse listing.
// NOTE(review): the while loop below continues past the end of this view;
// its exit condition and the rest of its body are not visible here.

// Include config and initiate
include_once __DIR__ . '/../config/default_config.php';
includeMyFiles();
$goutte = new Goutte\Client();

// Get Database
$db = new Db();
// Set status=0 on every products_data row whose source is 'lashowroom'.
$db->query("UPDATE products_data SET status=0 WHERE source='lashowroom'");

// Fetch the login page and submit its "Log In" form with the account credentials.
// NOTE(review): credentials are hard-coded in this script.
$url = 'https://www.lashowroom.com/login?previous=/wholesalefashionistas/browse/category/3/srd/small/70/1';
$crawler = $goutte->request('GET', $url);
$form = $crawler->selectButton('Log In')->form();
// exit;
$crawler = $goutte->submit($form, array('login_id' => '*****@*****.**', 'login_key' => 'uplp'));

/* Master URL: the page counter ($count) is appended to this on each request. */
$crawlUrl = 'https://www.lashowroom.com/wholesalefashionistas/browse/all/1/srd/large/70/';
$count = 1;
$status = 1;
$urls = array();

// Loop runs for as long as $status stays 1.
while ($status == 1) {
    // Selector string (XPath for the anchors inside the gallery list items)
    $domSelector = '//*[@class="store-front-browse-gallery"]/ul/li/div/a';
    /* Count selector for page number (text nodes of the browse title element) */
    $countSelector = '//*[@class="store-front-browse-title"]/text()';
    /* Main crawl, adding count to master */
    $crawler = $goutte->request('GET', $crawlUrl . $count);
    /* Getting total products string: the callback returns each matched node's text */
    $pcount = $crawler->filterXPath($countSelector)->each(function ($node) {
        return $node->text();
    });
    $tProducts = '';
// Product crawl script for the 'numberonewholesales' source: resets product
// status, logs in, then calls getProductData() for every stored product URL.
// NOTE(review): getProductData() below continues past the end of this view.
$goutte = new Goutte\Client();

// Get Database
$db = new Db();
// Set status=0 on every products_data row whose source is 'numberonewholesales'.
$db->query("UPDATE products_data SET status=0 WHERE source='numberonewholesales'");

$loginUrl = 'https://numberonewholesales.com/login.php';
$crawler = $goutte->request('GET', $loginUrl);

// Remove every "osCsid=<word characters>" occurrence from the login page HTML
// and reload the crawler with the cleaned markup before locating the form.
// NOTE(review): presumably this strips a session id from the form action — confirm.
$html = $crawler->html();
$re = "/(osCsid)=\\w*/";
$subst = "";
$newHtml = preg_replace($re, $subst, $html);
$crawler->clear();
$crawler->addHtmlContent($newHtml);

// Fill in and submit the "Sign In" form.
// NOTE(review): credentials are hard-coded in this script.
$form = $crawler->selectButton('Sign In')->form();
$form['email_address'] = '*****@*****.**';
$form['password'] = '******';
$crawler = $goutte->submit($form);

// Default Status code
$status_code = 200;
$count = 1;

// Load the stored product URLs for this source and process each one.
$data = $db->query("SELECT url from product_urls WHERE source='numberonewholesales'");
$url = '';
foreach ($data as $value) {
    $url = $value['url'];
    // NOTE(review): this reassigns $data, the same variable the loop iterates over;
    // a distinct name would be clearer.
    $data = getProductData($goutte, $url, $db);
}

// Requests $url with the given client and reads the response status;
// the handling of a 200 response is outside this view.
function getProductData($goutte, $url, $db) {
    $crawler = $goutte->request('GET', $url);
    $status_code = $goutte->getResponse()->getStatus();
    $data = array();
    if ($status_code == 200) {
// Category crawl script for the 'numberonewholesales' source: resets product
// status, logs in, collects product links from each category page and stores
// them in product_urls.
// NOTE(review): $goutte, getLink() and getUrlFromArray() are not defined in
// this part of the file; they are assumed to be provided elsewhere.

// Get Database
$db = new Db();
// Set status=0 on every products_data row whose source is 'numberonewholesales'.
$db->query("UPDATE products_data SET status=0 WHERE source='numberonewholesales'");

// URL and EndPoints
$baseURL = 'http://numberonewholesales.com/';
$endPoints['leggings'] = 'http://numberonewholesales.com/leggings-c-1067.html';
$endPoints['kids'] = 'http://numberonewholesales.com/kids-wear-c-1113.html';
$endPoints['palazzo'] = 'http://numberonewholesales.com/palazzo-pants-c-1112.html';
$endPoints['skirts'] = 'http://numberonewholesales.com/skirts-c-1080.html';
$endPoints['shorts'] = 'http://numberonewholesales.com/shorts-c-1079.html';
$endPoints['pants'] = 'http://numberonewholesales.com/pants-c-1084.html';

// Fetch the login page and submit its "Sign In" form.
// NOTE(review): credentials are hard-coded in this script.
$loginUrl = 'https://numberonewholesales.com/login.php';
$crawler = $goutte->request('GET', $loginUrl);
$form = $crawler->selectButton('Sign In')->form();
// exit;
$crawler = $goutte->submit($form, array('email_address' => '*****@*****.**', 'password' => 'abhishek'));

// Default Status code
$status_code = 200;
$count = 1;

$productUrls = array();
foreach ($endPoints as $key => $value) {
    // The endpoints above are already absolute URLs, so prefixing $baseURL
    // unconditionally would request "http://host/http://host/...". Only
    // relative endpoints get the base prepended.
    $categoryUrl = preg_match('#^https?://#i', $value) ? $value : $baseURL . $value;
    $productUrls[] = getLink($goutte, $categoryUrl);

    // Pause a random 1-2 seconds between category requests.
    $sleep_time = rand(1 * 1000000, 2 * 1000000);
    echo "\tSleeping for " . number_format($sleep_time / 1000000, 2) . " sec\n";
    usleep($sleep_time);
}

// Convert the per-category results into the list of URLs to store.
$urls = getUrlFromArray($productUrls);
foreach ($urls as $key => $value) {
    // Insert Product URLs (INSERT IGNORE: rows rejected as duplicates are skipped)
    $db->query("INSERT IGNORE INTO product_urls(url, url_hash, source, status) \n\t\t\tVALUES(:url,:url_hash,:source,:status)", array("url" => $value, "url_hash" => hash('ripemd160', $value), "source" => "numberonewholesales", "status" => 1));
}