Example #1
2
 /**
  * Re-scrape every stored article and persist the extracted fields.
  *
  * The CSS selectors used depend on the article's publisher id ("1" to "4").
  * Publishers "3" and "4" only provide a body text here, so their title and
  * date are left untouched instead of being overwritten with stale or
  * undefined values.
  *
  * @return void
  */
 public function processAllArticles()
 {
     // A single client is reused for every request.
     $client = new \Goutte\Client();
     foreach (\MkmScraper\Article::all() as $article) {
         $crawler = $client->request('GET', $article->link);
         // Reset per article so values scraped for a previous article can
         // never leak into this one.
         $text = null;
         $title = null;
         $date = null;
         switch ($article->publisher) {
             case "1":
                 $text = $crawler->filter("#article_content")->text();
                 $title = $crawler->filter("#article_title h2")->text();
                 $date = $crawler->filter("#article_date")->text();
                 break;
             case "2":
                 $text = $crawler->filter(".postContent")->text();
                 $title = $crawler->filter(".postTitle")->text();
                 // The byline is split on "//"; the date is taken from the second part.
                 $au = explode("//", $crawler->filter(".byAuthor")->text());
                 if (isset($au[1])) {
                     $date = trim(preg_replace('#[^A-Za-z0-9-, /]#', '', $au[1]));
                 }
                 break;
             case "3":
                 $text = $crawler->filter("#blackborder_main_wrapper .content .field-name-body")->text();
                 echo $text;
                 break;
             case "4":
                 $text = $crawler->filter(".articleBody")->text();
                 echo $text;
                 break;
         }
         // Only overwrite the fields that were actually scraped.
         if ($text !== null) {
             $article->text = $text;
         }
         if ($title !== null) {
             $article->title = $title;
         }
         if ($date !== null) {
             $timestamp = strtotime($date);
             // An unparsable date would otherwise be stored as 1970-01-01.
             if ($timestamp !== false) {
                 $article->date = date("Y-m-d", $timestamp);
             }
         }
         $article->save();
     }
 }
Example #2
0
 /**
  * Execute the console command.
  *
  * For every card whose todaysPrice() query returns no rows, the product
  * page is fetched and a CardPrice row is created from the seller count,
  * low price and trend price cells. A value whose cell is missing from the
  * page is stored as 0.
  *
  * @return mixed
  */
 public function handle()
 {
     $client = new \Goutte\Client();
     foreach (\MkmScraper\Card::all() as $card) {
         if ($card->todaysPrice()->count() >= 1) {
             continue;
         }
         $url = 'https://www.magiccardmarket.eu/Products/Singles/' . rawurlencode($card->set) . '/' . rawurlencode($card->name);
         $crawler = $client->request('GET', $url);
         \Log::info($url);
         $available = 0;
         $lowfinal = 0;
         $trendfinal = 0;
         try {
             // filter() always returns a Crawler object, so presence has to
             // be tested with count() rather than by truthiness.
             $av = $crawler->filter('.sectioncontent .availTable .row_0  .cell_0_1');
             if ($av->count() > 0) {
                 $available = $av->first()->text();
             }
             $lo = $crawler->filter('.sectioncontent .availTable .row_1  .cell_1_1 span');
             if ($lo->count() > 0) {
                 // Prices use a decimal comma on the page.
                 $lowfinal = str_replace(",", ".", $lo->first()->text());
             }
             $tr = $crawler->filter('.sectioncontent .availTable .row_2  .cell_2_1');
             if ($tr->count() > 0) {
                 // Only the first space-separated token of the cell is kept.
                 $trendnumber = explode(" ", $tr->first()->text());
                 $trendfinal = str_replace(",", ".", $trendnumber[0]);
             }
         } catch (\Exception $e) {
             // Fully qualified so the catch also works inside a namespace.
             print $e->getMessage();
         }
         \MkmScraper\CardPrice::create(array("id_card" => $card->id, "low" => $lowfinal, "trend" => $trendfinal, "sellers" => $available));
     }
 }
Example #3
0
 /**
  * AJAX
  *
  * Certification requirements taken from the ADM website.
  *
  * Reads the POST parameter "getQ", maps it to either a remote ADM page or a
  * local Markdown file, and answers with a JSON object holding the keys
  * "resp" (bool), "msg" and "content". Unknown "getQ" values get the default
  * "no information" answer.
  *
  * @Route("informacion-amd", name="front-getInfoAMD", options={"expose":true})
  * @Method({"POST"})
  */
 public function getInfoAMD_Action(Request $req)
 {
     $resp = array('resp' => FALSE, 'msg' => 'Sin Informacion al respecto.', 'content' => '0');
     $rutaWeb = $this->container->get('roots_mds')->getRootEdukConti();
     if ($req->request->has('getQ')) {
         $getQ = $req->request->get('getQ');
         $urls = array(
             'reqCert' => 'http://adm.org.mx/requisitos-certificacion.php',
             'reCert' => 'http://adm.org.mx/recertificacion.php',
             'reglamento' => 'http://adm.org.mx/reglamento-certificacion.php',
             'faqs' => 'http://adm.org.mx/certificacion-faqs.php',
             'conv' => 'http://adm.org.mx/convocatorias-eucodon.php',
             'qescerti' => $rutaWeb . '/qescerti.md',
             'quiencerti' => $rutaWeb . '/quiencerti.md',
         );
         // The key comes from the client: only whitelisted entries are served.
         if (is_string($getQ) && isset($urls[$getQ])) {
             // Anchored at position 0 so a local path that merely contains
             // "http" is not mistaken for a remote URL.
             if (strpos($urls[$getQ], 'http') === 0) {
                 $cliente = new \Goutte\Client();
                 $crawler = $cliente->request('GET', $urls[$getQ]);
                 $nodo = $crawler->filter('div#contenido-completo');
                 // html() throws on an empty node list; treat that as "nothing retrieved".
                 $puzzle = $nodo->count() > 0 ? $nodo->html() : '';
                 if (strlen($puzzle) < 20) {
                     $resp['resp'] = FALSE;
                     $resp['msg'] = 'Ocurrio un Error al Recuperar, por favor, intentalo nuevamente, mas tarde.';
                 } else {
                     $resp['content'] = $puzzle;
                     $resp['resp'] = TRUE;
                 }
             } else {
                 $resp['content'] = $this->container->get('markdown.parser')->transformMarkdown(file_get_contents($urls[$getQ]));
                 $resp['resp'] = TRUE;
                 $resp['msg'] = 'Ok';
             }
         }
     }
     $respuesta = new \Symfony\Component\HttpFoundation\JsonResponse();
     return $respuesta->setData($resp);
 }
 /**
  * Requests the test page from the local server on port 8888 and checks the
  * SAPI flags it reports.
  */
 public function testIsWeb()
 {
     $server = new \Goutte\Client();
     // HTTP method tokens are case-sensitive; use the canonical spelling.
     $response = $server->request('GET', 'http://127.0.0.1:8888/webroot/test.php');
     $body = $response->text();
     $this->assertRegExp('{Is CLI SAPI: false}', $body);
     $this->assertRegExp('{Is PHP Server SAPI: true}', $body);
     $this->assertRegExp('{Is Web Server SAPI: true}', $body);
 }
Example #5
0
 /**
  * Fetches the given URL, records its status and collects outgoing links.
  *
  * The status is set to Url::STATUS_ERROR for response codes >= 400 and to
  * Url::STATUS_OK otherwise. Every link on the page that points to the
  * fleapop.com host is stored in $this->extractedUrls, one depth level
  * below $url.
  *
  * @param Url $url page to fetch; its status is updated in place
  *
  * @return bool always true
  */
 public function process(Url &$url)
 {
     $this->extractedUrls = [];
     $client = new \Goutte\Client();
     $crawler = $client->request('GET', $url->getUrl());
     $url->setStatus($client->getResponse()->getStatus() >= 400 ? Url::STATUS_ERROR : Url::STATUS_OK);
     foreach ($crawler->filter('a')->links() as $link) {
         $eu = new Url($link->getUri(), $url->getDepth() + 1);
         // Escaped dot plus a boundary after the host, so that look-alike
         // hosts such as "fleapop.com.evil.tld" or "fleapopXcom" are rejected.
         if (preg_match('~^https?://fleapop\.com(?:[/?#:]|$)~i', $eu->getUrl())) {
             $this->extractedUrls[] = $eu;
         }
     }
     return true;
 }
Example #6
0
 /**
  * Renders the company page for the SIRET number given in $params['siret'].
  *
  * The first Infogreffe search result is used; its detail page is fetched to
  * read the legal form and the activity, each with a fallback selector and a
  * placeholder label when nothing usable is found.
  *
  * @throws \Exception when the search returns no result
  */
 public static function company($request, $response, $params)
 {
     global $container;
     $types = Yaml::parse(file_get_contents(__DIR__ . '/../types.yml'));
     $results = Infogreffe::search($params['siret']);
     $client = new \Goutte\Client();
     if (empty($results)) {
         throw new \Exception('Numéro SIRET introuvable');
     }
     $page = $client->request('GET', $results[0]->getURL());

     // Legal form: primary selector first, "datapath" attribute as fallback.
     $categoryNodes = $page->filter('.first .identTitreValeur p:nth-of-type(5) .data');
     if ($categoryNodes->count() == 0) {
         $categoryNodes = $page->filter('[datapath="entreprise.personneMorale.identification.formeJuridique.libelle"] p');
     }
     // Activity: same two-step lookup.
     $activityNodes = $page->filter('.first .identTitreValeur p:nth-of-type(6) .data');
     if ($activityNodes->count() == 0) {
         $activityNodes = $page->filter('[datapath="activite.codeNAF"] p:first-of-type a');
     }

     $categoryName = count($categoryNodes) > 0 ? trim($categoryNodes->text()) : 'Forme juridique inconnue';
     if ($categoryName == 'null') {
         // The page can contain the literal text "null".
         $categoryName = 'Forme juridique inconnue';
     }
     $activityName = count($activityNodes) > 0 ? trim($activityNodes->text()) : 'Inconnue';
     if ($activityName == 'null') {
         $activityName = 'Inconnue';
     }

     $description = 'Informations sur ' . $results[0]->name;
     if ($categoryName != 'Forme juridique inconnue') {
         $description .= ', ' . $categoryName;
     }
     $container->view->render($response, 'company.tpl', array(
         'info' => $results[0],
         'types' => $types,
         'category' => $categoryName,
         'activity' => $activityName,
         'url' => $results[0]->getURL(),
         'title' => $results[0]->name,
         'description' => $description,
     ));
 }
Example #7
0
 /**
  * Walks every stored event page, follows each link whose href contains
  * "event", and records the cards found on the linked page.
  *
  * Each row text is split at its first space into "<number> <card name>";
  * the card is looked up by name prefix and, when found, a
  * DecklistAppearance is created with a "place" counter that increases by
  * one for every followed link within an event.
  *
  * @return void
  */
 public function processAll()
 {
     $client = new \Goutte\Client();
     foreach (\MkmScraper\Event::all() as $event) {
         $crawler = $client->request('GET', $event->link);
         $place = 1;
         $crawler->filter("td>div>div a")->each(function ($node) use ($client, $event, &$place) {
             $href = (string) $node->attr('href');
             if (strpos($href, "event") === false) {
                 return;
             }
             $dlcrawler = $client->request('GET', $href);
             $dlcrawler->filter("table table td .chosen_tr,table table td .hover_tr")->each(function ($row) use ($event, $place) {
                 $split = explode(" ", $row->text(), 2);
                 // Without a name part the LIKE pattern would degrade to "%"
                 // and match an arbitrary card, so such rows are skipped.
                 if (!isset($split[1]) || $split[1] === '') {
                     return;
                 }
                 $card = \MkmScraper\Card::where("name", "LIKE", $split[1] . "%")->first();
                 if ($card) {
                     \MkmScraper\DecklistAppearance::create(array("id_card" => $card->id, "number" => $split[0], "place" => $place, "id_event" => $event->id));
                 }
             });
             $place = $place + 1;
         });
     }
 }
/**
 * Submits a text to paraphrasing-tool.com and returns the rewritten text.
 *
 * The form is protected by a math captcha written out in words
 * ("<number> <+|-> <number>"); it is solved before the form is submitted.
 *
 * @param string $text_to_spin        text to paraphrase
 * @param bool   $include_capitalized sent as the "formNameLabelSpinCapWords" field
 *
 * @return string|false the content of "#formNameLabelTextAfter", or false
 *                      when the captcha equation cannot be read or solved
 */
function spin_article($text_to_spin, $include_capitalized = false)
{
    $client = new \Goutte\Client();
    $spinner_url = 'http://paraphrasing-tool.com/';
    $crawler = $client->request('GET', $spinner_url);
    $equation = $crawler->filter('#math_captcha_equation')->first()->attr('value');
    // Number words are turned into digits; attr() may return null, hence the cast.
    $equation = str_replace(
        ['zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten'],
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
        (string) $equation
    );
    $parts = explode(' ', $equation);
    // A malformed equation is reported as a failure instead of producing
    // undefined-offset notices or arithmetic on non-numeric strings.
    if (count($parts) < 3 || !is_numeric($parts[0]) || !is_numeric($parts[2])) {
        return false;
    }
    switch ($parts[1]) {
        case '+':
            $math_captcha_result = $parts[0] + $parts[2];
            break;
        case '-':
            $math_captcha_result = $parts[0] - $parts[2];
            break;
        default:
            // Unsupported operator.
            return false;
    }
    $crawler = $client->submit($crawler->selectButton('Go!')->form(), ['math_captcha_answer' => $math_captcha_result, 'formNameLabelTextBefore' => $text_to_spin, 'formNameLabelSpinCapWords' => $include_capitalized]);
    return $crawler->filter('#formNameLabelTextAfter')->first()->text();
}
Example #9
0
 /**
  * Collects the posts listed on pages 1 to 7 of the buzztache.com "no"
  * category.
  *
  * @return array list of items with the keys "title" (text of the h3),
  *               "description" (an <img> tag, with an empty src when the
  *               article has no image) and "link" (href of the first anchor);
  *               empty when no article was found
  */
 public function execute()
 {
     // Must exist before it is captured by reference, and guarantees an
     // array return value even when no article matches.
     $rss = [];
     $client = new Goutte\Client();
     for ($i = 1; $i < 8; $i++) {
         // The first page has no "/page/N/" suffix.
         $pageUrl = 'http://buzztache.com/category/no/';
         if ($i != 1) {
             $pageUrl .= 'page/' . $i . '/';
         }
         $crawler = $client->request('GET', $pageUrl);
         $crawler->filter('article')->each(function ($node) use (&$rss) {
             $img = '';
             $images = $node->filterXPath('//img');
             if ($images->count()) {
                 $img = $images->attr('src');
             }
             $rss[] = [
                 'title' => $node->filterXPath('//h3')->text(),
                 'description' => "<img src='{$img}' />",
                 'link' => $node->filterXPath('//a')->attr('href'),
             ];
         });
     }
     return $rss;
 }
Example #10
0
 /**
  * Execute the console command.
  *
  * For every card without a GraphPrice row created during the last week,
  * the product page is fetched and the chart data embedded in its script
  * block ("chartData = {...};var ctx") is decoded. One GraphPrice row is
  * created per chart label whose date is not stored yet. When nothing new
  * was stored, a row dated "0000-00-00" is created instead. Cards whose
  * page carries no readable chart data are skipped.
  *
  * @return mixed
  */
 public function handle()
 {
     $client = new \Goutte\Client();
     foreach (\MkmScraper\Card::all() as $card) {
         $recentRows = \MkmScraper\GraphPrice::where("id_card", $card->id)->where("created_at", ">", date('Y-m-d H:i:s', strtotime('-1 week')))->count();
         if ($recentRows >= 1) {
             continue;
         }
         $crawler = $client->request('GET', 'https://www.magiccardmarket.eu/Products/Singles/' . rawurlencode($card->set) . '/' . rawurlencode($card->name));
         $script = $crawler->filter('#ProductInformation script');
         $object = null;
         if ($script->count() > 0) {
             $split = explode("chartData =", $script->first()->text());
             if (isset($split[1])) {
                 $split2 = explode(";var ctx", $split[1]);
                 $object = json_decode($split2[0]);
             }
         }
         // A missing script, a missing marker or invalid JSON must not abort
         // the run for all remaining cards.
         if (!is_object($object) || !isset($object->labels, $object->datasets[0]->data)) {
             print $card->name . " (no chart data)\n";
             continue;
         }
         $count = 0;
         foreach ($object->labels as $key => $label) {
             $parsed = date_create_from_format('d.m.y', $label);
             if ($parsed === false) {
                 // Label is not in the expected d.m.y format.
                 continue;
             }
             $day = date_format($parsed, 'Y-m-d');
             if (\MkmScraper\GraphPrice::where("id_card", $card->id)->where("date", $day)->count() < 1) {
                 \MkmScraper\GraphPrice::create(array("id_card" => $card->id, "date" => $day, "sell" => $object->datasets[0]->data[$key]));
                 $count++;
             }
         }
         if ($count == 0) {
             \MkmScraper\GraphPrice::create(array("id_card" => $card->id, "date" => "0000-00-00", "sell" => ''));
         }
         print $card->name . "\n";
     }
 }
 /**
  * Scans every collected link and extracts the configured fields.
  *
  * For each entry of $this->list the page is fetched and every XPath in
  * $website['scan'] is evaluated; the resulting text has its tags stripped
  * and the patterns in $this->stop_words removed. One row per link is
  * appended to $this->result under the key $website['name'].
  *
  * @param $website site configuration; the keys read here are
  *                 ['list']['typeLink'], 'baseUrl', 'scan' and 'name'
  */
 public function scan($website)
 {
     foreach ($this->list as $link) {
         // Relative links are prefixed with the site's base URL.
         $target = $website['list']['typeLink'] == 'relative'
             ? $website['baseUrl'] . $link
             : $link;
         $page = $this->goutte->request('GET', $target);
         // Extract each configured field from the page.
         $row = array();
         foreach ($website['scan'] as $field => $xpath) {
             $raw = strip_tags($page->filterXPath($xpath)->text());
             $row[$field] = preg_replace($this->stop_words, "", $raw);
         }
         // Append to the result list of this website.
         $this->result[$website['name']][] = $row;
     }
 }
Example #12
0
	<?php 
include_once __DIR__ . '/../goutte.phar';
$goutte = new Goutte\Client();
// $baseURL = 'http://www.numberonewholesales.com/';
//    $leggingsEndpoint = 'leggings-c-1067.html';
//    $palazzoEndpoint = 'palazzo-pants-c-1112.html';
//    $crawler = $goutte->request('GET', $baseURL . $leggingsEndpoint);
//    $status_code = $goutte->getResponse()->getStatus();
// echo $status_code;
//    // echo $crawler->html();
// if($status_code == 200){
// 	$domSelector = '//*[@class="productListing"]';
// 	$crawler->filterXPath($domSelector)->each(function ($node) {
//     	print $node->text()."\n";
// 	});
// 	// $domSelector = '//*[@id="product-54391"]/div/div[2]';
// 	// $crawler->filterXPath($domSelector)->each(function ($node) {
//  //    	print_r($node);
//  //    	echo "\n";
// 	// });
// }
// 	wholesalefashionistas@lashowroom.com
// Pass: uplp
//
$url = 'https://www.lashowroom.com/login?previous=/wholesalefashionistas/browse/category/3/srd/small/70/1';
$crawler = $goutte->request('GET', $url);
$form = $crawler->selectButton('Log In')->form();
// exit;
$crawler = $goutte->submit($form, array('head_login_id' => '*****@*****.**', 'head_login_key' => 'uplp'));
$crawlUrl = 'https://www.lashowroom.com/wholesalefashionistas/browse/category/3/srd/small/70/';
$count = 1;
Example #13
0
<?php

// Include config and initiate
include_once __DIR__ . '/../config/default_config.php';
includeMyFiles();
// Initialize goutte
$goutte = new Goutte\Client();
$log = fopen(date("Y-m-d_H-i-s", time()) . ".log", "a");
fwrite($log, "[START]\r\n");
fwrite($log, "[STARTED AT]" . time() . "\r\n");
// Get Database
$db = new Db();
$db->query("UPDATE products_data SET status=0 WHERE source='numberonewholesales'");
// URL and EndPoints
$baseURL = 'http://numberonewholesales.com/';
$endPoints['leggings'] = 'http://numberonewholesales.com/leggings-c-1067.html';
$endPoints['kids'] = 'http://numberonewholesales.com/kids-wear-c-1113.html';
$endPoints['palazzo'] = 'http://numberonewholesales.com/palazzo-pants-c-1112.html';
$endPoints['skirts'] = 'http://numberonewholesales.com/skirts-c-1080.html';
$endPoints['shorts'] = 'http://numberonewholesales.com/shorts-c-1079.html';
$endPoints['pants'] = 'http://numberonewholesales.com/pants-c-1084.html';
$loginUrl = 'https://numberonewholesales.com/login.php';
$crawler = $goutte->request('GET', $loginUrl);
$form = $crawler->selectButton('Sign In')->form();
// exit;
$crawler = $goutte->submit($form, array('email_address' => '*****@*****.**', 'password' => 'abhishek'));
// Default Status code
$status_code = 200;
$count = 1;
$productUrls = array();
foreach ($endPoints as $key => $value) {
 /**
  * Checks that a Facebook access token belongs to the given account id by
  * querying the Graph API "me" endpoint.
  *
  * @param int    $id          Facebook account id
  * @param string $accessToken Facebook access token
  *
  * @return bool Facebook account status; false when the API answer cannot
  *              be decoded or carries no id
  *
  * @throws InvalidPropertyUserException when the API answers with an error payload
  */
 protected function isValidFacebookAccount($id, $accessToken)
 {
     $client = new \Goutte\Client();
     $client->request('GET', sprintf('https://graph.facebook.com/me?access_token=%s', rawurlencode($accessToken)));
     $response = json_decode($client->getResponse()->getContent());
     // An undecodable body must never validate: with a loose comparison a
     // null id would match an empty $id.
     if (!is_object($response)) {
         return false;
     }
     if (!empty($response->error)) {
         $message = isset($response->error->message) ? $response->error->message : 'Unknown Facebook API error';
         throw new InvalidPropertyUserException($message);
     }
     // Compared as strings so numeric coercion cannot produce a false match.
     return isset($response->id) && (string) $response->id === (string) $id;
 }
Example #15
0
<?php

// Include config and initiate
include_once __DIR__ . '/../config/default_config.php';
includeMyFiles();
$goutte = new Goutte\Client();
// Get Database
$db = new Db();
$db->query("UPDATE products_data SET status=0 WHERE source='lashowroom'");
$url = 'https://www.lashowroom.com/login?previous=/wholesalefashionistas/browse/category/3/srd/small/70/1';
$crawler = $goutte->request('GET', $url);
$form = $crawler->selectButton('Log In')->form();
// exit;
$crawler = $goutte->submit($form, array('login_id' => '*****@*****.**', 'login_key' => 'uplp'));
$product = getProductData($goutte, 'https://www.lashowroom.com/wholesalefashionistas/item/1215');
print_r($product);
function getProductData($goutte, $url)
{
    $crawler = $goutte->request('GET', $url);
    $status_code = $goutte->getResponse()->getStatus();
    $data = array();
    $result = array();
    if ($status_code == 200) {
        $filterNav = '//*[@class="item-detail-header"]/p/a';
        $catStruct = $crawler->filterXPath($filterNav)->each(function ($node) {
            return $node->html();
        });
        foreach ($catStruct as $key => $catVal) {
            $catStruct[$key] = trim(str_replace(' ', '', $catVal));
        }
        $result['category'] = $catStruct[1];
Example #16
0
<?php

// Include config and initiate
include_once __DIR__ . '/../config/default_config.php';
includeMyFiles();
// Initialize goutte
$goutte = new Goutte\Client();
// Get Database
$db = new Db();
$loginUrl = 'http://www.colorfive.com/sign-in';
$crawler = $goutte->request('GET', $loginUrl);
print_r($crawler->html());
exit;
// Default Status code
$status_code = 200;
$count = 1;
$data = $db->query("SELECT url from product_urls WHERE source='numberonewholesales'");
$url = '';
foreach ($data as $value) {
    $url = $value['url'];
    $data = getProductData($goutte, $url, $db);
}
function getProductData($goutte, $url, $db)
{
    $crawler = $goutte->request('GET', $url);
    $status_code = $goutte->getResponse()->getStatus();
    $data = array();
    if ($status_code == 200) {
        $attribs = array();
        $domSelector = '//*[@class="productInfoName"]';
        $attribs['title'] = $data = $crawler->filterXPath($domSelector)->each(function ($node) {
//Request::setTrustedProxies(array('127.0.0.1'));
ini_set('max_execution_time', 300);
//300 seconds = 5 minutes
// Homepage: renders the index template without any variables.
$app->get('/', function () use ($app) {
    $twig = $app['twig'];

    return $twig->render('index.html', array());
})->bind('homepage');
// Error handler: in debug mode nothing is returned; otherwise the most
// specific error template that exists is rendered.
$app->error(function (\Exception $e, $code) use ($app) {
    if ($app['debug']) {
        return;
    }
    // Candidates from most to least specific:
    // 404.html, or 40x.html, or 4xx.html, or default.html
    $templates = array(
        'errors/' . $code . '.html',
        'errors/' . substr($code, 0, 2) . 'x.html',
        'errors/' . substr($code, 0, 1) . 'xx.html',
        'errors/default.html',
    );
    $body = $app['twig']->resolveTemplate($templates)->render(array('code' => $code));

    return new Response($body, $code);
});
// Lists the files reported by the doc.php.net revcheck page for pt_BR.
// The first and last cell of every table row are handed to Extractor, and
// a dump of getFullNames() is returned as the response body.
$app->get('/untranslated', function () use ($app) {
    $client = new Goutte\Client();
    $crawler = $client->request('GET', 'http://doc.php.net/revcheck.php?p=missfiles&lang=pt_BR');
    $rows = $crawler->filter("table > tr");
    $nodeValues = $rows->each(function (Crawler $node, $i) {
        $cells = $node->children();
        return array($cells->first()->text(), $cells->last()->text());
    });
    $extractor = new Extractor($nodeValues);
    // var_dump() prints directly and returns null, which left the Response
    // body empty; capture its output so it becomes the body instead.
    ob_start();
    var_dump($extractor->getFullNames());
    $dump = ob_get_clean();

    return new Response($dump);
})->bind('untranslated');
$app->get('/trello', function () use($app) {
    $client = new Client();
    //veja aqui para gerar a key e o token de uso
    //https://trello.com/c/jObnWvl1/25-generating-your-developer-key
    $client->authenticate('afdadfasdfasfd', 'adsfasdfsdfafas', Client::AUTH_URL_CLIENT_ID);
<?php

// Include config and initiate
include_once __DIR__ . '/../config/default_config.php';
includeMyFiles();
// Initialize goutte
$goutte = new Goutte\Client();
// Get Database
$db = new Db();
$loginUrl = 'https://numberonewholesales.com/login.php';
$crawler = $goutte->request('GET', $loginUrl);
$form = $crawler->selectButton('Sign In')->form();
// exit;
$crawler = $goutte->submit($form, array('email_address' => '*****@*****.**', 'password' => 'abhishek'));
// Default Status code
$status_code = 200;
$count = 1;
$data = $db->query("SELECT url from product_urls WHERE source='numberonewholesales'");
$url = '';
foreach ($data as $value) {
    $url = $value['url'];
    $data = getProductData($goutte, $url, $db);
}
function getProductData($goutte, $url, $db)
{
    $crawler = $goutte->request('GET', $url);
    $status_code = $goutte->getResponse()->getStatus();
    $data = array();
    if ($status_code == 200) {
        $attribs = array();
        $domSelector = '//*[@class="productInfoName"]';
Example #19
0
*/
// Debug route: prints every link of one fixed mtgtop8 event page whose href
// contains "event", followed by the highlighted table rows of the linked page.
Route::get("scrape", function () {
    $client = new \Goutte\Client();
    $eventPage = $client->request('GET', 'http://mtgtop8.com/event?e=10488');
    $eventPage->filter("td>div>div a")->each(function ($anchor) use ($client) {
        if (strpos($anchor->attr('href'), "event") === false) {
            return;
        }
        $target = "http://mtgtop8.com/" . $anchor->attr('href');
        print $target . " - " . $anchor->text() . "<br>";
        $linkedPage = $client->request('GET', $target);
        $linkedPage->filter("table table td .chosen_tr,table table td .hover_tr")->each(function ($row) {
            print "<b>" . $row->text() . "</b><br>";
        });
    });
});
// Debug route: for every card, fetches the product page, decodes the chart
// data embedded in its script block and stores one GraphPrice row per chart
// date that is not stored yet. Prints the card name and the time spent on it.
Route::get("htmlscrape", function () {
    $client = new \Goutte\Client();
    foreach (\MkmScraper\Card::all() as $card) {
        // microtime(true) returns a float; the string form cannot be subtracted.
        $start = microtime(true);
        $crawler = $client->request('GET', 'https://www.magiccardmarket.eu/Products/Singles/' . rawurlencode($card->set) . '/' . rawurlencode($card->name));
        $script = $crawler->filter('#ProductInformation script');
        $object = null;
        if ($script->count() > 0) {
            $split = explode("chartData =", $script->first()->text());
            if (isset($split[1])) {
                $split2 = explode(";var ctx", $split[1]);
                $object = json_decode($split2[0]);
            }
        }
        // Pages without readable chart data are skipped instead of aborting the run.
        if (is_object($object) && isset($object->labels, $object->datasets[0]->data)) {
            foreach ($object->labels as $key => $label) {
                $parsed = date_create_from_format('d.m.y', $label);
                if ($parsed === false) {
                    continue;
                }
                $day = date_format($parsed, 'Y-m-d');
                if (\MkmScraper\GraphPrice::where("id_card", $card->id)->where("date", $day)->count() < 1) {
                    \MkmScraper\GraphPrice::create(array("id_card" => $card->id, "date" => $day, "sell" => $object->datasets[0]->data[$key]));
                }
            }
        }
        // Seconds are converted to the milliseconds the label announces.
        $elapsedMs = round((microtime(true) - $start) * 1000);
        print $card->name . " - " . $elapsedMs . " ms <br>";
    }
});
Example #20
0
File: goutte.php Project: yfix/yf
#!/usr/bin/php
<?php 
// Service manifest for Goutte: required services, the git repository to
// fetch, the autoload mapping and a usage example.
$config = [
    'require_services' => ['guzzle', 'sf_browserkit', 'sf_css_selector', 'sf_dom_crawler', 'promise'],
    'git_urls' => ['https://github.com/yfix/goutte.git' => 'goutte/'],
    'autoload_config' => ['goutte/Goutte/' => 'Goutte'],
    'example' => function () {
        $client = new Goutte\Client();
        $crawler = $client->request('GET', 'http://google.com/');
        $crawler->filter('head > title')->each(function ($node) {
            print $node->text() . "\n";
        });
    },
];
// $return_config is not defined in this file (presumably set by an including
// script); !empty() avoids an undefined-variable notice when it is absent.
if (!empty($return_config)) {
    return $config;
}
require_once __DIR__ . '/_yf_autoloader.php';
new yf_autoloader($config);
Example #21
0
<?php

// Include config and initiate
include_once __DIR__ . '/../config/default_config.php';
includeMyFiles();
$goutte = new Goutte\Client();
// Get Database
$db = new Db();
$db->query("UPDATE products_data SET status=0 WHERE source='lashowroom'");
$url = 'https://www.lashowroom.com/login?previous=/wholesalefashionistas/browse/category/3/srd/small/70/1';
$crawler = $goutte->request('GET', $url);
$form = $crawler->selectButton('Log In')->form();
// exit;
$crawler = $goutte->submit($form, array('login_id' => '*****@*****.**', 'login_key' => 'uplp'));
/*Master URL*/
$crawlUrl = 'https://www.lashowroom.com/wholesalefashionistas/browse/all/1/srd/large/70/';
$count = 1;
$status = 1;
$urls = array();
while ($status == 1) {
    // Selector string
    $domSelector = '//*[@class="store-front-browse-gallery"]/ul/li/div/a';
    /*Count selector for page number*/
    $countSelector = '//*[@class="store-front-browse-title"]/text()';
    /*Main crawl , adding count to master*/
    $crawler = $goutte->request('GET', $crawlUrl . $count);
    /*Getting total products string*/
    $pcount = $crawler->filterXPath($countSelector)->each(function ($node) {
        return $node->text();
    });
    $tProducts = '';
 /**
  * Builds a ProductDetailScraper whose HTTP client replays three canned
  * 200 responses (apricot, avocado, golden-kiwi fixtures), in that order.
  */
 public function getProductDetailScraper()
 {
     $queue = [];
     foreach (['apricot.html', 'avocado.html', 'golden-kiwi.html'] as $fixture) {
         $queue[] = new Response(200, [], $this->getPageContents($fixture));
     }
     $stack = HandlerStack::create(new MockHandler($queue));
     $goutte = new Goutte\Client();
     $goutte->setClient(new Client(['handler' => $stack]));

     return new ProductDetailScraper($goutte);
 }
<?php

// Include config and initiate
include_once __DIR__ . '/../config/default_config.php';
includeMyFiles();
// Initialize goutte
$goutte = new Goutte\Client();
$goutte->getClient()->setDefaultOption('config/curl/' . CURLOPT_SSL_VERIFYHOST, FALSE);
$goutte->getClient()->setDefaultOption('config/curl/' . CURLOPT_SSL_VERIFYPEER, FALSE);
// Get Database
$db = new Db();
$loginUrl = 'http://numberonewholesales.com/login.php';
$crawler = $goutte->request('GET', $loginUrl);
$html = $crawler->html();
$re = "/(osCsid)=\\w*/";
$subst = "";
$newHtml = preg_replace($re, $subst, $html);
$crawler->clear();
$crawler->addHtmlContent($newHtml);
$form = $crawler->selectButton('Sign In')->form();
// exit;
$form['email_address'] = '*****@*****.**';
$form['password'] = '******';
$crawler = $goutte->submit($form);
//$crawler = $goutte->request($form->getMethod(), 'https://numberonewholesales.com/login.php?action=process', $values, $form->getPhpFiles());
print_r($form->getValues());
//print_r($crawler);
// Default Status code
$status_code = 200;
$count = 1;
$url = 'http://numberonewholesales.com/bags-printed-ankle-leggings-p-18294.html?cPath=1067';
 /**
  * Requests the test page from the local server on port 8888 and checks
  * that it reports the requester as local.
  */
 public function testIsLocal()
 {
     $server = new \Goutte\Client();
     // HTTP method tokens are case-sensitive; use the canonical spelling.
     $response = $server->request('GET', 'http://127.0.0.1:8888/webroot/test.php');
     $this->assertRegExp('{Is Local Requester: true}', $response->text());
 }
Example #25
0
<?php

require_once __DIR__ . '/../vendor/autoload.php';
require_once __DIR__ . '/../config/config.php';
$blogConfigFile = __DIR__ . '/../config/blog_config.php';
$arrBlogConfig = (include $blogConfigFile);
$blogConfig = $arrBlogConfig["blogConfig"];
$cli = new Goutte\Client();
// For every configured blog: read the topmost entry, compare its title with
// the one stored in data/<key>.db and post to Slack when it changed.
foreach ($blogConfig as $db => $item) {
    $crawler = $cli->request('GET', $item['url']);
    $urls = $crawler->filter('p.ttl a')->extract(array('_text', 'href'));
    $fileName = __DIR__ . '/../data/' . $db . '.db';
    // Nothing matched (layout change, failed request): skip rather than
    // post an empty message and overwrite the stored title.
    if (!isset($urls[0][0], $urls[0][1])) {
        continue;
    }
    // Take the topmost blog entry:
    // [0] -> title
    // [1] -> url
    $title = $urls[0][0];
    $blog_url = $urls[0][1];
    $newData = $title;
    // The state file does not exist before the first run.
    $oldData = is_file($fileName) ? file_get_contents($fileName) : false;
    if ($newData !== $oldData) {
        $text = $item['text'] . PHP_EOL . PHP_EOL;
        $text .= $title . PHP_EOL . PHP_EOL;
        $text .= 'http://www.keyakizaka46.com' . $blog_url;
        // http_build_query encodes every value, including token and channel.
        $query = http_build_query(array(
            'token' => SLACK_API_KEY,
            'channel' => '#' . $db . '_blog',
            'text' => $text,
        ), '', '&');
        file_get_contents("https://slack.com/api/chat.postMessage?" . $query);
        // Only rewritten when the title actually changed.
        file_put_contents($fileName, $newData);
    }
}