/**
 * Re-scrapes every stored article and copies the freshly extracted text,
 * title and date onto the model.
 *
 * The selectors depend on the article's publisher id ("1" to "4"). Only
 * publishers "1" and "2" expose a title and a date; for "3" and "4" just the
 * body text is read (and echoed).
 *
 * NOTE(review): no save() call is visible in this excerpt - confirm that the
 * modified models are persisted elsewhere.
 */
public function processAllArticles()
{
    foreach (\MkmScraper\Article::all() as $article) {
        $client = new \Goutte\Client();
        $crawler = $client->request('GET', $article->link);

        // Start from a clean slate for each article so values scraped for a
        // previous article can never be written onto this one.
        $text = null;
        $title = null;
        $date = null;

        if ($article->publisher == "1") {
            $text = $crawler->filter("#article_content")->text();
            $title = $crawler->filter("#article_title h2")->text();
            $date = $crawler->filter("#article_date")->text();
        } elseif ($article->publisher == "2") {
            $text = $crawler->filter(".postContent")->text();
            $title = $crawler->filter(".postTitle")->text();
            $auth = $crawler->filter(".byAuthor")->text();
            // The part of the byline after the first "//" is used as the date.
            $au = explode("//", $auth);
            if (isset($au[1])) {
                $date = trim(preg_replace('#[^A-Za-z0-9-, /]#', '', $au[1]));
            }
        } elseif ($article->publisher == "3") {
            $text = $crawler->filter("#blackborder_main_wrapper .content .field-name-body")->text();
            echo $text;
        } elseif ($article->publisher == "4") {
            $text = $crawler->filter(".articleBody")->text();
            echo $text;
        }

        // Only overwrite the fields that were actually extracted.
        if ($text !== null) {
            $article->text = $text;
        }
        if ($title !== null) {
            $article->title = $title;
        }
        if ($date !== null) {
            $timestamp = strtotime($date);
            // strtotime() returns false for unparseable input; date() would
            // then silently produce 1970-01-01.
            if ($timestamp !== false) {
                $article->date = date("Y-m-d", $timestamp);
            }
        }
    }
}
Exemple #2
  * Execute the console command.
  * @return mixed
 /**
  * Scrapes the current price figures for every card that has no price
  * entry for today and stores them as a new CardPrice row.
  *
  * Each figure (available sellers, low price, trend price) defaults to 0
  * when its table cell is not present on the page.
  */
 public function handle()
 {
     $client = new \Goutte\Client();

     foreach (\MkmScraper\Card::all() as $card) {
         if ($card->todaysPrice()->count() >= 1) {
             continue;
         }

         $url = '' . rawurlencode($card->set) . '/' . rawurlencode($card->name);
         $crawler = $client->request('GET', $url);
         \Log::info($url);

         $available = 0;
         $lowfinal = 0;
         $trendfinal = 0;

         try {
             // first() always returns a Crawler object (truthy even when it
             // holds no node), so presence must be tested with count().
             $av = $crawler->filter('.sectioncontent .availTable .row_0  .cell_0_1')->first();
             if ($av->count() > 0) {
                 $available = $av->text();
             }

             $lo = $crawler->filter('.sectioncontent .availTable .row_1  .cell_1_1 span')->first();
             if ($lo->count() > 0) {
                 // Prices use a decimal comma on the page.
                 $lowfinal = str_replace(",", ".", $lo->text());
             }

             $tr = $crawler->filter('.sectioncontent .availTable .row_2  .cell_2_1')->first();
             if ($tr->count() > 0) {
                 // Only the first space-separated token of the cell is kept.
                 $trendnumber = explode(" ", $tr->text());
                 $trendfinal = str_replace(",", ".", $trendnumber[0]);
             }
         } catch (\Exception $e) {
             // Fully qualified so the catch also works inside a namespace.
             print $e->getMessage();
         }

         \MkmScraper\CardPrice::create(array("id_card" => $card->id, "low" => $lowfinal, "trend" => $trendfinal, "sellers" => $available));
     }
 }
Exemple #3
  * AJAX
  * Requisitos para la Certificación tomados de la pagina de ADM
  * @Route("informacion-amd", name="front-getInfoAMD", options={"expose":true})
  * @Method({"POST"})
 /**
  * AJAX endpoint that returns one informational section as JSON.
  *
  * The POST field "getQ" selects an entry from a fixed URL map. Entries whose
  * URL contains "http" are fetched and the HTML of div#contenido-completo is
  * returned; any other entry is read with file_get_contents() and passed
  * through the markdown parser. Unknown keys leave the default "no
  * information" payload untouched.
  *
  * @param Request $req Current request; the POST parameter "getQ" is read.
  *
  * @return \Symfony\Component\HttpFoundation\JsonResponse Payload with the
  *         keys "resp" (bool), "msg" and "content".
  */
 public function getInfoAMD_Action(Request $req)
 {
     $resp = array('resp' => FALSE, 'msg' => 'Sin Informacion al respecto.', 'content' => '0');
     $rutaWeb = $this->container->get('roots_mds')->getRootEdukConti();

     if ($req->request->has('getQ')) {
         $getQ = $req->request->get('getQ');
         $urls = array('reqCert' => '', 'reCert' => '', 'reglamento' => '', 'faqs' => '', 'conv' => '', 'qescerti' => $rutaWeb . '/', 'quiencerti' => $rutaWeb . '/');

         // "getQ" comes straight from the client: only accept known keys so an
         // unexpected value cannot trigger an undefined-index error.
         if (is_string($getQ) && isset($urls[$getQ])) {
             $url = $urls[$getQ];

             if (strpos($url, 'http') !== FALSE) {
                 $cliente = new \Goutte\Client();
                 $crawler = $cliente->request('GET', $url);
                 $nodo = $crawler->filter('div#contenido-completo');
                 // html() throws on an empty node list; treat a missing
                 // container like an (almost) empty one.
                 $puzzle = $nodo->count() > 0 ? $nodo->html() : '';

                 if (strlen($puzzle) < 20) {
                     $resp['resp'] = FALSE;
                     $resp['msg'] = 'Ocurrio un Error al Recuperar, por favor, intentalo nuevamente, mas tarde.';
                 } else {
                     $resp['content'] = $puzzle;
                     $resp['resp'] = TRUE;
                 }
             } else {
                 $markdown = $url !== '' ? file_get_contents($url) : false;
                 // Keep the default payload when the source cannot be read.
                 if ($markdown !== false) {
                     $resp['content'] = $this->container->get('markdown.parser')->transformMarkdown($markdown);
                     $resp['resp'] = TRUE;
                     $resp['msg'] = 'Ok';
                 }
             }
         }
     }

     $respuesta = new \Symfony\Component\HttpFoundation\JsonResponse();

     return $respuesta->setData($resp);
 }
 /**
  * Requests the test page over HTTP and checks the SAPI flags it reports:
  * not CLI, PHP built-in server and web server both true.
  */
 public function testIsWeb()
 {
     $server = new \Goutte\Client();
     // HTTP method tokens are case-sensitive; use the canonical "GET" instead
     // of the mistyped "GEt".
     $response = $server->request('GET', '');
     $body = $response->text();

     $this->assertRegExp('{Is CLI SAPI: false}', $body);
     $this->assertRegExp('{Is PHP Server SAPI: true}', $body);
     $this->assertRegExp('{Is Web Server SAPI: true}', $body);
 }
 /**
  * Crawls every stored event page, follows each link whose href contains
  * "event" and records the cards listed on the linked decklist page.
  *
  * Decklists are numbered in the order their links appear on the event page
  * (starting at 1); that number is stored as "place".
  */
 public function processAll()
 {
     $client = new \Goutte\Client();

     foreach (\MkmScraper\Event::all() as $event) {
         $crawler = $client->request('GET', $event->link);
         $i = 1;

         $crawler->filter("td>div>div a")->each(function ($node) use ($client, $event, &$i) {
             $href = (string) $node->attr('href');
             if (strpos($href, "event") === false) {
                 return;
             }

             $dlcrawler = $client->request('GET', $href);
             $place = $i;

             $dlcrawler->filter("table table td .chosen_tr,table table td .hover_tr")->each(function ($node) use ($event, $place) {
                 // Rows are split at the first space into "<number> <card name>".
                 $split = explode(" ", $node->text(), 2);

                 // A row without a space (or with an empty name) would turn the
                 // query below into LIKE '%' and match an arbitrary card.
                 if (count($split) < 2 || $split[1] === '') {
                     return;
                 }

                 $card = \MkmScraper\Card::where("name", "LIKE", $split[1] . "%")->first();
                 if ($card) {
                     \MkmScraper\DecklistAppearance::create(array("id_card" => $card->id, "number" => $split[0], "place" => $place, "id_event" => $event->id));
                 }
             });

             $i = $i + 1;
         });
     }
 }
Exemple #6
 /**
  * Collects feed items from the first seven listing pages.
  *
  * Page 1 is requested at the base URL, pages 2 to 7 at "<base><n>/". Every
  * <article> element contributes one item built from its first <h3>, <a>
  * and (optional) <img>.
  *
  * @return array List of items, each with the keys "title", "description"
  *               and "link"; empty when no article was found.
  */
 public function execute()
 {
     // Initialised up front so the method returns an array even when no
     // article is found on any page.
     $rss = [];
     $client = new Goutte\Client();

     for ($i = 1; $i < 8; $i++) {
         if ($i == 1) {
             $crawler = $client->request('GET', '');
         } else {
             $crawler = $client->request('GET', '' . $i . '/');
         }

         // each() always passes a node to the callback, so no isset() guard
         // is needed here.
         $crawler->filter('article')->each(function ($node) use (&$rss) {
             $img = '';
             $images = $node->filterXPath('//img');
             if ($images->count()) {
                 $img = $images->attr('src');
             }

             $heading = $node->filterXPath('//h3')->text();
             $link = $node->filterXPath('//a')->attr('href');

             $rss[] = [
                 'title' => $heading,
                 'description' => "<img src='{$img}' />",
                 'link' => $link,
             ];
         });
     }

     return $rss;
 }
 /**
  * Fetches the given URL, records its status and collects the absolute
  * http(s) links found on the page in $this->extractedUrls.
  *
  * @param Url $url URL to crawl. Its status is set to Url::STATUS_ERROR when
  *                 the response status is 400 or higher, Url::STATUS_OK
  *                 otherwise. Extracted links get this URL's depth + 1.
  *
  * @return bool Always true.
  */
 public function process(Url &$url)
 {
     $this->extractedUrls = [];

     $client = new \Goutte\Client();
     $crawler = $client->request('GET', $url->getUrl());
     $url->setStatus($client->getResponse()->getStatus() >= 400 ? Url::STATUS_ERROR : Url::STATUS_OK);

     foreach ($crawler->filter('a')->links() as $link) {
         $eu = new Url($link->getUri(), $url->getDepth() + 1);

         // The pattern must carry its closing delimiter; without it
         // preg_match() fails with a warning and no link is ever collected.
         if (preg_match('#^https?://#', $eu->getUrl())) {
             $this->extractedUrls[] = $eu;
         }
     }

     return true;
 }
Exemple #8
 /**
  * Renders the company page for the SIRET number found in the route
  * parameters.
  *
  * The first Infogreffe search result is used; its detail page is scraped
  * for the legal form ("category") and the activity, each with a primary and
  * a fallback selector. Missing values and the literal text "null" are
  * replaced by placeholder labels.
  *
  * @param mixed $request  Not used by this method.
  * @param mixed $response Response object handed to the view renderer.
  * @param array $params   Route parameters; "siret" is read.
  *
  * @throws \Exception When the search returns no result.
  */
 public static function company($request, $response, $params)
 {
     global $container;

     $types = Yaml::parse(file_get_contents(__DIR__ . '/../types.yml'));
     $results = Infogreffe::search($params['siret']);
     $client = new \Goutte\Client();
     if (empty($results)) {
         throw new \Exception('Numéro SIRET introuvable');
     }

     $page = $client->request('GET', $results[0]->getURL());

     // Legal form: primary selector first, "datapath" attribute as fallback.
     $categoryNode = $page->filter('.first .identTitreValeur p:nth-of-type(5) .data');
     if ($categoryNode->count() == 0) {
         $categoryNode = $page->filter('[datapath="entreprise.personneMorale.identification.formeJuridique.libelle"] p');
     }

     // Activity: same two-step lookup.
     $activityNode = $page->filter('.first .identTitreValeur p:nth-of-type(6) .data');
     if ($activityNode->count() == 0) {
         $activityNode = $page->filter('[datapath="activite.codeNAF"] p:first-of-type a');
     }

     $categoryName = count($categoryNode) > 0
         ? trim($categoryNode->text())
         : 'Forme juridique inconnue';
     if ($categoryName == 'null') {
         $categoryName = 'Forme juridique inconnue';
     }

     $activityName = count($activityNode) > 0
         ? trim($activityNode->text())
         : 'Inconnue';
     if ($activityName == 'null') {
         $activityName = 'Inconnue';
     }

     // The legal form is only appended to the description when it is known.
     $description = 'Informations sur ' . $results[0]->name;
     $description .= $categoryName != 'Forme juridique inconnue' ? ', ' . $categoryName : '';

     $viewData = array(
         'info' => $results[0],
         'types' => $types,
         'category' => $categoryName,
         'activity' => $activityName,
         'url' => $results[0]->getURL(),
         'title' => $results[0]->name,
         'description' => $description,
     );
     $container->view->render($response, 'company.tpl', $viewData);
 }
  * Scaneia link e recupera informaçoes configuradas
  * @param $website
 /**
  * Visits every link in $this->list and extracts the configured fields.
  *
  * For each page, every XPath in $website['scan'] is evaluated; the node
  * text is stripped of tags, cleaned with the $this->stop_words patterns and
  * stored under the filter's key. One record per page is appended to
  * $this->result[$website['name']].
  *
  * @param array $website Site configuration; reads "list.typeLink",
  *                       "baseUrl", "scan" and "name".
  */
 public function scan($website)
 {
     // For each link in the list
     foreach ($this->list as $link) {
         // Build the URL: relative links are prefixed with the base URL
         $target = $website['list']['typeLink'] == 'relative'
             ? $website['baseUrl'] . $link
             : $link;

         // Fetch the page
         $page = $this->goutte->request('GET', $target);

         // Collect the configured fields from the page
         $fields = array();
         foreach ($website['scan'] as $name => $xpath) {
             $raw = strip_tags($page->filterXPath($xpath)->text());
             $fields[$name] = preg_replace($this->stop_words, "", $raw);
         }

         // Append to the result list
         $this->result[$website['name']][] = $fields;
     }
 }
/**
 * Submits a text to the remote spinner form and returns the rewritten text.
 *
 * The form is protected by a math captcha read from #math_captcha_equation:
 * number words ("zero" to "ten") are replaced by digits and the resulting
 * "<a> <op> <b>" expression is evaluated for "+" and "-".
 *
 * @param string $text_to_spin        Text to submit.
 * @param bool   $include_capitalized Value sent as "formNameLabelSpinCapWords".
 *
 * @return string|false Text of #formNameLabelTextAfter, or false when the
 *                      captcha equation cannot be solved.
 */
function spin_article($text_to_spin, $include_capitalized = false)
{
    $client = new \Goutte\Client();
    $spinner_url = '';
    $crawler = $client->request('GET', $spinner_url);

    $math_captcha_equation = $crawler->filter('#math_captcha_equation')->first()->attr('value');
    $math_captcha_equation = str_replace(['zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten'], [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], $math_captcha_equation);
    $math_captcha_equation = explode(' ', $math_captcha_equation);

    // Anything that is not "<a> <op> <b>" cannot be evaluated.
    if (count($math_captcha_equation) < 3) {
        return false;
    }

    // Each case must end with break: without it "+" falls through into the
    // "-" branch and every addition is answered with the difference.
    switch ($math_captcha_equation[1]) {
        case '+':
            $math_captcha_result = $math_captcha_equation[0] + $math_captcha_equation[2];
            break;
        case '-':
            $math_captcha_result = $math_captcha_equation[0] - $math_captcha_equation[2];
            break;
        default:
            return false;
    }

    $crawler = $client->submit($crawler->selectButton('Go!')->form(), ['math_captcha_answer' => $math_captcha_result, 'formNameLabelTextBefore' => $text_to_spin, 'formNameLabelSpinCapWords' => $include_capitalized]);

    return $crawler->filter('#formNameLabelTextAfter')->first()->text();
}
Exemple #11
  * Execute the console command.
  * @return mixed
 /**
  * Imports the price-chart history for every card that received no
  * GraphPrice row during the last week.
  *
  * The chart data is read from the inline script inside #ProductInformation
  * (the JSON between "chartData =" and ";var ctx"). One GraphPrice row is
  * created per label that is not stored yet. When nothing new could be
  * stored, a placeholder row dated "0000-00-00" is written instead.
  *
  * NOTE(review): the placeholder presumably exists so the weekly check skips
  * the card on the next run - confirm.
  */
 public function handle()
 {
     $client = new \Goutte\Client();

     foreach (\MkmScraper\Card::all() as $card) {
         $recent = \MkmScraper\GraphPrice::where("id_card", $card->id)
             ->where("created_at", ">", date('Y-m-d H:i:s', strtotime('-1 week')))
             ->count();
         if ($recent >= 1) {
             continue;
         }

         $crawler = $client->request('GET', '' . rawurlencode($card->set) . '/' . rawurlencode($card->name));

         // Extract the chart JSON; $object stays null when the script, the
         // "chartData =" marker or valid JSON is missing.
         $object = null;
         $scripts = $crawler->filter('#ProductInformation script');
         if ($scripts->count() > 0) {
             $split = explode("chartData =", $scripts->first()->text());
             if (isset($split[1])) {
                 $split2 = explode(";var ctx", $split[1]);
                 $object = json_decode($split2[0]);
             }
         }

         $count = 0;
         if (is_object($object) && isset($object->labels, $object->datasets[0]->data)) {
             foreach ($object->labels as $key => $label) {
                 $parsed = date_create_from_format('d.m.y', $label);
                 if ($parsed === false) {
                     continue;
                 }
                 $date = date_format($parsed, 'Y-m-d');

                 if (\MkmScraper\GraphPrice::where("id_card", $card->id)->where("date", $date)->count() < 1) {
                     \MkmScraper\GraphPrice::create(array("id_card" => $card->id, "date" => $date, "sell" => $object->datasets[0]->data[$key]));
                     // Count the stored rows; without this the placeholder
                     // below would be written on every run.
                     $count++;
                 }
             }
         }

         if ($count == 0) {
             \MkmScraper\GraphPrice::create(array("id_card" => $card->id, "date" => "0000-00-00", "sell" => ''));
         }

         print $card->name . "\n";
     }
 }
Exemple #12

// Crawl script: loads the project configuration, then fetches product data
// for every URL stored for the source "numberonewholesales".
// Include config and initiate
include_once __DIR__ . '/../config/default_config.php';
// Initialize the Goutte HTTP client used for all requests below
$goutte = new Goutte\Client();
// Get Database
$db = new Db();
// The login page is requested once before crawling; the returned crawler is
// not used again in the visible code.
$loginUrl = '';
$crawler = $goutte->request('GET', $loginUrl);
// Default Status code
$status_code = 200;
$count = 1;
// All product URLs previously stored for this source
$data = $db->query("SELECT url from product_urls WHERE source='numberonewholesales'");
$url = '';
foreach ($data as $value) {
    $url = $value['url'];
    // NOTE(review): this reassigns $data, the variable being iterated -
    // confirm that the loop is meant to keep walking the original result set.
    $data = getProductData($goutte, $url, $db);
function getProductData($goutte, $url, $db)
    $crawler = $goutte->request('GET', $url);
    $status_code = $goutte->getResponse()->getStatus();
    $data = array();
    if ($status_code == 200) {
        $attribs = array();
        $domSelector = '//*[@class="productInfoName"]';
        $attribs['title'] = $data = $crawler->filterXPath($domSelector)->each(function ($node) {
Exemple #13

// Crawl script for the source "lashowroom": resets the status of its stored
// products, logs in through the site's form and prepares the listing crawl.
// Include config and initiate
include_once __DIR__ . '/../config/default_config.php';
$goutte = new Goutte\Client();
// Get Database
$db = new Db();
// Mark every stored product of this source with status 0 before crawling
$db->query("UPDATE products_data SET status=0 WHERE source='lashowroom'");
$url = '';
$crawler = $goutte->request('GET', $url);
// Locate the login form through its "Log In" button
$form = $crawler->selectButton('Log In')->form();
// exit;
// NOTE(review): the login credentials are hard-coded in the script - consider
// moving them to configuration.
$crawler = $goutte->submit($form, array('login_id' => '*****@*****.**', 'login_key' => 'uplp'));
/* Master URL: the page counter is appended to it for each listing request */
$crawlUrl = '';
$count = 1;
$status = 1;
$urls = array();
while ($status == 1) {
    // Selector string
    $domSelector = '//*[@class="store-front-browse-gallery"]/ul/li/div/a';
    /*Count selector for page number*/
    $countSelector = '//*[@class="store-front-browse-title"]/text()';
    /*Main crawl , adding count to master*/
    $crawler = $goutte->request('GET', $crawlUrl . $count);
    /*Getting total products string*/
    $pcount = $crawler->filterXPath($countSelector)->each(function ($node) {
        return $node->text();
    $tProducts = '';
// Raise the script time limit to 300 seconds (5 minutes).
ini_set('max_execution_time', 300);

// Home page: renders the index template without any view variables.
$app->get('/', function () use ($app) {
    $html = $app['twig']->render('index.html', array());

    return $html;
});
// Error handler: renders the most specific error template available for the
// status code, except in debug mode.
$app->error(function (\Exception $e, $code) use ($app) {
    if ($app['debug']) {
        // Returning nothing hands the exception on to the next handler, so
        // debug mode is not masked by the generic error template.
        return;
    }

    // 404.html, or 40x.html, or 4xx.html, or error.html
    $templates = array('errors/' . $code . '.html', 'errors/' . substr($code, 0, 2) . 'x.html', 'errors/' . substr($code, 0, 1) . 'xx.html', 'errors/default.html');

    return new Response($app['twig']->resolveTemplate($templates)->render(array('code' => $code)), $code);
});
// Debug route: scrapes the remote table, keeps the first and last cell text
// of every row, feeds the pairs to the Extractor and returns a dump of the
// extracted full names.
$app->get('/untranslated', function () use ($app) {
    $client = new Goutte\Client();
    $crawler = $client->request('GET', '');
    $crawler = $crawler->filter("table > tr");

    $nodeValues = $crawler->each(function (Crawler $node, $i) {
        $first = $node->children()->first()->text();
        $last = $node->children()->last()->text();

        return array($first, $last);
    });

    $extractor = new Extractor($nodeValues);

    // var_dump() prints directly and returns null, which would leave the
    // Response body empty; capture its output and send that instead.
    ob_start();
    var_dump($extractor->getFullNames());
    $dump = ob_get_clean();

    return new Response($dump);
});
$app->get('/trello', function () use($app) {
    $client = new Client();
    //veja aqui para gerar a key e o token de uso
    $client->authenticate('afdadfasdfasfd', 'adsfasdfsdfafas', Client::AUTH_URL_CLIENT_ID);
    $board = $client->api('board')->show('j6Nuulpn');
  * @param int    $id          Facebook account id
  * @param string $accessToken Facebook access token
  * @return bool Facebook account status
 /**
  * Checks that the access token belongs to the given Facebook account.
  *
  * The token is inserted into the request URL and the JSON response is
  * decoded; the account is valid when the returned "id" equals $id.
  *
  * @param int    $id          Expected Facebook account id.
  * @param string $accessToken Facebook access token.
  *
  * @return bool True when the response id matches $id; false when it does not
  *              or when the response is not a JSON object.
  *
  * @throws InvalidPropertyUserException When the response carries an error.
  */
 protected function isValidFacebookAccount($id, $accessToken)
 {
     $client = new \Goutte\Client();
     $client->request('GET', sprintf('', $accessToken));
     $response = json_decode($client->getResponse()->getContent());

     // json_decode() returns null for a body that is not valid JSON.
     if (!is_object($response)) {
         return false;
     }

     // Successful responses carry no "error" property; reading it
     // unconditionally raises an undefined-property notice.
     if (!empty($response->error)) {
         throw new InvalidPropertyUserException($response->error->message);
     }

     return isset($response->id) && $response->id == $id;
 }
Exemple #16

require_once __DIR__ . '/../vendor/autoload.php';
require_once __DIR__ . '/../config/config.php';
// Blog watcher: for every configured blog, reads the title of the newest
// entry and compares it with the title stored in data/<key>.db.
$blogConfigFile = __DIR__ . '/../config/blog_config.php';
// The config file returns an array; its "blogConfig" entry maps a key to the
// blog's settings ("url" and "text" are read below).
$arrBlogConfig = (include $blogConfigFile);
$blogConfig = $arrBlogConfig["blogConfig"];
$cli = new Goutte\Client();
foreach ($blogConfig as $db => $item) {
    $crawler = $cli->request('GET', $item['url']);
    // Each entry is a [link text, href] pair taken from the "p.ttl a" links
    $urls = $crawler->filter('p.ttl a')->extract(array('_text', 'href'));
    $fileName = __DIR__ . '/../data/' . $db . '.db';
    // Take the topmost blog entry
    // [0] -> title
    // [1] -> url
    $title = $urls[0][0];
    $blog_url = $urls[0][1];
    $newData = $title;
    // Title stored by the previous run
    $oldData = file_get_contents($fileName);
    if ($newData !== $oldData) {
        // Build the notification text: configured text, title, entry URL
        $text = $item['text'] . PHP_EOL . PHP_EOL;
        $text .= $title . PHP_EOL . PHP_EOL;
        $text .= '' . $blog_url;
        $text = urlencode($text);
        // NOTE(review): $url is built here but never requested in the visible
        // code - confirm where the notification is actually sent.
        $url = "" . SLACK_API_KEY . "&channel=%23" . $db . "_blog&text=" . $text;
    // Remember the current title for the next run
    file_put_contents($fileName, $newData);
Exemple #17

// Crawl script for the source "lashowroom": resets the status of its stored
// products, logs in through the site's form and fetches one product page.
// Include config and initiate
include_once __DIR__ . '/../config/default_config.php';
$goutte = new Goutte\Client();
// Get Database
$db = new Db();
// Mark every stored product of this source with status 0 before crawling
$db->query("UPDATE products_data SET status=0 WHERE source='lashowroom'");
$url = '';
$crawler = $goutte->request('GET', $url);
// Locate the login form through its "Log In" button
$form = $crawler->selectButton('Log In')->form();
// exit;
// NOTE(review): the login credentials are hard-coded in the script - consider
// moving them to configuration.
$crawler = $goutte->submit($form, array('login_id' => '*****@*****.**', 'login_key' => 'uplp'));
// Fetch a single product with the logged-in client
$product = getProductData($goutte, '');
function getProductData($goutte, $url)
    $crawler = $goutte->request('GET', $url);
    $status_code = $goutte->getResponse()->getStatus();
    $data = array();
    $result = array();
    if ($status_code == 200) {
        $filterNav = '//*[@class="item-detail-header"]/p/a';
        $catStruct = $crawler->filterXPath($filterNav)->each(function ($node) {
            return $node->html();
        foreach ($catStruct as $key => $catVal) {
            $catStruct[$key] = trim(str_replace(' ', '', $catVal));
        $result['category'] = $catStruct[1];
 /**
  * Requests the test page over HTTP and checks that it reports the
  * requester as local.
  */
 public function testIsLocal()
 {
     $server = new \Goutte\Client();
     // HTTP method tokens are case-sensitive; use the canonical "GET" instead
     // of the mistyped "GEt".
     $response = $server->request('GET', '');

     $this->assertRegExp('{Is Local Requester: true}', $response->text());
 }
Exemple #19
    $crawler = $client->request('GET', '');
    $link = $crawler->filter("td>div>div a")->each(function ($node) use($client) {
        if (strpos($node->attr('href'), "event") !== false) {
            print "" . $node->attr('href') . " - " . $node->text() . "<br>";
            $dlcrawler = $client->request('GET', "" . $node->attr('href'));
            $link = $dlcrawler->filter("table table td .chosen_tr,table table td .hover_tr")->each(function ($node) {
                print "<b>" . $node->text() . "</b><br>";
// Route "htmlscrape": imports the price-chart history of every card and
// prints the time spent on each one.
Route::get("htmlscrape", function () {
    $client = new \Goutte\Client();

    foreach (\MkmScraper\Card::all() as $card) {
        // microtime(true) returns a float in seconds; without the argument it
        // returns a "msec sec" string that cannot be subtracted meaningfully.
        $time = microtime(true);

        $crawler = $client->request('GET', '' . rawurlencode($card->set) . '/' . rawurlencode($card->name));
        $available = $crawler->filter('#ProductInformation script')->first()->text();

        // The chart JSON sits between "chartData =" and ";var ctx".
        $split = explode("chartData =", $available);
        $object = null;
        if (isset($split[1])) {
            $split2 = explode(";var ctx", $split[1]);
            $object = json_decode($split2[0]);
        }

        // Skip the import (but still report the card) when no usable chart
        // data was found.
        if (is_object($object) && isset($object->labels, $object->datasets[0]->data)) {
            foreach ($object->labels as $key => $label) {
                $parsed = date_create_from_format('d.m.y', $label);
                if ($parsed === false) {
                    continue;
                }
                $date = date_format($parsed, 'Y-m-d');

                if (\MkmScraper\GraphPrice::where("id_card", $card->id)->where("date", $date)->count() < 1) {
                    \MkmScraper\GraphPrice::create(array("id_card" => $card->id, "date" => $date, "sell" => $object->datasets[0]->data[$key]));
                }
            }
        }

        // Convert seconds to the milliseconds announced by the label.
        print $card->name . " - " . round((microtime(true) - $time) * 1000) . " ms </br>";
    }
});
// Route "decklists/wizards": returns the view of the same name.
Route::get("decklists/wizards", function () {
    $page = view("decklists/wizards");

    return $page;
});
Exemple #20
// Service definition for Goutte: required services, git source, autoload
// mapping and a small usage example that prints the title of a fetched page.
$config = [
    'require_services' => [
        'guzzle',
        'sf_browserkit',
        'sf_css_selector',
        'sf_dom_crawler',
        'promise',
    ],
    'git_urls' => ['' => 'goutte/'],
    'autoload_config' => ['goutte/Goutte/' => 'Goutte'],
    'example' => function () {
        $browser = new Goutte\Client();
        $page = $browser->request('GET', '');
        $page->filter('head > title')->each(function ($titleNode) {
            print $titleNode->text() . "\n";
        });
    },
];

// When the including code only wants the definition, hand it back and stop.
if ($return_config) {
    return $config;
}

// Otherwise load the autoloader and run it with this definition.
require_once __DIR__ . '/_yf_autoloader.php';
new yf_autoloader($config);