/**
 * Re-scrape every stored article and persist the extracted fields.
 *
 * The CSS selectors used depend on the article's publisher id ("1" to "4").
 * Publishers "3" and "4" only yield the body text, so the stored title and
 * date are left untouched for them. Articles with any other publisher id are
 * skipped, because there is nothing to extract for them.
 *
 * @return void
 */
public function processAllArticles()
{
    foreach (\MkmScraper\Article::all() as $article) {
        $client = new \Goutte\Client();
        $crawler = $client->request('GET', $article->link);

        // Reset per article so nothing leaks over from the previous iteration.
        $text = null;
        $title = null;
        $date = null;

        switch ($article->publisher) {
            case "1":
                $text = $crawler->filter("#article_content")->text();
                $title = $crawler->filter("#article_title h2")->text();
                $date = $crawler->filter("#article_date")->text();
                break;

            case "2":
                $text = $crawler->filter(".postContent")->text();
                $title = $crawler->filter(".postTitle")->text();
                // The date is taken from the part of the author line after "//".
                $au = explode("//", $crawler->filter(".byAuthor")->text());
                if (isset($au[1])) {
                    $date = trim(preg_replace('#[^A-Za-z0-9-, /]#', '', $au[1]));
                }
                break;

            case "3":
                $text = $crawler->filter("#blackborder_main_wrapper .content .field-name-body")->text();
                echo $text;
                break;

            case "4":
                $text = $crawler->filter(".articleBody")->text();
                echo $text;
                break;

            default:
                // Unknown publisher: no selectors are defined, so do not touch the row.
                continue 2;
        }

        $article->text = $text;

        if ($title !== null) {
            $article->title = $title;
        }

        if ($date !== null) {
            // strtotime() returns false for unparsable input; storing that
            // would silently write 1970-01-01.
            $timestamp = strtotime($date);
            if ($timestamp !== false) {
                $article->date = date("Y-m-d", $timestamp);
            }
        }

        $article->save();
    }
}
/**
 * Execute the console command.
 *
 * For every card whose todaysPrice() relation is empty, requests the card's
 * product page and stores one CardPrice row holding the seller count, the low
 * price and the trend price. A value that cannot be found on the page is
 * stored as 0.
 *
 * @return mixed
 */
public function handle()
{
    $client = new \Goutte\Client();

    foreach (\MkmScraper\Card::all() as $card) {
        if ($card->todaysPrice()->count() >= 1) {
            continue;
        }

        $url = 'https://www.magiccardmarket.eu/Products/Singles/' . rawurlencode($card->set) . '/' . rawurlencode($card->name);
        $crawler = $client->request('GET', $url);
        \Log::info($url);

        $available = 0;
        $lowfinal = 0;
        $trendfinal = 0;

        try {
            // filter() always returns a Crawler object, so emptiness has to be
            // tested with count(); calling text() on an empty list throws.
            $av = $crawler->filter('.sectioncontent .availTable .row_0 .cell_0_1');
            if ($av->count() > 0) {
                $available = $av->first()->text();
            }

            $lo = $crawler->filter('.sectioncontent .availTable .row_1 .cell_1_1 span');
            if ($lo->count() > 0) {
                // Prices use a decimal comma on the page.
                $lowfinal = str_replace(",", ".", $lo->first()->text());
            }

            $tr = $crawler->filter('.sectioncontent .availTable .row_2 .cell_2_1');
            if ($tr->count() > 0) {
                // Only the first space-separated token of the cell is the number.
                $trendnumber = explode(" ", $tr->first()->text());
                $trendfinal = str_replace(",", ".", $trendnumber[0]);
            }
        } catch (\Exception $e) {
            // Fully qualified so the catch also matches inside a namespace.
            print $e->getMessage();
        }

        \MkmScraper\CardPrice::create(array(
            "id_card" => $card->id,
            "low" => $lowfinal,
            "trend" => $trendfinal,
            "sellers" => $available,
        ));
    }
}
/**
 * AJAX
 *
 * Certification requirements taken from the ADM website.
 *
 * The POST parameter "getQ" selects which document to return. Remote
 * documents are scraped from adm.org.mx; local ones are Markdown files that
 * are rendered to HTML. The JSON answer always has the keys "resp" (bool),
 * "msg" and "content".
 *
 * @Route("informacion-amd", name="front-getInfoAMD", options={"expose":true})
 * @Method({"POST"})
 */
public function getInfoAMD_Action(Request $req)
{
    $resp = array('resp' => FALSE, 'msg' => 'Sin Informacion al respecto.', 'content' => '0');
    $rutaWeb = $this->container->get('roots_mds')->getRootEdukConti();

    if ($req->request->has('getQ')) {
        $getQ = $req->request->get('getQ');
        $urls = array(
            'reqCert' => 'http://adm.org.mx/requisitos-certificacion.php',
            'reCert' => 'http://adm.org.mx/recertificacion.php',
            'reglamento' => 'http://adm.org.mx/reglamento-certificacion.php',
            'faqs' => 'http://adm.org.mx/certificacion-faqs.php',
            'conv' => 'http://adm.org.mx/convocatorias-eucodon.php',
            'qescerti' => $rutaWeb . '/qescerti.md',
            'quiencerti' => $rutaWeb . '/quiencerti.md',
        );

        // getQ is client input: an unknown (or non-string) key keeps the
        // default "no information" answer instead of indexing a missing entry.
        if (is_string($getQ) && isset($urls[$getQ])) {
            if (strpos($urls[$getQ], 'http') !== FALSE) {
                $cliente = new \Goutte\Client();
                $crawler = $cliente->request('GET', $urls[$getQ]);

                // html() throws on an empty node list; treat a missing
                // container like an empty one so the error message is returned.
                $nodo = $crawler->filter('div#contenido-completo');
                $puzzle = $nodo->count() > 0 ? $nodo->html() : '';

                if (strlen($puzzle) < 20) {
                    $resp['resp'] = FALSE;
                    $resp['msg'] = 'Ocurrio un Error al Recuperar, por favor, intentalo nuevamente, mas tarde.';
                } else {
                    $resp['content'] = $puzzle;
                    $resp['resp'] = TRUE;
                }
            } else {
                $markdown = file_get_contents($urls[$getQ]);

                if ($markdown === FALSE) {
                    // Unreadable local file: report it rather than rendering "false".
                    $resp['resp'] = FALSE;
                    $resp['msg'] = 'Ocurrio un Error al Recuperar, por favor, intentalo nuevamente, mas tarde.';
                } else {
                    $resp['content'] = $this->container->get('markdown.parser')->transformMarkdown($markdown);
                    $resp['resp'] = TRUE;
                    $resp['msg'] = 'Ok';
                }
            }
        }
    }

    $respuesta = new \Symfony\Component\HttpFoundation\JsonResponse();

    return $respuesta->setData($resp);
}
/**
 * Requests the test page on 127.0.0.1:8888 and checks the SAPI flags it prints.
 */
public function testIsWeb()
{
    $server = new \Goutte\Client();
    // HTTP method names are case-sensitive; use the canonical spelling
    // instead of relying on the client to normalise "GEt".
    $response = $server->request('GET', 'http://127.0.0.1:8888/webroot/test.php');

    // Extract the page text once and run all three checks against it.
    $body = $response->text();

    $this->assertRegExp('{Is CLI SAPI: false}', $body);
    $this->assertRegExp('{Is PHP Server SAPI: true}', $body);
    $this->assertRegExp('{Is Web Server SAPI: true}', $body);
}
/**
 * Fetch a URL, record whether it answered with an error status, and collect
 * the links on the page that point at fleapop.com.
 *
 * The collected links are stored in $this->extractedUrls (reset on every
 * call), each one level deeper than the URL being processed.
 *
 * @param Url $url URL to fetch; its status is updated in place
 *
 * @return void
 */
public function process(Url &$url)
{
    $this->extractedUrls = [];

    $client = new \Goutte\Client();
    $crawler = $client->request('GET', $url->getUrl());

    // Any 4xx/5xx answer marks the URL as failed.
    $url->setStatus($client->getResponse()->getStatus() >= 400 ? Url::STATUS_ERROR : Url::STATUS_OK);

    foreach ($crawler->filter('a')->links() as $link) {
        $eu = new Url($link->getUri(), $url->getDepth() + 1);

        // The dot is escaped and the host must end right after ".com", so
        // look-alike hosts such as "fleapop.com.evil.tld" or "fleapopXcom"
        // are not treated as in-scope.
        if (preg_match('#^https?://fleapop\.com(?:[/:?\#]|$)#i', $eu->getUrl())) {
            $this->extractedUrls[] = $eu;
        }
    }
}
/**
 * Render the company page for the SIRET number given in the route parameters.
 *
 * Looks the company up through Infogreffe, scrapes its legal form and its
 * activity from the first result's page (each with a fallback selector) and
 * renders the "company.tpl" template.
 *
 * @param mixed $request  Incoming request (not used here)
 * @param mixed $response Response handed to the view renderer
 * @param array $params   Route parameters; 'siret' is read
 *
 * @throws \Exception when the search returns no result
 */
public static function company($request, $response, $params)
{
    global $container;

    $unknownCategory = 'Forme juridique inconnue';
    $unknownActivity = 'Inconnue';

    $types = Yaml::parse(file_get_contents(__DIR__ . '/../types.yml'));
    $results = Infogreffe::search($params['siret']);
    if (empty($results)) {
        throw new \Exception('Numéro SIRET introuvable');
    }

    $browser = new \Goutte\Client();
    $page = $browser->request('GET', $results[0]->getURL());

    // Legal form: primary selector first, datapath-based selector as fallback.
    $categoryNode = $page->filter('.first .identTitreValeur p:nth-of-type(5) .data');
    if ($categoryNode->count() == 0) {
        $categoryNode = $page->filter('[datapath="entreprise.personneMorale.identification.formeJuridique.libelle"] p');
    }

    // Activity: same two-step lookup.
    $activityNode = $page->filter('.first .identTitreValeur p:nth-of-type(6) .data');
    if ($activityNode->count() == 0) {
        $activityNode = $page->filter('[datapath="activite.codeNAF"] p:first-of-type a');
    }

    // A missing node and the literal text "null" both count as unknown.
    $categoryName = count($categoryNode) > 0 ? trim($categoryNode->text()) : $unknownCategory;
    if ($categoryName == 'null') {
        $categoryName = $unknownCategory;
    }

    $activityName = count($activityNode) > 0 ? trim($activityNode->text()) : $unknownActivity;
    if ($activityName == 'null') {
        $activityName = $unknownActivity;
    }

    // The legal form is only appended to the description when it is known.
    $description = 'Informations sur ' . $results[0]->name;
    if ($categoryName != $unknownCategory) {
        $description .= ', ' . $categoryName;
    }

    $templateData = array(
        'info' => $results[0],
        'types' => $types,
        'category' => $categoryName,
        'activity' => $activityName,
        'url' => $results[0]->getURL(),
        'title' => $results[0]->name,
        'description' => $description,
    );

    $container->view->render($response, 'company.tpl', $templateData);
}
/**
 * Walk every stored event, follow each link on its page whose href contains
 * "event", and record which known cards appear in the linked decklist.
 *
 * Decklist rows are expected to read "<number> <card name>". The counter
 * passed as "place" starts at 1 for each event and increases by one per
 * followed link.
 *
 * @return void
 */
public function processAll()
{
    $client = new \Goutte\Client();

    foreach (\MkmScraper\Event::all() as $event) {
        $crawler = $client->request('GET', $event->link);
        $i = 1;

        $crawler->filter("td>div>div a")->each(function ($node) use ($client, $event, &$i) {
            // attr() yields null for anchors without an href.
            $href = (string) $node->attr('href');
            if (strpos($href, "event") === false) {
                return;
            }

            $dlcrawler = $client->request('GET', $href);
            $place = $i;

            $dlcrawler->filter("table table td .chosen_tr,table table td .hover_tr")->each(function ($node) use ($event, $place) {
                $split = explode(" ", trim($node->text()), 2);

                // Without a name part the query below would degrade to
                // LIKE '%' and attach an arbitrary card to the decklist.
                if (count($split) < 2) {
                    return;
                }
                $name = trim($split[1]);
                if ($name === '') {
                    return;
                }

                $card = \MkmScraper\Card::where("name", "LIKE", $name . "%")->first();
                if ($card) {
                    \MkmScraper\DecklistAppearance::create(array(
                        "id_card" => $card->id,
                        "number" => $split[0],
                        "place" => $place,
                        "id_event" => $event->id,
                    ));
                }
            });

            $i = $i + 1;
        });
    }
}
/**
 * Submit a text to paraphrasing-tool.com and return the rewritten text.
 *
 * The form is protected by a spelled-out arithmetic captcha ("<a> + <b>" or
 * "<a> - <b>"), which is solved before the form is submitted.
 *
 * @param string $text_to_spin        Text to rewrite
 * @param bool   $include_capitalized Value sent as the form's
 *                                    "formNameLabelSpinCapWords" field
 *
 * @return string|false Rewritten text, or false when the captcha, the submit
 *                      button or the result field cannot be found or parsed
 */
function spin_article($text_to_spin, $include_capitalized = false)
{
    $client = new \Goutte\Client();
    $spinner_url = 'http://paraphrasing-tool.com/';
    $crawler = $client->request('GET', $spinner_url);

    // attr() on an empty node list throws, so check for the field first.
    $captcha_field = $crawler->filter('#math_captcha_equation');
    if ($captcha_field->count() === 0) {
        return false;
    }

    // Turn number words into digits, then split into operand / operator / operand.
    $math_captcha_equation = str_replace(
        ['zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten'],
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
        (string) $captcha_field->first()->attr('value')
    );
    $math_captcha_equation = explode(' ', trim($math_captcha_equation));

    // Anything that is not "<number> <op> <number>" cannot be solved.
    if (count($math_captcha_equation) < 3
        || !is_numeric($math_captcha_equation[0])
        || !is_numeric($math_captcha_equation[2])
    ) {
        return false;
    }

    $math_captcha_result = null;
    switch ($math_captcha_equation[1]) {
        case '+':
            $math_captcha_result = $math_captcha_equation[0] + $math_captcha_equation[2];
            break;
        case '-':
            $math_captcha_result = $math_captcha_equation[0] - $math_captcha_equation[2];
            break;
    }
    if (is_null($math_captcha_result)) {
        return false;
    }

    // form() throws when the button is absent from the page.
    $submit_button = $crawler->selectButton('Go!');
    if ($submit_button->count() === 0) {
        return false;
    }

    $crawler = $client->submit($submit_button->form(), [
        'math_captcha_answer' => $math_captcha_result,
        'formNameLabelTextBefore' => $text_to_spin,
        'formNameLabelSpinCapWords' => $include_capitalized,
    ]);

    $result_field = $crawler->filter('#formNameLabelTextAfter');
    if ($result_field->count() === 0) {
        return false;
    }

    return $result_field->first()->text();
}
/**
 * Scrape the first seven pages of the buzztache.com "no" category and return
 * one feed entry per article.
 *
 * @return array List of arrays with the keys 'title', 'description' (an
 *               <img> tag built from the article image) and 'link'; empty
 *               when no article was found
 */
public function execute()
{
    // Must start out as an array: otherwise a scrape that finds no article
    // returns an undefined variable.
    $rss = [];
    $client = new Goutte\Client();

    for ($i = 1; $i < 8; $i++) {
        // Page 1 lives at the category root, later pages under /page/<n>/.
        $url = 'http://buzztache.com/category/no/';
        if ($i > 1) {
            $url .= 'page/' . $i . '/';
        }
        $crawler = $client->request('GET', $url);

        $crawler->filter('article')->each(function ($node) use (&$rss) {
            $headings = $node->filterXPath('//h3');
            $anchors = $node->filterXPath('//a');

            // text()/attr() throw on an empty node list; an article without
            // a heading or a link cannot become a feed entry, so skip it.
            if ($headings->count() === 0 || $anchors->count() === 0) {
                return;
            }

            $img = '';
            $images = $node->filterXPath('//img');
            if ($images->count()) {
                $img = $images->attr('src');
            }

            $arr = [];
            $arr['title'] = $headings->text();
            $arr['description'] = "<img src='{$img}' />";
            $arr['link'] = $anchors->attr('href');
            $rss[] = $arr;
        });
    }

    return $rss;
}
/**
 * Execute the console command.
 *
 * For every card that has no GraphPrice row created within the last week,
 * requests the card's product page, reads the "chartData" JSON embedded in
 * the page's script and stores one GraphPrice row per chart date that is not
 * stored yet. When nothing new was stored, a placeholder row with the date
 * "0000-00-00" is created instead.
 *
 * @return mixed
 */
public function handle()
{
    $client = new \Goutte\Client();

    foreach (\MkmScraper\Card::all() as $card) {
        $recentRows = \MkmScraper\GraphPrice::where("id_card", $card->id)
            ->where("created_at", ">", date('Y-m-d H:i:s', strtotime('-1 week')))
            ->count();
        if ($recentRows >= 1) {
            continue;
        }

        $crawler = $client->request('GET', 'https://www.magiccardmarket.eu/Products/Singles/' . rawurlencode($card->set) . '/' . rawurlencode($card->name));

        // text() throws on an empty node list; a page without the script
        // block must not abort the run for all remaining cards. No
        // placeholder is written, so the card is tried again next time.
        $script = $crawler->filter('#ProductInformation script');
        if ($script->count() === 0) {
            continue;
        }

        // The JSON sits between "chartData =" and ";var ctx".
        $object = null;
        $split = explode("chartData =", $script->first()->text());
        if (isset($split[1])) {
            $split2 = explode(";var ctx", $split[1]);
            $object = json_decode($split2[0]);
        }

        $count = 0;
        if (is_object($object) && isset($object->labels, $object->datasets[0]->data) && is_array($object->labels)) {
            $prices = $object->datasets[0]->data;

            foreach ($object->labels as $key => $label) {
                // Labels are "d.m.y"; skip any that do not parse.
                $parsed = date_create_from_format('d.m.y', $label);
                if ($parsed === false) {
                    continue;
                }
                $date = date_format($parsed, 'Y-m-d');

                if (\MkmScraper\GraphPrice::where("id_card", $card->id)->where("date", $date)->count() < 1) {
                    \MkmScraper\GraphPrice::create(array(
                        "id_card" => $card->id,
                        "date" => $date,
                        "sell" => isset($prices[$key]) ? $prices[$key] : null,
                    ));
                    $count++;
                }
            }
        }

        if ($count == 0) {
            // Placeholder row: its created_at makes the weekly check above
            // skip this card on the following runs.
            \MkmScraper\GraphPrice::create(array("id_card" => $card->id, "date" => "0000-00-00", "sell" => ''));
        }

        print $card->name . "\n";
    }
}
/**
 * Scans every link in $this->list and extracts the configured information.
 *
 * One row per link is appended to $this->result under the website's name;
 * each row maps a key of $website['scan'] to the text matched by that key's
 * XPath expression, with tags and stop words removed.
 *
 * @param $website
 */
public function scan($website)
{
    foreach ($this->list as $entry) {
        // Relative links are prefixed with the site's base URL.
        $target = ($website['list']['typeLink'] == 'relative')
            ? $website['baseUrl'] . $entry
            : $entry;

        // Fetch the page.
        $page = $this->goutte->request('GET', $target);

        // Pull every configured field out of the page.
        $fields = array();
        foreach ($website['scan'] as $field => $xpath) {
            $raw = strip_tags($page->filterXPath($xpath)->text());
            $fields[$field] = preg_replace($this->stop_words, "", $raw);
        }

        // Append to the result list of this website.
        $this->result[$website['name']][] = $fields;
    }
}
<?php
include_once __DIR__ . '/../goutte.phar';

$goutte = new Goutte\Client();

// Open the lashowroom.com login page and submit its "Log In" form.
// NOTE(review): the credentials are hard-coded below; consider loading them
// from configuration that is kept out of version control. The plaintext
// account and password that used to be repeated in a comment here, together
// with the commented-out experiment against another site, were removed.
$url = 'https://www.lashowroom.com/login?previous=/wholesalefashionistas/browse/category/3/srd/small/70/1';
$crawler = $goutte->request('GET', $url);
$form = $crawler->selectButton('Log In')->form();
$crawler = $goutte->submit($form, array('head_login_id' => '*****@*****.**', 'head_login_key' => 'uplp'));

// Listing URL without its final path segment, and a counter starting at 1
// (presumably the page number appended to it; confirm against the code that
// follows).
$crawlUrl = 'https://www.lashowroom.com/wholesalefashionistas/browse/category/3/srd/small/70/';
$count = 1;
<?php // Include config and initiate include_once __DIR__ . '/../config/default_config.php'; includeMyFiles(); // Initialize goutte $goutte = new Goutte\Client(); $log = fopen(date("Y-m-d_H-i-s", time()) . ".log", "a"); fwrite($log, "[START]\r\n"); fwrite($log, "[STARTED AT]" . time() . "\r\n"); // Get Database $db = new Db(); $db->query("UPDATE products_data SET status=0 WHERE source='numberonewholesales'"); // URL and EndPoints $baseURL = 'http://numberonewholesales.com/'; $endPoints['leggings'] = 'http://numberonewholesales.com/leggings-c-1067.html'; $endPoints['kids'] = 'http://numberonewholesales.com/kids-wear-c-1113.html'; $endPoints['palazzo'] = 'http://numberonewholesales.com/palazzo-pants-c-1112.html'; $endPoints['skirts'] = 'http://numberonewholesales.com/skirts-c-1080.html'; $endPoints['shorts'] = 'http://numberonewholesales.com/shorts-c-1079.html'; $endPoints['pants'] = 'http://numberonewholesales.com/pants-c-1084.html'; $loginUrl = 'https://numberonewholesales.com/login.php'; $crawler = $goutte->request('GET', $loginUrl); $form = $crawler->selectButton('Sign In')->form(); // exit; $crawler = $goutte->submit($form, array('email_address' => '*****@*****.**', 'password' => 'abhishek')); // Default Status code $status_code = 200; $count = 1; $productUrls = array(); foreach ($endPoints as $key => $value) {
/**
 * Ask the Facebook Graph API who owns the access token and compare that
 * account id with the expected one.
 *
 * @param int    $id          Facebook account id
 * @param string $accessToken Facebook access token
 *
 * @return bool Facebook account status: true when the token's account id
 *              matches $id; false when it differs or the response cannot be
 *              decoded
 *
 * @throws InvalidPropertyUserException when the API answers with an error
 */
protected function isValidFacebookAccount($id, $accessToken)
{
    $client = new \Goutte\Client();
    // Encode the token so reserved characters cannot break the query string.
    $client->request('GET', sprintf('https://graph.facebook.com/me?access_token=%s', rawurlencode($accessToken)));
    $response = json_decode($client->getResponse()->getContent());

    // json_decode() returns null for an empty or non-JSON body.
    if (!is_object($response)) {
        return false;
    }

    // isset() avoids reading an undefined property on successful responses.
    if (isset($response->error)) {
        throw new InvalidPropertyUserException(
            isset($response->error->message) ? $response->error->message : 'Facebook API error'
        );
    }

    // A response without an id must never match, even when $id is empty.
    return isset($response->id) && $response->id == $id;
}
<?php // Include config and initiate include_once __DIR__ . '/../config/default_config.php'; includeMyFiles(); $goutte = new Goutte\Client(); // Get Database $db = new Db(); $db->query("UPDATE products_data SET status=0 WHERE source='lashowroom'"); $url = 'https://www.lashowroom.com/login?previous=/wholesalefashionistas/browse/category/3/srd/small/70/1'; $crawler = $goutte->request('GET', $url); $form = $crawler->selectButton('Log In')->form(); // exit; $crawler = $goutte->submit($form, array('login_id' => '*****@*****.**', 'login_key' => 'uplp')); $product = getProductData($goutte, 'https://www.lashowroom.com/wholesalefashionistas/item/1215'); print_r($product); function getProductData($goutte, $url) { $crawler = $goutte->request('GET', $url); $status_code = $goutte->getResponse()->getStatus(); $data = array(); $result = array(); if ($status_code == 200) { $filterNav = '//*[@class="item-detail-header"]/p/a'; $catStruct = $crawler->filterXPath($filterNav)->each(function ($node) { return $node->html(); }); foreach ($catStruct as $key => $catVal) { $catStruct[$key] = trim(str_replace(' ', '', $catVal)); } $result['category'] = $catStruct[1];
<?php // Include config and initiate include_once __DIR__ . '/../config/default_config.php'; includeMyFiles(); // Initialize goutte $goutte = new Goutte\Client(); // Get Database $db = new Db(); $loginUrl = 'http://www.colorfive.com/sign-in'; $crawler = $goutte->request('GET', $loginUrl); print_r($crawler->html()); exit; // Default Status code $status_code = 200; $count = 1; $data = $db->query("SELECT url from product_urls WHERE source='numberonewholesales'"); $url = ''; foreach ($data as $value) { $url = $value['url']; $data = getProductData($goutte, $url, $db); } function getProductData($goutte, $url, $db) { $crawler = $goutte->request('GET', $url); $status_code = $goutte->getResponse()->getStatus(); $data = array(); if ($status_code == 200) { $attribs = array(); $domSelector = '//*[@class="productInfoName"]'; $attribs['title'] = $data = $crawler->filterXPath($domSelector)->each(function ($node) {
//Request::setTrustedProxies(array('127.0.0.1')); ini_set('max_execution_time', 300); //300 seconds = 5 minutes $app->get('/', function () use($app) { return $app['twig']->render('index.html', array()); })->bind('homepage'); $app->error(function (\Exception $e, $code) use($app) { if ($app['debug']) { return; } // 404.html, or 40x.html, or 4xx.html, or error.html $templates = array('errors/' . $code . '.html', 'errors/' . substr($code, 0, 2) . 'x.html', 'errors/' . substr($code, 0, 1) . 'xx.html', 'errors/default.html'); return new Response($app['twig']->resolveTemplate($templates)->render(array('code' => $code)), $code); }); $app->get('/untranslated', function () use($app) { $client = new Goutte\Client(); $crawler = $client->request('GET', 'http://doc.php.net/revcheck.php?p=missfiles&lang=pt_BR'); $crawler = $crawler->filter("table > tr"); $nodeValues = $crawler->each(function (Crawler $node, $i) { $first = $node->children()->first()->text(); $last = $node->children()->last()->text(); return array($first, $last); }); $extractor = new Extractor($nodeValues); return new Response(var_dump($extractor->getFullNames())); })->bind('untranslated'); $app->get('/trello', function () use($app) { $client = new Client(); //veja aqui para gerar a key e o token de uso //https://trello.com/c/jObnWvl1/25-generating-your-developer-key $client->authenticate('afdadfasdfasfd', 'adsfasdfsdfafas', Client::AUTH_URL_CLIENT_ID);
<?php // Include config and initiate include_once __DIR__ . '/../config/default_config.php'; includeMyFiles(); // Initialize goutte $goutte = new Goutte\Client(); // Get Database $db = new Db(); $loginUrl = 'https://numberonewholesales.com/login.php'; $crawler = $goutte->request('GET', $loginUrl); $form = $crawler->selectButton('Sign In')->form(); // exit; $crawler = $goutte->submit($form, array('email_address' => '*****@*****.**', 'password' => 'abhishek')); // Default Status code $status_code = 200; $count = 1; $data = $db->query("SELECT url from product_urls WHERE source='numberonewholesales'"); $url = ''; foreach ($data as $value) { $url = $value['url']; $data = getProductData($goutte, $url, $db); } function getProductData($goutte, $url, $db) { $crawler = $goutte->request('GET', $url); $status_code = $goutte->getResponse()->getStatus(); $data = array(); if ($status_code == 200) { $attribs = array(); $domSelector = '//*[@class="productInfoName"]';
*/

// Debug route: prints every link on one mtgtop8 event page whose href
// contains "event", followed by the rows of the decklist it links to.
Route::get("scrape", function () {
    $client = new \Goutte\Client();
    $crawler = $client->request('GET', 'http://mtgtop8.com/event?e=10488');

    $crawler->filter("td>div>div a")->each(function ($node) use ($client) {
        // attr() yields null for anchors without an href.
        $href = (string) $node->attr('href');
        if (strpos($href, "event") === false) {
            return;
        }

        print "http://mtgtop8.com/" . $href . " - " . $node->text() . "<br>";

        $dlcrawler = $client->request('GET', "http://mtgtop8.com/" . $href);
        $dlcrawler->filter("table table td .chosen_tr,table table td .hover_tr")->each(function ($node) {
            print "<b>" . $node->text() . "</b><br>";
        });
    });
});

// Debug route: for every card, reads the "chartData" JSON embedded in the
// product page and stores the chart prices that are not stored yet, printing
// how long each card took.
Route::get("htmlscrape", function () {
    $client = new \Goutte\Client();

    foreach (\MkmScraper\Card::all() as $card) {
        // microtime(true) returns a float; without the argument it returns a
        // "msec sec" string that cannot be subtracted meaningfully.
        $time = microtime(true);

        $crawler = $client->request('GET', 'https://www.magiccardmarket.eu/Products/Singles/' . rawurlencode($card->set) . '/' . rawurlencode($card->name));

        // Guard every step: a page without the script block or without chart
        // data must not abort the loop for the remaining cards.
        $object = null;
        $script = $crawler->filter('#ProductInformation script');
        if ($script->count() > 0) {
            $split = explode("chartData =", $script->first()->text());
            if (isset($split[1])) {
                $split2 = explode(";var ctx", $split[1]);
                $object = json_decode($split2[0]);
            }
        }

        if (is_object($object) && isset($object->labels, $object->datasets[0]->data) && is_array($object->labels)) {
            $prices = $object->datasets[0]->data;

            foreach ($object->labels as $key => $label) {
                // Labels are "d.m.y"; skip any that do not parse.
                $parsed = date_create_from_format('d.m.y', $label);
                if ($parsed === false) {
                    continue;
                }
                $date = date_format($parsed, 'Y-m-d');

                if (\MkmScraper\GraphPrice::where("id_card", $card->id)->where("date", $date)->count() < 1) {
                    \MkmScraper\GraphPrice::create(array(
                        "id_card" => $card->id,
                        "date" => $date,
                        "sell" => isset($prices[$key]) ? $prices[$key] : null,
                    ));
                }
            }
        }

        // Elapsed wall-clock time, converted from seconds to milliseconds.
        print $card->name . " - " . round((microtime(true) - $time) * 1000) . " ms <br/>";
    }
});
#!/usr/bin/php
<?php

// Autoload configuration for the Goutte library: the services it requires,
// the git repository to fetch, the namespace-to-directory mapping and a small
// usage example.
$config = [
    'require_services' => ['guzzle', 'sf_browserkit', 'sf_css_selector', 'sf_dom_crawler', 'promise'],
    'git_urls' => ['https://github.com/yfix/goutte.git' => 'goutte/'],
    'autoload_config' => ['goutte/Goutte/' => 'Goutte'],
    'example' => function () {
        $client = new Goutte\Client();
        $crawler = $client->request('GET', 'http://google.com/');
        $crawler->filter('head > title')->each(function ($node) {
            print $node->text() . "\n";
        });
    },
];

// $return_config is not defined in this file (presumably an including script
// sets it to get the configuration back); !empty() keeps a direct run from
// reading an undefined variable.
if (!empty($return_config)) {
    return $config;
}

require_once __DIR__ . '/_yf_autoloader.php';
new yf_autoloader($config);
<?php // Include config and initiate include_once __DIR__ . '/../config/default_config.php'; includeMyFiles(); $goutte = new Goutte\Client(); // Get Database $db = new Db(); $db->query("UPDATE products_data SET status=0 WHERE source='lashowroom'"); $url = 'https://www.lashowroom.com/login?previous=/wholesalefashionistas/browse/category/3/srd/small/70/1'; $crawler = $goutte->request('GET', $url); $form = $crawler->selectButton('Log In')->form(); // exit; $crawler = $goutte->submit($form, array('login_id' => '*****@*****.**', 'login_key' => 'uplp')); /*Master URL*/ $crawlUrl = 'https://www.lashowroom.com/wholesalefashionistas/browse/all/1/srd/large/70/'; $count = 1; $status = 1; $urls = array(); while ($status == 1) { // Selector string $domSelector = '//*[@class="store-front-browse-gallery"]/ul/li/div/a'; /*Count selector for page number*/ $countSelector = '//*[@class="store-front-browse-title"]/text()'; /*Main crawl , adding count to master*/ $crawler = $goutte->request('GET', $crawlUrl . $count); /*Getting total products string*/ $pcount = $crawler->filterXPath($countSelector)->each(function ($node) { return $node->text(); }); $tProducts = '';
/**
 * Build a ProductDetailScraper whose HTTP layer replays three canned product
 * pages (apricot, avocado, golden kiwi), in that order, instead of going to
 * the network.
 *
 * @return ProductDetailScraper
 */
public function getProductDetailScraper()
{
    // One queued 200 response per fixture page.
    $queued = [];
    foreach (['apricot.html', 'avocado.html', 'golden-kiwi.html'] as $fixture) {
        $queued[] = new Response(200, [], $this->getPageContents($fixture));
    }

    $stack = HandlerStack::create(new MockHandler($queued));

    $browser = new Goutte\Client();
    $browser->setClient(new Client(['handler' => $stack]));

    return new ProductDetailScraper($browser);
}
<?php
// Include config and initiate
include_once __DIR__ . '/../config/default_config.php';
includeMyFiles();

// Initialize goutte
$goutte = new Goutte\Client();
// NOTE(review): both options below switch off TLS certificate checks (host
// name and peer) for every request made through this client.
$goutte->getClient()->setDefaultOption('config/curl/' . CURLOPT_SSL_VERIFYHOST, FALSE);
$goutte->getClient()->setDefaultOption('config/curl/' . CURLOPT_SSL_VERIFYPEER, FALSE);

// Get Database
$db = new Db();

// Fetch the login page.
$loginUrl = 'http://numberonewholesales.com/login.php';
$crawler = $goutte->request('GET', $loginUrl);

// Remove every "osCsid=<word characters>" occurrence from the markup
// (presumably a session id carried in URLs; confirm) and re-parse the
// cleaned HTML, so the form below is built from markup without it.
$html = $crawler->html();
$re = "/(osCsid)=\\w*/";
$subst = "";
$newHtml = preg_replace($re, $subst, $html);
$crawler->clear();
$crawler->addHtmlContent($newHtml);

// Fill in and submit the "Sign In" form.
$form = $crawler->selectButton('Sign In')->form();
$form['email_address'] = '*****@*****.**';
$form['password'] = '******';
$crawler = $goutte->submit($form);

// Debug output: the values that were submitted with the form.
print_r($form->getValues());

// Default Status code
$status_code = 200;
$count = 1;
$url = 'http://numberonewholesales.com/bags-printed-ankle-leggings-p-18294.html?cPath=1067';
/**
 * Requests the test page on 127.0.0.1:8888 and checks that it reports the
 * requester as local.
 */
public function testIsLocal()
{
    $server = new \Goutte\Client();
    // HTTP method names are case-sensitive; use the canonical spelling
    // instead of relying on the client to normalise "GEt".
    $response = $server->request('GET', 'http://127.0.0.1:8888/webroot/test.php');

    $this->assertRegExp('{Is Local Requester: true}', $response->text());
}
<?php
require_once __DIR__ . '/../vendor/autoload.php';
require_once __DIR__ . '/../config/config.php';

// For every configured blog: scrape the title of the newest entry, and when
// it differs from the title remembered in data/<key>.db, post it to the
// "<key>_blog" Slack channel and remember the new title.
$blogConfigFile = __DIR__ . '/../config/blog_config.php';
$arrBlogConfig = (include $blogConfigFile);
$blogConfig = $arrBlogConfig["blogConfig"];
$cli = new Goutte\Client();

foreach ($blogConfig as $db => $item) {
    $crawler = $cli->request('GET', $item['url']);
    $urls = $crawler->filter('p.ttl a')->extract(array('_text', 'href'));

    // Nothing matched (layout change, error page, ...): skip this blog, so
    // no empty message is posted and the remembered title is not wiped.
    if (!isset($urls[0][0], $urls[0][1])) {
        continue;
    }

    $fileName = __DIR__ . '/../data/' . $db . '.db';

    // Take the topmost blog entry:
    // [0] -> title
    // [1] -> url
    $title = $urls[0][0];
    $blog_url = $urls[0][1];
    $newData = $title;

    // On the first run there is no state file yet; false never equals a
    // title, so the entry is announced.
    $oldData = is_file($fileName) ? file_get_contents($fileName) : false;

    if ($newData !== $oldData) {
        $text = $item['text'] . PHP_EOL . PHP_EOL;
        $text .= $title . PHP_EOL . PHP_EOL;
        $text .= 'http://www.keyakizaka46.com' . $blog_url;
        $text = urlencode($text);
        $url = "https://slack.com/api/chat.postMessage?token=" . SLACK_API_KEY . "&channel=%23" . $db . "_blog&text=" . $text;
        file_get_contents($url);

        // Only rewrite the state file when the title actually changed.
        file_put_contents($fileName, $newData);
    }
}