addHtmlContent() public method

libxml errors are suppressed while the content is parsed. If you want to inspect parsing errors, enable internal error handling with libxml_use_internal_errors(true) and then retrieve them with libxml_get_errors(). Be sure to clear the errors with libxml_clear_errors() afterward.
public addHtmlContent ( string $content, string $charset = 'UTF-8' )
$content string The HTML content
$charset string The charset
 /**
  * Process the DOM
  *
  * Visits every `ul.productLister > li` node of the stored HTML, fetches the
  * page linked from the item's `h3 > a` and records title, size, unit price
  * and description for it, while summing the unit prices.
  *
  * @return array ['items' => array, 'total' => string produced by number_format(…, 2)]
  * @throws Exception
  */
 public function process()
 {
     // Check if HTML content is already set
     $this->checkIfContentIsEmpty($this->html);
     $items = [];
     $total = 0;
     // A closure created inside a method is bound to $this automatically, so
     // no explicit bindTo() is required (bindTo() returns a new closure and
     // does not modify the one it is called on).
     $prepareItems = function (Crawler $nodeCrawler, $i) use (&$items, &$total) {
         // The same anchor provides both the link target and the title text.
         $titleLink = $nodeCrawler->filter('h3 > a');
         $link = $titleLink->attr('href');
         $price = $nodeCrawler->filter('p.pricePerUnit')->text();
         $descriptionPage = $this->fetch($link);
         //prepare items array
         $items[$i]['title'] = trim($titleLink->text());
         $items[$i]['size'] = $this->sizeOf($descriptionPage);
         $items[$i]['unit_price'] = $this->format($price);
         $items[$i]['description'] = $this->getDescriptionFor($descriptionPage);
         $total += $items[$i]['unit_price'];
     };
     $this->domCrawler->addHtmlContent($this->html);
     $this->domCrawler->filter('ul.productLister > li')->each($prepareItems);
     $this->items = $items;
     $this->total = number_format($total, 2);
     return ['items' => $this->items, 'total' => $this->total];
 }
 /**
  * Prepare the fixtures shared by the tests: a SelectorProvider, two CSS
  * selectors and a Crawler loaded with the valid HTML fixture.
  */
 public function setUp()
 {
     $this->selectorProvider = new SelectorProvider();

     $fixtureCrawler = new Crawler();
     $fixtureCrawler->addHtmlContent($this->getValidHtml());
     $this->crawler = $fixtureCrawler;

     $this->itemsCssSelector = '.list-group .list-group-item';
     $this->noItemsCssSelector = '.not-existing-class';
 }
Example #3
0
 /**
  * Build the Css selector fixtures and a Crawler loaded with the valid HTML
  * fixture.
  */
 public function setUp()
 {
     $this->listSelector = new Css(".list-group .list-group-item");
     $this->emptyResultSelector = new Css(".non-existing");
     $this->emptySelector = new Css("");

     $fixtureCrawler = new Crawler();
     $fixtureCrawler->addHtmlContent($this->getValidHtml());
     $this->crawler = $fixtureCrawler;
 }
Example #4
0
 /**
  * Nodes from successive addHtmlContent() calls can be filtered, and a
  * <base href> in the added markup is used when resolving link URIs.
  *
  * @covers Symfony\Component\DomCrawler\Crawler::addHtmlContent
  */
 public function testAddHtmlContent()
 {
     $failureMessage = '->addHtmlContent() adds nodes from an HTML string';
     $crawler = new Crawler();

     $crawler->addHtmlContent('<html><div class="foo"></html>', 'UTF-8');
     $divClass = $crawler->filter('div')->attr('class');
     $this->assertEquals('foo', $divClass, $failureMessage);

     $crawler->addHtmlContent('<html><head><base href="http://symfony.com"></head><a href="/contact"></a></html>', 'UTF-8');
     $baseHref = $crawler->filter('base')->attr('href');
     $this->assertEquals('http://symfony.com', $baseHref, $failureMessage);
     $contactUri = $crawler->filter('a')->link()->getUri();
     $this->assertEquals('http://symfony.com/contact', $contactUri, $failureMessage);
 }
 /**
  * Prepare regex pattern fixtures, a SelectorProvider, a Crawler loaded with
  * the valid HTML fixture and the CSS selectors used by the tests.
  */
 public function setUp()
 {
     // Pattern fixtures
     $this->validCorrectPattern = '/user-(?P<value>\\d+)/';
     $this->validNoMatchPattern = '/NO-MATCH_STRING-(?P<value>\\d+)/';
     $this->validPatternWrongParam = '/NO-MATCH_STRING-(?P<wrong>\\d+)/';
     $this->invalidPattern = '/$%#$>\\d+)))/';

     $this->selectorProvider = new SelectorProvider();

     $fixtureCrawler = new Crawler();
     $fixtureCrawler->addHtmlContent($this->getValidHtml());
     $this->crawler = $fixtureCrawler;

     // CSS selector fixtures
     $this->itemCssSelector = '.list-group .list-group-item';
     //will select first
     $this->noItemsCssSelector = '.not-existing-class';
 }
 /**
  * Build a WatchLink for a URL from the fetched page's title, description and
  * image, and attach the given tags.
  *
  * @param string $url  page to fetch
  * @param array  $tags tag values, each resolved through the tag repository
  *
  * @return WatchLink
  */
 public function extract(string $url, array $tags) : WatchLink
 {
     $link = new WatchLink();
     $link->setUrl($url);

     // Reset the shared crawler before loading the fetched page into it.
     $this->crawler->clear();
     $pageHtml = $this->fetcher->fetch($url);
     $this->crawler->addHtmlContent($pageHtml);

     $link->setName($this->extractTitle());
     $link->setDescription($this->extractDescription());
     $link->setImage($this->extractImage());

     foreach ($tags as $tagValue) {
         $tagEntity = $this->tagRepository->findOrCreate($tagValue);
         $link->addTag($tagEntity);
     }

     return $link;
 }
 /**
  * Filter the price present on each countries price page, and return the price
  *
  * @param string $content HTML of a price page
  *
  * @return string $price trimmed text of the price cell, or '' when the cell is not found
  */
 public function filterPrice($content)
 {
     $crawler = new Crawler();
     $crawler->addHtmlContent($content);
     // Crawler::extract() takes one list of attribute names; with a single
     // entry it returns a flat list holding one value per matched node.
     $price = $crawler->filterXPath("html/body/div[1]/div[3]/div/div/div[3]/div[4]/div/table/tr[1]/td[2]")->extract(array('_text'));
     // The XPath is purely positional, so a page with a different layout
     // matches nothing; return an empty string instead of reading a missing index.
     return isset($price[0]) ? trim($price[0]) : '';
 }
 /**
  * Scrape a category page and every product page it links to, and return the
  * collected products as JSON together with the total of their unit prices.
  *
  * @param string $category_page_url URL of the category page
  * @param bool   $pretty_print_json pass true to encode with JSON_PRETTY_PRINT
  *
  * @return string JSON with the keys 'results' and 'total'
  */
 public function transform($category_page_url, $pretty_print_json = false)
 {
     /** loads the initial category page into a Crawler */
     $category_crawler = new Crawler();
     $category_crawler->addHtmlContent($this->page_manager->getPage($category_page_url), 'ISO-8859-1');
     $category_page = new CategoryPage($category_crawler);
     $product_collection = new ProductCollection();

     /** @todo handle cases where HTML structure throws out the crawler more elegantly */
     $collect_product = function (Crawler $category_page_product_node, $i) use ($product_collection) {
         try {
             $product_node = new CategoryPageProductNode($category_page_product_node);
             $url_of_product_page = $product_node->getProductHref();

             /** loads the product page */
             $product_crawler = new Crawler();
             $product_crawler->addHtmlContent($this->page_manager->getPage($url_of_product_page), 'ISO-8859-1');
             $product_page = new ProductPage($product_crawler);

             /** gets the content from either the product or category page and saves it in the product entity */
             $product = new Product();
             $product
                 ->setTitle($product_node->getTitle())
                 ->setDescription($product_page->getDescription())
                 ->setUnitPrice($product_node->getUnitPrice())
                 ->setSize($this->page_manager->getSizeOfPage($url_of_product_page));
             $product_collection->addProduct($product);
         } catch (\InvalidArgumentException $ex) {
             // Products that cannot be read are skipped on purpose.
         }
     };

     /** loops through all the products on the category page */
     $category_page->getProducts()->each($collect_product);

     /** Combines the results with the total of all the unit prices */
     $payload = [
         'results' => $product_collection->toArray(),
         'total' => $product_collection->getSumOfUnitPrices() / 100,
     ];
     $json_flags = $pretty_print_json ? JSON_PRETTY_PRINT : 0;

     return json_encode($payload, $json_flags);
 }
Example #9
0
 /**
  * Simulate the worldjournal AJAX call that fetches content data and print
  * the href of every `.catDesc a` link found in the response.
  */
 public function actionTrypostdata()
 {
     $hostname = 'www.wjlife.com';
     //all help wanted
     $currentURL = "/cls_category/03-ny-help-wanted/";
     //temp page number
     $pno = 0;
     //language: chinese simplified
     $wjlang = "zh-cn";

     $optionVaules = [
         "relation" => "AND",
         "0" => [
             "relation" => "AND",
             "0" => ["key" => "wj_order_id"],
         ],
     ];
     $queryObject = [
         "keyword" => "",
         "pagesize" => 40,
         "pno" => $pno,
         "optionVaules" => $optionVaules,
         "currentURL" => "http://" . $hostname . $currentURL,
         "currentCatId" => 327,
         "currentStateId" => 152,
     ];
     $requestUrl = "http://" . $hostname . "/wp-content/themes/wjlife/includes/classified-core.php?regions=state_ny&variant=" . $wjlang . "&t=" . time();
     $serverParameters = [
         'HTTP_X-Requested-With' => 'XMLHttpRequest',
         'contentType' => 'application/x-www-form-urlencoded;charset=utf-8',
     ];

     $client = new Client();
     $responseCrawler = $client->request("POST", $requestUrl, $queryObject, [], $serverParameters);
     // if you want to echo this out with correct encoding, use `echo utf8_decode($rowHtml)`
     $rowHtml = $responseCrawler->html();

     $subCrawler = new Crawler();
     $subCrawler->addHtmlContent($rowHtml);
     $linkArray = $subCrawler->filter(".catDesc a")->each(function ($node, $index) {
         return $node->attr('href');
     });
     print_r($linkArray);
 }
 /**
  * Build the subject under test from the category-page product-node fixture.
  */
 public function setUp()
 {
     $fixturePath = __DIR__ . '/../Fixtures/category-page-product-node.html';
     $fixtureCrawler = new Crawler();
     $fixtureCrawler->addHtmlContent(file_get_contents($fixturePath), 'ISO-8859-1');
     $this->SUT = new SUT($fixtureCrawler);
 }
 /**
  * Replace all src of img.inline-image with an embedded image
  *
  * Each distinct image is embedded once; both the single-quoted and the
  * double-quoted form of its src attribute are then rewritten in the body.
  *
  * @param  Swift_Message $message
  */
 protected function inlineImages(Swift_Message $message)
 {
     $body = $message->getBody();
     $crawler = new Crawler();
     $crawler->addHtmlContent($body);

     $embedded = array();     // normalized source => embedded reference
     $replacements = array(); // attribute text to search for => replacement
     $inlineImageXPath = "//img[contains(concat(' ',normalize-space(@class), ' '), ' inline-image ')]";

     foreach ($crawler->filterXPath($inlineImageXPath) as $imageNode) {
         $src = $imageNode->getAttribute('src');
         $doubleQuoted = 'src="' . $src . '"';
         if (isset($replacements[$doubleQuoted])) {
             // this src has already been handled
             continue;
         }

         // if starting with one slash, use local file
         $path = $src;
         if (preg_match('#^/[^/]#', $path)) {
             $path = $this->web_directory . parse_url($src, PHP_URL_PATH);
         }

         if (!isset($embedded[$path])) {
             $embedded[$path] = $message->embed(Swift_Image::fromPath($path));
         }

         $embeddedAttribute = 'src="' . $embedded[$path] . '"';
         $replacements['src=\'' . $src . '\''] = $embeddedAttribute;
         $replacements[$doubleQuoted] = $embeddedAttribute;
     }

     if (!count($replacements)) {
         return;
     }

     $body = str_replace(array_keys($replacements), array_values($replacements), $body);
     $message->setBody($body);
 }
Example #12
0
 /**
  * Fetch the /LaundryState page of a laundry place and fill in its state.
  *
  * On success the entry receives 'html' (the crawled markup with image
  * sources rewritten to absolute URLs), 'busy' (number of "bgColor=Red"
  * occurrences in the response) and 'available' (number of "bgColor=Green"
  * occurrences). If anything throws, all three are set to self::NETWORK_ERROR.
  *
  * @param array $laundryPlace entry with a 'url' key; modified in place
  */
 public function setLaundryState(&$laundryPlace)
 {
     $user = '******';
     $pass = '******';
     try {
         $client = new Client($laundryPlace['url']);
         $request = $client->get('/LaundryState', [], ['auth' => [$user, $pass, 'Digest'], 'timeout' => 1.5, 'connect_timeout' => 1.5]);
         $response = $request->send();
         $body = $response->getBody();

         // Collect libxml errors internally while parsing, remembering the
         // caller's setting so it can be restored rather than forced to false.
         $previousLibxmlMode = libxml_use_internal_errors(true);
         try {
             $crawler = new Crawler();
             $crawler->addContent($body);
             foreach ($crawler->filter('img') as $img) {
                 $resource = $img->getAttribute('src');
                 $img->setAttribute('src', 'http://129.241.126.11/' . trim($resource, '/'));
             }
             $crawler->addHtmlContent('<h1>foobar</h1>');
             //'<link href="http://129.241.126.11/pic/public_n.css" type="text/css">');
             $laundryPlace['html'] = $crawler->html();
         } catch (\Exception $parseFailure) {
             // Restore libxml state on the failure path as well, then let the
             // outer handler report the error.
             libxml_clear_errors();
             libxml_use_internal_errors($previousLibxmlMode);
             throw $parseFailure;
         }
         // Drop the buffered parse errors so they do not accumulate.
         libxml_clear_errors();
         libxml_use_internal_errors($previousLibxmlMode);

         preg_match_all('/bgColor=Green/', $body, $greenMatches);
         preg_match_all('/bgColor=Red/', $body, $redMatches);
         $laundryPlace['busy'] = count($redMatches[0]);
         $laundryPlace['available'] = count($greenMatches[0]);
     } catch (\Exception $e) {
         $laundryPlace['available'] = self::NETWORK_ERROR;
         $laundryPlace['busy'] = self::NETWORK_ERROR;
         $laundryPlace['html'] = self::NETWORK_ERROR;
     }
 }
Example #13
0
    /**
     * A node added through addHtmlContent() can be found with filter().
     *
     * @covers Symfony\Component\DomCrawler\Crawler::addHtmlContent
     */
    public function testAddHtmlContent()
    {
        $crawler = new Crawler();
        $crawler->addHtmlContent('<html><div class="foo"></html>', 'UTF-8');

        $divClass = $crawler->filter('div')->attr('class');

        $this->assertEquals('foo', $divClass, '->addHtmlContent() adds nodes from an HTML string');
    }
 /**
  * The product-page fixture yields the description "Apricots".
  */
 public function test_it_extracts_description()
 {
     $fixturePath = __DIR__ . '/../Fixtures/product-page.html';
     $fixtureCrawler = new Crawler();
     $fixtureCrawler->addHtmlContent(file_get_contents($fixturePath), 'ISO-8859-1');

     $productPage = new SUT($fixtureCrawler);

     $this->assertEquals("Apricots", $productPage->getDescription());
 }
Example #15
0
 /**
  * Scraps og:title off the page content
  *
  * Returns 'Unable to parse' when the page has no og:title meta tag, and the
  * exception message when the request or parsing throws.
  *
  * @param  string $url
  * @return string
  */
 public function scrap($url)
 {
     $title = 'Unable to parse';
     $this->request->setMethod(HTTP_METH_GET);
     $this->request->setUrl($url);
     try {
         $response = $this->request->send();
         // The crawler is shared between calls and addHtmlContent() appends
         // nodes, so remove the nodes of any previously scraped page first;
         // otherwise getNode(0) below could return an earlier page's tag.
         $this->crawler->clear();
         $this->crawler->addHtmlContent($response->getBody());
         $subCrawler = $this->crawler->filterXPath('//head/meta[@property="og:title"]');
         $meta = $subCrawler->getNode(0);
         if ($meta) {
             $title = $meta->getAttribute('content');
         }
     } catch (Exception $e) {
         $title = $e->getMessage();
     }
     return $title;
 }
 /**
  * Load the source URL into a Crawler and delegate to the list getter class
  * that matches the source's media parent.
  *
  * @return mixed whatever the resolved getter's getList() returns
  */
 public function getListFromMedia()
 {
     $sourceUrl = $this->source->url;
     $crawler = new Crawler();
     // Warnings from file_get_contents() are suppressed here.
     $html = @file_get_contents($sourceUrl);
     $crawler->addHtmlContent($html);

     // example, NowLebanonListGetter, which implements now lebanon's way of getting lists;
     $getterClassName = 'App\\PostCrawlers\\PostLists\\' . $this->source->media_parent . 'ListGetter';
     $listGetter = new $getterClassName($this->source->url, $crawler);

     return $listGetter->getList();
 }
 /**
  * Load this post's URL into a Crawler and delegate to the details getter
  * class that matches the source's media parent.
  *
  * @return mixed whatever the resolved getter's getDetails() returns
  */
 public function getDetailsFromMedia()
 {
     $crawler = new Crawler();
     // Warnings from file_get_contents() are suppressed here.
     $crawler->addHtmlContent(@file_get_contents($this->url));

     $getterClassName = 'App\\PostCrawlers\\PostDetails\\' . $this->source->media_parent . 'DetailsGetter';
     $detailsGetter = new $getterClassName($this->url, $crawler);

     return $detailsGetter->getDetails();
 }
 /**
  * Assert on the number of nodes that match a CSS selector and, optionally,
  * a content constraint.
  *
  * assertSelectEquals("#binder .name", "Chuck", true,  $xml);  // any?
  * assertSelectEquals("#binder .name", "Chuck", false, $xml);  // none?
  *
  * A string $content keeps only nodes whose text contains it; '' keeps nodes
  * with empty text, and "regexp:<pattern>" keeps nodes whose text matches the
  * pattern. $count may be a number (exact match), a boolean (any / none) or
  * an array with any of the keys '>', '>=', '<', '<='.
  *
  * @param array                 $selector
  * @param string                $content
  * @param integer|boolean|array $count
  * @param mixed                 $actual   DOMDocument, or markup to parse
  * @param string                $message
  * @param boolean               $isHtml   parse string $actual as HTML (true) or XML (false)
  * @since Method available since Release 1.0.0
  *
  * @throws PHPUnit_Framework_Exception when $count has none of the supported forms
  */
 public static function assertSelectEquals($selector, $content, $count, $actual, $message = '', $isHtml = true)
 {
     $crawler = new Crawler();
     if ($actual instanceof DOMDocument) {
         $crawler->addDocument($actual);
     } elseif ($isHtml) {
         $crawler->addHtmlContent($actual);
     } else {
         $crawler->addXmlContent($actual);
     }

     $selected = $crawler->filter($selector);
     if (is_string($content)) {
         $selected = $selected->reduce(function (Crawler $node, $i) use ($content) {
             if ($content === '') {
                 return $node->text() === '';
             }
             if (preg_match('/^regexp\\s*:\\s*(.*)/i', $content, $matches)) {
                 return (bool) preg_match($matches[1], $node->text());
             }
             return strstr($node->text(), $content) !== false;
         });
     }
     $found = count($selected);

     // Exact number of matches expected.
     if (is_numeric($count)) {
         self::assertEquals($count, $found, $message);
         return;
     }

     // Boolean: "at least one match" versus "no match".
     if (is_bool($count)) {
         $hasMatches = $found > 0;
         if ($count) {
             self::assertTrue($hasMatches, $message);
         } else {
             self::assertFalse($hasMatches, $message);
         }
         return;
     }

     // Array of bounds; at least one known comparison key must be present.
     $hasBound = is_array($count)
         && (isset($count['>']) || isset($count['<']) || isset($count['>=']) || isset($count['<=']));
     if (!$hasBound) {
         throw new PHPUnit_Framework_Exception('Invalid count format');
     }
     if (isset($count['>'])) {
         self::assertTrue($found > $count['>'], $message);
     }
     if (isset($count['>='])) {
         self::assertTrue($found >= $count['>='], $message);
     }
     if (isset($count['<'])) {
         self::assertTrue($found < $count['<'], $message);
     }
     if (isset($count['<='])) {
         self::assertTrue($found <= $count['<='], $message);
     }
 }
Example #19
0
 /**
  * Collect the hidden inputs of an HTML fragment.
  *
  * @param string $data HTML markup
  *
  * @return array value of each input[type="hidden"], keyed by its name attribute
  */
 public function parseForm($data)
 {
     $crawler = new Crawler();
     $crawler->addHtmlContent($data);

     $hiddenFields = [];
     /** @var \DOMElement $input */
     foreach ($crawler->filter('input[type="hidden"]') as $input) {
         $fieldName = $input->getAttribute('name');
         $hiddenFields[$fieldName] = $input->getAttribute('value');
     }

     return $hiddenFields;
 }
Example #20
0
 /**
  * {@inheritDoc}
  *
  * Returns one entry per `#overview-top` block, each with the keys 'title',
  * 'year', 'rating' and 'desc'.
  */
 public function crawl($html)
 {
     // A freshly constructed Crawler holds no nodes, so it needs no clear().
     $crawler = new Crawler();
     $crawler->addHtmlContent($html);
     // Return the collected entries so the computed result is not discarded.
     return $crawler->filter('#overview-top')->each(function (Crawler $domCrawler) {
         // The first header span holds the title, the last one the year.
         $title = $domCrawler->filter('.header span')->first()->text();
         $year = $domCrawler->filter('.header span')->last()->text();
         return [
             'title' => $title,
             'year' => $year,
             'rating' => $domCrawler->filter('.star-box .giga-star ')->first()->text(),
             'desc' => $domCrawler->filter('p.description')->text(),
         ];
     });
 }
 /**
  * Execute the console command.
  *
  * As written, only the first statement takes effect: a Crawler record is
  * created for the "url" argument and the method returns immediately.
  *
  * @param Client $client HTTP client; only referenced by the unreachable code below
  *
  * @return mixed
  */
 public function handle(Client $client)
 {
     Crawler::create(['url' => $this->argument('url')]);
     return;
     // NOTE(review): everything below is unreachable because of the bare
     // `return` above. It reads like the intended implementation (delete any
     // existing record for the URL, re-create it, fetch the page and hand the
     // parsed DOM to a Spider) — confirm whether the early return is a
     // debugging leftover before removing either half.
     Crawler::where('url', '=', $this->argument('url'))->delete();
     $crawler = Crawler::create(['url' => $this->argument('url')]);
     $html = $client->get($this->argument('url'))->getBody();
     $dom = new DomCrawler();
     $dom->addHtmlContent($html);
     $spider = new Spider($crawler, $dom);
     $spider->get();
 }
 /**
  * Create a shop through the browser, creating and activating a new account
  * first unless $onlyShop is set, and return a configured Shop object.
  *
  * Option keys read by this method: 'shop_name', 'country', 'password',
  * 'email', 'language' (account flow only) and 'waitForSubdomain'
  * (defaults to true).
  *
  * @param array $options  see the keys listed above
  * @param bool  $onlyShop when true, skip account creation and e-mail
  *                        activation and only create a new store
  *
  * @return array ['shop' => Shop, 'myStoresPage' => MyStoresPage]
  *
  * @throws FailedTestException when waiting for the activation e-mail throws
  */
 public function createAccountAndShop(array $options, $onlyShop = false)
 {
     // Caller-supplied options override the default.
     $options = array_merge(['waitForSubdomain' => true], $options);
     if ($onlyShop) {
         // Create an additional store from the "new store" page.
         $this->browser->visit($this->homePage->getNewStoreURL())->fillIn('#create-online-store-shop_name', $options['shop_name'])->click('a.get-me-started');
         $confPage = new StoreConfigurationPage($this->homePage);
         $confPage->chooseCountry($options['country'])->chooseFirstQualification()->submit()->fillPassword($options['password'])->fillPasswordConfirmation($options['password'])->acceptTandC()->submit();
         $this->browser->click('a.get-me-started');
     } else {
         // Full sign-up flow starting from the home page banner form.
         $this->homePage->visit()->setLanguage($options['language'])->submitShopCreationBannerForm($options['shop_name'], $options['email'])->chooseCountry($options['country'])->chooseFirstQualification()->submit()->fillFirstname('Jøħn')->fillLastname('Sölünëum')->fillPassword($options['password'])->fillPasswordConfirmation($options['password'])->acceptTandC()->submit();
         $waitForEmail = new Spinner('Could not find activation email.', 300);
         $reader = new GmailReader($this->homePage->getSecrets()['customer']['email'], $this->homePage->getSecrets()['customer']['gmail_password']);
         // The activation link is located by its language-specific link text.
         $expectedActivationEmailButtonTitle = static::$expectedActivationEmailButtonTitle[$options['language']];
         $activationLink = null;
         /**
          * @todo : do we want to test the order in which the emails are received?
          */
         try {
             // The callback reports true once an e-mail containing the
             // activation link is found; the link is handed back through
             // $activationLink.
             $waitForEmail->assertBecomesTrue(function () use($reader, $options, $expectedActivationEmailButtonTitle, &$activationLink) {
                 $emails = $reader->readEmails($options['email']);
                 foreach ($emails as $email) {
                     // A base URI is passed so that link()->getUri() can build a full URI.
                     $crawler = new Crawler('', 'http://www.example.com');
                     $crawler->addHtmlContent($email['body']);
                     $crawler = $crawler->selectLink($expectedActivationEmailButtonTitle);
                     if ($crawler->count() > 0) {
                         $activationLink = $crawler->link()->getUri();
                         return true;
                     }
                 }
                 return false;
             }, false);
         } catch (\Exception $e) {
             throw new FailedTestException($e->getMessage());
         }
         $this->browser->visit($activationLink);
     }
     $myStores = new MyStoresPage($this->homePage);
     $frontOfficeURL = $myStores->getFrontOfficeURL($options['shop_name']);
     $backOfficeURL = $myStores->getBackOfficeURL($options['shop_name']);
     if ($options['waitForSubdomain']) {
         $this->waitFor200($frontOfficeURL);
         sleep(300);
         // wait 5 minutes for the host to be ready
     }
     // Describe the new shop; folder name and version are fixed values here.
     $shopSettings = ['front_office_url' => $frontOfficeURL, 'back_office_url' => $backOfficeURL, 'back_office_folder_name' => 'backoffice', 'prestashop_version' => '1.6.0.10'];
     $shop = new Shop($shopSettings, null);
     $shop->setBrowser($this->browser);
     // Default back-office login values use the e-mail and password from $options.
     $optionProvider = new OptionProvider();
     $optionProvider->setDefaultValues(['BackOfficeLogin' => ['admin_email' => $options['email'], 'admin_password' => $options['password']]]);
     $shop->setOptionProvider($optionProvider);
     return ['shop' => $shop, 'myStoresPage' => $myStores];
 }
Example #23
0
 /**
  * Parse the page for the current id into $this->data.
  *
  * Cells of `table.dataArray` are read three at a time and interpreted as
  * date, label and site of one status entry.
  *
  * @return array|bool the populated data array, or false when no cell was found
  */
 public function parseAll()
 {
     $this->crawler->addHtmlContent($this->getHtml($this->id), 'ISO-8859-1');

     // Each cell becomes a one-entry map: its "headers" attribute => trimmed text.
     $cells = $this->crawler
         ->filter('table.dataArray tbody tr td')
         ->each(function (Crawler $cell) {
             return [$cell->attr('headers') => trim($cell->text())];
         });

     if (!$cells) {
         return false;
     }

     foreach (array_chunk($cells, 3) as $index => $row) {
         $this->dates[$index] = $row[0];
         $this->labels[$index] = $row[1];
         $this->sites[$index] = $row[2];
     }

     foreach ($this->dates as $index => $unused) {
         $this->data['status'][$index] = [
             'date' => $this->dates[$index]['Date'],
             'label' => $this->labels[$index]['Libelle'],
             'location' => $this->sites[$index]['site'],
         ];
     }

     $this->data['id'] = $this->id;
     $this->data['destination'] = $this->parseDestination();

     return $this->data;
 }
Example #24
0
 /**
  * Follow the link with the given text found in the stored e-mail's HTML body.
  *
  * @When /^I click the ([^"]*) link in the e-?mail$/
  */
 public function iClickTheLink($linkText)
 {
     if (empty($this->email)) {
         throw new \Exception('No email to click through from.');
     }

     $mailCrawler = new Crawler();
     $mailCrawler->addHtmlContent($this->email['htmlContent']['htmlBody']);

     try {
         $matchingLink = $mailCrawler->selectLink($linkText);
         $target = $matchingLink->attr('href');
     } catch (\InvalidArgumentException $e) {
         throw new \Exception("No link with text '{$linkText}' found in email.");
     }

     $this->getSession()->visit($target);
 }
Example #25
0
 /**
  * Parse an HTML document and collect its named meta tags and its links.
  *
  * Links whose rel attribute is "nofollow" (case-insensitive) are left out.
  * The de-duplicated links and the meta tags are also stored on the object.
  *
  * @param string $html
  * @return array ['links' => string[], 'meta' => array of content keyed by lower-cased meta name]
  */
 public function load($html)
 {
     $metaTags = [];
     $this->crawler->clear();
     $this->crawler->addHtmlContent($html);
     // $metaTags must be captured by reference; a closure otherwise writes to
     // its own local copy and the outer array stays empty.
     $this->crawler->filter('meta')->each(function (Crawler $node) use (&$metaTags) {
         $name = $node->attr('name');
         // Meta tags without a name attribute (e.g. charset declarations)
         // have no usable key, so they are skipped.
         if (null === $name || '' === $name) {
             return;
         }
         $metaTags[strtolower($name)] = $node->attr('content');
     });
     $links = [];
     $this->crawler->filter('a')->each(function (Crawler $link) use (&$links) {
         $rel = $link->attr('rel');
         if (null !== $rel && 'nofollow' === strtolower($rel)) {
             return;
         }
         $links[] = $link->attr('href');
     });
     $this->links = array_unique($links);
     $this->metaTags = $metaTags;
     return ['links' => $this->links, 'meta' => $metaTags];
 }
 /**
  * Search prothom-alo and return the result links as markup strings, with
  * each href prefixed by the site's host.
  *
  * @param mixed $search value handed to Request::get()
  *
  * @return array one serialized <a> element per search result
  */
 public static function searchFor($search)
 {
     $response = json_decode(Request::get('prothom-alo', $search));

     // Wrap the returned fragment in a full document that declares UTF-8.
     $document = '<html><head><meta charset="UTF-8"></head><body>' . $response->html . '</body></html>';
     $crawler = new Crawler();
     $crawler->addHtmlContent($document, 'UTF-8');

     $results = [];
     $anchors = $crawler->filter('body > div.search_reslut > div.search_item > h2 > a');
     foreach ($anchors as $anchor) {
         $absoluteHref = 'http://www.prothom-alo.com' . $anchor->getAttribute('href');
         $anchor->setAttribute('href', $absoluteHref);
         $results[] = $anchor->ownerDocument->saveXML($anchor);
     }

     return $results;
 }
Example #27
0
 /**
  * Returns the attributes and inner html of the nodes carrying an rdfa property.
  *
  * @param string $html content to crawl
  * @param StructureInterface $content
  * @param string $property could be a property sequence like (block,1,title,0)
  *
  * @return array|false one array per matching node, holding the node's
  *                     attributes plus an 'html' entry with its inner html,
  *                     or false when no node matches
  */
 public function getPropertyValue($html, StructureInterface $content, $property)
 {
     // extract special property
     $crawler = new Crawler();
     $crawler->addHtmlContent($html, 'UTF-8');
     $nodes = $crawler;
     // last non-numeric sequence item seen; used to build the rel selector
     $before = '';
     if (false !== ($sequence = $this->getSequence($content, $property))) {
         // narrow the node set step by step along the sequence
         foreach ($sequence['sequence'] as $item) {
             // is not integer
             if (!ctype_digit(strval($item))) {
                 $before = $item;
                 $nodes = $nodes->filter('*[property="' . $item . '"]');
             } else {
                 // numeric item: take the n-th node whose rel equals the previous name
                 $nodes = $nodes->filter('*[rel="' . $before . '"]')->eq($item);
             }
         }
     } else {
         // FIXME it is a bit complex but there is no :not operator in crawler
         // should be *[property="block"]:not(*[property] *)
         $nodes = $nodes->filter('*[property="' . $property . '"]')->reduce(function (Crawler $node) {
             // get parents
             $parents = $node->parents();
             $count = 0;
             // count the ancestors that have a property attribute and typeof "collection"
             $parents->each(function ($node) use(&$count) {
                 if (null !== $node->attr('property') && $node->attr('typeof') === 'collection') {
                     ++$count;
                 }
             });
             // keep the node only when no such ancestor exists
             return $count === 0;
         });
     }
     // if rdfa property not found return false
     if ($nodes->count() > 0) {
         // create an array of changes
         return $nodes->each(function (Crawler $crawlerNode) {
             $node = $crawlerNode->getNode(0);
             $attributes = [];
             foreach ($node->attributes as $name => $value) {
                 $attributes[$name] = $value->nodeValue;
             }
             // stored after the loop, so it replaces any attribute named "html"
             $attributes['html'] = $crawlerNode->html();
             return $attributes;
         });
     }
     return false;
 }
 /**
  * Upload every image of the flyer whose src does not pass validateURL() and
  * point the image at the uploaded copy, then store the resulting markup.
  *
  * @param Flyer $flyer
  *
  * @return Flyer the same flyer, after its html was replaced and saved
  */
 public function replaceImages(Flyer $flyer)
 {
     $markup = $flyer->getHtml();
     $crawler = new Crawler();
     $crawler->addHtmlContent($markup);

     foreach ($crawler->filter('img') as $image) {
         $source = $image->getAttribute('src');
         if ($this->validateURL($source)) {
             // src passed validation: leave this image untouched
             continue;
         }
         $upload = $this->utilsBusiness->upladImage($source, 'flyers');
         $image->setAttribute('src', $upload['url']);
     }

     $flyer->setHtml($crawler->html());
     $this->saveData($flyer);

     return $flyer;
 }
Example #29
0
 /**
  * Build a readable message from a response.
  *
  * For 5xx responses whose body contains an exception page (a
  * `.text-exception h1` element), returns "Internal Server Error: " followed
  * by the exception message and the first line of the first trace entry.
  * In every other case the raw response content is returned.
  *
  * @param Response $response
  *
  * @return string
  */
 private function getMessage(Response $response)
 {
     $statusCode = $response->getStatusCode();
     // Only server errors (500-599) are inspected for an exception page.
     if ($statusCode >= 500 && $statusCode < 600) {
         $crawler = new Crawler();
         $crawler->addHtmlContent($response->getContent());
         if ($crawler->filter('.text-exception h1')->count() > 0) {
             $exceptionMessage = trim($crawler->filter('.text-exception h1')->text());
             $trace = '';
             if ($crawler->filter('#traces-0 li')->count() > 0) {
                 // keep only the first line of the trace entry
                 list($trace) = explode("\n", trim($crawler->filter('#traces-0 li')->text()));
             }
             return 'Internal Server Error: ' . $exceptionMessage . ' ' . $trace;
         }
     }
     return $response->getContent();
 }
Example #30
-1
 /**
  * Turn a front-matter document into the fields of a page.
  *
  * The first <h1> supplies the title and every <h1> is removed from the
  * body; "../.." is stripped from image sources; the part of the body before
  * the first <hr> becomes the intro. Keywords, description, cover and tags
  * come from the front-matter head when present.
  *
  * @param string $content raw document, handed to the yaml/markdown parser
  *
  * @return array with the keys title, keywords, description, intro, cover, content, tags
  */
 protected function process($content)
 {
     $parser = new Parser('yaml', 'markdown');
     $frontmatter = $parser->parse($content);

     $dom = new Crawler();
     $dom->addHtmlContent($frontmatter->getBody());

     // Use the first heading as title and strip all headings from the body.
     $title = '';
     $dom->filterXPath('//h1')->each(function (Crawler $heading) use (&$title) {
         foreach ($heading as $headingNode) {
             if (!$title) {
                 $title = $headingNode->nodeValue;
             }
             $headingNode->parentNode->removeChild($headingNode);
         }
     });

     foreach ($dom->filterXPath('//img') as $imageNode) {
         $imageNode->setAttribute('src', str_replace('../..', '', $imageNode->getAttribute('src')));
     }

     $markup = $dom->html();
     $parts = explode('<hr>', $markup);
     $intro = $parts[0];
     $markup = str_replace('<hr>', '', $markup);

     return [
         'title' => $title ?: '',
         'keywords' => isset($frontmatter->head['keywords']) ? $frontmatter->head['keywords'] : '',
         // Without an explicit description, use the first 150 characters of the intro text.
         'description' => isset($frontmatter->head['description'])
             ? $frontmatter->head['description']
             : mb_substr(strip_tags(trim($intro)), 0, 150),
         'intro' => trim($intro),
         'cover' => isset($frontmatter->head['cover']) ? $frontmatter->head['cover'] : '',
         'content' => trim($markup),
         'tags' => isset($frontmatter->head['tags']) ? $frontmatter->head['tags'] : '',
     ];
 }