filterXPath() public method

The XPath expression is evaluated in the context of the crawler, which is considered as a fake parent of the elements inside it. This means that a child selector "div" or "./div" will match only the div elements of the current crawler, not their children.
public filterXPath ( string $xpath ) : Crawler
$xpath string An XPath expression
Returns Crawler A new instance of Crawler with the filtered list of nodes
 /**
  * Builds a Method of the current specification from a DOMNode.
  *
  * The method name is read from the node's "name" attribute. A name that
  * starts with "get" or "is", or that ends with "ToString", is classified as
  * an accessor; every other name is classified as an action.
  *
  * @param \DOMNode $node A DOMNode
  *
  * @return Method
  *
  * @throws \Exception When the node does not contain exactly one comment
  *                    element, or contains more than one return element
  */
 public function getMethod(\DOMNode $node)
 {
     $crawler = new Crawler($node);
     $name = $crawler->attr('name');
     // Initialize
     $method = new Method($name);
     // Type
     $method->setType(preg_match('/(^(get|is)|ToString$)/', $name) ? Method::TYPE_ACCESSOR : Method::TYPE_ACTION);
     // Description
     $descriptions = $crawler->filterXPath('//comment');
     if (count($descriptions) !== 1) {
         throw new \Exception('Only one comment expected');
     }
     // getNode(0) does not depend on the Crawler exposing the
     // SplObjectStorage iterator API (rewind()/current()).
     $method->setDescription($this->getInner($descriptions->getNode(0)));
     // Parameters (a dedicated loop variable avoids overwriting the $node argument)
     foreach ($crawler->filterXPath('//parameter') as $parameterNode) {
         $method->addParameter($this->getParameter($parameterNode));
     }
     // Return
     $returnNodes = $crawler->filterXPath('//return');
     $returnCount = count($returnNodes);
     if ($returnCount > 1) {
         throw new \Exception("Should not be more than one return node");
     }
     if ($returnCount === 1) {
         list($returnType, $returnDescription) = $this->getReturn($returnNodes->getNode(0));
         $method->setReturnType($returnType);
         $method->setReturnDescription($returnDescription);
     }
     return $method;
 }
 /**
  * Search for torrents.
  *
  * Requests the URL built by makeUrl() and turns every //channel/item element
  * of the response into a Torrent. Any exception raised while building the
  * URL, sending the request or loading the response body results in an empty
  * array.
  *
  * @param string $query
  * @param int    $category
  * @return array Array of torrents. Either empty or filled.
  */
 public function search($query, $category)
 {
     // The request is sent with a desktop browser User-Agent header.
     $userAgent = 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36';
     try {
         $response = $this->httpClient->get(
             $this->makeUrl($query, $category),
             ['headers' => ['User-Agent' => $userAgent]]
         );
         $feed = new Crawler((string) $response->getBody());
     } catch (\Exception $e) {
         return [];
     }
     $torrents = [];
     foreach ($feed->filterXPath('//channel/item') as $itemNode) {
         $torrent = new Torrent();
         $fields = new Crawler($itemNode);
         // Copy the item's fields onto the torrent.
         $torrent->setSite($this->tag);
         $torrent->setTitle($fields->filterXPath('//title')->text());
         $seeders = (int) $fields->filterXPath('//torrent:seeds')->text();
         $torrent->setSeeders($seeders);
         $leechers = (int) $fields->filterXPath('//torrent:peers')->text();
         $torrent->setLeechers($leechers);
         $torrent->setMagnet($fields->filterXPath('//torrent:magnetURI')->text());
         $contentLength = (int) $fields->filterXPath('//torrent:contentLength')->text();
         $torrent->setSize($this->formatBytes($contentLength));
         $torrent->setAge($fields->filterXPath('//pubDate')->text());
         $torrent->setCategory($fields->filterXPath('//category')->text());
         $torrents[] = $torrent;
     }
     return $torrents;
 }
Exemple #3
0
 /**
  * Parses the cafe map in the format ['cafe name' => 'link to the cafe information'].
  *
  * The name is taken from the "title" attribute and the link from the "href"
  * attribute of every anchor matched under ul.xoxo list items.
  *
  * @param string $html
  *
  * @return array
  */
 public function parseCafeList($html)
 {
     $this->setHtml($html);
     $links = $this->crawler->filterXPath('//body//ul[@class="xoxo"]//li/a');
     // Collect [title, href] pairs first, then fold them into the map.
     $pairs = $links->each(function (Crawler $link) {
         return [$link->attr('title'), $link->attr('href')];
     });
     $cafes = [];
     foreach ($pairs as $pair) {
         $cafes[$pair[0]] = $pair[1];
     }
     return $cafes;
 }
 /**
  * Reads the sammui-oauth2-client-id and sammui-oauth2-client-secret meta
  * tags from the page head and asserts that both contents are non-null
  * strings that do not start with "no-client-found-for".
  *
  * @depends testAngularSammui
  * @param Crawler $crawler
  */
 public function testSammuiClient(Crawler $crawler)
 {
     // Returns the content attribute of the head meta tag with the given name.
     $metaContent = function ($name) use ($crawler) {
         return $crawler->filterXPath('//html/head/meta[@name="' . $name . '"]')->attr('content');
     };
     $clientId = $metaContent('sammui-oauth2-client-id');
     $clientSecret = $metaContent('sammui-oauth2-client-secret');
     $credentials = [$clientId, $clientSecret];
     foreach ($credentials as $credential) {
         $this->assertNotNull($credential);
     }
     foreach ($credentials as $credential) {
         $this->assertTrue(is_string($credential), $credential);
     }
     foreach ($credentials as $credential) {
         $this->assertStringStartsNotWith('no-client-found-for', $credential);
     }
 }
 /**
  * Returns the directives listed in the page's robots meta tag.
  *
  * The content attribute of the first <meta name="robots"> element is
  * lower-cased and split on commas. Each directive is trimmed, and
  * directives of two characters or fewer are dropped.
  *
  * @return array Directives keyed by themselves (e.g. ['noindex' => 'noindex']);
  *               empty when the page has no robots meta tag or the tag has
  *               no usable content
  */
 public function getRobotsData()
 {
     $meta = $this->crawler->filterXPath("//meta[@name='robots']");
     // attr() throws on an empty node list, so a page without a robots meta
     // tag is answered with an empty result up front.
     if ($meta->count() === 0) {
         return [];
     }
     // The cast covers a meta tag without a content attribute.
     $data = strtolower((string) $meta->attr('content'));
     $robots = [];
     foreach (explode(',', $data) as $value) {
         $value = trim($value);
         if (strlen($value) > 2) {
             $robots[$value] = $value;
         }
     }
     return $robots;
 }
Exemple #6
0
 /**
  * Extracts the fields of a listing from its page.
  *
  * Every field is optional: a key is only set when its selector matches at
  * least one node, except 'images' and 'imageUrls', which are always set.
  *
  * @param Crawler $crawler Crawler holding the listing page
  *
  * @return array Parameters with the possible keys 'type', 'name', 'images',
  *               'imageUrls', 'region', 'price', 'currency', 'email', 'phone'
  *               and 'text'
  */
 public function parse(Crawler $crawler)
 {
     $params = array();
     // Stores the trimmed text of the first matched node under $key, if any node matched.
     $assignText = function ($key, Crawler $nodes) use (&$params) {
         if ($nodes->count()) {
             $params[$key] = trim($nodes->first()->text());
         }
     };
     $assignText('type', $crawler->filter('.text .druh .nabizim'));
     $assignText('name', $crawler->filter('.text h2 a'));
     $image = $crawler->filter('.item .image img');
     if ($image->count()) {
         $params['images'] = 1;
         // The base URL is hard-coded.
         $params['imageUrls'] = array('http://midi.cz' . $image->first()->attr('src'));
     } else {
         $params['images'] = 0;
         $params['imageUrls'] = array();
     }
     $assignText('region', $crawler->filterXPath('//*[@class="table_info"]//tr[3]/td[2]'));
     $priceBox = $crawler->filter('.priceBox');
     if ($priceBox->count()) {
         // The first space-separated token is the price, the second (if present) the currency.
         $exploded = explode(' ', trim($priceBox->first()->text()));
         $params['price'] = $exploded[0];
         if (isset($exploded[1])) {
             $params['currency'] = $exploded[1];
         }
     }
     // email
     $assignText('email', $crawler->filterXPath('//*[@class="table_info"]//tr[4]/td[2]'));
     // phone
     $assignText('phone', $crawler->filterXPath('//*[@class="table_info"]//tr[5]/td[2]'));
     $assignText('text', $crawler->filterXPath('//*[@id="mainCol"]/div[1]/div[2]/p[3]'));
     return $params;
 }
 /**
  * Rebuilds the post content from the children of the entry article and
  * passes the result through the translator.
  *
  * Each child is handed to processNode(). A non-null result that is not an
  * "a" node and has its text property set is appended to the content,
  * wrapped in a tag named after the node.
  */
 public function processContent()
 {
     $article = $this->crawler->filterXPath('//article[contains(@class, "entry")]');
     $this->post->content = '';
     $article->children()->each(function (Crawler $child, $i) {
         $processed = $this->processNode($child);
         if ($processed === null) {
             return;
         }
         if ($processed->nodeName === 'a' || !isset($processed->text)) {
             return;
         }
         $tag = trim($processed->nodeName);
         $this->post->content .= "<{$tag}>" . $processed->text . "</{$tag}>";
     });
     $translated = $this->translator->translate($this->post->content);
     $this->post->content = $translated;
 }
Exemple #8
0
 /**
  * Lists the episodes of one season of a TV show.
  *
  * @param string $id ID as contained in the URL for the TV show of the
  *        form http://www.imdb.com/title/ID/
  * @param string $season Season for which to return episodes
  * @return array Associative array indexed by episode number; each entry is
  *         an associative array with the trimmed 'title', 'airdate' and
  *         'description' of that episode
  */
 public function getSeasonEpisodes($id, $season)
 {
     $url = $this->baseUrl . '/title/' . $id . '/episodes?season=' . $season;
     $page = $this->request('GET', $url);
     $rows = $page->filterXPath('//div[contains(@class, "eplist")]/div[contains(@class, "list_item")]/div[@class="info"]');
     $episodes = array();
     foreach ($rows as $row) {
         $info = new Crawler($row);
         $number = $info->filterXPath('//meta[@itemprop="episodeNumber"]')->attr('content');
         $episodes[$number] = array(
             'title' => trim($info->filterXPath('//strong/a[@itemprop="name"]')->text()),
             'airdate' => trim($info->filterXPath('//div[@class="airdate"]')->text()),
             'description' => trim($info->filterXPath('//div[@class="item_description"]')->text()),
         );
     }
     return $episodes;
 }
 /**
  * Fetches http://thecodinglove.com/random and extracts the post title and image.
  *
  * @since  0.9.0
  *
  * @return array Array with the keys 'title' and 'image_url'. On a 200
  *               response a value stays an empty string when its element is
  *               not found on the page; on any other status a fallback title
  *               (with the status code appended) and a fallback image URL
  *               are returned.
  *
  * @author nguyenvanduocit
  */
 public function getRandomFactOfProgramming()
 {
     $client = new Client();
     $response = $client->get('http://thecodinglove.com/random');
     if ($response->getStatusCode() !== 200) {
         return array('title' => 'No image found' . $response->getStatusCode(), 'image_url' => 'http://funny.topdev.vn/wp-content/uploads/images/when-they-tell-me-the-website-has-to-be-supported-by-ie6-1439201300.gif');
     }
     $result = array('title' => '', 'image_url' => '');
     $crawler = new Crawler($response->getBody()->getContents());
     // filterXPath() always returns a Crawler object, which is truthy even
     // when nothing matched; count() is what tells whether text()/attr() can
     // be called without throwing on an empty node list.
     $titleCrawler = $crawler->filterXPath('//div[@id="post1"]//h3');
     if ($titleCrawler->count() > 0) {
         $result['title'] = $titleCrawler->text();
     }
     $imageCrawler = $crawler->filterXPath('//div[@class="bodytype"]//img');
     if ($imageCrawler->count() > 0) {
         $result['image_url'] = $imageCrawler->attr('src');
     }
     return $result;
 }
 /**
  * Replace all src of img.inline-image with an embedded image
  *
  * Every distinct image is embedded into the message once; both the
  * single-quoted and the double-quoted form of its src attribute are then
  * replaced in the body. The body is only rewritten when at least one image
  * was found.
  *
  * @param  Swift_Message $message
  */
 protected function inlineImages(Swift_Message $message)
 {
     $html = $message->getBody();
     $crawler = new Crawler();
     $crawler->addHtmlContent($html);
     $embedded = array();
     $replacements = array();
     $inlineImageXPath = "//img[contains(concat(' ',normalize-space(@class), ' '), ' inline-image ')]";
     foreach ($crawler->filterXPath($inlineImageXPath) as $img) {
         $src = $img->getAttribute('src');
         $doubleQuoted = 'src="' . $src . '"';
         // This src has already been handled.
         if (isset($replacements[$doubleQuoted])) {
             continue;
         }
         // if starting with one slash, use local file
         $path = preg_match('#^/[^/]#', $src)
             ? $this->web_directory . parse_url($src, PHP_URL_PATH)
             : $src;
         if (!isset($embedded[$path])) {
             $embedded[$path] = $message->embed(Swift_Image::fromPath($path));
         }
         $embeddedAttribute = 'src="' . $embedded[$path] . '"';
         $replacements['src=\'' . $src . '\''] = $embeddedAttribute;
         $replacements[$doubleQuoted] = $embeddedAttribute;
     }
     if (count($replacements)) {
         $message->setBody(str_replace(array_keys($replacements), array_values($replacements), $html));
     }
 }
 /**
  * Reads the //Data/Items/Item elements of a file and builds one Sale per item.
  *
  * The affiliate tag named by the item's "Tag" attribute is looked up in the
  * Tag repository; when the lookup yields nothing a new Tag is created,
  * persisted and flushed before it is attached to the sale.
  *
  * @param File $file
  * @return Sale[]
  */
 public function crawl(File $file)
 {
     $document = new Crawler(file_get_contents($file->getPathname()));
     $sales = [];
     /** @var $item \DOMElement */
     foreach ($document->filterXPath('//Data/Items/Item') as $item) {
         $sale = new Sale();
         // Resolve the affiliate tag, creating it on first sight.
         $tagName = $item->getAttribute('Tag');
         $affiliateTag = $this->getEm()->getRepository('AffiliateDashboardBundle:Tag')->findbyName($tagName);
         if (!$affiliateTag) {
             $affiliateTag = new Tag();
             $affiliateTag->setName($tagName);
             $this->getEm()->persist($affiliateTag);
             $this->getEm()->flush();
         }
         // Copy the item attributes onto the sale.
         $sale->setAsin($item->getAttribute('ASIN'));
         $sale->setCategory($item->getAttribute('Category'));
         $formattedDate = date('Y-m-d H:i:s', $item->getAttribute('EDate'));
         $sale->setDate(new \DateTime($formattedDate));
         $sale->setEarnings($this->parseFloat($item->getAttribute('Earnings')));
         $sale->setLinkType($item->getAttribute('LinkType'));
         $sale->setPrice($this->parseFloat($item->getAttribute('Price')));
         $sale->setQty((int) $item->getAttribute('Qty'));
         $sale->setRate($this->parseFloat($item->getAttribute('Rate')));
         $sale->setRevenue($this->parseFloat($item->getAttribute('Revenue')));
         $sale->setAffiliateTag($affiliateTag);
         $sale->setSeller($item->getAttribute('Seller') ?: null);
         $sale->setTitle($item->getAttribute('Title'));
         $sales[] = $sale;
     }
     return $sales;
 }
 /**
  * Returns geo-IP data for an IP address.
  *
  * NOTE(review): the method currently always returns an empty array. The
  * unconditional return right after $data is initialised makes the
  * geoiptool.com scraper below it unreachable. Whether that was done on
  * purpose is not visible from here - confirm before removing or re-enabling.
  *
  * @param string $ip IP address that the unreachable code inserts into the
  *                   geoiptool.com URL
  *
  * @return array Always an empty array as the code stands
  */
 protected function getGeoIpData($ip)
 {
     $data = array();
     // Early exit: nothing below this line is executed.
     return $data;
     $html = file_get_contents(sprintf('http://www.geoiptool.com/en/?IP=%s', $ip));
     $crawler = new Crawler($html);
     // Take the markup of the matched table and reduce it to plain text lines.
     $temp = $crawler->filterXPath('//table[@class="tbl_style"][3]')->html();
     $temp = strip_tags($temp);
     $atemp = explode("\n", $temp);
     // The first line is discarded.
     array_shift($atemp);
     $tdata = array();
     $key = null;
     foreach ($atemp as $t) {
         // A line containing a colon starts a new key: the colon is removed,
         // whitespace runs become underscores and the key is lower-cased.
         if (preg_match('/:/', $t)) {
             $key = preg_replace('/:/', '', trim($t));
             $key = preg_replace('/\\s+/', "_", $key);
             $key = strtolower($key);
             continue;
         }
         // Every other line is collected under the most recent key.
         $tdata[$key][] = $t;
     }
     foreach ($tdata as $key => $val) {
         // Join the collected lines; empty values and the placeholders '+'
         // and '()' are stored as null.
         $val = trim(implode(' ', $val));
         $data[$key] = (!empty($val) and $val !== '+' and $val !== '()') ? $val : null;
     }
     return $data;
 }
 /**
  * Asserts that a response containing a password form was fetched over HTTPS.
  *
  * Forms whose action already starts with "https://" are skipped. Every
  * remaining form is identified by the tag-name path from the document root
  * down to the form, and each path is only checked once per instance (see
  * $this->knownIdentifier).
  *
  * @param Response $response Response whose body and URI are inspected
  */
 protected function doValidation(Response $response)
 {
     $crawler = new Crawler($response->getBody());
     // The predicate has to be relative (".//input"): "//input" is evaluated
     // from the document root and would select every form of the page as
     // soon as a single password field exists anywhere in it.
     $actionNodes = $crawler->filterXPath('//form[.//input[@type="password"]]');
     $url = (string) $response->getUri();
     foreach ($actionNodes as $node) {
         $action = $node->getAttribute('action');
         if (strpos($action, 'https://') === 0) {
             continue;
         }
         // Build the tag path by walking up from the form itself, so that its
         // direct parent is part of the path; the walk ends at the first
         // ancestor that is not an element (the document).
         $fullPath = $node->tagName;
         $ancestor = $node;
         while (($ancestor = $ancestor->parentNode) instanceof \DOMElement) {
             $fullPath = $ancestor->tagName . '/' . $fullPath;
         }
         if (in_array($fullPath, $this->knownIdentifier, true)) {
             continue;
         }
         $this->knownIdentifier[] = $fullPath;
         // The scheme has to be at the very start of the URL; "https://"
         // appearing later (e.g. inside a query string) does not make the
         // request secure.
         $this->assert(strpos($url, 'https://') === 0, 'Password is transferred insecure using HTTP.');
     }
 }
 /**
  * Turns every list item found in the response body into a Result holding
  * its title, tags, id, URL, price and description.
  *
  * @inheritdoc
  */
 protected function parse(Requests_Response $requests)
 {
     $page = new Crawler();
     $page->addContent($requests->body);
     $listings = $page->filterXPath('//*[@id="content"]/div/div[2]/div[1]/div[1]/ul/li');
     $results = array();
     /** @var DOMElement $listing */
     foreach ($listings as $listing) {
         $item = new Crawler();
         $item->add($listing);
         $tags = [];
         /** @var DOMElement $tagNode */
         foreach ($item->filter(".horizontal-separated-list li") as $tagNode) {
             $tags[] = $tagNode->textContent;
         }
         $link = $item->filter(".details a");
         $result = new Result();
         $result->setTitle(trim($link->text()));
         $result->setTags($tags);
         $relUrl = $link->attr("href");
         // The id is the part after "--" in the third "/"-separated segment of the URL path.
         $path = parse_url($relUrl)["path"];
         $segment = explode("/", $path)[2];
         $id = explode("--", $segment)[1];
         $result->setId($this->getName() . "_" . (int) $id);
         $result->setUrl("http://www.anibis.ch/" . $relUrl);
         $result->setPrice($item->filter(".price")->text());
         $result->setDescription($item->filter(".details .description")->text());
         $results[] = $result;
     }
     return $results;
 }
 /**
  * Downloads a page, locates its product list entries and parses the detail
  * page linked from each entry into the shared results container.
  *
  * @param string $url The url to scrape.
  * @return \Slice\CliApp\ScrapeResults The results of the scrape task.
  */
 public function getProductsForUrl($url)
 {
     // Load the downloaded markup into a DOM crawler
     $page = new Crawler();
     $page->addContent($this->downloader->download($url));
     try {
         // Select all product list entries
         $entries = $page->filterXPath($this->productListXpath);
     } catch (\InvalidArgumentException $e) {
         // Re-throw as a ScrapeException so the command can handle it uniformly
         throw new ScrapeException($this->configValues['error_msg']['product_parse_error']);
     }
     // No entries means the page is not supported
     if (count($entries) == 0) {
         throw new ScrapeException($this->configValues['error_msg']['no_products']);
     }
     $entries->each(function ($entry, $i) {
         try {
             // The link inside the entry points at the product detail page
             $detailUrl = $entry->filterXPath($this->pdpLinkXpath)->attr('href');
         } catch (\InvalidArgumentException $e) {
             // Re-throw as a ScrapeException so the command can handle it uniformly
             throw new ScrapeException($this->configValues['error_msg']['product_parse_error']);
         }
         // Parse the detail page and record the product
         $this->results->addProduct($this->pdpParser->parseUrl($detailUrl));
     });
     return $this->results;
 }
 /**
  * Filters the crawler by an XPath locator.
  *
  * @param $locator
  * @return Crawler
  * @throws MalformedLocator When Locator::isXPath() rejects the locator
  */
 protected function filterByXPath($locator)
 {
     if (Locator::isXPath($locator)) {
         return $this->crawler->filterXPath($locator);
     }
     throw new MalformedLocator($locator, 'xpath');
 }
 /**
  * Filter the price present on each countries price page, and return the price
  *
  * @param string $content HTML of the price page
  *
  * @return string The trimmed text of the price cell, or an empty string
  *                when the cell is not found
  */
 public function filterPrice($content)
 {
     $crawler = new Crawler();
     $crawler->addHtmlContent($content);
     // extract() takes a list of attribute names; '_text' selects the node text.
     $cells = $crawler
         ->filterXPath("html/body/div[1]/div[3]/div/div/div[3]/div[4]/div/table/tr[1]/td[2]")
         ->extract(array('_text'));
     // Without a match there is no first element to read.
     if (!isset($cells[0])) {
         return '';
     }
     return trim($cells[0]);
 }
Exemple #18
0
 /**
  * Filters the crawler with a selector given either as CSS or as XPath.
  *
  * The selector is first converted from CSS to XPath; when it cannot be
  * parsed as CSS it is passed to filterXPath() unchanged.
  *
  * @param string $selector
  *
  * @return mixed Result of filterXPath(), called with error suppression
  */
 protected function match($selector)
 {
     $xpath = $selector;
     try {
         $xpath = \Symfony\Component\CssSelector\CssSelector::toXPath($selector);
     } catch (\Symfony\Component\CssSelector\Exception\ParseException $e) {
         // Not parsable as CSS: keep the selector as it was given.
     }
     return @$this->crawler->filterXPath($xpath);
 }
 /**
  * Returns the content attribute of the page's description meta tag.
  *
  * @param Crawler $crawler
  *
  * @return string|null Null when reading the attribute raises an
  *                     \InvalidArgumentException
  */
 public function find(Crawler $crawler)
 {
     $description = null;
     try {
         $description = $crawler->filterXPath('//meta[@name="description"]')->attr('content');
     } catch (\InvalidArgumentException $e) {
         // Leave the result at null.
     }
     return $description;
 }
 /**
  * Returns the content attribute of the parsely-author meta tag.
  *
  * When several such tags exist the last one wins.
  *
  * @param Crawler $crawler
  *
  * @return string|null Null when the page has no parsely-author meta tag
  */
 public function extractAuthor(Crawler $crawler)
 {
     $authors = $crawler->filterXPath('//meta[@name="parsely-author"]');
     if ($authors->count() === 0) {
         return null;
     }
     return $authors->last()->attr('content');
 }
 /**
  * Returns the content attribute of the page's og:type meta tag.
  *
  * @param Crawler $crawler
  *
  * @return string|null Null when reading the attribute raises an
  *                     \InvalidArgumentException
  */
 public function find(Crawler $crawler)
 {
     $type = null;
     try {
         $type = $crawler->filterXPath('//meta[@property="og:type"]')->attr('content');
     } catch (\InvalidArgumentException $e) {
         // Leave the result at null.
     }
     return $type;
 }
Exemple #22
0
 /**
  * Returns the src attribute of the first img element of the page.
  *
  * @param Crawler $crawler
  *
  * @return string|null Null when reading the attribute raises an
  *                     \InvalidArgumentException
  */
 public function find(Crawler $crawler)
 {
     $source = null;
     try {
         $source = $crawler->filterXPath('//img')->attr('src');
     } catch (\InvalidArgumentException $e) {
         // Leave the result at null.
     }
     return $source;
 }
Exemple #23
0
 /**
  * Returns the trimmed text of the page's head/title element.
  *
  * @param Crawler $crawler
  *
  * @return string|null Null when reading the text raises an
  *                     \InvalidArgumentException
  */
 public function find(Crawler $crawler)
 {
     $title = null;
     try {
         $title = trim($crawler->filterXPath('//head/title')->text());
     } catch (\InvalidArgumentException $e) {
         // Leave the result at null.
     }
     return $title;
 }
Exemple #24
0
 /**
  * Dumps a value as HTML with the theme set to 'unknown' and expects the
  * hidden input of the output to carry the value 'Simple'.
  */
 public function testUnknownThemeFallbacksToDefault()
 {
     $var = 1;
     $this->dumper->setTheme('unknown');
     $this->dumper->setFormat(Format\HtmlFormat::FORMAT_NAME);
     $output = new Crawler($this->dumper->dump($var));
     $hiddenInput = $output->filterXPath('//input[@type="hidden"]');
     $this->assertEquals('Simple', $hiddenInput->attr('value'));
 }
Exemple #25
0
 /**
  * Execute the console command.
  *
  * For each fetched feed (the query is limited to one) the feed HTML is
  * loaded into the crawler and every XPath in $this->pointers is evaluated;
  * a pointer whose evaluation throws yields null. The collected data plus
  * the feed URL is then used to save an organisation and to attach a saved
  * location to it.
  *
  * @return void
  */
 public function handle()
 {
     foreach (RssFeed::limit(1)->get() as $feed) {
         // Start every feed from a clean slate: $data was previously never
         // initialised, and the shared crawler would otherwise keep the
         // nodes of earlier feeds.
         $data = [];
         $this->crawler->clear();
         $this->crawler->addContent($feed->html);
         foreach ($this->pointers as $key => $value) {
             try {
                 $data[$key] = $this->crawler->filterXPath($value)->text();
             } catch (\Exception $e) {
                 $data[$key] = null;
             }
         }
         $data['url'] = $feed->url;
         $organisation = $this->saveOrganisation($data);
         $location = $this->saveLocation();
         $organisation->location()->save($location);
         // NOTE(review): the vacancy is looked up or instantiated but not used
         // any further in this method - confirm whether it should be
         // populated and saved.
         $vacancy = Vacancy::firstOrNew(['ref' => $data['ref']]);
     }
 }
 /**
  * Extracts the listing parameters from a form page.
  *
  * Keys are only set for fields whose selector matches, except 'images' and
  * 'imageUrls', which are always set.
  *
  * @param Crawler $crawler Crawler holding the form page
  *
  * @return array Parameters with the possible keys 'type', 'categoryId',
  *               'category', 'regionId', 'region', 'name', 'price',
  *               'currency', 'email', 'phone', 'text', 'images' and
  *               'imageUrls'
  */
 public function parse(Crawler $crawler)
 {
     // Returns the first node matched by a CSS selector, or null without a match.
     $first = function ($selector) use ($crawler) {
         $matches = $crawler->filter($selector);
         return $matches->count() ? $matches->first() : null;
     };
     $params = array();
     if ($type = $first('input[name="nabpop"]:checked')) {
         $params['type'] = trim($type->attr('value'));
     }
     if ($category = $first('select[name="kategorie"] option:selected')) {
         $params['categoryId'] = trim($category->attr('value'));
         $params['category'] = trim($category->text());
     }
     if ($region = $first('select[name="kraj"] option:selected')) {
         $params['regionId'] = trim($region->attr('value'));
         $params['region'] = trim($region->text());
     }
     if ($name = $first('input[name="nazev"]')) {
         $params['name'] = trim($name->attr('value'));
     }
     if ($price = $first('input[name="cena"]')) {
         $params['price'] = trim($price->attr('value'));
     }
     if ($currency = $first('select[name="mena"] option:selected')) {
         $params['currency'] = trim($currency->attr('value'));
     }
     // email: assembled from the script that follows the "bflm" element
     $script = $crawler->filterXPath('//*[@id = "bflm"]/following-sibling::script');
     if ($script->count()) {
         $matched = preg_match('/\\("bflm"\\)\\.value\\=\'(.*)\'(.*)\'(.*)\'\\;/', $script->text(), $matches);
         if ($matched) {
             $params['email'] = $matches[1] . '@' . $matches[3];
         }
     }
     // phone
     if ($phone = $first('input[name="telefon"]')) {
         $params['phone'] = trim($phone->attr('value'));
     }
     // text
     if ($text = $first('textarea[name="prispevek"]')) {
         $params['text'] = trim($text->text());
     }
     // number of images and their URLs
     $imageLinks = $crawler->filter('.InzeratObrd a');
     $imageUrls = $imageLinks->each(function (Crawler $link) {
         return $link->attr('href');
     });
     $params['images'] = trim($imageLinks->count());
     $params['imageUrls'] = $imageUrls;
     return $params;
 }
Exemple #27
0
 /**
  * Returns the nodes of an HTML string that match an XPath expression.
  *
  * @param string $html
  * @param string $xpath
  *
  * @return Crawler The matched nodes
  *
  * @throws Exception With code Exception::NODE_NOT_FOUND when nothing matches
  */
 protected function getNode($html, $xpath)
 {
     $document = new Crawler($html);
     $matches = $document->filterXPath($xpath);
     if ($matches->count() !== 0) {
         return $matches;
     }
     throw new Exception("Html does not contain `{$xpath}`.", Exception::NODE_NOT_FOUND);
 }
Exemple #28
0
 /**
  * Scraps og:title off the page content
  *
  * @param  string $url
  * @return string The content of the og:title meta tag, 'Unable to parse'
  *                when the page has no such tag, or the exception message
  *                when the request or the parsing fails
  */
 public function scrap($url)
 {
     $title = 'Unable to parse';
     $this->request->setMethod(HTTP_METH_GET);
     $this->request->setUrl($url);
     try {
         $response = $this->request->send();
         // The crawler is shared between calls: drop the nodes of a previous
         // page first, otherwise getNode(0) below could still return the
         // og:title of an earlier document.
         $this->crawler->clear();
         $this->crawler->addHtmlContent($response->getBody());
         $subCrawler = $this->crawler->filterXPath('//head/meta[@property="og:title"]');
         $meta = $subCrawler->getNode(0);
         if ($meta) {
             $title = $meta->getAttribute('content');
         }
     } catch (Exception $e) {
         $title = $e->getMessage();
     }
     return $title;
 }
Exemple #29
-1
 /**
  * Parses a document with YAML front matter and a markdown body into the
  * fields of a page.
  *
  * All h1 elements are removed from the body; the first non-empty one
  * becomes the title. '../..' is stripped from image sources. The part of
  * the body before the first <hr> is the intro, and all <hr> tags are
  * removed from the content.
  *
  * @param string $content Raw document
  *
  * @return array Array with the keys 'title', 'keywords', 'description',
  *               'intro', 'cover', 'content' and 'tags'
  */
 protected function process($content)
 {
     $parser = new Parser('yaml', 'markdown');
     $frontmatter = $parser->parse($content);
     $crawler = new Crawler();
     $crawler->addHtmlContent($frontmatter->getBody());
     // Pull the headings out of the body, remembering the first usable one.
     $title = '';
     foreach ($crawler->filterXPath('//h1') as $heading) {
         if (!$title) {
             $title = $heading->nodeValue;
         }
         $heading->parentNode->removeChild($heading);
     }
     foreach ($crawler->filterXPath('//img') as $image) {
         $image->setAttribute('src', str_replace('../..', '', $image->getAttribute('src')));
     }
     $html = $crawler->html();
     $intro = explode('<hr>', $html)[0];
     $body = str_replace('<hr>', '', $html);
     // The description falls back to the first 150 characters of the tag-free intro.
     if (isset($frontmatter->head['description'])) {
         $description = $frontmatter->head['description'];
     } else {
         $description = mb_substr(strip_tags(trim($intro)), 0, 150);
     }
     return [
         'title' => $title ?: '',
         'keywords' => isset($frontmatter->head['keywords']) ? $frontmatter->head['keywords'] : '',
         'description' => $description,
         'intro' => trim($intro),
         'cover' => isset($frontmatter->head['cover']) ? $frontmatter->head['cover'] : '',
         'content' => trim($body),
         'tags' => isset($frontmatter->head['tags']) ? $frontmatter->head['tags'] : '',
     ];
 }
Exemple #30
-12
 /**
  * Extracts the company name and its list of activities from an HTML page.
  *
  * @param string $html
  *
  * @return array Array with 'razonSocial' (trimmed, lower-cased, then
  *               word-capitalised) and 'actividades', a list of arrays with
  *               the keys 'giro', 'codigo', 'categoria' and 'afecta'
  */
 private function parse($html)
 {
     $crawler = new Crawler($html);
     $rawName = $crawler->filterXPath(self::XPATH_RAZON_SOCIAL)->text();
     $razonSocial = ucwords(strtolower(trim($rawName)));
     $actividades = [];
     $crawler->filterXPath(self::XPATH_ACTIVITIES)->each(function (Crawler $row, $i) use (&$actividades) {
         // The first matched row is skipped (presumably the table header - confirm).
         if ($i <= 0) {
             return;
         }
         // Reads the text of the font element inside the given column of the row.
         $cell = function ($column) use ($row) {
             return $row->filterXPath('//td[' . $column . ']/font')->text();
         };
         $actividades[] = [
             'giro' => $cell(1),
             'codigo' => (int) $cell(2),
             'categoria' => $cell(3),
             'afecta' => $cell(4) == 'Si',
         ];
     });
     return ['razonSocial' => $razonSocial, 'actividades' => $actividades];
 }