clear() public method

Removes all the nodes from the crawler.
public clear ( )
 /**
  * Builds a WatchLink for a URL from the fetched page and the supplied tags.
  *
  * The crawler is emptied and then loaded with whatever the fetcher returns
  * for the URL before the title, description and image extractors are called.
  *
  * @param string $url  Address handed to the fetcher; also stored on the link.
  * @param array  $tags Values resolved one by one through the tag repository's
  *                     findOrCreate() and attached to the link.
  *
  * @return WatchLink
  */
 public function extract(string $url, array $tags) : WatchLink
 {
     $link = new WatchLink();
     $link->setUrl($url);

     // Empty the crawler before loading the new page into it.
     $this->crawler->clear();
     $html = $this->fetcher->fetch($url);
     $this->crawler->addHtmlContent($html);

     $link->setName($this->extractTitle());
     $link->setDescription($this->extractDescription());
     $link->setImage($this->extractImage());

     foreach ($tags as $tagValue) {
         $tag = $this->tagRepository->findOrCreate($tagValue);
         $link->addTag($tag);
     }

     return $link;
 }
示例#2
0
 /**
  * {@inheritDoc}
  *
  * Returns one entry per element matching "#overview-top"; each entry is an
  * array with the keys "title", "year", "rating" and "desc".
  *
  * NOTE(review): the inherited contract is not visible here - confirm that it
  * expects this list as the return value.
  */
 public function crawl($html)
 {
     $crawler = new Crawler();
     $crawler->addHtmlContent($html);

     // Return the scraped rows; they used to be assigned to a local variable
     // and dropped, so callers always received null.
     return $crawler->filter('#overview-top')->each(function (Crawler $domCrawler) {
         // The first header span carries the title, the last one the year.
         $headerSpans = $domCrawler->filter('.header span');

         return [
             'title' => $headerSpans->first()->text(),
             'year' => $headerSpans->last()->text(),
             'rating' => $domCrawler->filter('.star-box .giga-star ')->first()->text(),
             'desc' => $domCrawler->filter('p.description')->text(),
         ];
     });
 }
示例#3
0
 /**
  * Loads HTML into the crawler and collects its meta tags and links.
  *
  * Meta tags are keyed by their lower-cased "name" attribute; <meta> elements
  * without a name are skipped. Anchors whose "rel" attribute equals "nofollow"
  * (compared case-insensitively) are left out, and duplicate hrefs are removed.
  * Both results are also stored on $this->links and $this->metaTags.
  *
  * @param string $html Markup passed to the crawler's addHtmlContent().
  * @return array Array with the keys "links" and "meta".
  */
 public function load($html)
 {
     $this->crawler->clear();
     $this->crawler->addHtmlContent($html);

     $metaTags = [];
     // $metaTags must be captured by reference: without it the closure writes
     // to its own local variable and the outer array stays empty.
     $this->crawler->filter('meta')->each(function (Crawler $node) use (&$metaTags) {
         $name = $node->attr('name');
         // <meta charset>, <meta property> etc. carry no name; skipping them
         // keeps them from colliding on a single empty key.
         if (null === $name || '' === $name) {
             return;
         }
         $metaTags[strtolower($name)] = $node->attr('content');
     });

     $links = [];
     $this->crawler->filter('a')->each(function (Crawler $link) use (&$links) {
         // attr() may yield null for a missing attribute; cast before strtolower().
         if ('nofollow' === strtolower((string) $link->attr('rel'))) {
             return;
         }
         $links[] = $link->attr('href');
     });

     $this->links = array_unique($links);
     $this->metaTags = $metaTags;

     return ['links' => $this->links, 'meta' => $metaTags];
 }
示例#4
0
 /**
  * Fills a position's attributes from the rows matched by "#full-props-list tr".
  *
  * Every row that contains both a <th> and a <td> contributes one entry:
  * trimmed header text => trimmed cell text.
  *
  * @param Position $position Receives the collected map through setAttributes().
  * @param string $content Markup handed to the Crawler constructor.
  */
 public function getPositionData(Position $position, $content)
 {
     $crawler = new Crawler($content);
     $rows = $crawler->filter('#full-props-list tr');
     $attributes = [];

     foreach ($rows as $row) {
         // Point the crawler at the current row only, so the filters below
         // look at this row's cells.
         $crawler->clear();
         $crawler->addNode($row);

         $header = $crawler->filter('th');
         $cell = $crawler->filter('td');
         if (0 === $header->count() || 0 === $cell->count()) {
             continue;
         }

         $attributes[trim($header->text())] = trim($cell->text());
     }

     $position->setAttributes($attributes);
 }
示例#5
0
文件: Dmm.php 项目: sdhou/CrawlPush
 /**
  * Crawls the page of every stored Dmm record and registers the entries found.
  *
  * For each record the page at its href is downloaded with a cookie jar that
  * is reused across requests. Every anchor matched by the list XPath is passed
  * to DmmList::add() together with its text, href and image source, and the
  * href is echoed.
  */
 public function handle()
 {
     $this->jobLogInfo('dmm start');

     $dom = new Crawler();
     $client = new Guzzle(['timeout' => 10, 'cookies' => true]);
     $cookies = new CookieJar();
     $anchorPath = '//ul[@id="list"]/li/div[1]//p[@class="tmb"]/a';

     \App\Model\Dmm::all()->map(function ($dmm) use ($client, $cookies, $dom, $anchorPath) {
         $response = $client->get($dmm->href, ['cookies' => $cookies]);
         $dom->addHtmlContent($response->getBody());

         $anchors = $dom->filterXPath($anchorPath);
         $anchors->each(function (Crawler $anchor) use ($dmm) {
             $title = trim($anchor->text());
             $href = $anchor->attr('href');
             $imageSrc = $anchor->filterXPath('//span[1]/img')->attr('src');

             DmmList::add($dmm->id, $title, $href, $imageSrc);
             echo "{$href}\n";
         });

         // Empty the reused crawler so the next page starts without these nodes.
         $dom->clear();
     });

     $this->jobLogInfo('crawl end');
 }
示例#6
0
 /**
  * clear() must leave the crawler without any nodes.
  */
 public function testClear()
 {
     $document = new \DOMDocument();
     $crawler = new Crawler($document->createElement('test'));

     $crawler->clear();

     $this->assertCount(0, $crawler, '->clear() removes all the nodes from the crawler');
 }
示例#7
0
 /**
  * clear() must leave the crawler without any nodes.
  */
 public function testClear()
 {
     // Create the node through a DOMDocument: a bare "new \DOMNode()" belongs
     // to no document and is not a usable node to hand to the crawler.
     $doc = new \DOMDocument();
     $node = $doc->createElement('test');

     $crawler = new Crawler($node);
     $crawler->clear();

     $this->assertCount(0, $crawler, '->clear() removes all the nodes from the crawler');
 }
 /**
  * Returns the given markup without the elements matching
  * "<widgetItem>:last-child".
  *
  * The first node of the parsed content is copied under a "_root" element of
  * a fresh document, the matching elements are detached there, and the
  * crawler is reloaded from that document.
  *
  * @param string $content
  *
  * @return string HTML of the first "body" element after the removal.
  */
 private function removeLastItem($content)
 {
     $charset = \Yii::$app->charset;

     $crawler = new Crawler();
     $crawler->addHTMLContent($content, $charset);

     $document = new \DOMDocument('1.0', $charset);
     $root = $document->appendChild($document->createElement('_root'));

     // Import the crawler's first node (deep copy) into the working document.
     $crawler->rewind();
     $firstNode = $crawler->current();
     $root->appendChild($document->importNode($firstNode, true));

     $lastItemXPath = CssSelector::toXPath($this->widgetItem . ':last-child');
     $xpath = new \DOMXPath($document);
     foreach ($xpath->query($lastItemXPath) as $lastItem) {
         $lastItem->parentNode->removeChild($lastItem);
     }

     // Swap the crawler's nodes for the edited document.
     $crawler->clear();
     $crawler->add($document);

     return $crawler->filter('body')->eq(0)->html();
 }
示例#9
0
 /**
  * Replaces the crawler's current nodes with the given markup.
  *
  * @param string $html
  */
 private function setHtml($html)
 {
     $crawler = $this->crawler;

     // Clear first so nodes already held by the crawler do not remain.
     $crawler->clear();
     $crawler->addHtmlContent($html);
 }