add() 공개 메소드

This method uses the appropriate specialized add*() method based on the type of the argument.
public add ( DOMNodeList | DOMNode | array | string | null $node )
$node DOMNodeList | DOMNode | array | string | null — the node, list/array of nodes, or markup string to add
예제 #1
0
 /**
  * Delegates the request to the parent implementation and wraps the content
  * of the resulting response in a new Crawler.
  *
  * @param mixed  $uri         Forwarded unchanged to parent::call()
  * @param string $method      Forwarded unchanged to parent::call(); defaults to 'get'
  * @param array  $parameters  Forwarded unchanged to parent::call()
  * @param bool   $changeStack Forwarded unchanged to parent::call()
  *
  * @return Crawler Crawler filled with the response content
  */
 public function call($uri, $method = 'get', $parameters = array(), $changeStack = true)
 {
     $response = parent::call($uri, $method, $parameters, $changeStack)->getResponse();

     $domCrawler = new Crawler();
     $domCrawler->add($response->getContent());

     return $domCrawler;
 }
예제 #2
0
 /**
  * @inheritdoc
  *
  * Maps every listing <li> matched by the XPath below to a Result holding
  * title, tags, id, URL, price and description.
  */
 protected function parse(Requests_Response $requests)
 {
     $page = new Crawler();
     $page->addContent($requests->body);
     $listings = $page->filterXPath('//*[@id="content"]/div/div[2]/div[1]/div[1]/ul/li');

     $results = array();
     /** @var DOMElement $listing */
     foreach ($listings as $listing) {
         // Wrap the single listing so CSS filters are scoped to it.
         $item = new Crawler();
         $item->add($listing);

         $tags = [];
         /** @var DOMElement $tagNode */
         foreach ($item->filter(".horizontal-separated-list li") as $tagNode) {
             $tags[] = $tagNode->textContent;
         }

         $result = new Result();
         $title = $item->filter(".details a")->text();
         $result->setTitle(trim($title));
         $result->setTags($tags);

         $relUrl = $item->filter(".details a")->attr("href");
         // The id is the part after "--" in the third "/"-separated path segment.
         $pathSegments = explode("/", parse_url($relUrl)["path"]);
         $slugParts = explode("--", $pathSegments[2]);
         $id = $slugParts[1];

         $result->setId($this->getName() . "_" . intval($id));
         $result->setUrl("http://www.anibis.ch/" . $relUrl);
         $result->setPrice($item->filter(".price")->text());
         $result->setDescription($item->filter(".details .description")->text());

         $results[] = $result;
     }

     return $results;
 }
예제 #3
0
    /**
     * Verifies that add() accepts every supported input: a \DOMDocument, a
     * \DOMNodeList, an array of nodes, a single \DOMNode and a markup string.
     *
     * @covers Symfony\Component\DomCrawler\Crawler::add
     */
    public function testAdd()
    {
        $crawler = new Crawler();
        $crawler->add($this->createDomDocument());
        $this->assertEquals('foo', $crawler->filter('div')->attr('class'), '->add() adds nodes from a \DOMDocument');

        $crawler = new Crawler();
        $crawler->add($this->createNodeList());
        $this->assertEquals('foo', $crawler->filter('div')->attr('class'), '->add() adds nodes from a \DOMNodeList');

        // Initialise explicitly so the variable is defined even when the
        // node list yields no elements.
        $list = array();
        foreach ($this->createNodeList() as $node) {
            $list[] = $node;
        }
        $crawler = new Crawler();
        $crawler->add($list);
        $this->assertEquals('foo', $crawler->filter('div')->attr('class'), '->add() adds nodes from an array of nodes');

        $crawler = new Crawler();
        $crawler->add($this->createNodeList()->item(0));
        $this->assertEquals('foo', $crawler->filter('div')->attr('class'), '->add() adds nodes from an \DOMNode');

        $crawler = new Crawler();
        $crawler->add('<html><body>Foo</body></html>');
        $this->assertEquals('Foo', $crawler->filter('body')->text(), '->add() adds nodes from a string');
    }
예제 #4
0
 /**
  * @inheritdoc
  *
  * Maps every ".box-row" teaser of the result list to a Result holding tags
  * (listed attributes plus the address), title, optional description, id and URL.
  */
 protected function parse(Requests_Response $requests)
 {
     $crawler = new Crawler();
     $crawler->addContent($requests->body);
     $r = $crawler->filter("#page > main > section > div > div.result-item-list article a > .box-row");
     $results = array();
     /** @var DOMElement $el */
     foreach ($r as $el) {
         // Wrap the single teaser so the CSS filters below are scoped to it.
         $c = new Crawler();
         $c->add($el);
         $tags = [];
         /** @var DOMElement $z */
         foreach ($c->filter(".box-row ul.box-row-item-attribute-list li") as $z) {
             // Child nodes 1 and 3 are combined as "label: value"; items with
             // fewer than four child nodes are skipped.
             if ($z->childNodes !== null && $z->childNodes->length >= 4) {
                 $tags[] = $z->childNodes->item(1)->nodeValue . ": " . $z->childNodes->item(3)->nodeValue;
             }
         }
         $addressB = $c->filter(".item-title--street");
         $address = $addressB->text() . " " . $addressB->siblings()->text();
         $tags[] = "Adresse: " . $address;
         $result = new Result();
         $result->setTags($tags);
         $result->setTitle(trim($c->filter("h2")->text()));
         // NOTE(review): "item-description" has no leading dot, so it selects an
         // <item-description> element rather than a CSS class — confirm intent.
         $description = $c->filter("item-description p");
         // count() does not depend on iterator position and works on every
         // Crawler version, unlike the inherited valid().
         if (count($description) > 0) {
             $result->setDescription($description->text());
         }
         // The href lives on the parent of the ".box-row" node (the enclosing <a>).
         $link = $el->parentNode->attributes->getNamedItem("href")->nodeValue;
         $result->setId($this->getName() . "_" . explode("/", $link)[2]);
         $result->setUrl("http://m.homegate.ch/" . $link);
         $results[] = $result;
     }
     return $results;
 }
예제 #5
0
파일: Email.php 프로젝트: tanateros/parser
 /**
  * Downloads every link reported by getLinks(), extracts the e-mail addresses
  * found in the anchors of each downloaded page and writes them, one per
  * line, to a file of the same name inside the mails directory.
  *
  * @param string $path Sub-directory (below cacheDir/siteHash) receiving the mail files
  * @return string Message naming the directory that holds the results
  */
 function parse($path)
 {
     $this->pathMails = $this->config['cacheDir'] . DIRECTORY_SEPARATOR . $this->siteHash . DIRECTORY_SEPARATOR . $path;
     if (!file_exists($this->pathMails)) {
         // Recursive, so a missing parent directory does not make mkdir() fail.
         mkdir($this->pathMails, 0777, true);
     }
     $links = $this->getLinks();
     // Downloads still pending, keyed by file name. Each 'end' handler removes
     // its own entry; the periodic timer stops once the list is empty.
     $files = $links;
     foreach ($links as $file => $url) {
         $readStream = fopen($url, 'r');
         $writeStream = fopen($this->pathSiteHash . DIRECTORY_SEPARATOR . $file, 'w');
         stream_set_blocking($readStream, 0);
         stream_set_blocking($writeStream, 0);
         $read = new \React\Stream\Stream($readStream, $this->loop);
         $write = new \React\Stream\Stream($writeStream, $this->loop);
         $read->on('end', function () use ($file, &$files) {
             $downloaded = $this->pathSiteHash . DIRECTORY_SEPARATOR . $file;
             $crawler = new Crawler();
             $crawler->add(file_get_contents($downloaded));
             $arrLinks = $crawler->filter('a')->each(function (Crawler $nodeCrawler) {
                 return [$nodeCrawler->filter('a')->attr('href')];
             });
             $mails = [];
             foreach ($arrLinks as $link) {
                 // An anchor without href yields null; treat it as an empty string.
                 $href = (string) $link[0];
                 // Accept bare addresses as well as "mailto:" links.
                 $stripped = str_replace('mailto:', '', $href);
                 if (filter_var($href, FILTER_VALIDATE_EMAIL) || filter_var($stripped, FILTER_VALIDATE_EMAIL)) {
                     $mails[] = $stripped;
                 }
             }
             file_put_contents($this->pathMails . DIRECTORY_SEPARATOR . $file, implode(PHP_EOL, $mails));
             unset($files[$file]);
         });
         $read->pipe($write);
     }
     // Every $this->config['periodTime'] seconds: report progress and stop the
     // timer once no download is pending any more.
     $this->loop->addPeriodicTimer($this->config['periodTime'], function ($timer) use (&$files) {
         if (0 === count($files)) {
             $timer->cancel();
         }
         echo PHP_EOL . "Passed {$this->config['periodTime']} sec. " . PHP_EOL;
     });
     echo "This script will show the download status every {$this->config['periodTime']} seconds." . PHP_EOL;
     $this->loop->run();
     return 'Dir of result in: ' . $this->config['cacheDir'] . DIRECTORY_SEPARATOR . $this->siteHash . DIRECTORY_SEPARATOR . $path;
 }
예제 #6
0
 /**
  * Downloads a fixed Chemist Warehouse search page and returns the name and
  * price of the first ".Product" element found on it.
  *
  * @Route("/sport/{name}")
  * @Template()
  *
  * @param mixed $name Route placeholder; not used by the lookup
  *
  * @return array|null array('productName' => ..., 'price' => ...) for the first
  *                    product, or null when the page lists no product
  *
  * @throws \RuntimeException When the page cannot be downloaded
  */
 public function indexAction($name)
 {
     $searchUrl = "http://www.chemistwarehouse.com.au/search?searchtext=Banana%20Boat%20SPF%2050+%20Everyday%20100g%20Tube&searchmode=allwords";
     $ch = curl_init($searchUrl);
     curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
     curl_setopt($ch, CURLOPT_HEADER, 0);
     curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
     $body = curl_exec($ch);
     // Read the error before the handle is closed.
     $error = curl_error($ch);
     curl_close($ch);
     if ($body === false) {
         // Without this guard the boolean false would be handed to Crawler.
         throw new \RuntimeException('Unable to download the product page: ' . $error);
     }
     $crawler = new Crawler($body);
     $products = $crawler->filter(".Product");
     foreach ($products as $product) {
         $productCrawler = new Crawler();
         $productCrawler->add($product);
         $productName = $productCrawler->filter('a')->attr('title');
         $price = $productCrawler->filter('.Price')->text();
         // Only the first product is reported.
         return ['productName' => $productName, 'price' => $price];
     }
     // No product on the page.
     return null;
 }
예제 #7
0
 /**
  * Collects the hrefs of all anchors on $this->site and keeps those that
  * answer with "HTTP/1.1 200 OK", either as given or prefixed with the site.
  *
  * @return array Reachable links keyed by "<counter>.<config dataFormat>"
  */
 function getLinks()
 {
     $page = new Crawler();
     $page->add(file_get_contents($this->site));
     $hrefs = $page->filter('a')->each(function (Crawler $anchor) {
         return [$anchor->filter('a')->attr('href')];
     });

     $validLinks = [];
     $counter = 0;
     foreach ($hrefs as $entry) {
         $link = str_replace('/redirect.php?url=', '', $entry[0]);
         // NOTE(review): links that already validate as URLs are skipped, so only
         // non-absolute hrefs are probed — confirm this negation is intended.
         if (filter_var($link, FILTER_VALIDATE_URL)) {
             continue;
         }
         if (@get_headers($link)[0] == 'HTTP/1.1 200 OK') {
             $validLinks[$counter . '.' . $this->config['dataFormat']] = $link;
         } elseif (@get_headers($this->site . $link)[0] == 'HTTP/1.1 200 OK') {
             $validLinks[$counter . '.' . $this->config['dataFormat']] = $this->site . $link;
         }
         // The counter advances for every probed link, reachable or not.
         $counter++;
     }

     return $validLinks;
 }
예제 #8
0
 /**
  * Verifies that add() accepts every supported input: a \DOMDocument, a
  * \DOMNodeList, an array of nodes, a single \DOMNode and a markup string.
  * Skipped when the CssSelector component is not installed.
  *
  * @covers Symfony\Component\DomCrawler\Crawler::add
  */
 public function testAdd()
 {
     if (!class_exists('Symfony\\Component\\CssSelector\\CssSelector')) {
         $this->markTestSkipped('The "CssSelector" component is not available');
     }
     $crawler = new Crawler();
     $crawler->add($this->createDomDocument());
     $this->assertEquals('foo', $crawler->filter('div')->attr('class'), '->add() adds nodes from a \\DOMDocument');
     $crawler = new Crawler();
     $crawler->add($this->createNodeList());
     $this->assertEquals('foo', $crawler->filter('div')->attr('class'), '->add() adds nodes from a \\DOMNodeList');
     // Initialise explicitly so the variable is defined even when the node
     // list yields no elements.
     $list = array();
     foreach ($this->createNodeList() as $node) {
         $list[] = $node;
     }
     $crawler = new Crawler();
     $crawler->add($list);
     $this->assertEquals('foo', $crawler->filter('div')->attr('class'), '->add() adds nodes from an array of nodes');
     $crawler = new Crawler();
     $crawler->add($this->createNodeList()->item(0));
     $this->assertEquals('foo', $crawler->filter('div')->attr('class'), '->add() adds nodes from an \\DOMNode');
     $crawler = new Crawler();
     $crawler->add('<html><body>Foo</body></html>');
     $this->assertEquals('Foo', $crawler->filter('body')->text(), '->add() adds nodes from a string');
 }
예제 #9
0
 /**
  * Passing an integer to add() is expected to raise the exception named below.
  *
  * @expectedException \InvalidArgumentException
  */
 public function testAddInvalidType()
 {
     $subject = new Crawler();
     $subject->add(1);
 }
 /**
  * Re-parses $content as HTML, drops every element matching
  * "<widgetItem>:last-child" and returns the inner HTML of the first <body>.
  *
  * @param string $content HTML markup to strip
  *
  * @return string
  */
 private function removeLastItem($content)
 {
     $charset = \Yii::$app->charset;

     $workingCopy = new \DOMDocument('1.0', $charset);
     $parsed = new Crawler();
     $parsed->addHTMLContent($content, $charset);

     // Copy the crawler's first node into a separate document, below a
     // synthetic "_root" element.
     $root = $workingCopy->appendChild($workingCopy->createElement('_root'));
     $parsed->rewind();
     $root->appendChild($workingCopy->importNode($parsed->current(), true));

     $xpath = new \DOMXPath($workingCopy);
     $lastItems = $xpath->query(CssSelector::toXPath($this->widgetItem . ':last-child'));
     foreach ($lastItems as $lastItem) {
         $lastItem->parentNode->removeChild($lastItem);
     }

     // Reuse the crawler to serialise the modified document.
     $parsed->clear();
     $parsed->add($workingCopy);

     return $parsed->filter('body')->eq(0)->html();
 }
예제 #11
0
 /**
  * Adding a bare \DOMNode is expected to raise the exception and message
  * named below.
  *
  * @expectedException \InvalidArgumentException
  * @expectedExceptionMessage Nodes set in a Crawler must be DOMElement or DOMDocument instances, "DOMNode" given.
  */
 public function testAddInvalidNode()
 {
     $subject = new Crawler();
     $subject->add(new \DOMNode());
 }
예제 #12
0
 /**
  * Reads the text of every <p> found directly under <body> and groups the
  * texts into consecutive pairs.
  *
  * @param string $html Markup handed to Crawler::add()
  *
  * @return array List of pairs indexed [pair][0|1], e.g.
  *               array(array('Hello World!', 'Hello Crawler!'), ...); the last
  *               entry holds a single text when the number of paragraphs is odd
  */
 public function extractAction($html)
 {
     $document = new Crawler();
     $document->add($html);

     // Each extracted row is array(text, class attribute); only the text is kept.
     $paragraphs = $document->filterXpath('//body/p')->extract(array('_text', 'class'));
     $texts = array();
     foreach ($paragraphs as $paragraph) {
         $texts[] = $paragraph[0];
     }

     // Group consecutive texts two by two.
     return array_chunk($texts, 2);
 }
예제 #13
0
 /**
  * Fetches the URL built for the given page and wraps the response's DOM
  * document in a Crawler.
  *
  * @param int $page Page passed to buildUrl()
  *
  * @return Crawler
  */
 private function doRequest($page)
 {
     $document = $this->browser->get($this->buildUrl($page))->toDomDocument();

     $pageCrawler = new Crawler();
     $pageCrawler->add($document);

     return $pageCrawler;
 }
예제 #14
0
 /**
  * Adds a node to the current list of nodes.
  *
  * A Crawler argument is unpacked and each of its nodes is added
  * individually; every other argument is handed to the parent, which picks
  * the specialized add*() method matching the argument type.
  *
  * @param null|\DOMNodeList|array|\DOMNode|Crawler $node A node
  *
  * @api
  */
 public function add($node)
 {
     if (!$node instanceof Crawler) {
         parent::add($node);

         return;
     }

     foreach ($node as $member) {
         $this->addNode($member);
     }
 }
예제 #15
0
 /**
  * Marks the test as failed, using the text of the response body matched by
  * the selector as the failure message.
  *
  * @param string $selector Selector of the response part to print; defaults to "body"
  */
 protected function debugResponse($selector = "body")
 {
     if ($this->getResponseBody()) {
         $dom = new Crawler();
         $dom->add($this->getResponseBody());
         $this->fail("Response debug:\n" . $dom->filter($selector)->text());
     }
     $this->fail("No response to debug");
 }
예제 #16
0
 /**
  * Extracts table records from a page and returns one row per record as
  * array(id, title, description, image URL).
  *
  * The ids come from the text of every second <th>, the titles from the
  * "value" of the <input> elements inside the <th> cells, the descriptions
  * from the text of every second <td> and the URLs from the "src" of the
  * images linked inside the <td> cells.
  *
  * @param string $url NOTE(review): despite its name this value is handed to
  *                    Crawler::add() as markup and is never fetched — confirm with callers
  *
  * @return array List of rows indexed [record][0..3]; a column that has no
  *               value for a record is null
  */
 public function extractAction($url)
 {
     // Picks column 0 of every $step-th row of a Crawler::extract() result.
     $firstColumn = function (array $rows, $step) {
         $values = array();
         foreach ($rows as $index => $row) {
             if ($index % $step == 0) {
                 $values[] = $row[0];
             }
         }
         return $values;
     };

     $crawler = new Crawler();
     $crawler->add($url);

     // Narrow the document down to the form's inner containers and re-parse
     // them on their own, so the XPaths below are relative to that fragment.
     $html = '';
     foreach ($crawler->filterXpath('//html/body/div/div/form/div/div') as $domElement) {
         $html .= $domElement->ownerDocument->saveHTML($domElement);
     }
     $crawler = new Crawler();
     $crawler->add($html);

     $headerCells = $crawler->filterXpath('//html/body/div/table/tr/th');
     $ids = $firstColumn($headerCells->extract(array('_text', 'class')), 2);
     $descriptions = $firstColumn(
         $crawler->filterXpath('//html/body/div/table/tr/td')->extract(array('_text', 'class')),
         2
     );
     $urls = $firstColumn(
         $crawler->filterXpath('//html/body/div/table/tr/td/a/img')->extract(array('src', 'img')),
         1
     );

     // The titles are the input values inside the header cells; the cells are
     // re-parsed as a standalone fragment first.
     $headerHtml = '';
     foreach ($headerCells as $domElement) {
         $headerHtml .= $domElement->ownerDocument->saveHTML($domElement);
     }
     $crawler = new Crawler();
     $crawler->add($headerHtml);
     $titles = $firstColumn(
         $crawler->filterXpath('//html/body/th/input')->extract(array('value', 'input')),
         1
     );

     // One row per id. The other columns may be shorter, hence the isset guards.
     $showDataA = array();
     foreach ($ids as $j => $id) {
         $showDataA[$j] = array(
             $id,
             isset($titles[$j]) ? $titles[$j] : null,
             isset($descriptions[$j]) ? $descriptions[$j] : null,
             isset($urls[$j]) ? $urls[$j] : null,
         );
     }

     return $showDataA;
 }