Example #1
0
 /**
  * Replaces the item body with content extracted from the item's web page.
  *
  * For every configured URL pattern that matches the item URL, the page is
  * fetched through getFile(), re-encoded using the charset announced in its
  * Content-Type header (when there is one) and queried with the XPath
  * expression paired with that pattern. A non-empty result, after relative
  * links have been made absolute, becomes the new item body.
  *
  * @param \OCA\News\Db\Item $item
  * @return \OCA\News\Db\Item enhanced item
  */
 public function enhance(Item $item)
 {
     foreach ($this->regexXPathPair as $regex => $search) {
         if (!preg_match($regex, $item->getUrl())) {
             continue;
         }

         $file = $this->getFile($item->getUrl());

         // A response may carry no Content-Type header at all; treat that
         // like a header without a charset instead of reading a missing key.
         $contentType = isset($file->headers['content-type'])
             ? $file->headers['content-type']
             : '';

         // convert encoding by detecting charset from header
         if (preg_match('/(?<=charset=)[^;]*/', $contentType, $matches)) {
             $body = mb_convert_encoding($file->body, 'HTML-ENTITIES', $matches[0]);
         } else {
             $body = $file->body;
         }

         // loadHTML() refuses an empty document (warning before PHP 8,
         // ValueError since), and a failed conversion yields no usable
         // markup either, so there is nothing to extract for this pattern.
         if (!is_string($body) || trim($body) === '') {
             continue;
         }

         $dom = new \DOMDocument();
         // real-world pages are rarely valid HTML; silence parser warnings
         @$dom->loadHTML($body);
         $xpath = new \DOMXpath($dom);
         $xpathResult = $xpath->evaluate($search);

         // evaluate() only yields a string for text queries; anything else
         // is passed through domToString() first
         if (!is_string($xpathResult)) {
             $xpathResult = $this->domToString($xpathResult);
         }

         // convert all relative to absolute URLs
         $xpathResult = $this->substituteRelativeLinks($xpathResult, $item->getUrl());

         // keep the original body when the query produced nothing
         if ($xpathResult) {
             $item->setBody($xpathResult);
         }
     }
     return $item;
 }
    /**
     * Replaces the item body with content extracted from the item's web page.
     *
     * For every configured URL pattern that matches the item URL, the page
     * is fetched through getFile(), re-encoded based on the charset declared
     * in a <meta> tag (falling back to mb_detect_encoding() and finally to
     * UTF-8) and queried with the XPath expression paired with that pattern.
     * A non-empty result, after relative links have been made absolute,
     * becomes the new item body. An empty page ends processing immediately.
     *
     * @param \OCA\News\Db\Item $item
     * @return \OCA\News\Db\Item enhanced item
     */
    public function enhance(Item $item){

        foreach($this->regexXPathPair as $regex => $search) {

            if(preg_match($regex, $item->getUrl())) {
                $body = $this->getFile($item->getUrl());

                // First check if either <meta charset="..."> or
                // <meta http-equiv="Content-Type" ...> is specified and use it
                // If this fails use mb_detect_encoding()
                // Both captures exclude quotes so that attributes following
                // the charset inside the same tag are not swallowed.
                $charsetRegex =
                    '/<meta\s+[^>]*(?:charset\s*=\s*[\'"]([^>\'"]*)[\'"]' .
                    '|http-equiv\s*=\s*[\'"]content-type[\'"]\s+[^>]*' .
                    'content\s*=\s*[\'"][^>]*charset=([^>\'"]*)[\'"])[^>]*>' .
                    '/i';
                if(preg_match($charsetRegex, $body, $matches)) {
                    // only the alternative that matched fills its group and
                    // trailing unmatched groups are not reported, so the
                    // charset is always the last entry
                    $enc = strtoupper(trim($matches[sizeof($matches) - 1]));
                } else {
                    $enc = mb_detect_encoding($body);
                }
                // empty capture or failed detection: assume UTF-8
                $enc = $enc ? $enc : 'UTF-8';
                $body = mb_convert_encoding($body, 'HTML-ENTITIES', $enc);
                if (trim($body) === '') {
                    return $item;
                }

                $dom = new DOMDocument();
                // real-world pages are rarely valid HTML; silence parser
                // warnings
                @$dom->loadHTML($body);

                $xpath = new DOMXpath($dom);
                $xpathResult = $xpath->evaluate($search);

                // in case it wasnt a text query assume its a dom element and
                // convert it to text
                if(!is_string($xpathResult)) {
                    $xpathResult = $this->domToString($xpathResult);
                }

                $xpathResult = trim($xpathResult);

                // convert all relative to absolute URLs
                $xpathResult = $this->substituteRelativeLinks(
                    $xpathResult, $item->getUrl()
                );

                // keep the original body when the query produced nothing
                if($xpathResult) {
                    $item->setBody($xpathResult);
                }
            }
        }

        return $item;
    }
Example #3
0
 /**
  * Applies the configured regex replacements to the body of an item whose
  * URL matches the article URL pattern; other items are returned untouched.
  *
  * @param \OCA\News\Db\Item $item
  * @return \OCA\News\Db\Item enhanced item
  */
 public function enhance(Item $item)
 {
     if (!preg_match($this->matchArticleUrl, $item->getUrl())) {
         return $item;
     }

     $body = $item->getBody();
     foreach ($this->regexPair as $search => $replaceWith) {
         $replaced = preg_replace($search, $replaceWith, $body);
         // preg_replace() returns null on error (e.g. a broken pattern or
         // an exhausted backtrack limit); keep the last good body rather
         // than wiping the item's content.
         if ($replaced !== null) {
             $body = $replaced;
         }
     }
     $item->setBody($body);

     return $item;
 }
    /**
     * Feeds the enhancer a page containing an absolute image URL and a
     * mailto: link and checks the resulting item body against the expected
     * markup, in which both targets are unchanged.
     */
    public function testDontTransformAbsoluteUrlsAndMails()
    {
        // canned HTTP response handed out by the mocked file factory
        $response = new \stdClass();
        $response->headers = array("content-type" => "text/html; charset=utf-8");
        $response->body = '<html>
			<body>
				<img src="http://www.url.com/absolute/url.png">
				<a href="mailto:test@testsite.com">mail</a>
			</body>
		</html>';

        $item = new Item();
        $item->setUrl('https://www.explosm.net/all/312');
        $item->setBody('Hello thar');

        // the page must be requested exactly once with the configured options
        $this->fileFactory->expects($this->once())
            ->method('getFile')
            ->with(
                $this->equalTo($item->getUrl()),
                $this->equalTo($this->timeout),
                $this->equalTo($this->redirects),
                $this->equalTo($this->headers),
                $this->equalTo($this->userAgent)
            )
            ->will($this->returnValue($response));

        $expected = '<img src="http://www.url.com/absolute/url.png"><a target="_blank" href="mailto:test@testsite.com">mail</a>';
        $enhanced = $this->testEnhancer->enhance($item);

        $this->assertEquals($expected, $enhanced->getBody());
    }
 /**
  * Feeds the enhancer a page containing an absolute image URL and a
  * mailto: link and checks the resulting item body against the expected
  * markup, in which both targets are unchanged.
  */
 public function testDontTransformAbsoluteUrlsAndMails()
 {
     $item = new Item();
     $item->setUrl('https://www.explosm.net/all/312');
     $item->setBody('Hello thar');

     // page served for the item URL, registered through setUpFile()
     $html = '<html>
         <body>
             <img src="http://www.url.com/absolute/url.png">
             <a href="mailto:test@testsite.com">mail</a>
         </body>
     </html>';
     $this->setUpFile($html, 'utf-8', $item->getUrl());

     $expected = '<div>'
         . '<img src="http://www.url.com/absolute/url.png">'
         . '<a target="_blank" rel="noreferrer" href="mailto:test@testsite.com">mail</a>'
         . '</div>';

     $enhanced = $this->testEnhancer->enhance($item);

     $this->assertEquals($expected, $enhanced->getBody());
 }