예제 #1
0
 /**
  * Replaces the item body with content extracted from the article page.
  *
  * For each configured pair (URL regex => XPath expression) whose regex
  * matches the item URL, the page behind that URL is fetched, converted
  * using the charset named in its content-type header (when one is given)
  * and queried with the XPath expression. A non-empty result, after
  * relative links have been substituted, becomes the new item body.
  *
  * @param \OCA\News\Db\Item $item
  * @return \OCA\News\Db\Item enhanced item
  */
 public function enhance(Item $item)
 {
     foreach ($this->regexXPathPair as $regex => $search) {
         if (!preg_match($regex, $item->getUrl())) {
             continue;
         }

         $file = $this->getFile($item->getUrl());

         // convert encoding by detecting charset from header; the header
         // may be missing entirely, so do not index it blindly
         $contentType = isset($file->headers['content-type'])
             ? $file->headers['content-type']
             : '';
         $charset = '';
         if (preg_match('/(?<=charset=)[^;]*/', $contentType, $matches)) {
             // strip whitespace and optional quotes around the charset name
             $charset = trim($matches[0], " \t\"'");
         }

         if ($charset !== '') {
             $body = mb_convert_encoding($file->body, 'HTML-ENTITIES', $charset);
         } else {
             $body = $file->body;
         }

         // nothing to parse: loadHTML() cannot handle an empty document
         if (trim((string) $body) === '') {
             continue;
         }

         $dom = new \DOMDocument();
         // markup found in the wild is rarely valid, silence parser warnings
         @$dom->loadHTML($body);
         $xpath = new \DOMXpath($dom);
         $xpathResult = $xpath->evaluate($search);

         // in case it wasnt a text query hand the result to domToString()
         if (!is_string($xpathResult)) {
             $xpathResult = $this->domToString($xpathResult);
         }

         // convert all relative to absolute URLs
         $xpathResult = $this->substituteRelativeLinks($xpathResult, $item->getUrl());
         if ($xpathResult) {
             $item->setBody($xpathResult);
         }
     }

     return $item;
 }
예제 #2
0
 /**
  * Registers two global AddEnhancer instances and expects an item whose
  * body starts as 1 to come back with a body of 3.
  */
 public function testGlobalEnhancer()
 {
     $article = new Item();
     $article->setBody(1);

     // the same kind of enhancer is registered twice
     for ($i = 0; $i < 2; $i++) {
         $this->enhancer->registerGlobalEnhancer(new AddEnhancer());
     }

     $enhanced = $this->enhancer->enhance($article, 'test');

     $this->assertEquals(3, $enhanced->getBody());
 }
    /**
     * A body made of two sibling paragraphs is expected to come back
     * wrapped in a single <div>.
     */
    public function testMultipleParagraphsInDiv() {
        $paragraphs = '<p>paragraph 1</p><p>paragraph 2</p>';

        $article = new Item();
        $article->setBody($paragraphs);
        $enhanced = $this->enhancer->enhance($article);

        $this->assertEquals(
            '<div>' . $paragraphs . '</div>',
            $enhanced->getBody()
        );
    }
예제 #4
0
 /**
  * An item whose URL matches the enhancer's URL pattern must have the
  * configured regex replacement applied to its body.
  */
 public function testRegexEnhancer()
 {
     $replacements = ["%tes(ts)%" => "heho\$1tests"];
     $enhancer = new RegexArticleEnhancer('%john.com%', $replacements);

     $article = new Item();
     $article->setBody('atests is a nice thing');
     $article->setUrl('http://john.com');

     $enhanced = $enhancer->enhance($article);

     $this->assertEquals(
         'ahehotstests is a nice thing',
         $enhanced->getBody()
     );
 }
예제 #5
0
 /**
  * Runs every configured search => replacement regex over the item body,
  * in configuration order, when the item URL matches the article URL
  * pattern. Items with a non-matching URL are returned unchanged.
  *
  * @param \OCA\News\Db\Item $item
  * @return \OCA\News\Db\Item enhanced item
  */
 public function enhance(Item $item)
 {
     // not one of the articles this enhancer is responsible for
     if (!preg_match($this->matchArticleUrl, $item->getUrl())) {
         return $item;
     }

     $rewritten = $item->getBody();
     foreach ($this->regexPair as $pattern => $replacement) {
         // each replacement works on the output of the previous one
         $rewritten = preg_replace($pattern, $replacement, $rewritten);
     }
     $item->setBody($rewritten);

     return $item;
 }
예제 #6
0
 /**
  * Inserts an item through the mapper, fetches it again by id and user
  * and compares the stored fields, including a body with a non-ASCII
  * character.
  */
 public function testFind()
 {
     $expected = new Item();
     $expected->setTitle('my title thats long');
     $expected->setGuid('a doner');
     $expected->setGuidHash('a doner');
     $expected->setFeedId($this->feeds['first feed']->getId());
     $expected->setUnread();
     $expected->setBody('Döner');

     $inserted = $this->itemMapper->insert($expected);
     $actual = $this->itemMapper->find($inserted->getId(), $this->userId);

     // every listed getter must report the same value before and after
     // the round trip
     $getters = ['getTitle', 'getGuid', 'getGuidHash', 'getFeedId', 'isRead'];
     foreach ($getters as $getter) {
         $this->assertEquals($expected->$getter(), $actual->$getter());
     }

     $this->assertEquals('Döner', $actual->getBody());
 }
예제 #7
0
 /**
  * Replaces the item body with the result of an XPath query run against
  * the page the item links to.
  *
  * For every configured pair (URL regex => XPath expression) whose regex
  * matches the item URL the page is fetched, converted to HTML entities
  * and queried. The encoding is taken from a <meta> tag if one is found,
  * otherwise from mb_detect_encoding(), otherwise UTF-8 is used. An empty
  * page ends processing immediately and returns the item as it is.
  *
  * @param \OCA\News\Db\Item $item
  * @return \OCA\News\Db\Item enhanced item
  */
 public function enhance(Item $item)
 {
     // matches either <meta charset="..."> or
     // <meta http-equiv="Content-Type" ... content="...charset=...">
     $charsetPattern = '/<meta\\s+[^>]*(?:charset\\s*=\\s*[\'"]([^>\'"]*)[\'"]' . '|http-equiv\\s*=\\s*[\'"]content-type[\'"]\\s+[^>]*' . 'content\\s*=\\s*[\'"][^>]*charset=([^>]*)[\'"])[^>]*>' . '/i';

     foreach ($this->regexXPathPair as $urlPattern => $query) {
         if (!preg_match($urlPattern, $item->getUrl())) {
             continue;
         }

         $html = $this->getFile($item->getUrl());

         if (preg_match($charsetPattern, $html, $groups)) {
             // the charset is held by the last captured group
             $encoding = strtoupper($groups[count($groups) - 1]);
         } else {
             $encoding = mb_detect_encoding($html);
         }
         if (!$encoding) {
             $encoding = 'UTF-8';
         }

         $html = mb_convert_encoding($html, 'HTML-ENTITIES', $encoding);
         if (trim($html) === '') {
             return $item;
         }

         $document = new DOMDocument();
         $loaded = @$document->loadHTML($html);
         $finder = new DOMXpath($document);
         $extracted = $finder->evaluate($query);

         // anything that is not already a string is handed to
         // domToString() for conversion
         if (!is_string($extracted)) {
             $extracted = $this->domToString($extracted);
         }

         // convert all relative to absolute URLs
         $extracted = $this->substituteRelativeLinks(
             trim($extracted),
             $item->getUrl()
         );

         if ($loaded && $extracted !== false && $extracted !== '') {
             $item->setBody($extracted);
         }
     }

     return $item;
 }
    /**
     * This method is run after all enhancers and for every item.
     *
     * Wraps the body in a <div>, parses it and rewrites the src of YouTube
     * iframes so that "autoplay=1" becomes "autoplay=0". The serialized
     * document, including the wrapping <div>, is stored back as the body.
     *
     * @param Item $item item whose body should be post-processed
     * @return Item the same item with its body replaced
     */
    public function enhance(Item $item) {

        $dom = new DOMDocument();

        // wrap it inside a div if there is none to prevent invalid wrapping
        // inside <p> tags
        $body = '<div>' . $item->getBody() . '</div>';

        // the flags keep libxml from adding <html>/<body> and a doctype
        @$dom->loadHTML($body, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);

        $xpath = new DOMXpath($dom);

        // remove youtube autoplay
        // NOTE: PHP supports only XPath 1.0 so no matches() function :(
        $youtubeIframes = "//iframe[contains(@src, 'youtube.com')]";

        $elements = $xpath->query($youtubeIframes);
        foreach ($elements as $element) {

            // src needs to be matched against regex to prevent false positives
            // and because theres no XPath matches function available
            $src = $element->getAttribute('src');
            // the dot of the host name is escaped so that only the literal
            // host "youtube.com" passes, not e.g. "youtubeXcom"
            $regex = '%^(http://|https://|//)(www\.)?youtube\.com/' .
                     '.*\?.*autoplay=1.*%i';

            if (preg_match($regex, $src)) {
                $replaced = str_replace('autoplay=1', 'autoplay=0', $src);
                $element->setAttribute('src', $replaced);
            }
        }

        // save all changes back to the item
        $item->setBody(trim($dom->saveHTML()));

        return $item;
    }
예제 #9
0
 /**
  * Builds an Item from a parsed SimplePie item.
  *
  * Falls back to the feed link when the entry has no permalink, to the
  * current time when it has no publication date and to the author's
  * e-mail when the author has no name. An enclosure is only recorded if
  * its type contains "audio/".
  *
  * @param object $simplePieItem parsed feed entry (get_* accessors are used)
  * @param string $feedLink link used when the entry has no permalink
  * @return Item the populated item
  */
 protected function buildItem($simplePieItem, $feedLink)
 {
     $item = new Item();
     $item->setStatus(0);
     $item->setUnread();

     $permalink = $this->decodeTwice($simplePieItem->get_permalink());
     $item->setUrl($permalink ? $permalink : $feedLink);

     // unescape content because angularjs helps against XSS
     $item->setTitle($this->decodeTwice($simplePieItem->get_title()));
     $item->setGuid($simplePieItem->get_id());

     // purification is done in the businesslayer
     $item->setBody($simplePieItem->get_content());

     // pubdate is not required. if not given use the current date
     $pubDate = $simplePieItem->get_date('U');
     $item->setPubDate($pubDate ? $pubDate : $this->time->getTime());
     $item->setLastModified($this->time->getTime());

     $author = $simplePieItem->get_author();
     if ($author !== null) {
         $displayName = $this->decodeTwice($author->get_name());
         if (!$displayName) {
             // no usable name, the e-mail address is stored instead
             $displayName = $this->decodeTwice($author->get_email());
         }
         $item->setAuthor($displayName);
     }

     // TODO: make it work for video files also
     $enclosure = $simplePieItem->get_enclosure();
     if ($enclosure !== null) {
         $mimeType = $enclosure->get_type();
         $isAudio = stripos($mimeType, "audio/") !== false;
         if ($isAudio) {
             $item->setEnclosureMime($mimeType);
             $item->setEnclosureLink($enclosure->get_link());
         }
     }

     return $item;
 }
예제 #10
0
 /**
  * Imports two exported articles and expects the feed service to insert
  * an "Articles without feed" feed, insert the first article, update the
  * second one and return the feed found by its URL hash.
  */
 public function testImportArticlesCreatesOwnFeedWhenNotFound()
 {
     $feedUrl = 'http://owncloud/args';

     // feed reported by findAllFromUser() and findByUrlHash()
     $knownFeed = new Feed();
     $knownFeed->setId(3);
     $knownFeed->setUserId($this->user);
     $knownFeed->setUrl($feedUrl);
     $knownFeed->setLink($feedUrl);
     $knownFeed->setTitle('Articles without feed');
     $knownFeed->setAdded($this->time);
     $knownFeed->setFolderId(0);
     $knownFeed->setPreventUpdate(true);
     $userFeeds = [$knownFeed];

     // article that is exported and then imported again
     $article = new Item();
     $article->setFeedId(3);
     $article->setAuthor('john');
     $article->setGuid('s');
     $article->setGuidHash('s');
     $article->setTitle('hey');
     $article->setPubDate(333);
     $article->setBody('come over');
     $article->setEnclosureMime('mime');
     $article->setEnclosureLink('lin');
     $article->setUnread();
     $article->setUnstarred();
     $article->setLastModified($this->time);

     $exported = $article->toExport(['feed3' => $knownFeed]);
     // plain assignment; the original author notes that this copies the
     // export, so changing the copy leaves $exported alone
     $exportedOtherFeed = $exported;
     $exportedOtherFeed['feedLink'] = 'http://test.com';
     $payload = [$exported, $exportedOtherFeed];

     // feed the service is expected to insert
     $fallbackFeed = new Feed();
     $fallbackFeed->setLink('http://owncloud/nofeed');
     $fallbackFeed->setUrl('http://owncloud/nofeed');
     $fallbackFeed->setUserId($this->user);
     $fallbackFeed->setTitle('Articles without feed');
     $fallbackFeed->setAdded($this->time);
     $fallbackFeed->setPreventUpdate(true);
     $fallbackFeed->setFolderId(0);

     $this->l10n->expects($this->once())
         ->method('t')
         ->will($this->returnValue('Articles without feed'));

     $this->feedMapper->expects($this->once())
         ->method('findAllFromUser')
         ->with($this->equalTo($this->user))
         ->will($this->returnValue($userFeeds));
     $this->feedMapper->expects($this->once())
         ->method('insert')
         ->with($this->equalTo($fallbackFeed))
         ->will($this->returnValue($fallbackFeed));

     // first article: lookup fails, so it is inserted
     $this->itemMapper->expects($this->at(0))
         ->method('findByGuidHash')
         ->will($this->throwException(new DoesNotExistException('yo')));
     $this->purifier->expects($this->once())
         ->method('purify')
         ->with($this->equalTo($article->getBody()))
         ->will($this->returnValue($article->getBody()));
     $this->itemMapper->expects($this->at(1))
         ->method('insert')
         ->with($this->equalTo($article));

     // second article: lookup succeeds, so it is updated
     $this->itemMapper->expects($this->at(2))
         ->method('findByGuidHash')
         ->will($this->returnValue($article));
     $this->itemMapper->expects($this->at(3))
         ->method('update')
         ->with($this->equalTo($article));

     $this->feedMapper->expects($this->once())
         ->method('findByUrlHash')
         ->will($this->returnValue($knownFeed));

     $imported = $this->feedService->importArticles($payload, $this->user);

     $this->assertEquals($knownFeed, $imported);
 }
예제 #11
0
 /**
  * Builds an Item from a parsed feed entry.
  *
  * The body is converted to HTML entities using the detected encoding;
  * when detection fails UTF-8 is assumed. An enclosure is only recorded
  * if its type contains "audio/" or "video/". The search index is
  * generated last, after all fields have been set.
  *
  * @param object $parsedItem parsed feed entry (getUrl(), getId(),
  *        getDate(), getTitle(), getAuthor(), getContent(),
  *        getEnclosureUrl() and getEnclosureType() are used)
  * @return Item the populated item
  */
 protected function buildItem($parsedItem)
 {
     $item = new Item();
     $item->setUnread();
     $item->setUrl($parsedItem->getUrl());
     $item->setGuid($parsedItem->getId());
     $item->setGuidHash($item->getGuid());
     $item->setPubDate($parsedItem->getDate()->getTimestamp());
     $item->setLastModified($this->time->getTime());

     // unescape content because angularjs helps against XSS
     $item->setTitle($this->decodeTwice($parsedItem->getTitle()));
     $item->setAuthor($this->decodeTwice($parsedItem->getAuthor()));

     // purification is done in the service layer
     $body = $parsedItem->getContent();

     // mb_detect_encoding() returns false when no encoding matches; never
     // hand that to mb_convert_encoding() as the source encoding
     $encoding = mb_detect_encoding($body);
     if (!$encoding) {
         $encoding = 'UTF-8';
     }
     $body = mb_convert_encoding($body, 'HTML-ENTITIES', $encoding);
     $item->setBody($body);

     $enclosureUrl = $parsedItem->getEnclosureUrl();
     if ($enclosureUrl) {
         $enclosureType = $parsedItem->getEnclosureType();
         // the type may be missing, compare against a string in any case
         $typeToCheck = (string) $enclosureType;
         if (stripos($typeToCheck, 'audio/') !== false || stripos($typeToCheck, 'video/') !== false) {
             $item->setEnclosureMime($enclosureType);
             $item->setEnclosureLink($enclosureUrl);
         }
     }

     $item->generateSearchIndex();

     return $item;
 }
예제 #12
0
 /**
  * Registers the accessor values expected on the parsed-item mock via
  * expectItem() and returns the Item built from the same fixture values.
  *
  * Note: sets the default timezone to America/Los_Angeles as a side
  * effect.
  *
  * @param string|null $enclosureType enclosure data is only set up for
  *        'audio/ogg' and 'video/ogg'
  * @return Item the expected item
  */
 private function createItem($enclosureType = null)
 {
     $item = new Item();
     $withEnclosure = $enclosureType === 'audio/ogg' || $enclosureType === 'video/ogg';

     // --- values registered through expectItem() ---
     $this->expectItem('getUrl', $this->permalink);
     $this->expectItem('getTitle', $this->title);
     $this->expectItem('getId', $this->guid);
     $this->expectItem('getContent', $this->body);

     date_default_timezone_set('America/Los_Angeles');
     $pubDate = new \DateTime();
     $pubDate->setTimestamp($this->pub);
     $this->expectItem('getDate', $pubDate);

     $this->expectItem('getAuthor', $this->author);
     if ($withEnclosure) {
         $this->expectItem('getEnclosureUrl', $this->enclosureLink);
         $this->expectItem('getEnclosureType', $enclosureType);
     }

     // --- the item expected to be built from them ---
     $item->setPubDate($this->pub);
     $item->setStatus(0);
     $item->setUnread();
     $item->setUrl($this->permalink);
     $item->setTitle('my<\' title');
     $item->setGuid($this->guid);
     $item->setGuidHash($this->guid);
     $item->setBody($this->body);
     $item->setLastModified($this->time);
     // the search index is generated before the author is set
     $item->generateSearchIndex();
     $item->setAuthor(html_entity_decode($this->author));
     if ($withEnclosure) {
         $item->setEnclosureMime($enclosureType);
         $item->setEnclosureLink($this->enclosureLink);
     }

     return $item;
 }
예제 #13
0
    /**
     * Absolute image URLs and mailto: links in the fetched page must show
     * up in the enhanced body with their targets unchanged.
     */
    public function testDontTransformAbsoluteUrlsAndMails()
    {
        $article = new Item();
        $article->setUrl('https://www.explosm.net/all/312');
        $article->setBody('Hello thar');

        // stand-in for the downloaded page
        $page = new \stdClass();
        $page->headers = array("content-type" => "text/html; charset=utf-8");
        $page->body = '<html>
			<body>
				<img src="http://www.url.com/absolute/url.png">
				<a href="mailto:test@testsite.com">mail</a>
			</body>
		</html>';

        $this->fileFactory->expects($this->once())
            ->method('getFile')
            ->with(
                $this->equalTo($article->getUrl()),
                $this->equalTo($this->timeout),
                $this->equalTo($this->redirects),
                $this->equalTo($this->headers),
                $this->equalTo($this->userAgent)
            )
            ->will($this->returnValue($page));

        $enhanced = $this->testEnhancer->enhance($article);

        $this->assertEquals(
            '<img src="http://www.url.com/absolute/url.png"><a target="_blank" href="mailto:test@testsite.com">mail</a>',
            $enhanced->getBody()
        );
    }
예제 #14
0
파일: bootstrap.php 프로젝트: sbambach/news
 /**
  * Creates an Item from an associative array of field values and inserts
  * it through the item mapper.
  *
  * @param array $item field values; the keys feedId, status, body, title,
  *        author, guid, url, pubDate, lastModified, enclosureMime and
  *        enclosureLink are read
  * @return mixed whatever the item mapper's insert() returns
  */
 private function createItem($item)
 {
     // setter => key of the value it receives, applied in this order;
     // the guid is used for both the guid and the guid hash
     $fieldMap = array(
         'setFeedId' => 'feedId',
         'setStatus' => 'status',
         'setBody' => 'body',
         'setTitle' => 'title',
         'setAuthor' => 'author',
         'setGuid' => 'guid',
         'setGuidHash' => 'guid',
         'setUrl' => 'url',
         'setPubDate' => 'pubDate',
         'setLastModified' => 'lastModified',
         'setEnclosureMime' => 'enclosureMime',
         'setEnclosureLink' => 'enclosureLink',
     );

     $newItem = new Item();
     foreach ($fieldMap as $setter => $key) {
         $newItem->$setter($item[$key]);
     }

     return $this->itemMapper->insert($newItem);
 }
예제 #15
0
    /**
     * Registers the accessor values expected on the parsed-item mock via
     * expectItem() and returns the Item built from the same fixture values.
     *
     * @param string|null $enclosureType enclosure data is only set up for
     *        'audio/ogg' and 'video/ogg'
     * @return Item the expected item
     */
    private function createItem($enclosureType=null) {
        $item = new Item();
        $withEnclosure = $enclosureType === 'audio/ogg' || $enclosureType === 'video/ogg';

        // --- values registered through expectItem() ---
        $this->expectItem('getUrl', $this->permalink);
        $this->expectItem('getTitle', $this->title);
        $this->expectItem('getId', $this->guid);
        $this->expectItem('getContent', $this->body);
        $this->expectItem('getDate', $this->pub);
        $this->expectItem('getAuthor', $this->author);
        if ($withEnclosure) {
            $this->expectItem('getEnclosureUrl', $this->enclosureLink);
            $this->expectItem('getEnclosureType', $enclosureType);
        }

        // --- the item expected to be built from them ---
        $item->setPubDate($this->pub);
        $item->setStatus(0);
        $item->setUnread();
        $item->setUrl($this->permalink);
        $item->setTitle('my<\' title');
        $item->setGuid($this->guid);
        $item->setGuidHash($this->guid);
        $item->setBody($this->body);
        $item->setLastModified($this->time);
        $item->setAuthor(html_entity_decode($this->author));
        if ($withEnclosure) {
            $item->setEnclosureMime($enclosureType);
            $item->setEnclosureLink($this->enclosureLink);
        }

        return $item;
    }
예제 #16
0
 /**
  * Absolute image URLs and mailto: links in the fetched page must show
  * up in the enhanced body with their targets unchanged, wrapped in a
  * <div>.
  */
 public function testDontTransformAbsoluteUrlsAndMails()
 {
     $article = new Item();
     $article->setUrl('https://www.explosm.net/all/312');
     $article->setBody('Hello thar');

     $charset = 'utf-8';
     $html = '<html>
         <body>
             <img src="http://www.url.com/absolute/url.png">
             <a href="mailto:test@testsite.com">mail</a>
         </body>
     </html>';
     $this->setUpFile($html, $charset, $article->getUrl());

     $expectedBody = implode('', array(
         '<div>',
         '<img src="http://www.url.com/absolute/url.png">',
         '<a target="_blank" rel="noreferrer" href="mailto:test@testsite.com">mail</a>',
         '</div>',
     ));

     $enhanced = $this->testEnhancer->enhance($article);

     $this->assertEquals($expectedBody, $enhanced->getBody());
 }
예제 #17
0
 /**
  * Registers the accessor values expected on the SimplePie item mock via
  * expectItem() and returns the Item built from the same fixture values.
  *
  * @param bool $author true: the author mock reports a name; false: it
  *        reports an empty name and an e-mail address
  * @param string|null $enclosureType an enclosure mock is only set up
  *        for 'audio/ogg'
  * @param bool $noPubDate true: the mock reports no date and the
  *        expected publication date is $this->time
  * @return Item the expected item
  */
 private function createItem($author = false, $enclosureType = null, $noPubDate = false)
 {
     $this->expectItem('get_permalink', $this->permalink);
     $this->expectItem('get_title', $this->title);
     $this->expectItem('get_id', $this->guid);
     $this->expectItem('get_content', $this->body);
     $item = new Item();
     if ($noPubDate) {
         $this->expectItem('get_date', 0);
         $item->setPubDate($this->time);
     } else {
         $this->expectItem('get_date', $this->pub);
         $item->setPubDate($this->pub);
     }
     $item->setStatus(0);
     $item->setUnread();
     $item->setUrl($this->permalink);
     $item->setTitle('my<\' title');
     $item->setGuid($this->guid);
     $item->setGuidHash(md5($this->guid));
     $item->setBody($this->body);
     $item->setLastModified($this->time);
     if ($author) {
         $mock = $this->getMock('author', array('get_name'));
         $mock->expects($this->once())->method('get_name')->will($this->returnValue($this->author));
         $this->expectItem('get_author', $mock);
         $item->setAuthor(html_entity_decode($this->author));
     } else {
         // empty name, so the e-mail address is the expected author
         $mock = $this->getMock('author', array('get_name', 'get_email'));
         $mock->expects($this->any())->method('get_name')->will($this->returnValue(''));
         $mock->expects($this->any())->method('get_email')->will($this->returnValue($this->authorMail));
         $this->expectItem('get_author', $mock);
         $item->setAuthor(html_entity_decode($this->authorMail));
     }
     if ($enclosureType === 'audio/ogg') {
         $mock = $this->getMock('enclosure', array('get_type', 'get_link'));
         $mock->expects($this->any())->method('get_type')->will($this->returnValue($enclosureType));
         // the expected item carries the enclosure link, so the mock has
         // to report it
         $mock->expects($this->any())->method('get_link')->will($this->returnValue($this->enclosureLink));
         // register the enclosure mock configured above (it used to be
         // $this->mock, which this method never sets up)
         $this->expectItem('get_enclosure', $mock);
         $item->setEnclosureMime($enclosureType);
         $item->setEnclosureLink($this->enclosureLink);
     }
     return $item;
 }