/**
 * The mocked article enhancer must never be called and the item handed to
 * the enhancer has to come back equal to the one that went in.
 */
public function testNotMatchShouldJustReturnItem()
{
    $this->articleEnhancer->expects($this->never())
        ->method('enhance');

    $unmatched = new Item();
    $unmatched->setUrl('hi');

    $enhanced = $this->enhancer->enhance($unmatched, 'https://tests.com');

    $this->assertEquals($unmatched, $enhanced);
}
/**
 * A body consisting of several sibling paragraphs is expected to be
 * returned wrapped in a single <div>.
 */
public function testMultipleParagraphsInDiv()
{
    $paragraphs = '<p>paragraph 1</p><p>paragraph 2</p>';

    $item = new Item();
    $item->setBody($paragraphs);

    $enhanced = $this->enhancer->enhance($item);

    $this->assertEquals('<div>' . $paragraphs . '</div>', $enhanced->getBody());
}
/**
 * Registers two AddEnhancer instances as global enhancers and checks that
 * a body of 1 ends up as 3 after enhancing.
 */
public function testGlobalEnhancer()
{
    // two independent instances, registered one after the other
    for ($registered = 0; $registered < 2; $registered++) {
        $this->enhancer->registerGlobalEnhancer(new AddEnhancer());
    }

    $item = new Item();
    $item->setBody(1);

    $enhanced = $this->enhancer->enhance($item, 'test');

    $this->assertEquals(3, $enhanced->getBody());
}
/**
 * Runs a RegexArticleEnhancer whose URL pattern matches the item URL and
 * verifies the search/replace pair has been applied to the body.
 */
public function testRegexEnhancer()
{
    $replacements = ["%tes(ts)%" => "heho\$1tests"];
    $enhancer = new RegexArticleEnhancer('%john.com%', $replacements);

    $article = new Item();
    $article->setBody('atests is a nice thing');
    $article->setUrl('http://john.com');

    $enhanced = $enhancer->enhance($article);

    $this->assertEquals('ahehotstests is a nice thing', $enhanced->getBody());
}
/**
 * Applies every configured search/replace regex to the item body, but only
 * when the item URL matches the article URL pattern of this enhancer.
 *
 * A replacement that fails inside PCRE is skipped so that a broken or
 * backtracking-heavy pattern cannot erase the body.
 *
 * @param \OCA\News\Db\Item $item
 * @return \OCA\News\Db\Item enhanced item
 */
public function enhance(Item $item)
{
    // preg_match() yields 0 (no match) or false (error); both mean "skip"
    if (!preg_match($this->matchArticleUrl, $item->getUrl())) {
        return $item;
    }

    $body = $item->getBody();

    foreach ($this->regexPair as $search => $replaceWith) {
        $replaced = preg_replace($search, $replaceWith, $body);

        // preg_replace() returns null when an error occurred; keep the
        // body as it was before this pair instead of overwriting it
        if ($replaced !== null) {
            $body = $replaced;
        }
    }

    $item->setBody($body);

    return $item;
}
/**
 * For every URL-regex/XPath pair whose regex matches the item URL the page
 * behind that URL is fetched, converted to HTML entities and queried with
 * the XPath expression. A non-empty result replaces the item body.
 *
 * @param \OCA\News\Db\Item $item
 * @return \OCA\News\Db\Item enhanced item
 */
public function enhance(Item $item)
{
    // Looks for <meta charset="..."> or
    // <meta http-equiv="Content-Type" ... content="...charset=...">
    $charsetPattern = '/<meta\s+[^>]*(?:charset\s*=\s*[\'"]([^>\'"]*)[\'"]' .
        '|http-equiv\s*=\s*[\'"]content-type[\'"]\s+[^>]*' .
        'content\s*=\s*[\'"][^>]*charset=([^>]*)[\'"])[^>]*>' .
        '/i';

    foreach ($this->regexXPathPair as $urlPattern => $query) {
        if (!preg_match($urlPattern, $item->getUrl())) {
            continue;
        }

        $html = $this->getFile($item->getUrl());

        // prefer the charset declared in the markup, otherwise let mbstring
        // guess; the last capture group is the one that matched
        if (preg_match($charsetPattern, $html, $found)) {
            $encoding = strtoupper($found[count($found) - 1]);
        } else {
            $encoding = mb_detect_encoding($html);
        }

        if (!$encoding) {
            $encoding = 'UTF-8';
        }

        $html = mb_convert_encoding($html, 'HTML-ENTITIES', $encoding);

        // nothing usable was downloaded: stop processing altogether
        if (trim($html) === '') {
            return $item;
        }

        $document = new DOMDocument();
        @$document->loadHTML($html);

        $xpath = new DOMXpath($document);
        $extracted = $xpath->evaluate($query);

        // a non-string result is handed to domToString() for serialization
        if (!is_string($extracted)) {
            $extracted = $this->domToString($extracted);
        }

        // rewrite relative links against the article URL
        $extracted = $this->substituteRelativeLinks(
            trim($extracted),
            $item->getUrl()
        );

        if ($extracted) {
            $item->setBody($extracted);
        }
    }

    return $item;
}
/**
 * Serializes an array holding a list of items under the 'items' key next to
 * an unrelated scalar and checks both parts of the result.
 */
public function testCompleteArraysTransformed()
{
    $unreadItem = new Item();
    $unreadItem->setUnread();

    $readItem = new Item();
    $readItem->setRead();

    $payload = [
        'items' => [$unreadItem, $readItem],
        'test' => 1,
    ];

    $serializer = new EntityApiSerializer('items');
    $serialized = $serializer->serialize($payload);

    $this->assertTrue($serialized['items'][0]['unread']);
    $this->assertFalse($serialized['items'][1]['unread']);
    $this->assertEquals(1, $serialized['test']);
}
/**
 * The articles export is expected to be delivered as an attachment named
 * articles.json whose entries carry the link of the feed they belong to.
 */
public function testGetAllArticles()
{
    $firstFeed = new Feed();
    $firstFeed->setId(3);
    $firstFeed->setLink('http://goo');

    $secondFeed = new Feed();
    $secondFeed->setId(5);
    $secondFeed->setLink('http://gee');

    $firstItem = new Item();
    $firstItem->setFeedId(3);

    $secondItem = new Item();
    $secondItem->setFeedId(5);

    $this->feedBusinessLayer->expects($this->once())
        ->method('findAll')
        ->with($this->equalTo($this->user))
        ->will($this->returnValue(array($firstFeed, $secondFeed)));

    $this->itemBusinessLayer->expects($this->once())
        ->method('getUnreadOrStarred')
        ->with($this->equalTo($this->user))
        ->will($this->returnValue(array($firstItem, $secondItem)));

    $expectedJson = '[{"guid":null,"url":null,"title":null,' .
        '"author":null,"pubDate":null,"body":null,"enclosureMime":null,' .
        '"enclosureLink":null,"unread":false,"starred":false,' .
        '"feedLink":"http:\\/\\/goo"},{"guid":null,"url":null,"title":null,' .
        '"author":null,"pubDate":null,"body":null,"enclosureMime":null,' .
        '"enclosureLink":null,"unread":false,"starred":false,' .
        '"feedLink":"http:\\/\\/gee"}]';

    $response = $this->controller->articles();
    $responseHeaders = $response->getHeaders();

    $this->assertEquals(
        'attachment; filename="articles.json"',
        $responseHeaders['Content-Disposition']
    );
    $this->assertEquals($expectedJson, $response->render());
}
/**
 * For every URL-regex/XPath pair whose regex matches the item URL the page
 * behind that URL is fetched and queried with the XPath expression. A
 * non-empty result replaces the item body.
 *
 * The charset is taken from the Content-Type response header when it is
 * present and names a charset; otherwise the body is parsed unconverted.
 *
 * @param \OCA\News\Db\Item $item
 * @return \OCA\News\Db\Item enhanced item
 */
public function enhance(Item $item)
{
    foreach ($this->regexXPathPair as $regex => $search) {
        if (!preg_match($regex, $item->getUrl())) {
            continue;
        }

        $file = $this->getFile($item->getUrl());

        // the header is optional: do not read an undefined index
        $contentType = isset($file->headers['content-type'])
            ? $file->headers['content-type']
            : '';

        // convert the encoding by detecting the charset from the header;
        // "charset=" followed by nothing matches with an empty string,
        // which is not a usable source encoding
        $hasCharset = preg_match('/(?<=charset=)[^;]*/', $contentType, $matches)
            && $matches[0] !== '';

        if ($hasCharset) {
            $body = mb_convert_encoding($file->body, 'HTML-ENTITIES', $matches[0]);
        } else {
            $body = $file->body;
        }

        $dom = new \DOMDocument();
        @$dom->loadHTML($body);

        $xpath = new \DOMXpath($dom);
        $xpathResult = $xpath->evaluate($search);

        // in case it was not a text query, pass the result to
        // domToString() to turn it into a string
        if (!is_string($xpathResult)) {
            $xpathResult = $this->domToString($xpathResult);
        }

        // convert all relative to absolute URLs
        $xpathResult = $this->substituteRelativeLinks($xpathResult, $item->getUrl());

        if ($xpathResult) {
            $item->setBody($xpathResult);
        }
    }

    return $item;
}
/**
 * Builds the fixture from a fixed set of baseline values which can be
 * overridden per key. 'guid' falls back to the title and 'guidHash' falls
 * back to the guid when they are not passed in.
 *
 * @param array $defaults values overriding the baseline
 */
public function __construct(array $defaults = [])
{
    parent::__construct();

    $baseline = [
        'url' => 'http://google.de',
        'title' => 'title',
        'author' => 'my author',
        'pubDate' => 2323,
        'body' => 'this is a body',
        'enclosureMime' => 'video/mpeg',
        'enclosureLink' => 'http://google.de/web.webm',
        'feedId' => 0,
        'status' => 2,
        'lastModified' => 113,
        'rtl' => false,
    ];
    $defaults = array_merge($baseline, $defaults);

    // array union only adds keys that are missing, an explicitly passed
    // value (even null) is kept
    $defaults += ['guid' => $defaults['title']];
    $defaults += ['guidHash' => $defaults['guid']];

    // NOTE(review): the search index is generated before the defaults are
    // filled in - confirm this order is intended
    $this->generateSearchIndex();
    $this->fillDefaults($defaults);
}
/**
 * This method is run after all enhancers and for every item.
 *
 * Wraps the body in a <div> and switches autoplay off on embedded YouTube
 * iframes before writing the serialized markup back to the item.
 */
public function enhance(Item $item)
{
    // the wrapping div prevents the parser from nesting the content
    // invalidly inside <p> tags
    $wrapped = '<div>' . $item->getBody() . '</div>';

    $document = new DOMDocument();
    @$document->loadHTML($wrapped, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);

    // PHP only ships XPath 1.0 which has no matches() function, so the
    // XPath query preselects candidates and the regex does the real check
    // to rule out false positives
    $autoplayPattern = '%^(http://|https://|//)(www\.)?youtube.com/' .
        '.*\?.*autoplay=1.*%i';

    $xpath = new DOMXpath($document);
    $candidates = $xpath->query("//iframe[contains(@src, 'youtube.com')]");

    foreach ($candidates as $iframe) {
        $source = $iframe->getAttribute('src');

        if (!preg_match($autoplayPattern, $source)) {
            continue;
        }

        $iframe->setAttribute(
            'src',
            str_replace('autoplay=1', 'autoplay=0', $source)
        );
    }

    // persist the modified markup
    $item->setBody(trim($document->saveHTML()));

    return $item;
}
/**
 * Inserts an item with a non-ASCII body and verifies that fetching it by id
 * returns the same field values.
 */
public function testFind()
{
    $original = new Item();
    $original->setTitle('my title thats long');
    $original->setGuid('a doner');
    $original->setGuidHash('a doner');
    $original->setFeedId($this->feeds['first feed']->getId());
    $original->setUnread();
    $original->setBody('Döner');

    $inserted = $this->itemMapper->insert($original);
    $loaded = $this->itemMapper->find($inserted->getId(), $this->userId);

    $this->assertEquals($original->getTitle(), $loaded->getTitle());
    $this->assertEquals($original->getGuid(), $loaded->getGuid());
    $this->assertEquals($original->getGuidHash(), $loaded->getGuidHash());
    $this->assertEquals($original->getFeedId(), $loaded->getFeedId());
    $this->assertEquals($original->isRead(), $loaded->isRead());
    $this->assertEquals('Döner', $loaded->getBody());
}
/**
 * Creates an Item from an associative array and inserts it through the
 * item mapper. The guid is used for both the guid and the guid hash.
 *
 * @param array $item values keyed by feedId, status, body, title, author,
 * guid, url, pubDate, lastModified, enclosureMime and enclosureLink
 * @return mixed whatever the item mapper's insert() returns
 */
private function createItem($item)
{
    // setter => key of the value in $item, applied in this order
    $keyBySetter = array(
        'setFeedId' => 'feedId',
        'setStatus' => 'status',
        'setBody' => 'body',
        'setTitle' => 'title',
        'setAuthor' => 'author',
        'setGuid' => 'guid',
        'setGuidHash' => 'guid',
        'setUrl' => 'url',
        'setPubDate' => 'pubDate',
        'setLastModified' => 'lastModified',
        'setEnclosureMime' => 'enclosureMime',
        'setEnclosureLink' => 'enclosureLink',
    );

    $entity = new Item();
    foreach ($keyBySetter as $setter => $key) {
        $entity->$setter($item[$key]);
    }

    return $this->itemMapper->insert($entity);
}
/**
 * Registers the values the parsed feed item is expected to hand out and
 * returns the Item that is expected to be built from them.
 *
 * @param string|null $enclosureType enclosure expectations are only set up
 * for 'audio/ogg' and 'video/ogg'
 * @return Item the expected item
 */
private function createItem($enclosureType=null)
{
    $withEnclosure = in_array(
        $enclosureType,
        array('audio/ogg', 'video/ogg'),
        true
    );

    // what the parsed item is going to be asked for
    $this->expectItem('getUrl', $this->permalink);
    $this->expectItem('getTitle', $this->title);
    $this->expectItem('getId', $this->guid);
    $this->expectItem('getContent', $this->body);
    $this->expectItem('getDate', $this->pub);
    $this->expectItem('getAuthor', $this->author);
    if ($withEnclosure) {
        $this->expectItem('getEnclosureUrl', $this->enclosureLink);
        $this->expectItem('getEnclosureType', $enclosureType);
    }

    // what should come out of it
    $expected = new Item();
    $expected->setPubDate($this->pub);
    $expected->setStatus(0);
    $expected->setUnread();
    $expected->setUrl($this->permalink);
    $expected->setTitle('my<\' title');
    $expected->setGuid($this->guid);
    $expected->setGuidHash($this->guid);
    $expected->setBody($this->body);
    $expected->setLastModified($this->time);
    $expected->setAuthor(html_entity_decode($this->author));
    if ($withEnclosure) {
        $expected->setEnclosureMime($enclosureType);
        $expected->setEnclosureLink($this->enclosureLink);
    }

    return $expected;
}
/**
 * Imports two entries: the mocks make the first one get inserted (lookup by
 * guid hash throws) and the second one get updated (lookup returns an item).
 * A new "Articles without feed" feed is expected to be inserted and the
 * feed found by URL hash is expected to be returned.
 */
public function testImportArticlesCreatesOwnFeedWhenNotFound()
{
    $knownUrl = 'http://owncloud/args';

    $knownFeed = new Feed();
    $knownFeed->setId(3);
    $knownFeed->setUserId($this->user);
    $knownFeed->setUrl($knownUrl);
    $knownFeed->setLink($knownUrl);
    $knownFeed->setTitle('Articles without feed');
    $knownFeed->setAdded($this->time);
    $knownFeed->setFolderId(0);
    $knownFeed->setPreventUpdate(true);

    $article = new Item();
    $article->setFeedId(3);
    $article->setAuthor('john');
    $article->setGuid('s');
    $article->setGuidHash('s');
    $article->setTitle('hey');
    $article->setPubDate(333);
    $article->setBody('come over');
    $article->setEnclosureMime('mime');
    $article->setEnclosureLink('lin');
    $article->setUnread();
    $article->setUnstarred();
    $article->setLastModified($this->time);

    $firstEntry = $article->toExport(['feed3' => $knownFeed]);
    // arrays are assigned by value, so this is an independent copy
    $secondEntry = $firstEntry;
    $secondEntry['feedLink'] = 'http://test.com';

    $fallbackFeed = new Feed();
    $fallbackFeed->setLink('http://owncloud/nofeed');
    $fallbackFeed->setUrl('http://owncloud/nofeed');
    $fallbackFeed->setUserId($this->user);
    $fallbackFeed->setTitle('Articles without feed');
    $fallbackFeed->setAdded($this->time);
    $fallbackFeed->setPreventUpdate(true);
    $fallbackFeed->setFolderId(0);

    $this->l10n->expects($this->once())
        ->method('t')
        ->will($this->returnValue('Articles without feed'));

    $this->feedMapper->expects($this->once())
        ->method('findAllFromUser')
        ->with($this->equalTo($this->user))
        ->will($this->returnValue([$knownFeed]));

    $this->feedMapper->expects($this->once())
        ->method('insert')
        ->with($this->equalTo($fallbackFeed))
        ->will($this->returnValue($fallbackFeed));

    // first entry: unknown item, gets purified and inserted
    $this->itemMapper->expects($this->at(0))
        ->method('findByGuidHash')
        ->will($this->throwException(new DoesNotExistException('yo')));

    $this->purifier->expects($this->once())
        ->method('purify')
        ->with($this->equalTo($article->getBody()))
        ->will($this->returnValue($article->getBody()));

    $this->itemMapper->expects($this->at(1))
        ->method('insert')
        ->with($this->equalTo($article));

    // second entry: known item, gets updated
    $this->itemMapper->expects($this->at(2))
        ->method('findByGuidHash')
        ->will($this->returnValue($article));

    $this->itemMapper->expects($this->at(3))
        ->method('update')
        ->with($this->equalTo($article));

    $this->feedMapper->expects($this->once())
        ->method('findByUrlHash')
        ->will($this->returnValue($knownFeed));

    $returned = $this->feedService->importArticles(
        [$firstEntry, $secondEntry],
        $this->user
    );

    $this->assertEquals($knownFeed, $returned);
}
/**
 * Registers the values the parsed feed item is expected to hand out and
 * returns the Item that is expected to be built from them. The publication
 * date is handed out as a DateTime built from the $this->pub timestamp.
 *
 * Side effect: sets the default timezone to America/Los_Angeles.
 *
 * @param string|null $enclosureType enclosure expectations are only set up
 * for 'audio/ogg' and 'video/ogg'
 * @return Item the expected item
 */
private function createItem($enclosureType = null)
{
    $withEnclosure = in_array(
        $enclosureType,
        array('audio/ogg', 'video/ogg'),
        true
    );

    $this->expectItem('getUrl', $this->permalink);
    $this->expectItem('getTitle', $this->title);
    $this->expectItem('getId', $this->guid);
    $this->expectItem('getContent', $this->body);

    $expected = new Item();

    date_default_timezone_set('America/Los_Angeles');
    $published = new \DateTime();
    $published->setTimestamp($this->pub);
    $this->expectItem('getDate', $published);

    $expected->setPubDate($this->pub);
    $expected->setStatus(0);
    $expected->setUnread();
    $expected->setUrl($this->permalink);
    $expected->setTitle('my<\' title');
    $expected->setGuid($this->guid);
    $expected->setGuidHash($this->guid);
    $expected->setBody($this->body);
    $expected->setLastModified($this->time);
    // the index is generated at this point, before author and enclosure
    // are assigned
    $expected->generateSearchIndex();

    $this->expectItem('getAuthor', $this->author);
    $expected->setAuthor(html_entity_decode($this->author));

    if (!$withEnclosure) {
        return $expected;
    }

    $this->expectItem('getEnclosureUrl', $this->enclosureLink);
    $this->expectItem('getEnclosureType', $enclosureType);
    $expected->setEnclosureMime($enclosureType);
    $expected->setEnclosureLink($this->enclosureLink);

    return $expected;
}
/**
 * Absolute image URLs and mailto: links in the fetched page must be left
 * untouched by the enhancer.
 */
public function testDontTransformAbsoluteUrlsAndMails()
{
    $article = new Item();
    $article->setUrl('https://www.explosm.net/all/312');
    $article->setBody('Hello thar');

    // fake download result handed out by the file factory
    $download = new \stdClass();
    $download->headers = array("content-type" => "text/html; charset=utf-8");
    $download->body = '<html> <body> <img src="http://www.url.com/absolute/url.png"> <a href="mailto:test@testsite.com">mail</a> </body> </html>';

    $this->fileFactory->expects($this->once())
        ->method('getFile')
        ->with(
            $this->equalTo($article->getUrl()),
            $this->equalTo($this->timeout),
            $this->equalTo($this->redirects),
            $this->equalTo($this->headers),
            $this->equalTo($this->userAgent)
        )
        ->will($this->returnValue($download));

    $enhanced = $this->testEnhancer->enhance($article);

    $this->assertEquals(
        '<img src="http://www.url.com/absolute/url.png"><a target="_blank" href="mailto:test@testsite.com">mail</a>',
        $enhanced->getBody()
    );
}
/**
 * Absolute image URLs and mailto: links in the fetched page must be left
 * untouched by the enhancer; the result is expected inside a <div>.
 */
public function testDontTransformAbsoluteUrlsAndMails()
{
    $article = new Item();
    $article->setUrl('https://www.explosm.net/all/312');
    $article->setBody('Hello thar');

    $page = '<html> <body> <img src="http://www.url.com/absolute/url.png"> <a href="mailto:test@testsite.com">mail</a> </body> </html>';
    $this->setUpFile($page, 'utf-8', $article->getUrl());

    $expectedBody = '<div>' .
        '<img src="http://www.url.com/absolute/url.png">' .
        '<a target="_blank" rel="noreferrer" href="mailto:test@testsite.com">mail</a>' .
        '</div>';

    $enhanced = $this->testEnhancer->enhance($article);

    $this->assertEquals($expectedBody, $enhanced->getBody());
}
/**
 * Import articles
 *
 * Every entry is attached to the feed whose link equals the entry's
 * 'feedLink'. Entries without a matching feed end up in a placeholder feed
 * with the URL http://owncloud/nofeed, which is created on demand. Items
 * that already exist (same guid hash in the same feed) only get their
 * status updated, new ones are purified and inserted.
 *
 * @param array $json the array with json
 * @param string $userId the username
 * @return Feed|null the placeholder feed if it had to be created during
 * this import, null otherwise
 */
public function importArticles($json, $userId)
{
    $placeholderUrl = 'http://owncloud/nofeed';
    $placeholderHash = md5($placeholderUrl);

    // index the user's feeds by link for fast access
    $feedsByLink = [];
    foreach ($this->findAll($userId) as $knownFeed) {
        $feedsByLink[$knownFeed->getLink()] = $knownFeed;
    }

    $placeholderCreated = false;

    foreach ($json as $entry) {
        $item = Item::fromImport($entry);
        $item->setLastModified($this->timeFactory->getTime());

        // the feed link is not part of the imported item itself
        $feedLink = $entry['feedLink'];

        if (array_key_exists($feedLink, $feedsByLink)) {
            $feed = $feedsByLink[$feedLink];
        } elseif (array_key_exists($placeholderUrl, $feedsByLink)) {
            $feed = $feedsByLink[$placeholderUrl];
        } else {
            $placeholderCreated = true;

            $feed = new Feed();
            $feed->setUserId($userId);
            $feed->setLink($placeholderUrl);
            $feed->setUrl($placeholderUrl);
            $feed->setTitle($this->l10n->t('Articles without feed'));
            $feed->setAdded($this->timeFactory->getTime());
            $feed->setFolderId(0);
            $feed->setPreventUpdate(true);

            $feed = $this->feedMapper->insert($feed);
        }

        $item->setFeedId($feed->getId());

        if ($placeholderCreated && !array_key_exists($feed->getLink(), $feedsByLink)) {
            // remember the freshly inserted feed for the following entries
            $feedsByLink[$feed->getLink()] = $feed;
        }

        try {
            // an existing item only takes over the imported status
            $existing = $this->itemMapper->findByGuidHash(
                $item->getGuidHash(),
                $feed->getId(),
                $userId
            );
            $existing->setStatus($item->getStatus());
            $this->itemMapper->update($existing);
        } catch (DoesNotExistException $ex) {
            $item->setBody($this->purifier->purify($item->getBody()));
            $this->itemMapper->insert($item);
        }
    }

    if (!$placeholderCreated) {
        return null;
    }

    return $this->feedMapper->findByUrlHash($placeholderHash, $userId);
}
/**
 * Converts a parsed feed entry into an unread Item.
 *
 * Title and author are entity-decoded twice, the body is converted to HTML
 * entities and an enclosure is only taken over when its type contains
 * 'audio/' or 'video/'. The search index is generated last.
 *
 * @param mixed $parsedItem the parsed feed entry; has to offer getUrl(),
 * getId(), getDate(), getTitle(), getAuthor(), getContent(),
 * getEnclosureUrl() and getEnclosureType()
 * @return Item the item built from the entry
 */
protected function buildItem($parsedItem)
{
    $item = new Item();
    $item->setUnread();
    $item->setUrl($parsedItem->getUrl());
    $item->setGuid($parsedItem->getId());
    $item->setGuidHash($item->getGuid());
    $item->setPubDate($parsedItem->getDate()->getTimestamp());
    $item->setLastModified($this->time->getTime());

    // unescape content because angularjs helps against XSS
    $item->setTitle($this->decodeTwice($parsedItem->getTitle()));
    $item->setAuthor($this->decodeTwice($parsedItem->getAuthor()));

    // purification is done in the service layer
    $body = $parsedItem->getContent();

    // mb_detect_encoding() returns false when no encoding can be
    // determined; false is not a valid source encoding, so fall back to
    // UTF-8 in that case
    $encoding = mb_detect_encoding($body);
    if (!$encoding) {
        $encoding = 'UTF-8';
    }
    $body = mb_convert_encoding($body, 'HTML-ENTITIES', $encoding);
    $item->setBody($body);

    $enclosureUrl = $parsedItem->getEnclosureUrl();
    if ($enclosureUrl) {
        $enclosureType = $parsedItem->getEnclosureType();

        if (stripos($enclosureType, 'audio/') !== false
            || stripos($enclosureType, 'video/') !== false
        ) {
            $item->setEnclosureMime($enclosureType);
            $item->setEnclosureLink($enclosureUrl);
        }
    }

    $item->generateSearchIndex();

    return $item;
}
/**
 * Registers the values the mocked SimplePie item is expected to hand out
 * and returns the Item that is expected to be built from them.
 *
 * @param bool $author true: the author mock hands out a name; false: the
 * name is empty and the e-mail address is expected as author instead
 * @param string|null $enclosureType an enclosure is only set up for
 * 'audio/ogg'
 * @param bool $noPubDate true: get_date hands out 0 and $this->time is
 * expected as publication date
 * @return Item the expected item
 */
private function createItem($author = false, $enclosureType = null, $noPubDate = false)
{
    $this->expectItem('get_permalink', $this->permalink);
    $this->expectItem('get_title', $this->title);
    $this->expectItem('get_id', $this->guid);
    $this->expectItem('get_content', $this->body);

    $item = new Item();

    if ($noPubDate) {
        $this->expectItem('get_date', 0);
        $item->setPubDate($this->time);
    } else {
        $this->expectItem('get_date', $this->pub);
        $item->setPubDate($this->pub);
    }

    $item->setStatus(0);
    $item->setUnread();
    $item->setUrl($this->permalink);
    $item->setTitle('my<\' title');
    $item->setGuid($this->guid);
    $item->setGuidHash(md5($this->guid));
    $item->setBody($this->body);
    $item->setLastModified($this->time);

    if ($author) {
        $mock = $this->getMock('author', array('get_name'));
        $mock->expects($this->once())
            ->method('get_name')
            ->will($this->returnValue($this->author));
        $this->expectItem('get_author', $mock);
        $item->setAuthor(html_entity_decode($this->author));
    } else {
        $mock = $this->getMock('author', array('get_name', 'get_email'));
        $mock->expects($this->any())
            ->method('get_name')
            ->will($this->returnValue(''));
        $mock->expects($this->any())
            ->method('get_email')
            ->will($this->returnValue($this->authorMail));
        $this->expectItem('get_author', $mock);
        $item->setAuthor(html_entity_decode($this->authorMail));
    }

    if ($enclosureType === 'audio/ogg') {
        $mock = $this->getMock('enclosure', array('get_type', 'get_link'));
        $mock->expects($this->any())
            ->method('get_type')
            ->will($this->returnValue($enclosureType));
        // the expected item carries the enclosure link, so the mocked
        // enclosure has to hand it out
        $mock->expects($this->any())
            ->method('get_link')
            ->will($this->returnValue($this->enclosureLink));

        // hand out the enclosure mock configured above; the previous code
        // passed $this->mock and left the local mock unused
        $this->expectItem('get_enclosure', $mock);

        $item->setEnclosureMime($enclosureType);
        $item->setEnclosureLink($this->enclosureLink);
    }

    return $item;
}
/**
 * Calling read() with false for an item that is currently read is expected
 * to update the item as unread with lastModified set to $this->time.
 */
public function testRead()
{
    $id = 3;

    $stored = new Item();
    $stored->setStatus(128);
    $stored->setId($id);
    $stored->setRead();

    $afterUpdate = new Item();
    $afterUpdate->setStatus(128);
    $afterUpdate->setUnread();
    $afterUpdate->setId($id);
    $afterUpdate->setLastModified($this->time);

    $this->mapper->expects($this->once())
        ->method('find')
        ->with($this->equalTo($id), $this->equalTo($this->user))
        ->will($this->returnValue($stored));

    $this->mapper->expects($this->once())
        ->method('update')
        ->with($this->equalTo($afterUpdate));

    $this->itemBusinessLayer->read($id, false, $this->user);

    $this->assertTrue($stored->isUnread());
}
/**
 * Converts a SimplePie item into an unread Item with status 0.
 *
 * @param mixed $simplePieItem the SimplePie item; has to offer
 * get_permalink(), get_title(), get_id(), get_content(), get_date(),
 * get_author() and get_enclosure()
 * @param string $feedLink used as item URL when the item has no permalink
 * @return Item the item built from the SimplePie item
 */
protected function buildItem($simplePieItem, $feedLink)
{
    $built = new Item();
    $built->setStatus(0);
    $built->setUnread();

    // fall back to the feed link for items without a permalink
    $permalink = $this->decodeTwice($simplePieItem->get_permalink());
    $built->setUrl($permalink ? $permalink : $feedLink);

    // unescape content because angularjs helps against XSS
    $built->setTitle($this->decodeTwice($simplePieItem->get_title()));

    $built->setGuid($simplePieItem->get_id());

    // purification is done in the businesslayer
    $built->setBody($simplePieItem->get_content());

    // the publication date is optional, the current time is the substitute
    $published = $simplePieItem->get_date('U');
    $built->setPubDate($published ? $published : $this->time->getTime());
    $built->setLastModified($this->time->getTime());

    $author = $simplePieItem->get_author();
    if ($author !== null) {
        // the e-mail address is only looked up when there is no name
        $authorName = $this->decodeTwice($author->get_name());
        $built->setAuthor(
            $authorName
                ? $authorName
                : $this->decodeTwice($author->get_email())
        );
    }

    // TODO: make it work for video files also
    $enclosure = $simplePieItem->get_enclosure();
    if ($enclosure === null) {
        return $built;
    }

    $mimeType = $enclosure->get_type();
    if (stripos($mimeType, "audio/") !== false) {
        $built->setEnclosureMime($mimeType);
        $built->setEnclosureLink($enclosure->get_link());
    }

    return $built;
}