/**
 * Filter the entries produced by the wrapped sub-parser, keeping only the
 * ones whose href is recognised by the hoster helper.
 *
 * @param string $string content handed over to the sub-parser
 * @return array list of array('hoster' => ..., 'url' => ..., 'label' => ...),
 *               where 'label' has its tags stripped
 */
public function parse($string) {
    $hosterLinks = array();
    // the sub-parser is expected to locate the <a> tags
    foreach ($this->subparser->parse($string) as $anchor) {
        try {
            $hoster = $this->helper->findHoster($anchor['href']);
            $hosterLinks[] = array(
                'hoster' => $hoster,
                'url' => $anchor['href'],
                'label' => strip_tags($anchor['label']),
            );
        } catch (Exception $ignored) {
            // the href could not be matched to a hoster: drop the entry
            continue;
        }
    }
    return $hosterLinks;
}
/**
 * Run the wrapped sub-parser over the content and return only the entries
 * for which the hoster helper can find a hoster.
 *
 * @param string $string content handed over to the sub-parser
 * @return array list of array('hoster', 'url', 'label') entries; 'label'
 *               is the original label with tags stripped
 */
public function parse($string) {
    // step 1: collect every candidate (the sub-parser looks for <a> tags)
    $candidates = $this->subparser->parse($string);

    // step 2: keep the candidates that resolve to a hoster
    $accepted = array();
    foreach ($candidates as $candidate) {
        try {
            $entry = array(
                'hoster' => $this->helper->findHoster($candidate['href']),
                'url' => $candidate['href'],
                'label' => strip_tags($candidate['label']),
            );
            $accepted[] = $entry;
        } catch (Exception $e) {
            // invalid href: silently skipped on purpose
        }
    }

    return $accepted;
}
/**
 * (non-PHPdoc)
 * @see X_PageParser_Parser::parse()
 *
 * Dispatch the content to the parsing routine selected by the current mode.
 *
 * @param string $string content to parse
 * @return mixed the value returned by the mode-specific routine
 * @throws Exception if the current mode is none of the MODE_* values handled below
 */
public function parse($string) {
    // every branch either returns or throws, so nothing can follow the switch
    switch ($this->mode) {
        case self::MODE_TITLES:
            return $this->parseTitles($string);
        case self::MODE_EPISODES:
            return $this->parseEpisodes($string);
        case self::MODE_LINKS:
            return $this->parseLinks($string);
        case self::MODE_NEXTPAGE:
            return $this->parseNextPage($string);
        default:
            throw new Exception("Invalid mode: {" . $this->mode . "}");
    }
}
/**
 * Load the updates index page and append one container item to $items for
 * every entry matched on it.
 *
 * @param X_Page_ItemList_PItem $items list that receives the new items
 * @param string $resourceType first segment of the item location and part of the item key
 * @param int $pageN second segment of the item location
 * @return void
 */
private function _fetchGroupUpdates(X_Page_ItemList_PItem $items, $resourceType, $pageN = 1) {
    $entryPattern = '/<p align="center"><a href="http:\\/\\/www\\.streaming-online\\.biz\\/(?P<id>[^\\/]+)\\/">(?P<label>.+?)<\\/a><\\/p>/';
    $entryParser = X_PageParser_Parser_Preg::factory($entryPattern, X_PageParser_Parser_Preg::PREG_MATCH_ALL, PREG_SET_ORDER);

    $page = X_PageParser_Page::getPage(self::URL_UPDATES_INDEX, $entryParser);
    $this->preparePageLoader($page);

    // the group is not read from the page: every entry is filed under this one
    $group = 'telefilm';

    // each parsed entry carries the named groups of the pattern: 'id' and 'label'
    foreach ($page->getParsed() as $entry) {
        $label = trim($entry['label']);
        $id = $entry['id'];

        X_Debug::i("Parsed item: " . var_export(array($id, $group, $label), true));

        // same string is used as custom location, debug description and link target
        $location = "{$resourceType}/{$pageN}/{$group}/{$id}";

        $item = new X_Page_Item_PItem($this->getId() . "-{$resourceType}-{$group}-{$id}", $label);
        $item->setIcon('/images/icons/folder_32.png')
            ->setType(X_Page_Item_PItem::TYPE_CONTAINER)
            ->setCustom(__CLASS__ . ':location', $location)
            // the location is exposed as description only while developing
            ->setDescription(APPLICATION_ENV == 'development' ? $location : null)
            ->setLink(array('l' => X_Env::encode($location)), 'default', false);

        $items->append($item);
    }
}
/**
 * (non-PHPdoc)
 * @see X_PageParser_Parser::parse()
 *
 * Extract the hoster links from the content and label each of them with the
 * prefix of the line it belongs to.
 *
 * Every match of PATTERN_LINKS is checked against the hoster helper:
 *  - a valid link produces an entry; if it opens a line, the running prefix
 *    becomes its 'pre' group (or is cleared when that group is empty);
 *  - an invalid link produces nothing; if it opens a line, the running
 *    prefix becomes its 'pre' group or, when that is empty, its own label.
 *
 * @param string $string content to parse
 * @return array list of array('hosterId', 'videoId', 'label', 'link') entries,
 *               where 'link' is "hosterId:videoId"
 */
public function parse($string) {
    /* @var $hosterHelper X_VlcShares_Plugins_Helper_Hoster */
    $hosterHelper = X_VlcShares_Plugins::helpers()->hoster();

    $subparser = X_PageParser_Parser_Preg::factory(self::PATTERN_LINKS, X_PageParser_Parser_Preg::PREG_MATCH_ALL, PREG_SET_ORDER);
    $links = $subparser->parse($string);
    X_Debug::i("Step1 links: " . count($links));

    // clean results and reformat them
    $cleanedLinks = array();
    // prefix inherited by the links that follow on the same line (false = none)
    $pre = false;

    foreach ($links as $link) {
        // optional groups may be absent from a match: empty() reads them
        // without raising an undefined-index notice
        $isStartLine = !empty($link['startline']);
        $linePrefix = !empty($link['pre']) ? $link['pre'] : false;

        try {
            // throws when no hoster handles the href
            $hoster = $hosterHelper->findHoster($link['href']);

            // the link is valid
            if ($isStartLine) {
                $pre = $linePrefix;
            }

            $cLink = array();
            $cLink['hosterId'] = $hoster->getId();
            $cLink['videoId'] = $hoster->getResourceId($link['href']);

            $label = trim(strip_tags($link['label']));
            // only join with a space when there is a prefix, so that
            // prefix-less labels do not start with a blank
            $cLink['label'] = ($pre !== false && $pre !== '') ? $pre . " " . $label : $label;
            $cLink['link'] = "{$cLink['hosterId']}:{$cLink['videoId']}";

            $cleanedLinks[] = $cLink;
        } catch (Exception $e) {
            // the link is invalid: when it opens a line, its own label acts
            // as prefix for the following links unless an explicit one exists
            if ($isStartLine) {
                $pre = ($linePrefix !== false) ? $linePrefix : $link['label'];
            }
        }
    }

    X_Debug::i("Step2 links: " . count($cleanedLinks));
    return $cleanedLinks;
}