/**
 * Load the show page and register one season per season link found on it.
 *
 * The page at this show's href is fetched through the top parent, then every
 * td cell returned for the class name "gst_fichier" is inspected:
 *  - cells without an img are reported and skipped;
 *  - cells whose first img has no attributes, or whose alt is "puce", are skipped;
 *  - otherwise the first link of the cell supplies the season href, and its
 *    text supplies the season number ("saison N" is reduced to the integer N,
 *    any other label is kept trimmed and lower-cased).
 * Each season found is loaded immediately and attached with add_season().
 *
 * @return bool false when the href is missing, the page cannot be loaded or
 *              no gst_fichier cell is found; true otherwise.
 */
public function load()
{
    $url = $this->get_href();
    if (!$url) {
        $this->log("load() No href", 'error');
        return false;
    }

    $parent = $this->get_top_parent();
    $this->log("load() href: " . $url, 'info');

    $doc = $parent->load($url);
    if (!$doc) {
        $this->log("load() Cannot load url " . $url, 'error');
        return false;
    }

    $cells = getElementsByClassName($doc, 'td', 'gst_fichier', false);
    if ($cells->length < 1) {
        $this->log("load() Cannot found td element with gst_fichier class name", 'error');
        return false;
    }

    foreach ($cells as $cell) {
        $images = $cell->getElementsByTagName('img');
        if ($images->length < 1) {
            $this->log("load() Cannot found img element", 'error');
            continue;
        }

        $image = $images->item(0);
        if (!$image->hasAttributes() || $image->getAttribute('alt') == 'puce') {
            $this->log("load() puce element, zap", 'info');
            continue;
        }

        $links = $cell->getElementsByTagName('a');
        if ($links->length < 1) {
            $this->log("load() Cannot found link to season", 'error');
            continue;
        }

        $link = $links->item(0);
        $href = $link->getAttribute('href');
        $num = strtolower(trim($link->textContent));
        if (preg_match('/^saison\s*(\d+)$/', $num, $matches)) {
            $num = (int) $matches[1];
        }

        if (!$num || !$href) {
            $this->log("load() No num or href", 'error');
            // Without a number or a target there is nothing to load: skip the
            // cell instead of registering an unusable season.
            continue;
        }

        $this->log("load() Found season {$num} ({$href})", 'info');

        $season = new wssubSeason_seriessub($this);
        $season->set_num($num);
        $season->set_href($href);
        $season->load();
        $this->add_season($season);
    }

    return true;
}
/**
 * Load the season page and register one episode per file entry found on it.
 *
 * The page at this season's href is fetched through the top parent, then
 * every td cell returned for the class name "gst_fichier" is inspected:
 *  - cells without an img, or whose first img has no src, are reported and skipped;
 *  - cells whose image src ends with "rep.png" (case-insensitive) are skipped;
 *  - otherwise the first link of the cell supplies the episode name (its
 *    text) and href, which are handed to a new episode via load_content().
 *
 * @return bool false when the href is missing, the page cannot be loaded or
 *              no gst_fichier cell is found; true otherwise.
 */
public function load()
{
    $url = $this->get_href();
    if (!$url) {
        $this->log("load() No href", 'error');
        return false;
    }

    $parent = $this->get_top_parent();
    // Routine progress trace: logged as 'info', not as an error.
    $this->log("load() Parsing season " . $this->get_num() . " (" . $url . ")", 'info');

    $doc = $parent->load($url);
    if (!$doc) {
        $this->log("load() Cannot load url " . $url, 'error');
        return false;
    }

    $cells = getElementsByClassName($doc, 'td', 'gst_fichier');
    if ($cells->length < 1) {
        $this->log("load() No elements td with classname gst_fichier", 'error');
        return false;
    }

    foreach ($cells as $cell) {
        $images = $cell->getElementsByTagName('img');
        if ($images->length < 1) {
            $this->log("load() No img element", 'error');
            continue;
        }

        $src = $images->item(0)->getAttribute('src');
        if (!$src) {
            $this->log("load() Img without src", 'error');
            continue;
        }

        // Entries whose icon is "...rep.png" are not episodes: skip them.
        if (preg_match('/^.*rep\.png$/i', $src)) {
            $this->log("load() Foler zap", 'info');
            continue;
        }

        $links = $cell->getElementsByTagName('a');
        if ($links->length < 1) {
            // This branch is about the missing link, not the image.
            $this->log("load() No a element", 'error');
            continue;
        }

        $link = $links->item(0);
        $name = $link->textContent;
        $href = $link->getAttribute('href');
        if (!$name || !$href) {
            $this->log("load() No name or href", 'error');
            continue;
        }

        $episode = new wssubEpisode_seriessub($this);
        $episode->load_content($name, $href);
        $this->add_episode($episode);
    }

    return true;
}
/**
 * Search the subtitles index for shows whose name starts with the requested term.
 *
 * Loads "<prefix url>/sous-titres", walks every td cell returned for the
 * class name "gst_name" and, for each cell whose trimmed, lower-cased text
 * starts with the search term (case-insensitive), builds a show from the
 * first link inside the cell's first div, loads it and registers it with
 * add_tvshow().
 *
 * @param object $request Request object; only its get('search') value is read here.
 *
 * @return bool false when the request or search term is missing, the index
 *              cannot be loaded or no gst_name cell is found; true otherwise.
 */
public function search($request)
{
    if (!$request) {
        $this->log('search() No request', 'error');
        return false;
    }

    $search = $request->get('search');
    if (!$search) {
        $this->log("search() No search!", 'error');
        return false;
    }

    $doc = $this->load($this->get_prefix_url() . '/sous-titres');
    if (!$doc) {
        $this->log("Cannot load subtitles index", 'error');
        return false;
    }

    $cells = getElementsByClassName($doc, 'td', 'gst_name', false);
    if ($cells->length < 1) {
        $this->log("Cannot find sub (td:class gst_name)");
        return false;
    }

    // The term comes from the request: quote it so characters such as "/",
    // "(" or "[" are matched literally instead of breaking the pattern.
    $pattern = '/^' . preg_quote($search, '/') . '.*$/i';

    foreach ($cells as $cell) {
        $name = strtolower(trim($cell->textContent));
        if (!preg_match($pattern, $name)) {
            continue;
        }

        $this->log("found show: " . $name, 'info');

        $divs = $cell->getElementsByTagName('div');
        if ($divs->length < 1) {
            continue;
        }

        $links = $divs->item(0)->getElementsByTagName('a');
        if ($links->length < 1) {
            continue;
        }

        $show = new wssubTvShow_seriessub($this);
        $show->set_name($name);
        $show->set_href($links->item(0)->getAttribute('href'));
        $show->load();
        $this->add_tvshow($show);
    }

    return true;
}
/**
 * Load this item's page and scan its description paragraph for language icons.
 *
 * The page at "<parent prefix url><this url>" is fetched through the top
 * parent. The element children of the first child of the first
 * "left_articles" div are logged one by one; inside every
 * <p class="description">, each <img> has its alt text passed to
 * get_mini_lang() and the result is logged.
 *
 * @return bool false when the url is missing, the page cannot be loaded, or
 *              the left_articles div is absent or empty; true otherwise.
 */
public function load()
{
    $parent = $this->get_top_parent();

    if (!$this->get_url()) {
        $this->log("load() no url", 'error');
        return false;
    }

    $url = $parent->get_prefix_url() . $this->get_url();
    $doc = $parent->load($url);
    // A plain falsy test also catches a `false` result, which isset() let through.
    if (!$doc) {
        $this->log("load() Cannot load url '{$url}'", 'error');
        return false;
    }

    $left = getElementsByClassName($doc, 'div', 'left_articles', true);
    if ($left->length < 1) {
        $this->log("load() No left_articles", 'error');
        return false;
    }

    $rootElm = $left->item(0)->firstChild;
    if (!$rootElm) {
        $this->log("load() left_articles is empty", 'error');
        return false;
    }

    foreach ($rootElm->childNodes as $child) {
        // Only elements carry tagName/getAttribute(); text, comment and other
        // node kinds are skipped.
        if ($child->nodeType !== XML_ELEMENT_NODE) {
            continue;
        }

        $this->log("load()" . $child->tagName . ' - ' . $child->textContent, 'info');

        if ($child->tagName != 'p' || $child->getAttribute('class') != 'description') {
            continue;
        }

        // NOTE(review): this trace is emitted at 'error' level although it
        // does not report a failure - confirm the intended level.
        $this->log("load() description: " . $child->textContent, 'error');

        foreach ($child->childNodes as $descChild) {
            // DOM nodes expose nodeType (there is no "tagType" property), so
            // the element test has to be made on nodeType to reach the imgs.
            if ($descChild->nodeType !== XML_ELEMENT_NODE || $descChild->tagName != 'img') {
                continue;
            }

            $lang = get_mini_lang($descChild->getAttribute('alt'));
            $this->log("load() lang: " . $lang, 'parse');
        }
    }

    return true;
}
// Local copy of the HTML5 elements page; downloaded once, then reused.
$cache_path = 'cache/html5_elements.html';

/**
 * Void elements (tags without content or closing tag).
 *
 * @see http://www.w3.org/TR/html5/syntax.html#void-elements
 */
$empty_tags = array('area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr');

/**
 * Tag names treated as phrasing content.
 * NOTE(review): 'map' is listed twice - harmless for lookups, confirm it is not a typo for another tag.
 *
 * @see http://www.marguerite.jp/Nihongo/WWW/RefHTML5/Appendix/Content-Phrasing.html
 */
$phrasing_contents = array('a', 'abbr', 'area', 'map', 'audio', 'b', 'bdi', 'bdo', 'br', 'button', 'canvas', 'cite', 'code', 'command', 'del', 'dfn', 'em', 'embed', 'i', 'iframe', 'img', 'input', 'ins', 'kbd', 'keygen', 'label', 'map', 'mark', 'math', 'meter', 'noscript', 'object', 'output', 'p', 'pre', 'progress', 'q', 'ruby', 's', 'samp', 'script', 'section', 'select', 'small', 'span', 'strong', 'sub', 'sup', 'svg', 'textarea', 'time', 'u', 'var', 'video', 'wbr', 'option');

// Starts empty here; nothing in the visible part of the script fills it.
$tags = array();

$doc = new DOMDocument();

// Fetch the elements page only when no cached copy exists.
// NOTE(review): $elements_list_url is not defined in the visible part of the
// script - presumably set earlier; the return values of both file calls are unchecked.
if (!file_exists($cache_path)) {
    file_put_contents($cache_path, file_get_contents($elements_list_url));
}

// "@" silences the warnings loadHTMLFile() raises while parsing the page.
@$doc->loadHTMLFile($cache_path);

// First result returned for the class name 'tabular-alt' is used as the table to scan.
$table = getElementsByClassName($doc, 'tabular-alt');
$table = $table[0];

$all_tr = $table->getElementsByTagName('tr');
foreach ($all_tr as $tr) {
    // Only the first cell of each row is read.
    $td = $tr->getElementsByTagName('td');
    $td = $td->item(0);
    // Rows without any td are skipped.
    if (is_null($td)) {
        continue;
    }
    // Strip surrounding spaces and angle brackets from the cell text.
    $tag_name = trim($td->nodeValue, ' <>');
    // Names containing "!" are skipped.
    if (strpos($tag_name, '!') !== false) {
        continue;
    }
    // Empty names are skipped.
    if (!$tag_name) {
        continue;
    }
/**
 * Load the seasons of this show from tvsubtitles.net.
 *
 * Seasons 1 to 10 are tried by default; when the top parent's request
 * carries a 'season' value, only that season is tried. For each season the
 * page "tvshow-<show id>-<season>.html" is fetched through the top parent
 * and the children of the first child of the first "left_articles" div are
 * dispatched by tag name:
 *  - h2    : show title, stored with set_name();
 *  - p     : seasons list, parsed by parse_seasons() to get the active season number;
 *  - table : kept as the season's sub-content;
 *  - img   : stored with set_img() when its class is "thumbnail1".
 * A season is created and added only when a season number was found.
 * Pages that cannot be loaded or parsed are logged and skipped.
 *
 * @return void
 */
public function load()
{
    $parent = $this->get_top_parent();
    $show_id = $this->get_id();

    $this->log("load: Top parent name: " . $parent->get_name(), 'info');
    $this->log("load: Loading show id: " . $show_id, 'info');

    $season_ids = range(1, 10);
    $requested = $parent->get_request()->get('season');
    if ($requested) {
        $season_ids = array($requested);
    }

    foreach ($season_ids as $season_id) {
        $url = "http://www.tvsubtitles.net/tvshow-{$show_id}-{$season_id}.html";
        $this->log("load: Search for season: {$show_id} - {$season_id} ({$url})", 'info');

        $doc = $parent->load($url);
        if (!$doc) {
            $this->log("load() Cannot load url {$url}", 'error');
            continue;
        }

        $articles = getElementsByClassName($doc, 'div', 'left_articles', true);
        if ($articles->length == 0) {
            $this->log("load() No left_articles in document", 'error');
            continue;
        }

        // An empty div has no first child: report it instead of reading
        // childNodes on null.
        $container = $articles->item(0)->childNodes->item(0);
        if (!$container) {
            $this->log("load() left_articles is empty", 'error');
            continue;
        }

        $subcontent = null;
        $season_num = null;

        foreach ($container->childNodes as $elm) {
            // Nodes without a tag name (text, comments...) carry nothing to dispatch on.
            if (!isset($elm->tagName)) {
                continue;
            }

            switch ($elm->tagName) {
                case 'h2':
                    $this->log("Title: " . $elm->textContent, 'parse');
                    $this->set_name($elm->textContent);
                    break;

                case 'p':
                    $season_num = $this->parse_seasons($elm);
                    $this->log("Seasons list, active one, {$season_num}", 'parse');
                    break;

                case 'table':
                    $this->log("sous contenu: " . $elm->textContent, 'parse');
                    $subcontent = $elm;
                    break;

                case 'img':
                    if ($elm->hasAttribute('class') && $elm->getAttribute('class') == 'thumbnail1') {
                        $this->log("cover: " . $elm->getAttribute('src'), 'parse');
                        $this->set_img($elm->getAttribute('src'));
                    }
                    break;

                default:
                    $this->log("Unknow elm: " . $elm->textContent, 'parse');
                    break;
            }
        }

        if (!$season_num) {
            continue;
        }

        $season = new wssubSeason_tvsubtitles($this);
        $season->set_num($season_num);
        $season->load_content($doc, $subcontent);
        $this->add_season($season);
    }
}