Exemple #1
1
 /**
  * Load the page referenced by this object's href and register one season
  * object per season link found in it.
  *
  * The page is fetched through the top parent's load() method. Every td
  * element carrying the "gst_fichier" class is inspected: cells without an
  * img, cells whose first img has no attributes or has alt="puce", and cells
  * without a link are skipped. The link text is lower-cased and, when it looks
  * like "saison <digits>", reduced to the integer season number.
  *
  * @return bool false when there is no href, the page cannot be loaded or no
  *              "gst_fichier" cell exists; true once all cells were scanned.
  */
 public function load()
 {
     if (!$this->get_href()) {
         $this->log("load() No href", 'error');
         return false;
     }
     $parent = $this->get_top_parent();
     $this->log("load() href: " . $this->get_href(), 'info');
     $doc = $parent->load($this->get_href());
     if (!$doc) {
         $this->log("load() Cannot load url " . $this->get_href(), 'error');
         return false;
     }
     $elmsTd = getElementsByClassName($doc, 'td', 'gst_fichier', false);
     if ($elmsTd->length < 1) {
         $this->log("load() Cannot found td element with gst_fichier class name", 'error');
         return false;
     }
     foreach ($elmsTd as $td) {
         $elmsImg = $td->getElementsByTagName('img');
         if ($elmsImg->length < 1) {
             $this->log("load() Cannot found img element", 'error');
             continue;
         }
         $img = $elmsImg->item(0);
         if (!$img->hasAttributes() || $img->getAttribute('alt') == 'puce') {
             $this->log("load() puce element, zap", 'info');
             continue;
         }
         $elmsA = $td->getElementsByTagName('a');
         if ($elmsA->length < 1) {
             $this->log("load() Cannot found link to season", 'error');
             continue;
         }
         $link = $elmsA->item(0);
         $href = $link->getAttribute('href');
         $num = strtolower(trim($link->textContent));
         if (preg_match("/^saison\\s*(\\d+)\$/", $num, $matches)) {
             $num = (int) $matches[1];
         }
         if (!$num || !$href) {
             $this->log("load() No num or href", 'error');
             // Skip the cell: a season without a number or a target cannot
             // be loaded, so it must not be created and registered.
             continue;
         }
         $this->log("load() Found season {$num} ({$href})", 'info');
         $season = new wssubSeason_seriessub($this);
         $season->set_num($num);
         $season->set_href($href);
         $season->load();
         $this->add_season($season);
     }
     return true;
 }
Exemple #2
0
 /**
  * Load the season page referenced by this object's href and register one
  * episode object per file entry found in it.
  *
  * The page is fetched through the top parent's load() method. Every td
  * element returned for the "gst_fichier" class is inspected: cells without an
  * img, with an img lacking a src, with an img whose src ends in "rep.png",
  * without a link, or whose link has no text or no href are skipped.
  *
  * @return bool false when there is no href, the page cannot be loaded or no
  *              "gst_fichier" cell exists; true once all cells were scanned.
  */
 public function load()
 {
     if (!$this->get_href()) {
         $this->log("load() No href", 'error');
         return false;
     }
     $parent = $this->get_top_parent();
     // Progress message, not a failure: logged at 'info' level.
     $this->log("load() Parsing season " . $this->get_num() . " (" . $this->get_href() . ")", 'info');
     $doc = $parent->load($this->get_href());
     if (!$doc) {
         $this->log("load() Cannot load url " . $this->get_href(), 'error');
         return false;
     }
     $elmsTd = getElementsByClassName($doc, 'td', 'gst_fichier');
     if ($elmsTd->length < 1) {
         $this->log("load() No elements td with classname gst_fichier", 'error');
         return false;
     }
     foreach ($elmsTd as $td) {
         $elmsImg = $td->getElementsByTagName('img');
         if ($elmsImg->length < 1) {
             $this->log("load() No img element", 'error');
             continue;
         }
         $src = $elmsImg->item(0)->getAttribute('src');
         if (!$src) {
             $this->log("load() Img without src", 'error');
             continue;
         }
         // Entries whose icon ends in "rep.png" are skipped.
         if (preg_match("/^.*rep\\.png\$/i", $src)) {
             $this->log("load() Foler zap", 'info');
             continue;
         }
         $elmsA = $td->getElementsByTagName('a');
         if ($elmsA->length < 1) {
             // This branch is about the missing link, not the image.
             $this->log("load() No link element", 'error');
             continue;
         }
         $link = $elmsA->item(0);
         $name = $link->textContent;
         $href = $link->getAttribute('href');
         if (!$name || !$href) {
             $this->log("load() No name or href", 'error');
             continue;
         }
         $episode = new wssubEpisode_seriessub($this);
         $episode->load_content($name, $href);
         $this->add_episode($episode);
     }
     return true;
 }
Exemple #3
0
 /**
  * Search the subtitles index for shows whose name starts with the requested
  * term and register every match.
  *
  * The index page (prefix URL + "/sous-titres") is loaded, and the trimmed,
  * lower-cased text of each td with the "gst_name" class is compared
  * case-insensitively against the term returned by $request->get('search').
  * For each match, the href of the first link inside the cell's first div is
  * used to build, load and register a show object.
  *
  * @param object $request Request object exposing get('search').
  *
  * @return bool false when the request or search term is missing, the index
  *              cannot be loaded or no "gst_name" cell exists; true otherwise.
  */
 public function search($request)
 {
     if (!$request) {
         $this->log('search() No request', 'error');
         return false;
     }
     $search = $request->get('search');
     if (!$search) {
         $this->log("search() No search!", 'error');
         return false;
     }
     $doc = $this->load($this->get_prefix_url() . '/sous-titres');
     if (!$doc) {
         $this->log("Cannot load subtitles index", 'error');
         return false;
     }
     $elmsTd = getElementsByClassName($doc, 'td', 'gst_name', false);
     if ($elmsTd->length < 1) {
         $this->log("Cannot find sub (td:class gst_name)");
         return false;
     }
     // Escape the term so characters such as "/", "(", "[" or "+" are matched
     // literally instead of breaking or altering the pattern. The pattern is
     // loop-invariant, so it is built once.
     $pattern = '/^' . preg_quote((string) $search, '/') . '.*$/i';
     foreach ($elmsTd as $td) {
         $name = strtolower(trim($td->textContent));
         if (!preg_match($pattern, $name)) {
             continue;
         }
         $this->log("found show: " . $name, 'info');
         $elmsDiv = $td->getElementsByTagName('div');
         if ($elmsDiv->length < 1) {
             continue;
         }
         $elmsA = $elmsDiv->item(0)->getElementsByTagName('a');
         if ($elmsA->length < 1) {
             continue;
         }
         $href = $elmsA->item(0)->getAttribute('href');
         $show = new wssubTvShow_seriessub($this);
         $show->set_name($name);
         $show->set_href($href);
         $show->load();
         $this->add_tvshow($show);
     }
     return true;
 }
 /**
  * Load the page at (parent prefix URL + this object's URL) and log the
  * language of every flag image found in its description paragraph.
  *
  * The first child of the first div with the "left_articles" class is walked.
  * Each element child is logged. For a p element with class "description",
  * the alt attribute of every img child is passed to get_mini_lang() and the
  * result is logged.
  *
  * @return bool false when there is no URL, the page cannot be loaded, no
  *              "left_articles" div exists or that div is empty; true
  *              otherwise.
  */
 public function load()
 {
     $parent = $this->get_top_parent();
     if (!$this->get_url()) {
         $this->log("load() no url", 'error');
         return false;
     }
     // Build the URL once so the error message below can report it.
     $url = $parent->get_prefix_url() . $this->get_url();
     $doc = $parent->load($url);
     // isset() is true for a false value, so test truthiness instead.
     if (!$doc) {
         $this->log("load() Cannot load url '{$url}'", 'error');
         return false;
     }
     $left = getElementsByClassName($doc, 'div', 'left_articles', true);
     if ($left->length < 1) {
         $this->log("load() No left_articles", 'error');
         return false;
     }
     $rootElm = $left->item(0)->firstChild;
     if (!$rootElm) {
         $this->log("load() left_articles is empty", 'error');
         return false;
     }
     foreach ($rootElm->childNodes as $c) {
         // Only element nodes have tagName/getAttribute; skip text, comments...
         if ($c->nodeType !== XML_ELEMENT_NODE) {
             continue;
         }
         $this->log("load()" . $c->tagName . ' - ' . $c->textContent, 'info');
         if ($c->tagName != 'p' || $c->getAttribute('class') != 'description') {
             continue;
         }
         $this->log("load() description: " . $c->textContent, 'parse');
         foreach ($c->childNodes as $dc) {
             // DOM nodes expose nodeType (there is no tagType property).
             if ($dc->nodeType !== XML_ELEMENT_NODE || $dc->tagName != "img") {
                 continue;
             }
             $lang = get_mini_lang($dc->getAttribute('alt'));
             $this->log("load() lang: " . $lang, 'parse');
         }
     }
     return true;
 }
// Path of the locally cached HTML page that is parsed below.
$cache_path = 'cache/html5_elements.html';
/**
 * Names of the HTML5 void elements.
 *
 * @see http://www.w3.org/TR/html5/syntax.html#void-elements
 */
$empty_tags = array('area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr');
/**
 * Names of elements treated as phrasing content.
 *
 * @see http://www.marguerite.jp/Nihongo/WWW/RefHTML5/Appendix/Content-Phrasing.html
 */
$phrasing_contents = array('a', 'abbr', 'area', 'map', 'audio', 'b', 'bdi', 'bdo', 'br', 'button', 'canvas', 'cite', 'code', 'command', 'del', 'dfn', 'em', 'embed', 'i', 'iframe', 'img', 'input', 'ins', 'kbd', 'keygen', 'label', 'map', 'mark', 'math', 'meter', 'noscript', 'object', 'output', 'p', 'pre', 'progress', 'q', 'ruby', 's', 'samp', 'script', 'section', 'select', 'small', 'span', 'strong', 'sub', 'sup', 'svg', 'textarea', 'time', 'u', 'var', 'video', 'wbr', 'option');
$tags = array();
$doc = new DOMDocument();
// Download the page only when no cached copy exists yet.
// NOTE(review): $elements_list_url is not defined in this excerpt -
// presumably set earlier in the file; confirm.
if (!file_exists($cache_path)) {
    file_put_contents($cache_path, file_get_contents($elements_list_url));
}
// "@" suppresses the warnings raised while loading the HTML file.
@$doc->loadHTMLFile($cache_path);
// Use the first element returned for the "tabular-alt" class.
$table = getElementsByClassName($doc, 'tabular-alt');
$table = $table[0];
$all_tr = $table->getElementsByTagName('tr');
foreach ($all_tr as $tr) {
    // The first cell of the row holds the tag name; rows without a td are skipped.
    $td = $tr->getElementsByTagName('td');
    $td = $td->item(0);
    if (is_null($td)) {
        continue;
    }
    // Strip surrounding spaces and angle brackets from the cell text.
    $tag_name = trim($td->nodeValue, ' <>');
    // Skip entries whose name contains "!".
    if (strpos($tag_name, '!') !== false) {
        continue;
    }
    if (!$tag_name) {
        continue;
    }
    // NOTE(review): the rest of this loop body is not visible in this excerpt.
 /**
  * Load the tvsubtitles.net pages of this show and register its seasons.
  *
  * When the parent's request carries a 'season' value only that season is
  * fetched; otherwise seasons 1 to 10 are tried. For each season page, the
  * first child of the first div with the "left_articles" class is walked:
  * h2 sets the show name, p is handed to parse_seasons() to obtain the active
  * season number, table is kept as the season content and an img with class
  * "thumbnail1" sets the cover image. Pages that cannot be loaded, that lack
  * the expected container or that yield no season number are skipped.
  *
  * @return void
  */
 public function load()
 {
     $parent = $this->get_top_parent();
     $show_id = $this->get_id();
     $this->log("load: Top parent name: " . $parent->get_name(), 'info');
     $this->log("load: Loading show id: " . $this->get_id(), 'info');
     $requestedSeason = $parent->get_request()->get('season');
     $a_se = $requestedSeason ? array($requestedSeason) : range(1, 10);
     foreach ($a_se as $season_id) {
         $url = "http://www.tvsubtitles.net/tvshow-{$show_id}-{$season_id}.html";
         $this->log("load: Search for season: {$show_id} - {$season_id} ({$url})", 'info');
         $doc = $parent->load($url);
         if (!$doc) {
             $this->log("load() Cannot load url {$url}", 'error');
             continue;
         }
         $articles = getElementsByClassName($doc, 'div', 'left_articles', true);
         if ($articles->length == 0) {
             $this->log("load() No left_articles in document", 'error');
             continue;
         }
         // The content lives in the first child of the left_articles div;
         // an empty div would otherwise be dereferenced as null below.
         $body = $articles->item(0)->childNodes->item(0);
         if (!$body) {
             $this->log("load() left_articles is empty", 'error');
             continue;
         }
         $subcontent = null;
         $season_num = null;
         foreach ($body->childNodes as $elm) {
             // Nodes without a tag name (e.g. text nodes) carry nothing to parse.
             if (!isset($elm->tagName)) {
                 continue;
             }
             switch ($elm->tagName) {
                 case 'h2':
                     $this->log("Title: " . $elm->textContent, 'parse');
                     $this->set_name($elm->textContent);
                     break;
                 case 'p':
                     $season_num = $this->parse_seasons($elm);
                     $this->log("Seasons list,  active one, {$season_num}", 'parse');
                     break;
                 case 'table':
                     $this->log("sous contenu: " . $elm->textContent, 'parse');
                     $subcontent = $elm;
                     break;
                 case 'img':
                     if ($elm->hasAttribute('class') && $elm->getAttribute('class') == 'thumbnail1') {
                         $this->log("cover: " . $elm->getAttribute('src'), 'parse');
                         $this->set_img($elm->getAttribute('src'));
                     }
                     break;
                 default:
                     $this->log("Unknow elm: " . $elm->textContent, 'parse');
                     break;
             }
         }
         if (!$season_num) {
             continue;
         }
         $season = new wssubSeason_tvsubtitles($this);
         $season->set_num($season_num);
         $season->load_content($doc, $subcontent);
         $this->add_season($season);
     }
 }