Page::go_line, PHP примеры использования

Пример #1

0

Показать файл

Файл: mangafox.php Проект: JerryMaheswara/crawler

 /**
  * Echo one HTML link per page image of a Mangafox chapter.
  *
  * The first page supplies the page total; the remaining pages are
  * requested as "<n>.html" files next to the chapter URL.
  *
  * @param array $v Chapter descriptor; the keys 'url' and 'infix' are read.
  * @return void Links are written with echo.
  */
 public function crawl_chapter($v)
 {
     $ifx = Text::create($v['infix'])->pad(3)->to_s();
     $prefix = $this->prefix;
     // Shared by the first page and every following page.
     $print_image = function ($page) use ($prefix, $ifx) {
         $page->go_line('id="viewer"');
         // The image source is read two lines after the viewer marker.
         $page->next_line(2);
         $src = $page->curr_line()->cut_between('src="', '"');
         $name = basename($src);
         echo "<a href='{$src}'>{$prefix}-{$ifx}-{$name}</a><br>\n";
     };
     $first = new Page($v['url']);
     // The page total is taken from the "of N" text after the top bar.
     $first->go_line('id="top_bar"');
     $first->go_line_regex('/of \\d+\\w+/');
     $found = $first->curr_line()->regex_match('/of (\\d+)/');
     $tot = $found[1];
     $print_image($first);
     for ($number = 2; $number <= $tot; $number++) {
         $print_image(new Page(dirname($v['url']) . '/' . $number . '.html'));
     }
 }

Пример #2

0

Показать файл

Файл: Mangahead_Crawler.php Проект: JerryMaheswara/crawler

 /**
  * Collect the image URLs of a Mangahead chapter, following its pagination.
  *
  * @param string $chapter_url URL of the chapter (optionally with "?page=N").
  * @param string $prefix      Leading part of every generated file name.
  * @param mixed  $infix       Chapter identifier; padded via Text::pad(3).
  * @return array Map of "<prefix>-<infix>-<basename>" to absolute image URL.
  */
 public function get_images($chapter_url, $prefix, $infix)
 {
     $ifx = Text::create($infix)->pad(3)->to_s();
     $sitename = "http://mangahead.com";
     $base = Text::create($chapter_url);
     if (!$base->contain('index.php')) {
         $base = $base->replace($sitename . '/Manga', $sitename . '/index.php/Manga');
     }
     // Start from the page named in the URL, or from page 1.
     $page = 1;
     if ($base->contain('?page=')) {
         $page = (int) $base->cut_after('?page=')->to_s();
         $base = $base->cut_until('?page=');
     }
     $pages = array();
     do {
         $listing = new Page($chapter_url);
         $listing->go_line('<blockquote>');
         // A "next" arrow on this line means another page follows.
         $has_next = $listing->curr_line()->contain('&nbsp;&nbsp;&rsaquo;');
         if ($has_next) {
             $chapter_url = $base . '/?page=' . ++$page;
         }
         foreach ($listing->curr_line()->extract_to_array('<img src="', '"') as $src) {
             // Thumbnail path -> full image path.
             $parturl = Text::create($src)->replace('index.php', 'mangas')->replace('?action=thumb', '')->to_s();
             $name = basename($parturl);
             $pages["{$prefix}-{$ifx}-{$name}"] = $sitename . $parturl;
         }
     } while ($has_next);
     return $pages;
 }

Пример #3

0

Показать файл

Файл: mangainn.php Проект: JerryMaheswara/crawler

 /**
  * Read the chapter identifier from a chapter page.
  *
  * @param string $url URL of the chapter page.
  * @return mixed First capture group of the "Chapter (...)</" match on the
  *               line containing id="gotoMangaInfo".
  */
 public function grab_chapter_infix($url)
 {
     $page = new Page($url);
     $page->go_line('id="gotoMangaInfo"');
     $info_line = $page->curr_line();
     $found = $info_line->regex_match('/Chapter (\\w*)<\\//');
     return $found[1];
 }

Пример #4

0

Показать файл

Файл: Rule34xxx_Downloader.php Проект: JerryMaheswara/crawler

 /**
  * Download every original image reachable from a listing URL, following
  * the paginator until no "next" link is present.
  *
  * @param string $url URL of the first listing page.
  * @param string $dir Target directory prefix; the image basename is appended
  *                    directly, so it is expected to end with a separator.
  * @return void
  */
 private function collect_images($url, $dir)
 {
     $domain = 'http://rule34.xxx/';
     $base = 'http://rule34.xxx/index.php';
     while (true) {
         echo $url . "\n";
         $listing = new Page($url);
         $listing->go_line('class="thumb"');
         // Walk the thumbnail lines until the <center> line is reached.
         do {
             if ($listing->curr_line()->contain('href="')) {
                 $href = htmlspecialchars_decode($listing->curr_line()->cut_between('href="', '"')->to_s());
                 echo "{$domain}{$href}\n";
                 $detail = new Page($domain . $href);
                 $detail->go_line('Original image');
                 // The match starts after "http:", so the scheme is re-added.
                 $src = 'http:' . $detail->curr_line()->cut_between('href="http:', '"')->to_s();
                 $outpath = $dir . basename($src);
                 download_it($src, $outpath, "--header=\"Accept: image/*\"");
             }
         } while (!$listing->next_line()->contain('<center>'));
         $listing->reset_line();
         $listing->go_line('id="paginator"');
         if (!$listing->curr_line()->contain('alt="next"')) {
             // Last listing page.
             break;
         }
         $m = $listing->curr_line()->regex_match('/href="([^"]+)" alt="next"/');
         $url = $base . html_entity_decode($m[1]);
     }
 }

Пример #5

0

Показать файл

Файл: Mangafox_Crawler.php Проект: JerryMaheswara/crawler

 /**
  * Collect the image sources of every page of a Mangafox chapter.
  *
  * @param string $chapter_url URL of the chapter's first page.
  * @param string $prefix      Leading part of every generated name.
  * @param mixed  $infix       Chapter identifier; padded via Text::pad(3).
  * @return array Map of "<prefix>-<infix>-<basename>" to the image source
  *               as returned by cut_between().
  */
 public function get_images($chapter_url, $prefix, $infix)
 {
     $ifx = Text::create($infix)->pad(3)->to_s();
     $page = new Page($chapter_url);
     // The page total is taken from the "of N" text after the top bar.
     $page->go_line('id="top_bar"');
     $page->go_line_regex('/of \\d+\\w+/');
     $found = $page->curr_line()->regex_match('/of (\\d+)/');
     $tot = $found[1];
     $result = array();
     $number = 1;
     while (true) {
         $page->go_line('id="viewer"');
         // The image source is read two lines after the viewer marker.
         $page->next_line(2);
         $src = $page->curr_line()->cut_between('src="', '"');
         $name = basename($src);
         $result["{$prefix}-{$ifx}-{$name}"] = $src;
         $number++;
         if ($number > $tot) {
             break;
         }
         // Pages 2..N live next to the chapter URL as "<n>.html".
         $page = new Page(dirname($chapter_url) . '/' . $number . '.html');
     }
     return $result;
 }

Пример #6

0

Показать файл

Файл: NoNeedForBushido_Extractor.php Проект: JerryMaheswara/crawler

 /**
  * Build one row (image, link, next link) per comic number in [$s, $n].
  *
  * @param mixed $columns Not used by this implementation.
  * @param int   $s       First comic number.
  * @param int   $n       Last comic number (inclusive).
  * @param mixed $url     Not used by this implementation.
  * @return array List of arrays with the keys 'image', 'link' and 'next',
  *               each holding an HTML snippet.
  */
 public function extract($columns, $s, $n, $url)
 {
     $rows = array();
     for ($number = $s; $number <= $n; $number++) {
         $purl = sprintf('http://nn4b.com/?webcomic1=%s', $number);
         $page = new Page($purl);
         // Image address from the og:image meta line.
         $page->go_line('"og:image"');
         $src = $page->curr_line()->cut_between('content="', '"')->to_s();
         // Rewind before searching for the rel='next' link.
         $page->reset_line();
         $page->go_line("link rel='next'");
         $next = $page->curr_line()->cut_between("href='", "'")->to_s();
         $rows[] = array(
             'image' => "<img src='{$src}'>",
             'link' => "<a href='{$purl}'>Link</a>",
             'next' => "<a href='{$next}'>Next</a>",
         );
     }
     return $rows;
 }

Пример #7

0

Показать файл

Файл: crazytje.php Проект: JerryMaheswara/crawler

 /**
  * Echo one HTML link per page listed in the chapter's page selector.
  *
  * @param string $url URL of the chapter page.
  * @param string $ifx Chapter infix used in the link text.
  * @return void Links are written with echo.
  */
 public function crawl_page($url, $ifx)
 {
     $p = new Page($url);
     $p->go_line('data[pages]');
     // Gather the option labels up to the closing </select>.
     $pages = array();
     $finished = false;
     while (!$finished) {
         $line = $p->curr_line();
         if ($line->contain('</option>')) {
             $pages[] = $line->cut_between('>', '</option')->to_s();
         }
         $finished = $p->next_line()->contain('</select>');
     }
     // All images share the directory of the image shown on this page.
     $p->go_line('scanlations');
     $imgbase = dirname($p->curr_line()->cut_between('<img src="', '"')->to_s());
     foreach ($pages as $page) {
         echo "<a href='{$imgbase}/{$page}'>{$this->prefix}-{$ifx}-{$page}</a><br/>\n";
     }
 }

Пример #8

0

Показать файл

Файл: mangadoom.php Проект: JerryMaheswara/crawler

 /**
  * Crawl every page of a Mangadoom chapter via crawl_page().
  *
  * @param array $v Chapter descriptor; the keys 'url' and 'infix' are read.
  * @return void
  */
 public function crawl_chapter($v)
 {
     $ifx = Text::create($v['infix'])->pad(3)->to_s();
     $first = new Page($v['url']);
     // The page list is read from the second matching page selector.
     $first->go_line('select class="cbo_wpm_pag"');
     $first->next_line();
     $first->go_line('select class="cbo_wpm_pag"');
     $pages = $first->curr_line()->extract_to_array('value="', '"');
     // The already loaded page is crawled from its first line.
     $first->reset_line();
     $this->crawl_page($first, $ifx);
     // The first entry is the page just handled; skip it.
     foreach (array_slice($pages, 1) as $page) {
         $this->crawl_page(new Page($v['url'] . $page . '/'), $ifx);
     }
 }

Пример #9

0

Показать файл

Файл: Mangadoom_Crawler.php Проект: JerryMaheswara/crawler

 /**
  * Collect the images of every page of a Mangadoom chapter.
  *
  * @param string $chapter_url URL of the chapter; page values are appended.
  * @param string $prefix      Passed through to crawl_page().
  * @param mixed  $infix       Chapter identifier; padded via Text::pad(3).
  * @return array Union (array "+") of the crawl_page() results; on key
  *               collisions the earlier page wins.
  */
 public function get_images($chapter_url, $prefix, $infix)
 {
     $ifx = Text::create($infix)->pad(3)->to_s();
     $first = new Page($chapter_url);
     // The page list is read from the second matching page selector.
     $first->go_line('select class="cbo_wpm_pag"');
     $first->next_line();
     $first->go_line('select class="cbo_wpm_pag"');
     $pages = $first->curr_line()->extract_to_array('value="', '"');
     // The already loaded page is crawled from its first line.
     $first->reset_line();
     $result = $this->crawl_page($first, $prefix, $ifx);
     // The first entry is the page just handled; skip it.
     foreach (array_slice($pages, 1) as $page) {
         $next = new Page($chapter_url . $page . '/');
         $result += $this->crawl_page($next, $prefix, $ifx);
     }
     return $result;
 }

Пример #10

0

Показать файл

Файл: YahooScreen_Downloader.php Проект: JerryMaheswara/crawler

 /**
  * Extract the stream descriptors embedded as JSON in a page.
  *
  * @param string $url URL of the page containing a "streams":[{...}] fragment.
  * @return array List of objects with the properties 'res' ("<width>x<height>"),
  *               'url' (host followed by path) and 'ext' (format). Empty when
  *               the fragment cannot be decoded.
  */
 private function collect_streams($url)
 {
     $p = new Page($url);
     $p->go_line('"streams":[{');
     $json_part = $p->curr_line()->cut_between('"streams":[{', '}]');
     // cut_between() drops the delimiters; restore them to get valid JSON.
     $streams = json_decode('[{' . $json_part . '}]');
     // json_decode() yields null for malformed input; iterating that would
     // only raise a warning, so return the empty result explicitly.
     if (!is_array($streams)) {
         return array();
     }
     $result = array();
     foreach ($streams as $stream) {
         $result[] = (object) array(
             'res' => $stream->width . 'x' . $stream->height,
             'url' => $stream->host . $stream->path,
             'ext' => $stream->format,
         );
     }
     return $result;
 }

Пример #11

0

Показать файл

Файл: foolslide.php Проект: JerryMaheswara/crawler

 /**
  * Crawl every page of a FoOlSlide chapter via crawl_page().
  *
  * @param array $v Chapter descriptor; the keys 'url' and 'infix' are read.
  * @return void
  */
 public function crawl_chapter($v)
 {
     $ifx = Text::create($v['infix'])->pad(3)->to_s();
     $first = new Page($v['url']);
     // Page links are collected from the line containing changePage(.
     $first->go_line('="changePage(');
     $links = $first->curr_line()->extract_to_array('href="', '"');
     $this->crawl_page($first, $ifx);
     // The first link belongs to the page just handled; drop it.
     array_shift($links);
     while ($links) {
         $purl = array_shift($links);
         $this->crawl_page(new Page($purl), $ifx);
     }
 }

Пример #12

0

Показать файл

Файл: Foolslide_Crawler.php Проект: JerryMaheswara/crawler

 /**
  * Collect the images of every page of a FoOlSlide chapter.
  *
  * @param string $chapter_url URL of the chapter's first page.
  * @param string $prefix      Passed through to crawl_page().
  * @param mixed  $infix       Chapter identifier; padded via Text::pad(3).
  * @return array Union (array "+") of the crawl_page() results; on key
  *               collisions the earlier page wins.
  */
 public function get_images($chapter_url, $prefix, $infix)
 {
     $ifx = Text::create($infix)->pad(3)->to_s();
     $first = new Page($chapter_url);
     // Page links are collected from the line containing changePage(.
     $first->go_line('="changePage(');
     $links = $first->curr_line()->extract_to_array('href="', '"');
     $result = $this->crawl_page($first, $prefix, $ifx);
     // The first link belongs to the page just handled; skip it.
     foreach (array_slice($links, 1) as $purl) {
         $result += $this->crawl_page(new Page($purl), $prefix, $ifx);
     }
     return $result;
 }

Пример #13

0

Показать файл

Файл: mangastream.php Проект: JerryMaheswara/crawler

 /**
  * Crawl pages 1..N of a Mangastream chapter via crawl_page().
  *
  * @param array $v Chapter descriptor; the keys 'url' and 'infix' are read.
  * @return void
  */
 public function crawl_chapter($v)
 {
     $ifx = Text::create($v['infix'])->pad(3)->to_s();
     $first = new Page($v['url']);
     // N is the number inside "Last Page (N)".
     $first->go_line('Last Page (');
     $last = $first->curr_line()->cut_between('Last Page (', ')')->to_s();
     $dir_url = dirname($v['url']);
     $this->crawl_page($first, $ifx, 1);
     // The remaining pages are addressed as "<dir>/<number>".
     $number = 2;
     while ($number <= $last) {
         $this->crawl_page(new Page($dir_url . '/' . $number), $ifx, $number);
         $number++;
     }
 }

Пример #14

0

Показать файл

Файл: Batoto_Crawler.php Проект: JerryMaheswara/crawler

 /**
  * Collect the images of every page of a Batoto chapter.
  *
  * @param string $chapter_url URL of the chapter's first page.
  * @param string $prefix      Passed through to crawl_page().
  * @param mixed  $infix       Chapter identifier; padded via Text::pad(3).
  * @return array Union (array "+") of the crawl_page() results; on key
  *               collisions the earlier page wins.
  */
 public function get_images($chapter_url, $prefix, $infix)
 {
     $ifx = Text::create($infix)->pad(3)->to_s();
     $first = new Page($chapter_url);
     // Page URLs are read from the line after the page_select marker.
     $first->go_line('id="page_select"');
     $urls = $first->next_line()->extract_to_array('value="', '"');
     $result = $this->crawl_page($first, $prefix, $ifx, 1);
     // Entry 0 is the page just handled; the rest are numbered from 2.
     foreach (array_slice($urls, 1) as $i => $purl) {
         $result += $this->crawl_page(new Page($purl), $prefix, $ifx, $i + 2);
     }
     return $result;
 }

Пример #15

0

Показать файл

Файл: batoto.php Проект: JerryMaheswara/crawler

 /**
  * Crawl every page of a Batoto chapter via crawl_page(), one after another.
  *
  * A parallel variant based on Manga_Crawler::multiProcess() was sketched
  * here earlier and is not active.
  *
  * @param array $v Chapter descriptor; the keys 'url' and 'infix' are read.
  * @return void
  */
 public function crawl_chapter($v)
 {
     $ifx = Text::create($v['infix'])->pad(3)->to_s();
     $first = new Page($v['url']);
     // Page URLs are read from the line after the page_select marker.
     $first->go_line('id="page_select"');
     $urls = $first->next_line()->extract_to_array('value="', '"');
     $this->crawl_page($first, $ifx);
     // Entry 0 is the page just handled; skip it.
     foreach (array_slice($urls, 1) as $purl) {
         $this->crawl_page(new Page($purl), $ifx);
     }
 }

Пример #16

0

Показать файл

Файл: Mangastream_Crawler.php Проект: JerryMaheswara/crawler

 /**
  * Collect the image of each page 1..N of a Mangastream chapter.
  *
  * @param string $chapter_url URL of the chapter's first page.
  * @param string $prefix      Passed through to crawl_page().
  * @param mixed  $infix       Chapter identifier; padded via Text::pad(3).
  * @return array Map built from the (name, url) pairs crawl_page() returns.
  */
 public function get_images($chapter_url, $prefix, $infix)
 {
     $ifx = Text::create($infix)->pad(3)->to_s();
     $first = new Page($chapter_url);
     // N is the number inside "Last Page (N)".
     $first->go_line('Last Page (');
     $last = $first->curr_line()->cut_between('Last Page (', ')')->to_s();
     $dir_url = dirname($chapter_url);
     $result = array();
     $pair = $this->crawl_page($first, $prefix, $ifx, 1);
     $result[$pair[0]] = $pair[1];
     // The remaining pages are addressed as "<dir>/<number>".
     $number = 2;
     while ($number <= $last) {
         $pair = $this->crawl_page(new Page($dir_url . '/' . $number), $prefix, $ifx, $number);
         $result[$pair[0]] = $pair[1];
         $number++;
     }
     return $result;
 }

Пример #17

0

Показать файл

Файл: stoptazmo.php Проект: JerryMaheswara/crawler

 /**
  * Echo one HTML link per entry of the page's "var pages" JSON list.
  *
  * @param array $v Chapter descriptor; the keys 'url' and 'infix' are read.
  * @return void Links are written with echo.
  */
 public function crawl_chapter($v)
 {
     $ifx = Text::create($v['infix'])->pad(3)->to_s();
     $chapter = new Page($v['url']);
     $chapter->go_line('var pages');
     // The JSON literal sits between " = " and the terminating ";".
     $entries = json_decode($chapter->curr_line()->cut_between(' = ', ';'));
     foreach ($entries as $entry) {
         $purl = new Text($entry->url);
         // Resizer links carry the real image URL in their "url" parameter.
         if ($purl->contain('resize_img.php')) {
             $purl = $purl->cut_between('resize_img.php?url=', '&width');
         }
         $name = new Text($entry->filename);
         // File names shorter than 15 characters get prefix and infix added.
         if ($name->strlen() < 15) {
             $name = $this->prefix . '-' . $ifx . '-' . $name;
         }
         echo "<a href='{$purl}'>{$name}</a><br/>\n";
     }
 }

Пример #18

0

Показать файл

Файл: Crazytje_Crawler.php Проект: JerryMaheswara/crawler

 /**
  * Map every page listed in the chapter's page selector to its image URL.
  *
  * @param string $url    URL of the chapter page.
  * @param string $prefix Leading part of every generated name.
  * @param string $ifx    Chapter infix used in every generated name.
  * @return array Map of "<prefix>-<ifx>-<page>" to "<image dir>/<page>".
  */
 public function crawl_page($url, $prefix, $ifx)
 {
     $p = new Page($url);
     $p->go_line('data[pages]');
     // Gather the option labels up to the closing </select>.
     $labels = array();
     $finished = false;
     while (!$finished) {
         $line = $p->curr_line();
         if ($line->contain('</option>')) {
             $labels[] = $line->cut_between('>', '</option')->to_s();
         }
         $finished = $p->next_line()->contain('</select>');
     }
     // All images share the directory of the image shown on this page.
     $p->go_line('scanlations');
     $imgbase = dirname($p->curr_line()->cut_between('<img src="', '"')->to_s());
     $result = array();
     foreach ($labels as $page) {
         $result["{$prefix}-{$ifx}-{$page}"] = "{$imgbase}/{$page}";
     }
     return $result;
 }

Пример #19

0

Показать файл

Файл: Kissmanga_Crawler.php Проект: JerryMaheswara/crawler

 /**
  * Collect the images of a Kissmanga chapter from its lstImages.push() lines.
  *
  * @param string $chapter_url URL of the chapter page.
  * @param string $prefix      Leading part of every generated name.
  * @param mixed  $infix       Chapter identifier; padded via Text::pad(3).
  * @return array Map of "<prefix>-<infix>-<NNN><suffix>" to the pushed image
  *               value, where NNN is a 1-based counter padded via
  *               Text::pad(3) and <suffix> is derived from the image basename.
  */
 public function get_images($chapter_url, $prefix, $infix)
 {
     $ifx = Text::create($infix)->pad(3)->to_s();
     $p = new Page($chapter_url, array('become_firefox' => true));
     // grab list of pages
     $p->go_line('var lstImages');
     $i = 1;
     $result = array();
     do {
         if ($p->curr_line()->contain('lstImages.push')) {
             $line = $p->curr_line();
             $img = $line->cut_between('push("', '")');
             $iname = Text::create($i++)->pad(3)->to_s() . Text::create(basename($img))->cut_rfrom('.')->cut_before('?')->to_s();
             $name = "{$prefix}-{$ifx}-{$iname}";
             $result[$name] = $img;
         }
         // Stop at the first following line that contains "new Array()".
     } while (!$p->next_line()->contain('new Array()'));
     return $result;
 }

Пример #20

0

Показать файл

Файл: ZBirthOfLevi_Downloader.php Проект: JerryMaheswara/crawler

 /**
  * Download every page image of the given chapters into a fixed directory.
  *
  * Files that already exist are not downloaded again.
  *
  * @param array $chapters Map of chapter infix to chapter URL.
  * @return void
  */
 private function download_chapters($chapters)
 {
     $prefix = 'Birth_of_Levi';
     $dir = '/home/khandar-gdp/tmp/birth of levi/';
     foreach ($chapters as $infix => $url) {
         echo "Opening {$url}...\n";
         $chapter = new Page($url);
         // Image entries start at the pages[1]= line, one per line.
         $chapter->go_line('pages[1]=');
         $counter = 1;
         while (true) {
             $line = $chapter->curr_line();
             $img = $line->cut_between('="', '"');
             $suffix = Text::create($counter++)->pad(3)->to_s();
             $ext = $img->cut_rafter('.');
             $filename = "{$dir}{$prefix}-{$infix}-{$suffix}.{$ext}";
             if (!is_file($filename)) {
                 download_it($img->to_s(), $filename);
             }
             // The list ends at the first line without "pages[".
             if (!$chapter->next_line()->contain('pages[')) {
                 break;
             }
         }
     }
 }

Пример #21

0

Показать файл

Файл: Mangaeden_Crawler.php Проект: JerryMaheswara/crawler

 /**
  * Collect the images of every page of a Mangaeden chapter.
  *
  * @param string $chapter_url URL of the chapter's first page; page URLs are
  *                            derived by replacing a trailing "/1/".
  * @param string $prefix      Passed through to crawl_page().
  * @param mixed  $infix       Chapter identifier; padded via Text::pad(3).
  * @return array Union (array "+") of the crawl_page() results; on key
  *               collisions the earlier page wins.
  */
 public function get_images($chapter_url, $prefix, $infix)
 {
     $ifx = Text::create($infix)->pad(3)->to_s();
     $first = new Page($chapter_url);
     // The page total follows " of " on the pageInfo line.
     $first->go_line('id="pageInfo"');
     $n = $first->curr_line()->cut_between(' of ', '<')->to_s();
     // URLs of pages 2..n; page 1 is the page already loaded.
     $rest = array();
     for ($number = 2; $number <= $n; $number++) {
         $rest[] = preg_replace('/\\/1\\/$/', '/' . $number . '/', $chapter_url);
     }
     // The already loaded page is crawled from its first line.
     $first->reset_line();
     $result = $this->crawl_page($first, $prefix, $ifx, 1);
     foreach ($rest as $i => $page_url) {
         $result += $this->crawl_page(new Page($page_url), $prefix, $ifx, $i + 2);
     }
     return $result;
 }

Пример #22

0

Показать файл

Файл: Fakku.php Проект: JerryMaheswara/crawler

 /**
  * Load the "/read" page of this entry and store its thumbnail list and
  * full-image URL pattern.
  *
  * The thumbnail data is looked up in three known page layouts. Pages that
  * report the content as disabled, unavailable in the country, or missing
  * make the method return without storing anything.
  *
  * On success sets $this->thumbs (thumbnail basenames joined with '#') and
  * $this->pattern (image URL with '%s' as placeholder), then calls save().
  *
  * @return void
  * @throws Exception When the page is an error page (the entry is flagged via
  *                   $this->is_deleted first) or no thumbnail data is found.
  */
 public function get_detail()
 {
     $p = new Page(Fakku::$base . $this->url . '/read');
     $content = new Text($p->content());
     // hack: sometimes old urls gone
     if ($content->contain('<title>Error Message</title>')) {
         $this->is_deleted = true;
         throw new Exception($this->url . ' url is gone');
     }
     if ($content->contain('var data = {')) {
         $p->go_line('var data = {');
         $json = $p->curr_line()->dup()->cut_between(' = ', ';')->to_s();
         $obj = json_decode($json);
         $js_thumbs = $obj->thumbs;
     } elseif ($content->contain('var data={')) {
         // Same layout without spaces around "=".
         $p->go_line('var data={');
         $json = $p->curr_line()->dup()->cut_between('data=', ';')->to_s();
         $obj = json_decode($json);
         $js_thumbs = $obj->thumbs;
     } elseif ($content->contain('window.params.thumbs')) {
         // Here the JSON value is the thumbnail list itself.
         $p->go_line('window.params.thumbs');
         $json = $p->curr_line()->cut_between('=', ';')->to_s();
         $js_thumbs = json_decode($json);
     } elseif (
         $content->contain('This content has been disabled.')
         || $content->contain('This content is not available in your country')
         || $content->contain('Content does not exist')
     ) {
         // Nothing to record for unavailable content.
         return;
     } else {
         echo $p->url();
         throw new Exception('where is thumbs?');
     }
     $thumbs = array();
     foreach ($js_thumbs as $tpath) {
         $thumbs[] = basename($tpath);
     }
     $this->thumbs = implode('#', $thumbs);
     // grab full image pattern
     $p->go_line('function imgpath(');
     $p->go_line('return \'');
     if ($p->curr_line()->contain('return \'')) {
         $imgpath = $p->curr_line()->dup()->cut_between("return '", "';")->to_s();
         $imgpath = str_replace("' + x + '", '%s', $imgpath);
     } else {
         // Retry from the top for the variant written without spaces.
         $p->reset_line();
         $p->go_line('function imgpath(');
         $p->go_line('return\'');
         $imgpath = $p->curr_line()->dup()->cut_between("return'", "';")->to_s();
         $imgpath = str_replace("'+x+'", '%s', $imgpath);
     }
     $imgpath = str_replace("https://", 'http://', $imgpath);
     $this->pattern = $imgpath;
     $this->save();
 }

Пример #23

0

Показать файл

Файл: asdf.php Проект: JerryMaheswara/crawler

 /**
  * Build a map of volume label to the last "Round" chapter listed for that
  * volume, using the Wikipedia chapter list of Hajime no Ippo and the
  * sub-articles it links as "Main article:".
  *
  * @return array Map of volume label to chapter label.
  */
 public function grab_volume_chapters()
 {
     $p = new Page('http://en.wikipedia.org/wiki/List_of_Hajime_no_Ippo_chapters');
     $list = array();
     while (!$p->end_of_line()) {
         $p->go_line('Main article:');
         // No further "Main article:" line: the index page is exhausted.
         if ($p->end_of_line()) {
             break;
         }
         $href = 'http://en.wikipedia.org' . $p->curr_line()->dup()->cut_between('href="', '"')->to_s();
         $p2 = new Page($href);
         while (!$p2->end_of_line()) {
             // NOTE(review): the try/catch suggests a Page call may throw once
             // no further volume cell exists; confirm against Page.
             try {
                 $p2->go_line('<td id="vol');
                 $vol = $p2->curr_line()->dup()->cut_between('">', '<')->to_s();
                 // Scan to the end of the table; the last "Round" item wins.
                 do {
                     if ($p2->curr_line()->contain('<li>Round ')) {
                         $last_chapter = $p2->curr_line()->dup()->cut_between('Round ', ':')->to_s();
                     }
                 } while (!$p2->next_line()->contain('</table>'));
                 // NOTE(review): $last_chapter is never reset, so a volume with
                 // no "Round" item reuses the previous value (or is undefined
                 // for the very first volume).
                 $list[$vol] = $last_chapter;
                 // echo "v $vol c $last_chapter <br/>\n";
             } catch (Exception $e) {
                 break;
             }
         }
         // Step past the current "Main article:" line before searching again.
         $p->next_line();
     }
     return $list;
 }

Пример #24

0

Показать файл

Файл: HentaiMangaOnline.php Проект: JerryMaheswara/crawler

 /**
  * Walk all listing pages from the last one down to the first and write the
  * collected entries to the file 'hmo.links' as a PHP "$links" assignment.
  *
  * Each page's entries are reversed before being appended.
  *
  * @return void
  */
 public function action_all_pages()
 {
     $start = self::$update;
     // The total page count follows "Page 1 / " on the first listing page.
     $first = new Page($start);
     $first->go_line('Page 1 / ');
     $stop = (int) $first->curr_line()->cut_between('Page 1 / ', '<')->to_s();
     $pre_infos = array();
     $number = $stop;
     while ($number >= 1) {
         // Page 1 is the bare start URL; later pages live under "page/<n>/".
         $suffix = $number > 1 ? 'page/' . $number . '/' : '';
         $listing = new Page($start . $suffix);
         $chunk = array_reverse($this->extract_from_list($listing));
         $pre_infos = array_merge($pre_infos, $chunk);
         $number--;
     }
     // Now we have complete books' links
     file_put_contents('hmo.links', '<?php $links=' . var_export($pre_infos, true) . ';');
 }

Пример #25

0

Показать файл

Файл: ieatsoul.php Проект: JerryMaheswara/crawler

        $b = break_url($v['url']);
        extract($b);
        $P = new Page($v['url']);
        // Grab all pages
        $pages = array();
        $P->go_line('id="Serie_pages"');
        do {
            if ($P->curr_line()->contain('<option')) {
                $pages[] = $P->curr_line()->dup()->cut_between('">', '</')->to_s();
            }
        } while (!$P->next_line()->contain('</select>'));
        array_shift($pages);
        // Grab this page's image
        $P->go_line('id="manga_img"');
        $src = $P->curr_line()->dup()->cut_between('src="', '"')->to_s();
        $name = basename($src);
        echo "<a href='{$domain}{$src}'>{$prefix}-{$ifx}-{$name}</a><br/>\n";
        // Now for the other pages
        foreach ($pages as $p) {
            $the_url = "{$basic_url}{$title}/{$chapter_id}/{$chapter_text}/{$p}/";
            $P = new Page($the_url);
            $P->go_line('id="manga_img"');
            $src = $P->curr_line()->dup()->cut_between('src="', '"')->to_s();
            $name = basename($src);
            echo "<a href='{$domain}{$src}'>{$prefix}-{$ifx}-{$name}</a><br/>\n";
        }
    }
}
?>

</body></html>

Пример #26

0

Показать файл

Файл: FreeHManga.php Проект: JerryMaheswara/crawler

 /**
  * Read the page count from the line containing "Pages|".
  *
  * @param string $url URL of the page to inspect.
  * @return mixed First capture group of the "(\d+) Pages" match.
  */
 private function get_last_page($url)
 {
     $page = new Page($url);
     $page->go_line('Pages|');
     $count_line = $page->curr_line();
     $found = $count_line->regex_match('/(\\d+) Pages/');
     return $found[1];
 }

Пример #27

0

Показать файл

Файл: nobushido.php Проект: JerryMaheswara/crawler

<?php

/*
Crawls a webcomic, e.g.
http://noneedforbushido.com/2002/comic/1/
Supply the starting chapter; the crawl follows the "next" links to the end.
*/
require_once 'class/page.php';
require_once 'class/text.php';
$start = 'http://noneedforbushido.com/2002/comic/1/';
do {
    $p = new Page($start);
    // Image of the current comic.
    $p->go_line('class="comic-item');
    $src = $p->curr_line()->dup()->cut_between('src="', '"')->to_s();
    // Link text: "<year>-comic<padded number>", both taken from the URL.
    $year = Text::create($start)->cut_between('.com/', '/')->to_s();
    $n = Text::create(basename($start))->pad(3, 0)->to_s();
    $text = "{$year}-comic{$n}";
    echo "<a href='{$src}'>{$text}</a><br />\n";
    // The crawl stops once the "next" link is marked as the current comic.
    $p->go_line('class="next-comic-link');
    $next = !$p->curr_line()->contain('current-comic');
    $start = $p->curr_line()->dup()->cut_between('href="', '"')->to_s();
} while ($next);

Пример #28

0

Показать файл

Файл: rho.php Проект: JerryMaheswara/crawler

 /**
  * Collect chapter URLs from a paginated listing.
  *
  * The number of listing pages is read from the first page and can be
  * overridden with the request parameter "limitpage".
  *
  * @param string $start_page_url URL of the first listing page; later pages
  *                               are addressed as "<url>page/<n>/".
  * @param bool   $check_database When true, stop at the first chapter URL for
  *                               which url_already_exist() reports a hit.
  * @return array Unique chapter URLs in reverse order of discovery.
  */
 public function grab_chapter_urls($start_page_url, $check_database = false)
 {
     $p = new Page($start_page_url);
     // check if there are more pages
     $p->go_line("class='pages'");
     if ($p->curr_line()->exist("class='pages'")) {
         $m = $p->curr_line()->regex_match("/'>Page 1 \\/ (\\d+)<\\//");
         $tot_pages = (int) $m[1];
     } else {
         $tot_pages = 1;
     }
     $chapters = array();
     if (isset($_GET['limitpage'])) {
         // Request input is untrusted: a non-numeric string or an array as the
         // loop bound makes "$i <= $tot_pages" always true on PHP 8, so the
         // crawl would never end. Force an integer.
         $tot_pages = (int) $_GET['limitpage'];
     }
     for ($i = 1; $i <= $tot_pages; $i++) {
         $p = new Page($start_page_url . ($i == 1 ? '' : 'page/' . $i . '/'));
         echo "Grabbing " . $p->url() . "<br/>\n";
         // grab all chapter in this page
         $t_content = new Text($p->content());
         $raw = array_unique($t_content->extract_to_array('href="', '"'));
         foreach ($raw as $e) {
             if (!preg_match('/^http:\\/\\/hentaimangaonline\\.com\\/read-[^\\/]*-hentai-manga-online\\/$/', $e)) {
                 continue;
             }
             // An already stored URL ends the crawl with what was found so far.
             if ($check_database && $this->url_already_exist($e)) {
                 return array_reverse(array_unique($chapters));
             }
             $chapters[] = $e;
         }
     }
     return array_reverse(array_unique($chapters));
 }

Пример #29

0

Показать файл

Файл: h.php Проект: JerryMaheswara/crawler

/**
 * Echo one HTML link per image of a gallery, derived from its thumbnail list.
 *
 * "/read" is appended to the URL when missing. The link text is the name of
 * the path segment before "/read".
 *
 * @param string $url Gallery URL, with or without the trailing "/read".
 * @return void Links are written with echo.
 */
function fakku($url)
{
    if (!preg_match('/\\/read$/', $url)) {
        $url .= '/read';
    }
    $title = basename(dirname($url));
    $p = new Page($url);
    // The thumbnail list is the JSON value assigned on this line.
    $p->go_line('window.params.thumbs');
    $json = $p->curr_line()->cut_between('=', ';')->to_s();
    $js_thumbs = json_decode($json);
    foreach ($js_thumbs as $thumb) {
        // Thumbnail path -> full image path.
        $src = Text::create($thumb)->replace('.thumb.', '.')->replace('/thumbs/', '/images/')->to_s();
        echo "<a href='{$src}'>{$title}</a><br>\n";
    }
}

Пример #30

0

Показать файл

Файл: what.php Проект: JerryMaheswara/crawler

 /**
  * Walk this page from the 'Hayley Marie' line to the id="vr_nav" line and,
  * for every linked sub page, echo one HTML link per href found in its form.
  *
  * @return void Links are written with echo.
  */
 public function process()
 {
     $this->go_line('Hayley Marie');
     do {
         if ($this->curr_line()->exist('href=')) {
             // Group 1 is the link target, group 2 the title attribute.
             $parts = $this->curr_line()->regex_match('/' . self::REG_HREF . '\\s+title=["\']([^"\']*)["\']/');
             $url = $parts[1];
             $name = $parts[2];
             if (!$name) {
                 // Fallback label when the title is empty.
                 $name = 'asdf' . $this->current_i;
             }
             $detail = new Page($url);
             $detail->go_line("id='form'");
             do {
                 if ($detail->curr_line()->exist("href='")) {
                     $img = $detail->curr_line()->dup()->cut_between("href='", "'");
                     if ($img->exist('imageboss.net')) {
                         // NOTE(review): the result is not assigned; this only
                         // takes effect if Text::replace() modifies $img in
                         // place. Confirm against Text.
                         $img->replace('/view/', '/img/')->replace('-', '/');
                     }
                     echo "<a href='{$img}'>{$name}</a><br />\n";
                 }
             } while (!$detail->next_line()->exist('</form>'));
         }
     } while (!$this->next_line()->exist('id="vr_nav"'));
 }

PHP Page::go_line примеры использования