Example #1
0
 /**
  * Build a map of volume label => last "Round" (chapter) number by scraping Wikipedia.
  *
  * Starts at the "List of Hajime no Ippo chapters" page, follows every line
  * containing "Main article:" to the page its href points at and, on each of
  * those pages, walks the '<td id="vol' cells. For each cell it scans forward
  * until a line containing '</table>' and remembers the last '<li>Round N:'
  * number seen.
  *
  * A volume for which no "Round" line is found is left out of the result
  * instead of inheriting the chapter number of the previously parsed volume.
  *
  * @return array volume text (as found between '">' and '<') => last round number text
  */
 public function grab_volume_chapters()
 {
     $index = new Page('http://en.wikipedia.org/wiki/List_of_Hajime_no_Ippo_chapters');
     $list = array();
     while (!$index->end_of_line()) {
         $index->go_line('Main article:');
         if ($index->end_of_line()) {
             // No further "Main article:" link on the index page.
             break;
         }
         // dup() so that cutting the href out does not alter the page's own line.
         $href = 'http://en.wikipedia.org' . $index->curr_line()->dup()->cut_between('href="', '"')->to_s();
         $volumes = new Page($href);
         while (!$volumes->end_of_line()) {
             try {
                 $volumes->go_line('<td id="vol');
                 if ($volumes->end_of_line()) {
                     // Same guard as on the index page: no further volume cell here.
                     break;
                 }
                 $vol = $volumes->curr_line()->dup()->cut_between('">', '<')->to_s();
                 // Reset per volume so a volume without rounds cannot reuse a stale value.
                 $last_chapter = null;
                 do {
                     if ($volumes->curr_line()->contain('<li>Round ')) {
                         $last_chapter = $volumes->curr_line()->dup()->cut_between('Round ', ':')->to_s();
                     }
                 } while (!$volumes->next_line()->contain('</table>'));
                 if ($last_chapter !== null) {
                     $list[$vol] = $last_chapter;
                 }
             } catch (Exception $e) {
                 // NOTE(review): presumably Page throws when reading past its last
                 // line - confirm. Either way, stop parsing this sub-page.
                 break;
             }
         }
         $index->next_line();
     }
     return $list;
 }
Example #2
0
/**
 * Crawl a Sankaku Complex listing page by page and echo, as HTML, one link per
 * post pointing at that post's image.
 *
 * For every page, "&page=N" is appended to $url, the page is fetched, and all
 * href values containing "/post/show" are collected. Each of those post pages
 * is then fetched and the href on its "id=lowres" line (or "id=highres" when
 * $_GET['hires'] is set, or as a fallback) is printed as a link. Crawling
 * stops at the first page without post links, or once the page number exceeds
 * a numeric $_GET['limit'].
 *
 * Output is written with echo/flush(); nothing is returned.
 *
 * @param string $url Listing URL; selects the idol/chan host and the link label
 *                    (its "tags=" value, url-decoded).
 * @return void
 */
function sankakucomplex($url)
{
    // strpos() returns 0 for a match at offset 0, so compare against false
    // instead of relying on truthiness.
    $base = strpos($url, '/idol.') !== false
        ? 'https://idol.sankakucomplex.com'
        : 'https://chan.sankakucomplex.com';

    // Link label: the decoded "tags=" value of the URL, or a unique id when absent.
    $tag = uniqid();
    $Turl = Text::create($url);
    if ($Turl->contain('tags=')) {
        $tag = $Turl->cut_after('tags=')->urldecode()->to_s();
    }
    // $tag is decoded plain text coming from the URL; escape it before printing as HTML.
    $tag_html = htmlspecialchars($tag, ENT_QUOTES, 'UTF-8');

    // Request options are loop-invariant: read them once. A non-numeric limit
    // is ignored rather than compared as a string.
    $limit = null;
    if (isset($_GET['limit']) && is_numeric($_GET['limit'])) {
        $limit = (int) $_GET['limit'];
    }
    $anchor = isset($_GET['hires']) ? 'id=highres' : 'id=lowres';

    for ($page = 1; $limit === null || $page <= $limit; $page++) {
        $purl = $url . '&page=' . $page;
        echo htmlspecialchars($purl, ENT_QUOTES, 'UTF-8') . "<br>\n";

        // Re-fetch for as long as the body reports "429 Too many requests".
        // The 3 second pause runs after every attempt, successful or not.
        do {
            $listing = new Page($purl, array('become_firefox' => true));
            $body = new Text($listing->content());
            sleep(3);
        } while ($body->contain('429 Too many requests'));

        // Keep only the hrefs that point at a post page.
        $posts = array();
        foreach ($body->extract_to_array('href="', '"') as $href) {
            $candidate = new Text($href);
            if ($candidate->contain('/post/show')) {
                $posts[] = $href;
            }
        }
        if (!count($posts)) {
            // A listing page without posts marks the end of the results.
            break;
        }

        foreach ($posts as $href) {
            $kurl = $base . $href;
            // $href was cut out of raw HTML, so it may already contain entities:
            // escape without double-encoding them.
            echo htmlspecialchars($kurl, ENT_QUOTES, 'UTF-8', false) . "<br>\n";
            flush();

            // Same 429 retry loop as for the listing page.
            do {
                $P = new Page($kurl, array('become_firefox' => true));
                $T = new Text($P->content());
                sleep(3);
            } while ($T->contain('429 Too many requests'));

            $P->go_line($anchor);
            if ($P->end_of_line()) {
                // Requested variant not found: search again from the top for the high-res link.
                $P->reset_line();
                $P->go_line('id=highres');
            }
            $img = $P->curr_line()->cut_between('href="', '"')->to_s();

            if ($img) {
                // ENT_QUOTES matters here because the attribute is single-quoted.
                $img_html = htmlspecialchars($img, ENT_QUOTES, 'UTF-8', false);
                echo "<a href='{$img_html}'>{$tag_html}</a><br />\n";
                flush();
            } else {
                // No image href on the post page; reported as a Flash post.
                echo "This is flash<br />\n";
            }
        }
    }
}