/**
 * Collect the image URL of every page in a chapter.
 *
 * Opens the chapter's first page, reads the total page count from the
 * "of <N>" marker that follows the `id="top_bar"` line, then visits each
 * page ("<dirname of chapter_url>/<page>.html" for page 2 and up) and takes
 * the `src="..."` value found two lines below the `id="viewer"` line.
 *
 * The first page is always scraped, even when the page count cannot be
 * determined.
 *
 * @param string $chapter_url URL of the chapter's first page.
 * @param string $prefix      Leading part of every generated key.
 * @param mixed  $infix       Middle part of every generated key; run through
 *                            Text::pad(3) before use (presumably left-padded
 *                            to three characters - confirm against Text).
 *
 * @return array Map of "{prefix}-{padded infix}-{image basename}" => image
 *               source. Pages whose images share a basename overwrite each
 *               other, as the key would be identical.
 */
public function get_images($chapter_url, $prefix, $infix)
{
    $ifx = Text::create($infix)->pad(3)->to_s();
    $base_url = dirname($chapter_url);

    $page = new Page($chapter_url);

    // Read the total page count from the top bar.
    $page->go_line('id="top_bar"');
    $page->go_line_regex('/of \\d+\\w+/');
    $matches = $page->curr_line()->regex_match('/of (\\d+)/');

    // Fall back to a single page when the marker is missing instead of
    // reading an undefined offset; never go below one so that the already
    // loaded first page is still processed.
    $total = isset($matches[1]) ? max(1, (int) $matches[1]) : 1;

    $result = array();
    for ($i = 1; $i <= $total; $i++) {
        // Page 1 is the chapter URL itself and is already loaded.
        if ($i > 1) {
            $page = new Page($base_url . '/' . $i . '.html');
        }

        // The <img> tag sits two lines below the viewer container.
        $page->go_line('id="viewer"');
        $page->next_line(2);
        $src = $page->curr_line()->cut_between('src="', '"');
        $name = basename($src);

        $result["{$prefix}-{$ifx}-{$name}"] = $src;
    }

    return $result;
}
/**
 * Print one HTML link per page image of a chapter.
 *
 * Opens the chapter's first page, reads the total page count from the
 * "of <N>" marker that follows the `id="top_bar"` line, then visits each
 * page ("<dirname of url>/<page>.html" for page 2 and up) and echoes
 * `<a href='{src}'>{prefix}-{padded infix}-{image basename}</a><br>` for the
 * `src="..."` value found two lines below the `id="viewer"` line.
 *
 * Links are echoed as each page is processed. The first page is always
 * processed, even when the page count cannot be determined.
 *
 * @param array $v Chapter descriptor; reads $v['url'] (URL of the chapter's
 *                 first page) and $v['infix'] (run through Text::pad(3) and
 *                 used as the middle part of the link text).
 *
 * @return void
 */
public function crawl_chapter($v)
{
    $ifx = Text::create($v['infix'])->pad(3)->to_s();
    $prefix = $this->prefix;
    $base_url = dirname($v['url']);

    $page = new Page($v['url']);

    // Read the total page count from the top bar.
    $page->go_line('id="top_bar"');
    $page->go_line_regex('/of \\d+\\w+/');
    $matches = $page->curr_line()->regex_match('/of (\\d+)/');

    // Fall back to a single page when the marker is missing instead of
    // reading an undefined offset; never go below one so that the already
    // loaded first page is still processed.
    $total = isset($matches[1]) ? max(1, (int) $matches[1]) : 1;

    for ($i = 1; $i <= $total; $i++) {
        // Page 1 is the chapter URL itself and is already loaded.
        if ($i > 1) {
            $page = new Page($base_url . '/' . $i . '.html');
        }

        // The <img> tag sits two lines below the viewer container.
        $page->go_line('id="viewer"');
        $page->next_line(2);
        $src = $page->curr_line()->cut_between('src="', '"');
        $name = basename($src);

        // $src comes from remote markup: escape quotes and angle brackets so
        // it cannot break out of the single-quoted href or inject tags.
        // double_encode=false keeps entities that are already present in the
        // scraped attribute value (e.g. "&amp;") from being encoded twice.
        $href = htmlspecialchars($src, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
        $label = htmlspecialchars("{$prefix}-{$ifx}-{$name}", ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);

        echo "<a href='{$href}'>{$label}</a><br>\n";
    }
}