/**
 * Crawler callback invoked once the given url has been crawled.
 *
 * Bumps the crawl counter of the Webpage stored under this url (if there is
 * one) and then hands the url and response over to SaveSnapshot::create().
 *
 * @param \SimZal\Crawler\Url $url
 * @param \Psr\Http\Message\ResponseInterface|null $response
 */
public function hasBeenCrawled(Url $url, $response)
{
    $address = (string) $url;
    $crawledPage = Webpage::where('url', $address)->first();

    // Only known webpages carry a crawl counter; unknown urls are left alone.
    if ($crawledPage) {
        $crawledPage->crawlcount++;
        $crawledPage->save();
    }

    // The snapshot step is always triggered, whether or not a Webpage matched here.
    SaveSnapshot::create($url, $response);
}
/**
 * Store a snapshot of a crawled response for the Webpage matching the url.
 *
 * Nothing is stored when no Webpage row matches the url. When there is no
 * response, the snapshot is saved with self::UNRESPONSIVE_HOST as its status
 * code and an empty html body. The html is only filled (after passing through
 * FilterContent::make()) when self::isHtml() accepts the response.
 *
 * @param Url $url
 * @param \Psr\Http\Message\ResponseInterface|null $response
 *
 * @return void
 */
public static function create(Url $url, $response)
{
    // Lowercase column name: identifiers are case-sensitive on some databases,
    // so a capitalised 'Url' can miss a column that is declared as 'url'.
    $webpage = Webpage::where('url', (string) $url)->first();

    if (!$webpage) {
        return;
    }

    $statusCode = $response ? $response->getStatusCode() : self::UNRESPONSIVE_HOST;

    $html = '';

    // A missing response can never be html, so isHtml() is only asked about real responses.
    if ($response && self::isHtml($response)) {
        // Casting the body to string rewinds the stream and reads all of it;
        // getContents() would only return whatever has not been read yet.
        $html = FilterContent::make((string) $response->getBody());
    }

    Snapshot::create([
        'html' => $html,
        'status_code' => $statusCode,
        'webpage_id' => $webpage->id,
        // Hash of the filtered html stored alongside it.
        'hash' => md5($html),
    ]);
}
/**
 * List the webpages that belong to the site named in the request.
 *
 * @param Request $request Its 'site_id' input selects which site's webpages are listed.
 *
 * @return \Illuminate\Contracts\View\View The 'webpage.index' view with the matching webpages bound as $webpages.
 */
public function index(Request $request)
{
    $siteId = $request->input('site_id');

    return view('webpage.index', [
        'webpages' => Webpage::where('site_id', $siteId)->get(),
    ]);
}