Пример #1
0
 /**
  * Crawler callback invoked once a URL has been fetched.
  *
  * Bumps the crawl counter of the Webpage row whose `url` matches (when one
  * exists) and then hands the URL and response over to SaveSnapshot::create().
  * The snapshot call is made whether or not a matching Webpage was found.
  *
  * @param \SimZal\Crawler\Url                      $url      URL that was crawled.
  * @param \Psr\Http\Message\ResponseInterface|null $response Response received for the URL, or null.
  */
 public function hasBeenCrawled(Url $url, $response)
 {
     $address = (string) $url;
     $crawledPage = Webpage::where('url', $address)->first();

     if ($crawledPage !== null) {
         // Read-modify-write of the counter, persisted immediately.
         $crawledPage->crawlcount++;
         $crawledPage->save();
     }

     SaveSnapshot::create($url, $response);
 }
Пример #2
0
 /**
  * Store a Snapshot of a crawled URL for the Webpage that owns it.
  *
  * Does nothing when no Webpage row matches the URL. When there is no
  * response, the snapshot is stored with self::UNRESPONSIVE_HOST as its
  * status code and an empty body. The body is only captured when
  * self::isHtml() accepts the response; it is passed through
  * FilterContent::make() before being stored, and `hash` is the md5 of the
  * stored (filtered) HTML.
  *
  * @param Url                                      $url      URL that was crawled.
  * @param \Psr\Http\Message\ResponseInterface|null $response Response for the URL, or null when none was received.
  */
 public static function create(Url $url, $response)
 {
     // Lower-case column name, matching the other Webpage lookups; quoted
     // identifiers are case-sensitive on some database engines.
     $webpage = Webpage::where('url', (string) $url)->first();
     if (!$webpage) {
         return;
     }

     $statusCode = $response ? $response->getStatusCode() : self::UNRESPONSIVE_HOST;

     $html = '';
     // Guard against a missing response before inspecting it.
     if ($response && self::isHtml($response)) {
         // Casting the stream to string rewinds it first, so the full body is
         // captured even if something already read from it; getContents()
         // would only return what is left after the current position.
         $html = FilterContent::make((string) $response->getBody());
     }

     Snapshot::create([
         'html' => $html,
         'status_code' => $statusCode,
         'webpage_id' => $webpage->id,
         'hash' => md5($html),
     ]);
 }
Пример #3
0
 /**
  * List the webpages belonging to the site selected by the request.
  *
  * @param Request $request Incoming request; its `site_id` input picks the site.
  *
  * @return \Illuminate\Contracts\View\View The `webpage.index` view with the
  *                                         matching rows bound as `webpages`.
  */
 public function index(Request $request)
 {
     $siteId = $request->input('site_id');
     $webpages = Webpage::where('site_id', $siteId)->get();

     return view('webpage.index', ['webpages' => $webpages]);
 }