/**
 * Builds one record per `.movie_module` element found inside the given crawler.
 *
 * Every record carries the keys `id`, `name`, `rating` and `poster`; the
 * `premiere` key is only present when the module contains a `div.estreno` element.
 *
 * @param Crawler $container Crawler holding the markup with the play modules.
 *
 * @return array List of associative arrays, in the order the crawler yields the modules.
 */
private function getPlays(Crawler $container)
{
    // Crawler::each() gathers the callback's return values into a list,
    // so no by-reference accumulator is needed.
    return $container->filter('.movie_module')->each(static function (Crawler $module) {
        $record = [
            'id' => $module->filter('div.hd > a.favourite')->attr('rel'),
            'name' => $module->filter('div.hd > h3 > a')->text(),
        ];

        // The premiere block is optional in the markup.
        $premiereNode = $module->filter('div.estreno');
        if ($premiereNode->count() > 0) {
            $record['premiere'] = SpiderHelper::cleanDate($premiereNode->text());
        }

        $ratingText = $module->filter('div.rating > p')->text();
        $record['rating'] = SpiderHelper::cleanRating($ratingText);

        $posterUrl = $module->filter('div.mg > img')->attr('src');
        $record['poster'] = SpiderHelper::getPosterFromURL($posterUrl);

        return $record;
    });
}
/**
 * Builds one record per `.movie_module` element found in the given page.
 *
 * Every record carries the keys `id`, `name`, `rating` and `poster`; the
 * `premiere` key is only present when the module contains a `div.estreno` element.
 * The `language` and `DDD` entries are handed to
 * SpiderHelper::extractLanguageAnd3DFromTitle() together with the name
 * (presumably populated there by reference -- confirm against SpiderHelper).
 *
 * @param Crawler $page        Crawler holding the markup with the movie modules.
 * @param mixed   $utf8_decode When truthy, the scraped name is passed through utf8_decode().
 *
 * @return array List of associative arrays, in the order the crawler yields the modules.
 */
protected static function getMovies(Crawler $page, $utf8_decode)
{
    // Crawler::each() gathers the callback's return values into a list,
    // so no by-reference accumulator is needed.
    return $page->filter('.movie_module')->each(static function (Crawler $module) use ($utf8_decode) {
        $movie = [
            'id' => $module->filter('div.hd > a.favourite')->attr('rel'),
            'name' => $module->filter('div.hd > h3 > a')->text(),
        ];

        // Only data served by movie.com.uy's AJAX endpoints needs decoding:
        // that HTML does not render accents correctly until it is decoded.
        // NOTE(review): utf8_decode() is deprecated as of PHP 8.2 -- plan a replacement.
        if ($utf8_decode) {
            $movie['name'] = utf8_decode($movie['name']);
        }

        SpiderHelper::extractLanguageAnd3DFromTitle($movie['name'], $movie['language'], $movie['DDD']);

        // The premiere block is optional in the markup.
        $premiereNode = $module->filter('div.estreno');
        if ($premiereNode->count() > 0) {
            $movie['premiere'] = SpiderHelper::cleanDate($premiereNode->text());
        }

        $ratingText = $module->filter('div.rating > p')->text();
        $movie['rating'] = SpiderHelper::cleanRating($ratingText);

        $posterUrl = $module->filter('div.mg > img')->attr('src');
        $movie['poster'] = SpiderHelper::getPosterFromURL($posterUrl);

        return $movie;
    });
}