/**
 * Scrapes a function (film) detail page into an associative array.
 *
 * The result holds, in this order: name, language, DDD, poster, rating,
 * premiere, age_restriction, genre, duration, description and theatres.
 * Each theatre entry holds id, name and screenings.
 *
 * NOTE(review): Crawler::text() / attr() raise an exception on an empty node
 * list in Symfony DomCrawler, so a page missing any of the selectors below
 * aborts the scrape - confirm that this is the intended failure mode.
 *
 * @param Crawler $page Crawler positioned on the detail page.
 *
 * @return array Scraped data for the function.
 */
private static function getFunction($page)
{
    $function = [];

    $function['name'] = $page->filter('div#info > div.hd > h3')->text();
    // Presumably rewrites the title and fills 'language' / 'DDD' by reference;
    // verify against SpiderHelper::extractLanguageAnd3DFromTitle().
    SpiderHelper::extractLanguageAnd3DFromTitle($function['name'], $function['language'], $function['DDD']);

    $function['poster'] = SpiderHelper::getPosterFromURL($page->filter('div.mg > img')->attr('src'));
    $function['rating'] = SpiderHelper::cleanRating($page->filter('div.rating > p')->text());

    // The film info paragraphs are positional: 0 = premiere, 1 = age
    // restriction, 2 = genre, 3 = duration.
    $info = $page->filter('div.film_info > p');
    $function['premiere'] = SpiderHelper::cleanInfo($info->eq(0)->text());
    $function['age_restriction'] = SpiderHelper::cleanInfo($info->eq(1)->text());
    $function['genre'] = SpiderHelper::cleanInfo($info->eq(2)->text());
    $function['duration'] = SpiderHelper::cleanInfo($info->eq(3)->text());

    $function['description'] = $page->filter('div.film_text > p')->text();

    // each() returns the values returned by the closure, one per tab link.
    $function['theatres'] = $page
        ->filter('ul#tabs_complejos_horarios > li > a')
        ->each(function (Crawler $node) use ($page) {
            // Read the attribute once; the cast keeps explode()/str_replace()
            // from receiving null when the anchor has no href.
            $href = (string) $node->attr('href');

            // href structure: #tabs-ID_number, this code isolates the ID.
            $prefix = explode('_', $href)[0];
            $pieces = explode('-', $prefix);

            $theatre = [];
            // Guard the offset: an href without a dash yields a null id
            // instead of an undefined-offset notice.
            $theatre['id'] = isset($pieces[1]) ? $pieces[1] : null;
            $theatre['name'] = $node->text();
            $theatre['screenings'] = self::getScreenings($page, str_replace('#', '', $href));

            return $theatre;
        });

    return $function;
}
/**
 * Collects every play listed inside the given container.
 *
 * One entry is produced per '.movie_module' element. Each entry holds id,
 * name, premiere (only when a 'div.estreno' element is present), rating and
 * poster, in that order.
 *
 * @param Crawler $container Crawler wrapping the markup that lists the plays.
 *
 * @return array List of plays, in document order.
 */
private function getPlays(Crawler $container)
{
    // each() hands back the closure's return values as a list, so no
    // by-reference accumulator is needed.
    return $container->filter('.movie_module')->each(function (Crawler $module) {
        $entry = [
            'id' => $module->filter('div.hd > a.favourite')->attr('rel'),
            'name' => $module->filter('div.hd > h3 > a')->text(),
        ];

        // The premiere badge is optional.
        $badge = $module->filter('div.estreno');
        if ($badge->count()) {
            $entry['premiere'] = SpiderHelper::cleanDate($badge->text());
        }

        $entry['rating'] = SpiderHelper::cleanRating($module->filter('div.rating > p')->text());
        $entry['poster'] = SpiderHelper::getPosterFromURL($module->filter('div.mg > img')->attr('src'));

        return $entry;
    });
}
/**
 * Collects every movie listed on the given page.
 *
 * One entry is produced per '.movie_module' element. Each entry holds id,
 * name, language, DDD, premiere (only when a 'div.estreno' element is
 * present), rating and poster, in that order.
 *
 * @param Crawler $page        Crawler wrapping the markup that lists the movies.
 * @param bool    $utf8_decode Whether to convert each name from UTF-8 to
 *                             ISO-8859-1 before further processing.
 *
 * @return array List of movies, in document order.
 */
protected static function getMovies(Crawler $page, $utf8_decode)
{
    // each() returns the values returned by the closure, one per module.
    return $page->filter('.movie_module')->each(function (Crawler $node) use ($utf8_decode) {
        $movie = [];
        $movie['id'] = $node->filter('div.hd > a.favourite')->attr('rel');
        $movie['name'] = $node->filter('div.hd > h3 > a')->text();

        // We need to decode the name ONLY if the data comes from movie.com.uy's AJAX endpoints.
        // HTML coming from AJAX endpoints doesn't display accents correctly so it needs to be decoded.
        if ($utf8_decode) {
            // utf8_decode() is deprecated as of PHP 8.2; mb_convert_encoding()
            // is the documented replacement. The fallback keeps installations
            // without mbstring working.
            $movie['name'] = function_exists('mb_convert_encoding')
                ? mb_convert_encoding($movie['name'], 'ISO-8859-1', 'UTF-8')
                : utf8_decode($movie['name']);
        }

        // Presumably rewrites the title and fills 'language' / 'DDD' by
        // reference; verify against SpiderHelper::extractLanguageAnd3DFromTitle().
        SpiderHelper::extractLanguageAnd3DFromTitle($movie['name'], $movie['language'], $movie['DDD']);

        // The premiere badge is optional.
        $badge = $node->filter('div.estreno');
        if ($badge->count()) {
            $movie['premiere'] = SpiderHelper::cleanDate($badge->text());
        }

        $movie['rating'] = SpiderHelper::cleanRating($node->filter('div.rating > p')->text());
        $movie['poster'] = SpiderHelper::getPosterFromURL($node->filter('div.mg > img')->attr('src'));

        return $movie;
    });
}