/**
 * Scrapes one function's detail page into an associative array.
 *
 * The result carries the name, poster, rating, premiere, age_restriction,
 * genre, duration, description and a list of theatres. The "language" and
 * "DDD" entries are handed to SpiderHelper::extractLanguageAnd3DFromTitle()
 * right after the name is read (presumably filled by reference; confirm
 * against SpiderHelper).
 *
 * @param mixed $page Object exposing filter(); presumably the Crawler of the
 *                    detail page (the parameter is not type-hinted).
 *
 * @return array Scraped data; "theatres" is a list of arrays with the keys
 *               "id", "name" and "screenings".
 */
private static function getFunction($page)
{
    $details = [];

    $details['name'] = $page->filter('div#info > div.hd > h3')->text();
    SpiderHelper::extractLanguageAnd3DFromTitle($details['name'], $details['language'], $details['DDD']);

    $posterSrc = $page->filter('div.mg > img')->attr('src');
    $details['poster'] = SpiderHelper::getPosterFromURL($posterSrc);

    $ratingText = $page->filter('div.rating > p')->text();
    $details['rating'] = SpiderHelper::cleanRating($ratingText);

    // The <p> children of div.film_info are read by position: 0 => premiere,
    // 1 => age_restriction, 2 => genre, 3 => duration.
    $paragraphs = $page->filter('div.film_info > p');
    foreach (['premiere', 'age_restriction', 'genre', 'duration'] as $position => $key) {
        $details[$key] = SpiderHelper::cleanInfo($paragraphs->eq($position)->text());
    }

    $details['description'] = $page->filter('div.film_text > p')->text();

    // each() collects the value returned for every tab link, in document order.
    $details['theatres'] = $page->filter('ul#tabs_complejos_horarios > li > a')->each(
        function (Crawler $tab) use ($page) {
            $href = $tab->attr('href');

            // href structure: #tabs-ID_number. Keep what precedes the "_",
            // then take the piece after the "-" to isolate the ID.
            $beforeUnderscore = explode('_', $href)[0];

            return [
                'id' => explode('-', $beforeUnderscore)[1],
                'name' => $tab->text(),
                // The screenings lookup receives the href without its "#".
                'screenings' => self::getScreenings($page, str_replace('#', '', $href)),
            ];
        }
    );

    return $details;
}
/**
 * Builds a named category out of the film boxes contained in a slider.
 *
 * @param mixed   $name   Category name; returned untouched under the "name" key.
 * @param Crawler $slider Crawler whose div.box_film descendants become the items.
 *
 * @return array Array with the keys "name" and "items"; every item holds the
 *               film's name, poster and id, plus the "language" and "DDD"
 *               entries passed to SpiderHelper::extractLanguageAnd3DFromTitle()
 *               (presumably filled by reference; confirm against SpiderHelper).
 */
private static function createCategory($name, Crawler $slider)
{
    // each() gathers the array returned for every film box, in document order.
    $entries = $slider->filter('div.box_film')->each(function (Crawler $box) {
        $entry = [];

        $entry['name'] = $box->filter('div.mt > h3 > a')->text();

        $posterSrc = $box->filter('div.mg > a > img')->attr('src');
        $entry['poster'] = SpiderHelper::getPosterFromURL($posterSrc);

        // The id is derived from the already-processed poster value.
        $entry['id'] = SpiderHelper::getIDFromPoster($entry['poster']);

        SpiderHelper::extractLanguageAnd3DFromTitle($entry['name'], $entry['language'], $entry['DDD']);

        return $entry;
    });

    return ['name' => $name, 'items' => $entries];
}
/**
 * Extracts every play listed inside the given container.
 *
 * Declared static because it never touches $this; that way it can be called
 * both as $this->getPlays() and as self::getPlays() from a static context.
 *
 * @param Crawler $container Crawler whose .movie_module descendants are the plays.
 *
 * @return array List of plays. Each play has the keys "id", "name", "rating"
 *               and "poster", plus "premiere" when the module contains a
 *               div.estreno element.
 */
private static function getPlays(Crawler $container)
{
    $plays = [];

    $container->filter('.movie_module')->each(function (Crawler $node) use (&$plays) {
        $play = [];
        $play['id'] = $node->filter('div.hd > a.favourite')->attr('rel');
        $play['name'] = $node->filter('div.hd > h3 > a')->text();

        // The premiere badge is optional; only record it when present.
        $premiere = $node->filter('div.estreno');
        if ($premiere->count()) {
            $play['premiere'] = SpiderHelper::cleanDate($premiere->text());
        }

        $play['rating'] = SpiderHelper::cleanRating($node->filter('div.rating > p')->text());
        $play['poster'] = SpiderHelper::getPosterFromURL($node->filter('div.mg > img')->attr('src'));

        $plays[] = $play;
    });

    return $plays;
}
/**
 * Extracts every movie listed inside the given page fragment.
 *
 * @param Crawler $page        Crawler whose .movie_module descendants are the movies.
 * @param mixed   $utf8_decode When truthy, the movie name is converted from
 *                             UTF-8 to ISO-8859-1 before further processing.
 *
 * @return array List of movies. Each movie has the keys "id", "name", "rating"
 *               and "poster", plus "premiere" when the module contains a
 *               div.estreno element, and the "language" and "DDD" entries
 *               passed to SpiderHelper::extractLanguageAnd3DFromTitle()
 *               (presumably filled by reference; confirm against SpiderHelper).
 */
protected static function getMovies(Crawler $page, $utf8_decode)
{
    $movies = [];

    $page->filter('.movie_module')->each(function (Crawler $node) use (&$movies, $utf8_decode) {
        $movie = [];
        $movie['id'] = $node->filter('div.hd > a.favourite')->attr('rel');
        $movie['name'] = $node->filter('div.hd > h3 > a')->text();

        // We need to decode the name ONLY if the data comes from movie.com.uy's AJAX endpoints.
        // HTML coming from AJAX endpoints doesn't display accents correctly so it needs to be decoded.
        if ($utf8_decode) {
            // utf8_decode() is deprecated as of PHP 8.2; mb_convert_encoding() performs the
            // same UTF-8 -> ISO-8859-1 conversion. The legacy call is kept only as a
            // fallback for installations without the mbstring extension.
            $movie['name'] = function_exists('mb_convert_encoding')
                ? mb_convert_encoding($movie['name'], 'ISO-8859-1', 'UTF-8')
                : utf8_decode($movie['name']);
        }

        SpiderHelper::extractLanguageAnd3DFromTitle($movie['name'], $movie['language'], $movie['DDD']);

        // The premiere badge is optional; only record it when present.
        $premiere = $node->filter('div.estreno');
        if ($premiere->count()) {
            $movie['premiere'] = SpiderHelper::cleanDate($premiere->text());
        }

        $movie['rating'] = SpiderHelper::cleanRating($node->filter('div.rating > p')->text());
        $movie['poster'] = SpiderHelper::getPosterFromURL($node->filter('div.mg > img')->attr('src'));

        $movies[] = $movie;
    });

    return $movies;
}
/**
 * Requests the cinema landing page and assembles this spider's data from it.
 *
 * @param Client $client Client used to issue the GET request.
 *
 * @return array Result of SpiderHelper::getPosterData() for the page, merged
 *               with a "functions" entry holding the movies found inside
 *               div#proximos_est.
 */
protected static function fetchData(Client $client)
{
    $landingPage = $client->request('GET', 'http://www.movie.com.uy/cine/');

    // Movies are read straight from the page markup, so no name decoding is requested.
    $upcomingSection = $landingPage->filter('div#proximos_est');
    $upcoming = self::getMovies($upcomingSection, false);

    $posterData = SpiderHelper::getPosterData($landingPage);

    return array_merge($posterData, ['functions' => $upcoming]);
}