/**
 * Compile imdb search results html into a view/db ready array.
 *
 * Every result row (table.results > tr.odd / tr.even) is turned into one
 * associative array. Rows whose title + type + year combination was already
 * compiled are skipped, so each combination appears at most once.
 *
 * @param  string $results raw html of the imdb search results page
 * @return array  list of compiled titles, empty array when nothing matched
 */
public function compileSearchResults($results)
{
    $crawler = new crawler($results);

    //grab every result row matching our query
    $sections = $crawler->filter('table.results > tr.odd, table.results > tr.even');

    $compiled = array();

    //set of title+type+year signatures that were already compiled, keyed by
    //signature so the duplicate check is an exact match and not a substring
    //search over one long concatenated string
    $seen = array();

    //loop trough every result and extract the information we require (title, plot, cast etc.)
    foreach ($sections as $value) {
        $cr = new crawler($value);

        $title      = $cr->filter('td.image > a')->extract('title');
        $poster     = $cr->filter('td.image > a > img')->extract('src');
        $rating     = $cr->filter('td.title > div.user_rating')->extract('_text');
        $imdbid     = $cr->filter('td.image > a')->extract('href');
        $shortTitle = head($cr->filter('td.title > a')->extract('_text'));

        $type = $this->typeFromTitle($title);
        $year = $this->year($title);

        //NUL separated so neighbouring fields can not run into each other
        $signature = $shortTitle . "\0" . $type . "\0" . $year;

        //skip the row if we already have such title+type+year
        if (isset($seen[$signature])) {
            continue;
        }

        //record the signature directly instead of reading it back from
        //$compiled by loop index: the index drifts as soon as a row is skipped
        $seen[$signature] = true;

        $compiled[] = array(
            'imdb_id'        => $this->imdbid($imdbid),
            'title'          => $shortTitle,
            'original_title' => $shortTitle,
            'type'           => $type,
            'poster'         => $this->posterSize($poster),
            'year'           => $year,
            'plot'           => head($cr->filter('td.title > span.outline')->extract('_text')),
            'genre'          => head($cr->filter('td.title > span.genre')->extract('_text')),
            'imdb_rating'    => $this->cleanRating($rating),
            'runtime'        => trim(head($cr->filter('span.runtime')->extract('_text')), ' mins.'),
            'imdb_votes_num' => head($cr->filter('td.sort_col')->extract('_text')),
        );
    }

    return $compiled;
}
/**
 * Gets the titles the actor is known for.
 *
 * The list is built from the "div#knownfor > div" nodes only while
 * $this->knownFor is still empty; afterwards the stored value is reused.
 * The collected image urls are handed to the image saver on every call.
 *
 * @return array
 */
public function getKnownFor()
{
    if (!$this->knownFor) {
        //every child div of the known for box describes one title
        foreach ($this->crawler->filter('div#knownfor > div') as $node) {
            $entry = new crawler($node);

            //imdb id comes from the link href(s)
            $hrefs  = $entry->filter('a')->extract('href');
            $imdbid = $this->id($hrefs);

            //title is stored on the poster image
            $titles = $entry->filter('a > img')->extract('title');
            $title  = head($titles);

            //poster is resolved from the image src together with the id
            $sources = $entry->filter('a > img')->extract('src');
            $poster  = $this->image($sources, $imdbid);

            //year is parsed out of the text of the second link
            $linkText = head($entry->filter('a')->eq(1)->extract('_text'));
            $year     = Helpers::extractYear($linkText);

            $this->knownFor[] = array(
                'imdb_id' => $imdbid,
                'title'   => $title,
                'poster'  => $poster,
                'year'    => $year,
            );
        }
    }

    $this->images->saveMultiple($this->imgUrls, null, 'imdb/posters/');

    return $this->knownFor;
}
/**
 * Compiles titles cast.
 *
 * Reads every table.cast_list > tr.odd / tr.even row and builds an array
 * keyed by actor name. Because the name is the key, a later row with the
 * same name replaces the earlier one.
 *
 * @return array actor name => array('name', 'image', 'char', 'imdb_id'),
 *               empty array when no cast rows were found
 */
private function compileCast()
{
    //get all the actor/char rows from imdb
    $rows = $this->crawler->filter('table.cast_list > tr.odd, table.cast_list > tr.even');

    $cast = array();

    //foreach row extract image, id, actor name and actors character(s)
    foreach ($rows as $row) {
        $crawler = new crawler($row);

        //first photo match as a pair: [loadlate attribute (image), title attribute (name)]
        $actor = head($crawler->filter('.primary_photo > a > img')->extract(array('loadlate', 'title')));

        //rows without a photo image give us nothing to work with; skip them
        //instead of feeding a non-array into last()/head() below
        if (!is_array($actor)) {
            continue;
        }

        $name  = last($actor);
        $image = head($actor);

        //get actor id
        $actorid = Helpers::extract(head($crawler->filter('.primary_photo > a')->extract('href')), 'nm');

        //get char
        $char = $this->prettify(head($crawler->filter('.character')->extract('_text')));

        //push all data into cast array
        $cast[$name] = array(
            'name'    => $name,
            'image'   => $image,
            'char'    => $char,
            'imdb_id' => $actorid,
        );
    }

    return $cast;
}
/**
 * Compile critic reviews into a save ready array.
 *
 * One entry is produced for every "ol.critic_reviews > li" node.
 *
 * @return array|null list of reviews, or null when the page has none
 */
private function compileReviews()
{
    $reviews = array();

    foreach ($this->crawler->filter('ol.critic_reviews > li') as $node) {
        $item = new crawler($node);

        $source = head($item->filter('div.source')->extract(array('_text')));
        $author = head($item->filter('div.author > a')->extract(array('_text')));
        $body   = head($item->filter('div.review_body')->extract(array('_text')));
        $link   = head($item->filter('a.external')->extract(array('href')));
        $score  = head($item->filter('div.review_grade')->extract(array('_text')));

        $reviews[] = array(
            'source' => $source,
            'author' => $author,
            'body'   => trim($body),
            'link'   => $link,
            'score'  => trim($score),
        );
    }

    //callers get null, not an empty array, when nothing was found
    if (!$reviews) {
        return null;
    }

    return $reviews;
}