/**
 * Search IMDb for titles matching $searchTerms.
 *
 * Requests the "/find?s=tt&q=..." page on the configured IMDb site, extracts
 * every title result from the returned HTML and turns each one into an object
 * via imdb::fromSearchResult().
 *
 * @param string $searchTerms Free-text search query (URL-encoded internally)
 * @param array $wantedTypes *optional* imdb types that should be returned. Defaults to returning all types.
 *                           The class constants MOVIE,GAME etc should be used e.g. [imdbsearch::MOVIE, imdbsearch::TV_SERIES]
 * @param int $maxResults *optional* The maximum number of results to return. 0 (or any
 *                        non-positive value) for unlimited. Defaults to $this->maxresults
 * @return array array of objects built by imdb::fromSearchResult(), in page order
 */
public function search($searchTerms, $wantedTypes = null, $maxResults = null)
{
    $results = array();

    // @TODO remove maxresults? It has no effect on imdb and why would the user want less results than possible?
    if ($maxResults === null) {
        $maxResults = $this->maxresults;
    }

    $url = "http://" . $this->imdbsite . "/find?s=tt&q=" . urlencode($searchTerms);
    $pageRequest = new imdb_page($url, $this, $this->cache, $this->logger);
    $page = $pageRequest->get();

    // Parse & filter results.
    // Named groups: imdbid (7 digits), title, year (optional, 4 digits), type (trailing text, may be empty).
    if (preg_match_all('!class="result_text"\\s*>\\s*<a href="/title/tt(?<imdbid>\\d{7})/[^>]*>(?<title>.*?)</a>\\s*(\\([^\\d]+\\)\\s*)?(\\((?<year>\\d{4})(.*?|)\\)|)(?<type>[^<]*)!ims', $page, $matches, PREG_SET_ORDER)) {
        foreach ($matches as $match) {
            // Only enforce the limit when it is positive: 0 is documented as "unlimited".
            // The previous `count($results) == $maxResults` check was already true before the
            // first result was added when $maxResults was 0, so nothing was ever returned.
            if ($maxResults > 0 && count($results) >= $maxResults) {
                break;
            }

            $type = $this->parseTitleType($match['type']);

            // Type filtering is applied only when the caller supplied an array of wanted types.
            if (is_array($wantedTypes) && !in_array($type, $wantedTypes)) {
                continue;
            }

            $results[] = imdb::fromSearchResult($match['imdbid'], $match['title'], $match['year'], $type, $this);
        }
    }

    return $results;
}
/**
 * Run the person search and collect the results.
 *
 * Fetches the search page, pulls every "/name/nm<id>" row out of the HTML,
 * skips ids that were already seen, and stores one object per remaining row
 * in $this->resu (keyed by the row's position among the regex matches).
 *
 * @param string $url *optional* Search URL to use instead of the one built by mkurl()
 * @return array $this->resu, holding the objects created by imdb_person::fromSearchResults()
 */
public function results($url = "")
{
    if (empty($url)) {
        $url = $this->mkurl();
    }

    $request = new imdb_page($url, $this, $this->cache, $this->logger);
    $html = $request->get();

    // A non-positive maxresults setting means "practically unlimited".
    $limit = ($this->maxresults > 0) ? $this->maxresults : 999999;

    // make sure to catch col #3, not #1 (pic only)
    // capture groups: 1=id 2=name 3=details
    preg_match_all('|<tr.*>\\s*<td.*>.*</td>\\s*<td.*<a href="/name/nm(\\d{7})[^>]*>([^<]+)</a>\\s*(.*)</td>|Uims', $html, $rows);

    $mc = count($rows[0]);
    $this->logger->debug("[Person Search] {$mc} matches");

    $seenIds = array();
    foreach ($rows[1] as $index => $personId) {
        // The limit is applied to the match position, not to the number of stored results.
        if ($index == $limit) {
            break;
        }

        // Ignore repeated occurrences of the same person id.
        if (in_array($personId, $seenIds)) {
            continue;
        }
        $seenIds[] = $personId;

        $person = imdb_person::fromSearchResults($personId, $rows[2][$index], $this);

        // Optional "known for" details: 1=role 2=movie id 3=movie title 4=year
        $details = $rows[3][$index];
        if (!empty($details) && preg_match('|<small>\\((.*),\\s*<a href="/title/tt(\\d{7}).*"\\s*>(.*)</a>\\s*\\((\\d{4})\\)\\)|Ui', $details, $hit)) {
            $person->setSearchDetails($hit[1], $hit[2], $hit[3], $hit[4]);
        }

        $this->resu[$index] = $person;
    }

    return $this->resu;
}
/**
 * Get a page from IMDb, which will be cached in memory for repeated use.
 *
 * A non-empty entry in $this->page[$page] is returned as-is. Otherwise the page is
 * requested from the URL produced by buildUrl($page), stored in $this->page[$page]
 * and, when $this->imdb_utf8recode is enabled and mbstring is available, converted
 * to UTF-8 if it is not already valid UTF-8.
 *
 * @param string $page Name of the page to retrieve e.g. Title, Credits
 * @return string
 * @see mdb_base->set_pagename()
 */
protected function getPage($page)
{
    if (!empty($this->page[$page])) {
        return $this->page[$page];
    }

    $pageRequest = new imdb_page($this->buildUrl($page), $this, $this->cache, $this->logger);
    $this->page[$page] = $pageRequest->get();

    // @TODO is this needed? is anything on imdb not utf8 encoded?
    // Non ascii characters appear to be entity encoded anyway, so this would do nothing?
    if ($this->imdb_utf8recode && function_exists('mb_detect_encoding')) {
        $cur_encoding = mb_detect_encoding($this->page[$page]);
        if (!($cur_encoding == "UTF-8" && mb_check_encoding($this->page[$page], "UTF-8"))) {
            // utf8_encode() is deprecated as of PHP 8.2. It only ever converted ISO-8859-1
            // to UTF-8, which is exactly what this mbstring call does; mbstring is known
            // to be loaded here because of the function_exists() guard above.
            $this->page[$page] = mb_convert_encoding($this->page[$page], 'UTF-8', 'ISO-8859-1');
        }
    }

    return $this->page[$page];
}