示例#1
0
/**
 * Fetches the data for a given OfDB id
 *
 * The id may carry the engine prefix ($ofdbscraperIdPrefix) and an optional
 * "-<vid>" suffix selecting a specific version of the film. The main page,
 * the detail page (director, cast, genres) and the version page (FSK,
 * runtime) are requested through httpClient(); the plot page is requested
 * additionally when the main page links to one. HTTP errors are appended to
 * the global $CLIENTERROR and scraping continues with whatever was returned.
 *
 * @author  Chinamann <*****@*****.**>
 * @param   string OfDB id, optionally prefixed and optionally followed by "-<vid>"
 * @return  array  Result data; 'encoding', 'imdbID', 'title' and 'subtitle'
 *                 are always set, all other keys only when they were found
 */
function ofdbscraperData($id)
{
    global $CLIENTERROR;
    global $ofdbscraperServer;
    global $ofdbscraperIdPrefix;
    global $cache;
    // Strip the engine prefix. It is quoted so that characters with a special
    // meaning in a regular expression are matched literally.
    $id = preg_replace('/^' . preg_quote($ofdbscraperIdPrefix, '/') . '/', '', $id);
    // Split "<film id>-<version id>"; the version part is optional.
    $idParts = explode("-", $id, 2);
    $id = $idParts[0];
    $vid = isset($idParts[1]) ? $idParts[1] : null;
    $data = array();
    //result
    $ary = array();
    //temp
    $ary2 = array();
    //temp2
    // Fetch Mainpage
    $resp = httpClient(ofdbscraperContentUrl($id), $cache);
    if (!$resp['success']) {
        $CLIENTERROR .= $resp['error'] . "\n";
    }
    // add encoding
    $data['encoding'] = get_response_encoding($resp);
    // add engine ID -> important for non edit.php refetch
    $data['imdbID'] = $ofdbscraperIdPrefix . $id;
    // Flatten the page to one line so that ".*?" can span former line breaks
    $resp['data'] = preg_replace('/[\\r\\n\\t]/', ' ', $resp['data']);
    // Titles / Year
    preg_match('/<title>(.*?)<\\/title>/i', $resp['data'], $ary);
    $pageTitle = isset($ary[1]) ? $ary[1] : '';
    $pageTitle = preg_replace('/^OFDb[\\s-]*/', '', $pageTitle);
    $pageTitle = preg_replace('/\\[.*\\]/', ' ', $pageTitle);
    if (preg_match('/\\(([0-9]*)\\)/i', $pageTitle, $ary2)) {
        $data['year'] = trim($ary2[1]);
    }
    $pageTitle = preg_replace('/\\([0-9]*\\)/', ' ', $pageTitle);
    $pageTitle = preg_replace('/\\s{2,}/s', ' ', $pageTitle);
    // check if there is a comma separated article at the end and move it to the front
    if (preg_match('#(.*),\\s*(A|The|Der|Die|Das|Ein|Eine|Einer)\\s*$#i', $pageTitle, $subRes)) {
        $pageTitle = $subRes[2] . ' ' . $subRes[1];
    }
    // "Title - Subtitle"; the subtitle part is optional
    $titleParts = explode(" - ", trim($pageTitle), 2);
    $data['title'] = trim($titleParts[0]);
    $data['subtitle'] = isset($titleParts[1]) ? trim($titleParts[1]) : '';
    // Original Title
    if (preg_match('/Originaltitel.*?<b>(.*?)</i', $resp['data'], $ary)) {
        $data['orgtitle'] = trim($ary[1]);
    }
    // Country
    if (preg_match('/>Herstellungsland:.*?<b><a.*?>(.*?)<\\/a>/i', $resp['data'], $ary)) {
        $data['country'] = trim($ary[1]);
    }
    // Rating
    if (preg_match('/Note: <span itemprop="ratingValue">([0-9\\.]+)/', $resp['data'], $ary)) {
        $data['rating'] = $ary[1];
    }
    // Cover URL (the na.gif placeholder means "no cover available")
    if (preg_match('#<img src="(http://img.ofdb.de/film/na.gif)"#i', $resp['data'], $ary)) {
        $data['coverurl'] = "";
    } else {
        if (preg_match('#<img src="(http://img.ofdb.de/film/.*?\\.jpg)"#i', $resp['data'], $ary)) {
            $data['coverurl'] = trim($ary[1]);
        }
    }
    // Fetch first VID if none already selected
    if (!$vid) {
        if (preg_match_all('/view\\.php\\?page=fassung&fid=' . preg_quote($id, '/') . '&vid=([0-9]+)".*?class="Klein">(.*?)</i', $resp['data'], $ary, PREG_SET_ORDER)) {
            // Prefer the first version whose label is "K" or "KV"
            foreach ($ary as $row) {
                $label = trim($row[2]);
                if ($label == "K" || $label == "KV") {
                    $vid = $row[1];
                    break;
                }
            }
            if (!$vid) {
                // With PREG_SET_ORDER $ary[0] is the first match set and
                // index 1 within it is the captured vid.
                $vid = $ary[0][1];
            }
        }
    }
    // IMDB ID
    $data['imdbID'] = $ofdbscraperIdPrefix . "{$id}-{$vid}";
    // Fetch Plot
    if (preg_match('#href="(plot/[^"]+)"#i', $resp['data'], $ary)) {
        $subresp = httpClient($ofdbscraperServer . '/' . $ary[1], $cache);
        // Report the status of the plot request itself, not of the main page
        if (!$subresp['success']) {
            $CLIENTERROR .= $subresp['error'] . "\n";
        }
        $subresp['data'] = preg_replace('/[\\r\\n\\t]/', ' ', $subresp['data']);
        if (preg_match('#</b><br><br>(.*?)</font></p>#i', $subresp['data'], $ary)) {
            $plot = preg_replace('/\\s{2,}/s', ' ', $ary[1]);
            $plot = preg_replace('#<(br|p)[ /]*>#i', "\n", $plot);
            $data['plot'] = trim($plot);
        }
    }
    // Fetch Details
    $resp = httpClient(ofdbscraperDetailUrl($id), $cache);
    if (!$resp['success']) {
        $CLIENTERROR .= $resp['error'] . "\n";
    }
    $resp['data'] = preg_replace('/[\\r\\n\\t]/', ' ', $resp['data']);
    // Director (comma separated list)
    if (preg_match('/<b><i>Regie<\\/i><\\/b>.*?<table.*?>(.*?)<\\/table>/i', $resp['data'], $ary)) {
        if (preg_match_all('/class="Daten"><a.*?>(.*?)<\\/a>/i', $ary[1], $ary2, PREG_SET_ORDER)) {
            $directors = array();
            foreach ($ary2 as $row) {
                $directors[] = trim($row[1]);
            }
            $data['director'] = implode(', ', $directors);
        }
    }
    // Cast (one "actor::character::actorid" line per person)
    if (preg_match('/<b><i>Darsteller<\\/i><\\/b>.*?<table.*?>(.*)<\\/table>/', $resp['data'], $ary)) {
        // dirty workaround for (.*?) failed on very long match groups issue (tested at PHP 5.2.5.5)
        // e.g.: ofdb:7749-111320 (Angel - Jäger der Finsternis)
        $ary[1] = preg_replace('#</table.*#', '', $ary[1]);
        if (preg_match_all('/class="Daten"><a(.*?)">(.*?)<\\/a>.*?<\\/td>  <td.*?<\\/td>  <td[^>]*>(.*?)<\\/td>/i', $ary[1], $ary2, PREG_SET_ORDER)) {
            $cast = '';
            foreach ($ary2 as $row) {
                $actor = trim(strip_tags($row[2]));
                $actorid = "";
                if (!empty($row[1])) {
                    if (preg_match('#href="view.php\\?page=person&id=([0-9]*)#i', $row[1], $idAry)) {
                        $actorid = $ofdbscraperIdPrefix . $idAry[1];
                    }
                }
                $character = "";
                if (!empty($row[3])) {
                    if (preg_match('#class="Normal">... ([^<]*)<#i', $row[3], $charAry)) {
                        $character = trim(strip_tags($charAry[1]));
                    }
                }
                $cast .= "{$actor}::{$character}::{$actorid}\n";
            }
            $data['cast'] = $cast;
        }
    }
    // Genres: OfDB genre name => target genre name ('' where the map names no equivalent)
    $genres = array('Amateur' => '', 'Eastern' => '', 'Experimentalfilm' => '', 'Mondo' => '', 'Kampfsport' => 'Sport', 'Biographie' => 'Biography', 'Katastrophen' => 'Thriller', 'Krimi' => 'Crime', 'Science-Fiction' => 'Sci-Fi', 'Kinder-/Familienfilm' => 'Family', 'Dokumentation' => 'Documentary', 'Action' => 'Action', 'Drama' => 'Drama', 'Abenteuer' => 'Adventure', 'Historienfilm' => 'History', 'Kurzfilm' => 'Short', 'Liebe/Romantik' => 'Romance', 'Heimatfilm' => 'Romance', 'Grusel' => 'Horror', 'Horror' => 'Horror', 'Erotik' => 'Adult', 'Hardcore' => 'Adult', 'Sex' => 'Adult', 'Musikfilm' => 'Musical', 'Animation' => 'Animation', 'Fantasy' => 'Fantasy', 'Trash' => 'Horror', 'Komödie' => 'Comedy', 'Krieg' => 'War', 'Mystery' => 'Mystery', 'Thriller' => 'Thriller', 'Tierfilm' => 'Documentary', 'Western' => 'Western', 'TV-Serie' => '', 'TV-Mini-Serie' => '', 'Sportfilm' => 'Sport', 'Splatter' => 'Horror', 'Manga/Anime' => 'Animation');
    if (preg_match('/>Genre\\(s\\)\\:.*?<b>(.*?)<\\/b>/i', $resp['data'], $ary)) {
        if (preg_match_all('/<a.*?>(.*?)<\\/a>/i', $ary[1], $ary2, PREG_SET_ORDER)) {
            foreach ($ary2 as $row) {
                $genre = trim(html_entity_decode($row[1]));
                $genre = strip_tags($genre);
                if (!$genre) {
                    continue;
                }
                // Genres missing from the map are ignored
                if (isset($genres[$genre])) {
                    $data['genres'][] = $genres[$genre];
                }
            }
        }
    }
    // Fetch Version
    $resp = httpClient(ofdbscraperVersionUrl($id, $vid), $cache);
    if (!$resp['success']) {
        $CLIENTERROR .= $resp['error'] . "\n";
    }
    $resp['data'] = preg_replace('/[\\r\\n\\t]/', ' ', $resp['data']);
    // FSK: rating label on the page => value stored in 'fsk'
    $fsks = array('FSK o.A.' => '0', 'FSK 6' => '6', 'FSK 12' => '12', 'FSK 16' => '16', 'FSK 18' => '18', 'Keine Jugendfreigabe' => '18', 'SPIO/JK' => '18', 'juristisch geprüft' => '', 'ungeprüft' => '');
    if (preg_match('/>Freigabe:<.*?<b>(.*?)<\\/tr>/i', $resp['data'], $ary)) {
        $fsk = trim(html_entity_decode($ary[1]));
        $fsk = strip_tags($fsk);
        if (isset($fsks[$fsk])) {
            $data['fsk'] = $fsks[$fsk];
        }
    }
    // Runtime (everything from the first ":" on is cut off)
    if (preg_match('/>Laufzeit:<.*?<b>(.*?)\\s*Min/i', $resp['data'], $ary)) {
        $data['runtime'] = trim(preg_replace('/:.*/', '', $ary[1]));
    }
    return $data;
}
示例#2
0
文件: imdb.php 项目: Boris-de/videodb
/**
 * Brings a response into the encoding recorded in $data['encoding'].
 *
 * Background (October 2010): most imdb pages were switched to utf8, while
 * some (e.g. fullcredits) are still delivered as iso-8859-1, so such
 * responses are recoded here.
 *
 * @param   array  $data  Collected result data; only 'encoding' is read
 * @param   array  $resp  Response whose 'data' entry holds the page body
 * @return  array  The response, with 'data' entity-decoded and converted
 *                 when the page encoding differs; otherwise unchanged
 */
function imdbFixEncoding($data, $resp)
{
    $sourceEncoding = get_response_encoding($resp);
    // Nothing to do when the page already uses the target encoding
    if ($sourceEncoding == $data['encoding']) {
        return $resp;
    }
    // $resp is a by-value copy, so the caller's array is not modified
    $decodedBody = html_entity_decode_all($resp['data']);
    $resp['data'] = iconv($sourceEncoding, $data['encoding'], $decodedBody);
    return $resp;
}
示例#3
0
/**
 * Fetches the data for a given Allocine-ID
 *
 * Three pages are requested through httpClient(): the film page, the casting
 * page and the technical details page. HTTP errors are appended to the
 * global $CLIENTERROR and scraping continues with whatever was returned.
 * Fields that cannot be found on the page are returned as empty strings
 * (runtime and rating as 0) or are left out, as noted per section below.
 *
 * @author  Douglas Mayle <*****@*****.**>
 * @author  Tiago Fonseca <*****@*****.**>
 * @param   string Allocine id, optionally carrying the $allocineIdPrefix prefix
 * @return  array  Result data
 */
function allocineData($imdbID)
{
    global $allocineServer;
    global $allocineIdPrefix;
    global $CLIENTERROR;
    // Strip the engine prefix; quoted so it is matched literally
    $allocineID = preg_replace('/^' . preg_quote($allocineIdPrefix, '/') . '/', '', $imdbID);
    // fetch mainpage
    $resp = httpClient($allocineServer . '/film/fichefilm_gen_cfilm=' . $allocineID . '.html', 1);
    if (!$resp['success']) {
        $CLIENTERROR .= $resp['error'] . "\n";
    }
    $data = array();
    // result
    $ary = array();
    // temp
    // add encoding
    $data['encoding'] = get_response_encoding($resp);
    // Allocine ID
    $data['id'] = "allocine:" . $allocineID;
    // We remove all the multiples spaces and line breakers
    $resp['data'] = preg_replace('/[\\s]{2,}/', '', $resp['data']);
    /*
      Title and subtitle ("Title - Subtitle", subtitle optional)
    */
    preg_match('#<h1.*?>(.*?)</h1>#si', $resp['data'], $ary);
    $titleParts = explode(" - ", isset($ary[1]) ? trim($ary[1]) : '', 2);
    // Some bugs when using html_clean function --> using html_clean_utf8
    $data['title'] = html_clean_utf8($titleParts[0]);
    $data['subtitle'] = html_clean_utf8(isset($titleParts[1]) ? $titleParts[1] : '');
    /*
      Year
    */
    preg_match('/<a.*? href="\\/film\\/tous\\/decennie.*?year=(\\d+)">(\\d+)<\\/a>/i', $resp['data'], $ary);
    if (!empty($ary[1])) {
        $data['year'] = trim($ary[1]);
    }
    /*
      Release Date
        added to the comments
    */
    preg_match('#<a.*? href="/film/agenda\\.html\\?week=\\d+\\-\\d+\\-\\d+">(.*)</a>#i', $resp['data'], $ary);
    $release_date = "";
    if (!empty($ary[1])) {
        $release_date = "\r\nDate de sortie cinéma : " . html_clean_utf8($ary[1]);
    }
    /*
      Cover URL (empty string when no poster is found)
    */
    preg_match('#<div class="colleft">\\s*?<div class="vmargin20b">\\s*?<div class=\\"poster\\">\\s*?<em class=\\"imagecontainer\\">\\s*?<a .*?>\\s*?<img.*?src=\'(.*?)\'.*?>#si', $resp['data'], $ary);
    $data['coverurl'] = isset($ary[1]) ? trim($ary[1]) : '';
    /*
      Runtime in minutes, e.g. "Durée : 02h13min" => 133.
      Stays 0 when the page shows no duration; doing arithmetic on the
      missing capture would raise a TypeError on PHP 8.
    */
    $data['runtime'] = 0;
    if (preg_match('/Durée :\\s*?(\\d+)h(\\d+)\\s*?min/i', $resp['data'], $ary)) {
        $data['runtime'] = (int) $ary[1] * 60 + (int) $ary[2];
    }
    /*
      Director (empty string when not found)
    */
    preg_match('#Réalisé par\\s*<span.*?><a.*?rel="v:directedBy".*?href=\'/personne/fichepersonne_gen_cpersonne=\\d+\\.html\' title=\'.*\'>(.*)</a></span>#i', $resp['data'], $ary);
    $data['director'] = isset($ary[1]) ? trim($ary[1]) : '';
    /*
      Rating
    */
    preg_match('#<p class="withstars"><a.*?href="/film/critiquepublic_gen_cfilm=\\d+\\.html"><img.*?class="stareval.*?".*?<span class=\\"moreinfo\\">\\((.*)\\)</span></p>#i', $resp['data'], $ary);
    // The page uses a decimal comma
    $rating = isset($ary[1]) ? str_replace(",", ".", trim($ary[1])) : '';
    // Allocine rating is based on 5, imdb is based on 10.
    // The explicit cast keeps a missing or non-numeric value at 0 instead of
    // failing in the multiplication.
    $data['rating'] = (float) $rating * 2;
    /*
      Countries
    */
    // Countries in English
    $map_countries = array('allemand' => 'Germany', 'américain' => 'USA', 'arménien' => 'Armenia', 'argentin' => 'Argentina', 'sud-africain' => 'South Africa', 'australien' => 'Australia', 'belge' => 'Belgium', 'britannique' => 'UK', 'bulgare' => 'Bulgaria', 'canadien' => 'Canada', 'chinois' => 'China', 'coréen' => 'South Korea', 'danois' => 'Denmark', 'espagnol' => 'Spain', 'français' => 'France', 'grec' => 'Greece', 'hollandais' => 'Netherlands', 'hong-kongais' => 'Hong-Kong', 'hongrois' => 'Hungary', 'indien' => 'India', 'irlandais' => 'Republic of Ireland', 'islandais' => 'Iceland', 'israëlien' => 'Israel', 'italien' => 'Italy', 'japonais' => 'Japan', 'luxembourgeois' => 'Luxembourg', 'mexicain' => 'Mexico', 'norvégien' => 'Norge', 'néo-zélandais' => 'New Zealand', 'polonais' => 'Poland', 'portugais' => 'Portugal', 'roumain' => 'Romania', 'russe' => 'Russia', 'serbe' => 'Serbia', 'suédois' => 'Sweden', 'taïwanais' => 'Taiwan', 'tchèque' => 'Czech Republic', 'thaïlandais' => 'Thailand', 'turc' => 'Turkey', 'ukrainien' => 'Ukraine', 'vietnamien' => 'Vietnam');
    if (preg_match_all('#Long\\-métrage\\s*?<a.*?href=".*?">(.*?)</a>#si', $resp['data'], $ary, PREG_PATTERN_ORDER) > 0) {
        $originlist = explode(",", trim(join(', ', $ary[1])));
        $countries = array();
        foreach ($originlist as $origin) {
            foreach ($map_countries as $pattern_c => $mapped_country) {
                if (preg_match('/' . $pattern_c . '/i', $origin)) {
                    // Exact comparison: a substring test on the joined string
                    // would drop "UK" once "Ukraine" is present.
                    if (!in_array($mapped_country, $countries, true)) {
                        $countries[] = $mapped_country;
                    }
                    break;
                }
            }
            // Origins without a mapping are skipped instead of adding an
            // empty entry (which produced a dangling ", ").
        }
        $data['country'] = implode(', ', $countries);
    }
    /*
      Plot
    */
    preg_match('#<div id="synopsis_full">\\s*?<p>\\s*?<span class=\\"bold\\">Synopsis \\: </span>\\s*?<span property="v:summary">(.*?)</span>#is', $resp['data'], $ary);
    if (!empty($ary[1])) {
        $data['plot'] = $ary[1];
        $data['plot'] = html_clean_utf8($data['plot']);
        // And cleanup
        $data['plot'] = trim($data['plot']);
        $data['plot'] = preg_replace('/[\\n\\r]/', ' ', $data['plot']);
        $data['plot'] = preg_replace('/  /', ' ', $data['plot']);
    }
    /*
     Genres (as Array)
     A mapping of '-' means "no equivalent": the scraped text is kept as is.
    */
    $map_genres = array('Action' => 'Action', 'Animation' => 'Animation', 'Arts Martiaux' => 'Action', 'Aventure' => 'Adventure', 'Biopic' => 'Biography', 'Bollywood' => 'Musical', 'Classique' => '-', 'Comédie Dramatique' => 'Drama', 'Comédie musicale' => 'Musical', 'Comédie' => 'Comedy', 'Dessin animé' => 'Animation', 'Divers' => '-', 'Documentaire' => 'Documentary', 'Drame' => 'Drama', 'Epouvante-horreur' => 'Horror', 'Erotique' => 'Adult', 'Espionnage' => '-', 'Famille' => 'Family', 'Fantastique' => 'Fantasy', 'Guerre' => 'War', 'Historique' => 'History', 'Horreur' => 'Horror', 'Musique' => 'Musical', 'Policier' => 'Crime', 'Péplum' => 'History', 'Romance' => 'Romance', 'Science fiction' => 'Sci-Fi', 'Thriller' => 'Thriller', 'Western' => 'Western');
    if (preg_match_all('#Genre :(.*?)</a>\\s*?<br#si', $resp['data'], $ary, PREG_PATTERN_ORDER) > 0) {
        $genrelist = explode(",", trim(join(', ', $ary[1])));
        foreach ($genrelist as $genre) {
            $mapped_genre_found = '';
            foreach ($map_genres as $pattern => $mapped_genre) {
                if (preg_match('/' . $pattern . '/i', $genre)) {
                    $mapped_genre_found = $mapped_genre;
                    break;
                }
            }
            $data['genres'][] = $mapped_genre_found != '-' ? $mapped_genre_found : trim($genre);
        }
    }
    /*
      Original Title (empty string when not found)
    */
    preg_match('#Titre original : <span class=\\"purehtml\\"><em>(.*)</em></span>#', $resp['data'], $ary);
    $data['origtitle'] = isset($ary[1]) ? trim($ary[1]) : '';
    /*
      Title and Subtitle
      If sub-title is blank, we'll try to fill in the original title for foreign films.
    */
    if (empty($data['subtitle'])) {
        if ($data['origtitle']) {
            $data['subtitle'] = $data['title'];
            $data['title'] = $data['origtitle'];
        }
    }
    /*
      CREDITS AND CAST
    */
    // fetch credits
    // Another HTML page
    $resp = httpClient($allocineServer . '/film/casting_gen_cfilm=' . $allocineID . '.html', 1);
    if (!$resp['success']) {
        $CLIENTERROR .= $resp['error'] . "\n";
    }
    // We remove all the multiples spaces and line breakers
    $resp['data'] = preg_replace('/[\\s]{2,}/', '', $resp['data']);
    if (preg_match('#<h2>Acteurs, rôles, personnages</h2>(.*?)<div class="titlebar">\\s*?<a class="anchor" id=\'actors\'></a>\\s*?<h2>#is', $resp['data'], $Section)) {
        // One "actor::role::allocine:<person id>" line per cast member
        $cast = '';
        # the big ones with image
        /*
        <div class="titlebar">
        <h3>
        <a href="/personne/fichepersonne_gen_cpersonne=5568.html">Liam Neeson</a>
        </h3>
        </div>
        <p>
        Rôle : Qui-Gon Jinn
        </p>
        <div class="spacer"></div>
        */
        preg_match_all('#<div class="titlebar">\\s*?<h3>\\s*?<a href="/personne/fichepersonne_gen_cpersonne=(\\d+?).html">(.*?)</a>\\s*?</h3>\\s*?</div>\\s*?<p>\\s*Rôle : (.*?)\\s*</p>#is', $Section[1], $ary, PREG_PATTERN_ORDER);
        $count = 0;
        while (isset($ary[1][$count])) {
            $cast .= $ary[2][$count] . "::" . $ary[3][$count] . "::allocine:" . $ary[1][$count] . "\n";
            $count++;
        }
        # extended cast - without image
        /*
        <tr class="odd">
        <td>
        Shmi Skywalker
        </td>
        <td>
        <a href="/personne/fichepersonne_gen_cpersonne=14279.html">Pernilla August</a>
        </td>
        </tr>
        */
        preg_match_all('#<tr.*?>\\s*?<td>\\s*(.*?)\\s*</td>\\s*?<td>\\s*?<a href="/personne/fichepersonne_gen_cpersonne=(\\d+).html">(.*?)</a>\\s*?</td>#si', $Section[1], $ary, PREG_PATTERN_ORDER);
        $count = 0;
        while (isset($ary[1][$count])) {
            $cast .= $ary[3][$count] . "::" . $ary[1][$count] . "::allocine:" . $ary[2][$count] . "\n";
            $count++;
        }
        $data['cast'] = trim($cast);
    }
    /*
      Comments
    */
    // Default language; replaced below when the technical page names one
    $data['language'] = 'french';
    // Another HTML page
    $resp = httpClient($allocineServer . '/film/fichefilm-' . $allocineID . '/technique/', 1);
    if (!$resp['success']) {
        $CLIENTERROR .= $resp['error'] . "\n";
    }
    // We remove all the multiples spaces and line breakers
    $resp['data'] = preg_replace('/[\\s]{2,}/', '', $resp['data']);
    // Technical informations as comment
    preg_match('#<div class=\\"rubric\\">\\s*?<div class=\\"vpadding20b\\">\\s*(.*?)\\s*</div>\\s*?</div>#si', $resp['data'], $ary);
    if (!empty($ary[1])) {
        $data['comment'] = $ary[1];
        $data['comment'] = str_replace("Tourné en :", "Tourné en : ", $data['comment']);
        // Adding the release date in theater
        $data['comment'] = $data['comment'] . $release_date;
        // Search the language
        if (preg_match('#<p>\\s*?<span class=\\"bold\\">Tourné en :</span>\\s*(.*?)\\s*</p>#si', $resp['data'], $ary)) {
            $data['language'] = $ary[1];
            // Converting languages from french to english
            $map_languages = array('Anglais' => 'english', 'Français' => 'french', 'Allemand' => 'german', 'Italien' => 'italian', 'Espagnol' => 'spanish', 'Coréen' => 'Korean', 'Roumain' => 'romanian', 'Autre' => 'french', 'Hindi' => 'hindi', 'Arabe' => 'arabic', 'Thaï' => 'thai', 'Danois' => 'danish', 'Suédois' => 'swedish', 'Tchèque' => 'czech', 'Japonais' => 'japanese', 'Portugais' => 'portuguese', 'Norvégien' => 'norwegian', 'Bulgare' => 'bulgarian', 'Grec' => 'greek', 'Hongrois' => 'hungarian', 'Turc' => 'turkish', 'Islandais' => 'icelandic', 'Polonais' => 'polish', 'Russe' => 'russian', 'Ukrainien' => 'ukrainian', 'Serbe' => 'serbian', 'Vietnamien' => 'vietnamese', 'Afrikaans' => 'afrikaans');
            foreach ($map_languages as $pattern => $map_lang) {
                $data['language'] = str_replace($pattern, $map_lang, $data['language']);
            }
        }
    }
    // Return the data collected
    return $data;
}
示例#4
0
/**
 * Fetches the data for a given Dvdfr-ID
 *
 * A single XML document is requested through httpClient() from the URL built
 * by dvdfrContentUrl(); an HTTP error is appended to the global $CLIENTERROR
 * and parsing continues with whatever was returned. Tags that are missing
 * from the document yield empty values instead of undefined-index notices.
 *
 * @param   string Dvdfr id as accepted by dvdfrContentUrl()
 * @return  array  Result data
 */
function dvdfrData($imdbID)
{
    global $CLIENTERROR;
    $data = array();
    // result
    $ary = array();
    // temp
    $para = array('useragent' => 'VideoDB (http://www.videodb.net/)');
    // fetch mainpage
    $resp = httpClient(dvdfrContentUrl($imdbID), 1, $para);
    if (!$resp['success']) {
        $CLIENTERROR .= $resp['error'] . "\n";
    }
    // add encoding
    $data['encoding'] = get_response_encoding($resp);
    // See http://www.dvdfr.com/api/dvd.php?id=2869 for output
    // Titles: <fr> becomes the title, <vo> the subtitle
    preg_match('#<titres>\\s*<fr>(.+?)</fr>\\s*<vo>(.+?)</vo>#is', $resp['data'], $ary);
    $titleFr = isset($ary[1]) ? $ary[1] : '';
    $titleVo = isset($ary[2]) ? $ary[2] : '';
    $data['title'] = mb_convert_case(dvdfrCleanStr($titleFr), MB_CASE_TITLE, $data['encoding']);
    $data['subtitle'] = mb_convert_case(dvdfrCleanStr($titleVo), MB_CASE_TITLE, $data['encoding']);
    // Country (only the first <pays> entry of <listePays>)
    preg_match('#<listePays>\\s*<pays.*?>(.+?)</pays>#is', $resp['data'], $ary);
    $data['country'] = dvdfrCleanStr(isset($ary[1]) ? $ary[1] : '');
    // Year
    preg_match('#<annee>(\\d+)</annee>#is', $resp['data'], $ary);
    $data['year'] = dvdfrCleanStr(isset($ary[1]) ? $ary[1] : '');
    // Cover URL
    preg_match('#<cover>(.*?)</cover>#i', $resp['data'], $ary);
    $data['coverurl'] = isset($ary[1]) ? trim($ary[1]) : '';
    // Runtime (stays null when the tag is absent, as before)
    preg_match('#<duree>(\\d+)</duree>#i', $resp['data'], $ary);
    $data['runtime'] = isset($ary[1]) ? $ary[1] : null;
    // Director (only the first one)
    preg_match('#<star type="R.*?alisateur" id="\\d+">(.*?)</star>#i', $resp['data'], $ary);
    $data['director'] = dvdfrCleanStr(isset($ary[1]) ? $ary[1] : '');
    // Plot
    preg_match('#<synopsis>(.*?)</synopsis>#is', $resp['data'], $ary);
    if (!empty($ary[1])) {
        $data['plot'] = $ary[1];
        // And cleanup
        $data['plot'] = preg_replace('/[\\n\\r]/', ' ', $data['plot']);
        $data['plot'] = preg_replace('/\\s+/', ' ', $data['plot']);
        $data['plot'] = dvdfrCleanStr($data['plot']);
    }
    // maps dvdfr category ids to videodb category names
    $category_map = array("1" => "Action", "2" => "Animation", "61" => "", "3" => "Adventure", "72" => "", "81" => "Musical", "4" => "Comedy", "5" => "Drama", "6" => "Musical", "74" => "Romance", "7" => "Music", "8" => "", "9" => "Short", "10" => "Documentary", "78" => "Documentary", "11" => "Music", "12" => "", "13" => "Documentary", "14" => "Drama", "73" => "Drama", "15" => "Adult", "16" => "Action", "17" => "Sci-Fi", "30" => "Musical", "83" => "Sport", "18" => "War", "19" => "Musical", "20" => "History", "21" => "Horror", "22" => "Comedy", "23" => "Animation", "24" => "Adult", "25" => "Music", "79" => "", "26" => "Music", "27" => "Action", "28" => "", "57" => "", "29" => "Documentary", "32" => "Music", "71" => "Music", "31" => "Music", "33" => "War", "34" => "Crime", "54" => "", "76" => "Music", "55" => "Music", "56" => "Sci-Fi", "60" => "", "75" => "", "58" => "", "59" => "", "62" => "", "63" => "Sport", "82" => "Sport", "64" => "Music", "65" => "", "66" => "Thriller", "67" => "Music", "68" => "Music", "69" => "Documentary", "70" => "Western", "Science Fiction" => "Sci-Fi");
    // Genres (as Array)
    if (preg_match_all('#<categorie>(.*?)</categorie>#i', $resp['data'], $ary, PREG_PATTERN_ORDER) > 0) {
        foreach ($ary[1] as $rawCategory) {
            $category = dvdfrCleanStr($rawCategory);
            // Categories missing from the map are recorded like the ones
            // explicitly mapped to "" instead of reading an undefined index.
            $data['genres'][] = isset($category_map[$category]) ? $category_map[$category] : '';
        }
    }
    // Cast: one "actor::::dvdfr<id>" line per <star type="Acteur">
    // (the character field between the separators is left empty)
    if (preg_match('#<stars>(.*)</stars>#is', $resp['data'], $Section)) {
        $cast = '';
        preg_match_all('#<star type="Acteur" id="(\\d+)">(.*?)</star>#i', $Section[1], $ary, PREG_PATTERN_ORDER);
        foreach ($ary[0] as $i => $unused) {
            $cast .= dvdfrCleanStr($ary[2][$i]) . '::::dvdfr' . dvdfrCleanStr($ary[1][$i]) . "\n";
        }
        $data['cast'] = dvdfrCleanStr($cast);
    }
    return $data;
}
示例#5
0
/**
 * Search an image on isohunt
 *
 * Requests the isohunt RSS feed for the given title and converts every
 * <item> of the feed's <channel> into one result row. An HTTP error is
 * appended to the global $CLIENTERROR; when the response cannot be parsed
 * as XML only the 'encoding' entry is returned.
 *
 * Feed fields that are read per item:
 *   title, link, description, enclosure[@url], enclosure[@length]
 *
 * @param   string    The search string
 * @return  array     'encoding' => response encoding, followed by one
 *                    numerically indexed row per feed item with the keys
 *                    title, url, torrent, filesize, subtitle, plot and,
 *                    when the description contains it, sl ("Seeds: ...")
 */
function isohuntSearch($title)
{
    global $CLIENTERROR;
    global $isohuntServer;
    $data = array();
    $resp = httpClient($isohuntServer . '/js/rss/' . urlencode($title), 1);
    if (!$resp['success']) {
        $CLIENTERROR .= $resp['error'] . "\n";
    }
    // add encoding
    $data['encoding'] = get_response_encoding($resp);
    // Parser warnings are suppressed; a failed parse is detected below
    $feed = @simplexml_load_string($resp['data']);
    if (!is_object($feed)) {
        return $data;
    }
    foreach ($feed->channel->item as $item) {
        $length = (string) $item->enclosure['length'];
        $description = (string) $item->description;
        $entry = array(
            'title' => (string) $item->title,
            'url' => (string) $item->link,
            'torrent' => (string) $item->enclosure['url'],
            'filesize' => $length,
            // the file size rendered by sizetostring() doubles as subtitle
            'subtitle' => sizetostring($length, 1),
            'plot' => $description,
        );
        // Pick the text starting at "Seeds: " up to the next tag, if present
        if (preg_match('#(Seeds: .+?)<#', $description, $seedMatch)) {
            $entry['sl'] = $seedMatch[1];
        }
        $data[] = $entry;
    }
    return $data;
}