/**
 * Scrape a single MichaelDVD disc page and record what was found as item attributes.
 *
 * The page for $search_attributes_r['michaeldid'] is fetched via fetchURI() and a
 * series of regular expressions pull out the cover image, title, year, blurb,
 * rating, cast, director, genre, running time, region, aspect ratio, audio,
 * subtitle and extras information. Each value is stored with addItemAttribute().
 *
 * If the page links to an IMDb title, the 'imdb' site plugin is queried as well
 * and its data merged in: 'actors', 'director' and 'year' replace the scraped
 * values, 'plot' is stored as 'blurb', 'age_rating' and 'run_time' are ignored,
 * and every other key is added as-is.
 *
 * @param array  $search_attributes_r Search attributes; must contain 'michaeldid'.
 * @param string $s_item_type         Item type, passed through to the imdb plugin's queryItem().
 * @return bool TRUE when a page was fetched (even if little could be parsed from it),
 *              FALSE when fetchURI() returned nothing.
 */
function queryItem($search_attributes_r, $s_item_type) {
    $discId = (string) $search_attributes_r['michaeldid'];

    $page = $this->fetchURI('http://www.michaeldvd.com.au/Discs/Disc.asp?ID=' . $search_attributes_r['michaeldid']);
    if (!$page) {
        return FALSE;
    }

    // First translate HTML entities back into "real" characters. This
    // achieves the reverse of htmlentities():
    $page = strtr($page, array_flip(get_html_translation_table(HTML_ENTITIES, ENT_QUOTES)));

    // But the non-breaking-space entity is translated to a hard space
    // (chr(160)), which trim() doesn't trim. Work around that:
    $page = strtr($page, chr(160), ' ');

    // The image may not even exist! Only record a cover URL the page itself
    // references. The id is quoted so it is always matched literally.
    $discIdPattern = preg_quote($discId, '#');
    if (preg_match('#/CoverArt/' . $discIdPattern . '\\.jpg#i', $page)) {
        $this->addItemAttribute('imageurl', 'http://www.michaeldvd.com.au/CoverArt/' . $discId . '.jpg');
    } elseif (preg_match('#/CoverArtUnverified/' . $discIdPattern . '\\.jpg#i', $page)) {
        $this->addItemAttribute('imageurl', 'http://www.michaeldvd.com.au/CoverArtUnverified/' . $discId . '.jpg');
    }

    if (preg_match('#<title.*?>(.*?)</title#is', $page, $matches)) {
        $title = strip_tags($matches[1]);

        // "Some Title (2003)" => title "Some Title ", year "2003".
        if (preg_match("/(.*)\\(([0-9]+)\\)/", $title, $matches)) {
            $title = $matches[1];
            $this->addItemAttribute('year', $matches[2]);
        }

        // Drop a "(Blu-ray)" marker and anything following it.
        if (($idx = strpos($title, "(Blu-ray)")) !== FALSE) {
            $title = substr($title, 0, $idx);
        }

        $this->addItemAttribute('title', $title);
    }

    if (preg_match('#best version.*?best version.*?<td.*?>(.+?)</td#is', $page, $matches)) {
        $p = trim(strip_tags(str_replace('<br>', "\n", $matches[1])));

        // No point in putting "undetermined" in the database. Leave it
        // blank so that a refresh can correct it later. If there is no
        // difference ("Same"), do not store anything either, as it is annoying.
        if (strcasecmp($p, 'undetermined') !== 0 && strcasecmp($p, 'Same') !== 0) {
            $this->addItemAttribute('miker4r1', $p);
        }
    }

    if (preg_match('#<!.*?blurb.*?>(.*?)<!#is', $page, $matches)) {
        $p = trim(strip_tags($matches[1]));

        // Only store the blurb if one has actually been written.
        if (strcasecmp($p, 'no blurb yet') !== 0) {
            $this->addItemAttribute('blurb', $p);
        }
    }

    if (preg_match('#http://www\\.imdb\\.com/title/tt([0-9]+)/#i', $page, $matches)) {
        $this->addItemAttribute('imdb_id', $matches[1]);
    }

    if (preg_match('#/Graphics/Ratings/(.+?).gif"#i', $page, $matches)) {
        $this->addItemAttribute('age_rating', strip_tags($matches[1]));
    }

    if (preg_match('#Starring/dir/music.*?</tr.*?<td.*?>(.*?)</td#is', $page, $matches)) {
        $block = strip_tags(str_replace('<br>', ',', $matches[1]));
        $this->addItemAttribute('actors', explode(",", $block));
    }

    if (preg_match('#Director\\(s\\).*?</tr.*?<td.*?>(.*?)</td#is', $page, $matches)) {
        $block = strip_tags(str_replace('<br>', ',', $matches[1]));
        $this->addItemAttribute('director', explode(",", $block));
    }

    if (preg_match('#<!--\\s*genre.*?<td[^>]*>(.*?)</td#is', $page, $matches)) {
        $this->addItemAttribute('genre', strtolower(trim(strip_tags($matches[1]))));
    }

    if (preg_match('#Movie release year.*?<td.*?>([0-9]*)</td#is', $page, $matches)) {
        $this->addItemAttribute('year', strip_tags($matches[1]));
    }

    if (preg_match('#<!--\\s*Running time.*?<td.*?>\\s*(\\d*).*?</td#is', $page, $matches)) {
        // OpenDb can only handle four-digit running times; MichaelD has
        // minutes and seconds as "xxx:xx", so only the leading digits are kept.
        $this->addItemAttribute('run_time', strip_tags($matches[1]));
    }

    if (preg_match('#<!--\\s*Region coding.*?<td.*?>(.*?)</td#is', $page, $matches)) {
        $this->addItemAttribute('dvd_region', explode(' ', strip_tags($matches[1])));
    }

    if (preg_match('#<!--[\\s]*widescreen/full.*?<td.*?>(.*?)</td#is', $page, $matches)) {
        if (preg_match('#full\\s*frame#i', $matches[1])) {
            $this->addItemAttribute('ratio', '1.33');
        } elseif (preg_match('#(\\d+(?:\\.\\d+)):1#', $matches[1], $submatches)) {
            $this->addItemAttribute('ratio', $submatches[1]);
        }

        if (stristr($matches[1], '16x9 enhanced')) {
            $this->addItemAttribute('anamorphic', 'Y');
        }
    }

    if (preg_match('#<!--\\s*audio parameters.*?<tr.*?audio.*?<tr.*?>(.*?)</tr#is', $page, $matches)) {
        // Record every AUDIO_LANG lookup value that appears in the audio row.
        // Lookup values are matched literally, so they are quoted before being
        // embedded in the pattern.
        $subresource = fetch_attribute_type_lookup_rs('AUDIO_LANG', NULL);
        while ($sub = db_fetch_assoc($subresource)) {
            if (preg_match('#' . preg_quote($sub['value'], '#') . '#i', $matches[1])) {
                $this->addItemAttribute('audio_lang', $sub['value']);
            }
        }

        if (preg_match('#english[^,]dolby digital 5\\.1#i', $matches[1])) {
            $this->addItemAttribute('audio_lang', 'ENGLISH_5.1');
        }

        if (preg_match('#english dts#i', $matches[1])) {
            $this->addItemAttribute('audio_lang', 'ENGLISH_DTS');
        }

        if (preg_match('#commentary#i', $matches[1])) {
            $this->addItemAttribute('audio_xtra', 'DIR_COMMENT');
        }
    }

    if (preg_match('#<!--\\s*audio parameters.*?</tr.*?subtitles.*?</tr>.*?<tr.*?>(.*?)</tr#is', $page, $matches)) {
        $subtitles = strip_tags($matches[1]);

        // Same approach as for audio: literal match of each SUBTITLES lookup value.
        $subresource = fetch_attribute_type_lookup_rs('SUBTITLES', NULL);
        while ($sub = db_fetch_assoc($subresource)) {
            if (preg_match('#' . preg_quote($sub['value'], '#') . '#i', $subtitles)) {
                $this->addItemAttribute('subtitles', $sub['value']);
            }
        }
    }

    if (preg_match('#<!--\\s*extras.*?</tr.*?<td.*?>(.*?)</td#is', $page, $matches)) {
        // One extra per line instead of a comma separated list.
        $this->addItemAttribute('dvd_extras', strip_tags(preg_replace('#\\s*,\\s*#', "\n", $matches[1])));
    }

    // MichaelD puts the disc's year in parenthesis after the
    // title. OpenDb shifts articles to the end of the title,
    // after a comma, then optionally appends the year in item
    // listings. So "The Animatrix" becomes
    // "Animatrix (2003), The (2003)".
    // "Brazil" becomes "Brazil (1985) (1985)".
    // "Blues Brothers 2000 (2000) (2000)". See the problem?
    //
    // This block removes the year from the disc's title, provided
    // it is at the end and parenthesised. "Blues Brothers 2000"
    // should remain intact. ARD.
    $currentTitle = $this->getItemAttribute('title');
    $currentYear = $this->getItemAttribute('year');
    if (strlen($currentTitle) > 0 && is_numeric($currentYear)) {
        // is_numeric() also accepts values such as "1.5" or "+2003", so quote
        // the year before using it inside a pattern.
        $yearPattern = preg_quote((string) $currentYear, '/');
        if (preg_match('/\\(' . $yearPattern . '\\)$/', $currentTitle)) {
            $this->replaceItemAttribute('title', preg_replace('/\\s*\\(' . $yearPattern . '\\)$/', '', $currentTitle));
        }
    }

    // Attempt to include data from IMDB if available
    if (is_numeric($this->getItemAttribute('imdb_id'))) {
        $sitePlugin =& get_site_plugin_instance('imdb');
        if ($sitePlugin !== FALSE) {
            if ($sitePlugin->queryItem(array('imdb_id' => $this->getItemAttribute('imdb_id')), $s_item_type)) {
                // getItemData() is called without an item type here, so
                // presumably no attribute mapping is applied - TODO confirm.
                $itemData = $sitePlugin->getItemData();
                if (is_array($itemData)) {
                    // Merge the IMDb data in. foreach replaces the former
                    // each() loop, which no longer exists in PHP 8.
                    foreach ($itemData as $key => $value) {
                        switch ($key) {
                            case 'actors':
                            case 'director':
                            case 'year':
                                // IMDb is preferred over the scraped value.
                                $this->replaceItemAttribute($key, $value);
                                break;

                            case 'plot':
                                // have to map from imdb to michaeld attribute type.
                                $this->addItemAttribute('blurb', $value);
                                break;

                            case 'age_rating':
                            case 'run_time':
                                // Keep the values scraped from MichaelDVD.
                                break;

                            default:
                                $this->addItemAttribute($key, $value);
                                break;
                        }
                    }
                }
            }
        }
    }

    return TRUE;
}
/**
 * Parse an Amazon video product page and record what was found as item attributes.
 *
 * Nothing is returned; every value that can be extracted from $pageBuffer is
 * stored through addItemAttribute(). The attributes this function may set are:
 * genre, actors, director, dvd_region, ratio, no_discs, age_rating, studio,
 * year, dvd_rel_dt, run_time, anamorphic, audio_lang, subtitles, dvd_extras
 * and imdb_id.
 *
 * If an IMDb id is found, the 'imdb' site plugin is queried too and its data
 * merged in: 'actors', 'director', 'year' and 'genre' replace the Amazon
 * values, 'plot' is stored as 'blurb', 'age_rating' and 'run_time' are ignored
 * and every other key is added as-is.
 *
 * @param array  $search_attributes_r Search attributes (not read by this function).
 * @param string $s_item_type         Item type, passed through to the imdb plugin's queryItem().
 * @param string $pageBuffer          HTML of the Amazon product page.
 * @return void
 */
function parse_amazon_video_data($search_attributes_r, $s_item_type, $pageBuffer) {
    // FIXME - this is used for more than just US site now, so this is invalid
    //$this->addItemAttribute('vid_format', 'NTSC');

    // genre extraction block.
    $genreMarker = "<li><b>Genres:</b>";
    $startidx = strpos($pageBuffer, $genreMarker);
    if ($startidx !== FALSE) {
        // Move past start text.
        $startidx += strlen($genreMarker);
        $endidx = strpos($pageBuffer, "</li>", $startidx);
        if ($endidx !== FALSE) {
            // Get rid of all the html - a quick hack!
            $genre = trim(substr($pageBuffer, $startidx, $endidx - $startidx));
            $genre = strip_tags($genre);

            // If composite genre, get rid of / as we do not need it.
            $genre = str_replace(" / ", " ", $genre);

            // Expand Sci-Fi to OpenDb matching value.
            $genre = str_replace("Sci-Fi", "ScienceFiction", $genre);

            // Match all whitespace and convert to a comma.
            $genre = preg_replace("/[\\s]+/", ",", $genre);
            $genre = str_replace("(more)", "", $genre);

            $this->addItemAttribute('genre', explode(",", $genre));
        }
    }

    $this->addItemAttribute('actors', parse_amazon_video_people("Actors", $pageBuffer));
    $this->addItemAttribute('director', parse_amazon_video_people("Directors", $pageBuffer));

    // Region extraction block
    //<li><b>Region: </b>Region 1
    if (preg_match("/<li><b>Region:[\\s]*<\\/b>Region ([0-6])/", $pageBuffer, $regs)) {
        $this->addItemAttribute('dvd_region', $regs[1]);
    }

    // Ratio
    //<li><b>Aspect Ratio:</b> 1.85:1</li>
    if (preg_match("!<li><b>Aspect Ratio:</b>(.*?)<\\/li>!", $pageBuffer, $regs)) {
        if (preg_match_all("/([0-9]{1}\\.[0-9]+):1/", $regs[1], $matches)) {
            $this->addItemAttribute('ratio', $matches[1]);
        }
    }

    if (preg_match("/<li><b>Number of discs:[\\s]*<\\/b>[\\s]*([0-9]+)/", $pageBuffer, $regs2)) {
        $this->addItemAttribute('no_discs', $regs2[1]);
    }

    //<b>Rating</b> <img src="http://ec1.images-amazon.com/images/G/01/detail/r._V46905301_.gif" alt="R" align="absmiddle" border="0" height="11" width="12"></li>
    if (preg_match("!Rated:</span> (.*?) !mis", $pageBuffer, $regs)) {
        $this->addItemAttribute('age_rating', $regs[1]);
    } elseif (preg_match("!Rated:.*?<span>\\s*(.*?)\\s!ms", $pageBuffer, $regs)) {
        $this->addItemAttribute('age_rating', $regs[1]);
    }

    if (preg_match("!<b>Studio:[\\s]*</b>[\\s]*([^<]+)</li>!i", $pageBuffer, $regs)) {
        $this->addItemAttribute('studio', $regs[1]);
    }

    //<li><b>DVD Release Date:</b> April 27, 2004</li>
    if (preg_match("/<b>DVD Release Date:<\\/b>([^<]+)<\\/li>/i", $pageBuffer, $regs)) {
        $timestamp = strtotime($regs[1]);

        // strtotime() returns FALSE for text it cannot parse. Skip the
        // attributes in that case instead of storing a bogus 1970 date.
        if ($timestamp !== FALSE) {
            // if year not defined, use dvd_rel_dt
            if ($this->getItemAttribute('year') === FALSE) {
                $this->addItemAttribute('year', date('Y', $timestamp));
            }

            $this->addItemAttribute('dvd_rel_dt', date('d/m/Y', $timestamp));
        }
    }

    // Duration extraction block
    //<li><b>Run Time:</b> 125 minutes </li>
    if (preg_match("/<li><b>Run Time:<\\/b>[\\s]*([0-9]+) minutes/i", $pageBuffer, $regs)) {
        $this->addItemAttribute('run_time', $regs[1]);
    }

    // Get the anamorphic format attribute - Thanks to André Monz <amonz@users.sourceforge.net>
    if (preg_match("/anamorphic/", $pageBuffer)) {
        $this->addItemAttribute('anamorphic', 'Y');
    }

    if (preg_match("/THX Certified/i", $pageBuffer)) {
        $this->addItemAttribute('audio_lang', 'ENGLISH_THX');
    }

    if (preg_match("!<li><b>Language:</b>[\\s]*(.*?)</li>!i", $pageBuffer, $regs)) {
        $audio_lang_r = explode(',', $regs[1]);

        $amazon_dvd_audio_map = array(
            array("English", "2.0"),
            array("English", "5.0"),
            array("English", "5.1"),
            array("English", "6.1", "EX"),
            array("English", "6.1", "DTS", "ES"),
            array("English", "6.1"),
            array("English", "DTS"));

        $amazon_audio_lang_map = array(
            array("French"),
            array("Spanish"),
            array("German"));

        // Each listed language is checked against both maps; either may
        // contribute an audio_lang value.
        foreach ($audio_lang_r as $audio_lang) {
            $key = parse_language_info($audio_lang, $amazon_dvd_audio_map);
            if ($key !== NULL) {
                $this->addItemAttribute('audio_lang', $key);
            }

            $key = parse_language_info($audio_lang, $amazon_audio_lang_map);
            if ($key !== NULL) {
                $this->addItemAttribute('audio_lang', $key);
            }
        }
    }

    if (preg_match("!<li><b>Subtitles:</b>[\\s]*(.*?)</li>!i", $pageBuffer, $regs)) {
        $amazon_video_subtitle_map = array(
            array("English"),
            array("French"),
            array("Spanish"),
            array("German"));

        $subtitle_lang_r = explode(',', $regs[1]);
        foreach ($subtitle_lang_r as $subtitle_lang) {
            $key = parse_language_info($subtitle_lang, $amazon_video_subtitle_map);
            if ($key !== NULL) {
                $this->addItemAttribute('subtitles', $key);
            }
        }
    }

    // Edition details block - 'dvd_extras' attribute
    if (preg_match("!<b>DVD Features:<\\/b><ul>(.*?)<\\/ul>!", $pageBuffer, $regs)) {
        $dvdFeaturesBlock = $regs[1];
        if (preg_match_all("/<li>(.*)<\\/li>/mUi", $dvdFeaturesBlock, $matches)) {
            // The target charset does not change between list items, so
            // look it up once.
            $charset = get_opendb_config_var('themes', 'charset') == 'utf-8' ? 'UTF-8' : 'ISO-8859-1';

            $dvd_extras = array();
            foreach ($matches[1] as $item) {
                $item = html_entity_decode(strip_tags($item), ENT_COMPAT, $charset);

                // We may have a hard space here, so get rid of it.
                $item = trim(strtr($item, chr(160), ' '));

                // These entries are covered by dedicated attributes.
                if (strpos($item, "anamorphic") !== FALSE
                        || strpos($item, "Available Subtitles") !== FALSE
                        || strpos($item, "Available Audio Tracks") !== FALSE) {
                    continue;
                }

                //Commentary by: director George Cosmatos
                if (strpos($item, "Commentary by") !== FALSE && ends_with($item, "Unknown Format")) {
                    $item = substr($item, 0, strlen($item) - strlen("Unknown Format"));
                } elseif (preg_match("/\"([^\"]+)\"/", $item, $reg2)) {
                    // Keep only the quoted name of the feature.
                    $item = $reg2[1];
                }

                $dvd_extras[] = $item;
            }

            if (count($dvd_extras) > 0) {
                $this->addItemAttribute('dvd_extras', implode("\n", $dvd_extras));
            }
        }
    }

    // IMDB ID block
    //<A HREF="http://amazon.imdb.com/title/tt0319061/">
    //http://www.amazon.com/gp/redirect.html/103-0177494-1143005?location=http://amazon.imdb.com/title/tt0319061&token=F5BF95E1B869FD4EB1192434BA5B7FECBA8B3718
    //http://amazon.imdb.com/title/tt0319061
    if (preg_match("!http://amazon.imdb.com/title/tt([0-9]+)!is", $pageBuffer, $regs)) {
        $this->addItemAttribute('imdb_id', $regs[1]);
    }

    // Attempt to include data from IMDB if available - but only for DVD, VHS, etc
    // as IMDB does not work with BOOKS or CD's.
    if (is_numeric($this->getItemAttribute('imdb_id'))) {
        $sitePlugin =& get_site_plugin_instance('imdb');
        if ($sitePlugin !== FALSE) {
            if ($sitePlugin->queryItem(array('imdb_id' => $this->getItemAttribute('imdb_id')), $s_item_type)) {
                // getItemData() is called without an item type here, so
                // presumably no attribute mapping is applied - TODO confirm.
                $itemData = $sitePlugin->getItemData();
                if (is_array($itemData)) {
                    // Merge the IMDb data in. foreach replaces the former
                    // each() loop, which no longer exists in PHP 8.
                    foreach ($itemData as $key => $value) {
                        switch ($key) {
                            case 'actors':
                            case 'director':
                            case 'year':
                            case 'genre':
                                // IMDb is preferred over the Amazon value.
                                $this->replaceItemAttribute($key, $value);
                                break;

                            case 'plot':
                                // have to map from imdb to amazon attribute type.
                                $this->addItemAttribute('blurb', $value);
                                break;

                            case 'age_rating':
                            case 'run_time':
                                // Keep the values parsed from Amazon.
                                break;

                            default:
                                $this->addItemAttribute($key, $value);
                                break;
                        }
                    }
                }
            }
        }
    }
}
/**
 * Drive a site-plugin lookup and hand the result over to the matching item
 * input process.
 *
 * Flow:
 *  1. Instantiate the plugin named by $HTTP_VARS['site_type']; if that fails
 *     an error block is printed and the failure is logged.
 *  2. For op 'site-search', run handle_site_search(). Anything other than an
 *     exact title match is rendered (search page or error block) and the
 *     function ends. For every other op, the plugin's _queryItem() is called
 *     directly and a failure is rendered as an error block.
 *  3. With item data available, merge it into $HTTP_VARS and continue with
 *     the refresh path (existing item) or the site path (new item); the
 *     'auto_site_update' / 'auto_site_insert' settings decide whether the
 *     edit form is bypassed. Without item data, fall back to op 'new'.
 *
 * All four parameters are taken by reference and are forwarded to the
 * perform_*_process() functions; $HTTP_VARS and $item_r are modified here.
 */
function perform_site_process(&$item_r, &$status_type_r, &$HTTP_VARS, &$footer_links_r) {
    $sitePlugin =& get_site_plugin_instance($HTTP_VARS['site_type']);

    // No plugin - nothing else can be done.
    if ($sitePlugin === FALSE) {
        do_op_title($item_r, $status_type_r, $HTTP_VARS['op']);
        echo format_error_block(get_opendb_lang_var('undefined_error'));
        opendb_logger(OPENDB_LOG_ERROR, __FILE__, __FUNCTION__, 'Site plugin class not found', $HTTP_VARS);
        return;
    }

    if ($HTTP_VARS['op'] == 'site-search') {
        $searchOutcome = handle_site_search($sitePlugin, $HTTP_VARS, $errors, $footer_links_r);

        // An exact title match falls through to the site / refresh handling
        // below; everything else is displayed right here.
        if ($searchOutcome !== "__EXACT_TITLE_MATCH__") {
            do_op_title($item_r, $status_type_r, 'site-search');

            if ($searchOutcome !== FALSE) {
                // search page
                echo $searchOutcome;
            } else {
                echo format_error_block($errors);
            }

            if (get_opendb_config_var('item_input.site', 'debug') === TRUE) {
                echo $sitePlugin->getDebugItemDataAsHtml();
            }
            return;
        }
    } elseif ($sitePlugin->_queryItem($HTTP_VARS) !== TRUE) {
        // display error
        do_op_title($item_r, $status_type_r, 'new');

        // we need to provide at least some indication of why there was a problem.
        $errors = $sitePlugin->getErrors();
        if ($errors === FALSE) {
            $errors = get_opendb_lang_var('undefined_error');
        }
        echo format_error_block($errors);

        if (get_opendb_config_var('item_input.site', 'debug') === TRUE) {
            echo $sitePlugin->getDebugItemDataAsHtml();
        }
        return;
    }

    // at this point we have an exact match!
    $siteItemData = $sitePlugin->getItemData($item_r['s_item_type']);

    if (!is_not_empty_array($siteItemData)) {
        // no info found - drop down to new operation.
        $HTTP_VARS['op'] = 'new';
        perform_new_process($item_r, $status_type_r, $HTTP_VARS, $footer_links_r);
        return;
    }

    $HTTP_VARS = array_merge($HTTP_VARS, $siteItemData);

    if (is_exists_item($item_r['item_id'])) {
        $HTTP_VARS['op'] = 'refresh';

        // A plugin may bring its own list of title articles.
        $pluginTitleArticles = $sitePlugin->getConfigValue('item_input.title_articles');
        if (is_not_empty_array($pluginTitleArticles)) {
            set_opendb_config_ovrd_var('item_input', 'title_articles', $pluginTitleArticles);
        }

        if (get_opendb_config_var('item_input', 'auto_site_update') === TRUE) {
            // expand $HTTP_VARS to bypass edit form.
            $HTTP_VARS = get_site_item_input_data($HTTP_VARS['op'], $item_r, $HTTP_VARS);
            perform_update_process($item_r, $status_type_r, $HTTP_VARS, $footer_links_r);
        } else {
            perform_edit_process($item_r, $status_type_r, $HTTP_VARS, $footer_links_r);
        }
    } else {
        $HTTP_VARS['op'] = 'site';

        if (get_opendb_config_var('item_input', 'auto_site_insert') === TRUE) {
            $item_r['s_status_type'] = fetch_default_status_type();

            // expand $HTTP_VARS to bypass edit form.
            $HTTP_VARS = get_site_item_input_data($HTTP_VARS['op'], $item_r, $HTTP_VARS);
            perform_insert_process($item_r, $status_type_r, $HTTP_VARS, $footer_links_r);
        } else {
            perform_new_process($item_r, $status_type_r, $HTTP_VARS, $footer_links_r);
        }
    }

    if (get_opendb_config_var('item_input.site', 'debug') === TRUE) {
        echo $sitePlugin->getDebugItemDataAsHtml();
    }
}