/**
 * Return the first entry of the tags_html() result for a selector.
 *
 * @param mixed $selector Selector forwarded unchanged to tags_html().
 * @param mixed $source   Source forwarded unchanged to tags_html(); defaults to false.
 *
 * @return mixed The first element of the tags_html() result, or false when that result is empty/falsy.
 */
function tag_html($selector, $source = false)
{
    $matches = tags_html($selector, $source);

    // Collapse the result list to its first element; an empty/falsy result becomes false.
    $first = $matches ? reset($matches) : false;

    // The value is handed to re() before being returned — presumably a debug/report hook; confirm in re().
    re($first);

    return $first;
}
<?php
// Sample TASK: get past-2015 movies from IMDB Top 250 and write to MySQL DB. Take cover images as well
// ----------------------------------------------------------------------------------------------------

require '../parsemx.php';

begin_debug();

// By default, localhost root/root MySQL user is used. Set $q_server, $q_user, $q_password to override
$q_database = 'imdb_sample';
q("CREATE TABLE IF NOT EXISTS films (title VARCHAR(250), year SMALLINT, cover VARCHAR(250))");

$http_cache = true;
http_get('http://www.imdb.com/chart/top/');

foreach (tags_html('.titleColumn') as $film) {
    set_source($film);

    // The year is scraped from a remote page and later interpolated into SQL without q_escape(),
    // so force it to an integer here. This also makes the comparison below numeric: in PHP 8 a
    // non-numeric string compared with 2015 would be compared as a string and could slip through.
    $year = (int) inside('(', ')', tag_text('.secondaryInfo'));
    if ($year < 2015) {
        continue;
    }

    http_get(tag_link('a')); // Open film link

    // Take title and remove year like (2015) from it.
    // NOTE(review): the value is interpolated without surrounding quotes, so q_escape() presumably
    // returns an already-quoted SQL literal — confirm in parsemx.php.
    $title = q_escape(replace('(*)', '', tag_text('h1')));

    // If film already in DB, skip
    if (q("SELECT * FROM films WHERE title={$title}")) {
        continue;
    }

    http_get(tag_link('.poster')); // Open poster link

    // Download primary image from slideshow
    $cover = q_escape(http_get_file(tag_image('#primary-img'), 'covers/'));

    q("INSERT INTO films SET title={$title}, year={$year}, cover={$cover}");
}