/**
 * Import the books listed under one section of a fetched page into the
 * literary_award table.
 *
 * The page at $url is downloaded with connect_to() and parsed with
 * str_get_html(). The first <h2> whose second <span> has a plaintext equal to
 * $label is selected; the list located by find_ul_tag() for that heading is
 * handed to parse_list_books(), and every returned book is inserted as one row.
 * Each inserted id is echoed followed by a newline.
 *
 * Each book is read as an indexed array: [0] id, [1] title, [2] author,
 * [3] date. The remaining five columns (Amazon URL, prices, shipping, ISBN)
 * are stored as empty strings.
 *
 * NOTE(review): values are now bound through a prepared statement instead of
 * being concatenated into the SQL text. If parse_list_books() escapes quotes
 * itself to work around the old concatenation, that escaping should be removed
 * - TODO confirm.
 *
 * @param string $url   Address of the page to fetch.
 * @param string $label Heading text that identifies the section to import.
 * @param string $award Award name stored with every imported row.
 * @param object $bdd   Database handle; presumably a PDO connection (prepare() is called on it).
 * @param mixed  $first Passed through unchanged to parse_list_books().
 * @return void
 */
function wiki_to_db($url, $label, $award, $bdd, $first) {
    $html = str_get_html(connect_to($url));
    if (!$html) {
        // Nothing parseable came back; calling find() on a non-object would be fatal.
        return;
    }

    foreach ($html->find('h2') as $h2_node) {
        $heading = $h2_node->find('span', 1);
        if (!isset($heading) || strcmp($heading->plaintext, $label) !== 0) {
            continue;
        }

        $list = find_ul_tag($h2_node);
        if (isset($list)) {
            // Placeholders keep apostrophes in titles/authors from breaking
            // (or injecting into) the statement.
            $insert = $bdd->prepare('INSERT INTO literary_award VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?)');
            if ($insert === false) {
                trigger_error('wiki_to_db: could not prepare the literary_award insert', E_USER_WARNING);
            } else {
                foreach (parse_list_books($list, $first) as $book) {
                    $id = $book[0];
                    // strval keeps the "everything is sent as a string" behaviour
                    // of the former quoted literals.
                    $insert->execute(array_map('strval', array(
                        $id,
                        $book[1],
                        $book[2],
                        $book[3],
                        $award,
                        '',
                        '',
                        '',
                        '',
                        '',
                    )));
                    echo $id . "\n";
                }
            }
        }

        // Only the first matching section is imported. Falling through (rather
        // than returning) guarantees the DOM below is always released.
        break;
    }

    $html->clear();
    unset($html);
}
/**
 * Read the price shown on the amazon.fr product page for an ISBN.
 *
 * The page http://www.amazon.fr/dp/<ISBN> is fetched with connect_to() and
 * parsed with str_get_html(). The plaintext of the first element matching
 * ".priceLarge" is returned with every "EUR " occurrence removed and
 * surrounding whitespace trimmed.
 *
 * @param string $ISBN Identifier appended to the amazon.fr /dp/ URL.
 * @return string|int The cleaned price text, or 0 when the page could not be
 *                    parsed or contains no ".priceLarge" element.
 */
function get_amazon_price($ISBN) {
    $amazon_url = "http://www.amazon.fr/dp/" . $ISBN;

    $html = str_get_html(connect_to($amazon_url));
    if (!$html) {
        // Unparseable/empty response: report "no price" instead of failing
        // on a method call against a non-object.
        return 0;
    }

    $price_node = $html->find('.priceLarge', 0);
    if (isset($price_node)) {
        $amazon_price = trim(str_replace('EUR ', '', $price_node->plaintext));
    } else {
        $amazon_price = 0;
    }

    // Release the parsed DOM explicitly before returning.
    $html->clear();
    unset($html);

    return $amazon_price;
}
/**
 * Rebuild the reading_list table from the book list published at
 * http://personalmba.com/best-business-books/.
 *
 * Steps:
 *  1. Open a PDO connection using the DB_PMBA / DB_USER / DB_PWD constants.
 *  2. Fetch and parse the page; if it cannot be parsed, stop without touching
 *     the table (the previous content is kept rather than wiped).
 *  3. Delete every row of reading_list.
 *  4. For each <h2 id=...> heading, take the element that follows it and insert
 *     one row per <li>: the first link supplies the title and the Amazon URL,
 *     the second link supplies the English review URL, and get_id() /
 *     get_author() are applied to the item's plaintext. French fields, prices
 *     and ISBN are stored as empty strings.
 *
 * Each inserted id is echoed followed by a newline.
 *
 * NOTE(review): values are now bound through a prepared statement. If
 * get_author() escapes quotes itself to suit the former string-built SQL, that
 * escaping should be removed - TODO confirm.
 *
 * @return void
 * @throws PDOException On connection or query failure (exception error mode
 *                      is now actually passed to the connection).
 */
function reading_list(){
    $url = "http://personalmba.com/best-business-books/";
    $pmba_url = "http://personalmba.com";

    // 1. Connect. The options array was previously built but never handed to
    //    PDO, so the requested error mode was not applied.
    $pdo_options = array(PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION);
    $bdd = new PDO(DB_PMBA, DB_USER, DB_PWD, $pdo_options);
    $bdd->exec("SET CHARACTER SET utf8");

    // 2. Fetch before cleaning, so a failed download does not leave an empty table.
    //    (The stray, undefined second argument to connect_to() has been dropped
    //    to match the other call sites.)
    $html = str_get_html(connect_to($url));
    if (!$html) {
        $bdd = null;
        return;
    }

    // 3. Clean.
    $bdd->exec('DELETE FROM reading_list');

    // 4. Insert. Placeholders keep apostrophes in titles/authors from breaking
    //    (or injecting into) the statement.
    $insert = $bdd->prepare('INSERT INTO reading_list VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)');

    foreach ($html->find('h2[id]') as $category) {
        $en_category = $category->innertext;

        $list = $category->next_sibling();
        if (!$list) {
            // Heading with nothing after it: no books to read.
            continue;
        }

        foreach ($list->find('li') as $book) {
            $title_link = $book->find('a', 0);
            $review_link = $book->find('a', 1);

            // A missing link contributes an empty string, which is what the
            // former null property reads concatenated to.
            $title = isset($title_link) ? $title_link->plaintext : '';
            $amazon_com_url = $pmba_url . (isset($title_link) ? $title_link->href : '');
            $en_review = $pmba_url . (isset($review_link) ? $review_link->href : '');

            $item_text = $book->plaintext;
            $id = get_id($item_text);
            $author = get_author($item_text);

            // strval keeps the "everything is sent as a string" behaviour of
            // the former quoted literals.
            $insert->execute(array_map('strval', array(
                $id,
                $title,
                $author,
                $en_category,
                '',              // fr_category
                $en_review,
                '',              // fr_review
                $amazon_com_url,
                '',              // amazon_best_url
                '',              // amazon_price
                '',              // cheapest_price
                '',              // ship_price
                '',              // ISBN
            )));
            echo $id."\n";
        }
    }

    $html->clear();
    unset($html);

    // Drop the connection.
    $bdd = null;
}