/**
 * Returns the text of the first span.emph inside a price field.
 *
 * @param object|null $field Node returned by find() for the price field.
 * @return string The price text, or '' when the field is missing, has no
 *                text, or contains no span.emph.
 */
function mainBookDataFieldPrice($field)
{
    if (!is_object($field) || $field->plaintext == "") {
        return '';
    }

    $price = $field->find('span[class=emph]', 0);

    return is_object($price) ? (string) $price->plaintext : '';
}

/**
 * Doubles embedded double quotes so a value can sit inside a quoted CSV field.
 *
 * @param mixed $value Value to embed between double quotes.
 * @return string
 */
function mainBookDataCsvQuoted($value)
{
    return str_replace('"', '""', (string) $value);
}

/**
 * Builds the CSV row (terminated by "\n") for one book <li> node.
 *
 * Calls SisterSiteData() when the book has a sister-site link and
 * getAmazonData() when the cover image is not one of the placeholder URLs.
 *
 * @param object $book                 The <li> node describing one book.
 * @param string $initial_csv_row_data Text placed at the start of the row.
 * @return string
 */
function mainBookDataRow($book, $initial_csv_row_data)
{
    $title_node = $book->find('span[class=wrap]', 0);
    $BookTitle = is_object($title_node)
        ? htmlspecialchars_decode((string) $title_node->plaintext)
        : '';

    $image_node = $book->find('img', 0);
    $ImageUrl = is_object($image_node) ? $image_node->getAttribute("src") : '';

    // Used and new prices are the 2nd and 3rd div.field; the digital price
    // lives in the first div#field.
    $BK_UsedPrice = mainBookDataFieldPrice($book->find('div[class=field]', 1));
    $BK_NewPrice = mainBookDataFieldPrice($book->find('div[class=field]', 2));
    $BK_DigitalPrice = mainBookDataFieldPrice($book->find('div[id=field]', 0));

    // The detail line carries author and edition separated by the word
    // "Edition"; explode() replaces split(), which was removed in PHP 7.
    $detail_node = $book->find('div[class=detail]', 0);
    $detail_text = is_object($detail_node) ? (string) $detail_node->plaintext : '';
    $detail_parts = explode("Edition", $detail_text);

    $Author = htmlspecialchars_decode(
        trim(str_replace(array("Author:", "\n"), "", $detail_parts[0]))
    );
    $Edition = isset($detail_parts[1])
        ? trim(str_replace(array(":", "\n"), "", $detail_parts[1]))
        : '';

    // NOTE(review): this fallback holds seven empty columns, while
    // SisterSiteData() as defined in this file returns nine values - confirm
    // which width the CSV header expects.
    $SisterUrl = '';
    $sister_site_data = ",,,,,,";
    $sister_anchor = $book->find('div[id=field] a', 0);
    if (is_object($sister_anchor) && $sister_anchor->plaintext != "") {
        $SisterUrl = $sister_anchor->getAttribute("href");
        $sister_site_data = SisterSiteData($SisterUrl);
    }

    $amazon_columns = array(
        'AmazonListPrice'     => '',
        'AmazonDiscountPrice' => '',
        'NonAmazonNewPrice'   => '',
        'NonAmazonUsedPrice'  => '',
        'AmazonDetailPageURL' => '',
    );
    $Bk_ISBN = '';

    // Only query Amazon and derive an ISBN when the listing shows a real
    // cover image rather than one of the two placeholder URLs.
    $has_real_image = is_object($image_node)
        && $ImageUrl != "http://images.efollett.com/books/noBookImage.gif"
        && $ImageUrl != "http://images.efollett.com/booksnull";

    if ($has_real_image) {
        $amazon = getAmazonData("{$BookTitle}, {$Author}, {$Edition}");
        if ($amazon) {
            foreach (array_keys($amazon_columns) as $key) {
                if (isset($amazon[$key])) {
                    $amazon_columns[$key] = $amazon[$key];
                }
            }
        }

        // The ISBN is the image file name without its extension.
        $url_segments = explode("/", (string) $ImageUrl);
        $file_name_parts = explode('.', $url_segments[count($url_segments) - 1]);
        $Bk_ISBN = $file_name_parts[0];
    }

    $columns = array(
        $initial_csv_row_data,
        '"' . mainBookDataCsvQuoted($BookTitle) . '"',
        '"' . mainBookDataCsvQuoted($Author) . '"',
        '"' . mainBookDataCsvQuoted($Edition) . '"',
        $ImageUrl,
        $BK_UsedPrice,
        $BK_NewPrice,
        $BK_DigitalPrice,
        $Bk_ISBN,
        $amazon_columns['AmazonListPrice'],
        $amazon_columns['AmazonDiscountPrice'],
        $amazon_columns['NonAmazonNewPrice'],
        $amazon_columns['NonAmazonUsedPrice'],
        $amazon_columns['AmazonDetailPageURL'],
        $SisterUrl,
        $sister_site_data,
    );

    return implode(',', $columns) . "\n";
}

/**
 * Scrapes the book listing at $url and appends one CSV row per book to $output.
 *
 * Every <li> under each "div#material_results ul" list is treated as one
 * book. Each row is echoed (followed by a blank line) and written to $output.
 * When the page has no such list, a single placeholder row consisting of
 * $initial_csv_row_data and empty columns is echoed and written instead.
 * Pieces missing from a book's markup are emitted as empty columns.
 *
 * @param string   $url                  Address of the listing page to load.
 * @param string   $initial_csv_row_data Text placed at the start of every row.
 * @param resource $output               Handle the rows are written to with fwrite().
 * @return void
 */
function MainBookData($url, $initial_csv_row_data, &$output)
{
    include_once "library/simple_html_dom.php";

    $html = file_get_dom($url);
    $ul = is_object($html) ? $html->find('div[id=material_results] ul') : null;

    if (!empty($ul)) {
        // One <ul> per type of book, one <li> per book.
        foreach ($ul as $book_list) {
            $books = $book_list->find('li');
            if (!is_array($books)) {
                continue;
            }

            foreach ($books as $book) {
                $row_data = mainBookDataRow($book, $initial_csv_row_data);
                echo $row_data;
                echo "\n";
                fwrite($output, $row_data);
            }
        }
    } else {
        // If no book is found, still add a record for this page.
        echo $row_data = "{$initial_csv_row_data},,,,,,,,\n";
        fwrite($output, $row_data);
    }

    // Explicitly run the DOM object's destructor before dropping the
    // reference (presumably to release parser memory promptly).
    if (is_object($html)) {
        $html->__destruct();
    }
    unset($html);
    unset($ul);
}
<?php
/**
 * Returns the child at $index of a DOM node, or null when either is missing.
 *
 * @param object|null $node  Node whose `children` list is consulted.
 * @param int         $index Position within the children list.
 * @return object|null
 */
function sisterSiteChild($node, $index)
{
    if (!is_object($node)) {
        return null;
    }

    $children = $node->children;

    return (is_array($children) && isset($children[$index])) ? $children[$index] : null;
}

/**
 * Returns the trimmed plaintext of a DOM node, or '' when the node is missing.
 *
 * @param object|null $node
 * @return string
 */
function sisterSiteNodeText($node)
{
    return is_object($node) ? trim((string) $node->plaintext) : '';
}

/**
 * Formats one value as a CSV field, quoting it only when it contains a comma,
 * a double quote or a line break so plain values are emitted unchanged.
 *
 * @param mixed $value
 * @return string
 */
function sisterSiteCsvField($value)
{
    $value = (string) $value;
    if (strpbrk($value, "\",\r\n") === false) {
        return $value;
    }

    return '"' . str_replace('"', '""', $value) . '"';
}

/**
 * Loads the page at $sister_url and returns its book details as nine
 * comma-separated values.
 *
 * All values are read from the first "div#bodycenter table td" cell: the two
 * prices from the cell's child at index 2, the remaining fields from rows of
 * the cell's child at index 9. Values that cannot be located are returned as
 * empty fields.
 *
 * @param string $sister_url Address of the page to load.
 * @return string "Author,Edition,Publisher,ISBN-10 print,ISBN-13 print,
 *                ISBN-10 digital,ISBN-13 digital,list price,you-pay price".
 */
function SisterSiteData($sister_url)
{
    include_once "library/simple_html_dom.php";

    $html = file_get_dom($sister_url);
    $cell = is_object($html) ? $html->find('div[id=bodycenter] table td', 0) : null;

    $price_block = sisterSiteChild($cell, 2);
    $ListPrice = sisterSiteNodeText(sisterSiteChild($price_block, 1));
    $YouPayPrice = sisterSiteNodeText(sisterSiteChild($price_block, 6));

    // Row index of each detail value, listed in output column order.
    $detail_rows = array(
        'Author'          => 0,
        'Edition'         => 4,
        'Publisher'       => 3,
        'ISBN_10_Print'   => 6,
        'ISBN_13_Print'   => 7,
        'ISBN_10_Digital' => 8,
        'ISBN_13_Digital' => 9,
    );

    $detail_table = sisterSiteChild($cell, 9);
    $fields = array();
    foreach ($detail_rows as $row_index) {
        $row = is_object($detail_table) ? $detail_table->find('tr', $row_index) : null;
        // The value sits in the second cell of the row.
        $fields[] = sisterSiteCsvField(sisterSiteNodeText(sisterSiteChild($row, 1)));
    }
    $fields[] = sisterSiteCsvField($ListPrice);
    $fields[] = sisterSiteCsvField($YouPayPrice);

    // Release the DOM the same way MainBookData() does; all values have
    // already been copied out as strings.
    if (is_object($html)) {
        $html->__destruct();
    }
    unset($html);

    return implode(',', $fields);
}

// NOTE(review): file-scope invocation with the placeholder argument "as";
// it runs whenever this file is loaded and looks like a debugging leftover -
// confirm and remove.
echo SisterSiteData("as");