Exemplo n.º 1
0
/**
 * Scrapes the book listing at $url and appends one CSV row per book to $output.
 *
 * Every <li> under 'div[id=material_results] ul' is treated as one book. Each
 * row is echoed (followed by an extra newline) and written to $output. When
 * the page has no such list, a single placeholder row made of
 * $initial_csv_row_data and empty columns is echoed and written instead.
 *
 * Row columns, in order: $initial_csv_row_data, title, author, edition,
 * image URL, used price, new price, digital price, ISBN, Amazon list price,
 * Amazon discount price, non-Amazon new price, non-Amazon used price,
 * Amazon detail page URL, sister URL, sister-site data.
 *
 * @param string   $url                  Page to scrape.
 * @param string   $initial_csv_row_data Already-formatted CSV prefix for every row.
 * @param resource $output               Writable stream handle passed to fwrite().
 *
 * @return void
 */
function MainBookData($url, $initial_csv_row_data, &$output)
{
    include_once "library/simple_html_dom.php";

    $html = file_get_dom($url);
    // A failed load is handled like a page without materials instead of
    // crashing on a method call on a non-object.
    $ul = is_object($html) ? $html->find('div[id=material_results] ul') : null;

    // Loose comparison on purpose: find() yields an empty array when nothing matches.
    if ($ul != null) {
        foreach ($ul as $book_list) {
            foreach ($book_list->find('li') as $li) {
                $row_data = MainBookDataRow($li, $initial_csv_row_data);
                echo $row_data;
                echo "\n";
                fwrite($output, $row_data);
            }
        }
    } else {
        // If no book is found still add the record
        $row_data = "{$initial_csv_row_data},,,,,,,,\n";
        echo $row_data;
        fwrite($output, $row_data);
    }

    if (is_object($html)) {
        // Release the parsed DOM explicitly (the parser keeps circular references).
        $html->clear();
    }
    unset($html, $ul);
}

/**
 * Builds the CSV row (terminated by "\n") for a single book <li> node.
 *
 * Fields that cannot be located in the markup are emitted as empty strings.
 */
function MainBookDataRow($li, $initial_csv_row_data)
{
    $BookTitle = htmlspecialchars_decode(MainBookDataNodeText($li->find('span[class=wrap]', 0)));

    $image = $li->find('img', 0);
    $ImageUrl = is_object($image) ? $image->getAttribute("src") : "";

    // Used and new prices are the 2nd and 3rd 'field' class divs; the digital
    // price lives in the div whose id is 'field'.
    $BK_UsedPrice = MainBookDataPrice($li->find('div[class=field]', 1));
    $BK_NewPrice = MainBookDataPrice($li->find('div[class=field]', 2));
    $BK_DigitalPrice = MainBookDataPrice($li->find('div[id=field]', 0));

    // The detail text is split on the word "Edition": author on the left,
    // edition on the right. explode() replaces split(), which was removed in PHP 7.
    $AuthorEdition = explode("Edition", MainBookDataNodeText($li->find('div[class=detail]', 0)));
    $Author = $AuthorEdition[0];
    $Edition = isset($AuthorEdition[1]) ? $AuthorEdition[1] : "";
    $Author = htmlspecialchars_decode(trim(str_replace(array("Author:", "\n"), "", $Author)));
    $Edition = trim(str_replace(array(":", "\n"), "", $Edition));

    // Sister-site details are fetched only when a link with visible text exists.
    $SisterUrl = "";
    $SisterUrl_Ancher = $li->find('div[id=field] a', 0);
    if (is_object($SisterUrl_Ancher) && $SisterUrl_Ancher->plaintext != "") {
        $SisterUrl = $SisterUrl_Ancher->getAttribute("href");
        $sister_site_data = SisterSiteData($SisterUrl);
    } else {
        // NOTE(review): 7 empty fields; confirm this matches the number of
        // fields SisterSiteData() returns so the CSV columns stay aligned.
        $sister_site_data = ",,,,,,";
    }

    $Bk_ISBN = "";
    $amazon_fields = array(
        'AmazonListPrice' => "",
        'AmazonDiscountPrice' => "",
        'NonAmazonNewPrice' => "",
        'NonAmazonUsedPrice' => "",
        'AmazonDetailPageURL' => "",
    );

    // Amazon is queried, and the ISBN derived, only when the image is not one
    // of the two known placeholder URLs.
    if ($ImageUrl != "http://images.efollett.com/books/noBookImage.gif"
        && $ImageUrl != "http://images.efollett.com/booksnull"
    ) {
        $amazon = getAmazonData("{$BookTitle}, {$Author}, {$Edition}");
        if ($amazon) {
            foreach (array_keys($amazon_fields) as $key) {
                if (isset($amazon[$key])) {
                    $amazon_fields[$key] = $amazon[$key];
                }
            }
        }

        // The ISBN is the image file name: last path segment up to the first dot.
        $path_segments = explode("/", (string) $ImageUrl);
        $file_name_parts = explode('.', $path_segments[count($path_segments) - 1]);
        $Bk_ISBN = $file_name_parts[0];
    }

    $columns = array(
        $initial_csv_row_data,
        MainBookDataCsvQuote($BookTitle),
        MainBookDataCsvQuote($Author),
        MainBookDataCsvQuote($Edition),
        $ImageUrl,
        $BK_UsedPrice,
        $BK_NewPrice,
        $BK_DigitalPrice,
        $Bk_ISBN,
        $amazon_fields['AmazonListPrice'],
        $amazon_fields['AmazonDiscountPrice'],
        $amazon_fields['NonAmazonNewPrice'],
        $amazon_fields['NonAmazonUsedPrice'],
        $amazon_fields['AmazonDetailPageURL'],
        $SisterUrl,
        $sister_site_data,
    );

    return implode(",", $columns) . "\n";
}

/**
 * Returns the plain text of a DOM node, or "" when the node was not found.
 */
function MainBookDataNodeText($node)
{
    return is_object($node) ? $node->plaintext : "";
}

/**
 * Returns the text of the 'emph' span inside a price field, or "" when the
 * field is missing or has no text.
 */
function MainBookDataPrice($field)
{
    if (!is_object($field) || $field->plaintext == "") {
        return "";
    }

    return MainBookDataNodeText($field->find('span[class=emph]', 0));
}

/**
 * Wraps a value in double quotes for CSV, doubling any embedded double quote
 * so the field stays parseable.
 */
function MainBookDataCsvQuote($value)
{
    return '"' . str_replace('"', '""', $value) . '"';
}
Exemplo n.º 2
0
<?php

/**
 * Scrapes a sister-site product page and returns its details as a CSV fragment.
 *
 * The page at $sister_url is fetched (previously a hard-coded test URL on
 * cafescribe.com was fetched and the argument was ignored). All values are
 * read from the first 'div[id=bodycenter] table td' cell: the two prices from
 * its child at index 2, the remaining values from the second cell of fixed
 * rows inside its child at index 9.
 *
 * Any value whose node cannot be found is returned as an empty string, so the
 * result always has nine comma-separated fields.
 * NOTE(review): values are joined without CSV quoting; a value containing a
 * comma would shift columns — confirm whether callers need escaping.
 *
 * @param string $sister_url URL of the product page to scrape.
 *
 * @return string "Author,Edition,Publisher,ISBN-10 print,ISBN-13 print,
 *                ISBN-10 digital,ISBN-13 digital,list price,you-pay price".
 */
function SisterSiteData($sister_url)
{
    include_once "library/simple_html_dom.php";

    $html = file_get_dom($sister_url);
    // Look the container cell up once instead of re-running the selector per field.
    $cell = is_object($html) ? $html->find('div[id=bodycenter] table td', 0) : null;

    $price_block = SisterSiteDataChild($cell, 2);
    $details = SisterSiteDataChild($cell, 9);

    // Row indexes inside the details block, listed in output column order:
    // author, edition, publisher, ISBN-10 print, ISBN-13 print,
    // ISBN-10 digital, ISBN-13 digital.
    $fields = array();
    foreach (array(0, 4, 3, 6, 7, 8, 9) as $row_index) {
        $row = is_object($details) ? $details->find('tr', $row_index) : null;
        $fields[] = SisterSiteDataText(SisterSiteDataChild($row, 1));
    }

    $fields[] = SisterSiteDataText(SisterSiteDataChild($price_block, 1)); // list price
    $fields[] = SisterSiteDataText(SisterSiteDataChild($price_block, 6)); // you-pay price

    if (is_object($html)) {
        // Release the parsed DOM; the text values above are already copied out.
        $html->clear();
    }

    return implode(",", $fields);
}

/**
 * Returns the child of $node at position $index, or null when the node or
 * that child does not exist.
 */
function SisterSiteDataChild($node, $index)
{
    if (is_object($node) && isset($node->children[$index])) {
        return $node->children[$index];
    }

    return null;
}

/**
 * Returns the trimmed plain text of a DOM node, or "" when the node is missing.
 */
function SisterSiteDataText($node)
{
    return is_object($node) ? trim($node->plaintext) : "";
}
// Ad-hoc smoke run: print the CSV fragment produced for the argument "as".
print SisterSiteData("as");