function getData() { // Create connection $originalUrl = "http://www.locanto.ph/geo/539918/Houses-for-Rent/307/Cebu-City/?sort=dist&dist=&post_type=1&page="; $page = 0; $allItem = 1; $reference_no = ''; $original_site = ''; $site_link_id = '1'; $original_post_link = ''; $title = ''; $description = ''; $price = ''; $product_image = ''; $furnishing = ''; $location = ''; $posted_date = ''; $square_area = ''; $bedrooms = ''; $bathrooms = ''; $floor = ''; $name_of_posted_person = ''; $contact_mobile = ''; $contact_email = ''; $contact_landline = ''; $created = ''; $modified = ''; $status = ''; ## Get the data from page 1 to bottom do { $urls = $originalUrl . $page; $variable = parent::exeCurl($urls); $xpath = parent::dom($variable); #get text header $properties = $xpath->query('//div[@class="resultRow"]'); #loop through each of the row items $numberOfItem = 0; foreach ($properties as $container) { $arr = $container->getElementsByTagName("a"); // get the anchor tag $img = $container->getElementsByTagName("img"); $url = ""; $x = 0; foreach ($arr as $index => $item) { $href = $item->getAttribute("href"); if ($x === 1) { $url = $href; } $x++; } ## Open single html $variable = parent::exeCurl($url); $xpathRow = parent::dom($variable); ## Get the ID Number $rowID = $xpathRow->query('//div[@id="ad-info-header"]'); $propID = ""; foreach ($rowID as $rowItem) { $text = trim(preg_replace("/[\r\nAdID: ]+/", "", $rowItem->nodeValue)); $propID = $text; } $con = parent::connection(); $result = mysqli_query($con, "SELECT * FROM fdci_web_crawler WHERE reference_no = '{$propID}' AND site_link_id = '{$site_link_id}'"); if (mysqli_num_rows($result) > 0) { die; } ## Get the title $rowTitle = $xpathRow->query('//span[@class="h2"]'); $propTitle = ""; foreach ($rowTitle as $rowItem) { $text = trim(preg_replace("/[\r\n]+/", " ", $rowItem->nodeValue)); $propTitle = $text; } ## Get the price $rowPrice = $xpathRow->query('//div[@class="h1gray"]'); $propPrice = ""; foreach ($rowPrice as $rowItem) { $text = trim(preg_replace("/[\r\n₱,m²BR]+/", "", $rowItem->nodeValue)); $propPrice = $text; $explodedPrice = explode(" ", $propPrice); } ## Get the Advertiser $rowAdvertiser = $xpathRow->query('//div[@class="mybox_wrapper"]//div[@class="mybox_title"]'); $array = array(); foreach ($rowAdvertiser as $rowItem) { $array[] = $rowItem->nodeValue; } $arr = $array[1]; $explodedAdvertiser = explode(" ", $arr); ## Get the Street Address $rowStreet = $xpathRow->query('//span[@itemprop="streetAddress"]'); $propStreet = ""; foreach ($rowStreet as $rowItem) { $text = trim(preg_replace("/[\r\n]+/", "", $rowItem->nodeValue)); $propStreet = $text; } ## Get the Local Address $rowLocation = $xpathRow->query('//span[@itemprop="addressLocality"]'); $propCity = ""; foreach ($rowLocation as $rowItem) { $text = trim(preg_replace("/[\r\n]+/", "", $rowItem->nodeValue)); $propCity = $text; } ## Get the Country $rowCountry = $xpathRow->query('//span[@itemprop="addressCountry"]'); $propCountry = ""; foreach ($rowCountry as $rowItem) { $text = trim(preg_replace("/[\r\n]+/", "", $rowItem->nodeValue)); $propCountry = $text; } ## Get the Description $rowDesc = $xpathRow->query('//div[@id="js-user_content"]'); $propDesc = ""; foreach ($rowDesc as $rowItem) { $text = trim(preg_replace("/[\r\n]+/", "", $rowItem->nodeValue)); $propDesc = $text; } ## Get the front image $rowImg = $xpathRow->query('//a[@title="Click to view larger image!"]/@href'); $img = ""; foreach ($rowImg as $data) { $prop_front_img = $data->nodeValue; } ## Get the date $rowDate = $xpathRow->query('//div[@class="indent24"]//span/@title'); $date = ""; foreach ($rowDate as $rowItem) { $date = $rowItem->nodeValue; } ## Get all images $rowImg = $xpathRow->query('//a[@class="tn_img js-tn_img"]//@href'); $prop_imgs = array(); foreach ($rowImg as $row_item_img) { $prop_imgs[] = $row_item_img->nodeValue; // $imgs = array(); $imgs = ''; foreach ($prop_imgs as $rowItem) { $imgs = $imgs . ' ' . $rowItem; } } $imgs = $imgs . ' ' . $prop_front_img; ## Location condition if ($propStreet != '') { $location = $propStreet . ', ' . $propCity . ', ' . $propCountry; } else { $location = $propCity . ', ' . $propCountry; } $image = preg_split("/[\\s,]+/", $imgs); $jsonImage = json_encode($image); $reference_no = $propID; $original_site = 'http://www.locanto.ph/'; $site_link_id = '1'; $original_post_link = $url; $title = $propTitle; $description = $propDesc; $price = $explodedPrice[0]; $product_image = $jsonImage; $furnishing = ''; $location = $location; $posted_date = $date; $square_area = ''; $bedrooms = ''; $bathrooms = ''; $floor = ''; $name_of_posted_person = $explodedAdvertiser[1]; $contact_mobile = ''; $contact_email = ''; $contact_landline = ''; $created = ''; $modified = ''; $status = 1; $posted_date = parent::stringToDate($posted_date); //edit by karen parent::insertData($reference_no, $original_site, $site_link_id, $original_post_link, $title, $description, $price, $product_image, $furnishing, $location, $posted_date, $square_area, $bedrooms, $bathrooms, $floor, $name_of_posted_person, $contact_mobile, $contact_email, $contact_landline, $status); $numberOfItem++; $allItem++; } $page++; } while ($numberOfItem == 25); // end of while loop }
function getData() { // Create connection $originalUrl = "http://rentpad.com.ph/ws/search.htm?a=31&cityName=Cebu&propertyTypeIDs=[]&furnishTypeIDs=[3]&placeIDs=[]&statusTypeIDs=[]&amenityIDs=[]&longMonthRateLow=0&longMonthRateHigh=999999&numBedroomsLow=0&numBedroomsHigh=999&itemsPerPage=15&lengthOfStay=&ham=ham&pageNumber="; $page = 0; $allItem = 1; $searchUrl = 'http://rentpad.com.ph/ws/search.htm?a=31&cityName=Cebu&propertyTypeIDs=[]&furnishTypeIDs=[3]&placeIDs=[]&statusTypeIDs=[]&amenityIDs=[]&longMonthRateLow=0&longMonthRateHigh=999999&numBedroomsLow=0&numBedroomsHigh=999&itemsPerPage=15&lengthOfStay=&ham=ham&pageNumber='; $c = 1; for ($b = 1; $b <= 16; $b++) { $base_url = 'http://rentpad.com.ph:80/'; $json = parent::exeCurl($searchUrl . $b); $data = json_decode($json, true); for ($a = 0; $a < 15; $a++) { $reference_no = ''; $original_site = ''; $site_link_id = '2'; $original_post_link = ''; $title = ''; $description = ''; $price = ''; $product_image = ''; $furnishing = ''; $location = ''; $posted_date = ''; $square_area = ''; $bedrooms = ''; $bathrooms = ''; $floor = ''; $name_of_posted_person = ''; $contact_mobile = ''; $contact_email = ''; $contact_landline = ''; $created = ''; $modified = ''; $status = ''; $id = $data['model']['searchResult']['listings'][$a]['id']; /* $con = parent::connection(); $result = mysqli_query($con,"SELECT * FROM fdci_web_crawler WHERE reference_no = '$id' AND site_link_id = '$site_link_id'"); if(mysqli_num_rows($result)>0){ die(); }*/ $urlTitle = $data['model']['searchResult']['listings'][$a]['urlTitle']; $address = $data['model']['searchResult']['listings'][$a]['address']; $city = $data['model']['searchResult']['listings'][$a]['city']; $longTerm = $data['model']['searchResult']['listings'][$a]['leaseLongTerm']; $shortTerm = $data['model']['searchResult']['listings'][$a]['leaseShortTerm']; $Communitydescription = $data['model']['searchResult']['listings'][$a]['community']['description']; $longMonthRate = $data['model']['searchResult']['listings'][$a]['longMonthRate']; $primaryPhoto = $data['model']['searchResult']['listings'][$a]['primaryPhoto']['filename']; $sqArea = $data['model']['searchResult']['listings'][$a]['sqArea']; $title = $data['model']['searchResult']['listings'][$a]['title']; if ($longTerm == 1) { $termUrl = 'long-term-rentals'; $siteUrl = 'http://rentpad.com.ph/' . $termUrl . '/cebu/' . $urlTitle . '/' . $id; $variable = parent::exeCurl($siteUrl); $xpath = parent::dom($variable); $descriptions = ''; $des = $xpath->query('//span[@style="font-size: 14px; line-height: 20px;"]'); foreach ($des as $desItem) { $descriptions = trim(preg_replace("/[\r\n]+/", "", $desItem->nodeValue)); } $table = $xpath->query('//table[@id="table-listing-details"]//tr'); // start edited by jacob $tr_array = array(); foreach ($table as $tr) { $tr_array[] = trim(preg_replace("/\\s+/", "", $tr->nodeValue)); } $data = count($tr_array); for ($i = 0; $i < $data; $i++) { $date = $tr_array[$i]; @(list($t, $v) = split('[:.-]', $date)); $mystring = $t; // get location $findme = 'Location'; $pos = strpos($mystring, $findme); if ($pos == true) { $location = $v; } // get bedroom $findBedroom = 'Bedrooms'; $pos1 = strpos($mystring, $findBedroom); if ($pos1 == true) { $bedroom = $v; } // get bathroom $findBathroom = 'Bathrooms'; $pos2 = strpos($mystring, $findBathroom); if ($pos2 == true) { $bathroom = $v; } // get furnishing $findFurnishing = 'Furnishing'; $pos3 = strpos($mystring, $findFurnishing); if ($pos3 == true) { $furnishing = $v; } // get date $findDate = 'Updated'; $pos4 = strpos($mystring, $findDate); if ($pos4 == true) { $date = $v; } // get floor $findFloor = 'Floor'; $pos5 = strpos($mystring, $findFloor); if ($pos5 == true) { $floor = $v; } } // get images $images = array(); foreach ($xpath->query('//div[@id="content-photo"]//img[starts-with(@data-src,"http")]') as $image) { $images[] = $image->getAttribute('data-src'); } $allImages = ''; $cImage = count($images); for ($i = 0; $i < $cImage; $i++) { $allImages = $allImages . ' ' . $images[$i]; } $image = preg_split("/[\\s,]+/", $allImages); $jsonImage = json_encode($image); // get contact person // $person = $xpath->query('//div[@id="contact-name"]')->nodeValue; // $contact_person = trim($person); } if ($shortTerm == 1) { $termUrl = 'short-term-rentals'; $siteUrl = 'http://rentpad.com.ph/' . $termUrl . '/cebu/' . $urlTitle . '/' . $id; $siteUrl = 'http://rentpad.com.ph/' . $termUrl . '/cebu/' . $urlTitle . '/' . $id; } $reference_no = $id; $original_site = 'http://rentpad.com.ph'; $site_link_id = '2'; $original_post_link = $siteUrl; $title = $title; $description = $descriptions; $price = $longMonthRate; $product_image = $jsonImage; $furnishing = $furnishing; $location = $address . ', ' . $city; $posted_date = $date; $square_area = $sqArea; $bedrooms = $bedroom; $bathrooms = $bathroom; $floors = $floor; $name_of_posted_person = ''; $contact_mobile = ''; $contact_email = ''; $contact_landline = ''; $created = ''; $modified = ''; $status = 1; $posted_date = parent::stringToDate($posted_date); //edit by karen parent::insertData($reference_no, $original_site, $site_link_id, $original_post_link, $title, $description, $price, $product_image, $furnishing, $location, $posted_date, $square_area, $bedrooms, $bathrooms, $floors, $name_of_posted_person, $contact_mobile, $contact_email, $contact_landline, $status); $c++; } } $numberOfItem++; $allItem++; }