/**
 * Scrape the job listings found on a results page and on the pages reached
 * through its "next page" pagination link.
 *
 * XPATH is a project class that is not visible here; it is presumably a thin
 * wrapper that loads the document at $url and exposes a DOMXPath-style
 * query() returning a node list (->length, ->item()) - TODO confirm.
 *
 * @param string $url Address of the first results page to scrape.
 *
 * @return array List of rows, each an array with the keys 'title', 'url',
 *               'location', 'info' and 'skill'. A field is null when the
 *               page has no node for it at that row's position.
 */
function scrapindia($url) {
    $xpath = new XPATH($url);

    // One node list per output field. Rows are assembled by position, so the
    // n-th node of every list is assumed to belong to the n-th listing.
    $columns = array(
        'title'    => $xpath->query("//span[@title]/text()"),
        'url'      => $xpath->query("//div[@type='tuple']/a/@href"),
        'location' => $xpath->query("//span[@class='loc']/span/text()"),
        'info'     => $xpath->query("//span[@class='desc']/text()"),
        'skill'    => $xpath->query("//div[@class='desc']/span/text()"),
    );

    $array = array();
    $rowCount = $columns['title']->length;
    for ($x = 0; $x < $rowCount; $x++) {
        $row = array();
        foreach ($columns as $field => $nodes) {
            // The lists can differ in length; a missing node yields null
            // instead of a "property of non-object" warning.
            $node = $nodes->item($x);
            $row[$field] = $node !== null ? $node->nodeValue : null;
        }
        $array[] = $row;
    }

    // The second pagination anchor is treated as the "next page" link.
    $nextpagelink = $xpath->query("(//div[@class='pagination']/a/@href)[2]");
    $nextNode = $nextpagelink->length ? $nextpagelink->item(0) : null;
    $nextUrl = $nextNode !== null ? $nextNode->nodeValue : null;

    // Skip empty links and a link back to the current page, which would
    // otherwise recurse forever.
    // NOTE(review): a longer cycle (e.g. the link on the last page pointing
    // to an earlier page) is not detected here - verify against the site.
    if ($nextUrl !== null && $nextUrl !== '' && $nextUrl !== $url) {
        $array = array_merge($array, scrapindia($nextUrl));
    }

    // Returning the rows is required both for callers and for the recursive
    // array_merge() above, which needs an array from every nested call.
    return $array;
}
/**
 * Scrape the job listings from a results page and from every page reachable
 * through its pagination links.
 *
 * Pages are crawled iteratively with a work queue and a visited set, so each
 * URL is fetched at most once. Pagination blocks normally link back to pages
 * that were already seen, which is why the visited set is needed for the
 * crawl to terminate. Rows are returned in the order pages are discovered.
 *
 * XPATH is a project class that is not visible here; it is presumably a thin
 * wrapper that loads the document at the given URL and exposes a
 * DOMXPath-style query() returning a node list (->length, ->item()) -
 * TODO confirm.
 *
 * @param string $url Address of the first results page to scrape.
 *
 * @return array List of rows, each an array with the keys 'title', 'url',
 *               'location', 'info' and 'skill'. A field is null when the
 *               page has no node for it at that row's position.
 */
function scrapindia($url) {
    $array = array();
    $pending = array($url);
    $visited = array();

    while (count($pending) > 0) {
        $pageUrl = array_shift($pending);
        if (isset($visited[$pageUrl])) {
            continue;
        }
        $visited[$pageUrl] = true;

        $xpath = new XPATH($pageUrl);

        // One node list per output field. Rows are assembled by position, so
        // the n-th node of every list is assumed to belong to the n-th listing.
        $columns = array(
            'title'    => $xpath->query("//td[@class='job_cell']/a/span/text()"),
            'url'      => $xpath->query("//td[@class='job_cell']/a/@href"),
            'location' => $xpath->query("//span[@class='loc_title']/text()"),
            'info'     => $xpath->query("//div[@class='list_job_desc']/text()"),
            'skill'    => $xpath->query("//span[@class='section_name']/text()"),
        );

        $rowCount = $columns['title']->length;
        for ($x = 0; $x < $rowCount; $x++) {
            $row = array();
            foreach ($columns as $field => $nodes) {
                // The lists can differ in length; a missing node yields null
                // instead of a "property of non-object" warning.
                $node = $nodes->item($x);
                $row[$field] = $node !== null ? $node->nodeValue : null;
            }
            $array[] = $row;
        }

        // Inspect the first ten pagination anchors (XPath positions 1..10).
        // The counter must advance on every pass or the loop never ends.
        for ($i = 1; $i < 11; $i++) {
            $nextpagelink = $xpath->query("(//div[@class='pagination']/a/@href)[{$i}]");
            $linkNode = $nextpagelink->item(0);
            if ($linkNode === null) {
                // Fewer than ten pagination links on this page.
                continue;
            }

            // NOTE(review): hrefs are queued as-is; if the site emits
            // relative links they must be resolved against $pageUrl first.
            $nextUrl = $linkNode->nodeValue;
            if ($nextUrl !== '' && !isset($visited[$nextUrl])) {
                $pending[] = $nextUrl;
            }
        }
    }

    return $array;
}