/**
 * Scrape job listings from $url and from every page reachable through the
 * "next page" link, returning all rows as one flat list.
 *
 * Each page is loaded with the project's XPATH helper (not defined in this
 * block). Its query() result is used through ->length and ->item($i), i.e.
 * presumably a DOMNodeList -- confirm against Xpath.php.
 *
 * The "next page" link is taken to be the second href inside
 * div.pagination. The href is passed to XPATH as-is; relative links are not
 * resolved here.
 *
 * NOTE: rows are only returned, not stored. An earlier inline INSERT was
 * disabled; if persistence is reinstated, use prepared statements rather
 * than interpolating scraped text into SQL.
 *
 * @param string $url Address of the first listing page.
 *
 * @return array List of rows, each with the keys 'title', 'url', 'location',
 *               'info' and 'skill'. A field is null when its query matched
 *               fewer nodes than the title query did.
 */
function scrapindia($url)
{
    $array = array();
    $visited = array();

    // item() returns null past the end of a node list; report that as a
    // null field instead of dereferencing it.
    $valueAt = function ($nodes, $index) {
        $node = $nodes->item($index);

        return $node !== null ? $node->nodeValue : null;
    };

    $pageUrl = $url;

    // Iterate instead of recursing, and remember visited pages so that a
    // pagination link pointing back to an earlier page cannot loop forever.
    while ($pageUrl !== null && $pageUrl !== '' && !isset($visited[$pageUrl])) {
        $visited[$pageUrl] = true;

        $xpath = new XPATH($pageUrl);
        $titlequery = $xpath->query("//span[@title]/text()");
        $urlquery = $xpath->query("//div[@type='tuple']/a/@href");
        $locationquery = $xpath->query("//span[@class='loc']/span/text()");
        $infoquery = $xpath->query("//span[@class='desc']/text()");
        $skillquery = $xpath->query("//div[@class='desc']/span/text()");

        // The title query drives the row count; the other queries are
        // assumed to line up with it position by position.
        for ($x = 0; $x < $titlequery->length; $x++) {
            $array[] = array(
                'title' => $valueAt($titlequery, $x),
                'url' => $valueAt($urlquery, $x),
                'location' => $valueAt($locationquery, $x),
                'info' => $valueAt($infoquery, $x),
                'skill' => $valueAt($skillquery, $x),
            );
        }

        $nextpagelink = $xpath->query("(//div[@class='pagination']/a/@href)[2]");
        $pageUrl = $nextpagelink->length ? $nextpagelink->item(0)->nodeValue : null;
    }

    return $array;
}
/**
 * Scrape job listings from $url and from the pages reachable through its
 * pagination links, returning all rows as one flat list.
 *
 * Each page is loaded with the project's XPATH helper (not defined in this
 * block). Its query() result is used through ->length and ->item($i), i.e.
 * presumably a DOMNodeList -- confirm against Xpath.php.
 *
 * On every page the first 10 hrefs inside div.pagination are queued for a
 * visit. Pages are processed breadth-first and each distinct href is
 * scraped once, so pagination links that point back to already-seen pages
 * do not make the crawl run forever. Hrefs are passed to XPATH as-is;
 * relative links are not resolved here.
 *
 * NOTE: rows are only returned, not stored. An earlier inline INSERT was
 * disabled; if persistence is reinstated, use prepared statements rather
 * than interpolating scraped text into SQL.
 *
 * @param string $url Address of the first listing page.
 *
 * @return array List of rows, each with the keys 'title', 'url', 'location',
 *               'info' and 'skill'. A field is null when its query matched
 *               fewer nodes than the title query did.
 */
function scrapindia($url)
{
    // Upper bound on pagination links followed per page (the original loop
    // covered positions 1..10).
    $maxPaginationLinks = 10;

    $array = array();
    $visited = array();
    $queue = array($url);

    // item() returns null past the end of a node list; report that as a
    // null field instead of dereferencing it.
    $valueAt = function ($nodes, $index) {
        $node = $nodes->item($index);

        return $node !== null ? $node->nodeValue : null;
    };

    while ($queue) {
        $pageUrl = array_shift($queue);

        if ($pageUrl === null || $pageUrl === '' || isset($visited[$pageUrl])) {
            continue;
        }
        $visited[$pageUrl] = true;

        $xpath = new XPATH($pageUrl);
        $titlequery = $xpath->query("//td[@class='job_cell']/a/span/text()");
        $urlquery = $xpath->query("//td[@class='job_cell']/a/@href");
        $locationquery = $xpath->query("//span[@class='loc_title']/text()");
        $infoquery = $xpath->query("//div[@class='list_job_desc']/text()");
        $skillquery = $xpath->query("//span[@class='section_name']/text()");

        // The title query drives the row count; the other queries are
        // assumed to line up with it position by position.
        for ($x = 0; $x < $titlequery->length; $x++) {
            $array[] = array(
                'title' => $valueAt($titlequery, $x),
                'url' => $valueAt($urlquery, $x),
                'location' => $valueAt($locationquery, $x),
                'info' => $valueAt($infoquery, $x),
                'skill' => $valueAt($skillquery, $x),
            );
        }

        // One query for all pagination hrefs; taking the first N nodes is
        // the same as asking for positions 1..N one at a time.
        $pagelinks = $xpath->query("//div[@class='pagination']/a/@href");
        $limit = min($pagelinks->length, $maxPaginationLinks);
        for ($i = 0; $i < $limit; $i++) {
            $queue[] = $pagelinks->item($i)->nodeValue;
        }
    }

    return $array;
}
<?php require_once 'Xpath.php'; $startUrl = "http://www.bbc.com/sport/football/premier-league/fixtures"; //href -- //td[@class='title']/a/@href //title -- //td[@class='title']/a/text() //img src -- //td[@class='image']//img/@src //img title -- //td[@class='image']//img/@title $xpath = new XPATH($startUrl); //$imageQuery = $xpath->query("//td[@class='image']//img/@src"); //$imageTitleQuery = $xpath->query("//td[@class='image']//img/@title"); ////td[@class='kickoff']/text() $linkTitleQuery = $xpath->query("//span[@class='team-home teams']/a/text()"); $linkTitleQuery1 = $xpath->query("//span[@class='team-away teams']/a/text()"); // $gameTime = $xpath->query("//td[@class='kickoff']/text()"); $gameTime = $xpath->query("//td[@class='kickoff']"); $gameDate = $xpath->query("//div[@class='fixtures-table full-table-medium']//h2[@class='table-header']/text()"); $linkHrefQuery = $xpath->query("//span[@class='team-home teams']/a/@href"); // echo $imageQuery->length; // echo $imageTitleQuery->length; // echo $linkTitleQuery->length; $data = array(); for ($x = 0; $x < $linkHrefQuery->length; $x++) { //$data[$x]['imageTitle'] = $imageTitleQuery->item($x)->nodeValue; //$data[$x]['imageSrc'] = $imageQuery->item($x)->nodeValue; $data[$x]['Home Team'] = $linkTitleQuery->item($x)->nodeValue; $data[$x]['Away Team'] = $linkTitleQuery1->item($x)->nodeValue; // $data[$x]['KickOff'] = trim($gameTime->item($x)->nodeValue); // $table = $gameTime->item($x)->parentNode->parentNode->parentNode; // $data[$x]['Date'] = trim($table->previousSibling->previousSibling->nodeValue); //$data[$x]['linkHrefQuery'] = $linkHrefQuery->item($x)->nodeValue;