Esempio n. 1
0
/**
 * Scrape job listings from a result page and from the pages that follow it.
 *
 * Five XPath queries are run against the page; the n-th hit of each query is
 * combined into the n-th result row. The number of rows is driven by the
 * number of title nodes. After the rows of the current page are collected,
 * the second pagination link (if any) is followed recursively and its rows
 * are appended.
 *
 * NOTE(review): XPATH is a project class (not visible here); its query()
 * result is used like a DOMNodeList (->length, ->item()) - confirm.
 *
 * @param string $url Address of the result page to scrape.
 *
 * @return array List of rows, each with the keys 'title', 'url', 'location',
 *               'info' and 'skill'. A value is null when the matching query
 *               produced fewer nodes than the title query.
 */
function scrapindia($url)
{
    $array = array();
    $xpath = new XPATH($url);

    // One node list per output field; the array order fixes the key order of a row.
    $queries = array(
        'title'    => $xpath->query("//span[@title]/text()"),
        'url'      => $xpath->query("//div[@type='tuple']/a/@href"),
        'location' => $xpath->query("//span[@class='loc']/span/text()"),
        'info'     => $xpath->query("//span[@class='desc']/text()"),
        'skill'    => $xpath->query("//div[@class='desc']/span/text()"),
    );

    $rowCount = $queries['title']->length;
    for ($x = 0; $x < $rowCount; $x++) {
        $row = array();
        foreach ($queries as $field => $nodes) {
            // item() yields null past the end of a shorter list; store null
            // instead of dereferencing it.
            $node = $nodes->item($x);
            $row[$field] = $node !== null ? $node->nodeValue : null;
        }
        $array[] = $row;
    }

    // Follow the second pagination link, which the original code treats as "next page".
    $nextpagelink = $xpath->query("(//div[@class='pagination']/a/@href)[2]");
    if ($nextpagelink->length) {
        $nextUrl = $nextpagelink->item(0)->nodeValue;
        // A link back to the page we are on would recurse forever.
        // NOTE(review): longer cycles (A -> B -> A) are not detected here.
        if ($nextUrl !== '' && $nextUrl !== $url) {
            $array = array_merge($array, scrapindia($nextUrl));
        }
    }

    // The original fell off the end without returning, so callers (including
    // the recursive array_merge above) received null.
    return $array;
}
Esempio n. 2
0
/**
 * Scrape job listings from a result page and from every pagination page
 * reachable from it.
 *
 * Five XPath queries are run against the page; the n-th hit of each query is
 * combined into the n-th result row. The number of rows is driven by the
 * number of title nodes. Afterwards up to ten pagination links of the page
 * are followed recursively, skipping addresses that were already scraped.
 *
 * NOTE(review): XPATH is a project class (not visible here); its query()
 * result is used like a DOMNodeList (->length, ->item()) - confirm.
 *
 * @param string $url     Address of the result page to scrape.
 * @param array  $visited Internal bookkeeping: URLs already scraped, as
 *                        array keys. Callers normally omit it.
 *
 * @return array List of rows, each with the keys 'title', 'url', 'location',
 *               'info' and 'skill'. A value is null when the matching query
 *               produced fewer nodes than the title query.
 */
function scrapindia($url, array &$visited = array())
{
    // Mark the page before recursing so links pointing back to it are skipped.
    $visited[$url] = true;

    $array = array();
    $xpath = new XPATH($url);

    // One node list per output field; the array order fixes the key order of a row.
    $queries = array(
        'title'    => $xpath->query("//td[@class='job_cell']/a/span/text()"),
        'url'      => $xpath->query("//td[@class='job_cell']/a/@href"),
        'location' => $xpath->query("//span[@class='loc_title']/text()"),
        'info'     => $xpath->query("//div[@class='list_job_desc']/text()"),
        'skill'    => $xpath->query("//span[@class='section_name']/text()"),
    );

    $rowCount = $queries['title']->length;
    for ($x = 0; $x < $rowCount; $x++) {
        $row = array();
        foreach ($queries as $field => $nodes) {
            // item() yields null past the end of a shorter list; store null
            // instead of dereferencing it.
            $node = $nodes->item($x);
            $row[$field] = $node !== null ? $node->nodeValue : null;
        }
        $array[] = $row;
    }

    // Walk pagination links 1..10. The original loop never advanced $i and
    // therefore never terminated.
    for ($i = 1; $i < 11; $i++) {
        $nextpagelink = $xpath->query("(//div[@class='pagination']/a/@href)[{$i}]");
        $node = $nextpagelink->item(0);
        if ($node === null) {
            // Positional predicates are contiguous: if link $i is missing,
            // so are all later ones.
            break;
        }

        $nextUrl = $node->nodeValue;
        if ($nextUrl === '' || isset($visited[$nextUrl])) {
            continue;
        }

        $array = array_merge($array, scrapindia($nextUrl, $visited));
    }

    return $array;
}
Esempio n. 3
0
<?php

require_once 'Xpath.php';
// Page whose contents are queried below.
$startUrl = "http://www.bbc.com/sport/football/premier-league/fixtures";
// Scratch notes: XPath expressions that are not used by the active code below.
//href -- //td[@class='title']/a/@href
//title -- //td[@class='title']/a/text()
//img src -- //td[@class='image']//img/@src
//img title -- //td[@class='image']//img/@title
// XPATH comes from Xpath.php (required above); it is constructed from the URL
// and queried with XPath expressions.
$xpath = new XPATH($startUrl);
// Disabled image queries.
//$imageQuery = $xpath->query("//td[@class='image']//img/@src");
//$imageTitleQuery = $xpath->query("//td[@class='image']//img/@title");
////td[@class='kickoff']/text()
// Text of the link inside each "team-home teams" span (stored as 'Home Team' by the loop below).
$linkTitleQuery = $xpath->query("//span[@class='team-home teams']/a/text()");
// Text of the link inside each "team-away teams" span (stored as 'Away Team' by the loop below).
$linkTitleQuery1 = $xpath->query("//span[@class='team-away teams']/a/text()");
// Earlier variant that selected only the text node of the kickoff cell.
// $gameTime = $xpath->query("//td[@class='kickoff']/text()");
// Whole kickoff <td> elements rather than their text nodes.
$gameTime = $xpath->query("//td[@class='kickoff']");
// Text of the h2.table-header headings inside the fixtures table container.
$gameDate = $xpath->query("//div[@class='fixtures-table full-table-medium']//h2[@class='table-header']/text()");
// href attribute of each home-team link; its length bounds the loop that follows.
$linkHrefQuery = $xpath->query("//span[@class='team-home teams']/a/@href");
// Disabled debug output of the result counts.
// echo $imageQuery->length;
// echo $imageTitleQuery->length;
// echo $linkTitleQuery->length;
// Result rows, filled by position in the loop that follows.
$data = array();
for ($x = 0; $x < $linkHrefQuery->length; $x++) {
    //$data[$x]['imageTitle'] = $imageTitleQuery->item($x)->nodeValue;
    //$data[$x]['imageSrc'] = $imageQuery->item($x)->nodeValue;
    $data[$x]['Home Team'] = $linkTitleQuery->item($x)->nodeValue;
    $data[$x]['Away Team'] = $linkTitleQuery1->item($x)->nodeValue;
    // $data[$x]['KickOff'] = trim($gameTime->item($x)->nodeValue);
    // $table = $gameTime->item($x)->parentNode->parentNode->parentNode;
    // $data[$x]['Date'] = trim($table->previousSibling->previousSibling->nodeValue);
    //$data[$x]['linkHrefQuery'] = $linkHrefQuery->item($x)->nodeValue;