Exemple #1
0
 /**
  * Converts the triples that parsePage() produces for one page into an
  * ExtractionResult.
  *
  * parsePage() hands its output over through the global $parseResult. Each
  * entry is read here as: [0] subject URI, [1] predicate URI, [2] object
  * value, [3] object kind ("r" builds a URI object, anything else a literal),
  * [4] and [5] the second and third arguments of RDFtriple::literal()
  * ([5] falls back to $this->language when it is empty).
  *
  * Side effects: sets the globals $pagetitle and $parseResult, and registers
  * every predicate with $this->allPredicates.
  *
  * @param mixed  $pageID     Page identifier, passed to ExtractionResult and parsePage().
  * @param string $pageTitle  Page title; published through the global $pagetitle.
  * @param string $pageSource Page source handed to parsePage().
  * @return ExtractionResult  One triple per parsed entry; empty when nothing was parsed.
  */
 public function extractPage($pageID, $pageTitle, $pageSource)
 {
     // catchLogo() in catchObjectDatatype.php reads the title from this global
     // during image extraction.
     global $pagetitle;
     $pagetitle = $pageTitle;

     $result = new ExtractionResult($pageID, $this->language, self::extractorID);

     // parsePage() reports its triples through this global; reset it first so
     // the outcome of a previous page can never be mistaken for this one.
     global $parseResult;
     $parseResult = null;
     parsePage($pageID, $pageSource);

     // Still null (or otherwise empty) means nothing was extracted. Checking
     // with empty() avoids count(null), which is a TypeError on PHP 8.
     if (empty($parseResult)) {
         return $result;
     }

     foreach ($parseResult as $myTriple) {
         $subject = RDFtriple::URI($myTriple[0]);

         // Fold numbered properties such as LeaderName1, LeaderName2 into
         // LeaderName. Only a single trailing digit is stripped.
         if (preg_match("/(.*[^0-9_]+)([0-9])\$/", $myTriple[1], $matches)) {
             $myTriple[1] = $matches[1];
         }
         $predicate = RDFtriple::URI($myTriple[1]);

         if ($myTriple[3] == "r") {
             $object = RDFtriple::URI($myTriple[2]);
         } else {
             // Literals without their own language inherit the extractor's.
             if ($myTriple[5] == null) {
                 $myTriple[5] = $this->language;
             }
             $object = RDFtriple::literal($myTriple[2], $myTriple[4], $myTriple[5]);
         }

         $result->addTriple($subject, $predicate, $object);
         $this->allPredicates->addPredicate($myTriple[1]);
     }

     return $result;
 }
// Paginated scrape: follow the "next page" links (at most $pageLimit pages),
// let parsePage() process each page, then start dumping the rows stored in
// acmdata1 as an HTML table.
#parsePage($html1);
#print "Next page link: " . $nextPageLink . "\n";
print "title, author, year, addinfo, publisher, abstract" . "\n";
// Columns the acmdata1 table is expected to have (creation is disabled here):
#scraperwiki::sqliteexecute("create table acmdata1 (a int, `title` string, 'author' string, 'year' string, 'addinfo' string, 'publisher' string, 'abstract' string)");
$pageLimit = 6;
$maxPages = 0;
$numrecords = 1;
// NOTE(review): $nextPageLink is not assigned in this part of the script; it
// must already hold the first URL to fetch.
// The (string) cast keeps strlen() from receiving null when no link was found.
while (strlen((string) $nextPageLink) > 0 and $maxPages < $pageLimit) {
    $maxPages++;
    #print "Moving on to next page" . "\n";
    $html_content = scraperWiki::scrape($nextPageLink);
    $html1 = str_get_html($html_content);
    if (!$html1) {
        // str_get_html() returns false for an empty or oversized document;
        // calling find() on that would be a fatal error.
        print "Could not parse page: " . $nextPageLink . "\n";
        break;
    }
    #print $html1 . "\n";
    $nextLink = $html1->find("td[@colspan='2']", 0);
    # print "Next link: " . $nextLink->innertext . "\n";
    $nextPageLink = getNextLink($nextLink);
    $numrecords = parsePage($html1, $maxPages);
    // Throttle between requests, but do not wait two minutes when the loop is
    // about to end anyway.
    if (strlen((string) $nextPageLink) > 0 and $maxPages < $pageLimit) {
        sleep(120);
    }
}
print "No further pages" . "\n";
$data = scraperwiki::select("* from acmdata1");
print "<html><table>";
// Seven columns: every data row starts with a constant "ACM" source cell.
print "<tr><th>Source</th><th>Title</th><th>Author</th><th>Year</th><th>AddInfo</th><th>Publisher</th><th>Abstract</th></tr>";
foreach ($data as $d) {
    print "<tr>";
    print "<td>" . "ACM" . "</td>";
    print "<td>" . $d["title"] . "</td>";
    print "<td>" . $d["author"] . "</td>";
    print "<td>" . $d["year"] . "</td>";
    print "<td>" . $d["addinfo"] . "</td>";
    print "<td>" . $d["publisher"] . "</td>";
    print "<td>" . $d["abstract"] . "</td>";
Exemple #3
0
/**
 * Expands every §name§ placeholder in a design fragment by recursively
 * parsing the design called "name", then merges cached include sections into
 * the global $renderInclude.
 *
 * While the call runs, $global_current_file is set to $key (it is not restored
 * afterwards) and, when the markup is loaded here, "/$key" is appended to
 * $designPath and $commonDesignPath. Both paths are restored before returning.
 *
 * @param string       $key Design name.
 * @param string|false $out Markup to expand. When false (the default) it is
 *                          fetched with getDesign($key).
 * @return string|false The expanded markup; false if getDesign() returned false.
 */
function parsePage($key,$out=false)
{
	global $global_current_file,$designPath,$commonDesignPath,$renderInclude;

	// Remember both paths so they can be put back once nested designs are done.
	$dpath = $designPath;
	$cdpath = $commonDesignPath;
	$global_current_file = $key;

	if($out === false)
	{
		$out = getDesign($key);
		$designPath .= '/'.$key;
		$commonDesignPath .= '/'.$key;
	}

	if($out !== false)
	{
		// strpos()/substr() work on bytes and '§' is multi-byte in UTF-8, so the
		// marker width is measured rather than hard-coded.
		$marker = '§';
		$markerLen = strlen($marker);
		$offset = 0;
		while(true)
		{
			$start = strpos($out,$marker,$offset);
			if($start === false) break;
			$nameStart = $start+$markerLen;
			$end = strpos($out,$marker,$nameStart);
			if($end === false) break; // opening marker without a closing one: leave as is
			$nameLen = $end-$nameStart;
			$word = substr($out,$nameStart,$nameLen);
			// Swap "marker + name + marker" for the parsed design of that name.
			$out = substr_replace($out,parsePage($word),$start,$nameLen+2*$markerLen);
			// Resume at the replacement itself so markers inside it are scanned too.
			$offset = $start;
		}
	}

	if(getDesignCache(':addition') !== false)
	{
		// $renderInclude slot => design-cache key merged into that slot.
		// NOTE(review): 'meta' is the only key without a leading ':'; it is kept
		// exactly as found — confirm whether ':meta' was intended.
		$sections = array(
			'head'        => ':head',
			'body'        => ':body',
			'js'          => ':js',
			'jquery'      => ':jquery',
			'title'       => ':title',
			'meta'        => 'meta',
			'style'       => ':style',
			'keywords'    => ':keywords',
			'description' => ':description',
		);
		foreach($sections as $slot => $cacheKey)
		{
			$d = getDesignCache($cacheKey);
			if($d === false) continue;
			// First contribution creates the slot, later ones are appended.
			if(!isset($renderInclude[$slot])) $renderInclude[$slot] = $d;
			else $renderInclude[$slot] .= $d;
		}
	}

	$designPath = $dpath;
	$commonDesignPath = $cdpath;
	return $out;
}
Exemple #4
0
//*** Parse the HTML Header.
$strOutput .= parseHeader($intCatId, $strCommand, $intElmntId);
//*** Route to the correct HTML Body Parser.
switch ($intCatId) {
    case NAV_MYPUNCH_LOGIN:
        if ($_CONF['app']['secureLogin']) {
            header("Location: " . Request::getURI("https") . "/?cid=" . NAV_MYPUNCH_LOGIN);
            exit;
        } else {
            require_once 'inc.tplparse_login.php';
            $strOutput .= parseLogin($intElmntId, $strCommand);
        }
        break;
    case NAV_MYPUNCH_NOACCOUNT:
        require_once 'includes/inc.tplparse_noaccount.php';
        $strOutput .= parsePage($intElmntId, $strCommand);
        break;
    case NAV_MYPUNCH_USERS:
        require_once 'includes/inc.tplparse_user.php';
        if ($intElmntId == 0) {
            $intElmntId = NAV_MYPUNCH_USERS_USER;
        }
        $strOutput .= parseMenu($intCatId, $strCommand);
        $strOutput .= parseUsers($intElmntId, $strCommand);
        break;
    case NAV_MYPUNCH_PROFILE:
        require_once 'includes/inc.tplparse_profile.php';
        $strOutput .= parseMenu($intCatId, $strCommand);
        $strOutput .= parseProfile($intElmntId, $strCommand);
        break;
    case NAV_MYPUNCH_ANNOUNCEMENTS: