/**
 * Looks up the college's page through the search endpoint under $this->_baseURL
 * (presumably the Facebook Graph API, going by the method name - confirm) and
 * stores that page's "general_info" text as the Summary of the college's row in
 * the CollegeSummary table.
 *
 * Steps:
 *  1. setTitle() is given the college name with spaces removed; the search
 *     query is then built from $this->_titles.
 *  2. The first search result whose category is one of "education",
 *     "university", "Education" or "University" is selected.
 *  3. That page is fetched by id and its general_info is read.
 *
 * Nothing is written to the database when a response cannot be decoded, when
 * no result has an accepted category, or when the page has no general_info.
 * Previously those cases dereferenced null and overwrote the stored summary
 * with an empty string.
 *
 * @return void
 */
public function getFBSummary()
 {
     $this->setTitle(str_replace(" ", "", $this->_college));

     // Step 1: search for candidate pages.
     $searchUrl = $this->_baseURL . "/search?q={$this->_titles}&type=page";
     $search = json_decode(urlParser::cURL($searchUrl));
     if (!is_object($search) || !isset($search->data) || !is_array($search->data)) {
         // Request failed or the body was not the expected JSON document.
         return;
     }

     // Step 2: pick the first result filed under an accepted category.
     $acceptedCategories = array("education", "university", "Education", "University");
     $pageId = null;
     foreach ($search->data as $candidate) {
         if (isset($candidate->category, $candidate->id) && in_array($candidate->category, $acceptedCategories, true)) {
             $pageId = $candidate->id;
             break;
         }
     }
     if ($pageId === null) {
         // No matching page: do not fall through and request "{base}/0".
         return;
     }

     // Step 3: fetch the page itself and read its general_info.
     $page = json_decode(urlParser::cURL($this->_baseURL . "/" . $pageId));
     if (!is_object($page) || !isset($page->general_info) || !is_string($page->general_info)) {
         return;
     }

     // Both kinds of quote are removed outright. The original first turned " into '
     // and then deleted every ', which has exactly this net effect.
     $generalInfo = str_replace(array("\"", "'"), "", $page->general_info);

     // NOTE(review): the WHERE clause interpolates the college name directly, so a
     // name containing an apostrophe will break (or alter) the statement unless
     // updateTable() escapes it - confirm against the database wrapper.
     $array = array("Summary" => $generalInfo);
     $this->_dbConnection->updateTable("CollegeSummary", "CollegeSummary", "CollegeName", $this->_college, "CollegeID", $array, "CollegeName = '{$this->_college}'");
 }
 /**
  * Private function setCollegeList
  * No parameters
  *
  * Operates by reading Wikipedia's "List of colleges and universities in <state>" page for
  * every state and scraping all the college names from the links found there.
  * Should get 3147 colleges as of 07/25/11.
  *
  * Postcondition: the database is loaded with all the scrapeable colleges from Wikipedia.
  */
 private function setCollegeList()
 {
     // Every state (plus Washington, D.C.) whose list page is read, in lookup order.
     $states = array(
         "Alabama", "Alaska", "Arizona", "Arkansas", "California", "Colorado",
         "Connecticut", "Delaware", "Washington, D.C.", "Florida", "Georgia", "Hawaii",
         "Idaho", "Illinois", "Indiana", "Iowa", "Kansas", "Kentucky",
         "Louisiana", "Maine", "Maryland", "Massachusetts", "Michigan", "Minnesota",
         "Mississippi", "Missouri", "Montana", "Nebraska", "Nevada", "New Hampshire",
         "New Jersey", "New Mexico", "New York", "North Carolina", "North Dakota", "Ohio",
         "Oklahoma", "Oregon", "Pennsylvania", "Rhode Island", "South Carolina", "South Dakota",
         "Tennessee", "Texas", "Utah", "Vermont", "Virginia", "Washington",
         "West Virginia", "Wisconsin", "Wyoming",
     );
     // Keywords handed to urlParser::compareSearchArray below; presumably a title
     // must contain one of them to be kept - confirm against that helper.
     $keywords = array("University", "College");

     // Collect the "title" of every link returned for each state's list page.
     $allTitles = array();
     foreach ($states as $state) {
         $stateLinks = $this->_wikipedia->getLinks("List_of_colleges_and_universities_in_" . $state);
         for ($k = 0, $linkCount = count($stateLinks); $k < $linkCount; $k++) {
             $allTitles[] = $stateLinks[$k]["title"];
         }
     }

     $collegeNames = urlParser::compareSearchArray($allTitles, $keywords, true);

     // Insert one CollegeSummary row per title that came back from the filter.
     $total = count($collegeNames);
     $row = 0;
     while ($row < $total) {
         $this->_dbConnection->insertIntoTable("CollegeSummary", array("CollegeName" => $collegeNames[$row]));
         $row++;
     }
 }
 /**
  * Retrieves the websiteURL via externalLinks
  */
 public function getUrl()
 {
     // Returns the first external link of $this->_college that survives the ".edu"
     // filter, or null when there is none. Previously an empty result was read with
     // $filtered[0] unconditionally, raising an undefined-offset error.
     $externalLinks = $this->getExternalLinks($this->_college);
     if (!is_array($externalLinks)) {
         // Lookup failed; there is nothing to filter.
         return null;
     }

     // Each entry carries its URL under the "*" key; entries without it are skipped.
     $urls = array();
     foreach ($externalLinks as $link) {
         if (isset($link["*"])) {
             $urls[] = $link["*"];
         }
     }

     // NOTE(review): the meaning of the third argument (false here, true in
     // setCollegeList) is defined by urlParser::compareSearchArray - confirm there.
     $eduUrls = urlParser::compareSearchArray($urls, array(".edu"), false);

     return (is_array($eduUrls) && isset($eduUrls[0])) ? $eduUrls[0] : null;
 }
<?php

// Class loader: the first time a class is referenced, "<ClassName>.php" is pulled in
// (resolved through the include path). Registered with spl_autoload_register()
// because the magic __autoload() function was deprecated in PHP 7.2 and declaring
// it is a fatal error since PHP 8.0.
spl_autoload_register(function ($class) {
    require_once $class . '.php';
});
// Run the wiki parser and dump whatever it returns in human-readable form.
$wikiParserResult = urlParser::wikiParser();
print_r($wikiParserResult);
Esempio n. 5
0
<?php

/**
 * Tester File for the urlParser.php file.
 * Provides a log of all the tests run on this class
 * Should be referenced when debugging a class when something goes wrong.
 * ###########################################
 * LOG:										##
 * This is where the log should be held		##
 * 07/25: Log Created						##
 * 											##
 * ###########################################
 */
// Class loader for every class used in the script: the first time a class is
// referenced, "<ClassName>.php" is pulled in (resolved through the include path).
// Put this registration at the beginning of every PHP main page.
// spl_autoload_register() is used because the magic __autoload() function was
// deprecated in PHP 7.2 and declaring it is a fatal error since PHP 8.0.
spl_autoload_register(function ($class) {
    require_once $class . '.php';
});
// Simple smoke test: request the Harvard University article from the Wikipedia
// API endpoint and dump whatever urlParser::cURL returns for it.
// Timing tests should be executed here as well.
// Retrieving the page content works now; parsing all of its links is still to be implemented.
$harvardResponse = urlParser::cURL("http://en.wikipedia.org/w/api.php?format=json&action=query&titles=Harvard_University&rvprop=content&prop=revisions&redirects=1");
print_r($harvardResponse);
echo "hello World";

// --- Request parameters for the CSAPI test call -------------------------------
$baseurl = "http://localhost:8888/CSAPI.php?";
// all possible valid queries
$queryArr = array("Summary", "Research", "Professors", "Pictures", "Majors", "Links", "DivSports", "Clubs", "Arts");
$formatArr = array("php", "json");

$query = $queryArr[0];
// 1 - 3,000 is a valid range (few numbers above 3,000 will work too)
$id = "99";
$attribute = "CollegeName";
$format = $formatArr[0];

// query and id MUST be specified
$url = $baseurl . "query=" . $query . "&id=" . $id . "&format=" . $format;
if (!empty($attribute)) {
    // attribute parameter is optional
    $url = $url . "&attribute=" . $attribute;
}

// cURLs the api url to get page contents (output of api call)
$source = urlParser::cURL($url);

// Decode according to the requested format. The original used "=" (assignment)
// in both conditions, so both branches always ran and json_decode() always
// overwrote the unserialize() result.
$decoded = null;
if ($format === "php") {
    // NOTE(review): unserialize() on a response body can instantiate arbitrary
    // classes; tolerable only while the endpoint is the local test server.
    $decoded = unserialize($source);
} elseif ($format === "json") {
    $decoded = json_decode($source);
}

// NOTE(review): only a newline is written below; $decoded is never used - confirm
// whether the decoded response was meant to be logged.
$stringToWrite = "\n";
// Mode "x+" creates the file and makes fopen() fail (returning false) when it
// already exists, so the handle is checked before it is used.
$stream = fopen("/csapitestoutput.txt", "x+");
if ($stream !== false) {
    fwrite($stream, $stringToWrite);
    fclose($stream);
}