예제 #1
0
 /**
  * Scrape election results for every riding from locally stored HTML files.
  *
  * Each path returned by parent::getRidingPaths() is parsed with DOMDocument.
  * The riding name, the per-party votes, the number of eligible voters and the
  * total number of votes are read from the page and stored on a new Riding,
  * after which updateTallies() is called on it.
  *
  * A riding whose file cannot be read, or whose page lacks one of the expected
  * nodes, is logged and skipped so that one bad page does not abort the run.
  * All structural checks happen before the first value is written to the
  * Riding, so a skipped riding only ever has its source set.
  *
  * @param string $lang Language code handed to langPathPartLookup() and generateSource().
  *
  * @return void
  */
 public static function scrape($lang = 'en')
 {
     // NOTE(review): $logger is never read in this method; confirm whether
     // getLogger() is called only for its side effect.
     $logger = self::getLogger('Starting offline scrape...');
     $langPathPart = self::langPathPartLookup($lang);
     $ridingPaths = parent::getRidingPaths(self::JURISDICTION_SHORTHAND, $langPathPart);
     $ridingCount = count($ridingPaths);
     self::addLog("{$ridingCount} ridings found");
     foreach ($ridingPaths as $i => $path) {
         $sourceUrl = self::generateSource($lang, $i);
         self::addLog("Getting results for riding {$i} {$path}...");
         $riding = new Riding();
         $riding->setSource($sourceUrl);

         // An unreadable or empty file cannot be parsed; skip it instead of
         // feeding FALSE/'' to DOMDocument::loadHTML().
         $html = file_get_contents($path);
         if ($html === FALSE || $html === '') {
             self::addLog("Warning: no content for riding {$i} at {$path}");
             continue;
         }

         $doc = new \DOMDocument();
         self::setErrorHandler();
         $doc->loadHTML($html);
         self::setErrorHandler(TRUE);
         $xpath = new \DOMXPath($doc);

         // Riding name: caption text with its first 50 bytes dropped.
         $xPathQuery = '//*[@id="grdResultsucElectoralDistrictResult' . $i . '"]/caption';
         self::addLog("xPath: {$xPathQuery}");
         $captionNodes = $xpath->query($xPathQuery);
         $captionNode = $captionNodes ? $captionNodes->item(0) : NULL;
         if ($captionNode === NULL) {
             self::addLog("Warning: no caption found for riding {$i} at {$path}");
             continue;
         }
         $ridingName = trim(substr($captionNode->textContent, 50));
         self::addLog($ridingName);

         // The results are read from the first table of the document.
         $tables = $doc->getElementsByTagName('table');
         $tablesLength = $tables->length;
         self::addLog("Found {$tablesLength} items in \$tables");
         if ($tablesLength === 0) {
             self::addLog("Warning: no table found for riding {$i} at {$path}");
             continue;
         }
         $rows = $tables->item(0)->getElementsByTagName('tr');
         $numRows = $rows->length;
         self::addLog("There are {$numRows} rows\n");
         if ($numRows === 0) {
             self::addLog("Warning: no rows found for riding {$i} at {$path}");
             continue;
         }

         // Locate the eligible-voters paragraph and the totals cell up front so
         // that nothing is written to the Riding when either is missing.
         $votersQuery = '//*[@id="divElectorNumberucElectoralDistrictResult' . $i . '"]/p';
         $voterNodes = $xpath->query($votersQuery);
         $voterNode = $voterNodes ? $voterNodes->item(0) : NULL;
         $totalCell = $rows->item($numRows - 1)->getElementsByTagName('td')->item(2);
         if ($voterNode === NULL || $totalCell === NULL) {
             self::addLog("Warning: voter count or total votes missing for riding {$i} at {$path}");
             continue;
         }

         $riding->setName(utf8_decode($ridingName));

         // Row 0 and the last row are not per-party rows: the first is skipped
         // and the last one is read separately below as the totals row.
         for ($j = 1; $j < $numRows - 1; $j++) {
             $cells = $rows->item($j)->getElementsByTagName('td');
             if ($cells->length < 3) {
                 // Not enough <td> cells to hold a party and its votes.
                 continue;
             }
             $party = $cells->item(0)->textContent;
             if (strpos($party, ':') !== FALSE) {
                 // First cells containing a colon are not recorded as parties.
                 continue;
             }
             // Keeping digits only also removes thousands separators.
             $votes = preg_replace("/[^0-9]/", "", $cells->item(2)->textContent);
             self::addLog("Scrapped: {$party}\t{$votes}\n");
             $riding->setVotes($party, $votes);
         }

         // Eligible voters: paragraph text with its first 80 bytes dropped.
         $numVoters = trim(substr($voterNode->textContent, 80));
         $numVoters = str_replace(',', '', $numVoters);
         self::addLog("Number of voters: {$numVoters}");
         $riding->setEligibleVoters($numVoters);

         // Total votes: third cell of the last row.
         $totalVotes = str_replace(',', '', $totalCell->textContent);
         self::addLog("Number of total votes: {$totalVotes}");
         $riding->setAllRidingVotes($totalVotes);

         $riding->updateTallies();
     }
 }
예제 #2
0
 /**
  * Scrape election results for every riding from its downloaded results page.
  *
  * For each identifier returned by getRidingIdentifiers(), the page at
  * getFinalPath() is downloaded and searched with grep() and regular
  * expressions for the riding name, the party codes with their votes, the
  * number of eligible voters and the total number of votes. The values are
  * stored on a new Riding, after which updateTallies() is called on it.
  *
  * A riding whose page cannot be downloaded is logged and skipped. Missing
  * party or vote matches are reported through addError() but do not stop the
  * processing of that riding.
  *
  * @return void
  */
 public static function scrape()
 {
     // NOTE(review): $logger is never read in this method; confirm whether
     // getLogger() is called only for its side effect.
     $logger = self::getLogger('Starting scrape...');
     $ridingIdentifiers = self::getRidingIdentifiers();
     self::addLog('Got ' . count($ridingIdentifiers) . ' ridings: ' . implode(', ', $ridingIdentifiers));
     foreach ($ridingIdentifiers as $i) {
         $url = self::getFinalPath($i);
         self::addLog("Getting results for riding {$i} {$url}...");
         $riding = new Riding();
         $riding->setSource($url);
         $ridingCount = count(Riding::getAllRidings());
         self::addLog("Riding count is {$ridingCount}");

         $html = @file_get_contents($url);
         if ($html === FALSE) {
             self::addLog("Warning: no content for riding {$i} at {$url}");
             continue;
         }

         // Riding name: text captured after the "Unofficial Poll Results - <number>"
         // heading, with its last two characters removed.
         $string = self::grep($html, 'Unofficial Poll Results', TRUE);
         $ridingName = self::grep($string, 'Unofficial Poll Results - [0-9][0-9]* ([\\. A-Z-]*)')[0];
         $ridingName = substr_replace($ridingName, '', -2);
         self::addLog("Got ridingName: {$ridingName}");
         $riding->setName($ridingName);

         // Party codes come from the CHPA cells of the first CHeadCA match.
         $string = self::grep($html, 'CHeadCA', TRUE);
         preg_match_all("|<DIV CLASS=CHPA>([A-Z]*)</DIV>|", $string[0], $matches);
         self::addLog('Got for party: ' . count($matches[1]) . ' matches: ' . implode(',', $matches[1]));

         // The same ColFooter lookup feeds the party votes, the voter count and
         // the total votes, so it is performed once and indexed three times.
         $footerMatches = self::grep($html, 'ColFooter', TRUE);

         preg_match_all("|<TD Class=ColFooter ALIGN=RIGHT VALIGN=TOP>([ 0-9,]*)<BR>|", $footerMatches[2], $matchesVotes);
         self::addLog('Got for votes: ' . count($matchesVotes[1]) . ' matches: ' . implode(' ', $matchesVotes[1]));

         // Report an error only when no party was actually captured.
         if (count($matches[1]) === 0) {
             self::addError("No matches found for party");
         }
         foreach ($matches[1] as $index => $party) {
             // A party without a corresponding vote cell yields an empty value
             // instead of an undefined-offset notice.
             $votes = isset($matchesVotes[1][$index]) ? str_replace(',', '', $matchesVotes[1][$index]) : '';
             // Compare against '' so that a genuine count of "0" is not
             // reported as missing.
             if ($votes === '') {
                 self::addError("No votes found for {$party} in {$ridingName}");
             }
             self::addLog("Setting {$votes} votes for {$party} in {$ridingName}");
             $riding->setVotes($party, $votes);
         }

         // Find and set eligible voters.
         preg_match_all("|<TD Class=ColFooter ALIGN=RIGHT VALIGN=TOP>([0-9,]*)|", $footerMatches[1], $matchesVotes);
         self::addLog('Got for voters count ' . count($matchesVotes[1]) . ' matches: ' . implode(' ', $matchesVotes[1]));
         $numVoters = isset($matchesVotes[1][0]) ? str_replace(',', '', $matchesVotes[1][0]) : '';
         self::addLog("Number of voters: {$numVoters}");
         $riding->setEligibleVoters($numVoters);

         // Find and save all votes (aka total votes).
         preg_match_all("|<TD Class=ColFooter ALIGN=RIGHT VALIGN=TOP>([ 0-9,]*)</TABLE>|", $footerMatches[5], $matchesVotes);
         self::addLog('Got ' . count($matchesVotes[1]) . ' matches: ' . implode(' ', $matchesVotes[1]));
         $totalVotes = isset($matchesVotes[1][0]) ? str_replace(',', '', $matchesVotes[1][0]) : '';
         self::addLog("Number of total votes: {$totalVotes}");
         $riding->setAllRidingVotes($totalVotes);

         // Update tallies.
         $riding->updateTallies();
     }
 }