/**
 * Hand one MAPLight interest record to do_proc_interest() when bill-name
 * lookup is switched on.
 *
 * Nothing happens unless $this->bill_name_maplight_lookup is truthy. When it
 * is, metavid2mvWiki.inc.php is included (once) and the record's 'key' and
 * 'name' entries are passed on.
 *
 * @param mixed $interest Value indexed with 'key' and 'name' (presumably an array).
 */
function procMapLightInterest($interest) {
    // NOTE(review): neither global is read in this body. They are kept because
    // the include below executes in this function's scope and may depend on
    // them being bound to the globals -- confirm before removing.
    global $mvMaxContribPerInterest, $mvMaxForAgainstBills;

    if (!$this->bill_name_maplight_lookup) {
        return;
    }

    include_once 'metavid2mvWiki.inc.php';

    $interestKey = $interest['key'];
    $interestName = $interest['name'];
    do_proc_interest($interestKey, $interestName);
}
/**
 * Build and save a "Congress Person" template page for every page in the
 * Congress_Person category, then import each person's GovTrack photo.
 *
 * For each person the function:
 *  - reads the 'GovTrack Person ID', 'MAPLight Person ID' and 'Name OCR'
 *    semantic properties from the person's wiki page;
 *  - when no GovTrack id is on the page, tries to find the person in the
 *    GovTrack people array by 'metavidid' and then by name, and dies if that
 *    fails;
 *  - walks $valid_attributes to assemble the template text, scraping
 *    maplight.org for the attributes that need it;
 *  - appends the funding interests scraped from maplight.org and saves the
 *    page through do_update_wiki_page().
 *
 * @param bool   $doInterestLookup When true, do_proc_interest() is called for
 *                                 every funding interest found.
 * @param string $forcePerson      Not used by this implementation.
 * @param bool   $force            Passed through to do_update_wiki_page().
 */
function do_people_insert($doInterestLookup = false, $forcePerson = '', $force = false) {
    global $valid_attributes, $states_ary;
    $dbr = wfGetDB(DB_SLAVE);
    include_once 'scrape_and_insert.inc.php';
    $mvScrape = new MV_BaseScraper();

    // Get all people from the govtrack db (should not have to do this all the
    // time). The array is handed to the loader to be filled, which avoids
    // duplicating it (presumably taken by reference -- confirm in the loader).
    $govtrackDB = array();
    getGovTrackPeopleDB($govtrackDB);

    // Get all people from the congress people category.
    $result = $dbr->select('categorylinks', 'cl_sortkey', array('cl_to' => 'Congress_Person'));
    if ($dbr->numRows($result) == 0) {
        die('could not find people: ' . "\n");
    }
    $person_ary = array();
    while ($person = $dbr->fetchObject($result)) {
        $person_ary[] = $person;
    }

    // The store does not depend on the person, so fetch it once.
    $smwStore =& smwfGetStore();

    foreach ($person_ary as $person) {
        $person_name = $person->cl_sortkey;
        // Get person data from the wiki.
        $person_title = Title::newFromText($person_name);
        // Reset per person so someone without a MAPLight id never inherits
        // the key (and therefore the contributions) of the previous person.
        $mapk = null;

        // Check for a govtrack key in the page.
        $propTitle = Title::newFromText('GovTrack Person ID', SMW_NS_PROPERTY);
        $smwProps = $smwStore->getPropertyValues($person_title, $propTitle);
        if (count($smwProps) != 0) {
            $v = current($smwProps);
            $person->gov_track_id = $v->getXSDValue();
        } else {
            print "person: {$person_name} has no GovTrack Person ID make sure to include this on their page\n";
        }

        if (isset($person->gov_track_id)) {
            setGovTrackSpecifcAttr($person, $govtrackDB[$person->gov_track_id]);
        } else {
            // Check for a matching metavid id in $govtrackDB.
            $metavid_key = str_replace(' ', '_', $person_name);
            foreach ($govtrackDB as $gov_track_person) {
                if (isset($gov_track_person['metavidid']) && $gov_track_person['metavidid'] == $metavid_key) {
                    setGovTrackSpecifcAttr($person, $gov_track_person);
                }
            }

            // Did not find a metavid id: try the name test. This checks
            // gov_track_id, the same property the failure check below uses
            // (assumes setGovTrackSpecifcAttr() sets it -- confirm).
            if (!isset($person->gov_track_id)) {
                $person_name_lc = strtolower($person_name);
                $nparts = explode(' ', $person_name);
                $first_part = strtolower($nparts[0]);
                $last_part = strtolower($nparts[count($nparts) - 1]);
                foreach ($govtrackDB as $gov_track_person) {
                    if (isset($gov_track_person['middlename'])) {
                        $gov_name = $gov_track_person['firstname'] . ' '
                            . substr($gov_track_person['middlename'], 0, 1) . '. '
                            . $gov_track_person['lastname'];
                        // First check for an exact "First M. Last" match.
                        if (strtolower($gov_name) == $person_name_lc) {
                            setGovTrackSpecifcAttr($person, $gov_track_person);
                            break;
                        }
                    }
                    // Else compare the first and last words of the page name.
                    if (strtolower($gov_track_person['firstname']) == $first_part
                        && strtolower($gov_track_person['lastname']) == $last_part
                    ) {
                        setGovTrackSpecifcAttr($person, $gov_track_person);
                        break;
                    }
                }
            }
            if (!isset($person->gov_track_id)) {
                die("\n could not find gov track id for {$person_name} please add manually or remove from Congress_Person category\n ");
            }
        }

        // Set the maplight key (not in sunlight api).
        $propTitle = Title::newFromText('MAPLight Person ID', SMW_NS_PROPERTY);
        $smwProps = $smwStore->getPropertyValues($person_title, $propTitle);
        if (count($smwProps) != 0) {
            $v = current($smwProps);
            $mapk = $v->getXSDValue();
            $person->maplight_id = $mapk;
        } else {
            print "person: {$person_name} has no MAPLight Person ID could not lookup with sunlight api?\n";
        }

        // Set $person->name_ocr.
        $propTitle = Title::newFromText('Name OCR', SMW_NS_PROPERTY);
        $smwProps = $smwStore->getPropertyValues($person_title, $propTitle);
        if (count($smwProps) != 0) {
            $v = current($smwProps);
            $person->name_ocr = $v->getXSDValue();
        }

        $page_body = '{{Congress Person|' . "\n";
        foreach ($valid_attributes as $dbKey => $attr) {
            list($name, $desc) = $attr;
            if ($dbKey == 'gov_track_id') {
                // We key everything to the govtrack id, so make sure it is there.
                $page_body .= "GovTrack Person ID=" . $person->gov_track_id . "|\n";
            } elseif ($dbKey == 'total_received') {
                if (!$mapk) {
                    print 'no mapkey for total_received' . "\n";
                } else {
                    $raw_results = $mvScrape->doRequest('http://www.maplight.org/map/us/legislator/' . $mapk);
                    preg_match('/Contributions\\sReceived\\:\\s\\$([^<]*)/', $raw_results, $matches);
                    if (isset($matches[1])) {
                        $page_body .= "{$name}=\$" . $matches[1] . "|\n";
                    }
                }
            } elseif ($dbKey == 'roles') {
                if (!empty($person->roles)) {
                    $i = 1;
                    foreach ($person->roles as $role) {
                        $page_body .= "Role {$i} Type=" . ucfirst($role['type']) . "|\n";
                        $page_body .= "Role {$i} Party=" . $role['party'] . "|\n";
                        $page_body .= "Role {$i} State=" . $role['state'] . "|\n";
                        $page_body .= "Role {$i} Start Date=" . $role['startdate'] . "|\n";
                        $page_body .= "Role {$i} End Date=" . $role['enddate'] . "|\n";
                        $i++;
                    }
                }
            } elseif ($dbKey == 'committee') {
                if (isset($person->committee)) {
                    $i = 1;
                    foreach ($person->committee as $committee) {
                        if (isset($committee['committee'])) {
                            $page_body .= "Committee {$i}= " . $committee['committee'] . "|\n";
                        }
                        if (isset($committee['subcommittee'])) {
                            $page_body .= "Subcommittee {$i}= " . $committee['subcommittee'] . "|\n";
                        }
                        if (isset($committee['role'])) {
                            $page_body .= "Committee Role {$i}= " . $committee['role'] . "|\n";
                        }
                        $i++;
                    }
                }
            } elseif ($dbKey == 'contribution_date_range') {
                if (!$mapk) {
                    print 'out of order attr process missing mapk' . "\n";
                } else {
                    $raw_results = $mvScrape->doRequest('http://www.maplight.org/map/us/legislator/' . $mapk);
                    preg_match('/Showing\\scontributions<\\/dt><dd>([^<]*)</', $raw_results, $matches);
                    if (isset($matches[1])) {
                        $page_body .= "{$name}=" . $matches[1] . "|\n";
                    }
                }
            } elseif ($dbKey == 'maplight_id') {
                if (empty($person->maplight_id)) {
                    // No id on the page: try to find it through the maplight
                    // search. The govtrack record is an array (see the name
                    // test above), so its fields are read with array syntax.
                    $person_lookup = $govtrackDB[$person->gov_track_id];
                    $raw_results = $mvScrape->doRequest(
                        'http://maplight.org/map/us/legislator/search/'
                        . urlencode($person_lookup['lastname']) . '+' . urlencode($person_lookup['firstname'])
                    );
                    preg_match_all('/map\\/us\\/legislator\\/([^"]*)">(.*)<\\/a>.*<td>([^<]*)<.*<td>([^<]*)<.*<td>([^<]*)<.*<td>([^<]*)</U', $raw_results, $matches);

                    // Point system: the search result with the most points wins.
                    $point = array();
                    $title_lookup = array('Rep.' => 'House', 'Sen.' => 'Senate');
                    if (isset($matches[2][0])) {
                        foreach ($matches[2] as $k => $name_html) {
                            $score = 0;
                            // The link text is split on the comma as "Last, First";
                            // trim both parts and pad so a result without a comma
                            // cannot raise an undefined-offset notice.
                            $name_parts = array_pad(array_map('trim', explode(',', strip_tags($name_html))), 2, '');
                            $lname = $name_parts[0];
                            $fname = $name_parts[1];
                            if (strtolower($person_lookup['firstname']) == strtolower($fname)) {
                                $score += 2;
                            }
                            if (strtolower($person_lookup['lastname']) == strtolower($lname)) {
                                $score += 2;
                            }
                            if (isset($person_lookup['state']) && $person_lookup['state'] == $matches[3][$k]) {
                                $score++;
                            }
                            if (isset($person_lookup['district']) && $person_lookup['district'] == $matches[4][$k]) {
                                $score++;
                            }
                            if (isset($person_lookup['party']) && $person_lookup['party'] == $matches[5][$k]) {
                                $score++;
                            }
                            // Compare against this row's sixth capture, not the
                            // whole capture array.
                            if (isset($person_lookup['title'])
                                && isset($title_lookup[$person_lookup['title']])
                                && $title_lookup[$person_lookup['title']] == $matches[6][$k]
                            ) {
                                $score++;
                            }
                            $point[$k] = $score;
                        }
                        $max = 0;
                        $mapk = null;
                        foreach ($point as $k => $v) {
                            if ($v > $max) {
                                $mapk = $matches[1][$k];
                                $max = $v;
                            }
                        }
                    }
                } else {
                    $mapk = $person->maplight_id;
                }
                $page_body .= "{$name}=" . $mapk . "|\n";
            } elseif (isset($sulightData[$dbKey])) {
                // NOTE(review): $sulightData is never assigned in this function;
                // the branch is kept in case the include above defines it in
                // this scope -- confirm, otherwise it is dead code.
                $page_body .= $name . '=' . $sulightData[$dbKey] . "| \n";
            } elseif (isset($person->{$dbKey}) && trim($person->{$dbKey}) != '') {
                if ($dbKey == 'state') {
                    // Replace the stored state value with its $states_ary entry.
                    $person->state = $states_ary[$person->state];
                }
                $page_body .= "{$name}={$person->{$dbKey}}| \n";
            }
        }

        // If we have the maplight key add in all contributions and process
        // the contributors.
        if (!$mapk) {
            print 'missing mapkey' . "\n";
        } else {
            $raw_results = $mvScrape->doRequest('http://www.maplight.org/map/us/legislator/' . $mapk);
            preg_match_all('/\\/map\\/us\\/interest\\/([^"]*)">([^<]*)<.*\\$([^\\<]*)</U', $raw_results, $matches);
            if (isset($matches[1])) {
                foreach ($matches[1] as $k => $val) {
                    $hr_inx = $k + 1;
                    $interest_name = html_entity_decode($matches[2][$k]);
                    $page_body .= "Funding Interest {$hr_inx}=" . $interest_name . "|\n";
                    $page_body .= "Funding Amount {$hr_inx}=\$" . $matches[3][$k] . "|\n";
                    if ($doInterestLookup) {
                        // Make sure the interest has been processed.
                        do_proc_interest($val, $interest_name);
                    }
                }
            }
        }

        // Close the template and save the page.
        $page_body .= '}}';
        do_update_wiki_page($person_title, $page_body, '', $force);
    }

    // Download/upload all the photos.
    global $wgTmpDirectory;
    foreach ($person_ary as $person) {
        $imgTitle = Title::makeTitle(NS_IMAGE, $person->cl_sortkey . '.jpg');
        $url = 'http://www.govtrack.us/data/photos/' . $person->gov_track_id . '-100px.jpeg';
        // Check that the url exists.
        if (!url_exists($url)) {
            print " no image found for: {$person->cl_sortkey}\n";
            continue;
        }

        // Check for an existing image before creating the temp file, so a
        // skipped person does not leave a stray file behind.
        $image = wfLocalFile($imgTitle);
        if ($image->exists()) {
            echo $imgTitle->getDBkey() . " already in the wiki\n";
            continue;
        }

        $local_file = tempnam($wgTmpDirectory, 'WEBUPLOAD');

        // Try the download up to 10 times.
        $copied = false;
        for ($ct = 0; $ct < 10; $ct++) {
            if (@copy($url, $local_file)) {
                print "copy success\n";
                $copied = true;
                break;
            }
            print "failed to copy {$url} to local_file (tring again) \n";
        }
        if (!$copied) {
            // Do not publish the empty temp file when every attempt failed.
            print 'complete failure' . "\n";
            if (file_exists($local_file)) {
                unlink($local_file);
            }
            continue;
        }

        # Stash the file
        echo "Saving " . $imgTitle->getDBkey() . "...";
        $archive = $image->publish($local_file);
        // The temp file is no longer needed whether or not publishing worked.
        if (file_exists($local_file)) {
            unlink($local_file);
        }
        if (!$archive->isGood()) {
            echo "failed.\n";
            continue;
        }

        echo "importing...";
        // The category query only selects cl_sortkey, so fall back to it when
        // name_clean was not set on the person.
        $page_name = isset($person->name_clean) ? $person->name_clean : $person->cl_sortkey;
        $comment = 'Image file for [[' . $page_name . ']]';
        $license = '';
        if ($image->recordUpload($archive, $comment, $license)) {
            # We're done!
            echo "done.\n";
        } else {
            echo "failed.\n";
        }
    }
}
/**
 * Build and save a "Congress Person" template page for every row of the
 * `metavid`.`people` table, then import each person's photo from
 * opensecrets.org.
 *
 * For each row the function walks $valid_attributes to assemble the template
 * text (scraping maplight.org for the attributes that need it), appends the
 * funding interests scraped from maplight.org, adds the full name, and saves
 * the page through do_update_wiki_page().
 *
 * @param bool   $doInterestLookup When true, do_proc_interest() is called for
 *                                 every funding interest found.
 * @param string $forcePerson      When non-empty, only rows whose `name_clean`
 *                                 is LIKE this value are processed.
 * @param bool   $force            Passed through to do_update_wiki_page().
 */
function do_people_insert($doInterestLookup = false, $forcePerson = '', $force = false) {
    global $valid_attributes, $states_ary;
    $dbr = wfGetDB(DB_SLAVE);
    include_once 'scrape_and_insert.inc.php';
    $mvScrape = new MV_BaseScraper();

    // Do the people query. The name is quoted by the database layer so that
    // apostrophes (or anything else) in it cannot break or alter the query.
    if ($forcePerson != '') {
        $res = $dbr->query('SELECT * FROM `metavid`.`people` WHERE `name_clean` LIKE ' . $dbr->addQuotes($forcePerson));
    } else {
        $res = $dbr->query("SELECT * FROM `metavid`.`people`");
    }
    if ($dbr->numRows($res) == 0) {
        die('could not find people: ' . "\n");
    }
    $person_ary = array();
    while ($person = $dbr->fetchObject($res)) {
        $person_ary[] = $person;
    }

    foreach ($person_ary as $person) {
        $person_title = Title::newFromUrl($person->name_clean);
        // Semantic data via template.
        $mapk = null;
        $page_body = '{{Congress Person|' . "\n";
        foreach ($valid_attributes as $dbKey => $attr) {
            list($name, $desc) = $attr;
            if ($dbKey == 'district') {
                // Special case for district: only written when non-zero.
                if (!empty($person->district) && $person->district != 0) {
                    $page_body .= "{$name}=" . text_number($person->district) . ' District' . "|\n";
                }
            } elseif ($dbKey == 'total_received') {
                if (!$mapk) {
                    print 'no mapkey for total_received' . "\n";
                } else {
                    $raw_results = $mvScrape->doRequest('http://www.maplight.org/map/us/legislator/' . $mapk);
                    preg_match('/Contributions\\sReceived\\:\\s\\$([^<]*)/', $raw_results, $matches);
                    if (isset($matches[1])) {
                        $page_body .= "{$name}=\$" . $matches[1] . "|\n";
                    }
                }
            } elseif ($dbKey == 'contribution_date_range') {
                if (!$mapk) {
                    print 'out of order attr proccess missing mapk' . "\n";
                } else {
                    $raw_results = $mvScrape->doRequest('http://www.maplight.org/map/us/legislator/' . $mapk);
                    preg_match('/Showing\\scontributions<\\/dt><dd>([^<]*)</', $raw_results, $matches);
                    if (isset($matches[1])) {
                        $page_body .= "{$name}=" . $matches[1] . "|\n";
                    }
                }
            } elseif ($dbKey == 'maplight_id') {
                if (empty($person->maplight_id)) {
                    // No id stored: try to find it through the maplight search.
                    $raw_results = $mvScrape->doRequest(
                        'http://maplight.org/map/us/legislator/search/'
                        . urlencode($person->last) . '+' . urlencode($person->first)
                    );
                    preg_match_all('/map\\/us\\/legislator\\/([^"]*)">(.*)<\\/a>.*<td>([^<]*)<.*<td>([^<]*)<.*<td>([^<]*)<.*<td>([^<]*)</U', $raw_results, $matches);

                    // Point system: the search result with the most points wins.
                    $point = array();
                    $title_lookup = array('Rep.' => 'House', 'Sen.' => 'Senate');
                    if (isset($matches[2][0])) {
                        foreach ($matches[2] as $k => $name_html) {
                            $score = 0;
                            // The link text is split on the comma as "Last, First";
                            // trim both parts and pad so a result without a comma
                            // cannot raise an undefined-offset notice.
                            $name_parts = array_pad(array_map('trim', explode(',', strip_tags($name_html))), 2, '');
                            $lname = $name_parts[0];
                            $fname = $name_parts[1];
                            if (strtolower($person->first) == strtolower($fname)) {
                                $score += 2;
                            }
                            if (strtolower($person->last) == strtolower($lname)) {
                                $score += 2;
                            }
                            if ($person->state == $matches[3][$k]) {
                                $score++;
                            }
                            if ($person->district == $matches[4][$k]) {
                                $score++;
                            }
                            if ($person->party == $matches[5][$k]) {
                                $score++;
                            }
                            // Compare against this row's sixth capture, not the
                            // whole capture array.
                            if (isset($title_lookup[$person->title])
                                && $title_lookup[$person->title] == $matches[6][$k]
                            ) {
                                $score++;
                            }
                            $point[$k] = $score;
                        }
                        $max = 0;
                        $mapk = null;
                        foreach ($point as $k => $v) {
                            if ($v > $max) {
                                $mapk = $matches[1][$k];
                                $max = $v;
                            }
                        }
                    }
                } else {
                    $mapk = $person->maplight_id;
                }
                $page_body .= "{$name}=" . $mapk . "|\n";
            } elseif (isset($person->{$dbKey}) && trim($person->{$dbKey}) != '') {
                // Attributes with no matching, non-empty column are skipped.
                if ($dbKey == 'state') {
                    // Replace the stored state value with its $states_ary entry.
                    $person->state = $states_ary[$person->state];
                }
                $page_body .= "{$name}={$person->{$dbKey}}| \n";
            }
        }

        // If we have the maplight key add in all contributions and process
        // the contributors.
        if (!$mapk) {
            print 'missing mapkey' . "\n";
        } else {
            $raw_results = $mvScrape->doRequest('http://www.maplight.org/map/us/legislator/' . $mapk);
            preg_match_all('/\\/map\\/us\\/interest\\/([^"]*)">([^<]*)<.*\\$([^\\<]*)</U', $raw_results, $matches);
            if (isset($matches[1])) {
                foreach ($matches[1] as $k => $val) {
                    $hr_inx = $k + 1;
                    $interest_name = html_entity_decode($matches[2][$k]);
                    $page_body .= "Funding Interest {$hr_inx}=" . $interest_name . "|\n";
                    $page_body .= "Funding Amount {$hr_inx}=\$" . $matches[3][$k] . "|\n";
                    if ($doInterestLookup) {
                        // Make sure the interest has been processed.
                        do_proc_interest($val, $interest_name);
                    }
                }
            }
        }

        // Add in the full name attribute and close the template.
        $full_name = $person->title . ' ' . $person->first . ' ' . $person->middle . ' ' . $person->last;
        $page_body .= "Full Name=" . $full_name . "| \n";
        $page_body .= '}}';

        // Add in basic info below the template, falling back to name_clean
        // when every name part is empty.
        if (trim($full_name) == '') {
            $full_name = $person->name_clean;
        }
        $page_body .= "\n" . 'Person page For <b>' . $full_name . "</b><br />\n";
        do_update_wiki_page($person_title, $page_body, '', $force);
    }

    // Download/upload all the photos.
    global $wgTmpDirectory;
    foreach ($person_ary as $person) {
        $imgTitle = Title::makeTitle(NS_IMAGE, $person->name_clean . '.jpg');
        $url = 'http://www.opensecrets.org/politicians/img/pix/' . $person->osid . '.jpg';

        // Check for an existing image before creating the temp file, so a
        // skipped person does not leave a stray file behind.
        $image = wfLocalFile($imgTitle);
        if ($image->exists()) {
            echo $imgTitle->getDBkey() . " already in the wiki\n";
            continue;
        }

        $local_file = tempnam($wgTmpDirectory, 'WEBUPLOAD');

        // Try the download up to 10 times.
        $copied = false;
        for ($ct = 0; $ct < 10; $ct++) {
            if (@copy($url, $local_file)) {
                print "copy success\n";
                $copied = true;
                break;
            }
            print "failed to copy {$url} to local_file (tring again) \n";
        }
        if (!$copied) {
            // Do not publish the empty temp file when every attempt failed.
            print 'complete failure' . "\n";
            if (file_exists($local_file)) {
                unlink($local_file);
            }
            continue;
        }

        # Stash the file
        echo "Saving " . $imgTitle->getDBkey() . "...";
        $archive = $image->publish($local_file);
        // The temp file is no longer needed whether or not publishing worked.
        if (file_exists($local_file)) {
            unlink($local_file);
        }
        if (WikiError::isError($archive)) {
            echo "failed.\n";
            continue;
        }

        echo "importing...";
        $comment = 'Image file for [[' . $person->name_clean . ']]';
        $license = '';
        if ($image->recordUpload($archive, $comment, $license)) {
            # We're done!
            echo "done.\n";
        } else {
            echo "failed.\n";
        }
    }
}