Esempio n. 1
0
 /**
  * Builds a query for the $model rows that point at an already-saved record.
  *
  * Rows are selected where object_model equals the record's class name and
  * object_id equals the record's id. The query alias is the lower-cased
  * first character of $model.
  *
  * @param string          $model  name of the model to select from
  * @param Doctrine_Record $object record the rows must reference
  *
  * @return mixed the query produced by LsDoctrineQuery::create()->from()->where()
  *
  * @throws Exception when $object does not exist yet
  */
 static function getByModelAndObjectQuery($model, Doctrine_Record $object)
 {
     $is_persisted = $object->exists();
     if (!$is_persisted) {
         $message = "Can't get " . LsString::pluralize($model) . " by new object";
         throw new Exception($message);
     }

     $alias = substr(strtolower($model), 0, 1);
     $condition = sprintf('%s.object_model = ? AND %s.object_id = ?', $alias, $alias);
     $params = array(get_class($object), $object->id);

     $query = LsDoctrineQuery::create();
     $query = $query->from($model . ' ' . $alias);

     return $query->where($condition, $params);
 }
 /**
  * Creates Family relationships from the kinship notes in members' summaries.
  *
  * Selects Person/ElectedRepresentative entities whose summary starts with a
  * parenthesised kinship word, extracts each "<kin> of <name>" phrase from the
  * first parenthesised group, and looks up entities whose name contains every
  * word of <name>. When exactly one entity other than the member matches and
  * no relationship between the two exists yet, a Family relationship is saved
  * and, if the member has a 'Congressional Biographical Directory' reference,
  * that reference is copied onto the relationship. Progress is echoed.
  *
  * @param array $arguments task arguments (not read here)
  * @param array $options   task options; 'application' and 'env' are read
  */
 protected function execute($arguments = array(), $options = array())
 {
     $configuration = ProjectConfiguration::getApplicationConfiguration($options['application'], $options['env'], true);
     $databaseManager = new sfDatabaseManager($configuration);
     $databaseManager->initialize($configuration);
     $q = EntityTable::getByExtensionQuery(array('Person', 'ElectedRepresentative'))->addWhere('summary like ? OR summary like ? OR summary like ? OR summary like ? OR summary like ? OR summary like ? OR summary like ? OR summary like ? OR summary like ?', array('(daughter%', '(son%', '(father%', '(mother%', '(cousin%', '(husband%', '(wife%', '(brother%', '(sister%'))->orderBy('person.name_last');
     $members = $q->execute();
     foreach ($members as $member) {
         // Only the first parenthesised group of the summary is inspected.
         if (!preg_match('/\\([^\\)]*\\)/isu', $member->summary, $match)) {
             continue;
         }
         echo $member->name . ":\n";
         if (preg_match_all('/(brother|sister|daughter|mother|father|wife|husband|cousin)\\sof\\s+([^\\;\\)\\,]*)(\\;|\\)|\\,)/isu', $match[0], $matches, PREG_SET_ORDER)) {
             foreach ($matches as $m) {
                 echo "\t\t" . $m[1] . ' : of : ' . $m[2] . "\n";
                 $m[2] = str_replace('.', '', $m[2]);
                 $parts = LsString::split($m[2]);
                 // Every word of the relative's name must appear in the entity name.
                 $candidates = EntityTable::getByExtensionQuery(array('Person', 'ElectedRepresentative'));
                 foreach ($parts as $part) {
                     $candidates->addWhere('e.name like ?', '%' . $part . '%');
                 }
                 $people = $candidates->execute();
                 $family = array();
                 foreach ($people as $person) {
                     echo "\t\t\t\t" . $person->name . "\n";
                     if ($person->id != $member->id) {
                         $family[] = $person;
                     }
                 }
                 // Ambiguous (several candidates) or unresolved names are skipped.
                 if (count($family) != 1) {
                     continue;
                 }
                 // Use the vetted candidate, not the leftover loop variable: the
                 // last row of $people may be the member themselves.
                 $relative = $family[0];
                 $existing = LsDoctrineQuery::create()->from('Relationship r')->where('(r.entity1_id = ? or r.entity2_id =?) and (r.entity1_id = ? or r.entity2_id = ?)', array($member->id, $member->id, $relative->id, $relative->id));
                 if ($existing->count()) {
                     continue;
                 }
                 $description2 = FamilyTable::getDescription2($m[1], $relative->Gender->id);
                 if (!$description2) {
                     continue;
                 }
                 $relationship = new Relationship();
                 $relationship->setCategory('Family');
                 $relationship->Entity1 = $member;
                 $relationship->Entity2 = $relative;
                 $relationship->description1 = $m[1];
                 $relationship->description2 = $description2;
                 $relationship->save();
                 $ref = LsQuery::getByModelAndFieldsQuery('Reference', array('object_model' => 'Entity', 'object_id' => $member->id, 'name' => 'Congressional Biographical Directory'))->fetchOne();
                 if ($ref) {
                     $relationship->addReference($ref->source, null, null, $ref->name, $ref->source_detail, $ref->publication_date);
                 }
                 echo "-------------------------------added relationship\n";
             }
         }
         echo "\n";
     }
 }
Esempio n. 3
0
 /**
  * Tells whether any of the entity's names contains all terms of $str.
  *
  * Suffixes are removed from $str (OrgTable::removeSuffixes) and the rest is
  * split into search terms; a term that is an array is treated as a set of
  * alternatives. A leading 'The' term is ignored. A name matches when it
  * starts (optionally after "The ") with the first term and every term can
  * be removed from it once. With $strict set, a name is rejected when the
  * text left after removing the terms and suffixes still has at least
  * $strict words.
  *
  * @param string   $str    name to compare against
  * @param bool|int $strict false to accept any leftover text, otherwise the
  *                         leftover word count at which a name is rejected
  *
  * @return bool true when at least one name matches
  */
 public function hasSimilarName($str, $strict = false)
 {
     $str = OrgTable::removeSuffixes($str);
     $str = trim($str);
     if (!strlen($str)) {
         return false;
     }
     $terms = LsQuery::splitSearchPhrase($str);
     $names = $this->Entity->getAllNames();
     foreach ($terms as &$term) {
         if (is_array($term)) {
             foreach ($term as &$t) {
                 $t = LsString::escapeStringForRegex($t);
             }
             // Drop the reference before $term is overwritten below.
             unset($t);
             $term = implode('|', $term);
         } else {
             $term = LsString::escapeStringForRegex($term);
         }
     }
     unset($term);
     if (isset($terms[0]) && $terms[0] == 'The') {
         array_shift($terms);
     }
     // Without any term left the patterns below would match every name.
     if (!count($terms)) {
         return false;
     }
     foreach ($names as $name) {
         if (!preg_match('/^(The\\s+)?(' . $terms[0] . ')/isu', $name)) {
             continue;
         }
         foreach ($terms as $term) {
             // Remove one occurrence; an unchanged string means the term is absent.
             $new = preg_replace('/((^|\\s)|\\b)(' . $term . ')(\\b|(\\s|$))/isu', ' ', $name, 1);
             if ($new == $name) {
                 continue 2;
             }
             $name = $new;
         }
         $name = trim(OrgTable::removeSuffixes($name));
         if ($strict && strlen($name) > 0 && count(LsString::split($name)) >= $strict) {
             continue;
         }
         return true;
     }
     return false;
 }
Esempio n. 4
0
 /**
  * Builds a regular-expression fragment (no delimiters) for this person's name.
  *
  * Given-name tokens come from the first name (plus any entries listed for it
  * in PersonTable::$shortFirstNames), the middle name and the nickname. Each
  * lowercase letter x of a token becomes the class [xX]; for tokens with more
  * than three alphabetic characters, every letter class after the second one
  * is made optional. The tokens, or a class of their upper-cased initials,
  * may repeat before the last-name pattern from getLastNameRegex().
  *
  * @param bool $first_required when true the first name (or a listed short
  *                             form) must appear, and middle/nick tokens
  *                             become optional
  *
  * @return string the pattern fragment
  */
 public function getNameRegex($first_required = false)
 {
     // Callback form of the original /e replacement (the /e modifier no longer
     // exists in PHP 7+). The pattern has no u modifier and is kept as-is.
     $anyCase = function ($token) {
         return preg_replace_callback('/(\\p{Ll})/', function ($m) {
             return '[' . $m[1] . strtoupper($m[1]) . ']';
         }, $token);
     };
     $last_re = $this->getLastNameRegex();
     $name_first = $this->name_first;
     if (isset(PersonTable::$shortFirstNames[$name_first])) {
         $fn_arr = (array) PersonTable::$shortFirstNames[$name_first];
         $name_first = $this->name_first . ' ' . implode(' ', $fn_arr);
     }
     if ($first_required) {
         $fm = $this->name_middle . ' ' . $this->name_nick;
     } else {
         $fm = $name_first . ' ' . $this->name_middle . ' ' . $this->name_nick;
     }
     $fm_arr = preg_split('/[\\s-]+/', $fm, -1, PREG_SPLIT_NO_EMPTY);
     $initials = '';
     foreach ($fm_arr as $i => $token) {
         $len = strlen(LsString::stripNonAlpha($token));
         // Take the initial before the rewrite; afterwards a lowercase-first
         // token starts with '[' rather than its letter.
         $initials .= strtoupper($token[0]);
         $token = $anyCase($token);
         //if string is longer than 3, make everything after the second
         //letter class optional
         if ($len > 3) {
             $offset = strpos($token, ']', strpos($token, ']') + 1) + 1;
             $str = substr($token, $offset);
             $str = str_replace(']', ']?', $str);
             $token = substr($token, 0, $offset) . $str;
         }
         $fm_arr[$i] = $token;
     }
     $fm = implode('|', $fm_arr);
     // Only add the initials class when there are initials: an empty "[]"
     // would pull the following pattern text into the character class.
     $given = $fm;
     if ($initials !== '') {
         $given = $fm . '|[' . $initials . ']';
     }
     $separator = '\\b([\'"\\(\\)\\.]{0,3}\\s+|\\.\\s*|\\s?-\\s?)?';
     if ($first_required) {
         $name_first = implode('|', array_map($anyCase, LsString::split($name_first)));
         $re = '((\\b(' . $name_first . ')' . $separator . '(' . $given . ')?' . $separator . '((\\p{L}|[\'\\-])+' . $separator . ')?)+((' . $last_re . ')\\b))';
     } else {
         $re = '((\\b(' . $given . ')' . $separator . '((\\p{L}|[\'\\-])+' . $separator . ')?)+((' . $last_re . ')\\b))';
     }
     return $re;
 }
 /**
  * Fills a member's summary (and, when possible, blurb) from their profile page.
  *
  * Requests $this->_profileUrlBase . $member->bioguide_id, keeps the page
  * text in $this->_bioPageText and extracts the text between ", </FONT>" and
  * the closing TD/P tag as the bio. The blurb is 'US ' followed by the first
  * "a <2-8 words>;" phrase of the bio with the article "a" removed. The
  * member record is modified but not saved here.
  *
  * @param object $member record exposing bioguide_id, summary and blurb
  *
  * @throws Exception when the page request returns an error response
  */
 public function updateBio($member)
 {
     $url = $this->_profileUrlBase . $member->bioguide_id;
     if ($this->browser->get($url)->responseIsError()) {
         //Error response (eg. 404, 500, etc)
         throw new Exception("Couldn't get " . $url);
     }
     $this->_bioPageText = $text = LsString::newlinesToSpaces($this->browser->getResponseText());
     //get bio
     if (!preg_match('/, <\\/FONT>([^<]+)<\\/(TD|P)>/', $text, $bio)) {
         $this->printDebug("Couldn't find member bio on " . $url);
         return;
     }
     $bio = preg_replace('/\\n/', ' ', $bio[1]);
     $bio = ucfirst(trim(preg_replace('/\\s{2,}/', ' ', $bio)));
     $bio = LsHtml::replaceEntities($bio);
     $member->summary = $bio;
     $this->printDebug("Bio: " . $bio);
     if (preg_match('/\\b(a(\\s+\\p{L}+){2,8})\\;/isu', $bio, $match)) {
         // Strip only the standalone article: the lookbehind keeps words that
         // merely end in "a" (e.g. "Virginia and") intact.
         $blurb = 'US ' . preg_replace('/(?<!\\p{L})a\\s+/isu', '', $match[1]);
         $member->blurb = $blurb;
         $this->printDebug("Blurb: " . $blurb);
     }
 }
Esempio n. 6
0
 /**
  * Converts an HTML table of donations into tab-separated text.
  *
  * Each table row is split into cells; the first cell holds the donor name
  * and address lines separated by <br>, the next three cells are read as
  * amount, date and committee. Rows with fewer than two name lines or fewer
  * than six cells are ignored. Names that do not end in a company suffix are
  * re-ordered from their comma-separated parts. Tags and newlines are
  * stripped from every value.
  *
  * @param string $str HTML to parse
  *
  * @return string a header line (name, street, city, amount, date,
  *                committee) followed by one line per donation, or '' when
  *                no row could be parsed
  */
 static function parseNyDonations($str)
 {
     $re2 = '/(<td.*?>(.*?\\s).*?<.td>\\s*)*?<.tr>/is';
     preg_match_all($re2, $str, $matches);
     $results = array();
     foreach ($matches[0] as $row_html) {
         $arr = preg_split('/<.td>\\s*<td.*?>/is', $row_html);
         $name_parts = preg_split('/<br>/is', $arr[0]);
         if (count($name_parts) < 2 || count($arr) < 6) {
             continue;
         }
         $result = array("name" => "", "street" => "", "city" => "");
         if (preg_match('/(inc|llp|llc|p\\.c\\.|pc)\\.?$/is', $name_parts[0])) {
             // Company names are kept verbatim.
             $result['name'] = $name_parts[0];
         } else {
             $np = preg_split('/\\,\\s*/is', $name_parts[0]);
             if (count($np) > 1) {
                 if (count($np) == 3 && stripos($np[2], "jr") !== 0) {
                     $result['name'] = $np[2] . " " . $np[0] . ", " . $np[1];
                 } else {
                     $result['name'] = $np[1] . " " . $np[0];
                 }
             } else {
                 $result['name'] = $np[0];
             }
         }
         // At least two name lines are guaranteed by the guard above.
         $result['street'] = $name_parts[1];
         if (count($name_parts) > 2) {
             $result['city'] = $name_parts[2];
         }
         $result['amount'] = $arr[1];
         $result['date'] = $arr[2];
         $result['committee'] = $arr[3];
         foreach ($result as &$r) {
             $r = preg_replace('/(\\n|(<.*?>))/is', "", $r);
             $r = trim($r);
             $r = LsString::spacesToSpace($r);
         }
         unset($r);
         $results[] = $result;
     }
     // Nothing parsed: there is no first row to take the header from.
     if (!count($results)) {
         return '';
     }
     $str = implode("\t", array_keys($results[0])) . "\n";
     foreach ($results as $r) {
         $str .= implode("\t", $r);
         $str .= "\n";
     }
     return trim($str);
 }
Esempio n. 7
0
 /**
  * Heuristically checks whether a URL's host part relates to an organisation name.
  *
  * The URL is reduced to its "//host/" portion when that can be found. The
  * check succeeds when the URL contains a word of the name that is longer
  * than one character and is neither a common word nor a corporate
  * abbreviation, or when it contains an acronym (longer than two characters)
  * built from the first letters of all words, of the words that are not
  * common words, or of the words that are not corporate abbreviations.
  * Comparisons ignore case.
  *
  * @param string $url      URL to test
  * @param string $org_name organisation name
  *
  * @return bool true when the URL appears to belong to the organisation
  */
 static function checkUrl($url, $org_name)
 {
     if (preg_match('/\\/\\/[^\\/]+\\//isu', $url, $match)) {
         $url = $match[0];
     }
     $common = array('and', 'the', 'of', 'in', 'at', '&');
     $abbrevs = array('Corporation', 'Inc', 'Group', 'LLC', 'LLP', 'Corp', 'Co', 'Cos', 'LP', 'PA', 'Dept', 'Department', 'International', 'Administration');
     $both = array_merge($common, $abbrevs);
     $all = '';
     $no_common = '';
     $no_corp = '';
     // NOTE(review): the original also built an acronym that skipped both word
     // lists but never compared it with the URL; it is dropped here as dead
     // code. Add a check below if it was meant to count.
     foreach (LsString::split($org_name) as $part) {
         // An empty token has no first character to contribute.
         if (!strlen($part)) {
             continue;
         }
         if (strlen($part) > 1 && !LsArray::inArrayNoCase($part, $both) && stripos($url, $part) !== false) {
             return true;
         }
         if (!LsArray::inArrayNoCase($part, $common)) {
             $no_common .= $part[0];
         }
         if (!LsArray::inArrayNoCase($part, $abbrevs)) {
             $no_corp .= $part[0];
         }
         $all .= $part[0];
     }
     foreach (array($all, $no_common, $no_corp) as $acronym) {
         if (strlen($acronym) > 2 && stripos($url, $acronym) !== false) {
             return true;
         }
     }
     return false;
 }
Esempio n. 8
0
 /**
  * Imports one row: resolves the person and, when needed, records a position.
  *
  * The row's name is parsed and compared with similar existing people. A
  * candidate whose first name is a prefix of the searched one (or the
  * reverse) is accepted when one of the row's affiliationN values matches
  * the candidate's affiliations, or when the candidate's extended bio
  * matches the name regex of an unmatched affiliation. If nobody is
  * accepted, the parsed person is saved as a new entity. For a new person or
  * a bio match, the org for affiliation1 is looked up (the similar-named org
  * with the most related people wins) or created, and a Position
  * relationship is saved. Everything runs in one transaction, and a summary
  * of what happened is appended to $this->edits.
  *
  * @param array $row row values keyed by column name; 'name' and
  *                   'affiliation1' are read unconditionally
  *
  * @throws Exception any exception raised while processing, after rollback
  */
 protected function processRow($row)
 {
     foreach ($row as &$r) {
         $r = trim($r);
     }
     // Break the reference so a later write to $r cannot change the last cell.
     unset($r);
     $edit = array('Search Name' => $row['name'], 'Affiliation Name' => $row['affiliation1'], 'Similar Names' => array(), 'New Person' => null, 'Existing Person' => null, 'New Org' => null, 'Existing Org' => null, 'New Relationship' => null);
     try {
         $this->db->beginTransaction();
         $person = null;
         $search_person = PersonTable::parseFlatName($row['name']);
         $similar = $search_person->getSimilarEntitiesQuery(true)->execute();
         $matched_bio = false;
         $similar_ids = array();
         foreach ($similar as $s) {
             $similar_ids[] = $s->id;
             // Skip candidates unless one first name is a prefix of the other.
             $sim_re = LsString::escapeStringForRegex($s->name_first);
             $search_re = LsString::escapeStringForRegex($search_person->name_first);
             if (preg_match('/^' . $sim_re . '/su', $search_person->name_first) == 0 && preg_match('/^' . $search_re . '/su', $s->name_first) == 0) {
                 continue;
             }
             $ct = 1;
             $matched_affils = array();
             $unmatched_affils = array();
             // Walk affiliation1, affiliation2, ... until one matches or a
             // column is missing/blank.
             while (isset($row['affiliation' . $ct]) && trim($row['affiliation' . $ct]) != '') {
                 $affil = trim($row['affiliation' . $ct]);
                 $org = $s->checkAffiliations(array($affil));
                 if ($org) {
                     $matched_affils[] = array($org, $affil);
                     $edit['Existing Org'] = $org->id;
                     break;
                 } else {
                     $unmatched_affils[] = $affil;
                 }
                 $ct++;
             }
             if (count($matched_affils)) {
                 $person = $s;
                 break;
             } else {
                 // Fall back to looking for an affiliation name in the bio.
                 $bio = $s->getExtendedBio();
                 foreach ($unmatched_affils as $affil) {
                     $affil = OrgTable::removeSuffixes($affil);
                     $this->printDebug($affil);
                     $this->printDebug($bio);
                     if (preg_match('/' . OrgTable::getNameRegex($affil) . '/su', $bio)) {
                         $matched_bio = true;
                         break;
                     }
                 }
                 if ($matched_bio) {
                     $person = $s;
                     break;
                 } else {
                     $this->printDebug('  ' . $s->name . ' failed');
                 }
             }
         }
         $edit['Similar Names'] = array_slice($similar_ids, 0, 5);
         $no_match = false;
         if (!$person) {
             if (isset($row['bio']) && trim($row['bio']) != '') {
                 $search_person->summary = $row['bio'];
             }
             $search_person->save();
             $this->printDebug('  not found, new person saved: ' . $search_person->name);
             $search_person->addReference($this->source_url, null, null, $this->source_name);
             $no_match = true;
             $edit['New Person'] = $search_person->id;
             $person = $search_person;
         } else {
             // Only fill in a summary the existing person does not have yet.
             if (isset($row['bio']) && trim($row['bio']) != '' && !$person->summary) {
                 $person->summary = $row['bio'];
                 $person->save();
             }
             $this->printDebug('  **person found: ' . $person->name);
             $edit['Existing Person'] = $person->id;
         }
         // A person accepted through an affiliation match gets no new relationship.
         if ($matched_bio || $no_match) {
             $orgs = OrgTable::getOrgsWithSimilarNames($row['affiliation1'], true);
             $max = -1;
             $affiliated_org = null;
             // Prefer the similar-named org with the highest related-person count.
             foreach ($orgs as $org) {
                 $this->printDebug('    found match: ' . $org->name);
                 $ct = $org->getRelatedEntitiesQuery('Person', RelationshipTable::POSITION_CATEGORY, null, null, null, false, 2)->count();
                 if ($ct > $max) {
                     $affiliated_org = $org;
                     $edit['Existing Org'] = $affiliated_org->id;
                     $max = $ct;
                 }
             }
             if (!$affiliated_org) {
                 $affiliated_org = new Entity();
                 $affiliated_org->addExtension('Org');
                 if (isset($row['affiliation1_extensions']) && $row['affiliation1_extensions'] != '') {
                     $extensions = explode(',', $row['affiliation1_extensions']);
                     foreach ($extensions as $ext) {
                         $ext = trim($ext);
                         // Unknown extension names are ignored.
                         if (in_array($ext, ExtensionDefinitionTable::$extensionNames)) {
                             $affiliated_org->addExtension($ext);
                         }
                     }
                 }
                 $affiliated_org->name = $row['affiliation1'];
                 $affiliated_org->save();
                 $affiliated_org->addReference($this->source_url, null, null, $this->source_name);
                 $edit['New Org'] = $affiliated_org->id;
             }
             $rel = new Relationship();
             $rel->Entity1 = $person;
             $rel->Entity2 = $affiliated_org;
             $rel->setCategory('Position');
             if (isset($row['affiliation1_title']) && $row['affiliation1_title'] != '') {
                 $description = trim($row['affiliation1_title']);
                 $rel->description1 = $description;
                 // Director, Trustee and Chair* titles are flagged as board seats.
                 if ($description == 'Director' || $description == 'Trustee' || preg_match('/^Chair/su', $description)) {
                     $rel->is_board = 1;
                     $rel->is_employee = 0;
                 }
             }
             $rel->save();
             $rel->addReference($this->source_url, null, null, $this->source_name);
             $edit['New Relationship'] = $rel->id;
         }
         // Optional columns are copied into the edit summary only.
         if (isset($row['start_date']) && trim($row['start_date']) != '') {
             $edit['Relationship']['start_date'] = trim($row['start_date']);
         }
         if (isset($row['end_date']) && trim($row['end_date']) != '') {
             $edit['Relationship']['end_date'] = trim($row['end_date']);
         }
         if (isset($row['title']) && trim($row['title']) != '') {
             $edit['Relationship']['title'] = trim($row['title']);
         }
         if (isset($row['notes']) && trim($row['notes']) != '') {
             $edit['Relationship']['notes'] = trim($row['notes']);
         }
         if (isset($row['rank']) && $row['rank'] != '') {
             $edit['rank'] = $row['rank'];
         }
         $this->db->commit();
     } catch (Exception $e) {
         $this->db->rollback();
         throw $e;
     }
     $this->edits[] = $edit;
 }
Esempio n. 9
0
 /**
  * Imports the filings of one lobbying-disclosure XML file.
  *
  * The file $this->_dir . $lobby_import->filename is loaded completely and
  * its Filing elements are processed starting at $lobby_import->offset. Each
  * filing is handled in its own transaction on $this->db: the import record's
  * offset (and, on the last filing, its done flag) is saved, then the
  * filing and its registrant, client, type, period, lobbyists, government
  * entities and issues are stored, creating lookup rows that do not exist
  * yet. Filings without a Registrant or with an already known federal filing
  * id are skipped.
  *
  * @param object $lobby_import import record; filename, offset, done and id
  *                             are used
  *
  * @throws Exception any exception raised while storing a filing, after the
  *                   current transaction was rolled back
  */
 private function importLdaData($lobby_import)
 {
     $path = $this->_dir . $lobby_import->filename;
     $raw = file_get_contents($path);
     $xml = new SimpleXMLElement($raw);
     $filings = $xml->Filing;
     $limit = count($filings);
     $this->printDebug('importing data from ' . $lobby_import->filename . ' (record ' . $lobby_import->offset . ' of ' . $limit . ')');
     for ($n = (int) $lobby_import->offset; $n < $limit; $n++) {
         // $this->_count counts filings visited by this object; once it passes
         // $this->_limit the whole script is terminated.
         // NOTE(review): die ends the process without returning to the caller
         // — confirm that is intended.
         $this->_count = $this->_count + 1;
         if ($this->_count > $this->_limit) {
             die;
         }
         try {
             $this->db->beginTransaction();
             // Record progress first so the offset is saved with this filing.
             $lobby_import->offset = $n;
             if ($n == $limit - 1) {
                 $lobby_import->done = 1;
             }
             $lobby_import->save();
             if (!isset($filings[$n])) {
                 // Diagnostic output for an index that unexpectedly has no filing.
                 echo 'ok';
                 var_dump($filings[$n - 1]);
                 var_dump($filings[$n + 1]);
                 $this->printDebug('not set' . $n);
                 $this->db->commit();
                 continue;
             }
             $filing = $filings[$n];
             // Filings without a registrant are skipped (progress is still committed).
             if (!isset($filing->Registrant)) {
                 $this->db->commit();
                 continue;
             }
             //var_dump($filing);
             $f = new LdaFiling();
             $f->federal_filing_id = $filing['ID'];
             $f->year = $filing['Year'];
             $f->amount = $filing['Amount'];
             $f->received = $filing['Received'];
             $f->import_id = $lobby_import->id;
             $f->offset = $n;
             //check for duplicate: a filing with this federal id already exists
             if (Doctrine::getTable('LdaFiling')->findOneByFederalFilingId($f->federal_filing_id)) {
                 $this->db->commit();
                 continue;
             }
             //set registrant, creating it when the federal registrant id is unknown
             if (!($r = Doctrine::getTable('LdaRegistrant')->findOneByFederalRegistrantId($filing->Registrant['RegistrantID']))) {
                 $r = new LdaRegistrant();
                 $r->name = LsString::spacesToSpace($filing->Registrant['RegistrantName']);
                 $r->federal_registrant_id = $filing->Registrant['RegistrantID'];
                 $r->address = $filing->Registrant['Address'];
                 $r->description = LsString::spacesToSpace($filing->Registrant['GeneralDescription']);
                 $r->country = $filing->Registrant['RegistrantCountry'];
                 $r->save();
             }
             $f->registrant_id = $r->id;
             //set client; clients are looked up per registrant + federal client id
             if ($filing->Client) {
                 if (!($c = LsQuery::getByModelAndFieldsQuery('LdaClient', array('registrant_id' => $r->id, 'federal_client_id' => $filing->Client['ClientID']))->execute()->getFirst())) {
                     $c = new LdaClient();
                     $c->name = LsString::spacesToSpace($filing->Client['ClientName']);
                     $c->federal_client_id = $filing->Client['ClientID'];
                     $c->registrant_id = $r->id;
                     $c->contact_name = LsString::spacesToSpace($filing->Client['ContactFullname']);
                     $c->description = LsString::spacesToSpace($filing->Client['GeneralDescription']);
                     $c->country = $filing->Client['ClientCountry'];
                     $c->state = $filing->Client['ClientState'];
                     $c->save();
                 }
                 $f->client_id = $c->id;
             }
             //set filing type
             if ($type = (string) $filing['Type']) {
                 //look for existing type, create it when missing
                 if (!($t = Doctrine::getTable('LdaType')->findOneByDescription($type))) {
                     $t = new LdaType();
                     $t->description = $type;
                     $t->save();
                 }
                 $f->type_id = $t->id;
                 unset($t);
             }
             //set filing period
             if ($period = (string) $filing['Period']) {
                 //look for existing period, create it when missing
                 if (!($p = Doctrine::getTable('LdaPeriod')->findOneByDescription($period))) {
                     $p = new LdaPeriod();
                     $p->description = $period;
                     $p->save();
                 }
                 $f->period_id = $p->id;
             }
             $f->save();
             //add lobbyists; a lobbyist is identified by registrant + name
             if ($filing->Lobbyists) {
                 foreach ($filing->Lobbyists->Lobbyist as $lobbyist) {
                     $name = (string) $lobbyist['LobbyistName'];
                     if (!($l = LsQuery::getByModelAndFieldsQuery('LdaLobbyist', array('registrant_id' => $r->id, 'name' => $name))->execute()->getFirst())) {
                         $l = new LdaLobbyist();
                         $l->name = $name;
                         $l->registrant_id = $r->id;
                         $l->status = $lobbyist['LobbyistStatus'];
                         // NOTE(review): attribute name is spelled 'LobbyisteIndicator'
                         // here — verify against the source XML before changing it.
                         $l->indicator = $lobbyist['LobbyisteIndicator'];
                         $l->official_position = $lobbyist['OfficialPosition'];
                         $l->save();
                     }
                     // Link row between the filing and the lobbyist.
                     $fl = new LdaFilingLobbyist();
                     $fl->filing_id = $f->id;
                     $fl->lobbyist_id = $l->id;
                     $fl->save();
                     unset($fl);
                     unset($l);
                 }
             }
             //add govt entities, looked up by trimmed name
             if ($filing->GovernmentEntities) {
                 foreach ($filing->GovernmentEntities->GovernmentEntity as $govt) {
                     $govt = trim($govt['GovEntityName']);
                     if (!($g = Doctrine::getTable('LdaGovt')->findOneByName($govt))) {
                         $g = new LdaGovt();
                         $g->name = $govt;
                         $g->save();
                     }
                     // Link row between the filing and the government entity.
                     $fg = new LdaFilingGovt();
                     $fg->filing_id = $f->id;
                     $fg->govt_id = $g->id;
                     $fg->save();
                     unset($fg);
                     unset($g);
                 }
             }
             //add issues; the issue code is stored as the issue's name
             if ($filing->Issues) {
                 foreach ($filing->Issues->Issue as $issue) {
                     $code = (string) $issue['Code'];
                     if (!($i = Doctrine::getTable('LdaIssue')->findOneByName($code))) {
                         $i = new LdaIssue();
                         $i->name = $code;
                         $i->save();
                     }
                     // Link row carrying the filing-specific issue text.
                     $fi = new LdaFilingIssue();
                     $fi->filing_id = $f->id;
                     $fi->issue_id = $i->id;
                     $fi->specific_issue = $issue['SpecificIssue'];
                     $fi->save();
                     unset($fi);
                     unset($i);
                 }
             }
             $this->printDebug($f->federal_filing_id);
             //check for duplicate again
             // NOTE(review): this lookup runs after $f->save() above; if it can
             // see the row just saved it would match on every pass and roll the
             // filing back — confirm how this behaves with the connections in use.
             if (Doctrine::getTable('LdaFiling')->findOneByFederalFilingId($f->federal_filing_id)) {
                 $this->db->rollback();
                 continue;
             }
             $this->db->commit();
         } catch (Exception $e) {
             $this->db->rollback();
             throw $e;
         }
         // Drop per-filing objects before the next pass (not reached when an
         // iteration ends through continue).
         unset($f);
         unset($r);
         unset($c);
         unset($filing);
     }
     unset($xml);
     unset($raw);
     unset($filings);
 }
Esempio n. 10
0
/**
 * Global convenience wrapper around LsString::excerpt().
 *
 * Every argument is forwarded unchanged and the helper's result is returned
 * as-is.
 *
 * @param mixed $string      text to shorten
 * @param int   $len         forwarded as the length argument (default 50)
 * @param mixed $truncateStr forwarded as the truncation marker (default '...')
 * @param bool  $wholeWords  forwarded as the whole-words flag (default true)
 *
 * @return mixed whatever LsString::excerpt() returns
 */
function excerpt($string, $len = 50, $truncateStr = '...', $wholeWords = true)
{
    $shortened = LsString::excerpt($string, $len, $truncateStr, $wholeWords);

    return $shortened;
}
 /**
  * Breaks a free-text job-title string into a list of position descriptions.
  *
  * The corporation's name, nickname and ticker are stripped from the text,
  * the remainder is split on commas, semicolons, "and"/"AND", " & ", "/" and
  * hyphens (except a hyphen directly after "CO"/"Co"), and each fragment has
  * common abbreviations expanded and qualifiers such as "Interim" or
  * "Independent" removed. A cleaned fragment is then matched against
  * PositionTable::$businessPositions and, failing that, against existing
  * Relationship.description1 values.
  *
  * @param string $str  raw title text
  * @param object $corp record exposing name, name_nick and ticker
  *
  * @return array list of array('description' => string, 'note' => array)
  */
 public function parseDescriptionStr($str, $corp)
 {
     $descriptions = array();
     //cleanup text to be parsed
     $str = trim($str);
     $str = str_replace('.', ' ', $str);
     $str = preg_replace('/\\s{2,}/', ' ', $str);
     $name_re = LsString::escapeStringForRegex($corp->name);
     $str = preg_replace('/\\b' . $name_re . '\\b/isu', '', $str);
     if ($corp->name_nick) {
         $nick_re = LsString::escapeStringForRegex($corp->name_nick);
         $str = preg_replace('/\\b' . $nick_re . '\\b/isu', '', $str);
     }
     if ($corp->ticker) {
         $tick_re = LsString::escapeStringForRegex($corp->ticker);
         $str = preg_replace('/\\b' . $tick_re . '\\b/isu', '', $str);
     }
     //split into individual title fragments
     $parts = preg_split('/,|;|\\band\\b|(?<!C[Oo])\\-|\\bAND\\b|\\s&\\s|\\//', $str, -1, PREG_SPLIT_NO_EMPTY);
     foreach ($parts as $part) {
         $part = trim($part);
         $part = preg_replace('/\\s{2,}/', ' ', $part);
         //abbreviation replacements
         $part = preg_replace('/( |^)(\\w) (\\w) (\\w)( |$)/', '\\2\\3\\4', $part);
         $part = preg_replace('/(Interim|Acting|Incoming) /i', '', $part);
         $part = preg_replace('/Sr /i', 'Senior ', $part);
         $part = preg_replace('/Chf /i', 'Chief ', $part);
         $part = preg_replace('/( |^)V( |$)/i', ' Vice ', $part);
         $part = preg_replace('/( |^)VP( |$)/i', ' Vice President ', $part);
         $part = preg_replace('/( |^)VC( |$)/i', ' Vice Chairman ', $part);
         $part = preg_replace('/( |^)Chr( |$)/i', ' Chairman ', $part);
         $part = preg_replace('/( |^)Ofcr( |$)/i', ' Officer ', $part);
         $part = preg_replace('/( |^)Vice P( |$)/i', ' Vice President ', $part);
         $part = preg_replace('/( |^)(Ex|Exec)( |$)/i', ' Executive ', $part);
         $part = preg_replace('/( |^)EVP( |$)/i', ' Executive Vice President ', $part);
         $part = preg_replace('/( |^)(Off|Offic|Offcr)( |$)/i', ' Officer ', $part);
         $part = str_replace('Gen ', 'General ', $part);
         $part = preg_replace('/( |^)(Op|Oper) /', ' Operating ', $part);
         $part = preg_replace('/( |^)(Bd|Brd)( |$)/i', ' Board ', $part);
         $part = preg_replace('/of Board/i', ' of the Board', $part);
         $part = preg_replace('/( |^)COB( |$)/i', ' Chairman of the Board ', $part);
         $part = preg_replace('/( |^)(Pres|Prs|Presid|Prsdt|Prsdnt)( |$)/i', ' President ', $part);
         $part = preg_replace('/( |^)Admin( |$)/i', ' Administrative ', $part);
         $part = preg_replace('/( |^)Info( |$)/i', ' Information ', $part);
         $part = preg_replace('/\\bComm\\b/i', 'Committee', $part);
         $part = preg_replace('/\\bInc\\b/i', '', $part);
         $part = preg_replace('/( |-|^)(Ch|Chm|Chmn|Chrm|Chrmn|Chair|Chairmain|Chariman)( |$)/i', '\\1Chairman ', $part);
         $part = preg_replace('/(Sec|Secr|Secy|Secretar|Secreta)( |$)/i', 'Secretary ', $part);
         $part = str_replace('Vice-', 'Vice ', $part);
         $part = preg_replace('/( |^)Non /i', ' Non-', $part);
         $part = preg_replace('/\\bCompl\\b/i', 'Compliance', $part);
         $part = str_ireplace('of Advisory', 'of the Advisory', $part);
         $part = preg_replace('/Advisory (Panel|Council)/i', 'Advisory Board', $part);
         $part = str_ireplace('Independent ', '', $part);
         $part = str_ireplace('Lead ', '', $part);
         $part = str_ireplace('Corporate ', '', $part);
         $part = str_ireplace('Outside ', '', $part);
         $part = str_ireplace('Non-interested', '', $part);
         $part = str_ireplace('Interested', '', $part);
         $part = str_replace('Main ', '', $part);
         $part = str_ireplace('Presiding ', '', $part);
         $part = str_ireplace('Founding ', '', $part);
         $part = str_ireplace('Acctg', 'Accounting', $part);
         $part = str_ireplace('Chairperson', 'Chairman', $part);
         $part = str_ireplace('Chairwoman', 'Chairman', $part);
         $part = str_ireplace("Gen'l", 'General', $part);
         $part = trim($part);
         $part = preg_replace('/\\s{2,}/', ' ', $part);
         $position = array('description' => null, 'note' => array());
         if ($part != '') {
             //look for matching title
             $p = LsArray::inArrayNoCase($part, PositionTable::$businessPositions);
             if ($p) {
                 $position['description'] = $p;
             } elseif ($q = Doctrine::getTable('Relationship')->findOneByDescription1($part)) {
                 // Look up the fragment text itself; $position is still the empty
                 // placeholder here and cannot match a description.
                 $position['description'] = $q->description1;
             } elseif (count($descriptions) == 0) {
                 // First fragment with no direct match: drop trailing words one at
                 // a time and retry with the shortened title.
                 $part_splat = LsString::split($part);
                 // NOTE(review): the removed words are gathered in $note but never
                 // attached to $position['note'] -- confirm whether they should be.
                 $note = array();
                 $lim = count($part_splat) - 1;
                 for ($i = 0; $i < $lim; $i++) {
                     $note[] = array_pop($part_splat);
                     $part_new = implode(' ', $part_splat);
                     if (strtoupper($part_new) == 'DIRECTOR') {
                         break;
                     }
                     $p = LsArray::inArrayNoCase($part_new, PositionTable::$businessPositions);
                     if ($p) {
                         $position['description'] = $p;
                     } elseif ($q = Doctrine::getTable('Relationship')->findOneByDescription1($part_new)) {
                         $position['description'] = $q->description1;
                     }
                 }
                 // nothing recognised: keep the cleaned fragment verbatim
                 if (!$position['description']) {
                     $position['description'] = $part;
                 }
             } else {
                 // unrecognised later fragment: record it as a note on the
                 // previously accepted description
                 $descriptions[count($descriptions) - 1]['note'][] = $part;
             }
             if (isset($position['description'])) {
                 $descriptions[] = $position;
             }
         }
     }
     return $descriptions;
 }
Esempio n. 12
0
 /**
  * Removes one or more field names from this object's field list.
  *
  * The remaining names are sorted, joined with commas, passed through
  * LsString::emptyToNull() and stored in $this->fields.
  *
  * @param string|array $fields field name or list of field names to drop
  *
  * @return mixed the value that was assigned to $this->fields
  */
 public function removeFields($fields)
 {
     $toDrop = (array) $fields;
     $remaining = array_diff($this->getFieldsArray(), $toDrop);
     sort($remaining);

     $stored = LsString::emptyToNull(implode(',', $remaining));
     $this->fields = $stored;

     return $stored;
 }
Esempio n. 13
0
 /**
  * Finds the text runs in $str that begin with the entity's name (or
  * nickname) and extend to the next layout tag.
  *
  * Each hit is also logged through printDebug() with tags stripped and
  * whitespace collapsed; the returned strings themselves are the raw
  * matches.
  *
  * @param string $str HTML to search
  * @param Entity $e   entity whose name regex / nickname anchors the match
  *
  * @return array|null raw matched strings, or null when nothing matched
  */
 public function getSummary($str, Entity $e)
 {
     $html = LsHtml::replaceEntities($str);

     // alternation of every pattern that may stand for the entity's name
     $namePatterns = array($e->getNameRegex());
     if ($e->name_nick && $e->name_nick != '') {
         $namePatterns[] = LsString::escapeStringForRegex($e->name_nick);
     }
     $nameAlternation = implode('|', $namePatterns);

     // built but not used by the pattern below
     $style_tags = implode('|', LsHtml::$fontStyleTags);
     $layoutAlternation = implode('|', LsHtml::$layoutTags);

     $re = '/((' . $nameAlternation . ')(.*?))<\\/?(' . $layoutAlternation . ')/isu';
     $this->printDebug($re);

     if (!preg_match_all($re, $html, $matches)) {
         return null;
     }

     $results = $matches[1];
     foreach ($results as $rawHit) {
         // the cleaned form is logged only; $results keeps the raw text
         $this->printDebug(LsString::spacesToSpace(LsHtml::stripTags($rawHit)));
     }

     return $results;
 }
Esempio n. 14
0
 /**
  * Stores a normalised copy of the given text on the instance.
  *
  * The text is passed through LsHtml::replaceEntities() and then
  * LsString::utf8TransUnaccent() before being assigned to $this->text.
  *
  * @param string $text source text
  */
 function __construct($text)
 {
     $withoutEntities = LsHtml::replaceEntities($text);
     $this->text = LsString::utf8TransUnaccent($withoutEntities);
 }
Esempio n. 15
0
 /**
  * Extracts strings that look like person names from HTML.
  *
  * Two passes are made over text that sits directly between a ">" and a
  * "<":
  *  - "First [Middle] Last" candidates, kept when they contain a
  *    stand-alone initial, when their first word is in
  *    LsLanguage::$commonFirstNames, or when a Person with that first name
  *    already exists in the database;
  *  - "Last, First" candidates, which are always kept after being
  *    re-formatted through PersonTable::parseCommaName().
  *
  * @param string $text HTML to scan
  *
  * @return array list of name strings (first-pass hits, then second-pass hits)
  */
 static function getHtmlPersonNames($text)
 {
     $flatNameRe = '/>\\s*(\\p{Lu}\'?(\\p{L}+|\\.)?\\s+(\\p{Lu}\'?(\\s+|\\p{L}+\\s+|\\.\\s*)?){0,2}\\p{Lu}\'?\\p{L}+(\\-\\p{Lu}\'?\\p{L}+)?(\\,?\\s+\\p{Lu}\\p{L}{1,4}\\.?)?)\\**\\s*</su';
     $commaNameRe = '/>\\s*(\\p{Lu}\'?\\p{L}+(\\-\\p{Lu}\'?\\p{L}+)?\\,\\s+(\\p{Lu}\'?(\\p{L}+|\\.)?(\\s+(\\p{Lu}\'?(\\s+|\\p{L}+\\s+|\\.\\s*)?){0,2})?)(\\,?\\s+\\p{Lu}\\p{L}{1,4}\\.?)?)\\**\\s*</su';

     $text = LsHtml::replaceEntities($text);
     $found = array();

     if (preg_match_all($flatNameRe, $text, $flatHits, PREG_OFFSET_CAPTURE)) {
         // each hit is array(matched text, byte offset); only the text is used
         foreach ($flatHits[1] as $hit) {
             $candidate = $hit[0];

             // a stand-alone capital (optionally dotted) suggests a middle initial
             $hasInitial = (bool) preg_match('/\\s+\\p{Lu}\\.?\\s/', $candidate);

             $words = LsString::split(trim($candidate));
             $isCommonFirstName = in_array($words[0], LsLanguage::$commonFirstNames);

             $knownFirstNameCount = LsDoctrineQuery::create()
                 ->from('Person p')
                 ->where('p.name_first = ?', $words[0])
                 ->count();

             if ($hasInitial || $isCommonFirstName || $knownFirstNameCount > 0) {
                 $found[] = $candidate;
             }
         }
     }

     if (preg_match_all($commaNameRe, $text, $commaHits, PREG_OFFSET_CAPTURE)) {
         foreach ($commaHits[1] as $hit) {
             $found[] = PersonTable::parseCommaName($hit[0])->getFullName(false);
         }
     }

     return $found;
 }
Esempio n. 16
0
 /**
  * Parses a "Prefix First Middle Last Suffix" style name string into its
  * components.
  *
  * @param string      $str         the name to parse
  * @param string|null $surname     known surname; when given, its internal
  *                                 whitespace is temporarily replaced with
  *                                 underscores so a multi-word last name
  *                                 stays in one piece
  * @param bool        $returnArray return the component array instead of an
  *                                 Entity
  *
  * @return Entity|array a new (unsaved) Entity with the Person extension, or
  *                      an array keyed by name_first, name_last, name_middle,
  *                      name_prefix, name_suffix and name_nick
  */
 static function parseFlatName($str, $surname = null, $returnArray = false)
 {
     $namePrefix = $nameFirst = $nameMiddle = $nameLast = $nameSuffix = $nameNick = null;
     //to handle multi-word last names like Van der Twerp
     $sub = null;
     if ($surname) {
         $sub = preg_replace('/(^(\\P{L})+|(\\P{L})+$)/u', '', $surname);
         $sub = preg_replace('/\\s+/is', '_', $sub);
         $str = str_ireplace($surname, $sub, $str);
     }
     //trim and remove periods
     $str = trim(str_replace('.', ' ', $str));
     //remove extra spaces
     $str = preg_replace('/\\s{2,}/', ' ', $str);
     //remove any parenthesised text that follows a space
     $str = preg_replace('/ \\([^\\)]+\\)/', '', $str);
     //get prefixes; after each hit the scan restarts from the first prefix
     $prefixes = self::$nameParsePrefixes;
     while ($prefix = current($prefixes)) {
         if ($str != ($new = preg_replace('/^' . $prefix . ' /i', '', $str))) {
             // prefixes listed in LsLanguage::$commonPrefixes are stripped but not kept
             if (!LsArray::inArrayNoCase($prefix, LsLanguage::$commonPrefixes)) {
                 $namePrefix .= $prefix . ' ';
             }
             $str = trim($new);
             reset($prefixes);
             continue;
         }
         next($prefixes);
     }
     $namePrefix = $namePrefix ? trim($namePrefix) : null;
     //get suffixes; each newly found suffix is prepended so original order is kept
     $suffixes = self::$nameParseSuffixes;
     while ($suffix = current($suffixes)) {
         if ($str != ($new = preg_replace('/ ' . $suffix . '$/i', '', $str))) {
             $nameSuffix = $suffix . ' ' . $nameSuffix;
             $str = trim($new);
             reset($suffixes);
             continue;
         }
         next($suffixes);
     }
     $nameSuffix = $nameSuffix ? trim($nameSuffix) : null;
     //remove commas left over from suffixes
     $str = trim(str_replace(',', '', $str));
     //find nickname in quotes
     if (preg_match('/["\']([\\S]+)[\'"]/', $str, $nickFound)) {
         // the pattern has a single capture group, so the nickname is always
         // in index 1 (there is no index 2 to fall back to)
         $nameNick = $nickFound[1];
         $str = trim(preg_replace('/["\']([\\S]+)[\'"]/', '', $str));
     }
     //condense multiple spaces
     $str = preg_replace('/\\s{2,}/', ' ', $str);
     //split into parts
     $parts = explode(' ', $str);
     switch (count($parts)) {
         case 1:
             // a lone word: borrow the prefix or suffix to fill the missing half
             if ($namePrefix) {
                 $nameFirst = $namePrefix;
                 $nameLast = $parts[0];
                 $namePrefix = null;
             } else {
                 if ($nameSuffix) {
                     $nameFirst = $parts[0];
                     $nameLast = $nameSuffix;
                     $nameSuffix = null;
                 } else {
                     // the word is the last name only if it equals the supplied surname
                     if (strtolower($sub) == strtolower($parts[0])) {
                         $nameLast = $parts[0];
                     } else {
                         $nameFirst = $parts[0];
                     }
                 }
             }
             break;
         case 2:
             $nameFirst = $parts[0];
             $nameLast = $parts[1];
             break;
         case 3:
             $nameFirst = $parts[0];
             $nameMiddle = $parts[1];
             $nameLast = $parts[2];
             break;
         default:
             // four or more words: everything between first and last is the middle name
             $nameFirst = $parts[0];
             $nameLast = $parts[count($parts) - 1];
             for ($n = 1; $n < count($parts) - 1; $n++) {
                 $nameMiddle .= $parts[$n] . ' ';
             }
             $nameMiddle = trim($nameMiddle);
             break;
     }
     // restore the spaces that were replaced with underscores in the surname
     $nameLast = str_replace('_', ' ', $nameLast);
     $name = array('name_first' => $nameFirst, 'name_last' => $nameLast, 'name_middle' => $nameMiddle, 'name_prefix' => $namePrefix, 'name_suffix' => $nameSuffix, 'name_nick' => $nameNick);
     foreach ($name as $nk => &$nv) {
         if ($nv && $nk != 'name_suffix' && $nk != 'name_prefix') {
             // strip leading/trailing non-letters, then fix casing of
             // all-upper or all-lower values
             $nv = preg_replace('/^(\\P{L})+|(\\P{L})+$/u', '', $nv);
             $case = LsString::checkCase($nv);
             $nv = $case == 'upper' || $case == 'lower' ? LsLanguage::nameize($nv) : $nv;
             if ($nk != 'name_last') {
                 $nv = LsLanguage::hgCaser($nv, false);
             }
         }
     }
     unset($nv);
     if ($returnArray) {
         return $name;
     }
     $person = new Entity();
     $person->addExtension('Person');
     $person->name_first = $name['name_first'];
     $person->name_middle = $name['name_middle'];
     $person->name_last = $name['name_last'];
     $person->name_nick = $name['name_nick'];
     $person->name_prefix = $name['name_prefix'];
     $person->name_suffix = $name['name_suffix'];
     return $person;
 }
Esempio n. 17
0
 /**
  * Returns the first stored paragraph as plain text.
  *
  * Tags are stripped, HTML entities replaced and whitespace collapsed via
  * the LsHtml / LsString helpers.
  *
  * @return string|null cleaned first paragraph, or null when there are no
  *                     paragraphs
  */
 public function getCleanFirstParagraph()
 {
     // Bail out only when there is nothing to read. The guard was previously
     // inverted, returning null whenever paragraphs existed and indexing [0]
     // when there were none.
     if (!$this->_paragraphs) {
         return null;
     }
     $first = $this->_paragraphs[0];
     $first = LsString::spacesToSpace(LsHtml::replaceEntities(LsHtml::stripTags($first)));
     return $first;
 }
Esempio n. 18
0
 /**
  * Imports one spreadsheet row: finds or creates the entity it names and
  * then either relates it to $this->entity or adds it to $this->list.
  *
  * The operator is asked on the console to confirm each possible match and
  * each new entity. Rows missing a required column, with a primary_type
  * other than 'Person'/'Org', or (when $this->entity is set) with an
  * unknown relationship_category are skipped with a debug message.
  *
  * @param array $row column name => value for one input row
  *
  * @return void
  */
 public function processRow($row)
 {
     // a row may carry its own source reference; otherwise use the task-wide one
     if (isset($row['url']) && $row['url'] != '' && isset($row['url_name']) && $row['url_name'] != '') {
         $url = $row['url'];
         $url_name = $row['url_name'];
     } else {
         $url = $this->url;
         $url_name = $this->url_name;
     }
     // Trim every string value in place. The result of trim() was previously
     // discarded, so the row was never actually cleaned. Non-strings are left
     // alone so that isset() checks on null values behave as before.
     foreach ($row as &$r) {
         if (is_string($r)) {
             $r = trim($r);
         }
     }
     unset($r);
     if ($this->entity) {
         $required = array('entity_name', 'primary_type', 'relationship_category');
     } else {
         $required = array('entity_name', 'primary_type');
     }
     foreach ($required as $req) {
         if (!isset($row[$req]) || $row[$req] == '') {
             $this->printDebug('!!! > skipping row, ' . $req . ' not set');
             return;
         }
     }
     if ($row['primary_type'] != 'Person' && $row['primary_type'] != 'Org') {
         $this->printDebug('!!! > primary type not properly set, skipping row...');
         return;
     }
     if ($this->entity) {
         $relationship_category = trim($row['relationship_category']);
         $relationship_category_id = array_search($relationship_category, RelationshipCategoryTable::$categoryNames);
         if (!$relationship_category_id) {
             $this->printDebug('!!! > relationship type not properly set, skipping row...');
             return;
         }
     }
     $this->printDebug("processing: " . $row['entity_name'] . '......');
     // build a candidate entity and fetch existing entities that resemble it
     if ($row['primary_type'] == 'Person') {
         $entity2 = PersonTable::parseFlatName($row['entity_name']);
         $similar_entities = PersonTable::getSimilarQuery2($entity2)->execute();
     } else {
         $entity2 = new Entity();
         $entity2->addExtension('Org');
         $entity2->setEntityField('name', $row['entity_name']);
         $similar_entities = OrgTable::getOrgsWithSimilarNames($entity2->name);
     }
     // let the operator pick an existing entity instead of creating a duplicate
     $matched = false;
     foreach ($similar_entities as $similar_entity) {
         if ($similar_entity['primary_ext'] == 'Person') {
             $this->printDebug('  POSSIBLE MATCH: ' . $similar_entity->name . ' (Orgs :: ' . $similar_entity->getRelatedOrgsSummary() . "  Bio :: {$similar_entity->summary})");
         } else {
             $this->printDebug('  POSSIBLE MATCH: ' . $similar_entity->name . ' (Summary :: ' . $similar_entity->summary . ')');
         }
         $accept = $this->readline('  Is this the same entity? (y or n or b to break)');
         if ($accept == 'y') {
             $entity2 = $similar_entity;
             $matched = true;
             $this->printDebug('             [accepted]');
             break;
         } else {
             if ($accept == 'b') {
                 break;
             }
         }
     }
     $created = false;
     if (!$matched) {
         if ($entity2->getPrimaryExtension() == 'Person') {
             $this->printDebug('  New person: ' . $entity2->name_first . ' ' . $entity2->name_last);
         } else {
             $this->printDebug('  New org: ' . $entity2->name);
         }
         $accept = $this->readline('    create this new entity? (y or n) ');
         if ($accept == 'y') {
             // extension problems are reported but do not abort the row
             try {
                 $extensions = LsString::split($row['entity_extensions'], '\\s*\\,\\s*');
                 foreach ($extensions as $extension) {
                     $entity2->addExtension($extension);
                 }
                 $entity2->save();
                 $entity2->addReference($url, null, null, $url_name);
             } catch (Exception $e) {
                 $this->printDebug('   !!! problems with extensions for this row');
             }
             $fields = array('summary', 'blurb', 'website');
             foreach ($fields as $field) {
                 if (isset($row[$field])) {
                     $entity2[$field] = $row[$field];
                 }
             }
             $entity2->save();
             $entity2->addReference($url, null, null, $url_name);
             $created = true;
             $this->printDebug(' ' . $entity2->name . ' saved');
             //sleep(1);
         } else {
             $entity2 = null;
         }
     }
     // create relationship
     if ($entity2) {
         if ($this->entity) {
             $relationship = new Relationship();
             if (isset($row['relationship_order']) && $row['relationship_order'] != '') {
                 // explicit ordering: '1' puts $this->entity first, anything else second
                 if ($row['relationship_order'] == '1') {
                     $relationship->Entity1 = $this->entity;
                     $relationship->Entity2 = $entity2;
                 } else {
                     $relationship->Entity2 = $this->entity;
                     $relationship->Entity1 = $entity2;
                 }
             } else {
                 // default ordering: for Position/Education a Person row goes first
                 if ($relationship_category == 'Position' || $relationship_category == 'Education') {
                     if ($row['primary_type'] == 'Org') {
                         $relationship->Entity1 = $this->entity;
                         $relationship->Entity2 = $entity2;
                     } else {
                         $relationship->Entity1 = $entity2;
                         $relationship->Entity2 = $this->entity;
                     }
                 } else {
                     $relationship->Entity1 = $this->entity;
                     $relationship->Entity2 = $entity2;
                 }
             }
             $relationship->setCategory($relationship_category);
             $cols = array('description1', 'description2', 'start_date', 'end_date', 'goods', 'amount', 'is_board', 'is_executive', 'is_employee');
             foreach ($cols as $col) {
                 if (isset($row[$col]) && $row[$col] != '') {
                     try {
                         $relationship[$col] = $row[$col];
                     } catch (Exception $e) {
                         $this->printDebug("   could not set {$col} for relationship, skipping");
                     }
                 }
             }
             // skip when a relationship with the same endpoints and category exists
             $q = LsDoctrineQuery::create()->from('Relationship r')->where('r.entity1_id = ? and r.entity2_id = ? and r.category_id = ? and r.id <> ?', array($relationship->entity1_id, $relationship->entity2_id, $relationship->category_id, $relationship->id))->fetchOne();
             if ($q) {
                 $this->printDebug('   (relationship already found, skipping...)');
                 return;
             }
             $relationship->save();
             $relationship->addReference($url, null, null, $url_name);
             $this->printDebug(" Relationship saved: {$relationship}\n");
         } else {
             if ($this->list) {
                 $q = LsDoctrineQuery::create()->from('LsListEntity le')->where('le.entity_id = ? and le.list_id = ?', array($entity2->id, $this->list->id))->fetchOne();
                 if ($q) {
                     $this->printDebug('   (already on list, skipping...)');
                     return;
                 }
                 $le = new LsListEntity();
                 $le->LsList = $this->list;
                 $le->Entity = $entity2;
                 var_dump($row);
                 if (isset($row['rank'])) {
                     echo $row['rank'];
                     $le->rank = $row['rank'];
                 }
                 $le->save();
             }
         }
     }
 }
Esempio n. 19
0
 public function executeAddBulk($request)
 {
     $this->checkEntity($request, false, false);
     $this->reference_form = new ReferenceForm();
     $this->reference_form->setSelectObject($this->entity);
     $this->add_bulk_form = new AddBulkForm();
     //get possible default categories
     $this->categories = LsDoctrineQuery::create()->select('c.name, c.name')->from('RelationshipCategory c')->orderBy('c.id')->fetchAll(PDO::FETCH_KEY_PAIR);
     array_unshift($this->categories, '');
     if ($request->isMethod('post') && in_array($request->getParameter('commit'), array('Begin', 'Continue'))) {
         if ($request->hasParameter('ref_id')) {
             $this->ref_id = $request->getParameter('ref_id');
         } else {
             $refParams = $request->getParameter('reference');
             $this->reference_form->bind($refParams);
             $restOfParams = (array) $request->getParameterHolder();
             $restOfParams = array_shift($restOfParams);
             $this->add_bulk_form->bind($restOfParams, $request->getFiles());
             if (!$this->reference_form->isValid() || !$this->add_bulk_form->isValid()) {
                 return;
             }
             if ($this->ref_id = $refParams['existing_source']) {
                 $ref = Doctrine::getTable('Reference')->find($this->ref_id);
                 $url = $ref->source;
             } else {
                 $ref = new Reference();
                 $ref->object_model = 'Entity';
                 $ref->object_id = $this->entity->id;
                 $ref->source = $refParams['source'];
                 $ref->name = $refParams['name'];
                 $ref->source_detail = $refParams['source_detail'];
                 $ref->publication_date = $refParams['publication_date'];
                 $ref->save();
             }
             $this->ref_id = $ref->id;
             $this->reference = $ref;
         }
         $verify_method = $request->getParameter('verify_method');
         if ($this->add_method = $request->getParameter('add_method')) {
             if ($this->add_method == 'scrape') {
                 //scrape ref url
                 //set names to confirm
                 $browser = new sfWebBrowser();
                 $entity_types = $request->getParameter('entity_types');
                 //FIND NAMES AT URL USING COMBO OF OPENCALAIS & LS CUSTOM HTML PARSING
                 if (!$browser->get($ref->source)->responseIsError()) {
                     $text = $browser->getResponseText();
                     $this->names = LsTextAnalysis::getHtmlEntityNames($text, $entity_types);
                     $text = LsHtml::findParagraphs($text);
                     $this->text = preg_replace('/<[^b][^>]*>/is', " ", $text);
                     $this->confirm_names = true;
                     return;
                 } else {
                     $request->setError('csv', 'problems finding names at that url');
                 }
             } else {
                 if ($this->add_method == 'upload') {
                     $file = $this->add_bulk_form->getValue('file');
                     $filename = 'uploaded_' . sha1($file->getOriginalName());
                     $extension = $file->getExtension($file->getOriginalExtension());
                     $filePath = sfConfig::get('sf_temp_dir') . '/' . $filename . $extension;
                     $file->save($filePath);
                     if ($filePath) {
                         if ($spreadsheetArr = LsSpreadsheet::parse($filePath)) {
                             $names = $spreadsheetArr['rows'];
                             if (!in_array('name', $spreadsheetArr['headers'])) {
                                 $request->setError('file', 'The file you uploaded could not be parsed properly because there is no "name" column.');
                                 return;
                             }
                             if (in_array('summary', $spreadsheetArr['headers'])) {
                                 foreach ($names as &$name) {
                                     $name['summary'] = str_replace(array('?', "'"), "'", $name['summary']);
                                     $name['summary'] = str_replace(array('?', '?', '"'), '"', $name['summary']);
                                     if (isset($name['title'])) {
                                         $name['description1'] = $name['title'];
                                     }
                                 }
                                 unset($name);
                             }
                         } else {
                             $request->setError('file', 'The file you uploaded could not be parsed properly.');
                             return;
                         }
                     } else {
                         $request->setError('file', 'You need to upload a file.');
                         return;
                     }
                 } else {
                     if ($this->add_method == 'summary') {
                         //parse summary for names
                         $this->text = $this->entity->summary;
                         $entity_types = $request->getParameter('entity_types');
                         $this->names = LsTextAnalysis::getTextEntityNames($this->text, $entity_types);
                         $this->confirm_names = true;
                         return;
                     } else {
                         if ($this->add_method == 'text') {
                             $manual_names = $request->getParameter('manual_names');
                             if ($manual_names && $manual_names != "") {
                                 $manual_names = preg_split('#[\\r\\n]+#', $manual_names);
                                 $manual_names = array_map('trim', $manual_names);
                                 $names = array();
                                 foreach ($manual_names as $name) {
                                     $names[] = array('name' => $name);
                                 }
                             } else {
                                 $request->setError('csv', 'You did not add names properly.');
                                 return;
                             }
                         } else {
                             if ($this->add_method == 'db_search') {
                                 $this->db_search = true;
                             }
                         }
                     }
                 }
             }
         }
         //intermediate scrape page -- takes confirmed names, builds names arr
         if ($confirmed_names = $request->getParameter('confirmed_names')) {
             $restOfParams = (array) $request->getParameterHolder();
             $restOfParams = array_shift($restOfParams);
             $this->add_bulk_form->bind($restOfParams, $request->getFiles());
             if (!$this->add_bulk_form->isValid()) {
                 $this->reference = Doctrine::getTable('reference')->find($this->ref_id);
                 $this->names = unserialize(stripslashes($request->getParameter('names')));
                 $this->confirm_names = true;
                 return;
             }
             $names = array();
             foreach ($confirmed_names as $cn) {
                 $names[] = array('name' => $cn);
             }
             $manual_names = $request->getParameter('manual_names');
             if ($manual_names && $manual_names != "") {
                 $manual_names = preg_split('#[\\r\\n]+#', $manual_names);
                 $manual_names = array_map('trim', $manual_names);
                 foreach ($manual_names as $name) {
                     $names[] = array('name' => $name);
                 }
             }
         }
         // LOAD IN RELATIONSHIP DEFAULTS
         if (isset($verify_method)) {
             $defaults = $request->getParameter('relationship');
             if ($verify_method == 'enmasse') {
                 $this->default_type = $request->getParameter('default_type');
                 $this->order = $request->getParameter('order');
                 $category_name = $request->getParameter('relationship_category_all');
                 $this->extensions = ExtensionDefinitionTable::getByTier(2, $this->default_type);
                 $extensions_arr = array();
                 foreach ($this->extensions as $ext) {
                     $extensions_arr[] = $ext->name;
                 }
             } else {
                 $category_name = $request->getParameter('relationship_category_one');
             }
             if ($category_name) {
                 $this->category_name = $category_name;
                 if (!($category = Doctrine::getTable('RelationshipCategory')->findOneByName($category_name))) {
                     $request->setError('csv', 'You did not select a relationship category.');
                     return;
                 }
                 $formClass = $category_name . 'Form';
                 $categoryForm = new $formClass(new Relationship());
                 $categoryForm->setDefaults($defaults);
                 $this->form_schema = $categoryForm->getFormFieldSchema();
                 if (in_array($category_name, array('Position', 'Education', 'Membership', 'Donation', 'Lobbying', 'Ownership'))) {
                     $this->field_names = array('description1', 'start_date', 'end_date', 'is_current');
                 } else {
                     $this->field_names = array('description1', 'description2', 'start_date', 'end_date', 'is_current');
                 }
                 $extraFields = array('Position' => array('is_board', 'is_executive'), 'Education' => array('degree_id'), 'Donation' => array('amount'), 'Transaction' => array('amount'), 'Lobbying' => array('amount'), 'Ownership' => array('percent_stake', 'shares'));
                 if (isset($extraFields[$category_name])) {
                     $this->field_names = array_merge($this->field_names, $extraFields[$category_name]);
                 }
             }
             $this->matches = array();
             // BOOT TO TOOLBAR OR LOOK FOR MATCHES FOR ENMASSE ADD
             if (isset($names) && count($names) > 0 || isset($this->db_search)) {
                 if ($verify_method == 'onebyone') {
                     if (isset($category_name)) {
                         $defaults['category'] = $category_name;
                     }
                     $toolbar_names = array();
                     foreach ($names as $name) {
                         $toolbar_names[] = $name['name'];
                     }
                     $this->getUser()->setAttribute('toolbar_names', $toolbar_names);
                     $this->getUser()->setAttribute('toolbar_entity', $this->entity->id);
                     $this->getUser()->setAttribute('toolbar_defaults', $defaults);
                     $this->getUser()->setAttribute('toolbar_ref', $this->ref_id);
                     $this->redirect('relationship/toolbar');
                 } else {
                     $this->category_name = $category_name;
                     if (isset($this->db_search)) {
                         $num = $request->getParameter('num', 10);
                         $page = $request->getParameter('page', 1);
                         $q = LsDoctrineQuery::create()->from('Entity e')->where('(e.summary rlike ? or e.blurb rlike ?)', array('[[:<:]]' . $this->entity->name . '[[:>:]]', '[[:<:]]' . $this->entity->name . '[[:>:]]'));
                         foreach ($this->entity->Alias as $alias) {
                             $q->orWhere('(e.summary rlike ? or e.blurb rlike ?)', array('[[:<:]]' . $alias->name . '[[:>:]]', '[[:<:]]' . $alias->name . '[[:>:]]'));
                         }
                         $q->setHydrationMode(Doctrine::HYDRATE_ARRAY);
                         $cat_id = constant('RelationshipTable::' . strtoupper($category_name) . '_CATEGORY');
                         $q->whereParenWrap();
                         $q->andWhere('NOT EXISTS (SELECT DISTINCT l.relationship_id FROM Link l ' . 'WHERE l.entity1_id = e.id AND l.entity2_id = ? AND l.category_id = ?)', array($this->entity['id'], $cat_id));
                         $summary_matches = $q->execute();
                         foreach ($summary_matches as $summary_match) {
                             $aliases = array();
                             foreach ($this->entity->Alias as $alias) {
                                 $aliases[] = LsString::escapeStringForRegex($alias->name);
                             }
                             $aliases = implode("|", $aliases);
                             $summary_match['summary'] = preg_replace('/(' . $aliases . ')/is', '<strong>$1</strong>', $summary_match['summary']);
                             $this->matches[] = array('search_results' => array($summary_match));
                         }
                     } else {
                         for ($i = 0; $i < count($names); $i++) {
                             if (isset($names[$i]['name']) && trim($names[$i]['name']) != '') {
                                 $name = $names[$i]['name'];
                                 $name_terms = $name;
                                 if ($this->default_type == 'Person') {
                                     $name_parts = preg_split('/\\s+/', $name);
                                     if (count($name_parts) > 1) {
                                         $name_terms = PersonTable::nameSearch($name);
                                     }
                                     $terms = $name_terms;
                                     $primary_ext = "Person";
                                 } else {
                                     if ($this->default_type == 'Org') {
                                         $name_terms = OrgTable::nameSearch($name);
                                         $terms = $name_terms;
                                         $primary_ext = "Org";
                                     } else {
                                         $terms = $name_terms;
                                         $primary_ext = null;
                                     }
                                 }
                                 $pager = EntityTable::getSphinxPager($terms, $page = 1, $num = 20, $listIds = null, $aliases = true, $primary_ext);
                                 $match = $names[$i];
                                 $match['search_results'] = $pager->execute();
                                 if (isset($names[$i]['types'])) {
                                     $types = explode(',', $names[$i]['types']);
                                     $types = array_map('trim', $types);
                                     $match['types'] = array();
                                     foreach ($types as $type) {
                                         if (in_array($type, $extensions_arr)) {
                                             $match['types'][] = $type;
                                         }
                                     }
                                 }
                                 $this->matches[] = $match;
                             }
                         }
                     }
                 }
             }
         }
     } else {
         if ($page = $this->getRequestParameter('page')) {
             $this->page = $page;
             $this->num = $this->getRequestParameter('num', 50);
         } else {
             if ($request->isMethod('post') && $request->getParameter('commit') == 'Submit') {
                 $this->ref_id = $this->getRequestParameter('ref_id');
                 $entity_ids = array();
                 $relationship_category = $this->getRequestParameter('category_name');
                 $order = $this->getRequestParameter('order');
                 $default_type = $request->getParameter('default_type');
                 $default_ref = Doctrine::getTable('Reference')->find($request->getParameter('ref_id'));
                 for ($i = 0; $i < $this->getRequestParameter('count'); $i++) {
                     if ($entity_id = $request->getParameter('entity_' . $i)) {
                         $selected_entity_id = null;
                         $relParams = $request->getParameter("relationship_" . $i);
                         if ($relParams['ref_name']) {
                             $ref['source'] = $relParams['ref_source'];
                             $ref['name'] = $relParams['ref_name'];
                         }
                         if ($entity_id == 'new') {
                             $name = $request->getParameter('new_name_' . $i);
                             if ($default_type == 'Person') {
                                 $new_entity = PersonTable::parseFlatName($name);
                             } else {
                                 $new_entity = new Entity();
                                 $new_entity->addExtension('Org');
                                 $new_entity->name = trim($name);
                             }
                             $new_entity->save();
                             $new_entity->blurb = $request->getParameter('new_blurb_' . $i);
                             $new_entity->summary = $request->getParameter('new_summary_' . $i);
                             if (!$ref) {
                                 $ref = $default_ref;
                             }
                             $new_entity->addReference($ref['source'], null, null, $ref['name']);
                             if ($types = $request->getParameter('new_extensions_' . $i)) {
                                 foreach ($types as $type) {
                                     $new_entity->addExtension($type);
                                 }
                             }
                             $new_entity->save();
                             $selected_entity_id = $new_entity->id;
                         } else {
                             if ($entity_id > 0) {
                                 $selected_entity_id = $entity_id;
                                 LsCache::clearEntityCacheById($selected_entity_id);
                             }
                         }
                         if ($selected_entity_id) {
                             $startDate = $relParams['start_date'];
                             $endDate = $relParams['end_date'];
                             unset($relParams['start_date'], $relParams['end_date'], $relParams['ref_name'], $relParams['ref_url']);
                             $rel = new Relationship();
                             $rel->setCategory($relationship_category);
                             if ($order == '1') {
                                 $rel->entity1_id = $this->entity['id'];
                                 $rel->entity2_id = $selected_entity_id;
                             } else {
                                 $rel->entity2_id = $this->entity['id'];
                                 $rel->entity1_id = $selected_entity_id;
                             }
                             //only set dates if valid
                             if ($startDate && preg_match('#^\\d{4}-\\d{2}-\\d{2}$#', Dateable::convertForDb($startDate))) {
                                 $rel->start_date = Dateable::convertForDb($startDate);
                             }
                             if ($endDate && preg_match('#^\\d{4}-\\d{2}-\\d{2}$#', Dateable::convertForDb($endDate))) {
                                 $rel->end_date = Dateable::convertForDb($endDate);
                             }
                             $rel->fromArray($relParams, null, $hydrateCategory = true);
                             if ($request->hasParameter('add_method') && $request->getParameter('add_method') == 'db_search') {
                                 $refs = EntityTable::getSummaryReferences($selected_entity_id);
                                 if (count($refs)) {
                                     $ref = $refs[0];
                                 } else {
                                     $refs = EntityTable::getAllReferencesById($selected_entity_id);
                                     if (count($refs)) {
                                         $ref = $refs[0];
                                     }
                                 }
                             }
                             if (!$ref) {
                                 $ref = $default_ref;
                             }
                             $rel->saveWithRequiredReference(array('source' => $ref['source'], 'name' => $ref['name']));
                             $ref = null;
                         }
                     }
                 }
                 $this->clearCache($this->entity);
                 $this->redirect($this->entity->getInternalUrl());
             } else {
                 if ($request->isMethod('post') && $request->getParameter('commit') == 'Cancel') {
                     $this->redirect($this->entity->getInternalUrl());
                 }
             }
         }
     }
 }
Esempio n. 20
0
 /**
  * Uses Google Maps API to parse flat address
  *
  * How much of the Address gets filled in depends on the accuracy value the
  * geocoder reports: above 3 the state, county and city are read, above 4 the
  * postal code, and above 5 the street. The country is always set to id 1
  * (USA only for now).
  *
  * @param string  $str             flat address; CR/LF characters are replaced by spaces
  * @param boolean $returnAccuracy  when true, return array('address' => Address, 'accuracy' => int)
  *
  * @return Address|array|null  null when the HTTP response is an error, when the
  *                             accuracy is 3 or less, or when no state can be resolved
  *
  * @throws Exception  adapter errors (eg. Host not found) propagate to the caller
  */
 static function parse($str, $returnAccuracy = false)
 {
     //CLEANUP
     $str = preg_replace('/[\\n\\r]/', ' ', $str);
     $key = sfConfig::get('sf_google_maps_key');
     $url = 'http://maps.google.com/maps/geo?q=' . urlencode($str) . '&output=xml&key=' . $key;
     $c = new sfWebBrowser();
     // No try/catch here: an adapter error (eg. Host not found) is meant to reach the caller as-is
     if ($c->get($url)->responseIsError()) {
         return null;
     }
     // The response body is re-encoded from ISO-8859-1 to UTF-8 before it is parsed as XML
     $c->setResponseText(iconv('ISO-8859-1', 'UTF-8', $c->getResponseText()));
     $xml = $c->getResponseXml();
     $structured = $xml->Response->Placemark->AddressDetails;
     $accuracy = (int) $structured['Accuracy'];
     //accuracy of 4+ means we have at least a town (UNLESS THE TOWN DOESN'T "OFFICIALLY" EXIST)
     if ($accuracy <= 3) {
         return null;
     }
     $address = new Address();
     //COUNTRY (USA only for now)
     $address->country_id = 1;
     //STATE (for some reason trickery needs to be done to get state to work right)
     $stateName = null;
     if (isset($structured->Country->AdministrativeArea->AdministrativeAreaName)) {
         $stateName = (array) $structured->Country->AdministrativeArea->AdministrativeAreaName;
         $stateName = isset($stateName[0]) ? $stateName[0] : $stateName;
     } else {
         // No administrative area: fall back to the country code unless it is plain 'US'
         $possible_state = $structured->Country->CountryNameCode;
         if ($possible_state != 'US') {
             $stateName = $possible_state;
         }
     }
     if (!$stateName) {
         return null;
     }
     if (!($state = AddressStateTable::retrieveByText($stateName))) {
         return null;
     }
     $address->state_id = $state->id;
     //COUNTY (this may not exist)
     $countyName = $structured->Country->AdministrativeArea->SubAdministrativeArea->SubAdministrativeAreaName;
     $address->county = LsString::emptyToNull((string) $countyName);
     // The locality is nested under the county node when a county is present
     if ($countyName) {
         $cityName = $structured->Country->AdministrativeArea->SubAdministrativeArea->Locality->LocalityName;
     } else {
         $cityName = $structured->Country->AdministrativeArea->Locality->LocalityName;
     }
     //CITY (this may not exist!)
     $address->city = (string) $cityName;
     //accuracy of 5+ means we have postal code
     if ($accuracy > 4) {
         // Pick the deepest node that exists; postal code and street hang off it
         if ($cityName && $countyName) {
             $base = $structured->Country->AdministrativeArea->SubAdministrativeArea->Locality;
             if (isset($base->DependentLocality)) {
                 $base = $base->DependentLocality;
             }
         } elseif ($cityName && !$countyName) {
             $base = $structured->Country->AdministrativeArea->Locality;
         } elseif (!$cityName && $countyName) {
             $base = $structured->Country->AdministrativeArea->SubAdministrativeArea;
         } else {
             $base = $structured->Country->AdministrativeArea;
         }
         //POSTAL CODE (for some reason trickery needs to be done to get postal code to work right)
         if ($postalCode = (array) $base->PostalCode->PostalCodeNumber) {
             $postalCode = $postalCode[0];
             $address->postal = (string) $postalCode;
         }
         //accuracy of 8 means we have exact match
         if ($accuracy > 5) {
             //STREET (unit info is lost)
             $street1 = $base->Thoroughfare->ThoroughfareName;
             $address->street1 = (string) $street1;
         }
     }
     //COORDINATES
     // The coordinate string is comma separated with longitude first, then latitude;
     // any further component is ignored. Padding keeps a short or empty string from
     // raising undefined-offset notices (the missing parts become null).
     $coords = $xml->Response->Placemark->Point->coordinates;
     list($lon, $lat) = array_pad(explode(',', (string) $coords), 2, null);
     $address->longitude = $lon;
     $address->latitude = $lat;
     if ($returnAccuracy) {
         return array('address' => $address, 'accuracy' => $accuracy);
     }
     return $address;
 }
Esempio n. 21
0
 /**
  * Tests whether $search2 appears within $n whitespace-delimited words of $search1.
  *
  * A window of up to $n words before and up to $n words after each occurrence
  * of $search1 is extracted from $subject, and each window is then searched
  * for $search2 at a word boundary.
  *
  * Both search terms are inserted into the patterns unescaped, so they are
  * treated as regex fragments; literal text containing regex metacharacters
  * (or '/') must be escaped by the caller. Note that the window pattern has no
  * modifiers (case-sensitive), while the $search2 pattern uses 'isu'.
  *
  * @param string  $subject  text to search
  * @param string  $search1  regex fragment the window is centred on
  * @param string  $search2  regex fragment looked for inside each window
  * @param integer $n        maximum number of words on each side of $search1
  *
  * @return boolean true if any window around $search1 contains $search2
  */
 static function withinN($subject, $search1, $search2, $n)
 {
     // One "word": a run of non-whitespace followed by whitespace
     $word = '[^\\s]+\\s+';
     $window = '/(' . $word . '){0,' . $n . '}' . $search1 . '\\b\\,?\\s*(' . $word . '){0,' . $n . '}/';
     if (!preg_match_all($window, $subject, $matches)) {
         return false;
     }
     foreach ($matches[0] as $match) {
         if (preg_match('/\\b' . $search2 . '/isu', $match)) {
             return true;
         }
     }
     return false;
 }
 /**
  * Converts a stored field value into a form suitable for display.
  *
  * Resolution order:
  *  - null becomes the string 'NULL';
  *  - if no modification can be loaded for $field, the value is returned as-is;
  *  - if the field has a relation alias and the related record is found, an
  *    entity link (Entity), a user link (sfGuardUser) or the hydrated record
  *    array is returned;
  *  - start_date / end_date go through Dateable::convertForDisplay();
  *  - 'integer' columns are cast to double and 'boolean' columns become
  *    'yes' / 'no';
  *  - anything else is excerpted to $excerpt and, when shortened, wrapped in a
  *    <span> whose title carries the tag-stripped full value.
  *
  * @param mixed   $value    stored value
  * @param array   $field    field row; 'field_name' is read here
  * @param integer $excerpt  excerpt length; a falsy value disables excerpting
  *
  * @return mixed display value (string, double or record array)
  */
 static function convertValueForDisplay($value, $field, $excerpt = 40)
 {
     if (is_null($value)) {
         return 'NULL';
     }
     $modification = self::loadModification($field);
     if (!$modification) {
         return $value;
     }
     $model = $modification['object_model'];
     $table = Doctrine::getTable($model);
     $columns = $table->getColumns();
     // A field that is not a column of the base model lives on an extension
     // (Entity) or category (Relationship) table instead.
     if ($model == 'Entity') {
         if (!array_key_exists($field['field_name'], $columns)) {
             $extensionName = EntityTable::getExtensionNameByFieldName($field['field_name']);
             if ($extensionName) {
                 $table = Doctrine::getTable($extensionName);
             }
         }
     } elseif ($model == 'Relationship') {
         if (!array_key_exists($field['field_name'], $columns)) {
             $categoryName = RelationshipTable::getCategoryNameByFieldName($field['field_name']);
             $table = Doctrine::getTable($categoryName);
         }
     }
     $alias = self::getFieldNameAlias($field);
     if ($alias) {
         $class = $table->getRelation($alias)->getClass();
         $record = Doctrine::getTable($class)->find($value, Doctrine::HYDRATE_ARRAY);
         if ($record) {
             if ($class == 'Entity') {
                 sfLoader::loadHelpers('Ls');
                 return entity_link($record, null);
             }
             if ($class == 'sfGuardUser') {
                 sfLoader::loadHelpers('Ls');
                 return user_link($record);
             }
             return $record;
         }
     }
     if (in_array($field['field_name'], array('start_date', 'end_date'))) {
         return Dateable::convertForDisplay($value);
     }
     $definition = $table->getColumnDefinition($field['field_name']);
     $type = $definition['type'];
     if ($type == 'integer') {
         return (double) $value;
     }
     if ($type == 'boolean') {
         return $value ? 'yes' : 'no';
     }
     if (!$excerpt) {
         return $value;
     }
     $short = LsString::excerpt($value, $excerpt);
     if ($short == $value) {
         return $value;
     }
     return '<span title="' . strip_tags($value) . '">' . $short . '</span>';
 }
Esempio n. 23
0
 /**
  * Chooses a blurb for a matched person from the candidate fragments in
  * $info['blurb_arr'].
  *
  * A fragment is kept when more than 3 words remain after removing boilerplate
  * terms, the person's own name words, $info['since'], the matched age and any
  * four-digit numbers. The kept fragments are joined with spaces, and the
  * result is stored in $info['blurb'] only if more than 4 words remain after
  * additionally removing common title words.
  *
  * @param array $info       reads 'blurb_arr' and 'since'; may gain 'blurb'
  * @param array $age_match  reads ['name_match']['id'] and ['age']
  *
  * @return array $info, with 'blurb' set when a usable blurb was found
  */
 private function parseBlurb($info, $age_match)
 {
     if (count($info['blurb_arr']) == 0) {
         return $info;
     }
     $id = $age_match['name_match']['id'];
     $person = Doctrine::getTable('Entity')->find($id);
     $skip = array_merge(
         array('director', 'directors', 'since', 'board', $info['since'], $age_match['age'], 'age'),
         explode(' ', $person->name)
     );
     $kept = array();
     foreach ($info['blurb_arr'] as $fragment) {
         if ($this->countBlurbWords($fragment, $skip) > 3) {
             $kept[] = $fragment;
         }
     }
     if (count($kept) > 0) {
         $blurb = implode(' ', $kept);
         // The joined blurb faces a stricter test: title words are stripped as well
         $skip = array_merge($skip, array('executive', 'vice', 'president', 'chief', 'chairman', 'of', 'the'));
         if ($this->countBlurbWords($blurb, $skip) > 4) {
             $info['blurb'] = $blurb;
         }
     }
     return $info;
 }

 /**
  * Counts the whitespace-separated pieces left in $text after removing every
  * term in $skip (whole word, case-insensitive), any four-digit number, and
  * non-alphabetic characters.
  *
  * The split does not drop empty pieces, so leading or trailing whitespace
  * left behind by the removals contributes to the count.
  *
  * @param string $text  text to measure
  * @param array  $skip  terms to strip before counting
  *
  * @return integer
  */
 private function countBlurbWords($text, array $skip)
 {
     foreach ($skip as $term) {
         $term = LsString::escapeStringForRegex($term);
         $text = preg_replace('/\\b' . $term . '\\b/isu', '', $text);
     }
     $text = preg_replace('/\\b\\d\\d\\d\\d\\b/', '', $text);
     $text = LsString::stripNonAlpha($text, ' ');
     return count(preg_split('/\\s+/s', $text));
 }
Esempio n. 24
0
 /**
  * Imports one governor from the page at $this->_baseUrl . $row['url'].
  *
  * Nothing happens if the page request returns an error. Otherwise the page
  * text is scraped with regular expressions and the following are created or
  * updated, each with a 'Governors Association' reference: the governor
  * entity (birth date, birthplace, party, summary), Education relationships
  * to schools, the state's governor office and Position relationships for
  * each term, a spouse with a Family relationship, a phone number and a
  * featured image.
  *
  * @param array $row  reads 'url', 'name', 'state' and 'years'; 'years' is
  *                    sorted in place and consumed in (start, end) pairs
  */
 protected function importGovernor($row)
 {
     $url = $this->_baseUrl . $row['url'];
     if (!$this->browser->get($url)->responseIsError()) {
         $text = $this->browser->getResponseText();
         $text = LsHtml::replaceEntities($text);
         //preg_match('/>Family\:<\/b>([^<]*)<br/is',$text,$family_arr);
         // Drop the 'Gov.' title before parsing the name
         $name = trim(str_ireplace('Gov.', '', $row['name']));
         $this->printDebug('');
         $this->printDebug($name . ':');
         $governor = PersonTable::parseFlatName($name);
         $governor->addExtension('PoliticalCandidate');
         $governor->addExtension('ElectedRepresentative');
         $governor->is_state = 1;
         // Look for an existing entity to reuse instead of the freshly parsed one
         $similar = $governor->getSimilarEntitiesQuery(true)->execute();
         foreach ($similar as $s) {
             $sim_re = LsString::escapeStringForRegex($s->name_first);
             $search_re = LsString::escapeStringForRegex($governor->name_first);
             // Skip candidates unless one first name is a prefix of the other
             if (preg_match('/^' . $sim_re . '/su', $governor->name_first) == 0 && preg_match('/^' . $search_re . '/su', $s->name_first) == 0) {
                 continue;
             }
             // Accept the first candidate whose extended bio mentions governor(ship)
             $bio = $s->getExtendedBio();
             if (preg_match('/\\bgovernor(ship)?\\b/isu', $bio)) {
                 $governor = $s;
                 $this->printDebug(' Found existing governor: ' . $s->name . ' ' . $s->id);
                 break;
             }
         }
         $governor->save();
         $this->printDebug($governor->id);
         // Scraped values below only fill fields that are still empty
         if (!$governor->start_date && preg_match('/>Born\\:<\\/b>([^<]*)<br/is', $text, $birth_arr)) {
             $this->printDebug(' Birthdate: ' . $birth_arr[1]);
             $governor->start_date = trim($birth_arr[1]);
         }
         if (!$governor->birthplace && preg_match('/>Birth State\\:<\\/b>([^<]*)<br/is', $text, $birth_state_arr)) {
             $this->printDebug(' Birthplace: ' . trim($birth_state_arr[1]));
             $governor->birthplace = trim($birth_state_arr[1]);
         }
         //PARTY MEMBERSHIP
         if (preg_match('/>Party\\:<\\/b>([^<]*)<br/is', $text, $party_arr)) {
             $party_str = $party_arr[1];
             $this->printDebug(' Party: ' . $party_str);
             // If both names appear in the string, the Republican lookup overwrites the Democratic one
             if (stristr($party_str, 'Democrat')) {
                 $party = EntityTable::getByExtensionQuery('PoliticalParty')->addWhere('name = ?', 'Democratic Party')->fetchOne();
             }
             if (stristr($party_str, 'Republican')) {
                 $party = EntityTable::getByExtensionQuery('PoliticalParty')->addWhere('name = ?', 'Republican Party')->fetchOne();
             }
             if (isset($party) && $party && !$governor->party_id) {
                 $governor->Party = $party;
                 $governor->is_independent = false;
                 $this->printDebug(' Added membership in ' . $party);
             } else {
                 if (stristr($party_str, 'Independent')) {
                     $governor->is_independent = true;
                 }
             }
         }
         // Summary: concatenate every text run of 240+ characters between tags,
         // skipping runs that mention 'Javascript'
         if (!$governor->summary && preg_match_all('/>([^<]{240,})/isu', $text, $bio_match)) {
             $str = '';
             foreach ($bio_match[1] as $b) {
                 if (!stristr($b, 'Javascript')) {
                     $str .= "\n\n" . $b;
                 }
             }
             $str = trim($str);
             if (strlen($str)) {
                 $governor->summary = $str;
             }
         }
         $governor->save();
         $governor->addReference($url, null, $governor->getAllModifiedFields(), 'Governors Association');
         //SCHOOLS
         if (preg_match('/>School\\(s\\)\\:<\\/b>([^<]*)<br/is', $text, $school_arr)) {
             // Schools are ';'-separated; if that yields a single item, try ',' instead
             $school_names = explode(';', trim($school_arr[1]));
             if (count($school_names) == 1) {
                 $school_names = explode(',', $school_names[0]);
             }
             foreach ($school_names as $school_name) {
                 $school_name = trim($school_name);
                 // Find the school by name or alias, creating it when missing
                 if (!($school = EntityTable::getByExtensionQuery('School')->leftJoin('e.Alias a')->addWhere('e.name = ? or a.name = ?', array($school_name, $school_name))->fetchOne())) {
                     $school = new Entity();
                     $school->addExtension('Org');
                     $school->addExtension('School');
                     $school->name = $school_name;
                     $school->save();
                     $this->printDebug(' Added School: ' . $school_name);
                 }
                 // Only add the education relationship if none exists yet for this pair
                 $q = RelationshipTable::getByCategoryQuery('Education')->addWhere('entity1_id = ? and entity2_id = ?', array($governor->id, $school->id))->fetchOne();
                 if (!$q) {
                     $relationship = new Relationship();
                     $relationship->setCategory('Education');
                     $relationship->Entity1 = $governor;
                     $relationship->Entity2 = $school;
                     $relationship->is_current = 0;
                     $relationship->save();
                     $relationship->addReference($url, null, $relationship->getAllModifiedFields(), 'Governors Association');
                     $this->printDebug(' Added education: ' . $relationship->name);
                 }
             }
         }
         //GOVERNOR OFFICE AND POSITION
         $office_name = 'Office of the Governor of ' . $row['state'];
         if (!($office = EntityTable::getByExtensionQuery('GovernmentBody')->addWhere('name = ?', $office_name)->fetchOne())) {
             $office = new Entity();
             $office->name = $office_name;
             $office->addExtension('Org');
             $office->addExtension('GovernmentBody');
             $state = Doctrine::getTable('AddressState')->findOneByName($row['state']);
             if ($state) {
                 $office->state_id = $state->id;
             }
             $office->save();
             $office->addReference($url, null, $office->getAllModifiedFields(), 'Governors Association');
             $this->printDebug(' Added office: ' . $office->name);
         }
         // Terms are only added when no 'Governor' position exists for this governor and office
         $q = RelationshipTable::getByCategoryQuery('Position')->addWhere('entity1_id = ? and entity2_id = ? and description1 = ?', array($governor->id, $office->id, 'Governor'))->fetchOne();
         if (!$q) {
             sort($row['years']);
             $i = 0;
             // The sorted years are consumed two at a time: [start, end, start, end, ...].
             // A start year with no following end year is treated as the current term.
             while ($i < count($row['years'])) {
                 $governorship = new Relationship();
                 $governorship->setCategory('Position');
                 $governorship->Entity1 = $governor;
                 $governorship->Entity2 = $office;
                 $governorship->description1 = 'Governor';
                 $governorship->start_date = $row['years'][$i];
                 // Advance to the end-year slot of this pair
                 $i++;
                 if (isset($row['years'][$i])) {
                     $governorship->end_date = $row['years'][$i];
                     $governorship->is_current = 0;
                     // 'Former' blurb only when this end year is the last entry
                     if (!$governor->blurb && !isset($row['years'][$i + 1])) {
                         $governor->blurb = 'Former Governor of ' . $row['state'];
                     }
                 } else {
                     $governorship->is_current = 1;
                     if (!$governor->blurb) {
                         $governor->blurb = 'Governor of ' . $row['state'];
                     }
                 }
                 $governor->save();
                 // Advance to the start-year slot of the next pair
                 $i++;
                 $governorship->save();
                 $governorship->addReference($url, null, $governorship->getAllModifiedFields(), 'Governors Association');
                 $this->printDebug(' Added governorship: ' . $governorship->name);
             }
         }
         //SPOUSE
         if (preg_match('/>Spouse\\:<\\/b>(.*?)<br/is', $text, $spouse_arr)) {
             $spouse = trim(LsHtml::stripTags($spouse_arr[1]));
             // The lookup is for any Family relationship involving the governor (not
             // specifically a spouse); if one is found, no spouse is added
             $q = RelationshipTable::getByCategoryQuery('Family')->addWhere('entity1_id = ? or entity2_id = ?', array($governor->id, $governor->id))->fetchOne();
             if (!$q && strlen($spouse)) {
                 // $spouse changes from the scraped name string to the parsed person entity
                 $spouse = PersonTable::parseFlatName($spouse);
                 $spouse->save();
                 $this->printDebug(' Added spouse: ' . $spouse->name);
                 $relationship = new Relationship();
                 $relationship->setCategory('Family');
                 $relationship->Entity1 = $spouse;
                 $relationship->Entity2 = $governor;
                 $relationship->description1 = 'Spouse';
                 $relationship->description2 = 'Spouse';
                 $relationship->save();
                 $relationship->addReference($url, null, $relationship->getAllModifiedFields(), 'Governors Association');
                 $this->printDebug(' Added spouse relationship: ' . $relationship->name);
             }
         }
         //ADDRESS --not working, malformed addresses
         /*
               if (preg_match('/>Address\:\s*<\/b>(.*?)<b>/is',$text,$address_arr))
               {
                 $address = trim(str_replace('<br/>',', ',$address_arr[1]));
                 $this->printDebug($address);
                 if ($governor->Address->count() == 0 && $a = $governor->addAddress($address))
                 {
                   $this->printDebug(' Address: ' . $a);
                   $governor->save();
                 }
               }*/
         //PHONE NUMBER
         if (preg_match('/>Phone\\(s\\)\\:<\\/b>([^<]*)<br/is', $text, $phone_arr)) {
             $phone_number = trim($phone_arr[1]);
             if (!$governor->Phone->count()) {
                 $phone = $governor->addPhone($phone_number);
                 $this->printDebug(' Phone: ' . $phone);
             }
         }
         // IMAGE: only imported when the governor has no image yet
         if (!$governor->Image->count() && preg_match('/<img .*?class\\="display" src\\="([^"]*)"/is', $text, $img_arr)) {
             // NOTE: $url is reassigned from the page URL to the image src from here on
             $url = $img_arr[1];
             try {
                 $fileName = ImageTable::createFiles($url, $governor->name_first);
             } catch (Exception $e) {
                 // Any failure creating the image files just skips the image
                 $fileName = null;
             }
             if ($fileName) {
                 //insert image record
                 $image = new Image();
                 $image->filename = $fileName;
                 $image->entity_id = $governor->id;
                 $image->title = $governor->name;
                 $image->caption = 'From Governors Association website';
                 $image->is_featured = true;
                 $image->is_free = false;
                 $image->url = $url;
                 $image->save();
                 $this->printDebug("Imported image: " . $image->filename);
             }
         }
     }
 }
Esempio n. 25
0
 /**
  * Interactively imports one scraped result row.
  *
  * Cleans the scraped fields, asks the operator (through readline()) whether
  * to process the row, offers similar existing entities as matches, optionally
  * creates a new entity, and then optionally records:
  *  - position relationships to orgs named in the bio,
  *  - membership in $this->list,
  *  - a relationship to $this->org.
  *
  * @param array $match Scraped fields. 'name' is required; 'bio', 'rank' and
  *                     'affiliation1' are read when present.
  *
  * @return false|null false when the operator declines to process the row;
  *                    null in every other case (including a missing 'name').
  */
 public function parseResults($match)
 {
     // Semicolon-delimited copy of the raw bio, consumed by parseBio() below.
     // Initialised up front so parseBio() never receives an undefined variable
     // when the row has no 'bio' field.
     $bio_dirty = '';
     if (isset($match['bio'])) {
         $bio_dirty = LsHtml::replaceEntities(LsString::spacesToSpace(LsHtml::stripTags($match['bio'], "; ")));
         $bio_dirty = preg_replace('/(\\;\\s)+/is', '; ', $bio_dirty);
     }
     foreach ($match as &$m) {
         $m = LsHtml::replaceEntities(LsString::spacesToSpace(LsHtml::stripTags($m, " ")));
     }
     // Break the reference: $m is reused further down as a preg_match() output
     // variable, which would otherwise overwrite the last element of $match.
     unset($m);
     if (!isset($match['name'])) {
         return;
     }
     $name = $match['name'];
     $bio = isset($match['bio']) ? $match['bio'] : '';
     $this->printDebug("_________________________\n\nname: " . $name . "\n");
     $this->printDebug("bio: " . $bio . "\n");
     $accept = strtolower($this->readline('Process this entity? (n to skip) '));
     if ($accept == 'n' || $accept == 'no') {
         return false;
     }
     // Build an unsaved candidate entity and fetch look-alikes to offer as matches.
     if (!$this->org_org) {
         if ($this->last_first) {
             $entity = PersonTable::parseCommaName($name);
         } else {
             $entity = PersonTable::parseFlatName($name);
         }
         $similar_entities = PersonTable::getSimilarQuery2($entity)->execute();
     } else {
         $entity = new Entity();
         $entity->addExtension('Org');
         foreach ($this->org_extensions as $ext) {
             $entity->addExtension($ext);
         }
         $entity->setEntityField('name', $name);
         // NOTE(review): this normalisation runs after the name was already set on
         // the entity and $name is not read again on the org path, so it currently
         // has no effect. Left in place because the intended order is unclear.
         $name = trim($name);
         $name = str_replace('.', '', $name);
         $similar_entities = OrgTable::getSimilarQuery($entity)->execute();
     }
     $matched = false;
     foreach ($similar_entities as $similar_entity) {
         if ($similar_entity['primary_ext'] == 'Person') {
             $this->printDebug('  POSSIBLE MATCH: ' . $similar_entity->name . ' (Orgs :: ' . $similar_entity->getRelatedOrgsSummary() . "  Bio :: {$similar_entity->summary})");
         } else {
             $this->printDebug('  POSSIBLE MATCH: ' . $similar_entity->name . ' (Summary :: ' . $similar_entity->summary . ')');
         }
         $accept = $this->readline('  Is this the same entity? (y or n)');
         $attempts = 1;
         // 'break' is an undocumented answer that stops reviewing candidates; it is
         // accepted here so it takes effect immediately instead of after five re-prompts.
         while ($accept != 'y' && $accept != 'n' && $accept != 'break' && $attempts < 5) {
             $accept = $this->readline('  Is this the same entity? (y or n) ');
             $attempts++;
         }
         if ($accept == 'y') {
             $entity = $similar_entity;
             $matched = true;
             $this->printDebug('             [accepted]');
             //sleep(1);
             break;
         } elseif ($accept == 'break') {
             break;
         }
     }
     $created = false;
     if (!$matched) {
         if ($entity->getPrimaryExtension() == 'Person') {
             $this->printDebug('  New person: ' . $entity->name_first . ' ' . $entity->name_last);
         } else {
             $this->printDebug('  New org: ' . $entity->name);
         }
         $accept = $this->readline('    create this new entity? (y or n) ');
         $attempts = 1;
         while ($accept != 'y' && $accept != 'n' && $attempts < 5) {
             $accept = $this->readline('    create this new entity? (y or n) ');
             $attempts++;
         }
         if ($accept == 'y') {
             if ($entity->getPrimaryExtension() == 'Person') {
                 $this->printDebug("\n  Bio: {$bio} \n");
                 $accept = $this->readline('    Add this bio? (y or n) ');
                 $attempts = 1;
                 while ($accept != 'y' && $accept != 'n' && $attempts < 5) {
                     $accept = $this->readline('    add this bio? (y or n) ');
                     $attempts++;
                 }
                 if ($accept == 'y') {
                     $entity->summary = $bio;
                 }
             }
             $entity->save();
             $entity->addReference($this->url, null, null, $this->url_name);
             $created = true;
             $this->printDebug(' ' . $entity->name . ' saved');
             //sleep(1);
         }
     }
     // For people, optionally walk the names parseBio() extracts from the bio and
     // turn the ones the operator confirms as orgs into position relationships.
     if (($matched || $created) && $entity->getPrimaryExtension() == 'Person') {
         $accept = $this->readline("Parse above bio for possible relationships? (y or n) ");
         $attempts = 1;
         while ($accept != 'y' && $accept != 'n' && $attempts < 5) {
             $accept = $this->readline("Parse above bio for possible relationships? (y or n) ");
             $attempts++;
         }
         if ($accept == 'y') {
             $names = $entity->parseBio($bio_dirty);
             $this->printDebug(" Orgs that {$entity} has a position at?");
             foreach ($names as $name) {
                 $exists = false;
                 $name = trim($name);
                 $accept = $this->readline(" > {$name} ::  an org? (y or n or b to break) ");
                 $attempts = 1;
                 $accept = strtolower($accept);
                 while ($accept != 'y' && $accept != 'n' && $accept != 'b' && $attempts < 5) {
                     $accept = $this->readline("  {$name} ::  an org? (y or n or b to break) ");
                     $accept = strtolower($accept);
                     $attempts++;
                 }
                 if ($accept == 'b') {
                     break;
                 }
                 if ($accept != 'y') {
                     continue;
                 }
                 $this->printDebug(' .....looking for names.....');
                 $orgs = EntityTable::getByExtensionAndNameQuery('Org', $name)->limit(10)->execute();
                 $related_org = null;
                 foreach ($orgs as $org) {
                     $q = LsDoctrineQuery::create()->from('Relationship r')->where('entity1_id = ? and entity2_id = ?', array($entity->id, $org->id))->fetchOne();
                     if ($q) {
                         $this->printDebug('  Position already exists, skipping...');
                         $exists = true;
                         break;
                     }
                     $accept = $this->readline("    Create a position relationship between {$entity->name} and {$org->name}? (y or n) ");
                     $attempts = 1;
                     while ($accept != 'y' && $accept != 'n' && $attempts < 5) {
                         $accept = $this->readline("    Create a position relationship between {$entity->name} and {$org->name}? (y or n) ");
                         $attempts++;
                     }
                     if ($accept == 'y') {
                         $related_org = $org;
                         break;
                     }
                 }
                 if (!$related_org && !$exists) {
                     $accept = $this->readline(" couldn't find org, should this one be created: {$name} (y or n) ");
                     // Reset the retry counter; previously it carried over whatever
                     // count the preceding prompt loop had reached.
                     $attempts = 1;
                     while ($accept != 'y' && $accept != 'n' && $attempts < 5) {
                         $accept = $this->readline(" couldn't find org, should this one be created: {$name} (y or n) ");
                         $attempts++;
                     }
                     if ($accept == 'y') {
                         $related_org = new Entity();
                         $related_org->addExtension('Org');
                         // Drop periods from the name, except one directly followed by "com".
                         $related_org->name = preg_replace('/\\.(?!com)/i', '', $name);
                         $extensions = $this->readline("  what extensions should this org get? (eg 'Business, LobbyingFirm, LawFirm') ");
                         $extensions = preg_split('/\\,\\s*/isu', $extensions, -1, PREG_SPLIT_NO_EMPTY);
                         try {
                             foreach ($extensions as $extension) {
                                 $related_org->addExtension($extension);
                             }
                             $related_org->save();
                             $related_org->addReference($this->url, null, null, $this->url_name);
                         } catch (Exception $e) {
                             // Best effort: a failed org creation skips this name only.
                             $this->printDebug('   !!! problems with org creation, skipping');
                             $related_org = null;
                         }
                     }
                 }
                 if ($related_org) {
                     $q = LsDoctrineQuery::create()->from('Relationship r')->where('r.entity1_id = ? and r.entity2_id = ? and r.category_id = ?', array($entity->id, $related_org->id, 1))->fetchOne();
                     if ($q) {
                         $this->printDebug('   (relationship already found, skipping...)');
                         continue;
                     }
                     $relationship = new Relationship();
                     $relationship->Entity1 = $entity;
                     $relationship->Entity2 = $related_org;
                     $relationship->setCategory('Position');
                     $title = $this->readline("     Title for this position relationship? (<enter> to skip) ");
                     if (strlen($title) > 2) {
                         $relationship->description1 = $title;
                     }
                     // Any answer other than y/yes/n/no leaves the flag unset.
                     $current = strtolower($this->readline("      Is the relationship current? (y or n or <enter> to skip) "));
                     if (in_array($current, array('y', 'yes'), true)) {
                         $relationship->is_current = 1;
                     } elseif (in_array($current, array('n', 'no'), true)) {
                         $relationship->is_current = 0;
                     }
                     $board = strtolower($this->readline("      Is the relationship a board position? (y or n or <enter> to skip) "));
                     if (in_array($board, array('y', 'yes'), true)) {
                         $relationship->is_board = 1;
                     } elseif (in_array($board, array('n', 'no'), true)) {
                         $relationship->is_board = 0;
                     }
                     $relationship->save();
                     $relationship->addReference($this->url, null, null, $this->url_name);
                     $this->printDebug("     Relationship saved: {$relationship}");
                 }
             }
         }
     }
     if ($matched || $created) {
         if ($this->list) {
             // Add the entity to the configured list unless it is already a member.
             $q = LsDoctrineQuery::create()->from('LsListEntity l')->where('l.entity_id = ? and l.list_id = ?', array($entity->id, $this->list->id))->fetchOne();
             if (!$q) {
                 $le = new LsListEntity();
                 $le->Entity = $entity;
                 $le->LsList = $this->list;
                 if (isset($match['rank'])) {
                     // Use the first run of digits in the scraped rank text.
                     if (preg_match('/(\\d+)/isu', $match['rank'], $m)) {
                         $le->rank = $m[1];
                     }
                 }
                 $le->save();
                 $this->printDebug('List membership saved');
             }
         }
         if ($this->org) {
             $q = LsDoctrineQuery::create()->from('Relationship r')->where('r.entity1_id = ? and r.entity2_id = ? and r.category_id = ?', array($entity->id, $this->org->id, 1))->fetchOne();
             if ($q) {
                 $this->printDebug('   (relationship already found, skipping...)');
                 return;
             }
             $relationship = new Relationship();
             $relationship->Entity1 = $entity;
             $relationship->Entity2 = $this->org;
             $relationship->setCategory($this->relationship_category);
             if ($this->description1) {
                 $relationship->description1 = $this->description1;
             } else {
                 $description = $this->readline("       what description to give this relationship ({$relationship}) ? (less than 3 chars will skip)");
                 if (strlen($description) > 2) {
                     $relationship->description1 = $description;
                 }
             }
             if ($this->relationship_category == 'Position') {
                 $relationship->is_board = $this->is_board;
             } elseif ($this->relationship_category == 'Donation') {
                 if ($this->amount) {
                     $relationship->amount = $this->amount;
                 } else {
                     $amount = $this->readline("  what amount ({$relationship}) ? (less than 3 chars will skip)");
                     if (strlen($amount) > 1) {
                         $relationship->amount = $amount;
                     }
                 }
             }
             $relationship->save();
             $relationship->addReference($this->url, null, null, $this->url_name);
             $this->printDebug(" Relationship saved: {$relationship}");
         }
     }
     //dump history
     // Placeholder: the value is not used yet. Reads the same key the guard
     // checks (it previously read 'affiliation', an index that was never tested).
     if (isset($match['affiliation1'])) {
         $affiliation = $match['affiliation1'];
         //$this->printDebug($affiliation);
     }
 }
 /**
  * Heuristically extracts biography text for a person from an HTML page.
  *
  * Collects text runs that mention the person's last name (and first name,
  * when both appear in the page <title>), discards runs that look like script
  * code, are too long or too short, or read like news copy, and keeps those
  * that end in a period. The kept runs are returned joined by blank lines if
  * the combined text passes the final name-proximity and org-name checks.
  *
  * @param string $page   Raw HTML of the page to search.
  * @param object $person Record exposing name_last, name_first and getExtensions().
  * @param object $org    Record exposing name; when it contains the person's last
  *                       name, extra evidence that the text is about the person
  *                       is required.
  *
  * @return string|false The bio text, or false when nothing qualifies.
  */
 private function findPersonBio($page, $person, $org)
 {
     //$this->printDebug('');
     $name_re = LsString::escapeStringForRegex($person->name_last);
     if (preg_match('/<title>([^<]*)<\\/title>/is', $page, $title_match)) {
         if (stristr($title_match[1], $person->name_last) && stristr($title_match[1], $person->name_first) && strlen($person->name_first) > 2) {
             $name_re .= '|' . LsString::escapeStringForRegex($person->name_first);
         }
     }
     $layout_tags = implode('|', LsHtml::$layoutTags);
     $re2 = '/>([^<]*?(' . $name_re . ')(\\s|,|<)(.*?))<\\/?(' . $layout_tags . ')/is';
     // Try the UTF-8 aware pattern first, then fall back to the byte-wise one.
     $re = $re2 . 'u';
     //$this->printDebug($re);
     if (!preg_match_all($re, $page, $matches) && !preg_match_all($re2, $page, $matches)) {
         $this->printDebug('no matches found');
         return false;
     }
     //$this->printDebug('matches found');
     $arr = array();
     $qual = false;
     // $news stays set across candidates until a candidate fully qualifies.
     $news = false;
     // Built on first use; it does not change between candidates.
     $bio_words_re = null;
     foreach ($matches[1] as $candidate) {
         if (stristr($candidate, '}') || stristr($candidate, '{') || preg_match('/\\svar\\s/is', $candidate)) {
             //$this->printDebug('FAILED - curly brackets');
             continue;
         }
         $str = LsHtml::replaceEntities($candidate);
         $str = LsHtml::stripTags($str, '');
         $str = trim(LsString::spacesToSpace($str));
         $this->printDebug(strlen($str));
         if (strlen($str) > 3000) {
             $this->printDebug('FAILED - str too long');
             continue;
         }
         if (preg_match('/(^|\\b)(' . $name_re . ')\\b/is', $str) == 0) {
             $this->printDebug($candidate . 'FAILED - no name match');
             continue;
         }
         $word_count = count(explode(' ', $str));
         if ($word_count < 12) {
             $this->printDebug('FAILED - str not long enough');
             continue;
         }
         if (stristr($str, 'announce') || stristr($str, 'today') || stristr($str, '—') || stristr($str, '–') || preg_match('/^[^\\-]{0,100}\\-(\\-|\\s)/is', $str)) {
             $news = true;
             $this->printDebug('FAILED: dash / announced / today');
         } elseif (preg_match('/(^|\\s)(\'|"|”)(.+?)\\2/is', $str, $qm) && count(explode(' ', $qm[0])) > 6) {
             // A quoted passage of more than six words reads like press copy.
             // The closing quote must equal the opening one (group 2); the old
             // pattern back-referenced group 1 (the leading whitespace) and used
             // "[^\1]", which inside a character class is the byte 0x01.
             $news = true;
             $this->printDebug('FAILED: quote');
         } elseif (preg_match_all('/\\s(\\p{Ll})+\\b/su', $str, $lcm) < 5) {
             $this->printDebug('FAILED: not enough lowercase');
         } else {
             if ($bio_words_re === null) {
                 $bio_words = PersonTable::$commonBioWords;
                 if (in_array('Lobbyist', $person->getExtensions())) {
                     $bio_words = array_merge($bio_words, LobbyistTable::$commonBioWords);
                 }
                 $bio_words_re = '/\\s(' . implode('|', $bio_words) . ')\\s/is';
             }
             // Separate output variable so the array being iterated is not overwritten.
             $bio_word_ct = preg_match_all($bio_words_re, $str, $bio_word_matches);
             $str = trim($str);
             if (preg_match('/\\.$/is', $str) == 0) {
                 $this->printDebug('no period at end of string');
             } elseif ($bio_word_ct > 1) {
                 $news = false;
                 $qual = true;
                 $arr[] = $str;
             } else {
                 $this->printDebug('less than 2 bio words');
                 if ($news == false) {
                     $str = preg_replace('/^[\\,\\.\\:\\;]\\s*/su', '', $str);
                     $arr[] = $str;
                     //array('str' => $str, 'bio_words' => $bio_word_ct);
                 }
             }
         }
         //$this->printDebug('');
     }
     if (!$qual) {
         return false;
     }
     $arr = array_unique($arr);
     $ret = false;
     $bio = implode("\n\n", $arr);
     //$this->printDebug($name_re);
     if (strlen($bio) < 3000 && LsString::withinN($bio, '(' . $name_re . ')', '(is|was|holds|led|has|had|provides|practices|served|leads)', 2)) {
         // The name must appear within the first 20 space-separated words.
         if (preg_match('/^.*?\\b(' . $name_re . ')\\b/is', $bio, $m) && count(explode(' ', $m[0])) < 20) {
             $ret = true;
             $this->printDebug('SUCCESS');
         }
     } else {
         $this->printDebug('within N failed !!!!');
     }
     // When the org's name contains the person's last name, the last name alone
     // is ambiguous, so require a first name, honorific or pronoun as well.
     $org_test = true;
     if ($ret && stristr($org->name, $person->name_last)) {
         $org_test = false;
         if (strlen($person->name_first) > 1) {
             if (preg_match('/([^\\s]+\\s+){0,14}/is', $arr[0], $beg_match)) {
                 $nf_re = LsString::escapeStringForRegex($person->name_first);
                 if (preg_match('/\\b' . $nf_re . '\\b/is', $beg_match[0]) || preg_match('/\\b(Mr|Mrs|Ms)\\b/su', $arr[0])) {
                     $org_test = true;
                     //$this->printDebug('PASSED FIRST NAME TEST');
                 }
             }
         } else {
             if (preg_match('/\\b(he|she|him|her|his|mr|ms|mrs)\\b/is', $arr[0])) {
                 $org_test = true;
                 //$this->printDebug('PASSED POSSESSIVE TEST');
             }
         }
     }
     if ($ret && $org_test) {
         return $bio;
     }
     return false;
 }
Esempio n. 27
0
 /**
  * Converts a stored field value into the form shown to the user.
  *
  * Resolution order:
  *  1. null becomes a small "NULL" span;
  *  2. if the modified object cannot be loaded, the raw value is returned;
  *  3. if the field has a relation alias and the related record exists, an
  *     entity link (for Entity records) or the record itself is returned;
  *  4. start_date / end_date go through Dateable::convertForDisplay();
  *  5. integer and boolean columns are rendered as a string / "yes"|"no";
  *  6. anything else is passed through LsString::excerpt().
  *
  * @param mixed $value The stored value of $this->field_name.
  *
  * @return mixed Display string/markup, or a related record object.
  */
 public function convertValueForDisplay($value)
 {
     if (null === $value) {
         return '<span class="text_small">NULL</span>';
     }
     $record = $this->Modification->getObject(true);
     if (!$record) {
         return $value;
     }
     // Start from the record's own table; switch to the extension/category
     // table when the field is not part of the record's own data.
     $table = $record->getTable();
     $isEntity = $record instanceof Entity;
     if ($isEntity || $record instanceof Relationship) {
         $data = $record->getData();
         if (!array_key_exists($this->field_name, $data)) {
             if ($isEntity) {
                 $extensionName = EntityTable::getExtensionNameByFieldName($this->field_name);
                 if ($extensionName) {
                     $table = Doctrine::getTable($extensionName);
                 }
             } else {
                 $table = Doctrine::getTable(RelationshipTable::getCategoryNameByFieldName($this->field_name));
             }
         }
     }
     // Foreign-key style fields: show the record the value points at.
     $alias = $this->getFieldNameAlias();
     if ($alias) {
         $class = $table->getRelation($alias)->getClass();
         $related = Doctrine::getTable($class)->find($value);
         if ($related) {
             if ($related instanceof Entity) {
                 sfLoader::loadHelpers('Ls');
                 return entity_link($related, null);
             }
             return $related;
         }
     }
     if ($this->field_name == 'start_date' || $this->field_name == 'end_date') {
         return Dateable::convertForDisplay($value);
     }
     $def = $table->getColumnDefinition($this->field_name);
     // Loose comparison on purpose: it mirrors switch semantics.
     $type = $def['type'];
     if ($type == 'integer') {
         return (string) $value;
     }
     if ($type == 'boolean') {
         return $value ? 'yes' : 'no';
     }
     return LsString::excerpt($value);
 }