Exemple #1
0
 /**
  * Checks whether any of this org's entity names resembles the given string.
  *
  * Org suffixes are stripped from the input (OrgTable::removeSuffixes) and the
  * remainder is split into search terms (LsQuery::splitSearchPhrase). A term
  * that comes back as an array is treated as a set of alternatives. An entity
  * name matches when it starts with the first term (optionally preceded by
  * "The") and each term can be removed from it once, in order.
  *
  * @param string   $str    Name to compare against the entity's names.
  * @param int|bool $strict When truthy, a candidate is rejected if what is left
  *                         of it after removing the terms and suffixes still
  *                         contains at least this many words, as counted by
  *                         LsString::split().
  *
  * @return bool True as soon as one entity name matches, false otherwise.
  */
 public function hasSimilarName($str, $strict = false)
 {
     $str = trim(OrgTable::removeSuffixes($str));
     if (!strlen($str)) {
         return false;
     }
     $terms = LsQuery::splitSearchPhrase($str);
     if (!$terms) {
         return false;
     }
     foreach ($terms as &$term) {
         if (is_array($term)) {
             foreach ($term as &$t) {
                 $t = LsString::escapeStringForRegex($t);
             }
             // Drop the reference so later writes to $t cannot touch the array.
             unset($t);
             $term = implode('|', $term);
         } else {
             $term = LsString::escapeStringForRegex($term);
         }
     }
     unset($term);
     if (isset($terms[0]) && $terms[0] == 'The') {
         array_shift($terms);
     }
     // A phrase consisting only of "The" leaves nothing to compare. Without
     // this guard the anchor pattern below would have an empty group and
     // match every name.
     if (!$terms) {
         return false;
     }
     $names = $this->Entity->getAllNames();
     foreach ($names as $name) {
         // The name must begin with the first term, allowing a leading "The".
         if (!preg_match('/^(The\\s+)?(' . $terms[0] . ')/isu', $name)) {
             continue;
         }
         foreach ($terms as $term) {
             $new = preg_replace('/((^|\\s)|\\b)(' . $term . ')(\\b|(\\s|$))/isu', ' ', $name, 1);
             // null signals a PCRE error; an unchanged string means the term
             // is absent. Either way this name is not a match.
             if ($new === null || $new === $name) {
                 continue 2;
             }
             $name = $new;
         }
         $name = trim(OrgTable::removeSuffixes($name));
         if ($strict && strlen($name) > 0 && count(LsString::split($name)) >= $strict) {
             // Too many leftover words for a strict match.
             continue;
         }
         return true;
     }
     return false;
 }
Exemple #2
0
 /**
  * Normalizes an org name for category matching: strips org suffixes via
  * OrgTable::removeSuffixes(), lowercases the result and removes apostrophes.
  *
  * @param string $name Raw org name.
  *
  * @return string The normalized name.
  */
 static function cleanNameForCategoryMatching($name)
 {
     $withoutSuffixes = OrgTable::removeSuffixes($name);
     $lowercased = strtolower($withoutSuffixes);
     return str_replace("'", "", $lowercased);
 }
 /**
  * Builds a query for entities with the given extensions whose name, or
  * alias name, is LIKE the search string or one of its spelling variants.
  *
  * Variants are produced by repeatedly applying a fixed list of regex
  * rewrites (dots, "&" vs "and", commas, "US" vs "United States") to every
  * query string collected so far, until no new string appears.
  *
  * @param string|array $extensions Extension name(s); when 'Org' is among
  *                                 them, org suffixes other than 'Bancorp'
  *                                 are stripped from $str first.
  * @param string       $str        Name to search for.
  * @param int          $strength   0 wraps each variant as %variant%, 1 uses
  *                                 variant%, any other value matches the
  *                                 variant as given.
  *
  * @return mixed The query object with the name conditions added, or an empty
  *               array when the search string is shorter than 3 characters.
  */
 public static function getByExtensionAndNameQuery($extensions, $str, $strength = 1)
 {
     $extensions = (array) $extensions;
     $str = trim($str);
     if (in_array('Org', $extensions)) {
         $str = OrgTable::removeSuffixes($str, array('Bancorp'));
     }
     if (strlen($str) < 3) {
         return array();
     }
     $q = EntityTable::getByExtensionQuery($extensions)->leftJoin('e.Alias a');

     // Ordered (pattern, replacement) pairs. This must be a list rather than a
     // pattern-keyed map: '\.' appears twice, and as array keys the second
     // entry would silently overwrite the first.
     $rewrites = array(
         array('\\.', ' '),
         array('\\.', ''),
         array('\\s&\\s', ' and '),
         array('\\sand\\s', ' & '),
         array(' & ', ' '),
         array(',', ''),
         array('\\bUS\\b', 'United States'),
         array('United States\\b', 'US'),
     );

     $search_queries = array($str);
     // The list grows while it is being walked, so count() is re-evaluated
     // on every pass.
     for ($i = 0; $i < count($search_queries); $i++) {
         $name = $search_queries[$i];
         if (strlen($name) < 3) {
             break;
         }
         foreach ($rewrites as $rewrite) {
             list($pattern, $replacement) = $rewrite;
             $new = preg_replace('/' . $pattern . '/isu', $replacement, $name);
             // null signals a PCRE error (e.g. invalid UTF-8 with /u). Adding
             // it would later turn into a bare '%' that matches every row.
             if ($new === null || $new === $name) {
                 continue;
             }
             if (!in_array($new, $search_queries, true)) {
                 $search_queries[] = $new;
             }
         }
     }

     // Loose comparisons are kept so non-integer $strength values behave as
     // they did before.
     $prefix = $strength == 0 ? '%' : '';
     $suffix = ($strength == 0 || $strength == 1) ? '%' : '';
     foreach ($search_queries as &$s) {
         $s = $prefix . $s . $suffix;
     }
     unset($s);

     $variant_count = count($search_queries);
     $e = implode(' or ', array_fill(0, $variant_count, 'e.name like ?'));
     $a = implode(' or ', array_fill(0, $variant_count, 'a.name like ?'));
     // One bound value per placeholder: entity-name conditions first, then
     // alias-name conditions.
     $params = array_merge($search_queries, $search_queries);
     $q->addWhere($e . ' or ' . $a, $params);
     return $q;
 }
Exemple #4
0
 /**
  * Turns an org name into a regex fragment that tolerates small spelling
  * differences: commas become optional, each literal "\s+" in the escaped
  * name also accepts a preceding comma, and "&" matches either "&" or "and".
  *
  * The name first has its suffixes removed (OrgTable::removeSuffixes) and is
  * escaped with LsString::escapeStringForRegex.
  *
  * @param string $name Org name.
  *
  * @return string Regex fragment without delimiters.
  */
 static function getNameRegex($name)
 {
     $pattern = LsString::escapeStringForRegex(OrgTable::removeSuffixes($name));
     // Applied strictly in this order: the comma rule must run before the
     // whitespace rule inserts its own ",?".
     $substitutions = array(
         ',' => ',?',
         '\\s+' => ',?\\s+',
         '&' => '(&|\\s*and\\s*)',
     );
     foreach ($substitutions as $search => $replacement) {
         $pattern = str_replace($search, $replacement, $pattern);
     }
     return $pattern;
 }
 /**
  * Processes one input row inside a single database transaction: finds or
  * creates the person it names and, when required, finds or creates the org
  * in 'affiliation1' and a Position relationship between the two.
  *
  * Person matching walks the entities returned by the parsed person's
  * getSimilarEntitiesQuery(). A candidate is accepted when one first name is
  * a prefix of the other and either
  *   - checkAffiliations() returns an org for one of the row's
  *     affiliationN values, or
  *   - the candidate's extended bio matches the name regex of one of the
  *     affiliations that did not match.
  * If nobody is accepted, the parsed person is saved as a new entity.
  *
  * An org and relationship are only created when the person is new or was
  * matched through the bio. A summary of what happened is appended to
  * $this->edits.
  *
  * Row keys read: name, affiliation1..N, bio, affiliation1_extensions,
  * affiliation1_title, start_date, end_date, title, notes, rank.
  *
  * @param array $row Row values keyed by column name; every value is trimmed.
  *
  * @return void
  *
  * @throws Exception Anything thrown while processing is rethrown after the
  *                   transaction has been rolled back.
  */
 protected function processRow($row)
 {
     foreach ($row as &$r) {
         $r = trim($r);
     }
     // Break the reference so a later write to $r cannot modify $row.
     unset($r);
     $edit = array('Search Name' => $row['name'], 'Affiliation Name' => $row['affiliation1'], 'Similar Names' => array(), 'New Person' => null, 'Existing Person' => null, 'New Org' => null, 'Existing Org' => null, 'New Relationship' => null);
     try {
         $this->db->beginTransaction();
         $person = null;
         $search_person = PersonTable::parseFlatName($row['name']);
         $similar = $search_person->getSimilarEntitiesQuery(true)->execute();
         $matched_bio = false;
         $similar_ids = array();
         foreach ($similar as $s) {
             $similar_ids[] = $s->id;
             $sim_re = LsString::escapeStringForRegex($s->name_first);
             $search_re = LsString::escapeStringForRegex($search_person->name_first);
             // Skip candidates unless one first name is a prefix of the other.
             if (preg_match('/^' . $sim_re . '/su', $search_person->name_first) == 0 && preg_match('/^' . $search_re . '/su', $s->name_first) == 0) {
                 continue;
             }
             $ct = 1;
             $matched_affils = array();
             $unmatched_affils = array();
             // Walk affiliation1, affiliation2, ... until a blank or missing
             // column, stopping at the first one the candidate already has.
             while (isset($row['affiliation' . $ct]) && trim($row['affiliation' . $ct]) != '') {
                 $affil = trim($row['affiliation' . $ct]);
                 $org = $s->checkAffiliations(array($affil));
                 if ($org) {
                     $matched_affils[] = array($org, $affil);
                     $edit['Existing Org'] = $org->id;
                     break;
                 } else {
                     $unmatched_affils[] = $affil;
                 }
                 $ct++;
             }
             if (count($matched_affils)) {
                 $person = $s;
                 break;
             } else {
                 // No affiliation matched; fall back to looking for the
                 // affiliation names in the candidate's bio.
                 $bio = $s->getExtendedBio();
                 foreach ($unmatched_affils as $affil) {
                     $affil = OrgTable::removeSuffixes($affil);
                     $this->printDebug($affil);
                     $this->printDebug($bio);
                     if (preg_match('/' . OrgTable::getNameRegex($affil) . '/su', $bio)) {
                         $matched_bio = true;
                         break;
                     }
                 }
                 if ($matched_bio) {
                     $person = $s;
                     break;
                 } else {
                     $this->printDebug('  ' . $s->name . ' failed');
                 }
             }
         }
         $edit['Similar Names'] = array_slice($similar_ids, 0, 5);
         $no_match = false;
         if (!$person) {
             if (isset($row['bio']) && trim($row['bio']) != '') {
                 $search_person->summary = $row['bio'];
             }
             $search_person->save();
             $this->printDebug('  not found, new person saved: ' . $search_person->name);
             $search_person->addReference($this->source_url, null, null, $this->source_name);
             $no_match = true;
             $edit['New Person'] = $search_person->id;
             $person = $search_person;
         } else {
             // Only fill in the summary when the existing person has none.
             if (isset($row['bio']) && trim($row['bio']) != '' && !$person->summary) {
                 $person->summary = $row['bio'];
                 $person->save();
             }
             $this->printDebug('  **person found: ' . $person->name);
             $edit['Existing Person'] = $person->id;
         }
         if ($matched_bio || $no_match) {
             $orgs = OrgTable::getOrgsWithSimilarNames($row['affiliation1'], true);
             $max = -1;
             $affiliated_org = null;
             // Among similarly named orgs, keep the one with the highest
             // related-person count from the query below.
             foreach ($orgs as $org) {
                 $this->printDebug('    found match: ' . $org->name);
                 $ct = $org->getRelatedEntitiesQuery('Person', RelationshipTable::POSITION_CATEGORY, null, null, null, false, 2)->count();
                 if ($ct > $max) {
                     $affiliated_org = $org;
                     $edit['Existing Org'] = $affiliated_org->id;
                     $max = $ct;
                 }
             }
             if (!$affiliated_org) {
                 $affiliated_org = new Entity();
                 $affiliated_org->addExtension('Org');
                 if (isset($row['affiliation1_extensions']) && $row['affiliation1_extensions'] != '') {
                     $extensions = explode(',', $row['affiliation1_extensions']);
                     foreach ($extensions as $ext) {
                         $ext = trim($ext);
                         // Ignore anything that is not a known extension name.
                         if (in_array($ext, ExtensionDefinitionTable::$extensionNames)) {
                             $affiliated_org->addExtension($ext);
                         }
                     }
                 }
                 $affiliated_org->name = $row['affiliation1'];
                 $affiliated_org->save();
                 $affiliated_org->addReference($this->source_url, null, null, $this->source_name);
                 $edit['New Org'] = $affiliated_org->id;
             }
             $rel = new Relationship();
             $rel->Entity1 = $person;
             $rel->Entity2 = $affiliated_org;
             $rel->setCategory('Position');
             if (isset($row['affiliation1_title']) && $row['affiliation1_title'] != '') {
                 $description = trim($row['affiliation1_title']);
                 $rel->description1 = $description;
                 // Director, Trustee and Chair* titles are flagged as board
                 // positions rather than employment.
                 if ($description == 'Director' || $description == 'Trustee' || preg_match('/^Chair/su', $description)) {
                     $rel->is_board = 1;
                     $rel->is_employee = 0;
                 }
             }
             $rel->save();
             $rel->addReference($this->source_url, null, null, $this->source_name);
             $edit['New Relationship'] = $rel->id;
         }
         // Optional columns are copied into the edit record only.
         if (isset($row['start_date']) && trim($row['start_date']) != '') {
             $edit['Relationship']['start_date'] = trim($row['start_date']);
         }
         if (isset($row['end_date']) && trim($row['end_date']) != '') {
             $edit['Relationship']['end_date'] = trim($row['end_date']);
         }
         if (isset($row['title']) && trim($row['title']) != '') {
             $edit['Relationship']['title'] = trim($row['title']);
         }
         if (isset($row['notes']) && trim($row['notes']) != '') {
             $edit['Relationship']['notes'] = trim($row['notes']);
         }
         if (isset($row['rank']) && $row['rank'] != '') {
             $edit['rank'] = $row['rank'];
         }
         $this->db->commit();
     } catch (Exception $e) {
         $this->db->rollback();
         throw $e;
     }
     $this->edits[] = $edit;
 }
 /**
  * Finds LdaClient rows whose names match the given org and imports any of
  * their LDA filings that have no LobbyFiling with the same federal filing id.
  *
  * The org name (minus suffixes other than 'Bancorp') is split on whitespace,
  * dots and hyphens. Candidate clients must contain every term (SQL LIKE).
  * A client is then accepted when:
  *   - its name starts with the first term, or has it right after "(for " or
  *     "on behalf of ";
  *   - its name does not contain both "pilots" and "ass";
  *   - each term can be removed from the name once, in order.
  *
  * Filings are imported only when the org name has more than one term or
  * fewer than 30 distinct client names were accepted.
  *
  * Orgs whose 'is_complete' meta value is set are skipped unless
  * $this->_override is on.
  *
  * @param object $org Org entity; its id and name properties are read.
  *
  * @return void
  */
 private function findOrgInfo($org)
 {
     $this->printDebug('***');
     $this->printDebug($org->name);
     if ($this->hasMeta($org->id, 'is_complete') && $this->getMeta($org->id, 'is_complete') && !$this->_override) {
         $this->printDebug("Already fetched lobbying data for Entity " . $org->id . "; skipping...");
         return;
     }
     $name = OrgTable::removeSuffixes($org->name, array('Bancorp'));
     $this->printDebug($name);
     $terms = preg_split('/[\\s\\.\\-]+/isu', $name, -1, PREG_SPLIT_NO_EMPTY);
     // No terms (empty name, punctuation only, or a PCRE failure) would leave
     // the client query without any filter and load every LdaClient row.
     if (!$terms) {
         $this->printDebug('No searchable terms in name; skipping...');
         return;
     }
     $q = LsDoctrineQuery::create()->from('LdaClient c');
     foreach ($terms as $term) {
         $q->addWhere('name like ?', '%' . $term . '%');
     }
     $clients = $q->execute();

     // The escaped terms are the same for every client, so build them once.
     $escaped_terms = array();
     foreach ($terms as $term) {
         $escaped_terms[] = LsString::escapeStringForRegex($term);
     }
     $start = $escaped_terms[0];

     $client_names = array();
     $client_ids = array();
     foreach ($clients as $client) {
         $matched = true;
         if (preg_match('/^' . $start . '\\b/isu', $client->name) == 0 && preg_match('/(\\(for\\s+|on\\s+behalf\\s+of\\s+)' . $start . '\\b/isu', $client->name) == 0) {
             $matched = false;
         }
         $remaining = $client->name;
         if (stristr($remaining, 'pilots') && stristr($remaining, 'ass')) {
             $matched = false;
         }
         if ($matched) {
             foreach ($escaped_terms as $escaped) {
                 $new = preg_replace('/((^|\\s)|\\b)' . $escaped . '(\\b|(\\s|$))/isu', ' ', $remaining, 1);
                 // null signals a PCRE error; an unchanged string means the
                 // term is absent from what is left of the name.
                 if ($new === null || $new === $remaining) {
                     $matched = false;
                     break;
                 }
                 $remaining = $new;
             }
         }
         if ($matched) {
             $client_ids[] = $client->id;
             $client_names[] = $client->name;
         }
     }
     $client_names = array_unique($client_names);
     // A single-term name with 30 or more accepted client names is not
     // imported.
     if (count($terms) > 1 || count($client_names) < 30) {
         foreach ($client_ids as $client_id) {
             $lda_filings = Doctrine::getTable('LdaFiling')->findByClientId($client_id);
             foreach ($lda_filings as $lda_filing) {
                 $lf = Doctrine::getTable('LobbyFiling')->findOneByFederalFilingId($lda_filing->federal_filing_id);
                 if (!$lf) {
                     $this->printDebug($lda_filing->id);
                     $this->printDebug(number_format(memory_get_usage()));
                     $this->importFiling($org, $lda_filing);
                 } else {
                     $this->printDebug('Previously imported: ' . $lda_filing->federal_filing_id . "\n");
                 }
             }
         }
     }
     $this->printDebug(count($client_names));
     $this->printDebug(implode(', ', $client_names));
 }