/**
 * Derives family relationships between elected representatives from the
 * parenthesised kinship note in their summaries.
 *
 * Selects Person/ElectedRepresentative entities whose summary begins with a
 * phrase such as "(son of ...", extracts each "<relation> of <name>" pair,
 * looks the named relative up among the same kind of entities and, when
 * exactly one candidate other than the member is found and the two are not
 * already related, saves a Family relationship. Progress is echoed to stdout.
 *
 * @param array $arguments Task arguments (not read here).
 * @param array $options   Task options; 'application' and 'env' are read.
 */
protected function execute($arguments = array(), $options = array())
{
    $configuration = ProjectConfiguration::getApplicationConfiguration($options['application'], $options['env'], true);
    $databaseManager = new sfDatabaseManager($configuration);
    $databaseManager->initialize($configuration);

    $kinPrefixes = array('(daughter%', '(son%', '(father%', '(mother%', '(cousin%', '(husband%', '(wife%', '(brother%', '(sister%');
    $members = EntityTable::getByExtensionQuery(array('Person', 'ElectedRepresentative'))
        ->addWhere('summary like ? OR summary like ? OR summary like ? OR summary like ? OR summary like ? OR summary like ? OR summary like ? OR summary like ? OR summary like ?', $kinPrefixes)
        ->orderBy('person.name_last')
        ->execute();

    foreach ($members as $member) {
        // Only the first parenthesised group of the summary is inspected.
        if (!preg_match('/\\([^\\)]*\\)/isu', $member->summary, $match)) {
            continue;
        }

        echo $member->name . ":\n";

        // NOTE(review): the query above also selects "(son ..." summaries, but
        // "son" is not among the relations matched here - confirm whether
        // that omission is intentional.
        if (preg_match_all('/(brother|sister|daughter|mother|father|wife|husband|cousin)\\sof\\s+([^\\;\\)\\,]*)(\\;|\\)|\\,)/isu', $match[0], $matches, PREG_SET_ORDER)) {
            foreach ($matches as $m) {
                echo "\t\t" . $m[1] . ' : of : ' . $m[2] . "\n";

                $relativeName = str_replace('.', '', $m[2]);
                if (trim($relativeName) === '') {
                    // Nothing to search for; without name filters the lookup
                    // below would return every representative.
                    continue;
                }

                // Every word of the relative's name must appear in the entity name.
                $peopleQuery = EntityTable::getByExtensionQuery(array('Person', 'ElectedRepresentative'));
                foreach (LsString::split($relativeName) as $part) {
                    $peopleQuery->addWhere('e.name like ?', '%' . $part . '%');
                }

                $family = array();
                foreach ($peopleQuery->execute() as $person) {
                    echo "\t\t\t\t" . $person->name . "\n";
                    if ($person->id != $member->id) {
                        $family[] = $person;
                    }
                }

                // Only act on an unambiguous match.
                if (count($family) != 1) {
                    continue;
                }

                // Use the matched relative, not the leftover loop variable: the
                // last row iterated above may be the member themself.
                $relative = $family[0];

                $existing = LsDoctrineQuery::create()
                    ->from('Relationship r')
                    ->where('(r.entity1_id = ? or r.entity2_id =?) and (r.entity1_id = ? or r.entity2_id = ?)', array($member->id, $member->id, $relative->id, $relative->id));
                if ($existing->count()) {
                    continue;
                }

                $description2 = FamilyTable::getDescription2($m[1], $relative->Gender->id);
                if (!$description2) {
                    continue;
                }

                $relationship = new Relationship();
                $relationship->setCategory('Family');
                $relationship->Entity1 = $member;
                $relationship->Entity2 = $relative;
                $relationship->description1 = $m[1];
                $relationship->description2 = $description2;
                $relationship->save();

                // Reuse the member's Congressional Biographical Directory
                // reference, when one exists, as the source of the relationship.
                $ref = LsQuery::getByModelAndFieldsQuery('Reference', array('object_model' => 'Entity', 'object_id' => $member->id, 'name' => 'Congressional Biographical Directory'))->fetchOne();
                if ($ref) {
                    $relationship->addReference($ref->source, null, null, $ref->name, $ref->source_detail, $ref->publication_date);
                }

                echo "-------------------------------added relationship\n";
            }
        }

        echo "\n";
    }
}
/**
 * Tells whether any of the entity's names is similar to the given phrase.
 *
 * The phrase is stripped of suffixes and split into search terms (a term may
 * be an array of alternatives). A name is similar when it starts with the
 * first term (optionally preceded by "The") and every term can be removed
 * from it once, in order.
 *
 * @param string   $str    Phrase to compare against the entity's names.
 * @param bool|int $strict When truthy, a name is rejected if the words left
 *                         over after removing all terms and suffixes number
 *                         at least $strict.
 *
 * @return bool True when at least one name is similar.
 */
public function hasSimilarName($str, $strict = false)
{
    $str = trim(OrgTable::removeSuffixes($str));
    if (!strlen($str)) {
        return false;
    }

    // Turn every term into a regex fragment; alternatives become "a|b|c".
    $terms = array();
    foreach (LsQuery::splitSearchPhrase($str) as $term) {
        if (is_array($term)) {
            $alternatives = array();
            foreach ($term as $alternative) {
                $alternatives[] = LsString::escapeStringForRegex($alternative);
            }
            $terms[] = implode('|', $alternatives);
        } else {
            $terms[] = LsString::escapeStringForRegex($term);
        }
    }

    if (count($terms) && $terms[0] == 'The') {
        array_shift($terms);
    }

    // With no terms left the prefix pattern below would degenerate to "()"
    // and match every name, so treat an empty term list as "no match".
    if (!count($terms)) {
        return false;
    }

    foreach ($this->Entity->getAllNames() as $name) {
        if (!preg_match('/^(The\\s+)?(' . $terms[0] . ')/isu', $name)) {
            continue;
        }

        // Remove each term once; a term that cannot be removed means no match.
        $matched = true;
        foreach ($terms as $term) {
            $remaining = preg_replace('/((^|\\s)|\\b)(' . $term . ')(\\b|(\\s|$))/isu', ' ', $name, 1);
            if ($remaining == $name) {
                $matched = false;
                break;
            }
            $name = $remaining;
        }
        if (!$matched) {
            continue;
        }

        $name = trim(OrgTable::removeSuffixes($name));
        if ($strict && strlen($name) > 0 && count(LsString::split($name)) >= $strict) {
            // Too many unexplained words left over for a strict match.
            continue;
        }

        return true;
    }

    return false;
}
/**
 * Builds a regular-expression fragment (without delimiters) intended to match
 * this person's name in free text.
 *
 * The fragment is assembled from the first name (plus any short forms listed
 * in PersonTable::$shortFirstNames), middle name and nickname, each made
 * case-tolerant per letter, followed by the last-name pattern from
 * getLastNameRegex().
 *
 * @param bool $first_required When true the first name (or one of its short
 *                             forms) must appear; middle name, nickname and
 *                             initials are then optional extras. When false
 *                             any of first/middle/nick or an initial may lead.
 *
 * @return string Regex fragment.
 */
public function getNameRegex($first_required = false)
{
    $last_re = $this->getLastNameRegex();

    $name_first = $this->name_first;
    if (isset(PersonTable::$shortFirstNames[$name_first])) {
        $fn_arr = (array) PersonTable::$shortFirstNames[$name_first];
        $name_first = $this->name_first . ' ' . implode(' ', $fn_arr);
    }

    if ($first_required) {
        $fm = $this->name_middle . ' ' . $this->name_nick;
    } else {
        $fm = $name_first . ' ' . $this->name_middle . ' ' . $this->name_nick;
    }

    // Rewrites each lowercase letter "x" as the class "[xX]". This replaces the
    // former /e (eval) modifier, which no longer exists in PHP 7+.
    $eitherCase = function ($m) {
        return '[' . $m[1] . strtoupper($m[1]) . ']';
    };

    $fm_arr = preg_split('/[\\s-]+/', $fm, -1, PREG_SPLIT_NO_EMPTY);
    $initials = '';
    foreach ($fm_arr as $i => $token) {
        $len = strlen(LsString::stripNonAlpha($token));
        $token = preg_replace_callback('/(\\p{Ll})/', $eitherCase, $token);
        $initials .= strtoupper($token[0]);

        // For names longer than 3 letters, everything after the second
        // bracketed letter becomes optional so that truncated forms match too.
        if ($len > 3) {
            $offset = strpos($token, ']', strpos($token, ']') + 1) + 1;
            $token = substr($token, 0, $offset) . str_replace(']', ']?', substr($token, $offset));
        }

        $fm_arr[$i] = $token;
    }
    $fm = implode('|', $fm_arr);

    // An empty "[]" would be read by PCRE as the start of an unterminated
    // character class, so the initials alternative is only added when there
    // is at least one initial.
    $fmAlternatives = ($initials === '') ? $fm : $fm . '|[' . $initials . ']';

    $separator = '\\b([\'"\\(\\)\\.]{0,3}\\s+|\\.\\s*|\\s?-\\s?)?';

    if ($first_required) {
        $nf_arr = LsString::split($name_first);
        foreach ($nf_arr as $i => $nf) {
            $nf_arr[$i] = preg_replace_callback('/(\\p{Ll})/', $eitherCase, $nf);
        }
        $name_first = implode('|', $nf_arr);

        $re = '((\\b(' . $name_first . ')' . $separator . '(' . $fmAlternatives . ')?' . $separator . '((\\p{L}|[\'\\-])+' . $separator . ')?)+((' . $last_re . ')\\b))';
    } else {
        $re = '((\\b(' . $fmAlternatives . ')' . $separator . '((\\p{L}|[\'\\-])+' . $separator . ')?)+((' . $last_re . ')\\b))';
    }

    return $re;
}
/**
 * Heuristically decides whether a URL plausibly belongs to an organisation.
 *
 * Only the "//host/" portion of the URL is examined when it can be isolated.
 * The URL is accepted when it contains a significant word of the organisation
 * name (longer than one character and neither a common word nor a corporate
 * abbreviation) or, failing that, one of three acronyms built from the name's
 * initials: all words, words minus common ones, or words minus corporate
 * abbreviations. Acronyms must be longer than two characters.
 *
 * @param string $url      URL to test.
 * @param string $org_name Organisation name.
 *
 * @return bool True when the URL looks related to the name.
 */
public function checkUrl($url, $org_name)
{
    if (preg_match('/\\/\\/[^\\/]+\\//isu', $url, $match)) {
        $url = $match[0];
    }

    $common = array('and', 'the', 'of', 'in', 'at', '&');
    $abbrevs = array('Corporation', 'Inc', 'Group', 'LLC', 'LLP', 'Corp', 'Co', 'Cos', 'LP', 'PA', 'Dept', 'Department', 'International', 'Administration');
    $both = array_merge($common, $abbrevs);

    $all = '';
    $no_common = '';
    $no_corp = '';
    $ret = false;

    foreach (LsString::split($org_name) as $part) {
        if ($part === '') {
            continue;
        }

        $initial = $part[0];
        $all .= $initial;
        if (!LsArray::inArrayNoCase($part, $common)) {
            $no_common .= $initial;
        }
        if (!LsArray::inArrayNoCase($part, $abbrevs)) {
            $no_corp .= $initial;
        }

        // Cheap length test first; it also keeps one-character needles away
        // from the substring search.
        if (strlen($part) > 1 && stripos($url, $part) !== false && !LsArray::inArrayNoCase($part, $both)) {
            $ret = true;
        }
    }

    if (!$ret) {
        foreach (array($all, $no_common, $no_corp) as $acronym) {
            if (strlen($acronym) > 2 && stripos($url, $acronym) !== false) {
                return true;
            }
        }
    }

    return $ret;
}
/**
 * Imports one LDA filing for the given organisation inside a DB transaction.
 *
 * Steps, in order: determine (and if necessary create) the lobbying entity,
 * resolve the government bodies lobbied, create lobbyist position
 * relationships, create the LobbyFiling with its issues, then create or update
 * the client transaction relationship and the lobbying relationships, and
 * attach them to the filing.
 *
 * The transaction is rolled back and null is returned when the filing has no
 * client, when registrant ids conflict, or when no lobbying entity can be
 * determined. Any exception rolls the transaction back and is rethrown.
 *
 * @param object $org        Entity the filing is imported for; it is treated
 *                           as the lobbying entity when the client and
 *                           registrant names match, otherwise as the client.
 * @param object $lda_filing Raw LDA filing record.
 *
 * @return null
 *
 * @throws Exception Whatever the underlying calls throw, after rollback.
 */
private function importFiling($org, $lda_filing)
{
    try {
        $this->printTimeSince();
        $this->printDebug('Starting import...');

        $filing_source = self::$filing_url . $lda_filing->federal_filing_id;
        $excerpt = array();

        $this->db->beginTransaction();

        $date = null;
        $excerpt['Federal Filing Id'] = $lda_filing->federal_filing_id;
        $excerpt['Year'] = $lda_filing->year;
        $excerpt['Type'] = $lda_filing->LdaType->description;

        // Keep everything before the first "T" of the received timestamp.
        if (preg_match('/^[^T]*/su', $lda_filing->received, $match)) {
            $date = str_replace('/', '-', $match[0]);
        }

        $lda_registrant = Doctrine::getTable('LdaRegistrant')->find($lda_filing->registrant_id);
        $excerpt['Registrant'] = $lda_registrant->name;

        if (!$lda_filing->client_id) {
            $this->db->rollback();

            return null;
        }
        $lda_client = Doctrine::getTable('LdaClient')->find($lda_filing->client_id);
        $excerpt['Client'] = $lda_client->name;

        $lobbying_entity = null;

        //DETERMINE (& CREATE) LOBBYING ENTITY
        if (strtolower(OrgTable::stripNamePunctuation($lda_client->name)) == strtolower(OrgTable::stripNamePunctuation($lda_registrant->name))) {
            // Client and registrant are the same: the org lobbies for itself.
            $lobbying_entity = $org;
            $client_entity = null;

            if (!$lobbying_entity->lda_registrant_id) {
                $lobbying_entity->lda_registrant_id = $lda_registrant->federal_registrant_id;
                $lobbying_entity->save();
                $lobbying_entity->addReference($filing_source, null, $lobbying_entity->getAllModifiedFields(), 'LDA Filing', null, $date, false);
            } elseif ($lobbying_entity->lda_registrant_id != $lda_registrant->federal_registrant_id) {
                $this->printDebug("LDA registrant ids did not match up for {$lobbying_entity->name} and {$lda_registrant->name} even though names matched {$lda_client->name}\n");
                $this->db->rollback();

                return null;
            }

            $this->printDebug($lobbying_entity->name . ' noted (same as client ' . $lda_client->name . ')');
        } else {
            $client_entity = $org;

            // Use the client's description to fill an empty blurb (short text)
            // or summary (long text); descriptions containing a date-like
            // fragment are ignored.
            if ($lda_client->description) {
                $description = trim($lda_client->description);
                if ($description != '' && preg_match('/[\\/\\-]\\d+[\\/\\-]/isu', $description) == 0) {
                    if (strlen($description) < 200) {
                        if (!$org->blurb || $org->blurb == '') {
                            $org->blurb = $description;
                        }
                    } elseif (!$org->summary || $org->summary == '') {
                        $org->summary = $description;
                    }
                }
            }

            $org->save();
            $this->printDebug($lda_client->name . ' is distinct from ' . $lda_registrant->name);
        }

        $lda_lobbyists = $lda_filing->LdaLobbyists;
        $lobbyist_names = array();
        foreach ($lda_lobbyists as $lda_lobbyist) {
            $lobbyist_names[] = $lda_lobbyist->name;
        }
        $excerpt['Lobbyists'] = implode('; ', $lobbyist_names);

        if (!$lobbying_entity) {
            $lobbyist_last = '';
            $lobbyist_longest = '';

            if (count($lda_lobbyists)) {
                $lobbyist_parts = explode(',', $lda_lobbyists[0]->name);

                if (count($lobbyist_parts) > 1) {
                    // "Last, First ..." form.
                    $lobbyist_last = trim($lobbyist_parts[0]);
                    $arr = LsString::split($lobbyist_parts[1]);
                    $lens = array_map('strlen', $arr);
                    arsort($lens);
                    $keys = array_keys($lens);
                    // Guard against an empty given-name part ("Smith,").
                    $lobbyist_longest = count($keys) ? $arr[$keys[0]] : '';
                } else {
                    $lobbyist_name = preg_replace('/^(Mr|MR|MS|Dr|DR|MRS|Mrs|Ms)\\b\\.?/su', '', $lda_lobbyists[0]->name);
                    $arr = LsString::split(trim($lobbyist_name));
                    $arr = LsArray::strlenSort($arr);
                    $lobbyist_last = array_pop($arr);
                    if (count($arr)) {
                        // array_shift() takes its argument by reference, so the
                        // sorted array must be held in a variable first.
                        $sorted = LsArray::strlenSort($arr);
                        $lobbyist_longest = array_shift($sorted);
                    } else {
                        $lobbyist_longest = '';
                    }
                }
            }

            //check to see if registrant and lobbyist are same
            $registrant_name = $lda_registrant->name;
            $is_self_registered = false;
            if (count($lda_lobbyists) == 1) {
                if (strtoupper($lda_lobbyists[0]->name) == strtoupper($registrant_name)) {
                    $is_self_registered = true;
                } elseif ($lobbyist_last) {
                    // True suffix comparison: the registrant name must END with
                    // the last name (case-insensitively) and also contain the
                    // other name part.
                    $last_len = strlen($lobbyist_last);
                    $ends_with_last = $last_len <= strlen($registrant_name)
                        && strcasecmp(substr($registrant_name, -$last_len), $lobbyist_last) === 0;
                    $is_self_registered = $ends_with_last && stristr($registrant_name, $lobbyist_longest);
                }
            }

            if ($is_self_registered) {
                $existing_lobbyist_registrant = EntityTable::getByExtensionQuery('Lobbyist')
                    ->addWhere('lobbyist.lda_registrant_id = ?', $lda_registrant->federal_registrant_id)
                    ->execute()
                    ->getFirst();

                if ($existing_lobbyist_registrant) {
                    $lobbying_entity = $existing_lobbyist_registrant;
                    $this->printDebug('Existing lobbyist is lobbying entity: ' . $lobbying_entity->name);
                } else {
                    $lobbyist = $this->prepLobbyistName($lda_lobbyists[0]->name);
                    if ($lobbyist) {
                        $lobbyist->lda_registrant_id = $lda_registrant->federal_registrant_id;
                        $lobbyist->save();
                        $lobbyist->addReference($filing_source, null, $lobbyist->getAllModifiedFields(), 'LDA Filing', null, $date, false);
                        $this->printDebug('New lobbyist/lobbying entity saved: ' . $lobbyist->name);
                        $lobbying_entity = $lobbyist;
                    }
                }
            } else {
                $existing_firm = EntityTable::getByExtensionQuery('Org')
                    ->addWhere('org.lda_registrant_id = ? ', $lda_registrant->federal_registrant_id)
                    ->execute()
                    ->getFirst();

                if ($existing_firm) {
                    $modified = array();
                    $lobbying_entity = $existing_firm;

                    if ($lda_registrant->description) {
                        $description = trim($lda_registrant->description);
                        if ($description != '' && preg_match('/[\\/\\-]\\d+[\\/\\-]/isu', $description) == 0) {
                            if (strlen($description) < 200) {
                                if (!$existing_firm->blurb || $existing_firm->blurb == '') {
                                    $existing_firm->blurb = $description;
                                    $modified[] = 'blurb';
                                }
                            } elseif (!$existing_firm->summary || $existing_firm->summary == '') {
                                $existing_firm->summary = $description;
                                $modified[] = 'summary';
                            }
                        }
                    }

                    if ($lda_registrant->address && $lda_registrant->address != '' && count($existing_firm->Address) == 0) {
                        if ($address = $existing_firm->addAddress($lda_registrant->address)) {
                            $existing_firm->save();
                            $address->addReference($filing_source, null, $address->getAllModifiedFields(), 'LDA Filing', null, $date, false);
                        }
                    }

                    $existing_firm->save();
                    if (count($modified)) {
                        $existing_firm->addReference($filing_source, null, $modified, 'LDA Filing', null, $date, false);
                    }
                    $this->printDebug('Existing firm is lobbying entity: ' . $lobbying_entity->name);
                } else {
                    $firm = new Entity();
                    $firm->addExtension('Org');
                    $firm->addExtension('Business');
                    $firm->addExtension('LobbyingFirm');
                    $firm->name = LsLanguage::titleize(OrgTable::stripNamePunctuation($lda_registrant->name), true);
                    $firm->lda_registrant_id = $lda_registrant->federal_registrant_id;

                    if ($lda_registrant->description) {
                        $description = trim($lda_registrant->description);
                        if ($description != '' && preg_match('/[\\/\\-]\\d+[\\/\\-]/isu', $description) == 0) {
                            if (strlen($description) < 200) {
                                $firm->blurb = $description;
                            } else {
                                $firm->summary = $description;
                            }
                        }
                    }

                    if ($lda_registrant->address && $lda_registrant->address != '') {
                        if ($address = $firm->addAddress($lda_registrant->address)) {
                            $firm->save();
                            $address->addReference($filing_source, null, $address->getAllModifiedFields(), 'LDA Filing', null, $date, false);
                        }
                    }

                    $firm->save();
                    $this->printDebug('New lobbying firm/lobbying entity saved: ' . $firm->name);
                    $firm->addReference($filing_source, null, $firm->getAllModifiedFields(), 'LDA Filing', null, $date, false);
                    $lobbying_entity = $firm;
                }
            }
        }

        // Every later step dereferences the lobbying entity; if none could be
        // determined (e.g. the lobbyist name could not be prepared), abandon
        // the filing the same way the other unprocessable cases do.
        if (!$lobbying_entity) {
            $this->printDebug('Could not determine lobbying entity for filing ' . $lda_filing->federal_filing_id . ', skipping');
            $this->db->rollback();

            return null;
        }

        //PREP GOVT ENTITIES
        $govt_entities = array();
        $excerpt['Government Bodies'] = array();
        foreach ($lda_filing->LdaGovts as $lda_govt) {
            $excerpt['Government Bodies'][] = $lda_govt->name;

            $name_arr = $this->prepGovtName($lda_govt->name);
            if (!$name_arr) {
                continue;
            }

            if ($govt_entity = EntityTable::findByAlias($lda_govt->name, 'lda_government_body')) {
                $govt_entities[] = $govt_entity;
                continue;
            }

            $govt_entity = EntityTable::getByExtensionQuery(array('Org', 'GovernmentBody'))
                ->addWhere('name = ?', array($name_arr[0]))
                ->fetchOne();
            $is_new = !$govt_entity;

            if ($is_new) {
                $govt_entity = new Entity();
                $govt_entity->addExtension('Org');
                $govt_entity->addExtension('GovernmentBody');
                $govt_entity->name = $name_arr[0];
                $govt_entity->name_nick = $name_arr[1];
                $govt_entity->is_federal = 1;
                $govt_entity->save();
            }

            // Remember the raw LDA spelling so the alias lookup hits next time.
            $alias = new Alias();
            $alias->context = 'lda_government_body';
            $alias->name = $lda_govt->name;
            $alias->entity_id = $govt_entity->id;
            $alias->save();

            if ($is_new) {
                // NOTE(review): $excerpt is still an array at this point (it is
                // flattened to text only after this loop) - confirm that
                // addReference() accepts an array excerpt.
                $govt_entity->addReference($filing_source, $excerpt, $govt_entity->getAllModifiedFields(), 'LDA Filing', null, $date, false);
            }

            $govt_entities[] = $govt_entity;
        }
        $excerpt['Government Bodies'] = implode('; ', $excerpt['Government Bodies']);

        // Flatten the excerpt to "Key: value" lines for use as reference text.
        $excerpt_str = '';
        foreach ($excerpt as $k => $v) {
            $excerpt_str .= $k . ": " . $v . "\n";
        }
        $excerpt = trim($excerpt_str);
        $this->printDebug($excerpt);

        $relationships = array();
        $lobbying_entity_extensions = $lobbying_entity->getExtensions();
        $entity_is_lobbyist = in_array('Lobbyist', $lobbying_entity_extensions);

        //CREATE LOBBYIST POSITION RELATIONSHIPS
        $lobbyists = array();
        if (!$entity_is_lobbyist) {
            $firm_lobbyists = array();
            if ($lobbying_entity->exists()) {
                $firm_lobbyists = LsDoctrineQuery::create()
                    ->from('Entity e')
                    ->leftJoin('e.Relationship r ON (r.entity1_id = e.id)')
                    ->where('r.entity2_id = ? AND r.category_id = ?', array($lobbying_entity->id, RelationshipTable::POSITION_CATEGORY))
                    ->execute();
            }

            foreach ($lda_lobbyists as $lda_lobbyist) {
                $lobbyist = $this->prepLobbyistName($lda_lobbyist->name);
                if (!$lobbyist) {
                    continue;
                }

                // Reuse a person already holding a position at the firm when
                // the names are compatible.
                $existing_lobbyist = null;
                foreach ($firm_lobbyists as $fl) {
                    if (PersonTable::areNameCompatible($fl, $lobbyist)) {
                        $existing_lobbyist = $fl;
                        break;
                    }
                }

                if ($existing_lobbyist) {
                    $lobbyists[] = $existing_lobbyist;
                    continue;
                }

                $lobbyist->save();
                $lobbyist->addReference($filing_source, $excerpt, $lobbyist->getAllModifiedFields(), 'LDA Filing', null, $date, false);

                $r = new Relationship();
                $r->Entity1 = $lobbyist;
                $r->Entity2 = $lobbying_entity;
                $r->setCategory('Position');
                $r->description1 = 'Lobbyist';
                $r->is_employee = 1;
                $r->save();
                // The reference documents the relationship, so record the
                // relationship's own modified fields, as every other
                // addReference() call in this method does for its receiver.
                $r->addReference($filing_source, $excerpt, $r->getAllModifiedFields(), 'LDA Filing', null, $date, false);

                $lobbyists[] = $lobbyist;
            }
        }

        //PREP ISSUES
        $issues = array();
        $lda_issues = Doctrine_Query::create()
            ->from('LdaFilingIssue f')
            ->leftJoin('f.LdaIssue i')
            ->where('f.filing_id = ?', $lda_filing->id)
            ->execute();
        foreach ($lda_issues as $lda_issue) {
            $name = LsLanguage::nameize($lda_issue->LdaIssue->name);
            if (!($issue = Doctrine::getTable('LobbyIssue')->findOneByName($name))) {
                $issue = new LobbyIssue();
                $issue->name = $name;
                $issue->save();
            }
            $issues[] = array($issue, $lda_issue->specific_issue);
        }

        //CREATE LOBBY FILING
        $lobby_filing = new LobbyFiling();
        $lobby_filing->year = $lda_filing->year;
        $lobby_filing->amount = $lda_filing->amount;
        $lobby_filing->federal_filing_id = $lda_filing->federal_filing_id;
        $lobby_filing->start_date = $date;

        // The period label is the text before the first "(" of the description.
        $period = $lda_filing->LdaPeriod->description;
        if ($paren = strpos($period, '(')) {
            $lobby_filing->period = trim(substr($period, 0, $paren));
        } else {
            $lobby_filing->period = 'Undetermined';
        }
        $lobby_filing->report_type = LsLanguage::nameize($lda_filing->LdaType->description);

        foreach ($issues as $issue) {
            $filing_issue = new LobbyFilingLobbyIssue();
            $filing_issue->Issue = $issue[0];
            $filing_issue->Filing = $lobby_filing;
            $filing_issue->specific_issue = $issue[1];
            $filing_issue->save();
        }

        if ($entity_is_lobbyist) {
            $lobby_filing->Lobbyist[] = $lobbying_entity;
        } else {
            foreach ($lobbyists as $lobbyist) {
                $lobby_filing->Lobbyist[] = $lobbyist;
            }
        }
        $lobby_filing->save();

        //CREATE TRANSACTION RELATIONSHIP, IF ANY
        $transaction = null;
        if ($client_entity !== null) {
            $transaction = RelationshipTable::getByCategoryQuery('Transaction')
                ->addWhere('r.entity1_id = ?', $client_entity->id)
                ->addWhere('r.entity2_id = ?', $lobbying_entity->id)
                ->addWhere('transaction.is_lobbying = ?', 1)
                ->fetchOne();

            if ($transaction) {
                // Existing client relationship: extend it with this filing.
                $transaction->updateDateRange($date, true);
                if ($lda_filing->amount && $lda_filing->amount != '') {
                    if (!$transaction->amount || $transaction->amount == '') {
                        $transaction->amount = $lda_filing->amount;
                    } else {
                        $transaction->amount += $lda_filing->amount;
                    }
                }
                $transaction->filings++;
            } else {
                $transaction = new Relationship();
                $transaction->Entity1 = $client_entity;
                $transaction->Entity2 = $lobbying_entity;
                $transaction->setCategory('Transaction');
                $transaction->description1 = 'Lobbying Client';
                $transaction->is_lobbying = 1;
                $transaction->filings = 1;
                $transaction->updateDateRange($date, true);
                if (in_array('Person', $lobbying_entity_extensions)) {
                    $transaction->description2 = 'Hired Lobbyist';
                } else {
                    $transaction->description2 = 'Lobbying Firm';
                }
                if ($lda_filing->amount && $lda_filing->amount != '') {
                    $transaction->amount = $lda_filing->amount;
                }
            }

            $transaction->save();
            $transaction->addReference($filing_source, $excerpt, $transaction->getAllModifiedFields(), 'LDA Filing', null, $date, false);
            $relationships[] = $transaction;
        }

        //CREATE LOBBYING RELATIONSHIP
        foreach ($govt_entities as $govt_entity) {
            $lobbying_relationship = RelationshipTable::getByCategoryQuery('Lobbying')
                ->addWhere('r.entity1_id = ?', $lobbying_entity->id)
                ->addWhere('r.entity2_id = ?', $govt_entity->id)
                ->fetchOne();

            if ($lobbying_relationship) {
                $lobbying_relationship->updateDateRange($date);
                $lobbying_relationship->filings++;
                $lobbying_relationship->save();
            } else {
                $lobbying_relationship = new Relationship();
                $lobbying_relationship->Entity1 = $lobbying_entity;
                $lobbying_relationship->Entity2 = $govt_entity;
                $lobbying_relationship->setCategory('Lobbying');
                if ($transaction) {
                    $lobbying_relationship->description1 = 'Lobbying (for client)';
                } else {
                    $lobbying_relationship->description1 = 'Direct Lobbying';
                }
                $lobbying_relationship->description2 = $lobbying_relationship->description1;
                $lobbying_relationship->updateDateRange($date, true);
                $lobbying_relationship->filings = 1;
                $lobbying_relationship->save();
                $lobbying_relationship->addReference($filing_source, $excerpt, $lobbying_relationship->getAllModifiedFields(), 'LDA Filing', null, $date, false);
            }

            $relationships[] = $lobbying_relationship;
        }

        foreach ($relationships as $relationship) {
            $lobby_filing->Relationship[] = $relationship;
        }
        $lobby_filing->save();

        $this->printDebug("Import Completed\n");
        $this->db->commit();
    } catch (Exception $e) {
        $this->db->rollback();
        throw $e;
    }
}
/**
 * Extracts strings that look like person names from HTML text.
 *
 * Two shapes are recognised, both only when the text sits between a ">" and
 * the next "<": "First [Middle] Last[, Suffix]" and "Last, First [Middle]".
 * Candidates of the first shape are kept only if they contain a middle
 * initial, start with a common first name, or start with a first name already
 * stored for some Person. Candidates of the second shape are always kept,
 * converted through PersonTable::parseCommaName().
 *
 * @param string $text HTML to scan.
 *
 * @return array List of name strings (first-shape matches first).
 */
static function getHtmlPersonNames($text)
{
    $re2 = '/>\\s*(\\p{Lu}\'?(\\p{L}+|\\.)?\\s+(\\p{Lu}\'?(\\s+|\\p{L}+\\s+|\\.\\s*)?){0,2}\\p{Lu}\'?\\p{L}+(\\-\\p{Lu}\'?\\p{L}+)?(\\,?\\s+\\p{Lu}\\p{L}{1,4}\\.?)?)\\**\\s*</su';
    $re3 = '/>\\s*(\\p{Lu}\'?\\p{L}+(\\-\\p{Lu}\'?\\p{L}+)?\\,\\s+(\\p{Lu}\'?(\\p{L}+|\\.)?(\\s+(\\p{Lu}\'?(\\s+|\\p{L}+\\s+|\\.\\s*)?){0,2})?)(\\,?\\s+\\p{Lu}\\p{L}{1,4}\\.?)?)\\**\\s*</su';

    $text = LsHtml::replaceEntities($text);
    $name_matches = array();

    if (preg_match_all($re2, $text, $matches)) {
        //LOOP THROUGH MATCHES TO CONFIRM NAMES
        foreach ($matches[1] as $candidate) {
            $parts = LsString::split(trim($candidate));

            // Cheapest checks first; the database is only consulted when the
            // candidate has not already been accepted.
            $is_name = preg_match('/\\s+\\p{Lu}\\.?\\s/', $candidate)
                || in_array($parts[0], LsLanguage::$commonFirstNames)
                || LsDoctrineQuery::create()->from('Person p')->where('p.name_first = ?', $parts[0])->count() > 0;

            //ADD NAME TO MATCH LIST IF IT FITS CONDITIONS
            if ($is_name) {
                $name_matches[] = $candidate;
            }
        }
    }

    if (preg_match_all($re3, $text, $matches)) {
        foreach ($matches[1] as $candidate) {
            $person = PersonTable::parseCommaName($candidate);
            $name_matches[] = $person->getFullName(false);
        }
    }

    return $name_matches;
}
/**
 * Processes one imported row: finds or creates the entity it names, then
 * either relates it to $this->entity or adds it to $this->list.
 *
 * The operator is asked interactively (via readline()) to confirm each
 * candidate match and to confirm creation of a new entity. Rows missing
 * required columns, with an unknown primary type or relationship category,
 * are skipped with a debug message.
 *
 * @param array $row Column name => value. Reads 'entity_name', 'primary_type',
 *                   'relationship_category', 'relationship_order',
 *                   'entity_extensions', 'url', 'url_name', 'summary', 'blurb',
 *                   'website', 'rank' and the relationship columns listed in
 *                   the body.
 *
 * @return void
 */
public function processRow($row)
{
    // Normalise surrounding whitespace before any value is inspected.
    foreach ($row as $key => $value) {
        if (is_string($value)) {
            $row[$key] = trim($value);
        }
    }

    // A row may carry its own source; otherwise the importer-wide one is used.
    if (isset($row['url']) && $row['url'] != '' && isset($row['url_name']) && $row['url_name'] != '') {
        $url = $row['url'];
        $url_name = $row['url_name'];
    } else {
        $url = $this->url;
        $url_name = $this->url_name;
    }

    $required = array('entity_name', 'primary_type');
    if ($this->entity) {
        $required[] = 'relationship_category';
    }
    foreach ($required as $req) {
        if (!isset($row[$req]) || $row[$req] == '') {
            $this->printDebug('!!! > skipping row, ' . $req . ' not set');

            return;
        }
    }

    if ($row['primary_type'] != 'Person' && $row['primary_type'] != 'Org') {
        $this->printDebug('!!! > primary type not properly set, skipping row...');

        return;
    }

    if ($this->entity) {
        $relationship_category = trim($row['relationship_category']);
        $relationship_category_id = array_search($relationship_category, RelationshipCategoryTable::$categoryNames);
        if (!$relationship_category_id) {
            $this->printDebug('!!! > relationship type not properly set, skipping row...');

            return;
        }
    }

    $this->printDebug("processing: " . $row['entity_name'] . '......');

    if ($row['primary_type'] == 'Person') {
        $entity2 = PersonTable::parseFlatName($row['entity_name']);
        $similar_entities = PersonTable::getSimilarQuery2($entity2)->execute();
    } else {
        $entity2 = new Entity();
        $entity2->addExtension('Org');
        $entity2->setEntityField('name', $row['entity_name']);
        $similar_entities = OrgTable::getOrgsWithSimilarNames($entity2->name);
    }

    // Let the operator pick an existing entity among the candidates.
    $matched = false;
    foreach ($similar_entities as $similar_entity) {
        if ($similar_entity['primary_ext'] == 'Person') {
            $this->printDebug(' POSSIBLE MATCH: ' . $similar_entity->name . ' (Orgs :: ' . $similar_entity->getRelatedOrgsSummary() . " Bio :: {$similar_entity->summary})");
        } else {
            $this->printDebug(' POSSIBLE MATCH: ' . $similar_entity->name . ' (Summary :: ' . $similar_entity->summary . ')');
        }

        $accept = $this->readline(' Is this the same entity? (y or n or b to break)');
        if ($accept == 'y') {
            $entity2 = $similar_entity;
            $matched = true;
            $this->printDebug(' [accepted]');
            break;
        }
        if ($accept == 'b') {
            break;
        }
    }

    if (!$matched) {
        if ($entity2->getPrimaryExtension() == 'Person') {
            $this->printDebug(' New person: ' . $entity2->name_first . ' ' . $entity2->name_last);
        } else {
            $this->printDebug(' New org: ' . $entity2->name);
        }

        $accept = $this->readline(' create this new entity? (y or n) ');
        if ($accept == 'y') {
            try {
                $extensions = LsString::split($row['entity_extensions'], '\\s*\\,\\s*');
                foreach ($extensions as $extension) {
                    $entity2->addExtension($extension);
                }
                $entity2->save();
                $entity2->addReference($url, null, null, $url_name);
            } catch (Exception $e) {
                // Best effort: the entity is still saved below without the
                // problematic extensions step having completed.
                $this->printDebug(' !!! problems with extensions for this row');
            }

            foreach (array('summary', 'blurb', 'website') as $field) {
                if (isset($row[$field])) {
                    $entity2[$field] = $row[$field];
                }
            }
            $entity2->save();
            $entity2->addReference($url, null, null, $url_name);
            $this->printDebug(' ' . $entity2->name . ' saved');
        } else {
            $entity2 = null;
        }
    }

    if (!$entity2) {
        return;
    }

    // create relationship
    if ($this->entity) {
        // Decide which side $this->entity takes: an explicit order column wins;
        // otherwise Position/Education rows put the person first, and every
        // other category puts $this->entity first.
        if (isset($row['relationship_order']) && $row['relationship_order'] != '') {
            $own_entity_first = ($row['relationship_order'] == '1');
        } elseif ($relationship_category == 'Position' || $relationship_category == 'Education') {
            $own_entity_first = ($row['primary_type'] == 'Org');
        } else {
            $own_entity_first = true;
        }
        $first = $own_entity_first ? $this->entity : $entity2;
        $second = $own_entity_first ? $entity2 : $this->entity;

        $relationship = new Relationship();
        $relationship->Entity1 = $first;
        $relationship->Entity2 = $second;
        $relationship->setCategory($relationship_category);

        $cols = array('description1', 'description2', 'start_date', 'end_date', 'goods', 'amount', 'is_board', 'is_executive', 'is_employee');
        foreach ($cols as $col) {
            if (isset($row[$col]) && $row[$col] != '') {
                try {
                    $relationship[$col] = $row[$col];
                } catch (Exception $e) {
                    $this->printDebug(" could not set {$col} for relationship, skipping");
                }
            }
        }

        // The relationship is new and has no id yet, so an "r.id <> ?" filter
        // would compare against NULL and never match; look for any existing
        // relationship with the same endpoints and category instead.
        $duplicate = LsDoctrineQuery::create()
            ->from('Relationship r')
            ->where('r.entity1_id = ? and r.entity2_id = ? and r.category_id = ?', array($first->id, $second->id, $relationship->category_id))
            ->fetchOne();
        if ($duplicate) {
            $this->printDebug(' (relationship already found, skipping...)');

            return;
        }

        $relationship->save();
        $relationship->addReference($url, null, null, $url_name);
        $this->printDebug(" Relationship saved: {$relationship}\n");
    } elseif ($this->list) {
        $on_list = LsDoctrineQuery::create()
            ->from('LsListEntity le')
            ->where('le.entity_id = ? and le.list_id = ?', array($entity2->id, $this->list->id))
            ->fetchOne();
        if ($on_list) {
            $this->printDebug(' (already on list, skipping...)');

            return;
        }

        $le = new LsListEntity();
        $le->LsList = $this->list;
        $le->Entity = $entity2;
        if (isset($row['rank'])) {
            $le->rank = $row['rank'];
        }
        $le->save();
    }
}
/**
 * Parses a free-text position description (e.g. "Chmn, CEO and Pres") into a
 * list of normalised position titles.
 *
 * The company's name, nickname and ticker are removed, the text is split on
 * separators (comma, semicolon, "and", hyphen not preceded by "Co", "&", "/"),
 * common abbreviations are expanded, and each piece is matched against
 * PositionTable::$businessPositions or an existing relationship description.
 * An unmatched first piece is retried with trailing words removed; an
 * unmatched later piece is attached as a note to the previous position.
 *
 * @param string $str  Raw description text.
 * @param object $corp Company record; its name, name_nick and ticker are read.
 *
 * @return array List of array('description' => string, 'note' => array).
 */
public function parseDescriptionStr($str, $corp)
{
    $descriptions = array();

    //cleanup text to be parsed
    $str = trim($str);
    $str = str_replace('.', ' ', $str);
    $str = preg_replace('/\\s{2,}/', ' ', $str);

    $name_re = LsString::escapeStringForRegex($corp->name);
    $str = preg_replace('/\\b' . $name_re . '\\b/isu', '', $str);
    if ($corp->name_nick) {
        $nick_re = LsString::escapeStringForRegex($corp->name_nick);
        $str = preg_replace('/\\b' . $nick_re . '\\b/isu', '', $str);
    }
    if ($corp->ticker) {
        $tick_re = LsString::escapeStringForRegex($corp->ticker);
        $str = preg_replace('/\\b' . $tick_re . '\\b/isu', '', $str);
    }

    //split by commas
    $parts = preg_split('/,|;|\\band\\b|(?<!C[Oo])\\-|\\bAND\\b|\\s&\\s|\\//', $str, -1, PREG_SPLIT_NO_EMPTY);

    foreach ($parts as $part) {
        $part = trim($part);
        $part = preg_replace('/\\s{2,}/', ' ', $part);

        //abbreviation replacements (order matters: later rules see earlier output)
        $part = preg_replace('/( |^)(\\w) (\\w) (\\w)( |$)/', '\\2\\3\\4', $part);
        $part = preg_replace('/(Interim|Acting|Incoming) /i', '', $part);
        $part = preg_replace('/Sr /i', 'Senior ', $part);
        $part = preg_replace('/Chf /i', 'Chief ', $part);
        $part = preg_replace('/( |^)V( |$)/i', ' Vice ', $part);
        $part = preg_replace('/( |^)VP( |$)/i', ' Vice President ', $part);
        $part = preg_replace('/( |^)VC( |$)/i', ' Vice Chairman ', $part);
        $part = preg_replace('/( |^)Chr( |$)/i', ' Chairman ', $part);
        $part = preg_replace('/( |^)Ofcr( |$)/i', ' Officer ', $part);
        $part = preg_replace('/( |^)Vice P( |$)/i', ' Vice President ', $part);
        $part = preg_replace('/( |^)(Ex|Exec)( |$)/i', ' Executive ', $part);
        $part = preg_replace('/( |^)EVP( |$)/i', ' Executive Vice President ', $part);
        $part = preg_replace('/( |^)(Off|Offic|Offcr)( |$)/i', ' Officer ', $part);
        $part = str_replace('Gen ', 'General ', $part);
        $part = preg_replace('/( |^)(Op|Oper) /', ' Operating ', $part);
        $part = preg_replace('/( |^)(Bd|Brd)( |$)/i', ' Board ', $part);
        $part = preg_replace('/of Board/i', ' of the Board', $part);
        $part = preg_replace('/( |^)COB( |$)/i', ' Chairman of the Board ', $part);
        $part = preg_replace('/( |^)(Pres|Prs|Presid|Prsdt|Prsdnt)( |$)/i', ' President ', $part);
        $part = preg_replace('/( |^)Admin( |$)/i', ' Administrative ', $part);
        $part = preg_replace('/( |^)Info( |$)/i', ' Information ', $part);
        $part = preg_replace('/\\bComm\\b/i', 'Committee', $part);
        $part = preg_replace('/\\bInc\\b/i', '', $part);
        $part = preg_replace('/( |-|^)(Ch|Chm|Chmn|Chrm|Chrmn|Chair|Chairmain|Chariman)( |$)/i', '\\1Chairman ', $part);
        $part = preg_replace('/(Sec|Secr|Secy|Secretar|Secreta)( |$)/i', 'Secretary ', $part);
        $part = str_replace('Vice-', 'Vice ', $part);
        $part = preg_replace('/( |^)Non /i', ' Non-', $part);
        $part = preg_replace('/\\bCompl\\b/i', 'Compliance', $part);
        $part = str_ireplace('of Advisory', 'of the Advisory', $part);
        $part = preg_replace('/Advisory (Panel|Council)/i', 'Advisory Board', $part);
        $part = str_ireplace('Independent ', '', $part);
        $part = str_ireplace('Lead ', '', $part);
        $part = str_ireplace('Corporate ', '', $part);
        $part = str_ireplace('Outside ', '', $part);
        $part = str_ireplace('Non-interested', '', $part);
        $part = str_ireplace('Interested', '', $part);
        $part = str_replace('Main ', '', $part);
        $part = str_ireplace('Presiding ', '', $part);
        $part = str_ireplace('Founding ', '', $part);
        $part = str_ireplace('Acctg', 'Accounting', $part);
        $part = str_ireplace('Chairperson', 'Chairman', $part);
        $part = str_ireplace('Chairwoman', 'Chairman', $part);
        $part = str_ireplace("Gen'l", 'General', $part);

        $part = trim($part);
        $part = preg_replace('/\\s{2,}/', ' ', $part);

        if ($part == '') {
            continue;
        }

        $position = array('description' => null, 'note' => array());

        //look for matching title
        $p = LsArray::inArrayNoCase($part, PositionTable::$businessPositions);
        if ($p) {
            $position['description'] = $p;
        } elseif ($q = Doctrine::getTable('Relationship')->findOneByDescription1($part)) {
            // Look the title text itself up (not the $position array).
            $position['description'] = $q->description1;
        } elseif (count($descriptions) == 0) {
            // First piece with no direct match: retry with trailing words
            // removed one at a time.
            // NOTE(review): the removed words are discarded and the loop keeps
            // going after a hit, so a shorter prefix can override a longer
            // one - confirm whether the first hit should win and whether the
            // removed words were meant to become the position's note.
            $part_splat = LsString::split($part);
            $lim = count($part_splat) - 1;
            for ($i = 0; $i < $lim; $i++) {
                array_pop($part_splat);
                $part_new = implode(' ', $part_splat);
                if (strtoupper($part_new) == 'DIRECTOR') {
                    break;
                }
                $p = LsArray::inArrayNoCase($part_new, PositionTable::$businessPositions);
                if ($p) {
                    $position['description'] = $p;
                } elseif ($q = Doctrine::getTable('Relationship')->findOneByDescription1($part_new)) {
                    $position['description'] = $q->description1;
                }
            }
            if (!$position['description']) {
                $position['description'] = $part;
            }
        } else {
            // Unrecognised later piece: keep it as a note on the previous title.
            $descriptions[count($descriptions) - 1]['note'][] = $part;
        }

        if (isset($position['description'])) {
            $descriptions[] = $position;
        }
    }

    return $descriptions;
}
/**
 * Tells whether $search2 occurs within $n whitespace-delimited words of
 * $search1 in $subject.
 *
 * Both search arguments are inserted into the patterns unescaped, so they are
 * treated as regex fragments. $search1 is matched case-sensitively; $search2
 * is matched case-insensitively at a word boundary. A word only counts toward
 * the window when it is followed by whitespace, so the very last word of
 * $subject is never part of the trailing window.
 *
 * @param string $subject Text to search.
 * @param string $search1 Regex fragment for the anchor term.
 * @param string $search2 Regex fragment for the term expected nearby.
 * @param int    $n       Maximum number of words on each side of the anchor.
 *
 * @return bool True when some occurrence of $search1 has $search2 in its window.
 */
static function withinN($subject, $search1, $search2, $n)
{
    $w = '[^\\s]+\\s+';
    $re = '/(' . $w . '){0,' . $n . '}' . $search1 . '\\b\\,?\\s*(' . $w . '){0,' . $n . '}/';

    if (!preg_match_all($re, $subject, $matches)) {
        return false;
    }

    foreach ($matches[0] as $window) {
        if (preg_match('/\\b' . $search2 . '/isu', $window)) {
            return true;
        }
    }

    return false;
}