/**
 * Appends the parsed name parts of column 2 to a data row.
 *
 * The value at index 2 is handed to PersonTable::parseCommaName() and the
 * resulting first, middle, last, suffix and nick name fields are appended
 * to the row, in that order.
 *
 * @param array $data Row whose index 2 holds the name string to parse.
 * @return array The same row with the five name fields appended.
 */
protected function parseName(array $data)
{
    $parsed = PersonTable::parseCommaName($data[2], null, true);

    foreach (array('name_first', 'name_middle', 'name_last', 'name_suffix', 'name_nick') as $key) {
        $data[] = $parsed[$key];
    }

    return $data;
}
/**
 * Builds a person from a name string via PersonTable::parseCommaName().
 *
 * @param string $name_str Name string passed to the parser.
 * @param mixed  $summary  Not used by this method.
 * @param mixed  $orgs     Not used by this method.
 * @return mixed Whatever PersonTable::parseCommaName() returns for the name.
 */
private function generatePerson($name_str, $summary = null, $orgs = null)
{
    return PersonTable::parseCommaName($name_str);
}
/**
 * Interactively turns one scraped match into a stored entity and its relationships.
 *
 * Every decision is confirmed with the operator through $this->readline():
 *  1. whether to process the match at all;
 *  2. whether it is the same as one of the similar existing entities, or
 *     whether a new entity should be created (optionally with the bio as summary);
 *  3. for people, whether to parse the bio for organisations and create
 *     position relationships to them;
 *  4. finally the entity is added to $this->list and related to $this->org
 *     when those are set.
 *
 * Reads $this->org_org, $this->last_first, $this->org_extensions, $this->url,
 * $this->url_name, $this->list, $this->org, $this->relationship_category,
 * $this->description1, $this->is_board and $this->amount.
 *
 * @param array $match Scraped fields; 'name' is required, 'bio' and 'rank' are optional.
 * @return false|null False when the operator skips the match at the first prompt,
 *                    null in every other case.
 */
public function parseResults($match)
{
    // Bio with tags turned into "; " separators, used only for relationship parsing.
    // Initialised to null so parseBio() never receives an undefined variable.
    $bio_dirty = null;
    if (isset($match['bio'])) {
        $bio_dirty = LsHtml::replaceEntities(LsString::spacesToSpace(LsHtml::stripTags($match['bio'], "; ")));
        $bio_dirty = preg_replace('/(\\;\\s)+/is', '; ', $bio_dirty);
    }

    // Clean every field in place. Assigning by key (rather than iterating by
    // reference) leaves no dangling reference into $match behind.
    foreach ($match as $key => $value) {
        $match[$key] = LsHtml::replaceEntities(LsString::spacesToSpace(LsHtml::stripTags($value, " ")));
    }

    if (!isset($match['name'])) {
        return;
    }
    $name = $match['name'];
    $bio = isset($match['bio']) ? $match['bio'] : '';

    $this->printDebug("_________________________\n\nname: " . $name . "\n");
    $this->printDebug("bio: " . $bio . "\n");

    $accept = strtolower($this->readline('Process this entity? (n to skip) '));
    if ($accept == 'n' || $accept == 'no') {
        return false;
    }

    // Build the candidate entity and look up existing entities that resemble it.
    if (!$this->org_org) {
        if ($this->last_first) {
            $entity = PersonTable::parseCommaName($name);
        } else {
            $entity = PersonTable::parseFlatName($name);
        }
        $similar_entities = PersonTable::getSimilarQuery2($entity)->execute();
    } else {
        $entity = new Entity();
        $entity->addExtension('Org');
        foreach ($this->org_extensions as $ext) {
            $entity->addExtension($ext);
        }
        $entity->setEntityField('name', $name);
        $similar_entities = OrgTable::getSimilarQuery($entity)->execute();
    }

    // Offer each similar entity as a possible match.
    $matched = false;
    foreach ($similar_entities as $similar_entity) {
        if ($similar_entity['primary_ext'] == 'Person') {
            $this->printDebug(' POSSIBLE MATCH: ' . $similar_entity->name . ' (Orgs :: ' . $similar_entity->getRelatedOrgsSummary() . " Bio :: {$similar_entity->summary})");
        } else {
            $this->printDebug(' POSSIBLE MATCH: ' . $similar_entity->name . ' (Summary :: ' . $similar_entity->summary . ')');
        }

        // 'break' is accepted immediately so the operator can stop reviewing
        // candidates without exhausting the retries first.
        $accept = $this->promptUntilAccepted(
            ' Is this the same entity? (y or n)',
            " Is this the same entity? \n(y or n) ",
            array('y', 'n', 'break')
        );

        if ($accept == 'y') {
            $entity = $similar_entity;
            $matched = true;
            $this->printDebug(' [accepted]');
            break;
        }
        if ($accept == 'break') {
            break;
        }
    }

    // No match accepted: offer to create the entity.
    $created = false;
    if (!$matched) {
        if ($entity->getPrimaryExtension() == 'Person') {
            $this->printDebug(' New person: ' . $entity->name_first . ' ' . $entity->name_last);
        } else {
            $this->printDebug(' New org: ' . $entity->name);
        }

        $question = ' create this new entity? (y or n) ';
        if ($this->promptUntilAccepted($question, $question, array('y', 'n')) == 'y') {
            if ($entity->getPrimaryExtension() == 'Person') {
                $this->printDebug("\n Bio: {$bio} \n");
                $accept = $this->promptUntilAccepted(
                    ' Add this bio? (y or n) ',
                    ' add this bio? (y or n) ',
                    array('y', 'n')
                );
                if ($accept == 'y') {
                    $entity->summary = $bio;
                }
            }
            $entity->save();
            $entity->addReference($this->url, null, null, $this->url_name);
            $created = true;
            $this->printDebug(' ' . $entity->name . ' saved');
        }
    }

    // For people, optionally mine the bio for organisations they hold positions at.
    if (($matched || $created) && $entity->getPrimaryExtension() == 'Person') {
        $question = "Parse above bio for possible relationships? (y or n) ";
        if ($this->promptUntilAccepted($question, $question, array('y', 'n')) == 'y') {
            $names = $entity->parseBio($bio_dirty);
            $this->printDebug(" Orgs that {$entity} has a position at?");

            foreach ($names as $name) {
                $name = trim($name);
                $accept = $this->promptUntilAccepted(
                    " > {$name} :: an org? (y or n or b to break) ",
                    " {$name} :: an org? \n(y or n or b to break) ",
                    array('y', 'n', 'b'),
                    true
                );
                if ($accept == 'b') {
                    break;
                }
                if ($accept != 'y') {
                    continue;
                }

                $this->printDebug(' .....looking for names.....');
                $orgs = EntityTable::getByExtensionAndNameQuery('Org', $name)->limit(10)->execute();

                // Walk the candidate orgs until one is already related or is chosen.
                $exists = false;
                $related_org = null;
                foreach ($orgs as $org) {
                    $q = LsDoctrineQuery::create()->from('Relationship r')->where('entity1_id = ? and entity2_id = ?', array($entity->id, $org->id))->fetchOne();
                    if ($q) {
                        $this->printDebug(' Position already exists, skipping...');
                        $exists = true;
                        break;
                    }
                    $question = " Create a position relationship between {$entity->name} and {$org->name}? (y or n) ";
                    if ($this->promptUntilAccepted($question, $question, array('y', 'n')) == 'y') {
                        $related_org = $org;
                        break;
                    }
                }

                // Nothing suitable found: offer to create the org from the parsed name.
                if (!$related_org && !$exists) {
                    $question = " couldn't find org, should this one be created: {$name} (y or n) ";
                    if ($this->promptUntilAccepted($question, $question, array('y', 'n')) == 'y') {
                        $related_org = new Entity();
                        $related_org->addExtension('Org');
                        $related_org->name = preg_replace('/\\.(?!com)/i', '', $name);
                        $extensions = $this->readline(" what extensions should this org get? (eg 'Business, LobbyingFirm, LawFirm') ");
                        $extensions = preg_split('/\\,\\s*/isu', $extensions, -1, PREG_SPLIT_NO_EMPTY);
                        if (!is_array($extensions)) {
                            // preg_split() yields false on failure (e.g. malformed UTF-8 input).
                            $extensions = array();
                        }
                        try {
                            foreach ($extensions as $extension) {
                                $related_org->addExtension($extension);
                            }
                            $related_org->save();
                            $related_org->addReference($this->url, null, null, $this->url_name);
                        } catch (Exception $e) {
                            $this->printDebug(' !!! problems with org creation, skipping');
                            $related_org = null;
                        }
                    }
                }

                if (!$related_org) {
                    continue;
                }

                $q = LsDoctrineQuery::create()->from('Relationship r')->where("r.entity1_id = ? and r.entity2_id = ? \nand r.category_id = ?", array($entity->id, $related_org->id, 1))->fetchOne();
                if ($q) {
                    $this->printDebug(' (relationship already found, skipping...)');
                    continue;
                }

                $relationship = new Relationship();
                $relationship->Entity1 = $entity;
                $relationship->Entity2 = $related_org;
                $relationship->setCategory('Position');

                $title = $this->readline(" Title for this position relationship? (<enter> to skip) ");
                if (strlen($title) > 2) {
                    $relationship->description1 = $title;
                }

                // Any answer other than yes/no leaves the flag untouched.
                $current = strtolower($this->readline(" Is the relationship current? (y or n or <enter> to skip) "));
                if (in_array($current, array('y', 'yes'))) {
                    $relationship->is_current = 1;
                } elseif (in_array($current, array('n', 'no'))) {
                    $relationship->is_current = 0;
                }

                $board = strtolower($this->readline(" Is the relationship a board position? (y or n or <enter> to skip) "));
                if (in_array($board, array('y', 'yes'))) {
                    $relationship->is_board = 1;
                } elseif (in_array($board, array('n', 'no'))) {
                    $relationship->is_board = 0;
                }

                $relationship->save();
                $relationship->addReference($this->url, null, null, $this->url_name);
                $this->printDebug(" Relationship saved: {$relationship}");
            }
        }
    }

    if ($matched || $created) {
        // Add the entity to the configured list unless it is already a member.
        if ($this->list) {
            $q = LsDoctrineQuery::create()->from('LsListEntity l')->where('l.entity_id = ? and l.list_id = ?', array($entity->id, $this->list->id))->fetchOne();
            if (!$q) {
                $le = new LsListEntity();
                $le->Entity = $entity;
                $le->LsList = $this->list;
                if (isset($match['rank']) && preg_match('/(\\d+)/isu', $match['rank'], $rankParts)) {
                    $le->rank = $rankParts[1];
                }
                $le->save();
                $this->printDebug('List membership saved');
            }
        }

        // Relate the entity to the configured org.
        if ($this->org) {
            // NOTE(review): this duplicate check always filters on category_id 1,
            // even when relationship_category is not 'Position' - confirm intended.
            $q = LsDoctrineQuery::create()->from('Relationship r')->where("r.entity1_id = ? and r.entity2_id = ? \nand r.category_id = ?", array($entity->id, $this->org->id, 1))->fetchOne();
            if ($q) {
                $this->printDebug(' (relationship already found, skipping...)');
                return;
            }

            $relationship = new Relationship();
            $relationship->Entity1 = $entity;
            $relationship->Entity2 = $this->org;
            $relationship->setCategory($this->relationship_category);

            if ($this->description1) {
                $relationship->description1 = $this->description1;
            } else {
                $description = $this->readline(" what description to give this relationship ({$relationship}) ? (less than 3 chars will skip)");
                if (strlen($description) > 2) {
                    $relationship->description1 = $description;
                }
            }

            if ($this->relationship_category == 'Position') {
                $relationship->is_board = $this->is_board;
            } elseif ($this->relationship_category == 'Donation') {
                if ($this->amount) {
                    $relationship->amount = $this->amount;
                } else {
                    $amount = $this->readline(" what amount ({$relationship}) ? (less than 3 chars will skip)");
                    if (strlen($amount) > 1) {
                        $relationship->amount = $amount;
                    }
                }
            }

            $relationship->save();
            $relationship->addReference($this->url, null, null, $this->url_name);
            $this->printDebug(" Relationship saved: {$relationship}");
        }
    }
}

/**
 * Asks a question through $this->readline() until an accepted answer is given
 * or the attempts run out.
 *
 * @param string  $prompt      Text shown on the first attempt.
 * @param string  $retryPrompt Text shown on every later attempt.
 * @param array   $accepted    Answers that end the prompting.
 * @param boolean $lowercase   Whether each answer is lower-cased before it is checked.
 * @param integer $maxAttempts Total number of times the question may be asked.
 * @return mixed The last answer read; it may be outside $accepted if attempts ran out.
 */
private function promptUntilAccepted($prompt, $retryPrompt, array $accepted, $lowercase = false, $maxAttempts = 5)
{
    $attempts = 1;
    $answer = $this->readline($prompt);
    if ($lowercase) {
        $answer = strtolower($answer);
    }

    while (!in_array($answer, $accepted, true) && $attempts < $maxAttempts) {
        $answer = $this->readline($retryPrompt);
        if ($lowercase) {
            $answer = strtolower($answer);
        }
        $attempts++;
    }

    return $answer;
}
/**
 * Extracts likely person names from text nodes in an HTML string.
 *
 * Two passes are made over the entity-decoded text:
 *  - "First [Middle] Last" candidates sitting directly between ">" and "<"
 *    are kept when they contain a single-letter initial, when their first
 *    token is in LsLanguage::$commonFirstNames, or when a Person record with
 *    that first name exists;
 *  - "Last, First ..." candidates are parsed with PersonTable::parseCommaName()
 *    and added as getFullName(false).
 *
 * @param string $text HTML to scan.
 * @return array List of name strings, first-pass results before second-pass results.
 */
static function getHtmlPersonNames($text)
{
    $re2 = '/>\\s*(\\p{Lu}\'?(\\p{L}+|\\.)?\\s+(\\p{Lu}\'?(\\s+|\\p{L}+\\s+|\\.\\s*)?){0,2}\\p{Lu}\'?\\p{L}+(\\-\\p{Lu}\'?\\p{L}+)?(\\,?\\s+\\p{Lu}\\p{L}{1,4}\\.?)?)\\**\\s*</su';
    $re3 = '/>\\s*(\\p{Lu}\'?\\p{L}+(\\-\\p{Lu}\'?\\p{L}+)?\\,\\s+(\\p{Lu}\'?(\\p{L}+|\\.)?(\\s+(\\p{Lu}\'?(\\s+|\\p{L}+\\s+|\\.\\s*)?){0,2})?)(\\,?\\s+\\p{Lu}\\p{L}{1,4}\\.?)?)\\**\\s*</su';

    $text = LsHtml::replaceEntities($text);
    $name_matches = array();

    // Pass 1: flat names, each confirmed by at least one heuristic.
    if (preg_match_all($re2, $text, $matches, PREG_OFFSET_CAPTURE)) {
        foreach ($matches[1] as $m) {
            // With PREG_OFFSET_CAPTURE each entry is array(text, offset).
            $candidate = $m[0];
            $parts = LsString::split(trim($candidate));

            // Cheapest checks first: an initial, then a known first name.
            $is_name = (bool) preg_match('/\\s+\\p{Lu}\\.?\\s/', $candidate);
            if (!$is_name) {
                $is_name = in_array($parts[0], LsLanguage::$commonFirstNames, true);
            }

            // Only hit the database when the in-memory checks did not settle it.
            if (!$is_name) {
                $q = LsDoctrineQuery::create()->from('Person p')->where('p.name_first = ?', $parts[0]);
                $is_name = $q->count() > 0;
            }

            if ($is_name) {
                $name_matches[] = $candidate;
            }
        }
    }

    // Pass 2: "Last, First" names are accepted without further checks.
    if (preg_match_all($re3, $text, $matches, PREG_OFFSET_CAPTURE)) {
        foreach ($matches[1] as $m) {
            $person = PersonTable::parseCommaName($m[0]);
            $name_matches[] = $person->getFullName(false);
        }
    }

    return $name_matches;
}
/**
 * Parses a name as it appears on a Form 4 into a person.
 *
 * A leading "O " followed by a letter is rewritten to "O'" first. If a
 * signature name is given and its first word equals the first word of $name
 * (case-insensitively), the name is parsed as a flat name. Otherwise the
 * leading token - including surname prefixes such as "de la" or "van der" -
 * is taken as the last name and the remainder as the rest, and the result
 * is parsed as a comma name.
 *
 * @param string      $name          Name to parse.
 * @param string|null $signatureName Optional signature used to detect the word order.
 * @return array array($person, $fullName, $nameRegex) from the parsed person.
 */
public function parseForm4Name($name, $signatureName = null)
{
    // "O Brien" -> "O'Brien"
    if (preg_match('/^O \\p{L}/', $name)) {
        $name = "O'" . substr($name, 2);
    }

    // Use signatureName to determine what order the name is in.
    $flatName = false;
    if ($signatureName) {
        $nameParts = explode(' ', strtolower($name));
        $signatureNameParts = explode(' ', strtolower($signatureName));
        $flatName = ($nameParts[0] == $signatureNameParts[0]);
    }

    if ($flatName) {
        $person = PersonTable::parseFlatName($name);
    } else {
        // Skip over a surname prefix so it stays attached to the last name.
        $offset = 0;
        $re = '/^(de|du|von|van|di|du|st|del|da)\\s+((la|de|der)\\s+)?/isu';
        if (preg_match($re, $name, $matches)) {
            $offset = strlen($matches[0]);
        }

        $split = strpos($name, " ", $offset);
        if ($split === false) {
            // No separator left: the whole string is the last name. Without this
            // guard the false offset would yield an empty last name (",Name").
            $last = $name;
            $rest = '';
        } else {
            $last = substr($name, 0, $split);
            $rest = substr($name, $split);
        }

        $person = PersonTable::parseCommaName($last . "," . $rest);
    }

    $name = $person->getFullName();
    $regex = $person->getNameRegex();

    return array($person, $name, $regex);
}