/**
 * Parse a flat, single-string personal name into its components.
 *
 * Processing order: protect a known multi-word surname, normalise
 * punctuation/whitespace, strip leading prefixes and trailing suffixes
 * (taken from self::$nameParsePrefixes / self::$nameParseSuffixes), pull out
 * a quoted nickname, then split what is left on spaces into
 * first / middle / last.
 *
 * @param string      $str         The flat name to parse.
 * @param string|null $surname     Known surname, if any. Its words are joined
 *                                 with underscores while parsing so that a
 *                                 multi-word surname stays in one piece.
 * @param bool        $returnArray When true, return the array of name parts
 *                                 instead of an Entity.
 *
 * @return array|Entity Array keyed by name_first, name_last, name_middle,
 *                      name_prefix, name_suffix and name_nick, or a new
 *                      (unsaved) Entity with the 'Person' extension carrying
 *                      the same values.
 */
static function parseFlatName($str, $surname = null, $returnArray = false)
{
    $namePrefix = $nameFirst = $nameMiddle = $nameLast = $nameSuffix = $nameNick = null;

    // To handle multi-word last names like "Van der Twerp": strip non-letters
    // from both ends of the surname and glue its words together with
    // underscores, so the later split on spaces keeps it as a single part.
    $sub = null;
    if ($surname) {
        $sub = preg_replace('/(^(\\P{L})+|(\\P{L})+$)/u', '', $surname);
        $sub = preg_replace('/\\s+/is', '_', $sub);
        $str = str_ireplace($surname, $sub, $str);
    }

    // Trim and turn periods into spaces.
    $str = trim(str_replace('.', ' ', $str));

    // Remove extra spaces.
    $str = preg_replace('/\\s{2,}/', ' ', $str);

    // Remove any parenthesised text that is preceded by a space
    // (the pattern is not anchored, so this applies anywhere in the string).
    $str = preg_replace('/ \\([^\\)]+\\)/', '', $str);

    // Strip leading prefixes. After every hit the scan restarts from the
    // first prefix so that stacked prefixes are all consumed. Prefixes listed
    // in LsLanguage::$commonPrefixes are removed but not recorded.
    // NOTE(review): prefixes/suffixes are interpolated into the pattern
    // unescaped — presumably they contain no regex metacharacters; confirm.
    $prefixes = self::$nameParsePrefixes;
    while ($prefix = current($prefixes)) {
        $new = preg_replace('/^' . $prefix . ' /i', '', $str);
        if ($str != $new) {
            if (!LsArray::inArrayNoCase($prefix, LsLanguage::$commonPrefixes)) {
                $namePrefix .= $prefix . ' ';
            }
            $str = trim($new);
            reset($prefixes);
            continue;
        }
        next($prefixes);
    }
    $namePrefix = $namePrefix ? trim($namePrefix) : null;

    // Strip trailing suffixes the same way; each new suffix is prepended so
    // the recorded suffixes keep their original left-to-right order.
    $suffixes = self::$nameParseSuffixes;
    while ($suffix = current($suffixes)) {
        $new = preg_replace('/ ' . $suffix . '$/i', '', $str);
        if ($str != $new) {
            $nameSuffix = $suffix . ' ' . $nameSuffix;
            $str = trim($new);
            reset($suffixes);
            continue;
        }
        next($suffixes);
    }
    $nameSuffix = $nameSuffix ? trim($nameSuffix) : null;

    // Remove commas left over from suffixes.
    $str = trim(str_replace(',', '', $str));

    // Find a nickname in quotes. The pattern has exactly one capture group,
    // so the nickname is always $nickFound[1]; the previous fallback to
    // $nickFound[2] read an offset that can never exist.
    if (preg_match('/["\']([\\S]+)[\'"]/', $str, $nickFound)) {
        $nameNick = $nickFound[1];
        $str = trim(preg_replace('/["\']([\\S]+)[\'"]/', '', $str));
    }

    // Condense multiple spaces.
    $str = preg_replace('/\\s{2,}/', ' ', $str);

    // Split into parts and assign them by count.
    $parts = explode(' ', $str);
    $partCount = count($parts);

    switch ($partCount) {
        case 1:
            if ($namePrefix) {
                // Only one word plus a prefix: treat the prefix as the first name.
                $nameFirst = $namePrefix;
                $nameLast = $parts[0];
                $namePrefix = null;
            } elseif ($nameSuffix) {
                // Only one word plus a suffix: treat the suffix as the last name.
                $nameFirst = $parts[0];
                $nameLast = $nameSuffix;
                $nameSuffix = null;
            } elseif (strtolower((string) $sub) == strtolower($parts[0])) {
                // The lone word is the known surname. The cast keeps the
                // comparison unchanged when no surname was supplied while
                // avoiding passing null to strtolower().
                $nameLast = $parts[0];
            } else {
                $nameFirst = $parts[0];
            }
            break;

        case 2:
            $nameFirst = $parts[0];
            $nameLast = $parts[1];
            break;

        case 3:
            $nameFirst = $parts[0];
            $nameMiddle = $parts[1];
            $nameLast = $parts[2];
            break;

        default:
            // Four or more parts: everything between first and last is middle.
            $nameFirst = $parts[0];
            $nameLast = $parts[$partCount - 1];
            for ($n = 1; $n < $partCount - 1; $n++) {
                $nameMiddle .= $parts[$n] . ' ';
            }
            $nameMiddle = trim($nameMiddle);
            break;
    }

    // Restore the spaces of a multi-word surname. The cast preserves the
    // existing result (an empty string when no last name was found) without
    // handing null to str_replace().
    $nameLast = str_replace('_', ' ', (string) $nameLast);

    $name = array(
        'name_first' => $nameFirst,
        'name_last' => $nameLast,
        'name_middle' => $nameMiddle,
        'name_prefix' => $namePrefix,
        'name_suffix' => $nameSuffix,
        'name_nick' => $nameNick,
    );

    // Clean up every part except prefix and suffix: strip non-letters from
    // both ends, re-case values that are entirely upper or lower case, and
    // run hgCaser on everything but the last name.
    foreach ($name as $nk => &$nv) {
        if ($nv && $nk != 'name_suffix' && $nk != 'name_prefix') {
            $nv = preg_replace('/^(\\P{L})+|(\\P{L})+$/u', '', $nv);
            $case = LsString::checkCase($nv);
            if ($case == 'upper' || $case == 'lower') {
                $nv = LsLanguage::nameize($nv);
            }
            if ($nk != 'name_last') {
                $nv = LsLanguage::hgCaser($nv, false);
            }
        }
    }
    unset($nv); // break the reference left behind by the foreach

    if ($returnArray) {
        return $name;
    }

    $person = new Entity();
    $person->addExtension('Person');
    $person->name_first = $name['name_first'];
    $person->name_middle = $name['name_middle'];
    $person->name_last = $name['name_last'];
    $person->name_nick = $name['name_nick'];
    $person->name_prefix = $name['name_prefix'];
    $person->name_suffix = $name['name_suffix'];

    return $person;
}
/**
 * Normalise the capitalisation of a name-like string.
 *
 * If any space-separated word is already reported as 'capitalized' by
 * LsString::checkCase(), the input is assumed to be deliberately cased: only
 * "Mc" prefixes, word-initial letters and possessive 'S are adjusted and the
 * rest is left alone. Otherwise the string is lower-cased and every segment
 * between delimiters is capitalised.
 *
 * The regex replacements use preg_replace_callback(); the previously used
 * /e modifier was removed in PHP 7.0, where it makes preg_replace() emit a
 * warning and return NULL.
 *
 * @param string   $str                 The string to re-case.
 * @param bool     $capitalize_acronyms Currently unused; kept for interface
 *                                      compatibility.
 * @param string[] $delimiters          Characters after which a new
 *                                      capitalised segment starts.
 *
 * @return string The re-cased string (the input itself when it is empty).
 */
static function nameize($str, $capitalize_acronyms = false, $delimiters = array("'", "-", " ", "/", "(", "&", "."))
{
    $prefixes = 'MC|Mc';

    // Empty string: return it untouched.
    if ((string) $str === '') {
        return $str;
    }

    // It seems to be properly capitalised already. Don't lose data; just
    // confirm the special cases and return.
    foreach (explode(' ', $str) as $word) {
        if (LsString::checkCase($word) == 'capitalized') {
            // Confirm Mc: uppercase the letter that follows an MC/Mc prefix.
            $str = preg_replace_callback(
                "/\\b({$prefixes})(\\w)/",
                function ($m) {
                    return $m[1] . strtoupper($m[2]);
                },
                $str
            );

            // Capitalise the first letter of every word.
            $str = preg_replace_callback(
                '/\\b(\\w)/',
                function ($m) {
                    return strtoupper($m[1]);
                },
                $str
            );

            // Make sure a possessive 'S is lowercase.
            $str = preg_replace_callback(
                '/(\'S)\\b/',
                function ($m) {
                    return strtolower($m[1]);
                },
                $str
            );

            return $str;
        }
    }

    // Bring the string to lower case, then capitalise segment by segment.
    $string = strtolower($str);

    foreach ($delimiters as $delimiter) {
        // An empty delimiter cannot be used to split.
        if ((string) $delimiter === '') {
            continue;
        }

        // Leave apostrophes alone when the string contains a possessive 's,
        // so "john's" does not become "John'S".
        if ($delimiter == "'" && preg_match('/\'s(\\b|$)/i', $string)) {
            continue;
        }

        // Compare against false explicitly: a delimiter at offset 0 is still
        // a hit (a plain truthiness test skipped strings that start with one).
        if (strpos($string, $delimiter) !== false) {
            // Capitalise each portion of the string which was separated at
            // the special character, then stitch the portions back together.
            $string = implode($delimiter, array_map('ucfirst', explode($delimiter, $string)));
        }
    }

    // Add prefixes: uppercase the letter after a word-initial "mc". Matched
    // case-insensitively because the delimiter pass above may already have
    // turned "mc" into "Mc" (e.g. "john mcdonald" -> "John Mcdonald").
    $string = preg_replace_callback(
        '/\\b(mc)(\\w)/i',
        function ($m) {
            return $m[1] . strtoupper($m[2]);
        },
        $string
    );

    return ucfirst($string);
}
/**
 * Create and save a person entity from one scraped person record.
 *
 * The name is parsed twice with PersonTable::parseFlatName() — once from the
 * proxy name and once from the Form 4 name rearranged into
 * "prefixes first/middle last suffixes" order — and the two results are
 * combined by $this->mergePeople(). The merged entity gets the
 * 'BusinessPerson' extension and the SEC CIK, is saved, and the Form 4 URL
 * is attached as the reference for the name fields.
 *
 * @param array  $person_arr Record with the keys read here: nameLast,
 *                           proxyName, personName, personCik, form4Url, date.
 * @param string $corp_name  Company name, used in the reference label.
 *
 * @return mixed The saved entity returned by mergePeople().
 */
private function importPerson($person_arr, $corp_name)
{
    $last = $person_arr['nameLast'];

    // First candidate: the name as given in the proxy.
    $p1 = PersonTable::parseFlatName($person_arr['proxyName'], $last);

    // Prep the Form 4 name for parseFlatName.
    // NOTE(review): this assumes personName starts with the last name —
    // the first strlen($last) bytes are dropped without checking; confirm.
    $rest = substr($person_arr['personName'], strlen($last));
    $parts = preg_split('/\\s+/s', $rest, -1, PREG_SPLIT_NO_EMPTY);

    $suffixes = array();
    $prefixes = array();
    $fm = array();

    // Transfer suffixes to the end of the name passed to parseFlatName and
    // prefixes to the beginning; everything else is first/middle name.
    // One-character tokens are never treated as prefix or suffix.
    foreach ($parts as $part) {
        $isLong = strlen($part) > 1;
        if ($isLong && ($s = LsArray::inArrayNoCase($part, PersonTable::$nameParseSuffixes))) {
            $suffixes[] = $s;
        } elseif ($isLong && ($s = LsArray::inArrayNoCase($part, PersonTable::$nameParsePrefixes))) {
            $prefixes[] = $s;
        } else {
            $fm[] = $part;
        }
    }

    $flatname = implode(' ', $prefixes) . ' ' . implode(' ', $fm) . ' ' . $last . ' ' . implode(' ', $suffixes);

    // Second candidate: the rearranged Form 4 name.
    $p2 = PersonTable::parseFlatName($flatname, $last);

    $p = $this->mergePeople($p1, $p2);

    // The supplied last name wins over whatever the parser produced; only an
    // all-uppercase value is re-cased first.
    if (LsString::checkCase($last) == 'upper') {
        $last = LsLanguage::nameize($last);
    }
    $p->name_last = $last;

    $p->addExtension('BusinessPerson');
    $p->sec_cik = $person_arr['personCik'];
    $p->save();
    echo $p->name . " saved \n";

    // Save source info for the name fields.
    // NOTE(review): only the Form 4 URL is recorded; no reference is added
    // for the proxy the first candidate name came from.
    $p->addReference(
        $person_arr['form4Url'],
        null,
        array('name_first', 'name_last', 'name_middle', 'name_suffix', 'name_prefix', 'name_nick'),
        $corp_name . ' Form 4',
        null,
        $person_arr['date']
    );

    return $p;
}