Ejemplo n.º 1
0
 /**
  * Heuristically decides whether a URL appears to belong to an organization.
  *
  * Only the "//host/" section of the URL is searched when the URL has one;
  * otherwise the whole URL is searched. The URL is accepted when it contains
  * (case-insensitively) either
  *  - a word of the name that is longer than one byte and is neither a
  *    filler word nor a corporate designator, or
  *  - an acronym longer than two bytes built from the first letters of the
  *    name's words (all words, all but filler words, or all but corporate
  *    designators).
  *
  * @param string $url      URL to inspect
  * @param string $org_name organization name to look for
  *
  * @return bool true when a name word or acronym occurs in the URL
  */
 public function checkUrl($url, $org_name)
 {
     // narrow the search to the "//host/" section when there is one
     if (preg_match('/\\/\\/[^\\/]+\\//isu', $url, $match)) {
         $url = $match[0];
     }
     $common = array('and', 'the', 'of', 'in', 'at', '&');
     $abbrevs = array('Corporation', 'Inc', 'Group', 'LLC', 'LLP', 'Corp', 'Co', 'Cos', 'LP', 'PA', 'Dept', 'Department', 'International', 'Administration');
     $both = array_merge($common, $abbrevs);
     $all = '';
     $no_common = '';
     $no_corp = '';
     foreach (LsString::split($org_name) as $part) {
         // an empty word has no first letter and can never match; skipping it
         // avoids "uninitialized string offset" / "empty needle" warnings
         if ((string) $part === '') {
             continue;
         }
         // one distinctive word found in the URL is sufficient
         if (strlen($part) > 1 && stripos($url, $part) !== false && !LsArray::inArrayNoCase($part, $both)) {
             return true;
         }
         // NOTE(review): $part[0] is the first byte, not the first character,
         // so initials of multibyte words are truncated - confirm whether
         // non-ASCII organization names need to be supported here
         $initial = $part[0];
         $all .= $initial;
         if (!LsArray::inArrayNoCase($part, $common)) {
             $no_common .= $initial;
         }
         if (!LsArray::inArrayNoCase($part, $abbrevs)) {
             $no_corp .= $initial;
         }
     }
     // no word matched: fall back to the acronyms
     foreach (array($all, $no_common, $no_corp) as $acronym) {
         if (strlen($acronym) > 2 && stripos($url, $acronym) !== false) {
             return true;
         }
     }
     return false;
 }
Ejemplo n.º 2
0
 /**
  * Extracts a person's name from free text, given the person's last name.
  *
  * Every whole-word, case-insensitive occurrence of the last name is
  * examined (hyphens and whitespace inside the last name are treated as
  * interchangeable). For each occurrence the words between the closest
  * preceding comma and the last name are read right-to-left to collect the
  * leading part of the name, and the words after the last name (up to the
  * next occurrence) are read left-to-right to collect known suffixes.
  * Occurrences with no usable leading part are skipped; when several
  * occurrences qualify, the final one wins.
  *
  * @param string $str  text to search
  * @param string $last last name to look for
  *
  * @return array|null array with keys 'nameFull', 'nameStart', 'nameLast'
  *                    and 'namePost', or null when no name was found or the
  *                    text starts with the last name
  */
 static function getNameWithLast($str, $last)
 {
     $re_last = LsString::escapeStringForRegex($last);
     //hyphens and spaces interchangeable in last names
     $re_last = preg_replace('/\\\\s+|\\\\\\-/is', '(\\s+|\\-)', $re_last);
     $matches = array();
     preg_match_all('/\\b' . $re_last . '\\b/isu', $str, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
     $name = null;
     // Match offsets are byte offsets into the original $str, so $str must
     // stay untouched while the matches are processed.
     $str_len = strlen($str);
     $match_count = count($matches);
     for ($m = 0; $m < $match_count; $m++) {
         $pos_last = $matches[$m][0][1];
         if ($pos_last == 0) {
             // nothing can precede a last name at the very start of the text
             return null;
         }
         $last = $matches[$m][0][0];
         //work backwards from last name to find comma
         $comma = strripos($str, ',', $pos_last - $str_len);
         $start = $comma === false ? 0 : $comma;
         // text from the comma (or start of text) up to this occurrence
         $pre = (string) substr($str, $start, $pos_last - $start);
         // text after this occurrence, stopping at the next occurrence
         $after = $pos_last + strlen($last);
         $until = $m + 1 < $match_count ? $matches[$m + 1][0][1] : $str_len;
         $post = (string) substr($str, $after, $until - $after);

         // walk backwards from the last name collecting name-like words
         $new = array();
         foreach (array_reverse(preg_split('/[\\s]+/', $pre, -1, PREG_SPLIT_NO_EMPTY)) as $a) {
             $case = LsString::checkCase($a);
             if (!$case) {
                 continue;
             }
             if ($case == 'initial') {
                 $new[] = $a;
                 continue;
             }
             if ($case == 'lower') {
                 break;
             }
             // a word ending in a period is kept only when it is a known
             // name prefix; either way it ends the backwards walk
             if (preg_match('/\\.(\\P{L})*$/u', $a) == 1) {
                 $a = LsString::stripNonAlpha($a);
                 if ($s = LsArray::inArrayNoCase($a, PersonTable::$nameParsePrefixes)) {
                     $new[] = $s;
                 }
                 break;
             }
             $new[] = $a;
         }
         $pre = implode(' ', array_reverse($new));
         if (strlen(trim($pre)) == 0) {
             continue;
         }

         // collect known suffixes that directly follow the last name
         $new = array();
         foreach (preg_split('/[\\s]+/', $post, -1, PREG_SPLIT_NO_EMPTY) as $a) {
             $case = LsString::checkCase($a);
             if (!$case) {
                 continue;
             }
             if ($case == 'lower') {
                 break;
             }
             $a = LsString::stripNonAlpha($a);
             $s = LsArray::inArrayNoCase($a, PersonTable::$nameParseSuffixes);
             if (!$s) {
                 break;
             }
             $new[] = $s;
         }
         $post = trim(implode(' ', $new));
         $full = $pre . ' ' . $last;
         if (strlen($post) > 0) {
             $full .= ', ' . $post;
         }
         $name = array('nameFull' => $full, 'nameStart' => $pre, 'nameLast' => $last, 'namePost' => $post);
     }
     return $name;
 }
Ejemplo n.º 3
0
 /**
  * Turns a free-text position description into a list of recognized
  * business titles.
  *
  * The text is cleaned up, split on a separator (",", ";", "and", " & "),
  * and every piece is run through an ordered list of abbreviation
  * expansions and qualifier removals. Pieces that then match an entry of
  * PositionTable::$businessPositions (case-insensitively) are returned.
  * When the text uses more than one kind of separator, or when no piece is
  * recognized, the cleaned-up text is returned as the only element.
  *
  * @param string $str    raw description text
  * @param mixed  $entity not used by the current implementation (stripping
  *                       of the entity name/ticker from the text is disabled)
  *
  * @return array list of description strings
  */
 static function parseDescriptionStr($str, $entity = null)
 {
     //cleanup text to be parsed
     $str = trim($str);
     // NOTE(review): the lookarounds read "not preceded/followed by '=' plus
     // whitespace"; they look like they were meant to be (?<!\s) / (?!\s).
     // As written practically every period is removed here.
     $str = preg_replace('/(?<!=\\s)\\.(?!=\\s)/', '', $str);
     $str = str_replace('.', ' ', $str);
     $str = preg_replace('/\\s{2,}/', ' ', $str);
     $str = preg_replace('/\\s+,(?=\\s)/', ',', $str);
     $str = preg_replace('/\\)\\s*$/', '', $str);
     if (strtolower($str) == 'see remarks') {
         $str = '';
     }

     //don't parse if more than one kind of separator is present
     $kinds = 0;
     foreach (array('/\\s&\\s/', '/,/', '/;/', '/\\band\\b/i') as $separator) {
         $kinds += preg_match($separator, $str) ? 1 : 0;
     }
     if ($kinds > 1) {
         return array($str);
     }

     // Ordered rewrite rules: array(kind, search, replacement).
     //   'regex' => preg_replace, 'text' => str_replace, 'itext' => str_ireplace
     // The order matters: later rules see the output of earlier ones.
     $rules = array(
         array('regex', '/( |^)(\\w) (\\w) (\\w)( |$)/', '\\2\\3\\4'),
         array('regex', '/(Interim|Acting|Incoming) /i', ''),
         array('regex', '/Sr /i', 'Senior '),
         array('regex', '/Chf /i', 'Chief '),
         array('regex', '/( |^)V( |$)/i', ' Vice '),
         array('regex', '/( |^)VP( |$)/i', ' Vice President '),
         array('regex', '/( |^)VC( |$)/i', ' Vice Chairman '),
         array('regex', '/( |^)Chr( |$)/i', ' Chairman '),
         array('regex', '/( |^)Ofcr( |$)/i', ' Officer '),
         array('regex', '/( |^)Vice P( |$)/i', ' Vice President '),
         array('regex', '/( |^)(Ex|Exec)( |$)/i', ' Executive '),
         array('regex', '/( |^)EVP( |$)/i', ' Executive Vice President '),
         array('regex', '/( |^)(Off|Offic|Offcr)( |$)/i', ' Officer '),
         array('text', 'Gen ', 'General '),
         array('regex', '/( |^)(Op|Oper) /', ' Operating '),
         array('regex', '/( |^)(Bd|Brd)( |$)/i', ' Board '),
         array('regex', '/of Board/i', ' of the Board'),
         array('regex', '/( |^)COB( |$)/i', ' Chairman of the Board '),
         array('regex', '/( |^)(Pres|Prs|Presid|Prsdt|Prsdnt)( |$)/i', ' President '),
         array('regex', '/( |^)Admin( |$)/i', ' Administrative '),
         array('regex', '/( |^)Info( |$)/i', ' Information '),
         array('regex', '/\\bComm\\b/i', 'Committee'),
         array('regex', '/\\bInc\\b/i', ''),
         array('regex', '/( |-|^)(Ch|Chm|Chmn|Chrm|Chrmn|Chair|Chairmain|Chariman)( |$)/i', '\\1Chairman '),
         array('regex', '/(Sec|Secr|Secy|Secretar|Secreta)( |$)/i', 'Secretary '),
         array('text', 'Vice-', 'Vice '),
         array('regex', '/( |^)Non /i', ' Non-'),
         array('regex', '/\\bCompl\\b/i', 'Compliance'),
         array('itext', 'of Advisory', 'of the Advisory'),
         array('regex', '/Advisory (Panel|Council)/i', 'Advisory Board'),
         array('itext', 'Independent ', ''),
         array('itext', 'Lead ', ''),
         array('itext', 'Corporate ', ''),
         array('itext', 'Outside ', ''),
         array('itext', 'Non-interested', ''),
         array('itext', 'Interested', ''),
         array('text', 'Main ', ''),
         array('itext', 'Presiding ', ''),
         array('itext', 'Founding ', ''),
         array('itext', 'Acctg', 'Accounting'),
         array('itext', 'Chairperson', 'Chairman'),
         array('itext', 'Chairwoman', 'Chairman'),
         array('itext', "Gen'l", 'General'),
     );

     //split on the separator and normalize every piece
     $titles = array();
     foreach (preg_split('/,|;|\\band\\b|\\s&\\s/', $str, -1, PREG_SPLIT_NO_EMPTY) as $piece) {
         $piece = preg_replace('/\\s{2,}/', ' ', trim($piece));
         foreach ($rules as $rule) {
             if ($rule[0] == 'regex') {
                 $piece = preg_replace($rule[1], $rule[2], $piece);
             } elseif ($rule[0] == 'itext') {
                 $piece = str_ireplace($rule[1], $rule[2], $piece);
             } else {
                 $piece = str_replace($rule[1], $rule[2], $piece);
             }
         }
         $piece = preg_replace('/\\s{2,}/', ' ', trim($piece));
         if (LsArray::inArrayNoCase($piece, PositionTable::$businessPositions)) {
             $titles[] = $piece;
         }
     }

     // nothing recognized: hand back the cleaned-up text as-is
     return count($titles) ? $titles : array($str);
 }
Ejemplo n.º 4
0
 /**
  * Parses a flat "Prefix First Middle Last Suffix" style name string.
  *
  * Known prefixes and suffixes (self::$nameParsePrefixes /
  * self::$nameParseSuffixes) are peeled off the ends, a quoted nickname is
  * extracted, and the remaining words are assigned to first / middle / last
  * name by position. All-upper or all-lower parts are re-cased.
  *
  * @param string      $str         the name to parse
  * @param string|null $surname     known last name; lets multi-word last
  *                                 names (e.g. "Van der Twerp") survive the
  *                                 split on spaces
  * @param bool        $returnArray when true, return the parts as an array
  *                                 instead of an Entity
  *
  * @return Entity|array an Entity with the Person extension, or an array
  *                      keyed name_first, name_last, name_middle,
  *                      name_prefix, name_suffix, name_nick
  */
 static function parseFlatName($str, $surname = null, $returnArray = false)
 {
     $namePrefix = $nameFirst = $nameMiddle = $nameLast = $nameSuffix = $nameNick = null;
     //to handle multi-word last names like Van der Twerp:
     //join the surname's words with underscores so it stays a single token
     $sub = null;
     if ($surname) {
         $sub = preg_replace('/(^(\\P{L})+|(\\P{L})+$)/u', '', $surname);
         $sub = preg_replace('/\\s+/is', '_', $sub);
         $str = str_ireplace($surname, $sub, $str);
     }
     //trim and remove periods
     $str = trim(str_replace('.', ' ', $str));
     //remove extra spaces
     $str = preg_replace('/\\s{2,}/', ' ', $str);
     //remove any parenthesized text that follows a space
     $str = preg_replace('/ \\([^\\)]+\\)/', '', $str);
     //get prefixes; after each hit the list is rescanned from the start so
     //stacked prefixes are all removed
     //NOTE(review): list entries are interpolated into the pattern unescaped;
     //confirm they contain no regex metacharacters
     $prefixes = self::$nameParsePrefixes;
     while ($prefix = current($prefixes)) {
         if ($str != ($new = preg_replace('/^' . $prefix . ' /i', '', $str))) {
             //common prefixes are stripped but not recorded
             if (!LsArray::inArrayNoCase($prefix, LsLanguage::$commonPrefixes)) {
                 $namePrefix .= $prefix . ' ';
             }
             $str = trim($new);
             reset($prefixes);
             continue;
         }
         next($prefixes);
     }
     $namePrefix = $namePrefix ? trim($namePrefix) : null;
     //get suffixes; they are found right-to-left, so each one is prepended
     $suffixes = self::$nameParseSuffixes;
     while ($suffix = current($suffixes)) {
         if ($str != ($new = preg_replace('/ ' . $suffix . '$/i', '', $str))) {
             $nameSuffix = $suffix . ' ' . $nameSuffix;
             $str = trim($new);
             reset($suffixes);
             continue;
         }
         next($suffixes);
     }
     $nameSuffix = $nameSuffix ? trim($nameSuffix) : null;
     //remove commas left over from suffixes
     $str = trim(str_replace(',', '', $str));
     //find nickname in quotes (the pattern has a single capture group)
     if (preg_match('/["\']([\\S]+)[\'"]/', $str, $nickFound)) {
         $nameNick = $nickFound[1];
         $str = trim(preg_replace('/["\']([\\S]+)[\'"]/', '', $str));
     }
     //condense multiple spaces
     $str = preg_replace('/\\s{2,}/', ' ', $str);
     //split into parts
     $parts = explode(' ', $str);
     switch (count($parts)) {
         case 1:
             //a lone word: pair it with the prefix or suffix when there is
             //one, otherwise decide first vs. last from the given surname
             if ($namePrefix) {
                 $nameFirst = $namePrefix;
                 $nameLast = $parts[0];
                 $namePrefix = null;
             } elseif ($nameSuffix) {
                 $nameFirst = $parts[0];
                 $nameLast = $nameSuffix;
                 $nameSuffix = null;
             } elseif (strtolower((string) $sub) == strtolower($parts[0])) {
                 $nameLast = $parts[0];
             } else {
                 $nameFirst = $parts[0];
             }
             break;
         case 2:
             $nameFirst = $parts[0];
             $nameLast = $parts[1];
             break;
         case 3:
             $nameFirst = $parts[0];
             $nameMiddle = $parts[1];
             $nameLast = $parts[2];
             break;
         default:
             //everything between the first and last word is the middle name
             $nameFirst = $parts[0];
             $nameLast = $parts[count($parts) - 1];
             for ($n = 1; $n < count($parts) - 1; $n++) {
                 $nameMiddle .= $parts[$n] . ' ';
             }
             $nameMiddle = trim($nameMiddle);
             break;
     }
     //restore the spaces of a multi-word surname
     $nameLast = str_replace('_', ' ', $nameLast);
     $name = array('name_first' => $nameFirst, 'name_last' => $nameLast, 'name_middle' => $nameMiddle, 'name_prefix' => $namePrefix, 'name_suffix' => $nameSuffix, 'name_nick' => $nameNick);
     foreach ($name as $nk => &$nv) {
         if ($nv && $nk != 'name_suffix' && $nk != 'name_prefix') {
             //strip leading/trailing non-letters, then fix all-caps / all-lower casing
             $nv = preg_replace('/^(\\P{L})+|(\\P{L})+$/u', '', $nv);
             $case = LsString::checkCase($nv);
             $nv = $case == 'upper' || $case == 'lower' ? LsLanguage::nameize($nv) : $nv;
             if ($nk != 'name_last') {
                 $nv = LsLanguage::hgCaser($nv, false);
             }
         }
     }
     //break the reference left behind by the foreach
     unset($nv);
     if ($returnArray) {
         return $name;
     }
     $person = new Entity();
     $person->addExtension('Person');
     $person->name_first = $name['name_first'];
     $person->name_middle = $name['name_middle'];
     $person->name_last = $name['name_last'];
     $person->name_nick = $name['name_nick'];
     $person->name_prefix = $name['name_prefix'];
     $person->name_suffix = $name['name_suffix'];
     return $person;
 }
 /**
  * Parses a free-text position description into a list of positions.
  *
  * The corporation's name, nickname and ticker are removed from the text,
  * the text is split on separators, and every piece is run through an
  * ordered series of abbreviation expansions and qualifier removals. Each
  * piece is then resolved to a title: first against
  * PositionTable::$businessPositions, then against existing Relationship
  * descriptions. For the first position only, an unrecognized piece is
  * retried with trailing words removed one at a time and is finally kept
  * verbatim; unrecognized pieces after the first position are attached as
  * notes to the previous position.
  *
  * @param string $str  raw description text
  * @param object $corp object exposing name, name_nick and ticker
  *
  * @return array list of arrays with keys 'description' (string) and
  *               'note' (list of strings)
  */
 public function parseDescriptionStr($str, $corp)
 {
     $descriptions = array();
     //cleanup text to be parsed
     $str = trim($str);
     $str = str_replace('.', ' ', $str);
     $str = preg_replace('/\\s{2,}/', ' ', $str);
     //strip the corporation's own name, nickname and ticker from the text
     $name_re = LsString::escapeStringForRegex($corp->name);
     $str = preg_replace('/\\b' . $name_re . '\\b/isu', '', $str);
     if ($corp->name_nick) {
         $nick_re = LsString::escapeStringForRegex($corp->name_nick);
         $str = preg_replace('/\\b' . $nick_re . '\\b/isu', '', $str);
     }
     if ($corp->ticker) {
         $tick_re = LsString::escapeStringForRegex($corp->ticker);
         $str = preg_replace('/\\b' . $tick_re . '\\b/isu', '', $str);
     }
     //split on separators (a hyphen right after "Co"/"CO" is not a separator)
     $parts = preg_split('/,|;|\\band\\b|(?<!C[Oo])\\-|\\bAND\\b|\\s&\\s|\\//', $str, -1, PREG_SPLIT_NO_EMPTY);
     foreach ($parts as $part) {
         $part = trim($part);
         $part = preg_replace('/\\s{2,}/', ' ', $part);
         //abbreviation replacements (order matters: later rules see the
         //output of earlier ones)
         $part = preg_replace('/( |^)(\\w) (\\w) (\\w)( |$)/', '\\2\\3\\4', $part);
         $part = preg_replace('/(Interim|Acting|Incoming) /i', '', $part);
         $part = preg_replace('/Sr /i', 'Senior ', $part);
         $part = preg_replace('/Chf /i', 'Chief ', $part);
         $part = preg_replace('/( |^)V( |$)/i', ' Vice ', $part);
         $part = preg_replace('/( |^)VP( |$)/i', ' Vice President ', $part);
         $part = preg_replace('/( |^)VC( |$)/i', ' Vice Chairman ', $part);
         $part = preg_replace('/( |^)Chr( |$)/i', ' Chairman ', $part);
         $part = preg_replace('/( |^)Ofcr( |$)/i', ' Officer ', $part);
         $part = preg_replace('/( |^)Vice P( |$)/i', ' Vice President ', $part);
         $part = preg_replace('/( |^)(Ex|Exec)( |$)/i', ' Executive ', $part);
         $part = preg_replace('/( |^)EVP( |$)/i', ' Executive Vice President ', $part);
         $part = preg_replace('/( |^)(Off|Offic|Offcr)( |$)/i', ' Officer ', $part);
         $part = str_replace('Gen ', 'General ', $part);
         $part = preg_replace('/( |^)(Op|Oper) /', ' Operating ', $part);
         $part = preg_replace('/( |^)(Bd|Brd)( |$)/i', ' Board ', $part);
         $part = preg_replace('/of Board/i', ' of the Board', $part);
         $part = preg_replace('/( |^)COB( |$)/i', ' Chairman of the Board ', $part);
         $part = preg_replace('/( |^)(Pres|Prs|Presid|Prsdt|Prsdnt)( |$)/i', ' President ', $part);
         $part = preg_replace('/( |^)Admin( |$)/i', ' Administrative ', $part);
         $part = preg_replace('/( |^)Info( |$)/i', ' Information ', $part);
         $part = preg_replace('/\\bComm\\b/i', 'Committee', $part);
         $part = preg_replace('/\\bInc\\b/i', '', $part);
         $part = preg_replace('/( |-|^)(Ch|Chm|Chmn|Chrm|Chrmn|Chair|Chairmain|Chariman)( |$)/i', '\\1Chairman ', $part);
         $part = preg_replace('/(Sec|Secr|Secy|Secretar|Secreta)( |$)/i', 'Secretary ', $part);
         $part = str_replace('Vice-', 'Vice ', $part);
         $part = preg_replace('/( |^)Non /i', ' Non-', $part);
         $part = preg_replace('/\\bCompl\\b/i', 'Compliance', $part);
         $part = str_ireplace('of Advisory', 'of the Advisory', $part);
         $part = preg_replace('/Advisory (Panel|Council)/i', 'Advisory Board', $part);
         $part = str_ireplace('Independent ', '', $part);
         $part = str_ireplace('Lead ', '', $part);
         $part = str_ireplace('Corporate ', '', $part);
         $part = str_ireplace('Outside ', '', $part);
         $part = str_ireplace('Non-interested', '', $part);
         $part = str_ireplace('Interested', '', $part);
         $part = str_replace('Main ', '', $part);
         $part = str_ireplace('Presiding ', '', $part);
         $part = str_ireplace('Founding ', '', $part);
         $part = str_ireplace('Acctg', 'Accounting', $part);
         $part = str_ireplace('Chairperson', 'Chairman', $part);
         $part = str_ireplace('Chairwoman', 'Chairman', $part);
         $part = str_ireplace("Gen'l", 'General', $part);
         $part = trim($part);
         $part = preg_replace('/\\s{2,}/', ' ', $part);
         if ($part == '') {
             continue;
         }
         $position = array('description' => null, 'note' => array());
         //look for matching title
         $p = LsArray::inArrayNoCase($part, PositionTable::$businessPositions);
         if ($p) {
             $position['description'] = $p;
         } elseif ($q = Doctrine::getTable('Relationship')->findOneByDescription1($part)) {
             //look the title text up, not the (still empty) result array
             $position['description'] = $q->description1;
         } elseif (count($descriptions) == 0) {
             //first position and still unknown: retry with trailing words
             //removed one at a time
             //NOTE(review): the removed words are discarded and the loop keeps
             //going after a hit, so a shorter match can replace a longer one -
             //confirm whether the words were meant to become a note
             $part_splat = LsString::split($part);
             $lim = count($part_splat) - 1;
             for ($i = 0; $i < $lim; $i++) {
                 array_pop($part_splat);
                 $part_new = implode(' ', $part_splat);
                 //a bare "director" is too generic to accept as a match
                 if (strtoupper($part_new) == 'DIRECTOR') {
                     break;
                 }
                 $p = LsArray::inArrayNoCase($part_new, PositionTable::$businessPositions);
                 if ($p) {
                     $position['description'] = $p;
                 } elseif ($q = Doctrine::getTable('Relationship')->findOneByDescription1($part_new)) {
                     $position['description'] = $q->description1;
                 }
             }
             //nothing recognized: keep the text as written
             if (!$position['description']) {
                 $position['description'] = $part;
             }
         } else {
             //unknown text after a recognized position becomes a note on it
             $descriptions[count($descriptions) - 1]['note'][] = $part;
         }
         if (isset($position['description'])) {
             $descriptions[] = $position;
         }
     }
     return $descriptions;
 }