예제 #1
0
 /**
  * Builds a regular-expression fragment (no delimiters, no modifiers) that
  * matches this person's name followed by their last name.
  *
  * Every lowercase letter of a first/middle/nick name token "x" is turned
  * into the class "[xX]". For tokens whose length (after
  * LsString::stripNonAlpha()) exceeds 3, every such class after the second
  * one is made optional, so truncated forms of the token also match.
  *
  * @param bool $first_required When true, one of the first-name variants must
  *                             be present and the middle/nick part is
  *                             optional. When false, first-name variants,
  *                             middle name, nickname and their initials are
  *                             all interchangeable alternatives.
  *
  * @return string The assembled pattern, ending with the pattern returned by
  *                getLastNameRegex() and a word boundary.
  */
 public function getNameRegex($first_required = false)
 {
     $last_re = $this->getLastNameRegex();

     // Replacement for the removed /e modifier: "x" => "[xX]".
     $either_case = function ($m) {
         return '[' . $m[1] . strtoupper($m[1]) . ']';
     };

     // If PersonTable::$shortFirstNames has an entry for the first name,
     // append its value(s) as extra space-separated first-name variants.
     $name_first = $this->name_first;
     if (isset(PersonTable::$shortFirstNames[$name_first])) {
         $fn_arr = (array) PersonTable::$shortFirstNames[$name_first];
         $name_first = $this->name_first . ' ' . implode(' ', $fn_arr);
     }

     if ($first_required) {
         $fm = $this->name_middle . ' ' . $this->name_nick;
     } else {
         $fm = $name_first . ' ' . $this->name_middle . ' ' . $this->name_nick;
     }

     $alternatives = array();
     $initials = '';
     foreach (preg_split('/[\\s-]+/', $fm, -1, PREG_SPLIT_NO_EMPTY) as $token) {
         $len = strlen(LsString::stripNonAlpha($token));

         // Take the initial from the raw token: after the transformation
         // below a lowercase token starts with "[" rather than its letter.
         $initials .= strtoupper($token[0]);

         $pattern = preg_replace_callback('/(\\p{Ll})/', $either_case, $token);

         //if string is longer than 3, then make everything after the
         //second "[xX]" class optional
         if ($len > 3) {
             // A failed strpos() yields false, which counts as 0 here.
             $first_close = strpos($pattern, ']');
             $second_close = strpos($pattern, ']', (int) $first_close + 1);
             $offset = (int) $second_close + 1;
             $rest = str_replace(']', ']?', substr($pattern, $offset));
             $pattern = substr($pattern, 0, $offset) . $rest;
         }
         $alternatives[] = $pattern;
     }

     // Only add the initials class when there is at least one initial;
     // "[]" is not an empty class in PCRE and would swallow what follows.
     $fm_group = implode('|', $alternatives);
     if ($initials !== '') {
         $fm_group .= '|[' . $initials . ']';
     }

     $separator = '\\b([\'"\\(\\)\\.]{0,3}\\s+|\\.\\s*|\\s?-\\s?)?';
     // Optional extra name word, repetition of the whole prefix, then the
     // last name followed by a word boundary.
     $tail = '((\\p{L}|[\'\\-])+' . $separator . ')?)+((' . $last_re . ')\\b))';

     if ($first_required) {
         $nf_arr = LsString::split($name_first);
         foreach ($nf_arr as $i => $nf) {
             $nf_arr[$i] = preg_replace_callback('/(\\p{Ll})/', $either_case, $nf);
         }
         $name_first = implode('|', $nf_arr);
         $re = '((\\b(' . $name_first . ')' . $separator . '(' . $fm_group . ')?' . $separator . $tail;
     } else {
         $re = '((\\b(' . $fm_group . ')' . $separator . $tail;
     }
     return $re;
 }
예제 #2
0
 /**
  * Returns the <$tag ...>...</$tag> element of $str that encloses the byte
  * range from $start to $end.
  *
  * The opening tag is the last "<$tag" found before $start; the closing tag
  * is the first "</$tag" found at or after $end. Searches are
  * case-insensitive and prefix-based (the text "<$tag" is searched as-is).
  *
  * @param string   $str   Markup to search.
  * @param string   $tag   Tag name; passed through LsString::stripNonAlpha()
  *                        and trim() before use.
  * @param int      $start Byte offset that must lie inside the element.
  * @param int|null $end   Byte offset where the search for the closing tag
  *                        begins; falls back to $start when empty.
  *
  * @return string|null The element including both tags, or null when:
  *                     - no opening tag precedes $start,
  *                     - that tag is already closed before $start,
  *                     - no closing tag follows $end,
  *                     - another opening tag appears before the closing tag,
  *                     - the closing tag has no terminating ">".
  */
 static function getStringInTag($str, $tag, $start, $end = null)
 {
     $tag = trim(LsString::stripNonAlpha($tag));
     if (!$end) {
         $end = $start;
     }
     // An offset past the end of the string cannot be inside any element
     // (and stripos() rejects such offsets).
     if ($end > strlen($str)) {
         return null;
     }

     $before = substr($str, 0, $start);
     $open = strripos($before, '<' . $tag);
     $prev_close = strripos($before, '</' . $tag);
     // No opening tag, or the nearest opening tag was closed before $start.
     if ($open === false || ($prev_close !== false && $prev_close > $open)) {
         return null;
     }

     $close = stripos($str, '</' . $tag, $end);
     if ($close === false) {
         return null;
     }
     // A new opening tag before the closing tag means the closing tag
     // belongs to a different element. When there is no further opening tag
     // at all ($next_open === false) the closing tag is accepted; comparing
     // an int against false with "<" would always reject it.
     $next_open = stripos($str, '<' . $tag, $end);
     if ($next_open !== false && $next_open < $close) {
         return null;
     }

     $close_end = stripos($str, '>', $close);
     if ($close_end === false) {
         return null;
     }
     return substr($str, $open, $close_end + 1 - $open);
 }
예제 #3
0
 /**
  * Sets $info['blurb'] from the sentences in $info['blurb_arr'] that still
  * contain enough words once boilerplate has been removed.
  *
  * A sentence is kept when more than 3 entries remain after removing the
  * skip words (board-related terms, $info['since'], $age_match['age'], and
  * the words of the matched entity's name), four-digit numbers, and
  * whatever LsString::stripNonAlpha() strips. The kept sentences are joined
  * with spaces; the result is stored as $info['blurb'] only if more than 4
  * entries remain after additionally removing common title words.
  *
  * Entries are counted with preg_split() on whitespace without dropping
  * empty strings, so leading/trailing whitespace contributes to the counts.
  *
  * @param array $info      Must contain 'blurb_arr' and 'since'.
  * @param array $age_match Must contain 'age' and ['name_match']['id'].
  *
  * @return array $info, with 'blurb' set when a qualifying blurb was found.
  */
 private function parseBlurb($info, $age_match)
 {
     if (count($info['blurb_arr']) == 0) {
         return $info;
     }

     $person = Doctrine::getTable('Entity')->find($age_match['name_match']['id']);
     // Guard against a failed lookup instead of reading ->name off a
     // non-object.
     $name_words = $person ? explode(' ', $person->name) : array();

     $skip = array_merge(
         array('director', 'directors', 'since', 'board', $info['since'], $age_match['age'], 'age'),
         $name_words
     );

     // Removes the skip words, four-digit numbers and non-alpha characters
     // from $text and returns what is left, split on whitespace.
     $remaining_words = function ($text, array $skip_words) {
         foreach ($skip_words as $word) {
             $word = LsString::escapeStringForRegex($word);
             $text = preg_replace('/\\b' . $word . '\\b/isu', '', $text);
         }
         $text = preg_replace('/\\b\\d\\d\\d\\d\\b/', '', $text);
         $text = LsString::stripNonAlpha($text, ' ');
         return preg_split('/\\s+/s', $text);
     };

     $kept = array();
     foreach ($info['blurb_arr'] as $sentence) {
         if (count($remaining_words($sentence, $skip)) > 3) {
             $kept[] = $sentence;
         }
     }
     if (count($kept) == 0) {
         return $info;
     }

     $blurb = implode(' ', $kept);
     $skip = array_merge($skip, array('executive', 'vice', 'president', 'chief', 'chairman', 'of', 'the'));
     if (count($remaining_words($blurb, $skip)) > 4) {
         $info['blurb'] = $blurb;
     }
     return $info;
 }
예제 #4
0
 /**
  * Looks for occurrences of the last name $last in $str and reconstructs
  * the surrounding full name for each one.
  *
  * For every occurrence, the words between the preceding comma (or the
  * start of $str) and the last name are read backwards to collect
  * first/middle names and a prefix; the words after the last name are read
  * forwards to collect suffixes. Occurrences with no usable leading name
  * are skipped. When several occurrences qualify, the last one wins.
  *
  * All offsets are byte offsets into the original $str, which is never
  * modified, so offsets reported by preg_match_all() stay valid for every
  * occurrence.
  *
  * @param string $str  Text to search.
  * @param string $last Last name; hyphens and whitespace in it are treated
  *                     as interchangeable.
  *
  * @return array|null Array with keys 'nameFull', 'nameStart', 'nameLast'
  *                    and 'namePost', or null when nothing qualifies or the
  *                    first occurrence sits at offset 0.
  */
 static function getNameWithLast($str, $last)
 {
     $re_last = LsString::escapeStringForRegex($last);
     //hyphens and spaces interchangeable in last names
     $re_last = preg_replace('/\\\\s+|\\\\\\-/is', '(\\s+|\\-)', $re_last);
     $matches = array();
     $found = preg_match_all('/\\b' . $re_last . '\\b/isu', $str, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
     if (!$found) {
         // No occurrence, or the pattern could not be applied.
         return null;
     }

     $name = null;
     $total = count($matches);
     $str_len = strlen($str);
     for ($i = 0; $i < $total; $i++) {
         $pos_last = $matches[$i][0][1];
         // NOTE(review): an occurrence at the very start aborts the whole
         // search rather than skipping just this occurrence; kept as-is,
         // confirm that this is intended.
         if ($pos_last == 0) {
             return null;
         }
         $matched_last = $matches[$i][0][0];

         //work backwards from last name to find comma
         $comma = strrpos(substr($str, 0, $pos_last), ',');
         $pre_start = ($comma === false) ? 0 : $comma;
         $pre = (string) substr($str, $pre_start, $pos_last - $pre_start);

         // Text after this occurrence, up to the next occurrence (or the
         // end of the string).
         $post_start = $pos_last + strlen($matched_last);
         $post_end = ($i + 1 < $total) ? $matches[$i + 1][0][1] : $str_len;
         $post = (string) substr($str, $post_start, $post_end - $post_start);

         // Walk the preceding words from nearest to farthest.
         $lead = array();
         $words = array_reverse(preg_split('/[\\s]+/', $pre, -1, PREG_SPLIT_NO_EMPTY));
         foreach ($words as $word) {
             $case = LsString::checkCase($word);
             if (!$case) {
                 continue;
             }
             if ($case == 'initial') {
                 $lead[] = $word;
             } elseif ($case == 'lower') {
                 break;
             } elseif (preg_match('/\\.(\\P{L})*$/u', $word) == 1) {
                 // A word ending in "." (plus optional non-letters) ends
                 // the name; it is kept only if it is a known prefix.
                 $word = LsString::stripNonAlpha($word);
                 if ($prefix = LsArray::inArrayNoCase($word, PersonTable::$nameParsePrefixes)) {
                     $lead[] = $prefix;
                 }
                 break;
             } else {
                 $lead[] = $word;
             }
         }
         $pre = implode(' ', array_reverse($lead));
         if (strlen(trim($pre)) == 0) {
             continue;
         }

         // Collect known suffixes following the last name; stop at the
         // first lowercase word or the first word that is not a suffix.
         $suffixes = array();
         foreach (preg_split('/[\\s]+/', $post, -1, PREG_SPLIT_NO_EMPTY) as $word) {
             $case = LsString::checkCase($word);
             if (!$case) {
                 continue;
             }
             if ($case == 'lower') {
                 break;
             }
             $word = LsString::stripNonAlpha($word);
             $suffix = LsArray::inArrayNoCase($word, PersonTable::$nameParseSuffixes);
             if (!$suffix) {
                 break;
             }
             $suffixes[] = $suffix;
         }
         $post = trim(implode(' ', $suffixes));

         $full = $pre . ' ' . $matched_last;
         if (strlen($post) > 0) {
             $full .= ', ' . $post;
         }
         $name = array('nameFull' => $full, 'nameStart' => $pre, 'nameLast' => $matched_last, 'namePost' => $post);
     }
     return $name;
 }