/**
 * Builds a delimiter-less regex fragment that matches this person's name
 * as it might be written in free text: one or more given-name tokens (or
 * initials), optional intervening words, then the last name.
 *
 * Every lowercase letter of a given-name token is turned into a two-character
 * class (e.g. "a" -> "[aA]"), so the fragment tolerates case differences
 * without needing the /i modifier. For tokens with more than three alphabetic
 * characters, everything after the second such class is made optional, so
 * truncated forms of the token still match.
 *
 * @param bool $first_required When true the first name (or one of its
 *                             alternatives from PersonTable::$shortFirstNames)
 *                             must be present and the middle/nick part is
 *                             optional; when false any one of first, middle
 *                             or nick name (or an initial) suffices.
 *
 * @return string regex fragment without delimiters or modifiers
 */
public function getNameRegex($first_required = false)
{
    $last_re = $this->getLastNameRegex();

    // Append the alternatives registered for this first name, if any.
    $name_first = $this->name_first;
    if (isset(PersonTable::$shortFirstNames[$name_first])) {
        $fn_arr = (array) PersonTable::$shortFirstNames[$name_first];
        $name_first = $this->name_first . ' ' . implode(' ', $fn_arr);
    }

    if ($first_required) {
        $fm_source = $this->name_middle . ' ' . $this->name_nick;
    } else {
        $fm_source = $name_first . ' ' . $this->name_middle . ' ' . $this->name_nick;
    }

    // Replaces the former /e (PREG_REPLACE_EVAL) replacement, which was
    // deprecated in PHP 5.5 and removed in PHP 7.0.
    $either_case = function ($m) {
        return '[' . $m[1] . strtoupper($m[1]) . ']';
    };

    $fm_patterns = array();
    $initials = '';
    foreach (preg_split('/[\\s-]+/', $fm_source, -1, PREG_SPLIT_NO_EMPTY) as $word) {
        $len = strlen(LsString::stripNonAlpha($word));

        // Take the initial from the untransformed word: after the
        // transformation a word starting in lowercase begins with "[".
        // NOTE(review): this is the first byte, not the first character,
        // for multi-byte names - same as before.
        $initials .= strtoupper($word[0]);

        $pattern = preg_replace_callback('/(\\p{Ll})/', $either_case, $word);

        //if the token has more than 3 letters, everything after the second
        //bracketed letter becomes optional
        if ($len > 3) {
            // (int) false === 0 keeps the original arithmetic for tokens
            // that contain fewer than two bracketed letters.
            $first_close = strpos($pattern, ']');
            $second_close = strpos($pattern, ']', (int) $first_close + 1);
            $offset = (int) $second_close + 1;
            $tail = str_replace(']', ']?', substr($pattern, $offset));
            $pattern = substr($pattern, 0, $offset) . $tail;
        }

        $fm_patterns[] = $pattern;
    }

    // Only add the initials class when there is at least one initial.
    // An empty "[]" is not an empty class in PCRE: the "]" is taken
    // literally and the class runs on into the rest of the pattern.
    $fm_alt = implode('|', $fm_patterns);
    if ($initials !== '') {
        $fm_alt .= '|[' . $initials . ']';
    }

    $separator = '\\b([\'"\\(\\)\\.]{0,3}\\s+|\\.\\s*|\\s?-\\s?)?';

    if ($first_required) {
        $nf_arr = LsString::split($name_first);
        foreach ($nf_arr as $key => $nf) {
            $nf_arr[$key] = preg_replace_callback('/(\\p{Ll})/', $either_case, $nf);
        }
        $name_first = implode('|', $nf_arr);

        return '((\\b(' . $name_first . ')' . $separator . '(' . $fm_alt . ')?' . $separator
            . '((\\p{L}|[\'\\-])+' . $separator . ')?)+((' . $last_re . ')\\b))';
    }

    return '((\\b(' . $fm_alt . ')' . $separator
        . '((\\p{L}|[\'\\-])+' . $separator . ')?)+((' . $last_re . ')\\b))';
}
/**
 * Returns the innermost <$tag ...>...</$tag> element of $str that encloses
 * the byte range [$start, $end], including the tags themselves.
 *
 * The opening tag is the last "<$tag" before $start, provided no "</$tag"
 * follows it before $start. The closing tag is the first "</$tag" at or
 * after $end, provided no other "<$tag" opens before it. All searches are
 * case-insensitive.
 *
 * NOTE(review): tags are matched by prefix only, so a search for "p" also
 * matches "<pre" - confirm whether callers rely on this.
 *
 * @param string   $str   markup to search
 * @param string   $tag   tag name; passed through LsString::stripNonAlpha()
 *                        and trim() before use
 * @param int      $start byte offset where the enclosed range begins
 * @param int|null $end   byte offset where the enclosed range ends; any
 *                        falsy value (including 0) falls back to $start
 *
 * @return string|null the enclosing element, or null when none is found
 */
static function getStringInTag($str, $tag, $start, $end = null)
{
    $tag = trim(LsString::stripNonAlpha($tag));
    if (!$end) {
        $end = $start;
    }

    // stripos() raises a ValueError on PHP 8 (a warning earlier) when the
    // offset lies beyond the string; nothing can enclose such a range.
    if ($end > strlen($str)) {
        return null;
    }

    // Nearest opening tag before the range, unless it was already closed.
    $before = substr($str, 0, $start);
    $open = strripos($before, '<' . $tag);
    $closed_before = strripos($before, '</' . $tag);
    if ($open === false || ($closed_before !== false && $closed_before > $open)) {
        return null;
    }

    // Nearest closing tag after the range. A later opening tag only
    // disqualifies it when it actually exists and comes first; comparing
    // an offset against false (as the old "$b < $bb" did) is always false,
    // which rejected every element with no later sibling.
    $close = stripos($str, '</' . $tag, $end);
    $next_open = stripos($str, '<' . $tag, $end);
    if ($close === false || ($next_open !== false && $next_open < $close)) {
        return null;
    }

    // Extend to the ">" that terminates the closing tag.
    $close_end = stripos($str, '>', $close);
    if ($close_end === false) {
        return null;
    }

    return substr($str, $open, $close_end + 1 - $open);
}
/**
 * Chooses the informative entries of $info['blurb_arr'] and, if enough
 * content remains, stores them joined by spaces in $info['blurb'].
 *
 * An entry is kept when, after removing boilerplate words, the words of
 * the person's name, the 'since' and 'age' values and any four-digit
 * number, it still splits into more than 3 whitespace-separated tokens.
 * The joined blurb is accepted when, with common title words removed as
 * well, it splits into more than 4 tokens.
 *
 * NOTE(review): the token count comes from preg_split() without
 * PREG_SPLIT_NO_EMPTY, so leading/trailing whitespace contributes empty
 * tokens to the count. Left unchanged because the thresholds may have been
 * chosen with that in mind.
 *
 * @param array $info      must contain 'blurb_arr' and 'since'
 * @param array $age_match must contain 'age' and ['name_match']['id']
 *
 * @return array $info, with 'blurb' set when a usable blurb was found
 */
private function parseBlurb($info, $age_match)
{
    if (count($info['blurb_arr']) == 0) {
        return $info;
    }

    $id = $age_match['name_match']['id'];
    $person = Doctrine::getTable('Entity')->find($id);
    // Guard against an id that no longer resolves to a record instead of
    // dereferencing a non-object.
    $name_words = $person ? explode(' ', $person->name) : array();

    $skip = array('director', 'directors', 'since', 'board', $info['since'], $age_match['age'], 'age');
    $skip = array_merge($skip, $name_words);

    // Removes the given words and any four-digit number from $text, strips
    // non-alphabetic characters, and returns the remaining tokens.
    $remaining_words = function ($text, array $words) {
        foreach ($words as $word) {
            $word = LsString::escapeStringForRegex($word);
            $text = preg_replace('/\\b' . $word . '\\b/isu', '', $text);
        }
        $text = preg_replace('/\\b\\d\\d\\d\\d\\b/', '', $text);
        $text = LsString::stripNonAlpha($text, ' ');

        return preg_split('/\\s+/s', $text);
    };

    $kept = array();
    foreach ($info['blurb_arr'] as $entry) {
        if (count($remaining_words($entry, $skip)) > 3) {
            $kept[] = $entry;
        }
    }

    if (count($kept) > 0) {
        $blurb = implode(' ', $kept);
        $skip = array_merge($skip, array('executive', 'vice', 'president', 'chief', 'chairman', 'of', 'the'));
        if (count($remaining_words($blurb, $skip)) > 4) {
            $info['blurb'] = $blurb;
        }
    }

    return $info;
}
/**
 * Finds a full name in $str that ends with the given last name.
 *
 * For each occurrence of the last name, the text between the preceding
 * comma (or the start of the string) and the occurrence is read backwards
 * to collect given names and initials, and the text after the occurrence
 * is read forwards to collect suffixes listed in
 * PersonTable::$nameParseSuffixes. Occurrences with no usable given name
 * are skipped. When several occurrences qualify, the last one wins.
 *
 * @param string $str  text to search
 * @param string $last last name to look for (matched case-insensitively)
 *
 * @return array|null array with keys 'nameFull', 'nameStart', 'nameLast'
 *                    and 'namePost', or null when no name was found or the
 *                    text starts with the last name
 */
static function getNameWithLast($str, $last)
{
    $re_last = LsString::escapeStringForRegex($last);
    //hyphens and spaces interchangeable in last names
    // NOTE(review): this rewrites a literal backslash followed by "s" or
    // "-" in the escaped name; whether the space case ever fires depends on
    // what escapeStringForRegex() emits for whitespace - confirm.
    $re_last = preg_replace('/\\\\s+|\\\\\\-/is', '(\\s+|\\-)', $re_last);

    $matches = array();
    preg_match_all('/\\b' . $re_last . '\\b/isu', $str, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);

    $name = null;
    $total = count($matches);
    for ($i = 0; $i < $total; $i++) {
        $matched_last = $matches[$i][0][0];
        $pos_last = $matches[$i][0][1];

        // A last name at the very start leaves no room for a given name;
        // as before, this ends the whole search.
        if ($pos_last == 0) {
            return null;
        }

        // $str itself is never modified: the offsets reported by
        // preg_match_all() refer to the original text, so truncating it
        // (as the previous code did) broke every occurrence after the first.

        //work backwards from last name to find comma
        $head = substr($str, 0, $pos_last);
        $comma = strrpos($head, ',');
        $pre_text = ($comma === false) ? $head : substr($head, $comma);

        // Text after this occurrence, up to the next occurrence (or the end).
        $post_start = $pos_last + strlen($matched_last);
        $post_end = ($i + 1 < $total) ? $matches[$i + 1][0][1] : strlen($str);
        $post_text = (string) substr($str, $post_start, $post_end - $post_start);

        // Collect given names, walking right-to-left from the last name.
        $given = array();
        $tokens = preg_split('/[\\s]+/', $pre_text, -1, PREG_SPLIT_NO_EMPTY);
        foreach (array_reverse($tokens) as $token) {
            $case = LsString::checkCase($token);
            if (!$case) {
                continue;
            }
            if ($case == 'initial') {
                $given[] = $token;
                continue;
            }
            if ($case == 'lower') {
                break;
            }
            // A token ending in a period (plus optional non-letters) ends
            // the name; it is kept only if it is a known prefix.
            if (preg_match('/\\.(\\P{L})*$/u', $token) == 1) {
                $bare = LsString::stripNonAlpha($token);
                if ($s = LsArray::inArrayNoCase($bare, PersonTable::$nameParsePrefixes)) {
                    $given[] = $s;
                }
                break;
            }
            $given[] = $token;
        }

        $pre = implode(' ', array_reverse($given));
        if (strlen(trim($pre)) == 0) {
            continue;
        }

        // Collect known suffixes, stopping at the first token that is not one.
        $suffixes = array();
        $tokens = preg_split('/[\\s]+/', $post_text, -1, PREG_SPLIT_NO_EMPTY);
        foreach ($tokens as $token) {
            $case = LsString::checkCase($token);
            if (!$case) {
                continue;
            }
            if ($case == 'lower') {
                break;
            }
            $bare = LsString::stripNonAlpha($token);
            $s = LsArray::inArrayNoCase($bare, PersonTable::$nameParseSuffixes);
            if (!$s) {
                break;
            }
            $suffixes[] = $s;
        }
        $post = trim(implode(' ', $suffixes));

        $full = $pre . ' ' . $matched_last;
        if (strlen($post) > 0) {
            $full .= ', ' . $post;
        }

        $name = array(
            'nameFull' => $full,
            'nameStart' => $pre,
            'nameLast' => $matched_last,
            'namePost' => $post,
        );
    }

    return $name;
}