/**
 * Extracts candidate names mentioned in a biography text.
 *
 * The raw matches returned by LsLanguage::getAllNames() are expanded by
 * splitting them on connector words ("of the", "at", "and", ...) and on
 * possessives ("'s "), cleaned of a single trailing punctuation character,
 * and filtered against the LsLanguage stop lists plus the entity's own name.
 *
 * @param string|null $bio Text to scan; when empty, $this->Entity->summary is used.
 *
 * @return array Unique candidate names, passed through LsArray::strlenSort()
 *               (presumably ordered by string length -- confirm against LsArray).
 */
public function parseBio($bio = null)
{
    if (!$bio) {
        $bio = $this->Entity->summary;
    }

    $name_matches = LsLanguage::getAllNames($bio);

    // Regex fragments for the connector words a match is split on.
    // Multi-word connectors come first so they are tried before their parts.
    $connectors = array('for\\s+the', 'of\\s+the', 'at\\s+the', 'at', 'of', 'the', 'for', 'and');

    // $name_matches doubles as a work queue: fragments appended below are
    // examined on later iterations, so count() is deliberately re-evaluated.
    for ($i = 0; $i < count($name_matches); $i++) {
        $name = $name_matches[$i];

        foreach ($connectors as $connector) {
            $parts = preg_split('/\\s+' . $connector . '\\s+/isu', $name, -1, PREG_SPLIT_NO_EMPTY);

            // preg_split() returns false on a PCRE error (e.g. invalid UTF-8 with /u).
            if ($parts === false || count($parts) < 2) {
                continue;
            }

            if (!in_array($parts[0], LsLanguage::$commonPositions, true)) {
                $name_matches = array_merge($name_matches, $parts);
            } else {
                // The leading fragment is a position title: drop it and re-join
                // the remainder with the literal form of the connector.
                // NOTE(review): the shortened name is only used for the remaining
                // splits of this iteration and is never queued itself -- confirm
                // whether it should also be added to $name_matches.
                array_shift($parts);
                $glue = ' ' . str_replace('\\s+', ' ', $connector) . ' ';
                $name = implode($glue, $parts);
            }
        }

        $parts = preg_split('/\'s\\s+/isu', $name, -1, PREG_SPLIT_NO_EMPTY);
        if ($parts !== false && count($parts) > 1) {
            $name_matches = array_merge($name_matches, $parts);
        }
    }

    $exclude = array_merge(
        LsLanguage::$regions,
        LsLanguage::$commonFirstNames,
        LsLanguage::$commonLastNames,
        LsLanguage::$states,
        LsLanguage::$commonCities,
        LsLanguage::$grammar,
        LsLanguage::$weekdays,
        LsLanguage::$months,
        LsLanguage::$geography,
        LsLanguage::$possessives,
        explode(' ', $this->Entity->name),
        array($this->Entity->name),
        LsLanguage::$schools,
        LsLanguage::$commonPositions
    );

    $names = array();
    foreach ($name_matches as $match) {
        // Consider both the match as found and its possessive-free form.
        // The variant used to be appended to $name_matches inside this loop,
        // but foreach-by-value never visits elements added during iteration,
        // so it was silently dropped.
        $variants = array($match);
        $withoutPossessive = str_replace("'s ", " ", $match);
        if ($withoutPossessive !== $match) {
            $variants[] = $withoutPossessive;
        }

        foreach ($variants as $variant) {
            // Strip one trailing comma, period, or straight/curly apostrophe.
            $variant = preg_replace('/[\\,\\.\'\\’]$/isu', '', trim($variant));

            // preg_replace() returns null on a PCRE error; an empty string is never a name.
            if ($variant === null || $variant === '') {
                continue;
            }

            if (!in_array($variant, $exclude, true)) {
                $names[] = $variant;
            }
        }
    }

    $names = array_unique($names);
    $names = LsArray::strlenSort($names);

    // A disabled experiment that resolved each name through EntityTable and
    // LsGoogle lookups used to live here as commented-out code; it was removed
    // and remains available in version control.
    return $names;
}