예제 #1
0
 /**
  * Extracts candidate names from a bio text.
  *
  * Starts from the matches returned by LsLanguage::getAllNames(), breaks them
  * apart on connector words ("of the", "at", "and", ...) and on possessives
  * ("'s "), strips one trailing punctuation character, and drops every
  * candidate found in the exclusion vocabulary (the LsLanguage word lists plus
  * this entity's own name and its space-separated parts).
  *
  * @param mixed $bio Bio text to scan (presumably a string); when empty,
  *                   $this->Entity->summary is used instead.
  *
  * @return array De-duplicated candidate names, passed through LsArray::strlenSort().
  */
 public function parseBio($bio = null)
 {
     if (!$bio) {
         $bio = $this->Entity->summary;
     }

     $name_matches = LsLanguage::getAllNames($bio);

     // Connector patterns, tried in this order for every match.
     $connectors = array('for\\s+the', 'of\\s+the', 'at\\s+the', 'at', 'of', 'the', 'for', 'and');

     // count() is deliberately re-evaluated on every pass: fragments appended
     // below are themselves split again in later iterations.
     for ($i = 0; $i < count($name_matches); $i++) {
         $name = $name_matches[$i];

         foreach ($connectors as $connector) {
             $parts = preg_split('/\\s+' . $connector . '\\s+/isu', $name, -1, PREG_SPLIT_NO_EMPTY);

             // preg_split() returns false on failure (e.g. malformed UTF-8 with
             // the /u modifier); treat that the same as "nothing to split".
             if (!is_array($parts) || count($parts) < 2) {
                 continue;
             }

             if (!in_array($parts[0], LsLanguage::$commonPositions)) {
                 $name_matches = array_merge($name_matches, $parts);
             } else {
                 // The leading fragment is a known position title: drop it and
                 // keep working on the remainder, re-joined with a plain-text
                 // version of the connector.
                 // NOTE(review): the remainder is only used for the later splits
                 // of this entry; it is not itself appended to $name_matches.
                 // Confirm that is intended.
                 array_shift($parts);
                 $glue = str_replace('\\s+', ' ', $connector);
                 $name = implode(" {$glue} ", $parts);
             }
         }

         $parts = preg_split('/\'s\\s+/isu', $name, -1, PREG_SPLIT_NO_EMPTY);
         if (is_array($parts) && count($parts) > 1) {
             $name_matches = array_merge($name_matches, $parts);
         }
     }

     // Add a possessive-free variant of every match ("X's Y" -> "X Y").
     // The variants go into a separate list because appending to an array
     // inside a by-value foreach over that same array does not extend the
     // iteration, so they would never be looked at.
     $candidates = $name_matches;
     foreach ($name_matches as $match) {
         $variant = str_replace("'s ", " ", $match);
         if ($variant !== $match) {
             $candidates[] = $variant;
         }
     }

     $exclude = array_merge(
         LsLanguage::$regions,
         LsLanguage::$commonFirstNames,
         LsLanguage::$commonLastNames,
         LsLanguage::$states,
         LsLanguage::$commonCities,
         LsLanguage::$grammar,
         LsLanguage::$weekdays,
         LsLanguage::$months,
         LsLanguage::$geography,
         LsLanguage::$possessives,
         explode(' ', $this->Entity->name),
         array($this->Entity->name),
         LsLanguage::$schools,
         LsLanguage::$commonPositions
     );

     $names = array();
     foreach ($candidates as $candidate) {
         $candidate = trim($candidate);

         // Strip a single trailing comma, period, apostrophe or right single quote.
         // preg_replace() returns null on failure; keep the trimmed text then.
         $stripped = preg_replace('/[\\,\\.\'\\’]$/isu', '', $candidate);
         if ($stripped !== null) {
             $candidate = $stripped;
         }

         if (!in_array($candidate, $exclude)) {
             $names[] = $candidate;
         }
     }

     return LsArray::strlenSort(array_unique($names));
 }