/**
  * Extracts listing rows from an HTML table and stores them in $this->_rows.
  *
  * Every <tr height="25" bgcolor="#ffffff"> containing exactly four <td> cells
  * yields one row with these keys:
  *  - 'name'  / 'url': text and URL of the first link found in the first cell
  *                     (both null when that cell contains no link)
  *  - 'state': trimmed content of the second cell
  *  - 'years': list of four-digit numbers found in the third cell
  *             (key is omitted when none are present)
  *  - 'party': trimmed content of the fourth cell
  *
  * @param string $text HTML to scan
  * @return void
  */
protected function importRows($text)
 {
     $rows = array();
     $row_pattern = '/<tr\\s+height\\="25" bgcolor="#ffffff">\\s*<td.*?>(.*?)<\\/td><td.*?>(.*?)<\\/td><td.*?>(.*?)<\\/td><td.*?>(.*?)<\\/td>\\s*<\\/tr>/su';
     if (preg_match_all($row_pattern, $text, $matches, PREG_SET_ORDER)) {
         foreach ($matches as $match) {
             // Drop the full-match entry so indexes 0..3 are the four cells.
             array_shift($match);
             // Normalise the cells by value; a by-reference loop would leave a
             // dangling reference to the last element behind.
             $cells = array();
             foreach ($match as $cell) {
                 $cells[] = trim(str_replace('&nbsp;', ' ', $cell));
             }
             $links = LsHtml::matchLinks($cells[0]);
             // The first cell is not guaranteed to contain a link; fall back to
             // null instead of reading an undefined offset.
             $first_link = isset($links[0]) ? $links[0] : array();
             $row = array();
             $row['name'] = isset($first_link['text']) ? $first_link['text'] : null;
             $row['url'] = isset($first_link['url']) ? $first_link['url'] : null;
             $row['state'] = $cells[1];
             if (preg_match_all('/\\d\\d\\d\\d/', $cells[2], $years)) {
                 $row['years'] = $years[0];
             }
             $row['party'] = $cells[3];
             $rows[] = $row;
         }
     }
     $this->_rows = $rows;
 }
 /**
  * Finds a person's BusinessWeek profile page, scrapes education, affiliation
  * and background data from it, and passes the result to $this->import().
  *
  * Steps:
  *  1. Run a site-restricted LsGoogle query for the person's name and take the
  *     first result whose URL looks like person.asp?personId=<digits>.
  *  2. If no result matches, fetch each result page and look for a link whose
  *     text matches the person's name regex and whose URL is a profile URL.
  *  3. Fetch the profile and parse the EDUCATION, OTHER AFFILIATIONS and
  *     BACKGROUND sections. Nothing is imported when no education entries are
  *     found.
  *  4. For each education entry, the 'source' is the Wikipedia article URL
  *     when LsLanguage::getCommonPronouns() returns a truthy result for the
  *     institution against the article's plain text, otherwise the
  *     BusinessWeek profile URL.
  *
  * @param Entity $person entity whose name is searched for
  * @return null always returns null; results are delivered via $this->import()
  */
 function getBusinessWeek(Entity $person)
 {
     $query = 'site:investing.businessweek.com ' . $person->name;
     $google_scraper = new LsGoogle();
     $google_scraper->setQuery($query);
     $this->printDebug($query);
     $google_scraper->execute();
     if (!$google_scraper->getNumResults()) {
         return null;
     }
     $results = $google_scraper->getResults();
     $businessweek_profile = null;
     // First attempt: a search result that is itself a profile URL.
     foreach ($results as $result) {
         $this->printDebug($result->unescapedUrl);
         if (preg_match('/^.*?person\\.asp\\?personId=\\d+/is', $result->unescapedUrl, $match)) {
             $businessweek_profile = $match[0];
             break;
         }
     }
     if (!$businessweek_profile) {
         // Second attempt: open each result page and look for a profile link
         // whose anchor text matches the person's name.
         foreach ($results as $result) {
             $url = $result->unescapedUrl;
             // Keep only the part of the URL before the first '&'.
             if (preg_match('/^(.*?)\\&/is', $url, $match)) {
                 $url = $match[1];
             }
             // Prefix the site host unless the URL already carries a scheme
             // (https is checked too, so absolute https URLs are not mangled).
             if (!stristr($url, 'http://') && !stristr($url, 'https://')) {
                 $url = 'http://investing.businessweek.com/' . $url;
             }
             $this->printDebug('new url: ' . $url);
             if (!$this->browser->get($url)->responseIsError()) {
                 $text = $this->browser->getResponseText();
                 $links = LsHtml::matchLinks($text);
                 foreach ($links as $link) {
                     if (preg_match('/' . $person->getNameRegex(true) . '/s', $link['text']) && preg_match('/^.*?person\\.asp\\?personId=\\d+/is', $link['url'], $match)) {
                         $url = $match[0];
                         if (!stristr($url, 'http://') && !stristr($url, 'https://')) {
                             $url = 'http://investing.businessweek.com/' . $url;
                         }
                         $businessweek_profile = $url;
                         break;
                     }
                 }
                 if ($businessweek_profile) {
                     $this->printDebug('Businessweek profile found on 2nd attempt: ' . $businessweek_profile);
                     break;
                 }
             }
         }
         if (!$businessweek_profile) {
             $this->printDebug('Buisnessweek profile not found');
             return null;
         }
     }
     // Go to the BusinessWeek profile and parse its sections.
     $this->browser->get($businessweek_profile);
     $text = $this->browser->getResponseText();
     if (!$text) {
         $this->printDebug('Businessweek browser error');
         return null;
     }
     // Education: group 1 = degree, group 2 = optional year, group 3 = institution.
     $education_found = array();
     $ed_matched = false;
     if (preg_match('#EDUCATION[\\*]?<\\/h2>[\\n\\s]*(.+?)\\<h2#is', $text, $education)) {
         $ed_matched = preg_match_all('/<strong>(.+?)<\\/strong>\\s*(\\d{4})?\\s*<\\/div><div.*?>(.+?)</s', $education[1], $education_found);
     }
     // Affiliations: link texts inside the OTHER AFFILIATIONS section. Stays an
     // empty list when the section is missing, so the loop below is a no-op
     // instead of iterating over an unset match array.
     $companies = array();
     if (preg_match('#OTHER AFFILIATIONS[\\*]?<\\/h2>[\\n\\s]*(.+?)\\<\\/td#s', $text, $employment)) {
         if (preg_match_all('#href\\=\\".+?\\"\\>(.+?)\\<\\/a\\>#is', $employment[1], $employment_found)) {
             $companies = $employment_found[1];
         }
     }
     // Background summary: empty string when the section is missing.
     $summary = '';
     if (preg_match('#BACKGROUND[\\*]?<\\/h2>[\\n\\s]*(.+?)\\<\\/p>#s', $text, $summary_match)) {
         $summary = strip_tags($summary_match[1]);
     }
     if (!$ed_matched) {
         $this->printDebug('Education info not found at Businessweek');
         return null;
     }
     $this->printDebug('Education info found at Businessweek');
     $education_history = null;
     $employment_history = null;
     $wikipedia = new LsWikipedia();
     $wikipedia->request($person->name);
     $wikipedia->execute();
     $plaintext = $wikipedia->getPlainText();
     foreach ($education_found[3] as $key => $institution) {
         $arr = array();
         $arr['institution'] = $institution;
         $arr['degree'] = $education_found[1][$key];
         // The year group is optional in the pattern and is '' when absent.
         $arr['year'] = $education_found[2][$key] != '' ? $education_found[2][$key] : null;
         $wikipedia_matches = LsLanguage::getCommonPronouns($arr['institution'], $plaintext, array_merge(LsLanguage::$business, LsLanguage::$schools, LsLanguage::$grammar));
         if ($wikipedia_matches) {
             $arr['source'] = 'http://en.wikipedia.org/wiki/' . str_replace('+', '_', $wikipedia->getTitle());
         } else {
             $arr['source'] = $businessweek_profile;
         }
         $education_history[] = (object) $arr;
     }
     foreach ($companies as $company) {
         // Only the company name is captured; no title is parsed from the page.
         $employment_history[] = (object) array('company' => $company, 'title' => null);
     }
     $possible_person = array('name' => $person->name, 'summary' => $summary, 'employment_history' => (object) $employment_history, 'education' => (object) $education_history);
     $possible_persons = array((object) $possible_person);
     $this->import($person, $possible_persons);
 }
 /**
  * Parses every <table class="wikitable"> found in $this->_content and stores
  * the result in $this->_wikiTables.
  *
  * The stored structure is keyed 'table1', 'table2', ...; each table is keyed
  * 'row1', 'row2', ...; each row is keyed 'cell1', 'cell2', ... Every cell is
  * an array with:
  *  - 'str':   the cell HTML passed through LsHtml::stripTags(),
  *             LsHtml::replaceEntities() and LsString::spacesToSpace()
  *  - 'links': the result of LsHtml::matchLinks() on the raw cell HTML
  *
  * Only <td> cells are collected, so a row without any <td> is stored as an
  * empty array.
  *
  * @return void
  */
 public function setWikiTables()
 {
     $tables = array();
     $found_tables = preg_match_all('/<table\\s+class\\="wikitable"[^>]*>(.*?)<\\/table/isu', $this->_content, $table_hits);
     $table_bodies = $found_tables ? $table_hits[1] : array();
     foreach ($table_bodies as $t => $table_html) {
         $rows = array();
         $found_rows = preg_match_all('/<tr[^>]*>(.*?)<\\/tr/isu', $table_html, $row_hits);
         $row_bodies = $found_rows ? $row_hits[1] : array();
         foreach ($row_bodies as $r => $row_html) {
             $cells = array();
             $found_cells = preg_match_all('/<td[^>]*>(.*?)<\\/td/isu', $row_html, $cell_hits);
             $cell_bodies = $found_cells ? $cell_hits[1] : array();
             foreach ($cell_bodies as $c => $cell_html) {
                 // Match offsets are zero-based; the stored keys are one-based.
                 $cells['cell' . ($c + 1)] = array(
                     'str' => LsString::spacesToSpace(LsHtml::replaceEntities(LsHtml::stripTags($cell_html))),
                     'links' => LsHtml::matchLinks($cell_html),
                 );
             }
             $rows['row' . ($r + 1)] = $cells;
         }
         $tables['table' . ($t + 1)] = $rows;
     }
     $this->_wikiTables = $tables;
 }