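 /**
  * Builds a donor from a "<BR>"-separated text fragment: the first segment
  * (tags stripped) is passed to generatePerson(), the second is parsed as a
  * city/state/postal line for the donor's address, and the third becomes the
  * donor's summary.
  */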
 private function generateDonor($text)
 {
     // Segment 0 is the donor name, segment 1 the city/state/postal line,
     // segment 2 the summary. explode() always yields at least one element,
     // but segments 1 and 2 may be absent on short fragments.
     $text_arr = explode("<BR>", $text);
     $donor = $this->generatePerson(LsHtml::stripTags($text_arr[0], ''));

     $address_line = isset($text_arr[1]) ? $text_arr[1] : null;
     $address_arr = LsLanguage::parseCityStatePostal($address_line);

     // Every parsed key is optional: read each one defensively so that an
     // unparseable line yields an address with null fields instead of
     // undefined-index notices.
     $a = new Address();
     $a->street1 = isset($address_arr['street1']) ? $address_arr['street1'] : null;
     $a->street2 = isset($address_arr['street2']) ? $address_arr['street2'] : null;
     $a->city = isset($address_arr['city']) ? $address_arr['city'] : null;
     if (isset($address_arr['state'])) {
         // Only attach a state when the lookup resolves the parsed text.
         $state = AddressStateTable::retrieveByText($address_arr['state']);
         if ($state) {
             $a->State = $state;
         }
     }
     $a->postal = isset($address_arr['zip']) ? $address_arr['zip'] : null;
     $donor->addAddress($a);

     // A missing third segment results in an empty summary.
     $donor->summary = isset($text_arr[2]) ? strip_tags(trim($text_arr[2])) : '';
     return $donor;
 }
 /**
  * Scrapes one Forbes private-company profile page and stores the result.
  *
  * Fetches $url through $this->browser and extracts rank, name, industry,
  * address, phone, fax, website, CEO, summary, revenue and employee count with
  * regexes chosen by $this->year (the markup differs between the 1996-1999,
  * 2000-2004 and 2005+ layouts). If an Org/PrivateCompany entity with the same
  * name already exists only its revenue is updated; otherwise a new entity is
  * created together with its phone and address. In both cases a reference to
  * $url is attached and the entity is passed to saveToList() with its rank.
  *
  * Industry, fax and CEO data are parsed but not persisted by this method.
  *
  * @param string $url profile page to fetch
  * @return void returns early, saving nothing, when the page cannot be
  *              fetched or no company name can be extracted
  */
 protected function import($url)
 {
     if ($this->browser->get($url)->responseIsError()) {
         $this->printDebug("Couldn't get company: " . $url);
         return;
     }
     $text = $this->browser->getResponseText();
     $rank = null;
     $name = null;
     $industryName = null;
     $street1 = null;
     $street2 = null;
     $city = null;
     $state = null;
     $postal = null;
     $phone = null;
     $fax = null;
     $website = null;
     $summary = null;
     $revenue = null;
     $employees = null;
     $ceoName = null;
     $ceoBirthYear = null;

     //get rank
     if ($this->year > 1999 && $this->year < 2005 && preg_match('/ForbesListRank" content="(\\d+)"/i', $text, $match)) {
         $rank = $match[1];
     } elseif ($this->year < 2000 && preg_match('/td class="highlightcolor1">(\\d+)/i', $text, $match)) {
         $rank = $match[1];
     } elseif ($this->year > 2004 && preg_match('/<b>#(\\d+) ([^<]+)<\\/b>/i', $text, $match)) {
         $rank = html_entity_decode($match[1]);
     }

     //get name (mandatory: nothing is imported without it)
     if ($this->year > 1995 && $this->year < 2005 && preg_match('/span class="mainlisttitle">([^<]+)<\\/span>/i', $text, $match)) {
         $name = html_entity_decode($match[1]);
     } elseif ($this->year > 2004 && preg_match('/<b>#(\\d+) ([^<]+)<\\/b>/i', $text, $match)) {
         $name = html_entity_decode($match[2]);
     } else {
         $this->printDebug("Company name not found");
         return;
     }

     //get industry
     if ($this->year > 1995 && $this->year < 2001 && preg_match('/<b>See more private companies in <a [^>]+>([^<]+)<\\/a><\\/b>/ism', $text, $match)) {
         $industryName = trim(html_entity_decode($match[1]));
     } elseif ($this->year > 2000 && $this->year < 2005 && preg_match('/private companies\\<\\/a> in ([^\\.]+)/ism', $text, $match)) {
         $industryName = trim(html_entity_decode($match[1]));
     } elseif ($this->year > 2004 && preg_match('/<b>Industry:<\\/b> <a href="[^"]+">([^<]+)<\\/a>/ism', $text, $match)) {
         $industryName = trim(html_entity_decode($match[1]));
     }

     //get address
     if ($this->year > 1995 && $this->year < 2000 && preg_match('/<td class="mainlisttxt"\\>(.+)phone/smU', $text, $match)) {
         list($street1, $street2, $city, $state, $postal) = $this->parseForbesContactLines($match[1]);
     } elseif ($this->year > 1999 && $this->year < 2005 && preg_match('/(view private companies under this industry|in the same industry).+<br><br>(.+)phone/is', $text, $match)) {
         // Group 1 is the alternation that anchors the match; the address
         // fragment is captured by group 2.
         list($street1, $street2, $city, $state, $postal) = $this->parseForbesContactLines($match[2]);
     } elseif ($this->year > 2004 && preg_match('/<div class="spaced">(.+)<\\/div>/ismU', $text, $match)) {
         $contactLines = explode('<br>', $match[1]);
         $lineCount = count($contactLines);
         // Skip blocks that start directly with phone/fax lines: they carry no street address.
         $firstIsContact = preg_match('/Phone\\:|Fax\\:/i', $contactLines[0]);
         $secondIsContact = isset($contactLines[1]) && preg_match('/Phone\\:|Fax\\:/i', $contactLines[1]);
         if (!$firstIsContact && !$secondIsContact) {
             $street1 = trim($contactLines[0]);
             $cityLine = null;
             if ($lineCount == 4) {
                 $cityLine = $contactLines[1];
             } elseif ($lineCount == 5) {
                 $street2 = $contactLines[1];
                 $cityLine = $contactLines[2];
             }
             if ($cityLine !== null && preg_match('/^(.+?) ([A-Z]{2}) (\\d{5})($|-)/sU', trim($cityLine), $cityMatch)) {
                 $city = $cityMatch[1];
                 $state = $cityMatch[2];
                 $postal = $cityMatch[3];
             }
         }
     }

     //get phone
     if ($this->year > 1995 && $this->year < 2005 && preg_match('/phone ([\\d\\-]{12})/is', $text, $match)) {
         $phone = trim(str_replace('-', '', $match[1]));
     } elseif ($this->year > 2004 && preg_match('/Phone: ([\\d\\-]{12})/is', $text, $match)) {
         $phone = trim(str_replace('-', '', $match[1]));
     }

     //get fax (parsed for completeness; not stored on the entity)
     if ($this->year > 1995 && $this->year < 2005 && preg_match('/fax ([\\d\\-]{12})/is', $text, $match)) {
         $fax = trim(str_replace('-', '', $match[1]));
     } elseif ($this->year > 2004 && preg_match('/Fax: ([\\d\\-]{12})/is', $text, $match)) {
         $fax = trim(str_replace('-', '', $match[1]));
     }

     //get website
     if ($this->year > 1995 && $this->year < 2005 && preg_match('/this company\'s web site[^>]+\\>(http[^\\<]+)/is', $text, $match)) {
         $website = $match[1];
     } elseif ($this->year > 2004 && preg_match('/<div class="spaced">.*<\\/div>\\s+<br>\\s+<a href="(http:\\/\\/[^"]+)">/ismU', $text, $match)) {
         $website = $match[1];
     }

     //get ceo
     if ($this->year > 1995 && $this->year < 2005 && preg_match('/b>CEO: ([^<]+)<\\/b>/ism', $text, $match)) {
         $ceoName = $match[1];
     } elseif ($this->year > 2004 && preg_match('/CEO: ([^<]+)<\\/b> , (\\d+) <br>/ism', $text, $match)) {
         $ceoName = html_entity_decode($match[1]);
         // The number after the name is presumably the CEO's age, so the
         // birth year is the current year minus that number.
         // NOTE(review): this uses today's year, not $this->year - confirm.
         $ceoBirthYear = date("Y") - $match[2];
     }

     //get summary
     $rawSummary = null;
     if ($this->year > 1995 && $this->year < 2000 && preg_match_all('/p class="mainlisttxt">(.*)<\\/p>/ismU', $text, $match)) {
         // The summary is the second matching paragraph on these pages; it may be absent.
         if (isset($match[1][1])) {
             $rawSummary = $match[1][1];
         }
     } elseif ($this->year > 1999 && $this->year < 2005 && preg_match('/p class="mainlisttxt">(.*)<\\/p>/ismU', $text, $match)) {
         $rawSummary = $match[1];
     } elseif ($this->year > 2004 && preg_match('/<blockquote class="spaced">(.*)<\\/blockquote>/ismU', $text, $match)) {
         $rawSummary = $match[1];
     }
     if ($rawSummary !== null) {
         $summary = str_replace(array('  ', "\n"), array(' ', ' '), html_entity_decode(trim(strip_tags($rawSummary))));
     }

     //get revenue
     if ($this->year > 1995 && $this->year < 2000 && preg_match('/<td class="mainlisttxt">\\$([\\S]+) mil<sup>e?<\\/sup><\\/td>/ismU', $text, $match)) {
         $this->printDebug($match[1]);
         $revenue = str_replace(",", "", $match[1] . ",000,000");
     } elseif ($this->year > 1999 && $this->year < 2005 && preg_match('/<td class="mainlisttxt" nowrap>([^<]+)<sup>e?<\\/sup><\\/td>/ismU', $text, $match)) {
         $this->printDebug($match[1]);
         $revenue = str_replace(",", "", $match[1] . ",000,000");
     } elseif ($this->year > 2004 && preg_match('/<td class="highlight" nowrap="nowrap">\\$([\\S]+) bil.*<\\/td> <td class="highlight" nowrap="nowrap">[^<]+<\\/td> <td class="highlight" nowrap="nowrap">([^<]+)<\\/td>/ismU', $text, $match)) {
         // Strip thousands separators and only multiply genuinely numeric text.
         $billions = str_replace(',', '', $match[1]);
         if (is_numeric($billions)) {
             $revenue = 1000000000 * $billions;
         }
     }

     //get employees
     if ($this->year > 1995 && $this->year < 2005 && preg_match('/mil<\\/td>.+<td class="mainlisttxt"( nowrap)?>(\\d[^<]+)<\\/td>.+<td class="mainlisttxt">[a-zA-Z]+<\\/td>/ismU', $text, $match)) {
         $employees = str_replace(',', '', $match[2]);
     } elseif ($this->year > 1999 && $this->year < 2005 && preg_match('/<sup>e?<\\/sup><\\/td> <td class="mainlisttxt"( nowrap)?>(\\d[^<]+)<sup>e?<\\/sup><\\/td> <td class="mainlisttxt">[a-zA-Z]+<\\/td>/ismU', $text, $match)) {
         $employees = str_replace(',', '', $match[2]);
     } elseif ($this->year > 2004 && preg_match('/<td class="highlight" nowrap="nowrap">([\\d,]+)<\\/td> <td class="highlight" nowrap="nowrap">[A-Z][a-z]{2,}<\\/td>/', $text, $match)) {
         $employees = str_replace(',', '', $match[1]);
     }

     // Name with common business words removed; only used for the debug line below.
     $search_company_name = trim(implode(' ', array_diff(explode(' ', ucwords(strtolower($name))), array_merge(LsLanguage::$business, LsLanguage::$businessAbbreviations))));
     $this->printDebug("{$search_company_name} == {$name}");

     $company = EntityTable::getByExtensionQuery(array('Org', 'PrivateCompany'))->addWhere("LOWER(REPLACE( org.name, '-' , '')) = ?", strtolower($name))->fetchOne();
     if ($company) {
         // Existing companies only get their revenue refreshed.
         $this->printDebug("Company exists");
         $company->revenue = $revenue;
         $company->save();
     } else {
         $this->printDebug("Creating new company {$name}");
         Doctrine::getTable('ExtensionDefinition')->clear();
         $company = new Entity();
         $company->addExtension('Org');
         $company->addExtension('Business');
         $company->addExtension('PrivateCompany');
         $company->name = LsLanguage::titleize($name);
         $company->employees = strlen($employees) ? $employees : null;
         $company->revenue = strlen($revenue) ? $revenue : null;
         $company->website = strlen($website) ? $website : null;
         $company->summary = strlen($summary) ? trim($summary) : null;
         if ($phone) {
             $company->addPhone($phone);
         }
         //add address (requires at least a city and a state)
         if ($city && $state) {
             $address = new Address();
             $address->street1 = strlen($street1) ? $street1 : null;
             $address->street2 = strlen($street2) ? $street2 : null;
             $address->city = $city;
             $stateRecord = AddressStateTable::retrieveByText($state);
             if ($stateRecord) {
                 $address->State = $stateRecord;
             }
             $address->postal = $postal;
             $company->addAddress($address);
             $address->save();
             // Arguments: source, excerpt, fields, name, detail, date.
             $address->addReference($url, null, array('city', 'country_id', 'postal', 'state_id', 'street1'), 'Forbes.com', null, null);
         }
     }

     $company->save();
     // Arguments: source, excerpt, fields, name, detail, date.
     $company->addReference($url, null, array('website', 'name', 'summary', 'revenue', 'employees'), 'Forbes.com', null, null);
     $this->saveToList($company, $rank);
 }

 /**
  * Splits a "<br>"-separated contact fragment from the pre-2005 page layouts
  * into address parts.
  *
  * The last segment (the text directly preceding the word "phone") is
  * discarded. With exactly three remaining lines the middle one is street2 and
  * the third is the city/state/postal line; otherwise the second line is the
  * city/state/postal line, which is parsed by LsLanguage::parseCityStatePostal().
  *
  * @param string $fragment raw HTML captured between the address start and "phone"
  * @return array list of street1, street2, city, state, postal (null where unavailable)
  */
 private function parseForbesContactLines($fragment)
 {
     $contactLines = explode('<br>', trim($fragment));
     array_pop($contactLines);
     $lineCount = count($contactLines);

     $street1 = $lineCount > 0 ? $contactLines[0] : null;
     $street2 = null;
     $cityLine = null;
     if ($lineCount == 3) {
         $street2 = $contactLines[1];
         $cityLine = $contactLines[2];
     } elseif ($lineCount > 1) {
         $cityLine = $contactLines[1];
     }

     $city = null;
     $state = null;
     $postal = null;
     if ($cityLine !== null) {
         $parsed = LsLanguage::parseCityStatePostal($cityLine);
         $city = isset($parsed['city']) ? $parsed['city'] : null;
         $state = isset($parsed['state']) ? $parsed['state'] : null;
         $postal = isset($parsed['zip']) ? $parsed['zip'] : null;
     }

     return array($street1, $street2, $city, $state, $postal);
 }