/**
 * Creates a federal political district for a state unless it already exists.
 *
 * Both arguments are trimmed before they are used.
 *
 * @param string $state_name       text handed to AddressStateTable::retrieveByText() to find the state
 * @param string $federal_district district identifier stored on the new record
 *
 * @return mixed false when the district was already present; otherwise the
 *               result of self::getFederalDistrict() after the insert attempt
 *
 * @throws Exception when no state is found for $state_name
 */
static function addFederalDistrict($state_name, $federal_district)
 {
     $state_name = trim($state_name);
     $federal_district = trim($federal_district);

     // Already on file: report "nothing added".
     if (self::getFederalDistrict($state_name, $federal_district)) {
         return false;
     }

     $stateRecord = AddressStateTable::retrieveByText($state_name);
     if (!$stateRecord) {
         throw new Exception("AddressStateTable did not return state");
     }

     // An empty district identifier is never persisted.
     if (strlen($federal_district) > 0) {
         $record = new PoliticalDistrict();
         $record->State = $stateRecord;
         $record->federal_district = $federal_district;
         $record->save();
     }

     return self::getFederalDistrict($state_name, $federal_district);
 }
Example #2
0
 /**
  * Imports one school row as a new Org/School entity, unless an org whose
  * name contains the school's name is already stored.
  *
  * Reads instnm, addr, street2, city, stabbr, zip, ialias and webaddr from
  * $school. Alias save failures are logged and ignored.
  *
  * @param object $school source row describing the institution
  *
  * @return void
  */
 public function import($school)
 {
     $existing = EntityTable::getByExtensionQuery('Org')
         ->addWhere('LOWER(org.name) LIKE ?', '%' . strtolower($school->instnm) . "%")
         ->fetchOne();
     if ($existing) {
         $this->printDebug("School exists in database: " . $school->instnm);
         return;
     }

     $address = new Address();
     $address->street1 = isset($school->addr) ? $school->addr : null;
     $address->street2 = isset($school->street2) ? $school->street2 : null;
     $address->city = $school->city;
     if ($state = AddressStateTable::retrieveByText($school->stabbr)) {
         $address->State = $state;
     }
     $address->postal = $school->zip;

     // Keep an address that already carries a scheme (the original dropped it
     // and stored null) and never build a bare "http://" from an empty value.
     $website = null;
     $webaddr = trim($school->webaddr);
     if (strlen($webaddr)) {
         if (preg_match('/^https?\\:\\/\\//i', $webaddr)) {
             $website = $webaddr;
         } else {
             $website = "http://" . strtolower($webaddr);
         }
     }
     $this->printDebug($website);

     $newschool = new Entity();
     $newschool->addExtension('Org');
     $newschool->addExtension('School');
     $newschool->name = $school->instnm;
     $newschool->website = $website;
     $newschool->addAddress($address);
     $newschool->save();

     foreach (explode("|", $school->ialias) as $alias) {
         // An empty ialias explodes to a single empty string; skip blanks
         // instead of attempting to save a nameless alias.
         $alias = trim($alias);
         if (!strlen($alias)) {
             continue;
         }
         try {
             $newalias = new Alias();
             $newalias->Entity = $newschool;
             $newalias->name = $alias;
             $newalias->save();
         } catch (Exception $e) {
             $this->printDebug("An alias exception. No biggie. It's most likely that the name already exists. so we ignore it and move on: " . $e);
         }
     }
     $this->printDebug("Adding new school: " . $school->instnm);
 }
 /**
  * Builds a donor from a "<BR>"-separated block of text.
  *
  * Segment 0 is the donor name (tags stripped before it goes to
  * generatePerson()), segment 1 is parsed by
  * LsLanguage::parseCityStatePostal() into an address, and segment 2 becomes
  * the summary once trimmed and stripped of tags.
  *
  * @param string $text raw donor text
  *
  * @return mixed the object returned by generatePerson(), with the address
  *               attached and the summary set
  */
 private function generateDonor($text)
 {
     $segments = explode("<BR>", $text);

     $person = $this->generatePerson(LsHtml::stripTags($segments[0], ''));

     $parsed = LsLanguage::parseCityStatePostal($segments[1]);
     $location = new Address();
     // Street lines are optional in the parsed result.
     foreach (array('street1', 'street2') as $field) {
         $location->{$field} = isset($parsed[$field]) ? $parsed[$field] : null;
     }
     $location->city = $parsed['city'];

     // The state is only attached when the lookup finds one.
     $stateRecord = AddressStateTable::retrieveByText($parsed['state']);
     if ($stateRecord) {
         $location->State = $stateRecord;
     }
     $location->postal = $parsed['zip'];

     $person->addAddress($location);
     $person->summary = strip_tags(trim($segments[2]));

     return $person;
 }
Example #4
0
 /**
  * Converts a decoded geocoder response into an Address.
  *
  * Only the first entry of $arr['results'] is used. Its address_components
  * are mapped onto postal, state_id, county, street1, street2 and city, and
  * geometry.location supplies latitude and longitude. The response layout
  * looks like the Google Geocoding API JSON format (presumably; confirm
  * against the caller).
  *
  * City selection: when both a locality and a sublocality are present the
  * locality wins, except that for 'New York' any sublocality other than
  * 'Manhattan' is used instead. Without a locality,
  * administrative_area_level_3 is the fallback.
  *
  * When the response has no results the returned Address carries only the
  * country (and null coordinates).
  *
  * @param array $arr decoded geocoder response
  *
  * @return Address
  */
 static function parseGeoArray($arr)
 {
     $raw_address = isset($arr['results'][0]) ? $arr['results'][0] : array();
     $components = isset($raw_address['address_components']) ? $raw_address['address_components'] : array();

     $address = new Address();
     //COUNTRY (USA only for now)
     $address->country_id = 1;

     foreach ($components as $component) {
         $types = isset($component['types']) ? (array) $component['types'] : array();
         $value = isset($component['long_name']) ? $component['long_name'] : null;

         // Explicit membership tests, first match wins. The previous
         // switch ($types) { case in_array(...) } form loosely compared an
         // array with a boolean, so a component with empty types matched the
         // first case and overwrote the postal code.
         if (in_array('postal_code', $types, true)) {
             $address->postal = $value;
         } elseif (in_array('administrative_area_level_1', $types, true)) {
             if ($state = AddressStateTable::retrieveByText($value)) {
                 $address->state_id = $state->id;
             }
         } elseif (in_array('administrative_area_level_2', $types, true)) {
             $address->county = $value;
         } elseif (in_array('administrative_area_level_3', $types, true)) {
             $admin_area_3 = $value;
         } elseif (in_array('locality', $types, true)) {
             $locality = $value;
         } elseif (in_array('sublocality', $types, true)) {
             $sublocality = $value;
         } elseif (in_array('route', $types, true)) {
             $street = $value;
         } elseif (in_array('street_number', $types, true)) {
             $street_number = $value;
         } elseif (in_array('subpremise', $types, true)) {
             $address->street2 = $value;
         }
     }

     if (isset($locality) && isset($sublocality)) {
         if ($locality == 'New York' && $sublocality != 'Manhattan') {
             $address->city = $sublocality;
         } else {
             $address->city = $locality;
         }
     } elseif (isset($locality)) {
         $address->city = $locality;
     } elseif (isset($admin_area_3)) {
         $address->city = $admin_area_3;
     }

     // street1 needs both parts; a route without a number is not stored.
     if (isset($street) && isset($street_number)) {
         $address->street1 = $street_number . " " . $street;
     }

     $location = isset($raw_address['geometry']['location']) ? $raw_address['geometry']['location'] : array();
     $address->latitude = isset($location['lat']) ? $location['lat'] : null;
     $address->longitude = isset($location['lng']) ? $location['lng'] : null;

     // NOTE(review): presumably throttles successive geocoder requests; kept
     // because callers may rely on the delay. Confirm before removing.
     sleep(1);

     return $address;
 }
 /**
  * Scrapes one company profile page and creates or updates the matching
  * private-company entity.
  *
  * The page layout depends on $this->year, so every field is extracted with
  * a year-specific regular expression. When an Org/PrivateCompany whose
  * lower-cased, hyphen-stripped stored name equals the lower-cased scraped
  * name already exists, only its revenue is updated; otherwise a new
  * Org/Business/PrivateCompany entity is created from the scraped details
  * (including phone and, when city and state were found, an address). In
  * both cases the entity is saved, given a 'Forbes.com' reference and passed
  * to saveToList() together with the scraped rank.
  *
  * Returns early, saving nothing, when the page cannot be fetched or no
  * company name is found.
  *
  * @param string $url address of the company profile page
  *
  * @return void
  */
 protected function import($url)
 {
     $company = null;
     if ($this->browser->get($url)->responseIsError()) {
         $this->printDebug("Couldn't get company: " . $url);
         return;
     }
     $text = $this->browser->getResponseText();

     $rank = null;
     $name = null;
     $industryName = null;
     $street1 = null;
     $street2 = null;
     $city = null;
     $state = null;
     $postal = null;
     $phone = null;
     $fax = null;
     $website = null;
     $summary = null;
     $revenue = null;
     $employees = null;
     $ceoName = null;
     $ceoBirthYear = null;

     //get rank
     if ($this->year > 1999 && $this->year < 2005 && preg_match('/ForbesListRank" content="(\\d+)"/i', $text, $match)) {
         $rank = $match[1];
     } elseif ($this->year < 2000 && preg_match('/td class="highlightcolor1">(\\d+)/i', $text, $match)) {
         $rank = $match[1];
     } elseif ($this->year > 2004 && preg_match('/<b>#(\\d+) ([^<]+)<\\/b>/i', $text, $match)) {
         $rank = html_entity_decode($match[1]);
     }

     //get name
     if ($this->year > 1995 && $this->year < 2005 && preg_match('/span class="mainlisttitle">([^<]+)<\\/span>/i', $text, $match)) {
         $name = html_entity_decode($match[1]);
     } elseif ($this->year > 2004 && preg_match('/<b>#(\\d+) ([^<]+)<\\/b>/i', $text, $match)) {
         $name = html_entity_decode($match[2]);
     } else {
         $this->printDebug("Company name not found");
         return;
     }

     //get industry
     if ($this->year > 1995 && $this->year < 2001 && preg_match('/<b>See more private companies in <a [^>]+>([^<]+)<\\/a><\\/b>/ism', $text, $match)) {
         $industryName = trim(html_entity_decode($match[1]));
     } elseif ($this->year > 2000 && $this->year < 2005 && preg_match('/private companies\\<\\/a> in ([^\\.]+)/ism', $text, $match)) {
         $industryName = trim(html_entity_decode($match[1]));
     } elseif ($this->year > 2004 && preg_match('/<b>Industry:<\\/b> <a href="[^"]+">([^<]+)<\\/a>/ism', $text, $match)) {
         $industryName = trim(html_entity_decode($match[1]));
     }

     //get address
     // The two pre-2005 layouts share one "<br>"-separated contact block and
     // differ only in how that block is located on the page.
     $contactHtml = null;
     if ($this->year > 1995 && $this->year < 2000 && preg_match('/<td class="mainlisttxt"\\>(.+)phone/smU', $text, $match)) {
         $contactHtml = $match[1];
     } elseif ($this->year > 1999 && $this->year < 2005 && preg_match('/(view private companies under this industry|in the same industry).+<br><br>(.+)phone/is', $text, $match)) {
         // Group 1 is only the anchor phrase; the address block is group 2.
         $contactHtml = $match[2];
     } elseif ($this->year > 2004 && preg_match('/<div class="spaced">(.+)<\\/div>/ismU', $text, $match)) {
         $contactLines = explode('<br>', $match[1]);
         $secondLine = isset($contactLines[1]) ? $contactLines[1] : '';
         // Skip blocks that start with phone/fax lines: they hold no street address.
         if (!preg_match('/Phone\\:|Fax\\:/i', $contactLines[0]) && !preg_match('/Phone\\:|Fax\\:/i', $secondLine)) {
             $street1 = trim($contactLines[0]);
             $lineCount = count($contactLines);
             $cityLine = null;
             if ($lineCount == 4) {
                 $cityLine = $contactLines[1];
             } elseif ($lineCount == 5) {
                 $street2 = $contactLines[1];
                 $cityLine = $contactLines[2];
             }
             if ($cityLine !== null && preg_match('/^(.+?) ([A-Z]{2}) (\\d{5})($|-)/sU', trim($cityLine), $cityMatch)) {
                 $city = $cityMatch[1];
                 $state = $cityMatch[2];
                 $postal = $cityMatch[3];
             }
         }
     }
     if ($contactHtml !== null) {
         $contactLines = explode('<br>', trim($contactHtml));
         // Drop the trailing fragment that sits between the last <br> and "phone".
         array_pop($contactLines);
         $lineCount = count($contactLines);
         if ($lineCount > 0) {
             $street1 = $contactLines[0];
         }
         $cityLine = null;
         if ($lineCount == 3) {
             // street1 / street2 / city-state-zip
             $street2 = $contactLines[1];
             $cityLine = $contactLines[2];
         } elseif ($lineCount > 1) {
             $cityLine = $contactLines[1];
         }
         if ($cityLine !== null) {
             $city_state_zip = LsLanguage::parseCityStatePostal($cityLine);
             $city = $city_state_zip['city'];
             $state = $city_state_zip['state'];
             $postal = $city_state_zip['zip'];
         }
     }

     //get phone
     if ($this->year > 1995 && $this->year < 2005 && preg_match('/phone ([\\d\\-]{12})/is', $text, $match)) {
         $phone = trim(str_replace('-', '', $match[1]));
     } elseif ($this->year > 2004 && preg_match('/Phone: ([\\d\\-]{12})/is', $text, $match)) {
         $phone = trim(str_replace('-', '', $match[1]));
     }

     //get fax
     if ($this->year > 1995 && $this->year < 2005 && preg_match('/fax ([\\d\\-]{12})/is', $text, $match)) {
         $fax = trim(str_replace('-', '', $match[1]));
     } elseif ($this->year > 2004 && preg_match('/Fax: ([\\d\\-]{12})/is', $text, $match)) {
         $fax = trim(str_replace('-', '', $match[1]));
     }

     //get website
     if ($this->year > 1995 && $this->year < 2005 && preg_match('/this company\'s web site[^>]+\\>(http[^\\<]+)/is', $text, $match)) {
         $website = $match[1];
     } elseif ($this->year > 2004 && preg_match('/<div class="spaced">.*<\\/div>\\s+<br>\\s+<a href="(http:\\/\\/[^"]+)">/ismU', $text, $match)) {
         $website = $match[1];
     }

     //get ceo
     if ($this->year > 1995 && $this->year < 2005 && preg_match('/b>CEO: ([^<]+)<\\/b>/ism', $text, $match)) {
         $ceoName = $match[1];
     } elseif ($this->year > 2004 && preg_match('/CEO: ([^<]+)<\\/b> , (\\d+) <br>/ism', $text, $match)) {
         $ceoName = html_entity_decode($match[1]);
         // The number after the name is subtracted from the current year
         // (the original split this subtraction into two statements).
         $ceoBirthYear = (int) date("Y") - (int) $match[2];
     }

     //get summary
     if ($this->year > 1995 && $this->year < 2000 && preg_match_all('/p class="mainlisttxt">(.*)<\\/p>/ismU', $text, $match)) {
         // For these years the second matching paragraph is the one used.
         $summary = str_replace(array('  ', "\n"), array(' ', ' '), html_entity_decode(trim(strip_tags($match[1][1]))));
     } elseif ($this->year > 1999 && $this->year < 2005 && preg_match('/p class="mainlisttxt">(.*)<\\/p>/ismU', $text, $match)) {
         $summary = str_replace(array('  ', "\n"), array(' ', ' '), html_entity_decode(trim(strip_tags($match[1]))));
     } elseif ($this->year > 2004 && preg_match('/<blockquote class="spaced">(.*)<\\/blockquote>/ismU', $text, $match)) {
         $summary = str_replace(array('  ', "\n"), array(' ', ' '), html_entity_decode(trim(strip_tags($match[1]))));
     }

     //get revenue
     if ($this->year > 1995 && $this->year < 2000 && preg_match('/<td class="mainlisttxt">\\$([\\S]+) mil<sup>e?<\\/sup><\\/td>/ismU', $text, $match)) {
         $this->printDebug($match[1]);
         // Figures are in millions: append six zeros, then strip separators.
         $revenue = str_replace(",", "", $match[1] . ",000,000");
     } elseif ($this->year > 1999 && $this->year < 2005 && preg_match('/<td class="mainlisttxt" nowrap>([^<]+)<sup>e?<\\/sup><\\/td>/ismU', $text, $match)) {
         $this->printDebug($match[1]);
         $revenue = str_replace(",", "", $match[1] . ",000,000");
     } elseif ($this->year > 2004 && preg_match('/<td class="highlight" nowrap="nowrap">\\$([\\S]+) bil.*<\\/td> <td class="highlight" nowrap="nowrap">[^<]+<\\/td> <td class="highlight" nowrap="nowrap">([^<]+)<\\/td>/ismU', $text, $match)) {
         $revenue = 1000000000 * $match[1];
     }

     //get employees
     if ($this->year > 1995 && $this->year < 2005 && preg_match('/mil<\\/td>.+<td class="mainlisttxt"( nowrap)?>(\\d[^<]+)<\\/td>.+<td class="mainlisttxt">[a-zA-Z]+<\\/td>/ismU', $text, $match)) {
         $employees = str_replace(',', '', $match[2]);
     } elseif ($this->year > 1999 && $this->year < 2005 && preg_match('/<sup>e?<\\/sup><\\/td> <td class="mainlisttxt"( nowrap)?>(\\d[^<]+)<sup>e?<\\/sup><\\/td> <td class="mainlisttxt">[a-zA-Z]+<\\/td>/ismU', $text, $match)) {
         $employees = str_replace(',', '', $match[2]);
     } elseif ($this->year > 2004 && preg_match('/<td class="highlight" nowrap="nowrap">([\\d,]+)<\\/td> <td class="highlight" nowrap="nowrap">[A-Z][a-z]{2,}<\\/td>/', $text, $match)) {
         $employees = str_replace(',', '', $match[1]);
     }

     /*$this->printDebug( "URL: ". $url);
       $this->printDebug( "Rank: " . $rank );
       $this->printDebug( "Name: " . $name );
       $this->printDebug( "Industry: " . $industryName );
       $this->printDebug( "Street: " . $street1 );
       $this->printDebug( "Street2: " . $street2 );
       $this->printDebug( "City: " . $city );
       $this->printDebug( "State: " . $state );
       $this->printDebug( "Postal: " . $postal );
       $this->printDebug( "Phone: " . $phone );
       $this->printDebug( "Fax: " . $fax );
       $this->printDebug( "Website: " . $website );
       $this->printDebug( "CEO: " . $ceoName . "  " . $ceoBirthYear);
       $this->printDebug( "Summary: " . $summary );
       $this->printDebug( "Revenue: " . $revenue );
       $this->printDebug( "Employees: " . $employees );*/

     // Name with common business words removed; only used in the debug line below.
     $search_company_name = trim(implode(' ', array_diff(explode(' ', ucwords(strtolower($name))), array_merge(LsLanguage::$business, LsLanguage::$businessAbbreviations))));
     $this->printDebug("{$search_company_name} == {$name}");

     if ($company = EntityTable::getByExtensionQuery(array('Org', 'PrivateCompany'))->addWhere("LOWER(REPLACE( org.name, '-' , '')) = ?", strtolower($name))->fetchOne()) {
         $this->printDebug("Company exists");
         $company->revenue = $revenue;
         $company->save();
     } else {
         $this->printDebug("Creating new company {$name}");
         Doctrine::getTable('ExtensionDefinition')->clear();
         $company = new Entity();
         $company->addExtension('Org');
         $company->addExtension('Business');
         $company->addExtension('PrivateCompany');
         $company->name = LsLanguage::titleize($name);
         $company->employees = strlen($employees) ? $employees : null;
         $company->revenue = strlen($revenue) ? $revenue : null;
         $company->website = strlen($website) ? $website : null;
         $company->summary = strlen($summary) ? trim($summary) : null;

         if ($phone) {
             $company->addPhone($phone);
         }
         // The fax number is parsed but not stored: the original had its
         // addPhone($fax) call commented out.

         //add address
         if ($city && $state) {
             $address = new Address();
             $address->street1 = strlen($street1) ? $street1 : null;
             $address->street2 = strlen($street2) ? $street2 : null;
             $address->city = strlen($city) ? $city : null;
             if ($stateRecord = AddressStateTable::retrieveByText($state)) {
                 $address->State = $stateRecord;
             }
             $address->postal = $postal;
             $company->addAddress($address);
             $address->save();
             // Arguments: source, excerpt, fields, name, detail, date.
             $address->addReference($url, null, array('city', 'country_id', 'postal', 'state_id', 'street1'), 'Forbes.com', null, null);
         }
     }

     $company->save();
     // Arguments: source, excerpt, fields, name, detail, date.
     $company->addReference($url, null, array('website', 'name', 'website', 'summary', 'revenue', 'employees'), 'Forbes.com', null, null);
     $this->saveToList($company, $rank);
 }
 /**
  * Attaches a 'Mailing' address to a person and records its source.
  *
  * Nothing is attached when the state cannot be resolved, and nothing is
  * saved while $this->testMode is set.
  *
  * @param array $address_arr reads street1, street2, city, state and postal
  * @param mixed $person      object the address is added to and that is then saved
  * @param array $person_arr  reads readableXmlUrl, formName and date for the reference
  *
  * @return void
  */
 private function importAddress($address_arr, $person, $person_arr)
 {
     $mailing = new Address();
     $mailing->street1 = LsLanguage::nameize($address_arr['street1']);
     $mailing->street2 = LsLanguage::nameize($address_arr['street2']);
     $mailing->city = $address_arr['city'];
     $mailing->Category = Doctrine::getTable('AddressCategory')->findOneByName('Mailing');

     // An address without a recognised state is discarded.
     $stateRecord = AddressStateTable::retrieveByText($address_arr['state']);
     if (!$stateRecord) {
         return;
     }
     $mailing->State = $stateRecord;
     $mailing->postal = $address_arr['postal'];

     // Only persist outside test mode, and only when addAddress() reports success.
     if ($this->testMode || !$person->addAddress($mailing)) {
         return;
     }

     $person->save();
     $sourceUrl = $person_arr['readableXmlUrl'];
     $referenceName = $this->entity->name . ' ' . $person_arr['formName'];
     $mailing->addReference($sourceUrl, null, null, $referenceName, null, $person_arr['date']);
 }
 /**
  * Attaches a 'Mailing' address to a person and references it to a Form 4.
  *
  * Nothing is attached when the state cannot be resolved.
  *
  * @param array  $address_arr reads street1, street2, city, state and postal
  * @param mixed  $person      object the address is added to and that is then saved
  * @param array  $person_arr  reads form4Url and date for the reference
  * @param string $corp_name   prefix of the reference name ("<corp_name> Form 4")
  *
  * @return void
  */
 private function importAddress($address_arr, $person, $person_arr, $corp_name)
 {
     $mailing = new Address();
     $mailing->street1 = LsLanguage::nameize($address_arr['street1']);
     $mailing->street2 = LsLanguage::nameize($address_arr['street2']);
     $mailing->city = $address_arr['city'];
     $mailing->Category = Doctrine::getTable('AddressCategory')->findOneByName('Mailing');

     // An address without a recognised state is discarded.
     $stateRecord = AddressStateTable::retrieveByText($address_arr['state']);
     if (!$stateRecord) {
         return;
     }
     $mailing->State = $stateRecord;
     $mailing->postal = $address_arr['postal'];

     // The result is not used below; the call is kept as in the original.
     $changedFields = $mailing->getAllModifiedFields();

     if (!$person->addAddress($mailing)) {
         return;
     }

     $person->save();
     $sourceUrl = $person_arr['form4Url'];
     $referenceName = $corp_name . ' Form 4';
     $mailing->addReference($sourceUrl, null, null, $referenceName, null, $person_arr['date']);
 }
 /**
  * Finds or creates the Entity for the candidate with the given CRP id.
  *
  * Each candidate row returned by getOsCandidatesById() that has both a
  * fec_id and a name_last is looked up through getCandidateEntityByFecId();
  * the first hit is returned. When nothing is found, a new
  * Person/PoliticalCandidate entity is built from the first row, tagged with
  * the CRP id and with every collected FEC id whose first letter is P, S or
  * H, and then saved.
  *
  * @param mixed $id CRP candidate id
  *
  * @return mixed null when no candidate rows exist, otherwise the existing
  *               or newly created entity
  */
 public function getCandidateEntityByCrpId($id)
 {
     $candidates = $this->getOsCandidatesById($id);
     if (!count($candidates)) {
         return null;
     }

     //try to find entity by fec_id
     $fecIds = array();
     foreach ($candidates as $candidate) {
         if (!$candidate['fec_id'] || !$candidate['name_last']) {
             continue;
         }
         if ($entity = $this->getCandidateEntityByFecId($candidate['fec_id'], $candidate['name_last'])) {
             return $entity;
         }
         $fecIds[] = $candidate['fec_id'];
     }

     $candidate = $candidates[0];
     if ($this->debugMode) {
         print "+ Creating new entity for person " . $id . " (" . $candidate['name'] . ")\n";
     }

     $entity = new Entity();
     $entity->addExtension('Person');
     $entity->addExtension('PoliticalCandidate');
     $entity->name_last = $candidate['name_last'];
     $entity->name_first = $candidate['name_first'];
     $entity->name_middle = $candidate['name_middle'];
     $entity->name_suffix = $candidate['name_suffix'];
     $entity->crp_id = $id;

     // The first letter of an FEC id selects the entity field it is stored in.
     $map = array('P' => 'pres_fec_id', 'S' => 'senate_fec_id', 'H' => 'house_fec_id');
     foreach ($fecIds as $fecId) {
         $code = substr($fecId, 0, 1);
         if (isset($map[$code])) {
             $entity->{$map[$code]} = $fecId;
         }
     }
     $entity->save();

     // NOTE(review): the original read $state and $district here without ever
     // assigning them, so the district lookup always ran on undefined values
     // and could not find the candidate's real district. Nothing visible in
     // this method says which candidate fields hold the state and district,
     // so both stay null and the linking step is skipped until they are
     // filled in. TODO: populate them from the candidate data.
     $stateText = null;
     $districtText = null;

     if ($stateText !== null && $districtText !== null) {
         // The record gets its own variable: the original reused $district
         // for both the district text and the record, and ended up assigning
         // the record to its own federal_district field.
         $districtRecord = PoliticalDistrictTable::getFederalDistrict($stateText, $districtText);
         if (!$districtRecord) {
             $stateRecord = AddressStateTable::retrieveByText($stateText);
             if ($stateRecord) {
                 $districtRecord = new PoliticalDistrict();
                 $districtRecord->state_id = $stateRecord['id'];
                 $districtRecord->federal_district = $districtText;
                 $districtRecord->save();
             }
         }
         if ($districtRecord) {
             $pc = $entity->getExtensionObject('PoliticalCandidate');
             $cd = new CandidateDistrict();
             $cd->candidate_id = $pc->id;
             $cd->district_id = $districtRecord->id;
             $cd->save();
         }
     }

     return $entity;
 }