/**
 * Adds a federal district for a state unless one is already stored.
 *
 * Both arguments are trimmed before use. A PoliticalDistrict row is only
 * created when the trimmed district text is non-empty.
 *
 * @param string $state_name       text passed to AddressStateTable::retrieveByText()
 * @param string $federal_district district identifier stored on the new record
 *
 * @return mixed false when getFederalDistrict() already finds a match,
 *               otherwise the result of a fresh getFederalDistrict() lookup
 *
 * @throws Exception when the state text cannot be resolved to a state
 */
static function addFederalDistrict($state_name, $federal_district)
{
    $state_name = trim($state_name);
    $federal_district = trim($federal_district);

    // Bail out early when the district is already known (loose comparison on purpose).
    if (self::getFederalDistrict($state_name, $federal_district) != false) {
        return false;
    }

    $resolvedState = AddressStateTable::retrieveByText($state_name);
    if (!$resolvedState) {
        throw new Exception("AddressStateTable did not return state");
    }

    if (strlen($federal_district)) {
        $record = new PoliticalDistrict();
        $record->State = $resolvedState;
        $record->federal_district = $federal_district;
        $record->save();
    }

    // Re-query so the caller gets whatever the lookup returns for the stored row.
    return self::getFederalDistrict($state_name, $federal_district);
}
/**
 * Imports one school record as an Org/School entity with address and aliases.
 *
 * Nothing is created when an Org whose lower-cased name contains the school's
 * name (LIKE %name%) already exists.
 *
 * Reads from $school: instnm, addr (optional), street2 (optional), city,
 * stabbr, zip, ialias ("|"-separated alias names) and webaddr.
 *
 * @param object $school source record exposing the properties listed above
 *
 * @return void
 */
public function import($school)
{
    $existing = EntityTable::getByExtensionQuery('Org')
        ->addWhere('LOWER(org.name) LIKE ?', '%' . strtolower($school->instnm) . "%")
        ->fetchOne();

    if ($existing) {
        $this->printDebug("School exists in database: " . $school->instnm);
        return;
    }

    $address = new Address();
    $address->street1 = isset($school->addr) ? $school->addr : null;
    $address->street2 = isset($school->street2) ? $school->street2 : null;
    $address->city = $school->city;
    if ($state = AddressStateTable::retrieveByText($school->stabbr)) {
        $address->State = $state;
    }
    $address->postal = $school->zip;

    // Normalise the web address:
    //  - an address that already carries a scheme is kept as-is (it used to be
    //    dropped entirely, and https:// URLs were turned into http://https://...),
    //  - a bare host gets the http:// prefix,
    //  - an empty value stays null instead of becoming the string "http://".
    $website = null;
    $webaddr = trim($school->webaddr);
    if (strlen($webaddr)) {
        if (preg_match('/^https?\\:\\/\\//i', $webaddr)) {
            $website = $webaddr;
        } else {
            $website = "http://" . strtolower($webaddr);
        }
    }
    $this->printDebug($website);

    $newschool = new Entity();
    $newschool->addExtension('Org');
    $newschool->addExtension('School');
    $newschool->name = $school->instnm;
    $newschool->website = $website;
    $newschool->addAddress($address);
    $newschool->save();

    foreach (explode("|", $school->ialias) as $alias) {
        // explode() on an empty string yields one empty element; skip blanks
        // rather than attempting to save a nameless alias.
        $alias = trim($alias);
        if (!strlen($alias)) {
            continue;
        }

        try {
            $newalias = new Alias();
            $newalias->Entity = $newschool;
            $newalias->name = $alias;
            $newalias->save();
        } catch (Exception $e) {
            // Best effort: a failing alias must not abort the import.
            $this->printDebug("An alias exception. No biggie. It's most likely that the name already exists. so we ignore it and move on: " . $e);
        }
    }

    $this->printDebug("Adding new school: " . $school->instnm);
}
/**
 * Builds a donor from an HTML fragment split on "<BR>".
 *
 * Segment 0 is stripped of tags and handed to generatePerson(), segment 1 is
 * parsed by LsLanguage::parseCityStatePostal() into an Address, and segment 2
 * (tags stripped, trimmed) becomes the donor's summary.
 *
 * @param string $text fragment containing at least three "<BR>"-separated segments
 *
 * @return mixed the object returned by generatePerson(), with address and summary set
 */
private function generateDonor($text)
{
    $segments = explode("<BR>", $text);

    $person = $this->generatePerson(LsHtml::stripTags($segments[0], ''));
    $parsed = LsLanguage::parseCityStatePostal($segments[1]);

    $address = new Address();

    // Street lines are optional in the parsed result; default them to null.
    $address->street1 = null;
    if (isset($parsed['street1'])) {
        $address->street1 = $parsed['street1'];
    }
    $address->street2 = null;
    if (isset($parsed['street2'])) {
        $address->street2 = $parsed['street2'];
    }

    $address->city = $parsed['city'];

    // Only attach a state when the parsed text resolves to one.
    $stateRecord = AddressStateTable::retrieveByText($parsed['state']);
    if ($stateRecord) {
        $address->State = $stateRecord;
    }

    $address->postal = $parsed['zip'];

    $person->addAddress($address);
    $person->summary = strip_tags(trim($segments[2]));

    return $person;
}
/**
 * Converts the first result of a decoded geocoding response into an Address.
 *
 * Reads $arr['results'][0]['address_components'] (each with 'types' and
 * 'long_name') and $arr['results'][0]['geometry']['location'] ('lat'/'lng').
 *
 * City selection:
 *  - locality and sublocality both present: the sublocality is used when the
 *    locality is 'New York' and the sublocality is not 'Manhattan', otherwise
 *    the locality;
 *  - only locality present: the locality;
 *  - neither: administrative_area_level_3 when available.
 *
 * street1 is only set when both a street number and a route were found.
 *
 * @param array $arr decoded geocoding response
 *
 * @return Address populated, unsaved address (country_id is always 1)
 */
static function parseGeoArray($arr)
{
    $raw_address = $arr['results'][0];

    $address = new Address();

    //COUNTRY (USA only for now)
    $address->country_id = 1;

    $locality = null;
    $sublocality = null;
    $admin_area_3 = null;
    $street = null;
    $street_number = null;

    foreach ($raw_address['address_components'] as $component) {
        $types = isset($component['types']) ? (array) $component['types'] : array();
        $value = isset($component['long_name']) ? $component['long_name'] : null;

        // The previous switch($types) / case in_array(...) compared the array
        // loosely against a boolean, so a component with an empty types list
        // matched the first case and overwrote the postal code. An explicit
        // chain with strict membership tests keeps the same priority order
        // without that false match.
        if (in_array('postal_code', $types, true)) {
            $address->postal = $value;
        } elseif (in_array('administrative_area_level_1', $types, true)) {
            if ($state = AddressStateTable::retrieveByText($value)) {
                $address->state_id = $state->id;
            }
        } elseif (in_array('administrative_area_level_2', $types, true)) {
            $address->county = $value;
        } elseif (in_array('administrative_area_level_3', $types, true)) {
            $admin_area_3 = $value;
        } elseif (in_array('locality', $types, true)) {
            $locality = $value;
        } elseif (in_array('sublocality', $types, true)) {
            $sublocality = $value;
        } elseif (in_array('route', $types, true)) {
            $street = $value;
        } elseif (in_array('street_number', $types, true)) {
            $street_number = $value;
        } elseif (in_array('subpremise', $types, true)) {
            $address->street2 = $value;
        }
    }

    if (isset($locality) && isset($sublocality)) {
        if ($locality == 'New York' && $sublocality != 'Manhattan') {
            $address->city = $sublocality;
        } else {
            $address->city = $locality;
        }
    } elseif (isset($locality)) {
        $address->city = $locality;
    } elseif (isset($admin_area_3)) {
        $address->city = $admin_area_3;
    }

    if (isset($street) && isset($street_number)) {
        $address->street1 = $street_number . " " . $street;
    }

    $address->latitude = $raw_address['geometry']['location']['lat'];
    $address->longitude = $raw_address['geometry']['location']['lng'];

    // NOTE(review): kept from the original; presumably throttles successive
    // geocoding requests made by the caller - confirm before removing.
    sleep(1);

    return $address;
}
/**
 * Scrapes one Forbes private-company profile page and stores the company.
 *
 * The page layout depends on $this->year, so every field is extracted with a
 * year-specific pattern (roughly: before 2000, 2000-2004, 2005 and later).
 * When an Org/PrivateCompany entity with the same lower-cased name exists only
 * its revenue is updated; otherwise a new Org/Business/PrivateCompany entity is
 * created with employees, revenue, website, summary, phone and address. In
 * both cases the entity is saved, referenced to $url as 'Forbes.com' and
 * passed to saveToList() together with the parsed rank.
 *
 * Nothing is stored when the request fails or no company name can be parsed.
 *
 * @param string $url profile page to fetch via $this->browser
 *
 * @return void
 */
protected function import($url)
{
    $company = null;

    if ($this->browser->get($url)->responseIsError()) {
        $this->printDebug("Couldn't get company: " . $url);
        return;
    }

    $text = $this->browser->getResponseText();
    $year = $this->year;

    $rank = null;
    $name = null;
    $industryName = null;
    $street1 = null;
    $street2 = null;
    $city = null;
    $state = null;
    $postal = null;
    $phone = null;
    $fax = null;
    $website = null;
    $summary = null;
    $revenue = null;
    $employees = null;
    $ceoName = null;
    $ceoBirthYear = null;

    //get rank
    if ($year > 1999 && $year < 2005 && preg_match('/ForbesListRank" content="(\\d+)"/i', $text, $match)) {
        $rank = $match[1];
    } elseif ($year < 2000 && preg_match('/td class="highlightcolor1">(\\d+)/i', $text, $match)) {
        $rank = $match[1];
    } elseif ($year > 2004 && preg_match('/<b>#(\\d+) ([^<]+)<\\/b>/i', $text, $match)) {
        $rank = html_entity_decode($match[1]);
    }

    //get name (mandatory: without it the page is skipped)
    if ($year > 1995 && $year < 2005 && preg_match('/span class="mainlisttitle">([^<]+)<\\/span>/i', $text, $match)) {
        $name = html_entity_decode($match[1]);
    } elseif ($year > 2004 && preg_match('/<b>#(\\d+) ([^<]+)<\\/b>/i', $text, $match)) {
        $name = html_entity_decode($match[2]);
    } else {
        $this->printDebug("Company name not found");
        return;
    }

    //get industry (parsed for completeness; not stored by this method)
    if ($year > 1995 && $year < 2001 && preg_match('/<b>See more private companies in <a [^>]+>([^<]+)<\\/a><\\/b>/ism', $text, $match)) {
        $industryName = trim(html_entity_decode($match[1]));
    } elseif ($year > 2000 && $year < 2005 && preg_match('/private companies\\<\\/a> in ([^\\.]+)/ism', $text, $match)) {
        $industryName = trim(html_entity_decode($match[1]));
    } elseif ($year > 2004 && preg_match('/<b>Industry:<\\/b> <a href="[^"]+">([^<]+)<\\/a>/ism', $text, $match)) {
        $industryName = trim(html_entity_decode($match[1]));
    }

    //get address
    // The two older layouts share the same "<br>"-separated contact block and
    // only differ in how it is located, so they are parsed together below.
    $legacyContact = null;
    if ($year > 1995 && $year < 2000 && preg_match('/<td class="mainlisttxt"\\>(.+)phone/smU', $text, $match)) {
        $legacyContact = $match[1];
    } elseif ($year > 1999 && $year < 2005 && preg_match('/(view private companies under this industry|in the same industry).+<br><br>(.+)phone/is', $text, $match)) {
        // Group 1 is only the anchor phrase; the contact block is group 2.
        $legacyContact = $match[2];
    } elseif ($year > 2004 && preg_match('/<div class="spaced">(.+)<\\/div>/ismU', $text, $match)) {
        $contactLines = explode('<br>', $match[1]);
        $firstIsAddress = isset($contactLines[0]) && !preg_match('/Phone\\:|Fax\\:/i', $contactLines[0]);
        $secondIsAddress = isset($contactLines[1]) && !preg_match('/Phone\\:|Fax\\:/i', $contactLines[1]);

        if ($firstIsAddress && $secondIsAddress) {
            $street1 = trim($contactLines[0]);
            $cityLine = null;

            if (count($contactLines) == 4) {
                $cityLine = $contactLines[1];
            } elseif (count($contactLines) == 5) {
                $street2 = $contactLines[1];
                $cityLine = $contactLines[2];
            }

            if ($cityLine !== null && preg_match('/^(.+?) ([A-Z]{2}) (\\d{5})($|-)/sU', trim($cityLine), $match)) {
                $city = $match[1];
                $state = $match[2];
                $postal = $match[3];
            }
        }
    }

    if ($legacyContact !== null) {
        $contactLines = explode('<br>', trim($legacyContact));
        // The last piece is whatever sits between the final <br> and "phone".
        array_pop($contactLines);

        $hasSecondStreetLine = count($contactLines) == 3;
        $street1 = isset($contactLines[0]) ? $contactLines[0] : null;
        // With three lines the layout is street1 / street2 / city-state-zip.
        $street2 = $hasSecondStreetLine ? $contactLines[1] : null;
        if ($hasSecondStreetLine) {
            $cityLine = $contactLines[2];
        } else {
            $cityLine = isset($contactLines[1]) ? $contactLines[1] : null;
        }

        $city_state_zip = LsLanguage::parseCityStatePostal($cityLine);
        $city = $city_state_zip['city'];
        $state = $city_state_zip['state'];
        $postal = $city_state_zip['zip'];
    }

    //get phone
    if ($year > 1995 && $year < 2005 && preg_match('/phone ([\\d\\-]{12})/is', $text, $match)) {
        $phone = trim(str_replace('-', '', $match[1]));
    } elseif ($year > 2004 && preg_match('/Phone: ([\\d\\-]{12})/is', $text, $match)) {
        $phone = trim(str_replace('-', '', $match[1]));
    }

    //get fax
    if ($year > 1995 && $year < 2005 && preg_match('/fax ([\\d\\-]{12})/is', $text, $match)) {
        $fax = trim(str_replace('-', '', $match[1]));
    } elseif ($year > 2004 && preg_match('/Fax: ([\\d\\-]{12})/is', $text, $match)) {
        $fax = trim(str_replace('-', '', $match[1]));
    }

    //get website
    if ($year > 1995 && $year < 2005 && preg_match('/this company\'s web site[^>]+\\>(http[^\\<]+)/is', $text, $match)) {
        $website = $match[1];
    } elseif ($year > 2004 && preg_match('/<div class="spaced">.*<\\/div>\\s+<br>\\s+<a href="(http:\\/\\/[^"]+)">/ismU', $text, $match)) {
        $website = $match[1];
    }

    //get ceo (parsed for completeness; not stored by this method)
    if ($year > 1995 && $year < 2005 && preg_match('/b>CEO: ([^<]+)<\\/b>/ism', $text, $match)) {
        $ceoName = $match[1];
    } elseif ($year > 2004 && preg_match('/CEO: ([^<]+)<\\/b> , (\\d+) <br>/ism', $text, $match)) {
        $ceoName = html_entity_decode($match[1]);
        // The number after the name is presumably the CEO's age; a stray
        // semicolon used to cut the subtraction off from the assignment.
        $ceoBirthYear = (int) date("Y") - (int) $match[2];
    }

    //get summary
    $rawSummary = null;
    if ($year > 1995 && $year < 2000 && preg_match_all('/p class="mainlisttxt">(.*)<\\/p>/ismU', $text, $match)) {
        // Older pages: the second matching paragraph is the one that is used.
        $rawSummary = $match[1][1];
    } elseif ($year > 1999 && $year < 2005 && preg_match('/p class="mainlisttxt">(.*)<\\/p>/ismU', $text, $match)) {
        $rawSummary = $match[1];
    } elseif ($year > 2004 && preg_match('/<blockquote class="spaced">(.*)<\\/blockquote>/ismU', $text, $match)) {
        $rawSummary = $match[1];
    }
    if ($rawSummary !== null) {
        // NOTE(review): the first search string may originally have been a
        // non-breaking space; as written it maps a space to a space - confirm.
        $summary = str_replace(array(' ', "\n"), array(' ', ' '), html_entity_decode(trim(strip_tags($rawSummary))));
    }

    //get revenue
    if ($year > 1995 && $year < 2000 && preg_match('/<td class="mainlisttxt">\\$([\\S]+) mil<sup>e?<\\/sup><\\/td>/ismU', $text, $match)) {
        $this->printDebug($match[1]);
        $revenue = str_replace(",", "", $match[1] . ",000,000");
    } elseif ($year > 1999 && $year < 2005 && preg_match('/<td class="mainlisttxt" nowrap>([^<]+)<sup>e?<\\/sup><\\/td>/ismU', $text, $match)) {
        $this->printDebug($match[1]);
        $revenue = str_replace(",", "", $match[1] . ",000,000");
    } elseif ($year > 2004 && preg_match('/<td class="highlight" nowrap="nowrap">\\$([\\S]+) bil.*<\\/td> <td class="highlight" nowrap="nowrap">[^<]+<\\/td> <td class="highlight" nowrap="nowrap">([^<]+)<\\/td>/ismU', $text, $match)) {
        $revenue = 1000000000 * $match[1];
    }

    //get employees
    if ($year > 1995 && $year < 2005 && preg_match('/mil<\\/td>.+<td class="mainlisttxt"( nowrap)?>(\\d[^<]+)<\\/td>.+<td class="mainlisttxt">[a-zA-Z]+<\\/td>/ismU', $text, $match)) {
        $employees = str_replace(',', '', $match[2]);
    } elseif ($year > 1999 && $year < 2005 && preg_match('/<sup>e?<\\/sup><\\/td> <td class="mainlisttxt"( nowrap)?>(\\d[^<]+)<sup>e?<\\/sup><\\/td> <td class="mainlisttxt">[a-zA-Z]+<\\/td>/ismU', $text, $match)) {
        $employees = str_replace(',', '', $match[2]);
    } elseif ($year > 2004 && preg_match('/<td class="highlight" nowrap="nowrap">([\\d,]+)<\\/td> <td class="highlight" nowrap="nowrap">[A-Z][a-z]{2,}<\\/td>/', $text, $match)) {
        $employees = str_replace(',', '', $match[1]);
    }

    // Name with common business words removed; only used in the debug line below.
    $search_company_name = trim(implode(' ', array_diff(explode(' ', ucwords(strtolower($name))), array_merge(LsLanguage::$business, LsLanguage::$businessAbbreviations))));
    $this->printDebug("{$search_company_name} == {$name}");

    $company = EntityTable::getByExtensionQuery(array('Org', 'PrivateCompany'))
        ->addWhere("LOWER(REPLACE( org.name, '-' , '')) = ?", strtolower($name))
        ->fetchOne();

    if ($company) {
        $this->printDebug("Company exists");
        $company->revenue = $revenue;
        $company->save();
    } else {
        $this->printDebug("Creating new company {$name}");
        Doctrine::getTable('ExtensionDefinition')->clear();

        $company = new Entity();
        $company->addExtension('Org');
        $company->addExtension('Business');
        $company->addExtension('PrivateCompany');
        $company->name = LsLanguage::titleize($name);
        $company->employees = strlen($employees) ? $employees : null;
        $company->revenue = strlen($revenue) ? $revenue : null;
        $company->website = strlen($website) ? $website : null;
        $company->summary = strlen($summary) ? trim($summary) : null;

        if ($phone) {
            $company->addPhone($phone);
        }
        // The fax number is parsed above but not stored: the addPhone($fax)
        // call was already disabled in the original code.

        //add address
        if ($city && $state) {
            $address = new Address();
            $address->street1 = strlen($street1) ? $street1 : null;
            $address->street2 = strlen($street2) ? $street2 : null;
            $address->city = $city;

            $stateRecord = AddressStateTable::retrieveByText($state);
            if ($stateRecord) {
                $address->State = $stateRecord;
            }

            $address->postal = $postal;
            $company->addAddress($address);
            $address->save();
            // Plain positional arguments: the former "$name = 'Forbes.com'"
            // style silently overwrote the local variables of the same name.
            $address->addReference($url, null, array('city', 'country_id', 'postal', 'state_id', 'street1'), 'Forbes.com', null, null);
        }
    }

    $company->save();
    $company->addReference($url, null, array('website', 'name', 'website', 'summary', 'revenue', 'employees'), 'Forbes.com', null, null);
    $this->saveToList($company, $rank);
}
/**
 * Attaches a 'Mailing' address to a person and references it to the filing.
 *
 * The method returns without doing anything further when the state text
 * cannot be resolved. Nothing is persisted when $this->testMode is set or
 * when addAddress() returns a falsy value.
 *
 * @param array $address_arr keys read: street1, street2, city, state, postal
 * @param mixed $person      object receiving the address (addAddress()/save())
 * @param array $person_arr  keys read: readableXmlUrl, formName, date
 *
 * @return void
 */
private function importAddress($address_arr, $person, $person_arr)
{
    $mailing = new Address();
    $mailing->street1 = LsLanguage::nameize($address_arr['street1']);
    $mailing->street2 = LsLanguage::nameize($address_arr['street2']);
    $mailing->city = $address_arr['city'];
    $mailing->Category = Doctrine::getTable('AddressCategory')->findOneByName('Mailing');

    // An address without a resolvable state is dropped entirely.
    $stateRecord = AddressStateTable::retrieveByText($address_arr['state']);
    if (!$stateRecord) {
        return;
    }
    $mailing->State = $stateRecord;
    $mailing->postal = $address_arr['postal'];

    if ($this->testMode) {
        return;
    }

    if (!$person->addAddress($mailing)) {
        return;
    }

    $person->save();

    $mailing->addReference(
        $person_arr['readableXmlUrl'],
        null,
        null,
        $this->entity->name . ' ' . $person_arr['formName'],
        null,
        $person_arr['date']
    );
}
/**
 * Attaches a 'Mailing' address to a person and references it to a Form 4.
 *
 * The method returns early when the state text cannot be resolved. The person
 * is saved and the reference added only when addAddress() returns a truthy
 * value.
 *
 * @param array  $address_arr keys read: street1, street2, city, state, postal
 * @param mixed  $person      object receiving the address (addAddress()/save())
 * @param array  $person_arr  keys read: form4Url, date
 * @param string $corp_name   prefix of the reference name ("<corp_name> Form 4")
 *
 * @return void
 */
private function importAddress($address_arr, $person, $person_arr, $corp_name)
{
    $mailing = new Address();
    $mailing->street1 = LsLanguage::nameize($address_arr['street1']);
    $mailing->street2 = LsLanguage::nameize($address_arr['street2']);
    $mailing->city = $address_arr['city'];
    $mailing->Category = Doctrine::getTable('AddressCategory')->findOneByName('Mailing');

    // An address without a resolvable state is dropped entirely.
    $stateRecord = AddressStateTable::retrieveByText($address_arr['state']);
    if (!$stateRecord) {
        return;
    }
    $mailing->State = $stateRecord;
    $mailing->postal = $address_arr['postal'];

    // NOTE(review): the result is never read; the call is kept only because
    // its side effects (if any) are not visible from here.
    $modifiedFields = $mailing->getAllModifiedFields();

    if (!$person->addAddress($mailing)) {
        return;
    }

    $person->save();

    $mailing->addReference(
        $person_arr['form4Url'],
        null,
        null,
        $corp_name . ' Form 4',
        null,
        $person_arr['date']
    );
}
/**
 * Finds or creates the entity for a candidate identified by its CRP id.
 *
 * Candidate rows come from getOsCandidatesById(). Each row with both a fec_id
 * and a name_last is tried against getCandidateEntityByFecId(); the first hit
 * is returned. Otherwise a new Person/PoliticalCandidate entity is created
 * from the first row, and every collected FEC id is stored in the field
 * selected by its first letter (P, S or H).
 *
 * @param string $id CRP candidate id
 *
 * @return mixed null when no candidate rows exist, otherwise the matched or
 *               newly created entity
 */
public function getCandidateEntityByCrpId($id)
{
    $candidates = $this->getOsCandidatesById($id);
    if (!count($candidates)) {
        return null;
    }

    //try to find entity by fec_id
    $fecIds = array();
    foreach ($candidates as $candidate) {
        if (!$candidate['fec_id'] || !$candidate['name_last']) {
            continue;
        }

        if ($entity = $this->getCandidateEntityByFecId($candidate['fec_id'], $candidate['name_last'])) {
            return $entity;
        }

        $fecIds[] = $candidate['fec_id'];
    }

    $candidate = $candidates[0];

    if ($this->debugMode) {
        print "+ Creating new entity for person " . $id . " (" . $candidate['name'] . ")\n";
    }

    $entity = new Entity();
    $entity->addExtension('Person');
    $entity->addExtension('PoliticalCandidate');
    $entity->name_last = $candidate['name_last'];
    $entity->name_first = $candidate['name_first'];
    $entity->name_middle = $candidate['name_middle'];
    $entity->name_suffix = $candidate['name_suffix'];
    $entity->crp_id = $id;

    // The first character of an FEC id selects the field it is stored in.
    $fieldByPrefix = array('P' => 'pres_fec_id', 'S' => 'senate_fec_id', 'H' => 'house_fec_id');
    foreach ($fecIds as $fecId) {
        $code = substr($fecId, 0, 1);
        // isset() replaces the former @-suppressed lookup for unknown prefixes.
        if (isset($fieldByPrefix[$code])) {
            $field = $fieldByPrefix[$code];
            $entity->{$field} = $fecId;
        }
    }

    $entity->save();

    // NOTE(review): the original code read $state and $district at this point
    // without ever assigning them, so both were always null and the district
    // link below could not work. They are made explicit here; TODO populate
    // them from the candidate data once the correct source fields are confirmed.
    $stateText = null;
    $federalDistrict = null;

    $district = PoliticalDistrictTable::getFederalDistrict($stateText, $federalDistrict);
    if (!$district) {
        if ($state = AddressStateTable::retrieveByText($stateText)) {
            // Separate variables for the record and the district value: the
            // original assigned the record to its own federal_district column.
            $district = new PoliticalDistrict();
            $district->state_id = $state['id'];
            $district->federal_district = $federalDistrict;
            $district->save();
        }
    }

    if ($district) {
        $pc = $entity->getExtensionObject('PoliticalCandidate');
        $cd = new CandidateDistrict();
        $cd->candidate_id = $pc->id;
        $cd->district_id = $district->id;
        $cd->save();
    }

    return $entity;
}