/**
 * Turns a "Last, First ..." lobbyist name string into a person record.
 *
 * Parenthesised text and repeated whitespace are removed first. A quoted
 * nickname and any trailing suffixes listed in PersonTable::$nameParseSuffixes
 * are pulled out of the part after the first comma, and what remains is handed
 * to PersonTable::parseFlatName() together with the last name.
 *
 * @param string $str raw name, expected to contain at least one comma
 * @return mixed the value produced by PersonTable::parseFlatName() with the
 *               nickname, suffix and 'Lobbyist' extension applied, or null
 *               when there is no comma or the resulting last name is empty
 */
public function prepLobbyistName($str)
{
    // Get rid of stuff in parens (closing paren optional) and extra spaces.
    $cleaned = trim(preg_replace(array('/\\([^\\)]*\\)?/s', '/\\s+/s'), array('', ' '), $str));

    $pieces = explode(',', $cleaned);

    // No comma, no parsable name (for now).
    if (count($pieces) < 2) {
        return null;
    }

    // Everything before the first comma is the last name; the rest is re-joined with spaces.
    $name_last = trim(array_shift($pieces));
    $name_rest = trim(implode(' ', $pieces));

    // The first quoted chunk (single or double quotes) is taken as the nickname
    // and every occurrence of it is removed from the remaining name.
    $name_nick = null;
    if (preg_match('/["\'](.*?)["\']/isu', $name_rest, $quoted) == 1) {
        $name_nick = $quoted[1];
        $name_rest = str_replace($quoted[0], '', $name_rest);
    }

    // Peel known suffixes off the end one at a time. After each hit the scan
    // restarts from the first suffix, so stacked suffixes are all collected;
    // each newly found suffix is prepended to the ones found before it.
    $name_suffix = null;
    $suffixes = PersonTable::$nameParseSuffixes;
    do {
        $removed_one = false;
        foreach ($suffixes as $suffix) {
            if (!$suffix) {
                // A falsy entry ends the whole scan.
                break;
            }
            $without_suffix = preg_replace('/ ' . $suffix . '$/i', '', $name_rest);
            if ($name_rest != $without_suffix) {
                $name_suffix = $suffix . ' ' . $name_suffix;
                $name_rest = trim($without_suffix);
                $removed_one = true;
                break;
            }
        }
    } while ($removed_one);

    $name_suffix = $name_suffix ? trim($name_suffix) : null;

    $person = PersonTable::parseFlatName($name_rest . ' ' . $name_last, $name_last);

    if ($name_nick) {
        $person->name_nick = LsLanguage::nameize($name_nick);
    }

    if ($name_suffix) {
        $person->name_suffix = $name_suffix;
    }

    $person->addExtension('Lobbyist');
    $person->name_last = trim($person->name_last);

    // A person without a last name is treated as unparsable.
    return $person->name_last ? $person : null;
}
/**
 * Parses a comma-separated name of the form "LAST, First Middle (Nick), Suffix"
 * into a new Entity carrying the 'Person' extension.
 *
 * The first parenthesised group is stored as the nickname and parenthesised
 * text is removed; runs of whitespace are collapsed and periods are dropped.
 * The segment before the first comma becomes the (lower-cased, then nameized)
 * last name, the second segment supplies first and middle names, and a third
 * segment, if present, becomes the suffix.
 *
 * @param string $str raw name string
 * @return Entity|null the populated entity, or null when $str contains no comma
 */
static function parseBioguideName($str)
{
    $entity = new Entity();
    $entity->addExtension('Person');

    // extract nickname
    if (preg_match('/\\(([^(]+)\\)/', $str, $nick)) {
        $entity->name_nick = $nick[1];
        $str = preg_replace('/\\(.*\\)/U', '', $str);
    }

    $str = preg_replace('/\\s{2,}/', ' ', $str);
    $str = str_replace('.', '', $str);

    $parts = explode(',', trim($str));

    // Without a comma there is no way to tell the last name from the rest.
    if (count($parts) < 2) {
        return null;
    }

    $last = trim($parts[0]);
    $entity->name_last = LsLanguage::nameize(mb_strtolower($last, mb_detect_encoding($last)));

    $other = explode(' ', trim($parts[1]));
    $entity->name_first = trim($other[0]);

    if (count($other) > 1) {
        // implode() takes the separator first; the reversed legacy order
        // is rejected by PHP 8.
        $entity->name_middle = trim(implode(' ', array_slice($other, 1)));
    }

    if (count($parts) > 2) {
        $entity->name_suffix = trim($parts[2]);
    }

    return $entity;
}
/**
 * Processes every id in $this->corp_ids inside its own database transaction.
 *
 * For each corporation:
 *  - if it has no sec_cik, look one up via getCik() using its ticker, and when
 *    the corporation has no Industry yet, attach the industry described by the
 *    lookup's 'sic' data (creating the Industry row if necessary) and add a
 *    reference to the lookup's 'url';
 *  - if it has a sec_cik and more than one related 'Director' person, fetch
 *    the proxy (when needed), paginate it and run findNamePages() and
 *    findBasicInfo(), recording an 'error' meta value when a step fails;
 *  - record the 'scraped' meta value, then commit, or roll back in test mode.
 *
 * @return void
 * @throws Exception any exception raised while processing a corporation is
 *                   rethrown after the transaction has been rolled back
 */
public function execute()
{
    foreach ($this->corp_ids as $corp_id) {
        try {
            $this->db->beginTransaction();

            $this->corp = Doctrine::getTable('Entity')->find($corp_id);

            if (!$this->corp->sec_cik) {
                if ($result = $this->getCik($this->corp->ticker)) {
                    $this->corp->sec_cik = $result['cik'];

                    if (!$this->corp->Industry->count()) {
                        if ($result['sic']['name']) {
                            // Build the query only; it is executed once, by the fetchOne() below.
                            $q = LsDoctrineQuery::create()
                                ->from('Industry i')
                                ->where('i.name = ? and i.code = ?', array($result['sic']['name'], $result['sic']['code']));

                            if (!($industry = $q->fetchOne())) {
                                $industry = new Industry();
                                $industry->name = LsLanguage::nameize(LsHtml::replaceEntities($result['sic']['name']));
                                $industry->context = 'SIC';
                                $industry->code = $result['sic']['code'];
                                $industry->save();
                            }

                            // Only link the industry if the link does not exist yet.
                            $q = LsQuery::getByModelAndFieldsQuery('BusinessIndustry', array(
                                'industry_id' => $industry->id,
                                'business_id' => $this->corp->id
                            ));

                            if (!$q->fetchOne()) {
                                $this->corp->Industry[] = $industry;
                            }
                        }

                        $this->corp->save();

                        // NOTE(review): the modified fields are read after save(); confirm
                        // getAllModifiedFields() still reports them at this point.
                        $this->corp->addReference($result['url'], null, $this->corp->getAllModifiedFields(), 'SEC EDGAR Page');
                    }
                }

                $this->corp->save();
            }

            if ($this->corp->sec_cik) {
                $category = Doctrine::getTable('RelationshipCategory')->findOneByName('Position');
                $this->people = $this->corp->getRelatedEntitiesQuery('Person', $category->id, 'Director', null, null, false)->execute();

                if (count($this->people) > 1) {
                    if ($this->need_proxy) {
                        $this->getProxy();
                        // NOTE(review): the flag is already true here, so this assignment
                        // changes nothing unless getProxy() modifies it — confirm intent.
                        $this->need_proxy = true;
                    }

                    if ($this->url) {
                        $this->paginate();

                        if ($this->pages) {
                            $this->printDebug('paginated');
                            $this->findNamePages();
                            $this->findBasicInfo();
                        } else {
                            $this->saveMeta($this->corp->id, 'error', 'not_paginated');
                            $this->printDebug('not paginated');
                        }
                    } else {
                        $this->saveMeta($this->corp->id, 'error', 'no_proxy_retrieved');
                        $this->printDebug('could not get proxy');
                    }
                }
            }

            $this->saveMeta($this->corp->id, 'scraped', '1');

            if (!$this->testMode) {
                $this->db->commit();
            } else {
                $this->db->rollback();
            }
        } catch (Exception $e) {
            // something bad happened, rollback
            $this->db->rollback();
            throw $e;
        }
    }
}
/**
 * Splits a flat name string into name fields by word position.
 *
 * HTML tags are stripped, every string in LsLanguage::$punctuations is
 * removed, and the result is passed through LsLanguage::nameize() before
 * being split on single spaces. When the input contains a comma the first
 * word is taken as the last name and the second as the first name; otherwise
 * the order is first name, then last name. The third word becomes the middle
 * name unless it is a common prefix or a generational suffix. Any word found
 * in LsLanguage::$generationalSuffixes is recorded as the suffix (the last
 * such word wins).
 *
 * @param string $str raw name, possibly containing markup
 * @return array associative array with the keys name_prefix, name_first,
 *               name_middle, name_last, name_suffix and name_nick;
 *               name_prefix is never assigned here and is always null
 */
static function parseFlatName($str)
{
    $namePrefix = $nameFirst = $nameMiddle = $nameLast = $nameSuffix = $nameNick = null;

    $str = strip_tags($str);

    // strpos() returns 0 for a leading comma, so compare against false
    // explicitly instead of relying on truthiness.
    $name_in_reverse_order = strpos($str, ',') !== false;

    // remove punctuation and normalise capitalisation
    $name = LsLanguage::nameize(str_ireplace(LsLanguage::$punctuations, '', $str));

    $nameArray = explode(" ", $name);

    foreach ($nameArray as $key => $part) {
        if ($name_in_reverse_order) {
            if ($key == 0) {
                $nameLast = $part;
            }
            if ($key == 1) {
                $nameFirst = $part;
            }
        } else {
            if ($key == 0) {
                $nameFirst = $part;
            }
            if ($key == 1) {
                $nameLast = $part;
            }
        }

        if (in_array($part, LsLanguage::$generationalSuffixes)) {
            $nameSuffix = $part;
        }

        // find nickname in quotes
        // NOTE(review): this can only match if quote characters survive the
        // punctuation removal above — confirm against LsLanguage::$punctuations.
        if (preg_match('/\'([\\S]+)\'|"([\\S]+)"/', $part, $nickFound)) {
            $nameNick = $nickFound[1] ? $nickFound[1] : $nickFound[2];
        }

        if ($key == 2 and !in_array($part, LsLanguage::$commonPrefixes) and !in_array($part, LsLanguage::$generationalSuffixes)) {
            $nameMiddle = $part;
        }
    }

    // return the name fields
    return array(
        'name_prefix' => $namePrefix,
        'name_first' => $nameFirst,
        'name_middle' => $nameMiddle,
        'name_last' => $nameLast,
        'name_suffix' => $nameSuffix,
        'name_nick' => $nameNick
    );
}
/**
 * Applies LsLanguage::nameize() to each whitespace-separated part of a name.
 *
 * Parts that are at most two characters long once periods are removed
 * (for example "J." or "JR") are left exactly as given; every other part is
 * nameized. The parts are joined back together with single spaces.
 *
 * @param string $name name to normalise
 * @return string the name with its longer parts nameized
 */
static function nameizePersonName($name)
{
    $parts = preg_split('/\\s+/', $name);
    $cleanParts = array();

    foreach ($parts as $part) {
        if (strlen(str_replace('.', '', $part)) > 2) {
            $part = LsLanguage::nameize($part);
        }

        $cleanParts[] = $part;
    }

    // Join the processed parts; joining $parts would discard the nameized values.
    return implode(' ', $cleanParts);
}
/**
 * Builds a 'Mailing' Address from $address_arr and attaches it to $person.
 *
 * Nothing is attached when the state text cannot be resolved by
 * AddressStateTable::retrieveByText() or when $this->testMode is set.
 * When $person->addAddress() succeeds, the person is saved and a reference
 * is added to the address, named after $this->entity and the form name.
 *
 * @param array $address_arr reads the keys street1, street2, city, state, postal
 * @param mixed $person      object providing addAddress() and save()
 * @param array $person_arr  reads the keys readableXmlUrl, formName, date
 * @return void
 */
private function importAddress($address_arr, $person, $person_arr)
{
    $address = new Address();
    $address->street1 = LsLanguage::nameize($address_arr['street1']);
    $address->street2 = LsLanguage::nameize($address_arr['street2']);
    $address->city = $address_arr['city'];
    $address->Category = Doctrine::getTable('AddressCategory')->findOneByName('Mailing');

    // An address whose state cannot be resolved is dropped.
    $state = AddressStateTable::retrieveByText($address_arr['state']);
    if (!$state) {
        return;
    }

    $address->State = $state;
    $address->postal = $address_arr['postal'];

    // Test mode never touches the person; otherwise stop if the address was not added.
    if ($this->testMode || !$person->addAddress($address)) {
        return;
    }

    $person->save();
    $address->addReference(
        $person_arr['readableXmlUrl'],
        null,
        null,
        $this->entity->name . ' ' . $person_arr['formName'],
        null,
        $person_arr['date']
    );
}
/**
 * Builds a 'Mailing' Address from $address_arr and attaches it to $person.
 *
 * Nothing is attached when the state text cannot be resolved by
 * AddressStateTable::retrieveByText(). When $person->addAddress() succeeds,
 * the person is saved and a reference named "<corp name> Form 4" is added
 * to the address.
 *
 * @param array  $address_arr reads the keys street1, street2, city, state, postal
 * @param mixed  $person      object providing addAddress() and save()
 * @param array  $person_arr  reads the keys form4Url and date
 * @param string $corp_name   used as the prefix of the reference name
 * @return void
 */
private function importAddress($address_arr, $person, $person_arr, $corp_name)
{
    $address = new Address();
    $address->street1 = LsLanguage::nameize($address_arr['street1']);
    $address->street2 = LsLanguage::nameize($address_arr['street2']);
    $address->city = $address_arr['city'];
    $address->Category = Doctrine::getTable('AddressCategory')->findOneByName('Mailing');

    // An address whose state cannot be resolved is dropped.
    $state = AddressStateTable::retrieveByText($address_arr['state']);
    if (!$state) {
        return;
    }

    $address->State = $state;
    $address->postal = $address_arr['postal'];

    // The returned field list is not used anywhere in this method.
    $modifiedFields = $address->getAllModifiedFields();

    if (!$person->addAddress($address)) {
        return;
    }

    $person->save();
    $address->addReference(
        $person_arr['form4Url'],
        null,
        null,
        $corp_name . ' Form 4',
        null,
        $person_arr['date']
    );
}