예제 #1
0
 /**
  * Turns a "Last, First Middle ..." style string into a person object carrying
  * the Lobbyist extension.
  *
  * Parenthesised fragments are discarded, the first quoted fragment is used as
  * the nickname, and trailing entries of PersonTable::$nameParseSuffixes are
  * peeled off as the suffix. What is left is handed to
  * PersonTable::parseFlatName() together with the last name.
  *
  * @param string $str raw name text
  * @return mixed the object produced by PersonTable::parseFlatName(), or null
  *               when the input has no comma or the resulting last name is empty
  */
 public function prepLobbyistName($str)
 {
     // Strip parenthesised text first, then squeeze whitespace runs to one space.
     $cleaned = preg_replace(array('/\\([^\\)]*\\)?/s', '/\\s+/s'), array('', ' '), $str);
     $pieces = explode(',', trim($cleaned));

     // No comma means no parsable name (for now).
     if (count($pieces) < 2) {
         return null;
     }

     // Everything before the first comma is the last name; the rest is rejoined.
     $name_last = trim(array_shift($pieces));
     $remainder = trim(implode(' ', $pieces));

     // A single- or double-quoted fragment is taken as the nickname and removed.
     $nickname = null;
     if (preg_match('/["\'](.*?)["\']/isu', $remainder, $quoted, PREG_OFFSET_CAPTURE) == 1) {
         $nickname = $quoted[1][0];
         $remainder = str_replace($quoted[0][0], '', $remainder);
     }

     // Repeatedly strip a trailing suffix, rescanning the list from the top after
     // every hit so stacked suffixes are all collected in their original order.
     $collected = null;
     do {
         $removed = false;
         foreach (PersonTable::$nameParseSuffixes as $suffix) {
             if (!$suffix) {
                 // A falsy entry ends the scan altogether.
                 break;
             }
             $candidate = preg_replace('/ ' . $suffix . '$/i', '', $remainder);
             if ($remainder != $candidate) {
                 $collected = $suffix . ' ' . $collected;
                 $remainder = trim($candidate);
                 $removed = true;
                 break;
             }
         }
     } while ($removed);
     $name_suffix = $collected ? trim($collected) : null;

     $person = PersonTable::parseFlatName($remainder . ' ' . $name_last, $name_last);
     if ($nickname) {
         $person->name_nick = LsLanguage::nameize($nickname);
     }
     if ($name_suffix) {
         $person->name_suffix = $name_suffix;
     }
     $person->addExtension('Lobbyist');
     $person->name_last = trim($person->name_last);

     // Only a person with a usable last name is returned.
     if ($person->name_last && $person->name_last != '') {
         return $person;
     }
     return null;
 }
 /**
  * Parses a comma-separated "LAST, First Middle[, Suffix]" name into a new
  * Entity with the Person extension.
  *
  * A parenthesised fragment is stored as the nickname and removed, runs of
  * whitespace are collapsed and periods are dropped before the string is split
  * on commas.
  *
  * @param string $str raw name text
  * @return Entity|null the populated entity, or null when $str contains no comma
  */
 static function parseBioguideName($str)
 {
     $entity = new Entity();
     $entity->addExtension('Person');

     //extract nickname
     if (preg_match('/\\(([^(]+)\\)/', $str, $nick)) {
         $entity->name_nick = $nick[1];
         $str = preg_replace('/\\(.*\\)/U', '', $str);
     }

     $str = preg_replace('/\\s{2,}/', ' ', $str);
     $str = str_replace('.', '', $str);
     $parts = explode(',', trim($str));

     // Without a comma there is no last-name/first-name split to work with.
     if (count($parts) < 2) {
         return null;
     }

     // Lower-case the last name in its detected encoding before re-capitalising.
     // mb_detect_encoding() returns false when it cannot decide; in that case
     // fall back to the internal encoding instead of passing false along.
     $last = trim($parts[0]);
     $encoding = mb_detect_encoding($last);
     $lowered = $encoding ? mb_strtolower($last, $encoding) : mb_strtolower($last);
     $entity->name_last = LsLanguage::nameize($lowered);

     // First token after the comma is the first name, the rest is the middle name.
     $other = explode(' ', trim($parts[1]));
     $entity->name_first = trim($other[0]);
     if (count($other) > 1) {
         // Separator first: the (array, separator) order is no longer supported in PHP 8.
         $entity->name_middle = trim(implode(' ', array_slice($other, 1)));
     }

     // A third comma-separated field, when present, is the suffix.
     if (count($parts) > 2) {
         $entity->name_suffix = trim($parts[2]);
     }

     return $entity;
 }
예제 #3
0
 /**
  * Processes every id in $this->corp_ids inside its own database transaction.
  *
  * For each corporation: looks up the SEC CIK (and SIC industry) when it is
  * missing, then, if a CIK is present and more than one related 'Director'
  * person exists, fetches and paginates the proxy and runs the name/info
  * extraction. The transaction is committed unless $this->testMode is set.
  *
  * @throws Exception any exception raised while processing is rethrown after
  *                   the current transaction has been rolled back
  */
 public function execute()
 {
     foreach ($this->corp_ids as $corp_id) {
         try {
             $this->db->beginTransaction();
             $this->corp = Doctrine::getTable('Entity')->find($corp_id);
             if (!$this->corp->sec_cik) {
                 if ($result = $this->getCik($this->corp->ticker)) {
                     $this->corp->sec_cik = $result['cik'];
                     if (!$this->corp->Industry->count()) {
                         if ($result['sic']['name'] && $result['sic']['name'] != '') {
                             // Keep the query object here; it is executed exactly once below.
                             // (Previously fetchOne() was chained here AND called again on
                             // its own result, which cannot work.)
                             $q = LsDoctrineQuery::create()->from('Industry i')->where('i.name = ? and i.code = ?', array($result['sic']['name'], $result['sic']['code']));
                             if (!($industry = $q->fetchOne())) {
                                 // No matching industry yet: create it from the SIC data.
                                 $industry = new Industry();
                                 $industry->name = LsLanguage::nameize(LsHtml::replaceEntities($result['sic']['name']));
                                 $industry->context = 'SIC';
                                 $industry->code = $result['sic']['code'];
                                 $industry->save();
                             }
                             // Only link the industry when the link does not exist already.
                             $q = LsQuery::getByModelAndFieldsQuery('BusinessIndustry', array('industry_id' => $industry->id, 'business_id' => $this->corp->id));
                             if (!$q->fetchOne()) {
                                 $this->corp->Industry[] = $industry;
                             }
                         }
                         $this->corp->save();
                         // Uses $this->corp; the bare $corp used before was never defined.
                         // NOTE(review): this runs after save() - confirm that
                         // getAllModifiedFields() still reports the saved changes.
                         $this->corp->addReference($result['url'], null, $this->corp->getAllModifiedFields(), 'SEC EDGAR Page');
                     }
                 }
                 $this->corp->save();
             }
             if ($this->corp->sec_cik) {
                 $category = Doctrine::getTable('RelationshipCategory')->findOneByName('Position');
                 $this->people = $this->corp->getRelatedEntitiesQuery('Person', $category->id, 'Director', null, null, false)->execute();
                 if (count($this->people) > 1) {
                     if ($this->need_proxy) {
                         $this->getProxy();
                         // NOTE(review): re-armed right after the call; presumably
                         // getProxy() clears the flag - confirm.
                         $this->need_proxy = true;
                     }
                     if ($this->url) {
                         $this->paginate();
                         if ($this->pages) {
                             $this->printDebug('paginated');
                             $this->findNamePages();
                             $this->findBasicInfo();
                         } else {
                             $this->saveMeta($this->corp->id, 'error', 'not_paginated');
                             $this->printDebug('not paginated');
                         }
                     } else {
                         $this->saveMeta($this->corp->id, 'error', 'no_proxy_retrieved');
                         $this->printDebug('could not get proxy');
                     }
                 }
             }
             $this->saveMeta($this->corp->id, 'scraped', '1');
             // Test mode never persists anything.
             if (!$this->testMode) {
                 $this->db->commit();
             } else {
                 $this->db->rollback();
             }
         } catch (Exception $e) {
             //something bad happened, rollback
             $this->db->rollback();
             throw $e;
         }
     }
 }
예제 #4
0
 /**
  * Splits a flat name string into its name fields.
  *
  * Tags are stripped, the entries of LsLanguage::$punctuations are removed and
  * the result is passed through LsLanguage::nameize() before being split on
  * spaces. When the input contains a comma the first two parts are read as
  * "last first", otherwise as "first last"; the third part becomes the middle
  * name unless it is a known prefix or generational suffix. A part wrapped in
  * single or double quotes is reported as the nickname and does not take up a
  * positional slot.
  *
  * @param string $str raw name text
  * @return array with the keys name_prefix, name_first, name_middle, name_last,
  *               name_suffix and name_nick (each null when not found;
  *               name_prefix is never filled in by this method)
  */
 static function parseFlatName($str)
 {
     $namePrefix = $nameFirst = $nameMiddle = $nameLast = $nameSuffix = $nameNick = null;
     $str = strip_tags($str);

     // A comma marks "Last, First" order. strpos() yields 0 for a leading comma,
     // so that case is not treated as reversed.
     $name_in_reverse_order = false;
     if (strpos($str, ',')) {
         $name_in_reverse_order = true;
     }

     $name = LsLanguage::nameize(str_ireplace(LsLanguage::$punctuations, '', $str));

     // Pull quoted nicknames out BEFORE assigning positions. The original code
     // tried to strip them from $str after the split, which had no effect, so a
     // nickname token ended up being used as first/last/middle name.
     $nickPattern = '/\'([\\S]+)\'|"([\\S]+)"/';
     $nameArray = array();
     foreach (explode(" ", $name) as $part) {
         if (preg_match($nickPattern, $part, $nickFound)) {
             // Group 1 is '' when the double-quoted alternative matched.
             $nameNick = $nickFound[1] !== '' ? $nickFound[1] : $nickFound[2];
             $part = trim(preg_replace($nickPattern, '', $part));
             if ($part === '') {
                 // The token was nothing but the nickname.
                 continue;
             }
         }
         $nameArray[] = $part;
     }

     foreach ($nameArray as $key => $part) {
         if ($name_in_reverse_order) {
             if ($key == 0) {
                 $nameLast = $part;
             }
             if ($key == 1) {
                 $nameFirst = $part;
             }
         } else {
             if ($key == 0) {
                 $nameFirst = $part;
             }
             if ($key == 1) {
                 $nameLast = $part;
             }
         }
         if (in_array($part, LsLanguage::$generationalSuffixes)) {
             $nameSuffix = $part;
         }
         if ($key == 2 and !in_array($part, LsLanguage::$commonPrefixes) and !in_array($part, LsLanguage::$generationalSuffixes)) {
             $nameMiddle = $part;
         }
     }

     //return person with name fields
     return array('name_prefix' => $namePrefix, 'name_first' => $nameFirst, 'name_middle' => $nameMiddle, 'name_last' => $nameLast, 'name_suffix' => $nameSuffix, 'name_nick' => $nameNick);
 }
예제 #5
0
 /**
  * Applies LsLanguage::nameize() to each whitespace-separated part of a name.
  *
  * Parts that are at most two bytes long once periods are removed (initials
  * such as "J." or "JR") are left untouched.
  *
  * @param string $name raw name text
  * @return string the processed parts joined by single spaces
  */
 static function nameizePersonName($name)
 {
     $cleanParts = array();
     foreach (preg_split('/\\s+/', $name) as $part) {
         if (strlen(str_replace('.', '', $part)) > 2) {
             $part = LsLanguage::nameize($part);
         }
         $cleanParts[] = $part;
     }

     // Join the processed parts; the original joined the untouched input parts,
     // which threw away every nameize() result.
     return implode(' ', $cleanParts);
 }
 /**
  * Builds a 'Mailing' Address from $address_arr and, outside test mode,
  * attaches it to $person and records a reference for it.
  *
  * Nothing is stored when the state text cannot be resolved by
  * AddressStateTable::retrieveByText().
  *
  * @param array $address_arr reads the keys street1, street2, city, state, postal
  * @param mixed $person      object offering addAddress() and save()
  * @param array $person_arr  reads the keys readableXmlUrl, formName, date
  * @return void
  */
 private function importAddress($address_arr, $person, $person_arr)
 {
     $address = new Address();
     $address->street1 = LsLanguage::nameize($address_arr['street1']);
     $address->street2 = LsLanguage::nameize($address_arr['street2']);
     $address->city = $address_arr['city'];
     $address->Category = Doctrine::getTable('AddressCategory')->findOneByName('Mailing');

     // An address without a recognisable state is dropped.
     $state = AddressStateTable::retrieveByText($address_arr['state']);
     if (!$state) {
         return;
     }
     $address->State = $state;
     $address->postal = $address_arr['postal'];

     // Test mode builds the address but never persists it.
     if ($this->testMode) {
         return;
     }

     // addAddress() decides whether the address is accepted for this person.
     if (!$person->addAddress($address)) {
         return;
     }
     $person->save();

     $referenceName = $this->entity->name . ' ' . $person_arr['formName'];
     $address->addReference($person_arr['readableXmlUrl'], null, null, $referenceName, null, $person_arr['date']);
 }
 /**
  * Builds a 'Mailing' Address from $address_arr, attaches it to $person and
  * records a "<corp name> Form 4" reference for it.
  *
  * Nothing is stored when the state text cannot be resolved by
  * AddressStateTable::retrieveByText().
  *
  * @param array  $address_arr reads the keys street1, street2, city, state, postal
  * @param mixed  $person      object offering addAddress() and save()
  * @param array  $person_arr  reads the keys form4Url and date
  * @param string $corp_name   prefix for the reference name
  * @return void
  */
 private function importAddress($address_arr, $person, $person_arr, $corp_name)
 {
     $address = new Address();
     $address->street1 = LsLanguage::nameize($address_arr['street1']);
     $address->street2 = LsLanguage::nameize($address_arr['street2']);
     $address->city = $address_arr['city'];
     $address->Category = Doctrine::getTable('AddressCategory')->findOneByName('Mailing');

     // An address without a recognisable state is dropped.
     $state = AddressStateTable::retrieveByText($address_arr['state']);
     if (!$state) {
         return;
     }
     $address->State = $state;
     $address->postal = $address_arr['postal'];

     // NOTE(review): the result is not used anywhere below; the call is kept
     // as-is in case it has side effects - confirm and remove if it does not.
     $changedFields = $address->getAllModifiedFields();

     // addAddress() decides whether the address is accepted for this person.
     if (!$person->addAddress($address)) {
         return;
     }
     $person->save();

     $referenceName = $corp_name . ' Form 4';
     $address->addReference($person_arr['form4Url'], null, null, $referenceName, null, $person_arr['date']);
 }