Exemplo n.º 1
0
 /**
  * Builds the query for entities of the given extension.
  *
  * When $this->limit is truthy the query is capped at that many rows;
  * otherwise it is returned without a limit applied here.
  *
  * @param mixed $extension Passed straight through to EntityTable::getByExtensionQuery()
  * @return mixed The query object produced by EntityTable::getByExtensionQuery()
  */
 public function getEntitiesByExtension($extension = 'Person')
 {
     $query = EntityTable::getByExtensionQuery($extension);
     if (!$this->limit) {
         return $query;
     }
     $query->limit($this->limit);
     return $query;
 }
 /**
  * Scans entities for probable duplicates and appends them to a report file.
  *
  * Entities matching the 'extensions1' option are visited one at a time (each
  * step asks for an entity whose id is greater than the last one handled,
  * starting after 'start_id'). For every visited entity a search is run over
  * the 'extensions2' entities:
  *  - Person mode (when 'Person' is in extensions1): same last name, first
  *    name equal ('exact_first') or sharing the first letter, and a compatible
  *    middle initial when the entity has a middle name.
  *  - Otherwise: name match, substring-based for names longer than 8 bytes and
  *    exact for shorter ones.
  * The entity itself is always excluded from its own matches. When at least
  * 'min_duplicates' matches are found, a tab-separated line (name, matches,
  * entity URL) is appended to the file named by the 'file_name' option.
  *
  * @param array $arguments Unused
  * @param array $options   Reads 'extensions1', 'extensions2', 'exact_first',
  *                         'start_id', 'min_duplicates' and 'file_name'
  */
 protected function execute($arguments = array(), $options = array())
 {
     $databaseManager = new sfDatabaseManager($this->configuration);
     $databaseManager->initialize($this->configuration);
     $extensions1 = explode(',', $options['extensions1']);
     $extensions2 = explode(',', $options['extensions2']);
     $exact_first = $options['exact_first'];
     $start_id = $options['start_id'];
     // "count > min_duplicates - 1" below is the same as "count >= min_duplicates".
     $min_duplicates = $options['min_duplicates'] - 1;
     if (!in_array('Person', $extensions1) && !in_array('Person', $extensions2) && (!in_array('Org', $extensions1) && !in_array('Org', $extensions2))) {
         $this->printDebug('extension issues, exiting');
         die;
     }
     // The search mode does not change while looping, so decide it once.
     $match_people = in_array('Person', $extensions1);
     $this->fileName = $options['file_name'];
     $fh = fopen($this->fileName, 'a');
     if (!$fh) {
         $this->printDebug('could not open ' . $this->fileName . ' for appending, exiting');
         die;
     }
     $entity = EntityTable::getByExtensionQuery($extensions1)->addWhere('e.id > ?', $start_id)->limit(1)->execute()->getFirst();
     while ($entity) {
         $start_id = $entity->id;
         if ($match_people) {
             $q = EntityTable::getByExtensionQuery($extensions2)->addWhere('person.name_last = ? and e.id <> ?', array($entity->name_last, $entity->id));
             if (!$exact_first) {
                 $q->addWhere('person.name_first like ?', substr($entity->name_first, 0, 1) . '%');
             } else {
                 $q->addWhere('person.name_first = ?', $entity->name_first);
             }
             if ($entity->name_middle) {
                 $q->addWhere('person.name_middle like ? OR person.name_middle IS NULL', substr($entity->name_middle, 0, 1) . '%');
             }
         } else {
             // This branch used to call addWhere() on a query that was never
             // created. Build it here, excluding the entity itself just like
             // the Person branch does.
             $q = EntityTable::getByExtensionQuery($extensions2)->addWhere('e.id <> ?', $entity->id);
             if (strlen($entity->name) > 8) {
                 $q->addWhere('e.name like ?', '%' . $entity->name . '%');
             } else {
                 $q->addWhere('e.name = ?', $entity->name);
             }
         }
         if ($q->count() > $min_duplicates) {
             $matches = $q->execute();
             $arr = array();
             echo $entity->name . "\n";
             foreach ($matches as $match) {
                 $arr[] = $match->name;
             }
             $links = $entity->name . "\t";
             $links .= implode("; ", $arr) . "\t";
             $links .= 'http://littlesis.org/entity/view?id=' . $entity->id . "\n";
             fwrite($fh, $links);
         }
         // Advance to the next entity after the one just handled.
         $entity = EntityTable::getByExtensionQuery($extensions1)->addWhere('e.id > ?', $start_id)->limit(1)->execute()->getFirst();
     }
     fclose($fh);
 }
 /**
  * Creates Family relationships from "(daughter of ...)"-style notes in summaries.
  *
  * Selects Person/ElectedRepresentative entities whose summary starts with a
  * parenthesised kinship word, extracts each "<relation> of <name>" fragment
  * from the first parenthesised group, and searches for people whose name
  * contains every part of the extracted name. When exactly one person other
  * than the member matches, and no relationship between the two exists yet,
  * a Family relationship is saved (and given the member's 'Congressional
  * Biographical Directory' reference when one is found).
  *
  * Progress is echoed to stdout.
  *
  * @param array $arguments Unused
  * @param array $options   Reads 'application' and 'env'
  */
 protected function execute($arguments = array(), $options = array())
 {
     $configuration = ProjectConfiguration::getApplicationConfiguration($options['application'], $options['env'], true);
     $databaseManager = new sfDatabaseManager($configuration);
     $databaseManager->initialize($configuration);
     $q = EntityTable::getByExtensionQuery(array('Person', 'ElectedRepresentative'))->addWhere('summary like ? OR summary like ? OR summary like ? OR summary like ? OR summary like ? OR summary like ? OR summary like ? OR summary like ? OR summary like ?', array('(daughter%', '(son%', '(father%', '(mother%', '(cousin%', '(husband%', '(wife%', '(brother%', '(sister%'))->orderBy('person.name_last');
     $members = $q->execute();
     foreach ($members as $member) {
         if (!preg_match('/\\([^\\)]*\\)/isu', $member->summary, $match)) {
             continue;
         }
         echo $member->name . ":\n";
         // NOTE(review): the selection above includes "(son%" but this pattern
         // has no "son" alternative, so those summaries never yield a match here.
         if (preg_match_all('/(brother|sister|daughter|mother|father|wife|husband|cousin)\\sof\\s+([^\\;\\)\\,]*)(\\;|\\)|\\,)/isu', $match[0], $matches, PREG_SET_ORDER)) {
             foreach ($matches as $m) {
                 echo "\t\t" . $m[1] . ' : of : ' . $m[2] . "\n";
                 $m[2] = str_replace('.', '', $m[2]);
                 $parts = LsString::split($m[2]);
                 if (!$parts) {
                     // Without name parts the search below would be unfiltered.
                     continue;
                 }
                 $search = EntityTable::getByExtensionQuery(array('Person', 'ElectedRepresentative'));
                 foreach ($parts as $part) {
                     $search->addWhere('e.name like ?', '%' . $part . '%');
                 }
                 $people = $search->execute();
                 $family = array();
                 foreach ($people as $person) {
                     echo "\t\t\t\t" . $person->name . "\n";
                     if ($person->id != $member->id) {
                         $family[] = $person;
                     }
                 }
                 if (count($family) != 1) {
                     continue;
                 }
                 // Use the single filtered match. The loop variable $person is
                 // just the last row of $people and may be the member itself.
                 $relative = $family[0];
                 $existing = LsDoctrineQuery::create()->from('Relationship r')->where('(r.entity1_id = ? or r.entity2_id =?) and (r.entity1_id = ? or r.entity2_id = ?)', array($member->id, $member->id, $relative->id, $relative->id));
                 if ($existing->count()) {
                     continue;
                 }
                 if ($description2 = FamilyTable::getDescription2($m[1], $relative->Gender->id)) {
                     $relationship = new Relationship();
                     $relationship->setCategory('Family');
                     $relationship->Entity1 = $member;
                     $relationship->Entity2 = $relative;
                     $relationship->description1 = $m[1];
                     $relationship->description2 = $description2;
                     $relationship->save();
                     $ref = LsQuery::getByModelAndFieldsQuery('Reference', array('object_model' => 'Entity', 'object_id' => $member->id, 'name' => 'Congressional Biographical Directory'))->fetchOne();
                     if ($ref) {
                         $relationship->addReference($ref->source, null, null, $ref->name, $ref->source_detail, $ref->publication_date);
                     }
                     echo "-------------------------------added relationship\n";
                 }
             }
         }
         echo "\n";
     }
 }
Exemplo n.º 4
0
 /**
  * Sets the organisations this object works on.
  *
  * A truthy $orgs is stored as given. Otherwise 'Org' is prepended to
  * $this->_orgExtensions and up to 100 entities with a non-NULL website are
  * loaded for those extensions; when a meta value exists for
  * ($this->_metaName, $this->_metaPredicate) only entities with an id greater
  * than that value are loaded.
  *
  * @param mixed $orgs Organisations to use, or null to load them from the database
  */
 public function setOrgs($orgs = null)
 {
     if ($orgs) {
         $this->_orgs = $orgs;
         return;
     }

     array_unshift($this->_orgExtensions, 'Org');

     $query = EntityTable::getByExtensionQuery($this->_orgExtensions)->addWhere('website is not NULL');
     $query = $query->limit(100);

     $hasResumePoint = $this->hasMeta($this->_metaName, $this->_metaPredicate);
     if ($hasResumePoint) {
         $lastId = $this->getMeta($this->_metaName, $this->_metaPredicate);
         $query->addWhere('e.id > ?', $lastId);
     }

     $this->_orgs = $query->execute();
 }
Exemplo n.º 5
0
 /**
  * Finds Org entities whose name or alias resembles the given name.
  *
  * The name is stripped of suffixes via OrgTable::removeSuffixes() (with
  * 'Bancorp' passed as the second argument) and split into search terms by
  * LsQuery::splitSearchPhrase(). Candidates must match every term either in
  * their own name or in an alias name; the first term is matched as a prefix,
  * later terms anywhere. A term that is itself an array is treated as a set
  * of alternatives. Candidates are then filtered with
  * $org->hasSimilarName($name, $strict).
  *
  * @param string $name   Organisation name to look for
  * @param bool   $strict Forwarded to hasSimilarName()
  * @return array Matching org entities; empty when the cleaned name is shorter
  *               than 3 bytes or yields no search terms
  */
 static function getOrgsWithSimilarNames($name, $strict = false)
 {
     $name = trim(OrgTable::removeSuffixes($name, array('Bancorp')));
     if (strlen($name) < 3) {
         return array();
     }
     $terms = LsQuery::splitSearchPhrase($name);
     $term_count = count($terms);
     if ($term_count == 0) {
         // No terms would build the invalid clause "() or ()".
         return array();
     }
     $q = EntityTable::getByExtensionQuery('Org')->leftJoin('e.Alias a');
     $search_terms = array();
     $e = array();
     $a = array();
     for ($i = 0; $i < $term_count; $i++) {
         $term = $terms[$i];
         if (is_array($term)) {
             $e_temp = array();
             $a_temp = array();
             foreach ($term as $t) {
                 $search_terms[] = $i == 0 ? $t . '%' : '%' . $t . '%';
                 $e_temp[] = 'e.name like ?';
                 $a_temp[] = 'a.name like ?';
             }
             $e[] = '(' . implode(' or ', $e_temp) . ')';
             $a[] = '(' . implode(' or ', $a_temp) . ')';
         } else {
             $search_terms[] = $i == 0 ? $term . '%' : '%' . $term . '%';
             $e[] = 'e.name like ?';
             $a[] = 'a.name like ?';
         }
     }
     $e = implode(' and ', $e);
     $a = implode(' and ', $a);
     // The placeholders appear twice (entity clause, then alias clause), so
     // the parameter list is duplicated to line up with them.
     $search_terms = array_merge($search_terms, $search_terms);
     $q->addWhere('(' . $e . ') or (' . $a . ')', $search_terms);
     $found_orgs = array();
     foreach ($q->execute() as $org) {
         if ($org->hasSimilarName($name, $strict)) {
             $found_orgs[] = $org;
         }
     }
     return $found_orgs;
 }
Exemplo n.º 6
0
 /**
  * Imports one school record as an Org/School entity unless it already exists.
  *
  * A school counts as existing when an Org's lower-cased name contains the
  * lower-cased $school->instnm. Otherwise a new entity is saved with the
  * school's address, website and one Alias per "|"-separated entry of
  * $school->ialias. Failures while saving an alias are logged and ignored.
  *
  * @param object $school Record exposing instnm, city, stabbr, zip, ialias,
  *                       webaddr and optionally addr / street2
  */
 public function import($school)
 {
     $existing = EntityTable::getByExtensionQuery('Org')->addWhere('LOWER(org.name) LIKE ?', '%' . strtolower($school->instnm) . "%")->fetchOne();
     if ($existing) {
         $this->printDebug("School exists in database: " . $school->instnm);
         return;
     }

     $address = new Address();
     $address->street1 = isset($school->addr) ? $school->addr : null;
     $address->street2 = isset($school->street2) ? $school->street2 : null;
     $address->city = $school->city;
     if ($state = AddressStateTable::retrieveByText($school->stabbr)) {
         $address->State = $state;
     }
     $address->postal = $school->zip;

     // Keep an address that already has a scheme (it used to be dropped),
     // prefix scheme-less ones, and leave the website null when there is no
     // address at all (it used to become the bare string "http://").
     $website = null;
     $webaddr = trim($school->webaddr);
     if ($webaddr !== '') {
         if (preg_match('/^https?\\:\\/\\//i', $webaddr)) {
             $website = $webaddr;
         } else {
             $website = "http://" . strtolower($webaddr);
         }
     }
     $this->printDebug($website);

     $newschool = new Entity();
     $newschool->addExtension('Org');
     $newschool->addExtension('School');
     $newschool->name = $school->instnm;
     $newschool->website = $website;
     $newschool->addAddress($address);
     $newschool->save();

     foreach (explode("|", $school->ialias) as $alias) {
         $alias = trim($alias);
         if ($alias === '') {
             // An empty ialias field would otherwise produce a blank alias.
             continue;
         }
         try {
             $newalias = new Alias();
             $newalias->Entity = $newschool;
             $newalias->name = $alias;
             $newalias->save();
         } catch (Exception $e) {
             $this->printDebug("An alias exception. No biggie. It's most likely that the name already exists. so we ignore it and move on: " . $e);
         }
     }
     $this->printDebug("Adding new school: " . $school->instnm);
 }
 /**
  * Returns the base query for the Person and PoliticalCandidate extensions.
  *
  * @return mixed The query object produced by EntityTable::getByExtensionQuery()
  */
 private function getCandidatesQuery()
 {
     $extensions = array('Person', 'PoliticalCandidate');
     $query = EntityTable::getByExtensionQuery($extensions);
     return $query;
 }
Exemplo n.º 8
0
 /**
  * Imports one governor from their detail page.
  *
  * Fetches $this->_baseUrl . $row['url'] and does nothing when the response is
  * an error. Otherwise, in this order, it:
  *  1. parses the governor's name (with "Gov." removed) into a person entity,
  *     or reuses a similar existing entity whose bio mentions a governorship;
  *  2. fills in birth date, birthplace, party and summary from the page when
  *     the entity does not have them yet;
  *  3. finds or creates each listed school and an Education relationship;
  *  4. finds or creates the "Office of the Governor of <state>" entity and,
  *     when no Governor position exists yet, one Position per term in
  *     $row['years'];
  *  5. adds a spouse and Family relationship when the governor has no Family
  *     relationship at all;
  *  6. adds a phone number and a featured image when none exist.
  *
  * @param array $row Reads the keys 'url', 'name', 'state' and 'years'
  *                   ('years' is sorted in place and consumed as start/end pairs)
  */
 protected function importGovernor($row)
 {
     $url = $this->_baseUrl . $row['url'];
     if (!$this->browser->get($url)->responseIsError()) {
         $text = $this->browser->getResponseText();
         $text = LsHtml::replaceEntities($text);
         //preg_match('/>Family\:<\/b>([^<]*)<br/is',$text,$family_arr);
         $name = trim(str_ireplace('Gov.', '', $row['name']));
         $this->printDebug('');
         $this->printDebug($name . ':');
         $governor = PersonTable::parseFlatName($name);
         $governor->addExtension('PoliticalCandidate');
         $governor->addExtension('ElectedRepresentative');
         $governor->is_state = 1;
         $similar = $governor->getSimilarEntitiesQuery(true)->execute();
         // Reuse an existing entity when one first name starts with the other
         // and its extended bio contains "governor" or "governorship".
         foreach ($similar as $s) {
             $sim_re = LsString::escapeStringForRegex($s->name_first);
             $search_re = LsString::escapeStringForRegex($governor->name_first);
             if (preg_match('/^' . $sim_re . '/su', $governor->name_first) == 0 && preg_match('/^' . $search_re . '/su', $s->name_first) == 0) {
                 continue;
             }
             $bio = $s->getExtendedBio();
             if (preg_match('/\\bgovernor(ship)?\\b/isu', $bio)) {
                 $governor = $s;
                 $this->printDebug(' Found existing governor: ' . $s->name . ' ' . $s->id);
                 break;
             }
         }
         $governor->save();
         $this->printDebug($governor->id);
         // The page's "Born:" value is stored in start_date.
         if (!$governor->start_date && preg_match('/>Born\\:<\\/b>([^<]*)<br/is', $text, $birth_arr)) {
             $this->printDebug(' Birthdate: ' . $birth_arr[1]);
             $governor->start_date = trim($birth_arr[1]);
         }
         if (!$governor->birthplace && preg_match('/>Birth State\\:<\\/b>([^<]*)<br/is', $text, $birth_state_arr)) {
             $this->printDebug(' Birthplace: ' . trim($birth_state_arr[1]));
             $governor->birthplace = trim($birth_state_arr[1]);
         }
         //PARTY MEMBERSHIP
         if (preg_match('/>Party\\:<\\/b>([^<]*)<br/is', $text, $party_arr)) {
             $party_str = $party_arr[1];
             $this->printDebug(' Party: ' . $party_str);
             // If the text mentions both parties, the Republican lookup below
             // overwrites the Democratic one.
             if (stristr($party_str, 'Democrat')) {
                 $party = EntityTable::getByExtensionQuery('PoliticalParty')->addWhere('name = ?', 'Democratic Party')->fetchOne();
             }
             if (stristr($party_str, 'Republican')) {
                 $party = EntityTable::getByExtensionQuery('PoliticalParty')->addWhere('name = ?', 'Republican Party')->fetchOne();
             }
             // An existing party_id is never overwritten.
             if (isset($party) && $party && !$governor->party_id) {
                 $governor->Party = $party;
                 $governor->is_independent = false;
                 $this->printDebug(' Added membership in ' . $party);
             } else {
                 if (stristr($party_str, 'Independent')) {
                     $governor->is_independent = true;
                 }
             }
         }
         // Summary: every text run of at least 240 characters between tags,
         // except runs that contain "Javascript", joined by blank lines.
         if (!$governor->summary && preg_match_all('/>([^<]{240,})/isu', $text, $bio_match)) {
             $str = '';
             foreach ($bio_match[1] as $b) {
                 if (!stristr($b, 'Javascript')) {
                     $str .= "\n\n" . $b;
                 }
             }
             $str = trim($str);
             if (strlen($str)) {
                 $governor->summary = $str;
             }
         }
         $governor->save();
         // NOTE(review): getAllModifiedFields() is read after save(); confirm it
         // still reports the fields changed above at this point.
         $governor->addReference($url, null, $governor->getAllModifiedFields(), 'Governors Association');
         //SCHOOLS
         if (preg_match('/>School\\(s\\)\\:<\\/b>([^<]*)<br/is', $text, $school_arr)) {
             $school_names = explode(';', trim($school_arr[1]));
             // No semicolon in the list: fall back to splitting on commas.
             if (count($school_names) == 1) {
                 $school_names = explode(',', $school_names[0]);
             }
             foreach ($school_names as $school_name) {
                 $school_name = trim($school_name);
                 // Match on the school's own name or on one of its aliases.
                 if (!($school = EntityTable::getByExtensionQuery('School')->leftJoin('e.Alias a')->addWhere('e.name = ? or a.name = ?', array($school_name, $school_name))->fetchOne())) {
                     $school = new Entity();
                     $school->addExtension('Org');
                     $school->addExtension('School');
                     $school->name = $school_name;
                     $school->save();
                     $this->printDebug(' Added School: ' . $school_name);
                 }
                 $q = RelationshipTable::getByCategoryQuery('Education')->addWhere('entity1_id = ? and entity2_id = ?', array($governor->id, $school->id))->fetchOne();
                 if (!$q) {
                     $relationship = new Relationship();
                     $relationship->setCategory('Education');
                     $relationship->Entity1 = $governor;
                     $relationship->Entity2 = $school;
                     $relationship->is_current = 0;
                     $relationship->save();
                     $relationship->addReference($url, null, $relationship->getAllModifiedFields(), 'Governors Association');
                     $this->printDebug(' Added education: ' . $relationship->name);
                 }
             }
         }
         //GOVERNOR OFFICE AND POSITION
         $office_name = 'Office of the Governor of ' . $row['state'];
         if (!($office = EntityTable::getByExtensionQuery('GovernmentBody')->addWhere('name = ?', $office_name)->fetchOne())) {
             $office = new Entity();
             $office->name = $office_name;
             $office->addExtension('Org');
             $office->addExtension('GovernmentBody');
             $state = Doctrine::getTable('AddressState')->findOneByName($row['state']);
             if ($state) {
                 $office->state_id = $state->id;
             }
             $office->save();
             $office->addReference($url, null, $office->getAllModifiedFields(), 'Governors Association');
             $this->printDebug(' Added office: ' . $office->name);
         }
         // Positions are only created when no Governor position exists yet
         // between this person and this office.
         $q = RelationshipTable::getByCategoryQuery('Position')->addWhere('entity1_id = ? and entity2_id = ? and description1 = ?', array($governor->id, $office->id, 'Governor'))->fetchOne();
         if (!$q) {
             sort($row['years']);
             $i = 0;
             // The sorted years are consumed two at a time: start year, then
             // end year. A start year with no following entry is treated as
             // the current term.
             while ($i < count($row['years'])) {
                 $governorship = new Relationship();
                 $governorship->setCategory('Position');
                 $governorship->Entity1 = $governor;
                 $governorship->Entity2 = $office;
                 $governorship->description1 = 'Governor';
                 $governorship->start_date = $row['years'][$i];
                 // Step to the end year of this term, if there is one.
                 $i++;
                 if (isset($row['years'][$i])) {
                     $governorship->end_date = $row['years'][$i];
                     $governorship->is_current = 0;
                     // Only the last listed term sets the "Former Governor" blurb.
                     if (!$governor->blurb && !isset($row['years'][$i + 1])) {
                         $governor->blurb = 'Former Governor of ' . $row['state'];
                     }
                 } else {
                     $governorship->is_current = 1;
                     if (!$governor->blurb) {
                         $governor->blurb = 'Governor of ' . $row['state'];
                     }
                 }
                 $governor->save();
                 // Step to the start year of the next term.
                 $i++;
                 $governorship->save();
                 $governorship->addReference($url, null, $governorship->getAllModifiedFields(), 'Governors Association');
                 $this->printDebug(' Added governorship: ' . $governorship->name);
             }
         }
         //SPOUSE
         if (preg_match('/>Spouse\\:<\\/b>(.*?)<br/is', $text, $spouse_arr)) {
             $spouse = trim(LsHtml::stripTags($spouse_arr[1]));
             // Any existing Family relationship involving the governor (not
             // only a spouse) prevents a spouse from being added.
             $q = RelationshipTable::getByCategoryQuery('Family')->addWhere('entity1_id = ? or entity2_id = ?', array($governor->id, $governor->id))->fetchOne();
             if (!$q && strlen($spouse)) {
                 $spouse = PersonTable::parseFlatName($spouse);
                 $spouse->save();
                 $this->printDebug(' Added spouse: ' . $spouse->name);
                 $relationship = new Relationship();
                 $relationship->setCategory('Family');
                 $relationship->Entity1 = $spouse;
                 $relationship->Entity2 = $governor;
                 $relationship->description1 = 'Spouse';
                 $relationship->description2 = 'Spouse';
                 $relationship->save();
                 $relationship->addReference($url, null, $relationship->getAllModifiedFields(), 'Governors Association');
                 $this->printDebug(' Added spouse relationship: ' . $relationship->name);
             }
         }
         //ADDRESS --not working, malformed addresses
         /*
               if (preg_match('/>Address\:\s*<\/b>(.*?)<b>/is',$text,$address_arr))
               {
                 $address = trim(str_replace('<br/>',', ',$address_arr[1]));
                 $this->printDebug($address);
                 if ($governor->Address->count() == 0 && $a = $governor->addAddress($address))
                 {
                   $this->printDebug(' Address: ' . $a);
                   $governor->save();
                 }
               }*/
         //PHONE NUMBER
         if (preg_match('/>Phone\\(s\\)\\:<\\/b>([^<]*)<br/is', $text, $phone_arr)) {
             $phone_number = trim($phone_arr[1]);
             if (!$governor->Phone->count()) {
                 $phone = $governor->addPhone($phone_number);
                 $this->printDebug(' Phone: ' . $phone);
             }
         }
         if (!$governor->Image->count() && preg_match('/<img .*?class\\="display" src\\="([^"]*)"/is', $text, $img_arr)) {
             // From here on $url holds the image address, not the page address.
             $url = $img_arr[1];
             try {
                 $fileName = ImageTable::createFiles($url, $governor->name_first);
             } catch (Exception $e) {
                 // A failed image download is treated as "no image".
                 $fileName = null;
             }
             if ($fileName) {
                 //insert image record
                 $image = new Image();
                 $image->filename = $fileName;
                 $image->entity_id = $governor->id;
                 $image->title = $governor->name;
                 $image->caption = 'From Governors Association website';
                 $image->is_featured = true;
                 $image->is_free = false;
                 $image->url = $url;
                 $image->save();
                 $this->printDebug("Imported image: " . $image->filename);
             }
         }
     }
 }
Exemplo n.º 9
0
 /**
  * Returns the base query for the Person extension.
  *
  * @return mixed The query object produced by EntityTable::getByExtensionQuery()
  */
 private function getBusinessPersonQuery()
 {
     $query = EntityTable::getByExtensionQuery('Person');
     return $query;
 }
Exemplo n.º 10
0
 /**
  * Imports prospective UK parliamentary candidates returned by getMPs().
  *
  * Ensures the 'United Kingdom' network list exists, then for every candidate
  * not yet tagged with its YourNextMP URL:
  *  - finds or creates the candidate's political party,
  *  - saves the candidate into the UK network with a Membership relationship
  *    to the party,
  *  - tags the candidate with the ('yournextmp', 'url', <url>) triple,
  *  - stores references for the candidate, the party and the relationship,
  *  - imports the candidate and party images when an image URL is given and
  *    the image files could be created.
  *
  * The script exits when safeToRun('uk-mp-candidates') reports another run.
  */
 public function execute()
 {
     if (!$this->safeToRun('uk-mp-candidates')) {
         $this->printDebug('Script already running');
         die;
     }
     // Get (or create) the UK local Network
     $uk = Doctrine::getTable('LsList')->findOneByName('United Kingdom');
     if (!$uk) {
         $uk = new LsList();
         $uk->name = 'United Kingdom';
         $uk->is_network = 1;
         $uk->description = 'People and organizations with significant influence on the policies of the United Kingdom';
         $uk->display_name = 'uk';
         $uk->save();
     }
     // Get the MP list
     $raw = $this->getMPs();
     // Add new MPs to the list
     foreach ($raw as $mp) {
         $this->printDebug(sprintf('Processing %s', $mp['name']));
         // Split name
         $entity = PersonTable::parseFlatName($mp['name']);
         $entity->blurb = 'Prospective Parliamentary Candidate for ' . $mp['constituency'];
         // A candidate already tagged with this URL was imported by an earlier run.
         $q = TagTable::getByTripleQuery('yournextmp', 'url', $mp['url']);
         if ($q->count()) {
             $this->printDebug('Already processed, skipping.');
             continue;
         }
         // Get political party
         $q = EntityTable::getByExtensionQuery('PoliticalParty')->addWhere('e.name = ?', $mp['party']);
         if (!($partyEntity = $q->fetchOne())) {
             $partyEntity = new Entity();
             $partyEntity->addExtension('Org');
             $partyEntity->addExtension('PoliticalParty');
             $partyEntity->name = $mp['party'];
             $partyEntity->blurb = 'UK Political Party';
             $partyEntity->save(null, true, array($uk->id));
             $this->printDebug("Created new political party: " . $mp['party']);
         }
         // Save entity to UK Network
         $entity->party_id = $partyEntity->id;
         $entity->save(null, true, array($uk->id));
         // Add party relationship
         $r = new Relationship();
         $r->entity1_id = $entity->id;
         $r->entity2_id = $partyEntity->id;
         $r->setCategory('Membership');
         $r->description1 = 'Prospective parliamentary candidate';
         $r->is_current = true;
         // $r->start_date = // Don't know where we can get this, and "now" seems kind of wrong
         $r->save();
         // Add YourNextMP triple
         $entity->addTagByTriple('yournextmp', 'url', $mp['url']);
         // Add references
         $ref = new Reference();
         $ref->addFields(array('name_first', 'name_last', 'name_middle'));
         // Don't need this
         $ref->source = $mp['url'];
         $ref->name = 'YourNextMP.com - ' . $entity['name'];
         $ref->object_model = 'Entity';
         $ref->object_id = $entity->getId();
         $ref->save();
         unset($ref);
         $ref = new Reference();
         $ref->addFields(array('name'));
         $ref->source = $mp['party_url'];
         $ref->name = 'YourNextMP.com - ' . $partyEntity['name'];
         $ref->object_model = 'Entity';
         $ref->object_id = $partyEntity->getId();
         $ref->save();
         unset($ref);
         $ref = new Reference();
         $ref->addFields(array('name'));
         $ref->source = $mp['url'];
         $ref->name = 'YourNextMP.com - ' . $entity['name'];
         $ref->object_model = 'Relationship';
         $ref->object_id = $r->getId();
         $ref->save();
         unset($ref);
         $r->free(true);
         unset($r);
         // Add image?
         // The Image is only linked and saved when createFiles() produced a
         // file; the old code dereferenced an undefined $image when it did not.
         if (!empty($mp['image'])) {
             if ($fileName = ImageTable::createFiles($mp['image'])) {
                 //insert image record
                 $image = new Image();
                 $image->filename = $fileName;
                 $image->title = $entity['name'];
                 $image->caption = 'From YourNextMP under CC-BY-SA license.';
                 $image->is_featured = true;
                 $image->is_free = true;
                 $image->url = $mp['image'];
                 $this->printDebug("Imported image: " . $image->filename);
                 $image->Entity = $entity;
                 $image->save();
                 //save image source
                 $image->addReference($mp['image']);
                 $this->printDebug("Saved image reference");
                 unset($image);
             }
         }
         // Add party image?
         if (!empty($mp['party_image'])) {
             if ($fileName = ImageTable::createFiles($mp['party_image'])) {
                 //insert image record
                 $partyImage = new Image();
                 $partyImage->filename = $fileName;
                 $partyImage->title = $partyEntity['name'];
                 $partyImage->caption = 'From YourNextMP under CC-BY-SA license.';
                 $partyImage->is_featured = true;
                 $partyImage->is_free = true;
                 $partyImage->url = $mp['party_image'];
                 $this->printDebug("Imported image: " . $partyImage->filename);
                 $partyImage->Entity = $partyEntity;
                 $partyImage->save();
                 //save image source
                 $partyImage->addReference($mp['party_image']);
                 $this->printDebug("Saved image reference");
                 unset($partyImage);
             }
         }
         unset($entity);
         unset($partyEntity);
     }
 }
Exemplo n.º 11
0
 /**
  * Returns the query for the Person and BusinessPerson extensions, limited to
  * $this->_limit rows.
  *
  * @return mixed The query object after limit() has been applied
  */
 private function getPersonsQuery()
 {
     $extensions = array('Person', 'BusinessPerson');
     return EntityTable::getByExtensionQuery($extensions)->limit($this->_limit);
 }
 /**
  * Builds the officer/director roster of each corporation in $this->corp_ids
  * from SEC filings.
  *
  * For every corporation id (skipping those whose meta marks them
  * 'is_complete' or 'lacks_cik', unless $this->override is set) the method,
  * inside one database transaction:
  *  - looks up the SEC CIK from the ticker when the entity has none, recording
  *    an SIC industry along the way;
  *  - collects Form 4 data into a roster and cross-checks the names against
  *    the first proxy statement found for 2007/2008;
  *  - for roster entries that appear in the proxy and are a director or have
  *    an officer title, finds or imports the person, imports the address and
  *    creates the Director / officer relationships.
  * After committing (skipped in test mode) a ProxyScraper is run for the
  * corporation and the id is marked 'is_complete'.
  *
  * The script exits when safeToRun('sec') reports another run.
  *
  * @return null Returned explicitly when $this->corp_ids is not set
  * @throws Exception Anything thrown while processing a corporation is
  *                   rethrown after the transaction is rolled back
  */
 public function execute()
 {
     if (!$this->safeToRun('sec')) {
         $this->printDebug('script already running');
         die;
     }
     if (!isset($this->corp_ids)) {
         return null;
     }
     foreach ($this->corp_ids as $corp_id) {
         // Both skip checks are bypassed when $this->override is set.
         if (!$this->override && $this->hasMeta($corp_id, 'is_complete') && $this->getMeta($corp_id, 'is_complete')) {
             $this->printDebug("Already fetched roster for Entity " . $corp_id . "; skipping...");
             continue;
         } else {
             if (!$this->override && $this->hasMeta($corp_id, 'lacks_cik') && $this->getMeta($corp_id, 'lacks_cik')) {
                 $this->printDebug("No SEC cik found for Entity " . $corp_id . "; skipping...");
                 continue;
             }
         }
         try {
             echo number_format(memory_get_usage()) . "\n";
             $this->browser->restart($this->defaultHeaders);
             $this->db->beginTransaction();
             // NOTE(review): the result of find() is used without a check;
             // confirm every id in corp_ids refers to an existing Entity.
             $corp = Doctrine::getTable('Entity')->find($corp_id);
             echo "\n*****************\n\nfetching roster for " . $corp->name . " (" . $corp->ticker . ")" . "\n\n";
             //grab the corporation's cik if it doesn't have one already
             if (!$corp->sec_cik) {
                 if ($result = $this->getCik($corp->ticker)) {
                     $corp->sec_cik = $result['cik'];
                     // The corp is only saved and referenced here when it had
                     // no industry yet.
                     if ($corp->Industry->count() == 0) {
                         if ($result['sic']['name'] && $result['sic']['name'] != '') {
                             if (!($industry = LsDoctrineQuery::create()->from('Industry i')->where('i.name = ? and i.code = ?', array($result['sic']['name'], $result['sic']['code']))->fetchOne())) {
                                 $industry = new Industry();
                                 $industry->name = LsLanguage::nameize(LsHtml::replaceEntities($result['sic']['name']));
                                 $industry->context = 'SIC';
                                 $industry->code = $result['sic']['code'];
                                 $industry->save();
                                 $this->printDebug('Industry: ' . $industry->name . ' (' . $industry->code . ')');
                             }
                             $q = LsQuery::getByModelAndFieldsQuery('BusinessIndustry', array('industry_id' => $industry->id, 'business_id' => $corp->id));
                             if (!$q->fetchOne()) {
                                 $corp->Industry[] = $industry;
                             }
                         }
                         $corp->save();
                         $corp->addReference($result['url'], null, $corp->getAllModifiedFields(), 'SEC EDGAR Page');
                     }
                 } else {
                     // No CIK could be found: remember that and move on. This
                     // "continue" also skips the 'is_complete' marker at the
                     // bottom of the loop.
                     $this->saveMeta($corp->id, 'lacks_cik', true);
                     $this->db->commit();
                     continue;
                 }
             }
             if ($corp->sec_cik) {
                 $form4_urls = $this->getForm4Urls($corp->sec_cik);
                 $roster = array();
                 foreach ($form4_urls as $url_arr) {
                     $result = $this->getForm4Data($url_arr, $corp->sec_cik);
                     if ($result) {
                         $roster[] = $result;
                     }
                 }
                 $proxy_urls = $this->getProxyUrls($corp->sec_cik, array('2007', '2008'));
                 if (count($proxy_urls)) {
                     // Only the first proxy returned is used.
                     $proxy_url = $proxy_urls[0]['url'];
                     $proxy_year = $proxy_urls[0]['year'];
                     //search proxy for names appearing on form 4s
                     $roster = $this->getProxyData($roster, $proxy_url, $proxy_year);
                 } else {
                     // A missing proxy is recorded under the same 'lacks_cik'
                     // key even though a CIK exists here.
                     $this->saveMeta($corp->id, 'lacks_cik', true);
                     $this->db->commit();
                     continue;
                 }
                 $corp->addReference($proxy_url, null, null, $proxy_year . ' Proxy');
                 //loop through names found on form 4s and search proxy
                 foreach ($roster as $r) {
                     echo "\n" . $r['personName'] . " is director? " . $r['isDirector'] . " at " . $r['form4Url'] . " \n";
                     if (isset($r['proxyName'])) {
                         echo "in proxy as " . $r['proxyName'] . " \n";
                     } else {
                         echo "not in proxy \n\n";
                     }
                     //make sure this appears in the proxy and has either an officer title or is a director
                     if (isset($r['proxyName']) && ($r['isDirector'] == '1' || $r['officerTitle'] != '')) {
                         // Look the person up by their SEC CIK and import them
                         // when they are not found.
                         $p = EntityTable::getByExtensionQuery('BusinessPerson')->addWhere('businessperson.sec_cik = ?', $r['personCik'])->fetchOne();
                         if (!$p) {
                             $p = $this->importPerson($r, $corp->name);
                         }
                         if ($p) {
                             $this->importAddress($r['address'], $p, $r, $corp->name);
                             if ($r['isDirector'] == 1) {
                                 $this->importRelationship($p, $corp, 'Director', $r);
                             }
                             if ($r['officerTitle'] != '') {
                                 // One officer title can yield several positions.
                                 $descriptions = $this->parseDescriptionStr($r['officerTitle'], $corp);
                                 foreach ($descriptions as $d) {
                                     if ($d['note']) {
                                         $position = $d['description'] . ' (' . implode(', ', $d['note']) . ')';
                                     } else {
                                         $position = $d['description'];
                                     }
                                     $this->importRelationship($p, $corp, $position, $r);
                                 }
                             }
                         }
                     }
                 }
             }
             // NOTE(review): in test mode the transaction is neither committed
             // nor rolled back on this path; confirm that is intended.
             if (!$this->testMode) {
                 $this->db->commit();
             }
             // NOTE(review): $proxy_url / $proxy_year are not reset per
             // iteration, so isset() could see values from an earlier
             // corporation if the branch above did not run; verify.
             if (isset($proxy_url)) {
                 $proxy_scraper = new ProxyScraper($this->testMode, $this->debugMode, $this->appConfiguration);
                 $proxy_scraper->setCorpIds(1, $corp->id);
                 $proxy_scraper->setProxy($this->proxyText, $proxy_url, $proxy_year);
                 $proxy_scraper->disableBeep();
                 $proxy_scraper->run();
             }
         } catch (Exception $e) {
             //something bad happened, rollback
             $this->db->rollback();
             throw $e;
         }
         $this->saveMeta($corp_id, 'is_complete', true);
     }
 }
Exemplo n.º 13
0
 /**
  * Imports or updates one member of Congress inside a single DB transaction.
  *
  * The member is looked up by bioguide ID. When no record exists, one is
  * created with importNewMember() and passed through matchInDatabase().
  * When a record exists, its bio is refreshed, open Membership relationships
  * with the opposite chamber are ended, and a Membership relationship with the
  * member's own chamber is queued if needed. Afterwards the party affiliation,
  * references, session tag, image and the queued Senate/House relationships
  * are saved.
  *
  * @param object $row Member row; this method reads ->id, ->name, ->type,
  *                    ->party and ->termStart from it.
  *
  * @return mixed The existing member record when it is already tagged with the
  *               current session (the transaction is rolled back and the row is
  *               skipped); no value is returned on the normal path.
  *
  * @throws Exception Any exception raised during the import is rethrown after
  *                   the transaction has been rolled back.
  */
 public function processCongressMemberRow($row)
 {
     $this->_references['bioguide'] = new Reference();
     $this->printDebug("\nProcessing member with name " . $row->name . " and ID " . $row->id);
     try {
         //we have to begin the transaction here because matchInDatabase might merge Entities and save
         $this->db->beginTransaction();
         //check that congress member isn't a repeat
         $member = EntityTable::getByExtensionQuery('ElectedRepresentative')->addWhere('electedrepresentative.bioguide_id = ?', $row->id)->fetchOne();
         //if member hasn't been imported already as a member of congress,
         //create with all bio info and look for a match
         if (!$member) {
             $member = $this->importNewMember($row);
             $member = $this->matchInDatabase($member);
         } else {
             $this->printDebug("Member exists in database with entity ID: " . $member->id);
             //if member is tagged with this session, skip
             if (in_array($member->id, $this->_existingSessionMemberIds)) {
                 $this->db->rollback();
                 $this->printDebug("Member has already been tagged with session " . current($this->_sessions) . "; skipping");
                 return $member;
             }
             //update member's bio
             $this->updateBio($member);
             $this->printDebug("Updated member bio");
             //resolve the chamber once; the original code used "=" instead of "==" when
             //picking the relationship list, which overwrote $row->type and always chose the Senate
             $isSenator = $row->type == 'Senator';
             $thisChamberId = $isSenator ? $this->_senateEntityId : $this->_houseEntityId;
             $oppositeChamberId = $isSenator ? $this->_houseEntityId : $this->_senateEntityId;
             $needsChamberRelationship = false;
             //check if member is continuing from previous session
             $q = $member->getTripleTagsQuery('congress', 'session')->addWhere('tag.triple_value = ?', current($this->_sessions) - 1);
             if ($q->count()) {
                 $this->printDebug("Continuing member from previous session...");
                 //if member continuing, look for relationship with opposite chamber to end,
                 //in case the member's switched chambers
                 $q = LsDoctrineQuery::create()->from('Relationship r')->where('r.entity1_id = ? AND r.entity2_id = ?', array($member->id, $oppositeChamberId))->andWhere('r.category_id = ? AND r.end_date IS NULL', RelationshipTable::MEMBERSHIP_CATEGORY);
                 foreach ($q->execute() as $rel) {
                     //parenthesized so the subtraction happens before the concatenation on every PHP version
                     $rel->end_date = ($this->_sessionStartYear - 1) . '-00-00';
                     $rel->is_current = false;
                     $rel->save();
                     $this->printDebug("Ended relationship " . $rel->id . " with opposite chamber from previous session");
                 }
                 //if no current relationships with same chamber, create one
                 $q = LsDoctrineQuery::create()->from('Relationship r')->where('r.entity1_id = ? AND r.entity2_id = ?', array($member->id, $thisChamberId))->andWhere('r.category_id = ? AND r.end_date IS NULL', RelationshipTable::MEMBERSHIP_CATEGORY);
                 if (!$q->count()) {
                     $this->printDebug("No relationships with this chamber from previous session; creating new one...");
                     $needsChamberRelationship = true;
                 }
             } else {
                 //if member not continuing, add a new relationship for this session and chamber
                 $this->printDebug("Member not continuing from previous session; creating new relationship...");
                 $needsChamberRelationship = true;
             }
             if ($needsChamberRelationship) {
                 //queued here, saved (with references) further down once the member itself is saved
                 $r = new Relationship();
                 $r->entity1_id = $member->id;
                 $r->entity2_id = $thisChamberId;
                 $r->setCategory('Membership');
                 $r->description1 = $row->type;
                 $r->start_date = $row->termStart . '-00-00';
                 $r->is_current = true;
                 if ($isSenator) {
                     $this->_senateRelationships[] = $r;
                 } else {
                     $this->_houseRelationships[] = $r;
                 }
             }
         }
         //set party name
         $partyName = null;
         if ($party = $row->party) {
             if ($party == 'Democrat') {
                 $partyName = 'Democratic Party';
             } elseif ($party == 'Independent') {
                 $partyName = null;
             } else {
                 $partyName = $party . ' Party';
             }
             //if party entity doesn't exist, create one
             if ($partyName) {
                 $q = EntityTable::getByExtensionQuery('PoliticalParty')->addWhere('e.name = ?', $partyName);
                 if (!($partyEntity = $q->fetchOne())) {
                     $partyEntity = new Entity();
                     $partyEntity->addExtension('Org');
                     $partyEntity->addExtension('PoliticalParty');
                     $partyEntity->name = $partyName;
                     $partyEntity->save();
                     $this->printDebug("Created new political party: " . $partyName);
                 }
             }
             //create current party affiliation if session is member's most recent session
             if ($member->exists()) {
                 $q = $member->getTripleTagsQuery('congress', 'session')->addWhere('tag.triple_value > ?', current($this->_sessions));
                 $setParty = $q->count() ? false : true;
             } else {
                 $setParty = true;
             }
             if ($setParty) {
                 if ($partyName) {
                     $member->Party = $partyEntity;
                     $member->is_independent = false;
                     $this->printDebug("Set current political affiliation to " . $partyName);
                 } else {
                     $member->is_independent = true;
                     $member->party_id = null;
                     $this->printDebug("Set current political affiliation to Independent");
                 }
             }
         }
         //save member; modified fields are captured first so they can be attributed to the bioguide reference
         $modified = $member->getAllModifiedFields();
         $member->save();
         $this->printDebug("Saved member with entity ID: " . $member->id);
         $this->addListMember($member);
         //set member reference fields
         $excludeFields = array();
         foreach ($this->_references as $key => $ref) {
             $ref->object_model = 'Entity';
             $ref->object_id = $member->id;
             if ($key != 'bioguide') {
                 $ref->save();
                 $excludeFields = array_merge($excludeFields, $ref->getFieldsArray());
             }
         }
         //fields already claimed by another reference are not credited to the bioguide reference
         $modified = array_diff($modified, $excludeFields);
         $this->_references['bioguide']->addFields($modified);
         $this->_references['bioguide']->save();
         $this->printDebug("Saved member references");
         //tag member with congress session
         $member->addTagByTriple('congress', 'session', current($this->_sessions));
         $this->printDebug("Added tag for session " . current($this->_sessions));
         //save image, if any
         if ($this->_image) {
             $this->_image->Entity = $member;
             $this->_image->save();
             $this->printDebug("Saved member image");
             if ($this->_photoUrl) {
                 //save image source
                 $this->_image->addReference($this->_photoUrl);
                 $this->printDebug("Saved image reference");
             }
         }
         //create party membership relationships
         if ($partyName) {
             //if membership relationship with party doesn't exist, create it
             $partyRel = LsQuery::getByModelAndFieldsQuery('Relationship', array('entity1_id' => $member->id, 'entity2_id' => $partyEntity->id, 'category_id' => RelationshipTable::MEMBERSHIP_CATEGORY))->fetchOne();
             if (!$partyRel) {
                 $partyRel = new Relationship();
                 $partyRel->Entity1 = $member;
                 $partyRel->Entity2 = $partyEntity;
                 $partyRel->setCategory('Membership');
                 $modified = $partyRel->getAllModifiedFields();
                 $partyRel->save();
                 $partyRel->addReference($this->_profileUrlBase . $member->bioguide_id, null, $modified, 'Congressional Biographical Directory');
                 $this->printDebug("Created membership in political party: " . $partyName);
             }
         }
         //create senate relationships
         foreach ($this->_senateRelationships as $rel) {
             $modified = $rel->getAllModifiedFields();
             $rel->save();
             $rel->addReference($this->_profileUrlBase . $member->bioguide_id, null, $modified, 'Congressional Biographical Directory');
             $this->printDebug("Saved Senate relationship");
         }
         //create house relationships
         foreach ($this->_houseRelationships as $rel) {
             $modified = $rel->getAllModifiedFields();
             $rel->save();
             $rel->addReference($this->_profileUrlBase . $member->bioguide_id, null, $modified, 'Congressional Biographical Directory');
             $this->printDebug("Saved House relationship");
         }
         //save everything
         if (!$this->testMode) {
             $this->db->commit();
         }
     } catch (Exception $e) {
         $this->db->rollback();
         throw $e;
     }
 }
Exemplo n.º 14
0
 /**
  * Reports whether the image lookup query returns a row for $person.
  *
  * NOTE(review): the query selects entities with the 'Org' extension and
  * compares the joined image id (i.id) with $person->id. That looks like it
  * may have been meant to filter on the entity id instead; confirm against
  * the callers before changing the query any further.
  *
  * @param object $person Record whose ->id is bound as the query parameter.
  *
  * @return bool True when the query finds a row, false otherwise.
  */
 protected function hasImageAttached($person)
 {
     //equality in a DQL/SQL condition is a single "="; the original "==" is not a valid operator there
     $found = EntityTable::getByExtensionQuery('Org')->leftJoin('e.Image i')->addWhere('i.id = ?', $person->id)->fetchOne();
     return (bool) $found;
 }
 /**
  * Scrapes one Forbes private-company page and creates or updates the company.
  *
  * Fetches $url with $this->browser and extracts rank, name, industry,
  * address, phone, fax, website, CEO, summary, revenue and employee count
  * using patterns selected by $this->year (the page markup differs between
  * the pre-2000, 2000-2004 and 2005+ layouts). If an Org/PrivateCompany
  * entity with a matching name exists, only its revenue is updated;
  * otherwise a new entity is created with the scraped details. In both cases
  * a 'Forbes.com' reference is added and the company is handed to
  * saveToList() together with its rank.
  *
  * Industry, fax and CEO details are parsed but not persisted by this method.
  *
  * @param string $url Page to fetch; also stored as the reference source.
  *
  * @return void
  */
 protected function import($url)
 {
     $company = null;
     if ($this->browser->get($url)->responseIsError()) {
         $this->printDebug("Couldn't get company: " . $url);
         return;
     }
     $text = $this->browser->getResponseText();
     $rank = null;
     $name = null;
     $industryName = null;
     $street1 = null;
     $street2 = null;
     $city = null;
     $state = null;
     $postal = null;
     $phone = null;
     $fax = null;
     $website = null;
     $summary = null;
     $revenue = null;
     $employees = null;
     $ceoName = null;
     $ceoBirthYear = null;
     //get rank
     if ($this->year > 1999 && $this->year < 2005 && preg_match('/ForbesListRank" content="(\\d+)"/i', $text, $match)) {
         $rank = $match[1];
     } elseif ($this->year < 2000 && preg_match('/td class="highlightcolor1">(\\d+)/i', $text, $match)) {
         $rank = $match[1];
     } elseif ($this->year > 2004 && preg_match('/<b>#(\\d+) ([^<]+)<\\/b>/i', $text, $match)) {
         $rank = html_entity_decode($match[1]);
     }
     //get name; without a name there is nothing to match or create, so bail out
     if ($this->year > 1995 && $this->year < 2005 && preg_match('/span class="mainlisttitle">([^<]+)<\\/span>/i', $text, $match)) {
         $name = html_entity_decode($match[1]);
     } elseif ($this->year > 2004 && preg_match('/<b>#(\\d+) ([^<]+)<\\/b>/i', $text, $match)) {
         $name = html_entity_decode($match[2]);
     } else {
         $this->printDebug("Company name not found");
         return;
     }
     //get industry
     if ($this->year > 1995 && $this->year < 2001 && preg_match('/<b>See more private companies in <a [^>]+>([^<]+)<\\/a><\\/b>/ism', $text, $match)) {
         $industryName = trim(html_entity_decode($match[1]));
     } elseif ($this->year > 2000 && $this->year < 2005 && preg_match('/private companies\\<\\/a> in ([^\\.]+)/ism', $text, $match)) {
         $industryName = trim(html_entity_decode($match[1]));
     } elseif ($this->year > 2004 && preg_match('/<b>Industry:<\\/b> <a href="[^"]+">([^<]+)<\\/a>/ism', $text, $match)) {
         $industryName = trim(html_entity_decode($match[1]));
     }
     //get address
     if ($this->year > 1995 && $this->year < 2000 && preg_match('/<td class="mainlisttxt"\\>(.+)phone/smU', $text, $match)) {
         $contactLines = explode('<br>', trim($match[1]));
         array_pop($contactLines);
         //three lines means street1, street2, city/state/zip; otherwise street1, city/state/zip
         $hasStreet2 = count($contactLines) == 3;
         $street1 = $contactLines[0];
         $street2 = $hasStreet2 ? $contactLines[1] : null;
         $city_state_zip = LsLanguage::parseCityStatePostal($hasStreet2 ? $contactLines[2] : $contactLines[1]);
         $city = $city_state_zip['city'];
         $state = $city_state_zip['state'];
         $postal = $city_state_zip['zip'];
     } elseif ($this->year > 1999 && $this->year < 2005 && preg_match('/(view private companies under this industry|in the same industry).+<br><br>(.+)phone/is', $text, $match)) {
         //group 1 is the "same industry" link text; the address block is group 2
         $contactLines = explode('<br>', trim($match[2]));
         array_pop($contactLines);
         $hasStreet2 = count($contactLines) == 3;
         $street1 = $contactLines[0];
         $street2 = $hasStreet2 ? $contactLines[1] : null;
         $city_state_zip = LsLanguage::parseCityStatePostal($hasStreet2 ? $contactLines[2] : $contactLines[1]);
         $city = $city_state_zip['city'];
         $state = $city_state_zip['state'];
         $postal = $city_state_zip['zip'];
     } elseif ($this->year > 2004 && preg_match('/<div class="spaced">(.+)<\\/div>/ismU', $text, $match)) {
         $contactLines = explode('<br>', $match[1]);
         //only treat the block as an address when it does not start with the phone/fax lines
         if (!preg_match('/Phone\\:|Fax\\:/i', $contactLines[0]) && !preg_match('/Phone\\:|Fax\\:/i', $contactLines[1])) {
             $street1 = trim($contactLines[0]);
             if (count($contactLines) == 4) {
                 if (preg_match('/^(.+?) ([A-Z]{2}) (\\d{5})($|-)/sU', trim($contactLines[1]), $match)) {
                     $city = $match[1];
                     $state = $match[2];
                     $postal = $match[3];
                 }
             } elseif (count($contactLines) == 5) {
                 $street2 = $contactLines[1];
                 if (preg_match('/^(.+?) ([A-Z]{2}) (\\d{5})($|-)/sU', trim($contactLines[2]), $match)) {
                     $city = $match[1];
                     $state = $match[2];
                     $postal = $match[3];
                 }
             }
         }
     }
     //get phone
     if ($this->year > 1995 && $this->year < 2005 && preg_match('/phone ([\\d\\-]{12})/is', $text, $match)) {
         $phone = trim(str_replace('-', '', $match[1]));
     } elseif ($this->year > 2004 && preg_match('/Phone: ([\\d\\-]{12})/is', $text, $match)) {
         $phone = trim(str_replace('-', '', $match[1]));
     }
     //get fax
     if ($this->year > 1995 && $this->year < 2005 && preg_match('/fax ([\\d\\-]{12})/is', $text, $match)) {
         $fax = trim(str_replace('-', '', $match[1]));
     } elseif ($this->year > 2004 && preg_match('/Fax: ([\\d\\-]{12})/is', $text, $match)) {
         $fax = trim(str_replace('-', '', $match[1]));
     }
     //get website
     if ($this->year > 1995 && $this->year < 2005 && preg_match('/this company\'s web site[^>]+\\>(http[^\\<]+)/is', $text, $match)) {
         $website = $match[1];
     } elseif ($this->year > 2004 && preg_match('/<div class="spaced">.*<\\/div>\\s+<br>\\s+<a href="(http:\\/\\/[^"]+)">/ismU', $text, $match)) {
         $website = $match[1];
     }
     //get ceo
     if ($this->year > 1995 && $this->year < 2005 && preg_match('/b>CEO: ([^<]+)<\\/b>/ism', $text, $match)) {
         $ceoName = $match[1];
     } elseif ($this->year > 2004 && preg_match('/CEO: ([^<]+)<\\/b> , (\\d+) <br>/ism', $text, $match)) {
         $ceoName = html_entity_decode($match[1]);
         //presumably the number after the name is the CEO's age (TODO confirm); the original
         //subtraction had been split into a separate no-op statement
         $ceoBirthYear = (int) date("Y") - (int) $match[2];
     }
     //get summary
     if ($this->year > 1995 && $this->year < 2000 && preg_match_all('/p class="mainlisttxt">(.*)<\\/p>/ismU', $text, $match)) {
         $summary = str_replace(array('  ', "\n"), array(' ', ' '), html_entity_decode(trim(strip_tags($match[1][1]))));
     } elseif ($this->year > 1999 && $this->year < 2005 && preg_match('/p class="mainlisttxt">(.*)<\\/p>/ismU', $text, $match)) {
         $summary = str_replace(array('  ', "\n"), array(' ', ' '), html_entity_decode(trim(strip_tags($match[1]))));
     } elseif ($this->year > 2004 && preg_match('/<blockquote class="spaced">(.*)<\\/blockquote>/ismU', $text, $match)) {
         $summary = str_replace(array('  ', "\n"), array(' ', ' '), html_entity_decode(trim(strip_tags($match[1]))));
     }
     //get revenue; legacy pages quote millions, 2005+ pages quote billions
     if ($this->year > 1995 && $this->year < 2000 && preg_match('/<td class="mainlisttxt">\\$([\\S]+) mil<sup>e?<\\/sup><\\/td>/ismU', $text, $match)) {
         $this->printDebug($match[1]);
         $revenue = str_replace(",", "", $match[1] . ",000,000");
     } elseif ($this->year > 1999 && $this->year < 2005 && preg_match('/<td class="mainlisttxt" nowrap>([^<]+)<sup>e?<\\/sup><\\/td>/ismU', $text, $match)) {
         $this->printDebug($match[1]);
         $revenue = str_replace(",", "", $match[1] . ",000,000");
     } elseif ($this->year > 2004 && preg_match('/<td class="highlight" nowrap="nowrap">\\$([\\S]+) bil.*<\\/td> <td class="highlight" nowrap="nowrap">[^<]+<\\/td> <td class="highlight" nowrap="nowrap">([^<]+)<\\/td>/ismU', $text, $match)) {
         $revenue = 1000000000 * $match[1];
     }
     //get employees
     if ($this->year > 1995 && $this->year < 2005 && preg_match('/mil<\\/td>.+<td class="mainlisttxt"( nowrap)?>(\\d[^<]+)<\\/td>.+<td class="mainlisttxt">[a-zA-Z]+<\\/td>/ismU', $text, $match)) {
         $employees = str_replace(',', '', $match[2]);
     } elseif ($this->year > 1999 && $this->year < 2005 && preg_match('/<sup>e?<\\/sup><\\/td> <td class="mainlisttxt"( nowrap)?>(\\d[^<]+)<sup>e?<\\/sup><\\/td> <td class="mainlisttxt">[a-zA-Z]+<\\/td>/ismU', $text, $match)) {
         $employees = str_replace(',', '', $match[2]);
     } elseif ($this->year > 2004 && preg_match('/<td class="highlight" nowrap="nowrap">([\\d,]+)<\\/td> <td class="highlight" nowrap="nowrap">[A-Z][a-z]{2,}<\\/td>/', $text, $match)) {
         $employees = str_replace(',', '', $match[1]);
     }
     //name with business words/abbreviations stripped; only used in the debug line below
     $search_company_name = trim(implode(' ', array_diff(explode(' ', ucwords(strtolower($name))), array_merge(LsLanguage::$business, LsLanguage::$businessAbbreviations))));
     $this->printDebug("{$search_company_name} == {$name}");
     //NOTE(review): hyphens are removed from org.name but not from $name before comparing - confirm this is intended
     if ($company = EntityTable::getByExtensionQuery(array('Org', 'PrivateCompany'))->addWhere("LOWER(REPLACE( org.name, '-' , '')) = ?", strtolower($name))->fetchOne()) {
         $this->printDebug("Company exists");
         $company->revenue = $revenue;
         $company->save();
     } else {
         $this->printDebug("Creating new company {$name}");
         Doctrine::getTable('ExtensionDefinition')->clear();
         $company = new Entity();
         $company->addExtension('Org');
         $company->addExtension('Business');
         $company->addExtension('PrivateCompany');
         $company->name = LsLanguage::titleize($name);
         $company->employees = strlen($employees) ? $employees : null;
         $company->revenue = strlen($revenue) ? $revenue : null;
         $company->website = strlen($website) ? $website : null;
         $company->summary = strlen($summary) ? trim($summary) : null;
         //add phone; the fax number is parsed above but is not stored on the company
         if ($phone) {
             $company->addPhone($phone);
         }
         //add address
         if ($city && $state) {
             $address = new Address();
             $address->street1 = strlen($street1) ? $street1 : null;
             $address->street2 = strlen($street2) ? $street2 : null;
             $address->city = strlen($city) ? $city : null;
             if ($state = AddressStateTable::retrieveByText($state)) {
                 $address->State = $state;
             }
             $address->postal = $postal;
             $company->addAddress($address);
             $address->save();
             //arguments passed as plain values: the original "$name = ..." style overwrote local variables
             $address->addReference($url, null, array('city', 'country_id', 'postal', 'state_id', 'street1'), 'Forbes.com', null, null);
         }
     }
     $company->save();
     $company->addReference($url, null, array('website', 'name', 'summary', 'revenue', 'employees'), 'Forbes.com', null, null);
     $this->saveToList($company, $rank);
 }
Exemplo n.º 16
0
 private function importFiling($org, $lda_filing)
 {
     try {
         $this->printTimeSince();
         $this->printDebug('Starting import...');
         $excerpt = array();
         //$time = microtime(1);
         $this->db->beginTransaction();
         $date = null;
         $excerpt['Federal Filing Id'] = $lda_filing->federal_filing_id;
         $excerpt['Year'] = $lda_filing->year;
         $excerpt['Type'] = $lda_filing->LdaType->description;
         if (preg_match('/^[^T]*/su', $lda_filing->received, $match)) {
             $date = $match[0];
             $date = str_replace('/', '-', $date);
         }
         $lda_registrant = Doctrine::getTable('LdaRegistrant')->find($lda_filing->registrant_id);
         $excerpt['Registrant'] = $lda_registrant->name;
         if ($lda_filing->client_id) {
             $lda_client = Doctrine::getTable('LdaClient')->find($lda_filing->client_id);
             $excerpt['Client'] = $lda_client->name;
         } else {
             $this->db->rollback();
             return null;
         }
         $lobbying_entity = null;
         //DETERMINE (& CREATE) LOBBYING ENTITY
         //$this->printTimeSince();
         //$this->printDebug('determine/create...');
         if (strtolower(OrgTable::stripNamePunctuation($lda_client->name)) == strtolower(OrgTable::stripNamePunctuation($lda_registrant->name))) {
             $lobbying_entity = $org;
             $client_entity = null;
             if (!$lobbying_entity->lda_registrant_id) {
                 $lobbying_entity->lda_registrant_id = $lda_registrant->federal_registrant_id;
                 $lobbying_entity->save();
                 $lobbying_entity->addReference(self::$filing_url . $lda_filing->federal_filing_id, null, $lobbying_entity->getAllModifiedFields(), 'LDA Filing', null, $date, false);
             } else {
                 if ($lobbying_entity->lda_registrant_id != $lda_registrant->federal_registrant_id) {
                     $this->printDebug("LDA registrant ids did not match up for {$lobbying_entity->name} and {$lda_registrant->name} even though names matched {$lda_client->name}\n");
                     $this->db->rollback();
                     return null;
                 }
             }
             $this->printDebug($lobbying_entity->name . ' noted (same as client ' . $lda_client->name . ')');
         } else {
             $client_entity = $org;
             if ($lda_client->description) {
                 $description = trim($lda_client->description);
                 if ($description != '' && preg_match('/[\\/\\-]\\d+[\\/\\-]/isu', $description) == 0) {
                     if (strlen($description) < 200) {
                         if (!$org->blurb || $org->blurb == '') {
                             $org->blurb = $description;
                         }
                     } else {
                         if (!$org->summary || $org->summary == '') {
                             $org->summary = $description;
                         }
                     }
                 }
             }
             $org->save();
             $this->printDebug($lda_client->name . ' is distinct from ' . $lda_registrant->name);
         }
         $lda_lobbyists = $lda_filing->LdaLobbyists;
         $excerpt['Lobbyists'] = array();
         foreach ($lda_lobbyists as $lda_lobbyist) {
             $excerpt['Lobbyists'][] = $lda_lobbyist->name;
         }
         $excerpt['Lobbyists'] = implode('; ', $excerpt['Lobbyists']);
         if (!$lobbying_entity) {
             $lobbyist_name = null;
             if (count($lda_lobbyists)) {
                 $lobbyist_parts = explode(',', $lda_lobbyists[0]->name);
                 if (count($lobbyist_parts) > 1) {
                     $lobbyist_last = trim($lobbyist_parts[0]);
                     $arr = LsString::split($lobbyist_parts[1]);
                     $lens = array_map('strlen', $arr);
                     arsort($lens);
                     $keys = array_keys($lens);
                     $lobbyist_longest = $arr[$keys[0]];
                     $lobbyist_name = trim($lobbyist_parts[1]) . ' ' . trim($lobbyist_parts[0]);
                     $existing_lobbyist_registrant = null;
                 } else {
                     $lobbyist_name = preg_replace('/^(Mr|MR|MS|Dr|DR|MRS|Mrs|Ms)\\b\\.?/su', '', $lda_lobbyists[0]->name);
                     $arr = LsString::split(trim($lobbyist_name));
                     $arr = LsArray::strlenSort($arr);
                     $lobbyist_last = array_pop($arr);
                     if (count($arr)) {
                         $lobbyist_longest = array_shift(LsArray::strlenSort($arr));
                     } else {
                         $lobbyist_longest = '';
                     }
                 }
             }
             //check to see if registrant and lobbyist are same
             if (count($lda_lobbyists) == 1 && (strtoupper($lda_lobbyists[0]->name) == strtoupper($lda_registrant->name) || $lobbyist_last && stripos($lda_registrant->name, $lobbyist_last) == strlen($lda_registrant->name) - strlen($lobbyist_last) && stristr($lda_registrant->name, $lobbyist_longest))) {
                 $existing_lobbyist_registrant = EntityTable::getByExtensionQuery('Lobbyist')->addWhere('lobbyist.lda_registrant_id = ?', $lda_registrant->federal_registrant_id)->execute()->getFirst();
                 if ($existing_lobbyist_registrant) {
                     $lobbying_entity = $existing_lobbyist_registrant;
                     $this->printDebug('Existing lobbyist is lobbying entity: ' . $lobbying_entity->name);
                 } else {
                     $lobbyist = $this->prepLobbyistName($lda_lobbyists[0]->name);
                     if ($lobbyist) {
                         $lobbyist->lda_registrant_id = $lda_registrant->federal_registrant_id;
                         $lobbyist->save();
                         $lobbyist->addReference(self::$filing_url . $lda_filing->federal_filing_id, null, $lobbyist->getAllModifiedFields(), 'LDA Filing', null, $date, false);
                         $this->printDebug('New lobbyist/lobbying entity saved: ' . $lobbyist->name);
                         $lobbying_entity = $lobbyist;
                     }
                 }
             } else {
                 if ($existing_firm = EntityTable::getByExtensionQuery('Org')->addWhere('org.lda_registrant_id = ? ', $lda_registrant->federal_registrant_id)->execute()->getFirst()) {
                     $modified = array();
                     $lobbying_entity = $existing_firm;
                     if ($lda_registrant->description) {
                         $description = trim($lda_registrant->description);
                         if ($description != '' && preg_match('/[\\/\\-]\\d+[\\/\\-]/isu', $description) == 0) {
                             if (strlen($description) < 200) {
                                 if (!$existing_firm->blurb || $existing_firm->blurb == '') {
                                     $existing_firm->blurb = $description;
                                     $modified[] = 'blurb';
                                 }
                             } else {
                                 if (!$existing_firm->summary || $existing_firm->summary == '') {
                                     $existing_firm->summary = $description;
                                     $modified[] = 'summary';
                                 }
                             }
                         }
                     }
                     if ($lda_registrant->address && $lda_registrant->address != '' && count($existing_firm->Address) == 0) {
                         if ($address = $existing_firm->addAddress($lda_registrant->address)) {
                             $existing_firm->save();
                             $address->addReference(self::$filing_url . $lda_filing->federal_filing_id, null, $address->getAllModifiedFields(), 'LDA Filing', null, $date, false);
                         }
                     }
                     $existing_firm->save();
                     if (count($modified)) {
                         $existing_firm->addReference(self::$filing_url . $lda_filing->federal_filing_id, null, $modified, 'LDA Filing', null, $date, false);
                     }
                     $this->printDebug('Existing firm is lobbying entity: ' . $lobbying_entity->name);
                 } else {
                     $firm = new Entity();
                     $firm->addExtension('Org');
                     $firm->addExtension('Business');
                     $firm->addExtension('LobbyingFirm');
                     $firm->name = LsLanguage::titleize(OrgTable::stripNamePunctuation($lda_registrant->name), true);
                     $firm->lda_registrant_id = $lda_registrant->federal_registrant_id;
                     if ($lda_registrant->description) {
                         $description = trim($lda_registrant->description);
                         if ($description != '' && preg_match('/[\\/\\-]\\d+[\\/\\-]/isu', $description) == 0) {
                             if (strlen($description) < 200) {
                                 $firm->blurb = $description;
                             } else {
                                 $firm->summary = $description;
                             }
                         }
                     }
                     if ($lda_registrant->address && $lda_registrant->address != '') {
                         if ($address = $firm->addAddress($lda_registrant->address)) {
                             $firm->save();
                             $address->addReference(self::$filing_url . $lda_filing->federal_filing_id, null, $address->getAllModifiedFields(), 'LDA Filing', null, $date, false);
                         }
                     }
                     $firm->save();
                     $this->printDebug('New lobbying firm/lobbying entity saved: ' . $firm->name);
                     $firm->addReference(self::$filing_url . $lda_filing->federal_filing_id, null, $firm->getAllModifiedFields(), 'LDA Filing', null, $date, false);
                     $lobbying_entity = $firm;
                 }
             }
         }
         //PREP GOVT ENTITIES
         //$this->printTimeSince();
         //$this->printDebug('gov entities...');
         $lda_govts = $lda_filing->LdaGovts;
         //$this->printDebug('count of lda govs is ***** ' . count($lda_govts));
         $govt_entities = array();
         $excerpt['Government Bodies'] = array();
         foreach ($lda_govts as $lda_govt) {
             $excerpt['Government Bodies'][] = $lda_govt->name;
             $name_arr = $this->prepGovtName($lda_govt->name);
             if (!$name_arr) {
                 continue;
             }
             if ($govt_entity = EntityTable::findByAlias($lda_govt->name, $context = 'lda_government_body')) {
                 $govt_entities[] = $govt_entity;
                 //$this->printDebug('Existing govt entity: ' . $govt_entity->name);
             } else {
                 if ($govt_entity = EntityTable::getByExtensionQuery(array('Org', 'GovernmentBody'))->addWhere('name = ?', array($name_arr[0]))->fetchOne()) {
                     $govt_entities[] = $govt_entity;
                     $alias = new Alias();
                     $alias->context = 'lda_government_body';
                     $alias->name = $lda_govt->name;
                     $alias->entity_id = $govt_entity->id;
                     $alias->save();
                 } else {
                     $govt_entity = new Entity();
                     $govt_entity->addExtension('Org');
                     $govt_entity->addExtension('GovernmentBody');
                     $govt_entity->name = $name_arr[0];
                     $govt_entity->name_nick = $name_arr[1];
                     $govt_entity->is_federal = 1;
                     $govt_entity->save();
                     $alias = new Alias();
                     $alias->context = 'lda_government_body';
                     $alias->name = $lda_govt->name;
                     $alias->entity_id = $govt_entity->id;
                     $alias->save();
                     $govt_entity->addReference(self::$filing_url . $lda_filing->federal_filing_id, $excerpt, $govt_entity->getAllModifiedFields(), 'LDA Filing', null, $date, false);
                     $govt_entities[] = $govt_entity;
                 }
             }
         }
         $excerpt['Government Bodies'] = implode('; ', $excerpt['Government Bodies']);
         $excerpt_str = '';
         foreach ($excerpt as $k => $v) {
             $excerpt_str .= $k . ": ";
             $excerpt_str .= $v . "\n";
         }
         $excerpt = trim($excerpt_str);
         $this->printDebug($excerpt);
         $relationships = array();
         $lobbying_entity_extensions = $lobbying_entity->getExtensions();
         //CREATE LOBBYIST POSITION RELATIONSHIPS
         //$this->printTimeSince();
         //$this->printDebug('lobbyist positions...');
         $category = Doctrine::getTable('RelationshipCategory')->findOneByName('Position');
         if (!in_array('Lobbyist', $lobbying_entity_extensions)) {
             $firm_lobbyists = array();
             if ($lobbying_entity->exists()) {
                 $q = LsDoctrineQuery::create()->from('Entity e')->leftJoin('e.Relationship r ON (r.entity1_id = e.id)')->where('r.entity2_id = ? AND r.category_id = ?', array($lobbying_entity->id, RelationshipTable::POSITION_CATEGORY));
                 $firm_lobbyists = $q->execute();
             }
             $lobbyists = array();
             foreach ($lda_lobbyists as $lda_lobbyist) {
                 $lobbyist = $this->prepLobbyistName($lda_lobbyist->name);
                 if (!$lobbyist) {
                     continue;
                 }
                 $existing_lobbyist = null;
                 foreach ($firm_lobbyists as $fl) {
                     if (PersonTable::areNameCompatible($fl, $lobbyist)) {
                         $existing_lobbyist = $fl;
                         break;
                     }
                 }
                 //echo "before lobb save or rel save: ";
                 //$this->printTimeSince();
                 if (!$existing_lobbyist) {
                     $lobbyist->save();
                     $lobbyist->addReference(self::$filing_url . $lda_filing->federal_filing_id, $excerpt, $lobbyist->getAllModifiedFields(), 'LDA Filing', null, $date, false);
                     //$this->printDebug('New lobbyist saved: ' . $lobbyist->name);
                     $r = new Relationship();
                     $r->Entity1 = $lobbyist;
                     $r->Entity2 = $lobbying_entity;
                     $r->setCategory('Position');
                     $r->description1 = 'Lobbyist';
                     $r->is_employee = 1;
                     $r->save();
                     $r->addReference(self::$filing_url . $lda_filing->federal_filing_id, $excerpt, $lobbyist->getAllModifiedFields(), 'LDA Filing', null, $date, false);
                     //$this->printDebug('New position relationship saved: ' . $lobbying_entity->name . ' and ' . $lobbyist->name);
                     $lobbyists[] = $lobbyist;
                 } else {
                     //$this->printDebug('Lobbyist exists: ' . $lobbyist->name . ' is same as ' . $existing_lobbyist->name);
                     $lobbyists[] = $existing_lobbyist;
                 }
             }
         }
         //PREP ISSUES
         //$this->printTimeSince();
         //$this->printDebug('issues...');
         $issues = array();
         $lda_issues = Doctrine_Query::create()->from('LdaFilingIssue f')->leftJoin('f.LdaIssue i')->where('f.filing_id = ?', $lda_filing->id)->execute();
         foreach ($lda_issues as $lda_issue) {
             $name = LsLanguage::nameize($lda_issue->LdaIssue->name);
             if (!($issue = Doctrine::getTable('LobbyIssue')->findOneByName($name))) {
                 $issue = new LobbyIssue();
                 $issue->name = $name;
                 $issue->save();
                 //$this->printDebug('Lobbying issue saved: ' . $issue->name);
             }
             $issues[] = array($issue, $lda_issue->specific_issue);
         }
         //CREATE LOBBY FILING
         //$this->printTimeSince();
         //$this->printDebug('creating lobby filing:');
         $lobby_filing = new LobbyFiling();
         $lobby_filing->year = $lda_filing->year;
         $lobby_filing->amount = $lda_filing->amount;
         $lobby_filing->federal_filing_id = $lda_filing->federal_filing_id;
         $period = $lda_filing->LdaPeriod->description;
         $lobby_filing->start_date = $date;
         if ($paren = strpos($period, '(')) {
             $lobby_filing->period = trim(substr($period, 0, $paren));
         } else {
             $lobby_filing->period = 'Undetermined';
         }
         $lobby_filing->report_type = LsLanguage::nameize($lda_filing->LdaType->description);
         foreach ($issues as $issue) {
             $filing_issue = new LobbyFilingLobbyIssue();
             $filing_issue->Issue = $issue[0];
             $filing_issue->Filing = $lobby_filing;
             $filing_issue->specific_issue = $issue[1];
             $filing_issue->save();
         }
         if (in_array('Lobbyist', $lobbying_entity_extensions)) {
             $lobby_filing->Lobbyist[] = $lobbying_entity;
             //$this->printDebug('Lobbying entity lobbyist added to lobbying relationship: ' . $lobbying_entity->name);
         } else {
             foreach ($lobbyists as $lobbyist) {
                 $lobby_filing->Lobbyist[] = $lobbyist;
             }
         }
         //var_dump($lobby_filing->toArray());
         $lobby_filing->save();
         //CREATE TRANSACTION RELATIONSHIP, IF ANY
         //$this->printTimeSince();
         //$this->printDebug('starting transaction relationships:');
         $transaction = null;
         if ($client_entity != null) {
             $transaction = RelationshipTable::getByCategoryQuery('Transaction')->addWhere('r.entity1_id = ?', $client_entity->id)->addWhere('r.entity2_id = ?', $lobbying_entity->id)->addWhere('transaction.is_lobbying = ?', 1)->fetchOne();
             if ($transaction) {
                 $transaction->updateDateRange($date, true);
                 if ($lda_filing->amount && $lda_filing->amount != '') {
                     if (!$transaction->amount || $transaction->amount == '') {
                         $transaction->amount = $lda_filing->amount;
                     } else {
                         $transaction->amount += $lda_filing->amount;
                     }
                 }
                 $transaction->filings++;
                 $transaction->save();
                 $transaction->addReference(self::$filing_url . $lda_filing->federal_filing_id, $excerpt, $transaction->getAllModifiedFields(), 'LDA Filing', null, $date, false);
             } else {
                 $transaction = new Relationship();
                 $transaction->Entity1 = $client_entity;
                 $transaction->Entity2 = $lobbying_entity;
                 $transaction->setCategory('Transaction');
                 $transaction->description1 = 'Lobbying Client';
                 $transaction->is_lobbying = 1;
                 $transaction->filings = 1;
                 $transaction->updateDateRange($date, true);
                 if (in_array('Person', $lobbying_entity_extensions)) {
                     $transaction->description2 = 'Hired Lobbyist';
                 } else {
                     $transaction->description2 = 'Lobbying Firm';
                 }
                 if ($lda_filing->amount && $lda_filing->amount != '') {
                     $transaction->amount = $lda_filing->amount;
                 }
                 $transaction->save();
                 $transaction->addReference(self::$filing_url . $lda_filing->federal_filing_id, $excerpt, $transaction->getAllModifiedFields(), 'LDA Filing', null, $date, false);
                 //$this->printDebug('New lobbying transaction saved between client ' . $client_entity->name . ' and lobbying firm ' . $lobbying_entity->name);
             }
             $relationships[] = $transaction;
         }
         //CREATE LOBBYING RELATIONSHIP
         //$this->printTimeSince();
         //$this->printDebug('starting lobbying relationships:');
         foreach ($govt_entities as $govt_entity) {
             $lobbying_relationship = RelationshipTable::getByCategoryQuery('Lobbying')->addWhere('r.entity1_id = ?', $lobbying_entity->id)->addWhere('r.entity2_id = ?', $govt_entity->id)->fetchOne();
             if ($lobbying_relationship) {
                 $lobbying_relationship->updateDateRange($date);
                 $lobbying_relationship->filings++;
                 $lobbying_relationship->save();
             } else {
                 $lobbying_relationship = new Relationship();
                 $lobbying_relationship->Entity1 = $lobbying_entity;
                 $lobbying_relationship->Entity2 = $govt_entity;
                 $lobbying_relationship->setCategory('Lobbying');
                 if ($transaction) {
                     $lobbying_relationship->description1 = 'Lobbying (for client)';
                 } else {
                     $lobbying_relationship->description1 = 'Direct Lobbying';
                 }
                 $lobbying_relationship->description2 = $lobbying_relationship->description1;
                 $lobbying_relationship->updateDateRange($date, true);
                 $lobbying_relationship->filings = 1;
                 $lobbying_relationship->save();
                 $lobbying_relationship->addReference(self::$filing_url . $lda_filing->federal_filing_id, $excerpt, $lobbying_relationship->getAllModifiedFields(), 'LDA Filing', null, $date, false);
             }
             $relationships[] = $lobbying_relationship;
         }
         foreach ($relationships as $relationship) {
             $lobby_filing->Relationship[] = $relationship;
         }
         $lobby_filing->save();
         //$this->printTimeSince();
         $this->printDebug("Import Completed\n");
         $this->db->commit();
     } catch (Exception $e) {
         $this->db->rollback();
         throw $e;
     }
 }
Exemplo n.º 17
0
 /**
  * Builds a query for entities of the given extension that have no image.
  *
  * The image relation is LEFT JOINed and only rows where the joined image id
  * is NULL are kept; results are grouped by entity id.
  *
  * @param mixed $extension Extension name(s), passed straight through to
  *                         EntityTable::getByExtensionQuery().
  *
  * @return mixed The query object, not yet executed.
  */
 protected function getEntityTypeWithOutImageQuery($extension)
 {
     $entitiesOfType = EntityTable::getByExtensionQuery($extension);

     return $entitiesOfType
         ->leftJoin('e.Image i')
         ->addWhere('i.id IS NULL')
         ->groupby('e.id');
 }
Exemplo n.º 18
0
 /**
  * Imports the board / executive / major-shareholder roster for $this->entity.
  *
  * Steps, in order:
  *  1. Abort (setting $this->empty) when the company has no SEC CIK, and return
  *     early when isAlreadyScraped() says the company was already processed.
  *  2. Snapshot the ids of existing board and executive position relationships
  *     (and their person entity ids) into $this->old_* properties.
  *  3. Build a de-duplicated (by person CIK) roster from getForm4Urls() /
  *     getForm4Data().
  *  4. Load filings via getFilings() and cross-check every roster name against
  *     the filing text to decide whether the position is current.
  *  5. Match each roster person to an existing entity (by CIK, then by
  *     first/last name within this company) or create one via importPerson(),
  *     then import board, executive and ownership relationships.
  *  6. Mark current, non-executive board relationships as no longer current
  *     when the person's name is not found in any loaded filing.
  *  7. Add every filing, plus the company's EDGAR listing URL, as references
  *     on the company entity.
  *
  * Nothing is saved and no references are added by this method's own code when
  * $this->testMode is truthy; what the helper methods it calls do in test mode
  * is not visible here.
  *
  * @param bool $include_execs When true, executive relationships are imported
  *                            for roster entries that carry an officer title.
  * @param bool $include_board When true, board relationships are imported for
  *                            roster entries flagged as directors.
  *
  * @return void
  */
 public function importRoster($include_execs = true, $include_board = true)
 {
     // THE FOLLOWING CODE SHOULD NOT BE USED
     // SEC CIKs should be scraped from tickers beforehand using "rake companies:get_sec_ciks" in Rails
     //
     // if ((($this->entity->sec_cik == NULL) || ($this->entity->sec_cik == '')) && $this->entity->ticker)
     // {
     //   $this->printDebug("Fetching CIK for company with ticker " . $this->entity->ticker . "...");
     //   $this->entity->getCik();
     // }
     //we need a company CIK to get data from the SEC
     if (!$this->entity->sec_cik) {
         $this->printDebug("Can't scrape public company: no company CIK!\n");
         $this->empty = true;
         return;
     }
     //make sure we didn't already scrape this company
     //(the result is also stored on $this->is_already_scraped)
     if ($this->is_already_scraped = $this->isAlreadyScraped($this->entity)) {
         $this->printDebug("Already scraped " . $this->entity->name . "; skipping...\n");
         return;
     }
     //get existing director and executive entity & relationship IDs for later use
     // NOTE(review): each statement below is executed twice, once to read column 0
     // (r.id) and once to read column 1 (r.entity1_id). A single fetch would
     // presumably do; confirm before changing.
     $sql = 'SELECT r.id, r.entity1_id FROM relationship r LEFT JOIN position p ON (p.relationship_id = r.id) ' . 'WHERE r.entity2_id = ? AND r.category_id = ? AND p.is_board = 1';
     $stmt = $this->db->execute($sql, array($this->entity->id, RelationshipTable::POSITION_CATEGORY));
     // first selected column: relationship ids
     $this->old_board_rel_ids = $stmt->fetchAll(PDO::FETCH_COLUMN);
     $stmt = $this->db->execute($sql, array($this->entity->id, RelationshipTable::POSITION_CATEGORY));
     // second selected column: person entity ids, de-duplicated
     $this->old_board_entity_ids = array_unique($stmt->fetchAll(PDO::FETCH_COLUMN, 1));
     // same pair of lookups, this time for executive positions
     $sql = 'SELECT r.id, r.entity1_id FROM relationship r LEFT JOIN position p ON (p.relationship_id = r.id) ' . 'WHERE r.entity2_id = ? AND r.category_id = ? AND p.is_executive = 1';
     $stmt = $this->db->execute($sql, array($this->entity->id, RelationshipTable::POSITION_CATEGORY));
     $this->old_exec_rel_ids = $stmt->fetchAll(PDO::FETCH_COLUMN);
     $stmt = $this->db->execute($sql, array($this->entity->id, RelationshipTable::POSITION_CATEGORY));
     $this->old_exec_entity_ids = array_unique($stmt->fetchAll(PDO::FETCH_COLUMN, 1));
     //compile roster of company directors and executives using recent Form 4s
     $form4_urls = $this->getForm4Urls();
     $roster = array();
     // person CIKs already on the roster; only the first result per CIK is kept
     $unique_ciks = array();
     foreach ($form4_urls as $url_arr) {
         if ($result = $this->getForm4Data($url_arr)) {
             if (!in_array($result['personCik'], $unique_ciks)) {
                 $roster[] = $result;
                 $unique_ciks[] = $result['personCik'];
                 $this->printDebug("Added " . $result['parsedName'] . " to roster");
             }
         }
     }
     $this->printDebug("Fetched roster with " . count($roster) . " names for " . $this->entity->name . " (" . $this->entity->id . ")");
     if (!count($roster)) {
         $this->printDebug("No roster found; aborting company scrape...");
         $this->empty = true;
         return;
     }
     //search company info for roster names
     //try both S- registration statements and proxies for given years
     // NOTE(review): getFilings() presumably populates $this->filings and
     // $this->recent_filing_date, which are read below; confirm in its definition.
     $this->getFilings();
     if (!count($this->filings)) {
         $this->printDebug("No annual filings found; aborting company scrape...");
         $this->empty = true;
         return;
     }
     $this->printDebug("Cross-checking roster names using annual filings:");
     foreach ($this->filings as $filing) {
         $this->printDebug($filing['url'] . " (" . $filing['date'] . ")");
     }
     $this->printDebug("Recent filing date: " . $this->recent_filing_date);
     // NOTE(review): $current_board_ids and $current_roster are filled in the loop
     // below but never read again inside this method.
     $current_board_ids = array();
     $current_roster = array();
     //loop through names from form 3 & 4s
     foreach ($roster as $r) {
         $this->printDebug("Cross-checking " . $r['parsedName'] . "...");
         if ($r['primaryExt'] == 'Org') {
             $this->printDebug("Organization; skipping...");
             continue;
         }
         $matched = false;
         // reset the per-person filing info; it is set below to the first filing
         // whose text matches the person's name
         $this->filing_date = null;
         $this->filing_name = null;
         $this->filing_url = null;
         //look for name in proxies and S-registrations
         foreach ($this->filings as $filing) {
             if (preg_match_all($r['regexName'], $filing['doc']->text, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE)) {
                 $matched = true;
                 $this->filing_date = $filing['date'];
                 $this->filing_name = $filing['name'];
                 $this->filing_url = $filing['url'];
                 break;
             }
         }
         //if name found in filing text, or if form 4 is more recent than the filing date, consider it current
         //if not: consider not current if board, consider it unknown otherwise
         //(for non-directors, "recent" means a form dated within the last year, and unknown is stored as null)
         if ($r['isDirector'] == '1') {
             $current = $matched || strtotime($r['date']) > strtotime($this->recent_filing_date);
         } else {
             if ($matched) {
                 $current = 1;
             } elseif (strtotime($r['date']) > strtotime('1 year ago')) {
                 $current = 1;
             } else {
                 $current = null;
             }
         }
         //match existing or create person
         //(only for directors when boards are included, titled officers when execs are included, or ten-percent owners)
         if ($r['isDirector'] == '1' && $include_board || $include_execs && $r['officerTitle'] != '' || $r['isTenPercentOwner'] == '1') {
             //look for existing entity by CIK
             $p = EntityTable::getByExtensionQuery('BusinessPerson')->addWhere('businessperson.sec_cik = ?', $r['personCik'])->fetchOne();
             if (!$p) {
                 //check for entity with same first & last names and a position in this company
                 // NOTE(review): category_id is hard-coded to 1 here while the rest of the
                 // method uses RelationshipTable::POSITION_CATEGORY; presumably the same
                 // value, confirm. This also overwrites the $matches filled by
                 // preg_match_all() above.
                 $matches = LsDoctrineQuery::create()->select('e.*')->from('Entity e')->leftJoin('e.Person p')->leftJoin('e.Relationship r ON (e.id = r.entity1_id)')->where('p.name_last = ?', $r['person']['name_last'])->andWhere('p.name_first = ?', $r['person']['name_first'])->andWhere('e.primary_ext = ?', 'Person')->andWhere('r.entity2_id = ?', $this->entity->id)->andWhere('r.category_id = 1')->execute();
                 // only an unambiguous single match is reused; zero or several matches
                 // fall through to importPerson()
                 if (count($matches) == 1) {
                     $p = $matches[0];
                     $p->addExtension('BusinessPerson');
                     $p->sec_cik = $r['personCik'];
                     if (!$this->testMode) {
                         $p->save();
                     }
                     $this->printDebug("Found existing person with same name in same company: " . $p['name'] . " (" . $p['id'] . ")");
                 } else {
                     $p = $this->importPerson($r);
                     // NOTE(review): $new_entities is never initialised or read in this method.
                     $new_entities[] = $p;
                 }
             } else {
                 $this->printDebug("Found existing person with same SEC CIK: " . $p['name']);
             }
             //$p should always exist at this point right?
             if ($p) {
                 //save entity ID for comparison with existing entities
                 if ($current) {
                     $current_board_ids[] = $p->id;
                     $current_roster[] = $r;
                 }
                 //add address to person
                 //$this->importAddress($r['address'], $p, $r);
                 if ($r['isDirector'] == '1' && $include_board) {
                     $this->importBoardRelationship($p->id, $r, $r['officerTitle'], $current);
                 }
                 if ($r['officerTitle'] != '' && $include_execs) {
                     // one officer title string may yield several descriptions;
                     // each one is imported as its own executive relationship
                     $descriptions = self::parseDescriptionStr($r['officerTitle'], $this->entity);
                     foreach ($descriptions as $d) {
                         //don't create executive positions with board titles
                         if ($r['isDirector'] != '1' || !LsArray::inArrayNoCase($d, PositionTable::$boardTitles)) {
                             $this->importExecutiveRelationship($p->id, $r, $d, $current);
                         }
                     }
                 }
                 if ($r['isTenPercentOwner']) {
                     //make sure there isn't already one
                     $count = LsDoctrineQuery::create()->from('Relationship r')->where('r.category_id = ?', RelationshipTable::OWNERSHIP_CATEGORY)->andWhere('r.entity1_id = ? AND r.entity2_id = ?', array($p->id, $this->entity->id))->count();
                     if (!$count) {
                         $rel = new Relationship();
                         $rel->setCategory('Ownership');
                         $rel->entity1_id = $p->id;
                         $rel->entity2_id = $this->entity->id;
                         // current only when the form is under a year old; otherwise unknown (null)
                         $rel->is_current = strtotime($r['date']) > strtotime('1 year ago') ? true : null;
                         $rel->description1 = 'major shareholder';
                         //Form 3s let us set a start date
                         if ($r['formName'] == 'Form 3' && $r['date']) {
                             //filing date could be inaccurate, so only indicate month
                             //(the trailing day component is replaced with "-00")
                             $date = LsDate::formatFromText($r['date']);
                             $rel->start_date = preg_replace('/-\\d\\d$/', '-00', $date);
                         }
                         if (!$this->testMode) {
                             $rel->save();
                             //save source
                             $rel->addReference($r['readableXmlUrl'], null, null, $this->entity->name . ' ' . $r['formName'], null, $r['date']);
                         }
                         // in test mode the relationship was never saved, so $rel->id is
                         // presumably empty in this message
                         $this->printDebug("+ Ownership relationship created: " . $rel->id);
                     }
                 }
             }
         } else {
             $this->printDebug("Not a board, executive, or ownership position; skipping...");
         }
     }
     // re-read board relationship ids now that the roster has been imported
     $sql = 'SELECT r.id, r.entity1_id FROM relationship r LEFT JOIN   position p ON (p.relationship_id = r.id) ' . 'WHERE r.entity2_id = ? AND r.category_id = ? AND p.is_board = 1';
     $stmt = $this->db->execute($sql, array($this->entity->id, RelationshipTable::POSITION_CATEGORY));
     $board_rel_ids = $stmt->fetchAll(PDO::FETCH_COLUMN);
     // NOTE(review): the statement is executed again here but its result is never read.
     $stmt = $this->db->execute($sql, array($this->entity->id, RelationshipTable::POSITION_CATEGORY));
     if (count($board_rel_ids)) {
         //update old board relationships
         $board_rels = LsDoctrineQuery::create()->from('Relationship r')->leftJoin('r.Entity1 e1')->leftJoin('e1.Person p')->whereIn('r.id', $board_rel_ids)->execute();
         /*$board_checked = $this->checkBoardPage($board_rels);
           if ($board_checked == -1)
           {
             $this->printDebug("PROBLEMS ACCESSING GOOGLE");
             //die;
           }*/
         // always true: the board-page check above is disabled, so the filing-based
         // check below runs unconditionally
         if (1) {
             //$this->printDebug("GOOD BOARD PAGE NOT FOUND");
             foreach ($board_rels as $rel) {
                 //only update if old board relationship is current but board member isn't on current roster
                 // NOTE(review): the condition below does not consult the roster (or
                 // $current_board_ids); it applies to every current, non-executive board
                 // relationship, including ones imported above. Confirm that is intended.
                 if ($rel->is_board && $rel->is_current && !$rel->is_executive) {
                     //maybe the board member has no recent Form 4s, so check the recent filings
                     $matched = false;
                     foreach ($this->filings as $filing) {
                         if (preg_match_all($rel->Entity1->Person->getNameRegex(), $filing['doc']->text, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE)) {
                             $matched = true;
                             break;
                         }
                     }
                     if (!$matched) {
                         $this->printDebug("~ Previously existing board relationship no longer current: " . $rel);
                         $rel->is_current = false;
                         if (!$this->testMode) {
                             $rel->save();
                         }
                     }
                 }
                 /*
                   executive relationships can't have their is_current field updated because
                   not all executives from Form 4s appear on the annual filings!
                 */
             }
         }
     }
     //add all filings as references for the company
     foreach ($this->filings as $filing) {
         //save source
         if (!$this->testMode) {
             $this->entity->addReference($filing['url'], null, null, $filing['name'], null, $filing['date']);
         }
     }
     // finally, reference the company's EDGAR filing index page itself
     $url = "http://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=" . $this->entity->sec_cik . "&dateb=&owner=exclude&count=40";
     if (!$this->testMode) {
         $this->entity->addReference($url, null, null, $this->entity->name . ' SEC EDGAR filings', null, null);
     }
 }
Exemplo n.º 19
0
 /**
  * Builds an entity query that matches a name, or any of several spelling
  * variants of it, against both entity names and alias names.
  *
  * Variants are generated by repeatedly applying a fixed list of regex
  * substitutions (periods, "&" / "and", commas, "US" / "United States") to the
  * name and to every variant produced so far, until no new string appears.
  *
  * @param string|array $extensions Extension name(s) the entity must have. When
  *                                 'Org' is among them, organisation suffixes
  *                                 (except 'Bancorp') are stripped from $str first.
  * @param string       $str        Name to search for; surrounding whitespace is trimmed.
  * @param int          $strength   0 = substring match ('%name%'), 1 = prefix
  *                                 match ('name%'), any other value = the name
  *                                 is used as the LIKE pattern unchanged.
  *
  * @return mixed The (unexecuted) query, or an empty array when the prepared
  *               name is shorter than 3 bytes. Callers must handle both.
  */
 public static function getByExtensionAndNameQuery($extensions, $str, $strength = 1)
 {
     $extensions = (array) $extensions;
     $str = trim($str);
     if (in_array('Org', $extensions)) {
         $str = OrgTable::removeSuffixes($str, $exclude = array('Bancorp'));
     }
     if (strlen($str) < 3) {
         return array();
     }
     $q = EntityTable::getByExtensionQuery($extensions)->leftJoin('e.Alias a');
     $search_queries = array($str);
     // Ordered (pattern, replacement) pairs. This used to be a pattern => replacement
     // map that listed '\\.' twice; PHP keeps only the last value for a duplicated
     // key, so the "period becomes a space" variant was silently never generated.
     $substitutions = array(
         array('\\.', ' '),
         array('\\.', ''),
         array('\\s&\\s', ' and '),
         array('\\sand\\s', ' & '),
         array(' & ', ' '),
         array(',', ''),
         array('\\bUS\\b', 'United States'),
         array('United States\\b', 'US'),
     );
     // $search_queries grows while it is being walked, so variants of variants are
     // generated too; the in_array() check guarantees termination.
     $i = 0;
     while ($i < count($search_queries)) {
         $name = $search_queries[$i];
         $i++;
         // a variant this short stops further expansion (it stays in the list)
         if (strlen($name) < 3) {
             break;
         }
         foreach ($substitutions as $pair) {
             $new = preg_replace('/' . $pair[0] . '/isu', $pair[1], $name);
             // preg_replace() returns null on failure (e.g. malformed UTF-8 with the
             // /u flag). Skip it: a null variant would become the pattern '%' below
             // and match every row.
             if ($new === null) {
                 continue;
             }
             // strict comparisons so numeric-looking names ("0.0" vs "00") are
             // compared as text rather than as numbers
             if ($new !== $name && !in_array($new, $search_queries, true)) {
                 $search_queries[] = $new;
             }
         }
     }
     // turn each variant into a LIKE pattern according to $strength
     foreach ($search_queries as &$s) {
         if ($strength == 0) {
             $s = '%' . $s . '%';
         } elseif ($strength == 1) {
             $s .= '%';
         }
     }
     unset($s);
     // one "like ?" per variant for entity names, and the same again for alias
     // names, so the parameter list is supplied twice
     $e = implode(' or ', array_fill(0, count($search_queries), 'e.name like ?'));
     $a = implode(' or ', array_fill(0, count($search_queries), 'a.name like ?'));
     $search_queries = array_merge($search_queries, $search_queries);
     $q->addWhere($e . ' or ' . $a, $search_queries);
     return $q;
 }
Exemplo n.º 20
0
 /**
  * Builds the query for the next batch of 'Business' entities to process.
  *
  * The batch size is $this->_org_limit. When the current round has a truthy
  * 'last_processed' meta value, only entities with a greater id are selected.
  *
  * NOTE(review): the fixed upper bound of id < 1006 looks like a leftover test
  * restriction; confirm whether it is still wanted.
  *
  * @return mixed The query object, not yet executed.
  */
 private function getBusinessQuery()
 {
     $batchQuery = EntityTable::getByExtensionQuery('Business')->limit($this->_org_limit);

     // getMeta() is only consulted when hasMeta() reports the key exists
     $resumeAfterId = null;
     if ($this->hasMeta($this->_round, 'last_processed')) {
         $resumeAfterId = $this->getMeta($this->_round, 'last_processed');
     }
     if ($resumeAfterId) {
         $batchQuery->addWhere('e.id > ?', $resumeAfterId);
     }

     $batchQuery->addWhere('e.id < ?', '1006');

     return $batchQuery;
 }