/**
 * Derives Family relationships from kinship notes found in entity summaries.
 *
 * Selects Person/ElectedRepresentative entities whose summary starts with a
 * parenthesised kinship word, extracts every "<relation> of <name>" fragment
 * from the first parenthesised group of the summary, and searches for
 * entities whose name contains every word of <name>. A Relationship is saved
 * only when exactly one candidate other than the member remains, no
 * Relationship already links the two entities, and
 * FamilyTable::getDescription2() returns a reverse description.
 * Progress is echoed to stdout.
 *
 * @param array $arguments Not read by this method.
 * @param array $options   'application' and 'env' select the configuration.
 */
protected function execute($arguments = array(), $options = array())
{
    $configuration = ProjectConfiguration::getApplicationConfiguration($options['application'], $options['env'], true);
    $databaseManager = new sfDatabaseManager($configuration);
    $databaseManager->initialize($configuration);

    // NOTE(review): '(son%' is selected here but "son" is not among the
    // relations matched by the regular expression below - confirm whether
    // that omission is intentional.
    $q = EntityTable::getByExtensionQuery(array('Person', 'ElectedRepresentative'))
        ->addWhere(
            'summary like ? OR summary like ? OR summary like ? OR summary like ? OR summary like ? OR summary like ? OR summary like ? OR summary like ? OR summary like ?',
            array('(daughter%', '(son%', '(father%', '(mother%', '(cousin%', '(husband%', '(wife%', '(brother%', '(sister%')
        )
        ->orderBy('person.name_last');
    $members = $q->execute();

    foreach ($members as $member) {
        // Only the first parenthesised group of the summary is inspected.
        if (preg_match('/\\([^\\)]*\\)/isu', $member->summary, $match)) {
            echo $member->name . ":\n";

            if (preg_match_all('/(brother|sister|daughter|mother|father|wife|husband|cousin)\\sof\\s+([^\\;\\)\\,]*)(\\;|\\)|\\,)/isu', $match[0], $matches, PREG_SET_ORDER)) {
                foreach ($matches as $m) {
                    echo "\t\t" . $m[1] . ' : of : ' . $m[2] . "\n";

                    // Every word of the relative's name (dots removed) must
                    // appear somewhere in the candidate's name.
                    $m[2] = str_replace('.', '', $m[2]);
                    $parts = LsString::split($m[2]);
                    $candidateQuery = EntityTable::getByExtensionQuery(array('Person', 'ElectedRepresentative'));
                    foreach ($parts as $part) {
                        $candidateQuery->addWhere('e.name like ?', '%' . $part . '%');
                    }
                    $people = $candidateQuery->execute();

                    $family = array();
                    foreach ($people as $person) {
                        echo "\t\t\t\t" . $person->name . "\n";
                        if ($person->id != $member->id) {
                            $family[] = $person;
                        }
                    }

                    // Ambiguous or missing matches are skipped.
                    if (count($family) != 1) {
                        continue;
                    }

                    // Use the single filtered candidate. The loop variable
                    // $person holds the LAST search hit, which can be the
                    // member itself, so it must not be used here.
                    $relative = $family[0];

                    $existing = LsDoctrineQuery::create()
                        ->from('Relationship r')
                        ->where(
                            '(r.entity1_id = ? or r.entity2_id =?) and (r.entity1_id = ? ' . "\n" . 'or r.entity2_id = ?)',
                            array($member->id, $member->id, $relative->id, $relative->id)
                        );
                    if ($existing->count()) {
                        continue;
                    }

                    if ($description2 = FamilyTable::getDescription2($m[1], $relative->Gender->id)) {
                        $relationship = new Relationship();
                        $relationship->setCategory('Family');
                        $relationship->Entity1 = $member;
                        $relationship->Entity2 = $relative;
                        $relationship->description1 = $m[1];
                        $relationship->description2 = $description2;
                        $relationship->save();

                        // Reuse the member's Congressional Biographical
                        // Directory reference, when one exists, as the source.
                        $ref = LsQuery::getByModelAndFieldsQuery('Reference', array('object_model' => 'Entity', 'object_id' => $member->id, 'name' => 'Congressional Biographical Directory'))->fetchOne();
                        if ($ref) {
                            $relationship->addReference($ref->source, null, null, $ref->name, $ref->source_detail, $ref->publication_date);
                        }

                        echo "-------------------------------added relationship\n";
                    }
                }
            }

            echo "\n";
        }
    }
}
/**
 * Chooses the addresses this object will work on.
 *
 * A truthy $addresses value is stored as given. Otherwise Address rows with
 * a null latitude are queried, capped at $this->_limit and, when a
 * 'standardization'/'last_processed' meta value exists, restricted to ids
 * greater than that value.
 *
 * @param mixed $addresses Addresses to use, or a falsy value to query them.
 */
public function setAddresses($addresses = null)
{
    if ($addresses) {
        $this->_addresses = $addresses;

        return;
    }

    $pending = LsQuery::getByModelAndFieldsQuery('Address', array('latitude' => null))
        ->limit($this->_limit);

    // Resume after the last processed id when one has been recorded.
    if ($this->hasMeta('standardization', 'last_processed')) {
        $pending->addWhere('id > ?', $this->getMeta('standardization', 'last_processed'));
    }

    $this->printDebug($pending->getSqlQuery());
    $this->_addresses = $pending->execute();
}
/**
 * Action for the bulk-add form.
 *
 * Without a 'filename' request parameter the user is redirected to
 * operation/upload. On a valid POST the request parameters are handed to a
 * MassAddScraper, its edits and errors are stored in the user session, and
 * the user is redirected to operation/massiveReview. If the relationship
 * parameters are inconsistent a flash notice is set instead. The template
 * variables at the end are always assigned when execution reaches them.
 *
 * @param sfWebRequest $request The current request (type assumed from usage).
 */
public function executeMassiveAdd($request)
{
    $this->massive_form = new MassiveForm();

    if (!$request->getParameter('filename')) {
        $this->redirect('operation/upload');
    } elseif ($request->isMethod('post')) {
        $this->massive_form->bind($request->getParameter('massive'));

        if ($this->massive_form->isValid()) {
            $filename = implode(DIRECTORY_SEPARATOR, array(
                sfConfig::get('sf_root_dir'),
                'tmp',
                $request->getParameter('filename'),
            ));
            $org_id = $request->getParameter('org_id');
            $list_id = $request->getParameter('list_id');
            $category_id = $request->getParameter('category_id');
            $relationship_description = $request->getParameter('relationship_description');
            $source_url = $request->getParameter('massive[source]');
            $source_name = $request->getParameter('massive[name]');

            // Is there already a relationship in this category carrying the
            // requested description?
            $knownDescription = LsQuery::getByModelAndFieldsQuery('Relationship', array('category_id' => $category_id, 'description1' => $relationship_description))->fetchOne();

            $noRelationshipRequested = !$category_id && !$org_id;
            $categoryUsable = $category_id && (!$relationship_description || $knownDescription);

            if ($noRelationshipRequested || $categoryUsable) {
                $scraper = new MassAddScraper(false, false, sfContext::getInstance()->getConfiguration(), $this->getUser());
                $scraper->setShowTime(false);
                $scraper->disableBeep();
                $scraper->setOptions($filename, $source_url, $source_name, $list_id, $org_id, $category_id, $relationship_description);
                $scraper->run();

                $errors = $scraper->getErrors();
                $edits = $scraper->getEdits();

                $this->filename = $request->getParameter('filename');
                $this->original = $request->getParameter('original');

                $user = $this->getUser();
                $user->setAttribute('edits', $edits);
                $this->getUser()->setAttribute('errors', $errors);

                $this->redirect('operation/massiveReview');
            } else {
                $this->getUser()->setFlash('notice', 'Problems with relationship section. If org is set, make sure category is set. Also, title/description must already exist in db for specific category.');
            }
        }
    }

    // Template variables; the ids are only defined after a valid POST.
    $this->filename = $request->getParameter('filename');
    $this->original = $request->getParameter('original');

    if (isset($org_id)) {
        $this->org = Doctrine::getTable('Entity')->find($org_id);
    } else {
        $this->org = null;
    }

    if (isset($list_id)) {
        $this->list = Doctrine::getTable('LsList')->find($list_id);
    } else {
        $this->list = null;
    }

    if (isset($category_id)) {
        $this->category_id = $category_id;
    } else {
        $this->category_id = null;
    }

    $this->categories = RelationshipCategoryTable::getByExtensionsQuery('Person', 'Org')->execute();
}
/**
 * Builds, without executing, the query for Tag rows matching a triple.
 *
 * @param mixed $namespace Value matched against triple_namespace.
 * @param mixed $predicate Value matched against triple_predicate.
 * @param mixed $value     Value matched against triple_value.
 *
 * @return mixed Whatever LsQuery::getByModelAndFieldsQuery() returns.
 */
static function getByTripleQuery($namespace, $predicate, $value)
{
    $tripleFields = array(
        'triple_namespace' => $namespace,
        'triple_predicate' => $predicate,
        'triple_value' => $value,
    );

    return LsQuery::getByModelAndFieldsQuery('Tag', $tripleFields);
}
/**
 * Closes chamber membership relationships of members who did not continue.
 *
 * Entities tagged congress/session/<$session - 1> whose id is not listed in
 * $this->_existingSessionMemberIds get their open (end_date IS NULL)
 * membership relationships with the House or Senate entity marked as not
 * current and ended in the year before $this->_sessionStartYear.
 * The work runs in one transaction, committed only outside test mode.
 *
 * @param int $session Number of the current session.
 *
 * @throws Exception Any failure is rethrown after a rollback.
 */
public function updateDiscontinuingMemberRelationships($session)
{
    $previousSession = $session - 1;
    $this->printDebug("\n\nUpdating relationships for discontinuing members of session " . $previousSession . "\n");

    try {
        $this->db->beginTransaction();

        //get current session members for reference
        $this->loadExistingSessionMembers($session);

        //get previous session members
        $taggedQuery = LsQuery::getByModelAndFieldsQuery('ObjectTag', array('object_model' => 'Entity'))
            ->select('objecttag.object_id');
        $previousMembers = $taggedQuery
            ->leftJoin('objecttag.Tag t')
            ->addWhere(
                't.triple_namespace = ? AND t.triple_predicate = ? AND t.triple_value = ?',
                array('congress', 'session', $previousSession)
            )
            ->fetchArray();

        foreach ($previousMembers as $row) {
            $memberId = $row['object_id'];

            // Members still present in the current session are left alone.
            if (in_array($memberId, $this->_existingSessionMemberIds)) {
                continue;
            }

            //if member not in existing session, end their previous session relationship
            $this->printDebug("Ending relationships for discontinuing member with ID " . $memberId);

            $endDate = ($this->_sessionStartYear - 1) . '-00-00';
            $chambers = array($this->_houseEntityId, $this->_senateEntityId);

            LsDoctrineQuery::create()
                ->update('Relationship r')
                ->where('r.entity1_id = ?', $memberId)
                ->andWhere('r.category_id = ?', RelationshipTable::MEMBERSHIP_CATEGORY)
                ->andWhere('r.end_date IS NULL')
                ->andWhereIn('r.entity2_id', $chambers)
                ->set('r.is_current', '?', false)
                ->set('r.end_date', '?', $endDate)
                ->execute();
        }

        if (!$this->testMode) {
            $this->db->commit();
        }
    } catch (Exception $e) {
        $this->db->rollback();
        throw $e;
    }
}
/**
 * Tags the invoking object with the Tag identified by a triple.
 *
 * An existing Tag with the same namespace/predicate/value is reused;
 * otherwise a new Tag is created. The Tag and the ObjectTag link are saved
 * inside a single transaction.
 *
 * @param mixed $namespace Stored in / matched against triple_namespace.
 * @param mixed $predicate Stored in / matched against triple_predicate.
 * @param mixed $value     Stored in / matched against triple_value.
 * @param bool  $visible   is_visible value for a newly created Tag.
 *
 * @return ObjectTag|false The new link, or false when hasObjectTag() reports
 *                         the object already carries the Tag.
 *
 * @throws Exception Any failure while saving is rethrown after a rollback.
 */
public function addTagByTriple($namespace, $predicate, $value, $visible = true)
{
    $object = $this->getInvoker();

    //check for existing Tag
    $tag = LsQuery::getByModelAndFieldsQuery('Tag', array('triple_namespace' => $namespace, 'triple_predicate' => $predicate, 'triple_value' => $value))->fetchOne();

    // Decide on the early exit BEFORE opening a transaction. Returning from
    // inside the try block would leave the transaction begun here without a
    // matching commit or rollback.
    if ($tag && $this->hasObjectTag($tag)) {
        return false;
    }

    $db = Doctrine_Manager::connection();

    try {
        $db->beginTransaction();

        if (!$tag) {
            $tag = new Tag();
            $tag->triple_namespace = $namespace;
            $tag->triple_predicate = $predicate;
            $tag->triple_value = $value;
            $tag->is_visible = $visible;
            $tag->save();
        }

        //link object to Tag
        $objectTag = new ObjectTag();
        $objectTag->object_model = get_class($object);
        $objectTag->object_id = $object->id;
        $objectTag->Tag = $tag;
        $objectTag->save();

        $db->commit();
    } catch (Exception $e) {
        $db->rollback();
        throw $e;
    }

    return $objectTag;
}
/**
 * Looks up the entity attached to an alias.
 *
 * @param mixed $name    Alias name to match.
 * @param mixed $context Alias context to match (null by default).
 *
 * @return mixed The Entity of the first matching Alias, or null when the
 *               lookup yields nothing truthy.
 */
static function findByAlias($name, $context = null)
{
    $criteria = array('context' => $context, 'name' => $name);
    $alias = LsQuery::getByModelAndFieldsQuery('Alias', $criteria)->fetchOne();

    return $alias ? $alias->Entity : null;
}
/**
 * Imports the filings of one LDA XML file into the Lda* tables.
 *
 * Processing starts at $lobby_import->offset and handles each <Filing>
 * element in its own transaction: the import offset is saved, then the
 * filing with its registrant, client, type, period, lobbyists, government
 * entities and issues. Lookup rows are reused when they already exist.
 * Filings without a Registrant, or whose federal filing id is already
 * stored, are skipped.
 *
 * The script terminates (die) once $this->_count exceeds $this->_limit.
 *
 * @param object $lobby_import Import record; filename, offset, done and id
 *                             are read or written here.
 *
 * @throws Exception Any failure is rethrown after rolling back the
 *                   transaction of the current filing.
 */
private function importLdaData($lobby_import)
{
    $path = $this->_dir . $lobby_import->filename;
    $raw = file_get_contents($path);
    $xml = new SimpleXMLElement($raw);
    $filings = $xml->Filing;
    $limit = count($filings);

    $this->printDebug('importing data from ' . $lobby_import->filename . ' (record ' . $lobby_import->offset . ' of ' . $limit . ')');

    for ($n = (int) $lobby_import->offset; $n < $limit; $n++) {
        // Hard stop once the configured number of records has been handled.
        $this->_count = $this->_count + 1;
        if ($this->_count > $this->_limit) {
            die;
        }

        try {
            $this->db->beginTransaction();

            // Persist progress so a later run can resume from this offset.
            $lobby_import->offset = $n;
            if ($n == $limit - 1) {
                $lobby_import->done = 1;
            }
            $lobby_import->save();

            if (!isset($filings[$n])) {
                echo 'ok';
                var_dump($filings[$n - 1]);
                var_dump($filings[$n + 1]);
                $this->printDebug('not set' . $n);
                $this->db->commit();
                continue;
            }

            $filing = $filings[$n];

            if (!isset($filing->Registrant)) {
                $this->db->commit();
                continue;
            }

            $f = new LdaFiling();
            $f->federal_filing_id = $filing['ID'];
            $f->year = $filing['Year'];
            $f->amount = $filing['Amount'];
            $f->received = $filing['Received'];
            $f->import_id = $lobby_import->id;
            $f->offset = $n;

            //check for duplicate
            if (Doctrine::getTable('LdaFiling')->findOneByFederalFilingId($f->federal_filing_id)) {
                $this->db->commit();
                continue;
            }

            //set registrant
            if (!($r = Doctrine::getTable('LdaRegistrant')->findOneByFederalRegistrantId($filing->Registrant['RegistrantID']))) {
                $r = new LdaRegistrant();
                $r->name = LsString::spacesToSpace($filing->Registrant['RegistrantName']);
                $r->federal_registrant_id = $filing->Registrant['RegistrantID'];
                $r->address = $filing->Registrant['Address'];
                $r->description = LsString::spacesToSpace($filing->Registrant['GeneralDescription']);
                $r->country = $filing->Registrant['RegistrantCountry'];
                $r->save();
            }
            $f->registrant_id = $r->id;

            //set client
            if ($filing->Client) {
                if (!($c = LsQuery::getByModelAndFieldsQuery('LdaClient', array('registrant_id' => $r->id, 'federal_client_id' => $filing->Client['ClientID']))->execute()->getFirst())) {
                    $c = new LdaClient();
                    $c->name = LsString::spacesToSpace($filing->Client['ClientName']);
                    $c->federal_client_id = $filing->Client['ClientID'];
                    $c->registrant_id = $r->id;
                    $c->contact_name = LsString::spacesToSpace($filing->Client['ContactFullname']);
                    $c->description = LsString::spacesToSpace($filing->Client['GeneralDescription']);
                    $c->country = $filing->Client['ClientCountry'];
                    $c->state = $filing->Client['ClientState'];
                    $c->save();
                }
                $f->client_id = $c->id;
            }

            //set filing type
            if ($type = (string) $filing['Type']) {
                //look for existing type
                if (!($t = Doctrine::getTable('LdaType')->findOneByDescription($type))) {
                    $t = new LdaType();
                    $t->description = $type;
                    $t->save();
                }
                $f->type_id = $t->id;
                unset($t);
            }

            if ($period = (string) $filing['Period']) {
                //look for existing period
                if (!($p = Doctrine::getTable('LdaPeriod')->findOneByDescription($period))) {
                    $p = new LdaPeriod();
                    $p->description = $period;
                    $p->save();
                }
                $f->period_id = $p->id;
            }

            $f->save();

            //add lobbyists
            if ($filing->Lobbyists) {
                foreach ($filing->Lobbyists->Lobbyist as $lobbyist) {
                    $name = (string) $lobbyist['LobbyistName'];

                    // Lobbyists are looked up per registrant and name.
                    if (!($l = LsQuery::getByModelAndFieldsQuery('LdaLobbyist', array('registrant_id' => $r->id, 'name' => $name))->execute()->getFirst())) {
                        $l = new LdaLobbyist();
                        $l->name = $name;
                        $l->registrant_id = $r->id;
                        $l->status = $lobbyist['LobbyistStatus'];
                        $l->indicator = $lobbyist['LobbyisteIndicator'];
                        $l->official_position = $lobbyist['OfficialPosition'];
                        $l->save();
                    }

                    $fl = new LdaFilingLobbyist();
                    $fl->filing_id = $f->id;
                    $fl->lobbyist_id = $l->id;
                    $fl->save();

                    unset($fl);
                    unset($l);
                }
            }

            //add govt entities
            if ($filing->GovernmentEntities) {
                foreach ($filing->GovernmentEntities->GovernmentEntity as $govt) {
                    $govt = trim($govt['GovEntityName']);

                    if (!($g = Doctrine::getTable('LdaGovt')->findOneByName($govt))) {
                        $g = new LdaGovt();
                        $g->name = $govt;
                        $g->save();
                    }

                    $fg = new LdaFilingGovt();
                    $fg->filing_id = $f->id;
                    $fg->govt_id = $g->id;
                    $fg->save();

                    unset($fg);
                    unset($g);
                }
            }

            //add issues
            if ($filing->Issues) {
                foreach ($filing->Issues->Issue as $issue) {
                    $code = (string) $issue['Code'];

                    if (!($i = Doctrine::getTable('LdaIssue')->findOneByName($code))) {
                        $i = new LdaIssue();
                        $i->name = $code;
                        $i->save();
                    }

                    $fi = new LdaFilingIssue();
                    $fi->filing_id = $f->id;
                    $fi->issue_id = $i->id;
                    $fi->specific_issue = $issue['SpecificIssue'];
                    $fi->save();

                    unset($fi);
                    unset($i);
                }
            }

            $this->printDebug($f->federal_filing_id);

            //check for duplicate again
            // The filing saved above carries this federal id itself, so only
            // a row with a DIFFERENT primary key counts as a duplicate.
            // Matching on the federal id alone would find the new row and
            // roll back every import.
            $isDuplicate = false;
            $sameFederalId = Doctrine::getTable('LdaFiling')->findByFederalFilingId($f->federal_filing_id);
            foreach ($sameFederalId as $existing) {
                if ($existing->id != $f->id) {
                    $isDuplicate = true;
                    break;
                }
            }
            unset($sameFederalId, $existing);

            if ($isDuplicate) {
                $this->db->rollback();
                continue;
            }

            $this->db->commit();
        } catch (Exception $e) {
            $this->db->rollback();
            throw $e;
        }

        unset($f);
        unset($r);
        unset($c);
        unset($filing);
    }

    unset($xml);
    unset($raw);
    unset($filings);
}
/**
 * Fetches the ScraperMeta row of this scraper for a namespace/predicate pair.
 *
 * @param mixed $namespace Value matched against the namespace field.
 * @param mixed $predicate Value matched against the predicate field.
 *
 * @return mixed Result of fetchOne() on the lookup query.
 */
protected function _getMeta($namespace, $predicate)
{
    $criteria = array(
        'scraper' => $this->getName(),
        'namespace' => $namespace,
        'predicate' => $predicate,
    );

    return LsQuery::getByModelAndFieldsQuery('ScraperMeta', $criteria)->fetchOne();
}
/**
 * Processes every corporation id in $this->corp_ids.
 *
 * For each corporation, inside its own transaction:
 *  - when no SEC CIK is stored, getCik() is asked for one by ticker and, if
 *    the corporation has no industry yet, the returned SIC industry is
 *    attached (created first when missing);
 *  - when a CIK is available and more than one 'Director' person is related
 *    through the 'Position' category, the proxy is fetched, paginated and
 *    scanned, with failures recorded through saveMeta();
 *  - a 'scraped' meta flag is saved.
 * The transaction is committed, or rolled back in test mode.
 *
 * @throws Exception Any failure is rethrown after a rollback.
 */
public function execute()
{
    foreach ($this->corp_ids as $corp_id) {
        try {
            $this->db->beginTransaction();
            $this->corp = Doctrine::getTable('Entity')->find($corp_id);

            if (!$this->corp->sec_cik) {
                if ($result = $this->getCik($this->corp->ticker)) {
                    $this->corp->sec_cik = $result['cik'];

                    if (!$this->corp->Industry->count()) {
                        if ($result['sic']['name'] && $result['sic']['name'] != '') {
                            // fetchOne() already returns the record (or a
                            // falsy value), so it must not be fetched again.
                            $industry = LsDoctrineQuery::create()
                                ->from('Industry i')
                                ->where('i.name = ? and i.code = ?', array($result['sic']['name'], $result['sic']['code']))
                                ->fetchOne();

                            if (!$industry) {
                                $industry = new Industry();
                                $industry->name = LsLanguage::nameize(LsHtml::replaceEntities($result['sic']['name']));
                                $industry->context = 'SIC';
                                $industry->code = $result['sic']['code'];
                                $industry->save();
                            }

                            // Link the industry only if no link row exists.
                            $q = LsQuery::getByModelAndFieldsQuery('BusinessIndustry', array('industry_id' => $industry->id, 'business_id' => $this->corp->id));
                            if (!$q->fetchOne()) {
                                $this->corp->Industry[] = $industry;
                            }
                        }

                        $this->corp->save();
                        // The corporation lives in $this->corp; there is no
                        // local $corp variable in this method.
                        $this->corp->addReference($result['url'], null, $this->corp->getAllModifiedFields(), 'SEC EDGAR Page');
                    }
                }

                $this->corp->save();
            }

            if ($this->corp->sec_cik) {
                $category = Doctrine::getTable('RelationshipCategory')->findOneByName('Position');
                $this->people = $this->corp->getRelatedEntitiesQuery('Person', $category->id, 'Director', null, null, false)->execute();

                if (count($this->people) > 1) {
                    if ($this->need_proxy) {
                        $this->getProxy();
                        // NOTE(review): the flag is already true here, so
                        // this assignment changes nothing - confirm whether
                        // false was intended.
                        $this->need_proxy = true;
                    }

                    if ($this->url) {
                        $this->paginate();

                        if ($this->pages) {
                            $this->printDebug('paginated');
                            $this->findNamePages();
                            $this->findBasicInfo();
                        } else {
                            $this->saveMeta($this->corp->id, 'error', 'not_paginated');
                            $this->printDebug('not paginated');
                        }
                    } else {
                        $this->saveMeta($this->corp->id, 'error', 'no_proxy_retrieved');
                        $this->printDebug('could not get proxy');
                    }
                }
            }

            $this->saveMeta($this->corp->id, 'scraped', '1');

            if (!$this->testMode) {
                $this->db->commit();
            } else {
                $this->db->rollback();
            }
        } catch (Exception $e) {
            //something bad happened, rollback
            $this->db->rollback();
            throw $e;
        }
    }
}
/**
 * Builds the officer/director roster of every corporation in $this->corp_ids.
 *
 * The script dies when safeToRun('sec') is false and returns null when no
 * corp ids are set. Unless $this->override is set, corporations flagged
 * 'is_complete' or 'lacks_cik' are skipped. For the others, inside one
 * transaction per corporation:
 *  - a missing SEC CIK is looked up by ticker (with the SIC industry attached
 *    when the corporation has none); no CIK means the corporation is flagged
 *    'lacks_cik' and skipped;
 *  - Form 4 data is collected and matched against the first 2007/2008 proxy
 *    URL; without a proxy URL the corporation is flagged 'lacks_cik' and
 *    skipped;
 *  - roster entries that appear in the proxy and are a director or carry an
 *    officer title are imported as person, address and relationships.
 * After the commit (skipped in test mode) a ProxyScraper is run for the
 * corporation, and the corporation is finally flagged 'is_complete'.
 *
 * @return null Only returned explicitly when $this->corp_ids is not set.
 *
 * @throws Exception Any failure is rethrown after a rollback.
 */
public function execute()
{
    if (!$this->safeToRun('sec')) {
        $this->printDebug('script already running');
        die;
    }

    if (!isset($this->corp_ids)) {
        return null;
    }

    foreach ($this->corp_ids as $corp_id) {
        // Skip flags are honoured only when no override was requested.
        if (!$this->override) {
            if ($this->hasMeta($corp_id, 'is_complete') && $this->getMeta($corp_id, 'is_complete')) {
                $this->printDebug("Already fetched roster for Entity " . $corp_id . "; skipping...");
                continue;
            }

            if ($this->hasMeta($corp_id, 'lacks_cik') && $this->getMeta($corp_id, 'lacks_cik')) {
                $this->printDebug("No SEC cik found for Entity " . $corp_id . "; skipping...");
                continue;
            }
        }

        try {
            echo number_format(memory_get_usage()) . "\n";

            $this->browser->restart($this->defaultHeaders);
            $this->db->beginTransaction();

            $corp = Doctrine::getTable('Entity')->find($corp_id);

            echo "\n*****************\n\nfetching roster for " . $corp->name . " (" . $corp->ticker . ")" . "\n\n";

            //grab the corporation's cik if it doesn't have one already
            if (!$corp->sec_cik) {
                $result = $this->getCik($corp->ticker);

                if (!$result) {
                    $this->saveMeta($corp->id, 'lacks_cik', true);
                    $this->db->commit();
                    continue;
                }

                $corp->sec_cik = $result['cik'];

                if ($corp->Industry->count() == 0) {
                    if ($result['sic']['name'] && $result['sic']['name'] != '') {
                        $industry = LsDoctrineQuery::create()
                            ->from('Industry i')
                            ->where('i.name = ? and i.code = ?', array($result['sic']['name'], $result['sic']['code']))
                            ->fetchOne();

                        if (!$industry) {
                            $industry = new Industry();
                            $industry->name = LsLanguage::nameize(LsHtml::replaceEntities($result['sic']['name']));
                            $industry->context = 'SIC';
                            $industry->code = $result['sic']['code'];
                            $industry->save();
                            $this->printDebug('Industry: ' . $industry->name . ' (' . $industry->code . ')');
                        }

                        // Attach the industry unless a link row already exists.
                        $linkQuery = LsQuery::getByModelAndFieldsQuery('BusinessIndustry', array('industry_id' => $industry->id, 'business_id' => $corp->id));
                        if (!$linkQuery->fetchOne()) {
                            $corp->Industry[] = $industry;
                        }
                    }

                    $corp->save();
                    $corp->addReference($result['url'], null, $corp->getAllModifiedFields(), 'SEC EDGAR Page');
                }
            }

            if ($corp->sec_cik) {
                $form4_urls = $this->getForm4Urls($corp->sec_cik);
                $roster = array();

                foreach ($form4_urls as $url_arr) {
                    $form4 = $this->getForm4Data($url_arr, $corp->sec_cik);
                    if ($form4) {
                        $roster[] = $form4;
                    }
                }

                $proxy_urls = $this->getProxyUrls($corp->sec_cik, array('2007', '2008'));

                if (!count($proxy_urls)) {
                    $this->saveMeta($corp->id, 'lacks_cik', true);
                    $this->db->commit();
                    continue;
                }

                $proxy_url = $proxy_urls[0]['url'];
                $proxy_year = $proxy_urls[0]['year'];

                //search proxy for names appearing on form 4s
                $roster = $this->getProxyData($roster, $proxy_url, $proxy_year);

                $corp->addReference($proxy_url, null, null, $proxy_year . ' Proxy');

                //loop through names found on form 4s and search proxy
                foreach ($roster as $entry) {
                    echo "\n" . $entry['personName'] . " is director? " . $entry['isDirector'] . " at " . $entry['form4Url'] . " \n";

                    $inProxy = isset($entry['proxyName']);

                    if ($inProxy) {
                        echo "in proxy as " . $entry['proxyName'] . " \n";
                    } else {
                        echo "not in proxy \n\n";
                    }

                    //make sure this appears in the proxy and has either an officer title or is a director
                    if (!$inProxy || ($entry['isDirector'] != '1' && $entry['officerTitle'] == '')) {
                        continue;
                    }

                    $person = EntityTable::getByExtensionQuery('BusinessPerson')
                        ->addWhere('businessperson.sec_cik = ?', $entry['personCik'])
                        ->fetchOne();

                    if (!$person) {
                        $person = $this->importPerson($entry, $corp->name);
                    }

                    if (!$person) {
                        continue;
                    }

                    $this->importAddress($entry['address'], $person, $entry, $corp->name);

                    if ($entry['isDirector'] == 1) {
                        $this->importRelationship($person, $corp, 'Director', $entry);
                    }

                    if ($entry['officerTitle'] != '') {
                        $descriptions = $this->parseDescriptionStr($entry['officerTitle'], $corp);

                        foreach ($descriptions as $parsed) {
                            // Notes, when present, are appended in parentheses.
                            $position = $parsed['note']
                                ? $parsed['description'] . ' (' . implode(', ', $parsed['note']) . ')'
                                : $parsed['description'];

                            $this->importRelationship($person, $corp, $position, $entry);
                        }
                    }
                }
            }

            if (!$this->testMode) {
                $this->db->commit();
            }

            if (isset($proxy_url)) {
                $proxy_scraper = new ProxyScraper($this->testMode, $this->debugMode, $this->appConfiguration);
                $proxy_scraper->setCorpIds(1, $corp->id);
                $proxy_scraper->setProxy($this->proxyText, $proxy_url, $proxy_year);
                $proxy_scraper->disableBeep();
                $proxy_scraper->run();
            }
        } catch (Exception $e) {
            //something bad happened, rollback
            $this->db->rollback();
            throw $e;
        }

        $this->saveMeta($corp_id, 'is_complete', true);
    }
}
/**
 * Resolves a government body entity from its fedspending name.
 *
 * An Alias in the 'fedspending_government_body' context is tried first.
 * Failing that, a GovernmentBody entity whose name equals $name is looked
 * up and, when found, a new alias for $fedspending_name is saved for it.
 *
 * @param mixed $name             Entity name used for the fallback lookup.
 * @param mixed $fedspending_name Name used for the alias lookup and storage.
 *
 * @return mixed The matching entity, or null when neither lookup succeeds.
 */
private function getGovernmentBodyEntity($name, $fedspending_name)
{
    $aliasContext = 'fedspending_government_body';

    $knownAlias = LsQuery::getByModelAndFieldsQuery('Alias', array('context' => $aliasContext, 'name' => $fedspending_name))->fetchOne();
    if ($knownAlias) {
        return $knownAlias->Entity;
    }

    $body = EntityTable::getByExtensionQuery('GovernmentBody')
        ->addWhere('e.name = ?', $name)
        ->fetchOne();
    if (!$body) {
        return null;
    }

    // Record the alias on the matched entity.
    $newAlias = new Alias();
    $newAlias->context = $aliasContext;
    $newAlias->name = $fedspending_name;
    $newAlias->entity_id = $body->id;
    $newAlias->save();

    return $body;
}