/**
 * Imports the company list for $this->year and attaches each company to $this->list.
 *
 * Flow:
 *  - aborts (die) when safeToRun('fortune1000') reports that the script may not run;
 *  - calls setList(), then loads $this->companies for the configured year
 *    (only 2008 is handled here);
 *  - for every company, inside its own transaction: normalizes the name, reuses the
 *    Entity with that exact name or creates one via getCompany2008(), and adds an
 *    LsListEntity row for (list, rank, entity) unless that exact row already exists.
 *
 * The transaction is committed only when $this->testMode is falsy.
 *
 * @throws Exception any exception raised while processing a company is rethrown
 *                   after the transaction has been rolled back
 */
public function execute()
{
    if (!$this->safeToRun('fortune1000')) {
        $this->printDebug('script already running');
        die;
    }

    $this->setList();

    switch ($this->year) {
        case 2008:
            $this->getCompanyList2008();
            echo "list imported\n";
    }

    // The loop ends at the end of the array or at the first falsy element.
    while ($company = current($this->companies)) {
        try {
            $this->db->beginTransaction();

            $company['name'] = OrgTable::stripNamePunctuation($company['name']);
            $rank = $company['rank'];

            // Start every iteration from a known state: when no entity exists and the
            // year has no creation branch below, $corp would otherwise be read while
            // undefined by the "if ($corp)" check.
            $corp = null;

            $existing = Doctrine_Query::create()
                ->from('Entity e')
                ->where('name = ?', $company['name']);

            if ($existing->count() == 0) {
                switch ($this->year) {
                    case 2008:
                        $corp = $this->getCompany2008(
                            $company['fortune_id'],
                            $company['name'],
                            $company['revenue']
                        );
                }
            } else {
                $corp = $existing->fetchOne();
            }

            if ($corp) {
                // Two corps can have the same rank, so the duplicate check matches on
                // entity_id as well as rank.
                $rank_existing = Doctrine_Query::create()
                    ->from('LsListEntity L')
                    ->where(
                        'list_id = ? and rank = ? and entity_id = ?',
                        array($this->list->id, $rank, $corp->id)
                    )
                    ->count();

                if ($rank_existing == 0) {
                    $listentity = new LsListEntity();
                    $listentity->entity_id = $corp->id;
                    $listentity->list_id = $this->list->id;
                    $listentity->rank = $rank;
                    $listentity->save();
                    echo "{$rank} {$corp->name} (saved)\n";
                } else {
                    echo "{$rank} {$corp->name} (already saved)\n";
                }
            }

            // NOTE(review): in test mode the transaction is neither committed nor rolled
            // back here, so it stays open across iterations — confirm this is intended.
            if (!$this->testMode) {
                $this->db->commit();
            }
        } catch (Exception $e) {
            $this->db->rollback();
            throw $e;
        }

        next($this->companies);
    }
}
/**
 * Returns at most two word parts of an organization's name.
 *
 * The name (looked up through EntityTable::getName($id) when $name is falsy) is
 * stripped of punctuation, split on spaces, backslashes and forward slashes, and
 * passed through self::filterNameParts(). When that leaves nothing, or a single part
 * that equals its own upper-cased form, up to two extra parts are collected from the
 * entity's aliases (split on hyphens as well) and merged in, keeping first-seen order.
 *
 * @param mixed       $id   entity id, used for the name and alias lookups
 * @param string|null $name optional name; when falsy it is fetched by $id
 *
 * @return array the first two parts, or array($name) when no parts remain
 */
static function getNameParts($id, $name = null)
{
    if (!$name) {
        $name = EntityTable::getName($id);
    }

    // Base parts of the org name, used to filter donations.
    $cleanName = OrgTable::stripNamePunctuation($name);
    $parts = self::filterNameParts(preg_split('#[ \\\\/]#', $cleanName));

    $partCount = count($parts);
    $needsAliasParts = $partCount == 0
        || ($partCount == 1 && $parts[0] == strtoupper($parts[0]));

    if ($needsAliasParts) {
        $extraParts = array();

        foreach (AliasTable::getByEntityId($id, false) as $alias) {
            $cleanAlias = OrgTable::stripNamePunctuation($alias);
            $candidates = self::filterNameParts(preg_split('#[ \\-\\\\/]#', $cleanAlias));

            foreach ($candidates as $candidate) {
                $extraParts[] = $candidate;

                // Two alias parts are enough; stop scanning aliases altogether.
                if (count($extraParts) > 1) {
                    break 2;
                }
            }
        }

        // Counting occurrences and taking the keys yields the unique values of the
        // merged list without changing their order.
        $merged = array_merge($parts, $extraParts);
        $parts = array_keys(array_count_values($merged));
    }

    if (count($parts)) {
        return array_slice($parts, 0, 2);
    }

    return array($name);
}
/**
 * Normalizes a raw government body name into a display name and an abbreviation.
 *
 * Special cases, checked against the trimmed input:
 *  - 'HOUSE OF REPRESENTATIVES' and 'SENATE' map to fixed names with no abbreviation;
 *  - 'NONE', 'UNDETERMINED' and the empty string yield null;
 *  - "<Navy|Army|Air Force>, Dept of" is rewritten to "US <branch>".
 *
 * A parenthesized fragment (the closing parenthesis is optional) is removed from the
 * name and used as the abbreviation; 'Corps of Engineers' short-circuits to a fixed
 * name and 'Other' is discarded. 'Natl' and 'Dept' are expanded, periods are dropped,
 * and a comma-separated name is reordered so that its last segment comes first. The
 * result is passed through OrgTable::stripNamePunctuation() and LsLanguage::titleize().
 *
 * @param string $str raw name
 *
 * @return array|null array(name, abbreviation or null), or null when there is no usable name
 */
private function prepGovtName($str)
{
    $str = trim($str);

    if ($str == 'HOUSE OF REPRESENTATIVES') {
        return array('US House of Representatives', null);
    }

    if ($str == 'SENATE') {
        return array('US Senate', null);
    }

    if (in_array($str, array('NONE', 'UNDETERMINED', ''), true)) {
        return null;
    }

    if (preg_match('/(Navy|Army|Air\\sForce)\\,\\s+Dept\\s+of/', $str, $match)) {
        $str = str_replace($match[0], 'US ' . $match[1], $str);
    }

    // Pull out a parenthesized abbreviation, tolerating a missing closing parenthesis.
    $abb = null;
    preg_match('/\\(([^\\)]+)\\)?/s', $str, $match);
    if ($match) {
        $abb = $match[1];
        $str = trim(str_replace($match[0], '', $str));
    }

    if ($abb == 'Corps of Engineers') {
        return array('US Army Corps of Engineers', null);
    }

    if ($abb == 'Other') {
        $abb = null;
    }

    $str = str_replace(
        array('Natl', 'Dept', '.'),
        array('National', 'Department', ''),
        $str
    );

    // "A, B, C" becomes "C A; B": the last comma-separated segment moves to the front.
    $segments = explode(',', $str);
    if (count($segments) > 1) {
        $lastSegment = array_pop($segments);
        $str = trim($lastSegment) . ' ' . implode('; ', $segments);
    }

    $str = LsLanguage::titleize(OrgTable::stripNamePunctuation($str));

    return array($str, $abb);
}