/**
 * Heuristic test of whether a URL looks like it belongs to the named organization.
 *
 * When the URL contains a "//host/" section, only that section is examined.
 * The test succeeds if that text contains, ignoring case, either
 *  - any word of the name that is longer than one character and is not in the
 *    local stop-word / corporate-designator lists, or
 *  - an acronym longer than two characters built from the first character of
 *    the name's words (all words, all but stop-words, or all but designators).
 *
 * @param string $url      URL to examine
 * @param string $org_name organization name; tokenized with LsString::split()
 * @return bool true when one of the checks above matches, false otherwise
 */
public function checkUrl($url, $org_name)
{
  // Reduce the URL to its "//host/" portion when one is present.
  if (preg_match('/\\/\\/[^\\/]+\\//isu', $url, $match)) {
    $url = $match[0];
  }

  $common = array('and', 'the', 'of', 'in', 'at', '&');
  $abbrevs = array('Corporation', 'Inc', 'Group', 'LLC', 'LLP', 'Corp', 'Co', 'Cos', 'LP', 'PA', 'Dept', 'Department', 'International', 'Administration');
  $both = array_merge($common, $abbrevs);

  $all = '';        // first characters of every word
  $no_common = '';  // ... skipping stop-words
  $no_corp = '';    // ... skipping corporate designators

  foreach (LsString::split($org_name) as $part) {
    // An empty token has no first character and can never match; skipping it
    // avoids an "uninitialized string offset" notice on $part[0].
    if (strlen($part) == 0) {
      continue;
    }

    $is_common = LsArray::inArrayNoCase($part, $common);
    $is_abbrev = LsArray::inArrayNoCase($part, $abbrevs);

    // A significant word found in the URL settles the question immediately.
    if (strlen($part) > 1 && !LsArray::inArrayNoCase($part, $both) && stristr($url, $part)) {
      return true;
    }

    if (!$is_common) {
      $no_common .= $part[0];
    }
    if (!$is_abbrev) {
      $no_corp .= $part[0];
    }
    $all .= $part[0];
  }

  // Fall back to acronyms; ones of two characters or fewer are ignored.
  foreach (array($all, $no_common, $no_corp) as $acronym) {
    if (strlen($acronym) > 2 && stristr($url, $acronym)) {
      return true;
    }
  }

  return false;
}
/**
 * Searches Google Images for "<name> logo" and attaches the first plausible hit.
 *
 * Returns early when imageExists() reports the entity already has an image.
 * A result is accepted when its URL ends in png/gif/jpg and at least two
 * tokens of (org name words + "logo" + "seal") also occur among the tokens of
 * the image's file name and description text.
 *
 * @param Entity $org organization to find a logo for
 * @return bool true if an image already existed or one was attached, false otherwise
 */
protected function getLogoFromGoogleImage(Entity $org)
{
  if ($this->imageExists($org)) {
    return true;
  }

  // Construct the search query from the name minus business words/abbreviations.
  $nameParts = array_diff(explode(' ', $org->name), array_merge(LsLanguage::$business, LsLanguage::$businessAbbreviations));
  $cleanName = trim(implode(' ', $nameParts));
  $query = $cleanName . ' logo';
  $this->printDebug("Querying Google with term: " . $query);

  $google = new LsGoogle();
  $google->setService('images');
  $google->setQuery($query);
  $google->execute();
  $results = $google->getResults();

  // Tokens to look for; they do not depend on the individual result.
  // preg_split() replaces split(), which was removed in PHP 7.0; the
  // character class splits on space, dot, underscore and hyphen as before.
  $organization_name_parts = preg_split('/[ ._-]/', strtolower($org->name));
  $organization_name_parts[] = "logo";
  $organization_name_parts[] = "seal";

  foreach ($results as $result) {
    $image_url = $result->url;
    $image_content = $result->contentNoFormatting;
    $this->printDebug("Checking: " . $image_url);

    if (!preg_match('/(png|gif|jpg)$/i', $image_url)) {
      continue;
    }
    $this->printDebug("Checking " . $image_url);

    // Tokens of the (URL-decoded) file name plus the description, digits removed.
    $basefilename = basename($image_url);
    $haystack = strtolower(basename(urldecode($basefilename)) . " " . urldecode($image_content));
    $organization_match_parts = LsArray::arrayTrim(preg_split('/[ ._-]/', preg_replace("/[0-9]/", "", $haystack)));

    $intersect = array_intersect($organization_name_parts, $organization_match_parts);
    if (count($intersect) < 2) {
      continue;
    }

    $attached = $this->attachImage($org, $image_url, 'Organization logo');
    if ($attached) {
      $this->printDebug("Saved");
      return true;
    }
  }

  $this->printDebug("Logo not found on Google");
  return false;
}
/**
 * Locates the director profile section in $this->text and imports each profile.
 *
 * Outline, as implemented below:
 *  1. For every set of name matches in $this->sets, look for a two-digit
 *     number (20-99) following each name that is not excluded by the regex
 *     (years, percentages, decimals, "years with", days, months).
 *  2. Pick the run of nearby name/age pairs with the most distinct people.
 *  3. Keep the pairs sharing the dominant HTML tag and resolve duplicates.
 *  4. Mark 'Director' Position relationships of people not found as not current.
 *  5. Detect the HTML tag that encloses each profile, cut the text into
 *     segments, parse each with parseSegment()/parseBlurb(), and hand the
 *     result to importDirectorInfo().
 *
 * @return null always; the method works through side effects (and returns
 *              early when the section cannot be determined)
 */
private function findBasicInfo()
{
  if (!$this->sets) {
    return null;
  }

  $re = '/^([^<]*?<[^>]*>)*?[^<]*?(?<!([\\.,$\\/]))(\\b[2-9]\\d\\b)(?!((,\\s+200\\d|199\\d)|%|[,\\.]\\d|[-\\s]+([Yy]ears?\\s+(with|career)|[Dd]ays?|[Mm]onths?)\\b))/su';
  $age_match_sets = array();

  // Go through the sets of name matches and find age matches for each.
  foreach ($this->sets as $set) {
    $age_matches = array();
    for ($i = 0; $i < count($set); $i++) {
      // Search up to the next name match, or 2000 bytes after the last one.
      $len = $i == count($set) - 1 ? 2000 : $set[$i + 1]['pos'] - $set[$i]['pos'];
      if ($len > 100000) {
        continue;
      }
      $str = substr($this->text, $set[$i]['pos'], $len);
      if (preg_match($re, $str, $match)) {
        // Remember how many tags lie between name and age, and the last one.
        $n = preg_match_all('/<(\\p{L}+)[^>]*>/s', $match[0], $m, PREG_SET_ORDER);
        $tag = 'empty';
        if ($n > 0) {
          $tag = $m[count($m) - 1][1];
        }
        $stripped = LsHtml::stripTags($match[0]);
        if (strlen($stripped) < 2000) {
          $age_matches[] = array('ind' => $i, 'age_match' => $match, 'age' => $match[3], 'name_match' => $set[$i], 'num_tags' => $n, 'tag' => $tag, 'len' => strlen($match[0]));
        }
      }
    }
    $this->printDebug('count age matches is ' . count($age_matches));
    $age_match_sets[] = $age_matches;
  }

  // Find the best set (most unique names and ages).
  $max = 0;
  $best = array(array('unique' => array(), 'set' => array()));
  foreach ($age_match_sets as $age_matches) {
    if (count($age_matches) < 2) {
      continue;
    }
    $unique = array($age_matches[0]['name_match']['id']);
    $temp = array($age_matches[0]);
    for ($i = 1; $i < count($age_matches); $i++) {
      if ($age_matches[$i]['ind'] - 4 <= $age_matches[$i - 1]['ind']) {
        // Still the same run: at most four name matches apart.
        $temp[] = $age_matches[$i];
        if (!in_array($age_matches[$i]['name_match']['id'], $unique)) {
          $unique[] = $age_matches[$i]['name_match']['id'];
        }
      } else {
        // Run ended: record it, then start a new one at this match.
        if (count($unique) > $max) {
          $max = count($unique);
          if (count(array_intersect($best[0]['unique'], $unique)) == 0 && count($best[0]['unique']) > 2) {
            array_unshift($best, array('unique' => $unique, 'set' => $temp));
          } else {
            $best = array(array('unique' => $unique, 'set' => $temp));
          }
        } elseif (count(array_intersect($best[0]['unique'], $unique)) == 0 && count($unique) > 2) {
          $best[] = array('unique' => $unique, 'set' => $temp);
        }
        $unique = array($age_matches[$i]['name_match']['id']);
        $temp = array($age_matches[$i]);
      }
    }
    if (count($unique) > $max) {
      $max = count($unique);
      if (count(array_intersect($best[0]['unique'], $unique)) == 0) {
        array_unshift($best, array('unique' => $unique, 'set' => $temp));
      } else {
        $best = array(array('unique' => $unique, 'set' => $temp));
      }
    }
  }
  $best = $best[0]['set'];

  // Find the tag all names have in common (if there is one).
  $tag_counts = array();
  foreach ($best as $b) {
    if (isset($tag_counts[$b['tag']])) {
      $tag_counts[$b['tag']]++;
    } else {
      $tag_counts[$b['tag']] = 1;
    }
    $this->printDebug($b['ind'] . '. ' . $b['name_match']['name'] . ' : ' . $b['age'] . ' : ' . strlen($b['age_match'][0]) . ' : ' . $b['num_tags'] . ' : ' . $b['tag']);
  }
  $tag = null;
  foreach ($tag_counts as $k => $v) {
    if ($v > 0.8 * count($best)) {
      $tag = $k;
      break;
    }
  }
  $age_set = array();
  if ($tag) {
    foreach ($best as $b) {
      if ($b['tag'] == $tag) {
        $age_set[] = $b;
      }
    }
  } else {
    $age_set = $best;
  }
  $age_set = LsArray::multiSort($age_set, array('name_match', 'id'));

  // Find duplicates and determine the best match out of the pair/set.
  $singles = array();
  $doubles = array();
  $num_tags = 0;
  $len = 0;
  for ($i = 0; $i < count($age_set); $i++) {
    $double = array($age_set[$i]);
    while ($i < count($age_set) - 1 && $double[0]['name_match']['id'] == $age_set[$i + 1]['name_match']['id']) {
      $double[] = $age_set[$i + 1];
      $i++;
    }
    if (count($double) == 1) {
      $singles[] = $age_set[$i];
      $num_tags += $age_set[$i]['num_tags'];
      $len += $age_set[$i]['len'];
    } else {
      $doubles[] = $double;
    }
  }
  if (count($singles) < 3) {
    // Too few unambiguous matches to compare against: keep the first run of
    // distinct people in document order.
    $unique = array();
    $sets = array(array());
    $age_set = LsArray::multiSort($age_set, array('name_match', 'pos'));
    foreach ($age_set as $a) {
      if (!in_array($a['name_match']['id'], $unique)) {
        $unique[] = $a['name_match']['id'];
        $sets[count($sets) - 1][] = $a;
      } else {
        $unique = array($a['name_match']['id']);
        $sets[] = array($a);
      }
    }
    $age_set = $sets[0];
  } else {
    $avg_len = $len / count($singles);
    $avg_tags = $num_tags / count($singles);
    // For each duplicated person keep the candidate whose tag count is
    // closest to the average of the unambiguous matches. (The unused ratio
    // variables of the previous version divided by $avg_tags, which is 0
    // when no single match contains a tag, and were removed.)
    foreach ($doubles as $double) {
      $best = null;
      foreach ($double as $d) {
        if (!$best) {
          $best = $d;
        } elseif (abs($avg_tags - $best['num_tags']) > abs($avg_tags - $d['num_tags'])) {
          $best = $d;
        } elseif (abs($avg_tags - $best['num_tags']) == abs($avg_tags - $d['num_tags']) && abs($avg_len - $best['len']) == abs($avg_len - $d['len'])) {
          // NOTE(review): on a full tie the later candidate wins; a length
          // tie-break ("closer to $avg_len") may have been intended - confirm.
          $best = $d;
        }
      }
      $singles[] = $best;
    }
    $age_set = LsArray::multiSort($singles, array('name_match', 'pos'));
  }

  // Determine which directors were found, which weren't.
  $ids = array();
  foreach ($age_set as $a) {
    $ids[] = $a['name_match']['id'];
  }
  $position_category = null; // looked up once, on first use
  foreach ($this->people as $p) {
    if (in_array($p->id, $ids)) {
      continue;
    }
    if ($position_category === null) {
      $position_category = Doctrine::getTable('RelationshipCategory')->findOneByName('Position');
    }
    $relationship = LsDoctrineQuery::create()->from('Relationship r')->where('r.entity1_id = ?', $p->id)->addWhere('r.entity2_id = ?', $this->corp->id)->addWhere('r.category_id = ?', $position_category->id)->addWhere('r.description1 = ?', 'Director')->fetchOne();
    if ($relationship) {
      $relationship->is_current = 0;
      $relationship->save();
    }
  }
  if (count($age_set) < 0.5 * count($this->people)) {
    $this->printDebug('not enough names in age set:' . count($age_set) . ' vs. ' . count($this->people));
    return null;
  }

  // Figure out which tags surround name/age pairs: for every inner match,
  // record the length of the enclosing element for each candidate tag.
  $tag_arr = array('table' => array(), 'tr' => array(), 'td' => array(), 'div' => array(), 'br' => array(), 'p' => array());
  for ($i = 1; $i < count($age_set) - 1; $i++) {
    $str = substr($this->text, $age_set[$i - 1]['name_match']['pos'], $age_set[$i + 1]['name_match']['pos'] - $age_set[$i - 1]['name_match']['pos']);
    foreach (array_keys($tag_arr) as $candidate) {
      $tag_str = LsHtml::getStringInTag($str, $candidate, $age_set[$i]['name_match']['pos'] - $age_set[$i - 1]['name_match']['pos']);
      if (strlen($tag_str) > 0) {
        $tag_arr[$candidate][] = strlen($tag_str);
      }
    }
  }
  arsort($tag_arr);
  if (count(reset($tag_arr)) == 0) {
    $this->printDebug('problems with enclosing tag detection');
    return null;
  }
  // reset() above left the internal pointer on the top-ranked tag.
  $splitter = key($tag_arr);

  // Smallest number of splitter tags between two consecutive names.
  $tag_counts = array();
  for ($i = 0; $i < count($age_set) - 1; $i++) {
    $str = substr($this->text, $age_set[$i]['name_match']['pos'], $age_set[$i + 1]['name_match']['pos'] - $age_set[$i]['name_match']['pos']);
    str_ireplace('<' . $splitter, ' ', $str, $count); // used only for its count
    $tag_counts[] = $count;
  }
  sort($tag_counts);
  $ct = $tag_counts[0];
  if (!$ct) {
    return null;
  }

  $post_strlen = 0;
  $info_arr = array();
  $segments = array();
  for ($i = 0; $i < count($age_set); $i++) {
    $a = $age_set[$i];
    $matches = LsString::striposMulti($this->text, '</' . $splitter, $ct, $a['name_match']['pos']);
    $end = $matches[count($matches) - 1];
    $start = strripos(substr($this->text, 0, $a['name_match']['pos']), '<' . $splitter);
    $str = substr($this->text, $start, $end - $start);
    if ($i == count($age_set) - 1 && count($matches) > 1) {
      // Last profile: prefer the shorter cut if it is closer to the average
      // segment length seen so far.
      $end = $matches[count($matches) - 2];
      $str2 = substr($this->text, $start, $end - $start);
      $avg = strlen(implode(' ', $segments)) / count($segments);
      if (abs(strlen($str2) - $avg) < abs(strlen($str) - $avg)) {
        $str = $str2;
      }
    }
    $segments[] = $str;
    $info = $this->parseSegment($str, $a['name_match']['pos'] - $start, $a['name_match']['pos'] - $start + strlen($a['name_match']['match'][2][0]));
    $info = $this->parseBlurb($info, $a);

    // Look to see if the bio appears after the parsed segment.
    if ($i < count($age_set) - 1) {
      $next_start = strripos(substr($this->text, 0, $age_set[$i + 1]['name_match']['pos']), '<' . $splitter);
      $post_str = substr($this->text, $end, $next_start - $end);
    } else {
      // No following profile: read the average trailing length seen so far.
      $avg = $post_strlen / (count($age_set) - 1);
      $post_str = substr($this->text, $end, (int) $avg);
    }
    $post_strlen += strlen($post_str);
    $post_str = LsHtml::replaceFontStyleTags($post_str);
    $person = $a['name_match']['person'];
    $last = LsString::escapeStringForRegex($person->name_last);
    $info['post_blurb'] = '';
    if (preg_match_all('/>([^<]*' . $last . '[^<]*)</isu', $post_str, $matches)) {
      $post_blurb = implode(' ', $matches[1]);
      $post_blurb = trim(preg_replace('/\\s+/s', ' ', $post_blurb));
      if (strlen($post_blurb) > 40) {
        $info['post_blurb'] = $post_blurb;
      }
    }
    $info_arr[] = $info;
  }

  $ct = 0;      // profiles whose trailing text is longer than the parsed blurb
  $unv_ct = 0;  // profiles with only an unverified image
  foreach ($info_arr as $info) {
    if (strlen($info['post_blurb']) > strlen($info['blurb'])) {
      $ct++;
    }
    if ($info['img'] == null && $info['unverified_img'] != null) {
      $unv_ct++;
    }
  }

  // If most of the profile segments have images at the end, check to see if
  // they belong to the next profile segment.
  if ($unv_ct > count($age_set) - 3) {
    for ($i = 0; $i < count($age_set); $i++) {
      $len = strripos(substr($this->text, 0, $age_set[$i]['name_match']['pos']), '<' . $splitter);
      $tag_start = strripos(substr($this->text, 0, $len), '<img');
      $str = substr($this->text, $tag_start, 200);
      if (preg_match('/^<img[^>]+src=[\'"]([^\'"]+)[\'"]/is', $str, $match) == 1) {
        // Store on the profile being examined; the previous code assigned to
        // a leftover loop variable, so the image was silently discarded.
        $info_arr[$i]['img'] = $match[1];
      } elseif ($i == 0) {
        break;
      }
    }
  }

  for ($i = 0; $i < count($info_arr); $i++) {
    if ($ct > 0.8 * count($age_set)) {
      // Bios mostly follow the segments: use the trailing text as the blurb.
      $info_arr[$i]['blurb'] = $info_arr[$i]['post_blurb'];
      if (!$info_arr[$i]['since']) {
        $info_arr[$i]['since'] = $this->getStartDate($info_arr[$i]['blurb']);
      }
    }
    $this->importDirectorInfo($info_arr[$i], $age_set[$i]);
    $this->printDebug("\n***");
  }
}
/**
 * Imports one LDA filing for the given organization inside a DB transaction.
 *
 * Steps, as implemented below:
 *  1. Resolve the lobbying entity: $org itself when client and registrant
 *     names match, otherwise an existing/new Lobbyist or lobbying firm
 *     identified by the registrant id.
 *  2. Resolve or create the government bodies named in the filing.
 *  3. Create lobbyist Position relationships (when the lobbying entity is
 *     not itself a Lobbyist).
 *  4. Create the LobbyFiling with its issues and lobbyists.
 *  5. Create/update the client -> lobbying entity Transaction relationship
 *     and the lobbying entity -> government body Lobbying relationships.
 *
 * @param object $org        entity the filing is being imported for
 * @param object $lda_filing LDA filing record
 * @return null always; the transaction is rolled back before the early
 *              returns (no client, mismatching registrant ids, unresolvable
 *              lobbying entity) and committed on success
 * @throws Exception rethrown after rolling back the transaction
 */
private function importFiling($org, $lda_filing)
{
  try {
    $this->printTimeSince();
    $this->printDebug('Starting import...');
    $excerpt = array();
    $this->db->beginTransaction();

    // Source URL recorded with every reference created below.
    $source_url = self::$filing_url . $lda_filing->federal_filing_id;

    $date = null;
    $excerpt['Federal Filing Id'] = $lda_filing->federal_filing_id;
    $excerpt['Year'] = $lda_filing->year;
    $excerpt['Type'] = $lda_filing->LdaType->description;
    // Keep the part of the timestamp before the 'T' separator.
    if (preg_match('/^[^T]*/su', $lda_filing->received, $match)) {
      $date = $match[0];
      $date = str_replace('/', '-', $date);
    }

    $lda_registrant = Doctrine::getTable('LdaRegistrant')->find($lda_filing->registrant_id);
    $excerpt['Registrant'] = $lda_registrant->name;
    if ($lda_filing->client_id) {
      $lda_client = Doctrine::getTable('LdaClient')->find($lda_filing->client_id);
      $excerpt['Client'] = $lda_client->name;
    } else {
      $this->db->rollback();
      return null;
    }

    // DETERMINE (& CREATE) LOBBYING ENTITY
    $lobbying_entity = null;
    if (strtolower(OrgTable::stripNamePunctuation($lda_client->name)) == strtolower(OrgTable::stripNamePunctuation($lda_registrant->name))) {
      // Client lobbies on its own behalf.
      $lobbying_entity = $org;
      $client_entity = null;
      if (!$lobbying_entity->lda_registrant_id) {
        $lobbying_entity->lda_registrant_id = $lda_registrant->federal_registrant_id;
        $lobbying_entity->save();
        $lobbying_entity->addReference($source_url, null, $lobbying_entity->getAllModifiedFields(), 'LDA Filing', null, $date, false);
      } elseif ($lobbying_entity->lda_registrant_id != $lda_registrant->federal_registrant_id) {
        $this->printDebug("LDA registrant ids did not match up for {$lobbying_entity->name} and {$lda_registrant->name} even though names matched {$lda_client->name}\n");
        $this->db->rollback();
        return null;
      }
      $this->printDebug($lobbying_entity->name . ' noted (same as client ' . $lda_client->name . ')');
    } else {
      $client_entity = $org;
      if ($lda_client->description) {
        $description = trim($lda_client->description);
        // Descriptions containing something date-like are skipped; short
        // ones fill an empty blurb, long ones an empty summary.
        if ($description != '' && preg_match('/[\\/\\-]\\d+[\\/\\-]/isu', $description) == 0) {
          if (strlen($description) < 200) {
            if (!$org->blurb || $org->blurb == '') {
              $org->blurb = $description;
            }
          } else {
            if (!$org->summary || $org->summary == '') {
              $org->summary = $description;
            }
          }
        }
      }
      $org->save();
      $this->printDebug($lda_client->name . ' is distinct from ' . $lda_registrant->name);
    }

    $lda_lobbyists = $lda_filing->LdaLobbyists;
    $excerpt['Lobbyists'] = array();
    foreach ($lda_lobbyists as $lda_lobbyist) {
      $excerpt['Lobbyists'][] = $lda_lobbyist->name;
    }
    $excerpt['Lobbyists'] = implode('; ', $excerpt['Lobbyists']);

    if (!$lobbying_entity) {
      $lobbyist_last = '';
      $lobbyist_longest = '';
      if (count($lda_lobbyists)) {
        $lobbyist_parts = explode(',', $lda_lobbyists[0]->name);
        if (count($lobbyist_parts) > 1) {
          // "Last, First ..." form.
          $lobbyist_last = trim($lobbyist_parts[0]);
          $arr = LsString::split($lobbyist_parts[1]);
          $lens = array_map('strlen', $arr);
          arsort($lens);
          $keys = array_keys($lens);
          // Nothing after the comma leaves no "longest" given name.
          $lobbyist_longest = count($keys) ? $arr[$keys[0]] : '';
        } else {
          $lobbyist_name = preg_replace('/^(Mr|MR|MS|Dr|DR|MRS|Mrs|Ms)\\b\\.?/su', '', $lda_lobbyists[0]->name);
          $arr = LsString::split(trim($lobbyist_name));
          // NOTE(review): the "last" name is popped after sorting by length,
          // not taken from the original word order - confirm this is intended.
          $arr = LsArray::strlenSort($arr);
          $lobbyist_last = array_pop($arr);
          if (count($arr)) {
            // array_shift() takes its argument by reference, so the sorted
            // array must be held in a variable first.
            $sorted = LsArray::strlenSort($arr);
            $lobbyist_longest = array_shift($sorted);
          } else {
            $lobbyist_longest = '';
          }
        }
      }

      // Check to see if registrant and lobbyist are the same: identical names,
      // or the registrant name ends with the lobbyist's last name and also
      // contains the longest remaining name part. The suffix test is done
      // explicitly; comparing a stripos() result with == treated "not found"
      // (false) as position 0.
      $registrant_is_lobbyist = false;
      if (count($lda_lobbyists) == 1) {
        if (strtoupper($lda_lobbyists[0]->name) == strtoupper($lda_registrant->name)) {
          $registrant_is_lobbyist = true;
        } elseif ($lobbyist_last) {
          $suffix_pos = strlen($lda_registrant->name) - strlen($lobbyist_last);
          $ends_with_last = $suffix_pos >= 0 && strcasecmp(substr($lda_registrant->name, $suffix_pos), $lobbyist_last) == 0;
          $registrant_is_lobbyist = $ends_with_last && stristr($lda_registrant->name, $lobbyist_longest);
        }
      }

      if ($registrant_is_lobbyist) {
        $existing_lobbyist_registrant = EntityTable::getByExtensionQuery('Lobbyist')->addWhere('lobbyist.lda_registrant_id = ?', $lda_registrant->federal_registrant_id)->execute()->getFirst();
        if ($existing_lobbyist_registrant) {
          $lobbying_entity = $existing_lobbyist_registrant;
          $this->printDebug('Existing lobbyist is lobbying entity: ' . $lobbying_entity->name);
        } else {
          $lobbyist = $this->prepLobbyistName($lda_lobbyists[0]->name);
          if ($lobbyist) {
            $lobbyist->lda_registrant_id = $lda_registrant->federal_registrant_id;
            $lobbyist->save();
            $lobbyist->addReference($source_url, null, $lobbyist->getAllModifiedFields(), 'LDA Filing', null, $date, false);
            $this->printDebug('New lobbyist/lobbying entity saved: ' . $lobbyist->name);
            $lobbying_entity = $lobbyist;
          }
        }
      } else {
        if ($existing_firm = EntityTable::getByExtensionQuery('Org')->addWhere('org.lda_registrant_id = ? ', $lda_registrant->federal_registrant_id)->execute()->getFirst()) {
          $modified = array();
          $lobbying_entity = $existing_firm;
          if ($lda_registrant->description) {
            $description = trim($lda_registrant->description);
            if ($description != '' && preg_match('/[\\/\\-]\\d+[\\/\\-]/isu', $description) == 0) {
              if (strlen($description) < 200) {
                if (!$existing_firm->blurb || $existing_firm->blurb == '') {
                  $existing_firm->blurb = $description;
                  $modified[] = 'blurb';
                }
              } else {
                if (!$existing_firm->summary || $existing_firm->summary == '') {
                  $existing_firm->summary = $description;
                  $modified[] = 'summary';
                }
              }
            }
          }
          if ($lda_registrant->address && $lda_registrant->address != '' && count($existing_firm->Address) == 0) {
            if ($address = $existing_firm->addAddress($lda_registrant->address)) {
              $existing_firm->save();
              $address->addReference($source_url, null, $address->getAllModifiedFields(), 'LDA Filing', null, $date, false);
            }
          }
          $existing_firm->save();
          if (count($modified)) {
            $existing_firm->addReference($source_url, null, $modified, 'LDA Filing', null, $date, false);
          }
          $this->printDebug('Existing firm is lobbying entity: ' . $lobbying_entity->name);
        } else {
          $firm = new Entity();
          $firm->addExtension('Org');
          $firm->addExtension('Business');
          $firm->addExtension('LobbyingFirm');
          $firm->name = LsLanguage::titleize(OrgTable::stripNamePunctuation($lda_registrant->name), true);
          $firm->lda_registrant_id = $lda_registrant->federal_registrant_id;
          if ($lda_registrant->description) {
            $description = trim($lda_registrant->description);
            if ($description != '' && preg_match('/[\\/\\-]\\d+[\\/\\-]/isu', $description) == 0) {
              if (strlen($description) < 200) {
                $firm->blurb = $description;
              } else {
                $firm->summary = $description;
              }
            }
          }
          if ($lda_registrant->address && $lda_registrant->address != '') {
            if ($address = $firm->addAddress($lda_registrant->address)) {
              $firm->save();
              $address->addReference($source_url, null, $address->getAllModifiedFields(), 'LDA Filing', null, $date, false);
            }
          }
          $firm->save();
          $this->printDebug('New lobbying firm/lobbying entity saved: ' . $firm->name);
          $firm->addReference($source_url, null, $firm->getAllModifiedFields(), 'LDA Filing', null, $date, false);
          $lobbying_entity = $firm;
        }
      }
    }

    // prepLobbyistName() may yield nothing, leaving no lobbying entity. Every
    // later step dereferences it, so abandon the filing cleanly instead of
    // failing on a null object with the transaction still open.
    if (!$lobbying_entity) {
      $this->printDebug('Could not determine lobbying entity for filing ' . $lda_filing->federal_filing_id);
      $this->db->rollback();
      return null;
    }

    // PREP GOVT ENTITIES
    $lda_govts = $lda_filing->LdaGovts;
    $govt_entities = array();
    $excerpt['Government Bodies'] = array();
    foreach ($lda_govts as $lda_govt) {
      $excerpt['Government Bodies'][] = $lda_govt->name;
      $name_arr = $this->prepGovtName($lda_govt->name);
      if (!$name_arr) {
        continue;
      }
      if ($govt_entity = EntityTable::findByAlias($lda_govt->name, $context = 'lda_government_body')) {
        $govt_entities[] = $govt_entity;
      } elseif ($govt_entity = EntityTable::getByExtensionQuery(array('Org', 'GovernmentBody'))->addWhere('name = ?', array($name_arr[0]))->fetchOne()) {
        // Known body under its cleaned name: remember the LDA spelling as alias.
        $govt_entities[] = $govt_entity;
        $alias = new Alias();
        $alias->context = 'lda_government_body';
        $alias->name = $lda_govt->name;
        $alias->entity_id = $govt_entity->id;
        $alias->save();
      } else {
        $govt_entity = new Entity();
        $govt_entity->addExtension('Org');
        $govt_entity->addExtension('GovernmentBody');
        $govt_entity->name = $name_arr[0];
        $govt_entity->name_nick = $name_arr[1];
        $govt_entity->is_federal = 1;
        $govt_entity->save();
        $alias = new Alias();
        $alias->context = 'lda_government_body';
        $alias->name = $lda_govt->name;
        $alias->entity_id = $govt_entity->id;
        $alias->save();
        // NOTE(review): $excerpt is still an array at this point (it becomes a
        // string further down) - confirm addReference() accepts that.
        $govt_entity->addReference($source_url, $excerpt, $govt_entity->getAllModifiedFields(), 'LDA Filing', null, $date, false);
        $govt_entities[] = $govt_entity;
      }
    }
    $excerpt['Government Bodies'] = implode('; ', $excerpt['Government Bodies']);

    // Flatten the excerpt into "Key: value" lines.
    $excerpt_str = '';
    foreach ($excerpt as $k => $v) {
      $excerpt_str .= $k . ": ";
      $excerpt_str .= $v . "\n";
    }
    $excerpt = trim($excerpt_str);
    $this->printDebug($excerpt);

    $relationships = array();
    $lobbying_entity_extensions = $lobbying_entity->getExtensions();

    // CREATE LOBBYIST POSITION RELATIONSHIPS
    $category = Doctrine::getTable('RelationshipCategory')->findOneByName('Position');
    if (!in_array('Lobbyist', $lobbying_entity_extensions)) {
      $firm_lobbyists = array();
      if ($lobbying_entity->exists()) {
        $q = LsDoctrineQuery::create()->from('Entity e')->leftJoin('e.Relationship r ON (r.entity1_id = e.id)')->where('r.entity2_id = ? AND r.category_id = ?', array($lobbying_entity->id, RelationshipTable::POSITION_CATEGORY));
        $firm_lobbyists = $q->execute();
      }
      $lobbyists = array();
      foreach ($lda_lobbyists as $lda_lobbyist) {
        $lobbyist = $this->prepLobbyistName($lda_lobbyist->name);
        if (!$lobbyist) {
          continue;
        }
        // Reuse a person already holding a position at the firm if the names
        // are compatible.
        $existing_lobbyist = null;
        foreach ($firm_lobbyists as $fl) {
          if (PersonTable::areNameCompatible($fl, $lobbyist)) {
            $existing_lobbyist = $fl;
            break;
          }
        }
        if (!$existing_lobbyist) {
          $lobbyist->save();
          $lobbyist->addReference($source_url, $excerpt, $lobbyist->getAllModifiedFields(), 'LDA Filing', null, $date, false);
          $r = new Relationship();
          $r->Entity1 = $lobbyist;
          $r->Entity2 = $lobbying_entity;
          $r->setCategory('Position');
          $r->description1 = 'Lobbyist';
          $r->is_employee = 1;
          $r->save();
          $r->addReference($source_url, $excerpt, $lobbyist->getAllModifiedFields(), 'LDA Filing', null, $date, false);
          $lobbyists[] = $lobbyist;
        } else {
          $lobbyists[] = $existing_lobbyist;
        }
      }
    }

    // PREP ISSUES
    $issues = array();
    $lda_issues = Doctrine_Query::create()->from('LdaFilingIssue f')->leftJoin('f.LdaIssue i')->where('f.filing_id = ?', $lda_filing->id)->execute();
    foreach ($lda_issues as $lda_issue) {
      $name = LsLanguage::nameize($lda_issue->LdaIssue->name);
      if (!($issue = Doctrine::getTable('LobbyIssue')->findOneByName($name))) {
        $issue = new LobbyIssue();
        $issue->name = $name;
        $issue->save();
      }
      $issues[] = array($issue, $lda_issue->specific_issue);
    }

    // CREATE LOBBY FILING
    $lobby_filing = new LobbyFiling();
    $lobby_filing->year = $lda_filing->year;
    $lobby_filing->amount = $lda_filing->amount;
    $lobby_filing->federal_filing_id = $lda_filing->federal_filing_id;
    $period = $lda_filing->LdaPeriod->description;
    $lobby_filing->start_date = $date;
    // The period label is whatever precedes the first parenthesis.
    if ($paren = strpos($period, '(')) {
      $lobby_filing->period = trim(substr($period, 0, $paren));
    } else {
      $lobby_filing->period = 'Undetermined';
    }
    $lobby_filing->report_type = LsLanguage::nameize($lda_filing->LdaType->description);
    foreach ($issues as $issue) {
      $filing_issue = new LobbyFilingLobbyIssue();
      $filing_issue->Issue = $issue[0];
      $filing_issue->Filing = $lobby_filing;
      $filing_issue->specific_issue = $issue[1];
      $filing_issue->save();
    }
    if (in_array('Lobbyist', $lobbying_entity_extensions)) {
      $lobby_filing->Lobbyist[] = $lobbying_entity;
    } else {
      foreach ($lobbyists as $lobbyist) {
        $lobby_filing->Lobbyist[] = $lobbyist;
      }
    }
    $lobby_filing->save();

    // CREATE TRANSACTION RELATIONSHIP, IF ANY
    $transaction = null;
    if ($client_entity != null) {
      $transaction = RelationshipTable::getByCategoryQuery('Transaction')->addWhere('r.entity1_id = ?', $client_entity->id)->addWhere('r.entity2_id = ?', $lobbying_entity->id)->addWhere('transaction.is_lobbying = ?', 1)->fetchOne();
      if ($transaction) {
        // Existing client/firm relationship: extend dates, add the amount.
        $transaction->updateDateRange($date, true);
        if ($lda_filing->amount && $lda_filing->amount != '') {
          if (!$transaction->amount || $transaction->amount == '') {
            $transaction->amount = $lda_filing->amount;
          } else {
            $transaction->amount += $lda_filing->amount;
          }
        }
        $transaction->filings++;
        $transaction->save();
        $transaction->addReference($source_url, $excerpt, $transaction->getAllModifiedFields(), 'LDA Filing', null, $date, false);
      } else {
        $transaction = new Relationship();
        $transaction->Entity1 = $client_entity;
        $transaction->Entity2 = $lobbying_entity;
        $transaction->setCategory('Transaction');
        $transaction->description1 = 'Lobbying Client';
        $transaction->is_lobbying = 1;
        $transaction->filings = 1;
        $transaction->updateDateRange($date, true);
        if (in_array('Person', $lobbying_entity_extensions)) {
          $transaction->description2 = 'Hired Lobbyist';
        } else {
          $transaction->description2 = 'Lobbying Firm';
        }
        if ($lda_filing->amount && $lda_filing->amount != '') {
          $transaction->amount = $lda_filing->amount;
        }
        $transaction->save();
        $transaction->addReference($source_url, $excerpt, $transaction->getAllModifiedFields(), 'LDA Filing', null, $date, false);
      }
      $relationships[] = $transaction;
    }

    // CREATE LOBBYING RELATIONSHIP
    foreach ($govt_entities as $govt_entity) {
      $lobbying_relationship = RelationshipTable::getByCategoryQuery('Lobbying')->addWhere('r.entity1_id = ?', $lobbying_entity->id)->addWhere('r.entity2_id = ?', $govt_entity->id)->fetchOne();
      if ($lobbying_relationship) {
        $lobbying_relationship->updateDateRange($date);
        $lobbying_relationship->filings++;
        $lobbying_relationship->save();
      } else {
        $lobbying_relationship = new Relationship();
        $lobbying_relationship->Entity1 = $lobbying_entity;
        $lobbying_relationship->Entity2 = $govt_entity;
        $lobbying_relationship->setCategory('Lobbying');
        if ($transaction) {
          $lobbying_relationship->description1 = 'Lobbying (for client)';
        } else {
          $lobbying_relationship->description1 = 'Direct Lobbying';
        }
        $lobbying_relationship->description2 = $lobbying_relationship->description1;
        $lobbying_relationship->updateDateRange($date, true);
        $lobbying_relationship->filings = 1;
        $lobbying_relationship->save();
        $lobbying_relationship->addReference($source_url, $excerpt, $lobbying_relationship->getAllModifiedFields(), 'LDA Filing', null, $date, false);
      }
      $relationships[] = $lobbying_relationship;
    }

    foreach ($relationships as $relationship) {
      $lobby_filing->Relationship[] = $relationship;
    }
    $lobby_filing->save();

    $this->printDebug("Import Completed\n");
    $this->db->commit();
  } catch (Exception $e) {
    $this->db->rollback();
    throw $e;
  }
}
/**
 * Extracts a full personal name from text, anchored on a known last name.
 *
 * Every whole-word, case-insensitive occurrence of $last is examined (spaces
 * and hyphens inside the last name are interchangeable). For each occurrence
 * the words before it, back to the nearest preceding comma, are collected as
 * the start of the name, and recognized suffixes after it as the ending.
 * The result for the last occurrence that yields a non-empty start is returned.
 *
 * @param string $str  text to search
 * @param string $last last name to look for
 * @return array|null array with keys 'nameFull', 'nameStart', 'nameLast' and
 *                    'namePost'; null when nothing usable is found or when an
 *                    occurrence sits at the very beginning of $str
 */
static function getNameWithLast($str, $last)
{
  $re_last = LsString::escapeStringForRegex($last);
  // Hyphens and spaces are interchangeable in last names.
  $re_last = preg_replace('/\\\\s+|\\\\\\-/is', '(\\s+|\\-)', $re_last);

  $matches = array();
  preg_match_all('/\\b' . $re_last . '\\b/isu', $str, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);

  $name = null;
  foreach ($matches as $match) {
    $pos_last = $match[0][1];
    if ($pos_last == 0) {
      return null;
    }
    $found_last = $match[0][0];

    // Work backwards from the last name to find the nearest comma. The
    // offsets in $matches refer to the untouched $str, so the slice is taken
    // into a separate variable; truncating $str itself (as before) made the
    // offsets of every later occurrence point at the wrong place.
    $comma = strripos($str, ',', $pos_last - strlen($str));
    $segment = ($comma === false) ? $str : substr($str, $comma);

    $splat = preg_split('/\\b' . $re_last . '\\b/is', $segment);
    $pre = $splat[0];
    $post = $splat[1];

    // Walk the preceding words from right to left, collecting name parts.
    $new = array();
    foreach (array_reverse(preg_split('/[\\s]+/', $pre, -1, PREG_SPLIT_NO_EMPTY)) as $a) {
      $case = LsString::checkCase($a);
      if (!$case) {
        continue;
      }
      if ($case == 'initial') {
        $new[] = $a;
      } elseif ($case == 'lower') {
        break;
      } elseif (preg_match('/\\.(\\P{L})*$/u', $a) == 1) {
        // A word ending in a period ends the name; keep it only when it is
        // a known prefix.
        $a = LsString::stripNonAlpha($a);
        if ($s = LsArray::inArrayNoCase($a, PersonTable::$nameParsePrefixes)) {
          $new[] = $s;
        }
        break;
      } else {
        $new[] = $a;
      }
    }
    $pre = implode(' ', array_reverse($new));
    if (strlen(trim($pre)) == 0) {
      continue;
    }

    // Collect known suffixes that directly follow the last name.
    $new = array();
    foreach (preg_split('/[\\s]+/', $post, -1, PREG_SPLIT_NO_EMPTY) as $a) {
      $case = LsString::checkCase($a);
      if (!$case) {
        continue;
      }
      if ($case == 'lower') {
        break;
      }
      $a = LsString::stripNonAlpha($a);
      if ($s = LsArray::inArrayNoCase($a, PersonTable::$nameParseSuffixes)) {
        $new[] = $s;
      } else {
        break;
      }
    }
    $post = trim(implode(' ', $new));

    $full = $pre . ' ' . $found_last;
    if (strlen($post) > 0) {
      $full .= ', ' . $post;
    }
    $name = array('nameFull' => $full, 'nameStart' => $pre, 'nameLast' => $found_last, 'namePost' => $post);
  }

  return $name;
}
/**
 * Extracts candidate proper names (people, organizations, ...) from a biography.
 *
 * The names reported by LsLanguage::getAllNames() are broken up further on
 * connective words and possessives, filtered against a list of common words and
 * this entity's own name, de-duplicated and passed through LsArray::strlenSort().
 *
 * @param string|null $bio text to parse; falls back to $this->Entity->summary when empty
 *
 * @return array candidate names
 */
public function parseBio($bio = null)
{
    if (!$bio) {
        $bio = $this->Entity->summary;
    }

    $name_matches = LsLanguage::getAllNames($bio);

    // connectives (regex fragments) on which a matched name is split
    $arr = array('for\\s+the', 'of\\s+the', 'at\\s+the', 'at', 'of', 'the', 'for', 'and');

    // count() is re-evaluated on purpose: fragments appended below are processed in turn
    for ($i = 0; $i < count($name_matches); $i++) {
        $name = $name_matches[$i];

        foreach ($arr as $a) {
            $splat = preg_split('/\\s+' . $a . '\\s+/isu', $name, -1, PREG_SPLIT_NO_EMPTY);
            if (count($splat) > 1) {
                if (!in_array($splat[0], LsLanguage::$commonPositions)) {
                    $name_matches = array_merge($name_matches, $splat);
                } else {
                    // leading fragment is a common position: drop it and rejoin the rest
                    array_shift($splat);
                    $joiner = str_replace('\\s+', ' ', $a);
                    $name = implode(" {$joiner} ", $splat);
                }
            }
        }

        $splat = preg_split('/\'s\\s+/isu', $name, -1, PREG_SPLIT_NO_EMPTY);
        if (count($splat) > 1) {
            $name_matches = array_merge($name_matches, $splat);
        }
    }
    unset($name);

    $exclude = array_merge(
        LsLanguage::$regions,
        LsLanguage::$commonFirstNames,
        LsLanguage::$commonLastNames,
        LsLanguage::$states,
        LsLanguage::$commonCities,
        LsLanguage::$grammar,
        LsLanguage::$weekdays,
        LsLanguage::$months,
        LsLanguage::$geography,
        LsLanguage::$possessives,
        explode(' ', $this->Entity->name),
        array($this->Entity->name),
        LsLanguage::$schools,
        LsLanguage::$commonPositions
    );

    $names = array();
    foreach ($name_matches as $name) {
        // Consider the name as matched and, when it contains a possessive, the
        // possessive-free variant too. The variant has to be handled here: appending
        // it to $name_matches would not work because foreach iterates over a copy.
        $candidates = array($name);
        $new = str_replace("'s ", " ", $name);
        if ($new != $name) {
            $candidates[] = $new;
        }

        foreach ($candidates as $candidate) {
            $candidate = trim($candidate);
            // strip a single trailing comma, period or apostrophe
            $candidate = preg_replace('/[\\,\\.\'\\’]$/isu', '', $candidate);
            if (!in_array($candidate, $exclude)) {
                $names[] = $candidate;
            }
        }
    }

    $names = array_unique($names);
    $names = LsArray::strlenSort($names);

    // NOTE: a disabled prototype that looked each name up via EntityTable and a Google
    // search (OrgTable::checkUrl) used to follow here; it never affected the result.
    return $names;
}
/**
 * Parses a name written as a single string into its components.
 *
 * Known prefixes and suffixes (self::$nameParsePrefixes / self::$nameParseSuffixes)
 * are peeled off the ends, a quoted nickname is extracted, and the remaining words
 * are assigned to first / middle / last by position.
 *
 * @param string      $str         the name to parse
 * @param string|null $surname     known last name; its inner whitespace is temporarily
 *                                 replaced by underscores so a multi-word surname is
 *                                 treated as one word
 * @param bool        $returnArray when true, return the parts as an array instead of an Entity
 *
 * @return array|Entity array with keys name_first, name_last, name_middle, name_prefix,
 *                      name_suffix and name_nick, or a new Entity with the Person extension
 *                      populated from those values
 */
static function parseFlatName($str, $surname = null, $returnArray = false)
{
    $namePrefix = $nameFirst = $nameMiddle = $nameLast = $nameSuffix = $nameNick = null;

    //to handle multi-word last names like Van der Twerp
    $sub = null;
    if ($surname) {
        $sub = preg_replace('/(^(\\P{L})+|(\\P{L})+$)/u', '', $surname);
        $sub = preg_replace('/\\s+/is', '_', $sub);
        $str = str_ireplace($surname, $sub, $str);
    }

    //trim and remove periods
    $str = trim(str_replace('.', ' ', $str));
    //remove extra spaces
    $str = preg_replace('/\\s{2,}/', ' ', $str);
    //remove parenthesised text (the pattern is not anchored, so every " (...)" group goes)
    $str = preg_replace('/ \\([^\\)]+\\)/', '', $str);

    //get prefixes; after each hit the scan restarts so stacked prefixes are all consumed
    $prefixes = self::$nameParsePrefixes;
    while ($prefix = current($prefixes)) {
        $new = preg_replace('/^' . $prefix . ' /i', '', $str);
        if ($str != $new) {
            // common prefixes are stripped from the name but not recorded
            if (!LsArray::inArrayNoCase($prefix, LsLanguage::$commonPrefixes)) {
                $namePrefix .= $prefix . ' ';
            }
            $str = trim($new);
            reset($prefixes);
            continue;
        }
        next($prefixes);
    }
    $namePrefix = $namePrefix ? trim($namePrefix) : null;

    //get suffixes; each new hit is prepended so the original left-to-right order is kept
    $suffixes = self::$nameParseSuffixes;
    while ($suffix = current($suffixes)) {
        $new = preg_replace('/ ' . $suffix . '$/i', '', $str);
        if ($str != $new) {
            $nameSuffix = $suffix . ' ' . $nameSuffix;
            $str = trim($new);
            reset($suffixes);
            continue;
        }
        next($suffixes);
    }
    $nameSuffix = $nameSuffix ? trim($nameSuffix) : null;

    //remove commas left over from suffixes
    $str = trim(str_replace(',', '', $str));

    //find nickname in quotes (the pattern has exactly one capture group)
    $nickPattern = '/["\']([\\S]+)[\'"]/';
    if (preg_match($nickPattern, $str, $nickFound)) {
        $nameNick = $nickFound[1];
        $str = trim(preg_replace($nickPattern, '', $str));
    }

    //condense multiple spaces
    $str = preg_replace('/\\s{2,}/', ' ', $str);

    //split into parts
    $parts = explode(' ', $str);
    switch (count($parts)) {
        case 1:
            // a lone word: borrow the prefix or suffix as the missing half when there is one
            if ($namePrefix) {
                $nameFirst = $namePrefix;
                $nameLast = $parts[0];
                $namePrefix = null;
            } elseif ($nameSuffix) {
                $nameFirst = $parts[0];
                $nameLast = $nameSuffix;
                $nameSuffix = null;
            } elseif (strtolower((string) $sub) == strtolower($parts[0])) {
                $nameLast = $parts[0];
            } else {
                $nameFirst = $parts[0];
            }
            break;
        case 2:
            $nameFirst = $parts[0];
            $nameLast = $parts[1];
            break;
        case 3:
            $nameFirst = $parts[0];
            $nameMiddle = $parts[1];
            $nameLast = $parts[2];
            break;
        default:
            // first word, last word, everything in between is the middle name
            $nameFirst = $parts[0];
            $nameLast = $parts[count($parts) - 1];
            for ($n = 1; $n < count($parts) - 1; $n++) {
                $nameMiddle .= $parts[$n] . ' ';
            }
            $nameMiddle = trim($nameMiddle);
            break;
    }

    // undo the underscore substitution made for multi-word surnames
    $nameLast = str_replace('_', ' ', $nameLast);

    $name = array(
        'name_first' => $nameFirst,
        'name_last' => $nameLast,
        'name_middle' => $nameMiddle,
        'name_prefix' => $namePrefix,
        'name_suffix' => $nameSuffix,
        'name_nick' => $nameNick
    );

    // tidy every part except prefix and suffix
    foreach ($name as $nk => &$nv) {
        if ($nv && $nk != 'name_suffix' && $nk != 'name_prefix') {
            // strip leading / trailing non-letters
            $nv = preg_replace('/^(\\P{L})+|(\\P{L})+$/u', '', $nv);
            $case = LsString::checkCase($nv);
            $nv = $case == 'upper' || $case == 'lower' ? LsLanguage::nameize($nv) : $nv;
            if ($nk != 'name_last') {
                $nv = LsLanguage::hgCaser($nv, false);
            }
        }
    }
    unset($nv);

    if ($returnArray) {
        return $name;
    }

    $person = new Entity();
    $person->addExtension('Person');
    $person->name_first = $name['name_first'];
    $person->name_middle = $name['name_middle'];
    $person->name_last = $name['name_last'];
    $person->name_nick = $name['name_nick'];
    $person->name_prefix = $name['name_prefix'];
    $person->name_suffix = $name['name_suffix'];

    return $person;
}
/**
 * Splits a free-text position description into recognised business titles.
 *
 * The text is cleaned, split on a single kind of separator (comma, semicolon,
 * "and" or " & "), each piece has common abbreviations expanded and qualifiers
 * removed, and pieces found in PositionTable::$businessPositions are returned.
 *
 * @param string $str    raw description text
 * @param mixed  $entity currently unused; kept so existing callers keep working
 *
 * @return array list of title strings; array($str) holding the cleaned text when the
 *               text mixes several kinds of separator or no piece is a known position
 */
static function parseDescriptionStr($str, $entity = null)
{
    $descriptions = array();

    //cleanup text to be parsed
    $str = trim($str);
    // NOTE(review): "(?<!=\s)" and "(?!=\s)" look like typos for "(?<!\s)" and "(?!\s)".
    // As written nearly every period is removed here, leaving little for the
    // str_replace() on the next line. Left unchanged; confirm the intent.
    $str = preg_replace('/(?<!=\\s)\\.(?!=\\s)/', '', $str);
    $str = str_replace('.', ' ', $str);
    $str = preg_replace('/\\s{2,}/', ' ', $str);
    $str = preg_replace('/\\s+,(?=\\s)/', ',', $str);
    $str = preg_replace('/\\)\\s*$/', '', $str);
    if (strtolower($str) == 'see remarks') {
        $str = '';
    }

    //don't parse if there's more than one (kind of) separator
    $num = 0;
    $patterns = array('/\\s&\\s/', '/,/', '/;/', '/\\band\\b/i');
    foreach ($patterns as $pattern) {
        if (preg_match($pattern, $str)) {
            $num++;
        }
    }
    if ($num > 1) {
        return array($str);
    }

    // ordered rewrite rules: array(kind, search, replacement) where kind is
    // 'preg' (preg_replace), 'str' (str_replace) or 'istr' (str_ireplace)
    $rules = array(
        //abbreviation replacements
        array('preg', '/( |^)(\\w) (\\w) (\\w)( |$)/', '\\2\\3\\4'),
        array('preg', '/(Interim|Acting|Incoming) /i', ''),
        array('preg', '/Sr /i', 'Senior '),
        array('preg', '/Chf /i', 'Chief '),
        array('preg', '/( |^)V( |$)/i', ' Vice '),
        array('preg', '/( |^)VP( |$)/i', ' Vice President '),
        array('preg', '/( |^)VC( |$)/i', ' Vice Chairman '),
        array('preg', '/( |^)Chr( |$)/i', ' Chairman '),
        array('preg', '/( |^)Ofcr( |$)/i', ' Officer '),
        array('preg', '/( |^)Vice P( |$)/i', ' Vice President '),
        array('preg', '/( |^)(Ex|Exec)( |$)/i', ' Executive '),
        array('preg', '/( |^)EVP( |$)/i', ' Executive Vice President '),
        array('preg', '/( |^)(Off|Offic|Offcr)( |$)/i', ' Officer '),
        array('str', 'Gen ', 'General '),
        array('preg', '/( |^)(Op|Oper) /', ' Operating '),
        array('preg', '/( |^)(Bd|Brd)( |$)/i', ' Board '),
        array('preg', '/of Board/i', ' of the Board'),
        array('preg', '/( |^)COB( |$)/i', ' Chairman of the Board '),
        array('preg', '/( |^)(Pres|Prs|Presid|Prsdt|Prsdnt)( |$)/i', ' President '),
        array('preg', '/( |^)Admin( |$)/i', ' Administrative '),
        array('preg', '/( |^)Info( |$)/i', ' Information '),
        array('preg', '/\\bComm\\b/i', 'Committee'),
        array('preg', '/\\bInc\\b/i', ''),
        array('preg', '/( |-|^)(Ch|Chm|Chmn|Chrm|Chrmn|Chair|Chairmain|Chariman)( |$)/i', '\\1Chairman '),
        array('preg', '/(Sec|Secr|Secy|Secretar|Secreta)( |$)/i', 'Secretary '),
        array('str', 'Vice-', 'Vice '),
        array('preg', '/( |^)Non /i', ' Non-'),
        array('preg', '/\\bCompl\\b/i', 'Compliance'),
        array('istr', 'of Advisory', 'of the Advisory'),
        array('preg', '/Advisory (Panel|Council)/i', 'Advisory Board'),
        //qualifiers that are dropped
        array('istr', 'Independent ', ''),
        array('istr', 'Lead ', ''),
        array('istr', 'Corporate ', ''),
        array('istr', 'Outside ', ''),
        array('istr', 'Non-interested', ''),
        array('istr', 'Interested', ''),
        array('str', 'Main ', ''),
        array('istr', 'Presiding ', ''),
        array('istr', 'Founding ', ''),
        //spelling normalisation
        array('istr', 'Acctg', 'Accounting'),
        array('istr', 'Chairperson', 'Chairman'),
        array('istr', 'Chairwoman', 'Chairman'),
        array('istr', "Gen'l", 'General')
    );

    //split on the separators; case-insensitive so "AND" / "And" split exactly like the
    //"and" that the separator check above recognises
    $parts = preg_split('/,|;|\\band\\b|\\s&\\s/i', $str, -1, PREG_SPLIT_NO_EMPTY);

    foreach ($parts as $part) {
        $part = trim($part);
        $part = preg_replace('/\\s{2,}/', ' ', $part);

        foreach ($rules as $rule) {
            if ($rule[0] == 'preg') {
                $part = preg_replace($rule[1], $rule[2], $part);
            } elseif ($rule[0] == 'str') {
                $part = str_replace($rule[1], $rule[2], $part);
            } else {
                $part = str_ireplace($rule[1], $rule[2], $part);
            }
        }

        $part = trim($part);
        $part = preg_replace('/\\s{2,}/', ' ', $part);

        if (LsArray::inArrayNoCase($part, PositionTable::$businessPositions)) {
            $descriptions[] = $part;
        }
    }

    // nothing recognised: hand back the cleaned text as a single description
    if (!count($descriptions)) {
        $descriptions[] = $str;
    }

    return $descriptions;
}
/**
 * Loads the list of schools from the zip archive published at $this->url.
 *
 * The archive is cached under <sf_root_dir>/data/schools/; it is downloaded only
 * when no cached copy exists, and unzipped only when the expected CSV file (the
 * lower-cased archive name with "zip" replaced by "csv") is not there yet.
 *
 * @return mixed the result of LsArray::CsvFileToArrayObject() for the CSV file, or
 *               null when the download, the save or the extraction failed
 */
public function getSchoolList()
{
    $educational_institutions = null;

    $base_data_dir = sfConfig::get('sf_root_dir') . '/data/schools/';
    $archive_name = basename($this->url);
    $filename_zip = $base_data_dir . $archive_name;
    $filename_csv = $base_data_dir . preg_replace('/zip$/i', 'csv', strtolower($archive_name));

    if (!is_dir($base_data_dir)) {
        mkdir($base_data_dir);
    }

    // only go to the network when there is no cached archive
    if (!file_exists($filename_zip)) {
        if ($this->browser->get($this->url)->responseIsError()) {
            $this->printDebug('Browser did not get file');
            return $educational_institutions;
        }
        if (file_put_contents($filename_zip, $this->browser->getResponseText()) === false) {
            $this->printDebug('Zip failure');
            return $educational_institutions;
        }
    }

    if (file_exists($filename_csv)) {
        $ret = true;
    } else {
        // $ret receives unzip's output lines; an empty list is treated as failure below.
        // Paths are escaped because the archive name comes from the URL.
        $ret = null;
        exec('unzip ' . escapeshellarg($filename_zip) . ' -d ' . escapeshellarg($base_data_dir), $ret);
    }

    if ($ret) {
        $educational_institutions = LsArray::CsvFileToArrayObject($filename_csv);
        $this->printDebug(' Found schools: ' . count($educational_institutions));
    } else {
        $this->printDebug('Failed to load csv');
    }

    return $educational_institutions;
}
/**
 * Parses a free-text position description into a list of positions held at $corp.
 *
 * The corporation's name, nickname and ticker are removed from the text, which is
 * then split on separators (comma, semicolon, "and"/"AND", " & ", "/", and hyphens
 * not preceded by "CO"/"Co"). Each piece has common abbreviations expanded and
 * qualifiers removed, and is matched against PositionTable::$businessPositions and
 * then against existing Relationship.description1 values.
 *
 * An unrecognised first piece is shortened word by word from the right in search
 * of a known title and otherwise kept verbatim; an unrecognised later piece is
 * attached as a note to the previous position.
 *
 * @param string $str  raw description text
 * @param object $corp object exposing name, name_nick and ticker
 *
 * @return array list of array('description' => string, 'note' => array of strings)
 */
public function parseDescriptionStr($str, $corp)
{
    $descriptions = array();

    //cleanup text to be parsed
    $str = trim($str);
    $str = str_replace('.', ' ', $str);
    $str = preg_replace('/\\s{2,}/', ' ', $str);

    // strip references to the corporation itself
    $name_re = LsString::escapeStringForRegex($corp->name);
    $str = preg_replace('/\\b' . $name_re . '\\b/isu', '', $str);
    if ($corp->name_nick) {
        $nick_re = LsString::escapeStringForRegex($corp->name_nick);
        $str = preg_replace('/\\b' . $nick_re . '\\b/isu', '', $str);
    }
    if ($corp->ticker) {
        $tick_re = LsString::escapeStringForRegex($corp->ticker);
        $str = preg_replace('/\\b' . $tick_re . '\\b/isu', '', $str);
    }

    // ordered rewrite rules: array(kind, search, replacement) where kind is
    // 'preg' (preg_replace), 'str' (str_replace) or 'istr' (str_ireplace)
    $rules = array(
        //abbreviation replacements
        array('preg', '/( |^)(\\w) (\\w) (\\w)( |$)/', '\\2\\3\\4'),
        array('preg', '/(Interim|Acting|Incoming) /i', ''),
        array('preg', '/Sr /i', 'Senior '),
        array('preg', '/Chf /i', 'Chief '),
        array('preg', '/( |^)V( |$)/i', ' Vice '),
        array('preg', '/( |^)VP( |$)/i', ' Vice President '),
        array('preg', '/( |^)VC( |$)/i', ' Vice Chairman '),
        array('preg', '/( |^)Chr( |$)/i', ' Chairman '),
        array('preg', '/( |^)Ofcr( |$)/i', ' Officer '),
        array('preg', '/( |^)Vice P( |$)/i', ' Vice President '),
        array('preg', '/( |^)(Ex|Exec)( |$)/i', ' Executive '),
        array('preg', '/( |^)EVP( |$)/i', ' Executive Vice President '),
        array('preg', '/( |^)(Off|Offic|Offcr)( |$)/i', ' Officer '),
        array('str', 'Gen ', 'General '),
        array('preg', '/( |^)(Op|Oper) /', ' Operating '),
        array('preg', '/( |^)(Bd|Brd)( |$)/i', ' Board '),
        array('preg', '/of Board/i', ' of the Board'),
        array('preg', '/( |^)COB( |$)/i', ' Chairman of the Board '),
        array('preg', '/( |^)(Pres|Prs|Presid|Prsdt|Prsdnt)( |$)/i', ' President '),
        array('preg', '/( |^)Admin( |$)/i', ' Administrative '),
        array('preg', '/( |^)Info( |$)/i', ' Information '),
        array('preg', '/\\bComm\\b/i', 'Committee'),
        array('preg', '/\\bInc\\b/i', ''),
        array('preg', '/( |-|^)(Ch|Chm|Chmn|Chrm|Chrmn|Chair|Chairmain|Chariman)( |$)/i', '\\1Chairman '),
        array('preg', '/(Sec|Secr|Secy|Secretar|Secreta)( |$)/i', 'Secretary '),
        array('str', 'Vice-', 'Vice '),
        array('preg', '/( |^)Non /i', ' Non-'),
        array('preg', '/\\bCompl\\b/i', 'Compliance'),
        array('istr', 'of Advisory', 'of the Advisory'),
        array('preg', '/Advisory (Panel|Council)/i', 'Advisory Board'),
        //qualifiers that are dropped
        array('istr', 'Independent ', ''),
        array('istr', 'Lead ', ''),
        array('istr', 'Corporate ', ''),
        array('istr', 'Outside ', ''),
        array('istr', 'Non-interested', ''),
        array('istr', 'Interested', ''),
        array('str', 'Main ', ''),
        array('istr', 'Presiding ', ''),
        array('istr', 'Founding ', ''),
        //spelling normalisation
        array('istr', 'Acctg', 'Accounting'),
        array('istr', 'Chairperson', 'Chairman'),
        array('istr', 'Chairwoman', 'Chairman'),
        array('istr', "Gen'l", 'General')
    );

    //split on separators
    $parts = preg_split('/,|;|\\band\\b|(?<!C[Oo])\\-|\\bAND\\b|\\s&\\s|\\//', $str, -1, PREG_SPLIT_NO_EMPTY);

    foreach ($parts as $part) {
        $part = trim($part);
        $part = preg_replace('/\\s{2,}/', ' ', $part);

        foreach ($rules as $rule) {
            if ($rule[0] == 'preg') {
                $part = preg_replace($rule[1], $rule[2], $part);
            } elseif ($rule[0] == 'str') {
                $part = str_replace($rule[1], $rule[2], $part);
            } else {
                $part = str_ireplace($rule[1], $rule[2], $part);
            }
        }

        $part = trim($part);
        $part = preg_replace('/\\s{2,}/', ' ', $part);

        if ($part == '') {
            continue;
        }

        $position = array('description' => null, 'note' => array());

        //look for matching title: known business positions first, then existing relationships
        $p = LsArray::inArrayNoCase($part, PositionTable::$businessPositions);
        if ($p) {
            $position['description'] = $p;
        } elseif ($q = Doctrine::getTable('Relationship')->findOneByDescription1($part)) {
            // the lookup is by the title text itself
            $position['description'] = $q->description1;
        } elseif (count($descriptions) == 0) {
            // Unknown first piece: drop trailing words one at a time and retry.
            // The loop does not stop at a hit, so a shorter hit replaces a longer one;
            // it stops once only "DIRECTOR" is left.
            // NOTE(review): the dropped words are not recorded anywhere.
            $part_splat = LsString::split($part);
            $lim = count($part_splat) - 1;
            for ($i = 0; $i < $lim; $i++) {
                array_pop($part_splat);
                $part_new = implode(' ', $part_splat);
                if (strtoupper($part_new) == 'DIRECTOR') {
                    break;
                }
                $p = LsArray::inArrayNoCase($part_new, PositionTable::$businessPositions);
                if ($p) {
                    $position['description'] = $p;
                } elseif ($q = Doctrine::getTable('Relationship')->findOneByDescription1($part_new)) {
                    $position['description'] = $q->description1;
                }
            }
            if (!$position['description']) {
                $position['description'] = $part;
            }
        } else {
            // unknown later piece: treat it as a note on the previous position
            $descriptions[count($descriptions) - 1]['note'][] = $part;
        }

        if (isset($position['description'])) {
            $descriptions[] = $position;
        }
    }

    return $descriptions;
}
} ?> <?php if (count($donors)) { ?> <?php include_partial('global/section', array('title' => 'Top Donors', 'pointer' => 'Top donors to US politicians/PACs with positions/memberships at ' . $entity['name'])); ?> <div id="top-donors"> </div> <?php $donors = LsArray::flip($donors); ?> <?php include_partial('global/polBarGraph', array('dataSet' => array_slice($donors['amount'], 0, 10), 'graphName' => "#top-donors", 'dataLabels' => array_slice($donors['name'], 0, 10), 'dataUrls' => array_slice($donors['url'], 0, 10))); ?> <?php } ?> <?php } ?>