/**
 * Heuristically decides whether a URL looks like it belongs to an organization.
 *
 * When the URL contains a "//host/" portion, only that portion is examined.
 * The URL is accepted when either
 *  - a word of the organization name (longer than one character and not a
 *    common word or corporate suffix) occurs in it, case-insensitively, or
 *  - an acronym built from the first characters of the name's words occurs in
 *    it. Three acronyms are tried: from all words, from all words except the
 *    common ones, and from all words except the corporate suffixes. Acronyms
 *    of two characters or fewer are ignored.
 *
 * @param string $url      URL to test
 * @param string $org_name organization name; tokenized with LsString::split()
 *
 * @return bool true when the URL matches the name by one of the rules above
 */
public function checkUrl($url, $org_name)
 {
     // Narrow the comparison to the "//host/" part so path segments cannot match.
     if (preg_match('/\\/\\/[^\\/]+\\//isu', $url, $match)) {
         $url = $match[0];
     }
     $common = array('and', 'the', 'of', 'in', 'at', '&');
     $abbrevs = array('Corporation', 'Inc', 'Group', 'LLC', 'LLP', 'Corp', 'Co', 'Cos', 'LP', 'PA', 'Dept', 'Department', 'International', 'Administration');
     $both = array_merge($common, $abbrevs);
     $all = '';
     $no_common = '';
     $no_corp = '';
     foreach (LsString::split($org_name) as $part) {
         // An empty token has no first character; reading $part[0] on it would
         // only raise an "uninitialized string offset" notice/warning.
         if ($part === '') {
             continue;
         }
         // A distinctive word of the name inside the URL settles the question,
         // so there is no need to keep building acronyms.
         if (strlen($part) > 1 && stristr($url, $part) && !LsArray::inArrayNoCase($part, $both)) {
             return true;
         }
         $all .= $part[0];
         if (!LsArray::inArrayNoCase($part, $common)) {
             $no_common .= $part[0];
         }
         if (!LsArray::inArrayNoCase($part, $abbrevs)) {
             $no_corp .= $part[0];
         }
     }
     // No word matched: fall back to the acronyms.
     foreach (array($all, $no_common, $no_corp) as $acronym) {
         if (strlen($acronym) > 2 && stristr($url, $acronym)) {
             return true;
         }
     }
     return false;
 }
 /**
  * Searches Google Images for the organization's logo and attaches the first
  * plausible result.
  *
  * Returns immediately when imageExists() reports an image for the entity.
  * Otherwise the name (minus the words listed in LsLanguage::$business and
  * LsLanguage::$businessAbbreviations) plus " logo" is used as the query.
  * A result is considered only if its URL ends in png, gif or jpg, and it is
  * attached when at least two tokens from the organization name (plus the
  * extra tokens "logo" and "seal") also appear among the tokens of the image
  * file name and the result's text.
  *
  * @param Entity $org organization to find a logo for
  *
  * @return bool true if an image already existed or one was attached,
  *              false if no result could be attached
  */
 protected function getLogoFromGoogleImage(Entity $org)
 {
     if ($this->imageExists($org)) {
         return true;
     }
     //construct search query
     $nameParts = array_diff(explode(' ', $org->name), array_merge(LsLanguage::$business, LsLanguage::$businessAbbreviations));
     $cleanName = trim(implode(' ', $nameParts));
     $query = $cleanName . ' logo';
     $this->printDebug("Querying Google with term: " . $query);
     $google = new LsGoogle();
     $google->setService('images');
     $google->setQuery($query);
     $google->execute();
     $results = $google->getResults();
     // Tokens are separated by space, dot, underscore or hyphen. preg_split()
     // replaces split(), which was removed in PHP 7.0.
     $separators = '/[ ._\\-]/';
     // The name tokens do not depend on the result, so build them once.
     $organization_name_parts = preg_split($separators, strtolower($org->name));
     $organization_name_parts[] = "logo";
     $organization_name_parts[] = "seal";
     foreach ($results as $result) {
         $image_url = $result->url;
         $image_content = $result->contentNoFormatting;
         $this->printDebug("Checking: " . $image_url);
         if (preg_match('/(png|gif|jpg)$/i', $image_url)) {
             $this->printDebug("Checking " . $image_url);
             $basefilename = basename($image_url);
             // Tokens of the decoded file name and result text, with digits removed.
             $organization_match_parts = LsArray::arrayTrim(preg_split($separators, preg_replace("/[0-9]/", "", strtolower(basename(urldecode($basefilename)) . " " . urldecode($image_content)))));
             $intersect = array_intersect($organization_name_parts, $organization_match_parts);
             // Require two shared tokens so a single generic word is not enough.
             if (count($intersect) >= 2) {
                 $attached = $this->attachImage($org, $image_url, 'Organization logo');
                 if ($attached) {
                     $this->printDebug("Saved");
                     return true;
                 }
             }
         }
     }
     $this->printDebug("Logo not found on Google");
     return false;
 }
 /**
  * Finds the name/age listing for the people in $this->people inside
  * $this->text, cuts out one profile segment per person and passes the parsed
  * result to importDirectorInfo().
  *
  * Outline:
  *  1. For each set of name matches in $this->sets, look for an age-like
  *     two-digit number (20-99) in the text following each name.
  *  2. Choose the run of neighbouring name/age pairs covering the most
  *     distinct people.
  *  3. Keep only the pairs sharing the dominant enclosing tag (if there is
  *     one) and settle on one pair per person.
  *  4. Mark the 'Director' Position relationship (to $this->corp) of every
  *     person that was not found as not current. Note that this happens
  *     before the "not enough names" check below, so it takes effect even
  *     when the method then gives up.
  *  5. Work out which HTML tag separates the profiles, extract and parse each
  *     profile segment (parseSegment(), parseBlurb()), and import it.
  *
  * @return null nothing is returned; the method stops early when there are no
  *              name sets, too few names were matched, or the separating tag
  *              could not be determined
  */
 private function findBasicInfo()
 {
     if (!$this->sets) {
         return null;
     }
     // Captures (group 3) a standalone number 20-99 that is not preceded by
     // . , $ or / and not followed by a 199x/200x year, a percent sign, a
     // decimal part, or a years/days/months phrase.
     $re = '/^([^<]*?<[^>]*>)*?[^<]*?(?<!([\\.,$\\/]))(\\b[2-9]\\d\\b)(?!((,\\s+200\\d|199\\d)|%|[,\\.]\\d|[-\\s]+([Yy]ears?\\s+(with|career)|[Dd]ays?|[Mm]onths?)\\b))/su';
     $age_match_sets = array();
     //go through the sets of name matches and find age matches for each
     foreach ($this->sets as $set) {
         $age_matches = array();
         for ($i = 0; $i < count($set); $i++) {
             // Search from this name up to the next one; the last name gets a fixed 2000-byte window.
             $len = $i == count($set) - 1 ? 2000 : $set[$i + 1]['pos'] - $set[$i]['pos'];
             if ($len > 100000) {
                 continue;
             }
             $str = substr($this->text, $set[$i]['pos'], $len);
             if (preg_match($re, $str, $match)) {
                 // Count the opening tags between name and age; remember the last one.
                 $n = preg_match_all('/<(\\p{L}+)[^>]*>/s', $match[0], $m, PREG_SET_ORDER);
                 $tag = 'empty';
                 if ($n > 0) {
                     $tag = $m[count($m) - 1][1];
                 }
                 $stripped = LsHtml::stripTags($match[0]);
                 if (strlen($stripped) < 2000) {
                     $age_matches[] = array('ind' => $i, 'age_match' => $match, 'age' => $match[3], 'name_match' => $set[$i], 'num_tags' => $n, 'tag' => $tag, 'len' => strlen($match[0]));
                 }
             }
         }
         $this->printDebug('count age matches is ' . count($age_matches));
         $age_match_sets[] = $age_matches;
     }
     //find the best set (most unique names and ages)
     $max = 0;
     $best = array(array('unique' => array(), 'set' => array()));
     foreach ($age_match_sets as $age_matches) {
         if (count($age_matches) < 2) {
             continue;
         }
         $unique = array($age_matches[0]['name_match']['id']);
         $temp = array($age_matches[0]);
         for ($i = 1; $i < count($age_matches); $i++) {
             // A match at most 4 name positions after the previous one continues the current run.
             if ($age_matches[$i]['ind'] - 4 <= $age_matches[$i - 1]['ind']) {
                 $temp[] = $age_matches[$i];
                 if (!in_array($age_matches[$i]['name_match']['id'], $unique)) {
                     $unique[] = $age_matches[$i]['name_match']['id'];
                 }
             } else {
                 // The run ended: compare it with the best run(s) so far, then start a new one.
                 if (count($unique) > $max) {
                     $max = count($unique);
                     if (count(array_intersect($best[0]['unique'], $unique)) == 0 && count($best[0]['unique']) > 2) {
                         array_unshift($best, array('unique' => $unique, 'set' => $temp));
                     } else {
                         $best = array(array('unique' => $unique, 'set' => $temp));
                     }
                 } else {
                     if (count(array_intersect($best[0]['unique'], $unique)) == 0 && count($unique) > 2) {
                         $best[] = array('unique' => $unique, 'set' => $temp);
                     }
                 }
                 $unique = array($age_matches[$i]['name_match']['id']);
                 $temp = array($age_matches[$i]);
             }
         }
         // Evaluate the run that was still open when the matches ran out.
         if (count($unique) > $max) {
             $max = count($unique);
             if (count(array_intersect($best[0]['unique'], $unique)) == 0) {
                 array_unshift($best, array('unique' => $unique, 'set' => $temp));
             } else {
                 $best = array(array('unique' => $unique, 'set' => $temp));
             }
         }
     }
     $best = $best[0]['set'];
     //find the tag all names have in common (if there is one)
     $tag_counts = array();
     foreach ($best as $b) {
         if (isset($tag_counts[$b['tag']])) {
             $tag_counts[$b['tag']]++;
         } else {
             $tag_counts[$b['tag']] = 1;
         }
         $this->printDebug($b['ind'] . '. ' . $b['name_match']['name'] . ' : ' . $b['age'] . ' : ' . strlen($b['age_match'][0]) . ' : ' . $b['num_tags'] . ' : ' . $b['tag']);
     }
     // A tag counts as common when more than 80% of the pairs end on it.
     $tag = null;
     foreach ($tag_counts as $k => $v) {
         if ($v > 0.8 * count($best)) {
             $tag = $k;
             break;
         }
     }
     $age_set = array();
     if ($tag) {
         foreach ($best as $b) {
             if ($b['tag'] == $tag) {
                 $age_set[] = $b;
             }
         }
     } else {
         $age_set = $best;
     }
     $age_set = LsArray::multiSort($age_set, array('name_match', 'id'));
     //find duplicates and determine the best match out of the pair/set
     $singles = array();
     $doubles = array();
     $num_tags = 0;
     $len = 0;
     for ($i = 0; $i < count($age_set); $i++) {
         // Collect consecutive entries for the same person (the set is sorted by id).
         $double = array($age_set[$i]);
         while ($i < count($age_set) - 1 && $double[0]['name_match']['id'] == $age_set[$i + 1]['name_match']['id']) {
             $double[] = $age_set[$i + 1];
             $i++;
         }
         if (count($double) == 1) {
             $singles[] = $age_set[$i];
             $num_tags += $age_set[$i]['num_tags'];
             $len += $age_set[$i]['len'];
         } else {
             $doubles[] = $double;
         }
     }
     if (count($singles) < 3) {
         // Too few unambiguous matches to compare against: take the first
         // stretch (in text order) in which no person repeats.
         $unique = array();
         $sets = array(array());
         $age_set = LsArray::multiSort($age_set, array('name_match', 'pos'));
         foreach ($age_set as $a) {
             if (!in_array($a['name_match']['id'], $unique)) {
                 $unique[] = $a['name_match']['id'];
                 $sets[count($sets) - 1][] = $a;
             } else {
                 $unique = array($a['name_match']['id']);
                 $sets[] = array($a);
             }
         }
         $age_set = $sets[0];
     } else {
         // For each person matched more than once, keep the candidate whose tag
         // count is closest to the average of the unambiguous matches.
         $avg_len = $len / count($singles);
         $avg_tags = $num_tags / count($singles);
         foreach ($doubles as $double) {
             $best = null;
             foreach ($double as $d) {
                 if (!$best) {
                     $best = $d;
                 } else {
                     if (abs($avg_tags - $best['num_tags']) > abs($avg_tags - $d['num_tags'])) {
                         $best = $d;
                     } else {
                         // NOTE(review): on a tag-count tie this only switches when the
                         // length distances are exactly equal too; a ">" comparison on
                         // length may have been intended - confirm before changing.
                         if (abs($avg_tags - $best['num_tags']) == abs($avg_tags - $d['num_tags']) && abs($avg_len - $best['len']) == abs($avg_len - $d['len'])) {
                             $best = $d;
                         }
                     }
                 }
             }
             $singles[] = $best;
         }
         $age_set = LsArray::multiSort($singles, array('name_match', 'pos'));
     }
     //determine which directors were found, which weren't
     $ids = array();
     foreach ($age_set as $a) {
         $ids[] = $a['name_match']['id'];
     }
     foreach ($this->people as $p) {
         if (!in_array($p->id, $ids)) {
             $category = Doctrine::getTable('RelationshipCategory')->findOneByName('Position');
             $relationship = LsDoctrineQuery::create()->from('Relationship r')->where('r.entity1_id = ?', $p->id)->addWhere('r.entity2_id = ?', $this->corp->id)->addWhere('r.category_id = ?', $category->id)->addWhere('r.description1 = ?', 'Director')->fetchOne();
             if ($relationship) {
                 $relationship->is_current = 0;
                 $relationship->save();
             }
         }
     }
     if (count($age_set) < 0.5 * count($this->people)) {
         $this->printDebug('not enough names in age set:' . count($age_set) . ' vs. ' . count($this->people));
         return null;
     }
     //figure out which tags surround name/age pairs
     $tag_arr = array('table' => array(), 'tr' => array(), 'td' => array(), 'div' => array(), 'br' => array(), 'p' => array());
     for ($i = 1; $i < count($age_set) - 1; $i++) {
         // Text from the previous name to the next name, with the current name in between.
         $str = substr($this->text, $age_set[$i - 1]['name_match']['pos'], $age_set[$i + 1]['name_match']['pos'] - $age_set[$i - 1]['name_match']['pos']);
         foreach ($tag_arr as $tag => &$arr) {
             $tag_str = LsHtml::getStringInTag($str, $tag, $age_set[$i]['name_match']['pos'] - $age_set[$i - 1]['name_match']['pos']);
             if (strlen($tag_str) > 0) {
                 $arr[] = strlen($tag_str);
             }
         }
         // Drop the reference so later assignments to $arr cannot write into $tag_arr.
         unset($arr);
     }
     arsort($tag_arr);
     if (count(reset($tag_arr)) == 0) {
         $this->printDebug('problems with enclosing tag detection');
         return null;
     }
     // The first entry after arsort() is used as the tag that separates profiles.
     foreach ($tag_arr as $tag => $arr) {
         $avg = array_sum($arr) / count($arr);
         $splitter = $tag;
         break;
     }
     // Count splitter tags between each pair of consecutive names; the smallest
     // count is used as the number of closing tags that end a profile.
     $tag_counts = array();
     for ($i = 0; $i < count($age_set) - 1; $i++) {
         $str = substr($this->text, $age_set[$i]['name_match']['pos'], $age_set[$i + 1]['name_match']['pos'] - $age_set[$i]['name_match']['pos']);
         str_ireplace('<' . $splitter, ' ', $str, $count);
         $tag_counts[] = $count;
     }
     sort($tag_counts);
     $ct = $tag_counts[0];
     if (!$ct) {
         return null;
     }
     $post_strlen = 0;
     $info_arr = array();
     $segments = array();
     for ($i = 0; $i < count($age_set); $i++) {
         $a = $age_set[$i];
         // Segment = from the last splitter tag opened before the name to the
         // $ct-th closing splitter tag after it.
         $matches = LsString::striposMulti($this->text, '</' . $splitter, $ct, $a['name_match']['pos']);
         $end = $matches[count($matches) - 1];
         $start = strripos(substr($this->text, 0, $a['name_match']['pos']), '<' . $splitter);
         $str = substr($this->text, $start, $end - $start);
         if ($i == count($age_set) - 1 && count($matches) > 1) {
             // For the last profile also try ending one closing tag earlier and
             // keep whichever length is closer to the average of earlier segments.
             $end = $matches[count($matches) - 2];
             $str2 = substr($this->text, $start, $end - $start);
             $avg = strlen(implode(' ', $segments)) / count($segments);
             if (abs(strlen($str2) - $avg) < abs(strlen($str) - $avg)) {
                 $str = $str2;
             }
         }
         $segments[] = $str;
         $info = $this->parseSegment($str, $a['name_match']['pos'] - $start, $a['name_match']['pos'] - $start + strlen($a['name_match']['match'][2][0]));
         $info = $this->parseBlurb($info, $a);
         //looks to see if bio appears after the parsed segment
         if ($i < count($age_set) - 1) {
             $next_start = strripos(substr($this->text, 0, $age_set[$i + 1]['name_match']['pos']), '<' . $splitter);
             $post_str = substr($this->text, $end, $next_start - $end);
         } else {
             // No next profile to bound the last one: use the average trailing length seen so far.
             $avg = $post_strlen / (count($age_set) - 1);
             $post_str = substr($this->text, $end, (int) $avg);
         }
         $post_strlen += strlen($post_str);
         $post_str = LsHtml::replaceFontStyleTags($post_str);
         $person = $a['name_match']['person'];
         $last = LsString::escapeStringForRegex($person->name_last);
         $info['post_blurb'] = '';
         // Keep trailing text nodes that mention the person's last name, if long enough.
         if (preg_match_all('/>([^<]*' . $last . '[^<]*)</isu', $post_str, $matches)) {
             $post_blurb = implode(' ', $matches[1]);
             $post_blurb = trim(preg_replace('/\\s+/s', ' ', $post_blurb));
             if (strlen($post_blurb) > 40) {
                 $info['post_blurb'] = $post_blurb;
             }
         }
         $info_arr[] = $info;
     }
     // $ct is reused here: number of profiles whose trailing blurb is longer than the parsed one.
     $ct = 0;
     $unv_ct = 0;
     foreach ($info_arr as $info) {
         if (strlen($info['post_blurb']) > strlen($info['blurb'])) {
             $ct++;
         }
         if ($info['img'] == null && $info['unverified_img'] != null) {
             $unv_ct++;
         }
     }
     //if most of the profile segments have images at the end, check to see if they belong to the next profile segment
     if ($unv_ct > count($age_set) - 3) {
         for ($i = 0; $i < count($age_set); $i++) {
             // Position of the splitter tag opening this profile, then the last <img before it.
             $len = strripos(substr($this->text, 0, $age_set[$i]['name_match']['pos']), '<' . $splitter);
             $tag_start = strripos(substr($this->text, 0, $len), '<img');
             $str = substr($this->text, $tag_start, 200);
             if (preg_match('/^<img[^>]+src=[\'"]([^\'"]+)[\'"]/is', $str, $match) == 1) {
                 // Store the image on this profile's entry; writing to the leftover
                 // $info loop variable would discard it.
                 $info_arr[$i]['img'] = $match[1];
             } else {
                 if ($i == 0) {
                     break;
                 }
             }
         }
     }
     for ($i = 0; $i < count($info_arr); $i++) {
         // When more than 80% of the profiles have the longer text after the
         // segment, use that trailing text as the blurb for every profile.
         if ($ct > 0.8 * count($age_set)) {
             $info_arr[$i]['blurb'] = $info_arr[$i]['post_blurb'];
             if (!$info_arr[$i]['since']) {
                 $info_arr[$i]['since'] = $this->getStartDate($info_arr[$i]['blurb']);
             }
         }
         $this->importDirectorInfo($info_arr[$i], $age_set[$i]);
         $this->printDebug("\n***");
     }
 }
 private function importFiling($org, $lda_filing)
 {
     try {
         $this->printTimeSince();
         $this->printDebug('Starting import...');
         $excerpt = array();
         //$time = microtime(1);
         $this->db->beginTransaction();
         $date = null;
         $excerpt['Federal Filing Id'] = $lda_filing->federal_filing_id;
         $excerpt['Year'] = $lda_filing->year;
         $excerpt['Type'] = $lda_filing->LdaType->description;
         if (preg_match('/^[^T]*/su', $lda_filing->received, $match)) {
             $date = $match[0];
             $date = str_replace('/', '-', $date);
         }
         $lda_registrant = Doctrine::getTable('LdaRegistrant')->find($lda_filing->registrant_id);
         $excerpt['Registrant'] = $lda_registrant->name;
         if ($lda_filing->client_id) {
             $lda_client = Doctrine::getTable('LdaClient')->find($lda_filing->client_id);
             $excerpt['Client'] = $lda_client->name;
         } else {
             $this->db->rollback();
             return null;
         }
         $lobbying_entity = null;
         //DETERMINE (& CREATE) LOBBYING ENTITY
         //$this->printTimeSince();
         //$this->printDebug('determine/create...');
         if (strtolower(OrgTable::stripNamePunctuation($lda_client->name)) == strtolower(OrgTable::stripNamePunctuation($lda_registrant->name))) {
             $lobbying_entity = $org;
             $client_entity = null;
             if (!$lobbying_entity->lda_registrant_id) {
                 $lobbying_entity->lda_registrant_id = $lda_registrant->federal_registrant_id;
                 $lobbying_entity->save();
                 $lobbying_entity->addReference(self::$filing_url . $lda_filing->federal_filing_id, null, $lobbying_entity->getAllModifiedFields(), 'LDA Filing', null, $date, false);
             } else {
                 if ($lobbying_entity->lda_registrant_id != $lda_registrant->federal_registrant_id) {
                     $this->printDebug("LDA registrant ids did not match up for {$lobbying_entity->name} and {$lda_registrant->name} even though names matched {$lda_client->name}\n");
                     $this->db->rollback();
                     return null;
                 }
             }
             $this->printDebug($lobbying_entity->name . ' noted (same as client ' . $lda_client->name . ')');
         } else {
             $client_entity = $org;
             if ($lda_client->description) {
                 $description = trim($lda_client->description);
                 if ($description != '' && preg_match('/[\\/\\-]\\d+[\\/\\-]/isu', $description) == 0) {
                     if (strlen($description) < 200) {
                         if (!$org->blurb || $org->blurb == '') {
                             $org->blurb = $description;
                         }
                     } else {
                         if (!$org->summary || $org->summary == '') {
                             $org->summary = $description;
                         }
                     }
                 }
             }
             $org->save();
             $this->printDebug($lda_client->name . ' is distinct from ' . $lda_registrant->name);
         }
         $lda_lobbyists = $lda_filing->LdaLobbyists;
         $excerpt['Lobbyists'] = array();
         foreach ($lda_lobbyists as $lda_lobbyist) {
             $excerpt['Lobbyists'][] = $lda_lobbyist->name;
         }
         $excerpt['Lobbyists'] = implode('; ', $excerpt['Lobbyists']);
         if (!$lobbying_entity) {
             $lobbyist_name = null;
             if (count($lda_lobbyists)) {
                 $lobbyist_parts = explode(',', $lda_lobbyists[0]->name);
                 if (count($lobbyist_parts) > 1) {
                     $lobbyist_last = trim($lobbyist_parts[0]);
                     $arr = LsString::split($lobbyist_parts[1]);
                     $lens = array_map('strlen', $arr);
                     arsort($lens);
                     $keys = array_keys($lens);
                     $lobbyist_longest = $arr[$keys[0]];
                     $lobbyist_name = trim($lobbyist_parts[1]) . ' ' . trim($lobbyist_parts[0]);
                     $existing_lobbyist_registrant = null;
                 } else {
                     $lobbyist_name = preg_replace('/^(Mr|MR|MS|Dr|DR|MRS|Mrs|Ms)\\b\\.?/su', '', $lda_lobbyists[0]->name);
                     $arr = LsString::split(trim($lobbyist_name));
                     $arr = LsArray::strlenSort($arr);
                     $lobbyist_last = array_pop($arr);
                     if (count($arr)) {
                         $lobbyist_longest = array_shift(LsArray::strlenSort($arr));
                     } else {
                         $lobbyist_longest = '';
                     }
                 }
             }
             //check to see if registrant and lobbyist are same
             if (count($lda_lobbyists) == 1 && (strtoupper($lda_lobbyists[0]->name) == strtoupper($lda_registrant->name) || $lobbyist_last && stripos($lda_registrant->name, $lobbyist_last) == strlen($lda_registrant->name) - strlen($lobbyist_last) && stristr($lda_registrant->name, $lobbyist_longest))) {
                 $existing_lobbyist_registrant = EntityTable::getByExtensionQuery('Lobbyist')->addWhere('lobbyist.lda_registrant_id = ?', $lda_registrant->federal_registrant_id)->execute()->getFirst();
                 if ($existing_lobbyist_registrant) {
                     $lobbying_entity = $existing_lobbyist_registrant;
                     $this->printDebug('Existing lobbyist is lobbying entity: ' . $lobbying_entity->name);
                 } else {
                     $lobbyist = $this->prepLobbyistName($lda_lobbyists[0]->name);
                     if ($lobbyist) {
                         $lobbyist->lda_registrant_id = $lda_registrant->federal_registrant_id;
                         $lobbyist->save();
                         $lobbyist->addReference(self::$filing_url . $lda_filing->federal_filing_id, null, $lobbyist->getAllModifiedFields(), 'LDA Filing', null, $date, false);
                         $this->printDebug('New lobbyist/lobbying entity saved: ' . $lobbyist->name);
                         $lobbying_entity = $lobbyist;
                     }
                 }
             } else {
                 if ($existing_firm = EntityTable::getByExtensionQuery('Org')->addWhere('org.lda_registrant_id = ? ', $lda_registrant->federal_registrant_id)->execute()->getFirst()) {
                     $modified = array();
                     $lobbying_entity = $existing_firm;
                     if ($lda_registrant->description) {
                         $description = trim($lda_registrant->description);
                         if ($description != '' && preg_match('/[\\/\\-]\\d+[\\/\\-]/isu', $description) == 0) {
                             if (strlen($description) < 200) {
                                 if (!$existing_firm->blurb || $existing_firm->blurb == '') {
                                     $existing_firm->blurb = $description;
                                     $modified[] = 'blurb';
                                 }
                             } else {
                                 if (!$existing_firm->summary || $existing_firm->summary == '') {
                                     $existing_firm->summary = $description;
                                     $modified[] = 'summary';
                                 }
                             }
                         }
                     }
                     if ($lda_registrant->address && $lda_registrant->address != '' && count($existing_firm->Address) == 0) {
                         if ($address = $existing_firm->addAddress($lda_registrant->address)) {
                             $existing_firm->save();
                             $address->addReference(self::$filing_url . $lda_filing->federal_filing_id, null, $address->getAllModifiedFields(), 'LDA Filing', null, $date, false);
                         }
                     }
                     $existing_firm->save();
                     if (count($modified)) {
                         $existing_firm->addReference(self::$filing_url . $lda_filing->federal_filing_id, null, $modified, 'LDA Filing', null, $date, false);
                     }
                     $this->printDebug('Existing firm is lobbying entity: ' . $lobbying_entity->name);
                 } else {
                     $firm = new Entity();
                     $firm->addExtension('Org');
                     $firm->addExtension('Business');
                     $firm->addExtension('LobbyingFirm');
                     $firm->name = LsLanguage::titleize(OrgTable::stripNamePunctuation($lda_registrant->name), true);
                     $firm->lda_registrant_id = $lda_registrant->federal_registrant_id;
                     if ($lda_registrant->description) {
                         $description = trim($lda_registrant->description);
                         if ($description != '' && preg_match('/[\\/\\-]\\d+[\\/\\-]/isu', $description) == 0) {
                             if (strlen($description) < 200) {
                                 $firm->blurb = $description;
                             } else {
                                 $firm->summary = $description;
                             }
                         }
                     }
                     if ($lda_registrant->address && $lda_registrant->address != '') {
                         if ($address = $firm->addAddress($lda_registrant->address)) {
                             $firm->save();
                             $address->addReference(self::$filing_url . $lda_filing->federal_filing_id, null, $address->getAllModifiedFields(), 'LDA Filing', null, $date, false);
                         }
                     }
                     $firm->save();
                     $this->printDebug('New lobbying firm/lobbying entity saved: ' . $firm->name);
                     $firm->addReference(self::$filing_url . $lda_filing->federal_filing_id, null, $firm->getAllModifiedFields(), 'LDA Filing', null, $date, false);
                     $lobbying_entity = $firm;
                 }
             }
         }
         //PREP GOVT ENTITIES
         //$this->printTimeSince();
         //$this->printDebug('gov entities...');
         $lda_govts = $lda_filing->LdaGovts;
         //$this->printDebug('count of lda govs is ***** ' . count($lda_govts));
         $govt_entities = array();
         $excerpt['Government Bodies'] = array();
         foreach ($lda_govts as $lda_govt) {
             $excerpt['Government Bodies'][] = $lda_govt->name;
             $name_arr = $this->prepGovtName($lda_govt->name);
             if (!$name_arr) {
                 continue;
             }
             if ($govt_entity = EntityTable::findByAlias($lda_govt->name, $context = 'lda_government_body')) {
                 $govt_entities[] = $govt_entity;
                 //$this->printDebug('Existing govt entity: ' . $govt_entity->name);
             } else {
                 if ($govt_entity = EntityTable::getByExtensionQuery(array('Org', 'GovernmentBody'))->addWhere('name = ?', array($name_arr[0]))->fetchOne()) {
                     $govt_entities[] = $govt_entity;
                     $alias = new Alias();
                     $alias->context = 'lda_government_body';
                     $alias->name = $lda_govt->name;
                     $alias->entity_id = $govt_entity->id;
                     $alias->save();
                 } else {
                     $govt_entity = new Entity();
                     $govt_entity->addExtension('Org');
                     $govt_entity->addExtension('GovernmentBody');
                     $govt_entity->name = $name_arr[0];
                     $govt_entity->name_nick = $name_arr[1];
                     $govt_entity->is_federal = 1;
                     $govt_entity->save();
                     $alias = new Alias();
                     $alias->context = 'lda_government_body';
                     $alias->name = $lda_govt->name;
                     $alias->entity_id = $govt_entity->id;
                     $alias->save();
                     $govt_entity->addReference(self::$filing_url . $lda_filing->federal_filing_id, $excerpt, $govt_entity->getAllModifiedFields(), 'LDA Filing', null, $date, false);
                     $govt_entities[] = $govt_entity;
                 }
             }
         }
         $excerpt['Government Bodies'] = implode('; ', $excerpt['Government Bodies']);
         $excerpt_str = '';
         foreach ($excerpt as $k => $v) {
             $excerpt_str .= $k . ": ";
             $excerpt_str .= $v . "\n";
         }
         $excerpt = trim($excerpt_str);
         $this->printDebug($excerpt);
         $relationships = array();
         $lobbying_entity_extensions = $lobbying_entity->getExtensions();
         //CREATE LOBBYIST POSITION RELATIONSHIPS
         //$this->printTimeSince();
         //$this->printDebug('lobbyist positions...');
         $category = Doctrine::getTable('RelationshipCategory')->findOneByName('Position');
         if (!in_array('Lobbyist', $lobbying_entity_extensions)) {
             $firm_lobbyists = array();
             if ($lobbying_entity->exists()) {
                 $q = LsDoctrineQuery::create()->from('Entity e')->leftJoin('e.Relationship r ON (r.entity1_id = e.id)')->where('r.entity2_id = ? AND r.category_id = ?', array($lobbying_entity->id, RelationshipTable::POSITION_CATEGORY));
                 $firm_lobbyists = $q->execute();
             }
             $lobbyists = array();
             foreach ($lda_lobbyists as $lda_lobbyist) {
                 $lobbyist = $this->prepLobbyistName($lda_lobbyist->name);
                 if (!$lobbyist) {
                     continue;
                 }
                 $existing_lobbyist = null;
                 foreach ($firm_lobbyists as $fl) {
                     if (PersonTable::areNameCompatible($fl, $lobbyist)) {
                         $existing_lobbyist = $fl;
                         break;
                     }
                 }
                 //echo "before lobb save or rel save: ";
                 //$this->printTimeSince();
                 if (!$existing_lobbyist) {
                     $lobbyist->save();
                     $lobbyist->addReference(self::$filing_url . $lda_filing->federal_filing_id, $excerpt, $lobbyist->getAllModifiedFields(), 'LDA Filing', null, $date, false);
                     //$this->printDebug('New lobbyist saved: ' . $lobbyist->name);
                     $r = new Relationship();
                     $r->Entity1 = $lobbyist;
                     $r->Entity2 = $lobbying_entity;
                     $r->setCategory('Position');
                     $r->description1 = 'Lobbyist';
                     $r->is_employee = 1;
                     $r->save();
                     $r->addReference(self::$filing_url . $lda_filing->federal_filing_id, $excerpt, $lobbyist->getAllModifiedFields(), 'LDA Filing', null, $date, false);
                     //$this->printDebug('New position relationship saved: ' . $lobbying_entity->name . ' and ' . $lobbyist->name);
                     $lobbyists[] = $lobbyist;
                 } else {
                     //$this->printDebug('Lobbyist exists: ' . $lobbyist->name . ' is same as ' . $existing_lobbyist->name);
                     $lobbyists[] = $existing_lobbyist;
                 }
             }
         }
         //PREP ISSUES
         //$this->printTimeSince();
         //$this->printDebug('issues...');
         $issues = array();
         $lda_issues = Doctrine_Query::create()->from('LdaFilingIssue f')->leftJoin('f.LdaIssue i')->where('f.filing_id = ?', $lda_filing->id)->execute();
         foreach ($lda_issues as $lda_issue) {
             $name = LsLanguage::nameize($lda_issue->LdaIssue->name);
             if (!($issue = Doctrine::getTable('LobbyIssue')->findOneByName($name))) {
                 $issue = new LobbyIssue();
                 $issue->name = $name;
                 $issue->save();
                 //$this->printDebug('Lobbying issue saved: ' . $issue->name);
             }
             $issues[] = array($issue, $lda_issue->specific_issue);
         }
         //CREATE LOBBY FILING
         //$this->printTimeSince();
         //$this->printDebug('creating lobby filing:');
         $lobby_filing = new LobbyFiling();
         $lobby_filing->year = $lda_filing->year;
         $lobby_filing->amount = $lda_filing->amount;
         $lobby_filing->federal_filing_id = $lda_filing->federal_filing_id;
         $period = $lda_filing->LdaPeriod->description;
         $lobby_filing->start_date = $date;
         if ($paren = strpos($period, '(')) {
             $lobby_filing->period = trim(substr($period, 0, $paren));
         } else {
             $lobby_filing->period = 'Undetermined';
         }
         $lobby_filing->report_type = LsLanguage::nameize($lda_filing->LdaType->description);
         foreach ($issues as $issue) {
             $filing_issue = new LobbyFilingLobbyIssue();
             $filing_issue->Issue = $issue[0];
             $filing_issue->Filing = $lobby_filing;
             $filing_issue->specific_issue = $issue[1];
             $filing_issue->save();
         }
         if (in_array('Lobbyist', $lobbying_entity_extensions)) {
             $lobby_filing->Lobbyist[] = $lobbying_entity;
             //$this->printDebug('Lobbying entity lobbyist added to lobbying relationship: ' . $lobbying_entity->name);
         } else {
             foreach ($lobbyists as $lobbyist) {
                 $lobby_filing->Lobbyist[] = $lobbyist;
             }
         }
         //var_dump($lobby_filing->toArray());
         $lobby_filing->save();
         //CREATE TRANSACTION RELATIONSHIP, IF ANY
         //$this->printTimeSince();
         //$this->printDebug('starting transaction relationships:');
         $transaction = null;
         if ($client_entity != null) {
             $transaction = RelationshipTable::getByCategoryQuery('Transaction')->addWhere('r.entity1_id = ?', $client_entity->id)->addWhere('r.entity2_id = ?', $lobbying_entity->id)->addWhere('transaction.is_lobbying = ?', 1)->fetchOne();
             if ($transaction) {
                 $transaction->updateDateRange($date, true);
                 if ($lda_filing->amount && $lda_filing->amount != '') {
                     if (!$transaction->amount || $transaction->amount == '') {
                         $transaction->amount = $lda_filing->amount;
                     } else {
                         $transaction->amount += $lda_filing->amount;
                     }
                 }
                 $transaction->filings++;
                 $transaction->save();
                 $transaction->addReference(self::$filing_url . $lda_filing->federal_filing_id, $excerpt, $transaction->getAllModifiedFields(), 'LDA Filing', null, $date, false);
             } else {
                 $transaction = new Relationship();
                 $transaction->Entity1 = $client_entity;
                 $transaction->Entity2 = $lobbying_entity;
                 $transaction->setCategory('Transaction');
                 $transaction->description1 = 'Lobbying Client';
                 $transaction->is_lobbying = 1;
                 $transaction->filings = 1;
                 $transaction->updateDateRange($date, true);
                 if (in_array('Person', $lobbying_entity_extensions)) {
                     $transaction->description2 = 'Hired Lobbyist';
                 } else {
                     $transaction->description2 = 'Lobbying Firm';
                 }
                 if ($lda_filing->amount && $lda_filing->amount != '') {
                     $transaction->amount = $lda_filing->amount;
                 }
                 $transaction->save();
                 $transaction->addReference(self::$filing_url . $lda_filing->federal_filing_id, $excerpt, $transaction->getAllModifiedFields(), 'LDA Filing', null, $date, false);
                 //$this->printDebug('New lobbying transaction saved between client ' . $client_entity->name . ' and lobbying firm ' . $lobbying_entity->name);
             }
             $relationships[] = $transaction;
         }
         //CREATE LOBBYING RELATIONSHIP
         //$this->printTimeSince();
         //$this->printDebug('starting lobbying relationships:');
         foreach ($govt_entities as $govt_entity) {
             $lobbying_relationship = RelationshipTable::getByCategoryQuery('Lobbying')->addWhere('r.entity1_id = ?', $lobbying_entity->id)->addWhere('r.entity2_id = ?', $govt_entity->id)->fetchOne();
             if ($lobbying_relationship) {
                 $lobbying_relationship->updateDateRange($date);
                 $lobbying_relationship->filings++;
                 $lobbying_relationship->save();
             } else {
                 $lobbying_relationship = new Relationship();
                 $lobbying_relationship->Entity1 = $lobbying_entity;
                 $lobbying_relationship->Entity2 = $govt_entity;
                 $lobbying_relationship->setCategory('Lobbying');
                 if ($transaction) {
                     $lobbying_relationship->description1 = 'Lobbying (for client)';
                 } else {
                     $lobbying_relationship->description1 = 'Direct Lobbying';
                 }
                 $lobbying_relationship->description2 = $lobbying_relationship->description1;
                 $lobbying_relationship->updateDateRange($date, true);
                 $lobbying_relationship->filings = 1;
                 $lobbying_relationship->save();
                 $lobbying_relationship->addReference(self::$filing_url . $lda_filing->federal_filing_id, $excerpt, $lobbying_relationship->getAllModifiedFields(), 'LDA Filing', null, $date, false);
             }
             $relationships[] = $lobbying_relationship;
         }
         foreach ($relationships as $relationship) {
             $lobby_filing->Relationship[] = $relationship;
         }
         $lobby_filing->save();
         //$this->printTimeSince();
         $this->printDebug("Import Completed\n");
         $this->db->commit();
     } catch (Exception $e) {
         $this->db->rollback();
         throw $e;
     }
 }
Exemple #5
0
 /**
  * Looks for a person's name in free text, given the person's surname.
  *
  * Every whole-word occurrence of the surname is examined (hyphens and
  * whitespace inside the surname are interchangeable). For each occurrence
  * the words before it, back to the nearest preceding comma, are collected
  * as given names/initials/prefixes, and the words after it are collected as
  * suffixes. The last occurrence that yields a non-empty leading part wins.
  *
  * @param string $str  Text to search.
  * @param string $last Surname to look for.
  *
  * @return array|null Array with keys 'nameFull', 'nameStart', 'nameLast' and
  *                    'namePost'; null when nothing usable is found or when
  *                    the surname occurs at the very start of $str.
  */
 static function getNameWithLast($str, $last)
 {
     $re_last = LsString::escapeStringForRegex($last);
     //hyphens and spaces interchangeable in last names
     $re_last = preg_replace('/\\\\s+|\\\\\\-/is', '(\\s+|\\-)', $re_last);
     $matches = array();
     preg_match_all('/\\b' . $re_last . '\\b/isu', $str, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
     $name = null;
     foreach ($matches as $match) {
         $pos_last = $match[0][1];
         if ($pos_last == 0) {
             return null;
         }
         $last = $match[0][0];
         //work backwards from this occurrence to find the nearest comma
         $comma = strripos($str, ',', -1 * (strlen($str) - $pos_last));
         $start = $comma === false ? 0 : $comma;
         //slice around this occurrence WITHOUT touching $str: the offsets of
         //the remaining matches all refer to the original string
         $pre = (string) substr($str, $start, $pos_last - $start);
         $post = (string) substr($str, $pos_last + strlen($last));
         //walk the preceding words right-to-left, stopping at the first one
         //that cannot belong to the name
         $new = array();
         foreach (array_reverse(preg_split('/[\\s]+/', $pre, -1, PREG_SPLIT_NO_EMPTY)) as $a) {
             $case = LsString::checkCase($a);
             if (!$case) {
                 continue;
             }
             if ($case == 'initial') {
                 $new[] = $a;
             } elseif ($case == 'lower') {
                 break;
             } elseif (preg_match('/\\.(\\P{L})*$/u', $a) == 1) {
                 //a word ending in a period is kept only if it is a known prefix
                 $a = LsString::stripNonAlpha($a);
                 if ($s = LsArray::inArrayNoCase($a, PersonTable::$nameParsePrefixes)) {
                     $new[] = $s;
                 }
                 break;
             } else {
                 $new[] = $a;
             }
         }
         $pre = implode(' ', array_reverse($new));
         if (strlen(trim($pre)) == 0) {
             continue;
         }
         //walk the following words left-to-right, keeping known suffixes only
         $new = array();
         foreach (preg_split('/[\\s]+/', $post, -1, PREG_SPLIT_NO_EMPTY) as $a) {
             $case = LsString::checkCase($a);
             if (!$case) {
                 continue;
             }
             if ($case == 'lower') {
                 break;
             }
             $a = LsString::stripNonAlpha($a);
             if ($s = LsArray::inArrayNoCase($a, PersonTable::$nameParseSuffixes)) {
                 $new[] = $s;
             } else {
                 break;
             }
         }
         $post = trim(implode(' ', $new));
         $full = $pre . ' ' . $last;
         if (strlen($post) > 0) {
             $full .= ', ' . $post;
         }
         $name = array('nameFull' => $full, 'nameStart' => $pre, 'nameLast' => $last, 'namePost' => $post);
     }
     return $name;
 }
Exemple #6
0
 /**
  * Extracts candidate proper names from a biography text.
  *
  * Names found by LsLanguage::getAllNames() are further broken up around
  * connecting words ("of the", "at", "and", ...) and possessives, then
  * filtered against the exclusion lists built below (LsLanguage word lists
  * plus this entity's own name and its individual words).
  *
  * @param string|null $bio Text to parse; defaults to the entity's summary.
  *
  * @return array Unique candidate names, ordered by LsArray::strlenSort().
  */
 public function parseBio($bio = null)
 {
     if (!$bio) {
         $bio = $this->Entity->summary;
     }
     $name_matches = LsLanguage::getAllNames($bio);
     $connectors = array('for\\s+the', 'of\\s+the', 'at\\s+the', 'at', 'of', 'the', 'for', 'and');
     //index loop on purpose: fragments appended below are split again in turn
     for ($i = 0; $i < count($name_matches); $i++) {
         $name = $name_matches[$i];
         foreach ($connectors as $connector) {
             $splat = preg_split('/\\s+' . $connector . '\\s+/isu', $name, -1, PREG_SPLIT_NO_EMPTY);
             //preg_split() yields false on malformed UTF-8; nothing to split then
             if (!is_array($splat) || count($splat) < 2) {
                 continue;
             }
             if (!in_array($splat[0], LsLanguage::$commonPositions)) {
                 $name_matches = array_merge($name_matches, $splat);
             } else {
                 //leading position title: drop it and keep the rest joined
                 array_shift($splat);
                 $glue = str_replace('\\s+', ' ', $connector);
                 $name = implode(" {$glue} ", $splat);
             }
         }
         $splat = preg_split('/\'s\\s+/isu', $name, -1, PREG_SPLIT_NO_EMPTY);
         if (is_array($splat) && count($splat) > 1) {
             $name_matches = array_merge($name_matches, $splat);
         }
     }
     $exclude = array_merge(LsLanguage::$regions, LsLanguage::$commonFirstNames, LsLanguage::$commonLastNames, LsLanguage::$states, LsLanguage::$commonCities, LsLanguage::$grammar, LsLanguage::$weekdays, LsLanguage::$months, LsLanguage::$geography, LsLanguage::$possessives, explode(' ', $this->Entity->name), array($this->Entity->name), LsLanguage::$schools, LsLanguage::$commonPositions);
     $names = array();
     //index loop rather than foreach: foreach walks a copy, so the
     //possessive-free variants appended here would never be examined
     for ($i = 0; $i < count($name_matches); $i++) {
         $name = $name_matches[$i];
         $new = str_replace("'s ", " ", $name);
         if ($new !== $name) {
             $name_matches[] = $new;
         }
         $name = trim($name);
         //strip one trailing comma, period or apostrophe
         $name = preg_replace('/[\\,\\.\'\\’]$/isu', '', $name);
         if (!in_array($name, $exclude)) {
             $names[] = $name;
         }
     }
     $names = array_unique($names);
     $names = LsArray::strlenSort($names);
     return $names;
 }
 /**
  * Parses a single-line personal name into prefix, first, middle, last,
  * suffix and nickname.
  *
  * Prefixes and suffixes are recognised from self::$nameParsePrefixes and
  * self::$nameParseSuffixes; a nickname is a quoted run of non-space
  * characters. The remaining words are assigned by position.
  *
  * @param string      $str         Name to parse.
  * @param string|null $surname     Known surname; its inner whitespace is
  *                                 temporarily replaced with underscores so a
  *                                 multi-word surname stays in one piece.
  * @param bool        $returnArray When true, return the parts as an array.
  *
  * @return Entity|array A new, unsaved Entity with the Person extension, or
  *                      (with $returnArray) an array keyed name_first,
  *                      name_last, name_middle, name_prefix, name_suffix,
  *                      name_nick.
  */
 static function parseFlatName($str, $surname = null, $returnArray = false)
 {
     $namePrefix = $nameFirst = $nameMiddle = $nameLast = $nameSuffix = $nameNick = null;
     //to handle multi-word last names like Van der Twerp
     $sub = null;
     if ($surname) {
         $sub = preg_replace('/(^(\\P{L})+|(\\P{L})+$)/u', '', $surname);
         $sub = preg_replace('/\\s+/is', '_', $sub);
         $str = str_ireplace($surname, $sub, $str);
     }
     //trim and remove periods
     $str = trim(str_replace('.', ' ', $str));
     //remove extra spaces
     $str = preg_replace('/\\s{2,}/', ' ', $str);
     //remove any parenthesised text that follows a space
     $str = preg_replace('/ \\([^\\)]+\\)/', '', $str);
     //get prefixes; restart the scan after each hit so stacked prefixes are all consumed
     $prefixes = self::$nameParsePrefixes;
     while ($prefix = current($prefixes)) {
         if ($str != ($new = preg_replace('/^' . $prefix . ' /i', '', $str))) {
             //common prefixes are stripped but not recorded
             if (!LsArray::inArrayNoCase($prefix, LsLanguage::$commonPrefixes)) {
                 $namePrefix .= $prefix . ' ';
             }
             $str = trim($new);
             reset($prefixes);
             continue;
         }
         next($prefixes);
     }
     $namePrefix = $namePrefix ? trim($namePrefix) : null;
     //get suffixes, same restart-on-hit scan, working from the end of the string
     $suffixes = self::$nameParseSuffixes;
     while ($suffix = current($suffixes)) {
         if ($str != ($new = preg_replace('/ ' . $suffix . '$/i', '', $str))) {
             $nameSuffix = $suffix . ' ' . $nameSuffix;
             $str = trim($new);
             reset($suffixes);
             continue;
         }
         next($suffixes);
     }
     $nameSuffix = $nameSuffix ? trim($nameSuffix) : null;
     //remove commas left over from suffixes
     $str = trim(str_replace(',', '', $str));
     //find nickname in quotes; the pattern has a single capture group
     if (preg_match('/["\']([\\S]+)[\'"]/', $str, $nickFound)) {
         $nameNick = $nickFound[1];
         $str = trim(preg_replace('/["\']([\\S]+)[\'"]/', '', $str));
     }
     //condense multiple spaces
     $str = preg_replace('/\\s{2,}/', ' ', $str);
     //split into parts
     $parts = explode(' ', $str);
     $numParts = count($parts);
     switch ($numParts) {
         case 1:
             //a lone word: borrow the prefix or suffix to fill the missing half
             if ($namePrefix) {
                 $nameFirst = $namePrefix;
                 $nameLast = $parts[0];
                 $namePrefix = null;
             } elseif ($nameSuffix) {
                 $nameFirst = $parts[0];
                 $nameLast = $nameSuffix;
                 $nameSuffix = null;
             } elseif (strtolower((string) $sub) == strtolower($parts[0])) {
                 $nameLast = $parts[0];
             } else {
                 $nameFirst = $parts[0];
             }
             break;
         case 2:
             $nameFirst = $parts[0];
             $nameLast = $parts[1];
             break;
         case 3:
             $nameFirst = $parts[0];
             $nameMiddle = $parts[1];
             $nameLast = $parts[2];
             break;
         default:
             //first and last word are first/last name, everything between is middle
             $nameFirst = $parts[0];
             $nameLast = $parts[$numParts - 1];
             for ($n = 1; $n < $numParts - 1; $n++) {
                 $nameMiddle .= $parts[$n] . ' ';
             }
             $nameMiddle = trim($nameMiddle);
             break;
     }
     //restore the spaces of a multi-word surname
     $nameLast = str_replace('_', ' ', (string) $nameLast);
     $name = array('name_first' => $nameFirst, 'name_last' => $nameLast, 'name_middle' => $nameMiddle, 'name_prefix' => $namePrefix, 'name_suffix' => $nameSuffix, 'name_nick' => $nameNick);
     foreach ($name as $nk => &$nv) {
         if ($nv && $nk != 'name_suffix' && $nk != 'name_prefix') {
             //strip leading/trailing non-letters, then fix all-upper/all-lower casing
             $nv = preg_replace('/^(\\P{L})+|(\\P{L})+$/u', '', $nv);
             $case = LsString::checkCase($nv);
             $nv = $case == 'upper' || $case == 'lower' ? LsLanguage::nameize($nv) : $nv;
             if ($nk != 'name_last') {
                 $nv = LsLanguage::hgCaser($nv, false);
             }
         }
     }
     unset($nv);
     if ($returnArray) {
         return $name;
     }
     $person = new Entity();
     $person->addExtension('Person');
     $person->name_first = $name['name_first'];
     $person->name_middle = $name['name_middle'];
     $person->name_last = $name['name_last'];
     $person->name_nick = $name['name_nick'];
     $person->name_prefix = $name['name_prefix'];
     $person->name_suffix = $name['name_suffix'];
     return $person;
 }
 /**
  * Normalises a free-text position description into known business titles.
  *
  * The text is cleaned up and split on commas, semicolons, "and" and " & ".
  * Each piece has common abbreviations expanded and qualifiers removed, and
  * is kept if it matches an entry of PositionTable::$businessPositions.
  *
  * @param string $str    Raw description text.
  * @param mixed  $entity Unused; kept for interface compatibility.
  *
  * @return array List of description strings. Falls back to the cleaned
  *               input as the single element when the text mixes more than
  *               one kind of separator or when no piece matches.
  */
 static function parseDescriptionStr($str, $entity = null)
 {
     $descriptions = array();
     //cleanup text to be parsed
     $str = trim($str);
     //drop periods embedded in a token; periods next to whitespace become a space below
     $str = preg_replace('/(?<!\\s)\\.(?!\\s)/', '', $str);
     $str = str_replace('.', ' ', $str);
     $str = preg_replace('/\\s{2,}/', ' ', $str);
     $str = preg_replace('/\\s+,(?=\\s)/', ',', $str);
     $str = preg_replace('/\\)\\s*$/', '', $str);
     if (strtolower($str) == 'see remarks') {
         $str = '';
     }
     //don't parse if more than one kind of separator is present
     $num = 0;
     $patterns = array('/\\s&\\s/', '/,/', '/;/', '/\\band\\b/i');
     foreach ($patterns as $pattern) {
         if (preg_match($pattern, $str)) {
             $num++;
         }
     }
     if ($num > 1) {
         return array($str);
     }
     //split on any of the separators
     $parts = preg_split('/,|;|\\band\\b|\\s&\\s/', $str, -1, PREG_SPLIT_NO_EMPTY);
     foreach ($parts as $part) {
         $part = trim($part);
         $part = preg_replace('/\\s{2,}/', ' ', $part);
         //abbreviation replacements (order matters: later rules see earlier output)
         //collapse spaced initials ("C E O" -> "CEO") while keeping the surrounding spaces
         $part = preg_replace('/( |^)(\\w) (\\w) (\\w)( |$)/', '\\1\\2\\3\\4\\5', $part);
         $part = preg_replace('/(Interim|Acting|Incoming) /i', '', $part);
         $part = preg_replace('/Sr /i', 'Senior ', $part);
         $part = preg_replace('/Chf /i', 'Chief ', $part);
         $part = preg_replace('/( |^)V( |$)/i', ' Vice ', $part);
         $part = preg_replace('/( |^)VP( |$)/i', ' Vice President ', $part);
         $part = preg_replace('/( |^)VC( |$)/i', ' Vice Chairman ', $part);
         $part = preg_replace('/( |^)Chr( |$)/i', ' Chairman ', $part);
         $part = preg_replace('/( |^)Ofcr( |$)/i', ' Officer ', $part);
         $part = preg_replace('/( |^)Vice P( |$)/i', ' Vice President ', $part);
         $part = preg_replace('/( |^)(Ex|Exec)( |$)/i', ' Executive ', $part);
         $part = preg_replace('/( |^)EVP( |$)/i', ' Executive Vice President ', $part);
         $part = preg_replace('/( |^)(Off|Offic|Offcr)( |$)/i', ' Officer ', $part);
         $part = str_replace('Gen ', 'General ', $part);
         $part = preg_replace('/( |^)(Op|Oper) /', ' Operating ', $part);
         $part = preg_replace('/( |^)(Bd|Brd)( |$)/i', ' Board ', $part);
         $part = preg_replace('/of Board/i', ' of the Board', $part);
         $part = preg_replace('/( |^)COB( |$)/i', ' Chairman of the Board ', $part);
         $part = preg_replace('/( |^)(Pres|Prs|Presid|Prsdt|Prsdnt)( |$)/i', ' President ', $part);
         $part = preg_replace('/( |^)Admin( |$)/i', ' Administrative ', $part);
         $part = preg_replace('/( |^)Info( |$)/i', ' Information ', $part);
         $part = preg_replace('/\\bComm\\b/i', 'Committee', $part);
         $part = preg_replace('/\\bInc\\b/i', '', $part);
         $part = preg_replace('/( |-|^)(Ch|Chm|Chmn|Chrm|Chrmn|Chair|Chairmain|Chariman)( |$)/i', '\\1Chairman ', $part);
         $part = preg_replace('/(Sec|Secr|Secy|Secretar|Secreta)( |$)/i', 'Secretary ', $part);
         $part = str_replace('Vice-', 'Vice ', $part);
         $part = preg_replace('/( |^)Non /i', ' Non-', $part);
         $part = preg_replace('/\\bCompl\\b/i', 'Compliance', $part);
         $part = str_ireplace('of Advisory', 'of the Advisory', $part);
         $part = preg_replace('/Advisory (Panel|Council)/i', 'Advisory Board', $part);
         //qualifiers that do not change the underlying title
         $part = str_ireplace('Independent ', '', $part);
         $part = str_ireplace('Lead ', '', $part);
         $part = str_ireplace('Corporate ', '', $part);
         $part = str_ireplace('Outside ', '', $part);
         $part = str_ireplace('Non-interested', '', $part);
         $part = str_ireplace('Interested', '', $part);
         $part = str_replace('Main ', '', $part);
         $part = str_ireplace('Presiding ', '', $part);
         $part = str_ireplace('Founding ', '', $part);
         $part = str_ireplace('Acctg', 'Accounting', $part);
         $part = str_ireplace('Chairperson', 'Chairman', $part);
         $part = str_ireplace('Chairwoman', 'Chairman', $part);
         $part = str_ireplace("Gen'l", 'General', $part);
         $part = trim($part);
         $part = preg_replace('/\\s{2,}/', ' ', $part);
         if (LsArray::inArrayNoCase($part, PositionTable::$businessPositions)) {
             $descriptions[] = $part;
         }
     }
     if (!count($descriptions)) {
         $descriptions[] = $str;
     }
     return $descriptions;
 }
 /**
  * Loads the list of schools from the zipped CSV published at $this->url.
  *
  * The archive and the extracted CSV are cached under
  * <sf_root_dir>/data/schools/; the download and the unzip step are skipped
  * when the corresponding file is already there. The CSV is expected to have
  * the archive's lower-cased base name with "zip" replaced by "csv".
  *
  * @return mixed Result of LsArray::CsvFileToArrayObject(), or null when the
  *               archive could not be fetched, saved or extracted.
  */
 public function getSchoolList()
 {
     $base_data_dir = sfConfig::get('sf_root_dir') . '/data/schools/';
     $filename_zip = $base_data_dir . basename($this->url);
     $filename_csv = $base_data_dir . preg_replace('/zip$/i', 'csv', strtolower(basename($this->url)));
     if (!is_dir($base_data_dir)) {
         mkdir($base_data_dir, 0777, true);
     }
     //only hit the network when the archive is not cached yet
     if (!file_exists($filename_zip)) {
         if ($this->browser->get($this->url)->responseIsError()) {
             $this->printDebug('Browser did not get file');
             return null;
         }
         if (file_put_contents($filename_zip, $this->browser->getResponseText()) === false) {
             $this->printDebug('Zip failure');
             return null;
         }
     }
     if (!file_exists($filename_csv)) {
         //paths derive from a URL, so quote them before handing them to the shell
         exec('unzip ' . escapeshellarg($filename_zip) . ' -d ' . escapeshellarg($base_data_dir));
         //judge extraction by its result: unzip's printed output says nothing about success
         if (!file_exists($filename_csv)) {
             $this->printDebug('Failed to load csv');
             return null;
         }
     }
     $educational_institutions = LsArray::CsvFileToArrayObject($filename_csv);
     $this->printDebug(' Found schools: ' . count($educational_institutions));
     return $educational_institutions;
 }
 /**
  * Splits a free-text position description into normalised titles with notes.
  *
  * The corporation's name, nickname and ticker are removed from the text,
  * which is then split on commas, semicolons, "and", " & ", slashes and
  * hyphens (except after "Co"). Each piece has common abbreviations expanded
  * and is resolved, in order, against PositionTable::$businessPositions and
  * against description1 of existing Relationship records. An unresolved
  * first piece is retried with trailing words removed; an unresolved later
  * piece is attached as a note to the previous description.
  *
  * @param string $str  Raw description text.
  * @param object $corp Object exposing name, name_nick and ticker.
  *
  * @return array List of arrays with keys 'description' (string) and
  *               'note' (list of strings).
  */
 public function parseDescriptionStr($str, $corp)
 {
     $descriptions = array();
     //cleanup text to be parsed
     $str = trim($str);
     $str = str_replace('.', ' ', $str);
     $str = preg_replace('/\\s{2,}/', ' ', $str);
     $name_re = LsString::escapeStringForRegex($corp->name);
     $str = preg_replace('/\\b' . $name_re . '\\b/isu', '', $str);
     if ($corp->name_nick) {
         $nick_re = LsString::escapeStringForRegex($corp->name_nick);
         $str = preg_replace('/\\b' . $nick_re . '\\b/isu', '', $str);
     }
     if ($corp->ticker) {
         $tick_re = LsString::escapeStringForRegex($corp->ticker);
         $str = preg_replace('/\\b' . $tick_re . '\\b/isu', '', $str);
     }
     //split on any of the separators
     $parts = preg_split('/,|;|\\band\\b|(?<!C[Oo])\\-|\\bAND\\b|\\s&\\s|\\//', $str, -1, PREG_SPLIT_NO_EMPTY);
     foreach ($parts as $part) {
         $part = trim($part);
         $part = preg_replace('/\\s{2,}/', ' ', $part);
         //abbreviation replacements (order matters: later rules see earlier output)
         //collapse spaced initials ("C E O" -> "CEO") while keeping the surrounding spaces
         $part = preg_replace('/( |^)(\\w) (\\w) (\\w)( |$)/', '\\1\\2\\3\\4\\5', $part);
         $part = preg_replace('/(Interim|Acting|Incoming) /i', '', $part);
         $part = preg_replace('/Sr /i', 'Senior ', $part);
         $part = preg_replace('/Chf /i', 'Chief ', $part);
         $part = preg_replace('/( |^)V( |$)/i', ' Vice ', $part);
         $part = preg_replace('/( |^)VP( |$)/i', ' Vice President ', $part);
         $part = preg_replace('/( |^)VC( |$)/i', ' Vice Chairman ', $part);
         $part = preg_replace('/( |^)Chr( |$)/i', ' Chairman ', $part);
         $part = preg_replace('/( |^)Ofcr( |$)/i', ' Officer ', $part);
         $part = preg_replace('/( |^)Vice P( |$)/i', ' Vice President ', $part);
         $part = preg_replace('/( |^)(Ex|Exec)( |$)/i', ' Executive ', $part);
         $part = preg_replace('/( |^)EVP( |$)/i', ' Executive Vice President ', $part);
         $part = preg_replace('/( |^)(Off|Offic|Offcr)( |$)/i', ' Officer ', $part);
         $part = str_replace('Gen ', 'General ', $part);
         $part = preg_replace('/( |^)(Op|Oper) /', ' Operating ', $part);
         $part = preg_replace('/( |^)(Bd|Brd)( |$)/i', ' Board ', $part);
         $part = preg_replace('/of Board/i', ' of the Board', $part);
         $part = preg_replace('/( |^)COB( |$)/i', ' Chairman of the Board ', $part);
         $part = preg_replace('/( |^)(Pres|Prs|Presid|Prsdt|Prsdnt)( |$)/i', ' President ', $part);
         $part = preg_replace('/( |^)Admin( |$)/i', ' Administrative ', $part);
         $part = preg_replace('/( |^)Info( |$)/i', ' Information ', $part);
         $part = preg_replace('/\\bComm\\b/i', 'Committee', $part);
         $part = preg_replace('/\\bInc\\b/i', '', $part);
         $part = preg_replace('/( |-|^)(Ch|Chm|Chmn|Chrm|Chrmn|Chair|Chairmain|Chariman)( |$)/i', '\\1Chairman ', $part);
         $part = preg_replace('/(Sec|Secr|Secy|Secretar|Secreta)( |$)/i', 'Secretary ', $part);
         $part = str_replace('Vice-', 'Vice ', $part);
         $part = preg_replace('/( |^)Non /i', ' Non-', $part);
         $part = preg_replace('/\\bCompl\\b/i', 'Compliance', $part);
         $part = str_ireplace('of Advisory', 'of the Advisory', $part);
         $part = preg_replace('/Advisory (Panel|Council)/i', 'Advisory Board', $part);
         //qualifiers that do not change the underlying title
         $part = str_ireplace('Independent ', '', $part);
         $part = str_ireplace('Lead ', '', $part);
         $part = str_ireplace('Corporate ', '', $part);
         $part = str_ireplace('Outside ', '', $part);
         $part = str_ireplace('Non-interested', '', $part);
         $part = str_ireplace('Interested', '', $part);
         $part = str_replace('Main ', '', $part);
         $part = str_ireplace('Presiding ', '', $part);
         $part = str_ireplace('Founding ', '', $part);
         $part = str_ireplace('Acctg', 'Accounting', $part);
         $part = str_ireplace('Chairperson', 'Chairman', $part);
         $part = str_ireplace('Chairwoman', 'Chairman', $part);
         $part = str_ireplace("Gen'l", 'General', $part);
         $part = trim($part);
         $part = preg_replace('/\\s{2,}/', ' ', $part);
         if ($part == '') {
             continue;
         }
         $position = array('description' => null, 'note' => array());
         //look for matching title: known positions first, then existing relationships
         $p = LsArray::inArrayNoCase($part, PositionTable::$businessPositions);
         if ($p) {
             $position['description'] = $p;
         } elseif ($q = Doctrine::getTable('Relationship')->findOneByDescription1($part)) {
             $position['description'] = $q->description1;
         } elseif (count($descriptions) == 0) {
             //first piece and still unknown: retry with trailing words dropped one at a time
             //NOTE(review): the dropped words are discarded and the scan keeps going after
             //a hit, so the shortest matching prefix wins - confirm that is intended
             $part_splat = LsString::split($part);
             $lim = count($part_splat) - 1;
             for ($i = 0; $i < $lim; $i++) {
                 array_pop($part_splat);
                 $part_new = implode(' ', $part_splat);
                 if (strtoupper($part_new) == 'DIRECTOR') {
                     break;
                 }
                 $p = LsArray::inArrayNoCase($part_new, PositionTable::$businessPositions);
                 if ($p) {
                     $position['description'] = $p;
                 } elseif ($q = Doctrine::getTable('Relationship')->findOneByDescription1($part_new)) {
                     $position['description'] = $q->description1;
                 }
             }
             if (!$position['description']) {
                 $position['description'] = $part;
             }
         } else {
             //unknown later piece: record it as a note on the previous description
             $descriptions[count($descriptions) - 1]['note'][] = $part;
         }
         if (isset($position['description'])) {
             $descriptions[] = $position;
         }
     }
     return $descriptions;
 }
Exemple #11
0
    }
    ?>

<?php 
    if (count($donors)) {
        ?>

<?php 
        include_partial('global/section', array('title' => 'Top Donors', 'pointer' => 'Top donors to US politicians/PACs with positions/memberships at ' . $entity['name']));
        ?>

<div id="top-donors">
</div>

<?php 
        $donors = LsArray::flip($donors);
        ?>

<?php 
        include_partial('global/polBarGraph', array('dataSet' => array_slice($donors['amount'], 0, 10), 'graphName' => "#top-donors", 'dataLabels' => array_slice($donors['name'], 0, 10), 'dataUrls' => array_slice($donors['url'], 0, 10)));
        ?>



<?php 
    }
    ?>

<?php 
}
?>