Beispiel #1
0
 /**
  * Stores a normalized copy of the supplied text on the instance.
  *
  * The value is passed through LsHtml::replaceEntities() and the result
  * through LsString::utf8TransUnaccent(); the outcome is kept in $this->text.
  *
  * @param mixed $text Raw text to normalize (presumably an HTML/text string -- confirm with callers).
  */
 function __construct($text)
 {
     $this->text = LsString::utf8TransUnaccent(LsHtml::replaceEntities($text));
 }
Beispiel #2
0
 /**
  * Finds a DEF 14A filing for the current corporation through the SEC
  * full-text search page and loads its normalized text.
  *
  * The search result page for $this->corp->sec_cik is scanned for entries
  * whose year is one of $this->years. The first entry whose link contains
  * "def14", or whose link label contains "def 14" (both case-insensitive),
  * is selected. On success the method sets:
  *   - $this->year : the year captured from the selected entry
  *   - $this->url  : the link of the selected entry
  *   - $this->text : the fetched document after LsHtml::replaceEntities()
  *                   and LsString::utf8TransUnaccent()
  *
  * HTTP failures are reported with echo; in that case, and when nothing
  * usable is found, $this->text is left untouched.
  *
  * @return void
  */
 private function getProxy()
 {
     $this->printDebug($this->corp->name);

     $url = "http://searchwww.sec.gov/EDGARFSClient/jsp/EDGAR_Query_Result.jsp?startDoc=1&queryString=&queryForm=DEF+14A&isAdv=1&queryCik=" . $this->corp->sec_cik . "&numResults=10#topAnchor";
     $years = implode('|', $this->years);

     if ($this->browser->get($url)->responseIsError()) {
         echo "Couldn't get " . $url . "\n";
         return;
     }

     // Captures: [1] year, [3] single-quoted link target ending in ".htm", [4] link label.
     $re = '/(' . $years . ')<\\/i>(<[^>]*>){2}<a[^\']+\'([^\']+)(?<=\\.htm)\'[^>]*>([^<]*)</isu';
     $text = $this->browser->getResponseText();

     // preg_match_all() yields 0 (no hits) or false (regex error); either way there is nothing to do.
     if (!preg_match_all($re, $text, $matches, PREG_SET_ORDER)) {
         return;
     }

     $selected = false;
     foreach ($matches as $match) {
         if (stristr($match[3], 'def14') !== false || stristr($match[4], 'def 14') !== false) {
             $this->year = $match[1];
             $this->url = $match[3];
             $selected = true;
             break;
         }
     }

     // The search can return entries of which none is a DEF 14A link. Without
     // this guard the request below would be issued with an unset URL.
     // A URL already present on the instance is still fetched, as before.
     if (!$selected && empty($this->url)) {
         return;
     }

     if ($this->browser->get($this->url)->responseIsError()) {
         echo "Couldn't get " . $this->url . "\n";
         return;
     }

     $this->printDebug($this->url);

     $text = $this->browser->getResponseText();
     $text = LsHtml::replaceEntities($text);
     $text = LsString::utf8TransUnaccent($text);
     $this->text = $text;
 }
 /**
  * Downloads a proxy document and, for each roster row, tries to find how
  * that person's name is written in it.
  *
  * The downloaded text is normalized and kept in $this->proxyText. For every
  * row that has an officer title or is flagged as director, the first token
  * of 'personName' (form 4 names look like "RUBIN ROBERT E", so this token is
  * assumed to be part of the last name) is searched case-insensitively in
  * the document. Around each hit a small window of text is matched against a
  * loose name regex, and the candidate is accepted once its non-prefix name
  * parts are compatible with the form 4 name.
  *
  * Rows that match receive the keys 'proxyUrl', 'proxyYear', 'nameLast' and
  * 'proxyName'. A row whose 'personName' starts with a single-letter token
  * also has that key rewritten (see the O'LEARY note in the body).
  *
  * If the download fails, a line is appended to $this->logFile and the
  * roster is returned unchanged.
  *
  * @param array $roster     Rows with at least 'personName', 'officerTitle' and 'isDirector'.
  * @param mixed $url        Address of the proxy document.
  * @param mixed $proxy_year Value copied into 'proxyYear' of matching rows.
  *
  * @return array The roster, including any keys added to matching rows.
  */
 private function getProxyData($roster, $url, $proxy_year)
 {
     echo "fetching data from proxy at {$url} \n\n";

     if ($this->browser->get($url)->responseIsError()) {
         //Error response (eg. 404, 500, etc)
         // FILE_APPEND creates the file when missing, like fopen(..., 'a'), but
         // does not hand a failed handle on to fwrite()/fclose().
         file_put_contents($this->logFile, "Couldn't get " . $url . "\n", FILE_APPEND);
         return $roster;
     }

     $this->proxyText = $this->browser->getResponseText();
     $this->proxyText = LsHtml::replaceEntities($this->proxyText, ENT_QUOTES, 'UTF-8');
     $this->proxyText = LsString::utf8TransUnaccent($this->proxyText);

     foreach ($roster as &$r) {
         //make sure this is not form 4 data for a corporation, continue to the next if it is
         if ($r['officerTitle'] == '' && $r['isDirector'] != 1 && strtoupper($r['isDirector']) != strtoupper('true')) {
             continue;
         }

         $parts = preg_split("/[\\s|\\.]+/", $r['personName'], -1, PREG_SPLIT_NO_EMPTY);
         // An empty or separator-only name gives no token to search for.
         if (empty($parts)) {
             continue;
         }

         //first word, but has to be part of last name because form4 names are in format RUBIN ROBERT E
         $last = trim($parts[0]);

         //sometimes O'LEARY can appear as O LEARY in the form 4
         if (strlen($last) == 1) {
             // Drop the separator after the single letter so "O LEARY" becomes "OLEARY".
             $r['personName'] = $last . substr($r['personName'], 2);
             $parts = preg_split("/[\\s|\\.]+/", $r['personName'], -1, PREG_SPLIT_NO_EMPTY);
             $last = trim($parts[0]);
         }

         //prepare regex to match occurrences of full name
         //case insensitive to accommodate for various irregularities in names
         $re = LsLanguage::buildLooseNameRegex($r['personName']);

         //use stripos (much faster than regex) to find occurrences of the first word in the form 4 name
         //continue searching for last name in proxy until a matching full name (proxyName) is found
         $offset = 0;
         while (!isset($r['proxyName'])) {
             $found = stripos($this->proxyText, $last, $offset);
             if ($found === false) {
                 break;
             }
             $offset = $found + 1;

             // Window of text from 70 bytes before the hit to 50 bytes after its start.
             // The start is clamped at 0: a negative start would make substr()
             // count from the END of the document and miss the hit entirely.
             $start = max(0, $found - 70);
             $str = substr($this->proxyText, $start, $found + 50 - $start);

             preg_match_all($re, $str, $matches, PREG_OFFSET_CAPTURE | PREG_SET_ORDER);

             // No break on success: a later match in the same window overwrites an earlier one.
             foreach ($matches as $match) {
                 if (stristr($match[1][0], '=')) {
                     continue;
                 }

                 //since we may or may not be working with the full last name, use getLastName to return full last name
                 $new_last = $this->getLastName($r['personName'], $match[1][0]);
                 if (!$new_last) {
                     continue;
                 }

                 //if last name produced by case insensitive search has no capital letters, not a match
                 if (preg_match('/\\p{Lu}/su', $new_last) == 0) {
                     continue;
                 }

                 //now that we have a last name, pull the full name from the string
                 $name = LsLanguage::getNameWithLast($match[0][0], $new_last);
                 if (!$name) {
                     continue;
                 }

                 $name_parts = preg_split('/\\s+/isu', $name['nameStart'], -1, PREG_SPLIT_NO_EMPTY);
                 $non_prefixes = array_diff($name_parts, PersonTable::$nameParsePrefixes);

                 //if all we've found are matching prefixes, not a match
                 if (count($non_prefixes) == 0) {
                     continue;
                 }

                 //compatibility check to correct for vagueness of regex:
                 //some non-prefix part must start with a form 4 name part, or vice versa
                 $name1_parts = preg_split('/\\s+/', $r['personName'], -1, PREG_SPLIT_NO_EMPTY);
                 $compatible = false;
                 foreach ($non_prefixes as $n) {
                     foreach ($name1_parts as $p) {
                         if (stripos($n, $p) === 0 || stripos($p, $n) === 0) {
                             $compatible = true;
                             break 2;
                         }
                     }
                 }

                 //phew -- if name is (somewhat) compatible, assume we've found it
                 if ($compatible) {
                     $r['proxyUrl'] = $url;
                     $r['proxyYear'] = $proxy_year;
                     $r['nameLast'] = trim(LsString::spacesToSpace($name['nameLast']));
                     $r['proxyName'] = trim(LsString::spacesToSpace($name['nameFull']));
                 }
             }
         }
     }
     // Break the reference so later writes to $r cannot alter the last roster row.
     unset($r);

     return $roster;
 }