/**
 * Imports daily historical prices for every active company into New_historical.
 *
 * For each row of New_company with active = 'Y', the symbol is passed through
 * yahoo() and a CSV table is downloaded from ichart.finance.yahoo.com. For each
 * CSV data row, column 0 is used as the date and column 4 as the price:
 *  - no New_historical row for (id, date): one is inserted;
 *  - a row exists with a different price: it is updated and the difference echoed.
 *
 * Progress and MySQL errors are echoed as HTML. The function uses the legacy
 * mysql_* API and the connection that is already open.
 *
 * NOTE: the function ends the whole script with die("done") once the import
 * is finished. The Globe and Mail "last price" lookup that follows that call
 * is therefore unreachable; it is kept so it can be re-enabled.
 *
 * @return void Never returns normally (the script is terminated).
 */
function GetHistoricalPrices() {
    echo "Looking up Current stock prices for stocks listed in the database.";
    echo "<br>(this may take awhile)<br><br>";
    flush();

    $today = getdate();
    $day = $today['mday'];
    // NOTE(review): presumably the feed expects a zero-based month (the fixed
    // start "a=10&b=15&c=1997" would then be 15 Nov 1997) - confirm.
    $month = $today['mon'] - 1;
    $year = $today['year'];

    $query = "SELECT symbol,id FROM New_company WHERE active = 'Y'";
    $result = mysql_query($query);
    if (!$result) {
        // Fail loudly instead of handing a boolean to mysql_num_rows()/mysql_fetch_row().
        $message = 'Invalid query: ' . mysql_error() . "\n";
        $message .= 'Whole query: ' . $query;
        die($message);
    }

    while (($row = mysql_fetch_row($result)) !== false) {
        $sym = yahoo($row[0]);
        echo $sym . " ";

        $yahoourl = "http://ichart.finance.yahoo.com/table.csv?s={$sym}&a=10&b=15&c=1997&d={$month}&e={$day}&f={$year}&g=d&ignore=.csv";
        $yahooresults = file_get_contents($yahoourl);
        if ($yahooresults === false) {
            // Download failed: there is nothing to import for this symbol.
            continue;
        }

        $companyId = mysql_real_escape_string($row[1]);
        $csvLines = preg_split('/\r\n|\r|\n/', $yahooresults);

        foreach ($csvLines as $i => $yahoorow) {
            if ($i == 0) {
                // First line is the CSV header.
                continue;
            }

            $data = str_getcsv($yahoorow, ",");
            // Skip blank/short rows and rows without a date or price.
            if (!isset($data[4]) || $data[4] === "" || $data[0] === "") {
                continue;
            }

            // The CSV comes from a remote server: escape before building SQL.
            $date = mysql_real_escape_string($data[0]);
            $price = mysql_real_escape_string($data[4]);

            $q = "select * from New_historical where id=\"{$companyId}\" and date=\"{$date}\"";
            $queryresult = mysql_query($q);
            if (!$queryresult) {
                $message = 'Invalid query: ' . mysql_error() . "\n";
                // Report the statement that actually failed.
                $message .= 'Whole query: ' . $q;
                die($message);
            }

            $qrow = mysql_fetch_assoc($queryresult);
            if ($qrow) {
                // This particular stock and date already exist.
                if ($qrow['price'] != $data[4]) {
                    echo "Database says " . $qrow['price'] . " but intenet says {$data['4']}<br>\n";
                    $q = "update New_historical set price=\"{$price}\" where id=\"{$companyId}\" and date = \"{$date}\"";
                    $updateResult = mysql_query($q);
                }
            } else {
                $q = "insert into New_historical values(\"{$companyId}\",\"{$price}\",\"{$date}\")";
                $updateResult = mysql_query($q);
            }
            echo mysql_error();
        }
    }

    die("done");

    // ------------------------------------------------------------------
    // Unreachable while the die() above is in place: refreshes
    // New_company.lastPrice from theglobeandmail.com, 30 symbols per request.
    // ------------------------------------------------------------------
    $answers = 30;
    $total = 0;
    $notFound = 0;
    // Single-symbol report, for reference:
    // http://investdb.theglobeandmail.com/invest/investSQL/gx.process_rep1?pi_symbol=A-N+A-T&pi_report_type=DETAIL&pi_action=+Go+
    while ($answers > 0) {
        $query = "SELECT `symbol` FROM `New_company` WHERE active = 'Y' and 1 LIMIT {$total}, 30";
        $result = mysql_query($query);
        echo mysql_error();
        $answers = mysql_num_rows($result);
        $total += $answers;

        $globe = "http://investdb.theglobeandmail.com/invest/investSQL/gx.stock_rep?pi_mode=SYMBLIST&pi_type=DETAIL&pi_qtime=200406170009100002&pi_currency=&pi_param_1=";
        $info = '';
        for ($k = 0; $k < $answers; $k++) {
            $row = mysql_fetch_row($result);
            $info .= ' ' . $row[0];
            // Symbols are joined with "+" in the request URL.
            if ($k == 0) {
                $globe = $globe . $row[0];
            } else {
                $globe = $globe . "+" . $row[0];
            }
            $stock[$k] = $row[0];
            $found[$k] = false;
        }
        echo '<br><a href="' . $globe . '" target="_blank">lookup ' . $total . ')<br>' . $info . '</a> ';

        // Up to five attempts, ten seconds apart.
        $retryCount = 5;
        do {
            $globeresults = file_get_contents($globe);
            if ($globeresults === false) {
                $retryCount--;
                if ($retryCount == 0) {
                    echo 'giving up, try again in the morning<br>';
                    return;
                }
                echo 'There was a problem getting the data, waiting 10 seconds before trying again<br>';
                echo "Problem url is {$globe}";
                // flush() takes no arguments; the original passed the undefined constant stdout.
                flush();
                sleep(10);
            }
        } while ($globeresults === false);

        // Finds all table data cells. An earlier, narrower pattern was
        // "/class=\"dataalternate[12].*<\\/TR>/msi".
        $pattern = "/<TD.*<\\/TD>/";
        $matched = preg_match_all($pattern, $globeresults, $out);
        for ($k = 0; $k < $matched; $k++) {
            $out[0][$k] = strip_tags($out[0][$k]);
        }

        $stockcount = 0;
        $breakout = 0; // safety cap on the number of passes
        while ($stockcount < $answers && $breakout < 1000) {
            $breakout++;
            for ($k = 0; $k < $matched; $k++) {
                // Globe changes their pattern from time to time. Look at the
                // HTML source returned from one of the lookups to see what the
                // pattern is for the stock symbol.
                $pattern = $stock[$stockcount];
                // Handles cells with extra info after the symbol, like "A-N (US$)".
                $temp = explode(' ', $out[0][$k]);
                if ($temp[0] === $pattern) {
                    if (!$found[$stockcount]) {
                        $found[$stockcount] = true;
                        // The cell following the symbol is taken as the price.
                        $price = strip_tags($out[0][$k + 1]);
                        $date = date('YmdHis', time());
                        $query = "UPDATE New_company set lastPrice='{$price}', lastPriceDate='{$date}' WHERE symbol='{$stock[$stockcount]}'";
                        $result = mysql_query($query);
                        if ($result == 0) {
                            echo '<br>Possible error here<br>' . $query;
                        }
                        echo mysql_error();
                    }
                }
            }
            $stockcount++;
        }

        for ($k = 0; $k < $answers; $k++) {
            if ($found[$k] == false) {
                echo "<br>stock <a href=\"http://stockchase.com/company.php?fl=0&fm=0&sfn[0]=0&operation=View&rec=" . $stock[$k] . "\" Target=\"Company\">" . $stock[$k] . "</a> was not found<br>";
                $notFound++;
            }
        }
        flush();
    }
    echo "Done!<br>";
    echo "{$total} records looked up {$notFound} records were not found";
}
/**
 * Reads the resource dump of one domain and folds it into the global statistics.
 *
 * The file "$inputdir . $d" is scanned line by line; only lines starting with
 * "{" are kept, and together they are decoded as one JSON array. Duplicate
 * entries are dropped. Each remaining entry is read for its "url" and
 * "content-type" keys and classified with the helper predicates (isflash,
 * isjs, ads, facebook, google, yahoo, twitter, same_domain, contains).
 *
 * Globals read:    $inputdir, $triggers.
 * Globals written: $stats (per-domain counters), $domains (trigger => domain
 *                  => last matching URL), $messages (note for domains with more
 *                  than 500 resources), $nresources (histogram of resource
 *                  counts, bucketed by tens).
 *
 * A file that is unreadable, is not valid JSON, or yields no resources is
 * reported as "<domain> has errors" and counted in n_domains_with_errors.
 *
 * @param string $d Domain name; also the file name inside $inputdir.
 * @return void
 */
function process_domain($d) {
    global $stats, $inputdir, $triggers, $domains, $messages, $nresources;

    $stats['n_domains']++;

    $decoded = null;
    $raw = file_get_contents($inputdir . $d);
    if ($raw !== false) {
        $kept = [];
        foreach (explode("\n", $raw) as $line) {
            if (substr($line, 0, 1) === '{') {
                // Drop a trailing CR so CRLF files still match the ",]" fix-up below.
                $kept[] = rtrim($line, "\r");
            }
        }
        $json = '[' . implode('', $kept) . ']';
        // The last object carries a trailing comma; remove it before decoding.
        $json = str_replace(',]', ']', $json);
        $json = str_replace('Syntax error: parse error', '', $json);
        $decoded = json_decode($json, true);
    }

    // json_decode() yields null for malformed input; treat that as "no resources".
    $resources = is_array($decoded) ? array_unique($decoded, SORT_REGULAR) : [];
    $n = count($resources);
    if ($n === 0) {
        echo "{$d} has errors\n";
        $stats['n_domains_with_errors']++;
        return;
    }

    // Histogram bucket: 0-9 resources => 0, 10-19 => 1, ...
    $bucket = (int) ($n / 10);
    if (isset($nresources[$bucket])) {
        $nresources[$bucket]++;
    } else {
        $nresources[$bucket] = 1;
    }

    if ($n > 500) {
        $messages[] = "{$d} has {$n} resources";
    }

    // Per-domain flags, folded into $stats at the end.
    $has_js = false;
    $has_external_js = false;
    $has_flash = false;
    $has_external_flash = false;
    $has_external_content = false;
    $has_google = false;
    $has_facebook = false;
    $has_yahoo = false;
    $has_twitter = false;
    $has_ads = false;

    // For each trigger: false, or the last URL that contained it.
    $triggered = [];
    foreach ($triggers as $trigger) {
        $triggered[$trigger] = false;
    }

    foreach ($resources as $res) {
        // Entries missing a key are classified with an empty string.
        $url = isset($res['url']) ? $res['url'] : '';
        $type = isset($res['content-type']) ? $res['content-type'] : '';

        $is_flash = (bool) isflash($type, $url);
        if ($is_flash) {
            $has_flash = true;
        }

        $is_js = (bool) isjs($type, $url);
        if ($is_js) {
            $has_js = true;
        }

        // Only the first ad URL per domain is reported.
        if (!$has_ads && ads($url)) {
            echo "{$d} contains ads from {$url}\n";
            $has_ads = true;
        }

        foreach ($triggers as $trigger) {
            if (contains($url, $trigger)) {
                $triggered[$trigger] = $url;
            }
        }

        if (facebook($url)) {
            $has_facebook = true;
        }
        if (google($url)) {
            $has_google = true;
        }
        if (yahoo($url)) {
            $has_yahoo = true;
        }
        if (twitter($url)) {
            $has_twitter = true;
        }

        if (!same_domain($d, $url)) {
            if ($is_js) {
                $has_external_js = true;
            }
            if ($is_flash) {
                $has_external_flash = true;
            }
            $has_external_content = true;
        }
    }

    foreach ($triggers as $trigger) {
        if ($triggered[$trigger]) {
            $stats["n_domains_with_{$trigger}"] += 1;
            $domains[$trigger][$d] = $triggered[$trigger];
        }
    }

    $stats['n_domains_with_js'] += (int) $has_js;
    $stats['n_domains_with_external_js'] += (int) $has_external_js;
    $stats['n_domains_with_external_content'] += (int) $has_external_content;
    // "Clean" means the domain loaded nothing from another domain.
    $stats['n_domains_clean'] += (int) !$has_external_content;
    $stats['n_domains_with_flash'] += (int) $has_flash;
    $stats['n_domains_with_external_flash'] += (int) $has_external_flash;
    $stats['n_domains_with_google'] += (int) $has_google;
    $stats['n_domains_with_facebook'] += (int) $has_facebook;
    $stats['n_domains_with_yahoo'] += (int) $has_yahoo;
    $stats['n_domains_with_twitter'] += (int) $has_twitter;
    $stats['n_domains_with_ads'] += (int) $has_ads;
}