Example #1
0
/**
 * Back-fills the New_historical table with daily prices downloaded from Yahoo.
 *
 * For every row of New_company with active = 'Y' the symbol is passed through
 * yahoo() (defined elsewhere; presumably maps it to Yahoo's ticker format -
 * confirm) and the historical CSV for that symbol is downloaded. For each CSV
 * data row, column 0 is used as the date and column 4 as the price: the
 * (id, price, date) triple is inserted into New_historical, or the stored
 * price is updated when it differs from the downloaded one. Progress and
 * database errors are echoed as HTML.
 *
 * NOTE: the function terminates the whole script with die("done") as soon as
 * the historical pass is finished. The Globe and Mail "current price" pass
 * that follows in the body is therefore never executed; it is kept so that it
 * can be re-enabled by removing the die().
 *
 * Requires an already-open default mysql_* connection.
 *
 * @return void  (in practice never returns: the script is ended with die())
 */
function GetHistoricalPrices()
{
    echo "Looking up Current stock prices for stocks listed in the database.";
    echo "<br>(this may take awhile)<br><br>";
    flush();
    $today = getdate();
    $day = $today['mday'];
    // The end-date month is sent zero-based (January = 0), matching the
    // hard-coded start date a=10&b=15&c=1997 in the URL below.
    $month = $today['mon'] - 1;
    $year = $today['year'];
    $query = "SELECT symbol,id FROM New_company WHERE active = 'Y'";
    $result = mysql_query($query);
    if (!$result) {
        // Previously a failed SELECT fell through to mysql_num_rows(false).
        die('Invalid query: ' . mysql_error() . "\n" . 'Whole query: ' . $query);
    }
    $answers = mysql_num_rows($result);
    $csvDelim = ",";
    for ($k = 0; $k < $answers; $k++) {
        $row = mysql_fetch_row($result);
        $sym = yahoo($row[0]);
        echo $sym . " ";
        $yahoourl = "http://ichart.finance.yahoo.com/table.csv?s={$sym}&a=10&b=15&c=1997&d={$month}&e={$day}&f={$year}&g=d&ignore=.csv";
        $yahooresults = file_get_contents($yahoourl);
        if ($yahooresults === false) {
            // Download failed: there is nothing to store for this symbol.
            continue;
        }
        // Split on any newline convention.
        $yahooresults = preg_split("/\r\n|\r|\n/", $yahooresults);
        $id = mysql_real_escape_string($row[1]);
        foreach ($yahooresults as $i => $yahoorow) {
            if ($i == 0) {
                // first line is the CSV header
                continue;
            }
            $data = str_getcsv($yahoorow, $csvDelim);
            // Skip blank/short lines (str_getcsv('') yields a single null
            // cell) and rows without a date or a price.
            if (!isset($data[4]) || $data[4] === "" || $data[0] === "") {
                continue;
            }
            // The CSV comes from a remote server, so escape before building SQL.
            $date = mysql_real_escape_string($data[0]);
            $price = mysql_real_escape_string($data[4]);
            $q = "select * from New_historical where id=\"{$id}\" and date=\"{$date}\"";
            $queryresult = mysql_query($q);
            if (!$queryresult) {
                $message = 'Invalid query: ' . mysql_error() . "\n";
                // Report the statement that actually failed, not the company SELECT.
                $message .= 'Whole query: ' . $q;
                die($message);
            }
            $qrow = mysql_fetch_assoc($queryresult);
            if ($qrow) {
                // this particular stock and date already exist
                // Loose comparison on purpose: "12.50" and "12.5" are the same price.
                if ($qrow['price'] != $data[4]) {
                    echo "Database says " . $qrow['price'] . " but intenet says {$data[4]}<br>\n";
                    $q = "update New_historical set price=\"{$price}\" where id=\"{$id}\" and date = \"{$date}\"";
                    mysql_query($q);
                }
            } else {
                $q = "insert into  New_historical values(\"{$id}\",\"{$price}\",\"{$date}\")";
                mysql_query($q);
            }
            // Empty string unless the last statement failed.
            echo mysql_error();
        }
    }
    die("done");

    // ------------------------------------------------------------------
    // Everything below is unreachable because of the die() above.
    // It looks up current prices on the Globe and Mail site in batches of
    // 30 symbols and stores them in New_company.lastPrice/lastPriceDate.
    // ------------------------------------------------------------------
    $answers = 30;
    $total = 0;
    $notFound = 0;
    //http://investdb.theglobeandmail.com/invest/investSQL/gx.process_rep1?pi_symbol=A-N+A-T&pi_report_type=DETAIL&pi_action=+Go+
    while ($answers > 0) {
        $query = "SELECT `symbol` FROM `New_company` WHERE active = 'Y' and 1 LIMIT {$total}, 30";
        $result = mysql_query($query);
        echo mysql_error();
        $answers = mysql_num_rows($result);
        $total += $answers;
        $globe = "http://investdb.theglobeandmail.com/invest/investSQL/gx.stock_rep?pi_mode=SYMBLIST&pi_type=DETAIL&pi_qtime=200406170009100002&pi_currency=&pi_param_1=";
        $info = '';
        for ($k = 0; $k < $answers; $k++) {
            $row = mysql_fetch_row($result);
            $info .= ' ' . $row[0];
            // symbols are joined with '+' in the lookup URL
            if ($k == 0) {
                $globe = $globe . $row[0];
            } else {
                $globe = $globe . "+" . $row[0];
            }
            $stock[$k] = $row[0];
            $found[$k] = false;
        }
        echo '<br><a href="' . $globe . '" target="_blank">lookup ' . $total . ')<br>' . $info . '</a> ';
        // Try the download up to 5 times, 10 seconds apart, then give up.
        $retryCount = 5;
        do {
            $globeresults = file_get_contents($globe);
            if ($globeresults === false) {
                $retryCount--;
                if ($retryCount == 0) {
                    echo 'giving up, try again in the morning<br>';
                    return;
                }
                echo 'There was a problem getting the data, waiting 10 seconds before trying again<br>';
                echo "Problem url is {$globe}";
                // flush() takes no arguments (was flush(stdout), an undefined constant).
                flush();
                sleep(10);
            }
        } while ($globeresults === false);
        // An earlier pattern, "/class=\"dataalternate[12].*<\\/TR>/msi", also worked;
        // this one finds all table data.
        $pattern = "/<TD.*<\\/TD>/";
        $matched = preg_match_all($pattern, $globeresults, $out);
        for ($k = 0; $k < $matched; $k++) {
            $out[0][$k] = strip_tags($out[0][$k]);
        }
        $stockcount = 0;
        $breakout = 0;
        while ($stockcount < $answers && $breakout < 1000) {
            $breakout++;
            for ($k = 0; $k < $matched; $k++) {
                // globe changes their pattern from time to time. Look at the html source
                // returned from one of the lookups and see what the pattern is for the stock symbol
                $pattern = $stock[$stockcount];
                // this handles cases where there is extra info after the symbol, like "A-N (US$)"
                $temp = explode(' ', $out[0][$k]);
                if ($temp[0] === $pattern) {
                    if (!$found[$stockcount]) {
                        $found[$stockcount] = true;
                        // the cell following the symbol is taken as the price
                        $price = strip_tags($out[0][$k + 1]);
                        $date = date('YmdHis', time());
                        $query = "UPDATE New_company set lastPrice='{$price}', lastPriceDate='{$date}' WHERE symbol='{$stock[$stockcount]}'";
                        $result = mysql_query($query);
                        if ($result == 0) {
                            echo '<br>Possible error here<br>' . $query;
                        }
                        echo mysql_error();
                    }
                }
            }
            $stockcount++;
        }
        for ($k = 0; $k < $answers; $k++) {
            if ($found[$k] == false) {
                echo "<br>stock <a href=\"http://stockchase.com/company.php?fl=0&fm=0&sfn[0]=0&operation=View&rec=" . $stock[$k] . "\" Target=\"Company\">" . $stock[$k] . "</a> was not found<br>";
                $notFound++;
            }
        }
        flush();
    }
    echo "Done!<br>";
    echo "{$total} records looked up {$notFound} records were not found";
}
/**
 * Analyses one domain's resource dump and folds the findings into global counters.
 *
 * Reads the file $inputdir . $d, keeps only the lines that start with '{',
 * joins them into one JSON array and de-duplicates the decoded entries. Each
 * entry's 'url' and 'content-type' values are then classified with the helper
 * predicates isflash(), isjs(), ads(), contains(), facebook(), google(),
 * yahoo(), twitter() and same_domain(), all defined elsewhere.
 *
 * Globals used:
 *  - $inputdir   (read)    prefix prepended to $d to build the file path
 *  - $triggers   (read)    list of substrings looked for in every url
 *  - $stats      (written) 'n_domains*' counters; assumed to be initialised
 *                          by the caller - TODO confirm
 *  - $domains    (written) $domains[$trigger][$d] = a url that matched $trigger
 *  - $messages   (written) gets a note when a domain has more than 500 resources
 *  - $nresources (written) histogram keyed by the resource count divided by 10
 *                          (rounded down)
 *
 * A dump that yields no resources (missing file, no JSON lines, malformed
 * JSON) is reported on stdout as "<d> has errors", counted in
 * $stats['n_domains_with_errors'], and skipped.
 *
 * @param string $d name of the dump file inside $inputdir; also passed to
 *                  same_domain() and used as the label in output and messages
 * @return void
 */
function process_domain($d)
{
    global $stats;
    global $inputdir;
    global $triggers;
    global $domains;
    global $messages;
    global $nresources;

    $stats['n_domains']++;

    $c = file_get_contents($inputdir . $d);
    if ($c === false) {
        // unreadable file: handled below as "no resources"
        $c = '';
    }

    // Keep only the JSON object lines and concatenate them without newlines.
    $kept = array();
    foreach (explode("\n", $c) as $line) {
        if (substr($line, 0, 1) == '{') {
            $kept[] = $line;
        }
    }
    $c = "[" . implode('', $kept) . "]";
    // Strip the error marker first so that a trailing comma it may have been
    // hiding is still repaired by the next replacement.
    $c = str_replace("Syntax error: parse error", "", $c);
    $c = str_replace(",]", "]", $c);

    // json_decode() returns null for malformed input; array_unique() must only
    // ever see an array (passing null is a TypeError on PHP 8).
    $decoded = json_decode($c, true);
    if (!is_array($decoded)) {
        $decoded = array();
    }
    $resources = array_unique($decoded, SORT_REGULAR);
    $n = count($resources);
    if ($n == 0) {
        echo "{$d} has errors\n";
        $stats['n_domains_with_errors']++;
        return;
    }

    // Histogram bucket: use an explicit integer key instead of relying on the
    // implicit (and deprecated) truncation of a fractional float key.
    $bucket = (int) floor($n / 10);
    if (!isset($nresources[$bucket])) {
        $nresources[$bucket] = 0;
    }
    $nresources[$bucket]++;

    if ($n > 500) {
        $messages[] = "{$d} has {$n} resources";
    }

    // and then update the statistics
    $has_js = false;
    $has_external_js = false;
    $has_flash = false;
    $has_external_flash = false;
    $has_external_content = false;
    $has_google = false;
    $has_facebook = false;
    $has_yahoo = false;
    $has_twitter = false;
    $has_ads = false;
    $triggered = [];
    foreach ($triggers as $trigger) {
        $triggered[$trigger] = false;
    }

    foreach ($resources as $res) {
        $url = $res['url'];
        $type = $res['content-type'];

        $is_flash = isflash($type, $url) ? true : false;
        if ($is_flash) {
            $has_flash = true;
        }
        $is_js = isjs($type, $url) ? true : false;
        if ($is_js) {
            $has_js = true;
        }

        // Only the first ad url per domain is reported.
        if (!$has_ads && ads($url)) {
            echo "{$d} contains ads from {$url}\n";
            $has_ads = true;
        }

        // The last matching url wins for each trigger.
        foreach ($triggers as $trigger) {
            if (contains($url, $trigger)) {
                $triggered[$trigger] = $url;
            }
        }

        if (facebook($url)) {
            $has_facebook = true;
        }
        if (google($url)) {
            $has_google = true;
        }
        if (yahoo($url)) {
            $has_yahoo = true;
        }
        if (twitter($url)) {
            $has_twitter = true;
        }

        if (!same_domain($d, $url)) {
            if ($is_js) {
                $has_external_js = true;
            }
            if ($is_flash) {
                $has_external_flash = true;
            }
            $has_external_content = true;
        }
    }

    foreach ($triggers as $trigger) {
        if ($triggered[$trigger]) {
            $stats["n_domains_with_{$trigger}"] += 1;
            $domains[$trigger][$d] = $triggered[$trigger];
        }
    }

    $stats['n_domains_with_js'] += $has_js ? 1 : 0;
    $stats['n_domains_with_external_js'] += $has_external_js ? 1 : 0;
    $stats['n_domains_with_external_content'] += $has_external_content ? 1 : 0;
    // "clean" means nothing was loaded from another domain
    $stats['n_domains_clean'] += $has_external_content ? 0 : 1;
    $stats['n_domains_with_flash'] += $has_flash ? 1 : 0;
    $stats['n_domains_with_external_flash'] += $has_external_flash ? 1 : 0;
    $stats['n_domains_with_google'] += $has_google ? 1 : 0;
    $stats['n_domains_with_facebook'] += $has_facebook ? 1 : 0;
    $stats['n_domains_with_yahoo'] += $has_yahoo ? 1 : 0;
    $stats['n_domains_with_twitter'] += $has_twitter ? 1 : 0;
    $stats['n_domains_with_ads'] += $has_ads ? 1 : 0;
}