Example #1
0
 */
// Walks the numbered eastmoney listing pages (clcal_1.html .. clcal_N.html),
// extracts the article links of each page with getArticleLinks() and hands them
// to writeToTxt(), printing one progress line per page as it goes.
set_time_limit(0); // no execution time limit: several remote pages are fetched
error_reporting(E_ALL);
ini_set('display_errors', 'On');
header("Content-type: text/html; charset=gb2312");
$baseUrl = 'http://money.eastmoney.com/news/clcal_';
// NOTE(review): presumably the output file used by writeToTxt(), whose body is
// not visible here - confirm.
$linkTxt = 'eastmoney.txt';
$articleLinks = array();
$totalPages = 4;
$allArticleLinks = array();
for ($i = 1; $i <= $totalPages; $i++) {
    $pagerLink = $baseUrl . $i . '.html';
    $articleLinks = getArticleLinks($pagerLink);
    $allArticleLinks = array_merge($allArticleLinks, $articleLinks);
    //	echo '<pre>';print_r($articleLinks);echo '</pre>';
    writeToTxt($articleLinks);
    echo $i . '/' . $totalPages . ': ' . $pagerLink . '<br>';
    // ob_flush() raises a notice when no output buffer is active, and with
    // display_errors on that notice would be printed into the progress page.
    if (ob_get_level() > 0) {
        ob_flush();
    }
    flush();
}
function getArticleLinks($pagerLink)
{
    $articleLinks = array();
    $listContentPattern = '/<div class="mod-list">[\\s\\S]*?<div class="PageBox">/i';
    $linkPattern = '/<a href="(.*?)"[^>]*>.*?<\\/a>/i';
    $content = file_get_contents($pagerLink);
    preg_match($listContentPattern, $content, $matches);
    $listContent = $matches[0];
    if ($listContent) {
        preg_match_all($linkPattern, $listContent, $matches);
        $articleLinks = $matches[1];
Example #2
0
// The script stops right here, so none of the statements below are executed
// unless this line is removed.
die;
/**
 * Reads the stockstar dictionary navigation page, collects the link targets
 * found by getTagLinks() and passes them to writeToTxt().
 */
$pagerLink = 'http://info.stockstar.com/info/dic/left2.htm';
$baseUrl = 'http://info.stockstar.com/info/dic/';
// NOTE(review): presumably the output file used by writeToTxt(), whose body is
// not visible here - confirm.
$linkTxt = 'tag.txt';
$articleLinks = array();

// Runtime setup: no time limit, every error reported and displayed.
set_time_limit(0);
error_reporting(E_ALL);
ini_set('display_errors', 'On');
header('Content-type: text/html; charset=gb2312');

$tagLinks = getTagLinks($pagerLink);
//echo '<pre>';print_r($tagLinks);echo '</pre>';
writeToTxt($tagLinks);
/**
 * Fetches a page and returns the href of every link inside its first
 * <div class=clSub> block.
 *
 * @param string $pagerLink URL or path of the page; anything file_get_contents() can read.
 * @return array The href attribute values in document order. Empty when the page
 *               cannot be read, has no clSub block, or the block holds no links.
 */
function getTagLinks($pagerLink)
{
    $listPattern = '/<div class=clSub>[\\s\\S]*?<\\/div>/i';
    $linkPattern = '/<a href="(.*?)"[^>]*>.*?<\\/a>/i';

    $content = file_get_contents($pagerLink);
    if ($content === false) {
        // Fetch failed; there is nothing to scan.
        return array();
    }

    // Only read $matches[0] after a successful match: on a miss the array is
    // empty and indexing it would raise an undefined-index warning.
    if (preg_match($listPattern, $content, $matches) !== 1) {
        return array();
    }

    if (!preg_match_all($linkPattern, $matches[0], $linkMatches)) {
        // Covers both "no links found" (0) and a PCRE error (false).
        return array();
    }

    return $linkMatches[1];
}
function writeToTxt($tagLinks)
Example #3
0
<?php

// Lists every distinct (src, src_category) combination found in the articles
// table, echoing one numbered line per combination and passing the same line
// to writeToTxt().
header("Content-type: text/html; charset=utf-8");
set_time_limit(0);
error_reporting(E_ALL ^ E_NOTICE);
ini_set('display_errors', 'On');
$linkTxt = 'fetchcat.txt';
// NOTE(review): the host and root password are hard-coded in source; they should
// be rotated and moved to configuration outside the repository.
// NOTE(review): the mysql_* extension was removed in PHP 7; porting to mysqli or
// PDO is required before this can run on a current PHP version.
$lnk = mysql_connect('114.215.210.34', 'root', 'xujj10192917') or die('Not connected : ' . mysql_error());
// Select the working database.
mysql_select_db('touzilicai', $lnk) or die('Can\'t use foo : ' . mysql_error());
mysql_query("set names utf8");
// Stop on a failed query instead of handing false to mysql_fetch_assoc().
$result = mysql_query('select * from articles group by src,src_category')
    or die('Query failed : ' . mysql_error());
// mysql_num_rows() is the documented row count for SELECT results;
// mysql_affected_rows() is meant for INSERT/UPDATE/REPLACE/DELETE.
$total = mysql_num_rows($result);
$index = 0;
while ($row = mysql_fetch_assoc($result)) {
    $index++;
    $line = $index . '::src:' . $row['src'] . ',src_cat:' . $row['src_category'] . ',cid:';
    echo $line . '<br>';
    writeToTxt($line);
    // Flush only when an output buffer is active, so ob_flush() cannot fail.
    if (ob_get_level() > 0) {
        ob_flush();
    }
    flush();
}
/**
 * Placeholder for appending one line to the file named by the global $linkTxt.
 *
 * The append call is commented out, so the function currently does nothing and
 * returns null.
 *
 * @param string $line Text that would be appended, followed by a newline.
 * @return void
 */
function writeToTxt($line)
{
    // Disabled write, kept for reference:
    //file_put_contents($linkTxt, $line."\n", FILE_APPEND);
    global $linkTxt;
}
Example #4
0
 */
// Collects the navigation links returned by getNavLinks(), then for each one
// gathers its article links with getNavArticleLinks() and hands them to
// writeToTxt(), printing one progress line per navigation link.
set_time_limit(0); // no execution time limit: many remote pages are fetched
error_reporting(E_ALL);
ini_set('display_errors', 'On');
header("Content-type: text/html; charset=gb2312");
// Read by getNavLinks() through "global $host".
$host = 'http://school.stockstar.com';
// NOTE(review): presumably the output file used by writeToTxt(), whose body is
// not visible here - confirm.
$linkTxt = 'stockstar_links.txt';
$articleLinks = array();
$navLinks = getNavLinks();
//echo '<pre>';print_r($navLinks);echo '</pre>';
$total = count($navLinks);
$index = 0;
foreach ($navLinks as $navLink) {
    $index++;
    $navArticleLinks = getNavArticleLinks($navLink);
    writeToTxt($navArticleLinks);
    echo $index . "/" . $total . " " . $navLink . "<br />";
    // ob_flush() raises a notice when no output buffer is active, and with
    // display_errors on that notice would be printed into the progress page.
    if (ob_get_level() > 0) {
        ob_flush();
    }
    flush();
    //$articleLinks[$navLink] = $navArticleLinks;
}
//echo '<pre>';print_r($articleLinks);echo '</pre>';
function getNavLinks()
{
    global $host;
    $startUrl = 'http://school.stockstar.com/list/4067.shtml';
    $navLinks = array();
    $content = file_get_contents($startUrl);
    //收集菜单
    $navPattern = '/<div id="listSidebar">[\\s\\S]*?<div id="foot08">/i';
    preg_match($navPattern, $content, $matches);