*/ set_time_limit(0); error_reporting(E_ALL); ini_set('display_errors', 'On'); header("Content-type: text/html; charset=gb2312"); $baseUrl = 'http://money.eastmoney.com/news/clcal_'; $linkTxt = 'eastmoney.txt'; $articleLinks = array(); $totalPages = 4; $allArticleLinks = array(); for ($i = 1; $i <= $totalPages; $i++) { $pagerLink = $baseUrl . $i . '.html'; $articleLinks = getArticleLinks($pagerLink); $allArticleLinks = array_merge($allArticleLinks, $articleLinks); // echo '<pre>';print_r($articleLinks);echo '</pre>'; writeToTxt($articleLinks); echo $i . '/' . $totalPages . ': ' . $pagerLink . '<br>'; ob_flush(); flush(); } function getArticleLinks($pagerLink) { $articleLinks = array(); $listContentPattern = '/<div class="mod-list">[\\s\\S]*?<div class="PageBox">/i'; $linkPattern = '/<a href="(.*?)"[^>]*>.*?<\\/a>/i'; $content = file_get_contents($pagerLink); preg_match($listContentPattern, $content, $matches); $listContent = $matches[0]; if ($listContent) { preg_match_all($linkPattern, $listContent, $matches); $articleLinks = $matches[1];
die; /** * */ set_time_limit(0); error_reporting(E_ALL); ini_set('display_errors', 'On'); header("Content-type: text/html; charset=gb2312"); $baseUrl = 'http://info.stockstar.com/info/dic/'; $pagerLink = 'http://info.stockstar.com/info/dic/left2.htm'; $linkTxt = 'tag.txt'; $articleLinks = array(); $tagLinks = getTagLinks($pagerLink); //echo '<pre>';print_r($tagLinks);echo '</pre>'; writeToTxt($tagLinks); function getTagLinks($pagerLink) { $tagLinks = array(); $listPattern = '/<div class=clSub>[\\s\\S]*?<\\/div>/i'; $linkPattern = '/<a href="(.*?)"[^>]*>.*?<\\/a>/i'; $content = file_get_contents($pagerLink); preg_match($listPattern, $content, $matches); $listContent = $matches[0]; if ($listContent) { preg_match_all($linkPattern, $listContent, $matches); $tagLinks = $matches[1]; } return $tagLinks; } function writeToTxt($tagLinks)
<?php
/**
 * Lists every distinct (src, src_category) pair found in the `articles`
 * table, one numbered line per pair, streaming each line to the browser as
 * it is produced. Every line ends with an empty "cid:" field.
 *
 * Uses the mysqli extension: the original mysql_* API was removed in PHP 7.0.
 */
header("Content-type: text/html; charset=utf-8");
set_time_limit(0);
error_reporting(E_ALL ^ E_NOTICE);
ini_set('display_errors', 'On');

// Target file for writeToTxt() (the write itself is currently disabled).
$linkTxt = 'fetchcat.txt';

// NOTE(review): credentials are committed in source. They can be overridden
// through the environment; the literals are kept only as fallbacks so that
// existing deployments keep working. Rotate them and drop the fallbacks.
$dbHost = getenv('FETCHCAT_DB_HOST') ?: '114.215.210.34';
$dbUser = getenv('FETCHCAT_DB_USER') ?: 'root';
$dbPass = getenv('FETCHCAT_DB_PASS') ?: 'xujj10192917';
$dbName = getenv('FETCHCAT_DB_NAME') ?: 'touzilicai';

// PHP 8.1+ makes mysqli throw by default; switch that off so failures are
// reported through the explicit checks below on every PHP version.
mysqli_report(MYSQLI_REPORT_OFF);

$lnk = mysqli_connect($dbHost, $dbUser, $dbPass);
if (!$lnk) {
    die('Not connected : ' . mysqli_connect_error());
}

if (!mysqli_select_db($lnk, $dbName)) {
    die('Can\'t use foo : ' . mysqli_error($lnk));
}

// Set the connection charset through the driver rather than a raw
// "set names" query, so the client library knows the encoding as well.
mysqli_set_charset($lnk, 'utf8');

// Only the grouped columns are read, so select just those: "select *" with
// GROUP BY is rejected when ONLY_FULL_GROUP_BY is enabled.
$result = mysqli_query(
    $lnk,
    'select src, src_category from articles group by src, src_category'
);
if ($result === false) {
    die('Query failed : ' . mysqli_error($lnk));
}

$index = 0;
while ($row = mysqli_fetch_assoc($result)) {
    $index++;
    $line = $index . '::src:' . $row['src'] . ',src_cat:' . $row['src_category'] . ',cid:';
    echo $line . '<br>';
    writeToTxt($line);

    // Push the line to the client right away; ob_flush() is only valid
    // while an output buffer is active.
    if (ob_get_level() > 0) {
        ob_flush();
    }
    flush();
}

mysqli_free_result($result);
mysqli_close($lnk);

/**
 * Hook for persisting one generated line to the file named by $linkTxt.
 *
 * The write is intentionally disabled, so this is currently a no-op.
 * To enable it, append $line plus "\n" to $linkTxt using
 * file_put_contents() with FILE_APPEND.
 *
 * @param string $line Line produced by the main loop.
 * @return void
 */
function writeToTxt($line)
{
    global $linkTxt;
}
*/ set_time_limit(0); error_reporting(E_ALL); ini_set('display_errors', 'On'); header("Content-type: text/html; charset=gb2312"); $host = 'http://school.stockstar.com'; $linkTxt = 'stockstar_links.txt'; $articleLinks = array(); $navLinks = getNavLinks(); //echo '<pre>';print_r($navLinks);echo '</pre>'; $total = count($navLinks); $index = 0; foreach ($navLinks as $navLink) { $index++; $navArticleLinks = getNavArticleLinks($navLink); writeToTxt($navArticleLinks); echo $index . "/" . $total . " " . $navLink . "<br />"; ob_flush(); flush(); //$articleLinks[$navLink] = $navArticleLinks; } //echo '<pre>';print_r($articleLinks);echo '</pre>'; function getNavLinks() { global $host; $startUrl = 'http://school.stockstar.com/list/4067.shtml'; $navLinks = array(); $content = file_get_contents($startUrl); //收集菜单 $navPattern = '/<div id="listSidebar">[\\s\\S]*?<div id="foot08">/i'; preg_match($navPattern, $content, $matches);