Example #1
0
/**
 * Collects the article links of every pager page belonging to a navigation link.
 *
 * The pager pages come from getPagerLinks($navLink); each one is passed to
 * getArticleLinks() and the returned arrays are combined with array_merge()
 * semantics (numeric keys are renumbered, a later string key overwrites an
 * earlier one).
 *
 * @param mixed $navLink Navigation link, handed unchanged to getPagerLinks().
 *
 * @return array Merged article links in pager order; an empty array when
 *               getPagerLinks() yields no pages.
 */
function getNavArticleLinks($navLink)
{
    $linksPerPage = array();
    foreach (getPagerLinks($navLink) as $pagerLink) {
        $linksPerPage[] = getArticleLinks($pagerLink);
    }

    // array_merge() needs at least one argument before PHP 7.4, so handle
    // the "no pager pages" case explicitly.
    if (count($linksPerPage) === 0) {
        return array();
    }

    // Merge all pages in one call instead of re-copying the growing result
    // array on every loop iteration.
    return call_user_func_array('array_merge', $linksPerPage);
}
Example #2
0
/**
 * Crawl script for the numbered list pages under $baseUrl.
 *
 * For each page 1..$totalPages it builds "<baseUrl><page>.html", extracts the
 * article links with getArticleLinks(), passes that page's links to
 * writeToTxt(), accumulates them in $allArticleLinks and prints a progress
 * line that is flushed to the client immediately.
 */
set_time_limit(0); // 0 removes the execution time limit for this run
error_reporting(E_ALL);
ini_set('display_errors', 'On');
header("Content-type: text/html; charset=gb2312");

$baseUrl = 'http://money.eastmoney.com/news/clcal_';
// NOTE(review): not referenced in this part of the script; presumably read by
// writeToTxt() as the output file name - confirm against its definition.
$linkTxt = 'eastmoney.txt';
$articleLinks = array();
$totalPages = 4;
$allArticleLinks = array();

for ($i = 1; $i <= $totalPages; $i++) {
    $pagerLink = $baseUrl . $i . '.html';
    $articleLinks = getArticleLinks($pagerLink);
    $allArticleLinks = array_merge($allArticleLinks, $articleLinks);
    writeToTxt($articleLinks);
    echo $i . '/' . $totalPages . ': ' . $pagerLink . '<br>';

    // ob_flush() raises a notice when no output buffer is active, and with
    // display_errors on that notice would be printed into the page.
    if (ob_get_level() > 0) {
        ob_flush();
    }
    flush();
}
function getArticleLinks($pagerLink)
{
    $articleLinks = array();
    $listContentPattern = '/<div class="mod-list">[\\s\\S]*?<div class="PageBox">/i';
    $linkPattern = '/<a href="(.*?)"[^>]*>.*?<\\/a>/i';
    $content = file_get_contents($pagerLink);
    preg_match($listContentPattern, $content, $matches);
    $listContent = $matches[0];