<?php
/**
 * Fetches one HTML page, converts it to UTF-8, runs it through HtmlParse,
 * and writes the third element of the parse result ($aNonTagHtml), joined
 * into a single string and re-encoded as SJIS, to C:/body.txt.
 *
 * HtmlParse and getHtmlData() are not defined in this script; presumably
 * they are made available by conf_ini.php -- TODO confirm.
 *
 * On any failure (fetch, encoding detection, parse, write) a message is
 * sent to the error log and the script exits with status 1.
 */
require_once '../apps/conf_ini.php';

$oParser = new HtmlParse();

// URL to fetch.
$URL = 'http://perldoc.jp/docs/modules/DBD-mysql-2.1026/DBD/mysql/INSTALL.pod';

// Output file for the extracted text.
$sOutputPath = "C:/body.txt";

$sHtml = getHtmlData($URL);
if (!$sHtml) {
    // Same falsy check the crawler script uses before parsing a page.
    error_log("Failed to fetch HTML from " . $URL);
    exit(1);
}

// Normalise the page to UTF-8 before parsing.
$enc = mb_detect_encoding($sHtml);
if ($enc === false) {
    // mb_detect_encoding() returns false when no candidate encoding matches;
    // passing that on to mb_convert_encoding() would be an invalid argument.
    error_log("Could not detect the character encoding of " . $URL);
    exit(1);
}
$sHtml = mb_convert_encoding($sHtml, "UTF-8", $enc);

// Parse.
$rtn = $oParser->execHtmlParse($sHtml);
if ($rtn === false) {
    error_log("HTML parse failed for " . $URL);
    exit(1);
}

// Only the third element is used below; the first two are kept named to
// document the shape of the result.
list($aSubject, $aUrl, $aNonTagHtml) = $rtn;

$sLine = implode('', $aNonTagHtml);

// file_put_contents() opens, writes and closes in one call and reports
// failure with false, so no handle can be leaked or used after a failed open.
$iWritten = file_put_contents($sOutputPath, mb_convert_encoding($sLine, "SJIS", "UTF-8"));
if ($iWritten === false) {
    error_log("Failed to write " . $sOutputPath);
    exit(1);
}
<?php // Include require_once '../apps/conf_ini.php'; require_once 'apps/class/Process/CollectUrl/CollectUrlDao_cls.php'; // 初期化 $oDb = new CollectUrlDao($oDbConnMng); $oParser = new HtmlParse(); // for ($iHierarchy = 0; $iHierarchy < 3; $iHierarchy++) { echo "Hierarchy:" . $iHierarchy . "\n"; // i階層のURLを取得する $aUrlHeader = $oDb->getUrlByHierarchy($iHierarchy); for ($iCnt = 0; $iCnt < count($aUrlHeader); $iCnt++) { // アクセスする URL を指定 echo $aUrlHeader[$iCnt]['url'] . "\n"; // HTMLデータ取得 $sHtml = getHtmlData($aUrlHeader[$iCnt]['url']); if (!$sHtml) { continue; } // UTF-8にエンコード $enc = mb_detect_encoding($sHtml); $sHtml = mb_convert_encoding($sHtml, "UTF-8", $enc); // 解析 $oParser->execHtmlParse($sHtml); $rtn = $oParser->getResult(); if ($rtn !== false) { list($aSubject, $aUrl) = $rtn; foreach ($aUrl as $iUrlNo => $sUrl) { if ($sUrl != '') {