/**
 * Reads one search-result page and files every product found on it into an XML file.
 *
 * For each element matching `.s-result-item` the title (first <h2>), the link
 * (href of the first <a>) and the brand (second `.a-color-secondary` element)
 * are extracted. A product whose title passes containsKeyword() is treated as
 * printing material and stored in the XML file of its brand; every other
 * product is stored in the "discard" XML file. One progress line per product
 * is echoed and flushed to the client.
 *
 * @param string $pageUrl                       URL of the result page to fetch.
 * @param array  $brandsArray                   Brands that already have an XML file; brands first
 *                                              seen here are added as $brand => $brand.
 * @param mixed  $keyword                       Passed unchanged to containsKeyword().
 * @param string $withoutFilementKeywordXMLName Name of the discard file, without the "xml/"
 *                                              directory and ".xml" suffix.
 *
 * @return array $brandsArray including the brands added while processing this page.
 *               Returned unchanged when the page could not be loaded.
 */
function getProductUrls($pageUrl, $brandsArray, $keyword, $withoutFilementKeywordXMLName)
{
    // Open and immediately flush/close a buffer, then turn on implicit
    // flushing so that progress lines are sent as they are produced.
    ob_start();
    ob_end_flush();
    ob_implicit_flush(1);

    $xmlFileName = 'xml/' . $withoutFilementKeywordXMLName . '.xml';

    $html = file_get_html($pageUrl);
    if (!is_object($html)) {
        // file_get_html() yields false when the page cannot be fetched or
        // parsed; calling find() on that would be a fatal error.
        return $brandsArray;
    }

    foreach ($html->find('.s-result-item') as $li) {
        $titleNode = $li->find('h2', 0);
        $linkNode = $li->find('a', 0);
        if (!is_object($titleNode) || !is_object($linkNode)) {
            // Not a product entry (no title or no link): nothing to record.
            continue;
        }

        $title = $titleNode->plaintext;
        $productUrl = $linkNode->href;

        // The brand element is optional in the markup; fall back to an empty
        // brand instead of dereferencing a missing node.
        $brandNode = $li->find('.a-color-secondary', 1);
        $brand = is_object($brandNode) ? trim($brandNode->plaintext) : '';

        echo $brand . '-----' . $title . '<br/>';

        if (containsKeyword($title, $keyword)) {
            // Known brand: its XML file already exists, so append to it.
            // Unknown brand: create the file and remember the brand.
            // Strict comparison keeps numeric-looking brands ("0123" vs "123") apart.
            if (in_array($brand, $brandsArray, true)) {
                addToXML($brand, $brand, $title, $productUrl);
            } else {
                createXML($brand, $brand, $title, $productUrl);
                $brandsArray[$brand] = $brand;
            }
        } elseif (file_exists($xmlFileName)) {
            addToXML($withoutFilementKeywordXMLName, $brand, $title, $productUrl);
        } else {
            createXML($withoutFilementKeywordXMLName, $brand, $title, $productUrl);
        }

        sleep(1);

        // The buffer opened above is already closed, so only flush a user-level
        // buffer when one is actually active (ob_flush() raises a notice otherwise).
        if (ob_get_level() > 0) {
            ob_flush();
        }
        flush();
    }

    // Release the parsed DOM; simple_html_dom documents clear() for freeing
    // the memory held by its node tree.
    $html->clear();

    return $brandsArray;
}
/**
 * Walks a directory and its sub-directories and passes every file with an
 * indexable extension to addToXML().
 *
 * Configuration is read from two globals, both pipe-delimited strings in
 * which each entry is wrapped in "|" (e.g. "|php|html|"):
 *  - $GLOBALS["FilterDir"]    directory paths that must not be descended into;
 *  - $GLOBALS["IndexFileExt"] lower-case file extensions to include.
 *
 * For each matching file, addToXML() receives the file path and the file's
 * modification time formatted as "Y-m-dTH:i:s+00:00".
 *
 * @param string $p Directory to scan. When it is ".", entry paths are built
 *                  without a directory prefix.
 *
 * @return void
 */
function DealFP($p)
{
    $FilterDir = $GLOBALS["FilterDir"];
    $IndexFileExt = $GLOBALS["IndexFileExt"];

    $handle = opendir($p);
    if ($handle === false) {
        // Unreadable or missing directory: nothing to index here.
        return;
    }

    $path = ($p === ".") ? "" : $p . "/";

    // Compare against false explicitly: an entry named "0" is falsy and
    // would otherwise end the loop early.
    while (false !== ($file = readdir($handle))) {
        if ($file === '.' || $file === '..') {
            continue;
        }

        $pf = $path . $file;
        $type = filetype($pf);

        if ($type === 'dir') {
            // strpos() returns 0 for a match at the start of the list, so
            // test for "not found" with === false.
            if (strpos($FilterDir, "|" . $pf . "|") === false) {
                DealFP($pf);
            }
        } elseif ($type === 'file') {
            // pathinfo() yields "" for names without a dot instead of
            // mistaking the tail of the name for an extension.
            $ext = "|" . strtolower(pathinfo($file, PATHINFO_EXTENSION)) . "|";
            if (strpos($IndexFileExt, $ext) !== false) {
                $mtime = filemtime($pf);
                // The value is labelled +00:00, so it must be rendered in UTC
                // rather than in the server's local time zone.
                $dt = gmdate('Y-m-d\TH:i:s', $mtime) . "+00:00";
                addToXML($pf, $dt);
            }
        }
    }

    closedir($handle);
}
foreach ($secondArray as $s) { if (strcasecmp($f, $s) == 0) { echo $index . '----' . $f . '<br/>'; //读取文件夹1的文件内容 $doc = new DOMDocument(); $fileName = $dir_1 . '/' . $f; $doc->load($fileName); //读取xml文件 $products = $doc->getElementsByTagName("product"); //取得product标签的对象数组 foreach ($products as $p) { $title = $p->getElementsByTagName("title")->item(0)->nodeValue; $brand = $p->getElementsByTagName("brand")->item(0)->nodeValue; $url = $p->getElementsByTagName("url")->item(0)->nodeValue; //将1的内容添加到文件2的同名文件中 addToXML($dir_2 . '/' . $f, $brand, $title, $url); // echo "$title - $brand - $url".'<br/> '; } $index++; } } } /** *添加信息到xml文件 */ function addToXML($filename, $brand, $title, $url) { //处理产品名字符乱码 $brand = strReplaceToEntity($brand); $title = strReplaceToEntity($title); $url = strReplaceToEntity($url);