/**
 * Scrape one search-result page and file every product found on it into XML.
 *
 * Each `.s-result-item` element on the page is treated as one product. If its
 * title passes containsKeyword() it is stored under its brand (a new brand XML
 * is created the first time a brand is seen); otherwise it is stored in the
 * "discard" XML named by $withoutFilementKeywordXMLName. Progress is echoed and
 * flushed to the client after every product, with a one-second pause between
 * products.
 *
 * @param string $pageUrl                       URL of the result page to fetch.
 * @param array  $brandsArray                   Brands already known to have an XML file.
 * @param mixed  $keyword                       Passed through unchanged to containsKeyword().
 * @param string $withoutFilementKeywordXMLName Name of the discard XML; its existence is
 *                                              checked at "xml/<name>.xml".
 * @return array $brandsArray extended with every brand created during this call. Returned
 *               unchanged when the page could not be loaded.
 */
function getProductUrls($pageUrl, $brandsArray, $keyword, $withoutFilementKeywordXMLName)
{
    // Opens a buffer and closes it again straight away, then enables implicit
    // flushing. Note that this leaves no buffer of our own active afterwards,
    // which is why ob_flush() further down has to be guarded.
    ob_start();
    ob_end_flush();
    ob_implicit_flush(1);

    $xmlFileName = 'xml/' . $withoutFilementKeywordXMLName . '.xml';

    $html = file_get_html($pageUrl);
    if (!is_object($html)) {
        // Fetch or parse failed: there is nothing to iterate, and calling
        // find() on a non-object would abort the whole run.
        return $brandsArray;
    }

    foreach ($html->find('.s-result-item') as $li) {
        $titleNode = $li->find('h2', 0);
        $linkNode = $li->find('a', 0);
        if (!$titleNode || !$linkNode) {
            // Result items without a heading or a link (placeholders, banners)
            // carry no usable product data, so they are skipped.
            continue;
        }

        $title = $titleNode->plaintext;
        $productUrl = $linkNode->href;

        // The brand is read from the second `.a-color-secondary` element; it is
        // not always present, in which case the brand stays an empty string.
        // NOTE(review): an empty brand is still forwarded to createXML()/addToXML()
        // exactly as before - confirm whether such items should be discarded instead.
        $brandNode = $li->find('.a-color-secondary', 1);
        $brand = $brandNode ? trim($brandNode->plaintext) : '';

        echo $brand . '-----' . $title . '<br/>';

        if (containsKeyword($title, $keyword)) {
            // Keyword present: the product belongs in its brand's XML file.
            // Strict comparison avoids PHP's loose string/number matching
            // (e.g. "1e3" == "1000") merging two different brands.
            if (in_array($brand, $brandsArray, true)) {
                addToXML($brand, $brand, $title, $productUrl);
            } else {
                createXML($brand, $brand, $title, $productUrl);
                $brandsArray[$brand] = $brand;
            }
        } elseif (file_exists($xmlFileName)) {
            // Keyword absent: the product goes to the discard file.
            addToXML($withoutFilementKeywordXMLName, $brand, $title, $productUrl);
        } else {
            createXML($withoutFilementKeywordXMLName, $brand, $title, $productUrl);
        }

        sleep(1);

        // ob_flush() raises a notice when no output buffer is active, so only
        // call it if an outer buffer actually exists.
        if (ob_get_level() > 0) {
            ob_flush();
        }
        flush();
    }

    return $brandsArray;
}
Beispiel #2
0
/**
 * Recursively walk a directory and hand every indexable file to addToXML().
 *
 * Two globals control the walk. Both are searched for a "|"-wrapped token, so
 * each entry must be enclosed in pipes (e.g. "|html|php|"):
 *   - $GLOBALS["FilterDir"]:    a sub-directory is skipped when "|<path>|" occurs in it.
 *   - $GLOBALS["IndexFileExt"]: a file is indexed when "|<lower-case extension>|" occurs in it.
 *
 * Only entries whose filetype() is 'file' or 'dir' are considered; anything
 * else (links, sockets, ...) is ignored.
 *
 * @param string $p Directory to scan. "." scans the current directory and produces
 *                  paths without a leading directory prefix.
 */
function DealFP($p)
{
    $FilterDir = $GLOBALS["FilterDir"];
    $IndexFileExt = $GLOBALS["IndexFileExt"];

    $handle = opendir($p);
    if ($handle === false) {
        // Missing or unreadable directory: nothing to index below it.
        return;
    }

    $path = ($p === ".") ? "" : $p . "/";

    // Compare with false explicitly: a directory entry named "0" is falsy and
    // would otherwise end the loop before the remaining entries are seen.
    while (false !== ($file = readdir($handle))) {
        if ($file === '.' || $file === '..') {
            continue;
        }

        $pf = $path . $file;
        $type = filetype($pf);

        if ($type === 'dir') {
            // strpos() returns 0 for a match at the very start of the list, so
            // test against false rather than relying on truthiness.
            if (strpos($FilterDir, "|" . $pf . "|") === false) {
                DealFP($pf);
            }
        } elseif ($type === 'file') {
            // pathinfo() yields "" for names without a dot, instead of
            // mistaking the tail of the name for an extension.
            $ext = "|" . strtolower(pathinfo($file, PATHINFO_EXTENSION)) . "|";

            if (strpos($IndexFileExt, $ext) !== false) {
                $mtime = filemtime($pf);
                // The value is labelled "+00:00", so it must be formatted in
                // UTC (gmdate) rather than in the server's local time zone.
                $dt = gmdate("Y-m-d", $mtime) . "T" . gmdate("H:i:s", $mtime) . "+00:00";
                addToXML($pf, $dt);
            }
        }
    }

    closedir($handle);
}
Beispiel #3
0
    foreach ($secondArray as $s) {
        if (strcasecmp($f, $s) == 0) {
            echo $index . '----' . $f . '<br/>';
            //读取文件夹1的文件内容
            $doc = new DOMDocument();
            $fileName = $dir_1 . '/' . $f;
            $doc->load($fileName);
            //读取xml文件
            $products = $doc->getElementsByTagName("product");
            //取得product标签的对象数组
            foreach ($products as $p) {
                $title = $p->getElementsByTagName("title")->item(0)->nodeValue;
                $brand = $p->getElementsByTagName("brand")->item(0)->nodeValue;
                $url = $p->getElementsByTagName("url")->item(0)->nodeValue;
                //将1的内容添加到文件2的同名文件中
                addToXML($dir_2 . '/' . $f, $brand, $title, $url);
                //					echo "$title - $brand - $url".'<br/> ';
            }
            $index++;
        }
    }
}
/**
 *添加信息到xml文件
 */
function addToXML($filename, $brand, $title, $url)
{
    //处理产品名字符乱码
    $brand = strReplaceToEntity($brand);
    $title = strReplaceToEntity($title);
    $url = strReplaceToEntity($url);