예제 #1
0
 /**
  * Adds URLs of data pages to the page list ($this->pagesUrls).
  *
  * Every entry is resolved with UrlUtils::getRealUrl() against the base path
  * of $this->url and is appended only when $this->isNewUrl() accepts it.
  *
  * @param array|Traversable|null $urls URLs to add; null, empty or
  *                                     non-iterable input is ignored.
  * @return void
  */
 function addPageUrls($urls)
 {
     // Guard before iterating: calling sizeof() on null or a scalar raises a
     // warning on PHP 7.2+ and a TypeError on PHP 8, and foreach would warn too.
     if (empty($urls) || !(is_array($urls) || $urls instanceof Traversable)) {
         return;
     }
     foreach ($urls as $url) {
         $url = UrlUtils::getRealUrl(UrlUtils::basePath($this->url), $url);
         if ($this->isNewUrl($url)) {
             $this->pagesUrls[] = $url;
         }
     }
 }
    /**
     * Parses the item rows found in $this->pageContent.
     *
     * Each resulting item holds "url", "name" and "address" taken from the
     * listing markup, merged with the array returned by getItemInfo() (whose
     * string keys win on conflict), plus the "bo" and "ru" numbers extracted
     * from the item URL (0 when the URL carries none). setStatus() is called
     * for every item, and the number of matched rows is reported to
     * $this->rubricsData->addNextNums().
     *
     * The checkUpdate() de-duplication is disabled in this parser: every
     * matched row is treated as a new item.
     *
     * @return array|PEAR_Error List of item arrays, or a PEAR_Error when the
     *                          listing pattern matches nothing.
     */
    function parseItems()
    {
        $items = array();
        $logger =& Log::singleton("null", "results.log", "ident");
        $listPattern = '{
			<td width="80%">
			<a href="([^"]+)"> <b class="c-t-[^>]+">(.+?)</b> </a>
			<div[^>]*>(.+?)</div>
		}si';
        // The template is laid out for readability; every whitespace run in
        // it is turned into \s* so the markup matches however it is spaced.
        $listPattern = preg_replace("{\\s+}", "\\s*", $listPattern);
        if (!preg_match_all($listPattern, $this->pageContent, $matches, PREG_PATTERN_ORDER)) {
            return new PEAR_Error("Items not found.");
        }
        // Kept separate from $listPattern so the loop no longer overwrites it.
        $numPattern = "{.+?bo(\\d+)/ru(\\d+)}si";
        $total = sizeof($matches[1]);
        for ($i = 0; $i < $total; $i++) {
            $item = array();
            $url = UrlUtils::getRealUrl(UrlUtils::basePath($this->url), $matches[1][$i]);
            $item["url"] = $url;
            $item["name"] = StrUtils::cleanString($matches[2][$i]);
            $item["address"] = StrUtils::cleanString($matches[3][$i]);
            $logger->log("New item: " . $item["name"] . ", " . $item["address"]);
            // array_merge() with a non-array argument would discard the fields
            // collected above (or throw on PHP 8), so merge only real arrays.
            // NOTE(review): getItemInfo() is not visible here; presumably it
            // can return a PEAR_Error like this method does - confirm.
            $info = $this->getItemInfo($url);
            if (is_array($info)) {
                $item = array_merge($item, $info);
            } else {
                $logger->log("Item info not available: " . $url);
            }
            if (preg_match($numPattern, $url, $numMatches)) {
                $item["ru"] = $numMatches[2];
                $item["bo"] = $numMatches[1];
            } else {
                $item["ru"] = 0;
                $item["bo"] = 0;
            }
            $this->setStatus($item);
            $items[] = $item;
        }
        $logger->log("Size of items: " . sizeof($items));
        $logger->log(print_r($items, true));
        $this->rubricsData->addNextNums($total);
        return $items;
    }
    /**
     * Parses the item rows found in $this->pageContent, keeping only items
     * that checkUpdate() reports as new.
     *
     * Each kept item holds "url" (with the query string removed), "name" and
     * "address" taken from the listing markup, merged with the array returned
     * by getItemInfo() (whose string keys win on conflict). "ru" is read from
     * $this->url and "bo" from the item URL; either is 0 when no number is
     * found. setStatus() is called for every kept item.
     *
     * $this->rubricsData->addNextNums() receives the number of matched rows,
     * which includes the rows skipped by checkUpdate().
     *
     * @return array|PEAR_Error List of item arrays, or a PEAR_Error when the
     *                          listing pattern matches nothing.
     */
    function parseItems()
    {
        $items = array();
        $logger =& Log::singleton("null", "results.log", "parseItems");
        $pattern = '{
			<td[^>]*>
              <a href="([^"]+)" class="comp_header" >
                (.+?)
              </a>.+?
			<div><span[^>]+>.+?</span>(.+?)(<nobr><img|</div>)
		}si';
        // The template is laid out for readability; every whitespace run in
        // it is turned into \s* so the markup matches however it is spaced.
        $pattern = preg_replace("{\\s+}", "\\s*", $pattern);
        if (!preg_match_all($pattern, $this->pageContent, $matches, PREG_PATTERN_ORDER)) {
            return new PEAR_Error("Items not found.");
        }
        $total = sizeof($matches[1]);
        for ($i = 0; $i < $total; $i++) {
            $item = array();
            $url = UrlUtils::getRealUrl(UrlUtils::basePath($this->url), $matches[1][$i]);
            $url = UrlUtils::removeQuery($url);
            $item["url"] = $url;
            $item["name"] = StrUtils::cleanString($matches[2][$i]);
            $item["address"] = StrUtils::cleanString($matches[3][$i]);
            if (!$this->checkUpdate($item["name"], $item["address"])) {
                $logger->log("Exists item: " . $item["name"] . ", " . $item["address"]);
                continue;
            }
            $logger->log("New item: " . $item["name"] . ", " . $item["address"]);
            // array_merge() with a non-array argument would discard the fields
            // collected above (or throw on PHP 8), so merge only real arrays.
            // NOTE(review): getItemInfo() is not visible here; presumably it
            // can return a PEAR_Error like this method does - confirm.
            $info = $this->getItemInfo($url);
            if (is_array($info)) {
                $item = array_merge($item, $info);
            } else {
                $logger->log("Item info not available: " . $url);
            }
            if (preg_match("{.+?ru(\\d+)}si", $this->url, $numMatches)) {
                $item["ru"] = $numMatches[1];
            } else {
                $item["ru"] = 0;
            }
            if (preg_match("{.+?bo(\\d+)}si", $url, $numMatches)) {
                $item["bo"] = $numMatches[1];
            } else {
                $item["bo"] = 0;
            }
            $this->setStatus($item);
            $items[] = $item;
        }
        $this->rubricsData->addNextNums($total);
        return $items;
    }
 /**
  * Collects item data for every print.php link matched in $this->pageContent.
  *
  * Each matched href is resolved against the base path of $this->url and
  * passed to getItemInfo(); the results are returned in match order.
  *
  * @return array|PEAR_Error List of getItemInfo() results, or a PEAR_Error
  *                          when no link matches.
  */
 function parseItems()
 {
     $linkRegex = '{<td\\s+class=table_model[^>]*><a\\s+href=(print\\.php[^\\s]+)\\s*><img\\s+src=}si';
     $found = preg_match_all($linkRegex, $this->pageContent, $hits, PREG_PATTERN_ORDER);
     if (!$found) {
         return new PEAR_Error("Items not found.");
     }

     $collected = array();
     foreach ($hits[1] as $href) {
         $absoluteUrl = UrlUtils::getRealUrl(UrlUtils::basePath($this->url), $href);
         $collected[] = $this->getItemInfo($absoluteUrl);
     }

     return $collected;
 }
 /**
  * Collects item data for every /shop.asp link (with its thumbnail image)
  * matched in $this->pageContent.
  *
  * For each match the link is resolved against the base path of $this->url
  * and passed to getItemInfo(); the resolved image URL is then stored under
  * the "image_small" key of that result.
  *
  * @return array|PEAR_Error List of item records in match order, or a
  *                          PEAR_Error when no link matches.
  */
 function parseItems()
 {
     $linkRegex = '{<a\\s+href="(\\/shop\\.asp[^"]+)">\\s*<img src="([^"]+)"[^>]*>}si';
     $found = preg_match_all($linkRegex, $this->pageContent, $hits, PREG_PATTERN_ORDER);
     if (!$found) {
         return new PEAR_Error("Items not found.");
     }

     $thumbnails = $hits[2];
     $collected = array();
     foreach ($hits[1] as $index => $href) {
         $absoluteUrl = UrlUtils::getRealUrl(UrlUtils::basePath($this->url), $href);
         $record = $this->getItemInfo($absoluteUrl);
         // The base path is re-read after getItemInfo(), as in the original order.
         $record["image_small"] = UrlUtils::getRealUrl(
             UrlUtils::basePath($this->url),
             $thumbnails[$index]
         );
         $collected[] = $record;
     }

     return $collected;
 }
 /**
  * Collects item data for every /search/ link matched in $this->pageContent.
  *
  * Each matched href is resolved against the base path of $this->url and
  * passed to getItemInfo(); the results are returned in match order.
  *
  * @return array|PEAR_Error List of getItemInfo() results, or a PEAR_Error
  *                          when no link matches.
  */
 function parseItems()
 {
     $linkRegex = "{<p><a href='(/search/[^']+)'\\s*target='_blank'>}si";
     $matchCount = preg_match_all($linkRegex, $this->pageContent, $hits, PREG_PATTERN_ORDER);
     if (!$matchCount) {
         return new PEAR_Error("Items not found.");
     }

     $collected = array();
     foreach ($hits[1] as $relativeUrl) {
         $itemUrl = UrlUtils::getRealUrl(UrlUtils::basePath($this->url), $relativeUrl);
         $collected[] = $this->getItemInfo($itemUrl);
     }

     return $collected;
 }