/**
 * Adds URLs of pages containing data to the list ($this->pagesUrls).
 *
 * Every entry is passed through UrlUtils::getRealUrl() together with
 * UrlUtils::basePath($this->url) and is appended only when
 * $this->isNewUrl() accepts the resulting URL.
 *
 * @param array|null $urls URLs to add; null or an empty array is a no-op.
 * @return void
 */
function addPageUrls($urls) {
    // empty() rather than sizeof(): sizeof(null) emits a warning on PHP 7.2+
    // and throws a TypeError on PHP 8, while "nothing to add" should be silent.
    if (empty($urls)) {
        return;
    }

    foreach ($urls as $url) {
        $url = UrlUtils::getRealUrl(UrlUtils::basePath($this->url), $url);
        if ($this->isNewUrl($url)) {
            $this->pagesUrls[] = $url;
        }
    }
}
/**
 * Extracts the items listed in $this->pageContent.
 *
 * Each regex match yields an item array with the keys "url", "name" and
 * "address", extended with the array returned by getItemInfo() for the
 * item URL, plus "ru" and "bo" numbers taken from a "bo<digits>/ru<digits>"
 * fragment of the item URL (both 0 when the URL has no such fragment).
 * Every item is handed to setStatus() before it is collected, and the
 * number of matches is reported to $this->rubricsData->addNextNums().
 *
 * @return array|PEAR_Error list of item arrays, or PEAR_Error("Items not found.")
 *                          when the page does not match the item pattern
 */
function parseItems() {
    $items = array();
    $logger =& Log::singleton("null", "results.log", "ident");

    $listPattern = '{ <td width="80%"> <a href="([^"]+)"> <b class="c-t-[^>]+">(.+?)</b> </a> <div[^>]*>(.+?)</div> }si';
    // Turn every whitespace run of the template into \s* so the exact
    // spacing of the HTML does not matter.
    $listPattern = preg_replace("{\\s+}", "\\s*", $listPattern);
    if (!preg_match_all($listPattern, $this->pageContent, $matches, PREG_PATTERN_ORDER)) {
        return new PEAR_Error("Items not found.");
    }

    $found = sizeof($matches[1]);
    for ($i = 0; $i < $found; $i++) {
        $url = UrlUtils::getRealUrl(UrlUtils::basePath($this->url), $matches[1][$i]);
        $item = array(
            "url" => $url,
            "name" => StrUtils::cleanString($matches[2][$i]),
            "address" => StrUtils::cleanString($matches[3][$i]),
        );

        // NOTE: unlike some sibling parsers, no checkUpdate() filtering is
        // done here (it was disabled), so every match is treated as new.
        $logger->log("New item: " . $item["name"] . ", " . $item["address"]);

        // getItemInfo() is defined elsewhere. Merge only a real array: passing
        // anything else (e.g. an error object) to array_merge() would discard
        // the fields collected above on PHP 5/7 and throw a TypeError on PHP 8.
        $info = $this->getItemInfo($url);
        if (is_array($info)) {
            $item = array_merge($item, $info);
        }

        if (preg_match("{.+?bo(\\d+)/ru(\\d+)}si", $url, $numMatches)) {
            $item["ru"] = $numMatches[2];
            $item["bo"] = $numMatches[1];
        } else {
            $item["ru"] = 0;
            $item["bo"] = 0;
        }

        $this->setStatus($item);
        $items[] = $item;
    }

    $logger->log("Size of items: " . sizeof($items));
    $logger->log(print_r($items, true));
    $this->rubricsData->addNextNums($found);

    return $items;
}
/**
 * Extracts the not-yet-known items listed in $this->pageContent.
 *
 * Each regex match yields an item array with the keys "url" (query string
 * removed via UrlUtils::removeQuery()), "name" and "address". Matches for
 * which checkUpdate(name, address) returns a falsy value are logged as
 * existing and skipped. The remaining items are extended with the array
 * returned by getItemInfo(), a "ru" number read from the current page URL
 * ($this->url) and a "bo" number read from the item URL (0 when absent),
 * and are handed to setStatus() before being collected.
 *
 * The total number of matches (skipped ones included) is reported to
 * $this->rubricsData->addNextNums().
 *
 * @return array|PEAR_Error list of item arrays, or PEAR_Error("Items not found.")
 *                          when the page does not match the item pattern
 */
function parseItems() {
    $items = array();
    $logger =& Log::singleton("null", "results.log", "parseItems");

    $pattern = '{ <td[^>]*> <a href="([^"]+)" class="comp_header" > (.+?) </a>.+? <div><span[^>]+>.+?</span>(.+?)(<nobr><img|</div>) }si';
    // Turn every whitespace run of the template into \s* so the exact
    // spacing of the HTML does not matter.
    $pattern = preg_replace("{\\s+}", "\\s*", $pattern);
    if (!preg_match_all($pattern, $this->pageContent, $matches, PREG_PATTERN_ORDER)) {
        return new PEAR_Error("Items not found.");
    }

    $found = sizeof($matches[1]);
    for ($i = 0; $i < $found; $i++) {
        $url = UrlUtils::getRealUrl(UrlUtils::basePath($this->url), $matches[1][$i]);
        $url = UrlUtils::removeQuery($url);
        $item = array(
            "url" => $url,
            "name" => StrUtils::cleanString($matches[2][$i]),
            "address" => StrUtils::cleanString($matches[3][$i]),
        );

        if (!$this->checkUpdate($item["name"], $item["address"])) {
            $logger->log("Exists item: " . $item["name"] . ", " . $item["address"]);
            continue;
        }
        $logger->log("New item: " . $item["name"] . ", " . $item["address"]);

        // getItemInfo() is defined elsewhere. Merge only a real array: passing
        // anything else (e.g. an error object) to array_merge() would discard
        // the fields collected above on PHP 5/7 and throw a TypeError on PHP 8.
        $info = $this->getItemInfo($url);
        if (is_array($info)) {
            $item = array_merge($item, $info);
        }

        // "ru" is taken from the URL of the page being parsed ...
        if (preg_match("{.+?ru(\\d+)}si", $this->url, $numMatches)) {
            $item["ru"] = $numMatches[1];
        } else {
            $item["ru"] = 0;
        }
        // ... while "bo" is taken from the URL of the item itself.
        if (preg_match("{.+?bo(\\d+)}si", $url, $numMatches)) {
            $item["bo"] = $numMatches[1];
        } else {
            $item["bo"] = 0;
        }

        $this->setStatus($item);
        $items[] = $item;
    }

    $this->rubricsData->addNextNums($found);

    return $items;
}
/**
 * Collects item info for every "print.php..." link found in
 * $this->pageContent.
 *
 * A link qualifies when it sits in a <td class=table_model ...> cell and is
 * immediately followed by an <img src=...> tag. Each link is turned into a
 * URL via UrlUtils::getRealUrl() with UrlUtils::basePath($this->url) and
 * passed to getItemInfo(); the results are returned in page order.
 *
 * @return array|PEAR_Error getItemInfo() results, or PEAR_Error("Items not found.")
 *                          when no link matches
 */
function parseItems() {
    $linkRegex = '{<td\\s+class=table_model[^>]*><a\\s+href=(print\\.php[^\\s]+)\\s*><img\\s+src=}si';
    if (!preg_match_all($linkRegex, $this->pageContent, $hits, PREG_PATTERN_ORDER)) {
        return new PEAR_Error("Items not found.");
    }

    $collected = array();
    foreach ($hits[1] as $link) {
        $itemUrl = UrlUtils::getRealUrl(UrlUtils::basePath($this->url), $link);
        $collected[] = $this->getItemInfo($itemUrl);
    }

    return $collected;
}
/**
 * Collects item info for every "/shop.asp..." link with a thumbnail found
 * in $this->pageContent.
 *
 * Each link is turned into a URL via UrlUtils::getRealUrl() with
 * UrlUtils::basePath($this->url) and passed to getItemInfo(). The URL of
 * the <img> that follows the link is processed the same way and stored
 * under the "image_small" key of the item.
 *
 * @return array|PEAR_Error getItemInfo() results (with "image_small" added),
 *                          or PEAR_Error("Items not found.") when no link matches
 */
function parseItems() {
    $items = array();

    $pattern = '{<a\\s+href="(\\/shop\\.asp[^"]+)">\\s*<img src="([^"]+)"[^>]*>}si';
    if (!preg_match_all($pattern, $this->pageContent, $matches, PREG_PATTERN_ORDER)) {
        return new PEAR_Error("Items not found.");
    }

    $urls = $matches[1];
    $smallImg = $matches[2];
    $found = sizeof($urls);
    for ($i = 0; $i < $found; $i++) {
        $url = UrlUtils::getRealUrl(UrlUtils::basePath($this->url), $urls[$i]);
        $item = $this->getItemInfo($url);

        // getItemInfo() is defined elsewhere. If it reports failure with a
        // PEAR_Error (the convention this method itself uses), writing an
        // array key onto that object would be a fatal error, so the error
        // is passed through to the caller untouched instead.
        if (!($item instanceof PEAR_Error)) {
            $item["image_small"] = UrlUtils::getRealUrl(UrlUtils::basePath($this->url), $smallImg[$i]);
        }

        $items[] = $item;
    }

    return $items;
}
/**
 * Collects item info for every "/search/..." link found in
 * $this->pageContent.
 *
 * Only links written as <p><a href='/search/...' target='_blank'> are
 * considered. Each one is turned into a URL via UrlUtils::getRealUrl() with
 * UrlUtils::basePath($this->url) and passed to getItemInfo(); the results
 * are returned in page order.
 *
 * @return array|PEAR_Error getItemInfo() results, or PEAR_Error("Items not found.")
 *                          when no link matches
 */
function parseItems() {
    $searchLinkRegex = "{<p><a href='(/search/[^']+)'\\s*target='_blank'>}si";
    $matched = preg_match_all($searchLinkRegex, $this->pageContent, $groups, PREG_PATTERN_ORDER);
    if (!$matched) {
        return new PEAR_Error("Items not found.");
    }

    $result = array();
    foreach ($groups[1] as $relativeUrl) {
        $absoluteUrl = UrlUtils::getRealUrl(UrlUtils::basePath($this->url), $relativeUrl);
        $result[] = $this->getItemInfo($absoluteUrl);
    }

    return $result;
}