/**
 * Extracts catalogue entries from the markup held in $this->pageContent.
 *
 * Every match of the row template yields one record with the keys "art",
 * "name", "size", "material", "drawing", "descr", "descr_other", "url",
 * "image_small" and "image_large".
 *
 * @return array|PEAR_Error List of item records, or a PEAR_Error when the
 *                          template matches nothing on the page.
 */
function parseItems()
{
    $template = '{<img src="(images/[^\\"]+.jpg)" border="0" alt="([^\\"]*)"> </a> <br> <br> <a [^>]+> <img [^>]+> </a> <br> <br> </td> <td [^>]+> <span class=articul> <u> <a name="[^\\"]+">([^<]+)</a> </u> <br> <a href="(cat/[^\\"]+.html)">([^<]+)</a> (</font>)* </span> <span class=gray_small_text>(.*?)</span>}s';
    // Every whitespace run in the template becomes \s*, so page formatting does not matter.
    $regex = preg_replace("{\\s+}", "\\s*", $template);

    if (!preg_match_all($regex, $this->pageContent, $rows, PREG_SET_ORDER)) {
        return new PEAR_Error("Ошибка при выделении элементов.");
    }

    $result = array();
    foreach ($rows as $row) {
        // The large image name is the thumbnail name with "b" inserted before ".jpg".
        $bigImage = preg_replace("{^(.*)\\.jpg\$}", "\$1b.jpg", $row[1]);

        // <br> becomes a line break, remaining tags are dropped, then the text is read line by line.
        $notes = trim(strip_tags(str_replace("<br>", "\n", $row[7])));
        $lines = explode("\n", $notes);

        // The property lines are only used when at least six lines are present.
        $hasProps = isset($lines[5]);
        $size = $hasProps ? $lines[0] : "";
        $material = $hasProps ? $lines[1] : "";
        $drawing = $hasProps ? $lines[5] : "";

        $result[] = array(
            "art" => $row[3],
            "name" => $row[5],
            "size" => $size,
            "material" => $material,
            "drawing" => $drawing,
            "descr" => $row[2],
            "descr_other" => $notes,
            "url" => getRealUrl($this->url, $row[4]),
            "image_small" => $this->getSiteUrl() . $row[1],
            "image_large" => $this->getSiteUrl() . $bigImage,
        );
    }

    return $result;
}
/**
 * Builds the single item record described by the current page.
 *
 * The heading and first paragraph inside <div id="text"> give "name" and
 * "descr"; an optional "..._small_..." image gives "image_small" and, with
 * "small" swapped for "big", "image_large"; all "pnavbar2" cells are joined
 * into "colors".
 *
 * @return array|PEAR_Error One-element list with the item record, or a
 *                          PEAR_Error when the heading block is not found.
 */
function parseItems()
{
    $headRegex = '{<div id="text"> <h1>(.+?)</h1> <p>(.+?)</p>}s';
    $headRegex = preg_replace("{\\s+}", "\\s*", $headRegex);
    if (!preg_match($headRegex, $this->pageContent, $head)) {
        return new PEAR_Error("Items not found.");
    }
    $title = strip_tags($head[1]);
    $text = strip_tags($head[2]);

    // Images are optional: both addresses stay empty when no thumbnail is present.
    $thumb = "";
    $full = "";
    $imageRegex = '{<img border="0" src="((images/[^"]+_)small(_[^"]+))"}s';
    if (preg_match($imageRegex, $this->pageContent, $pic)) {
        $thumb = getRealUrl(basePath($this->url), $pic[1]);
        $full = getRealUrl(basePath($this->url), $pic[2] . "big" . $pic[3]);
    }

    $colorList = "";
    $colorRegex = '{<td><p class="pnavbar2"><b>(.+?)</b></p></td>}s';
    if (preg_match_all($colorRegex, $this->pageContent, $colorHits, PREG_PATTERN_ORDER)) {
        $colorList = implode(", ", $colorHits[1]);
    }

    return array(
        array(
            "image_large" => $full,
            "image_small" => $thumb,
            "url" => $this->url,
            "name" => $title,
            "descr" => $text,
            "colors" => $colorList,
        ),
    );
}
/**
 * Extracts material thumbnails and their captions from $this->pageContent.
 *
 * Each record carries "url" (the page itself), "image_large", "image_small",
 * "art" and "name". A caption that contains a double-quoted part, or that has
 * no "-" at all, is used as the name as-is; otherwise it is split into an
 * article part and a non-digit name part.
 *
 * @return array|PEAR_Error List of item records, or a PEAR_Error when nothing matches.
 */
function parseItems()
{
    $regex = <<<PATTERN
{openWin\\('(/images/material-thumb/.+?)'.+?<img src="(/images/material-thumb/.+?)".+? class="ProdName" [^>]+>(.+?)</a>}s
PATTERN;
    // Whitespace runs in the template are relaxed to \s*.
    $regex = preg_replace("{\\s+}", "\\s*", $regex);

    if (!preg_match_all($regex, $this->pageContent, $hits, PREG_SET_ORDER)) {
        return new PEAR_Error("Ошибка при выделении элементов.");
    }

    $records = array();
    foreach ($hits as $hit) {
        $large = getRealUrl(basePath($this->url), $hit[1]);
        $small = getRealUrl(basePath($this->url), $hit[2]);
        $caption = $hit[3];

        $hasQuotedPart = preg_match('{".+"}s', $caption, $ignored);
        if ($hasQuotedPart || strpos($caption, "-") === false) {
            // The caption is a plain name without an article number.
            $art = "";
            $name = $caption;
        } elseif (preg_match('{(.*?) ([^0-9]+)}s', $caption, $parts)) {
            $art = $parts[1];
            $name = $parts[2];
        } else {
            // Could not split: the whole caption is treated as the article.
            $art = $caption;
            $name = "";
        }

        $records[] = array(
            "url" => $this->url,
            "image_large" => $large,
            "image_small" => $small,
            "art" => $art,
            "name" => $name,
        );
    }

    return $records;
}
/**
 * Extracts items from the table-based listing in $this->pageContent.
 *
 * Each record carries "url" (the page itself), "image_large" (built as
 * ItemImages/<catalogId>.jpg), "image_small", "descr", "size" and "art".
 * "size" is whatever follows the "Размеры (XxYxZ):" label inside the
 * tag-stripped description, or an empty string when the label is absent.
 *
 * @return array|PEAR_Error List of item records, or a PEAR_Error when nothing matches.
 */
function parseItems()
{
    $regex = <<<PATTERN
{<TABLE[^>]+> <TR[^>]+> <TD[^>]+> <IMG src="(resize.php\\?name=[^"]+)".+? onclick="javascript:window\\.open\\('catalogPrev.php\\?catalogId=(\\d+)'[^"]+\\);"> </TD> <TD[^>]+> <h3>(.+?)</h3> (.+?)<TABLE }si
PATTERN;
    $regex = preg_replace("{\\s+}", "\\s*", $regex);

    if (!preg_match_all($regex, $this->pageContent, $rows, PREG_SET_ORDER)) {
        return new PEAR_Error("Items not found.");
    }

    $sizeRegex = "{Размеры\\s*\\(XxYxZ\\):(.+)}s";
    $records = array();
    foreach ($rows as $row) {
        $thumb = getRealUrl(basePath($this->url), $row[1]);
        // The full-size image address is derived from the numeric catalogue id.
        $full = getRealUrl(basePath($this->url), "ItemImages/" . $row[2] . ".jpg");
        $article = trim($row[3]);
        $text = trim(html_entity_decode(strip_tags($row[4])));

        $dimensions = "";
        if (preg_match($sizeRegex, $text, $sizeHit)) {
            $dimensions = trim($sizeHit[1]);
        }

        $records[] = array(
            "url" => $this->url,
            "image_large" => $full,
            "image_small" => $thumb,
            "descr" => $text,
            "size" => $dimensions,
            "art" => $article,
        );
    }

    return $records;
}
/**
 * Extracts photo/price blocks from $this->pageContent.
 *
 * Each record carries "image_large", "art", "name", "price", "descr",
 * "material", "colors" and "url" (the page itself). The eighth capture group
 * of the template (detail colours) is matched but not stored.
 *
 * @return array|PEAR_Error List of item records, or a PEAR_Error when nothing matches.
 */
function parseItems()
{
    $regex = '{<img src="(/photo/.+?)".+? <b>(.+?)</b> : (.+?)</b>.+?Цена : (.+?) у.е. <br> <br> (.+?) <br> <br> <b> Материал: </b>(.+?)<br> <b> Цвета корпуса: </b>(.+?)<br> <b> Цвета деталей: (.*?)</b> <br>}s';
    $regex = preg_replace("{\\s+}", "\\s*", $regex);

    if (!preg_match_all($regex, $this->pageContent, $blocks, PREG_SET_ORDER)) {
        return new PEAR_Error("Ошибка при выделении элементов.");
    }

    $records = array();
    foreach ($blocks as $block) {
        $records[] = array(
            "image_large" => getRealUrl(basePath($this->url), $block[1]),
            "art" => strip_tags($block[2]),
            "name" => $block[3],
            "price" => strip_tags($block[4]),
            "descr" => strip_tags($block[5]),
            "material" => $block[6],
            "colors" => $block[7],
            "url" => $this->url,
        );
    }

    return $records;
}
/**
 * Extracts table rows of the form name / article / size / picture link
 * from $this->pageContent.
 *
 * Each record carries "url" (the page itself), "image_large" (the link
 * target), "image_small" (the <img> source), "name", "art" and "size".
 *
 * @return array|PEAR_Error List of item records, or a PEAR_Error when nothing matches.
 */
function parseItems()
{
    $regex = <<<PATTERN
{<tr> <td align="center"> <h2 class="context">(.+?)</h2> </td> <td align="center"> <span class="context">(.+?)</span> </td> <td align="center"> <span class="context">(.+?)</span> </td> <td align="center"> <a.+?href="(prod/zapis/[^"]+)" target="_blank"> <img.+?src="(prod/zapis/[^"]+)".+?> </a> </td>}s
PATTERN;
    $regex = preg_replace("{\\s+}", "\\s*", $regex);

    if (!preg_match_all($regex, $this->pageContent, $rows, PREG_SET_ORDER)) {
        return new PEAR_Error("Items not found.");
    }

    $records = array();
    foreach ($rows as $row) {
        $title = trim($row[1]);
        $article = trim($row[2]);
        $dimensions = trim($row[3]);
        $full = getRealUrl(basePath($this->url), $row[4]);
        $thumb = getRealUrl(basePath($this->url), $row[5]);

        $records[] = array(
            "url" => $this->url,
            "image_large" => $full,
            "image_small" => $thumb,
            "name" => $title,
            "art" => $article,
            "size" => $dimensions,
        );
    }

    return $records;
}
/**
 * Extracts catalogue thumbnails with their product links from $this->pageContent.
 *
 * Each record carries "image_small", "image_large" (the thumbnail name with
 * "_s" replaced by "_b"), "url", "name" and "art". The link caption is split
 * at its last space: the trailing word becomes the article and the rest the name.
 *
 * @return array|PEAR_Error List of item records, or a PEAR_Error when nothing matches.
 */
function parseItems()
{
    $regex = <<<PATTERN
{<img src='((images/catalogue/[^']+?)_s(\\.jpg))'.+? \t\t\t<a href='(/\\?action=OnShowGood\\&ID=(.+?))'> \t\t\t\t<b>(.+?)(\\(склад\\))*</b></a>}s
PATTERN;
    $regex = preg_replace("{\\s+}", "\\s*", $regex);

    if (!preg_match_all($regex, $this->pageContent, $hits, PREG_SET_ORDER)) {
        return new PEAR_Error("Ошибка при выделении элементов.");
    }

    $records = array();
    foreach ($hits as $hit) {
        $thumb = getRealUrl(basePath($this->url), $hit[1]);
        $full = getRealUrl(basePath($this->url), $hit[2] . "_b" . $hit[3]);
        $link = getRealUrl(basePath($this->url), $hit[4]);

        $caption = trim($hit[6]);
        $article = "";
        $lastSpace = strrpos($caption, " ");
        if ($lastSpace !== false && $lastSpace > 0) {
            // The final word is taken as the article; everything before it is the name.
            $article = substr($caption, $lastSpace + 1);
            $caption = substr($caption, 0, $lastSpace);
        }

        $records[] = array(
            "image_small" => $thumb,
            "image_large" => $full,
            "url" => $link,
            "name" => $caption,
            "art" => $article,
        );
    }

    return $records;
}
/**
 * Adds URLs of data pages to the page list.
 *
 * Every entry is passed through getRealUrl(basePath($this->url), ...) and is
 * appended to $this->pagesUrls only if that list does not already contain it.
 *
 * @param array $urls URLs to add; an empty list is a no-op.
 * @return void
 */
function addPageUrls($urls)
{
    if (count($urls) > 0) {
        foreach ($urls as $candidate) {
            $absolute = getRealUrl(basePath($this->url), $candidate);
            if (in_array($absolute, $this->pagesUrls)) {
                // Already queued.
                continue;
            }
            $this->pagesUrls[] = $absolute;
        }
    }
}
/**
 * Extracts product rows ("row_..." blocks) from $this->pageContent.
 *
 * Each block is tried against two layouts: thumbnail before the product link,
 * then product link before the thumbnail. The record returned by
 * $this->getItemInfo($url) is completed with "name", "image_small", "descr",
 * "colors" and "url".
 *
 * Blocks that match neither layout are skipped. Previously such a block reused
 * the variables of the preceding block (or undefined ones on the first pass),
 * which produced a bogus or duplicated record.
 *
 * @return array|PEAR_Error List of item records, or a PEAR_Error when no
 *                          row block is found on the page.
 */
function parseItems()
{
    $items = array();

    $blockPattern = '{<div style=".+?" id="row_.+?"> <table width="100%" cellspacing="0" cellpadding="0" border="0" >(.+?) </div><br style="clear:both;" />}s';
    $blockPattern = preg_replace("{\\s+}", "\\s*", $blockPattern);
    if (!preg_match_all($blockPattern, $this->pageContent, $matches, PREG_PATTERN_ORDER)) {
        return new PEAR_Error("Items not found.");
    }

    // Both layout patterns are loop-invariant, so they are prepared once.
    // Layout 1: thumbnail, then product link, then description cell.
    $imageFirst = '{<a href="javascript.+? <img src="(http://www.luxor-russia.com/components/com_phpshop/show_image_in_imgtag\\.php\\?[^"]+)".+? <a[^>]+href="(http://luxor-russia.com/index\\.php\\?page=[^"]+)">(.+?)</a>.+?<td colspan="2">(.+?)</td>}s';
    $imageFirst = preg_replace("{\\s+}", "\\s*", $imageFirst);
    // Layout 2: product link, then thumbnail, then description cell.
    $linkFirst = '{href="(http://luxor-russia.com/index\\.php\\?page=[^"]+)">(.+?)</a>.+? <img src="(http://www\\.luxor-russia\\.com/components/com_phpshop/show_image_in_imgtag\\.php\\?[^"]+)".+? <td[^>]+>(.+?)<a}s';
    $linkFirst = preg_replace("{\\s+}", "\\s*", $linkFirst);

    // The whole matched block (index 0) is inspected, not only its first group.
    foreach ($matches[0] as $block) {
        if (preg_match($imageFirst, $block, $data)) {
            $imgSmall = getRealUrl($this->url, $data[1]);
            $url = getRealUrl($this->url, $data[2]);
            $name = trim($data[3]);
            $descr = strip_tags($data[4]);
        } elseif (preg_match($linkFirst, $block, $data)) {
            $url = getRealUrl($this->url, $data[1]);
            $name = trim($data[2]);
            $imgSmall = getRealUrl($this->url, $data[3]);
            $descr = strip_tags($data[4]);
        } else {
            // Unknown layout: nothing reliable to record for this block.
            continue;
        }

        // NOTE(review): the label literal below looks like a mis-decoded
        // "Цвет:" (colour); it is kept byte-identical — confirm against the
        // file's real encoding before changing it.
        if (preg_match('{(.+?)÷вет:\\s*(.+)}s', $descr, $mSrc)) {
            $descr = trim($mSrc[1]);
            $colors = trim($mSrc[2]);
        } else {
            $colors = "";
            $descr = trim($descr);
        }

        $item = $this->getItemInfo($url);
        $item["name"] = $name;
        $item["image_small"] = $imgSmall;
        $item["descr"] = $descr;
        $item["colors"] = $colors;
        $item["url"] = $url;
        $items[] = $item;
    }

    return $items;
}
/**
 * Extracts numbered table rows from $this->pageContent.
 *
 * Each record carries "url" (the page itself), "image_large", "image_small",
 * "descr", "size" and "art". The picture cell is optional: when the row has no
 * pop-up link, groups 7 and 8 are absent from the match and both image
 * addresses are left empty. "size" is the text after "Размеры (XxYxZ):" in the
 * description, if present.
 *
 * @return array|PEAR_Error List of item records, or a PEAR_Error when nothing matches.
 */
function parseItems()
{
    $regex = <<<PATTERN
{<tr> <td[^>]+>(\\d+)</td> <td[^>]+> <p align="justify">(.+?)</p> </td> <td[^>]+>(.+?)</td> <td[^>]+> (<table[^>]+>(.+?)</table>)* </td> <td[^>]+> (<a href="#" onClick="window.open\\('([^']+)','','[^']+'\\); return false;"> <img src="(pic_small/[^"]+)" width="100"[^>]+> </a>|.+?) </td> </tr> }s
PATTERN;
    $regex = preg_replace("{\\s+}", "\\s*", $regex);

    if (!preg_match_all($regex, $this->pageContent, $rows, PREG_SET_ORDER)) {
        return new PEAR_Error("Items not found.");
    }

    $sizeRegex = "{Размеры\\s*\\(XxYxZ\\):(.+)}s";
    $records = array();
    foreach ($rows as $row) {
        $text = trim(strip_tags($row[2]));
        $article = trim($row[3]);

        $full = isset($row[7]) ? getRealUrl(basePath($this->url), $row[7]) : "";
        $thumb = isset($row[8]) ? getRealUrl(basePath($this->url), $row[8]) : "";

        $dimensions = "";
        if (preg_match($sizeRegex, $text, $sizeHit)) {
            $dimensions = trim($sizeHit[1]);
        }

        $records[] = array(
            "url" => $this->url,
            "image_large" => $full,
            "image_small" => $thumb,
            "descr" => $text,
            "size" => $dimensions,
            "art" => $article,
        );
    }

    return $records;
}
/**
 * Builds the single item record described by the current page.
 *
 * The first goods_images picture (spaces encoded as %20) is used for both
 * "image_small" and "image_large"; the "portcap" heading cell gives "name".
 * Either part stays an empty string when its pattern is not found, so this
 * method always returns exactly one record.
 *
 * @return array One-element list with the item record.
 */
function parseItems()
{
    $picture = "";
    $pictureRegex = '{src="([^"]+ru/modules/catalog/admin/goods_images/[^"]+)"}si';
    if (preg_match($pictureRegex, $this->pageContent, $hit)) {
        $picture = str_replace(" ", "%20", getRealUrl(basePath($this->url), $hit[1]));
    }

    $title = "";
    $titleRegex = '{<td[^>]+background="images/portcapback.gif"[^>]+class=portcap>(.+?)</td>}si';
    if (preg_match($titleRegex, $this->pageContent, $hit)) {
        $title = trim(strip_tags($hit[1]));
    }

    return array(
        array(
            "image_small" => $picture,
            "image_large" => $picture,
            "url" => $this->url,
            "name" => $title,
        ),
    );
}
/**
 * Extracts pop-up picture links with their two caption lines from the page.
 *
 * Side effect: $this->pageContent is first re-encoded with iconv() from
 * $this->getSiteEncoding() to windows-1251 and stored back.
 *
 * Each record carries "url" (the page itself), "image_large" (built as
 * /zont/pics/<p[0] value>), "image_small", "art" and "descr".
 *
 * @return array|PEAR_Error List of item records, or a PEAR_Error when nothing matches.
 */
function parseItems()
{
    $this->pageContent = iconv($this->getSiteEncoding(), "windows-1251", $this->pageContent);

    $regex = "{<a href='/show_img\\.php\\?p\\[0\\]=(.+?)\\&.+?'\n\t\t\tonClick='return pop_window.+?><img src='(/zont/pics/.+?)' border=1>\n</a><br>(.+?)<br>(.+?)</td>}s";
    $regex = preg_replace("{\\s+}", "\\s*", $regex);

    if (!preg_match_all($regex, $this->pageContent, $hits, PREG_SET_ORDER)) {
        return new PEAR_Error("Ошибка при выделении элементов.");
    }

    $records = array();
    foreach ($hits as $hit) {
        $full = getRealUrl(basePath($this->url), "/zont/pics/" . $hit[1]);
        $thumb = getRealUrl(basePath($this->url), $hit[2]);

        $records[] = array(
            "url" => $this->url,
            "image_large" => $full,
            "image_small" => $thumb,
            "art" => $hit[3],
            "descr" => $hit[4],
        );
    }

    return $records;
}
/**
 * Collects item records for every /card.php link found in $this->pageContent.
 *
 * For each link, the record returned by $this->getItemInfo() for the link
 * address is taken and its "image_small" is set from the thumbnail that
 * accompanies the link.
 *
 * @return array|PEAR_Error List of item records, or a PEAR_Error when nothing matches.
 */
function parseItems()
{
    $regex = "{<a target=_blank\n\t\t\thref='(/card.php\\?id=[^']+)'>\n\t\t\t<img border=0 src='(\\./files/images/[^']+)'}s";
    $regex = preg_replace("{\\s+}", "\\s*", $regex);

    if (!preg_match_all($regex, $this->pageContent, $groups, PREG_PATTERN_ORDER)) {
        return new PEAR_Error("Ошибка при выделении элементов.");
    }

    // With PREG_PATTERN_ORDER, $groups[1] and $groups[2] are parallel lists.
    $records = array();
    foreach ($groups[1] as $index => $href) {
        $record = $this->getItemInfo(getRealUrl(basePath($this->url), $href));
        $record["image_small"] = getRealUrl(basePath($this->url), $groups[2][$index]);
        $records[] = $record;
    }

    return $records;
}
/**
 * Collects item records for every zoom.php link found in $this->pageContent.
 *
 * For each link, the record returned by $this->getItemInfo() for the link
 * address is taken and its "image_small" is set from the catimages thumbnail
 * that accompanies the link.
 *
 * @return array|PEAR_Error List of item records, or a PEAR_Error when nothing matches.
 */
function parseItems()
{
    $regex = '{<div><a href="(zoom\\.php\\?cat=.+?)"><img src="(catimages/[^"]+)"}s';
    $regex = preg_replace("{\\s+}", "\\s*", $regex);

    if (!preg_match_all($regex, $this->pageContent, $groups, PREG_PATTERN_ORDER)) {
        return new PEAR_Error("Ошибка при выделении элементов.");
    }

    $links = $groups[1];
    $thumbs = $groups[2];
    $records = array();
    $total = count($links);
    for ($n = 0; $n < $total; $n++) {
        $target = getRealUrl(basePath($this->url), $links[$n]);
        $record = $this->getItemInfo($target);
        $record["image_small"] = getRealUrl(basePath($this->url), $thumbs[$n]);
        $records[] = $record;
    }

    return $records;
}
/**
 * Collects item records for every /index.php?page=N link that wraps a
 * /file.php?f=N image in $this->pageContent.
 *
 * For each link, the record returned by $this->getItemInfo() for the link
 * address is taken and its "image_small" is set from the wrapped image.
 *
 * @return array|PEAR_Error List of item records, or a PEAR_Error when nothing matches.
 */
function parseItems()
{
    $regex = '{ <a href="(/index\\.php\\?page=\\d+)"> <img src="(/file\\.php\\?f=\\d+)"}s';
    $regex = preg_replace("{\\s+}", "\\s*", $regex);

    if (!preg_match_all($regex, $this->pageContent, $groups, PREG_PATTERN_ORDER)) {
        return new PEAR_Error("Items not found.");
    }

    $records = array();
    foreach ($groups[1] as $position => $pageLink) {
        $detailsUrl = getRealUrl(basePath($this->url), $pageLink);
        $entry = $this->getItemInfo($detailsUrl);
        $entry["image_small"] = getRealUrl(basePath($this->url), $groups[2][$position]);
        $records[] = $entry;
    }

    return $records;
}
/**
 * Collects every /pic/ image that sits alone in a table cell of $this->pageContent.
 *
 * Each record carries only "url" (the page itself) and "image_large".
 *
 * @return array|PEAR_Error List of item records, or a PEAR_Error when nothing matches.
 */
function parseItems()
{
    $regex = '{ <td[^>]*> <img[^>]+src="(/pic/[^"]+)"[^>]*> </td> }s';
    $regex = preg_replace("{\\s+}", "\\s*", $regex);

    if (!preg_match_all($regex, $this->pageContent, $cells, PREG_SET_ORDER)) {
        return new PEAR_Error("Items not found.");
    }

    $records = array();
    foreach ($cells as $cell) {
        $records[] = array(
            "url" => $this->url,
            "image_large" => getRealUrl(basePath($this->url), $cell[1]),
        );
    }

    return $records;
}
/**
 * Collects item records for every "big_win" pop-up link found in $this->pageContent.
 *
 * For each link, the record returned by $this->getItemInfo() for the link
 * address is taken and its "image_small" is set from the img/ thumbnail
 * inside the link.
 *
 * @return array|PEAR_Error List of item records, or a PEAR_Error when nothing matches.
 */
function parseItems()
{
    $regex = <<<PATTERN
{<a href="(.+?)" \t\t\tonClick="big_win\\('nw7',880,490\\);" target="nw7"> \t\t\t<img src="(img/.+?)"}s
PATTERN;
    $regex = preg_replace("{\\s+}", "\\s*", $regex);

    if (!preg_match_all($regex, $this->pageContent, $links, PREG_SET_ORDER)) {
        return new PEAR_Error("Ошибка при выделении элементов.");
    }

    $records = array();
    foreach ($links as $link) {
        $record = $this->getItemInfo(getRealUrl(basePath($this->url), $link[1]));
        $record["image_small"] = getRealUrl(basePath($this->url), $link[2]);
        $records[] = $record;
    }

    return $records;
}
/**
 * Collects the pictures shown in 22%-wide table cells of $this->pageContent.
 *
 * Each record carries "image_large", "name" (the image's alt text) and
 * "url" (the page itself).
 *
 * @return array|PEAR_Error List of item records, or a PEAR_Error when nothing matches.
 */
function parseItems()
{
    $regex = <<<PATTERN
{<td width="22%"><img src="([^"]+?)" \twidth=".+?" height=".+?" alt="(.+?)"></td>}s
PATTERN;
    $regex = preg_replace("{\\s+}", "\\s*", $regex);

    if (!preg_match_all($regex, $this->pageContent, $cells, PREG_SET_ORDER)) {
        return new PEAR_Error("Items not found.");
    }

    $records = array();
    foreach ($cells as $cell) {
        $records[] = array(
            "image_large" => getRealUrl(basePath($this->url), $cell[1]),
            "name" => $cell[2],
            "url" => $this->url,
        );
    }

    return $records;
}
/**
 * Extracts launchImg() product blocks from $this->pageContent.
 *
 * Each record carries "art", "image_large" (built as
 * catalog/<second launchImg argument>/big/<first argument>.jpg),
 * "image_small", "url" (the page itself), "name", "brandname" and "price".
 *
 * NOTE(review): the three label literals in the pattern look like mis-decoded
 * Cyrillic ("Артикул:", "Бренд:", "Цена:"); they are kept byte-identical —
 * confirm against the file's real encoding.
 *
 * @return array|PEAR_Error List of item records, or a PEAR_Error when nothing matches.
 */
function parseItems()
{
    $regex = "{<a href='javascript:launchImg\\(\"(.+?)\",(\\d+)\\).+?\n\t\t\tsrc='(catalog/.+?)'.+?<b>(.+?)</b>.+?\n\t\t\t<b>јртикул:</b>(.+?)</td>.+?\n\t\t\t<b>Ѕренд:</b>(.+?)</td>.+?\n\t\t\t<b>÷ена:</b>(.+?)</td>}s";
    $regex = preg_replace("{\\s+}", "\\s*", $regex);

    if (!preg_match_all($regex, $this->pageContent, $blocks, PREG_SET_ORDER)) {
        return new PEAR_Error("Items not found.");
    }

    $records = array();
    foreach ($blocks as $block) {
        $article = trim($block[5]);
        $thumb = getRealUrl(basePath($this->url), $block[3]);
        $full = getRealUrl(basePath($this->url), "catalog/" . $block[2] . "/big/" . $block[1] . ".jpg");

        $records[] = array(
            "art" => $article,
            "image_large" => $full,
            "image_small" => $thumb,
            "url" => $this->url,
            "name" => trim($block[4]),
            "brandname" => trim($block[6]),
            "price" => trim($block[7]),
        );
    }

    return $records;
}
/**
 * Collects item records for every half-width product cell in $this->pageContent.
 *
 * For each ".html" link, the record returned by $this->getItemInfo() for the
 * link address is taken and its "image_small" is set from the images/
 * thumbnail inside the link.
 *
 * @return array|PEAR_Error List of item records, or a PEAR_Error when nothing matches.
 */
function parseItems()
{
    $regex = <<<PATTERN
{<td valign='top' width='50%'><a href='(.+?.html)' target='_blank'> \t <img src='(images/.+?)' border='0' align='left' hspace='7' vspace='6'> <font class='maintext'>Артикул:}s
PATTERN;
    $regex = preg_replace("{\\s+}", "\\s*", $regex);

    if (!preg_match_all($regex, $this->pageContent, $groups, PREG_PATTERN_ORDER)) {
        return new PEAR_Error("Items not found.");
    }

    // $groups[1] (links) and $groups[2] (thumbnails) are parallel lists.
    $records = array();
    foreach ($groups[1] as $slot => $pageLink) {
        $record = $this->getItemInfo(getRealUrl(basePath($this->url), $pageLink));
        $record["image_small"] = getRealUrl(basePath($this->url), $groups[2][$slot]);
        $records[] = $record;
    }

    return $records;
}
/**
 * Builds one item record from a fragment of image cells and a fragment of
 * description cells.
 *
 * Fixes over the previous version:
 *  - the loop read an undefined $m instead of the current image match;
 *  - the description pattern was run against $imgsStr instead of $descrsStr;
 *  - the description check was inverted, so a successful match returned array().
 *
 * As before, when several image cells match, the values of the LAST one are
 * returned. The description is now taken from the description cell at the same
 * position (tags stripped, trimmed), or left empty when there is none.
 * NOTE(review): pairing cells by position is an assumption about the two
 * fragments' layout — confirm against the caller.
 *
 * @param string $imgsStr   Markup holding the <td><a><img> cells.
 * @param string $descrsStr Markup holding the description <td> cells.
 * @return array Record with "url", "descr", "image_large" and "image_small",
 *               or an empty array when no image cell is found.
 */
function getItemInfo($imgsStr, $descrsStr)
{
    // This pattern is used as written (it carries its own \s*), not whitespace-normalised.
    $imgPattern = <<<PATTERN
{<td[^>]+>\\s*<a[^>]+href="([^"]+)">\\s*<img border="0" src="([^"]+)"}s
PATTERN;
    if (!preg_match_all($imgPattern, $imgsStr, $imgMatches, PREG_SET_ORDER)) {
        return array();
    }

    $descrPattern = <<<PATTERN
{<td[^>]+>(.+?)</td>}s
PATTERN;
    $descrPattern = preg_replace("{\\s+}", "\\s*", $descrPattern);
    // Descriptions are optional: no match simply leaves "descr" empty.
    $descrMatches = array();
    preg_match_all($descrPattern, $descrsStr, $descrMatches, PREG_SET_ORDER);

    // Keep the original "last match wins" result without re-assigning in a loop.
    $last = count($imgMatches) - 1;
    $cell = $imgMatches[$last];

    $img = getRealUrl(basePath($this->url), $cell[1]);
    $imgSmall = getRealUrl(basePath($this->url), $cell[2]);

    $descr = "";
    if (isset($descrMatches[$last][1])) {
        $descr = trim(strip_tags($descrMatches[$last][1]));
    }

    return array(
        "url" => $this->url,
        "descr" => $descr,
        "image_large" => $img,
        "image_small" => $imgSmall,
    );
}
/**
 * Collects item records for every alacarte_unit.php link in $this->pageContent.
 *
 * For each link, the record returned by $this->getItemInfo() for the link
 * address is taken and its "image_small" is set from the link's show_pict
 * parameter (capture group 2).
 * NOTE(review): the <img> source (group 3) is matched but not used; confirm
 * that show_pict really is the intended small image.
 *
 * @return array|PEAR_Error List of item records, or a PEAR_Error when nothing matches.
 */
function parseItems()
{
    $regex = <<<PATTERN
{<a href="(alacarte_unit\\.php\\?page=\\d+\\&id=\\d+\\&show_pict=(/modules/alacarte/models/[^"]+))" > <img[^>]+src="(http://s-collection.ru/modules/alacarte/models/[^"]+)"[^>]*> }s
PATTERN;
    $regex = preg_replace("{\\s+}", "\\s*", $regex);

    if (!preg_match_all($regex, $this->pageContent, $groups, PREG_PATTERN_ORDER)) {
        return new PEAR_Error("Items not found.");
    }

    $records = array();
    foreach ($groups[1] as $idx => $unitLink) {
        $record = $this->getItemInfo(getRealUrl(basePath($this->url), $unitLink));
        $record["image_small"] = getRealUrl(basePath($this->url), $groups[2][$idx]);
        $records[] = $record;
    }

    return $records;
}
/**
 * Extracts product cells (heading "name, арт. article", zoomable logo,
 * material and size lines) from $this->pageContent.
 *
 * Each record carries "url" (the page itself), "art", "name", "image_large",
 * "image_small", "size" and "material". The material was already extracted
 * and decoded before but never stored in the record; it is now appended as an
 * extra key, leaving the existing keys and their order untouched.
 *
 * @return array|PEAR_Error List of item records, or a PEAR_Error when nothing matches.
 */
function parseItems()
{
    $items = array();

    $pattern = <<<PATTERN
{<td[^>]*> <h2>(.+?), арт. (.+?)</h2> <div class="im" onclick="ZoomB\\('(/data/shop/products/.+?/i/logo0.jpg)'\\)" style="background-image:url\\((/data/shop/products/.+?/i/logo.jpg)\\)"> <br /> </div> <p> <b> Материал: </b>(.+?)<br /> <b> Размеры: </b>(.+?)<br /> </p> </td>}si
PATTERN;
    // Whitespace runs in the template are relaxed to \s*.
    $pattern = preg_replace("{\\s+}", "\\s*", $pattern);

    if (!preg_match_all($pattern, $this->pageContent, $matches, PREG_SET_ORDER)) {
        return new PEAR_Error("Items not found.");
    }

    foreach ($matches as $m) {
        $name = trim($m[1]);
        $art = trim($m[2]);
        // logo0.jpg is the picture opened by ZoomB(); logo.jpg is the cell background.
        $img = getRealUrl(basePath($this->url), $m[3]);
        $imgSmall = getRealUrl(basePath($this->url), $m[4]);
        $material = trim(html_entity_decode($m[5]));
        $size = trim(html_entity_decode($m[6]));

        $items[] = array(
            "url" => $this->url,
            "art" => $art,
            "name" => $name,
            "image_large" => $img,
            "image_small" => $imgSmall,
            "size" => $size,
            "material" => $material,
        );
    }

    return $items;
}
/**
 * Extracts "contentheading" product blocks from $this->pageContent.
 *
 * Each record carries "art", "image_large", "image_small", "drawing", "size",
 * "url" (the page itself), "name" and "price". Name, size and drawing come
 * from the tag-stripped, entity-decoded block text: the name is everything up
 * to the first full stop, the other two follow their labels up to the next
 * full stop. Each is an empty string when not found.
 *
 * @return array|PEAR_Error List of item records, or a PEAR_Error when nothing matches.
 */
function parseItems()
{
    $regex = '{<td class="contentheading" width="100%"> Арт.(.+?) \\(.+?\\).+?<a href="(images/stories/.+?)"> <img src="(http://www.uvelen.ru/images/stories/.+?)"[^>]+?> (.+?)<strong>Цена -([^<]+?)</strong> </font></div>}s';
    $regex = preg_replace("{\\s+}", "\\s*", $regex);

    if (!preg_match_all($regex, $this->pageContent, $blocks, PREG_SET_ORDER)) {
        return new PEAR_Error("Ошибка при выделении элементов.");
    }

    $records = array();
    foreach ($blocks as $block) {
        $full = getRealUrl(basePath($this->url), $block[2]);
        $thumb = getRealUrl(basePath($this->url), $block[3]);
        $text = html_entity_decode(strip_tags($block[4]));

        $title = preg_match('{(.+?)\\.}s', $text, $hit) ? $hit[1] : "";
        $printArea = preg_match('{Размер запечатки:(.+?)\\.}s', $text, $hit) ? $hit[1] : "";
        $printKind = preg_match('{Вид нанесения:(.+?)\\.}s', $text, $hit) ? $hit[1] : "";

        $records[] = array(
            "art" => $block[1],
            "image_large" => $full,
            "image_small" => $thumb,
            "drawing" => $printKind,
            "size" => $printArea,
            "url" => $this->url,
            "name" => $title,
            "price" => $block[5],
        );
    }

    return $records;
}
/**
 * Fetches the qvod:// play address for every entry and formats the results.
 *
 * For each entry, the page at getRealUrl($entry['url']) is downloaded with
 * getHtml(), converted from GBK to UTF-8, and searched for the
 * `Player.url = "qvod://...";` assignment. The script terminates with die()
 * as soon as one page has no such address. After every page the loop sleeps
 * for five seconds.
 *
 * @param array $qpurl Entries, each with a 'url' and a 'title' key.
 * @return array Strings of the form "<title>$<play url>$qvod", both parts
 *               passed through unicode_encode().
 */
function getQvodUrls($qpurl = array())
{
    // Declared by the original code; not read in this function.
    global $_root;

    $lines = array();
    foreach ($qpurl as $entry) {
        $pageUrl = getRealUrl($entry['url']);
        $markup = mb_convert_encoding(getHtml($pageUrl), "UTF-8", "GBK");

        preg_match('#Player\\.url = "(qvod://.+)";#Uis', $markup, $found);
        $playUrl = '';
        if (isset($found[1])) {
            $playUrl = $found[1];
        }
        if (!$playUrl) {
            die("\nGet Qvod Play Url Data Error!\n");
        }

        $encodedTitle = unicode_encode($entry['title']);
        $encodedUrl = unicode_encode($playUrl);
        $lines[] = sprintf('%s$%s$qvod', $encodedTitle, $encodedUrl);

        sleep(5);
    }

    return $lines;
}
/**
 * Collects item records for every product link found in $this->pageContent.
 *
 * The link template is whitespace-normalised (each whitespace run becomes
 * \s*) and matched with PREG_PATTERN_ORDER. Group 1 is the link target and
 * group 3 the thumbnail address under pic.krukro.com. Each target is trimmed,
 * entity-decoded, cleared of the line-break character, passed through
 * getRealUrl(basePath($this->url), ...) and handed to $this->getItemInfo();
 * the returned record is stored unchanged.
 *
 * NOTE(review): the pattern and the str_replace() needle contain raw
 * line-break characters inside the string literals — confirm the exact
 * byte(s) before reformatting this method.
 * NOTE(review): $smallImages is assigned but never read here, so no
 * "image_small" is set by this method — TODO confirm whether that is intended.
 *
 * @return array|PEAR_Error List of item records, or a PEAR_Error when nothing matches.
 */
function parseItems() { $items = array(); $pattern = '{<a href="
([^"]+)(
)*[^"]*".+? <img src="(http://pic.krukro.com/public_html/supermarket/products/[^"]+)"}s'; $pattern = preg_replace("{\\s+}", "\\s*", $pattern); if (!preg_match_all($pattern, $this->pageContent, $matches, PREG_PATTERN_ORDER)) { return new PEAR_Error("Items not found."); } $urls = $matches[1]; $smallImages = $matches[3]; for ($i = 0; $i < sizeof($urls); $i++) { $url = html_entity_decode(trim($urls[$i])); $url = str_replace("
", "", $url); $url = getRealUrl(basePath($this->url), $url); $item = $this->getItemInfo($url); $items[] = $item; } return $items; }
/**
 * Builds one record per centred /images/ picture on the current page.
 *
 * The page heading (<h1>) and the paragraph preceding "<p><center>" give the
 * shared "name" and "descr" (entity-decoded, tags stripped, trimmed). Each
 * record also carries "image_large", "url" (the page itself) and an empty
 * "descr_other". When the page has no matching picture the list is empty.
 *
 * @return array|PEAR_Error List of item records, or a PEAR_Error when the
 *                          heading/paragraph block is not found.
 */
function parseItemsGum()
{
    $headRegex = <<<PATTERN
{<h1>(.+?)</h1> .+? <p>(.+?)<p><center> }si
PATTERN;
    $headRegex = preg_replace("{\\s+}", "\\s*", $headRegex);
    if (!preg_match($headRegex, $this->pageContent, $head)) {
        return new PEAR_Error("Items not found.");
    }

    $pictureRegex = <<<PATTERN
{<center> <img src="(/images/[^"]+)"[^>]+>}si
PATTERN;
    $pictureRegex = preg_replace("{\\s+}", "\\s*", $pictureRegex);
    preg_match_all($pictureRegex, $this->pageContent, $pictures, PREG_SET_ORDER);

    // Name and description are the same for every picture on the page.
    $title = trim(strip_tags(html_entity_decode($head[1])));
    $text = trim(strip_tags(html_entity_decode($head[2])));

    $records = array();
    foreach ($pictures as $picture) {
        $records[] = array(
            "image_large" => getRealUrl(basePath($this->url), $picture[1]),
            "url" => $this->url,
            "name" => $title,
            "descr" => $text,
            "descr_other" => "",
        );
    }

    return $records;
}
/**
 * Collects item records for every popup('info.asp?id=N') link in $this->pageContent.
 *
 * The "shokonabor" listing address is delegated to $this->parseItems1().
 * Otherwise, for each link the record returned by $this->getItemInfo() for
 * the link address is taken and its "image_small" is set from the images/
 * thumbnail inside the link.
 *
 * @return array|PEAR_Error List of item records (or whatever parseItems1()
 *                          returns), or a PEAR_Error when nothing matches.
 */
function parseItems()
{
    $isSetPage = strcmp($this->url, "http://www.shokostyle.ru/shokonabor.asp?order_id=") == 0;
    if ($isSetPage) {
        return $this->parseItems1();
    }

    $regex = <<<PATTERN
{<a href="javascript\\:popup\\('(info\\.asp\\?id=\\d+)'[^"]+"[^>]+> <img[^>]+src="(images/[^"]+)"}si
PATTERN;
    $regex = preg_replace("{\\s+}", "\\s*", $regex);

    if (!preg_match_all($regex, $this->pageContent, $links, PREG_SET_ORDER)) {
        return new PEAR_Error("Items not found.");
    }

    $records = array();
    foreach ($links as $link) {
        $record = $this->getItemInfo(getRealUrl(basePath($this->url), $link[1]));
        $record["image_small"] = getRealUrl(basePath($this->url), $link[2]);
        $records[] = $record;
    }

    return $records;
}
/**
 * Collects item records for every "image_preview" cell in $this->pageContent.
 *
 * For each /catalogue/detail/ link, the record returned by
 * $this->getItemInfo() for the link address is taken and its "image_small"
 * is set from the preview image inside the link.
 *
 * @return array|PEAR_Error List of item records, or a PEAR_Error when nothing matches.
 */
function parseItems()
{
    $regex = '{<table cellspacing="3" cellpadding="3" border="0"> <tr> <td rowspan="2" class="image_preview" align="center"> <a href="(/catalogue/detail/.+?)"> <img src="(/catalogue/images/smoll/[^"]+?)".+?}s';
    $regex = preg_replace("{\\s+}", "\\s*", $regex);

    if (!preg_match_all($regex, $this->pageContent, $groups, PREG_PATTERN_ORDER)) {
        return new PEAR_Error("Items not found.");
    }

    // $groups[1] holds the detail links, $groups[2] the previews, in the same order.
    $records = array();
    foreach ($groups[1] as $row => $detailLink) {
        $record = $this->getItemInfo(getRealUrl(basePath($this->url), $detailLink));
        $record["image_small"] = getRealUrl(basePath($this->url), $groups[2][$row]);
        $records[] = $record;
    }

    return $records;
}
/**
 * Extracts items from $this->pageContent, trying two page structures.
 *
 * Primary structure: /catalogue/detail/ links with a title and a small.jpg
 * image. The record returned by $this->getItemInfo() for each link is
 * completed with "image_small" and "name" (the link title).
 *
 * Fallback structure ("catalog_cell" cells): each record carries "url" (the
 * page itself), "image_small", "image_large", "art", "name" and "descr".
 * Article and name are read from the quoted "Артикул:" snippet between the
 * two images or, failing that, from the image's alt text.
 *
 * @return array|PEAR_Error List of item records, or a PEAR_Error when neither
 *                          structure matches.
 */
function parseItems()
{
    $records = array();

    $primary = '{<a href="(/catalogue/detail/\\?.+?)" title="(.+?)"> <img.+?src=(/catalogue/files/.+?/small\\.jpg).+?></a>}s';
    $primary = preg_replace("{\\s+}", "\\s*", $primary);
    if (preg_match_all($primary, $this->pageContent, $links, PREG_SET_ORDER)) {
        foreach ($links as $link) {
            $record = $this->getItemInfo(getRealUrl(basePath($this->url), $link[1]));
            $record["image_small"] = getRealUrl(basePath($this->url), $link[3]);
            $record["name"] = $link[2];
            $records[] = $record;
        }
        return $records;
    }

    // Attempt to parse the other page structure.
    $fallback = <<<PATTERN
{<td class="catalog_cell"> <img src="(/images/lexon/catalog/[^"]+)".+?src='(/images/lexon/catalog/[^']+)'(.+?)alt="([^"]+)"> }s
PATTERN;
    $fallback = preg_replace("{\\s+}", "\\s*", $fallback);
    if (!preg_match_all($fallback, $this->pageContent, $cells, PREG_SET_ORDER)) {
        return new PEAR_Error("Items not found.");
    }

    foreach ($cells as $cell) {
        $thumb = getRealUrl(basePath($this->url), $cell[1]);
        $full = getRealUrl(basePath($this->url), $cell[2]);

        $article = "";
        $title = "";
        $text = "";
        if (preg_match("{'Артикул:([^']+)<br><strong>([^']+)</strong>(<br>[^']+)*'}s", $cell[3], $parts)) {
            $article = $parts[1];
            $title = $parts[2];
            // The trailing description group is absent from the match when it did not take part.
            if (isset($parts[3])) {
                $text = strip_tags($parts[3]);
            }
        } elseif (preg_match('{Артикул:\\s+([^\\s]+)\\s+([^\\s]+)\\s+(.+)}s', $cell[4], $parts)) {
            // In the alt text, the article is given as two words and is joined with "-".
            $article = $parts[1] . "-" . $parts[2];
            $title = $parts[3];
        }

        $records[] = array(
            "url" => $this->url,
            "image_small" => $thumb,
            "image_large" => $full,
            "art" => trim($article),
            "name" => trim($title),
            "descr" => $text,
        );
    }

    return $records;
}