/**
 * Parses a listing page and returns the ads found in its offers table.
 *
 * The offers table is the first <table> whose id contains "offers_table";
 * each ad is a <td> of that table whose class contains "offer".
 *
 * @param string      $content Raw HTML of the listing page.
 * @param Filter|null $filter  Optional filter; ads for which isValid()
 *                             returns false are dropped.
 *
 * @return Ad[]|null Ads indexed by their id. Null when $content is empty
 *                   (historical behaviour, kept for existing callers);
 *                   an empty array when no offers table is found.
 */
public function process($content, Filter $filter = null)
{
    if (!$content) {
        return;
    }

    $content = str_replace("<br/>", " ", $content);
    $this->loadHTML($content);

    $midnight = strtotime(date("Y-m-d") . " 00:00:00");
    $timeToday = strtotime(date("Y-m-d") . " 23:59:59");

    // Locate the table holding the offers.
    $tableOffers = null;
    foreach ($this->getElementsByTagName("table") as $table) {
        if (false !== strpos($table->getAttribute("id"), "offers_table")) {
            $tableOffers = $table;
            break;
        }
    }
    if (!$tableOffers) {
        return array();
    }

    $ads = array();
    foreach ($tableOffers->getElementsByTagName("td") as $adNode) {
        if (false === strpos($adNode->getAttribute("class"), "offer")) {
            continue;
        }

        $ad = new Ad();
        $ad->setUrgent(false);

        // The page gives no indication of whether the seller is a professional.
        $ad->setProfessional(false);

        // TODO: skipping already-sent ads through $filter->getMinId() is not
        // possible for now: the ids do not appear to be numeric and
        // incremental.

        // The date and the city live in the second row, so two rows are
        // required (item(1) would be null otherwise).
        $rows = $adNode->getElementsByTagName("tr");
        if ($rows->length < 2) {
            continue;
        }
        $columns = $adNode->getElementsByTagName("td");
        $row2_p = $rows->item(1)->getElementsByTagName("p");

        // --- date ---
        $dateNode = $row2_p->item(1);
        if (!$dateNode) {
            continue;
        }
        $dateStr = preg_replace("#\\s+#", " ", trim($dateNode->nodeValue));
        if (!$dateStr) {
            continue;
        }
        $aDate = explode(' ', $dateStr);
        if (false !== strpos($dateStr, 'Сегодня')) {
            // today
            $time = $midnight;
        } elseif (false !== strpos($dateStr, 'Вчера')) {
            // yesterday
            $time = strtotime("-1 day", $midnight);
        } elseif (isset($aDate[1]) && isset(self::$months[$aDate[1]])) {
            // "<day> <month name>", interpreted in the current year.
            $time = strtotime(date("Y") . "-" . self::$months[$aDate[1]] . "-" . $aDate[0]);
        } else {
            continue;
        }
        if (false === $time) {
            // Unparseable day/month combination.
            continue;
        }

        // Optional "H:MM" / "HH:MM" time of day. A regex is used rather than
        // fixed offsets around the colon so a one-digit hour at the start of
        // the string is not read from the wrong position.
        if (preg_match('#([0-9]{1,2}):([0-9]{2})#', $dateStr, $timeMatch)) {
            $time += (int) $timeMatch[1] * 3600 + (int) $timeMatch[2] * 60;
        }

        // The page gives no year: a date that falls after today belongs to
        // the previous year. This applies whether or not a time of day was
        // present.
        if ($timeToday < $time) {
            $time = strtotime("-1 year", $time);
        }
        $ad->setDate($time);

        // --- thumbnail ---
        $imageColumn = $columns->item(0);
        if ($imageColumn) {
            $img = $imageColumn->getElementsByTagName("img");
            if ($img->length) {
                // Swap the thumbnail dimensions in the URL for larger ones.
                $ad->setThumbnailLink(
                    str_replace("94x72", "644x461", $img->item(0)->getAttribute("src"))
                );
            }
        }

        // --- title + link ---
        $h3 = $adNode->getElementsByTagName("h3")->item(0);
        $link = $h3 ? $h3->getElementsByTagName("a")->item(0) : null;
        if ($link) {
            $ad->setTitle(trim($link->nodeValue));
            $ad->setLink($link->getAttribute("href"));
        }

        // --- urgent flag ---
        if (false !== strpos($adNode->nodeValue, "Срочно")) {
            $ad->setUrgent(true);
        }

        // --- city ---
        $ad->setCity(trim($row2_p->item(0)->nodeValue));

        // --- category ---
        $category = "";
        $categoryColumn = $columns->item(1);
        if ($categoryColumn) {
            $categoryNode = $categoryColumn->getElementsByTagName("p")->item(0);
            if ($categoryNode) {
                $category = trim($categoryNode->nodeValue);
            }
        }
        $ad->setCategory($category);

        // --- id, taken from the link ("ID<token>.html") ---
        if (!preg_match("#ID([^.]+)\\.html#", (string) $ad->getLink(), $idMatch)) {
            continue;
        }
        $ad->setId(base_convert($idMatch[1], 32, 10));

        // --- price ---
        $priceNode = $columns->item(2);
        $priceColumn = $priceNode ? trim($priceNode->nodeValue) : "";
        if (preg_match('#(?<price>[0-9\\s]+)\\s+(?<currency>грн|\\$|€)#imsU', $priceColumn, $priceMatch)) {
            // The captured group may contain any whitespace (not only plain
            // spaces), so strip every non-digit before casting.
            $ad->setPrice((int) preg_replace('#[^0-9]+#', '', $priceMatch["price"]))
                ->setCurrency($priceMatch["currency"]);
        }

        if ($filter && !$filter->isValid($ad)) {
            continue;
        }

        $ads[$ad->getId()] = $ad;
    }

    return $ads;
}
/**
 * Parses a listing page and returns the ads it contains.
 *
 * An ad is any <div> whose class contains "lbc". Its link is the enclosing
 * <a> when the div is directly wrapped in one, otherwise the first <a>
 * inside the div. The ad id is the number preceding ".htm" in that link.
 * The remaining fields are read from nested <div> elements according to
 * their class (date, title, image, placement, category, price, urgent).
 *
 * @param string      $content Raw HTML of the listing page.
 * @param Filter|null $filter  Optional filter; ads whose id is not greater
 *                             than getMinId(), or for which isValid()
 *                             returns false, are dropped.
 * @param string      $scheme  Stored in $this->scheme before links are
 *                             passed through formatLink().
 *
 * @return Ad[]|null Ads indexed by their id; only ads with a date are kept.
 *                   Null when $content is empty (historical behaviour, kept
 *                   for existing callers).
 */
public function process($content, Filter $filter = null, $scheme = "http")
{
    if (!$content) {
        return;
    }

    $this->scheme = $scheme;
    $this->loadHTML($content);

    $midnight = strtotime(date("Y-m-d") . " 00:00:00");
    $timeToday = strtotime(date("Y-m-d") . " 23:59:59");

    $ads = array();
    foreach ($this->getElementsByTagName("div") as $result) {
        if (false === strpos($result->getAttribute("class"), "lbc")) {
            continue;
        }

        $ad = new Ad();
        $ad->setProfessional(false)->setUrgent(false);

        // nodeName exists on every node type, so a missing or non-element
        // parent is handled without reading an undefined property.
        $parent = $result->parentNode;
        if ($parent && "a" === $parent->nodeName) {
            $a = $parent;
        } else {
            $aTags = $result->getElementsByTagName("a");
            if (!$aTags->length) {
                continue;
            }
            $a = $aTags->item(0);
        }

        $href = $a->getAttribute("href");
        if (!preg_match('/([0-9]+)\\.htm.*/', $href, $idMatch)) {
            continue;
        }

        // Skip ads that have already been sent.
        if ($filter && $idMatch[1] <= $filter->getMinId()) {
            continue;
        }

        $ad->setLink($this->formatLink($href))->setId($idMatch[1]);

        foreach ($result->getElementsByTagName("div") as $node) {
            if (!$node->hasAttribute("class")) {
                continue;
            }
            $class = $node->getAttribute("class");

            if ($class == "date") {
                $dateStr = preg_replace("#\\s+#", " ", trim($node->nodeValue));
                $aDate = explode(' ', $dateStr);
                if (false !== strpos($dateStr, 'Aujourd')) {
                    // today
                    $time = $midnight;
                } elseif (false !== strpos($dateStr, 'Hier')) {
                    // yesterday
                    $time = strtotime("-1 day", $midnight);
                } elseif (isset($aDate[1]) && isset(self::$months[$aDate[1]])) {
                    // "<day> <month name> ...", interpreted in the current year.
                    $time = strtotime(date("Y") . "-" . self::$months[$aDate[1]] . "-" . $aDate[0]);
                } else {
                    // Unknown format: leave the ad without a date, which
                    // drops it at the end of the outer loop.
                    continue;
                }
                if (false === $time) {
                    continue;
                }

                // The time of day is expected in the last token as "HH:MM";
                // it is only added when that token actually has both parts.
                $aTime = explode(":", $aDate[count($aDate) - 1]);
                if (2 <= count($aTime)) {
                    $time += (int) $aTime[0] * 3600 + (int) $aTime[1] * 60;
                }

                // The page gives no year: a date that falls after today
                // belongs to the previous year.
                if ($timeToday < $time) {
                    $time = strtotime("-1 year", $time);
                }
                $ad->setDate($time);
            } elseif ($class == "title") {
                $ad->setTitle(trim($node->nodeValue));
            } elseif ($class == "image") {
                $img = $node->getElementsByTagName("img");
                if ($img->length > 0) {
                    $ad->setThumbnailLink($this->formatLink($img->item(0)->getAttribute("src")));
                }
            } elseif ($class == "placement") {
                // Either "<city> / <country>" or "<country>" alone.
                $placement = $node->nodeValue;
                if (false !== strpos($placement, "/")) {
                    $placement = explode("/", $placement);
                    $ad->setCountry(trim($placement[1]))->setCity(trim($placement[0]));
                } else {
                    $ad->setCountry(trim($placement));
                }
            } elseif ($class == "category") {
                // A "(pro)" marker in the category flags a professional seller.
                $category = $node->nodeValue;
                if (false !== strpos($category, "(pro)")) {
                    $ad->setProfessional(true);
                }
                $ad->setCategory(trim(str_replace("(pro)", "", $category)));
            } elseif ($class == "price") {
                if (preg_match("#[0-9 ]+#", $node->nodeValue, $priceMatch)) {
                    $ad->setPrice((int) str_replace(" ", "", trim($priceMatch[0])));
                }
            } elseif ($class == "urgent") {
                $ad->setUrgent(true);
            }
        }

        // An <h2>, when present, takes precedence over the "title" div.
        $h2Tags = $result->getElementsByTagName("h2");
        if ($h2Tags->length) {
            $ad->setTitle(trim($h2Tags->item(0)->nodeValue));
        }

        if ($filter && !$filter->isValid($ad)) {
            continue;
        }

        if ($ad->getDate()) {
            $ads[$ad->getId()] = $ad;
        }
    }

    return $ads;
}