Esempio n. 1
0
 /**
  * Parses an offers listing page and returns the ads it contains.
  *
  * The page is expected to contain a <table> whose id contains "offers_table".
  * Every <td> of that table whose class contains "offer" is treated as one ad.
  * Ads whose rows, date or ID cannot be extracted are skipped instead of
  * aborting the whole page.
  *
  * @param string      $content HTML of the listing page.
  * @param Filter|null $filter  Optional filter; ads for which isValid() returns false are dropped.
  *
  * @return Ad[]|null Ads indexed by their ID; an empty array when no offers table is found;
  *                   null when $content is empty (kept for backward compatibility).
  */
 public function process($content, Filter $filter = null)
 {
     if (!$content) {
         return;
     }
     // Presumably keeps text fragments separated by <br/> from being glued
     // together once read through nodeValue.
     $content = str_replace("<br/>", " ", $content);
     $this->loadHTML($content);
     // End of the current day: any parsed date beyond it lies in the future.
     $timeToday = strtotime(date("Y-m-d") . " 23:59:59");
     $ads = array();
     $tableOffers = null;
     foreach ($this->getElementsByTagName("table") as $table) {
         if (false !== strpos($table->getAttribute("id"), "offers_table")) {
             $tableOffers = $table;
             break;
         }
     }
     if (!$tableOffers) {
         return array();
     }
     foreach ($tableOffers->getElementsByTagName("td") as $adNode) {
         if (false === strpos($adNode->getAttribute("class"), "offer")) {
             continue;
         }
         $ad = new Ad();
         $ad->setUrgent(false);
         // The listing gives no indication of whether the seller is a professional.
         $ad->setProfessional(false);
         // TODO: already-sent ads cannot be skipped via $filter->getMinId() for now:
         // the IDs do not appear to be numeric and incremental.
         $rows = $adNode->getElementsByTagName("tr");
         // The second row is read below (city and date), so at least two rows are required.
         if ($rows->length < 2) {
             continue;
         }
         $columns = $adNode->getElementsByTagName("td");
         $row2_p = $rows->item(1)->getElementsByTagName("p");
         // First <p> is read as the city, second <p> as the date.
         if ($row2_p->length < 2) {
             continue;
         }
         // Date parsing.
         $dateStr = preg_replace("#\\s+#", " ", trim($row2_p->item(1)->nodeValue));
         if (!$dateStr) {
             continue;
         }
         $aDate = explode(' ', $dateStr);
         $midnight = strtotime(date("Y-m-d") . " 00:00:00");
         if (false !== strpos($dateStr, 'Сегодня')) {
             // Today.
             $time = $midnight;
         } elseif (false !== strpos($dateStr, 'Вчера')) {
             // Yesterday.
             $time = strtotime("-1 day", $midnight);
         } else {
             // "<day> <month name> ..." in the current year.
             if (!isset($aDate[1]) || !isset(self::$months[$aDate[1]])) {
                 continue;
             }
             $time = strtotime(date("Y") . "-" . self::$months[$aDate[1]] . "-" . $aDate[0]);
             if (false === $time) {
                 // Unparseable day: skip the ad rather than storing a bogus date.
                 continue;
             }
         }
         // Add the time of day, located around the first ":" of the date string.
         if (false !== ($pos = mb_strpos($dateStr, ":"))) {
             $time += (int) mb_substr($dateStr, $pos - 2, 2) * 3600;
             $time += (int) mb_substr($dateStr, $pos + 1, 2) * 60;
         }
         // The year is not part of the date string: a date in the future
         // must belong to the previous year.
         if ($timeToday < $time) {
             $time = strtotime("-1 year", $time);
         }
         $ad->setDate($time);
         // Thumbnail: swap the 94x72 size marker in the URL for 644x461.
         $imageColumn = $columns->item(0);
         if ($imageColumn) {
             $img = $imageColumn->getElementsByTagName("img");
             if ($img->length) {
                 $ad->setThumbnailLink(str_replace("94x72", "644x461", $img->item(0)->getAttribute("src")));
             }
         }
         // Title and link.
         $heading = $adNode->getElementsByTagName("h3")->item(0);
         $link = $heading ? $heading->getElementsByTagName("a")->item(0) : null;
         if ($link) {
             $ad->setTitle(trim($link->nodeValue));
             $ad->setLink($link->getAttribute("href"));
         }
         // Urgent flag.
         if (false !== strpos($adNode->nodeValue, "Срочно")) {
             $ad->setUrgent(true);
         }
         // Location.
         $ad->setCity(trim($row2_p->item(0)->nodeValue));
         // Category.
         $categoryColumn = $columns->item(1);
         $categoryNode = $categoryColumn ? $categoryColumn->getElementsByTagName("p")->item(0) : null;
         if ($categoryNode) {
             $ad->setCategory(trim($categoryNode->nodeValue));
         }
         // The ID is the token between "ID" and ".html" in the link, converted from base 32 to base 10.
         // Ads without such a link are skipped.
         if (!preg_match("#ID([^.]+)\\.html#", (string) $ad->getLink(), $m)) {
             continue;
         }
         $ad->setId(base_convert($m[1], 32, 10));
         // Price and currency.
         $priceNode = $columns->item(2);
         if ($priceNode
             && preg_match('#(?<price>[0-9\\s]+)\\s+(?<currency>грн|\\$|€)#imsU', trim($priceNode->nodeValue), $priceMatch)
         ) {
             $ad->setPrice((int) str_replace(" ", "", $priceMatch["price"]))->setCurrency($priceMatch["currency"]);
         }
         if ($filter && !$filter->isValid($ad)) {
             continue;
         }
         $ads[$ad->getId()] = $ad;
     }
     return $ads;
 }
Esempio n. 2
0
 /**
  * Parses a listing page and returns the ads found in it.
  *
  * Every <div> whose class contains "lbc" is treated as one ad. Its link is
  * taken from the enclosing <a> (or the first <a> inside it), and its fields
  * are read from the nested <div>s by exact class name (date, title, image,
  * placement, category, price, urgent). An <h2>, when present, overrides the title.
  *
  * @param string      $content HTML of the listing page.
  * @param Filter|null $filter  Optional filter: ads whose ID is not greater than getMinId(),
  *                             or for which isValid() returns false, are dropped.
  * @param string      $scheme  URL scheme, stored in $this->scheme before parsing
  *                             (presumably consumed by formatLink() - confirm).
  *
  * @return Ad[]|null Ads indexed by their ID (ads without a parsed date are left out);
  *                   null when $content is empty (kept for backward compatibility).
  */
 public function process($content, Filter $filter = null, $scheme = "http")
 {
     if (!$content) {
         return;
     }
     $this->scheme = $scheme;
     $this->loadHTML($content);
     // End of the current day: any parsed date beyond it lies in the future.
     $timeToday = strtotime(date("Y-m-d") . " 23:59:59");
     $ads = array();
     foreach ($this->getElementsByTagName("div") as $result) {
         if (false === strpos($result->getAttribute("class"), "lbc")) {
             continue;
         }
         $ad = new Ad();
         $ad->setProfessional(false)->setUrgent(false);
         // The ad block is either wrapped in its link or contains it.
         // Only element nodes expose tagName, hence the nodeType check.
         $parent = $result->parentNode;
         if ($parent && XML_ELEMENT_NODE === $parent->nodeType && $parent->tagName == "a") {
             $a = $parent;
         } else {
             $aTags = $result->getElementsByTagName("a");
             if (!$aTags->length) {
                 continue;
             }
             $a = $aTags->item(0);
         }
         // The ID is the number right before ".htm" in the link.
         if (!preg_match('/([0-9]+)\\.htm.*/', $a->getAttribute("href"), $m)) {
             continue;
         }
         // Skip ads that were already sent.
         if ($filter && $m[1] <= $filter->getMinId()) {
             continue;
         }
         $ad->setLink($this->formatLink($a->getAttribute("href")))->setId($m[1]);
         foreach ($result->getElementsByTagName("div") as $node) {
             if (!$node->hasAttribute("class")) {
                 continue;
             }
             $class = $node->getAttribute("class");
             if ($class == "date") {
                 $dateStr = preg_replace("#\\s+#", " ", trim($node->nodeValue));
                 $aDate = explode(' ', $dateStr);
                 $midnight = strtotime(date("Y-m-d") . " 00:00:00");
                 if (false !== strpos($dateStr, 'Aujourd')) {
                     // Today.
                     $time = $midnight;
                 } elseif (false !== strpos($dateStr, 'Hier')) {
                     // Yesterday.
                     $time = strtotime("-1 day", $midnight);
                 } else {
                     // "<day> <month name> ..." in the current year.
                     // These `continue`s only skip this date node; the ad then has
                     // no date and is left out of the result at the end of the loop.
                     if (!isset($aDate[1]) || !isset(self::$months[$aDate[1]])) {
                         continue;
                     }
                     $time = strtotime(date("Y") . "-" . self::$months[$aDate[1]] . "-" . $aDate[0]);
                     if (false === $time) {
                         continue;
                     }
                 }
                 // The last token is read as "HH:MM"; a missing minutes part counts as 0.
                 $aTime = explode(":", $aDate[count($aDate) - 1]);
                 $minutes = isset($aTime[1]) ? (int) $aTime[1] : 0;
                 $time += (int) $aTime[0] * 3600 + $minutes * 60;
                 // The year is not part of the date string: a date in the future
                 // must belong to the previous year.
                 if ($timeToday < $time) {
                     $time = strtotime("-1 year", $time);
                 }
                 $ad->setDate($time);
             } elseif ($class == "title") {
                 $ad->setTitle(trim($node->nodeValue));
             } elseif ($class == "image") {
                 $img = $node->getElementsByTagName("img");
                 if ($img->length > 0) {
                     $ad->setThumbnailLink($this->formatLink($img->item(0)->getAttribute("src")));
                 }
             } elseif ($class == "placement") {
                 // "<city> / <country>", or the country alone when there is no slash.
                 $placement = $node->nodeValue;
                 if (false !== strpos($placement, "/")) {
                     $placement = explode("/", $placement);
                     $ad->setCountry(trim($placement[1]))->setCity(trim($placement[0]));
                 } else {
                     $ad->setCountry(trim($placement));
                 }
             } elseif ($class == "category") {
                 // A "(pro)" marker in the category flags a professional seller.
                 $category = $node->nodeValue;
                 if (false !== strpos($category, "(pro)")) {
                     $ad->setProfessional(true);
                 }
                 $ad->setCategory(trim(str_replace("(pro)", "", $category)));
             } elseif ($class == "price") {
                 // Separate match variable so the ID match in $m is not overwritten.
                 if (preg_match("#[0-9 ]+#", $node->nodeValue, $priceMatch)) {
                     $ad->setPrice((int) str_replace(" ", "", trim($priceMatch[0])));
                 }
             } elseif ($class == "urgent") {
                 $ad->setUrgent(true);
             }
         }
         // An <h2>, when present, takes precedence over the "title" div.
         $h2Tags = $result->getElementsByTagName("h2");
         if ($h2Tags->length) {
             $ad->setTitle(trim($h2Tags->item(0)->nodeValue));
         }
         if ($filter && !$filter->isValid($ad)) {
             continue;
         }
         // Ads whose date could not be parsed are ignored.
         if ($ad->getDate()) {
             $ads[$ad->getId()] = $ad;
         }
     }
     return $ads;
 }