/**
 * Converts provider output to db's input format.
 *
 * Reads title, description, location, price, meters, floor, canonical url
 * and the slider images from the item page, and flags the elevator when the
 * extras list contains an "Ascensor" entry.
 *
 * @param QueryPath $html parsed item page
 *
 * @return mixed (array/boolean) the item data, or false when the item has no
 *                               meters or no description
 */
public function parseItem($html)
{
    $images = [];

    /*
        transform
            http://a.ftcs.es/inmesp/anuncio/2015/04/03/135151707/253141017.jpg/w_0/c_690x518/p_1/
        to
            http://a.ftcs.es/inmesp/anuncio/2015/04/03/135151707/253141017.jpg
    */
    foreach ($html->find('#containerSlider img') as $img) {
        // prefer data-src and fall back to the plain src attribute
        $src = $img->attr("data-src");
        if (empty($src)) {
            $src = $img->attr("src");
        }

        // no source at all: skip it instead of storing a bare ".jpg"
        if (empty($src)) {
            continue;
        }

        // cut everything after the first ".jpg"; urls without that extension
        // are kept untouched instead of getting ".jpg" appended
        $extensionPos = strpos($src, ".jpg");
        if ($extensionPos !== false) {
            $src = substr($src, 0, $extensionPos + strlen(".jpg"));
        }

        $images[] = $src;
    }

    $data = [
        'title' => trim($html->find('.property-title')->text()),
        'description' => trim($html->find('#ctl00_ddDescription .detail-section-content')->text()),
        'images' => $images,
        'location' => trim($html->find('.section.section--noBorder .detail-section-content')->text()),
        'price' => $this->strToNumber($html->find('#priceContainer')->text()),
        'meters' => $this->strToNumber($html->find('#litSurface b')->text()),
        'floor' => (int) $html->find('#litFloor')->text(),
        'url' => $html->find('link[rel="canonical"]')->attr("href"),
    ];

    // the only extra that is stored is the elevator
    foreach ($html->find('.detail-extras li') as $li) {
        if (trim($li->text()) === "Ascensor") {
            $data["elevator"] = true;
        }
    }

    // items without surface or description are discarded
    if ($data["meters"] == 0 || empty($data["description"])) {
        return false;
    }

    return $data;
}
/**
 * Converts provider output to db's input format.
 *
 * Reads title, description, location, price, share url and images from the
 * item page; the remaining fields are filled in by parseHouseInfo() from the
 * toolbar info entries.
 *
 * @param QueryPath $html parsed item page
 *
 * @return mixed (array/boolean) the item data, or false when the "ch" image
 *                               token cannot be read, or the item has no
 *                               meters or no description
 */
public function parseItem($html)
{
    $images = [];

    // get ch var from og image (required to display the images)
    $ogImage = $html->find('[name="og:image"]')->attr("content");
    if (empty($ogImage)) {
        return false;
    }

    // the og image may come without a query string (or be malformed)
    $ogQuery = parse_url($ogImage, PHP_URL_QUERY);
    if (empty($ogQuery)) {
        return false;
    }

    parse_str($ogQuery, $query);
    if (!isset($query["ch"]) || !is_string($query["ch"]) || $query["ch"] === "") {
        // without the ch var no image url can be built
        return false;
    }
    $imageCh = $query["ch"];

    /*
        transform
            http://img3.idealista.com/thumbs,W,H,wi,+tSLyO%2BcnvWFQ1vfQ1%2FQRH6EBc9TEzAKu5PmhgV%2
        to
            http://img3.idealista.com/thumbs?wi=1500&he=0&en=%2BtSLyO%2BcnvWFQ1vfQ1%2FQRH6EBc9TEzAKu5PmhgV%2&ch=2106166706
    */
    foreach ($html->find('#main-multimedia img') as $img) {
        $service = $img->attr("data-service");

        // images without the data-service attribute would give a url with an empty token
        if (empty($service)) {
            continue;
        }

        $image = str_replace("http://img3.idealista.com/thumbs,W,H,wi,+", "", $service);
        $images[] = "http://img3.idealista.com/thumbs?wi=1500&he=0&en=%2B" . urlencode($image) . "&ch=" . $imageCh;
    }

    // the location is the title without the sale/rent prefix
    $title = trim($html->find('h1.txt-bold span')->text());
    $location = str_replace("Piso en venta en ", "", $title);
    $location = str_replace("Piso en alquiler en ", "", $location);

    $data = [
        'title' => $title,
        'description' => trim($html->find('.adCommentsLanguage.expandable')->text()),
        'images' => $images,
        'location' => $location,
        'price' => $this->strToNumber($html->find('#main-info .txt-big.txt-bold')->eq(0)->text()),
        'url' => $html->find('#share-link')->attr("href"),
    ];

    foreach ($html->find('#fixed-toolbar .info-data > span') as $item) {
        $text = $item->text();
        $this->parseHouseInfo($text, $data);
    }

    // items without surface or description are discarded
    if (!isset($data["meters"]) || $data["meters"] == 0 || empty($data["description"])) {
        return false;
    }

    return $data;
}
/**
 * Builds the db input record for a single item page.
 *
 * @param QueryPath $html parsed item page
 *
 * @return mixed (array/boolean) the record, or false when the item has no
 *                               meters (missing or below 1)
 */
public function parseItem($html)
{
    $record = [
        'title' => trim($html->find('h1.title')->text()),
        'description' => trim($html->find('.description')->text()),
        'price' => $this->strToNumber($html->find('.jsPrecioH1')->eq(0)->text()),
        'url' => $html->find('link[rel="canonical"]')->attr("href"),
    ];

    // copy every configured itemprop that has a non-empty content attribute
    foreach ($this->itemProps as $propName) {
        $content = $html->find('[itemprop="' . $propName . '"]')->attr("content");
        if (empty($content)) {
            continue;
        }
        $record[$propName] = $content;
    }

    // prefer the exact street address; otherwise use the item name without
    // its sale/rent prefix
    $address = $html->find('[itemprop="streetAddress"]')->attr("content");
    if (empty($address)) {
        $address = str_replace(
            ["Piso en venta en ", "Piso en alquiler en "],
            "",
            $html->find('meta[itemprop="name"]')->attr("content")
        );
    }
    $record['location'] = $address . ", " . $html->find('h2.position')->text();

    foreach ($html->find('.characteristics .item') as $feature) {
        $featureText = $feature->text();
        $this->parseHouseInfo($featureText, $record);
    }

    // items published without the apartment meters are skipped
    if (!isset($record["meters"]) || $record["meters"] < 1) {
        return false;
    }

    /*
        from
            http://fotos.imghs.net/s/1030/129/1030_27926263129_1_2015112416580031250.jpg
        to
            http://fotos.imghs.net/xl/1030/129/1030_27926263129_1_2015112416580031250.jpg
    */
    $placeholderMarkers = ["nofoto_mini.jpg", "blank1x1.png", "Images/assets"];
    $photos = [];
    foreach ($html->find("#basic img") as $img) {
        $photo = str_replace(".net/s/", ".net/xl/", $img->attr("src"));

        // default photos are not stored
        foreach ($placeholderMarkers as $marker) {
            if (strpos($photo, $marker) !== false) {
                continue 2;
            }
        }

        $photos[] = $photo;
    }

    if (count($photos) > 0) {
        $record["images"] = $photos;
    }

    return $record;
}
/**
 * Builds the db input record for a single item page.
 *
 * @param QueryPath $html parsed item page
 *
 * @return mixed (array/boolean) the record, or false when the page shows the
 *                               ".cajon-pedir-foto" or ".pvpdesde" blocks
 */
public function parseItem($html)
{
    // doesnt have images or price
    if (!(empty($html->find('.cajon-pedir-foto')->text()) && empty($html->find('.pvpdesde')->text()))) {
        return false;
    }

    // collapse every run of whitespace in the address into a single space
    $address = trim(preg_replace('/(\\v|\\s)+/', ' ', $html->find('.dir_ex.sprite')->text()));
    $summary = trim($html->find('[itemprop="description"] p')->text());

    $record = [
        'title' => $html->find('.h1ficha')->text(),
        'location' => $address,
        'description' => $summary,
        'url' => $html->find('link[rel="canonical"]')->attr("href"),
        "price" => $this->strToNumber($html->find('[itemprop="price"]')->text()),
    ];

    // the update date is the digits/slashes found between parentheses
    $updatedLabel = trim($html->find('.actualizado.radius')->text());
    if (preg_match("/\\(([0-9\\/]+)\\)/", $updatedLabel, $dateMatch) === 1) {
        $record["lastUpdate"] = $dateMatch[1];
    }

    // header entries: surface and number of rooms
    foreach ($html->find('#inificha .bodis ul li') as $entry) {
        $entryText = $entry->text();
        if (strpos($entryText, " m2") !== false) {
            $record["meters"] = $this->strToNumber($entry->find("span")->text());
            continue;
        }
        if (strpos($entryText, "habitaciones") !== false) {
            $record["rooms"] = (int) $entryText;
        }
    }

    // "label: value" features whose value goes through stringToBool()
    $booleanFeatures = [
        "Aire acondicionado" => "airConditioner",
        "Calefacción" => "heating",
        "Parking" => "parking",
        "Ascensor" => "elevator",
        "Amueblado" => "furnished",
    ];
    foreach ($html->find('.caracteristicas li') as $feature) {
        $featureText = $feature->text();
        if (strpos($featureText, ":") === false) {
            continue;
        }

        $parts = explode(":", $featureText);
        $label = trim($parts[0]);

        if ($label === "Número de planta") {
            $record["floor"] = (int) $parts[1];
        } elseif (isset($booleanFeatures[$label])) {
            $record[$booleanFeatures[$label]] = $this->stringToBool(trim($parts[1]));
        }
    }

    // swap the "G" image variant for the "XL" one
    $photos = [];
    foreach ($html->find(".ficha_foto img") as $img) {
        $photos[] = str_replace("G.jpg", "XL.jpg", $img->attr("src"));
    }
    if (count($photos) > 0) {
        $record["images"] = $photos;
    }

    return $record;
}