Exemplo n.º 1
0
 /**
  * Parses one stored HTML page into an Ad, saves it and flags the HTML row as parsed.
  *
  * In order, the method:
  *  - loads the stored page via Ad::oGetHtml() and copies its metadata to $oAd->oPage,
  *  - reads the living space from the '.headline-key-facts' element,
  *  - reads the cost table and maps known labels to $oAd->oPrice (values in cents),
  *  - reads zip and street from the address paragraph and asks Maps for coordinates,
  *  - downloads every 'img.sp-image' that has a 'data-large' attribute into
  *    self::$sImagesFolder (skipping files that already exist),
  *  - collects the '.freitext' blocks as description and the "Angebot vom:" date,
  *  - removes existing ads with the same URL, saves the new ad and sets
  *    ads_htmls.parsed for the source row.
  *
  * @param int|string $iHtmlID Identifier handed to Ad::oGetHtml()
  *                            (presumably the id of an ads_htmls row - confirm).
  *
  * @return Ad The populated ad, after vSave() has been called on it.
  *
  * @throws \RuntimeException When the HTML cannot be turned into a DOM or a
  *                           structural element the parser relies on is missing.
  *                           Thrown before any existing ad is removed.
  */
 public static function oParseHtml($iHtmlID)
 {
     $oHtml = Ad::oGetHtml($iHtmlID);
     $oAd = new Ad();
     $oAd->oPage->iHtmlID = $oHtml->id;
     $oAd->oPage->sListID = $oHtml->list;
     $oAd->oPage->sFetched = $oHtml->fetched;
     $oAd->oPage->sDomain = $oHtml->domain_hash;
     $oAd->oPage->sUrl = $oHtml->url;

     // Fail with a descriptive exception instead of a method call on false/null.
     $oDom = HtmlDomParser::str_get_html($oHtml->html);
     if (!$oDom) {
         throw new \RuntimeException('Could not parse stored HTML ' . $iHtmlID . ' into a DOM');
     }
     $oMainInfo = $oDom->find('.panel-body > .row', 0);
     if (!$oMainInfo) {
         throw new \RuntimeException('Main info row not found in stored HTML ' . $iHtmlID);
     }

     // Living space. The capture may contain a decimal comma ("45,5"), which
     // floatval() would cut off at the comma, so normalise it to a dot first.
     $oKeyFacts = $oDom->find('.headline-key-facts', 0);
     $sOrange = $oKeyFacts ? $oKeyFacts->innertext : '';
     $sSquareMeters = Utilitu::sPregRead('#röße:\\s+([,\\d]+)m#', $sOrange);
     $oAd->oPhysical->nSquareMeters = floatval(str_replace(',', '.', (string) $sSquareMeters));

     // Cost table: one row per cost type, label in the first cell, amount in the second.
     $aKostenRows = $oMainInfo->find('.col-sm-5 tbody tr');
     $aKostenRowsByLabel = array();
     foreach ($aKostenRows as $oKostenRow) {
         $aCells = $oKostenRow->find('td');
         if (count($aCells) < 2) {
             // Rows without a label/value pair carry no price.
             continue;
         }
         $sLabel = trim($aCells[0]->plaintext, "\t\n :");
         $sValue = trim($aCells[1]->plaintext, "\t\n ");
         $sNumber = str_replace('€', '', $sValue);
         if (strpos($sNumber, ',') !== false) {
             // With a decimal comma present, dots can only be grouping separators ("1.200,50").
             $sNumber = str_replace('.', '', $sNumber);
         }
         $sNumber = str_replace(',', '.', $sNumber);
         // Convert to cents AFTER parsing the fraction; round() absorbs float noise
         // such as 19.99 * 100 = 1998.9999...
         $iPrice = (int) round(100 * floatval($sNumber));
         $aKostenRowsByLabel[$sLabel] = array('oDom' => $oKostenRow, 'sLabel' => $sLabel, 'sValue' => $sValue, 'iPrice' => $iPrice);
     }
     $aKostenMap = array('iCold' => 'Miete', 'iNeben' => 'Nebenkosten', 'iOther' => 'Sonstige Kosten', 'iBail' => 'Kaution', 'iBuy' => 'Abschlagszahlung');
     foreach ($aKostenMap as $sTarget => $sSource) {
         if (isset($aKostenRowsByLabel[$sSource])) {
             $oAd->oPrice->{$sTarget} = $aKostenRowsByLabel[$sSource]['iPrice'];
         }
     }
     $oAd->oPrice->iWarm = $oAd->oPrice->iCold + $oAd->oPrice->iNeben;

     // Address paragraph: lines are separated by <br>; the first line is read
     // for the zip code, the second is taken as the street.
     $oAddressDom = $oMainInfo->find('.col-sm-4 > p', 0);
     $sAddress = $oAddressDom ? trim($oAddressDom->innertext) : '';
     $sAddress = str_replace("\n", '', $sAddress);
     $sAddress = preg_replace('#<br ?/?>\\s+#', "\n", $sAddress);
     $aAddress = explode("\n", $sAddress);
     $oAd->oAddress->sCity = 'Aachen';
     $oAd->oAddress->sZip = Utilitu::sPregRead('#\\s*(\\d+)#', $aAddress[0]);
     $oAd->oAddress->sStreet = isset($aAddress[1]) ? trim($aAddress[1]) : '';
     $sGeocodeAddress = $oAd->oAddress->sStreet . ', ' . $oAd->oAddress->sZip . ' ' . 'Aachen';
     $oAd->oAddress->oCoords = Maps::oGetCoords($sGeocodeAddress);

     // Images: cached on disk under the md5 of their URL; only images whose
     // file exists afterwards are attached to the ad.
     $aImageDoms = $oDom->find('img.sp-image');
     foreach ($aImageDoms as $oImageDom) {
         if (!isset($oImageDom->attr['data-large'])) {
             continue;
         }
         $oImage = new StdClass();
         $oImage->sUrl = str_replace('/./', '/', $oImageDom->attr['data-large']);
         $sFileType = Utilitu::sPregRead('#\\.([^\\.]+)$#', $oImage->sUrl);
         $oImage->sFile = self::$sImagesFolder . md5($oImage->sUrl) . '.' . $sFileType;
         if (!file_exists($oImage->sFile)) {
             $sImage = Curl::sGet($oImage->sUrl);
             if (Curl::iGetLastStatus() == 200) {
                 file_put_contents($oImage->sFile, $sImage);
             }
         }
         if (file_exists($oImage->sFile)) {
             $oAd->oPage->aImages[] = $oImage;
         }
     }

     // Description: every '.freitext' block below the parent of the message box.
     $oMessageBox = $oDom->find('#infobox_nachrichtsenden', 0);
     if (!$oMessageBox || !$oMessageBox->parent) {
         throw new \RuntimeException('Description container not found in stored HTML ' . $iHtmlID);
     }
     $aDescription = array();
     $aDescriptionBlocks = $oMessageBox->parent->find('.freitext');
     foreach ($aDescriptionBlocks as $oDescriptionBlock) {
         $aDescription[] = $oDescriptionBlock->plaintext;
     }
     $oAd->oPage->sDescription = implode("\n\n", $aDescription);
     $oAd->oPage->sDescription = preg_replace('#\\n\\s+#', "\n", $oAd->oPage->sDescription);
     $oAd->oPage->sDescription = str_replace('&nbsp;', '', $oAd->oPage->sDescription);

     // Offer date: used for both created and changed.
     $aPotentialDates = $oDom->find('.col-sm-4 .col-sm-12');
     foreach ($aPotentialDates as $oPotentialDate) {
         if (preg_match('#^\\s*Angebot vom:\\s*(.+)\\s*$#', $oPotentialDate->plaintext, $aMatch)) {
             $iTimestamp = strtotime(trim($aMatch[1]));
             if ($iTimestamp === false) {
                 // An unparsable date would otherwise be stored as 1970-01-01.
                 continue;
             }
             $sDate = date('Y-m-d H:i:s', $iTimestamp);
             $oAd->oPage->sCreated = $sDate;
             $oAd->oPage->sChanged = $sDate;
         }
     }

     /// TODO: oContact
     // Replace any previously stored ad for this URL, then mark the HTML row as done.
     Ad::iRemoveAdsByUrl($oHtml->url);
     $oAd->vSave();
     DirectDB::bUpdate('ads_htmls', array('parsed' => true), $oHtml->id);
     return $oAd;
 }
Exemplo n.º 2
0
 /**
  * Writes this ad to the 'ads' table.
  *
  * The row consists of the page's HTML id, its created/changed values and
  * the JSON-encoded $this->oData. When $this->iID is truthy the row with
  * that id is updated; otherwise a new row is inserted and the value
  * returned by DirectDB::iInsert() is stored in $this->iID.
  *
  * @return void
  */
 public function vSave()
 {
     $aRow = array(
         'html_id' => $this->oPage->iHtmlID,
         'created' => $this->oPage->sCreated,
         'changed' => $this->oPage->sChanged,
         'json_data' => json_encode($this->oData),
     );

     // No id yet: insert and remember the new id.
     if (!$this->iID) {
         $this->iID = DirectDB::iInsert('ads', $aRow);
         return;
     }

     DirectDB::bUpdate('ads', $aRow, $this->iID);
 }