/**
 * Creates a new, empty cookie jar file inside self::$sCookieJarFolder.
 *
 * The folder is created (recursively) when it does not exist yet. The file
 * name is built from Utilitu::sMicrotime(), an underscore, an 8-character
 * Utilitu::sRandomString() and the ".cookie" extension.
 *
 * @param bool $bUse when truthy, the new file is registered via
 *                   vSetCookieJarFile() on the current instance, or on
 *                   self::$oDefault when no $this is available
 * @return string path of the created cookie jar file
 */
public function sCreateCookieJarFile($bUse = true)
{
    // Fall back to the shared default instance when there is no $this.
    if (isset($this)) {
        $oTarget = $this;
    } else {
        $oTarget = self::$oDefault;
    }

    $sFolder = self::$sCookieJarFolder;
    if (!file_exists($sFolder)) {
        mkdir($sFolder, 0777, true);
    }

    // Keep the call order (microtime first, random string second).
    $sTimePart = Utilitu::sMicrotime();
    $sRandomPart = Utilitu::sRandomString(8);
    $sJarPath = $sFolder . '/' . $sTimePart . '_' . $sRandomPart . '.cookie';

    // Create the file up front so it exists before it is handed out.
    file_put_contents($sJarPath, '');

    if ($bUse) {
        $oTarget->vSetCookieJarFile($sJarPath);
    }

    return $sJarPath;
}
/**
 * Builds the HTML tag that includes a JavaScript or CSS resource.
 *
 * The link is first passed through self::sFileToLink(). When no file type
 * is given, it is read from the extension after the last dot of the link.
 *
 * @param string      $sLink     file or link to include
 * @param string|null $sFileType 'js' or 'css'; detected from the link when falsy
 * @return string|null the <script> or <link> tag, or null for any other type
 */
public static function sMakeIncludeHtml($sLink, $sFileType = null)
{
    $sLink = self::sFileToLink($sLink);

    if (!$sFileType) {
        $sFileType = Utilitu::sPregRead('#\\.([^\\.]+)$#', $sLink);
    }

    // 'css' is tested first on purpose: with loose comparison a value could
    // match both types, and in that case the stylesheet tag is the result.
    if ($sFileType == 'css') {
        return '<link rel="stylesheet" type="text/css" media="all" href="' . htmlspecialchars($sLink) . '" />';
    }

    if ($sFileType == 'js') {
        return '<script type="text/javascript" src="' . htmlspecialchars($sLink) . '"></script>';
    }

    return null;
}
/**
 * Resolves an address to coordinates, using self::$sTable as a cache.
 *
 * The trimmed address is hashed with Utilitu::sConditionalHash() and looked
 * up in the cache table. On a cache miss the geocoding endpoint is queried,
 * and the location of the first result is stored together with the raw
 * response before being returned.
 *
 * A lookup that yields no usable result (undecodable response, empty result
 * list, missing location) is not written to the cache, so that a failed
 * request does not permanently pin empty coordinates to the address.
 *
 * @param string $sAddress free-form address
 * @return StdClass object with nX (longitude) and nY (latitude); both are
 *                  null when the address could not be resolved
 */
public static function oGetCoords($sAddress)
{
    $sAddress = trim($sAddress);
    $sAddressHash = Utilitu::sConditionalHash($sAddress);
    $aWhere = array('address_hash' => $sAddressHash);

    $oCoords = new StdClass();
    $oCoords->nX = null;
    $oCoords->nY = null;

    $oAddress = DirectDB::oSelectOne(self::$sTable, $aWhere);

    if (!$oAddress) {
        $sResponse = Curl::sGet('http://maps.google.com/maps/api/geocode/json?address=' . urlencode($sAddress) . '&sensor=false');
        $oResponse = json_decode($sResponse);

        // isset() walks the whole chain safely, so this also covers a failed
        // request, invalid JSON and an empty "results" list.
        $bHasLocation = isset(
            $oResponse->results[0]->geometry->location->lng,
            $oResponse->results[0]->geometry->location->lat
        );
        if (!$bHasLocation) {
            return $oCoords;
        }

        $oLocation = $oResponse->results[0]->geometry->location;

        DirectDB::iInsert(self::$sTable, array(
            'address_hash' => $sAddressHash,
            'address_stub' => substr($sAddress, 0, 64),
            'fetched' => date('Y-m-d H:i:s'),
            'x' => (string) $oLocation->lng,
            'y' => (string) $oLocation->lat,
            'response' => $sResponse,
        ));

        // Read the row back so that fresh and cached lookups return the
        // values in the same form.
        $oAddress = DirectDB::oSelectOne(self::$sTable, $aWhere);
        if (!$oAddress) {
            return $oCoords;
        }
    }

    $oCoords->nX = $oAddress->x;
    $oCoords->nY = $oAddress->y;

    return $oCoords;
}
/**
 * Parses a stored ad page into an Ad object, saves it and flags the page as parsed.
 *
 * Steps, in order:
 *  - load the stored page via Ad::oGetHtml() and copy its metadata to oPage
 *  - read the floor area from the ".headline-key-facts" element
 *  - read the cost table and map the known labels to oPrice fields
 *  - read zip and street from the address paragraph and geocode them
 *  - download the large version of every "img.sp-image" into self::$sImagesFolder
 *  - collect the ".freitext" blocks into the description
 *  - read the "Angebot vom:" date into sCreated / sChanged
 *  - call Ad::iRemoveAdsByUrl() for the page URL, save the new ad and set
 *    "parsed" on the ads_htmls row
 *
 * All oPrice values are integers holding 100 times the parsed amount.
 *
 * @param int $iHtmlID id passed to Ad::oGetHtml()
 * @return Ad the saved ad
 */
public static function oParseHtml($iHtmlID)
{
    $oHtml = Ad::oGetHtml($iHtmlID);

    $oAd = new Ad();
    $oAd->oPage->iHtmlID = $oHtml->id;
    $oAd->oPage->sListID = $oHtml->list;
    $oAd->oPage->sFetched = $oHtml->fetched;
    $oAd->oPage->sDomain = $oHtml->domain_hash;
    $oAd->oPage->sUrl = $oHtml->url;

    $oDom = HtmlDomParser::str_get_html($oHtml->html);
    $oMainInfo = $oDom->find('.panel-body > .row', 0);

    // Floor area. The pattern also captures a decimal comma, which has to be
    // turned into a dot before floatval() or the fraction would be dropped.
    $sOrange = $oDom->find('.headline-key-facts', 0)->innertext;
    $sSquareMeters = Utilitu::sPregRead('#röße:\\s+([,\\d]+)m#', $sOrange);
    $oAd->oPhysical->nSquareMeters = floatval(str_replace(',', '.', (string) $sSquareMeters));

    // Cost table: one row per cost type, label in the first cell, amount in the second.
    $aKostenRows = $oMainInfo->find('.col-sm-5 tbody tr');
    $aKostenRowsByLabel = array();
    foreach ($aKostenRows as $oKostenRow) {
        $aCells = $oKostenRow->find('td');
        if (count($aCells) < 2) {
            // Not a label/value row (e.g. a spacer or heading); nothing to read.
            continue;
        }

        $sLabel = trim($aCells[0]->plaintext, "\t\n :");
        $sValue = trim($aCells[1]->plaintext, "\t\n ");

        // Convert the amount to hundredths: drop the currency sign, drop dots
        // used as thousands separators (a dot followed by exactly three
        // digits), use a dot as decimal separator, then scale and round.
        // Scaling before the integer conversion keeps the fractional part.
        $sNumber = str_replace('€', '', $sValue);
        $sNumber = preg_replace('#\\.(?=\\d{3}(?!\\d))#', '', $sNumber);
        $sNumber = str_replace(',', '.', $sNumber);
        $iPrice = (int) round(100 * floatval($sNumber));

        $aKostenRowsByLabel[$sLabel] = array(
            'oDom' => $oKostenRow,
            'sLabel' => $sLabel,
            'sValue' => $sValue,
            'iPrice' => $iPrice,
        );
    }

    // oPrice field => label of the cost row it is read from.
    $aKostenMap = array(
        'iCold' => 'Miete',
        'iNeben' => 'Nebenkosten',
        'iOther' => 'Sonstige Kosten',
        'iBail' => 'Kaution',
        'iBuy' => 'Abschlagszahlung',
    );
    foreach ($aKostenMap as $sTarget => $sSource) {
        if (isset($aKostenRowsByLabel[$sSource])) {
            $oAd->oPrice->{$sTarget} = $aKostenRowsByLabel[$sSource]['iPrice'];
        }
    }
    $oAd->oPrice->iWarm = $oAd->oPrice->iCold + $oAd->oPrice->iNeben;

    // Address: the paragraph is split into lines at its <br> tags.
    $sAddressHtml = $oMainInfo->find('.col-sm-4 > p', 0)->innertext;
    $sAddress = trim($sAddressHtml);
    $sAddress = str_replace("\n", '', $sAddress);
    $sAddress = preg_replace('#<br ?/?>\\s+#', "\n", $sAddress);
    $aAddress = explode("\n", $sAddress);

    $oAd->oAddress->sCity = 'Aachen';
    $oAd->oAddress->sZip = Utilitu::sPregRead('#\\s*(\\d+)#', $aAddress[0]);
    $oAd->oAddress->sStreet = trim($aAddress[1]);

    $sGeocodeAddress = $oAd->oAddress->sStreet . ', ' . $oAd->oAddress->sZip . ' ' . 'Aachen';
    $oCoords = Maps::oGetCoords($sGeocodeAddress);
    $oAd->oAddress->oCoords = $oCoords;

    // Images: each one is stored locally under the md5 of its URL and is only
    // downloaded when that file does not exist yet.
    $aImageDoms = $oDom->find('img.sp-image');
    foreach ($aImageDoms as $oImageDom) {
        if (!isset($oImageDom->attr['data-large'])) {
            continue;
        }

        $oImage = new StdClass();
        $oImage->sUrl = str_replace('/./', '/', $oImageDom->attr['data-large']);
        $sFileType = Utilitu::sPregRead('#\\.([^\\.]+)$#', $oImage->sUrl);
        $oImage->sFile = self::$sImagesFolder . md5($oImage->sUrl) . '.' . $sFileType;

        if (!file_exists($oImage->sFile)) {
            $sImage = Curl::sGet($oImage->sUrl);
            if (Curl::iGetLastStatus() == 200) {
                file_put_contents($oImage->sFile, $sImage);
            }
        }

        // Only images that are actually available locally are attached.
        if (file_exists($oImage->sFile)) {
            $oAd->oPage->aImages[] = $oImage;
        }
    }

    // Description: all ".freitext" blocks under the parent of "#infobox_nachrichtsenden".
    $aDescription = array();
    $aDescriptionBlocks = $oDom->find('#infobox_nachrichtsenden', 0)->parent->find('.freitext');
    foreach ($aDescriptionBlocks as $oDescriptionBlock) {
        $aDescription[] = $oDescriptionBlock->plaintext;
    }
    $oAd->oPage->sDescription = implode("\n\n", $aDescription);
    $oAd->oPage->sDescription = preg_replace('#\\n\\s+#', "\n", $oAd->oPage->sDescription);
    // NOTE(review): the search string below is a single whitespace character.
    // Presumably it is meant to be a non-breaking space; confirm against the
    // original file, since a plain space would strip every space from the text.
    $oAd->oPage->sDescription = str_replace(' ', '', $oAd->oPage->sDescription);

    // Offer date: used for both the created and the changed timestamp.
    $aPotentialDates = $oDom->find('.col-sm-4 .col-sm-12');
    foreach ($aPotentialDates as $oPotentialDate) {
        if (preg_match('#^\\s*Angebot vom:\\s*(.+)\\s*$#', $oPotentialDate->plaintext, $aMatch)) {
            $sDate = date('Y-m-d H:i:s', strtotime($aMatch[1]));
            $oAd->oPage->sCreated = $sDate;
            $oAd->oPage->sChanged = $sDate;
        }
    }

    /// TODO: oContact

    // Presumably clears ads stored earlier for the same URL before the new
    // one is saved - confirm against Ad::iRemoveAdsByUrl().
    Ad::iRemoveAdsByUrl($oHtml->url);
    $oAd->vSave();
    DirectDB::bUpdate('ads_htmls', array('parsed' => true), $oHtml->id);

    return $oAd;
}
<?php

// JSON endpoint: returns, under the key "aAds", all ads whose source page was
// fetched within the last three days and whose list is not 'wg0'.
require_once '../init.php';

$sFetchedAfter = date('Y-m-d H:i:s', strtotime('now - 3 days'));

$sAdsQuery = "\n\t\tSELECT a.* FROM ads AS a\n\t\tJOIN ads_htmls AS h ON a.html_id = h.id\n\t\tWHERE h.list != 'wg0' AND h.fetched > '" . $sFetchedAfter . "';\n\t";

Utilitu::vReturnJson(array('aAds' => Ad::aGet($sAdsQuery)));
/**
 * Fetches the ads_htmls row with the earliest "fetched" value for a URL.
 *
 * Rows are matched on url_hash, computed with Utilitu::sConditionalHash().
 *
 * @param string $sUrl URL of the page
 * @return mixed whatever DirectDB::oSelectOne() returns for the query
 */
public static function oGetEarliestHtmlForUrl($sUrl)
{
    return DirectDB::oSelectOne(
        'ads_htmls',
        array('url_hash' => Utilitu::sConditionalHash($sUrl)),
        '*',
        'ORDER BY fetched ASC LIMIT 1'
    );
}
/**
 * Initialises self::$sHtdocs with the resolved path of the directory one
 * level above the directory containing this file.
 *
 * @return void
 */
public static function vInit()
{
    $sParentOfThisDir = dirname(__FILE__) . '/..';
    self::$sHtdocs = realpath($sParentOfThisDir);
}