Maps::vWipeDatabase(); Ad::vWipeDatabase(); } if (isset($_REQUEST['ad_html'])) { $oAds = DirectDB::oSelectOne('ads', array('id' => intval($_REQUEST['ad_html']))); $oHtml = DirectDB::oSelectOne('ads_htmls', array('id' => intval($oAds->html_id))); $sHtml = $oHtml->html; ODT::vDump(WgGesuchtReader::oParseHtml(null, $sHtml)); exit($sHtml); } $bFetch = isset($_REQUEST['fetch']) || isset($_REQUEST['fetch_and_parse']); $bParse = isset($_REQUEST['parse']) || isset($_REQUEST['fetch_and_parse']); if (isset($_REQUEST['search_html'])) { if (isset($_REQUEST['url'])) { $aWhere = array('url' => array('%like%' => $_REQUEST['url'])); $oHtml = DirectDB::oSelectOne('ads_htmls', $aWhere, 'id , url', 'ORDER BY fetched DESC LIMIT 1'); ODT::vDump($oHtml); } } if ($bFetch) { Ad::vDeleteDuplicateUrlAds(); WgGesuchtReader::vFetch(); } if ($bParse) { $iParse = intval($_REQUEST['parse']); if ($iParse) { $oAd = WgGesuchtReader::oParseHtml($iParse); ODT::vDump($oAd); $oHtml = Ad::oGetHtml($iParse); ODT::vDump($oHtml); } else {
/**
 * Builds an instance of this class from the given access data and registers
 * it as the shared default in self::$oDefault.
 *
 * @param array $aAccessData Passed unchanged to the constructor.
 *                           NOTE(review): presumably the connection credentials
 *                           (user / password / database) - confirm against the constructor.
 * @return void
 */
public static function vSetDefault($aAccessData) {
    $oInstance = new self($aAccessData);
    self::$oDefault = $oInstance;
}
foreach ($aDirFiles as $sDirFile) { $aFiles[] = $sDirFile; } } else { $aFiles[] = $sFile; } } return $aFiles; } require 'lib/foreign/include_all.php'; spl_autoload_register(function ($sClass) { require_once 'lib/' . $sClass . '.php'; if (method_exists($sClass, 'vInit')) { $sClass::vInit(); } }); DirectDB::vSetDefault(array('sUser' => 'wohnungssuche', 'sPass' => 'c54effb3740c91a40760eda1e5c15319', 'sDaba' => 'wohnungssuche')); #$aInitClasses = array(); #$aLibFiles = aListFiles('lib'); #foreach ($aLibFiles as $sLibFile) { # if (preg_match('#\.php$#', $sLibFile)) { # $sClass = preg_replace('#^.+/#', '', preg_replace('#\.php$#', '', $sLibFile)); # require_once('lib/' . $sClass . '.php'); # if (method_exists($sClass, 'vInit')) { # $aInitClasses []= $sClass; # } # } #} #foreach ($aInitClasses as $sInitClass) { # $sInitClass::vInit(); #}
/**
 * Parses the stored HTML of one ad page into an Ad object and persists it.
 *
 * Steps, in order:
 *  - loads the HTML record via Ad::oGetHtml() and copies its metadata to $oAd->oPage,
 *  - reads the living area, the cost table, the address, the images, the
 *    free-text description and the "Angebot vom:" date out of the DOM,
 *  - resolves coordinates for the address via Maps::oGetCoords(),
 *  - downloads images that are not yet present in self::$sImagesFolder,
 *  - removes ads previously stored for the same URL, saves the new ad and flags
 *    the `ads_htmls` row as parsed.
 *
 * All prices are stored as integer cents. The city is hard-coded to "Aachen".
 *
 * @param int $iHtmlID ID of the HTML record, handed to Ad::oGetHtml().
 * @return Ad The populated ad, after it has been saved.
 */
public static function oParseHtml($iHtmlID) {
    $oHtml = Ad::oGetHtml($iHtmlID);

    $oAd = new Ad();
    $oAd->oPage->iHtmlID = $oHtml->id;
    $oAd->oPage->sListID = $oHtml->list;
    $oAd->oPage->sFetched = $oHtml->fetched;
    $oAd->oPage->sDomain = $oHtml->domain_hash;
    $oAd->oPage->sUrl = $oHtml->url;

    $oDom = HtmlDomParser::str_get_html($oHtml->html);
    $oMainInfo = $oDom->find('.panel-body > .row', 0);

    // Living area. The pattern admits a decimal comma ("12,5m"), but floatval()
    // stops at a comma, so it has to be turned into a decimal point first.
    $sOrange = $oDom->find('.headline-key-facts', 0)->innertext;
    $sSquareMeters = Utilitu::sPregRead('#röße:\\s+([,\\d]+)m#', $sOrange);
    $oAd->oPhysical->nSquareMeters = floatval(str_replace(',', '.', (string) $sSquareMeters));

    // Cost table: label cell => price in cents.
    $aCentsByLabel = array();
    foreach ($oMainInfo->find('.col-sm-5 tbody tr') as $oKostenRow) {
        $aCells = $oKostenRow->find('td');
        if (count($aCells) < 2) {
            // Spacer/heading rows carry no label/value pair.
            continue;
        }
        $sLabel = trim($aCells[0]->plaintext, "\t\n :");
        $sValue = trim($aCells[1]->plaintext, "\t\n ");
        $aCentsByLabel[$sLabel] = self::iPriceToCents($sValue);
    }
    $aKostenMap = array(
        'iCold' => 'Miete',
        'iNeben' => 'Nebenkosten',
        'iOther' => 'Sonstige Kosten',
        'iBail' => 'Kaution',
        'iBuy' => 'Abschlagszahlung',
    );
    foreach ($aKostenMap as $sTarget => $sSource) {
        if (isset($aCentsByLabel[$sSource])) {
            $oAd->oPrice->{$sTarget} = $aCentsByLabel[$sSource];
        }
    }
    $oAd->oPrice->iWarm = $oAd->oPrice->iCold + $oAd->oPrice->iNeben;

    // Address: the paragraph is split at its <br> tags; the first part supplies
    // the zip code, the second part the street.
    $sAddressHtml = $oMainInfo->find('.col-sm-4 > p', 0)->innertext;
    $sAddress = str_replace("\n", '', trim($sAddressHtml));
    $sAddress = preg_replace('#<br ?/?>\\s+#', "\n", $sAddress);
    $aAddress = explode("\n", $sAddress);
    $oAd->oAddress->sCity = 'Aachen';
    $oAd->oAddress->sZip = Utilitu::sPregRead('#\\s*(\\d+)#', $aAddress[0]);
    // A paragraph without <br> yields a single part only; avoid the undefined offset.
    $oAd->oAddress->sStreet = isset($aAddress[1]) ? trim($aAddress[1]) : '';
    $sGeocodeAddress = $oAd->oAddress->sStreet . ', ' . $oAd->oAddress->sZip . ' ' . 'Aachen';
    $oAd->oAddress->oCoords = Maps::oGetCoords($sGeocodeAddress);

    // Images. The URL comes from scraped markup, i.e. it is untrusted: the
    // extension ends up in a local file name, so only plain image extensions
    // taken from the URL *path* (no query string, no slashes) are accepted.
    $aAllowedFileTypes = array('jpg', 'jpeg', 'png', 'gif', 'webp');
    foreach ($oDom->find('img.sp-image') as $oImageDom) {
        if (!isset($oImageDom->attr['data-large'])) {
            continue;
        }
        $oImage = new StdClass();
        $oImage->sUrl = str_replace('/./', '/', $oImageDom->attr['data-large']);
        $sUrlPath = (string) parse_url($oImage->sUrl, PHP_URL_PATH);
        $sFileType = pathinfo($sUrlPath, PATHINFO_EXTENSION);
        if (!in_array(strtolower($sFileType), $aAllowedFileTypes, true)) {
            continue;
        }
        $oImage->sFile = self::$sImagesFolder . md5($oImage->sUrl) . '.' . $sFileType;
        if (!file_exists($oImage->sFile)) {
            $sImage = Curl::sGet($oImage->sUrl);
            if (Curl::iGetLastStatus() == 200) {
                file_put_contents($oImage->sFile, $sImage);
            }
        }
        // Only list images that really exist locally (download may have failed).
        if (file_exists($oImage->sFile)) {
            $oAd->oPage->aImages[] = $oImage;
        }
    }

    // Description: all free-text blocks next to the message box, joined.
    $aDescription = array();
    $aDescriptionBlocks = $oDom->find('#infobox_nachrichtsenden', 0)->parent->find('.freitext');
    foreach ($aDescriptionBlocks as $oDescriptionBlock) {
        $aDescription[] = $oDescriptionBlock->plaintext;
    }
    $sDescription = implode("\n\n", $aDescription);
    $sDescription = preg_replace('#\\n\\s+#', "\n", $sDescription);
    // Strip non-breaking spaces only (UTF-8 bytes C2 A0 and the literal entity).
    // Written as explicit escapes so an invisible character in the source can
    // never degrade into "remove every ordinary space".
    $sDescription = str_replace(array('&nbsp;', "\xC2\xA0"), '', $sDescription);
    $oAd->oPage->sDescription = $sDescription;

    // Offer date ("Angebot vom: ...").
    foreach ($oDom->find('.col-sm-4 .col-sm-12') as $oPotentialDate) {
        if (!preg_match('#^\\s*Angebot vom:\\s*(.+)\\s*$#', $oPotentialDate->plaintext, $aMatch)) {
            continue;
        }
        $iTimestamp = strtotime(trim($aMatch[1]));
        if ($iTimestamp === false) {
            // Unparseable date: leave the fields untouched instead of storing 1970-01-01.
            continue;
        }
        $sDate = date('Y-m-d H:i:s', $iTimestamp);
        $oAd->oPage->sCreated = $sDate;
        $oAd->oPage->sChanged = $sDate;
    }

    /// TODO: oContact

    // Replace any ad previously stored for this URL, then flag the HTML as parsed.
    Ad::iRemoveAdsByUrl($oHtml->url);
    $oAd->vSave();
    DirectDB::bUpdate('ads_htmls', array('parsed' => true), $oHtml->id);

    return $oAd;
}

/**
 * Converts a price cell such as "350€", "350,50 €" or "1.200€" into integer cents.
 *
 * Like the former intval() based conversion, only the leading numeric part of
 * the text is evaluated (so "n.a." yields 0), but the fraction is kept until
 * after scaling and German thousands grouping is recognised.
 *
 * @param string $sValue Text of the value cell.
 * @return int Price in cents.
 */
private static function iPriceToCents($sValue) {
    $sNumber = str_replace('€', '', $sValue);
    // "1.200" / "1.200,50": dots in groups of three digits are thousands separators.
    if (preg_match('#^\\s*\\d{1,3}(\\.\\d{3})+(?!\\d)#', $sNumber)) {
        $sNumber = str_replace('.', '', $sNumber);
    }
    $sNumber = str_replace(',', '.', $sNumber);
    // Scale first, round afterwards - truncating first would drop the cents.
    return (int) round(100 * floatval($sNumber));
}
/**
 * Makes sure the table named by self::$sTable exists.
 *
 * Issues a single CREATE TABLE IF NOT EXISTS through DirectDB::mQuery(). The
 * table is keyed by `address_hash` and additionally holds `address_stub`,
 * `fetched`, `x`, `y` and `response`.
 * NOTE(review): the columns look like a cache of geocoding responses per
 * address - confirm against the code that reads and writes this table.
 *
 * @return void
 */
public static function vInit() {
    $sCreateTable = "\n\t\t\t\tCREATE TABLE IF NOT EXISTS `" . self::$sTable . "` (\n\t\t\t\t\taddress_hash varchar(32) NOT NULL,\n\t\t\t\t\taddress_stub varchar(64) NOT NULL,\n\t\t\t\t\tfetched DATETIME,\n\t\t\t\t\tx varchar(32),\n\t\t\t\t\ty varchar(32),\n\t\t\t\t\tresponse mediumtext,\n\t\t\t\t\tPRIMARY KEY (`address_hash`)\n\t\t\t\t) ENGINE=MyISAM AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;\n\t\t\t";

    DirectDB::mQuery($sCreateTable);
}
/**
 * Makes sure the two tables used for ads exist.
 *
 * Runs one CREATE TABLE IF NOT EXISTS per table through DirectDB::mQuery(),
 * first for `ads_htmls` (fetched pages: hashes, url, raw html, parsed flag),
 * then for `ads` (one row per ad with a reference to its html row and a
 * `json_data` column).
 *
 * @return void
 */
public static function vInit() {
    $aCreateStatements = array(
        'ads_htmls' => "\n\t\t\t\tCREATE TABLE IF NOT EXISTS ads_htmls (\n\t\t\t\t\tid int(9) NOT NULL AUTO_INCREMENT,\n\t\t\t\t\tfetched datetime,\n\t\t\t\t\tparsed tinyint(1),\n\t\t\t\t\tlist varchar(32),\n\t\t\t\t\tdomain_hash varchar(32),\n\t\t\t\t\turl_hash varchar(32),\n\t\t\t\t\thtml_hash varchar(32),\n\t\t\t\t\thtml longtext,\n\t\t\t\t\turl text,\n\t\t\t\t\tINDEX (url_hash),\n\t\t\t\t\tPRIMARY KEY (id)\n\t\t\t\t) ENGINE=MyISAM AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;\n\t\t\t",
        'ads' => "\n\t\t\t\tCREATE TABLE IF NOT EXISTS ads (\n\t\t\t\t\tid int(9) NOT NULL AUTO_INCREMENT,\n\t\t\t\t\thtml_id int(9),\n\t\t\t\t\tcreated datetime,\n\t\t\t\t\tchanged datetime,\n\t\t\t\t\tjson_data longtext,\n\t\t\t\t\tPRIMARY KEY (id)\n\t\t\t\t) ENGINE=MyISAM AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;\n\t\t\t",
    );

    // Array order is execution order: ads_htmls first, then ads.
    foreach ($aCreateStatements as $sCreateTable) {
        DirectDB::mQuery($sCreateTable);
    }
}