Exemple #1
0
    Maps::vWipeDatabase();
    Ad::vWipeDatabase();
}
// Debug endpoint: ?ad_html=<ad id> re-parses the stored HTML of that ad,
// dumps the parse result and then prints the raw stored HTML.
if (isset($_REQUEST['ad_html'])) {
    $iAdID = intval($_REQUEST['ad_html']);
    $oAds = DirectDB::oSelectOne('ads', array('id' => $iAdID));
    // NOTE(review): oSelectOne presumably yields a falsy value when no row
    // matches - confirm. Without this guard an unknown id dereferences null.
    if (!$oAds) {
        exit('No ad found for id ' . $iAdID);
    }
    $oHtml = DirectDB::oSelectOne('ads_htmls', array('id' => intval($oAds->html_id)));
    if (!$oHtml) {
        exit('No stored HTML found for ad id ' . $iAdID);
    }
    $sHtml = $oHtml->html;
    ODT::vDump(WgGesuchtReader::oParseHtml(null, $sHtml));
    // exit() with a string argument prints it before terminating the script.
    exit($sHtml);
}
// Derive the requested actions; 'fetch_and_parse' switches both flags on.
$bFetch = isset($_REQUEST['fetch_and_parse']) || isset($_REQUEST['fetch']);
$bParse = isset($_REQUEST['fetch_and_parse']) || isset($_REQUEST['parse']);

// Debug lookup: dump id and url of the latest-fetched stored page for the
// given URL filter (the '%like%' key is presumably a substring match - see DirectDB).
if (isset($_REQUEST['search_html'], $_REQUEST['url'])) {
    $aWhere = array(
        'url' => array('%like%' => $_REQUEST['url']),
    );
    $oHtml = DirectDB::oSelectOne(
        'ads_htmls',
        $aWhere,
        'id , url',
        'ORDER BY fetched DESC LIMIT 1'
    );
    ODT::vDump($oHtml);
}

// Fetching: duplicate-URL ads are removed first, then the reader fetches.
if ($bFetch) {
    Ad::vDeleteDuplicateUrlAds();
    WgGesuchtReader::vFetch();
}
if ($bParse) {
    $iParse = intval($_REQUEST['parse']);
    if ($iParse) {
        $oAd = WgGesuchtReader::oParseHtml($iParse);
        ODT::vDump($oAd);
        $oHtml = Ad::oGetHtml($iParse);
        ODT::vDump($oHtml);
    } else {
 /**
  * Builds an instance of this class from the given access data and stores
  * it in the static $oDefault slot.
  *
  * @param array $aAccessData Access data passed unchanged to the constructor.
  *                           A caller elsewhere passes the keys 'sUser',
  *                           'sPass' and 'sDaba'.
  * @return void
  */
 public static function vSetDefault($aAccessData)
 {
     $oInstance = new self($aAccessData);
     self::$oDefault = $oInstance;
 }
Exemple #3
0
            foreach ($aDirFiles as $sDirFile) {
                $aFiles[] = $sDirFile;
            }
        } else {
            $aFiles[] = $sFile;
        }
    }
    return $aFiles;
}
require 'lib/foreign/include_all.php';

// Autoloader: maps a class name to lib/<Class>.php and, once the file is
// loaded, calls the class's static vInit() hook if it defines one.
spl_autoload_register(function ($sClass) {
    // Resolve the path the same way require_once would (include_path aware).
    // If the file does not exist, return quietly so that class_exists()
    // probes and any other registered autoloaders are not killed by a
    // fatal "failed opening required" error.
    $sPath = stream_resolve_include_path('lib/' . $sClass . '.php');
    if ($sPath === false) {
        return;
    }
    require_once $sPath;
    if (method_exists($sClass, 'vInit')) {
        $sClass::vInit();
    }
});

// NOTE(review): database credentials are hard-coded in source. They should be
// moved to configuration/environment outside version control and rotated.
DirectDB::vSetDefault(array('sUser' => 'wohnungssuche', 'sPass' => 'c54effb3740c91a40760eda1e5c15319', 'sDaba' => 'wohnungssuche'));
#$aInitClasses = array();
#$aLibFiles = aListFiles('lib');
#foreach ($aLibFiles as $sLibFile) {
#	if (preg_match('#\.php$#', $sLibFile)) {
#		$sClass = preg_replace('#^.+/#', '', preg_replace('#\.php$#', '', $sLibFile));
#		require_once('lib/' . $sClass . '.php');
#		if (method_exists($sClass, 'vInit')) {
#			$aInitClasses []= $sClass;
#		}
#	}
#}
#foreach ($aInitClasses as $sInitClass) {
#	$sInitClass::vInit();
#}
 /**
  * Parses one stored ad page into an Ad object, persists it and flags the
  * HTML row as parsed.
  *
  * Steps, in order: copy page metadata from the stored row, read the floor
  * area, read the cost table, read and geocode the address, download the
  * gallery images that are not cached yet, collect the free-text
  * description, read the "Angebot vom" date, then replace any previously
  * saved ads for the same URL with the new one.
  *
  * @param int $iHtmlID Id of the stored HTML row, as accepted by Ad::oGetHtml().
  * @return Ad|null The saved ad, or null when the row is missing or its HTML
  *                 could not be turned into a DOM.
  */
 public static function oParseHtml($iHtmlID)
 {
     $oHtml = Ad::oGetHtml($iHtmlID);
     // NOTE(review): assumes oGetHtml() yields a falsy value for unknown ids - confirm.
     if (!$oHtml) {
         return null;
     }
     $oDom = HtmlDomParser::str_get_html($oHtml->html);
     // NOTE(review): the DOM parser presumably returns false for empty or
     // unparsable input - confirm; calling find() on that would be fatal.
     if (!$oDom) {
         return null;
     }

     // --- Page metadata copied straight from the stored row ---
     $oAd = new Ad();
     $oAd->oPage->iHtmlID = $oHtml->id;
     $oAd->oPage->sListID = $oHtml->list;
     $oAd->oPage->sFetched = $oHtml->fetched;
     $oAd->oPage->sDomain = $oHtml->domain_hash;
     $oAd->oPage->sUrl = $oHtml->url;

     $oMainInfo = $oDom->find('.panel-body > .row', 0);

     // --- Floor area ---
     $oKeyFacts = $oDom->find('.headline-key-facts', 0);
     $sOrange = $oKeyFacts ? $oKeyFacts->innertext : '';
     $sSquareMeters = Utilitu::sPregRead('#röße:\\s+([,\\d]+)m#', $sOrange);
     // The pattern admits a decimal comma; floatval() only understands a
     // decimal point, so convert before parsing ("25,5" -> 25.5, not 25).
     $oAd->oPhysical->nSquareMeters = floatval(str_replace(',', '.', (string) $sSquareMeters));

     // --- Costs (stored in cents) ---
     $aKostenRows = $oMainInfo ? $oMainInfo->find('.col-sm-5 tbody tr') : array();
     $aKostenRowsByLabel = array();
     foreach ($aKostenRows as $oKostenRow) {
         $aCells = $oKostenRow->find('td');
         // Rows without a label cell and a value cell carry no price.
         if (count($aCells) < 2) {
             continue;
         }
         $sLabel = trim($aCells[0]->plaintext, "\t\n :");
         $sValue = trim($aCells[1]->plaintext, "\t\n ");
         $sNumber = str_replace(array(',', '€'), array('.', ''), $sValue);
         // Multiply before truncating to an integer so the cents survive
         // ("350,50€" -> 35050 instead of 35000).
         $iPrice = intval(round(100 * floatval($sNumber)));
         $aKostenRowsByLabel[$sLabel] = array('oDom' => $oKostenRow, 'sLabel' => $sLabel, 'sValue' => $sValue, 'iPrice' => $iPrice);
     }
     // Target property on $oAd->oPrice => row label on the page.
     $aKostenMap = array('iCold' => 'Miete', 'iNeben' => 'Nebenkosten', 'iOther' => 'Sonstige Kosten', 'iBail' => 'Kaution', 'iBuy' => 'Abschlagszahlung');
     foreach ($aKostenMap as $sTarget => $sSource) {
         if (isset($aKostenRowsByLabel[$sSource])) {
             $oAd->oPrice->{$sTarget} = $aKostenRowsByLabel[$sSource]['iPrice'];
         }
     }
     $oAd->oPrice->iWarm = $oAd->oPrice->iCold + $oAd->oPrice->iNeben;

     // --- Address: the block is split into lines at its <br> tags ---
     $oAddressNode = $oMainInfo ? $oMainInfo->find('.col-sm-4 > p', 0) : null;
     $sAddressHtml = $oAddressNode ? $oAddressNode->innertext : '';
     $sAddress = trim($sAddressHtml);
     $sAddress = str_replace("\n", '', $sAddress);
     $sAddress = preg_replace('#<br ?/?>\\s+#', "\n", $sAddress);
     $aAddress = explode("\n", $sAddress);
     $oAd->oAddress->sCity = 'Aachen';
     $oAd->oAddress->sZip = Utilitu::sPregRead('#\\s*(\\d+)#', $aAddress[0]);
     // explode() always yields index 0, but a second line is not guaranteed.
     $oAd->oAddress->sStreet = isset($aAddress[1]) ? trim($aAddress[1]) : '';
     $sGeocodeAddress = $oAd->oAddress->sStreet . ', ' . $oAd->oAddress->sZip . ' ' . 'Aachen';
     $oCoords = Maps::oGetCoords($sGeocodeAddress);
     $oAd->oAddress->oCoords = $oCoords;

     // --- Images: download each large image once, keyed by md5 of its URL ---
     $aImageDoms = $oDom->find('img.sp-image');
     foreach ($aImageDoms as $oImageDom) {
         if (!isset($oImageDom->attr['data-large'])) {
             continue;
         }
         $oImage = new StdClass();
         $oImage->sUrl = str_replace('/./', '/', $oImageDom->attr['data-large']);
         // NOTE(review): the extension is taken verbatim from a URL found in
         // remote HTML and ends up in a local file name; consider
         // whitelisting it to alphanumerics.
         $sFileType = Utilitu::sPregRead('#\\.([^\\.]+)$#', $oImage->sUrl);
         $oImage->sFile = self::$sImagesFolder . md5($oImage->sUrl) . '.' . $sFileType;
         if (!file_exists($oImage->sFile)) {
             $sImage = Curl::sGet($oImage->sUrl);
             if (Curl::iGetLastStatus() == 200) {
                 file_put_contents($oImage->sFile, $sImage);
             }
         }
         // Only images that actually exist on disk are attached to the ad;
         // this also covers a failed download or write above.
         if (file_exists($oImage->sFile)) {
             $oAd->oPage->aImages[] = $oImage;
         }
     }

     // --- Description: all free-text blocks next to the message box ---
     $aDescription = array();
     $oMessageBox = $oDom->find('#infobox_nachrichtsenden', 0);
     $oMessageBoxParent = $oMessageBox ? $oMessageBox->parent : null;
     $aDescriptionBlocks = $oMessageBoxParent ? $oMessageBoxParent->find('.freitext') : array();
     foreach ($aDescriptionBlocks as $oDescriptionBlock) {
         $aDescription[] = $oDescriptionBlock->plaintext;
     }
     $oAd->oPage->sDescription = implode("\n\n", $aDescription);
     $oAd->oPage->sDescription = preg_replace('#\\n\\s+#', "\n", $oAd->oPage->sDescription);
     $oAd->oPage->sDescription = str_replace('&nbsp;', '', $oAd->oPage->sDescription);

     // --- Offer date ("Angebot vom: ...") used for both created and changed ---
     $aPotentialDates = $oDom->find('.col-sm-4 .col-sm-12');
     foreach ($aPotentialDates as $oPotentialDate) {
         if (preg_match('#^\\s*Angebot vom:\\s*(.+)\\s*$#', $oPotentialDate->plaintext, $aMatch)) {
             $iTimestamp = strtotime(trim($aMatch[1]));
             // An unparsable date would otherwise be stored as 1970-01-01.
             if ($iTimestamp === false) {
                 continue;
             }
             $sDate = date('Y-m-d H:i:s', $iTimestamp);
             $oAd->oPage->sCreated = $sDate;
             $oAd->oPage->sChanged = $sDate;
         }
     }

     /// TODO: oContact

     // --- Persist: drop earlier ads for this URL, save, mark the row parsed ---
     Ad::iRemoveAdsByUrl($oHtml->url);
     $oAd->vSave();
     DirectDB::bUpdate('ads_htmls', array('parsed' => true), $oHtml->id);
     return $oAd;
 }
Exemple #5
0
 /**
  * Creates the table named by self::$sTable if it does not exist yet.
  *
  * Columns: address_hash (primary key), address_stub, fetched, x, y, response.
  *
  * @return void
  */
 public static function vInit()
 {
     $sCreateTable = "\n\t\t\t\tCREATE TABLE IF NOT EXISTS `" . self::$sTable . "` (\n\t\t\t\t\taddress_hash varchar(32) NOT NULL,\n\t\t\t\t\taddress_stub varchar(64) NOT NULL,\n\t\t\t\t\tfetched DATETIME,\n\t\t\t\t\tx varchar(32),\n\t\t\t\t\ty varchar(32),\n\t\t\t\t\tresponse mediumtext,\n\t\t\t\t\tPRIMARY KEY (`address_hash`)\n\t\t\t\t) ENGINE=MyISAM AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;\n\t\t\t";
     DirectDB::mQuery($sCreateTable);
 }
Exemple #6
0
 /**
  * Creates the `ads_htmls` and `ads` tables if they do not exist yet.
  *
  * `ads_htmls` holds the fetched pages (html, url, their hashes, fetch time
  * and a parsed flag); `ads` holds one row per ad with a reference to its
  * HTML row (html_id) and the ad data as JSON text.
  *
  * @return void
  */
 public static function vInit()
 {
     $aCreateStatements = array();
     $aCreateStatements[] = "\n\t\t\t\tCREATE TABLE IF NOT EXISTS ads_htmls (\n\t\t\t\t\tid int(9) NOT NULL AUTO_INCREMENT,\n\t\t\t\t\tfetched datetime,\n\t\t\t\t\tparsed tinyint(1),\n\t\t\t\t\tlist varchar(32),\n\t\t\t\t\tdomain_hash varchar(32),\n\t\t\t\t\turl_hash varchar(32),\n\t\t\t\t\thtml_hash varchar(32),\n\t\t\t\t\thtml longtext,\n\t\t\t\t\turl text,\n\t\t\t\t\tINDEX (url_hash),\n\t\t\t\t\tPRIMARY KEY (id)\n\t\t\t\t) ENGINE=MyISAM AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;\n\t\t\t";
     $aCreateStatements[] = "\n\t\t\t\tCREATE TABLE IF NOT EXISTS ads (\n\t\t\t\t\tid int(9) NOT NULL AUTO_INCREMENT,\n\t\t\t\t\thtml_id int(9),\n\t\t\t\t\tcreated datetime,\n\t\t\t\t\tchanged datetime,\n\t\t\t\t\tjson_data longtext,\n\t\t\t\t\tPRIMARY KEY (id)\n\t\t\t\t) ENGINE=MyISAM AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;\n\t\t\t";
     // Run in declaration order: ads_htmls first, then ads.
     foreach ($aCreateStatements as $sCreateStatement) {
         DirectDB::mQuery($sCreateStatement);
     }
 }