PHP _getHTML 예제들

프로그래밍 언어: PHP

메소드/함수: _getHTML

hotexamples.com에서의 예제들: 2

PHP _getHTML - 예제 2개를 찾았습니다. 오픈소스 프로젝트에서 추출한, 평가가 가장 높은 PHP _getHTML 실제 사용 예제들입니다. 예제를 평가해 주시면 예제의 품질을 높이는 데 도움이 됩니다.

예제 #1

파일 보기

파일: scrape.php 프로젝트: asavagar/EU-data-cloud

/**
 * Scrapes one law page and records it as RDF data in the global $rdfData array.
 *
 * The HTML passed in is only used to locate the first link inside the
 * "headline" h2. The page behind that link is fetched with _getHTML() and
 * supplies the actual content: title/label, an optional token, and the
 * chapters and fragments found in the "jnnorm" blocks.
 *
 * @param string $html HTML of the page that contains the headline link.
 * @param string $url  URL that $html was loaded from; the link's href is
 *                     resolved against everything up to its last '/'.
 *
 * @return bool true when every "jnnorm" block after the first was handled;
 *              false when the headline link is missing, a block is neither a
 *              fragment nor a chapter, or a fragment handler reports failure.
 */
function _scrapeLexPage($html, $url)
{
    global $vocab;
    global $rdfData;

    // Step 1: find the link to the full text on the overview page.
    $dom = new simple_html_dom();
    $dom->load($html);
    $h2a = $dom->find("h2[@class='headline'] a");
    if (!isset($h2a[0])) {
        // Without the headline link there is nothing to derive the id/URL from.
        $dom->__destruct();
        return false;
    }
    $href = $h2a[0]->href;
    $dom->__destruct();

    // The id is the href without its extension; the full URL lives in the
    // same directory as $url.
    $id = substr($href, 0, strrpos($href, '.'));
    $fullURL = substr($url, 0, strrpos($url, '/')) . '/' . $href;
    $uri = URI_BASE . $id;

    // Step 2: load the linked page and describe it.
    $html = _getHTML($fullURL);
    $dom = new simple_html_dom();
    $dom->load($html);

    // x a http://www.metalex.eu/metalex/2008-05-02#BibliographicWork
    // dct:title, dct:description, metalex:fragment/fragmentOf, rdfs:seeAlso, rdfs:label, dcterms:source
    // x rdf:type metalex:BibliographicWork
    $rdfData[$uri] = array(RDF_TYPE => array(array('type' => 'uri', 'value' => METALEX_BIBWORK)));

    // title, label (an absent h1 yields an empty title instead of a null dereference)
    $h1Element = $dom->find("div[@id='paddingLR12'] div[@class='jnheader'] h1");
    $title = isset($h1Element[0])
        ? html_entity_decode($h1Element[0]->plaintext, ENT_COMPAT, 'ISO-8859-1')
        : '';
    $rdfData[$uri][$vocab['dct'] . 'title'] = array(array('type' => 'literal', 'value' => $title, 'lang' => 'de'));
    $rdfData[$uri][$vocab['rdfs'] . 'label'] = array(array('type' => 'literal', 'value' => $title, 'lang' => 'de'));

    // token (optional: only recorded when the header contains a paragraph)
    $pElems = $dom->find("div[@id='paddingLR12'] div[@class='jnheader'] p");
    if (isset($pElems[0])) {
        $token = html_entity_decode($pElems[0]->plaintext, ENT_COMPAT, 'ISO-8859-1');
        $rdfData[$uri][$vocab['nlex'] . 'token'] = array(array('type' => 'literal', 'value' => $token));
    }

    // seeAlso
    $rdfData[$uri][$vocab['rdfs'] . 'seeAlso'] = array(array('type' => 'uri', 'value' => $fullURL));

    // Step 3: walk the "jnnorm" blocks. The outcome is collected in $success
    // so that the DOM is released on every path, including failures.
    $success = true;
    $isFirst = true;
    $currentChapterURI = null;
    $jnNorms = $dom->find("div[@id='paddingLR12'] div[@class='jnnorm']");
    foreach ($jnNorms as $data) {
        if ($isFirst) {
            // We skip the first for now, since it contains only header info
            $isFirst = false;
            continue;
        }

        // If the block contains a h3, it is a fragment
        $h3 = $data->find("div[@class='jnheader'] h3 span");
        if ($h3) {
            // Fragments belong to the most recent chapter, or to the document
            // itself when no chapter has been seen yet.
            $parentURI = (null !== $currentChapterURI) ? $currentChapterURI : $uri;
            if (!_handleFragmentForURI($data, $parentURI, $fullURL)) {
                $success = false;
                break;
            }
            continue;
        }

        // If we find a h2 this is a chapter
        $h2 = $data->find("div[@class='jnheader'] h2");
        if (!$h2) {
            // Neither fragment nor chapter: unknown structure, give up.
            $success = false;
            break;
        }
        $currentChapterURI = _handleChapterForURI($data, $uri, $fullURL);
    }

    $dom->__destruct();
    return $success;
}

예제 #2

파일 보기

파일: scrape.php 프로젝트: asavagar/EU-data-cloud

/**
 * Fetches a detail page and adds the data found on it to $result.
 *
 * Depending on what the page contains, the following keys may be set:
 * 'logoURL', 'zipCity', 'country', 'tel', 'fax', 'mailto' and 'seeAlso'.
 * When the page does not have the expected structure, $result is returned
 * with only the keys collected up to that point.
 *
 * @param string $url    URL of the detail page to fetch via _getHTML().
 * @param array  $result Data collected so far; an existing 'seeAlso' list is
 *                       extended rather than replaced.
 *
 * @return array $result, extended with whatever could be extracted.
 */
function _handleDetailPage($url, $result)
{
    $html = _getHTML($url);
    $dom = new simple_html_dom();
    $dom->load($html);

    // All parsing happens in the helper so that the DOM is released exactly
    // once, no matter which of the helper's early returns is taken.
    $result = _extractDetailPageData($dom, $result);

    $dom->__destruct();
    return $result;
}

/**
 * Extracts logo, address, contact details and external links from a loaded detail page.
 *
 * @param simple_html_dom $dom    Already loaded detail page.
 * @param array           $result Data collected so far.
 *
 * @return array $result, extended with whatever could be extracted.
 */
function _extractDetailPageData($dom, $result)
{
    $divBoxes = $dom->find("div[@class='featured_box margin_top_fb']");
    if (count($divBoxes) < 1) {
        return $result;
    }
    $box = $divBoxes[0];

    // The logo path is the single-quoted part of the box's style attribute.
    // Only record it when such a quoted part actually exists.
    $styleParts = explode("'", $box->style);
    if (isset($styleParts[1])) {
        $result['logoURL'] = EUROPA_URL_BASE . $styleParts[1];
    }

    // The address is everything after the (single) closing h3 tag.
    $addressTextParts = explode('</h3>', $box->xmltext);
    if (count($addressTextParts) !== 2) {
        return $result;
    }
    $addressText = str_replace('<br />', '<br>', $addressTextParts[1]);
    $addressTextParts = explode('<br>', $addressText);
    $partCount = count($addressTextParts);
    if ($partCount < 3) {
        return $result;
    }

    // A first line without any digit is treated as incomplete and merged with
    // the following line.
    // NOTE(review): $street is computed but never stored in $result - confirm
    // whether it is intentionally unused.
    $street = trim($addressTextParts[0]);
    $curPos = 1;
    if (preg_match('/^.*[0-9]+.*$/', $street) !== 1) {
        $street .= ' ' . trim($addressTextParts[$curPos]);
        ++$curPos;
    }

    // After a merge the list may be too short for both fields; fall back to
    // an empty string instead of reading a missing index.
    $result['zipCity'] = isset($addressTextParts[$curPos]) ? trim($addressTextParts[$curPos]) : '';
    ++$curPos;
    $result['country'] = isset($addressTextParts[$curPos]) ? trim($addressTextParts[$curPos]) : '';
    ++$curPos;

    // Remaining lines: telephone, fax and a link (stored as 'mailto').
    // NOTE(review): lines are lower-cased before extraction, so the stored
    // values are lower-cased as well - confirm this is wanted for 'mailto'.
    for ($i = $curPos; $i < $partCount; ++$i) {
        $val = strtolower(trim($addressTextParts[$i]));
        if (substr($val, 0, 4) === 'tel:') {
            $result['tel'] = trim(substr($val, 4));
        } elseif (substr($val, 0, 4) === 'fax:') {
            $result['fax'] = trim(substr($val, 4));
        } elseif (substr($val, 0, 2) === '<a') {
            // The first double-quoted value of the tag is taken as the target.
            $parts = explode('"', $val);
            if (isset($parts[1])) {
                $result['mailto'] = trim($parts[1]);
            }
        }
    }

    // Collect links from the centre column whose href contains "http://".
    $links = array();
    foreach ($dom->find("div[@id='euCenter'] a") as $a) {
        if (strpos($a->href, 'http://') !== false) {
            $links[] = $a->href;
        }
    }
    if (count($links) > 0) {
        $result['seeAlso'] = isset($result['seeAlso'])
            ? array_merge($result['seeAlso'], $links)
            : $links;
    }

    return $result;
}