示例#1
0
/**
 * Replaces an item's body with the parts of a retrieved document selected by the retriever.
 *
 * The retrieved document is parsed (as HTML when the resource type contains
 * "html", otherwise as XML), run through the XSLT produced from the
 * addon's extract.tpl template, converted with html2bbcode(), suffixed with a
 * "Retrieved <date>" link back to the item's plink, and finally written to the
 * `item` table.
 *
 * The function returns early, without touching the database, when there is no
 * include specification, no resource data, the document or stylesheet cannot
 * be processed, or the converted output is empty.
 *
 * @param array $retriever Retriever record; reads ['id'], ['data']['include'] and ['data']['exclude'].
 * @param array $item      Item record, passed by reference; reads ['id'] and ['plink'], overwrites ['body'].
 * @param array $resource  Retrieved resource; reads ['data'] (document text) and ['type'].
 *
 * @return void
 */
function retriever_apply_dom_filter($retriever, &$item, $resource)
{
    logger('retriever_apply_dom_filter: applying XSLT to ' . $item['id'] . ' ' . $item['plink'], LOGGER_DEBUG);
    require_once 'include/html2bbcode.php';

    if (!$retriever['data']['include']) {
        return;
    }
    if (!$resource['data']) {
        logger('retriever_apply_dom_filter: no text to work with', LOGGER_NORMAL);
        return;
    }

    // NOTE(review): $encoding is only logged here; the parser below is left to
    // detect the document encoding on its own.
    $encoding = retriever_get_encoding($resource);
    logger('@@@ item type ' . $resource['type'] . ' encoding ' . $encoding);

    $extracter_template = get_markup_template('extract.tpl', 'addon/retriever/');

    $doc = new DOMDocument('1.0', 'utf-8');
    if (strpos($resource['type'], 'html') !== false) {
        // Real-world HTML is rarely well formed; parser warnings are suppressed on purpose.
        $loaded = @$doc->loadHTML($resource['data']);
    } else {
        $loaded = $doc->loadXML($resource['data']);
    }
    if (!$loaded) {
        logger('retriever_apply_dom_filter: unable to parse retrieved document', LOGGER_NORMAL);
        return;
    }
    logger('@@@ actual encoding of document is ' . $doc->encoding);

    // Build the base URLs handed to the template. parse_url() may return false
    // or omit components, so every component is read defensively.
    $components = parse_url($item['plink']);
    if (!is_array($components)) {
        $components = array();
    }
    $scheme = isset($components['scheme']) ? $components['scheme'] : '';
    $host = isset($components['host']) ? $components['host'] : '';
    $rooturl = $scheme . '://' . $host;
    if (isset($components['port'])) {
        // Keep an explicit port, otherwise rewritten links point at the wrong server.
        $rooturl .= ':' . $components['port'];
    }
    // The directory is everything up to and including the last "/" of the path.
    // Unlike dirname(), this yields "/" (not "//") for top-level pages and keeps
    // the full path when the URL itself already ends in "/".
    $path = isset($components['path']) ? $components['path'] : '/';
    $lastslash = strrpos($path, '/');
    $dirurl = $rooturl . ($lastslash === false ? '/' : substr($path, 0, $lastslash + 1));

    $params = array(
        '$include' => retriever_construct_xpath($retriever['data']['include']),
        '$exclude' => retriever_construct_xpath($retriever['data']['exclude']),
        '$pageurl' => $item['plink'],
        '$dirurl' => $dirurl,
        '$rooturl' => $rooturl,
    );
    $xslt = replace_macros($extracter_template, $params);

    // Each XSLT step reports failure through its return value; stop instead of
    // feeding a failed result into the next step.
    $xmldoc = new DOMDocument();
    if (!$xmldoc->loadXML($xslt)) {
        logger('retriever_apply_dom_filter: extract XSLT template is not valid XML', LOGGER_NORMAL);
        return;
    }
    $xp = new XsltProcessor();
    if (!$xp->importStylesheet($xmldoc)) {
        logger('retriever_apply_dom_filter: failed to import extract XSLT template', LOGGER_NORMAL);
        return;
    }
    $transformed = $xp->transformToXML($doc);
    if (!is_string($transformed)) {
        logger('retriever_apply_dom_filter: failed to apply extract XSLT template', LOGGER_NORMAL);
        return;
    }

    $item['body'] = html2bbcode($transformed);
    if (!strlen($item['body'])) {
        logger('retriever_apply_dom_filter retriever ' . $retriever['id'] . ' item ' . $item['id'] . ': output was empty', LOGGER_NORMAL);
        return;
    }

    // Append a footer recording when and from where the text was retrieved.
    $item['body'] .= "\n\n" . t('Retrieved') . ' ' . date("Y-m-d") . ': [url=';
    $item['body'] .= $item['plink'];
    $item['body'] .= ']' . $item['plink'] . '[/url]';

    q("UPDATE `item` SET `body` = '%s' WHERE `id` = %d", dbesc($item['body']), intval($item['id']));
}
示例#2
0
/**
 * Replaces an item's body with the filtered content of a retrieved document.
 *
 * Processing steps, in order:
 *  1. Convert the resource data to HTML entities and parse it (as HTML when
 *     the resource type contains "html", otherwise as XML).
 *  2. If an include specification is set, apply the XSLT built from extract.tpl.
 *  3. If a custom XSLT is set, apply it to the result.
 *  4. Apply the XSLT built from fix-urls.tpl with the page's root and directory URLs.
 *  5. Convert the result with html2bbcode(), append a "Retrieved <date>" link
 *     to the item's plink, and write the body to the `item` table.
 *
 * The function returns early, without touching the database, when neither an
 * include specification nor a custom XSLT is configured, when there is no
 * resource data, when any processing step fails, or when the output is empty.
 *
 * @param array $retriever Retriever record; reads ['id'] and ['data'] (with 'include' and 'customxslt').
 * @param array $item      Item record, passed by reference; reads ['id'], ['uri'], ['contact-id'],
 *                         ['plink']; overwrites ['body'].
 * @param array $resource  Retrieved resource; reads ['data'], ['type'] and ['redirect-url'].
 *
 * @return void
 */
function retriever_apply_dom_filter($retriever, &$item, $resource)
{
    logger('retriever_apply_dom_filter: applying XSLT to ' . $item['id'] . ' ' . $item['uri'] . ' contact ' . $item['contact-id'], LOGGER_DEBUG);

    if (!$retriever['data']['include'] && !$retriever['data']['customxslt']) {
        return;
    }
    if (!$resource['data']) {
        logger('retriever_apply_dom_filter: no text to work with', LOGGER_NORMAL);
        return;
    }

    // Non-ASCII characters are turned into entities so the parser does not
    // have to guess the document encoding.
    // NOTE(review): the 'HTML-ENTITIES' target is deprecated as of PHP 8.2 and
    // is also applied to XML input here - confirm both are acceptable.
    $encoding = retriever_get_encoding($resource);
    $content = mb_convert_encoding($resource['data'], 'HTML-ENTITIES', $encoding);

    $doc = new DOMDocument('1.0', 'UTF-8');
    if (strpos($resource['type'], 'html') !== false) {
        // Real-world HTML is rarely well formed; parser warnings are suppressed on purpose.
        $loaded = @$doc->loadHTML($content);
    } else {
        $loaded = $doc->loadXML($content);
    }
    if (!$loaded) {
        logger('retriever_apply_dom_filter: unable to parse retrieved document', LOGGER_NORMAL);
        return;
    }

    // Each stage is checked on its own: a failed stage yields a false value
    // that must not be passed on as the input document of the next stage.
    if ($retriever['data']['include']) {
        $params = array('$spec' => $retriever['data']);
        $extract_template = get_markup_template('extract.tpl', 'addon/retriever/');
        $extract_xslt = replace_macros($extract_template, $params);
        $doc = retriever_apply_xslt_text($extract_xslt, $doc);
        if (!$doc) {
            logger('retriever_apply_dom_filter: failed to apply extract XSLT template', LOGGER_NORMAL);
            return;
        }
    }
    if ($retriever['data']['customxslt']) {
        $doc = retriever_apply_xslt_text($retriever['data']['customxslt'], $doc);
        if (!$doc) {
            logger('retriever_apply_dom_filter: failed to apply custom XSLT template', LOGGER_NORMAL);
            return;
        }
    }

    // Build the base URLs used to fix up links. Fall back to the item's plink
    // when the resource carries no redirect URL, and read every parse_url()
    // component defensively because it may return false or omit components.
    $baseurl = !empty($resource['redirect-url']) ? $resource['redirect-url'] : $item['plink'];
    $components = parse_url($baseurl);
    if (!is_array($components)) {
        $components = array();
    }
    $scheme = isset($components['scheme']) ? $components['scheme'] : '';
    $host = isset($components['host']) ? $components['host'] : '';
    $rooturl = $scheme . '://' . $host;
    if (isset($components['port'])) {
        // Keep an explicit port, otherwise rewritten links point at the wrong server.
        $rooturl .= ':' . $components['port'];
    }
    // The directory is everything up to and including the last "/" of the path.
    // Unlike dirname(), this yields "/" (not "//") for top-level pages and keeps
    // the full path when the URL itself already ends in "/".
    $path = isset($components['path']) ? $components['path'] : '/';
    $lastslash = strrpos($path, '/');
    $dirurl = $rooturl . ($lastslash === false ? '/' : substr($path, 0, $lastslash + 1));

    $params = array('$dirurl' => $dirurl, '$rooturl' => $rooturl);
    $fix_urls_template = get_markup_template('fix-urls.tpl', 'addon/retriever/');
    $fix_urls_xslt = replace_macros($fix_urls_template, $params);
    $doc = retriever_apply_xslt_text($fix_urls_xslt, $doc);
    if (!$doc) {
        logger('retriever_apply_dom_filter: failed to apply fix urls XSLT template', LOGGER_NORMAL);
        return;
    }

    $item['body'] = html2bbcode($doc->saveXML());
    if (!strlen($item['body'])) {
        logger('retriever_apply_dom_filter retriever ' . $retriever['id'] . ' item ' . $item['id'] . ': output was empty', LOGGER_NORMAL);
        return;
    }

    // Append a footer recording when and from where the text was retrieved.
    $item['body'] .= "\n\n" . t('Retrieved') . ' ' . date("Y-m-d") . ': [url=';
    $item['body'] .= $item['plink'];
    $item['body'] .= ']' . $item['plink'] . '[/url]';

    q("UPDATE `item` SET `body` = '%s' WHERE `id` = %d", dbesc($item['body']), intval($item['id']));
}