Example #1
0
/**
 * Extract the part of a scraped document matched by a selector and post-process it.
 *
 * The markup is loaded into phpQuery using the blog charset, the selector is
 * evaluated with pq(), and the result is passed through the optional filters
 * found in $wpwsopt, in this order: clear_regex, replace_regex, basehref,
 * striptags, htmldecode. The trimmed result is returned between wpws_HEADER
 * and wpws_FOOTER.
 *
 * A filter that fails (PCRE error, iconv failure) is skipped so that the
 * content extracted so far is kept instead of being replaced by an empty value.
 *
 * @param string $scrap    Raw markup to query.
 * @param string $url      Not used by this function; kept so existing callers keep working.
 * @param string $selector Selector handed to phpQuery's pq().
 * @param array  $wpwsopt  Options. Recognised keys: 'output' ('text' or 'html'),
 *                         'clear_regex', 'replace_regex', 'replace_with',
 *                         'basehref', 'striptags', 'htmldecode', 'debug'.
 * @return string Header, processed content and footer concatenated.
 */
function wpws_parse_byselector($scrap, $url, $selector, $wpwsopt)
{
    $currcharset = get_bloginfo('charset');
    require_once 'includes/phpQuery-onefile.php';
    $doc = phpQuery::newDocumentHTML($scrap, $currcharset);
    phpQuery::selectDocument($doc);

    // Start from an empty string so an unknown or missing 'output' mode does
    // not leave $output undefined.
    $output = '';
    $mode = isset($wpwsopt['output']) ? $wpwsopt['output'] : '';
    if ($mode == 'text') {
        $output = pq($selector)->text();
    }
    if ($mode == 'html') {
        $output = pq($selector)->html();
    }

    if (!empty($wpwsopt['clear_regex'])) {
        $cleared = preg_replace($wpwsopt['clear_regex'], '', $output);
        // preg_replace() yields null on a PCRE error; keep the content then.
        if ($cleared !== null) {
            $output = $cleared;
        }
    }
    if (!empty($wpwsopt['replace_regex'])) {
        $replacement = isset($wpwsopt['replace_with']) ? $wpwsopt['replace_with'] : '';
        $replaced = preg_replace($wpwsopt['replace_regex'], $replacement, $output);
        if ($replaced !== null) {
            $output = $replaced;
        }
    }
    if (!empty($wpwsopt['basehref'])) {
        // Prefix href/src attribute values that contain no ':' with the base URL.
        $rebased = preg_replace('#(href|src)="([^:"]*)("|(?:(?:%20|\\s|\\+)[^"]*"))#', '$1="' . $wpwsopt['basehref'] . '$2$3', $output);
        if ($rebased !== null) {
            $output = $rebased;
        }
    }
    if (!empty($wpwsopt['striptags'])) {
        $output = wpws_strip_only($output, $wpwsopt['striptags']);
    }
    if (!empty($wpwsopt['htmldecode'])) {
        // 'htmldecode' names the source charset; convert to the blog charset.
        $converted = iconv($wpwsopt['htmldecode'], $currcharset, $output);
        // iconv() returns false on failure; keep the unconverted content then.
        if ($converted !== false) {
            $output = $converted;
        }
    }

    $body = trim((string) $output);

    // Compare the trimmed result against '' rather than using empty(): the
    // text "0" is real content, while whitespace-only output really is empty.
    $debug = isset($wpwsopt['debug']) && $wpwsopt['debug'] == 1;
    if ($body === '' && $debug) {
        $header = wpws_HEADER . "<!--Warning: Your selector returned an empty string-->";
    } else {
        $header = wpws_HEADER;
    }
    return $header . $body . wpws_FOOTER;
}
/**
 * Post-process already-filtered content according to the options array.
 *
 * Each step runs only when its option is non-empty, in this order:
 * clear_regex, clear_selector, replace_regex, replace_selector, basehref,
 * striptags, removetags, htmldecode, callback.
 *
 * A regex or charset step that fails (preg_replace() returning null,
 * iconv() returning false) is skipped so the content is kept as it was
 * before that step.
 *
 * @param string $filtered_html Filtered HTML using selector or xpath query
 * @param array $wpwsopt Options array. Recognised keys: 'clear_regex',
 *        'clear_selector', 'replace_regex', 'replace_with',
 *        'replace_selector', 'replace_selector_with', 'basehref',
 *        'striptags', 'removetags', 'htmldecode', 'callback'.
 * @return string
 */
function wpws_parse_filtered_html($filtered_html, $wpwsopt) {
    if(!empty($wpwsopt['clear_regex'])) {
        $cleared = preg_replace($wpwsopt['clear_regex'], '', $filtered_html);
        // preg_replace() yields null on a PCRE error; keep the content then.
        if($cleared !== null)
            $filtered_html = $cleared;
    }
    if(!empty($wpwsopt['clear_selector']))
        $filtered_html = str_replace(wpws_get_html_by_selector($filtered_html, $wpwsopt['clear_selector']), '', $filtered_html);
    if(!empty($wpwsopt['replace_regex'])) {
        $replacement = isset($wpwsopt['replace_with']) ? $wpwsopt['replace_with'] : '';
        $replaced = preg_replace($wpwsopt['replace_regex'], $replacement, $filtered_html);
        if($replaced !== null)
            $filtered_html = $replaced;
    }
    if(!empty($wpwsopt['replace_selector'])) {
        $selector_replacement = isset($wpwsopt['replace_selector_with']) ? $wpwsopt['replace_selector_with'] : '';
        $filtered_html = str_replace(wpws_get_html_by_selector($filtered_html, $wpwsopt['replace_selector']), $selector_replacement, $filtered_html);
    }
    if(!empty($wpwsopt['basehref'])) {
        // Prefix href/src attribute values that contain no ':' with the base URL.
        $rebased = preg_replace('#(href|src)="([^:"]*)("|(?:(?:%20|\s|\+)[^"]*"))#','$1="'.$wpwsopt['basehref'].'$2$3',$filtered_html);
        if($rebased !== null)
            $filtered_html = $rebased;
    }
    if(!empty($wpwsopt['striptags']))
        $filtered_html = wpws_strip_only($filtered_html, $wpwsopt['striptags']);
    if(!empty($wpwsopt['removetags']))
        $filtered_html = wpws_strip_only($filtered_html, $wpwsopt['removetags'], true);
    if(!empty($wpwsopt['htmldecode'])) {
        // The blog charset is only needed for this step, so fetch it here.
        $currcharset = get_bloginfo('charset');
        $converted = iconv($wpwsopt['htmldecode'], $currcharset, $filtered_html);
        // iconv() returns false on failure; keep the unconverted content then.
        if($converted !== false)
            $filtered_html = $converted;
    }
    // NOTE(review): the callback is looked up by name from the options array.
    // If those options can be supplied by untrusted users, this allows calling
    // any global function with the content as argument; consider a whitelist.
    // The is_string() guard avoids passing a non-string to function_exists().
    if(!empty($wpwsopt['callback']) && is_string($wpwsopt['callback']) && function_exists($wpwsopt['callback']))
        $filtered_html = call_user_func($wpwsopt['callback'], $filtered_html);
    return $filtered_html;
}