/**
 * Extract content from scraped HTML with a phpQuery selector and post-process it.
 *
 * Processing order: selector extraction (text or html), clear_regex,
 * replace_regex, basehref rewriting, striptags, charset conversion. The
 * trimmed result is wrapped between wpws_HEADER and wpws_FOOTER.
 *
 * @param string $scrap    Raw HTML to parse.
 * @param string $url      Not used by this function; kept so existing callers keep working.
 * @param string $selector Selector passed to pq().
 * @param array  $wpwsopt  Options array. Keys read here: output ('text'|'html'),
 *                         clear_regex, replace_regex, replace_with, basehref,
 *                         striptags, htmldecode, debug.
 * @return string Header, trimmed output and footer concatenated.
 */
function wpws_parse_byselector($scrap, $url, $selector, $wpwsopt) {
    $currcharset = get_bloginfo('charset');
    require_once 'includes/phpQuery-onefile.php';

    $doc = phpQuery::newDocumentHTML($scrap, $currcharset);
    phpQuery::selectDocument($doc);

    // Start from an empty string so an unknown/missing 'output' option does
    // not leave $output undefined further down.
    $output = '';
    $format = isset($wpwsopt['output']) ? $wpwsopt['output'] : '';
    if ($format == 'text') {
        $output = pq($selector)->text();
    }
    if ($format == 'html') {
        $output = pq($selector)->html();
    }

    if (!empty($wpwsopt['clear_regex'])) {
        $result = preg_replace($wpwsopt['clear_regex'], '', $output);
        // preg_replace() returns null on error; keep the content in that case.
        if ($result !== null) {
            $output = $result;
        }
    }

    if (!empty($wpwsopt['replace_regex'])) {
        $replacement = isset($wpwsopt['replace_with']) ? $wpwsopt['replace_with'] : '';
        $result = preg_replace($wpwsopt['replace_regex'], $replacement, $output);
        if ($result !== null) {
            $output = $result;
        }
    }

    if (!empty($wpwsopt['basehref'])) {
        // Escape "\" and "$" so a base URL containing e.g. "$1" is inserted
        // literally instead of being read as a back-reference.
        $base = str_replace(array('\\', '$'), array('\\\\', '\\$'), $wpwsopt['basehref']);
        // Only href/src values without a ":" (i.e. without a scheme) are prefixed.
        $result = preg_replace('#(href|src)="([^:"]*)("|(?:(?:%20|\\s|\\+)[^"]*"))#', '$1="' . $base . '$2$3', $output);
        if ($result !== null) {
            $output = $result;
        }
    }

    if (!empty($wpwsopt['striptags'])) {
        $output = wpws_strip_only($output, $wpwsopt['striptags']);
    }

    if (!empty($wpwsopt['htmldecode'])) {
        // 'htmldecode' holds the source charset; convert to the blog charset.
        $converted = iconv($wpwsopt['htmldecode'], $currcharset, $output);
        // iconv() returns false on failure; keep the unconverted content then.
        if ($converted !== false) {
            $output = $converted;
        }
    }

    $trimmed = trim((string) $output);

    // Compare against '' rather than using empty(), so a legitimate "0"
    // result is not reported as an empty selection.
    $debug = isset($wpwsopt['debug']) && $wpwsopt['debug'] == 1;
    if ($trimmed === '' && $debug) {
        $header = wpws_HEADER . "<!--Warning: Your selector returned an empty string-->";
    } else {
        $header = wpws_HEADER;
    }

    return $header . $trimmed . wpws_FOOTER;
}
/**
 * Parse filtered content using options.
 *
 * Steps run in this order, each only when its option is non-empty:
 * clear_regex, clear_selector, replace_regex, replace_selector, basehref,
 * striptags, removetags, htmldecode (charset conversion), callback.
 *
 * @param string $filtered_html Filtered HTML using selector or xpath query
 * @param array  $wpwsopt       Options array. Keys read here: clear_regex,
 *                              clear_selector, replace_regex, replace_with,
 *                              replace_selector, replace_selector_with,
 *                              basehref, striptags, removetags, htmldecode,
 *                              callback.
 * @return string Processed HTML (or whatever the callback returns, when one is set).
 */
function wpws_parse_filtered_html($filtered_html, $wpwsopt) {
    if (!empty($wpwsopt['clear_regex'])) {
        $result = preg_replace($wpwsopt['clear_regex'], '', $filtered_html);
        // preg_replace() returns null on error; keep the content in that case.
        if ($result !== null) {
            $filtered_html = $result;
        }
    }

    if (!empty($wpwsopt['clear_selector'])) {
        // Remove the markup matched by the selector from the content.
        $filtered_html = str_replace(wpws_get_html_by_selector($filtered_html, $wpwsopt['clear_selector']), '', $filtered_html);
    }

    if (!empty($wpwsopt['replace_regex'])) {
        $replacement = isset($wpwsopt['replace_with']) ? $wpwsopt['replace_with'] : '';
        $result = preg_replace($wpwsopt['replace_regex'], $replacement, $filtered_html);
        if ($result !== null) {
            $filtered_html = $result;
        }
    }

    if (!empty($wpwsopt['replace_selector'])) {
        $replacement = isset($wpwsopt['replace_selector_with']) ? $wpwsopt['replace_selector_with'] : '';
        $filtered_html = str_replace(wpws_get_html_by_selector($filtered_html, $wpwsopt['replace_selector']), $replacement, $filtered_html);
    }

    if (!empty($wpwsopt['basehref'])) {
        // Escape "\" and "$" so a base URL containing e.g. "$1" is inserted
        // literally instead of being read as a back-reference.
        $base = str_replace(array('\\', '$'), array('\\\\', '\\$'), $wpwsopt['basehref']);
        // Only href/src values without a ":" (i.e. without a scheme) are prefixed.
        $result = preg_replace('#(href|src)="([^:"]*)("|(?:(?:%20|\s|\+)[^"]*"))#', '$1="' . $base . '$2$3', $filtered_html);
        if ($result !== null) {
            $filtered_html = $result;
        }
    }

    if (!empty($wpwsopt['striptags'])) {
        $filtered_html = wpws_strip_only($filtered_html, $wpwsopt['striptags']);
    }

    if (!empty($wpwsopt['removetags'])) {
        // NOTE(review): the third argument presumably makes wpws_strip_only drop
        // the tag contents as well - confirm against its definition.
        $filtered_html = wpws_strip_only($filtered_html, $wpwsopt['removetags'], true);
    }

    if (!empty($wpwsopt['htmldecode'])) {
        // 'htmldecode' holds the source charset; the blog charset is only
        // looked up when a conversion is actually requested.
        $converted = iconv($wpwsopt['htmldecode'], get_bloginfo('charset'), $filtered_html);
        // iconv() returns false on failure; keep the unconverted content then.
        if ($converted !== false) {
            $filtered_html = $converted;
        }
    }

    // Only plain function names are accepted; the is_string() check keeps
    // function_exists() from being handed a non-string value.
    if (!empty($wpwsopt['callback']) && is_string($wpwsopt['callback']) && function_exists($wpwsopt['callback'])) {
        $filtered_html = call_user_func($wpwsopt['callback'], $filtered_html);
    }

    return $filtered_html;
}