Пример #1
0
/**
 * Runs an HTML string through a fixed sequence of clean-up passes and
 * returns the resulting markup.
 *
 * The order of the passes below is significant and mirrors the original
 * pipeline exactly. Every helper called here is defined outside this
 * function.
 *
 * @param string $html source markup (presumably a whole HTML page; confirm against callers)
 * @return string the markup after all passes have been applied
 */
function html2xhtml($html)
{
    // Tags that are passed, one by one and in this order, to close_tag()
    $tagsToClose = array(
        "area", "base", "basefont", "br", "col", "embed", "frame",
        "hr", "img", "input", "isindex", "link", "meta", "param",
    );

    // Attribute names that are passed, one by one and in this order,
    // to make_attr_value()
    $attrsToExpand = array(
        "checked", "compact", "declare", "defer", "disabled", "ismap",
        "multiple", "nohref", "noresize", "noshade", "nowrap",
        "readonly", "selected",
    );

    // NOTE(review): the helpers whose return value is discarded here
    // presumably take $html by reference and edit it in place - confirm
    // against their definitions.
    process_pagebreak_commands($html);

    // SCRIPT tags are stripped first, because script content may confuse
    // the HTML-parsing passes that follow
    $html = process_script($html);

    // STYLE tags are pulled out for the same reason; they are kept aside
    // and put back by insert_styles() at the very end
    $extractedStyles = process_style($html);

    // Turn HTML character references into their Unicode equivalents
    process_character_references($html);
    remove_comments($html);
    fix_attrs_spaces($html);

    // Attribute and tag-name normalisation
    $html = quote_attrs($html);
    $html = escape_attrs_entities($html);
    $html = lowercase_tags($html);
    $html = lowercase_closing_tags($html);
    $html = fix_closing_tags($html);

    foreach ($tagsToClose as $tagName) {
        $html = close_tag($tagName, $html);
    }

    foreach ($attrsToExpand as $attrName) {
        $html = make_attr_value($attrName, $html);
    }

    // Document-structure passes
    $html = process_html($html);
    $html = process_body($html);
    $html = process_head($html);
    $html = process_p($html);

    // Escaping passes
    $html = escape_amp($html);
    $html = escape_lt($html);
    $html = escape_gt($html);
    $html = escape_textarea_content($html);

    // Container passes; the second argument is 0 in every call
    // (NOTE(review): looks like a start offset or nesting level - confirm)
    process_tables($html, 0);
    process_lists($html, 0);
    process_deflists($html, 0);
    process_selects($html, 0);

    $html = fix_tags($html);
    $html = fix_attrs($html);

    // Re-attach the styles that were set aside earlier
    return insert_styles($html, $extractedStyles);
}
Пример #2
0
// Entry script: validates the "uri" query parameter, downloads that page
// with cURL and hands the HTML to process_html() together with an XMLWriter
// that streams to the response body. On any failure a <response><error>
// element is written instead.

// A missing or non-string parameter becomes an empty string, so it simply
// fails the validation below instead of raising a notice/TypeError.
$uri = (isset($_GET['uri']) && is_string($_GET['uri'])) ? $_GET['uri'] : '';

// Set up XML serialization straight to the output stream
$writer = new XMLWriter();
$writer->openURI('php://output');
$writer->startDocument('1.0', 'UTF-8');
$writer->setIndent(true);
$writer->setIndentString("    ");

// Validate the URI. The pattern is anchored at the start and both dots of
// the host name are escaped, so only https://<sub>.academia.edu/<letters>...
// is accepted. Without the anchor any URL that merely contained such a
// substring (e.g. in its query string) would be fetched by the server.
// The tail is intentionally left open, as in the original check.
if (preg_match('/^https:\\/\\/[a-z]+\\.academia\\.edu\\/[A-Za-z]+/', $uri)) {
    // Initiate cURL; the body is returned as a string rather than echoed
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $uri);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);

    // Perform the transfer exactly once and test that result; calling
    // curl_exec() again in the condition would issue a second request.
    $output = curl_exec($ch);
    if ($output !== false) {
        process_html($output, $writer);
    } else {
        $writer->startElement('response');
        $writer->writeElement('error', 'Unable to retrieve data from Academia.edu URI.');
        $writer->endElement();
    }
    curl_close($ch);
} else {
    $writer->startElement('response');
    $writer->writeElement('error', 'URI does not validate.');
    $writer->endElement();
}
function process_html($output, $writer)
{
    //get creator metadata
    preg_match('/c\\.User\\.set_viewed\\((.*)\\);\\n/', $output, $matches);