Beispiel #1
0
require_once "xml2xhtml.php";
require_once "mediawiki_converter.php";
# FUNCTIONS
/**
 * Look up a request parameter, falling back to a default.
 *
 * @param string $key     Name of the entry to read from $_REQUEST.
 * @param mixed  $default Value handed back when the entry is not set
 *                        (isset() also treats a null entry as not set).
 * @return mixed The $_REQUEST entry if it is set, otherwise $default.
 */
function get_param($key, $default = "")
{
    return isset($_REQUEST[$key]) ? $_REQUEST[$key] : $default;
}
# MAIN
# Request handling: read the page title and output format from the request,
# load the page's wiki text and convert it to XML.
# Remove the execution time limit; "@" silences any warning the call raises.
@set_time_limit(0);
# Conversion options passed to the converter below.
$xmlg = array('site_base_url' => "SBU", 'resolvetemplates' => true, 'templates' => array(), 'namespace_template' => 'Vorlage');
# $content_provider and $converter come from classes that are not defined in
# the visible code (presumably in the files required above - TODO confirm).
$content_provider = new ContentProviderTextFile();
$converter = new MediaWikiConverter();
# Page title from the request, defaulting to "Main Page".
# NOTE(review): per the PHP manual, $_REQUEST values are already URL-decoded,
# so this urldecode() is a second decode and would alter titles containing
# "+" or "%" - confirm whether callers double-encode before changing it.
$title = urldecode(get_param('title', urlencode('Main Page')));
$xmlg['page_title'] = $title;
# Output format, lower-cased; defaults to "xhtml".
$format = strtolower(get_param('format', 'xhtml'));
# NOTE(review): $base_text_dir is not assigned anywhere in the visible code;
# presumably it is set by one of the required files - verify.
$content_provider->basedir = $base_text_dir;
# Fetch the wiki text for the title and convert it to XML.
$text = $content_provider->get_wiki_text($title);
$xml = $converter->article2xml($title, $text, $xmlg);
if ($format == "xml") {
    # XML
    header('Content-type: text/xml; charset=utf-8');
    print "<?xml version='1.0' encoding='UTF-8' ?>\n";
    print $xml;
} else {
    if ($format == "text") {
        # Plain text
        $xmlg['plaintext_markup'] = true;
Beispiel #2
0
// Abort with a non-zero exit status when there is no wikitext to convert.
if (strlen($wikitext) == 0) {
    echo "Bad input file\n";
    exit(1);
}
// The article title is the last "/"-separated component of the input file
// name, without its ".txt" extension, URL-decoded.
$filename_parts = explode("/", $filename);
$title = $filename_parts[count($filename_parts) - 1];
// Strip only a trailing ".txt": a plain str_replace() would also remove
// ".txt" from the middle of a name (e.g. "a.txt.b.txt" must become "a.txt.b").
// The D modifier keeps "$" from matching before a final newline.
$title = preg_replace('/\.txt$/D', "", $title);
$title = urldecode($title);
// Options for the wikitext-to-XML conversion.
$xmlg = array();
$xmlg["usetemplates"] = "none";
$xmlg["resolvetemplates"] = "none";
$xmlg["templates"] = array();
$xmlg['add_gfdl'] = false;
$xmlg['keep_interlanguage'] = false;
$xmlg['keep_categories'] = false;
$xmlg['text_hide_images'] = true;
$xmlg['text_hide_tables'] = true;
$xmlg["useapi"] = false;
$xmlg["xml_articles_header"] = "<articles>";
// $content_provider is not referenced again in the visible code; presumably
// the converter reads it as a global (TODO confirm), so keep this assignment
// even though it looks unused.
$content_provider = new ContentProviderHTTP();
$converter = new MediaWikiConverter();
$xml = $converter->article2xml($title, $wikitext, $xmlg);
// To convert to plain text:
//~ require_once("xml2tree.php");
//~ require_once("xml2txt.php");
//~ $x2t = new xml2php ;
//~ $tree = $x2t->scanString($xml);
//~ $text = trim($tree->parse($tree));
// Write the XML to the output path given as the second command-line argument.
// file_put_contents() returns false on failure (0 is a valid byte count), so
// compare strictly and exit non-zero instead of failing silently.
if (file_put_contents($argv[2], $xml) === false) {
    echo "Could not write output file\n";
    exit(1);
}
Beispiel #3
0
        unlink($x);
    }
    $xml = array();
}
## MAIN PROGRAM
if (get_param('doit', false)) {
    # Process
    $wikitext = stripslashes(get_param('text'));
    if (!defined('MEDIAWIKI')) {
        # Stand-alone
        $content_provider = new ContentProviderHTTP();
    } else {
        # MediaWiki extension
        $content_provider = new ContentProviderMySQL();
    }
    $converter = new MediaWikiConverter();
    $xmlg["useapi"] = isset($_REQUEST['useapi']);
    $xmlg["book_title"] = get_param('document_title');
    $xmlg["site_base_url"] = get_param('site');
    $xmlg["resolvetemplates"] = get_param('use_templates', 'all');
    $xmlg['templates'] = explode("\n", get_param('templates', ''));
    $xmlg['add_gfdl'] = get_param('add_gfdl', false);
    $xmlg['keep_interlanguage'] = get_param('keep_interlanguage', false);
    $xmlg['keep_categories'] = get_param('keep_categories', false);
    # the article list
    $aArticles = array();
    $t = microtime_float();
    $xml = "";
    $format = get_param('output_format');
    $whatsthis = get_param('whatsthis');
    # Catch listnamepage
Beispiel #4
0
# Conversion and XHTML rendering options used for every test case below.
$xmlg["resolvetemplates"] = 'all';
$xmlg['templates'] = array();
$xmlg['add_gfdl'] = false;
$xmlg['keep_interlanguage'] = true;
$xmlg['keep_categories'] = true;
$xmlg['xml_articles_header'] = "<articles>";
$xmlg['xhtml_justify'] = false;
$xmlg['xhtml_logical_markup'] = false;
$xmlg['xhtml_source'] = false;
# Counter starting at 1; it is not used in the visible code (presumably
# a running test number - TODO confirm).
$cnt = 1;
# Open the HTML results table and emit its five-column header row.
print "<table border=1 width='100%'><tr><th>Test</th><th>Result</th><th>wiki2xml</th><th>Input</th><th>XML</th></tr>";
foreach ($tests as $t) {
    $res = $t->result;
    $col = '';
    $content_provider = new ContentProviderHTTP();
    $converter = new MediaWikiConverter();
    $xml = $converter->article2xml("", $t->input, $xmlg);
    $nr = $converter->articles2xhtml($xml, $xmlg);
    $nr = array_pop(explode('<body>', $nr, 2));
    $nr = array_shift(explode('</body>', $nr, 2));
    # Fixing things to compare to the stupid parser test formatting
    $res = trim($res);
    $res = str_replace("<li> ", "<li>", $res);
    $res = str_replace("<dd> ", "<dd>", $res);
    $res = str_replace("\n<", "<", $res);
    $res = str_replace("\n", " ", $res);
    $res = str_replace(" </p>", "</p>", $res);
    do {
        $o = $res;
        $res = str_replace("  ", " ", $res);
    } while ($o != $res);