# Text-file browser entry point.
#
# get_param($key, $default = "") returns $_REQUEST[$key] when that key is set and
# $default otherwise.
#
# The main part lifts the execution time limit, builds the converter options in
# $xmlg, reads the 'title' request parameter (default 'Main Page') and the
# lower-cased 'format' request parameter (default 'xhtml'), loads the wikitext
# for that title through ContentProviderTextFile and converts it to XML with
# MediaWikiConverter::article2xml().
#   - format "xml": sends a text/xml content type, prints an XML declaration
#     and then the generated XML.
#   - any other format: handled in the else branch; for "text" the
#     'plaintext_markup' option is switched on first.
#
# NOTE(review): $base_text_dir is not assigned in the code shown here; it is
# presumably defined by one of the required files - confirm.
# NOTE(review): values in $_REQUEST are already URL-decoded by PHP, so the extra
# urldecode() on the title decodes a second time (a literal '+' in a title would
# become a space) - confirm whether callers double-encode on purpose.
require_once "xml2xhtml.php"; require_once "mediawiki_converter.php"; # FUNCTIONS function get_param($key, $default = "") { if (!isset($_REQUEST[$key])) { return $default; } return $_REQUEST[$key]; } # MAIN @set_time_limit(0); # No time limit $xmlg = array('site_base_url' => "SBU", 'resolvetemplates' => true, 'templates' => array(), 'namespace_template' => 'Vorlage'); $content_provider = new ContentProviderTextFile(); $converter = new MediaWikiConverter(); $title = urldecode(get_param('title', urlencode('Main Page'))); $xmlg['page_title'] = $title; $format = strtolower(get_param('format', 'xhtml')); $content_provider->basedir = $base_text_dir; $text = $content_provider->get_wiki_text($title); $xml = $converter->article2xml($title, $text, $xmlg); if ($format == "xml") { # XML header('Content-type: text/xml; charset=utf-8'); print "<?xml version='1.0' encoding='UTF-8' ?>\n"; print $xml; } else { if ($format == "text") { # Plain text $xmlg['plaintext_markup'] = true;
// Converts the wikitext held in $wikitext to XML and writes the result to the
// file named by $argv[2]. The article title is derived from $filename.
// Exits with status 1 when the input is empty or the output cannot be written.
if (strlen($wikitext) == 0) {
    echo "Bad input file\n";
    exit(1);
}

// The title is the last path component of $filename, without its extension.
$filename_parts = explode("/", $filename);
$title = $filename_parts[count($filename_parts) - 1];
// Remove only a trailing ".txt"; a plain str_replace() would also delete
// ".txt" occurring in the middle of the name.
$title = preg_replace('/\.txt\z/', '', $title);
$title = urldecode($title);

// Configures options for converting to XML
$xmlg = array();
$xmlg["usetemplates"] = "none";
$xmlg["resolvetemplates"] = "none";
$xmlg["templates"] = array();
$xmlg['add_gfdl'] = false;
$xmlg['keep_interlanguage'] = false;
$xmlg['keep_categories'] = false;
$xmlg['text_hide_images'] = true;
$xmlg['text_hide_tables'] = true;
$xmlg["useapi"] = false;
$xmlg["xml_articles_header"] = "<articles>";

// No idea what it does, but it makes it work
// NOTE(review): presumably the converter reads $content_provider as a global - confirm.
$content_provider = new ContentProviderHTTP();
$converter = new MediaWikiConverter();
$xml = $converter->article2xml($title, $wikitext, $xmlg);

// To convert to plain text:
//~ require_once("xml2tree.php");
//~ require_once("xml2txt.php");
//~ $x2t = new xml2php ;
//~ $tree = $x2t->scanString($xml);
//~ $text = trim($tree->parse($tree));

// Report a failed write instead of silently exiting with status 0.
if (file_put_contents($argv[2], $xml) === false) {
    echo "Could not write output file\n";
    exit(1);
}
unlink($x); } $xml = array(); }

## MAIN PROGRAM
# When the request carries a truthy 'doit' parameter, collect the submitted
# wikitext and the conversion options from the request into $xmlg.
if (get_param('doit', false)) { # Process
    # Undo magic-quotes escaping only when that setting is really switched on.
    # Calling stripslashes() unconditionally deletes legitimate backslashes from
    # the wikitext on every installation without magic quotes (all of PHP 5.4+).
    $wikitext = get_param('text');
    $magic_quotes = strtolower((string) ini_get('magic_quotes_gpc'));
    if (in_array($magic_quotes, array('1', 'on', 'true', 'yes'), true)) {
        $wikitext = stripslashes($wikitext);
    }

    if (!defined('MEDIAWIKI')) { # Stand-alone
        $content_provider = new ContentProviderHTTP();
    } else { # MediaWiki extension
        $content_provider = new ContentProviderMySQL();
    }
    $converter = new MediaWikiConverter();

    # Conversion options taken from the request
    $xmlg["useapi"] = isset($_REQUEST['useapi']);
    $xmlg["book_title"] = get_param('document_title');
    $xmlg["site_base_url"] = get_param('site');
    $xmlg["resolvetemplates"] = get_param('use_templates', 'all');
    # One template name per line of the 'templates' parameter
    $xmlg['templates'] = explode("\n", get_param('templates', ''));
    $xmlg['add_gfdl'] = get_param('add_gfdl', false);
    $xmlg['keep_interlanguage'] = get_param('keep_interlanguage', false);
    $xmlg['keep_categories'] = get_param('keep_categories', false);

    # the article list
    $aArticles = array();
    $t = microtime_float();
    $xml = "";
    $format = get_param('output_format');
    $whatsthis = get_param('whatsthis');

    # Catch listnamepage
# Conversion options used for every parser test
$xmlg["resolvetemplates"] = 'all';
$xmlg['templates'] = array();
$xmlg['add_gfdl'] = false;
$xmlg['keep_interlanguage'] = true;
$xmlg['keep_categories'] = true;
$xmlg['xml_articles_header'] = "<articles>";
$xmlg['xhtml_justify'] = false;
$xmlg['xhtml_logical_markup'] = false;
$xmlg['xhtml_source'] = false;

$cnt = 1;
print "<table border=1 width='100%'><tr><th>Test</th><th>Result</th><th>wiki2xml</th><th>Input</th><th>XML</th></tr>";

# For each test: convert the input to XML, then to XHTML, and keep only what
# lies between <body> and </body> for comparison with the expected result.
foreach ($tests as $t) {
    $res = $t->result;
    $col = '';
    $content_provider = new ContentProviderHTTP();
    $converter = new MediaWikiConverter();
    $xml = $converter->article2xml("", $t->input, $xmlg);
    $nr = $converter->articles2xhtml($xml, $xmlg);

    # array_pop()/array_shift() take their argument by reference, so the
    # explode() result has to be stored in a variable first; passing it directly
    # raises "Only variables should be passed by reference".
    $nr_parts = explode('<body>', $nr, 2);
    $nr = array_pop($nr_parts);
    $nr_parts = explode('</body>', $nr, 2);
    $nr = array_shift($nr_parts);

    # Normalise the expected result so that it can be compared despite the
    # formatting used by the parser tests
    $res = trim($res);
    $res = str_replace("<li> ", "<li>", $res);
    $res = str_replace("<dd> ", "<dd>", $res);
    $res = str_replace("\n<", "<", $res);
    $res = str_replace("\n", " ", $res);
    $res = str_replace(" </p>", "</p>", $res);
    # Collapse runs of spaces into one space. Replacing a single space with a
    # single space never changes the string, so the search string must be two
    # spaces for this loop to do anything.
    do {
        $o = $res;
        $res = str_replace("  ", " ", $res);
    } while ($o != $res);