/**
 * Parse a WXR file with the first parser whose PHP extension is available.
 *
 * Selection order: SimpleXML, then the XML extension, then the regex-based
 * parser. Exactly one parser is run, and whatever it returns is handed back
 * unchanged.
 *
 * NOTE(review): unlike what "fall back on bad XML" wording might suggest, this
 * variant never retries with a different parser after the first one has run;
 * confirm that is intended.
 *
 * @param string $file WXR file to parse; passed straight through to the parser.
 * @return mixed The selected parser's parse() result.
 */
function parse($file) {
    if (extension_loaded('simplexml')) {
        $parser = new WXR_Parser_SimpleXML();
    } elseif (extension_loaded('xml')) {
        $parser = new WXR_Parser_XML();
    } else {
        // Neither XML extension is loaded: regular expressions are all we have.
        $parser = new WXR_Parser_Regex();
    }

    return $parser->parse($file);
}
/**
 * Parse a WXR file, preferring a real XML parser and falling back to regex.
 *
 * SimpleXML is used when its extension is loaded; otherwise the XML extension
 * is used when loaded. The chosen parser's result is returned as-is unless it
 * is a WP error carrying that parser's own "parse error" code, in which case
 * the file is treated as malformed XML and re-parsed with the regex parser.
 * When neither extension is loaded the regex parser is used directly.
 *
 * If IMPORT_DEBUG is defined and truthy, the XML parser's error details are
 * echoed before the regex retry.
 *
 * @param string $file WXR file to parse; passed straight through to the parser.
 * @return mixed The result of whichever parser produced the final answer.
 */
function parse($file) {
    if (extension_loaded('simplexml')) {
        $parser = new WXR_Parser_SimpleXML();
        $result = $parser->parse($file);

        // Anything other than a SimpleXML parse error (success, or an
        // invalid-WXR error) is final.
        $is_malformed = is_wp_error($result) && 'SimpleXML_parse_error' == $result->get_error_code();
        if (!$is_malformed) {
            return $result;
        }
    } elseif (extension_loaded('xml')) {
        $parser = new WXR_Parser_XML();
        $result = $parser->parse($file);

        // Anything other than an XML parse error (success, or an
        // invalid-WXR error) is final.
        $is_malformed = is_wp_error($result) && 'XML_parse_error' == $result->get_error_code();
        if (!$is_malformed) {
            return $result;
        }
    }

    // Reaching this point with $result set means an XML parser ran and reported
    // malformed XML; show the details in debug mode before retrying with regex.
    if (isset($result) && defined('IMPORT_DEBUG') && IMPORT_DEBUG) {
        echo '<pre>';
        if ('SimpleXML_parse_error' == $result->get_error_code()) {
            // SimpleXML reports a list of error objects.
            foreach ($result->get_error_data() as $error) {
                echo $error->line, ':', $error->column, ' ', esc_html($error->message), "\n";
            }
        } elseif ('XML_parse_error' == $result->get_error_code()) {
            // The XML parser reports a single [line, column, message] triple.
            $error = $result->get_error_data();
            echo $error[0], ':', $error[1], ' ', esc_html($error[2]);
        }
        echo '</pre>';
        echo '<p><strong>', __('There was an error when reading this WXR file', 'wordpress-importer'), '</strong><br />';
        echo __('Details are shown above. The importer will now try again with a different parser...', 'wordpress-importer'), '</p>';
    }

    // Last resort: no XML extension available, or the XML was malformed.
    $fallback = new WXR_Parser_Regex();

    return $fallback->parse($file);
}
/**
 * Ensure that the regex parser can still parse invalid CDATA blocks (i.e. those
 * with "]]>" unescaped within a CDATA section).
 *
 * Parses the crazy-cdata.xml fixture with WXR_Parser_Regex and checks the first
 * post's content and each of its postmeta values against the expected strings.
 * Any postmeta key not listed below fails the test.
 */
function test_unescaped_cdata_closing_sequence() {
    $file = DIR_TESTDATA . '/export/crazy-cdata.xml';

    $parser = new WXR_Parser_Regex();
    $result = $parser->parse($file);

    $post = $result['posts'][0];
    $this->assertEquals('Content with nested <![CDATA[ tags ]]> :)', $post['post_content']);

    foreach ($post['postmeta'] as $meta) {
        switch ($meta['key']) {
            case 'Plain string':
                $value = 'Foo';
                break;
            case 'Closing CDATA':
                $value = ']]>';
                break;
            case 'Alot of CDATA':
                $value = 'This has <![CDATA[ opening and ]]> closing <![CDATA[ tags like this: ]]>';
                break;
            default:
                // fail() aborts the test, so $value is never read unset below.
                // Name the parser from the instance under test; the previous
                // message referenced an undefined variable.
                $this->fail('Unknown postmeta (' . $meta['key'] . ') was parsed out by ' . get_class($parser));
        }

        $this->assertEquals($value, $meta['value']);
    }
}