parse() public method

public parse ( string $file ) : array
$file string
return array
Example #1
0
 /**
  * Import the selected posts of a parsed export file into the current book.
  *
  * The file named by $current_import['file'] is parsed, and every parsed post
  * that is both flagged for import and listed in $current_import['chapters']
  * is run through tidy() and scrapeAndKneadImages(), inserted (or mapped onto
  * the book info post when its type is 'metadata'), given its post meta and
  * consolidated. A notice with the number of processed posts is appended to
  * $_SESSION['pb_notices'].
  *
  * @param array $current_import Import descriptor; this method reads its
  *                              'file' and 'chapters' keys.
  *
  * @return bool False when the file cannot be parsed, otherwise whatever
  *              revokeCurrentImport() returns.
  */
 function import(array $current_import)
 {
     try {
         $parser = new Parser();
         $xml = $parser->parse($current_import['file']);
     } catch (\Exception $e) {
         // A file that cannot be parsed aborts the import; callers only see false.
         return false;
     }
     $this->pbCheck($xml);
     if ($this->isPbWxr) {
         $xml['posts'] = $this->customNestedSort($xml['posts']);
     }
     // Flip the selected IDs so that membership is a cheap isset() lookup.
     $match_ids = array_flip(array_keys($current_import['chapters']));
     $chapter_parent = $this->getChapterParent();
     $total = 0;
     // Loop-invariant values, built once.
     // The XML prolog makes DOMDocument::loadHTML() treat the snippet as UTF-8.
     $utf8_hack = '<?xml version="1.0" encoding="UTF-8"?>';
     $wrapper_tags = array('<html>', '</html>', '<body>', '</body>');
     // Buffer libxml warnings instead of emitting them, and remember the
     // previous setting so this global switch can be restored afterwards.
     $previous_libxml_setting = libxml_use_internal_errors(true);
     foreach ($xml['posts'] as $p) {
         // Skip
         if (!$this->flaggedForImport($p['post_id'])) {
             continue;
         }
         if (!isset($match_ids[$p['post_id']])) {
             continue;
         }
         // Insert
         $post_type = $this->determinePostType($p['post_id']);
         // Load HTML snippet into DOMDocument using UTF-8 hack
         $doc = new \DOMDocument();
         $doc->loadHTML($utf8_hack . $this->tidy($p['post_content']));
         // Download images, change image paths
         $doc = $this->scrapeAndKneadImages($doc);
         $html = $doc->saveXML($doc->documentElement);
         // Remove auto-created <html> <body> and <!DOCTYPE> tags.
         $html = str_replace($wrapper_tags, '', $html);
         $html = preg_replace('/^<!DOCTYPE.+?>/', '', $html);
         if ('metadata' === $post_type) {
             // Metadata is attached to the existing book info post.
             $pid = $this->bookInfoPid();
         } else {
             $pid = $this->insertNewPost($post_type, $p, $html, $chapter_parent);
             if ('part' === $post_type) {
                 // Posts that follow a part are inserted with that part as parent.
                 $chapter_parent = $pid;
             }
         }
         if (isset($p['postmeta']) && is_array($p['postmeta'])) {
             $this->importPbPostMeta($pid, $post_type, $p);
         }
         Book::consolidatePost($pid, get_post($pid));
         // Reorder
         ++$total;
     }
     // TODO: Handle errors gracefully. For now the buffered libxml errors are
     // discarded and the caller's error-handling mode is restored.
     libxml_clear_errors();
     libxml_use_internal_errors($previous_libxml_setting);
     // Done
     $_SESSION['pb_notices'][] = sprintf(__('Imported %s chapters.', 'pressbooks'), $total);
     return $this->revokeCurrentImport();
 }
Example #2
0
 /**
  * Import the selected posts and custom terms of a parsed export file.
  *
  * The file named by $current_import['file'] is parsed. Terms from the file
  * (after the 'pb_import_custom_terms' filter) are inserted when term_exists()
  * does not find them. Every parsed post that is both flagged for import and
  * listed in $current_import['chapters'] is then run through tidy() and
  * scrapeAndKneadImages(), inserted (or mapped onto the book info post when
  * its type is 'metadata'), linked to its terms if it is a custom post type,
  * given its post meta and consolidated. A notice with the number of
  * processed posts is appended to $_SESSION['pb_notices'].
  *
  * @param array $current_import Import descriptor; this method reads its
  *                              'file' and 'chapters' keys.
  *
  * @return bool False when the file cannot be parsed, otherwise whatever
  *              revokeCurrentImport() returns.
  */
 function import(array $current_import)
 {
     try {
         $parser = new Parser();
         $xml = $parser->parse($current_import['file']);
     } catch (\Exception $e) {
         // A file that cannot be parsed aborts the import; callers only see false.
         return false;
     }
     $this->pbCheck($xml);
     if ($this->isPbWxr) {
         $xml['posts'] = $this->customNestedSort($xml['posts']);
     }
     // Flip the selected IDs so that membership is a cheap isset() lookup.
     $match_ids = array_flip(array_keys($current_import['chapters']));
     $chapter_parent = $this->getChapterParent();
     $total = 0;
     $taxonomies = apply_filters('pb_import_custom_taxonomies', array('front-matter-type', 'chapter-type', 'back-matter-type'));
     $custom_post_types = apply_filters('pb_import_custom_post_types', array());
     // set custom terms...
     $terms = apply_filters('pb_import_custom_terms', $xml['terms']);
     // and import them if they don't already exist.
     foreach ($terms as $t) {
         $term = term_exists($t['term_name'], $t['term_taxonomy']);
         if (null === $term || 0 === $term) {
             wp_insert_term($t['term_name'], $t['term_taxonomy'], array('description' => $t['term_description'], 'slug' => $t['slug']));
         }
     }
     // Loop-invariant values, built once.
     // The XML prolog makes DOMDocument::loadHTML() treat the snippet as UTF-8.
     $utf8_hack = '<?xml version="1.0" encoding="UTF-8"?>';
     $wrapper_tags = array('<html>', '</html>', '<body>', '</body>');
     // Buffer libxml warnings instead of emitting them, and remember the
     // previous setting so this global switch can be restored afterwards.
     $previous_libxml_setting = libxml_use_internal_errors(true);
     foreach ($xml['posts'] as $p) {
         // Skip
         if (!$this->flaggedForImport($p['post_id'])) {
             continue;
         }
         if (!isset($match_ids[$p['post_id']])) {
             continue;
         }
         // Insert
         $post_type = $this->determinePostType($p['post_id']);
         // Load HTML snippet into DOMDocument using UTF-8 hack
         $doc = new \DOMDocument();
         $doc->loadHTML($utf8_hack . $this->tidy($p['post_content']));
         // Download images, change image paths
         $doc = $this->scrapeAndKneadImages($doc);
         $html = $doc->saveXML($doc->documentElement);
         // Remove auto-created <html> <body> and <!DOCTYPE> tags.
         $html = str_replace($wrapper_tags, '', $html);
         $html = preg_replace('/^<!DOCTYPE.+?>/', '', $html);
         if ('metadata' === $post_type) {
             // Metadata is attached to the existing book info post.
             $pid = $this->bookInfoPid();
         } else {
             $pid = $this->insertNewPost($post_type, $p, $html, $chapter_parent);
             if ('part' === $post_type) {
                 // Posts that follow a part are inserted with that part as parent.
                 $chapter_parent = $pid;
             }
         }
         // if this is a custom post type,
         // and it has terms associated with it...
         // (!empty() also covers posts that carry no 'terms' key at all.)
         if (in_array($post_type, $custom_post_types, true) && !empty($p['terms'])) {
             // associate post with terms, restricted to the allowed taxonomies.
             foreach ($p['terms'] as $t) {
                 if (in_array($t['domain'], $taxonomies, true)) {
                     wp_set_object_terms($pid, $t['slug'], $t['domain'], true);
                 }
             }
         }
         if (isset($p['postmeta']) && is_array($p['postmeta'])) {
             $this->importPbPostMeta($pid, $post_type, $p);
         }
         Book::consolidatePost($pid, get_post($pid));
         // Reorder
         ++$total;
     }
     // TODO: Handle errors gracefully. For now the buffered libxml errors are
     // discarded and the caller's error-handling mode is restored.
     libxml_clear_errors();
     libxml_use_internal_errors($previous_libxml_setting);
     // Done
     $_SESSION['pb_notices'][] = sprintf(__('Imported %s chapters.', 'pressbooks'), $total);
     return $this->revokeCurrentImport();
 }