/**
 * Depth-first search for the first BODY element of a DOM (sub)tree.
 *
 * Document nodes are searched through their children. Element nodes are first
 * compared (case-insensitively) against the tag name "body" and, when they do
 * not match, searched through their children as well. Any other node type
 * yields null.
 *
 * @param object $root Node to start from; it must provide node_type(),
 *                     tagname(), first_child() and next_sibling().
 * @return object|null The first BODY element in document order, or null when
 *                     the subtree contains none.
 */
function &traverse_dom_tree_pdf(&$root)
{
    // Functions returning by reference have to return a variable, hence the
    // dedicated "nothing found" holder.
    $not_found = null;

    $kind = $root->node_type();
    $is_document = $kind == XML_DOCUMENT_NODE;
    $is_element = !$is_document && $kind == XML_ELEMENT_NODE;

    // Only documents and elements can contain (or be) the BODY element.
    if (!$is_document && !$is_element) {
        return $not_found;
    }

    // An element may itself be the node we are looking for.
    if ($is_element && strtolower($root->tagname()) == "body") {
        return $root;
    }

    // Otherwise descend into the children, in document order, and stop at
    // the first subtree that contains a BODY element.
    $node =& $root->first_child();
    while ($node) {
        $match =& traverse_dom_tree_pdf($node);
        if ($match) {
            return $match;
        }
        $node =& $node->next_sibling();
    }

    return $not_found;
}
Example #2
0
 /**
  * Build an inline-block box for an IFRAME element and render the framed
  * HTML document inside it.
  *
  * The box is left without a child when the SRC attribute is missing or
  * blank, when the content cannot be fetched, when it is not served as
  * text/html, or when it cannot be parsed into a DOM tree.
  *
  * @param object $root     DOM node of the IFRAME element.
  * @param object $pipeline Pipeline used to resolve and fetch the frame URL
  *                         and to switch stylesheets while the frame is built.
  */
 function IFrameBox(&$root, $pipeline)
 {
     $this->InlineBlockBox();
     // If NO src attribute specified, just return.
     if (!$root->has_attribute('src') || trim($root->get_attribute('src')) == '') {
         return;
     }
     // Determine the fully qualified URL of the frame content
     $src = $root->get_attribute('src');
     $url = $pipeline->guess_url($src);
     $data = $pipeline->fetch($url);
     /**
      * If framed page could not be fetched return immediately
      */
     if (is_null($data)) {
         return;
     }
     /**
      * Render only iframes containing HTML.
      *
      * The Content-Type header may carry parameters after the ';' sign, and
      * media types are case-insensitive, so only the trimmed, lower-cased
      * media type itself is compared.
      */
     $content_type = $data->get_additional_data('Content-Type');
     $content_type_array = explode(';', (string) $content_type);
     if (strtolower(trim($content_type_array[0])) != "text/html") {
         // NOTE(review): this return skips pop_base_url(), unlike the end of
         // this function - confirm whether fetch() pushes a base URL that
         // should be released here as well.
         return;
     }
     $html = $data->get_content();
     // Remove control symbols if any
     $html = preg_replace('/[\\x00-\\x07]/', "", $html);
     $converter = Converter::create();
     $html = $converter->to_utf8($html, $data->detect_encoding());
     $html = html2xhtml($html);
     $tree = TreeBuilder::build($html);
     // The parser may fail to produce a document; in that case the frame
     // stays empty instead of failing inside traverse_dom_tree_pdf().
     if (!is_null($tree)) {
         // Save current stylesheet, as each frame may load its own stylesheets
         $pipeline->pushCSS();
         $css =& $pipeline->getCurrentCSS();
         $css->scan_styles($tree, $pipeline);
         $frame_root = traverse_dom_tree_pdf($tree);
         $box_child =& create_pdf_box($frame_root, $pipeline);
         $this->add_child($box_child);
         // Restore old stylesheet
         $pipeline->popCSS();
     }
     // Called exactly as on the regular path, whether or not the frame
     // content could be parsed.
     $pipeline->pop_base_url();
 }
 /**
  * Parse prepared XHTML and build the box tree for its BODY element.
  *
  * When the parser does not return a document, the "cannot parse" template
  * is sent to the output, the failure is written to the error log and the
  * script is terminated.
  *
  * @param string      $html     XHTML source to parse.
  * @param object      $pipeline Pipeline providing the base URL, the current
  *                              stylesheet and the output driver.
  * @param object|null $media    Media description; when null, media size,
  *                              margins and scales are not set up.
  * @return mixed Whatever create_pdf_box() returns for the BODY node.
  */
 function &process($html, &$pipeline, &$media)
 {
     // Run the XML parser on the prepared XHTML; without a valid document
     // there is nothing to process.
     $document = TreeBuilder::build($html);
     if ($document === null) {
         readfile(HTML2PS_DIR . '/templates/cannot_parse.html');
         error_log(sprintf("Cannot parse document: %s", $pipeline->get_base_url()));
         die("HTML2PS Error");
     }

     /**
      * Detect the base URI of the document.
      *
      * HTML 4.01, section 12.4.1, requires user agents to calculate the base
      * URI using the following precedence (highest priority first):
      *
      * 1. The base URI set by the BASE element.
      * 2. The base URI given by metadata discovered during a protocol
      *    interaction, such as an HTTP header (see [RFC2616]).
      * 3. By default, the URI of the current document. Not every HTML
      *    document has one (a valid document may appear in an e-mail, for
      *    instance); such documents are erroneous if they contain relative
      *    URIs and rely on a default base URI.
      *
      * Only the BASE element is looked at here.
      *
      * @todo fall back to the protocol metadata
      */
     $this->_scan_base($document, $pipeline);

     /**
      * Collect the CSS references and definitions found in STYLE / LINK
      * nodes. The whole document is scanned because STYLE nodes frequently
      * appear inside the document body, although the HTML standard does not
      * allow that.
      */
     $stylesheet =& $pipeline->getCurrentCSS();
     $stylesheet->scan_styles($document, $pipeline);

     // Set up media size, margins and scales when a media object was given.
     if ($media !== null) {
         $pipeline->get_page_media(1, $media);
         $pipeline->output_driver->update_media($media);
         $pipeline->_setupScales($media);
     }

     // Locate the BODY element and build its box tree.
     $body_node =& traverse_dom_tree_pdf($document);
     $body_box =& create_pdf_box($body_node, $pipeline);

     return $body_box;
 }
 /**
  * Build a container box for a FRAME element and render the framed HTML
  * document inside it.
  *
  * The box is left without a child when the SRC attribute is missing or
  * blank, when the content cannot be fetched, when it is not served as
  * text/html, or when it cannot be parsed into a DOM tree.
  *
  * @param object $root     DOM node of the FRAME element.
  * @param object $pipeline Pipeline used to resolve and fetch the frame URL
  *                         and to access the CSS state and stylesheets.
  */
 function FrameBox(&$root, &$pipeline)
 {
     $css_state =& $pipeline->getCurrentCSSState();
     // Inherit 'border' CSS value from parent (FRAMESET tag), if current FRAME
     // has no FRAMEBORDER attribute, and FRAMESET has one
     $parent = $root->parent();
     if (!$root->has_attribute('frameborder') && $parent->has_attribute('frameborder')) {
         $parent_border = $css_state->getPropertyOnLevel(CSS_BORDER, CSS_PROPERTY_LEVEL_PARENT);
         $css_state->setProperty(CSS_BORDER, $parent_border->copy());
     }
     $this->GenericContainerBox($root);
     // If NO src attribute specified (or it is blank), just return: a blank
     // value does not name any frame content to load.
     if (!$root->has_attribute('src') || trim($root->get_attribute('src')) == '') {
         return;
     }
     // Determine the fully qualified URL of the frame content
     $src = $root->get_attribute('src');
     $url = $pipeline->guess_url($src);
     $data = $pipeline->fetch($url);
     /**
      * If framed page could not be fetched return immediately
      */
     if (is_null($data)) {
         return;
     }
     /**
      * Render only frames containing HTML.
      *
      * The Content-Type header may carry parameters after the ';' sign, and
      * media types are case-insensitive, so only the trimmed, lower-cased
      * media type itself is compared.
      */
     $content_type = $data->get_additional_data('Content-Type');
     $content_type_array = explode(';', (string) $content_type);
     if (strtolower(trim($content_type_array[0])) != "text/html") {
         // NOTE(review): this return skips pop_base_url(), unlike the end of
         // this function - confirm whether fetch() pushes a base URL that
         // should be released here as well.
         return;
     }
     $html = $data->get_content();
     // Remove control symbols if any
     $html = preg_replace('/[\\x00-\\x07]/', "", $html);
     $converter = Converter::create();
     $html = $converter->to_utf8($html, $data->detect_encoding());
     $html = html2xhtml($html);
     $tree = TreeBuilder::build($html);
     // The parser may fail to produce a document; in that case the frame
     // stays empty instead of failing inside traverse_dom_tree_pdf().
     if (!is_null($tree)) {
         // Save current stylesheet, as each frame may load its own stylesheets
         $pipeline->pushCSS();
         $css =& $pipeline->getCurrentCSS();
         $css->scan_styles($tree, $pipeline);
         $frame_root = traverse_dom_tree_pdf($tree);
         $box_child =& create_pdf_box($frame_root, $pipeline);
         $this->add_child($box_child);
         // Restore old stylesheet
         $pipeline->popCSS();
     }
     // Called exactly as on the regular path, whether or not the frame
     // content could be parsed.
     $pipeline->pop_base_url();
 }
 /**
  * Build a container box for a FRAME element and render the framed HTML
  * document inside it (variant working on the global $g_css / $g_css_obj
  * stylesheet data).
  *
  * The box is left without a child when the SRC attribute is missing or
  * blank, when the content cannot be fetched, when it is not served as
  * text/html, or when it cannot be parsed into a DOM tree.
  *
  * @param object $root     DOM node of the FRAME element.
  * @param object $pipeline Pipeline used to resolve and fetch the frame URL.
  */
 function FrameBox(&$root, &$pipeline)
 {
     // Inherit 'border' CSS value from parent (FRAMESET tag), if current FRAME
     // has no FRAMEBORDER attribute, and FRAMESET has one
     $parent = $root->parent();
     if (!$root->has_attribute('frameborder') && $parent->has_attribute('frameborder')) {
         pop_border();
         push_border(get_border());
     }
     $this->GenericContainerBox($root);
     // If NO src attribute specified (or it is blank), just return: a blank
     // value does not name any frame content to load.
     if (!$root->has_attribute('src') || trim($root->get_attribute('src')) == '') {
         return;
     }
     // Determine the fully qualified URL of the frame content
     $src = $root->get_attribute('src');
     $url = $pipeline->guess_url($src);
     $data = $pipeline->fetch($url);
     /**
      * If framed page could not be fetched return immediately
      */
     if (is_null($data)) {
         return;
     }
     /**
      * Render only frames containing HTML.
      *
      * The Content-Type header may carry parameters after the ';' sign, and
      * media types are case-insensitive, so only the trimmed, lower-cased
      * media type itself is compared.
      */
     $content_type = $data->get_additional_data('Content-Type');
     $content_type_array = explode(';', (string) $content_type);
     if (strtolower(trim($content_type_array[0])) != "text/html") {
         // NOTE(review): this return skips pop_base_url(), unlike the end of
         // this function - confirm whether fetch() pushes a base URL that
         // should be released here as well.
         return;
     }
     $html = $data->get_content();
     // Remove control symbols if any
     $html = preg_replace('/[\\x00-\\x07]/', "", $html);
     $converter = Converter::create();
     $html = $converter->to_utf8($html, $data->detect_encoding());
     $html = html2xhtml($html);
     $tree = TreeBuilder::build($html);
     global $g_css;
     global $g_css_obj;
     // The parser may fail to produce a document; in that case the frame
     // stays empty and the global stylesheet data is left untouched.
     if (!is_null($tree)) {
         // Save current stylesheet, as each frame may load its own stylesheets
         $old_css = $g_css;
         $old_obj = $g_css_obj;
         scan_styles($tree, $pipeline);
         // Temporary hack: convert CSS rule array to CSS object
         $g_css_obj = new CSSObject();
         foreach ($g_css as $rule) {
             $g_css_obj->add_rule($rule, $pipeline);
         }
         // TODO: stinks. Rewrite
         //
         $frame_root = traverse_dom_tree_pdf($tree);
         $box_child =& create_pdf_box($frame_root, $pipeline);
         $this->add_child($box_child);
         // Restore old stylesheet
         $g_css = $old_css;
         $g_css_obj = $old_obj;
     }
     // Called exactly as on the regular path, whether or not the frame
     // content could be parsed.
     $pipeline->pop_base_url();
 }
 /**
  * Build the box of a page margin area from its at-rule.
  *
  * The at-rule's CSS is applied on top of a default CSS state, the container
  * is initialised from that state, and a single child box is added:
  * - when the '-html2ps-html-content' property renders to a non-empty string,
  *   that markup is wrapped in a DIV, converted to boxes, and the first box
  *   of the resulting body box is used;
  * - otherwise the rendered 'content' property is turned into an inline text
  *   box.
  * Both CSS states pushed here are popped again before returning.
  *
  * @param object $pipeline Pipeline providing the CSS state and the counters.
  * @param object $at_rule  Margin at-rule carrying the CSS to apply and the
  *                         content properties.
  */
 function BoxPageMargin(&$pipeline, $at_rule)
 {
     $css_state =& $pipeline->get_current_css_state();
     $css_state->pushDefaultState();

     // The at-rule CSS is applied without a DOM node.
     $no_node = null;
     $at_rule->css->apply($no_node, $css_state, $pipeline);

     $this->GenericContainerBox();
     $this->readCSS($css_state);
     $css_state->pushDefaultstate();

     /**
      * Either 'content' or '-html2ps-html-content' supplies the margin box
      * content; a non-empty -html2ps-html-content takes precedence.
      */
     $html_property =& $at_rule->get_css_property(CSS_HTML2PS_HTML_CONTENT);
     $markup = $html_property->render($pipeline->get_counters());

     if ($markup === '') {
         // Plain text content
         $text_property =& $at_rule->get_css_property(CSS_CONTENT);
         $text = $text_property->render($pipeline->get_counters());
         $margin_content =& InlineBox::create_from_text($text, WHITESPACE_PRE_LINE, $pipeline);
     } else {
         // The markup is wrapped in a DIV because only the very first box of
         // the resulting tree is used as the margin box content.
         $wrapped = html2xhtml("<div>" . $markup . "</div>");
         $dom = TreeBuilder::build($wrapped);
         $dom_body = traverse_dom_tree_pdf($dom);
         $body_box =& create_pdf_box($dom_body, $pipeline);
         $margin_content =& $body_box->content[0];
     }

     $this->add_child($margin_content);

     // Undo both state pushes made above.
     $css_state->popState();
     $css_state->popState();
 }
 /**
  * Build the box tree for the BODY element of an already parsed document.
  *
  * @param object $dom_tree Root of the parsed DOM tree.
  * @param object $pipeline Pipeline handed on to create_pdf_box().
  * @return mixed Whatever create_pdf_box() returns for the node found by
  *               traverse_dom_tree_pdf().
  */
 function &build(&$dom_tree, &$pipeline)
 {
     // Locate the BODY element first, then convert it into boxes.
     $body_node =& traverse_dom_tree_pdf($dom_tree);
     $body_box =& create_pdf_box($body_node, $pipeline);

     return $body_box;
 }