/**
 * Build the inline box for a DOM node (variant based on the global CSS handler stack).
 *
 * A text node is turned into a box by InlineBox::create_from_text(), using the value
 * currently reported by the 'white-space' CSS handler. Any other node yields a new
 * InlineBox holding one child box per child node.
 *
 * @param mixed $root     DOM node the box is generated for
 * @param mixed $pipeline pipeline object forwarded to create_pdf_box()
 * @return mixed the generated box (returned by reference)
 */
function &create(&$root, &$pipeline)
 {
     // Text nodes are handled separately and never reach the container logic below
     if ($root->node_type() == XML_TEXT_NODE) {
         $white_space = get_css_handler('white-space');
         return InlineBox::create_from_text($root->content, $white_space->get());
     }

     $inline =& new InlineBox();

     // One child box per DOM child, in document order
     for ($node = $root->first_child(); $node; $node = $node->next_sibling()) {
         $node_box =& create_pdf_box($node, $pipeline);
         $inline->add_child($node_box);
     }

     // Anchor spans may end up with no content at all. As the "reflow" functions
     // drop empty inline boxes from the document tree, keep such a box alive by
     // giving it a near-zero-size whitespace child.
     if ($inline->is_null()) {
         push_css_defaults();
         pop_font_size();
         push_font_size('0.01pt');
         $filler = new WhitespaceBox();
         $inline->add_child($filler);
         pop_css_defaults();
     }

     return $inline;
 }
 /**
  * Build a TableRowBox and fill it with the boxes generated for the node's children.
  *
  * @param mixed $root     DOM node whose children become the row content
  * @param mixed $pipeline pipeline object forwarded to create_pdf_box()
  * @return mixed the new TableRowBox (returned by reference)
  */
 function &create(&$root, &$pipeline)
 {
     $row =& new TableRowBox();

     // Walk the children in document order, adding one box per child node
     for ($node = $root->first_child(); $node; $node = $node->next_sibling()) {
         $node_box =& create_pdf_box($node, $pipeline);
         $row->add_child($node_box);
     }

     return $row;
 }
 /**
  * Build a TableBox for a TABLE node: an initial row, CSS adjustments, the boxes of
  * all element children, and finally the table normalization passes.
  *
  * @param mixed $root     DOM node of the table
  * @param mixed $pipeline pipeline object providing the current CSS state
  * @return mixed the new TableBox (returned by reference)
  */
 function &create(&$root, &$pipeline)
 {
     $table =& new TableBox();
     $table->readCSS($pipeline->getCurrentCSSState());

     // The initial row is created under a default CSS state so that it does not
     // pick up table-specific properties ('overflow', for example)
     $state =& $pipeline->getCurrentCSSState();
     $state->pushDefaultState();
     $initial_row =& new TableRowBox($root);
     $initial_row->readCSS($state);
     $table->add_child($initial_row);
     $state->popState();

     // Cellspacing / cellpadding: with collapsed borders the table padding is reset
     // to the handler's default value
     if ($table->getCSSProperty(CSS_BORDER_COLLAPSE) == BORDER_COLLAPSE) {
         $padding =& CSS::get_handler(CSS_PADDING);
         $table->setCSSProperty(CSS_PADDING, $padding->default_value());
     }

     // Tables do not inherit 'text-align' in any browser the original author tested,
     // so it is forced to 'left' here. In
     //
     // <div style="text-align: center; background-color: green;">
     // <table width="100" bgcolor="red">
     // <tr><td>TEST
     // </table>
     // </div>
     //
     // the cell text is left-aligned rather than centered.
     $text_align =& CSS::get_handler(CSS_TEXT_ALIGN);
     $text_align->css('left', $pipeline);

     // Table contents: only element nodes are considered
     $column = 0;
     for ($node = $root->first_child(); $node; $node = $node->next_sibling()) {
         if ($node->node_type() !== XML_ELEMENT_NODE) {
             continue;
         }

         if ($node->tagname() === 'colgroup') {
             // COLGROUP generates no box; it only carries column information
             $column = $table->parse_colgroup_tag($node, $column);
             continue;
         }

         $node_box =& create_pdf_box($node, $pipeline);
         $table->add_child($node_box);
     }

     $table->normalize($pipeline);
     $table->normalize_cwc();
     $table->normalize_rhc();
     $table->normalize_parent();

     return $table;
 }
 /**
  * Constructor: initialize the container, make sure it holds at least one row, then
  * append the boxes generated for every child of the section node.
  *
  * @param mixed $root     DOM node of the table section
  * @param mixed $pipeline pipeline object forwarded to create_pdf_box()
  */
 function TableSectionBox(&$root, &$pipeline)
 {
     $this->GenericContainerBox();

     // A section always starts with at least one table row
     if (!count($this->content)) {
         $this->content[] =& new TableRowBox($root);
     }

     // Section contents, in document order
     for ($node = $root->first_child(); $node; $node = $node->next_sibling()) {
         $node_box =& create_pdf_box($node, $pipeline);
         $this->add_child($node_box);
     }
 }
Example #5
0
 /**
  * Constructor: build an inline-block box whose single child is the box tree of the
  * HTML document referenced by the IFRAME's 'src' attribute.
  *
  * The box is left without content when 'src' is missing or blank, when the document
  * cannot be fetched, or when the fetched data is not of type text/html.
  *
  * @param mixed $root     DOM node of the IFRAME element
  * @param mixed $pipeline pipeline object used to resolve and fetch the URL and to
  *                        manage the stylesheet stack
  */
 function IFrameBox(&$root, $pipeline)
 {
     $this->InlineBlockBox();
     // If NO src attribute specified (or it is blank), just return.
     if (!$root->has_attribute('src') || trim($root->get_attribute('src')) == '') {
         return;
     }
     // Determine the fully qualified URL of the frame content
     $src = $root->get_attribute('src');
     $url = $pipeline->guess_url($src);
     $data = $pipeline->fetch($url);
     /**
      * If framed page could not be fetched return immediately
      */
     if (is_null($data)) {
         return;
     }
     /**
      * Render only iframes containing HTML
      *
      * The Content-Type header may carry parameters after the ';' sign, optionally
      * surrounded by whitespace, and the media type itself is case-insensitive;
      * normalize the type before comparing so that values like
      * "Text/HTML ; charset=utf-8" are accepted as well.
      */
     $content_type = $data->get_additional_data('Content-Type');
     $content_type_array = explode(';', $content_type);
     if (strtolower(trim($content_type_array[0])) != "text/html") {
         // NOTE(review): the success path ends with pop_base_url(); if fetch() pushed
         // a base URL, this early return leaves it on the stack - confirm against
         // the pipeline implementation.
         return;
     }
     $html = $data->get_content();
     // Remove control symbols if any
     $html = preg_replace('/[\\x00-\\x07]/', "", $html);
     $converter = Converter::create();
     $html = $converter->to_utf8($html, $data->detect_encoding());
     $html = html2xhtml($html);
     $tree = TreeBuilder::build($html);
     // Save current stylesheet, as each frame may load its own stylesheets
     //
     $pipeline->pushCSS();
     $css =& $pipeline->getCurrentCSS();
     $css->scan_styles($tree, $pipeline);
     $frame_root = traverse_dom_tree_pdf($tree);
     $box_child =& create_pdf_box($frame_root, $pipeline);
     $this->add_child($box_child);
     // Restore old stylesheet
     //
     $pipeline->popCSS();
     $pipeline->pop_base_url();
 }
 /**
  * Build a TableSectionBox: read its CSS, seed it with one row and append the boxes
  * generated for every child of the section node.
  *
  * @param mixed $root     DOM node of the table section
  * @param mixed $pipeline pipeline object providing the current CSS state
  * @return mixed the new TableSectionBox (returned by reference)
  */
 function &create(&$root, &$pipeline)
 {
     $css_state =& $pipeline->getCurrentCSSState();

     $section =& new TableSectionBox();
     $section->readCSS($css_state);

     // Every section starts with at least one table row
     $first_row = new TableRowBox();
     $first_row->readCSS($css_state);
     $section->add_child($first_row);

     // Section contents, in document order
     for ($node = $root->first_child(); $node; $node = $node->next_sibling()) {
         $node_box =& create_pdf_box($node, $pipeline);
         $section->add_child($node_box);
     }

     return $section;
 }
 /**
  * Parse prepared XHTML and build the box tree of the document.
  *
  * Terminates the script (after emitting the "cannot parse" template and logging the
  * base URL) when the tree builder returns null.
  *
  * @param mixed $html     XHTML text to parse
  * @param mixed $pipeline pipeline object (CSS, base URL, output driver)
  * @param mixed $media    media description; media setup is skipped when null
  * @return mixed root box created by create_pdf_box() (returned by reference)
  */
 function &process($html, &$pipeline, &$media)
 {
     // Feed the prepared XHTML to the XML parser
     $document = TreeBuilder::build($html);

     // No document tree means the input could not be parsed: report and stop
     if ($document === null) {
         readfile(HTML2PS_DIR . '/templates/cannot_parse.html');
         error_log(sprintf("Cannot parse document: %s", $pipeline->get_base_url()));
         die("HTML2PS Error");
     }

     /**
      * Base URI detection.
      *
      * HTML 4.01, section 12.4.1, orders the sources of the base URI as follows
      * (highest priority first):
      *
      * 1. the BASE element;
      * 2. meta data discovered during a protocol interaction, such as an HTTP header
      *    (see [RFC2616]);
      * 3. the URI of the current document. Not every HTML document has one (a document
      *    embedded in an email, for example); such documents are erroneous if they
      *    contain relative URIs and rely on a default base URI.
      *
      * Only the first source is handled here: the first BASE element found is used.
      *
      * @todo fall back to the protocol metadata
      */
     $this->_scan_base($document, $pipeline);

     /**
      * Collect CSS from STYLE / LINK nodes. The whole tree is scanned because STYLE
      * nodes frequently show up inside the document body on the Web, even though the
      * HTML standard does not allow that.
      */
     $stylesheet =& $pipeline->getCurrentCSS();
     $stylesheet->scan_styles($document, $pipeline);

     // Media size and margins are configured only when a media object was supplied
     if ($media !== null) {
         $pipeline->get_page_media(1, $media);
         $pipeline->output_driver->update_media($media);
         $pipeline->_setupScales($media);
     }

     $body_node =& traverse_dom_tree_pdf($document);
     $root_box =& create_pdf_box($body_node, $pipeline);
     return $root_box;
 }
 /**
  * Constructor: build a container box whose single child is the box tree of the HTML
  * document referenced by the FRAME's 'src' attribute.
  *
  * The box is left without content when 'src' is missing or blank, when the document
  * cannot be fetched, or when the fetched data is not of type text/html.
  *
  * @param mixed $root     DOM node of the FRAME element
  * @param mixed $pipeline pipeline object used for CSS state, URL resolution, fetching
  *                        and the stylesheet stack
  */
 function FrameBox(&$root, &$pipeline)
 {
     $css_state =& $pipeline->getCurrentCSSState();
     // Inherit 'border' CSS value from parent (FRAMESET tag), if current FRAME
     // has no FRAMEBORDER attribute, and FRAMESET has one
     $parent = $root->parent();
     if (!$root->has_attribute('frameborder') && $parent->has_attribute('frameborder')) {
         $parent_border = $css_state->getPropertyOnLevel(CSS_BORDER, CSS_PROPERTY_LEVEL_PARENT);
         $css_state->setProperty(CSS_BORDER, $parent_border->copy());
     }
     $this->GenericContainerBox($root);
     // If NO src attribute specified (or it is blank), just return. A blank value
     // carries no document reference, so there is nothing to fetch for this frame.
     if (!$root->has_attribute('src') || trim($root->get_attribute('src')) == '') {
         return;
     }
     // Determine the fully qualified URL of the frame content
     $src = $root->get_attribute('src');
     $url = $pipeline->guess_url($src);
     $data = $pipeline->fetch($url);
     /**
      * If framed page could not be fetched return immediately
      */
     if (is_null($data)) {
         return;
     }
     /**
      * Render only frames containing HTML
      *
      * The Content-Type header may carry parameters after the ';' sign, optionally
      * surrounded by whitespace, and the media type itself is case-insensitive;
      * normalize the type before comparing so that values like
      * "Text/HTML ; charset=utf-8" are accepted as well.
      */
     $content_type = $data->get_additional_data('Content-Type');
     $content_type_array = explode(';', $content_type);
     if (strtolower(trim($content_type_array[0])) != "text/html") {
         // NOTE(review): the success path ends with pop_base_url(); if fetch() pushed
         // a base URL, this early return leaves it on the stack - confirm against
         // the pipeline implementation.
         return;
     }
     $html = $data->get_content();
     // Remove control symbols if any
     $html = preg_replace('/[\\x00-\\x07]/', "", $html);
     $converter = Converter::create();
     $html = $converter->to_utf8($html, $data->detect_encoding());
     $html = html2xhtml($html);
     $tree = TreeBuilder::build($html);
     // Save current stylesheet, as each frame may load its own stylesheets
     //
     $pipeline->pushCSS();
     $css =& $pipeline->getCurrentCSS();
     $css->scan_styles($tree, $pipeline);
     $frame_root = traverse_dom_tree_pdf($tree);
     $box_child =& create_pdf_box($frame_root, $pipeline);
     $this->add_child($box_child);
     // Restore old stylesheet
     //
     $pipeline->popCSS();
     $pipeline->pop_base_url();
 }
 /**
  * Constructor (variant based on the global stylesheet variables): build a container
  * box whose single child is the box tree of the HTML document referenced by the
  * FRAME's 'src' attribute.
  *
  * The box is left without content when 'src' is missing or blank, when the document
  * cannot be fetched, or when the fetched data is not of type text/html.
  *
  * @param mixed $root     DOM node of the FRAME element
  * @param mixed $pipeline pipeline object used for URL resolution and fetching
  */
 function FrameBox(&$root, &$pipeline)
 {
     // Inherit 'border' CSS value from parent (FRAMESET tag), if current FRAME
     // has no FRAMEBORDER attribute, and FRAMESET has one
     $parent = $root->parent();
     if (!$root->has_attribute('frameborder') && $parent->has_attribute('frameborder')) {
         pop_border();
         push_border(get_border());
     }
     $this->GenericContainerBox($root);
     // If NO src attribute specified (or it is blank), just return. A blank value
     // carries no document reference, so there is nothing to fetch for this frame.
     if (!$root->has_attribute('src') || trim($root->get_attribute('src')) == '') {
         return;
     }
     // Determine the fully qualified URL of the frame content
     $src = $root->get_attribute('src');
     $url = $pipeline->guess_url($src);
     $data = $pipeline->fetch($url);
     /**
      * If framed page could not be fetched return immediately
      */
     if (is_null($data)) {
         return;
     }
     /**
      * Render only frames containing HTML
      *
      * The Content-Type header may carry parameters after the ';' sign, optionally
      * surrounded by whitespace, and the media type itself is case-insensitive;
      * normalize the type before comparing so that values like
      * "Text/HTML ; charset=utf-8" are accepted as well.
      */
     $content_type = $data->get_additional_data('Content-Type');
     $content_type_array = explode(';', $content_type);
     if (strtolower(trim($content_type_array[0])) != "text/html") {
         // NOTE(review): the success path ends with pop_base_url(); if fetch() pushed
         // a base URL, this early return leaves it on the stack - confirm against
         // the pipeline implementation.
         return;
     }
     $html = $data->get_content();
     // Remove control symbols if any
     $html = preg_replace('/[\\x00-\\x07]/', "", $html);
     $converter = Converter::create();
     $html = $converter->to_utf8($html, $data->detect_encoding());
     $html = html2xhtml($html);
     $tree = TreeBuilder::build($html);
     // Save current stylesheet, as each frame may load its own stylesheets
     //
     global $g_css;
     $old_css = $g_css;
     global $g_css_obj;
     $old_obj = $g_css_obj;
     scan_styles($tree, $pipeline);
     // Temporary hack: convert CSS rule array to CSS object
     $g_css_obj = new CSSObject();
     foreach ($g_css as $rule) {
         $g_css_obj->add_rule($rule, $pipeline);
     }
     // TODO: stinks. Rewrite
     //
     $frame_root = traverse_dom_tree_pdf($tree);
     $box_child =& create_pdf_box($frame_root, $pipeline);
     $this->add_child($box_child);
     // Restore old stylesheet
     //
     $g_css = $old_css;
     $g_css_obj = $old_obj;
     $pipeline->pop_base_url();
 }
Example #10
0
 /**
  * Populate this container with one child box per child of the given DOM node.
  *
  * @param mixed $root     node corresponding to the current container object
  * @param mixed $pipeline pipeline object forwarded to create_pdf_box()
  */
 function create_content(&$root, &$pipeline)
 {
     // Children are processed in document order
     for ($node = $root->first_child(); $node; $node = $node->next_sibling()) {
         $node_box =& create_pdf_box($node, $pipeline);
         $this->add_child($node_box);
     }
 }
 /**
  * Constructor: build a page margin box from an at-rule.
  *
  * The at-rule's CSS is applied on top of a freshly pushed default state, the box
  * reads its own properties from that state, and a single child box is created from
  * either the '-html2ps-html-content' property (rendered as HTML) or the 'content'
  * property (rendered as text).
  *
  * @param mixed $pipeline pipeline object providing the CSS state and the counters
  * @param mixed $at_rule  at-rule object supplying the CSS and the content properties
  */
 function BoxPageMargin(&$pipeline, $at_rule)
 {
     $state =& $pipeline->get_current_css_state();
     // First pushed state: receives the at-rule's CSS and is read by this box
     $state->pushDefaultState();
     // The rule is applied without a DOM node (null is passed as the node argument)
     $root = null;
     $at_rule->css->apply($root, $state, $pipeline);
     $this->GenericContainerBox();
     $this->readCSS($state);
     // Second pushed state: stays in effect while the content box below is created
     $state->pushDefaultstate();
     /**
      * Check whether 'content' or '-html2ps-html-content' properties had been defined 
      * (if both properties are defined, -html2ps-html-content takes precedence)
      */
     $raw_html_content =& $at_rule->get_css_property(CSS_HTML2PS_HTML_CONTENT);
     $html_content = $raw_html_content->render($pipeline->get_counters());
     if ($html_content !== '') {
         // We should wrap html_content in DIV tag,
         // as we treat only the very first box of the resulting DOM tree as margin box content
         $html_content = html2xhtml("<div>" . $html_content . "</div>");
         $tree = TreeBuilder::build($html_content);
         $tree_root = traverse_dom_tree_pdf($tree);
         $body_box =& create_pdf_box($tree_root, $pipeline);
         // Only the first child of the generated box is used as the margin content
         $box =& $body_box->content[0];
     } else {
         // No HTML content rendered: fall back to the 'content' property as text
         $raw_content =& $at_rule->get_css_property(CSS_CONTENT);
         $content = $raw_content->render($pipeline->get_counters());
         $box =& InlineBox::create_from_text($content, WHITESPACE_PRE_LINE, $pipeline);
     }
     $this->add_child($box);
     // Undo both pushes made above
     $state->popState();
     $state->popState();
 }
 /**
  * Build the inline box for a DOM node (variant based on the pipeline's CSS state).
  *
  * A text node is turned into a box by InlineBox::create_from_text(), using the
  * current 'white-space' property. Any other node yields a new InlineBox that reads
  * its CSS from the current state and holds one child box per child node.
  *
  * @param mixed $root     DOM node the box is generated for
  * @param mixed $pipeline pipeline object providing the current CSS state
  * @return mixed the generated box (returned by reference)
  */
 function &create(&$root, &$pipeline)
 {
     // Text nodes are handled separately and never reach the container logic below
     if ($root->node_type() == XML_TEXT_NODE) {
         $text_state =& $pipeline->getCurrentCSSState();
         return InlineBox::create_from_text($root->content, $text_state->getProperty(CSS_WHITE_SPACE), $pipeline);
     }

     $inline =& new InlineBox();
     $state =& $pipeline->getCurrentCSSState();
     $inline->readCSS($state);

     // One child box per DOM child, in document order
     for ($node = $root->first_child(); $node; $node = $node->next_sibling()) {
         $node_box =& create_pdf_box($node, $pipeline);
         $inline->add_child($node_box);
     }

     // Anchor spans may end up with no content at all. As the "reflow" functions
     // drop empty inline boxes from the document tree, keep such a box alive by
     // giving it a whitespace child created under a 0.01pt font size.
     if ($inline->is_null()) {
         $state->pushState();
         $state->setProperty(CSS_FONT_SIZE, Value::fromData(0.01, UNIT_PT));
         $filler = WhitespaceBox::create($pipeline);
         $filler->readCSS($state);
         $inline->add_child($filler);
         $state->popState();
     }

     return $inline;
 }
Example #13
0
 /**
  * Constructor (variant based on the global CSS handler stack): set up an initial
  * row, adjust padding and text alignment, add the boxes of all element children and
  * run the table normalization passes.
  *
  * @param mixed $root     DOM node of the table
  * @param mixed $pipeline pipeline object forwarded to the CSS handlers and to
  *                        create_pdf_box()
  */
 function TableBox(&$root, &$pipeline)
 {
     // Parent constructor
     $this->GenericContainerBox();

     // List of column width constraints
     $this->cwc = array();

     // The initial row is created under default CSS values so that it does not pick
     // up table-specific properties ('overflow', for example)
     push_css_defaults();
     $this->content[] =& new TableRowBox($root);
     pop_css_defaults();

     // Cellspacing / cellpadding: with collapsed borders the padding is set to zero
     $border_collapse =& get_css_handler('border-collapse');
     if ($border_collapse->get() == BORDER_COLLAPSE) {
         $padding =& get_css_handler('padding');
         $padding->css("0", $pipeline);
     }

     // Tables do not inherit 'text-align' in any browser the original author tested,
     // so it is forced to 'left' here. In
     //
     // <div style="text-align: center; background-color: green;">
     // <table width="100" bgcolor="red">
     // <tr><td>TEST
     // </table>
     // </div>
     //
     // the cell text is left-aligned rather than centered.
     $text_align =& get_css_handler('text-align');
     $text_align->css('left', $pipeline);

     // Table contents: only element nodes are considered
     $column = 0;
     for ($node = $root->first_child(); $node; $node = $node->next_sibling()) {
         if ($node->node_type() !== XML_ELEMENT_NODE) {
             continue;
         }

         if ($node->tagname() === 'colgroup') {
             // COLGROUP generates no box; it only carries column information
             $column = $this->parse_colgroup_tag($node, $column);
             continue;
         }

         $node_box =& create_pdf_box($node, $pipeline);
         $this->add_child($node_box);
     }

     $this->normalize();
     $this->normalize_cwc();
     $this->normalize_rhc();
     $this->normalize_parent();
 }
Example #14
0
 /**
  * Turn a parsed DOM tree into a box tree.
  *
  * @param mixed $dom_tree parsed document tree
  * @param mixed $pipeline pipeline object forwarded to create_pdf_box()
  * @return mixed box created for the node returned by traverse_dom_tree_pdf()
  *               (returned by reference)
  */
 function &build(&$dom_tree, &$pipeline)
 {
     // Pick the node to start from, then generate boxes for it
     $start_node =& traverse_dom_tree_pdf($dom_tree);
     $root_box =& create_pdf_box($start_node, $pipeline);

     return $root_box;
 }