/**
 * Build the inline box for a DOM node.
 *
 * A text node is converted directly via InlineBox::create_from_text() using
 * the current 'white-space' handler value. Any other node produces an
 * InlineBox holding one box per DOM child.
 *
 * @param mixed $root     DOM node the box is built from
 * @param mixed $pipeline pipeline object passed through to create_pdf_box()
 * @return mixed reference to the created box
 */
function &create(&$root, &$pipeline) {
  // Text nodes have no children to walk; build their box from the raw content
  if ($root->node_type() == XML_TEXT_NODE) {
    $white_space = get_css_handler('white-space');
    return InlineBox::create_from_text($root->content, $white_space->get());
  }

  $box =& new InlineBox();

  // Append a box for every DOM child, in document order
  for ($node = $root->first_child(); $node; $node = $node->next_sibling()) {
    $node_box =& create_pdf_box($node, $pipeline);
    $box->add_child($node_box);
  }

  // An inline box that is still "null" (e.g. an anchor span) gets a filler
  // whitespace box created under a 0.01pt font size; per the original note,
  // the reflow functions would otherwise drop empty inline boxes from the tree.
  if ($box->is_null()) {
    push_css_defaults();
    pop_font_size();
    push_font_size('0.01pt');

    $filler = new WhitespaceBox();
    $box->add_child($filler);

    pop_css_defaults();
  }

  return $box;
}
/**
 * Build a table row box and populate it with the boxes of the node's children.
 *
 * @param mixed $root     DOM node whose children become the row content
 * @param mixed $pipeline pipeline object passed through to create_pdf_box()
 * @return mixed reference to the new TableRowBox
 */
function &create(&$root, &$pipeline) {
  $row =& new TableRowBox();

  // Every DOM child contributes one child box, in document order
  for ($node = $root->first_child(); $node; $node = $node->next_sibling()) {
    $node_box =& create_pdf_box($node, $pipeline);
    $row->add_child($node_box);
  }

  return $row;
}
/**
 * Build a table box from a DOM node.
 *
 * Steps, in order: read the table CSS, add one implicit row created under a
 * default CSS state, reset padding when borders collapse, force
 * 'text-align: left', convert the element children (COLGROUP children only
 * feed column information), and finally run the normalize passes.
 *
 * @param mixed $root     DOM node of the table
 * @param mixed $pipeline pipeline object supplying the CSS state
 * @return mixed reference to the new TableBox
 */
function &create(&$root, &$pipeline) {
  $table =& new TableBox();
  $table->readCSS($pipeline->getCurrentCSSState());

  // The implicit row is built on a default state so that it does not inherit
  // table-specific properties ('overflow', for example)
  $state =& $pipeline->getCurrentCSSState();
  $state->pushDefaultState();

  $implicit_row =& new TableRowBox($root);
  $implicit_row->readCSS($state);
  $table->add_child($implicit_row);

  $state->popState();

  // Cellspacing / cellpadding: collapsed borders reset padding to its default
  if ($table->getCSSProperty(CSS_BORDER_COLLAPSE) == BORDER_COLLAPSE) {
    $padding =& CSS::get_handler(CSS_PADDING);
    $table->setCSSProperty(CSS_PADDING, $padding->default_value());
  }

  // Tables do not inherit 'text-align' in browsers; e.g. the cell text below
  // is left-aligned rather than centered:
  //
  // <div style="text-align: center; background-color: green;">
  // <table width="100" bgcolor="red">
  // <tr><td>TEST
  // </table>
  // </div>
  $text_align =& CSS::get_handler(CSS_TEXT_ALIGN);
  $text_align->css('left', $pipeline);

  // Walk the table contents; only element nodes are considered
  $col_index = 0;
  for ($node = $root->first_child(); $node; $node = $node->next_sibling()) {
    if ($node->node_type() !== XML_ELEMENT_NODE) {
      continue;
    }

    if ($node->tagname() === 'colgroup') {
      // COLGROUP tags generate no boxes; they only describe the columns
      $col_index = $table->parse_colgroup_tag($node, $col_index);
      continue;
    }

    $node_box =& create_pdf_box($node, $pipeline);
    $table->add_child($node_box);
  }

  $table->normalize($pipeline);
  $table->normalize_cwc();
  $table->normalize_rhc();
  $table->normalize_parent();

  return $table;
}
/**
 * Constructor: set up the section as a generic container, make sure it holds
 * at least one row, then append a box for every child of the DOM node.
 *
 * @param mixed $root     DOM node of the table section
 * @param mixed $pipeline pipeline object passed through to create_pdf_box()
 */
function TableSectionBox(&$root, &$pipeline) {
  $this->GenericContainerBox();

  // A section with no content yet gets one automatically created row
  if (count($this->content) < 1) {
    $this->content[] =& new TableRowBox($root);
  }

  // Convert the section contents, in document order
  for ($node = $root->first_child(); $node; $node = $node->next_sibling()) {
    $node_box =& create_pdf_box($node, $pipeline);
    $this->add_child($node_box);
  }
}
/**
 * Constructor: build an inline-block box for an IFRAME and, when the framed
 * document can be fetched and is HTML, add its box tree as a child.
 *
 * The constructor returns early (leaving the box without content) when the
 * 'src' attribute is missing or blank, when the fetch yields null, or when
 * the fetched data is not of type text/html.
 *
 * @param mixed $root     DOM node of the IFRAME element
 * @param mixed $pipeline pipeline object used for URL resolution, fetching and CSS
 */
function IFrameBox(&$root, $pipeline) {
  $this->InlineBlockBox();

  // Nothing to render without a usable 'src' attribute
  if (!$root->has_attribute('src') || trim($root->get_attribute('src')) == '') {
    return;
  }

  // Determine the fully qualified URL of the frame content
  $src  = $root->get_attribute('src');
  $url  = $pipeline->guess_url($src);
  $data = $pipeline->fetch($url);

  // The framed page could not be fetched
  if (is_null($data)) {
    return;
  }

  // Render HTML frames only. The Content-Type value may carry parameters
  // after ';' (optionally preceded by whitespace), and media types are
  // case-insensitive, so the type is trimmed and lower-cased before comparing.
  $content_type       = $data->get_additional_data('Content-Type');
  $content_type_array = explode(';', $content_type);
  if (strtolower(trim($content_type_array[0])) !== 'text/html') {
    // NOTE(review): the success path ends with pop_base_url(), but no matching
    // push is visible in this constructor. Presumably fetch() pushes a base
    // URL; if so this return leaves it on the stack - confirm.
    return;
  }

  $html = $data->get_content();

  // Remove control symbols if any
  $html = preg_replace('/[\\x00-\\x07]/', "", $html);
  $converter = Converter::create();
  $html = $converter->to_utf8($html, $data->detect_encoding());
  $html = html2xhtml($html);
  $tree = TreeBuilder::build($html);

  // Save the current stylesheet, as each frame may load its own stylesheets
  $pipeline->pushCSS();
  $css =& $pipeline->getCurrentCSS();
  $css->scan_styles($tree, $pipeline);

  $frame_root = traverse_dom_tree_pdf($tree);
  $box_child =& create_pdf_box($frame_root, $pipeline);
  $this->add_child($box_child);

  // Restore the old stylesheet
  $pipeline->popCSS();
  $pipeline->pop_base_url();
}
/**
 * Build a table section box: read its CSS, add one automatically created row
 * carrying the same CSS state, then append a box for every DOM child.
 *
 * @param mixed $root     DOM node of the table section
 * @param mixed $pipeline pipeline object supplying the CSS state
 * @return mixed reference to the new TableSectionBox
 */
function &create(&$root, &$pipeline) {
  $css_state =& $pipeline->getCurrentCSSState();

  $section =& new TableSectionBox();
  $section->readCSS($css_state);

  // The section always starts with at least one table row
  $first_row = new TableRowBox();
  $first_row->readCSS($css_state);
  $section->add_child($first_row);

  // Convert the section contents, in document order
  for ($node = $root->first_child(); $node; $node = $node->next_sibling()) {
    $node_box =& create_pdf_box($node, $pipeline);
    $section->add_child($node_box);
  }

  return $section;
}
/**
 * Parse prepared XHTML and turn it into a box tree.
 *
 * When the parser returns null, the 'cannot_parse.html' template is sent to
 * the output, the failure is logged with the pipeline base URL, and the
 * script terminates.
 *
 * @param string $html     XHTML text to parse
 * @param mixed  $pipeline pipeline object (CSS, media and output driver access)
 * @param mixed  $media    media description; page setup is skipped when null
 * @return mixed reference to the root box returned by create_pdf_box()
 */
function &process($html, &$pipeline, &$media) {
  // Run the XML parser on the prepared XHTML
  $dom_tree = TreeBuilder::build($html);

  // Without a valid document there is nothing to render
  if (is_null($dom_tree)) {
    readfile(HTML2PS_DIR . '/templates/cannot_parse.html');
    error_log(sprintf("Cannot parse document: %s", $pipeline->get_base_url()));
    die("HTML2PS Error");
  }

  /**
   * Base URI detection. HTML 4.01, section 12.4.1, gives the precedence
   * (highest to lowest):
   *
   * 1. The base URI set by the BASE element.
   * 2. The base URI given by metadata discovered during a protocol
   *    interaction, such as an HTTP header (see [RFC2616]).
   * 3. By default, the URI of the current document. Not every HTML document
   *    has one (e.g. a document inside an e-mail); such documents are
   *    erroneous if they contain relative URIs and rely on a default base URI.
   *
   * Only the BASE element is handled here; its first occurrence is used.
   *
   * @todo fall back to the protocol metadata
   */
  $this->_scan_base($dom_tree, $pipeline);

  /**
   * Collect STYLE / LINK nodes with CSS definitions and references. This is
   * done on the whole tree because the document body may contain STYLE nodes
   * (a violation of the HTML standard that is common on the Web).
   */
  $stylesheet =& $pipeline->getCurrentCSS();
  $stylesheet->scan_styles($dom_tree, $pipeline);

  // Set up the media size and margins when a media object was supplied
  if ($media !== null) {
    $pipeline->get_page_media(1, $media);
    $pipeline->output_driver->update_media($media);
    $pipeline->_setupScales($media);
  }

  $body_node =& traverse_dom_tree_pdf($dom_tree);
  $root_box  =& create_pdf_box($body_node, $pipeline);

  return $root_box;
}
/**
 * Constructor: build the box for a FRAME element and, when the framed
 * document can be fetched and is HTML, add its box tree as a child.
 *
 * The constructor returns early (leaving the box without content) when the
 * 'src' attribute is missing or blank, when the fetch yields null, or when
 * the fetched data is not of type text/html.
 *
 * @param mixed $root     DOM node of the FRAME element
 * @param mixed $pipeline pipeline object used for URL resolution, fetching and CSS
 */
function FrameBox(&$root, &$pipeline) {
  $css_state =& $pipeline->getCurrentCSSState();

  // Inherit the 'border' CSS value from the parent (FRAMESET tag) if the
  // current FRAME has no FRAMEBORDER attribute and the FRAMESET has one
  $parent = $root->parent();
  if (!$root->has_attribute('frameborder') &&
      $parent->has_attribute('frameborder')) {
    $parent_border = $css_state->getPropertyOnLevel(CSS_BORDER, CSS_PROPERTY_LEVEL_PARENT);
    $css_state->setProperty(CSS_BORDER, $parent_border->copy());
  }

  $this->GenericContainerBox($root);

  // Nothing to render without a usable 'src' attribute. A blank value is
  // treated like a missing one, matching the check done by IFrameBox.
  if (!$root->has_attribute('src') || trim($root->get_attribute('src')) == '') {
    return;
  }

  // Determine the fully qualified URL of the frame content
  $src  = $root->get_attribute('src');
  $url  = $pipeline->guess_url($src);
  $data = $pipeline->fetch($url);

  // The framed page could not be fetched
  if (is_null($data)) {
    return;
  }

  // Render HTML frames only. The Content-Type value may carry parameters
  // after ';' (optionally preceded by whitespace), and media types are
  // case-insensitive, so the type is trimmed and lower-cased before comparing.
  $content_type       = $data->get_additional_data('Content-Type');
  $content_type_array = explode(';', $content_type);
  if (strtolower(trim($content_type_array[0])) !== 'text/html') {
    // NOTE(review): the success path ends with pop_base_url(), but no matching
    // push is visible in this constructor. Presumably fetch() pushes a base
    // URL; if so this return leaves it on the stack - confirm.
    return;
  }

  $html = $data->get_content();

  // Remove control symbols if any
  $html = preg_replace('/[\\x00-\\x07]/', "", $html);
  $converter = Converter::create();
  $html = $converter->to_utf8($html, $data->detect_encoding());
  $html = html2xhtml($html);
  $tree = TreeBuilder::build($html);

  // Save the current stylesheet, as each frame may load its own stylesheets
  $pipeline->pushCSS();
  $css =& $pipeline->getCurrentCSS();
  $css->scan_styles($tree, $pipeline);

  $frame_root = traverse_dom_tree_pdf($tree);
  $box_child =& create_pdf_box($frame_root, $pipeline);
  $this->add_child($box_child);

  // Restore the old stylesheet
  $pipeline->popCSS();
  $pipeline->pop_base_url();
}
/**
 * Constructor: build the box for a FRAME element and, when the framed
 * document can be fetched and is HTML, add its box tree as a child.
 *
 * The constructor returns early (leaving the box without content) when the
 * 'src' attribute is missing or blank, when the fetch yields null, or when
 * the fetched data is not of type text/html. While the framed document is
 * converted, the global stylesheet variables are replaced and restored
 * afterwards.
 *
 * @param mixed $root     DOM node of the FRAME element
 * @param mixed $pipeline pipeline object used for URL resolution and fetching
 */
function FrameBox(&$root, &$pipeline) {
  // Inherit the 'border' CSS value from the parent (FRAMESET tag) if the
  // current FRAME has no FRAMEBORDER attribute and the FRAMESET has one
  $parent = $root->parent();
  if (!$root->has_attribute('frameborder') &&
      $parent->has_attribute('frameborder')) {
    pop_border();
    push_border(get_border());
  }

  $this->GenericContainerBox($root);

  // Nothing to render without a usable 'src' attribute. A blank value is
  // treated like a missing one, matching the check done by IFrameBox.
  if (!$root->has_attribute('src') || trim($root->get_attribute('src')) == '') {
    return;
  }

  // Determine the fully qualified URL of the frame content
  $src  = $root->get_attribute('src');
  $url  = $pipeline->guess_url($src);
  $data = $pipeline->fetch($url);

  // The framed page could not be fetched
  if (is_null($data)) {
    return;
  }

  // Render HTML frames only. The Content-Type value may carry parameters
  // after ';' (optionally preceded by whitespace), and media types are
  // case-insensitive, so the type is trimmed and lower-cased before comparing.
  $content_type       = $data->get_additional_data('Content-Type');
  $content_type_array = explode(';', $content_type);
  if (strtolower(trim($content_type_array[0])) !== 'text/html') {
    // NOTE(review): the success path ends with pop_base_url(), but no matching
    // push is visible in this constructor. Presumably fetch() pushes a base
    // URL; if so this return leaves it on the stack - confirm.
    return;
  }

  $html = $data->get_content();

  // Remove control symbols if any
  $html = preg_replace('/[\\x00-\\x07]/', "", $html);
  $converter = Converter::create();
  $html = $converter->to_utf8($html, $data->detect_encoding());
  $html = html2xhtml($html);
  $tree = TreeBuilder::build($html);

  // Save the current stylesheet, as each frame may load its own stylesheets
  global $g_css;
  $old_css = $g_css;

  global $g_css_obj;
  $old_obj = $g_css_obj;

  scan_styles($tree, $pipeline);

  // Temporary hack: convert the CSS rule array to a CSS object
  $g_css_obj = new CSSObject();
  foreach ($g_css as $rule) {
    $g_css_obj->add_rule($rule, $pipeline);
  }

  // TODO: stinks. Rewrite
  $frame_root = traverse_dom_tree_pdf($tree);
  $box_child =& create_pdf_box($frame_root, $pipeline);
  $this->add_child($box_child);

  // Restore the old stylesheet
  $g_css = $old_css;
  $g_css_obj = $old_obj;

  $pipeline->pop_base_url();
}
/**
 * Populate this container with one child box per child of the given DOM node.
 *
 * @param mixed $root     node corresponding to the current container object
 * @param mixed $pipeline pipeline object passed through to create_pdf_box()
 */
function create_content(&$root, &$pipeline) {
  // Children are converted and appended in document order
  for ($node = $root->first_child(); $node; $node = $node->next_sibling()) {
    $node_box =& create_pdf_box($node, $pipeline);
    $this->add_child($node_box);
  }
}
/**
 * Constructor: build a page margin box from an at-rule.
 *
 * The at-rule CSS is applied on a fresh default state, from which the box
 * reads its own CSS. The content is then created under a second default
 * state: either the first box rendered from the '-html2ps-html-content'
 * property or, when that renders to an empty string, inline text made from
 * the 'content' property. Both states are popped before returning.
 *
 * @param mixed $pipeline pipeline object supplying the CSS state and counters
 * @param mixed $at_rule  at-rule object providing the CSS and content properties
 */
function BoxPageMargin(&$pipeline, $at_rule) {
  $state =& $pipeline->get_current_css_state();
  $state->pushDefaultState();

  // The rule is applied with a null node in place of a DOM element
  $root = null;
  $at_rule->css->apply($root, $state, $pipeline);

  $this->GenericContainerBox();
  $this->readCSS($state);

  $state->pushDefaultstate();

  /**
   * Either 'content' or '-html2ps-html-content' supplies the box content;
   * when both are defined, -html2ps-html-content takes precedence.
   */
  $raw_html_content =& $at_rule->get_css_property(CSS_HTML2PS_HTML_CONTENT);
  $html_content = $raw_html_content->render($pipeline->get_counters());

  if ($html_content === '') {
    // No HTML content: fall back to the plain 'content' property
    $raw_content =& $at_rule->get_css_property(CSS_CONTENT);
    $content = $raw_content->render($pipeline->get_counters());

    $box =& InlineBox::create_from_text($content, WHITESPACE_PRE_LINE, $pipeline);
  } else {
    // Only the very first box of the resulting tree is used as the margin
    // box content, so the HTML is wrapped in a single DIV
    $html_content = html2xhtml("<div>" . $html_content . "</div>");
    $tree = TreeBuilder::build($html_content);
    $tree_root = traverse_dom_tree_pdf($tree);
    $body_box =& create_pdf_box($tree_root, $pipeline);

    $box =& $body_box->content[0];
  }

  $this->add_child($box);

  // Drop both default states pushed above
  $state->popState();
  $state->popState();
}
/**
 * Build the inline box for a DOM node.
 *
 * A text node is converted directly via InlineBox::create_from_text() using
 * the CSS_WHITE_SPACE value of the current CSS state. Any other node produces
 * an InlineBox that reads the current CSS state and holds one box per DOM
 * child.
 *
 * @param mixed $root     DOM node the box is built from
 * @param mixed $pipeline pipeline object supplying the CSS state
 * @return mixed reference to the created box
 */
function &create(&$root, &$pipeline) {
  // Text nodes have no children to walk; build their box from the raw content
  if ($root->node_type() == XML_TEXT_NODE) {
    $css_state =& $pipeline->getCurrentCSSState();
    return InlineBox::create_from_text($root->content,
                                       $css_state->getProperty(CSS_WHITE_SPACE),
                                       $pipeline);
  }

  $box =& new InlineBox();

  $css_state =& $pipeline->getCurrentCSSState();
  $box->readCSS($css_state);

  // Append a box for every DOM child, in document order
  for ($node = $root->first_child(); $node; $node = $node->next_sibling()) {
    $node_box =& create_pdf_box($node, $pipeline);
    $box->add_child($node_box);
  }

  // An inline box that is still "null" (e.g. an anchor span) gets a filler
  // whitespace box styled with a 0.01pt font size; per the original note,
  // the reflow functions would otherwise drop empty inline boxes from the tree.
  if ($box->is_null()) {
    $css_state->pushState();
    $css_state->setProperty(CSS_FONT_SIZE, Value::fromData(0.01, UNIT_PT));

    $filler = WhitespaceBox::create($pipeline);
    $filler->readCSS($css_state);
    $box->add_child($filler);

    $css_state->popState();
  }

  return $box;
}
/**
 * Constructor: build a table box from a DOM node.
 *
 * Steps, in order: initialise the container and the column width constraint
 * list, add one implicit row created under default CSS values, zero the
 * padding when borders collapse, force 'text-align: left', convert the
 * element children (COLGROUP children only feed column information), and
 * finally run the normalize passes.
 *
 * @param mixed $root     DOM node of the table
 * @param mixed $pipeline pipeline object passed to the CSS handlers and create_pdf_box()
 */
function TableBox(&$root, &$pipeline) {
  // Call parent constructor
  $this->GenericContainerBox();

  // List of column width constraints
  $this->cwc = array();

  // The table always starts with one automatically created row. It is built
  // under default CSS values so that it does not inherit table-specific
  // properties ('overflow', for example).
  push_css_defaults();
  $this->content[] =& new TableRowBox($root);
  pop_css_defaults();

  // Cellspacing / cellpadding: collapsed borders force zero padding
  $border_collapse =& get_css_handler('border-collapse');
  if ($border_collapse->get() == BORDER_COLLAPSE) {
    $padding =& get_css_handler('padding');
    $padding->css("0", $pipeline);
  }

  // Tables do not inherit 'text-align' in browsers; e.g. the cell text below
  // is left-aligned rather than centered:
  //
  // <div style="text-align: center; background-color: green;">
  // <table width="100" bgcolor="red">
  // <tr><td>TEST
  // </table>
  // </div>
  $text_align =& get_css_handler('text-align');
  $text_align->css('left', $pipeline);

  // Walk the table contents; only element nodes are considered
  $col_index = 0;
  for ($node = $root->first_child(); $node; $node = $node->next_sibling()) {
    if ($node->node_type() !== XML_ELEMENT_NODE) {
      continue;
    }

    if ($node->tagname() === 'colgroup') {
      // COLGROUP tags generate no boxes; they only describe the columns
      $col_index = $this->parse_colgroup_tag($node, $col_index);
      continue;
    }

    $node_box =& create_pdf_box($node, $pipeline);
    $this->add_child($node_box);
  }

  $this->normalize();
  $this->normalize_cwc();
  $this->normalize_rhc();
  $this->normalize_parent();
}
/**
 * Convert a parsed DOM tree into a box tree.
 *
 * The node returned by traverse_dom_tree_pdf() is handed to create_pdf_box().
 *
 * @param mixed $dom_tree parsed document tree
 * @param mixed $pipeline pipeline object passed through to create_pdf_box()
 * @return mixed reference to the box returned by create_pdf_box()
 */
function &build(&$dom_tree, &$pipeline) {
  $body_node =& traverse_dom_tree_pdf($dom_tree);
  $root_box  =& create_pdf_box($body_node, $pipeline);

  return $root_box;
}