/**
 * Turn prepared markup into a box tree.
 *
 * Steps, in order: parse $html into a DOM tree, look for a BASE element,
 * collect stylesheets, rebuild the global CSS object from the global rule
 * array, and finally build the box for the traversed tree.
 *
 * When the parser yields no tree, the "cannot parse" template is written to
 * the output, the failure is logged together with the pipeline's base URL,
 * and the script terminates.
 *
 * @param mixed $html      Markup handed to TreeBuilder::build(); the original
 *                         author describes it as already-prepared XHTML.
 * @param mixed &$pipeline Pipeline object; supplies get_base_url() and is
 *                         forwarded to the scanning / box-building helpers.
 * @return mixed Reference to the value produced by create_pdf_box().
 */
function &process($html, &$pipeline)
 {
     // Feed the prepared XHTML to the XML parser
     $document = TreeBuilder::build($html);
     // A null result means the parser did not return a valid document:
     // emit the error page, record the base URL in the log and stop.
     if ($document === null) {
         readfile(HTML2PS_DIR . '/templates/cannot_parse.html');
         error_log(sprintf("Cannot parse document: %s", $pipeline->get_base_url()));
         exit;
     }
     /**
      * Base URI detection.
      *
      * HTML 4.01, section 12.4.1, requires user agents to pick the base URI
      * using these precedences (highest first):
      *
      * 1. The BASE element.
      * 2. Metadata discovered during protocol interaction, such as an HTTP
      *    header (see [RFC2616]).
      * 3. The URI of the current document. Not every HTML document has one
      *    (e.g. a document embedded in an email); such documents are
      *    erroneous if they contain relative URIs and rely on a default
      *    base URI.
      */
     /**
      * Step 1: look for a BASE element; its first occurrence is used
      */
     $this->_scan_base($document, $pipeline);
     /**
      * @todo fall back to the protocol metadata
      */
     /**
      * Collect CSS references and definitions from STYLE / LINK nodes.
      * This has to happen here, on the whole tree, because a STYLE node may
      * show up inside the document body (invalid HTML, but common on the Web).
      */
     scan_styles($document, $pipeline);
     // Temporary hack: rebuild the global CSS object from the global rule array
     global $g_css, $g_css_obj;
     $g_css_obj = new CSSObject();
     foreach ($g_css as $css_rule) {
         $g_css_obj->add_rule($css_rule, $pipeline);
     }
     // Traverse the DOM tree, then build the box that is handed back by reference
     $body_node = traverse_dom_tree_pdf($document);
     $root_box =& create_pdf_box($body_node, $pipeline);
     return $root_box;
 }
Пример #2
0
 /**
  * Constructor: builds the box for a FRAME element and, when the frame's
  * 'src' attribute resolves to an HTML document, renders that document as a
  * child box.
  *
  * The constructor returns early, leaving the box without a child, when:
  *  - the element has no 'src' attribute;
  *  - the pipeline cannot fetch the resolved URL;
  *  - the fetched content is not of media type text/html;
  *  - the fetched content cannot be parsed into a DOM tree.
  *
  * While the framed document is processed, the global stylesheet state
  * ($g_css / $g_css_obj) is replaced by the frame's own and restored afterwards.
  *
  * @param mixed &$root     Frame node; parent(), has_attribute() and
  *                         get_attribute() are called on it.
  * @param mixed &$pipeline Pipeline object; guess_url(), fetch() and
  *                         pop_base_url() are called on it.
  */
 function FrameBox(&$root, &$pipeline)
 {
     // Inherit 'border' CSS value from parent (FRAMESET tag), if current FRAME
     // has no FRAMEBORDER attribute, and FRAMESET has one
     $parent = $root->parent();
     if (!$root->has_attribute('frameborder') && $parent->has_attribute('frameborder')) {
         pop_border();
         push_border(get_border());
     }
     $this->GenericContainerBox($root);
     // If NO src attribute specified, just return.
     if (!$root->has_attribute('src')) {
         return;
     }
     // Determine the fully qualified URL of the frame content
     $src = $root->get_attribute('src');
     $url = $pipeline->guess_url($src);
     $data = $pipeline->fetch($url);
     /**
      * If framed page could not be fetched return immediately
      */
     if (is_null($data)) {
         return;
     }
     /**
      * Render only frames whose content is HTML.
      *
      * The Content-Type header may carry parameters after the ';' sign, may
      * have whitespace around the media type, and the media type itself is
      * case-insensitive, so normalize before comparing.
      *
      * NOTE(review): pop_base_url() at the end of this constructor has no
      * matching push visible here; presumably fetch() pushes the base URL on
      * success. If so, this early return leaves it un-popped - confirm
      * against the pipeline implementation.
      */
     $content_type = $data->get_additional_data('Content-Type');
     $content_type_array = explode(';', $content_type);
     $media_type = strtolower(trim($content_type_array[0]));
     if ($media_type != "text/html") {
         return;
     }
     $html = $data->get_content();
     // Remove control symbols if any
     $html = preg_replace('/[\\x00-\\x07]/', "", $html);
     $converter = Converter::create();
     $html = $converter->to_utf8($html, $data->detect_encoding());
     $html = html2xhtml($html);
     $tree = TreeBuilder::build($html);
     // The tree builder may return null for unparseable input. Skip the frame
     // content in that case instead of passing null to the style scanner and
     // the DOM traversal. The base URL is popped exactly as the normal path
     // does at its end; the global stylesheet has not been touched yet.
     if (is_null($tree)) {
         error_log(sprintf("Cannot parse frame content: %s", $url));
         $pipeline->pop_base_url();
         return;
     }
     // Save current stylesheet, as each frame may load its own stylesheets
     //
     global $g_css;
     $old_css = $g_css;
     global $g_css_obj;
     $old_obj = $g_css_obj;
     scan_styles($tree, $pipeline);
     // Temporary hack: convert CSS rule array to CSS object
     $g_css_obj = new CSSObject();
     foreach ($g_css as $rule) {
         $g_css_obj->add_rule($rule, $pipeline);
     }
     // TODO: stinks. Rewrite
     //
     $frame_root = traverse_dom_tree_pdf($tree);
     $box_child =& create_pdf_box($frame_root, $pipeline);
     $this->add_child($box_child);
     // Restore old stylesheet
     //
     $g_css = $old_css;
     $g_css_obj = $old_obj;
     $pipeline->pop_base_url();
 }
}

/* 3 deep (or more) unordered lists use a square */
ol ol ul,     ol ul ul,     ol menu ul,     ol dir ul,
ol ol menu,   ol ul menu,   ol menu menu,   ol dir menu,
ol ol dir,    ol ul dir,    ol menu dir,    ol dir dir,
ul ol ul,     ul ul ul,     ul menu ul,     ul dir ul,
ul ol menu,   ul ul menu,   ul menu menu,   ul dir menu,
ul ol dir,    ul ul dir,    ul menu dir,    ul dir dir,
menu ol ul,   menu ul ul,   menu menu ul,   menu dir ul,
menu ol menu, menu ul menu, menu menu menu, menu dir menu,
menu ol dir,  menu ul dir,  menu menu dir,  menu dir dir,
dir ol ul,    dir ul ul,    dir menu ul,    dir dir ul,
dir ol menu,  dir ul menu,  dir menu menu,  dir dir menu,
dir ol dir,   dir ul dir,   dir menu dir,   dir dir dir {
  list-style-type: square;
}
EOF;
// Parse the stylesheet text held in $css, using a throw-away pipeline
parse_css($css, new Pipeline());
global $g_css;
// Append the rules now present in $g_css to the default rule list
$g_css_defaults = array_merge($g_css_defaults, $g_css);
// Reset the global rule array that was used while parsing the source above
$g_css = array();
// For every default rule: blank slot 2 and store the rule's position in slot 3
$ctr = 0;
while ($ctr < count($g_css_defaults)) {
    $g_css_defaults[$ctr][2] = "";
    $g_css_defaults[$ctr][3] = $ctr;
    $ctr++;
}
// Build the CSS object for the defaults, one rule at a time
$g_css_defaults_obj = new CSSObject();
foreach ($g_css_defaults as $rule) {
    $g_css_defaults_obj->add_rule($rule, new Pipeline());
}