/**
 * HTML parser: walks an HTML string and feeds its text and tags to the PDF writer.
 *
 * The markup is normalised (AdjustHTML, optional ReadCSS), stripped of every tag
 * not listed in $this->enabledtags, and split into alternating text / tag chunks.
 * Text chunks are entity-decoded and then, depending on the current parser flags,
 * used as the document title, stored as special content, buffered (table cell or
 * text buffer) or written directly. Tag chunks are dispatched to CloseTag() or,
 * after attribute normalisation and href/src path resolution, to OpenTag().
 * Finally, collected internal links are bound to their targets.
 *
 * @param string $html HTML source to render
 * @return void
 */
function WriteHTML($html)
{
    //! @desc HTML parser
    //! @return void
    $this->ReadMetaTags($html);
    $html = AdjustHTML($html, $this->usepre); // Try to make HTML look more like XHTML
    if ($this->usecss) {
        $html = $this->ReadCSS($html);
    }
    // Add new supported tags in the DisableTags function

    // Fix '<?XML' bug from HTML code generated by MS Word. The '<?php' form is
    // handled first so its result is unchanged; the generic '<?' form then
    // covers the '<?xml' declarations the fix is actually meant for.
    $html = str_replace('<?php', '< ', $html);
    $html = str_replace('<?', '< ', $html);

    // Remove all unsupported tags, but the ones inside the 'enabledtags' string
    $html = strip_tags($html, $this->enabledtags);

    // html_entity_decode() is only usable from PHP 4.3.0 on; decide once
    // instead of re-parsing the version string for every text chunk.
    $hasEntityDecode = version_compare(PHP_VERSION, '4.3.0', '>=');

    // Explode the string in order to parse the HTML code:
    // even indexes hold text, odd indexes hold the inside of a tag.
    $a = preg_split('/<(.*?)>/ms', $html, -1, PREG_SPLIT_DELIM_CAPTURE);
    foreach ($a as $i => $e) {
        if ($i % 2 == 0) {
            // ---- TEXT ----
            // Adjust text, if needed
            if (strpos($e, "&") !== false) {
                if (strpos($e, "#") !== false) {
                    $e = value_entity_decode($e); // Decode value entities
                }
                if ($hasEntityDecode) {
                    $e = html_entity_decode($e, ENT_QUOTES, 'cp1252');
                } else {
                    $e = lesser_entity_decode($e);
                }
            }
            // Unify ascii code of spaces (in order to recognize all of them correctly)
            $e = str_replace(chr(160), chr(32), $e);
            if (strlen($e) == 0) {
                continue;
            }
            if ($this->divrevert) {
                $e = strrev($e);
            }
            if ($this->toupper) {
                $e = strtoupper($e);
            }
            if ($this->tolower) {
                $e = strtolower($e);
            }

            // Start of 'if/elseif's
            if ($this->titulo) {
                $this->SetTitle($e);
            } elseif ($this->specialcontent) {
                if ($this->specialcontent == "type=select" and $this->selectoption['ACTIVE'] == true) {
                    // Track the widest option and remember the first non-empty one as selected
                    $stringwidth = $this->GetStringWidth($e);
                    if (!isset($this->selectoption['MAXWIDTH']) or $stringwidth > $this->selectoption['MAXWIDTH']) {
                        $this->selectoption['MAXWIDTH'] = $stringwidth;
                    }
                    if (!isset($this->selectoption['SELECTED']) or $this->selectoption['SELECTED'] == '') {
                        $this->selectoption['SELECTED'] = $e;
                    }
                } else {
                    $this->textbuffer[] = array("???" . $this->specialcontent . "???" . $e);
                }
            } elseif ($this->tablestart) {
                if ($this->tdbegin) {
                    $this->cell[$this->row][$this->col]['textbuffer'][] = array($e, $this->HREF, $this->currentstyle, $this->colorarray, $this->currentfont, $this->SUP, $this->SUB, '', $this->strike, $this->outlineparam, $this->bgcolorarray);
                    $this->cell[$this->row][$this->col]['text'][] = $e;
                    $this->cell[$this->row][$this->col]['s'] += $this->GetStringWidth($e);
                }
                // Ignore content between <table>,<tr> and a <td> tag (this content is usually only a bunch of spaces)
            } elseif ($this->pbegin or $this->HREF or $this->divbegin or $this->SUP or $this->SUB or $this->strike or $this->buffer_on) {
                $this->textbuffer[] = array($e, $this->HREF, $this->currentstyle, $this->colorarray, $this->currentfont, $this->SUP, $this->SUB, '', $this->strike, $this->outlineparam, $this->bgcolorarray);
            } else {
                if ($this->blockjustfinished) {
                    $e = ltrim($e);
                }
                if ($e != '') {
                    $this->Write($this->lineheight, $e); // Write text directly in the PDF
                    if ($this->pjustfinished) {
                        $this->pjustfinished = false;
                    }
                }
            }
        } else {
            // ---- TAG ----
            $this->colorarray = array();
            $this->strike = false;

            // An empty tag body ("<>") must not be indexed; it falls through to OpenTag('').
            if ($e !== '' && $e[0] == '/') {
                $this->CloseTag(strtoupper(substr($e, 1)));
            } else {
                // Eliminate single quotes, if any
                $e = preg_replace('|=\'(.*?)\'|s', '="$1"', $e);
                // Changes anykey=anyvalue to anykey="anyvalue" (only do this when this happens inside tags)
                $e = preg_replace('| (\\w+?)=([^\\s>"]+)|si', ' $1="$2"', $e);

                // Fix path values, if needed
                if (stristr($e, "href=") !== false or stristr($e, "src=") !== false) {
                    $linkregexp = '/ (href|src)="(.*?)"/i';
                    // Only rewrite when the attribute really matches; otherwise there is nothing to replace.
                    if (preg_match($linkregexp, $e, $auxiliararray)) {
                        $path = str_replace("\\", "/", $auxiliararray[2]); // If on Windows
                        // Get link info and obtain its absolute path.
                        // Strip a leading "./" only (the dot is escaped so that one-letter
                        // directories such as "a/file" are left alone).
                        $path = preg_replace('|^\\./|', '', $path);
                        if ($path === '' || $path[0] != '#') {
                            if (strpos($path, "../") !== false) {
                                $backtrackamount = substr_count($path, "../");
                                $maxbacktrack = substr_count($this->basepath, "/") - 1;
                                $filepath = str_replace("../", '', $path);
                                $path = $this->basepath;
                                // If it is an invalid relative link, then make it go to directory root
                                if ($backtrackamount > $maxbacktrack) {
                                    $backtrackamount = $maxbacktrack;
                                }
                                // Backtrack some directories (own counter: $i is the foreach key)
                                for ($step = 0; $step < $backtrackamount + 1; $step++) {
                                    $path = substr($path, 0, strrpos($path, "/"));
                                }
                                $path = $path . "/" . $filepath; // Make it an absolute path
                            } elseif (strpos($path, ":/") === false) {
                                $path = $this->basepath . $path;
                            }
                            // Do nothing if it is an Absolute Link
                        }
                        $e = preg_replace($linkregexp, ' \\1="' . $path . '"', $e);
                    }
                }
                // END of Fix path values

                // Extract attributes
                $contents = array();
                preg_match_all('/\\S*=["\'][^"\']*["\']/', $e, $contents);
                preg_match('/\\S+/', $e, $a2);
                $tag = isset($a2[0]) ? strtoupper($a2[0]) : '';
                $attr = array();
                if (!empty($contents)) {
                    foreach ($contents[0] as $v) {
                        // preg_match() replaces ereg(), which no longer exists in PHP 7+.
                        // The D modifier keeps "$" anchored to the very end, as in POSIX ereg.
                        if (preg_match('/^([^=]*)=["\']?([^"\']*)["\']?$/D', $v, $a3)) {
                            $attr[strtoupper($a3[1])] = $a3[2];
                        }
                    }
                }
                $this->OpenTag($tag, $attr);
            }
        }
    } // end of foreach($a as $i=>$e)

    // Create Internal Links, if needed.
    // Keys without '#' carry Y and PAGE; keys prefixed with one or more '#'
    // hold the link handles that have to be bound to that position.
    if (!empty($this->internallink)) {
        foreach ($this->internallink as $k => $v) {
            if (strpos($k, "#") !== false) {
                continue; // ignore
            }
            $ypos = $v['Y'];
            $pagenum = $v['PAGE'];
            $sharp = "#";
            while (array_key_exists($sharp . $k, $this->internallink)) {
                $internallink = $this->internallink[$sharp . $k];
                $this->SetLink($internallink, $ypos, $pagenum);
                $sharp .= "#";
            }
        }
    }
}
/**
 * Tag handler: returns a download link to a PDF rendering of the current article,
 * generating (or regenerating) the PDF first when necessary.
 *
 * A row in txp_file whose description is "<article id>_<md5 hash>" marks an
 * existing PDF. The hash covers the stylesheet, title, excerpt and body, so a
 * change in any of them triggers regeneration. The file row is only inserted or
 * updated after a PDF has actually been written.
 *
 * Recognised attributes (with defaults): class (''), pdf_css_class ('article_pdf'),
 * name ('This article as a pdf'), file_category ('article_pdf'), show_excerpt ('y'),
 * show_body ('y'), image (''), debug ('n' - 'y' keeps the temporary HTML/CSS files).
 *
 * @param array $atts tag attributes
 * @return string HTML anchor pointing at the file download, or '' when no PDF
 *                is available (nothing stored yet and generation was not possible)
 */
function wan_pdf($atts)
{
    global $thisarticle, $sitename, $permlink_mode, $prefs, $txpcfg;

    extract(lAtts(array(
        'class'         => '',
        'pdf_css_class' => 'article_pdf',
        'name'          => 'This article as a pdf',
        'file_category' => 'article_pdf',
        'show_excerpt'  => 'y',
        'show_body'     => 'y',
        'image'         => '',
        'debug'         => 'n',
    ), $atts));

    $pdf_exists = false;
    $pdf_up_to_date = false;
    $article_id = $thisarticle['thisid'];
    $title = $thisarticle['title'];
    $tempdir = $prefs['tempdir'];
    $filedir = $prefs['file_base_path'];

    $body = '';
    if ($show_body == 'y' && $thisarticle['body'] != "") {
        $body = $thisarticle['body'];
    }
    $excerpt = '';
    if ($show_excerpt == 'y' && $thisarticle['excerpt'] != "") {
        $excerpt = $thisarticle['excerpt'];
    }

    // Read CSS-file for pdf-output
    $css = safe_field('css', 'txp_css', "name='" . doSlash($pdf_css_class) . "'");

    // Calculate hash-value for article
    $article_hash = md5($css . $title . $excerpt . $body);

    // Check if pdf already exists for this article.
    // The underscore is escaped because "_" is a single-character wildcard in
    // LIKE: an unescaped "1_%" would also match the row of article 10, 11, ...
    $where = "category = '" . doSlash($file_category) . "'"
        . " AND description LIKE '" . doSlash($article_id) . "\\_%'";
    $rs = safe_row("id, description, filename", 'txp_file', $where);
    if (!empty($rs)) {
        $pdf_exists = true;
        $file_id = $rs['id'];
        $pdf_filename = $rs['filename'];
        // pdf exists, so check if it's up to date
        $description = explode("_", $rs['description']);
        if (isset($description[1]) && $description[1] === $article_hash) {
            $pdf_up_to_date = true;
        }
    }

    // Delete old PDF (file named after the article title) and force a rebuild
    $old_pdf = $filedir . "/" . $title . ".pdf";
    if (file_exists($old_pdf)) {
        unlink($old_pdf);
        $pdf_up_to_date = false;
    }

    // if pdf has to be generated (or re-generated)
    if (!$pdf_exists or !$pdf_up_to_date) {
        // generate identifier
        $identifier = $article_id . "_" . $article_hash;
        $new_filename = str_replace("'", "", $article_id . "_" . $thisarticle['url_title'] . ".pdf");

        // check if textile is on or off: content that does not start with a
        // paragraph gets wrapped, a textiled body gets its value entities decoded
        if (strcmp("<p>", substr($excerpt, 0, 3)) != 0) {
            $excerpt = "<p>" . $excerpt . "</p>";
        }
        if (strcmp("<p>", substr($body, 0, 3)) != 0) {
            $body = "<p>" . $body . "</p>";
        } else {
            $body = value_entity_decode($body);
        }

        // change image-urls to http://...
        $excerpt = str_replace('<img src="/textpattern', '<img src="http://' . $prefs['siteurl'], $excerpt);
        $body = str_replace('<img src="/textpattern', '<img src="http://' . $prefs['siteurl'], $body);

        $pdf_generated = false;
        $html_path = $tempdir . "/" . $identifier . ".html";
        $css_path = $tempdir . "/csstemp.css";

        // generate HTML first
        $file = is_writable($tempdir . "/") ? fopen($html_path, "w") : false;
        if ($file !== false) {
            $xhtml_header = "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n"
                . " \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n"
                . " <html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\">\n"
                . " <head>\n"
                . " <meta http-equiv=\"Content-Type\" content=\"text/HTML;\" />\n"
                . " <title>" . $title . "</title>\n"
                . " </head>\n"
                . " <body>";
            fwrite($file, $xhtml_header . "<h1>" . $title . "</h1>\n" . $excerpt . $body . "</body></html>");
            fclose($file);

            // write CSS to file (the stored stylesheet is base64-decoded before use)
            if ($css) {
                $cssfile = fopen($css_path, "w");
                if ($cssfile !== false) {
                    fwrite($cssfile, base64_decode($css));
                    fclose($cssfile);
                }
            }

            // Define some variables for PDF; guarded so that a second call in
            // the same request does not try to redefine the constant.
            if (!defined('X_PATH')) {
                define('X_PATH', $txpcfg['txpath'] . '/lib/xhtml2pdf');
            }

            // NOTE(review): in the source it is unclear whether this include was
            // active or commented out; it is only performed when the class is
            // not available yet, which is safe in both cases - please confirm.
            if (!class_exists('xhtml2pdf')) {
                include_once X_PATH . '/classes/x2fpdf.php';
            }
            // NOTE(review): $config is never assigned in this function; presumably
            // it is provided by the included file - confirm. Default it to null
            // so that it is at least defined.
            if (!isset($config)) {
                $config = null;
            }

            // Create new xhtml2pdf-object
            $xpdf = new xhtml2pdf($html_path, $css_path, $config);
            $xpdf->SetTitle(utf8_decode($title));
            $xpdf->SetAuthor(utf8_decode($thisarticle['authorid']));
            $xpdf->SetCreator('XHTML2PDF v0.2.5');
            $xpdf->SetSubject(utf8_decode($title));
            $xpdf->SetKeywords(utf8_decode($thisarticle['keywords']));

            // output pdf
            $xpdf->output($filedir . "/" . $new_filename, 'F');
            $pdf_generated = true;

            // remove HTML-file, remove CSS-file
            if ($debug != 'y') {
                unlink($html_path);
                if (file_exists($css_path)) {
                    unlink($css_path);
                }
            }
        }

        // Record the result only when a PDF was really produced; otherwise the
        // stored hash would claim an up-to-date file that does not exist.
        if ($pdf_generated) {
            $pdf_filename = $new_filename;
            if (!$pdf_exists) {
                // Add pdf to textpattern-db
                $file_id = safe_insert(
                    "txp_file",
                    "filename = '" . doSlash($pdf_filename) . "',"
                    . " category = '" . doSlash($file_category) . "',"
                    . " permissions = '',"
                    . " description = '" . doSlash($identifier) . "'"
                );
            } else {
                // Update textpattern-db
                safe_update(
                    "txp_file",
                    "description = '" . doSlash($identifier) . "', filename = '" . doSlash($pdf_filename) . "'",
                    "id = '" . doSlash($file_id) . "'"
                );
            }
        }
    }

    // Nothing stored and nothing generated: there is no file to link to.
    if (!isset($file_id)) {
        return '';
    }

    // Generate Link to PDF
    if ($class != '') {
        $class = " class=\"" . $class . "\"";
    }
    if ($image != '') {
        $name = image(array('id' => $image));
    }
    if ($permlink_mode == 'messy') {
        $href = 'http://' . $prefs['siteurl'] . '/index.php?s=file_download&id=' . $file_id;
    } else {
        $href = 'http://' . $prefs['siteurl'] . '/' . gTxt('file_download') . '/' . $file_id;
    }

    return '<a href="' . $href . '" title="download file ' . $pdf_filename . '"' . $class . '>' . $name . '</a>';
}