/**
 * Parses an HTML string and feeds its text and tags to the document writer.
 *
 * The markup is normalised with AdjustHTML(), optionally passed through
 * ReadCSS(), reduced to the tags listed in $this->enabledtags and then split
 * on '<...>' into alternating chunks: even indexes hold text, odd indexes
 * hold the inside of a tag. Text is routed according to the current parser
 * state (title, special content, table cell, buffered block or direct
 * Write()); tags are dispatched to OpenTag() / CloseTag(). Finally, any
 * internal links recorded in $this->internallink are resolved with SetLink().
 *
 * @param string $html HTML source to render
 * @return void
 */
function WriteHTML($html)
{
    //! @desc HTML parser
    //! @return void
    $this->ReadMetaTags($html);

    //Try to make HTML look more like XHTML.
    //AdjustHTML() rewrites $html through its by-reference parameter, so its
    //return value is deliberately not assigned (assigning it would wipe the
    //markup whenever the function returns nothing).
    AdjustHTML($html, $this->usepre);

    if ($this->usecss) {
        $html = $this->ReadCSS($html);
    }

    //Add new supported tags in the DisableTags function

    //Fix '<?XML' bug from HTML code generated by MS Word: neutralise every
    //'<?' opener ('<?php' first, so it keeps collapsing to '< ' as before).
    $html = str_replace(array('<?php', '<?'), '< ', $html);

    //remove all unsupported tags, but the ones inside the 'enabledtags' string
    $html = strip_tags($html, $this->enabledtags);

    //html_entity_decode() only exists from PHP 4.3.0 on; decide once, not per chunk
    $hasentitydecode = version_compare(phpversion(), '4.3.0', '>=');

    //Explode the string in order to parse the HTML code
    $chunks = preg_split('/<(.*?)>/ms', $html, -1, PREG_SPLIT_DELIM_CAPTURE);

    foreach ($chunks as $index => $e) {
        if ($index % 2 == 0) {
            //TEXT
            //Adjust text, if needed
            if (strpos($e, "&") !== false) {
                if (strpos($e, "#") !== false) {
                    $e = value_entity_decode($e); // Decode value entities
                }
                if ($hasentitydecode) {
                    $e = html_entity_decode($e, ENT_QUOTES, 'cp1252');
                } else {
                    $e = lesser_entity_decode($e);
                }
            }

            //unify ascii code of spaces (in order to recognize all of them correctly)
            $e = str_replace(chr(160), chr(32), $e);
            if (strlen($e) == 0) {
                continue;
            }

            if ($this->divrevert) {
                $e = strrev($e);
            }
            if ($this->toupper) {
                $e = strtoupper($e);
            }
            if ($this->tolower) {
                $e = strtolower($e);
            }

            //Start of 'if/elseif's
            if ($this->titulo) {
                $this->SetTitle($e);
            } elseif ($this->specialcontent) {
                if ($this->specialcontent == "type=select" and $this->selectoption['ACTIVE'] == true) {
                    //Track the widest option and remember the first non-empty one as selected
                    $stringwidth = $this->GetStringWidth($e);
                    if (!isset($this->selectoption['MAXWIDTH']) or $stringwidth > $this->selectoption['MAXWIDTH']) {
                        $this->selectoption['MAXWIDTH'] = $stringwidth;
                    }
                    if (!isset($this->selectoption['SELECTED']) or $this->selectoption['SELECTED'] == '') {
                        $this->selectoption['SELECTED'] = $e;
                    }
                } else {
                    $this->textbuffer[] = array("???" . $this->specialcontent . "???" . $e);
                }
            } elseif ($this->tablestart) {
                //Ignore content between <table>,<tr> and a <td> tag
                //(this content is usually only a bunch of spaces)
                if ($this->tdbegin) {
                    $this->cell[$this->row][$this->col]['textbuffer'][] = array(
                        $e,
                        $this->HREF,
                        $this->currentstyle,
                        $this->colorarray,
                        $this->currentfont,
                        $this->SUP,
                        $this->SUB,
                        '',
                        $this->strike,
                        $this->outlineparam,
                        $this->bgcolorarray
                    );
                    $this->cell[$this->row][$this->col]['text'][] = $e;
                    $this->cell[$this->row][$this->col]['s'] += $this->GetStringWidth($e);
                }
            } elseif ($this->pbegin or $this->HREF or $this->divbegin or $this->SUP or $this->SUB or $this->strike or $this->buffer_on) {
                $this->textbuffer[] = array(
                    $e,
                    $this->HREF,
                    $this->currentstyle,
                    $this->colorarray,
                    $this->currentfont,
                    $this->SUP,
                    $this->SUB,
                    '',
                    $this->strike,
                    $this->outlineparam,
                    $this->bgcolorarray
                );
            } else {
                if ($this->blockjustfinished) {
                    $e = ltrim($e);
                }
                if ($e != '') {
                    $this->Write($this->lineheight, $e); //Write text directly in the PDF
                    if ($this->pjustfinished) {
                        $this->pjustfinished = false;
                    }
                }
            }
            continue;
        }

        //TAG
        $this->colorarray = array();
        $this->strike = false;

        //strncmp() instead of $e[0]: an empty tag ('<>') must not raise an offset warning
        if (strncmp($e, '/', 1) === 0) {
            $this->CloseTag(strtoupper(substr($e, 1)));
            continue;
        }

        $regexp = '|=\'(.*?)\'|s'; // eliminate single quotes, if any
        $e = preg_replace($regexp, "=\"\$1\"", $e);
        // changes anykey=anyvalue to anykey="anyvalue" (only do this when this happens inside tags)
        $regexp = '| (\\w+?)=([^\\s>"]+)|si';
        $e = preg_replace($regexp, " \$1=\"\$2\"", $e);

        //Fix path values, if needed
        if (stristr($e, "href=") !== false or stristr($e, "src=") !== false) {
            $linkregexp = '/ (href|src)="(.*?)"/i';
            //Only rewrite when an attribute was really captured; otherwise the tag is left untouched
            if (preg_match($linkregexp, $e, $auxiliararray)) {
                $path = str_replace("\\", "/", $auxiliararray[2]); //If on Windows

                //Get link info and obtain its absolute path.
                //Drop a leading "./" or "//" only; the previous pattern '|^./|' used an
                //unescaped dot and also ate any one-character directory ("a/b.png").
                $path = preg_replace('|^[./]/|', '', $path);

                if (strncmp($path, '#', 1) !== 0) {
                    if (strpos($path, "../") !== false) {
                        $backtrackamount = substr_count($path, "../");
                        $maxbacktrack = substr_count($this->basepath, "/") - 1;
                        $filepath = str_replace("../", '', $path);
                        $path = $this->basepath;
                        //If it is an invalid relative link, then make it go to directory root
                        if ($backtrackamount > $maxbacktrack) {
                            $backtrackamount = $maxbacktrack;
                        }
                        //Backtrack some directories (own counter: must not reuse the foreach key)
                        for ($step = 0; $step < $backtrackamount + 1; $step++) {
                            $path = substr($path, 0, strrpos($path, "/"));
                        }
                        $path = $path . "/" . $filepath; //Make it an absolute path
                    } elseif (strpos($path, ":/") === false) {
                        $path = $this->basepath . $path;
                    }
                    //Do nothing if it is an Absolute Link
                }

                //Escape '\' and '$' so the path is inserted literally and never
                //interpreted as a back-reference by preg_replace()
                $e = preg_replace($linkregexp, ' \\1="' . addcslashes($path, '\\$') . '"', $e);
            }
        }
        //END of Fix path values

        //Extract attributes
        $contents = array();
        preg_match_all('/\\S*=["\'][^"\']*["\']/', $e, $contents);
        $a2 = array();
        preg_match('/\\S+/', $e, $a2);
        //A blank tag yields no name; pass an empty tag name instead of reading a missing index
        $tag = isset($a2[0]) ? strtoupper($a2[0]) : '';
        $attr = array();
        if (!empty($contents)) {
            foreach ($contents[0] as $v) {
                //preg_match() replaces ereg(), which no longer exists as of PHP 7
                if (preg_match('/^([^=]*)=["\']?([^"\']*)["\']?$/D', $v, $a3)) {
                    $attr[strtoupper($a3[1])] = $a3[2];
                }
            }
        }
        $this->OpenTag($tag, $attr);
    } //end of foreach($chunks as $index=>$e)

    //Create Internal Links, if needed
    if (!empty($this->internallink)) {
        foreach ($this->internallink as $k => $v) {
            if (strpos($k, "#") !== false) {
                continue; //ignore: keys containing '#' are the link sources, handled below
            }
            $ypos = $v['Y'];
            $pagenum = $v['PAGE'];
            //Sources for target $k are stored under '#k', '##k', '###k', ...
            $sharp = "#";
            while (array_key_exists($sharp . $k, $this->internallink)) {
                $internallink = $this->internallink[$sharp . $k];
                $this->SetLink($internallink, $ypos, $pagenum);
                $sharp .= "#";
            }
        }
    }
}
/**
 * Tries to make HTML text more manageable (closer to XHTML).
 *
 * Steps, in order: CRLF is turned into LF; form feeds, carriage returns and
 * line feeds are removed and tabs become spaces; when $usepre is true the
 * bodies of <pre> and <textarea> elements are put back as they were captured
 * before that stripping (textarea bodies trimmed); the "< IMPRIMIR >" marker
 * is removed; runs of 2+ whitespace characters collapse to one space; HTML
 * entities are decoded; and <br> tags directly before </div> are dropped.
 *
 * @param string $html   HTML to adjust; modified in place (passed by reference)
 * @param bool   $usepre whether <pre>/<textarea> bodies are restored after whitespace stripping
 * @return string the adjusted HTML (the same value left in $html)
 */
function AdjustHTML(&$html, $usepre = true)
{
    //replace carriagereturn-linefeed-combo by a simple linefeed
    $html = str_replace("\r\n", "\n", $html);

    $regexp = '/<pre(.*?)>(.+?)<\\/pre>/si';
    $regexp2 = '/<textarea(.*?)>(.+?)<\\/textarea>/si';
    $thereispre = 0;
    $thereistextarea = 0;
    if ($usepre) {
        // Remember what sits between <pre>...</pre> and <textarea>...</textarea>
        // before the line breaks are stripped below
        $thereispre = preg_match_all($regexp, $html, $temp);
        $thereistextarea = preg_match_all($regexp2, $html, $temp2);
    }

    $html = str_replace(array("\f", "\r", "\n"), '', $html); //drop formfeed, carriage return, linefeed
    $html = str_replace("\t", ' ', $html); //replace tabs by spaces

    if ($usepre) {
        // Put the captured bodies back, one element per pass. The temporary
        // names (<erp>, <aeratxet>) keep already-restored elements from matching
        // again. addcslashes() escapes '\' and '$' so captured text such as
        // "$1" or "\0" is inserted literally rather than as a back-reference.
        for ($n = 0; $n < $thereispre; $n++) {
            $restored = '<erp' . $temp[1][$n] . '>' . $temp[2][$n] . '</erp>';
            $html = preg_replace($regexp, addcslashes($restored, '\\$'), $html, 1);
        }
        // Separate counter: textarea captures are indexed from 0 in their own array
        for ($n = 0; $n < $thereistextarea; $n++) {
            $restored = '<aeratxet' . $temp2[1][$n] . '>' . trim($temp2[2][$n]) . '</aeratxet>';
            $html = preg_replace($regexp2, addcslashes($restored, '\\$'), $html, 1);
        }
        $html = str_replace("<erp", "<pre", $html); //restore
        $html = str_replace("</erp>", "</pre>", $html); //restore
        $html = str_replace("<aeratxet", "<textarea", $html); //restore
        $html = str_replace("</aeratxet>", "</textarea>", $html); //restore
    }

    $html = str_replace("< IMPRIMIR >", '', $html); //remove marker specific to this version

    $regexp = '/\\s{2,}/s'; // turn 2+ consecutive spaces into one
    $html = preg_replace($regexp, ' ', $html);

    //html_entity_decode() only exists from PHP 4.3.0 on
    if (version_compare(phpversion(), '4.3.0', '>=')) {
        $html = html_entity_decode($html);
    } else {
        $html = lesser_entity_decode($html);
    }

    // remove redundant <br>'s before </div>, avoiding huge leaps between text blocks
    // they appear on computer-generated HTML code
    $regexp = '/(<br[ \\/]?[\\/]?>)+?<\\/div>/si';
    $html = preg_replace($regexp, '</div>', $html);

    //Callers in this file assign the result, so return it as well as updating the reference
    return $html;
}