Example #1
0
 /**
  * Parse an HTML string and feed its text and tags to the PDF writer.
  *
  * The markup is cleaned with AdjustHTML(), optionally passed through
  * ReadCSS(), stripped of every tag not listed in $this->enabledtags and then
  * split into alternating text / tag tokens:
  *  - text tokens are entity-decoded and routed to SetTitle(), the current
  *    select option, the current table cell, $this->textbuffer or Write();
  *  - tag tokens have their attributes normalised (quotes, href/src paths)
  *    and are dispatched to OpenTag() or CloseTag().
  * Finally the entries of $this->internallink are turned into links with
  * SetLink().
  *
  * @param string $html HTML markup to render
  * @return void
  */
 function WriteHTML($html)
 {
     $this->ReadMetaTags($html);

     //Try to make HTML look more like XHTML.
     //AdjustHTML() may work on $html by reference without returning anything,
     //so only take over its return value when it really is the cleaned string.
     $adjusted = AdjustHTML($html, $this->usepre);
     if (is_string($adjusted)) {
         $html = $adjusted;
     }
     if ($this->usecss) {
         $html = $this->ReadCSS($html);
     }

     //Fix '<?XML' bug from HTML code generated by MS Word: break every
     //processing-instruction opener so that it is no longer seen as one
     $html = str_replace('<?', '< ', $html);
     //Remove all unsupported tags, but the ones inside the 'enabledtags' string
     $html = strip_tags($html, $this->enabledtags);

     //Explode the string in order to parse the HTML code:
     //even indexes hold text, odd indexes hold the content of a <...> tag
     $a = preg_split('/<(.*?)>/ms', $html, -1, PREG_SPLIT_DELIM_CAPTURE);
     foreach ($a as $i => $e) {
         if ($i % 2 == 0) {
             //TEXT
             //Adjust text, if needed
             if (strpos($e, "&") !== false) {
                 //Decode value entities
                 if (strpos($e, "#") !== false) {
                     $e = value_entity_decode($e);
                 }
                 //Avoid crashing the script where html_entity_decode() is missing (PHP < 4.3)
                 if (function_exists('html_entity_decode')) {
                     $e = html_entity_decode($e, ENT_QUOTES, 'cp1252');
                 } else {
                     $e = lesser_entity_decode($e);
                 }
             }
             //Unify ascii code of spaces (in order to recognize all of them correctly)
             $e = str_replace(chr(160), chr(32), $e);
             if (strlen($e) == 0) {
                 continue;
             }
             if ($this->divrevert) {
                 $e = strrev($e);
             }
             if ($this->toupper) {
                 $e = strtoupper($e);
             }
             if ($this->tolower) {
                 $e = strtolower($e);
             }
             //Start of 'if/elseif's
             if ($this->titulo) {
                 $this->SetTitle($e);
             } elseif ($this->specialcontent) {
                 if ($this->specialcontent == "type=select" and $this->selectoption['ACTIVE'] == true) {
                     //Track the widest option and remember the first one as the selected value
                     $stringwidth = $this->GetStringWidth($e);
                     if (!isset($this->selectoption['MAXWIDTH']) or $stringwidth > $this->selectoption['MAXWIDTH']) {
                         $this->selectoption['MAXWIDTH'] = $stringwidth;
                     }
                     if (!isset($this->selectoption['SELECTED']) or $this->selectoption['SELECTED'] == '') {
                         $this->selectoption['SELECTED'] = $e;
                     }
                 } else {
                     $this->textbuffer[] = array("???" . $this->specialcontent . "???" . $e);
                 }
             } elseif ($this->tablestart) {
                 //Ignore content between <table>,<tr> and a <td> tag (this content is usually only a bunch of spaces)
                 if ($this->tdbegin) {
                     $this->cell[$this->row][$this->col]['textbuffer'][] = array($e, $this->HREF, $this->currentstyle, $this->colorarray, $this->currentfont, $this->SUP, $this->SUB, '', $this->strike, $this->outlineparam, $this->bgcolorarray);
                     $this->cell[$this->row][$this->col]['text'][] = $e;
                     $this->cell[$this->row][$this->col]['s'] += $this->GetStringWidth($e);
                 }
             } elseif ($this->pbegin or $this->HREF or $this->divbegin or $this->SUP or $this->SUB or $this->strike or $this->buffer_on) {
                 $this->textbuffer[] = array($e, $this->HREF, $this->currentstyle, $this->colorarray, $this->currentfont, $this->SUP, $this->SUB, '', $this->strike, $this->outlineparam, $this->bgcolorarray);
             } else {
                 if ($this->blockjustfinished) {
                     $e = ltrim($e);
                 }
                 if ($e != '') {
                     //Write text directly in the PDF
                     $this->Write($this->lineheight, $e);
                     if ($this->pjustfinished) {
                         $this->pjustfinished = false;
                     }
                 }
             }
         } else {
             //Tag
             $this->colorarray = array();
             $this->strike = false;
             //substr() instead of $e[0]: the tag content may be an empty string
             if (substr($e, 0, 1) === '/') {
                 $this->CloseTag(strtoupper(substr($e, 1)));
             } else {
                 //Eliminate single quotes, if any
                 $regexp = '|=\'(.*?)\'|s';
                 $e = preg_replace($regexp, "=\"\$1\"", $e);
                 //Changes anykey=anyvalue to anykey="anyvalue" (only do this when this happens inside tags)
                 $regexp = '| (\\w+?)=([^\\s>"]+)|si';
                 $e = preg_replace($regexp, " \$1=\"\$2\"", $e);
                 //Fix path values, if needed
                 if (stristr($e, "href=") !== false or stristr($e, "src=") !== false) {
                     $regexp = '/ (href|src)="(.*?)"/i';
                     //Only rewrite when a well-formed href/src attribute was really found
                     if (preg_match($regexp, $e, $auxiliararray)) {
                         //If on Windows
                         $path = str_replace("\\", "/", $auxiliararray[2]);
                         //Drop a leading "./" (the dot is escaped so that only a literal "./" matches)
                         $path = preg_replace('|^\./|', '', $path);
                         //Get link info and obtain its absolute path
                         if (substr($path, 0, 1) != '#') {
                             if (strpos($path, "../") !== false) {
                                 $backtrackamount = substr_count($path, "../");
                                 $maxbacktrack = substr_count($this->basepath, "/") - 1;
                                 $filepath = str_replace("../", '', $path);
                                 $path = $this->basepath;
                                 //If it is an invalid relative link, then make it go to directory root
                                 if ($backtrackamount > $maxbacktrack) {
                                     $backtrackamount = $maxbacktrack;
                                 }
                                 //Backtrack some directories (own counter: $i is the foreach key)
                                 for ($step = 0; $step < $backtrackamount + 1; $step++) {
                                     $path = substr($path, 0, strrpos($path, "/"));
                                 }
                                 //Make it an absolute path
                                 $path = $path . "/" . $filepath;
                             } elseif (strpos($path, ":/") === false) {
                                 $path = $this->basepath . $path;
                             }
                             //Do nothing if it is an Absolute Link
                         }
                         //Escape '\' and '$' so the path is inserted literally and is
                         //never interpreted as a back-reference by preg_replace()
                         $e = preg_replace($regexp, ' \\1="' . addcslashes($path, '\\$') . '"', $e);
                     }
                 }
                 //END of Fix path values
                 //Extract attributes
                 $contents = array();
                 preg_match_all('/\\S*=["\'][^"\']*["\']/', $e, $contents);
                 $a2 = array();
                 preg_match('/\\S+/', $e, $a2);
                 //A tag made only of whitespace yields an empty tag name
                 $tag = isset($a2[0]) ? strtoupper($a2[0]) : '';
                 $attr = array();
                 if (!empty($contents[0])) {
                     foreach ($contents[0] as $v) {
                         //preg_match() replaces ereg(), which no longer exists since PHP 7
                         if (preg_match('/^([^=]*)=["\']?([^"\']*)["\']?$/D', $v, $a3)) {
                             $attr[strtoupper($a3[1])] = $a3[2];
                         }
                     }
                 }
                 $this->OpenTag($tag, $attr);
             }
         }
     }
     //end of   foreach($a as $i=>$e)

     //Create Internal Links, if needed
     if (!empty($this->internallink)) {
         foreach ($this->internallink as $k => $v) {
             //Keys containing '#' are only looked up below, never used as a starting point
             if (strpos($k, "#") !== false) {
                 continue;
             }
             $ypos = $v['Y'];
             $pagenum = $v['PAGE'];
             //Walk through "#name", "##name", ... and point each stored entry at this position
             $sharp = "#";
             while (array_key_exists($sharp . $k, $this->internallink)) {
                 $internallink = $this->internallink[$sharp . $k];
                 $this->SetLink($internallink, $ypos, $pagenum);
                 $sharp .= "#";
             }
         }
     }
 }
Example #2
0
/**
 * Try to make the HTML text more manageable before it is parsed.
 *
 * Line breaks, form feeds and tabs are removed or turned into spaces, runs of
 * whitespace are collapsed into one space, entities are decoded and redundant
 * <br> tags right before </div> are dropped. When $usepre is true the original
 * content found between <pre>...</pre> and <textarea>...</textarea> is put
 * back after the line breaks were stripped (textarea content is trimmed).
 *
 * NOTE(review): entities are decoded with html_entity_decode()'s default flags
 * and charset, which differ between PHP versions - confirm this is intended.
 *
 * @param string $html   Markup to clean; it is modified in place (by reference)
 * @param bool   $usepre Whether <pre>/<textarea> content must be preserved
 * @return string The cleaned markup (the same value that is left in $html)
 */
function AdjustHTML(&$html, $usepre = true)
{
    //replace carriagereturn-linefeed-combo by a simple linefeed
    $html = str_replace("\r\n", "\n", $html);

    $regexp = '/<pre(.*?)>(.+?)<\\/pre>/si';
    $regexp2 = '/<textarea(.*?)>(.+?)<\\/textarea>/si';
    $thereispre = 0;
    $thereistextarea = 0;
    if ($usepre) {
        //Remember the untouched content of <pre> and <textarea> before line breaks are stripped
        $thereispre = preg_match_all($regexp, $html, $temp);
        $thereistextarea = preg_match_all($regexp2, $html, $temp2);
    }

    //replace formfeed, carriage return and linefeed by nothing
    $html = str_replace(array("\f", "\r", "\n"), '', $html);
    //replace tabs by spaces
    $html = str_replace("\t", ' ', $html);

    if ($usepre) {
        //Put the saved content back, one match at a time. The tag names are
        //temporarily reversed so an already restored block is not matched again.
        //Each loop uses its own index, and '\' and '$' are escaped so the saved
        //content is never interpreted as a back-reference by preg_replace().
        for ($k = 0; $k < $thereispre; $k++) {
            $replacement = '<erp' . $temp[1][$k] . '>' . $temp[2][$k] . '</erp>';
            $html = preg_replace($regexp, addcslashes($replacement, '\\$'), $html, 1);
        }
        for ($k = 0; $k < $thereistextarea; $k++) {
            $replacement = '<aeratxet' . $temp2[1][$k] . '>' . trim($temp2[2][$k]) . '</aeratxet>';
            $html = preg_replace($regexp2, addcslashes($replacement, '\\$'), $html, 1);
        }
        //restore the real tag names
        $html = str_replace("<erp", "<pre", $html);
        $html = str_replace("</erp>", "</pre>", $html);
        $html = str_replace("<aeratxet", "<textarea", $html);
        $html = str_replace("</aeratxet>", "</textarea>", $html);
    }

    //special removal for this version
    $html = str_replace("< IMPRIMIR >", '', $html);
    //turn 2+ consecutive spaces into one
    $html = preg_replace('/\\s{2,}/s', ' ', $html);

    //Avoid crashing the script where html_entity_decode() is missing (PHP < 4.3)
    if (function_exists('html_entity_decode')) {
        $html = html_entity_decode($html);
    } else {
        $html = lesser_entity_decode($html);
    }

    //remove redundant <br>'s before </div>, avoiding huge leaps between text blocks
    //they appear on computer-generated HTML code
    $html = preg_replace('/(<br[ \\/]?[\\/]?>)+?<\\/div>/si', '</div>', $html);

    //Callers may use either the by-reference argument or the return value
    return $html;
}