Example #1
0
 /**
  * Convert one exported XHTML page into an import fragment and store it in
  * the temporary directory.
  *
  * Steps performed:
  *  - When the file is named index.html, it is parsed as XML and every distinct
  *    link target (fragment removed, mailto: links skipped) is numbered in
  *    document order; the result is stored in the global $ordTable, with
  *    'index.html' itself at position 0.
  *  - The text between <title> and </title> is recorded in the global $titles
  *    under the file's base name (empty string when no title is present).
  *  - The text between <body> and </body> is wrapped in <div id="docimport">.
  *    When those exact markers are not both present the whole file is wrapped
  *    instead, so no content is silently dropped.
  *  - href="<file>" prefixes for every key of the global $files are rewritten
  *    to href="{{file<ID>}}", and src="<imageroot>/<image>" prefixes for every
  *    key of the global $images are rewritten to src="{{image<ID>}}"; images
  *    that were actually referenced are flagged in the global $usedImages.
  *  - The result is written to $tempdir . DS . 'file' . $thisid . '.dat'.
  *
  * The function terminates the script with die() when the file cannot be
  * read, when index.html is not well-formed XML, or when the result cannot
  * be written.
  *
  * NOTE(review): relies on $this->docbook_relimages and on the DS constant,
  * both defined outside this block (DS is presumably the directory
  * separator - confirm).
  *
  * @param string $filename Path of the XHTML file to process.
  * @param mixed  $thisid   Identifier used to build the output file name.
  */
 function processHTML($filename, $thisid)
 {
     global $titles, $files, $images, $tempdir, $ordTable, $usedImages;

     $filedata = file_get_contents($filename);
     if ($filedata === false) {
         die('ERROR: unable to read ' . $filename);
     }

     if (basename($filename) == 'index.html') {
         // Lower the reporting level only for the duration of the parse and
         // restore it unconditionally, so it cannot leak into the rest of the
         // script (for example when the index contains no anchors at all).
         $previousLevel = error_reporting(E_ERROR);
         $domdoc = new DOMDocument();
         $success = $domdoc->loadXML($filedata);
         error_reporting($previousLevel);

         if (!$success) {
             // DOMDocument has no getErrorString(); libxml keeps the last
             // parser error, which is what we want to report.
             $xmlError = libxml_get_last_error();
             $reason = $xmlError ? trim($xmlError->message) : 'index.html is not well-formed XML';
             die('ERROR: ' . $reason);
         }

         $order = 0;
         $ordTable = array('index.html' => 0);

         // Walk the anchor elements (<a href="...">) in document order
         foreach ($domdoc->getElementsByTagName('a') as $anchor) {
             $href = $anchor->getAttribute('href');

             // Drop any page anchor from the URL, e.g. #some-anchor
             $hashlocation = strpos($href, '#');
             if ($hashlocation !== false) {
                 $href = (string) substr($href, 0, $hashlocation);
             }

             // Skip empty targets and pages that already have a position
             if ($href === '' || array_key_exists($href, $ordTable)) {
                 continue;
             }

             // E-mail links are not pages
             if (strncmp($href, 'mailto:', 7) === 0) {
                 continue;
             }

             $order++;
             $ordTable[$href] = $order;
         }
     }

     // Extract the title; a missing marker yields an empty title instead of
     // an arbitrary slice computed from a false strpos() result.
     $title = '';
     $titleOpen = strpos($filedata, '<title>');
     if ($titleOpen !== false) {
         $startOfTitle = $titleOpen + 7;
         $endOfTitle = strpos($filedata, '</title>', $startOfTitle);
         if ($endOfTitle !== false) {
             $title = substr($filedata, $startOfTitle, $endOfTitle - $startOfTitle);
         }
     }

     // Extract the body; fall back to the whole file when the markers are
     // not both present.
     $body = $filedata;
     $bodyOpen = strpos($filedata, '<body>');
     if ($bodyOpen !== false) {
         $startOfContent = $bodyOpen + 6;
         $endOfContent = strpos($filedata, '</body>', $startOfContent);
         if ($endOfContent !== false) {
             $body = substr($filedata, $startOfContent, $endOfContent - $startOfContent);
         }
     }
     $filedata = '<div id="docimport">' . $body . '</div>';

     // Store the title
     $titles[basename($filename)] = $title;

     // Replace links to other XHTML files with {{fileXX}}
     foreach ($files as $linkedFile => $id) {
         $filedata = str_replace('href="' . $linkedFile, 'href="{{file' . $id . '}}', $filedata);
     }

     // Replace links to image files with {{imageXX}}
     $imageroot = $this->docbook_relimages;
     foreach ($images as $imageFile => $id) {
         $needle = 'src="' . $imageroot . '/' . $imageFile;
         if (strpos($filedata, $needle) !== false) {
             // Mark this image as used
             $usedImages[basename($imageFile)] = true;
             $filedata = str_replace($needle, 'src="{{image' . $id . '}}', $filedata);
         }
     }

     // Save the result to the archive
     $target = $tempdir . DS . 'file' . $thisid . '.dat';
     if (file_put_contents($target, $filedata) === false) {
         die('ERROR: unable to write ' . $target);
     }
 }