/**
 * Converts one XHTML page into an archive fragment.
 *
 * Steps performed:
 *  1. When the page is index.html, parse it as XML and rebuild the global
 *     $ordTable, which maps every distinct link target found in the index
 *     (page anchors stripped, mailto: links ignored) to its order of first
 *     appearance. index.html itself is order 0.
 *  2. Extract the text between <title> and </title> and store it in the
 *     global $titles, keyed by the page's base name.
 *  3. Extract the markup between <body> and </body> and wrap it in
 *     <div id="docimport">.
 *  4. Rewrite href="<file>" prefixes to href="{{fileNN}}" for every entry of
 *     the global $files, and src="<imageroot>/<image>" prefixes to
 *     src="{{imageNN}}" for every entry of the global $images. Images that
 *     are actually referenced are flagged in the global $usedImages.
 *  5. Write the result to $tempdir/file<thisid>.dat.
 *
 * The function terminates the script with die() when the file cannot be read,
 * when index.html is not well-formed XML, or when the result cannot be written.
 *
 * @param string $filename Path of the XHTML file to process.
 * @param mixed  $thisid   Identifier used to name the output file.
 *
 * @return void
 */
function processHTML($filename, $thisid)
{
    global $titles, $files, $images, $tempdir, $ordTable, $usedImages;

    $filedata = file_get_contents($filename);
    if ($filedata === false) {
        die('ERROR: could not read ' . $filename);
    }

    if (basename($filename) == 'index.html') {
        // Silence parser warnings while the index is being analysed; the
        // previous level is restored once the analysis is complete.
        $previousReporting = error_reporting(E_ERROR);

        // Drop stale libxml errors so a failure below reports this document.
        libxml_clear_errors();

        $domdoc = new DOMDocument();
        $success = ($filedata !== '') && $domdoc->loadXML($filedata);
        if (!$success) {
            // DOMDocument has no getErrorString(); libxml keeps the last error.
            $xmlError = libxml_get_last_error();
            error_reporting($previousReporting);
            if ($xmlError) {
                $reason = trim($xmlError->message) . ' (line ' . $xmlError->line . ')';
            } else {
                $reason = 'could not parse ' . $filename . ' as XML';
            }
            die('ERROR: ' . $reason);
        }

        $order = 0;
        $ordTable = array('index.html' => 0);

        // Walk all anchor elements (<a href="...">) in document order.
        // Plain assignment: a function result must not be bound by reference.
        $anchors = $domdoc->getElementsByTagName('a');
        foreach ($anchors as $anchor) {
            $href = $anchor->getAttribute('href');

            // Strip any page anchor from the URL, e.g. #some-anchor
            $hashlocation = strpos($href, '#');
            if ($hashlocation !== false) {
                $href = substr($href, 0, $hashlocation);
            }

            // Only process targets that have not been seen yet.
            if ($href == '' || array_key_exists($href, $ordTable)) {
                continue;
            }
            if (strncmp($href, 'mailto:', 7) === 0) {
                continue;
            }

            $order++;
            $ordTable[$href] = $order;
        }
        unset($anchors);

        // Restore exactly once, after the loop, so the level is also restored
        // when the index contains no anchors at all.
        error_reporting($previousReporting);
    }

    // Extract the title. A missing marker yields an empty title instead of
    // an arbitrary slice of the document.
    $title = '';
    $titleOpen = strpos($filedata, '<title>');
    if ($titleOpen !== false) {
        $startOfTitle = $titleOpen + 7;
        $endOfTitle = strpos($filedata, '</title>', $startOfTitle);
        if ($endOfTitle !== false) {
            $title = substr($filedata, $startOfTitle, $endOfTitle - $startOfTitle);
        }
    }

    // Extract the body. If <body> is missing the content starts at the top of
    // the file; if </body> is missing it runs to the end of the file.
    $bodyOpen = strpos($filedata, '<body>');
    $startOfContent = ($bodyOpen === false) ? 0 : $bodyOpen + 6;
    $endOfContent = strpos($filedata, '</body>', $startOfContent);
    if ($endOfContent === false) {
        $endOfContent = strlen($filedata);
    }
    $body = (string) substr($filedata, $startOfContent, $endOfContent - $startOfContent);
    $filedata = '<div id="docimport">' . $body . '</div>';

    // Store the title
    $titles[basename($filename)] = $title;

    // Replace links to other XHTML files with {{fileXX}}.
    // Distinct loop variables keep the $filename parameter intact.
    foreach ($files as $linkedFile => $fileId) {
        $filedata = str_replace('href="' . $linkedFile, 'href="{{file' . $fileId . '}}', $filedata);
    }

    // Replace links to image files with {{imageXX}}
    $imageroot = $this->docbook_relimages;
    foreach ($images as $imageFile => $imageId) {
        $replaced = 0;
        $filedata = str_replace(
            'src="' . $imageroot . '/' . $imageFile,
            'src="{{image' . $imageId . '}}',
            $filedata,
            $replaced
        );
        if ($replaced > 0) {
            $usedImages[basename($imageFile)] = true; // Mark this image as used
        }
    }

    // Save the result to the archive
    $target = $tempdir . DS . 'file' . $thisid . '.dat';
    if (file_put_contents($target, $filedata) === false) {
        die('ERROR: could not write ' . $target);
    }
}