/**
 * Packs an HTML fragment and the images it references into a single MHT document.
 *
 * The HTML is registered with the MhtFileMaker as "tmp.html"; every image found in an
 * <img src="..."> attribute is read and registered under the src value exactly as it
 * appears (trimmed) in the markup.
 *
 * @param string $content      HTML markup to convert.
 * @param string $absolutePath Prefix prepended to every image src that is not an
 *                             http:// or https:// URL before the image is read.
 * @param bool   $isEraseLink  When true, <a> elements are replaced by their inner content.
 *
 * @return mixed Whatever MhtFileMaker::GetFile() returns (presumably the assembled MHT text).
 */
public function getWordDocument($content, $absolutePath = "", $isEraseLink = true)
{
    $mht = new MhtFileMaker();

    if ($isEraseLink) {
        // Remove links but keep what they wrap. "\b" stops the pattern from matching
        // <abbr>, <address>, ... and the "s" modifier lets an anchor span several lines.
        $withoutLinks = preg_replace('~<a\b[^>]*>(.*?)</a>~is', '$1', $content);
        if ($withoutLinks !== null) {
            // preg_replace() yields null on a PCRE error; keep the input in that case.
            $content = $withoutLinks;
        }
    }

    $files = array();  // src values as written in the markup (used as part names)
    $images = array(); // locations the image bytes are actually read from
    $matches = array();

    // This algorithm requires the src attribute value to be wrapped in quotes.
    // Both <img ...> and <img ... /> are accepted; group 2 holds the src value.
    if (preg_match_all('~<img\b[^>]*?\ssrc\s*=\s*(["\'])(.*?)\1[^>]*>~is', $content, $matches)) {
        foreach ($matches[2] as $path) {
            $imgPath = trim($path);
            if ($imgPath === '') {
                continue;
            }

            $files[] = $imgPath;

            // Absolute web URLs are used as-is; everything else is resolved against the prefix.
            if (!preg_match('~^https?://~i', $imgPath)) {
                $imgPath = $absolutePath . $imgPath;
            }
            $images[] = $imgPath;
        }
    }

    $mht->AddContents("tmp.html", $mht->GetMimeType("tmp.html"), $content);

    foreach ($images as $i => $image) {
        // Read once: probing with fopen() first leaked a handle and fetched remote images twice.
        $imgcontent = @file_get_contents($image);

        if ($imgcontent === false) {
            // The path comes from the markup, so escape it before writing it into HTML output.
            echo "file:" . htmlspecialchars($image, ENT_QUOTES) . " not exist!<br />";
            continue;
        }

        if ($imgcontent !== '') {
            $mht->AddContents($files[$i], $mht->GetMimeType($image), $imgcontent);
        }
    }

    return $mht->GetFile();
}
/**
 * Converts an HTML string into an MHT file written through MhtFileMaker.
 *
 * Steps, in order:
 *  1. collect the src of every <img ... src="..."> (double-quoted values only);
 *  2. remove <script> elements;
 *  3. read every <link ... href="..."> target and append its content as a <style> block;
 *  4. register each image with the MhtFileMaker and, for web images, rewrite the
 *     reference in the markup to the host-less path;
 *  5. add the resulting markup and call MakeFile().
 *
 * @param string $link     HTML markup to convert (despite the name, this is content, not a URL).
 * @param string $filename Passed unchanged to MhtFileMaker::MakeFile().
 *
 * @return void
 */
function CreateDOC($link, $filename)
{
    // Object that assembles the MHT output.
    $MhtFileMaker = new MhtFileMaker();

    // Pull the image paths out of the markup. "[^>]*?" keeps the match inside one tag,
    // so several images on the same line are all found. Group 1 is the src value.
    $matches = array();
    preg_match_all('@<img\b[^>]*?\ssrc="([^"]+)"@ui', $link, $matches);
    $imageSources = isset($matches[1]) ? array_unique($matches[1]) : array();

    // Drop scripts. The "s" modifier covers multi-line scripts, and the lazy ".*?" stops at
    // the first closing tag so markup between two scripts is left alone.
    $withoutScripts = preg_replace('@<script\b[^>]*>.*?</script>@uis', "", $link);
    if ($withoutScripts !== null) {
        // null means a PCRE failure (e.g. invalid UTF-8 under the "u" modifier); keep the input.
        $link = $withoutScripts;
    }

    // Inline every linked resource as a <style> block.
    // NOTE(review): href values are read verbatim with file_get_contents(); if the markup
    // can come from an untrusted source this allows reading arbitrary files/URLs — confirm.
    $styles = array();
    preg_match_all('@<link\b[^>]*?\shref="([^"]+)"@ui', $link, $styles);
    $styleSources = isset($styles[1]) ? $styles[1] : array();
    foreach ($styleSources as $style) {
        $cont = @file_get_contents($style);
        if ($cont !== false) {
            $link .= "<style>{$cont}</style>";
        }
    }

    foreach ($imageSources as $img) {
        if (preg_match('@^https?://@i', $img)) {
            // Web image: fetch from the URL, name it by its path without scheme and host.
            $img_tmp = $img;
            $afterScheme = substr($img, strpos($img, '//') + 2);
            $firstSlash = strpos($afterScheme, '/');
            $img_name_only = ($firstSlash === false) ? '' : substr($afterScheme, $firstSlash + 1);

            if ($img_name_only === '') {
                // A bare "http://host" has no path to embed; leave the markup untouched.
                continue;
            }

            // Point the markup at the host-less path.
            $link = str_replace($img, $img_name_only, $link);
        } else {
            // Local image: it is read from the "img/" directory. The reference in the markup
            // is kept as written and reused as the part name.
            // NOTE(review): the original left this name undefined for local files; using the
            // src value is an assumption — confirm against MhtFileMaker::AddFile().
            $img_tmp = "img/" . $img;
            $img_name_only = $img;
        }

        // Add the image to the final file.
        $MhtFileMaker->AddFile($img_tmp, $img_name_only, 'utf-8');
    }

    // Add the markup itself, then write the file.
    $MhtFileMaker->AddContents($link, "text/html");
    $MhtFileMaker->MakeFile($filename);
}
/**
 * Packs an HTML fragment and the images it references into a single MHT document.
 *
 * The HTML is registered with the MhtFileMaker as "tmp.html". Every image found in an
 * <img src="..."> attribute is fetched over HTTP and registered under the src value exactly
 * as it appears (trimmed) in the markup. Sources that are not http:// or https:// URLs are
 * resolved against "http://" . $_SERVER['HTTP_HOST'] . "/".
 *
 * @param string $content      HTML markup to convert.
 * @param string $absolutePath Not used by this implementation; kept so the signature stays
 *                             compatible with existing callers.
 * @param bool   $isEraseLink  When true, <a> elements are replaced by their inner content.
 *
 * @return mixed Whatever MhtFileMaker::GetFile() returns (presumably the assembled MHT text).
 */
function getWordDocument($content, $absolutePath = "", $isEraseLink = false)
{
    $mht = new MhtFileMaker();

    if ($isEraseLink) {
        // Remove links but keep what they wrap. "\b" stops the pattern from matching
        // <abbr>, <address>, ... and the "s" modifier lets an anchor span several lines.
        $withoutLinks = preg_replace('~<a\b[^>]*>(.*?)</a>~is', '$1', $content);
        if ($withoutLinks !== null) {
            // preg_replace() yields null on a PCRE error; keep the input in that case.
            $content = $withoutLinks;
        }
    }

    $files = array();  // src values as written in the markup (used as part names)
    $images = array(); // URLs the image bytes are actually read from
    $matches = array();

    // The src value must be quoted. Both <img ...> and <img ... /> are accepted;
    // group 2 holds the src value.
    if (preg_match_all('~<img\b[^>]*?\ssrc\s*=\s*(["\'])(.*?)\1[^>]*>~is', $content, $matches)) {
        foreach ($matches[2] as $path) {
            $imgPath = trim($path);
            if ($imgPath === '') {
                continue;
            }

            $files[] = $imgPath;

            if (!preg_match('~^https?://~i', $imgPath)) {
                // Site-relative source: resolve against the current host. The leading slash
                // is dropped so "/img/a.png" does not turn into "http://host//img/a.png".
                $imgPath = "http://" . $_SERVER['HTTP_HOST'] . "/" . ltrim($imgPath, '/');
            }
            $images[] = $imgPath;
        }
    }

    $mht->AddContents("tmp.html", $mht->GetMimeType("tmp.html"), $content);

    foreach ($images as $i => $image) {
        // Read once: probing with fopen() first leaked a handle and fetched every image twice.
        $imgcontent = @file_get_contents($image);

        if ($imgcontent === false) {
            // The URL comes from the markup, so escape it before writing it into HTML output.
            echo "file:" . htmlspecialchars($image, ENT_QUOTES) . " not exist!<br />";
            continue;
        }

        // The original tested $content here, which is always non-empty at this point,
        // so failed reads were added as parts; the downloaded bytes are what matter.
        if ($imgcontent !== '') {
            $mht->AddContents($files[$i], $mht->GetMimeType($image), $imgcontent);
        }
    }

    return $mht->GetFile();
}