/**
 * Convert an HTML fragment and insert the result into a PHPWord element.
 *
 * The fragment is wrapped in dummy <html><body> elements, parsed with
 * SimpleHTMLDom, and the nodes found inside the body are handed to
 * h2d_insert_html() together with an initial conversion state.
 *
 * @param object $insertInto the PHPWord element which receives the converted html
 * @param string $text       the HTML fragment to convert
 * @param array  $style      optional initial text style; when omitted (or
 *                           loosely equal to null) array('size' => '10') is used
 */
function replaceHtml($insertInto, $text, $style = null) {
    // HTML Dom object. Note, we need to nest the html in a couple of dummy
    // elements so that there is always a single root to search from.
    $html_dom = new simple_html_dom();
    $html_dom->load('<html><body>' . $text . '</body></html>');

    // Create the dom array of elements which we are going to work on. Guard
    // against a failed lookup so that we never call children() on null.
    $html_root = $html_dom->find('html', 0);
    $html_dom_array = $html_root ? $html_root->children() : array();

    // Provide some initial settings. The loose comparison is deliberate: it
    // keeps the historical behaviour for callers passing other empty values.
    if ($style == null) {
        $style = array('size' => '10');
    }

    $initial_state = array(
        'current_style' => $style,
        'style_sheet' => h2d_styles(),
        'parents' => array(0 => 'body'),
        'list_depth' => 0,
        'context' => 'section',
        'base_root' => 'http://test.local',
        'base_path' => '/',
        'pseudo_list' => TRUE,
        'pseudo_list_indicator_font_name' => 'Wingdings',
        'pseudo_list_indicator_font_size' => '7',
        'pseudo_list_indicator_character' => 'l ',
    );

    // Convert the HTML and put it into the PHPWord object. The first child
    // of the dummy <html> element is the dummy <body>.
    if (isset($html_dom_array[0])) {
        h2d_insert_html($insertInto, $html_dom_array[0]->nodes, $initial_state);
    }

    // SimpleHTMLDom builds circular references between its nodes; clear()
    // must be called explicitly or the parsed tree is never released.
    $html_dom->clear();
    unset($html_dom);
}
/**
 * Populate PHPWord element
 *
 * This recursive function processes all the elements and child elements
 * from the DOM array of objects created by SimpleHTMLDom.
 *
 * Keys of $state read by this function:
 *  - current_style: style array applied to added text (default array('size' => '11'))
 *  - parents: stack of enclosing tag names, innermost first (default array('body'))
 *  - list_depth: defaulted to 0 here; not otherwise read in this function
 *  - context: 'section', 'footer' or 'header' (default 'section')
 *  - base_root, base_path: prefixes used to resolve relative link targets
 *  - pseudo_list and pseudo_list_indicator_font_name / _font_size / _character:
 *    control how list item indicators are rendered
 * Keys maintained internally during the run: textrun, table, table_allowed,
 * table_cell and list_number.
 *
 * @param object $phpword_element - the object from PHPWord in which to place the converted html
 * @param array $html_dom_array - array of nodes generated by simple HTML dom
 * @param array $state - variables for the current run
 */
function h2d_insert_html(&$phpword_element, $html_dom_array, &$state = array()) {
    // Set some defaults. empty() gives the same truthiness test as before
    // but does not raise a notice when the key is missing altogether.
    $state['current_style'] = !empty($state['current_style']) ? $state['current_style'] : array('size' => '11');
    $state['parents'] = !empty($state['parents']) ? $state['parents'] : array(0 => 'body'); // Our parent is body
    $state['list_depth'] = !empty($state['list_depth']) ? $state['list_depth'] : 0;
    $state['context'] = !empty($state['context']) ? $state['context'] : 'section'; // Possible values - section, footer or header

    // Tags which may be processed as children of the current parent:
    $allowed_children = h2d_html_allowed_children($state['parents'][0]);

    // Go through each element, adding bits to go in the PHPWord element:
    foreach ($html_dom_array as $element) {
        $old_style = $state['current_style'];
        $state['current_style'] = h2d_get_style($element, $state);

        switch ($element->tag) {
            case 'p':
            case 'div': // Treat a div as a paragraph
            case 'h1':
            case 'h2':
            case 'h3':
            case 'h4':
            case 'h5':
            case 'h6':
                // Everything in this element should be in the same text run
                // we need to initiate a text run here and pass it on:
                $state['textrun'] = $phpword_element->createTextRun($state['current_style']);
                if (in_array($element->tag, $allowed_children)) {
                    array_unshift($state['parents'], $element->tag);
                    h2d_insert_html($phpword_element, $element->nodes, $state);
                    array_shift($state['parents']);
                } else {
                    $state['textrun']->addText(h2d_clean_text($element->innertext), $state['current_style']);
                }
                unset($state['textrun']);
                if (!isset($state['current_style']['spaceAfter'])) {
                    // For better usability for the end user of the Word document, we
                    // separate paragraphs and headings with an empty line. You can
                    // override this behaviour by setting the spaceAfter parameter for
                    // the current element.
                    $phpword_element->addTextBreak();
                }
                break;

            case 'table':
                if (in_array('table', $allowed_children)) {
                    $old_table_state = isset($state['table_allowed']) ? $state['table_allowed'] : null;
                    if (in_array('td', $state['parents'])) {
                        $state['table_allowed'] = FALSE; // This is a PHPWord constraint
                    } else {
                        $state['table_allowed'] = TRUE;
                        $state['table'] = $phpword_element->addTable();
                    }
                    array_unshift($state['parents'], 'table');
                    h2d_insert_html($phpword_element, $element->nodes, $state);
                    array_shift($state['parents']);
                    // Reset table state to what it was before a table was added:
                    $state['table_allowed'] = $old_table_state;
                    $phpword_element->addTextBreak();
                } else {
                    $state['textrun'] = $phpword_element->createTextRun();
                    $state['textrun']->addText(h2d_clean_text($element->innertext), $state['current_style']);
                }
                break;

            case 'tbody':
                if (in_array('tbody', $allowed_children)) {
                    array_unshift($state['parents'], 'tbody');
                    h2d_insert_html($phpword_element, $element->nodes, $state);
                    array_shift($state['parents']);
                } else {
                    $state['textrun'] = $phpword_element->createTextRun();
                    $state['textrun']->addText(h2d_clean_text($element->innertext), $state['current_style']);
                }
                break;

            case 'tr':
                if (in_array('tr', $allowed_children)) {
                    if (!empty($state['table_allowed'])) {
                        $state['table']->addRow();
                    } else {
                        // Simply add a new line if a table is not possible in this context:
                        $state['textrun'] = $phpword_element->createTextRun();
                    }
                    array_unshift($state['parents'], 'tr');
                    h2d_insert_html($phpword_element, $element->nodes, $state);
                    array_shift($state['parents']);
                } else {
                    $state['textrun'] = $phpword_element->createTextRun();
                    $state['textrun']->addText(h2d_clean_text($element->innertext), $state['current_style']);
                }
                break;

            case 'td':
                if (in_array('td', $allowed_children) && !empty($state['table_allowed'])) {
                    // Unset any text run there may happen to be:
                    unset($state['textrun']);
                    if (isset($element->width)) {
                        $cell_width = $element->width * 15; // Converting at 15 TWIPS per pixel
                    } else {
                        $cell_width = 800;
                    }
                    $state['table_cell'] = $state['table']->addCell($cell_width);
                    array_unshift($state['parents'], 'td');
                    // The cell becomes the container for everything nested in it:
                    h2d_insert_html($state['table_cell'], $element->nodes, $state);
                    array_shift($state['parents']);
                } else {
                    if (!isset($state['textrun'])) {
                        $state['textrun'] = $phpword_element->createTextRun();
                    }
                    $state['textrun']->addText(h2d_clean_text($element->innertext), $state['current_style']);
                }
                break;

            case 'a':
                // Create a new text run if we aren't in one already:
                if (empty($state['textrun'])) {
                    $state['textrun'] = $phpword_element->createTextRun();
                }
                if ($state['context'] == 'section') {
                    $link_target = (string) $element->href;
                    $base_root = isset($state['base_root']) ? $state['base_root'] : '';
                    $base_path = isset($state['base_path']) ? $state['base_path'] : '';
                    if (preg_match('/^[a-z][a-z0-9+.\-]*:/i', $link_target)) {
                        // Already absolute: the target carries its own scheme
                        // (http:, https:, mailto:, ...), so leave it untouched.
                        $href = $link_target;
                    } elseif (strpos($link_target, '/') === 0) {
                        // Root-relative target:
                        $href = $base_root . $link_target;
                    } else {
                        // Path-relative target:
                        $href = $base_root . $base_path . $link_target;
                    }
                    $state['textrun']->addLink($href, h2d_clean_text($element->innertext), $state['current_style']);
                } else {
                    // Links can't seem to be included in headers or footers with PHPWord:
                    // trying to include them causes an error which stops Word from opening the
                    // file - in Word 2003 with the converter at least.
                    // So add the link styled as a link only.
                    $state['textrun']->addText(h2d_clean_text($element->innertext), $state['current_style']);
                }
                break;

            case 'ul':
                if (in_array('ul', $allowed_children)) {
                    if (empty($state['pseudo_list'])) {
                        // Unset any existing text run. PHPWord lists cannot appear in a
                        // text run. If we leave a text run active then subsequent text
                        // will go in that text run (if it isn't re-initialised), which
                        // would mean that text after this list would appear before it
                        // in the Word document.
                        unset($state['textrun']);
                    }
                    array_unshift($state['parents'], 'ul');
                    h2d_insert_html($phpword_element, $element->nodes, $state);
                    array_shift($state['parents']);
                } else {
                    $state['textrun'] = $phpword_element->createTextRun();
                    $state['textrun']->addText(h2d_clean_text($element->innertext), $state['current_style']);
                }
                break;

            case 'ol':
                // Remember the counter of any enclosing ordered list so that
                // numbering of the outer list continues correctly afterwards.
                $outer_list_number = isset($state['list_number']) ? $state['list_number'] : 0;
                $state['list_number'] = 0; // Reset list number.
                if (in_array('ol', $allowed_children)) {
                    if (empty($state['pseudo_list'])) {
                        // Unset any existing text run. Lists cannot appear in a text
                        // run. If we leave a text run active then subsequent text will
                        // go in that text run (if it isn't re-initialised), which would
                        // mean that text after this list would appear before it in the
                        // Word document.
                        unset($state['textrun']);
                    }
                    array_unshift($state['parents'], 'ol');
                    h2d_insert_html($phpword_element, $element->nodes, $state);
                    array_shift($state['parents']);
                } else {
                    $state['textrun'] = $phpword_element->createTextRun();
                    $state['textrun']->addText(h2d_clean_text($element->innertext), $state['current_style']);
                }
                $state['list_number'] = $outer_list_number;
                break;

            case 'li':
                // You cannot style individual pieces of text in a list element so we do it
                // with text runs instead. This does not allow us to indent lists at all, so
                // we can't show nesting.
                // Create a new text run for each element:
                $state['textrun'] = $phpword_element->createTextRun();
                if (in_array('li', $allowed_children)) {
                    $style = $state['current_style'];
                    if ($state['parents'][0] == 'ol') {
                        // Numbered item: the indicator is the running count.
                        $state['list_number'] = isset($state['list_number']) ? $state['list_number'] + 1 : 1;
                        $item_indicator = $state['list_number'] . '. ';
                    } else {
                        // Bulleted item: the indicator is a character in a symbol font.
                        $style['name'] = isset($state['pseudo_list_indicator_font_name']) ? $state['pseudo_list_indicator_font_name'] : null;
                        $style['size'] = isset($state['pseudo_list_indicator_font_size']) ? $state['pseudo_list_indicator_font_size'] : null;
                        $item_indicator = isset($state['pseudo_list_indicator_character']) ? $state['pseudo_list_indicator_character'] : '';
                    }
                    array_unshift($state['parents'], 'li');
                    $state['textrun']->addText($item_indicator, $style);
                    h2d_insert_html($phpword_element, $element->nodes, $state);
                    array_shift($state['parents']);
                } else {
                    $state['textrun']->addText(h2d_clean_text($element->innertext), $state['current_style']);
                }
                unset($state['textrun']);
                break;

            case 'text':
                // We may get some empty text nodes - containing just a space -
                // in simple HTML dom - we want to exclude those, as these can
                // cause extra line returns. However we don't want to exclude
                // spaces between styling elements (these will be within a text run).
                if (empty($state['textrun'])) {
                    $text = h2d_clean_text(trim($element->innertext));
                } else {
                    $text = h2d_clean_text($element->innertext);
                }
                // Compare against the empty string rather than using empty(),
                // which would also discard a node whose whole content is "0".
                if ((string) $text !== '') {
                    if (empty($state['textrun'])) {
                        $state['textrun'] = $phpword_element->createTextRun();
                    }
                    $state['textrun']->addText($text, $state['current_style']);
                }
                break;

            // Style tags:
            case 'strong':
            case 'b':
            case 'sup': // Not working in PHPWord
            case 'em':
            case 'i':
            case 'u':
            case 'span':
                // Create a new text run if we aren't in one already:
                if (empty($state['textrun'])) {
                    $state['textrun'] = $phpword_element->createTextRun();
                }
                if (in_array($element->tag, $allowed_children)) {
                    array_unshift($state['parents'], $element->tag);
                    h2d_insert_html($phpword_element, $element->nodes, $state);
                    array_shift($state['parents']);
                } else {
                    $state['textrun']->addText(h2d_clean_text($element->innertext), $state['current_style']);
                }
                break;

            case 'br':
                // Simply create a new text run:
                $state['textrun'] = $phpword_element->createTextRun();
                break;

            case 'img':
                // The image is styled with the current style, extended with the
                // explicit dimensions when both are given on the element.
                $image_style = $state['current_style'];
                if ($element->height && $element->width) {
                    $image_style['height'] = $element->height;
                    $image_style['width'] = $element->width;
                }
                $phpword_element->addImage(ltrim($element->src, '/'), $image_style);
                break;

            default:
                // Unknown tag: add its inner html as plain text in a new text run.
                $state['textrun'] = $phpword_element->createTextRun();
                $state['textrun']->addText(h2d_clean_text($element->innertext), $state['current_style']);
                break;
        }

        // Reset the style back to what it was:
        $state['current_style'] = $old_style;
    }
}