/**
 * Populate a PHPWord element from a list of Simple HTML DOM nodes.
 *
 * This recursive function walks every node in $html_dom_array, dispatches on
 * the node's tag and adds the matching content (text runs, titles, tables,
 * links, images, ...) to $phpword_element. When a tag is allowed under the
 * current parent (see htmltodocx_html_allowed_children()) the function
 * recurses into the node's children; otherwise the node's inner text is added
 * as plain text.
 *
 * $state is shared by reference across the whole recursion and must be
 * populated by the caller. Keys read here include: 'parents',
 * 'current_style', 'structure_document', 'structure_headings',
 * 'phpword_object', 'table_of_contents_id', 'treat_div_as_paragraph',
 * 'table_allowed', 'context', 'base_root', 'base_path' and the
 * 'pseudo_list_indicator_*' settings. Keys written here include 'textrun',
 * 'table', 'table_cell', 'table_allowed', 'list_number' and
 * 'list_total_count'.
 *
 * @param object $phpword_element
 *   PHPWord object to add the converted html to.
 * @param array $html_dom_array
 *   Array of nodes generated by Simple HTML DOM.
 * @param array $state
 *   Parameters for the current run (see above).
 */
function htmltodocx_insert_html_recursive(&$phpword_element, $html_dom_array, &$state = array()) {

  // Tags that may be processed as children of the current (innermost) parent.
  $allowed_children = htmltodocx_html_allowed_children($state['parents'][0]);

  // Go through each element:
  foreach ($html_dom_array as $element) {

    // The style is scoped to this element; it is restored after the switch.
    $old_style = $state['current_style'];
    $state['current_style'] = _htmltodocx_get_style($element, $state);

    switch ($element->tag) {

      case 'p':
      case 'div':
        // A div is treated as a paragraph.
      case 'h1':
      case 'h2':
      case 'h3':
      case 'h4':
      case 'h5':
      case 'h6':
        if ($state['structure_document'] && in_array($element->tag, array('h1', 'h2', 'h3', 'h4', 'h5', 'h6')) && is_object($state['phpword_object'])) {
          // If the structure_document option has been enabled, then headings
          // are used to create Word heading styles. Note, in this case, any
          // nested elements within the heading are displayed as text only,
          // and no text break is added after the heading.
          $state['phpword_object']->addTitleStyle($state['structure_headings'][$element->tag], $state['current_style']);
          $phpword_element->addTitle(htmltodocx_clean_text($element->innertext), $state['structure_headings'][$element->tag]);
          break;
        }

        if ($element->tag == 'div' && $state['table_of_contents_id'] && $element->id == $state['table_of_contents_id']) {
          // Replace this div with a table of contents:
          $phpword_element->addTOC($state['current_style'], $state['current_style']);
          break;
        }

        // Everything in this element should be in the same text run, so we
        // initiate a text run here and pass it on. Starting one of these
        // elements will cause a new line to be added in the Word document.
        // For divs this might not always be what is wanted: the setting
        // 'treat_div_as_paragraph' determines whether or not to add new
        // lines for divs.
        if ($element->tag != 'div' || $state['treat_div_as_paragraph'] || !isset($state['textrun'])) {
          $state['textrun'] = $phpword_element->createTextRun($state['current_style']);
        }

        // For better usability for the end user of the Word document, we
        // separate paragraphs and headings with an empty line. You can
        // override this behaviour by setting the spaceAfter parameter for
        // the current element.
        // If the spaceAfter parameter is not set, we set it temporarily to 0
        // here and record that it wasn't set in the style. Later we will add
        // an empty line. Word 2007 and later have a non-zero default for
        // paragraph separation, so without setting that spacing to 0 here we
        // would end up with a large gap between paragraphs (the document
        // template default plus the extra line).
        $space_after_set = TRUE;
        if (!isset($state['current_style']['spaceAfter'])) {
          $state['current_style']['spaceAfter'] = 0;
          $space_after_set = FALSE;
        }

        if (in_array($element->tag, $allowed_children)) {
          array_unshift($state['parents'], $element->tag);
          htmltodocx_insert_html_recursive($phpword_element, $element->nodes, $state);
          array_shift($state['parents']);
        }
        else {
          $state['textrun']->addText(htmltodocx_clean_text($element->innertext), $state['current_style']);
        }

        unset($state['textrun']);

        if (!$space_after_set) {
          // Add the text break here - where the spaceAfter parameter hadn't
          // been set initially - and unset the spaceAfter parameter we just
          // set:
          $phpword_element->addTextBreak();
          unset($state['current_style']['spaceAfter']);
        }
        break;

      case 'table':
        if (in_array('table', $allowed_children)) {
          $old_table_state = $state['table_allowed'];
          if (!$state['table_allowed'] || in_array('td', $state['parents']) || in_array('th', $state['parents'])) {
            // Nested tables are not created: this is a PHPWord constraint.
            $state['table_allowed'] = FALSE;
          }
          else {
            $state['table_allowed'] = TRUE;
            // PHPWord allows table styles to be passed in a couple of
            // different ways: either using an array of properties, or by
            // defining a full table style on the PHPWord object:
            if (is_object($state['phpword_object']) && method_exists($state['phpword_object'], 'addTableStyle')) {
              $state['phpword_object']->addTableStyle('temp_table_style', $state['current_style']);
              $table_style = 'temp_table_style';
            }
            else {
              $table_style = $state['current_style'];
            }
            $state['table'] = $phpword_element->addTable($table_style);
          }
          array_unshift($state['parents'], 'table');
          htmltodocx_insert_html_recursive($phpword_element, $element->nodes, $state);
          array_shift($state['parents']);
          // Reset table state to what it was before a table was added:
          $state['table_allowed'] = $old_table_state;
          $phpword_element->addTextBreak();
        }
        else {
          $state['textrun'] = $phpword_element->createTextRun();
          $state['textrun']->addText(htmltodocx_clean_text($element->innertext), $state['current_style']);
        }
        break;

      case 'tbody':
        if (in_array('tbody', $allowed_children)) {
          array_unshift($state['parents'], 'tbody');
          htmltodocx_insert_html_recursive($phpword_element, $element->nodes, $state);
          array_shift($state['parents']);
        }
        else {
          $state['textrun'] = $phpword_element->createTextRun();
          $state['textrun']->addText(htmltodocx_clean_text($element->innertext), $state['current_style']);
        }
        break;

      case 'tr':
        if (in_array('tr', $allowed_children)) {
          if ($state['table_allowed']) {
            $state['table']->addRow();
          }
          else {
            // Simply add a new line if a table is not possible in this
            // context:
            $state['textrun'] = $phpword_element->createTextRun();
          }
          array_unshift($state['parents'], 'tr');
          htmltodocx_insert_html_recursive($phpword_element, $element->nodes, $state);
          array_shift($state['parents']);
        }
        else {
          $state['textrun'] = $phpword_element->createTextRun();
          $state['textrun']->addText(htmltodocx_clean_text($element->innertext), $state['current_style']);
        }
        break;

      case 'td':
      case 'th':
        if (in_array($element->tag, $allowed_children) && $state['table_allowed']) {
          unset($state['textrun']);
          if (isset($state['current_style']['width'])) {
            $cell_width = $state['current_style']['width'];
          }
          elseif (isset($element->width)) {
            // Converting at 15 TWIPS per pixel.
            $cell_width = $element->width * 15;
          }
          else {
            $cell_width = 800;
          }
          $state['table_cell'] = $state['table']->addCell($cell_width, $state['current_style']);
          array_unshift($state['parents'], $element->tag);
          // The cell becomes the container for everything nested inside it.
          htmltodocx_insert_html_recursive($state['table_cell'], $element->nodes, $state);
          array_shift($state['parents']);
        }
        else {
          if (!isset($state['textrun'])) {
            $state['textrun'] = $phpword_element->createTextRun();
          }
          $state['textrun']->addText(htmltodocx_clean_text($element->innertext), $state['current_style']);
        }
        break;

      case 'a':
        // Create a new text run if we aren't in one already:
        if (!isset($state['textrun'])) {
          $state['textrun'] = $phpword_element->createTextRun();
        }
        if ($state['context'] == 'section') {
          $link_target = $element->href;
          if (strpos($link_target, 'http://') === 0 || strpos($link_target, 'https://') === 0) {
            // Already an absolute URL (plain or secure): use it untouched.
            $href = $link_target;
          }
          elseif (strpos($link_target, '/') === 0) {
            // Root-relative URL.
            $href = $state['base_root'] . $link_target;
          }
          else {
            // Path-relative URL.
            $href = $state['base_root'] . $state['base_path'] . $link_target;
          }
          // Encode awkward characters (e.g. spaces) in the url to prevent
          // errors in the Word document:
          $state['textrun']->addLink(htmltodocx_url_encode_chars($href), htmltodocx_clean_text($element->innertext), $state['current_style']);
        }
        else {
          // Links can't seem to be included in headers or footers with
          // PHPWord: trying to include them causes an error which stops Word
          // from opening the file - in Word 2003 with the converter at least.
          // So add the link styled as a link only.
          $state['textrun']->addText(htmltodocx_clean_text($element->innertext), $state['current_style']);
        }
        break;

      case 'ul':
      case 'ol':
        // Remember the list bookkeeping of any enclosing list. The counters
        // below are overwritten for this list, and without restoring them a
        // nested list would corrupt the numbering and last-item detection of
        // its parent list.
        $outer_list_state = array();
        foreach (array('list_total_count', 'list_number', 'list_style_before', 'list_style_after') as $list_key) {
          if (array_key_exists($list_key, $state)) {
            $outer_list_state[$list_key] = $state[$list_key];
          }
        }

        // The total is used to detect the last item so the list's spaceAfter
        // can be applied to it. All list children should be li elements.
        $state['list_total_count'] = count($element->children);
        // Presumably prepares $state['list_style_before'] and
        // $state['list_style_after'], which the 'li' case reads - confirm
        // against the helper's implementation.
        _htmltodocx_add_list_start_end_spacing_style($state);
        // Reset list number.
        $state['list_number'] = 0;

        if (in_array($element->tag, $allowed_children)) {
          if (!isset($state['pseudo_list'])) {
            // Unset any existing text run: PHPWord lists cannot appear in a
            // text run. If we leave a text run active then subsequent text
            // will go in that text run (if it isn't re-initialised), which
            // would mean that text after this list would appear before it in
            // the Word document.
            unset($state['textrun']);
          }
          array_unshift($state['parents'], $element->tag);
          htmltodocx_insert_html_recursive($phpword_element, $element->nodes, $state);
          array_shift($state['parents']);
        }
        else {
          $state['textrun'] = $phpword_element->createTextRun();
          $state['textrun']->addText(htmltodocx_clean_text($element->innertext), $state['current_style']);
        }

        // Hand the bookkeeping back to the enclosing list (if there was one).
        foreach ($outer_list_state as $list_key => $list_value) {
          $state[$list_key] = $list_value;
        }
        break;

      case 'li':
        // You cannot style individual pieces of text in a list element so we
        // do it with text runs instead. This does not allow us to indent
        // lists at all, so we can't show nesting.
        // Before and after spacings:
        if ($state['list_number'] === 0) {
          $state['current_style'] = array_merge($state['current_style'], $state['list_style_before']);
        }
        $last_item = FALSE;
        if ($state['list_number'] == $state['list_total_count'] - 1) {
          $last_item = TRUE;
          if (empty($state['list_style_after'])) {
            // Set to 0 if not defined so we can add a text break without
            // ending up with too much space in Word 2007+.
            // *Needs further testing on Word 2007+*
            $state['current_style']['spaceAfter'] = 0;
          }
          $state['current_style'] = array_merge($state['current_style'], $state['list_style_after']);
        }

        // We create a new text run for each element:
        $state['textrun'] = $phpword_element->createTextRun($state['current_style']);

        if (in_array('li', $allowed_children)) {
          $state['list_number']++;
          if ($state['parents'][0] == 'ol') {
            // Ordered list: the indicator is the running item number.
            $item_indicator = $state['list_number'] . '. ';
            $style = $state['current_style'];
          }
          else {
            // Unordered list: the indicator is a configured bullet character
            // rendered in its own font.
            $style = $state['current_style'];
            $style['name'] = $state['pseudo_list_indicator_font_name'];
            $style['size'] = $state['pseudo_list_indicator_font_size'];
            $item_indicator = $state['pseudo_list_indicator_character'];
          }
          array_unshift($state['parents'], 'li');
          $state['textrun']->addText($item_indicator, $style);
          htmltodocx_insert_html_recursive($phpword_element, $element->nodes, $state);
          array_shift($state['parents']);
        }
        else {
          $state['textrun']->addText(htmltodocx_clean_text($element->innertext), $state['current_style']);
        }

        if ($last_item && empty($state['list_style_after'])) {
          // Add an empty line after the list if no spacing after has been
          // defined.
          $phpword_element->addTextBreak();
        }
        unset($state['textrun']);
        break;

      case 'text':
        // We may get some empty text nodes - containing just a space - in
        // Simple HTML DOM. We want to exclude those, as these can cause
        // extra line returns. However we don't want to exclude spaces
        // between styling elements (these will be within a text run).
        if (!isset($state['textrun'])) {
          $text = htmltodocx_clean_text(trim($element->innertext));
        }
        else {
          $text = htmltodocx_clean_text($element->innertext);
        }
        // Compare against the empty string rather than using empty(): a text
        // node containing just "0" is real content and must not be dropped.
        if ((string) $text !== '') {
          if (!isset($state['textrun'])) {
            $state['textrun'] = $phpword_element->createTextRun();
          }
          $state['textrun']->addText($text, $state['current_style']);
        }
        break;

      // Style tags:
      case 'strong':
      case 'b':
      case 'sup':
        // Note: sup is not working in PHPWord.
      case 'em':
      case 'i':
      case 'u':
      case 'span':
      case 'code':
        // Create a new text run if we aren't in one already:
        if (!isset($state['textrun'])) {
          $state['textrun'] = $phpword_element->createTextRun();
        }
        if (in_array($element->tag, $allowed_children)) {
          array_unshift($state['parents'], $element->tag);
          htmltodocx_insert_html_recursive($phpword_element, $element->nodes, $state);
          array_shift($state['parents']);
        }
        else {
          $state['textrun']->addText(htmltodocx_clean_text($element->innertext), $state['current_style']);
        }
        break;

      // NB, Simple HTML DOM might not be picking up <br> tags.
      case 'br':
        // Simply create a new text run:
        $state['textrun'] = $phpword_element->createTextRun();
        break;

      case 'img':
        if ($element->height && $element->width) {
          $state['current_style']['height'] = $element->height;
          $state['current_style']['width'] = $element->width;
        }

        if (strpos($element->src, $state['base_root']) === 0) {
          // The image source is a full url, but nevertheless it is on this
          // server: strip the site root so it resolves to a local path.
          $element_src = substr($element->src, strlen($state['base_root']));
        }
        else {
          $element_src = $element->src;
        }

        if (strpos($element_src, 'http://') === 0 || strpos($element_src, 'https://') === 0) {
          // The image url is from another site. Most probably the image
          // won't appear in the Word document.
          $src = $element_src;
        }
        elseif (strpos($element_src, '/') === 0) {
          $src = htmltodocx_doc_root() . $element_src;
        }
        else {
          $src = htmltodocx_doc_root() . $state['base_path'] . $element_src;
        }

        $phpword_element->addImage($src, $state['current_style']);
        break;

      default:
        // Unknown tag: add its inner text as plain text on a new line.
        $state['textrun'] = $phpword_element->createTextRun();
        $state['textrun']->addText(htmltodocx_clean_text($element->innertext), $state['current_style']);
        break;
    }

    // Reset the style back to what it was:
    $state['current_style'] = $old_style;
  }
}
function htmltodocx_insert_html_recursive(&$phpword_object, $phpword_element, $html_dom_array, &$state = array()) { switch ($html_dom_array->nodeType) { case XML_TEXT_NODE: $state['textrun']->addText(htmltodocx_clean_text($html_dom_array->nodeValue), $state['current_style']); break; case XML_ELEMENT_NODE: $a4WidthPortraitOrientationInPoints = 595; $a4HeightPortraitOrientationInPoints = 842; $a4WidthLandscapeOrientationInPoints = 842; $a4HeightLandscapeOrientationInPoints = 595; // Go through the html_dom_array, adding bits to go in the PhpWord element $allowed_children = htmltodocx_html_allowed_children($state['parents'][0]); foreach ($html_dom_array->childNodes as $element) { if (!in_array($element->nodeType, [XML_ELEMENT_NODE, XML_TEXT_NODE])) { continue; } /** @var DOMElement $element */ //var_dump($element->tag); $old_style = $state['current_style']; if ($element->nodeType == XML_ELEMENT_NODE) { $state['current_style'] = _htmltodocx_get_style($element, $state); } $tag = $element->nodeName; switch ($tag) { case 'p': case 'div': // Treat a div as a paragraph // Treat a div as a paragraph case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6': case 'center': if ($state['structure_document'] && in_array($tag, ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']) && is_object($state['phpword_object'])) { // If the structure_document option has been enabled, then headings // are used to create Word heading styles. Note, in this case, any // nested elements within the heading are displayed as text only. // Additionally we don't now add a text break after a heading where // sizeAfter has not been set. 
$state['phpword_object']->addTitleStyle($state['structure_headings'][$tag], $state['current_style']); if (empty($phpword_element) || $phpword_element->getSettings()->getMarginTop() === 0) { $phpword_element = $phpword_object->createSection(); } $phpword_element->addTitle(htmltodocx_clean_text($element->nodeValue), $state['structure_headings'][$tag]); break; } if ($tag == 'div' && $state['table_of_contents_id'] && $element->getAttribute('id') == $state['table_of_contents_id']) { // Replace this div with a table of contents: if (empty($phpword_element) || $phpword_element->getSettings()->getMarginTop() === 0) { $phpword_element = $phpword_object->createSection(); } $phpword_element->addTOC($state['current_style'], $state['current_style']); break; } // Everything in this element should be in the same text run // we need to initiate a text run here and pass it on. Starting one of // these elements will cause a new line to be added in the Word // document. In the case of divs this might not always be what is // wanted the setting 'treat_div_as_paragraph' determines whether or // not to add new lines for divs. if ($tag != 'div' || $state['treat_div_as_paragraph'] || !isset($state['textrun'])) { if (empty($phpword_element) || $phpword_element->getSettings()->getMarginTop() === 0) { $phpword_element = $phpword_object->createSection(); } $state['textrun'] = $phpword_element->createTextRun($state['current_style']); } // For better usability for the end user of the Word document, we // separate paragraphs and headings with an empty line. You can // override this behaviour by setting the spaceAfter parameter for // the current element. // If the spaceAfter parameter is not set, we set it temporarily to 0 // here and record that it wasn't set in the style. Later we will add // an empty line. 
Word 2007 and later have a non-zero default for // paragraph separation, so without setting that spacing to 0 here we // would end up with a large gap between paragraphs (the document // template default plus the extra line). $space_after_set = false; if (!isset($state['current_style']['spaceAfter'])) { $state['current_style']['spaceAfter'] = 0; $space_after_set = false; } if (in_array($tag, $allowed_children)) { array_unshift($state['parents'], $tag); $phpword_element_returned = htmltodocx_insert_html_recursive($phpword_object, $phpword_element, $element, $state); if (!empty($phpword_element) && $phpword_element_returned->getSettings()->getOrientation() !== $phpword_element->getSettings()->getOrientation()) { $phpword_element = $phpword_object->createSection(array('orientation' => $phpword_element->getSettings()->getOrientation())); } else { $phpword_element = $phpword_element_returned; } array_shift($state['parents']); } else { $state['textrun']->addText(htmltodocx_clean_text($element->nodeValue), $state['current_style']); } unset($state['textrun']); if (!$space_after_set) { // Add the text break here - where the spaceAfter parameter hadn't // been set initially - also unset the spaceAfter parameter we just // set: //$phpword_element->addTextBreak(); unset($state['current_style']['spaceAfter']); } break; case 'table': if (in_array('table', $allowed_children)) { $old_table_state = $state['table_allowed']; if (!$state['table_allowed'] || in_array('td', $state['parents']) || in_array('th', $state['parents'])) { $state['table_allowed'] = false; // This is a PhpWord constraint } else { $state['table_allowed'] = true; // PhpWord allows table_styles to be passed in a couple of // different ways either using an array of properties, or by // defining a full table style on the PhpWord object: if (is_object($state['phpword_object']) && method_exists($state['phpword_object'], 'addTableStyle')) { $state['phpword_object']->addTableStyle('temp_table_style', 
$state['current_style']); $table_style = 'temp_table_style'; } else { $table_style = $state['current_style']; } if (empty($phpword_element) || $phpword_element->getSettings()->getMarginTop() === 0) { $phpword_element = $phpword_object->createSection(); } $state['table'] = $phpword_element->addTable($table_style); } array_unshift($state['parents'], 'table'); $phpword_element_returned = htmltodocx_insert_html_recursive($phpword_object, $phpword_element, $element, $state); if (!empty($phpword_element) && $phpword_element_returned->getSettings()->getOrientation() !== $phpword_element->getSettings()->getOrientation()) { $phpword_element = $phpword_object->createSection(array('orientation' => $phpword_element->getSettings()->getOrientation())); } else { $phpword_element = $phpword_element_returned; } array_shift($state['parents']); // Reset table state to what it was before a table was added: $state['table_allowed'] = $old_table_state; $phpword_element->addTextBreak(); } else { if (empty($phpword_element) || $phpword_element->getSettings()->getMarginTop() === 0) { $phpword_element = $phpword_object->createSection(); } $state['textrun'] = $phpword_element->createTextRun(); $state['textrun']->addText(htmltodocx_clean_text($element->nodeValue), $state['current_style']); } break; case 'tbody': if (in_array('tbody', $allowed_children)) { array_unshift($state['parents'], 'tbody'); $phpword_element_returned = htmltodocx_insert_html_recursive($phpword_object, $phpword_element, $element, $state); if (!empty($phpword_element) && $phpword_element_returned->getSettings()->getOrientation() !== $phpword_element->getSettings()->getOrientation()) { $phpword_element = $phpword_object->createSection(array('orientation' => $phpword_element->getSettings()->getOrientation())); } else { $phpword_element = $phpword_element_returned; } array_shift($state['parents']); } else { if (empty($phpword_element) || $phpword_element->getSettings()->getMarginTop() === 0) { $phpword_element = 
$phpword_object->createSection(); } $state['textrun'] = $phpword_element->createTextRun(); $state['textrun']->addText(htmltodocx_clean_text($element->nodeValue), $state['current_style']); } break; case 'tr': if (in_array('tr', $allowed_children)) { if ($state['table_allowed']) { $state['table']->addRow(); } else { // Simply add a new line if a table is not possible in this // context: if (empty($phpword_element) || $phpword_element->getSettings()->getMarginTop() === 0) { $phpword_element = $phpword_object->createSection(); } $state['textrun'] = $phpword_element->createTextRun(); } array_unshift($state['parents'], 'tr'); $phpword_element_returned = htmltodocx_insert_html_recursive($phpword_object, $phpword_element, $element, $state); if (!empty($phpword_element) && $phpword_element_returned->getSettings()->getOrientation() !== $phpword_element->getSettings()->getOrientation()) { $phpword_element = $phpword_object->createSection(array('orientation' => $phpword_element->getSettings()->getOrientation())); } else { $phpword_element = $phpword_element_returned; } array_shift($state['parents']); } else { if (empty($phpword_element) || $phpword_element->getSettings()->getMarginTop() === 0) { $phpword_element = $phpword_object->createSection(); } $state['textrun'] = $phpword_element->createTextRun(); $state['textrun']->addText(htmltodocx_clean_text($element->nodeValue), $state['current_style']); } break; case 'td': case 'th': if (in_array($tag, $allowed_children) && $state['table_allowed']) { unset($state['textrun']); if (isset($state['current_style']['width'])) { $cell_width = $state['current_style']['width']; } elseif (!empty($element->getAttribute('width'))) { $cell_width = $element->getAttribute('width') * 15; // Converting at 15 TWIPS per pixel. 
} else { $cell_width = 800; } $state['table_cell'] = $state['table']->addCell($cell_width, $state['current_style']); array_unshift($state['parents'], $tag); $phpword_element_returned = htmltodocx_insert_html_recursive($phpword_object, $phpword_element, $state['table_cell'], $element, $state); if (!empty($phpword_element) && $phpword_element_returned->getSettings()->getOrientation() !== $phpword_element->getSettings()->getOrientation()) { $phpword_element = $phpword_object->createSection(array('orientation' => $phpword_element->getSettings()->getOrientation())); } else { $phpword_element = $phpword_element_returned; } array_shift($state['parents']); } else { if (!isset($state['textrun'])) { if (empty($phpword_element) || $phpword_element->getSettings()->getMarginTop() === 0) { $phpword_element = $phpword_object->createSection(); } $state['textrun'] = $phpword_element->createTextRun(); } $state['textrun']->addText(htmltodocx_clean_text($element->nodeValue), $state['current_style']); } break; case 'a': /* var_dump($element->href); var_dump(get_class($state['textrun'])); if($phpword_element instanceof \PhpOffice\PhpWord\Element\Footnote) { var_dump($phpword_element instanceof MyClass); }*/ // Create a new text run if we aren't in one already: if (!isset($state['textrun'])) { if (empty($phpword_element) || $phpword_element->getSettings()->getMarginTop() === 0) { $phpword_element = $phpword_object->createSection(); } $state['textrun'] = $phpword_element->createTextRun(); } if ($state['context'] == 'section') { if (strpos($element->getAttribute('href'), 'http://') === 0) { $href = $element->getAttribute('href'); } else { if (strpos($element->getAttribute('href'), 'https://') === 0) { $href = $element->getAttribute('href'); } elseif (strpos($element->getAttribute('href'), '/') === 0) { $href = $state['base_root'] . $element->getAttribute('href'); } else { $href = $state['base_root'] . $state['base_path'] . 
$element->getAttribute('href'); } } // Replace any spaces in url with %20 - to prevent errors in the Word // document: $state['textrun']->addLink(htmltodocx_url_encode_chars($href), htmltodocx_clean_text($element->nodeValue), $state['current_style']); } else { // Links can't seem to be included in headers or footers with // PHPWord: trying to include them causes an error which stops Word // from opening the file - in Word 2003 with the converter at least. // So add the link styled as a link only. $state['textrun']->addText(htmltodocx_clean_text($element->nodeValue), $state['current_style']); } break; case 'ul': $state['list_total_count'] = $element->childNodes->length; // We use this to be able to add the ordered list spaceAfter onto the // last list element. All ol children should be li elements. _htmltodocx_add_list_start_end_spacing_style($state); $state['list_number'] = 0; // Reset list number. if (in_array('ul', $allowed_children)) { if (!isset($state['pseudo_list'])) { // Unset any existing text run: unset($state['textrun']); // PhpWord lists cannot appear in a text run. If we leave a text // run active then subsequent text will go in that text run (if it // isn't re-initialised), which would mean that text after this // list would appear before it in the Word document. 
} array_unshift($state['parents'], 'ul'); $phpword_element_returned = htmltodocx_insert_html_recursive($phpword_object, $phpword_element, $element, $state); if (!empty($phpword_element) && $phpword_element_returned->getSettings()->getOrientation() !== $phpword_element->getSettings()->getOrientation()) { $phpword_element = $phpword_object->createSection(array('orientation' => $phpword_element->getSettings()->getOrientation())); } else { $phpword_element = $phpword_element_returned; } array_shift($state['parents']); } else { if (empty($phpword_element) || $phpword_element->getSettings()->getMarginTop() === 0) { $phpword_element = $phpword_object->createSection(); } $state['textrun'] = $phpword_element->createTextRun(); $state['textrun']->addText(htmltodocx_clean_text($element->nodeValue), $state['current_style']); } break; case 'ol': $state['list_total_count'] = $element->childNodes->length; // We use this to be able to add the ordered list spaceAfter onto the // last list element. All ol children should be li elements. _htmltodocx_add_list_start_end_spacing_style($state); $state['list_number'] = 0; // Reset list number. if (in_array('ol', $allowed_children)) { if (!isset($state['pseudo_list'])) { // Unset any existing text run: unset($state['textrun']); // Lists cannot appear in a text run. If we leave a text run active // then subsequent text will go in that text run (if it isn't // re-initialised), which would mean that text after this list // would appear before it in the Word document. 
} array_unshift($state['parents'], 'ol'); $phpword_element_returned = htmltodocx_insert_html_recursive($phpword_object, $phpword_element, $element, $state); if (!empty($phpword_element) && $phpword_element_returned->getSettings()->getOrientation() !== $phpword_element->getSettings()->getOrientation()) { $phpword_element = $phpword_object->createSection(array('orientation' => $phpword_element->getSettings()->getOrientation())); } else { $phpword_element = $phpword_element_returned; } array_shift($state['parents']); } else { if (empty($phpword_element) || $phpword_element->getSettings()->getMarginTop() === 0) { $phpword_element = $phpword_object->createSection(); } $state['textrun'] = $phpword_element->createTextRun(); $state['textrun']->addText(htmltodocx_clean_text($element->nodeValue), $state['current_style']); } break; case 'li': // You cannot style individual pieces of text in a list element so we do it // with text runs instead. This does not allow us to indent lists at all, so // we can't show nesting. // Before and after spacings: if ($state['list_number'] === 0) { $state['current_style'] = array_merge($state['current_style'], $state['list_style_before']); } $last_item = false; if ($state['list_number'] == $state['list_total_count'] - 1) { $last_item = true; if (empty($state['list_style_after'])) { $state['current_style']['spaceAfter'] = 0; // Set to 0 if not defined so we can add a text break without // ending up within too much space in Word2007+. 
// *Needs further testing on Word 2007+* } $state['current_style'] = array_merge($state['current_style'], $state['list_style_after']); } // We create a new text run for each element: if (empty($phpword_element) || $phpword_element->getSettings()->getMarginTop() === 0) { $phpword_element = $phpword_object->createSection(); } $state['textrun'] = $phpword_element->createTextRun($state['current_style']); if (in_array('li', $allowed_children)) { $state['list_number']++; if ($state['parents'][0] == 'ol') { $item_indicator = $state['list_number'] . '. '; $style = $state['current_style']; } else { $style = $state['current_style']; $style['name'] = $state['pseudo_list_indicator_font_name']; $style['size'] = $state['pseudo_list_indicator_font_size']; $item_indicator = $state['pseudo_list_indicator_character']; } array_unshift($state['parents'], 'li'); $state['textrun']->addText($item_indicator, $style); $phpword_element_returned = htmltodocx_insert_html_recursive($phpword_object, $phpword_element, $element, $state); if (!empty($phpword_element) && $phpword_element_returned->getSettings()->getOrientation() !== $phpword_element->getSettings()->getOrientation()) { $phpword_element = $phpword_object->createSection(array('orientation' => $phpword_element->getSettings()->getOrientation())); } else { $phpword_element = $phpword_element_returned; } array_shift($state['parents']); } else { $state['textrun']->addText(htmltodocx_clean_text($element->nodeValue), $state['current_style']); } if ($last_item && empty($state['list_style_after'])) { if (empty($phpword_element) || $phpword_element->getSettings()->getMarginTop() === 0) { $phpword_element = $phpword_object->createSection(); } $phpword_element->addTextBreak(); // Add an empty line after the list if no spacing after has been // defined. 
} unset($state['textrun']); break; case 'text': // We may get some empty text nodes - containing just a space - in // simple HTML dom - we want to exclude those, as these can cause extra // line returns. However we don't want to exclude spaces between styling // elements (these will be within a text run). if (!isset($state['textrun'])) { $text = htmltodocx_clean_text(trim($element->nodeValue)); } else { $text = htmltodocx_clean_text($element->nodeValue); } if (!empty($text)) { if (!isset($state['textrun'])) { if (empty($phpword_element)) { $phpword_element = $phpword_object->createSection(); } $state['textrun'] = $phpword_element->createTextRun(); } $state['textrun']->addText($text, $state['current_style']); } break; case '#text': // We may get some empty text nodes - containing just a space - in // simple HTML dom - we want to exclude those, as these can cause extra // line returns. However we don't want to exclude spaces between styling // elements (these will be within a text run). if (!isset($state['textrun'])) { $text = htmltodocx_clean_text(trim($element->nodeValue)); } else { $text = htmltodocx_clean_text($element->nodeValue); } if (!empty($text)) { if (!isset($state['textrun'])) { if (empty($phpword_element)) { $phpword_element = $phpword_object->createSection(); } $state['textrun'] = $phpword_element->createTextRun(); } $state['textrun']->addText($text, $state['current_style']); } break; // Style tags: // Style tags: case 'strong': case 'b': case 'sup': // Not working in PHPWord // Not working in PHPWord case 'em': case 'i': case 'u': case 'span': case 'code': // Create a new text run if we aren't in one already: if (!isset($state['textrun'])) { if (empty($phpword_element) || $phpword_element->getSettings()->getMarginTop() === 0) { $phpword_element = $phpword_object->createSection(); } $state['textrun'] = $phpword_element->createTextRun(); } if (in_array($tag, $allowed_children)) { array_unshift($state['parents'], $tag); $phpword_element_returned = 
htmltodocx_insert_html_recursive($phpword_object, $phpword_element, $element, $state); if (!empty($phpword_element) && $phpword_element_returned->getSettings()->getOrientation() !== $phpword_element->getSettings()->getOrientation()) { $phpword_element = $phpword_object->createSection(array('orientation' => $phpword_element->getSettings()->getOrientation())); } else { $phpword_element = $phpword_element_returned; } array_shift($state['parents']); } else { $state['textrun']->addText(htmltodocx_clean_text($element->nodeValue), $state['current_style']); } break; // NB, Simple HTML Dom might not be picking up <br> tags. // NB, Simple HTML Dom might not be picking up <br> tags. case 'br': if ($state['textrun'] instanceof \PhpOffice\PhpWord\Element\Footnote) { $state['textrun']->addTextBreak(); } else { // Simply create a new text run: if (empty($phpword_element) || $phpword_element->getSettings()->getMarginTop() === 0) { $phpword_element = $phpword_object->createSection(); } $state['textrun'] = $phpword_element->createTextRun(); } break; case 'img': $image_style = array(); $element_src = $element->getAttribute('src'); if (strpos($element_src, 'http://') === 0) { // The image url is from another site. Most probably the image won't // appear in the Word document. $src = $element_src; } elseif (strpos($element_src, 'https://') === 0) { // The image url is from another site. Most probably the image won't // appear in the Word document. $src = $element_src; } elseif (strpos($element_src, '/') === 0) { $src = $element_src; } else { $src = htmltodocx_doc_root() . $state['base_path'] . 
$element_src; } //if (file_exists($src)) if (file_get_contents($src, 0, null, 0, 1)) { $switchFromPortraitToLandscape = false; if ($element->getAttribute('width') && $element->getAttribute('height')) { $aspectRatio = $element->getAttribute('width') / $element->getAttribute('height'); if ($aspectRatio > 1.0) { $phpword_element = $phpword_object->createSection(array('orientation' => 'landscape', 'marginLeft' => 0, 'marginRight' => 0, 'marginTop' => 0, 'marginBottom' => 0)); $state['current_style']['width'] = $a4WidthLandscapeOrientationInPoints; $state['current_style']['height'] = $a4WidthLandscapeOrientationInPoints / $aspectRatio; $switchFromPortraitToLandscape = true; } else { $phpword_element = $phpword_object->createSection(array('orientation' => 'portrait', 'marginLeft' => 0, 'marginRight' => 0, 'marginTop' => 0, 'marginBottom' => 0)); $state['current_style']['height'] = $a4HeightPortraitOrientationInPoints; $state['current_style']['width'] = $a4HeightPortraitOrientationInPoints * $aspectRatio; } } else { $phpword_element = $phpword_object->createSection(array('orientation' => 'portrait', 'marginLeft' => 0, 'marginRight' => 0, 'marginTop' => 0, 'marginBottom' => 0)); $state['current_style']['height'] = $a4HeightPortraitOrientationInPoints; $state['current_style']['width'] = $a4WidthPortraitOrientationInPoints; } if ($switchFromPortraitToLandscape === true) { if (empty($phpword_element)) { $phpword_element = $phpword_object->createSection(); } $phpword_element->addImage($src, $state['current_style']); } else { if (empty($phpword_element)) { $phpword_element = $phpword_object->createSection(); } $phpword_element->addImage($src, $state['current_style']); } } break; case 'pb': if (empty($phpword_element)) { $phpword_element = $phpword_object->createSection(); } $phpword_element->addPageBreak(); break; case 'footnote': if ($element->getAttribute("text")) { $state['textrun'] = $phpword_element->createTextRun(); 
$state['textrun']->addText(htmltodocx_clean_text($element->getAttribute("text")), $state['current_style']); } $textrunBeforeFootnote = $state['textrun']; $styleBeforeFootnote = $state['current_style']; $state['current_style'] = array(); $state['textrun'] = $state['textrun']->addFootnote(); array_unshift($state['parents'], $tag); $phpword_element_returned = htmltodocx_insert_html_recursive($phpword_object, $phpword_element, $element, $state); if (!empty($phpword_element) && $phpword_element_returned->getSettings()->getOrientation() !== $phpword_element->getSettings()->getOrientation()) { $phpword_element = $phpword_object->createSection(array('orientation' => $phpword_element->getSettings()->getOrientation())); } else { $phpword_element = $phpword_element_returned; } array_shift($state['parents']); $state['textrun'] = $textrunBeforeFootnote; $state['current_style'] = $styleBeforeFootnote; break; default: if (empty($phpword_element)) { $phpword_element = $phpword_object->createSection(); } $state['textrun'] = $phpword_element->createTextRun(); $state['textrun']->addText(htmltodocx_clean_text($element->nodeValue), $state['current_style']); break; } // Reset the style back to what it was: $state['current_style'] = $old_style; } break; } if (empty($phpword_element)) { $phpword_element = $phpword_object->createSection(); } return $phpword_element; }