/**
 * Helper function to process the source text for export.
 *
 * The source is parsed as (possibly broken) HTML. Every element found is
 * written through writeBPT(), writeIMG() or writeBR(), text nodes are written
 * as CDATA after being passed through toEntities(), and pair tags are
 * terminated through writeEPT() once all of their text has been written.
 *
 * Side effect: the per-key element counter kept in the job's
 * 'xliff_validation' setting is incremented for every opening and closing
 * element written, and the job is saved.
 *
 * @param string $source
 *   The source text to process; it is embedded into an HTML body and parsed
 *   with DOMDocument.
 * @param array $key_array
 *   The source item data key. Its first item is used to build the element
 *   ids (presumably the job item id - confirm against the caller).
 *
 * @return string
 *   The inner XML of the generated 'wrapper' element.
 */
protected function processForExport($source, array $key_array) {
  $tjiid = $key_array[0];
  $key_string = \Drupal::service('tmgmt.data')->ensureStringKey($key_array);

  // The reason why we use DOMDocument object here and not just XMLReader
  // is the DOMDocument's ability to deal with broken HTML.
  $dom = new \DOMDocument();
  // We need to append the head with encoding so that special characters
  // are read correctly.
  $dom->loadHTML("<html><head><meta http-equiv='Content-type' content='text/html; charset=UTF-8' /></head><body>" . $source . '</body></html>');

  $iterator = new \RecursiveIteratorIterator(new RecursiveDOMIterator($dom), \RecursiveIteratorIterator::SELF_FIRST);

  $writer = new \XMLWriter();
  $writer->openMemory();
  $writer->startDocument('1.0', 'UTF-8');
  $writer->startElement('wrapper');

  // Elements that have been opened but not yet closed, keyed by element id.
  $tray = [];
  $non_pair_tags = ['br', 'img'];
  // Nodes that only exist because of the HTML scaffolding added above.
  $skipped_tags = ['html', 'body', 'head', 'meta'];

  $xliff_validation = $this->job->getSetting('xliff_validation');

  /** @var \DOMNode $node */
  foreach ($iterator as $node) {
    if (in_array($node->nodeName, $skipped_tags, TRUE)) {
      continue;
    }

    if ($node->nodeType === XML_ELEMENT_NODE) {
      // Increment the elements count and compose element id.
      if (!isset($xliff_validation[$key_string])) {
        $xliff_validation[$key_string] = 0;
      }
      $xliff_validation[$key_string]++;
      $id = 'tjiid' . $tjiid . '-' . $xliff_validation[$key_string];

      $is_pair_tag = !in_array($node->nodeName, $non_pair_tags, TRUE);

      if ($is_pair_tag) {
        $this->writeBPT($writer, $node, $id);
      }
      elseif ($node->nodeName === 'img') {
        $this->writeIMG($writer, $node, $id);
      }
      elseif ($node->nodeName === 'br') {
        $this->writeBR($writer, $node, $id);
      }

      // Add to tray new element info. The value is cast to string so that
      // the strict comparison below also matches elements without any text.
      $tray[$id] = [
        'name' => $node->nodeName,
        'id' => $id,
        'value' => (string) $node->nodeValue,
        'built_text' => '',
        'is_pair_tag' => $is_pair_tag,
      ];
    }
    elseif ($node->nodeName === '#text') {
      // Add the node value to the text output.
      $writer->writeCdata($this->toEntities($node->nodeValue));
      foreach ($tray as &$info) {
        $info['built_text'] .= $node->nodeValue;
      }
      // Break the reference so that later use of $tray (copying, reversing,
      // unsetting items) cannot be affected by a dangling alias.
      unset($info);
    }

    // Reverse so that pair tags are closed in the expected order.
    foreach (array_reverse($tray) as $_info) {
      // If the built text equals the node value and it is a pair tag, add
      // the end pair tag markup. The comparison has to be strict: with a
      // loose comparison numeric looking strings such as "1" and "1.0" are
      // considered equal, which closes the element before all of its
      // content has been written.
      if ($_info['is_pair_tag'] && $_info['value'] === $_info['built_text']) {
        // Count also for the closing elements.
        $xliff_validation[$key_string]++;
        $this->writeEPT($writer, $_info['name'], $_info['id']);
        // When the end pair tag has been written unset the element info
        // from the tray.
        unset($tray[$_info['id']]);
      }
    }
  }

  // Set the xliff_validation data and save the job.
  $this->job->settings->xliff_validation = $xliff_validation;
  $this->job->save();

  $writer->endElement();

  // Load the output with XMLReader so that we can easily get the inner xml.
  $reader = new \XMLReader();
  $reader->XML($writer->outputMemory());
  $reader->read();
  return $reader->readInnerXML();
}