/**
 * Extracts the entity's notation and surrounding context from the text.
 *
 * Works on the container stored at the given index: the opening and closing
 * markers of the requested notation (<:ID> and </:ID>) are each replaced by
 * a "###" separator, all remaining notation markers are stripped, the result
 * is converted to plaintext with whitespace runs collapsed to single spaces,
 * and the trimmed string is finally split at the separators.
 *
 * @param string $xmlid          Value of @xml:id attribute of element whose
 *                               context is to be returned.
 * @param int    $containerindex Internal container counter/index
 *
 * @return array Plaintext pieces obtained by splitting at the "###" markers
 *               (presumably context before, notation, context after when
 *               the notation occurs exactly once -- confirm with callers).
 */
protected function extractText($xmlid, $containerindex)
{
    // Swap the start and end tag of the "own" notation for a separator ...
    $ownTags = array('<:' . $xmlid . '>', '</:' . $xmlid . '>');
    $marked = str_replace($ownTags, '###', $this->containers[$containerindex]);

    // ... and drop the tags of every other notation.
    $withoutForeignTags = preg_replace('#</?:[^>]+>#', '', $marked);

    // The plaintext conversion code should expect XML, so special chars
    // have to be escaped before the text is handed over.
    $plaintext = $this->plaintextConverter->convert(htmlspecialchars($withoutForeignTags));

    // Collapse whitespace runs and strip leading/trailing whitespace.
    $normalized = trim(preg_replace('#\\s+#u', ' ', $plaintext));

    return explode('###', $normalized);
}
/**
 * Callback function for occurrences of <titlePart> elements.
 *
 * Converts the outer XML of the current reader node to plaintext and saves
 * it, through the volume gateway, as the title of a new volume record for
 * the current volume.
 *
 * This method expects each <text> to have one <titlePart>. If there is
 * more than one, subclasses may be used to filter the unwanted title(s).
 *
 * @return void
 *
 * @throws \RuntimeException If a <titlePart> has already been processed for
 *                           the current volume. The message contains the
 *                           volume and the plaintext of the offending title.
 */
protected function processTitlePart()
{
    $title = $this->plaintextConverter->convert($this->r->readOuterXML());
    $currentVolume = $this->data['currentVolume'];

    // Check for uniqueness: only one title may be recorded per volume.
    if (!empty($this->data['volTitles'][$currentVolume])) {
        // Name the offending title after the colon, so that the duplicate
        // can actually be located in the source document.
        throw new \RuntimeException(
            'Multiple <titlePart> elements for volume ' . $currentVolume . ":\n" . $title
        );
    }

    // Remember that this volume has its title, then persist the record.
    $this->data['volTitles'][$currentVolume] = true;

    $volume = $this->setup->factory->createVolume();
    $volume->number = $currentVolume;
    $volume->title = $title;
    $volume->pagenumber = $this->data['currTextStart'];
    $this->volumeGateway->save($volume);
}