/**
 * Converts a dictionary XML export into LMF individuals and writes them to a file.
 *
 * Every <return> element of the input is one record. Its child nodes are collected
 * into an array keyed by node name (the original author lists id, header, status and
 * metadata). The text of the <metadata> child is itself an XML document; the first
 * <record> of that document is scanned for <el> elements:
 *  - name="AntrastinisZodis": the headword, stored as 'lemma';
 *  - name="Reiksme": one sense; its direct child elements named "Forma", "Tarimas"
 *    and "Atitikmuo" are stored as 'wordForms', 'pronunciation' and 'equivalent'.
 *
 * Records without a lemma are skipped. For every other record a lexical entry with
 * its lemma, senses and equivalents is serialised via toLmfString() and appended to
 * the output file, and a progress line is echoed.
 *
 * @param string $filename          Path of the XML file to read.
 * @param string $fileOfIndividuals Path of the output file (opened with "w+", so it is truncated).
 *
 * @throws \RuntimeException When the input cannot be read or parsed, or the output cannot be opened.
 */
private function buildLmfIndividuals($filename, $fileOfIndividuals)
{
    $resourceName = $this->getResourceName();

    // file_get_contents() also copes with empty files, unlike fread($file, filesize(...)).
    $xml = file_get_contents($filename);
    if ($xml === false) {
        throw new \RuntimeException('Cannot read source file: ' . $filename);
    }

    $dom = new \DOMDocument('1.0', 'UTF-8');
    if ($xml === '' || !$dom->loadXML($xml)) {
        throw new \RuntimeException('Source file is not well-formed XML: ' . $filename);
    }

    // Opened only after the input parsed, so a broken input does not truncate existing output.
    $fileIndividuals = fopen($fileOfIndividuals, 'w+');
    if ($fileIndividuals === false) {
        throw new \RuntimeException('Cannot open output file: ' . $fileOfIndividuals);
    }

    $recordNr = 1;

    foreach ($dom->getElementsByTagName('return') as $domRecord) {
        /* @var $domRecord \DOMElement */
        $arr = array();

        foreach ($domRecord->childNodes as $node) {
            /* @var $node \DOMNode */
            if ($node->nodeName !== 'metadata') {
                $arr[$node->nodeName] = $node->nodeValue;
                continue;
            }

            $ins = array();

            // The metadata node carries a whole XML document as its text value.
            $payload = (string) $node->nodeValue;
            $metadata = new \DOMDocument('1.0', 'UTF-8');
            $record = null;
            if ($payload !== '' && $metadata->loadXML($payload)) {
                $record = $metadata->getElementsByTagName('record')->item(0);
            }

            // A payload without a <record> leaves $ins empty, so the record is skipped below.
            $elements = $record !== null ? $record->getElementsByTagName('el') : array();

            foreach ($elements as $el) {
                /* @var $el \DOMElement */
                $name = $el->getAttribute('name');
                $value = $el->getAttribute('value');

                // Lemma: only taken when a value is present. Compared against '' rather
                // than by truthiness so that a value of "0" is not discarded.
                if ($name === 'AntrastinisZodis' && $value !== '') {
                    // Known data quirk: per the original author's note, the first space
                    // of this one headword is not a regular space, so the clean spelling
                    // is substituted.
                    // NOTE(review): confirm the first literal still contains the
                    // irregular space character after any re-encoding of this file.
                    if ($value == 'patekti į nepatogią padėtį') {
                        $ins['lemma'] = 'patekti į nepatogią padėtį';
                    } else {
                        $ins['lemma'] = htmlspecialchars($value);
                    }
                }

                // Senses: a "Reiksme" element is processed even when it has no value
                // attribute, because its content lives in the child elements.
                if ($name === 'Reiksme') {
                    $senseArr = array();

                    foreach ($el->childNodes as $child) {
                        // Text nodes sit between the elements; only elements matter.
                        if (!$child instanceof \DOMElement) {
                            continue;
                        }

                        $childName = $child->getAttribute('name');
                        $childValue = $child->getAttribute('value');

                        if ($childName === 'Forma') {
                            $senseArr['wordForms'][] = $childValue;
                        } elseif ($childName === 'Tarimas') {
                            $senseArr['pronunciation'] = $childValue;
                        } elseif ($childName === 'Atitikmuo') {
                            $senseArr['equivalent'][] = htmlspecialchars($childValue);
                        }
                    }

                    $ins['senses'][] = $senseArr;
                }
            }

            $arr[$node->nodeName] = $ins;
        }

        // Only records that yielded a headword become lexical entries.
        if (!isset($arr['metadata']['lemma'])) {
            continue;
        }

        $lemma = $arr['metadata']['lemma'];
        // A record without an <id> child passes null on, as before, but without a notice.
        $id = isset($arr['id']) ? $arr['id'] : null;
        // A headword may come without any "Reiksme" element.
        $senses = isset($arr['metadata']['senses']) ? $arr['metadata']['senses'] : array();

        $lexicalEntry = new Owl\LmfLexicalEntry($resourceName);
        $lexicalEntry->setUri($this->getUriFactory()->create('LexicalEntry', $lemma, $id));

        // Set lemma
        $lmfLemma = new Owl\LmfLemma();
        $lmfLemma->setWrittenForm($lemma);
        $lmfLemma->setUri($this->getUriFactory()->create('Lemma', $lemma, $id));
        $lexicalEntry->setLemma($lmfLemma);

        $senseNr = 1;
        foreach ($senses as $sense) {
            $lmfSense = new Owl\LmfSense();
            $lmfSense->setUri(
                $this->getUriFactory()->create(
                    'Sense',
                    $lexicalEntry->getLemma()->getWrittenForm(),
                    $id . '-' . $senseNr++
                )
            );
            $lmfSense->setLemmaWrittenForm($lexicalEntry->getLemma()->getWrittenForm());

            // A sense may carry only forms or pronunciation and no equivalents at all.
            $equivalents = isset($sense['equivalent']) ? $sense['equivalent'] : array();

            // Rank restarts at 1 for every sense.
            $rank = 1;
            foreach ($equivalents as $equivalent) {
                $lmfEquivalent = new Owl\LmfEquivalent();
                $lmfEquivalent->setUri($this->getUriFactory()->create('Equivalent', $equivalent, $id . '-' . $rank));
                $lmfEquivalent->setLanguage('Anglų');
                $lmfEquivalent->setWrittenForm($equivalent);
                $lmfEquivalent->setRank($rank++);
                $lmfSense->addEquivalent($lmfEquivalent);
            }

            $lexicalEntry->addSense($lmfSense);
        }

        // TODO: add pronunciation and word forms to the entry. They are collected per
        // sense above ('pronunciation', 'wordForms') but are not written out yet.

        fwrite($fileIndividuals, $lexicalEntry->toLmfString());

        // Progress line: running number, record id and lemma.
        echo '<br />' . $recordNr++ . '-' . $id . '-' . $lemma . "\n";
    }

    fclose($fileIndividuals);
}
/**
 * Converts a dictionary XML export into LMF individuals and writes them to a file.
 *
 * Every <return> element of the input is one record. Its child nodes are collected
 * into an array keyed by node name (the original author lists id, header, status and
 * metadata). The text of the <metadata> child is itself an XML document; the first
 * <record> of that document is scanned for <el> elements:
 *  - name="AntrastinisZodis": the headword, stored as 'lemma';
 *  - name="Reiksme": one sense whose single equivalent is the element's value.
 * The original author's notes also mention "Straipnelis" (some kind of encoded text,
 * TODO) and "NuorodosId" (not used); neither is read here.
 *
 * Records without a lemma are skipped. For every other record a lexical entry with
 * its lemma, senses and equivalents is serialised via toLmfString() and appended to
 * the output file, and a progress line is echoed.
 *
 * @param string $filename          Path of the XML file to read.
 * @param string $fileOfIndividuals Path of the output file (opened with "w+", so it is truncated).
 *
 * @throws \RuntimeException When the input cannot be read or parsed, or the output cannot be opened.
 */
private function buildLmfIndividuals($filename, $fileOfIndividuals)
{
    $resourceName = $this->getResourceName();

    // file_get_contents() also copes with empty files, unlike fread($file, filesize(...)).
    $xml = file_get_contents($filename);
    if ($xml === false) {
        throw new \RuntimeException('Cannot read source file: ' . $filename);
    }

    $dom = new \DOMDocument('1.0', 'UTF-8');
    if ($xml === '' || !$dom->loadXML($xml)) {
        throw new \RuntimeException('Source file is not well-formed XML: ' . $filename);
    }

    // Opened only after the input parsed, so a broken input does not truncate existing output.
    $fileIndividuals = fopen($fileOfIndividuals, 'w+');
    if ($fileIndividuals === false) {
        throw new \RuntimeException('Cannot open output file: ' . $fileOfIndividuals);
    }

    $recordNr = 1;

    foreach ($dom->getElementsByTagName('return') as $domRecord) {
        /* @var $domRecord \DOMElement */
        $arr = array();

        foreach ($domRecord->childNodes as $node) {
            /* @var $node \DOMNode */
            if ($node->nodeName !== 'metadata') {
                $arr[$node->nodeName] = $node->nodeValue;
                continue;
            }

            $ins = array();

            // The metadata node carries a whole XML document as its text value.
            $payload = (string) $node->nodeValue;
            $metadata = new \DOMDocument('1.0', 'UTF-8');
            $record = null;
            if ($payload !== '' && $metadata->loadXML($payload)) {
                $record = $metadata->getElementsByTagName('record')->item(0);
            }

            // A payload without a <record> leaves $ins empty, so the record is skipped below.
            $elements = $record !== null ? $record->getElementsByTagName('el') : array();

            foreach ($elements as $el) {
                /* @var $el \DOMElement */
                $name = $el->getAttribute('name');
                $value = $el->getAttribute('value');

                // Lemma: only taken when a value is present. Compared against '' rather
                // than by truthiness so that a value of "0" is not discarded.
                if ($name === 'AntrastinisZodis' && $value !== '') {
                    $ins['lemma'] = htmlspecialchars($value);
                }

                // Senses: every "Reiksme" element yields one sense with exactly one
                // equivalent, its (escaped) value - even when that value is empty.
                if ($name === 'Reiksme') {
                    $ins['senses'][] = array('equivalent' => array(htmlspecialchars($value)));
                }
            }

            $arr[$node->nodeName] = $ins;
        }

        // Only records that yielded a headword become lexical entries.
        if (!isset($arr['metadata']['lemma'])) {
            continue;
        }

        $lemma = $arr['metadata']['lemma'];
        // A record without an <id> child passes null on, as before, but without a notice.
        $id = isset($arr['id']) ? $arr['id'] : null;
        // A headword may come without any "Reiksme" element.
        $senses = isset($arr['metadata']['senses']) ? $arr['metadata']['senses'] : array();

        $lexicalEntry = new Owl\LmfLexicalEntry($resourceName);
        $lexicalEntry->setUri($this->getUriFactory()->create('LexicalEntry', $lemma, $id));

        // Set lemma
        $lmfLemma = new Owl\LmfLemma();
        $lmfLemma->setWrittenForm($lemma);
        $lmfLemma->setUri($this->getUriFactory()->create('Lemma', $lemma, $id));
        $lexicalEntry->setLemma($lmfLemma);

        $senseNr = 1;
        foreach ($senses as $sense) {
            $lmfSense = new Owl\LmfSense();
            $lmfSense->setUri(
                $this->getUriFactory()->create(
                    'Sense',
                    $lexicalEntry->getLemma()->getWrittenForm(),
                    $id . '-' . $senseNr++
                )
            );
            $lmfSense->setLemmaWrittenForm($lexicalEntry->getLemma()->getWrittenForm());

            // Rank restarts at 1 for every sense.
            $rank = 1;
            foreach ($sense['equivalent'] as $equivalent) {
                $lmfEquivalent = new Owl\LmfEquivalent();
                $lmfEquivalent->setUri($this->getUriFactory()->create('Equivalent', $equivalent, $id . '-' . $rank));
                $lmfEquivalent->setLanguage('Lietuvių');
                $lmfEquivalent->setWrittenForm($equivalent);
                $lmfEquivalent->setRank($rank++);
                $lmfSense->addEquivalent($lmfEquivalent);
            }

            $lexicalEntry->addSense($lmfSense);
        }

        // TODO: add pronunciation and word forms.

        fwrite($fileIndividuals, $lexicalEntry->toLmfString());

        // Progress line: running number, record id and lemma.
        echo '<br />' . $recordNr++ . '-' . $id . '-' . $lemma . "\n";
    }

    fclose($fileIndividuals);
}