/**
 * Builds LMF individuals from a dictionary XML export and writes them to a file.
 *
 * Each <return> element of the input is treated as one dictionary record. The
 * text of its <metadata> child is itself an XML document; the <el> elements of
 * its first <record> provide the lemma (AntrastinisZodis), word forms (Forma),
 * pronunciation (Tarimas) and senses (Reiksme). Inside a sense, KalbosDalis
 * gives the part of speech and every Atitikmuo gives one equivalent.
 *
 * Senses are grouped into one lexical entry per part of speech. All entries of
 * a record are serialised with toLmfString() and written to the output file.
 *
 * @param string $filename          Path of the XML file to read.
 * @param string $fileOfIndividuals Path of the file to write; it is opened
 *                                  with mode "w+", so existing content is lost.
 *
 * @return void
 *
 * @throws \RuntimeException If the input cannot be read or parsed, or the
 *                           output file cannot be opened.
 */
private function buildLmfIndividuals($filename, $fileOfIndividuals) {
    $resourceName = $this->getResourceName();

    // Read and parse the input first, so a broken input never truncates the output file.
    $xml = file_get_contents($filename);
    if ($xml === false || $xml === '') {
        throw new \RuntimeException('Cannot read dictionary XML from "' . $filename . '"');
    }

    $dom = new \DOMDocument('1.0', 'UTF-8');
    if (!$dom->loadXML($xml)) {
        throw new \RuntimeException('Cannot parse dictionary XML from "' . $filename . '"');
    }

    $fileIndividuals = fopen($fileOfIndividuals, 'w+');
    if ($fileIndividuals === false) {
        throw new \RuntimeException('Cannot open "' . $fileOfIndividuals . '" for writing');
    }

    foreach ($dom->getElementsByTagName('return') as $domRecord) {
        /* @var $domRecord \DOMElement */

        /*
         * Step 1: flatten the record into an array:
         *  - one key per child node name (id, header, status, ...) holding the node text
         *  - metadata
         *      - lemma
         *      - wordForms
         *      - pronunciation (collected, not exported yet)
         *      - senses
         *          - partOfSpeach
         *          - equivalent
         */
        $arr = array();
        foreach ($domRecord->childNodes as $node) {
            if ($node->nodeName !== 'metadata') {
                $arr[$node->nodeName] = $node->nodeValue;
                continue;
            }

            $ins = array('wordForms' => array(), 'senses' => array());

            // The metadata text is an embedded XML document; tolerate it being
            // empty, unparsable or lacking a <record> instead of failing.
            $metadataXml = (string) $node->nodeValue;
            $metadata = new \DOMDocument('1.0', 'UTF-8');
            $record = null;
            if ($metadataXml !== '' && $metadata->loadXML($metadataXml)) {
                $record = $metadata->getElementsByTagName('record')->item(0);
            }
            $elements = $record !== null ? $record->getElementsByTagName('el') : array();

            foreach ($elements as $el) {
                /* @var $el \DOMElement */
                $name = $el->getAttribute('name');
                $value = $el->getAttribute('value');

                // Only a sense container may come without a value. The check is
                // for the empty string so that a literal "0" value is kept.
                if ($value === '' && $name !== 'Reiksme') {
                    continue;
                }

                if ($name === 'AntrastinisZodis') {
                    // Lemma
                    $ins['lemma'] = htmlspecialchars($value);
                } elseif ($name === 'Forma') {
                    // Forms
                    $ins['wordForms'][] = $value;
                } elseif ($name === 'Tarimas') {
                    // Pronunciation
                    $ins['pronunciation'] = $value;
                } elseif ($name === 'Reiksme') {
                    // Senses. Both keys always exist so later code can rely on them.
                    $senseArr = array('partOfSpeach' => null, 'equivalent' => array());
                    foreach ($el->childNodes as $child) {
                        // Text nodes sit between the elements; skip them.
                        if (!$child instanceof \DOMElement) {
                            continue;
                        }
                        $childName = $child->getAttribute('name');
                        if ($childName === 'KalbosDalis') {
                            // Part of speech
                            $senseArr['partOfSpeach'] = $this->fullAbbreviation($child->getAttribute('value'));
                        } elseif ($childName === 'Atitikmuo') {
                            // Equivalents; the dictionary can contain illegal XML characters.
                            $senseArr['equivalent'][] = htmlspecialchars($child->getAttribute('value'));
                        }
                    }
                    $ins['senses'][] = $senseArr;
                }
            }

            $arr[$node->nodeName] = $ins;
        }

        // Records without a lemma produce no output.
        if (!isset($arr['metadata']['lemma'])) {
            continue;
        }

        // TODO add pronunciation to the exported entry.

        /* Step 2: convert the array to lexical entries. */
        $lemma = $arr['metadata']['lemma'];
        $id = isset($arr['id']) ? $arr['id'] : null;

        $lexicalEntries = array();
        // Reset per record so a record without senses cannot reuse the entry
        // object left over from the previous record.
        $lexicalEntry = null;
        $senseNr = 1;

        foreach ($arr['metadata']['senses'] as $sense) {
            $partOfSpeech = $sense['partOfSpeach'];

            // Reuse the entry that already carries this part of speech, if any.
            $lexicalEntry = null;
            foreach ($lexicalEntries as $candidate) {
                /* @var $candidate Owl\LmfLexicalEntry */
                if ($candidate->getPartOfSpeech() == $partOfSpeech) {
                    $lexicalEntry = $candidate;
                }
            }

            if ($lexicalEntry === null) {
                // The first entry is named after the bare lemma; further ones
                // get a "-<position>" suffix (the second entry is "<lemma>-2").
                $uriName = $lemma;
                if (count($lexicalEntries) > 0) {
                    $uriName = $lemma . '-' . (count($lexicalEntries) + 1);
                }

                $lexicalEntry = new Owl\LmfLexicalEntry($resourceName);
                $lexicalEntry->setUri($this->getUriFactory()->create('LexicalEntry', $uriName, $id));

                // Set Lemma
                $lmfLemma = new Owl\LmfLemma();
                $lmfLemma->setWrittenForm($lemma);
                $lmfLemma->setUri($this->getUriFactory()->create('Lemma', $uriName, $id));
                $lexicalEntry->setLemma($lmfLemma);

                $lexicalEntry->setPartOfSpeech($partOfSpeech);
                $lexicalEntries[] = $lexicalEntry;
            }

            $writtenForm = $lexicalEntry->getLemma()->getWrittenForm();

            $lmfSense = new Owl\LmfSense();
            $lmfSense->setUri($this->getUriFactory()->create('Sense', $writtenForm, $id . '-' . $senseNr++));
            $lmfSense->setLemmaWrittenForm($writtenForm);

            $rank = 1;
            foreach ($sense['equivalent'] as $equivalent) {
                // Data workaround: in the source data the first space of this
                // phrase is not a regular space, so the value is replaced with
                // the cleanly spelled phrase.
                if ($equivalent == 'patekti į nepatogią padėtį') {
                    $equivalent = 'patekti į nepatogią padėtį';
                }

                $lmfEquivalent = new Owl\LmfEquivalent();
                $lmfEquivalent->setUri($this->getUriFactory()->create('Equivalent', $equivalent, $id . '-' . $rank));
                $lmfEquivalent->setLanguage('Lietuvių');
                $lmfEquivalent->setWrittenForm($equivalent);
                $lmfEquivalent->setRank($rank++);
                $lmfSense->addEquivalent($lmfEquivalent);
            }

            $lexicalEntry->addSense($lmfSense);
        }

        // Word forms go to the entry used by the last sense.
        // NOTE(review): with several parts of speech only that one entry gets
        // the forms - confirm this is intended.
        if ($lexicalEntry !== null) {
            $rank = 1;
            foreach ($arr['metadata']['wordForms'] as $wordForm) {
                $lmfWordForm = new Owl\LmfWordForm();
                $lmfWordForm->setUri($this->getUriFactory()->create('WordForm', $wordForm, $id . '-' . $rank++));
                $lmfWordForm->setWrittenForm($wordForm);
                $lexicalEntry->addWordForm($lmfWordForm);
            }
        }

        // A record yields one entry per part of speech; write them all.
        foreach ($lexicalEntries as $entry) {
            fwrite($fileIndividuals, $entry->toLmfString());
        }
    }

    fclose($fileIndividuals);
}
/**
 * Builds LMF individuals from a dictionary XML export and writes them to a file.
 *
 * Each <return> element of the input is treated as one dictionary record. The
 * text of its <metadata> child is itself an XML document; the <el> elements of
 * its first <record> provide the lemma (AntrastinisZodis) and the senses
 * (Reiksme). Inside a sense, Atitikmuo gives an equivalent, Forma a word form
 * and Tarimas the pronunciation.
 *
 * One lexical entry is produced per record that has a lemma. It is serialised
 * with toLmfString() and written to the output file, and a progress line
 * ("<br />nr-id-lemma") is echoed for it.
 *
 * @param string $filename          Path of the XML file to read.
 * @param string $fileOfIndividuals Path of the file to write; it is opened
 *                                  with mode "w+", so existing content is lost.
 *
 * @return void
 *
 * @throws \RuntimeException If the input cannot be read or parsed, or the
 *                           output file cannot be opened.
 */
private function buildLmfIndividuals($filename, $fileOfIndividuals) {
    $resourceName = $this->getResourceName();

    // Read and parse the input first, so a broken input never truncates the output file.
    $xml = file_get_contents($filename);
    if ($xml === false || $xml === '') {
        throw new \RuntimeException('Cannot read dictionary XML from "' . $filename . '"');
    }

    $dom = new \DOMDocument('1.0', 'UTF-8');
    if (!$dom->loadXML($xml)) {
        throw new \RuntimeException('Cannot parse dictionary XML from "' . $filename . '"');
    }

    $fileIndividuals = fopen($fileOfIndividuals, 'w+');
    if ($fileIndividuals === false) {
        throw new \RuntimeException('Cannot open "' . $fileOfIndividuals . '" for writing');
    }

    $recordNr = 1;

    foreach ($dom->getElementsByTagName('return') as $domRecord) {
        /* @var $domRecord \DOMElement */

        /*
         * Step 1: flatten the record into an array:
         *  - one key per child node name (id, header, status, ...) holding the node text
         *  - metadata
         *      - lemma
         *      - senses
         *          - equivalent
         *          - wordForms     (collected, not exported yet)
         *          - pronunciation (collected, not exported yet)
         */
        $arr = array();
        foreach ($domRecord->childNodes as $node) {
            if ($node->nodeName !== 'metadata') {
                $arr[$node->nodeName] = $node->nodeValue;
                continue;
            }

            $ins = array('senses' => array());

            // The metadata text is an embedded XML document; tolerate it being
            // empty, unparsable or lacking a <record> instead of failing.
            $metadataXml = (string) $node->nodeValue;
            $metadata = new \DOMDocument('1.0', 'UTF-8');
            $record = null;
            if ($metadataXml !== '' && $metadata->loadXML($metadataXml)) {
                $record = $metadata->getElementsByTagName('record')->item(0);
            }
            $elements = $record !== null ? $record->getElementsByTagName('el') : array();

            foreach ($elements as $el) {
                /* @var $el \DOMElement */
                $name = $el->getAttribute('name');
                $value = $el->getAttribute('value');

                // Only a sense container may come without a value. The check is
                // for the empty string so that a literal "0" value is kept.
                if ($value === '' && $name !== 'Reiksme') {
                    continue;
                }

                if ($name === 'AntrastinisZodis') {
                    // Lemma. Data workaround: in the source data the first space
                    // of this phrase is not a regular space, so the cleanly
                    // spelled phrase is stored instead.
                    if ($value == 'patekti į nepatogią padėtį') {
                        $ins['lemma'] = 'patekti į nepatogią padėtį';
                    } else {
                        $ins['lemma'] = htmlspecialchars($value);
                    }
                } elseif ($name === 'Reiksme') {
                    // Senses. 'equivalent' always exists so later code can iterate it.
                    $senseArr = array('equivalent' => array());
                    foreach ($el->childNodes as $child) {
                        // Text nodes sit between the elements; skip them.
                        if (!$child instanceof \DOMElement) {
                            continue;
                        }
                        $childName = $child->getAttribute('name');
                        if ($childName === 'Forma') {
                            // Forms
                            $senseArr['wordForms'][] = $child->getAttribute('value');
                        } elseif ($childName === 'Tarimas') {
                            // Pronunciation
                            $senseArr['pronunciation'] = $child->getAttribute('value');
                        } elseif ($childName === 'Atitikmuo') {
                            // Equivalents
                            $senseArr['equivalent'][] = htmlspecialchars($child->getAttribute('value'));
                        }
                    }
                    $ins['senses'][] = $senseArr;
                }
            }

            $arr[$node->nodeName] = $ins;
        }

        // Records without a lemma produce no output.
        if (!isset($arr['metadata']['lemma'])) {
            continue;
        }

        /* Step 2: convert the array to a lexical entry. */
        $lemma = $arr['metadata']['lemma'];
        $id = isset($arr['id']) ? $arr['id'] : null;

        $lexicalEntry = new Owl\LmfLexicalEntry($resourceName);
        $lexicalEntry->setUri($this->getUriFactory()->create('LexicalEntry', $lemma, $id));

        // Set Lemma
        $lmfLemma = new Owl\LmfLemma();
        $lmfLemma->setWrittenForm($lemma);
        $lmfLemma->setUri($this->getUriFactory()->create('Lemma', $lemma, $id));
        $lexicalEntry->setLemma($lmfLemma);

        $writtenForm = $lexicalEntry->getLemma()->getWrittenForm();

        $senseNr = 1;
        foreach ($arr['metadata']['senses'] as $sense) {
            $lmfSense = new Owl\LmfSense();
            $lmfSense->setUri($this->getUriFactory()->create('Sense', $writtenForm, $id . '-' . $senseNr++));
            $lmfSense->setLemmaWrittenForm($writtenForm);

            $rank = 1;
            foreach ($sense['equivalent'] as $equivalent) {
                $lmfEquivalent = new Owl\LmfEquivalent();
                $lmfEquivalent->setUri($this->getUriFactory()->create('Equivalent', $equivalent, $id . '-' . $rank));
                $lmfEquivalent->setLanguage('Anglų');
                $lmfEquivalent->setWrittenForm($equivalent);
                $lmfEquivalent->setRank($rank++);
                $lmfSense->addEquivalent($lmfEquivalent);
            }

            $lexicalEntry->addSense($lmfSense);
        }

        // TODO add pronunciation and word forms to the entry. Both are only
        // collected per sense above; the display of equivalent forms still
        // needs fixing before they can be exported.

        fwrite($fileIndividuals, $lexicalEntry->toLmfString());

        // Progress output, one line per exported record.
        echo '<br />' . $recordNr++ . '-' . $id . '-' . $lemma . "\n";
    }

    fclose($fileIndividuals);
}
/**
 * Builds LMF individuals from a dictionary XML export and writes them to a file.
 *
 * Each <return> element of the input is treated as one dictionary record. The
 * text of its <metadata> child is itself an XML document; the <el> elements of
 * its first <record> provide the lemma (AntrastinisZodis) and the senses
 * (Reiksme). In this variant the value of a Reiksme element is used as the
 * single equivalent of that sense.
 *
 * According to the original notes the records may also carry Straipsnelis
 * (some encoded text, still a TODO) and NuorodosId (not used); neither is read.
 *
 * One lexical entry is produced per record that has a lemma. It is serialised
 * with toLmfString() and written to the output file, and a progress line
 * ("<br />nr-id-lemma") is echoed for it.
 *
 * @param string $filename          Path of the XML file to read.
 * @param string $fileOfIndividuals Path of the file to write; it is opened
 *                                  with mode "w+", so existing content is lost.
 *
 * @return void
 *
 * @throws \RuntimeException If the input cannot be read or parsed, or the
 *                           output file cannot be opened.
 */
private function buildLmfIndividuals($filename, $fileOfIndividuals) {
    $resourceName = $this->getResourceName();

    // Read and parse the input first, so a broken input never truncates the output file.
    $xml = file_get_contents($filename);
    if ($xml === false || $xml === '') {
        throw new \RuntimeException('Cannot read dictionary XML from "' . $filename . '"');
    }

    $dom = new \DOMDocument('1.0', 'UTF-8');
    if (!$dom->loadXML($xml)) {
        throw new \RuntimeException('Cannot parse dictionary XML from "' . $filename . '"');
    }

    $fileIndividuals = fopen($fileOfIndividuals, 'w+');
    if ($fileIndividuals === false) {
        throw new \RuntimeException('Cannot open "' . $fileOfIndividuals . '" for writing');
    }

    $recordNr = 1;

    foreach ($dom->getElementsByTagName('return') as $domRecord) {
        /* @var $domRecord \DOMElement */

        /*
         * Step 1: flatten the record into an array:
         *  - one key per child node name (id, header, status, ...) holding the node text
         *  - metadata
         *      - lemma
         *      - senses
         *          - equivalent
         */
        $arr = array();
        foreach ($domRecord->childNodes as $node) {
            if ($node->nodeName !== 'metadata') {
                $arr[$node->nodeName] = $node->nodeValue;
                continue;
            }

            $ins = array('senses' => array());

            // The metadata text is an embedded XML document; tolerate it being
            // empty, unparsable or lacking a <record> instead of failing.
            $metadataXml = (string) $node->nodeValue;
            $metadata = new \DOMDocument('1.0', 'UTF-8');
            $record = null;
            if ($metadataXml !== '' && $metadata->loadXML($metadataXml)) {
                $record = $metadata->getElementsByTagName('record')->item(0);
            }
            $elements = $record !== null ? $record->getElementsByTagName('el') : array();

            foreach ($elements as $el) {
                /* @var $el \DOMElement */
                $name = $el->getAttribute('name');
                $value = $el->getAttribute('value');

                // A sense is recorded even without a value. The check is for
                // the empty string so that a literal "0" value is kept.
                if ($value === '' && $name !== 'Reiksme') {
                    continue;
                }

                if ($name === 'AntrastinisZodis') {
                    // Lemma
                    $ins['lemma'] = htmlspecialchars($value);
                } elseif ($name === 'Reiksme') {
                    // Sense: its value is the one and only equivalent.
                    $ins['senses'][] = array('equivalent' => array(htmlspecialchars($value)));
                }
            }

            $arr[$node->nodeName] = $ins;
        }

        // Records without a lemma produce no output.
        if (!isset($arr['metadata']['lemma'])) {
            continue;
        }

        // TODO add pronunciation and word forms.

        /* Step 2: convert the array to a lexical entry. */
        $lemma = $arr['metadata']['lemma'];
        $id = isset($arr['id']) ? $arr['id'] : null;

        $lexicalEntry = new Owl\LmfLexicalEntry($resourceName);
        $lexicalEntry->setUri($this->getUriFactory()->create('LexicalEntry', $lemma, $id));

        // Set Lemma
        $lmfLemma = new Owl\LmfLemma();
        $lmfLemma->setWrittenForm($lemma);
        $lmfLemma->setUri($this->getUriFactory()->create('Lemma', $lemma, $id));
        $lexicalEntry->setLemma($lmfLemma);

        $writtenForm = $lexicalEntry->getLemma()->getWrittenForm();

        $senseNr = 1;
        foreach ($arr['metadata']['senses'] as $sense) {
            $lmfSense = new Owl\LmfSense();
            $lmfSense->setUri($this->getUriFactory()->create('Sense', $writtenForm, $id . '-' . $senseNr++));
            $lmfSense->setLemmaWrittenForm($writtenForm);

            $rank = 1;
            foreach ($sense['equivalent'] as $equivalent) {
                $lmfEquivalent = new Owl\LmfEquivalent();
                $lmfEquivalent->setUri($this->getUriFactory()->create('Equivalent', $equivalent, $id . '-' . $rank));
                $lmfEquivalent->setLanguage('Lietuvių');
                $lmfEquivalent->setWrittenForm($equivalent);
                $lmfEquivalent->setRank($rank++);
                $lmfSense->addEquivalent($lmfEquivalent);
            }

            $lexicalEntry->addSense($lmfSense);
        }

        fwrite($fileIndividuals, $lexicalEntry->toLmfString());

        // Progress output, one line per exported record.
        echo '<br />' . $recordNr++ . '-' . $id . '-' . $lemma . "\n";
    }

    fclose($fileIndividuals);
}