/**
 * Converts an exported dictionary XML file into LMF lexical entries and writes
 * their LMF strings, one after another, to the output file.
 *
 * Every "return" element of the source is one record. Its "metadata" child holds
 * an XML document (as text) whose first "record" element is read as follows:
 *  - el[name=AntrastinisZodis] -> lemma
 *  - el[name=Reiksme]          -> one sense; its child elements supply
 *                                 Forma (word forms), Tarimas (pronunciation)
 *                                 and Atitikmuo (equivalents)
 * All other children of "return" (id, header, status, ...) are kept as plain text.
 *
 * Only records with a lemma are exported. For each exported record a progress
 * line "<br />{nr}-{id}-{lemma}" is echoed.
 *
 * TODO: pronunciation and word forms are collected per sense but not exported yet.
 *
 * @param string $filename          Path of the source XML file.
 * @param string $fileOfIndividuals Path of the output file (truncated on open).
 *
 * @return void
 *
 * @throws \RuntimeException When the source cannot be read or parsed, or the
 *                           output file cannot be opened.
 */
private function buildLmfIndividuals($filename, $fileOfIndividuals)
{
    $resourceName = $this->getResourceName();

    // Read the whole source in one checked call; the result is validated
    // before it is handed to the XML parser.
    $xml = is_readable($filename) ? file_get_contents($filename) : false;
    if ($xml === false || $xml === '') {
        throw new \RuntimeException('Cannot read source file: ' . $filename);
    }

    $dom = new \DOMDocument('1.0', 'UTF-8');
    if (!$dom->loadXML($xml)) {
        throw new \RuntimeException('Source file is not well-formed XML: ' . $filename);
    }

    $fileIndividuals = fopen($fileOfIndividuals, 'w+');
    if ($fileIndividuals === false) {
        throw new \RuntimeException('Cannot open output file: ' . $fileOfIndividuals);
    }

    $recordNr = 1;

    foreach ($dom->getElementsByTagName('return') as $domRecord) {
        /* @var $domRecord \DOMElement */
        $arr = array();

        foreach ($domRecord->childNodes as $node) {
            /* @var $node \DOMNode */
            if ($node->nodeName != 'metadata' || !$node->nodeValue) {
                // Plain field (or empty metadata): keep its text as is
                $arr[$node->nodeName] = $node->nodeValue;
                continue;
            }

            $ins = array();
            $metadata = new \DOMDocument('1.0', 'UTF-8');
            // Metadata that does not parse or has no record yields an empty
            // array, so the record is skipped below instead of crashing.
            $record = $metadata->loadXML($node->nodeValue)
                ? $metadata->getElementsByTagName('record')->item(0)
                : null;

            if ($record !== null) {
                foreach ($record->getElementsByTagName('el') as $el) {
                    /* @var $el \DOMElement */
                    $name = $el->getAttribute('name');
                    $value = $el->getAttribute('value');

                    if ($name == 'AntrastinisZodis' && $value) {
                        // Known data glitch: in this one headword the first space is
                        // not a regular one, so a fixed spelling is stored instead.
                        if ($value == 'patekti į nepatogią padėtį') {
                            $ins['lemma'] = 'patekti į nepatogią padėtį';
                        } else {
                            $ins['lemma'] = htmlspecialchars($value);
                        }
                    } elseif ($name == 'Reiksme') {
                        // A sense is taken even when its own value is empty
                        $senseArr = array();
                        foreach ($el->childNodes as $child) {
                            // Text nodes between the elements are ignored
                            if (!($child instanceof \DOMElement)) {
                                continue;
                            }
                            $childName = $child->getAttribute('name');
                            $childValue = $child->getAttribute('value');
                            if ($childName == 'Forma') {
                                $senseArr['wordForms'][] = $childValue;
                            } elseif ($childName == 'Tarimas') {
                                $senseArr['pronunciation'] = $childValue;
                            } elseif ($childName == 'Atitikmuo') {
                                $senseArr['equivalent'][] = htmlspecialchars($childValue);
                            }
                        }
                        $ins['senses'][] = $senseArr;
                    }
                }
            }

            $arr[$node->nodeName] = $ins;
        }

        // Convert the collected array (id, header, status, metadata) to a lexical entry
        $meta = (isset($arr['metadata']) && is_array($arr['metadata'])) ? $arr['metadata'] : array();
        if (!isset($meta['lemma'])) {
            continue;
        }

        $id = isset($arr['id']) ? $arr['id'] : null;
        $lemma = $meta['lemma'];
        // A record may have a lemma but no sense at all
        $senses = isset($meta['senses']) ? $meta['senses'] : array();

        $lexicalEntry = new Owl\LmfLexicalEntry($resourceName);
        $lexicalEntry->setUri($this->getUriFactory()->create('LexicalEntry', $lemma, $id));

        $lmfLemma = new Owl\LmfLemma();
        $lmfLemma->setWrittenForm($lemma);
        $lmfLemma->setUri($this->getUriFactory()->create('Lemma', $lemma, $id));
        $lexicalEntry->setLemma($lmfLemma);

        $senseNr = 1;
        foreach ($senses as $sense) {
            $writtenForm = $lexicalEntry->getLemma()->getWrittenForm();

            $lmfSense = new Owl\LmfSense();
            $lmfSense->setUri($this->getUriFactory()->create('Sense', $writtenForm, $id . '-' . $senseNr++));
            $lmfSense->setLemmaWrittenForm($writtenForm);

            // A sense without any Atitikmuo child has no 'equivalent' key
            $equivalents = isset($sense['equivalent']) ? $sense['equivalent'] : array();
            $rank = 1;
            foreach ($equivalents as $equivalent) {
                $lmfEquivalent = new Owl\LmfEquivalent();
                $lmfEquivalent->setUri($this->getUriFactory()->create('Equivalent', $equivalent, $id . '-' . $rank));
                $lmfEquivalent->setLanguage('Anglų');
                $lmfEquivalent->setWrittenForm($equivalent);
                $lmfEquivalent->setRank($rank++);
                $lmfSense->addEquivalent($lmfEquivalent);
            }

            $lexicalEntry->addSense($lmfSense);
        }

        fwrite($fileIndividuals, $lexicalEntry->toLmfString());
        echo '<br />' . $recordNr++ . '-' . $id . '-' . $lemma . "\n";
    }

    fclose($fileIndividuals);
}
/**
 * Converts an exported idiom dictionary XML file into LMF lexical entries and
 * writes their LMF strings to the output file.
 *
 * Every "return" element of the source is one record. Its "metadata" child holds
 * an XML document (as text) whose first "record" element is read as follows:
 *  - el[name=word]  -> the first one is the lemma, further ones are word forms
 *  - el[name=idiom] -> one idiom; its child elements are flattened:
 *                      "expl" and "link" children are expanded with
 *                      getChildNodesArray(), every other child (e.g. idiomtag)
 *                      is stored as a list of values under its own name
 * All other children of "return" (id, header, status, ...) are kept as plain text.
 *
 * Records with status "-1" or without a lemma are skipped. All idioms of a
 * record are rendered into one HTML fragment (wrapped in CDATA) that becomes the
 * written form of the single sense definition.
 *
 * @param string $filename          Path of the source XML file.
 * @param string $fileOfIndividuals Path of the output file (truncated on open).
 *
 * @return void
 *
 * @throws \RuntimeException When the source cannot be read or parsed, or the
 *                           output file cannot be opened.
 */
protected function buildLmfIndividuals($filename, $fileOfIndividuals)
{
    $resourceName = $this->getResourceName();

    // Read the whole source in one checked call; the result is validated
    // before it is handed to the XML parser.
    $xml = is_readable($filename) ? file_get_contents($filename) : false;
    if ($xml === false || $xml === '') {
        throw new \RuntimeException('Cannot read source file: ' . $filename);
    }

    $dom = new \DOMDocument('1.0', 'UTF-8');
    if (!$dom->loadXML($xml)) {
        throw new \RuntimeException('Source file is not well-formed XML: ' . $filename);
    }

    $fileIndividuals = fopen($fileOfIndividuals, 'w+');
    if ($fileIndividuals === false) {
        throw new \RuntimeException('Cannot open output file: ' . $fileOfIndividuals);
    }

    foreach ($dom->getElementsByTagName('return') as $domRecord) {
        /* @var $domRecord \DOMElement */
        $arr = array();

        foreach ($domRecord->childNodes as $node) {
            /* @var $node \DOMNode */
            if ($node->nodeName != 'metadata' || !$node->nodeValue) {
                // Plain field (or empty metadata): keep its text as is
                $arr[$node->nodeName] = $node->nodeValue;
                continue;
            }

            $ins = array();
            $metadata = new \DOMDocument('1.0', 'UTF-8');
            // Metadata that does not parse or has no record yields an empty
            // array, so the record is skipped below instead of crashing.
            $record = $metadata->loadXML($node->nodeValue)
                ? $metadata->getElementsByTagName('record')->item(0)
                : null;

            if ($record !== null) {
                foreach ($record->getElementsByTagName('el') as $el) {
                    /* @var $el \DOMElement */
                    $name = $el->getAttribute('name');
                    $value = $el->getAttribute('value');

                    if ($name == 'word') {
                        // "word" elements are taken even when their value is empty
                        if (!isset($ins['lemma'])) {
                            // First is lemma
                            $ins['lemma'] = htmlspecialchars($value);
                        } else {
                            // Second and next are word forms
                            $ins['wordForms'][] = htmlspecialchars($value);
                        }
                    } elseif ($name == 'idiom' && $value) {
                        $idiomsArr = array('idiom' => $value);
                        $idiomsArr['explanations'] = array();
                        // All child elements belong to the same idiom
                        foreach ($el->childNodes as $child) {
                            // Text nodes between the elements are ignored
                            if (!($child instanceof \DOMElement)) {
                                continue;
                            }
                            $childName = $child->getAttribute('name');
                            if ($childName == 'expl') {
                                $idiomsArr['explanations'][] = array($childName => $this->getChildNodesArray($child));
                            } elseif ($childName == 'link') {
                                $idiomsArr['links'][] = array($childName => $this->getChildNodesArray($child));
                            } else {
                                $idiomsArr[$childName][] = $child->getAttribute('value');
                            }
                        }
                        $ins['idioms'][] = $idiomsArr;
                    }
                }
            }

            $arr[$node->nodeName] = $ins;
        }

        // Convert the collected array (id, header, status, metadata) to a lexical entry
        $status = isset($arr['status']) ? $arr['status'] : null;
        $meta = (isset($arr['metadata']) && is_array($arr['metadata'])) ? $arr['metadata'] : array();
        if ($status == '-1' || empty($meta['lemma'])) {
            continue;
        }

        $id = isset($arr['id']) ? $arr['id'] : null;
        $lemma = $meta['lemma'];
        // A record may consist of a headword without any idiom
        $idioms = isset($meta['idioms']) ? $meta['idioms'] : array();

        $lexicalEntry = new Owl\LmfLexicalEntry($resourceName);
        $lexicalEntry->setUri($this->getUriFactory()->create('LexicalEntry', $lemma, $id));

        $lmfLemma = new Owl\LmfLemma();
        $lmfLemma->setWrittenForm($lemma);
        $lmfLemma->setUri($this->getUriFactory()->create('Lemma', $lemma, $id));
        $lexicalEntry->setLemma($lmfLemma);

        $lmfSense = new Owl\LmfSense();
        $lmfSense->setLemmaWrittenForm($lmfLemma->getWrittenForm());
        $lmfSense->setUri($this->getUriFactory()->create('Sense', $lemma, $id));

        $lmfDefintion = new Owl\LmfDefinition();
        $lmfDefintion->setUri($this->getUriFactory()->create('Definition', $lemma, $id));

        $lmfTextRepresentation = new Owl\LmfTextRepresentation();
        $lmfTextRepresentation->setUri($this->getUriFactory()->create('TextRepresentation', $lemma, $id));

        $writtenForm = "<![CDATA[";
        foreach ($idioms as $idiom) {
            if (!isset($idiom['idiom'])) {
                continue;
            }

            $writtenForm .= "\n<br/><span style=\"font-weight: bold;\">{$idiom['idiom']}</span> ";

            // Idiom tags
            if (isset($idiom['idiomtag'])) {
                $writtenForm .= implode('., ', $idiom['idiomtag']) . ". ";
            }

            // Explanations; they are numbered only when there is more than one
            if (isset($idiom['explanations'])) {
                $countExpl = count($idiom['explanations']);
                foreach ($idiom['explanations'] as $explNr => $expls) {
                    if (!isset($expls['expl'])) {
                        continue;
                    }

                    if ($countExpl > 1) {
                        $writtenForm .= "\n<br/> <em>" . ($explNr + 1) . ".</em><i>{$expls['expl']['value']}:</i>";
                    } else {
                        $writtenForm .= "<i>{$expls['expl']['value']}:</i>";
                    }

                    // Examples, each optionally followed by its first tag
                    if (isset($expls['expl']['children'])) {
                        foreach ($expls['expl']['children'] as $example) {
                            if (isset($example['example']['value'])) {
                                $writtenForm .= " {$example['example']['value']}.";
                            }
                            if (isset($example['example']['children'][0]['exampletag']['value'])) {
                                $writtenForm .= " " . $example['example']['children'][0]['exampletag']['value'] . ".";
                            }
                        }
                    }
                }
            }

            // TODO make real links between lemmas
            if (isset($idiom['links'])) {
                foreach ($idiom['links'] as $link) {
                    if (!isset($link['link']['value'])) {
                        continue;
                    }
                    if (isset($link['link']['children'][0]['linktag']['value'])) {
                        $writtenForm .= "<i>{$link['link']['children'][0]['linktag']['value']}</i> ";
                    }
                    $writtenForm .= $link['link']['value'];
                }
            }
        }
        $writtenForm .= "]]>";

        $lmfTextRepresentation->setWrittenForm($writtenForm);
        $lmfDefintion->addTextRepresentation($lmfTextRepresentation);
        $lmfSense->setDefinition($lmfDefintion);
        $lexicalEntry->addSense($lmfSense);

        // Word forms
        if (!empty($meta['wordForms'])) {
            $rank = 1;
            foreach ($meta['wordForms'] as $wordForm) {
                $lmfWordForm = new Owl\LmfWordForm();
                $lmfWordForm->setUri($this->getUriFactory()->create('WordForm', $wordForm, $id . '-' . $rank++));
                $lmfWordForm->setWrittenForm($wordForm);
                $lexicalEntry->addWordForm($lmfWordForm);
            }
        }

        fwrite($fileIndividuals, $lexicalEntry->toLmfString());
    }

    fclose($fileIndividuals);
}
/**
 * Converts an exported dictionary XML file into LMF lexical entries and writes
 * their LMF strings to the output file.
 *
 * Every "return" element of the source is one record. Its "metadata" child holds
 * an XML document (as text) whose first "record" element is read as follows:
 *  - el[name=AntrastinisZodis] -> lemma
 *  - el[name=Forma]            -> word form
 *  - el[name=Tarimas]          -> pronunciation (collected, not exported yet)
 *  - el[name=Reiksme]          -> one sense; its child elements supply
 *                                 KalbosDalis (part of speech, expanded with
 *                                 fullAbbreviation()) and Atitikmuo (equivalents)
 * All other children of "return" (id, header, status, ...) are kept as plain text.
 *
 * The senses of one record are grouped by part of speech: the first sense opens
 * a lexical entry, every later sense joins the entry with the same part of
 * speech or opens a new one whose URI name is suffixed with "-{n}". Word forms
 * are attached to the entry of the last sense. A record with a lemma but
 * without senses produces no output.
 *
 * @param string $filename          Path of the source XML file.
 * @param string $fileOfIndividuals Path of the output file (truncated on open).
 *
 * @return void
 *
 * @throws \RuntimeException When the source cannot be read or parsed, or the
 *                           output file cannot be opened.
 */
private function buildLmfIndividuals($filename, $fileOfIndividuals)
{
    $resourceName = $this->getResourceName();

    // Read the whole source in one checked call; the result is validated
    // before it is handed to the XML parser.
    $xml = is_readable($filename) ? file_get_contents($filename) : false;
    if ($xml === false || $xml === '') {
        throw new \RuntimeException('Cannot read source file: ' . $filename);
    }

    $dom = new \DOMDocument('1.0', 'UTF-8');
    if (!$dom->loadXML($xml)) {
        throw new \RuntimeException('Source file is not well-formed XML: ' . $filename);
    }

    $fileIndividuals = fopen($fileOfIndividuals, 'w+');
    if ($fileIndividuals === false) {
        throw new \RuntimeException('Cannot open output file: ' . $fileOfIndividuals);
    }

    foreach ($dom->getElementsByTagName('return') as $domRecord) {
        /* @var $domRecord \DOMElement */
        $arr = array();

        foreach ($domRecord->childNodes as $node) {
            /* @var $node \DOMNode */
            if ($node->nodeName != 'metadata' || !$node->nodeValue) {
                // Plain field (or empty metadata): keep its text as is
                $arr[$node->nodeName] = $node->nodeValue;
                continue;
            }

            $ins = array();
            $metadata = new \DOMDocument('1.0', 'UTF-8');
            // Metadata that does not parse or has no record yields an empty
            // array, so the record is skipped below instead of crashing.
            $record = $metadata->loadXML($node->nodeValue)
                ? $metadata->getElementsByTagName('record')->item(0)
                : null;

            if ($record !== null) {
                foreach ($record->getElementsByTagName('el') as $el) {
                    /* @var $el \DOMElement */
                    $name = $el->getAttribute('name');
                    $value = $el->getAttribute('value');

                    if ($name == 'Reiksme') {
                        // A sense is taken even when its own value is empty
                        $senseArr = array();
                        foreach ($el->childNodes as $child) {
                            // Text nodes between the elements are ignored
                            if (!($child instanceof \DOMElement)) {
                                continue;
                            }
                            $childName = $child->getAttribute('name');
                            if ($childName == 'KalbosDalis') {
                                $senseArr['partOfSpeach'] = $this->fullAbbreviation($child->getAttribute('value'));
                            } elseif ($childName == 'Atitikmuo') {
                                // Dictionary can contain illegal XML chars
                                $senseArr['equivalent'][] = htmlspecialchars($child->getAttribute('value'));
                            }
                        }
                        $ins['senses'][] = $senseArr;
                    } elseif ($value) {
                        if ($name == 'AntrastinisZodis') {
                            $ins['lemma'] = htmlspecialchars($value);
                        } elseif ($name == 'Forma') {
                            $ins['wordForms'][] = $value;
                        } elseif ($name == 'Tarimas') {
                            $ins['pronunciation'] = $value;
                        }
                    }
                }
            }

            $arr[$node->nodeName] = $ins;
        }

        // TODO add pronunciation to the exported entry
        // Convert the collected array (id, header, status, metadata) to lexical entries
        $meta = (isset($arr['metadata']) && is_array($arr['metadata'])) ? $arr['metadata'] : array();
        if (!isset($meta['lemma'])) {
            continue;
        }

        $id = isset($arr['id']) ? $arr['id'] : null;
        $lemma = $meta['lemma'];
        $senses = isset($meta['senses']) ? $meta['senses'] : array();

        $lexicalEntries = array();
        // Reset for every record so that word forms can never be attached to an
        // entry that belongs to a previous record.
        $lexicalEntry = null;
        $senseNr = 1;

        foreach ($senses as $sense) {
            $partOfSpeech = isset($sense['partOfSpeach']) ? $sense['partOfSpeach'] : null;

            // Look for an entry of this record with the same part of speech
            $lexicalEntry = null;
            foreach ($lexicalEntries as $candidate) {
                /* @var $candidate Owl\LmfLexicalEntry */
                if ($candidate->getPartOfSpeech() == $partOfSpeech) {
                    $lexicalEntry = $candidate;
                }
            }

            if ($lexicalEntry === null) {
                // The first entry uses the bare lemma, later ones get a "-{n}" suffix
                $uriName = empty($lexicalEntries)
                    ? $lemma
                    : $lemma . '-' . (count($lexicalEntries) + 1);

                $lexicalEntry = new Owl\LmfLexicalEntry($resourceName);
                $lexicalEntry->setUri($this->getUriFactory()->create('LexicalEntry', $uriName, $id));

                $lmfLemma = new Owl\LmfLemma();
                $lmfLemma->setWrittenForm($lemma);
                $lmfLemma->setUri($this->getUriFactory()->create('Lemma', $uriName, $id));
                $lexicalEntry->setLemma($lmfLemma);
                $lexicalEntry->setPartOfSpeech($partOfSpeech);

                $lexicalEntries[] = $lexicalEntry;
            }

            $writtenForm = $lexicalEntry->getLemma()->getWrittenForm();

            $lmfSense = new Owl\LmfSense();
            $lmfSense->setUri($this->getUriFactory()->create('Sense', $writtenForm, $id . '-' . $senseNr++));
            $lmfSense->setLemmaWrittenForm($writtenForm);

            // A sense without any Atitikmuo child has no 'equivalent' key
            $equivalents = isset($sense['equivalent']) ? $sense['equivalent'] : array();
            $rank = 1;
            foreach ($equivalents as $equivalent) {
                // Known data glitch: in this one phrase the first space is not a
                // regular one, so it is replaced by a fixed spelling.
                if ($equivalent == 'patekti į nepatogią padėtį') {
                    $equivalent = 'patekti į nepatogią padėtį';
                }

                $lmfEquivalent = new Owl\LmfEquivalent();
                $lmfEquivalent->setUri($this->getUriFactory()->create('Equivalent', $equivalent, $id . '-' . $rank));
                $lmfEquivalent->setLanguage('Lietuvių');
                $lmfEquivalent->setWrittenForm($equivalent);
                $lmfEquivalent->setRank($rank++);
                $lmfSense->addEquivalent($lmfEquivalent);
            }

            $lexicalEntry->addSense($lmfSense);
        }

        // Word forms go to the entry of the last sense; without any sense there
        // is no entry to attach them to.
        if ($lexicalEntry !== null && !empty($meta['wordForms'])) {
            $rank = 1;
            foreach ($meta['wordForms'] as $wordForm) {
                $lmfWordForm = new Owl\LmfWordForm();
                $lmfWordForm->setUri($this->getUriFactory()->create('WordForm', $wordForm, $id . '-' . $rank++));
                $lmfWordForm->setWrittenForm($wordForm);
                $lexicalEntry->addWordForm($lmfWordForm);
            }
        }

        // One record can yield several entries (one per part of speech)
        foreach ($lexicalEntries as $entry) {
            fwrite($fileIndividuals, $entry->toLmfString());
        }
    }

    fclose($fileIndividuals);
}
/**
 * Converts an exported card-index XML file into LMF lexical entries and writes
 * their LMF strings to the output file.
 *
 * Every "return" element of the source is one record. Its "metadata" child holds
 * an XML document (as text) whose first "record" element is read as follows:
 *  - every non-empty attribute of "record" is stored under metadata
 *  - every non-empty child of "record" is stored next to id/header/status;
 *    for "imageURLs" the "value" of its first "imageURL" is stored as imageUrl
 *
 * Data structure as noted by the original author (instance counts in brackets,
 * "*" marks fields described as shown to the user):
 *   record children:
 *     writer (4641)        * Metrics -> Recorders
 *     imageURLs (48822)    * Image 126x166
 *     sourcelink (27369)   * Metrics -> Sources
 *     gramref_header (26)
 *   record attributes:
 *     kartoteka (48822)               - always "Kartoteka 1"
 *     word (48822)                    * Heading: accented word or phrase
 *     word_header (48718)             - used in LKIIS search
 *     cardno (48822)                  * Card number
 *     box (48822)                     - not used
 *     images (48822)                  - image numbers
 *     sourcelocation (16140)          * Metrics -> Location (given with the source)
 *     word_subtitle (8326)            * Subheading
 *     note (328)                      * Note
 *     sourceauthor (483)              * Metrics -> Informant
 *     writedate (1580)                * Metrics -> "Year recorded"
 *     wordvariant_subtitle (38)       - not displayed
 *     sourcelocation_free (837)       * Metrics -> "Location as on the card"
 *     sourcelocation_geocode (2733)   - opens an interactive window
 *     wordvariant_header (459)        * Heading -> Word variant
 *     femineform_header (2530)        * Heading -> Feminine form
 *     repeatable_forms3 (3009)        * Heading -> 3rd form
 *     repeatable_forms2 (3015)        * Heading -> 2nd form
 *     sourcelink_free (2647)          * Metrics -> "Source as on the card"
 *     writer_free (424)               * Metrics -> "Recorder as on the card"
 *     gram (2234)                     - not displayed, accentuation class (e.g. 3b)
 *     sourceauthoryears (4)           * Metrics -> "Informant's age / year of birth"
 *     rarely filled: repeatable_forms3_subtitle (30), repeatable_forms2_subtitle (30),
 *     repeatable_forms4 (12), femineform_subtitle (49), gram_subtitle (51),
 *     writercomment (4), bugacard (18), corrections (1), homonym (4), unusable (1),
 *     explanation_header (1), confidence (1), explanation_subtitle (1), content (2)
 *
 * Records with status "-1" or without a "word" attribute are skipped. Only
 * word, cardno, imageUrl, sourcelocation and sourcelink are exported; they are
 * rendered into one HTML fragment (wrapped in CDATA) that becomes the written
 * form of the single sense definition.
 *
 * @param string $filename          Path of the source XML file.
 * @param string $fileOfIndividuals Path of the output file (truncated on open).
 *
 * @return void
 *
 * @throws \RuntimeException When the source cannot be read or parsed, or the
 *                           output file cannot be opened.
 */
protected function buildLmfIndividuals($filename, $fileOfIndividuals)
{
    $resourceName = $this->getResourceName();

    // Read the whole source in one checked call; the result is validated
    // before it is handed to the XML parser.
    $xml = is_readable($filename) ? file_get_contents($filename) : false;
    if ($xml === false || $xml === '') {
        throw new \RuntimeException('Cannot read source file: ' . $filename);
    }

    $dom = new \DOMDocument('1.0', 'UTF-8');
    if (!$dom->loadXML($xml)) {
        throw new \RuntimeException('Source file is not well-formed XML: ' . $filename);
    }

    $fileIndividuals = fopen($fileOfIndividuals, 'w+');
    if ($fileIndividuals === false) {
        throw new \RuntimeException('Cannot open output file: ' . $fileOfIndividuals);
    }

    foreach ($dom->getElementsByTagName('return') as $domRecord) {
        /* @var $domRecord \DOMElement */
        $arr = array();

        foreach ($domRecord->childNodes as $node) {
            /* @var $node \DOMNode */
            if ($node->nodeName != 'metadata' || !$node->nodeValue) {
                // Plain field (or empty metadata): keep its text as is
                $arr[$node->nodeName] = $node->nodeValue;
                continue;
            }

            $ins = array();
            $metadata = new \DOMDocument('1.0', 'UTF-8');
            // Metadata that does not parse or has no record yields an empty
            // array, so the record is skipped below instead of crashing.
            $record = $metadata->loadXML($node->nodeValue)
                ? $metadata->getElementsByTagName('record')->item(0)
                : null;

            if ($record !== null) {
                // Non-empty attributes of the record
                foreach ($record->attributes as $attributeName => $attributeNode) {
                    /* @var $attributeNode \DOMNode */
                    if ($attributeNode->nodeValue) {
                        $ins[$attributeName] = $attributeNode->nodeValue;
                    }
                }

                // Non-empty child nodes of the record
                foreach ($record->childNodes as $childNode) {
                    /* @var $childNode \DOMNode */
                    if (!$childNode->nodeValue) {
                        continue;
                    }
                    if ($childNode->nodeName == 'imageURLs') {
                        $imageUrlNode = $childNode->getElementsByTagName('imageURL')->item(0);
                        // The list may hold no imageURL element at all
                        if ($imageUrlNode !== null) {
                            $arr['imageUrl'] = $imageUrlNode->getAttribute('value');
                        }
                    } else {
                        $arr[$childNode->nodeName] = $childNode->nodeValue;
                    }
                }
            }

            $arr[$node->nodeName] = $ins;
        }

        // Convert the collected array (id, header, status, metadata, ...) to a lexical entry
        $status = isset($arr['status']) ? $arr['status'] : null;
        $meta = (isset($arr['metadata']) && is_array($arr['metadata'])) ? $arr['metadata'] : array();
        if ($status == '-1' || empty($meta['word'])) {
            continue;
        }

        $id = isset($arr['id']) ? $arr['id'] : null;
        $word = $meta['word'];
        // Optional fields; missing ones render as empty text
        $cardNo = isset($meta['cardno']) ? $meta['cardno'] : '';
        $imageUrl = isset($arr['imageUrl']) ? $arr['imageUrl'] : null;
        $sourceLocation = !empty($meta['sourcelocation']) ? $meta['sourcelocation'] : '';
        $sourceLink = !empty($arr['sourcelink']) ? $arr['sourcelink'] : '';

        // The same word can appear on several cards, so the card number is part of the URI name
        $uriName = $word . '-' . $cardNo;

        $lexicalEntry = new Owl\LmfLexicalEntry($resourceName);
        $lexicalEntry->setUri($this->getUriFactory()->create('LexicalEntry', $uriName, $id));

        $lmfLemma = new Owl\LmfLemma();
        $lmfLemma->setWrittenForm($word);
        $lmfLemma->setUri($this->getUriFactory()->create('Lemma', $uriName, $id));
        $lmfLemma->setImage($imageUrl);
        $lexicalEntry->setLemma($lmfLemma);

        $lmfSense = new Owl\LmfSense();
        $lmfSense->setLemmaWrittenForm($lmfLemma->getWrittenForm());
        $lmfSense->setUri($this->getUriFactory()->create('Sense', $uriName, $id));

        $lmfDefintion = new Owl\LmfDefinition();
        $lmfDefintion->setUri($this->getUriFactory()->create('Definition', $uriName, $id));

        $lmfTextRepresentation = new Owl\LmfTextRepresentation();
        $lmfTextRepresentation->setUri($this->getUriFactory()->create('TextRepresentation', $uriName, $id));

        // Empty block used as vertical spacing between rows
        $spacer = "<div style=\"height: 5px;\"></div>";

        $writtenForm = "<![CDATA[";
        $writtenForm .= "<div><em>Kortelės numeris:</em> {$cardNo}</div>";
        $writtenForm .= $spacer;
        $writtenForm .= "<div><img width=\"238\" alt=\"\" src=\"{$imageUrl}\"></div>";
        $writtenForm .= $spacer;

        if ($sourceLocation !== '' || $sourceLink !== '') {
            $writtenForm .= "<div>Metrika</div>";
            if ($sourceLocation !== '') {
                $writtenForm .= "<div><em>Vietovė:</em> {$sourceLocation}</div>";
                $writtenForm .= $spacer;
            }
            if ($sourceLink !== '') {
                $writtenForm .= "<div><em>Šaltiniai:</em> {$sourceLink}</div>";
                $writtenForm .= $spacer;
            }
        }
        $writtenForm .= "]]>";

        $lmfTextRepresentation->setWrittenForm($writtenForm);
        $lmfDefintion->addTextRepresentation($lmfTextRepresentation);
        $lmfSense->setDefinition($lmfDefintion);
        $lexicalEntry->addSense($lmfSense);

        fwrite($fileIndividuals, $lexicalEntry->toLmfString());
    }

    fclose($fileIndividuals);
}
/**
 * Converts an exported dictionary XML file into LMF lexical entries and writes
 * their LMF strings to the output file.
 *
 * Every "return" element of the source is one record. Its "metadata" child holds
 * an XML document (as text) whose first "record" element is read as follows:
 *  - el[name=AntrastinisZodis] -> lemma
 *  - el[name=Reiksme]          -> one sense whose only equivalent is the
 *                                 value of the element itself
 * The original notes also list "Straipnelis" (some encoded text, TODO) and
 * "NuorodosId" (not used); neither is read here.
 * All other children of "return" (id, header, status, ...) are kept as plain text.
 *
 * Only records with a lemma are exported. For each exported record a progress
 * line "<br />{nr}-{id}-{lemma}" is echoed.
 *
 * @param string $filename          Path of the source XML file.
 * @param string $fileOfIndividuals Path of the output file (truncated on open).
 *
 * @return void
 *
 * @throws \RuntimeException When the source cannot be read or parsed, or the
 *                           output file cannot be opened.
 */
private function buildLmfIndividuals($filename, $fileOfIndividuals)
{
    $resourceName = $this->getResourceName();

    // Read the whole source in one checked call; the result is validated
    // before it is handed to the XML parser.
    $xml = is_readable($filename) ? file_get_contents($filename) : false;
    if ($xml === false || $xml === '') {
        throw new \RuntimeException('Cannot read source file: ' . $filename);
    }

    $dom = new \DOMDocument('1.0', 'UTF-8');
    if (!$dom->loadXML($xml)) {
        throw new \RuntimeException('Source file is not well-formed XML: ' . $filename);
    }

    $fileIndividuals = fopen($fileOfIndividuals, 'w+');
    if ($fileIndividuals === false) {
        throw new \RuntimeException('Cannot open output file: ' . $fileOfIndividuals);
    }

    $recordNr = 1;

    foreach ($dom->getElementsByTagName('return') as $domRecord) {
        /* @var $domRecord \DOMElement */
        $arr = array();

        foreach ($domRecord->childNodes as $node) {
            /* @var $node \DOMNode */
            if ($node->nodeName != 'metadata' || !$node->nodeValue) {
                // Plain field (or empty metadata): keep its text as is
                $arr[$node->nodeName] = $node->nodeValue;
                continue;
            }

            $ins = array();
            $metadata = new \DOMDocument('1.0', 'UTF-8');
            // Metadata that does not parse or has no record yields an empty
            // array, so the record is skipped below instead of crashing.
            $record = $metadata->loadXML($node->nodeValue)
                ? $metadata->getElementsByTagName('record')->item(0)
                : null;

            if ($record !== null) {
                foreach ($record->getElementsByTagName('el') as $el) {
                    /* @var $el \DOMElement */
                    $name = $el->getAttribute('name');
                    $value = $el->getAttribute('value');

                    if ($name == 'AntrastinisZodis' && $value) {
                        $ins['lemma'] = htmlspecialchars($value);
                    } elseif ($name == 'Reiksme') {
                        // NOTE(review): a Reiksme with an empty value still yields a
                        // sense with one empty equivalent - confirm this is intended.
                        $ins['senses'][] = array('equivalent' => array(htmlspecialchars($value)));
                    }
                }
            }

            $arr[$node->nodeName] = $ins;
        }

        // TODO add pronunciation and word forms
        // Convert the collected array (id, header, status, metadata) to a lexical entry
        $meta = (isset($arr['metadata']) && is_array($arr['metadata'])) ? $arr['metadata'] : array();
        if (!isset($meta['lemma'])) {
            continue;
        }

        $id = isset($arr['id']) ? $arr['id'] : null;
        $lemma = $meta['lemma'];
        // A record may have a lemma but no sense at all
        $senses = isset($meta['senses']) ? $meta['senses'] : array();

        $lexicalEntry = new Owl\LmfLexicalEntry($resourceName);
        $lexicalEntry->setUri($this->getUriFactory()->create('LexicalEntry', $lemma, $id));

        $lmfLemma = new Owl\LmfLemma();
        $lmfLemma->setWrittenForm($lemma);
        $lmfLemma->setUri($this->getUriFactory()->create('Lemma', $lemma, $id));
        $lexicalEntry->setLemma($lmfLemma);

        $senseNr = 1;
        foreach ($senses as $sense) {
            $writtenForm = $lexicalEntry->getLemma()->getWrittenForm();

            $lmfSense = new Owl\LmfSense();
            $lmfSense->setUri($this->getUriFactory()->create('Sense', $writtenForm, $id . '-' . $senseNr++));
            $lmfSense->setLemmaWrittenForm($writtenForm);

            $equivalents = isset($sense['equivalent']) ? $sense['equivalent'] : array();
            $rank = 1;
            foreach ($equivalents as $equivalent) {
                $lmfEquivalent = new Owl\LmfEquivalent();
                $lmfEquivalent->setUri($this->getUriFactory()->create('Equivalent', $equivalent, $id . '-' . $rank));
                $lmfEquivalent->setLanguage('Lietuvių');
                $lmfEquivalent->setWrittenForm($equivalent);
                $lmfEquivalent->setRank($rank++);
                $lmfSense->addEquivalent($lmfEquivalent);
            }

            $lexicalEntry->addSense($lmfSense);
        }

        fwrite($fileIndividuals, $lexicalEntry->toLmfString());
        echo '<br />' . $recordNr++ . '-' . $id . '-' . $lemma . "\n";
    }

    fclose($fileIndividuals);
}
/**
 * Converts an exported antonym dictionary XML file into LMF lexical entries and
 * writes their LMF strings to the output file.
 *
 * Every "return" element of the source is one record. Its "metadata" child holds
 * an XML document (as text) whose first "record" element is read as follows:
 *  - el[name=homonym]    -> homonym number
 *  - el[name=word]       -> the first one is the lemma, further ones are word forms
 *  - el[name=antonym]    -> one antonym; its child elements (the original notes
 *                           list antgramar, anttag, antremote) are stored as
 *                           lists of values under their own names
 *  - el[name=valcontext] -> one value context; "example" children are expanded
 *                           with getChildNodesArray(), every other child (the
 *                           original notes list expl and antexpl) is stored as
 *                           a list of values under its own name
 * All other children of "return" (id, header, status, ...) are kept as plain text.
 *
 * Records with status "-1" or without a lemma are skipped. Each exported record
 * becomes one sense: its value contexts are rendered into an HTML fragment
 * (wrapped in CDATA) used as the definition text, and every antonym becomes a
 * sense relation of type "Antonimas".
 *
 * Records without a homonym number are written immediately. Records with one
 * are collected per lemma, so that all homonyms share a single lexical entry,
 * and are written after the last record.
 *
 * NOTE(review): a record without a homonym number whose lemma is already in the
 * homonym collection adds its sense to that shared entry and writes it at once;
 * the entry is then written again at the end - confirm whether this can occur.
 *
 * @param string $filename          Path of the source XML file.
 * @param string $fileOfIndividuals Path of the output file (truncated on open).
 *
 * @return void
 *
 * @throws \RuntimeException When the source cannot be read or parsed, or the
 *                           output file cannot be opened.
 */
protected function buildLmfIndividuals($filename, $fileOfIndividuals)
{
    $resourceName = $this->getResourceName();

    // Read the whole source in one checked call; the result is validated
    // before it is handed to the XML parser.
    $xml = is_readable($filename) ? file_get_contents($filename) : false;
    if ($xml === false || $xml === '') {
        throw new \RuntimeException('Cannot read source file: ' . $filename);
    }

    $dom = new \DOMDocument('1.0', 'UTF-8');
    if (!$dom->loadXML($xml)) {
        throw new \RuntimeException('Source file is not well-formed XML: ' . $filename);
    }

    $fileIndividuals = fopen($fileOfIndividuals, 'w+');
    if ($fileIndividuals === false) {
        throw new \RuntimeException('Cannot open output file: ' . $fileOfIndividuals);
    }

    // Lexical entries of homonyms, keyed by lemma; written after the loop
    $lexEntries = array();

    foreach ($dom->getElementsByTagName('return') as $domRecord) {
        /* @var $domRecord \DOMElement */
        $arr = array();

        foreach ($domRecord->childNodes as $node) {
            /* @var $node \DOMNode */
            if ($node->nodeName != 'metadata' || !$node->nodeValue) {
                // Plain field (or empty metadata): keep its text as is
                $arr[$node->nodeName] = $node->nodeValue;
                continue;
            }

            $ins = array();
            $metadata = new \DOMDocument('1.0', 'UTF-8');
            // Metadata that does not parse or has no record yields an empty
            // array, so the record is skipped below instead of crashing.
            $record = $metadata->loadXML($node->nodeValue)
                ? $metadata->getElementsByTagName('record')->item(0)
                : null;

            if ($record !== null) {
                foreach ($record->getElementsByTagName('el') as $el) {
                    /* @var $el \DOMElement */
                    $name = $el->getAttribute('name');
                    $value = $el->getAttribute('value');

                    if ($name == 'valcontext') {
                        // Value contexts are taken even when their own value is empty
                        $valcontextArr = array();
                        foreach ($el->childNodes as $child) {
                            // Text nodes between the elements are ignored
                            if (!($child instanceof \DOMElement)) {
                                continue;
                            }
                            $childName = $child->getAttribute('name');
                            if ($childName == 'example') {
                                $valcontextArr['examples'][] = array($childName => $this->getChildNodesArray($child));
                            } else {
                                $valcontextArr[$childName][] = $child->getAttribute('value');
                            }
                        }
                        $ins['valcontexts'][] = $valcontextArr;
                    } elseif ($value) {
                        if ($name == 'homonym') {
                            $ins['homonym'] = $value;
                        } elseif ($name == 'word') {
                            if (!isset($ins['lemma'])) {
                                // First is lemma
                                $ins['lemma'] = htmlspecialchars($value);
                            } else {
                                // Second and next are word forms
                                $ins['wordForms'][] = htmlspecialchars($value);
                            }
                        } elseif ($name == 'antonym') {
                            $ant = array('antonym' => $value);
                            foreach ($el->childNodes as $param) {
                                // Text nodes between the elements are ignored
                                if ($param instanceof \DOMElement) {
                                    $ant[$param->getAttribute('name')][] = $param->getAttribute('value');
                                }
                            }
                            $ins['antonyms'][] = $ant;
                        }
                    }
                }
            }

            $arr[$node->nodeName] = $ins;
        }

        // Convert the collected array (id, header, status, metadata) to a lexical entry
        $status = isset($arr['status']) ? $arr['status'] : null;
        $meta = (isset($arr['metadata']) && is_array($arr['metadata'])) ? $arr['metadata'] : array();
        if ($status == '-1' || empty($meta['lemma'])) {
            continue;
        }

        $id = isset($arr['id']) ? $arr['id'] : null;
        $lemma = $meta['lemma'];
        $homonym = isset($meta['homonym']) ? $meta['homonym'] : '';
        // Both lists are optional in the source data
        $antonyms = isset($meta['antonyms']) ? $meta['antonyms'] : array();
        $valcontexts = isset($meta['valcontexts']) ? $meta['valcontexts'] : array();

        if (isset($lexEntries[$lemma])) {
            // An earlier homonym already opened the entry: add another sense to it
            $lexicalEntry = $lexEntries[$lemma];
            $lmfLemma = $lexicalEntry->getLemma();
        } else {
            $lexicalEntry = new Owl\LmfLexicalEntry($resourceName);
            $lexicalEntry->setUri($this->getUriFactory()->create('LexicalEntry', $lemma, 0));

            $lmfLemma = new Owl\LmfLemma();
            $lmfLemma->setWrittenForm($lemma);
            $lmfLemma->setUri($this->getUriFactory()->create('Lemma', $lemma, $id));
            $lexicalEntry->setLemma($lmfLemma);
        }

        $senseSuffix = $id . '-' . $homonym;

        $lmfSense = new Owl\LmfSense();
        $lmfSense->setLemmaWrittenForm($lmfLemma->getWrittenForm());
        if ($homonym) {
            $lmfSense->setRank($homonym);
        }
        $lmfSense->setUri($this->getUriFactory()->create('Sense', $lemma, $senseSuffix));

        $lmfDefintion = new Owl\LmfDefinition();
        $lmfDefintion->setUri($this->getUriFactory()->create('Definition', $lemma, $senseSuffix));

        $lmfTextRepresentation = new Owl\LmfTextRepresentation();
        $lmfTextRepresentation->setUri($this->getUriFactory()->create('TextRepresentation', $lemma, $senseSuffix));

        $writtenForm = "<![CDATA[";
        foreach ($valcontexts as $contextNr => $context) {
            // Contexts are numbered only when there is more than one
            if (count($valcontexts) > 1) {
                $writtenForm .= "\n<span style=\"font-weight: bold;\">" . ($contextNr + 1) . "</span>";
            }

            // Explanation followed by the comma separated antonyms
            if (isset($context['expl'])) {
                $writtenForm .= "\n<em>{$context['expl'][0]}</em> <br />";

                $antonymParts = array();
                foreach ($antonyms as $antonym) {
                    $antonymParts[] = "\n<span style=\"font-weight: bold;\">{$antonym['antonym']}</span>";
                }
                // Joining avoids cutting off a trailing comma afterwards, which used
                // to damage the preceding "<br />" when a record had no antonyms.
                if (!empty($antonymParts)) {
                    $writtenForm .= implode(', ', $antonymParts) . ' ';
                }
            }

            // Antonym explanation
            if (isset($context['antexpl'])) {
                $writtenForm .= "\n<br/> <em>{$context['antexpl'][0]}</em>";
            }

            // Examples, each followed by its tags
            if (isset($context['examples'])) {
                foreach ($context['examples'] as $expls) {
                    if (!isset($expls['example']['value'])) {
                        continue;
                    }
                    $writtenForm .= "\n<br />{$expls['example']['value']}";

                    if (isset($expls['example']['children'])) {
                        foreach ($expls['example']['children'] as $example) {
                            if (isset($example['exampletag']['value'])) {
                                $writtenForm .= " {$example['exampletag']['value']}.";
                            }
                        }
                    }
                }
            }
        }
        $writtenForm .= " ]]>";

        $lmfTextRepresentation->setWrittenForm($writtenForm);
        $lmfDefintion->addTextRepresentation($lmfTextRepresentation);
        $lmfSense->setDefinition($lmfDefintion);

        // Sense relations: one per antonym, ranked in source order.
        // The antonym is stored as written form only; no lexical entry is
        // created for it.
        foreach ($antonyms as $antonymNr => $antonym) {
            $senseRelation = new Owl\LmfSenseRelation();
            $senseRelation->setUri($this->getUriFactory()->create('SenseRelation', $lemma, $id . '-' . $antonymNr));
            $senseRelation->setType('Antonimas');
            $senseRelation->setRank($antonymNr + 1);
            $senseRelation->setWrittenForm($antonym['antonym']);
            $lmfSense->addSenseRelation($senseRelation);
        }

        $lexicalEntry->addSense($lmfSense);

        // Word forms
        if (!empty($meta['wordForms'])) {
            $rank = 1;
            foreach ($meta['wordForms'] as $wordForm) {
                $lmfWordForm = new Owl\LmfWordForm();
                $lmfWordForm->setUri($this->getUriFactory()->create('WordForm', $wordForm, $id . '-' . $rank++));
                $lmfWordForm->setWrittenForm($wordForm);
                $lexicalEntry->addWordForm($lmfWordForm);
            }
        }

        if (!$homonym) {
            // Not a homonym: the entry is complete
            fwrite($fileIndividuals, $lexicalEntry->toLmfString());
        } else {
            // Homonym: keep the entry so that later homonyms can extend it
            $lexEntries[$lemma] = $lexicalEntry;
        }
    }

    // Save homonyms
    foreach ($lexEntries as $lexEntry) {
        fwrite($fileIndividuals, $lexEntry->toLmfString());
    }

    fclose($fileIndividuals);
}
/**
 * Converts the records of a harvested XML file into LMF lexical entries and
 * writes their serialised form to $fileOfIndividuals.
 *
 * Every <return> element of the input is one record. Its child nodes are
 * collected by node name (the code below reads `id`, `status` and
 * `metadata`). A non-empty `metadata` node holds an embedded XML document
 * whose element children become a list of single-key arrays:
 *     array(nodeName => array('label' => ..., 'value' => ...))
 * `dc:identifier` elements and elements with an empty value are ignored.
 *
 * Field labels noted by the original author for this data set (translated):
 * dc:title - title; dc:description - edited text / original text / notes /
 * scientific commentary; dc:date - time of recording; dc:source - signature
 * and signature link; dc:creator - informant; dc:publisher - recorder;
 * dc:coverage - place; dc:subject - scientific classification.
 *
 * The lemma is the part of `dc:title` enclosed in square brackets; when the
 * title has no bracket pair the whole title is used. Records whose status is
 * "-1" or that yield no title are skipped. All remaining metadata is rendered
 * as HTML inside one CDATA section used as the definition text.
 *
 * @param string $filename          Path of the XML file to read.
 * @param string $fileOfIndividuals Path of the output file (truncated first).
 *
 * @return void
 *
 * @throws \RuntimeException When the input cannot be read or parsed, or the
 *                           output file cannot be opened.
 */
protected function buildLmfIndividuals($filename, $fileOfIndividuals)
{
    $resourceName = $this->getResourceName();

    // file_get_contents() also copes with empty files, where fread() with a
    // zero length raises a warning (an error on PHP 8).
    $xml = is_readable($filename) ? file_get_contents($filename) : false;
    if ($xml === false || $xml === '') {
        throw new \RuntimeException("Cannot read XML input file: {$filename}");
    }

    $dom = new \DOMDocument('1.0', 'UTF-8');
    if (!$dom->loadXML($xml)) {
        throw new \RuntimeException("Input file is not well-formed XML: {$filename}");
    }

    $fileIndividuals = fopen($fileOfIndividuals, 'w+');
    if ($fileIndividuals === false) {
        throw new \RuntimeException("Cannot open output file for writing: {$fileOfIndividuals}");
    }

    foreach ($dom->getElementsByTagName('return') as $domRecord) {
        /* @var $domRecord \DOMElement */
        $arr = array();
        foreach ($domRecord->childNodes as $node) {
            if ($node->nodeName != 'metadata' || !$node->nodeValue) {
                $arr[$node->nodeName] = $node->nodeValue;
                continue;
            }

            // The metadata node carries a complete XML document as text.
            $ins = array();
            $metadata = new \DOMDocument('1.0', 'UTF-8');
            $record = $metadata->loadXML($node->nodeValue) ? $metadata->documentElement : null;
            if ($record !== null) {
                foreach ($record->childNodes as $childNode) {
                    // Text/comment nodes have no getAttribute(); only elements carry fields.
                    if (!($childNode instanceof \DOMElement)
                        || !$childNode->nodeValue
                        || $childNode->nodeName == 'dc:identifier'
                    ) {
                        continue;
                    }
                    // Tag names repeat a lot, so every field is kept as its own list item.
                    $ins[] = array(
                        $childNode->nodeName => array(
                            'label' => $childNode->getAttribute('label'),
                            'value' => $childNode->nodeValue,
                        ),
                    );
                }
            }
            $arr[$node->nodeName] = $ins;
        }

        // An empty <metadata> node is stored as a string above; treat it as "no fields".
        $metadataItems = (isset($arr['metadata']) && is_array($arr['metadata'])) ? $arr['metadata'] : array();
        $recordId = isset($arr['id']) ? $arr['id'] : null;
        $status = isset($arr['status']) ? trim($arr['status']) : '';

        // The lemma is the record title, taken from dc:title.
        $recordTitle = '';
        foreach ($metadataItems as $keys) {
            $key = key($keys);
            if ($key != 'dc:title') {
                continue;
            }
            $recordTitle = $keys[$key]['value'];
            // The title proper sits between [ and ]; without a complete
            // bracket pair the value is kept whole instead of being cut.
            $startPos = strpos($recordTitle, '[');
            $endPos = ($startPos === false) ? false : strpos($recordTitle, ']', $startPos + 1);
            if ($startPos !== false && $endPos !== false) {
                $recordTitle = substr($recordTitle, $startPos + 1, $endPos - $startPos - 1);
            }
        }

        // Diagnostic dump of records that end up without a title.
        if (!$recordTitle) {
            print_r($arr);
        }

        if ($status === '-1' || !$recordTitle) {
            continue;
        }

        $lexicalEntry = new Owl\LmfLexicalEntry($resourceName);
        $lexicalEntry->setUri($this->getUriFactory()->create('LexicalEntry', $recordTitle, $recordId));

        $lmfLemma = new Owl\LmfLemma();
        $lmfLemma->setWrittenForm($recordTitle);
        $lmfLemma->setUri($this->getUriFactory()->create('Lemma', $recordTitle, $recordId));
        $lexicalEntry->setLemma($lmfLemma);

        $lmfSense = new Owl\LmfSense();
        $lmfSense->setLemmaWrittenForm($lmfLemma->getWrittenForm());
        $lmfSense->setUri($this->getUriFactory()->create('Sense', $recordTitle, $recordId));

        $lmfDefintion = new Owl\LmfDefinition();
        $lmfDefintion->setUri($this->getUriFactory()->create('Definition', $recordTitle, $recordId));

        $lmfTextRepresentation = new Owl\LmfTextRepresentation();
        $lmfTextRepresentation->setUri($this->getUriFactory()->create('TextRepresentation', $recordTitle, $recordId));

        $html = '';
        foreach ($metadataItems as $keys) {
            $key = key($keys);
            if ($key == 'dc:title') {
                continue;
            }
            $attr = $keys[$key];

            // A dc:source whose label mentions "nuoroda" links to a scan; it is
            // shown as an image, but only when the file name can be extracted.
            $isImageLink = $key == 'dc:source'
                && stripos($attr['label'], "nuoroda") !== false
                && preg_match('/(.*)\\?foto=(.*)\\&id=(.*)/i', $attr['value'], $urlParts);

            if ($isImageLink) {
                $imageSrc = "http://www.tautosakos-rankrastynas.lt/failai/vaizdas/" . urldecode($urlParts[2]);
                // The decoded name may contain quotes or ampersands; escape it for the attribute.
                $imageSrc = htmlspecialchars($imageSrc, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
                $val = "<img width=\"238\" alt=\"\" src=\"{$imageSrc}\">";
                $html .= "<div><em>Signatūros iliustracija:</em><br/> {$val} </div>";
            } else {
                $html .= "<div><em>{$attr['label']}:</em> {$attr['value']} </div>";
            }

            // Spacing between rows
            $html .= "<div style=\"height: 5px;\"></div>";
        }

        // A literal "]]>" in the data would close the CDATA section early;
        // split it across two sections so the output stays well-formed.
        $writtenForm = "<![CDATA[" . str_replace(']]>', ']]]]><![CDATA[>', $html) . "]]>";

        $lmfTextRepresentation->setWrittenForm($writtenForm);
        $lmfDefintion->addTextRepresentation($lmfTextRepresentation);
        $lmfSense->setDefinition($lmfDefintion);
        $lexicalEntry->addSense($lmfSense);

        fwrite($fileIndividuals, $lexicalEntry->toLmfString());
    }

    fclose($fileIndividuals);
}
/**
 * Converts the records of a harvested XML file into LMF lexical entries and
 * writes their serialised form to $fileOfIndividuals.
 *
 * Every <return> element of the input is one record. Its child nodes are
 * collected by node name (the code below reads `id`, `status` and
 * `metadata`). A non-empty `metadata` node holds an embedded XML document
 * whose element children are stored as
 *     nodeName => array('label' => ..., 'value' => ...)
 * A repeated node name gets the suffix "_<n>", where <n> is a counter shared
 * by all repeated names of the record (e.g. the second dc:description becomes
 * dc:description_1). `dc:identifier` elements and elements with an empty
 * value are ignored.
 *
 * Field labels noted by the original author for this data set (translated):
 * dc:title - title, e.g. "Mįslė [tekstas]"; dc:description - transposed
 * riddle question; dc:description_1 - text as recorded; dc:subject - answer;
 * dc:source - signature; tm:type - type; tm:version - version;
 * dc:publisher - recorder; dc:coverage - place; dc:creator - informant.
 *
 * The lemma is the value of `dc:description`. Records whose status is "-1"
 * or that have no such value are skipped. Every field except `dc:title` is
 * rendered as HTML inside one CDATA section used as the definition text.
 *
 * @param string $filename          Path of the XML file to read.
 * @param string $fileOfIndividuals Path of the output file (truncated first).
 *
 * @return void
 *
 * @throws \RuntimeException When the input cannot be read or parsed, or the
 *                           output file cannot be opened.
 */
protected function buildLmfIndividuals($filename, $fileOfIndividuals)
{
    $resourceName = $this->getResourceName();

    // file_get_contents() also copes with empty files, where fread() with a
    // zero length raises a warning (an error on PHP 8).
    $xml = is_readable($filename) ? file_get_contents($filename) : false;
    if ($xml === false || $xml === '') {
        throw new \RuntimeException("Cannot read XML input file: {$filename}");
    }

    $dom = new \DOMDocument('1.0', 'UTF-8');
    if (!$dom->loadXML($xml)) {
        throw new \RuntimeException("Input file is not well-formed XML: {$filename}");
    }

    $fileIndividuals = fopen($fileOfIndividuals, 'w+');
    if ($fileIndividuals === false) {
        throw new \RuntimeException("Cannot open output file for writing: {$fileOfIndividuals}");
    }

    foreach ($dom->getElementsByTagName('return') as $domRecord) {
        /* @var $domRecord \DOMElement */
        $arr = array();
        foreach ($domRecord->childNodes as $node) {
            if ($node->nodeName != 'metadata' || !$node->nodeValue) {
                $arr[$node->nodeName] = $node->nodeValue;
                continue;
            }

            // The metadata node carries a complete XML document as text.
            $ins = array();
            $num = 1;
            $metadata = new \DOMDocument('1.0', 'UTF-8');
            $record = $metadata->loadXML($node->nodeValue) ? $metadata->documentElement : null;
            if ($record !== null) {
                foreach ($record->childNodes as $childNode) {
                    // Text/comment nodes have no getAttribute(); only elements carry fields.
                    if (!($childNode instanceof \DOMElement)
                        || !$childNode->nodeValue
                        || $childNode->nodeName == 'dc:identifier'
                    ) {
                        continue;
                    }
                    $fieldName = $childNode->nodeName;
                    if (isset($ins[$fieldName])) {
                        // Repeated tag: store it under a numbered key instead of overwriting.
                        $fieldName .= '_' . $num++;
                    }
                    $ins[$fieldName] = array(
                        'label' => $childNode->getAttribute('label'),
                        'value' => $childNode->nodeValue,
                    );
                }
            }
            $arr[$node->nodeName] = $ins;
        }

        // An empty <metadata> node is stored as a string above; treat it as "no fields".
        $fields = (isset($arr['metadata']) && is_array($arr['metadata'])) ? $arr['metadata'] : array();
        $recordId = isset($arr['id']) ? $arr['id'] : null;
        $status = isset($arr['status']) ? trim($arr['status']) : '';
        $lemma = isset($fields['dc:description']['value']) ? $fields['dc:description']['value'] : '';

        if ($status === '-1' || empty($lemma)) {
            continue;
        }

        $lexicalEntry = new Owl\LmfLexicalEntry($resourceName);
        $lexicalEntry->setUri($this->getUriFactory()->create('LexicalEntry', $lemma, $recordId));

        $lmfLemma = new Owl\LmfLemma();
        $lmfLemma->setWrittenForm($lemma);
        $lmfLemma->setUri($this->getUriFactory()->create('Lemma', $lemma, $recordId));
        $lexicalEntry->setLemma($lmfLemma);

        $lmfSense = new Owl\LmfSense();
        $lmfSense->setLemmaWrittenForm($lmfLemma->getWrittenForm());
        $lmfSense->setUri($this->getUriFactory()->create('Sense', $lemma, $recordId));

        $lmfDefintion = new Owl\LmfDefinition();
        $lmfDefintion->setUri($this->getUriFactory()->create('Definition', $lemma, $recordId));

        $lmfTextRepresentation = new Owl\LmfTextRepresentation();
        $lmfTextRepresentation->setUri($this->getUriFactory()->create('TextRepresentation', $lemma, $recordId));

        $html = '';
        foreach ($fields as $key => $attr) {
            if ($key === 'dc:title') {
                continue;
            }
            $html .= "<div><em>{$attr['label']}:</em> {$attr['value']} </div>";
            // Spacing between rows
            $html .= "<div style=\"height: 5px;\"></div>";
        }

        // A literal "]]>" in the data would close the CDATA section early;
        // split it across two sections so the output stays well-formed.
        $writtenForm = "<![CDATA[" . str_replace(']]>', ']]]]><![CDATA[>', $html) . "]]>";

        $lmfTextRepresentation->setWrittenForm($writtenForm);
        $lmfDefintion->addTextRepresentation($lmfTextRepresentation);
        $lmfSense->setDefinition($lmfDefintion);
        $lexicalEntry->addSense($lmfSense);

        fwrite($fileIndividuals, $lexicalEntry->toLmfString());
    }

    fclose($fileIndividuals);
}
/**
 * Converts the records of a harvested XML file into LMF lexical entries and
 * writes their serialised form to $fileOfIndividuals.
 *
 * Every <return> element of the input is one record. Its child nodes are
 * collected by node name (the code below reads `id`, `status` and
 * `metadata`). A non-empty `metadata` node holds an embedded XML document
 * whose element children become a list of single-key arrays:
 *     array(nodeName => array('label' => ..., 'value' => ...))
 * `dc:identifier` elements and elements with an empty value are ignored.
 *
 * Field labels noted by the original author for this data set (translated):
 * dc:subject - type; dc:description - song; dc:publisher - recorder;
 * dc:coverage - place; dc:source - archival source / printed publication /
 * signature link; dc:date - time of recording; dc:creator - informant;
 * dc:title - version, used in place of dc:subject.
 *
 * The lemma (called the song title in the original comments) is the value of
 * the last `dc:subject` or `dc:title` field of the record, because the data
 * puts it in either tag. Records whose status is "-1" or that yield no title
 * are skipped. The other fields are rendered as HTML inside one CDATA section
 * used as the definition text; line breaks of `dc:description` become <br />.
 *
 * @param string $filename          Path of the XML file to read.
 * @param string $fileOfIndividuals Path of the output file (truncated first).
 *
 * @return void
 *
 * @throws \RuntimeException When the input cannot be read or parsed, or the
 *                           output file cannot be opened.
 */
protected function buildLmfIndividuals($filename, $fileOfIndividuals)
{
    $resourceName = $this->getResourceName();

    // file_get_contents() also copes with empty files, where fread() with a
    // zero length raises a warning (an error on PHP 8).
    $xml = is_readable($filename) ? file_get_contents($filename) : false;
    if ($xml === false || $xml === '') {
        throw new \RuntimeException("Cannot read XML input file: {$filename}");
    }

    $dom = new \DOMDocument('1.0', 'UTF-8');
    if (!$dom->loadXML($xml)) {
        throw new \RuntimeException("Input file is not well-formed XML: {$filename}");
    }

    $fileIndividuals = fopen($fileOfIndividuals, 'w+');
    if ($fileIndividuals === false) {
        throw new \RuntimeException("Cannot open output file for writing: {$fileOfIndividuals}");
    }

    foreach ($dom->getElementsByTagName('return') as $domRecord) {
        /* @var $domRecord \DOMElement */
        $arr = array();
        foreach ($domRecord->childNodes as $node) {
            if ($node->nodeName != 'metadata' || !$node->nodeValue) {
                $arr[$node->nodeName] = $node->nodeValue;
                continue;
            }

            // The metadata node carries a complete XML document as text.
            $ins = array();
            $metadata = new \DOMDocument('1.0', 'UTF-8');
            $record = $metadata->loadXML($node->nodeValue) ? $metadata->documentElement : null;
            if ($record !== null) {
                foreach ($record->childNodes as $childNode) {
                    // Text/comment nodes have no getAttribute(); only elements carry fields.
                    if (!($childNode instanceof \DOMElement)
                        || !$childNode->nodeValue
                        || $childNode->nodeName == 'dc:identifier'
                    ) {
                        continue;
                    }
                    // Tag names repeat a lot, so every field is kept as its own list item.
                    $ins[] = array(
                        $childNode->nodeName => array(
                            'label' => $childNode->getAttribute('label'),
                            'value' => $childNode->nodeValue,
                        ),
                    );
                }
            }
            $arr[$node->nodeName] = $ins;
        }

        // An empty <metadata> node is stored as a string above; treat it as "no fields".
        $metadataItems = (isset($arr['metadata']) && is_array($arr['metadata'])) ? $arr['metadata'] : array();
        $recordId = isset($arr['id']) ? $arr['id'] : null;
        $status = isset($arr['status']) ? trim($arr['status']) : '';

        // The data is inconsistent: the title may sit in either of these tags.
        $songTitle = '';
        foreach ($metadataItems as $keys) {
            $key = key($keys);
            if ($key == 'dc:subject' || $key == 'dc:title') {
                $songTitle = $keys[$key]['value'];
            }
        }

        // Diagnostic dump of records that end up without a title.
        if (!$songTitle) {
            print_r($arr);
        }

        if ($status === '-1' || !$songTitle) {
            continue;
        }

        $lexicalEntry = new Owl\LmfLexicalEntry($resourceName);
        $lexicalEntry->setUri($this->getUriFactory()->create('LexicalEntry', $songTitle, $recordId));

        $lmfLemma = new Owl\LmfLemma();
        $lmfLemma->setWrittenForm($songTitle);
        $lmfLemma->setUri($this->getUriFactory()->create('Lemma', $songTitle, $recordId));
        $lexicalEntry->setLemma($lmfLemma);

        $lmfSense = new Owl\LmfSense();
        $lmfSense->setLemmaWrittenForm($lmfLemma->getWrittenForm());
        $lmfSense->setUri($this->getUriFactory()->create('Sense', $songTitle, $recordId));

        $lmfDefintion = new Owl\LmfDefinition();
        $lmfDefintion->setUri($this->getUriFactory()->create('Definition', $songTitle, $recordId));

        $lmfTextRepresentation = new Owl\LmfTextRepresentation();
        $lmfTextRepresentation->setUri($this->getUriFactory()->create('TextRepresentation', $songTitle, $recordId));

        $html = '';
        foreach ($metadataItems as $keys) {
            $key = key($keys);
            if ($key == 'dc:title' || $key == 'dc:subject') {
                continue;
            }
            $attr = $keys[$key];

            if ($key == 'dc:description') {
                // Keep the line structure of the text when it is shown as HTML.
                $val = str_replace("\n", '<br />', $attr['value']);
                $html .= "<div><em>{$attr['label']}:</em><br/> {$val} </div>";
            } else {
                $html .= "<div><em>{$attr['label']}:</em> {$attr['value']} </div>";
            }

            // Spacing between rows
            $html .= "<div style=\"height: 5px;\"></div>";
        }

        // A literal "]]>" in the data would close the CDATA section early;
        // split it across two sections so the output stays well-formed.
        $writtenForm = "<![CDATA[" . str_replace(']]>', ']]]]><![CDATA[>', $html) . "]]>";

        $lmfTextRepresentation->setWrittenForm($writtenForm);
        $lmfDefintion->addTextRepresentation($lmfTextRepresentation);
        $lmfSense->setDefinition($lmfDefintion);
        $lexicalEntry->addSense($lmfSense);

        fwrite($fileIndividuals, $lexicalEntry->toLmfString());
    }

    fclose($fileIndividuals);
}