/**
 * Equivalent to hasSense property.
 *
 * Appends the sense to this entry and keeps the rank counter in step:
 *  - when the sense already carries a rank greater than the next free rank,
 *    the sense keeps it and the counter moves to that rank + 1;
 *  - otherwise the sense is given the next free rank and the counter is
 *    incremented.
 *
 * @param \Rastija\Owl\LmfSense $sense Sense to attach to this entry
 */
public function addSense(LmfSense $sense)
{
    $requestedRank = $sense->getRank();

    if ($this->_nextSenseRank < $requestedRank) {
        // The sense brings its own, higher rank: continue counting after it
        $this->_nextSenseRank = $requestedRank + 1;
    } else {
        // No usable rank on the sense: hand out the next free one
        $sense->setRank($this->_nextSenseRank);
        $this->_nextSenseRank++;
    }

    $this->senses[] = $sense;
}
/**
 * Converts an XML export of dictionary records into LMF individuals and
 * writes their string form to a file.
 *
 * Every <return> element of the input is treated as one record. Its child
 * nodes are collected by node name; this method reads the `id`, `status`
 * and `metadata` entries. A non-empty `metadata` value is itself parsed as
 * XML (see parseLmfRecordMetadata()).
 *
 * Records whose status equals '-1' or that have no lemma are skipped. For
 * every other record a sense with a definition, a text representation, one
 * sense relation per antonym and the word forms are added to the lexical
 * entry of the lemma.
 *
 * Entries of records without a homonym number are written immediately.
 * Entries of records with a homonym number are kept, keyed by lemma, so
 * that later records of the same lemma add further senses; they are
 * written after the last record.
 *
 * NOTE(review): an entry kept for a homonym that later receives a record
 * without a homonym number is written at that moment and again after the
 * last record - confirm whether the input can contain such a mix.
 *
 * @param string $filename          Path of the XML file to read
 * @param string $fileOfIndividuals Path of the output file (opened with "w+")
 *
 * @throws \RuntimeException When the input cannot be read or the output
 *                           cannot be opened for writing
 */
protected function buildLmfIndividuals($filename, $fileOfIndividuals)
{
    $resourceName = $this->getResourceName();

    // file_get_contents() also copes with an empty file, where fread()
    // with a zero length fails
    $xml = file_get_contents($filename);
    if ($xml === false) {
        throw new \RuntimeException("Unable to read source file: {$filename}");
    }

    $dom = new \DOMDocument('1.0', 'UTF-8');
    if ($xml !== '') {
        $dom->loadXML($xml);
    }

    $fileIndividuals = fopen($fileOfIndividuals, 'w+');
    if ($fileIndividuals === false) {
        throw new \RuntimeException("Unable to open output file: {$fileOfIndividuals}");
    }

    // Lexical entries of records with a homonym number, keyed by lemma
    $lexEntries = array();

    foreach ($dom->getElementsByTagName('return') as $domRecord) {
        /* @var $domRecord \DOMElement */
        $arr = array();
        foreach ($domRecord->childNodes as $node) {
            /* @var $node \DOMNode */
            if ($node->nodeName == 'metadata' && $node->nodeValue) {
                $arr[$node->nodeName] = $this->parseLmfRecordMetadata($node->nodeValue);
            } else {
                $arr[$node->nodeName] = $node->nodeValue;
            }
        }

        // An empty metadata node is stored as a string; treat it as "no metadata"
        $metadata = (isset($arr['metadata']) && is_array($arr['metadata']))
            ? $arr['metadata']
            : array();
        $status = isset($arr['status']) ? $arr['status'] : null;
        if ($status == '-1' || empty($metadata['lemma'])) {
            continue;
        }

        $id          = isset($arr['id']) ? $arr['id'] : null;
        $lemma       = $metadata['lemma'];
        $homonym     = isset($metadata['homonym']) ? $metadata['homonym'] : '';
        $antonyms    = isset($metadata['antonyms']) ? $metadata['antonyms'] : array();
        $valcontexts = isset($metadata['valcontexts']) ? $metadata['valcontexts'] : array();
        $senseKey    = $id . '-' . $homonym;

        // Lexical entry has multiple senses: reuse the entry kept for this lemma
        if (isset($lexEntries[$lemma])) {
            $lexicalEntry = $lexEntries[$lemma];
            $lmfLemma = $lexicalEntry->getLemma();
        } else {
            $lexicalEntry = new Owl\LmfLexicalEntry($resourceName);
            $lexicalEntry->setUri($this->getUriFactory()->create('LexicalEntry', $lemma, 0));

            $lmfLemma = new Owl\LmfLemma();
            $lmfLemma->setWrittenForm($lemma);
            $lmfLemma->setUri($this->getUriFactory()->create('Lemma', $lemma, $id));
            $lexicalEntry->setLemma($lmfLemma);
        }

        $lmfSense = new Owl\LmfSense();
        $lmfSense->setLemmaWrittenForm($lmfLemma->getWrittenForm());
        if ($homonym) {
            $lmfSense->setRank($homonym);
        }
        $lmfSense->setUri($this->getUriFactory()->create('Sense', $lemma, $senseKey));

        $lmfDefinition = new Owl\LmfDefinition();
        $lmfDefinition->setUri($this->getUriFactory()->create('Definition', $lemma, $senseKey));

        $lmfTextRepresentation = new Owl\LmfTextRepresentation();
        $lmfTextRepresentation->setUri(
            $this->getUriFactory()->create('TextRepresentation', $lemma, $senseKey)
        );
        $lmfTextRepresentation->setWrittenForm(
            $this->renderLmfDefinitionText($valcontexts, $antonyms)
        );
        $lmfDefinition->addTextRepresentation($lmfTextRepresentation);
        $lmfSense->setDefinition($lmfDefinition);

        // Sense relations: only the written form of the antonym is recorded,
        // no lexical entry is created for the antonym itself
        foreach ($antonyms as $position => $antonym) {
            $senseRelation = new Owl\LmfSenseRelation();
            $senseRelation->setUri(
                $this->getUriFactory()->create('SenseRelation', $lemma, $id . '-' . $position)
            );
            $senseRelation->setType('Antonimas');
            $senseRelation->setRank($position + 1);
            $senseRelation->setWrittenForm($antonym['antonym']);
            $lmfSense->addSenseRelation($senseRelation);
        }

        $lexicalEntry->addSense($lmfSense);

        // Word forms; the rank restarts at 1 for every record
        if (!empty($metadata['wordForms'])) {
            $rank = 1;
            foreach ($metadata['wordForms'] as $wordForm) {
                $lmfWordForm = new Owl\LmfWordForm();
                $lmfWordForm->setUri(
                    $this->getUriFactory()->create('WordForm', $wordForm, $id . '-' . $rank++)
                );
                $lmfWordForm->setWrittenForm($wordForm);
                $lexicalEntry->addWordForm($lmfWordForm);
            }
        }

        if (!$homonym) {
            // Not a homonym: the entry is complete, write it now
            fwrite($fileIndividuals, $lexicalEntry->toLmfString());
        } else {
            // Homonym: keep the entry so later records can add senses
            $lexEntries[$lemma] = $lexicalEntry;
        }
    }

    // Save homonyms
    foreach ($lexEntries as $lexEntry) {
        fwrite($fileIndividuals, $lexEntry->toLmfString());
    }

    fclose($fileIndividuals);
}

/**
 * Parses the embedded metadata XML of one record into an array.
 *
 * All <el> elements below the first <record> element are visited. Elements
 * without a value are ignored unless they are named "valcontext". Keys of
 * the result (each present only when found):
 *  - homonym:     value of the "homonym" element
 *  - lemma:       value of the first "word" element, HTML-escaped
 *  - wordForms:   values of every further "word" element, HTML-escaped
 *  - antonyms:    one array per "antonym" element: 'antonym' => value, plus
 *                 for each child element its name => list of values
 *  - valcontexts: one array per "valcontext" element: 'examples' => list of
 *                 array('example' => getChildNodesArray() result) for
 *                 "example" children, and name => list of values for all
 *                 other child elements
 *
 * @param string $metadataXml XML text taken from the record's metadata node
 *
 * @return array Parsed metadata; empty when no <record> element is found
 */
private function parseLmfRecordMetadata($metadataXml)
{
    $parsed = array();

    $metadata = new \DOMDocument('1.0', 'UTF-8');
    $metadata->loadXML($metadataXml);

    // item(0) is null when the XML is malformed or has no <record>
    $record = $metadata->getElementsByTagName('record')->item(0);
    if ($record === null) {
        return $parsed;
    }

    foreach ($record->getElementsByTagName('el') as $el) {
        /* @var $el \DOMElement */
        $name  = $el->getAttribute('name');
        $value = $el->getAttribute('value');

        if (!$value && $name !== 'valcontext') {
            continue;
        }

        if ($name === 'homonym') {
            $parsed['homonym'] = $value;
        } elseif ($name === 'word') {
            if (!isset($parsed['lemma'])) {
                // First is lemma
                $parsed['lemma'] = htmlspecialchars($value);
            } else {
                // Second and next are word forms
                $parsed['wordForms'][] = htmlspecialchars($value);
            }
        } elseif ($name === 'antonym') {
            $antonym = array('antonym' => $value);
            foreach ($el->childNodes as $param) {
                // Text nodes between the elements are ignored
                if ($param instanceof \DOMElement) {
                    $antonym[$param->getAttribute('name')][] = $param->getAttribute('value');
                }
            }
            $parsed['antonyms'][] = $antonym;
        } elseif ($name === 'valcontext') {
            // All child elements belong to the same value context
            $valcontext = array();
            foreach ($el->childNodes as $child) {
                // Text nodes between the elements are ignored
                if (!($child instanceof \DOMElement)) {
                    continue;
                }
                $childName = $child->getAttribute('name');
                if ($childName === 'example') {
                    $valcontext['examples'][] = array(
                        $childName => $this->getChildNodesArray($child)
                    );
                } else {
                    $valcontext[$childName][] = $child->getAttribute('value');
                }
            }
            $parsed['valcontexts'][] = $valcontext;
        }
    }

    return $parsed;
}

/**
 * Renders the definition text of a sense as an HTML fragment wrapped in a
 * CDATA section.
 *
 * For every value context the output contains, in this order: its number
 * (only when there is more than one context), the explanation followed by
 * the comma separated antonyms, the antonym explanation, and the examples
 * with their tags.
 *
 * @param array $valcontexts Value contexts as built by parseLmfRecordMetadata()
 * @param array $antonyms    Antonyms as built by parseLmfRecordMetadata()
 *
 * @return string
 */
private function renderLmfDefinitionText(array $valcontexts, array $antonyms)
{
    $boldOpen = "\n<span style=\"font-weight: bold;\">";

    // Joining avoids trimming a trailing ", " afterwards, which damaged the
    // preceding "<br />" whenever a record had no antonyms
    $antonymSpans = array();
    foreach ($antonyms as $antonym) {
        $antonymSpans[] = $boldOpen . $antonym['antonym'] . '</span>';
    }
    $antonymList = $antonymSpans ? implode(', ', $antonymSpans) . ' ' : '';

    $isNumbered = count($valcontexts) > 1;

    $text = '<![CDATA[';
    foreach ($valcontexts as $index => $context) {
        if ($isNumbered) {
            $text .= $boldOpen . ($index + 1) . '</span>';
        }

        // Explanation, followed by the antonyms
        if (isset($context['expl'])) {
            $text .= "\n<em>{$context['expl'][0]}</em> <br />" . $antonymList;
        }

        // Antonym explanation
        if (isset($context['antexpl'])) {
            $text .= "\n<br/> <em>{$context['antexpl'][0]}</em>";
        }

        // Examples
        if (isset($context['examples'])) {
            foreach ($context['examples'] as $entry) {
                if (!isset($entry['example']['value'])) {
                    continue;
                }
                $text .= "\n<br />{$entry['example']['value']}";

                // Tags
                if (isset($entry['example']['children'])) {
                    foreach ($entry['example']['children'] as $child) {
                        if (isset($child['exampletag']['value'])) {
                            $text .= " {$child['exampletag']['value']}.";
                        }
                    }
                }
            }
        }
    }

    return $text . ' ]]>';
}