Exemplo n.º 1
0
 /**
  * Builds LMF lexical entries from an XML export and writes them to a file.
  *
  * Each <return> element of the input is one record. Its <metadata> child
  * carries a nested XML document (as text) whose first <record> element
  * holds <el name="..." value="..."> items:
  *  - AntrastinisZodis: the lemma
  *  - Forma:            a word form (may repeat)
  *  - Tarimas:          the pronunciation (parsed, not emitted yet - TODO)
  *  - Reiksme:          a sense; its child elements supply KalbosDalis
  *                      (part of speech) and Atitikmuo (equivalents)
  * Every other child of <return> (e.g. id) is stored under its node name.
  *
  * A record yields one lexical entry per distinct part of speech found in
  * its senses. Records without a lemma are skipped.
  *
  * @param string $filename          Path of the XML export to read.
  * @param string $fileOfIndividuals Path of the output file (opened with "w+", so truncated).
  *
  * @return void
  *
  * @throws \RuntimeException If the input cannot be read or parsed, or the
  *                           output file cannot be opened.
  */
 private function buildLmfIndividuals($filename, $fileOfIndividuals)
 {
     $resourceName = $this->getResourceName();

     // Fail early instead of feeding false/empty data to the XML parser.
     $xml = is_readable($filename) ? file_get_contents($filename) : false;
     if ($xml === false || $xml === '') {
         throw new \RuntimeException(sprintf('Cannot read XML input file "%s".', $filename));
     }
     $dom = new \DOMDocument('1.0', 'UTF-8');
     if (!$dom->loadXML($xml)) {
         throw new \RuntimeException(sprintf('Cannot parse XML input file "%s".', $filename));
     }
     // The output is opened only after the input parsed, so a bad input
     // no longer truncates an existing output file.
     $fileIndividuals = fopen($fileOfIndividuals, 'w+');
     if ($fileIndividuals === false) {
         throw new \RuntimeException(sprintf('Cannot open output file "%s" for writing.', $fileOfIndividuals));
     }

     foreach ($dom->getElementsByTagName('return') as $domRecord) {
         /* @var $domRecord \DOMElement */
         /*
          * $arr collects, keyed by node name:
          * - id
          * - header
          * - status
          * - metadata
          *      - lemma
          *      - pronunciation (TODO: not emitted yet)
          *      - wordForms
          *      - senses
          *          - partOfSpeach
          *          - equivalent
          */
         $arr = array();
         foreach ($domRecord->childNodes as $node) {
             if ($node->nodeName !== 'metadata') {
                 $arr[$node->nodeName] = $node->nodeValue;
                 continue;
             }

             // The metadata node carries a whole XML document as its text.
             $ins = array();
             $metadata = new \DOMDocument('1.0', 'UTF-8');
             $metadataXml = (string) $node->nodeValue;
             $record = null;
             if ($metadataXml !== '' && $metadata->loadXML($metadataXml)) {
                 $record = $metadata->getElementsByTagName('record')->item(0);
             }
             // A missing or unparsable <record> leaves $ins empty, so the
             // record is skipped below instead of crashing on null.
             $elements = $record !== null ? $record->getElementsByTagName('el') : array();
             foreach ($elements as $el) {
                 /* @var $el \DOMElement */
                 $name = $el->getAttribute('name');
                 $value = $el->getAttribute('value');

                 if ($name === 'Reiksme') {
                     // Senses are taken even when they have no value of their own.
                     $senseArr = array();
                     foreach ($el->childNodes as $child) {
                         // Skip the text nodes between the child elements.
                         if (!($child instanceof \DOMElement)) {
                             continue;
                         }
                         $childName = $child->getAttribute('name');
                         if ($childName === 'KalbosDalis') {
                             $senseArr['partOfSpeach'] = $this->fullAbbreviation($child->getAttribute('value'));
                         } elseif ($childName === 'Atitikmuo') {
                             // The dictionary can contain illegal XML characters.
                             $senseArr['equivalent'][] = htmlspecialchars($child->getAttribute('value'));
                         }
                     }
                     $ins['senses'][] = $senseArr;
                 } elseif ($value === '') {
                     // Explicit empty check: a plain truthiness test would
                     // also discard the legitimate value "0".
                     continue;
                 } elseif ($name === 'AntrastinisZodis') {
                     $ins['lemma'] = htmlspecialchars($value);
                 } elseif ($name === 'Forma') {
                     $ins['wordForms'][] = $value;
                 } elseif ($name === 'Tarimas') {
                     $ins['pronunciation'] = $value;
                 }
             }
             $arr[$node->nodeName] = $ins;
         }

         if (!isset($arr['metadata']['lemma'])) {
             continue;
         }

         $lemma = $arr['metadata']['lemma'];
         $recordId = isset($arr['id']) ? $arr['id'] : null;
         $senses = isset($arr['metadata']['senses']) ? $arr['metadata']['senses'] : array();

         $lexicalEntries = array();
         // Reset per record so the word forms below can never be attached
         // to an entry left over from a previous record.
         $lexicalEntry = null;
         $senseNr = 1;
         foreach ($senses as $sense) {
             $partOfSpeech = isset($sense['partOfSpeach']) ? $sense['partOfSpeach'] : null;
             $equivalents = isset($sense['equivalent']) ? $sense['equivalent'] : array();

             // Reuse the entry that already covers this part of speech, if any.
             $lexicalEntry = null;
             foreach ($lexicalEntries as $lexEntry) {
                 /* @var $lexEntry Owl\LmfLexicalEntry */
                 if ($lexEntry->getPartOfSpeech() == $partOfSpeech) {
                     $lexicalEntry = $lexEntry;
                 }
             }
             if ($lexicalEntry === null) {
                 // The first entry uses the bare lemma in its URIs; later
                 // ones get a "-<n>" suffix to keep the URIs distinct.
                 $uriName = $lemma;
                 if (count($lexicalEntries) > 0) {
                     $uriName .= '-' . (count($lexicalEntries) + 1);
                 }
                 $lexicalEntry = new Owl\LmfLexicalEntry($resourceName);
                 $lexicalEntry->setUri($this->getUriFactory()->create('LexicalEntry', $uriName, $recordId));
                 $lmfLemma = new Owl\LmfLemma();
                 $lmfLemma->setWrittenForm($lemma);
                 $lmfLemma->setUri($this->getUriFactory()->create('Lemma', $uriName, $recordId));
                 $lexicalEntry->setLemma($lmfLemma);
                 $lexicalEntry->setPartOfSpeech($partOfSpeech);
                 $lexicalEntries[] = $lexicalEntry;
             }

             $writtenForm = $lexicalEntry->getLemma()->getWrittenForm();
             $lmfSense = new Owl\LmfSense();
             $lmfSense->setUri($this->getUriFactory()->create('Sense', $writtenForm, $recordId . '-' . $senseNr++));
             $lmfSense->setLemmaWrittenForm($writtenForm);

             $rank = 1;
             foreach ($equivalents as $equivalent) {
                 // Data fix carried over from the original code, whose note
                 // says the first space of this phrase is not a normal one.
                 // NOTE(review): both literals look identical in most
                 // editors - confirm their bytes before touching them.
                 if ($equivalent === 'patekti į nepatogią padėtį') {
                     $equivalent = 'patekti į nepatogią padėtį';
                 }
                 $lmfEquivalent = new Owl\LmfEquivalent();
                 $lmfEquivalent->setUri($this->getUriFactory()->create('Equivalent', $equivalent, $recordId . '-' . $rank));
                 $lmfEquivalent->setLanguage('Lietuvių');
                 $lmfEquivalent->setWrittenForm($equivalent);
                 $lmfEquivalent->setRank($rank++);
                 $lmfSense->addEquivalent($lmfEquivalent);
             }
             $lexicalEntry->addSense($lmfSense);
         }

         // Word forms go to the entry of the last sense, as before.
         // NOTE(review): confirm whether they belong on every entry instead.
         if ($lexicalEntry !== null && !empty($arr['metadata']['wordForms'])) {
             $rank = 1;
             foreach ($arr['metadata']['wordForms'] as $wordForm) {
                 $lmfWordForm = new Owl\LmfWordForm();
                 $lmfWordForm->setUri($this->getUriFactory()->create('WordForm', $wordForm, $recordId . '-' . $rank++));
                 $lmfWordForm->setWrittenForm($wordForm);
                 $lexicalEntry->addWordForm($lmfWordForm);
             }
         }

         // A record with several parts of speech produces several entries.
         foreach ($lexicalEntries as $entry) {
             fwrite($fileIndividuals, $entry->toLmfString());
         }
     }
     fclose($fileIndividuals);
 }
Exemplo n.º 2
0
 /**
  * Builds LMF lexical entries from an XML export and writes them to a file.
  *
  * Each <return> element of the input is one record. Its <metadata> child
  * carries a nested XML document (as text) whose first <record> element
  * holds <el name="..." value="..."> items:
  *  - AntrastinisZodis: the lemma
  *  - Reiksme:          a sense; its child elements supply
  *                        - Atitikmuo: an equivalent (may repeat)
  *                        - Forma:     a word form (may repeat)
  *                        - Tarimas:   the pronunciation
  * Every other child of <return> (e.g. id) is stored under its node name.
  *
  * One lexical entry is written per record that has a lemma, and a progress
  * line is echoed for each of them.
  *
  * TODO: word forms and pronunciation are parsed per sense but not emitted
  * yet; showing of equivalent forms still has to be fixed.
  *
  * @param string $filename          Path of the XML export to read.
  * @param string $fileOfIndividuals Path of the output file (opened with "w+", so truncated).
  *
  * @return void
  *
  * @throws \RuntimeException If the input cannot be read or parsed, or the
  *                           output file cannot be opened.
  */
 private function buildLmfIndividuals($filename, $fileOfIndividuals)
 {
     $resourceName = $this->getResourceName();

     // Fail early instead of feeding false/empty data to the XML parser.
     $xml = is_readable($filename) ? file_get_contents($filename) : false;
     if ($xml === false || $xml === '') {
         throw new \RuntimeException(sprintf('Cannot read XML input file "%s".', $filename));
     }
     $dom = new \DOMDocument('1.0', 'UTF-8');
     if (!$dom->loadXML($xml)) {
         throw new \RuntimeException(sprintf('Cannot parse XML input file "%s".', $filename));
     }
     // The output is opened only after the input parsed, so a bad input
     // no longer truncates an existing output file.
     $fileIndividuals = fopen($fileOfIndividuals, 'w+');
     if ($fileIndividuals === false) {
         throw new \RuntimeException(sprintf('Cannot open output file "%s" for writing.', $fileOfIndividuals));
     }

     $recordNr = 1;
     foreach ($dom->getElementsByTagName('return') as $domRecord) {
         /* @var $domRecord \DOMElement */
         /*
          * $arr collects, keyed by node name:
          * - id
          * - header
          * - status
          * - metadata
          *      - lemma
          *      - senses
          *          - pronunciation (TODO: not emitted yet)
          *          - wordForms     (TODO: not emitted yet)
          *          - equivalent
          */
         $arr = array();
         foreach ($domRecord->childNodes as $node) {
             if ($node->nodeName !== 'metadata') {
                 $arr[$node->nodeName] = $node->nodeValue;
                 continue;
             }

             // The metadata node carries a whole XML document as its text.
             $ins = array();
             $metadata = new \DOMDocument('1.0', 'UTF-8');
             $metadataXml = (string) $node->nodeValue;
             $record = null;
             if ($metadataXml !== '' && $metadata->loadXML($metadataXml)) {
                 $record = $metadata->getElementsByTagName('record')->item(0);
             }
             // A missing or unparsable <record> leaves $ins empty, so the
             // record is skipped below instead of crashing on null.
             $elements = $record !== null ? $record->getElementsByTagName('el') : array();
             foreach ($elements as $el) {
                 /* @var $el \DOMElement */
                 $name = $el->getAttribute('name');
                 $value = $el->getAttribute('value');

                 if ($name === 'Reiksme') {
                     // Senses are taken even when they have no value of their own.
                     $senseArr = array();
                     foreach ($el->childNodes as $child) {
                         // Skip the text nodes between the child elements.
                         if (!($child instanceof \DOMElement)) {
                             continue;
                         }
                         $childName = $child->getAttribute('name');
                         if ($childName === 'Forma') {
                             $senseArr['wordForms'][] = $child->getAttribute('value');
                         } elseif ($childName === 'Tarimas') {
                             $senseArr['pronunciation'] = $child->getAttribute('value');
                         } elseif ($childName === 'Atitikmuo') {
                             $senseArr['equivalent'][] = htmlspecialchars($child->getAttribute('value'));
                         }
                     }
                     $ins['senses'][] = $senseArr;
                 } elseif ($name === 'AntrastinisZodis' && $value !== '') {
                     // Explicit empty check: a plain truthiness test would
                     // also discard the legitimate value "0".
                     //
                     // Data fix carried over from the original code, whose
                     // note says the first space of this phrase is not a
                     // normal one.
                     // NOTE(review): both literals look identical in most
                     // editors - confirm their bytes before touching them.
                     if ($value === 'patekti į nepatogią padėtį') {
                         $ins['lemma'] = 'patekti į nepatogią padėtį';
                     } else {
                         $ins['lemma'] = htmlspecialchars($value);
                     }
                 }
             }
             $arr[$node->nodeName] = $ins;
         }

         if (!isset($arr['metadata']['lemma'])) {
             continue;
         }

         $lemma = $arr['metadata']['lemma'];
         $recordId = isset($arr['id']) ? $arr['id'] : null;
         $senses = isset($arr['metadata']['senses']) ? $arr['metadata']['senses'] : array();

         $lexicalEntry = new Owl\LmfLexicalEntry($resourceName);
         $lexicalEntry->setUri($this->getUriFactory()->create('LexicalEntry', $lemma, $recordId));
         $lmfLemma = new Owl\LmfLemma();
         $lmfLemma->setWrittenForm($lemma);
         $lmfLemma->setUri($this->getUriFactory()->create('Lemma', $lemma, $recordId));
         $lexicalEntry->setLemma($lmfLemma);

         $senseNr = 1;
         foreach ($senses as $sense) {
             // A sense holding only word forms or a pronunciation has no
             // equivalents at all.
             $equivalents = isset($sense['equivalent']) ? $sense['equivalent'] : array();

             $writtenForm = $lexicalEntry->getLemma()->getWrittenForm();
             $lmfSense = new Owl\LmfSense();
             $lmfSense->setUri($this->getUriFactory()->create('Sense', $writtenForm, $recordId . '-' . $senseNr++));
             $lmfSense->setLemmaWrittenForm($writtenForm);

             $rank = 1;
             foreach ($equivalents as $equivalent) {
                 $lmfEquivalent = new Owl\LmfEquivalent();
                 $lmfEquivalent->setUri($this->getUriFactory()->create('Equivalent', $equivalent, $recordId . '-' . $rank));
                 $lmfEquivalent->setLanguage('Anglų');
                 $lmfEquivalent->setWrittenForm($equivalent);
                 $lmfEquivalent->setRank($rank++);
                 $lmfSense->addEquivalent($lmfEquivalent);
             }
             $lexicalEntry->addSense($lmfSense);
         }

         // TODO: emit Owl\LmfWordForm individuals from each sense's
         // 'wordForms' list once their representation is settled.

         fwrite($fileIndividuals, $lexicalEntry->toLmfString());
         // Progress output, one line per written record.
         echo '<br />' . $recordNr++ . '-' . $recordId . '-' . $lemma . "\n";
     }
     fclose($fileIndividuals);
 }
Exemplo n.º 3
0
 /**
  * Builds LMF lexical entries from an XML export and writes them to a file.
  *
  * Each <return> element of the input is one record. Its <metadata> child
  * carries a nested XML document (as text) whose first <record> element
  * holds <el name="..." value="..."> items:
  *  - AntrastinisZodis: the lemma
  *  - Reiksme:          a sense; its value becomes the single equivalent
  *  - Straipsnelis:     some encoded text, not processed (TODO)
  *  - NuorodosId:       not used
  * Every other child of <return> (e.g. id) is stored under its node name.
  *
  * One lexical entry is written per record that has a lemma, and a progress
  * line is echoed for each of them.
  *
  * TODO: add pronunciation and word forms.
  *
  * @param string $filename          Path of the XML export to read.
  * @param string $fileOfIndividuals Path of the output file (opened with "w+", so truncated).
  *
  * @return void
  *
  * @throws \RuntimeException If the input cannot be read or parsed, or the
  *                           output file cannot be opened.
  */
 private function buildLmfIndividuals($filename, $fileOfIndividuals)
 {
     $resourceName = $this->getResourceName();

     // Fail early instead of feeding false/empty data to the XML parser.
     $xml = is_readable($filename) ? file_get_contents($filename) : false;
     if ($xml === false || $xml === '') {
         throw new \RuntimeException(sprintf('Cannot read XML input file "%s".', $filename));
     }
     $dom = new \DOMDocument('1.0', 'UTF-8');
     if (!$dom->loadXML($xml)) {
         throw new \RuntimeException(sprintf('Cannot parse XML input file "%s".', $filename));
     }
     // The output is opened only after the input parsed, so a bad input
     // no longer truncates an existing output file.
     $fileIndividuals = fopen($fileOfIndividuals, 'w+');
     if ($fileIndividuals === false) {
         throw new \RuntimeException(sprintf('Cannot open output file "%s" for writing.', $fileOfIndividuals));
     }

     $recordNr = 1;
     foreach ($dom->getElementsByTagName('return') as $domRecord) {
         /* @var $domRecord \DOMElement */
         /*
          * $arr collects, keyed by node name:
          * - id
          * - header
          * - status
          * - metadata
          *      - lemma
          *      - senses
          *          - equivalent
          */
         $arr = array();
         foreach ($domRecord->childNodes as $node) {
             if ($node->nodeName !== 'metadata') {
                 $arr[$node->nodeName] = $node->nodeValue;
                 continue;
             }

             // The metadata node carries a whole XML document as its text.
             $ins = array();
             $metadata = new \DOMDocument('1.0', 'UTF-8');
             $metadataXml = (string) $node->nodeValue;
             $record = null;
             if ($metadataXml !== '' && $metadata->loadXML($metadataXml)) {
                 $record = $metadata->getElementsByTagName('record')->item(0);
             }
             // A missing or unparsable <record> leaves $ins empty, so the
             // record is skipped below instead of crashing on null.
             $elements = $record !== null ? $record->getElementsByTagName('el') : array();
             foreach ($elements as $el) {
                 /* @var $el \DOMElement */
                 $name = $el->getAttribute('name');
                 $value = $el->getAttribute('value');

                 if ($name === 'Reiksme') {
                     // Each sense contributes exactly one equivalent: its
                     // own value (kept even when that value is empty).
                     $ins['senses'][] = array('equivalent' => array(htmlspecialchars($value)));
                 } elseif ($name === 'AntrastinisZodis' && $value !== '') {
                     // Explicit empty check: a plain truthiness test would
                     // also discard the legitimate value "0".
                     $ins['lemma'] = htmlspecialchars($value);
                 }
             }
             $arr[$node->nodeName] = $ins;
         }

         if (!isset($arr['metadata']['lemma'])) {
             continue;
         }

         $lemma = $arr['metadata']['lemma'];
         $recordId = isset($arr['id']) ? $arr['id'] : null;
         $senses = isset($arr['metadata']['senses']) ? $arr['metadata']['senses'] : array();

         $lexicalEntry = new Owl\LmfLexicalEntry($resourceName);
         $lexicalEntry->setUri($this->getUriFactory()->create('LexicalEntry', $lemma, $recordId));
         $lmfLemma = new Owl\LmfLemma();
         $lmfLemma->setWrittenForm($lemma);
         $lmfLemma->setUri($this->getUriFactory()->create('Lemma', $lemma, $recordId));
         $lexicalEntry->setLemma($lmfLemma);

         $senseNr = 1;
         foreach ($senses as $sense) {
             $writtenForm = $lexicalEntry->getLemma()->getWrittenForm();
             $lmfSense = new Owl\LmfSense();
             $lmfSense->setUri($this->getUriFactory()->create('Sense', $writtenForm, $recordId . '-' . $senseNr++));
             $lmfSense->setLemmaWrittenForm($writtenForm);

             $rank = 1;
             foreach ($sense['equivalent'] as $equivalent) {
                 $lmfEquivalent = new Owl\LmfEquivalent();
                 $lmfEquivalent->setUri($this->getUriFactory()->create('Equivalent', $equivalent, $recordId . '-' . $rank));
                 $lmfEquivalent->setLanguage('Lietuvių');
                 $lmfEquivalent->setWrittenForm($equivalent);
                 $lmfEquivalent->setRank($rank++);
                 $lmfSense->addEquivalent($lmfEquivalent);
             }
             $lexicalEntry->addSense($lmfSense);
         }

         fwrite($fileIndividuals, $lexicalEntry->toLmfString());
         // Progress output, one line per written record.
         echo '<br />' . $recordNr++ . '-' . $recordId . '-' . $lemma . "\n";
     }
     fclose($fileIndividuals);
 }