Beispiel #1
0
 /**
  * Converts an exported idiom-dictionary XML file into LMF individuals.
  *
  * Every <return> element of the input is one record whose children are
  * id, header, metadata and status. The text of <metadata> is itself an XML
  * document; the <el name=".." value=".."> nodes of its first <record> are
  * read as follows:
  *
  *   - word  [1..n]  the first one is the lemma, further ones are word forms
  *   - idiom [1..n]  all child elements are collected per idiom
  *       - idiomtag
  *       - expl
  *           - example
  *               - exampletag
  *       - link
  *           - linktag
  *   - entryfulltext  (not used)
  *
  * Records with status '-1' or without a lemma are skipped. Each remaining
  * record becomes one lexical entry (lemma, one sense with an HTML definition
  * wrapped in CDATA, optional word forms) that is appended to the output file.
  *
  * @param string $filename          Path of the XML export to read.
  * @param string $fileOfIndividuals Path of the output file; it is truncated first.
  *
  * @return void
  *
  * @throws \RuntimeException If the input cannot be read or parsed, or the
  *                           output file cannot be opened.
  */
 protected function buildLmfIndividuals($filename, $fileOfIndividuals)
 {
     $resourceName = $this->getResourceName();

     // Fail loudly instead of continuing with an invalid handle or document.
     $xml = file_get_contents($filename);
     if ($xml === false || $xml === '') {
         throw new \RuntimeException("Unable to read XML source file: {$filename}");
     }
     $dom = new \DOMDocument('1.0', 'UTF-8');
     if (!$dom->loadXML($xml)) {
         throw new \RuntimeException("Unable to parse XML source file: {$filename}");
     }
     $fileIndividuals = fopen($fileOfIndividuals, 'w+');
     if ($fileIndividuals === false) {
         throw new \RuntimeException("Unable to open output file: {$fileOfIndividuals}");
     }

     foreach ($dom->getElementsByTagName('return') as $domRecord) {
         /* @var $domRecord \DOMElement */
         // Step 1: flatten the record into an array keyed by child node name.
         $arr = array();
         foreach ($domRecord->childNodes as $node) {
             if ($node->nodeName == 'metadata' && $node->nodeValue) {
                 $ins = array();
                 $metadata = new \DOMDocument('1.0', 'UTF-8');
                 // A broken or record-less metadata payload yields an empty $ins,
                 // so the record is skipped below instead of causing a fatal error.
                 $record = $metadata->loadXML($node->nodeValue)
                     ? $metadata->getElementsByTagName('record')->item(0)
                     : null;
                 $elements = $record instanceof \DOMElement
                     ? $record->getElementsByTagName('el')
                     : array();
                 foreach ($elements as $el) {
                     /* @var $el \DOMElement */
                     $name = $el->getAttribute('name');
                     $value = $el->getAttribute('value');
                     if ($name == 'word') {
                         // Words are accepted even with an empty value.
                         $word = htmlspecialchars($value);
                         if (!isset($ins['lemma'])) {
                             // First word is the lemma
                             $ins['lemma'] = $word;
                         } else {
                             // Second and following words are word forms
                             $ins['wordForms'][] = $word;
                         }
                     } elseif ($name == 'idiom' && $value) {
                         $idiomsArr = array('idiom' => $value, 'explanations' => array());
                         // All child elements belong to the same idiom
                         foreach ($el->childNodes as $child) {
                             // Text nodes between the elements are ignored
                             if (!$child instanceof \DOMElement) {
                                 continue;
                             }
                             $childName = $child->getAttribute('name');
                             if ($childName == 'expl') {
                                 $idiomsArr['explanations'][] = array($childName => $this->getChildNodesArray($child));
                             } elseif ($childName == 'link') {
                                 $idiomsArr['links'][] = array($childName => $this->getChildNodesArray($child));
                             } else {
                                 $idiomsArr[$childName][] = $child->getAttribute('value');
                             }
                         }
                         $ins['idioms'][] = $idiomsArr;
                     }
                 }
                 $arr[$node->nodeName] = $ins;
             } else {
                 $arr[$node->nodeName] = $node->nodeValue;
             }
         }

         // Step 2: convert the array (id, header, status, metadata) to a lexical entry.
         // Loose comparison is kept on purpose; a missing status does not skip the record.
         $status = isset($arr['status']) ? $arr['status'] : null;
         if ($status == '-1' || empty($arr['metadata']['lemma'])) {
             continue;
         }
         $id = isset($arr['id']) ? $arr['id'] : null;
         $lemma = $arr['metadata']['lemma'];
         // A record may legitimately contain words but no idioms.
         $idioms = isset($arr['metadata']['idioms']) ? $arr['metadata']['idioms'] : array();

         $lexicalEntry = new Owl\LmfLexicalEntry($resourceName);
         $lexicalEntry->setUri($this->getUriFactory()->create('LexicalEntry', $lemma, $id));
         $lmfLemma = new Owl\LmfLemma();
         $lmfLemma->setWrittenForm($lemma);
         $lmfLemma->setUri($this->getUriFactory()->create('Lemma', $lemma, $id));
         $lexicalEntry->setLemma($lmfLemma);
         $lmfSense = new Owl\LmfSense();
         $lmfSense->setLemmaWrittenForm($lmfLemma->getWrittenForm());
         $lmfSense->setUri($this->getUriFactory()->create('Sense', $lemma, $id));
         $lmfDefintion = new Owl\LmfDefinition();
         $lmfDefintion->setUri($this->getUriFactory()->create('Definition', $lemma, $id));
         $lmfTextRepresentation = new Owl\LmfTextRepresentation();
         $lmfTextRepresentation->setUri($this->getUriFactory()->create('TextRepresentation', $lemma, $id));

         // The definition text is an HTML fragment wrapped in a CDATA section.
         $writtenForm = "<![CDATA[";
         foreach ($idioms as $attr) {
             if (!isset($attr['idiom'])) {
                 continue;
             }
             $writtenForm .= "\n<br/><span style=\"font-weight: bold;\">{$attr['idiom']}</span> ";
             // Idiom tags
             if (isset($attr['idiomtag'])) {
                 $writtenForm .= implode('., ', $attr['idiomtag']) . ". ";
             }
             // Explanations; they are numbered only when there is more than one
             if (isset($attr['explanations'])) {
                 $countExpl = count($attr['explanations']);
                 foreach ($attr['explanations'] as $explNr => $expls) {
                     if (!isset($expls['expl'])) {
                         continue;
                     }
                     if ($countExpl > 1) {
                         $writtenForm .= "\n<br/> <em>" . ($explNr + 1) . ".</em><i>{$expls['expl']['value']}:</i>";
                     } else {
                         $writtenForm .= "<i>{$expls['expl']['value']}:</i>";
                     }
                     // Examples, each optionally followed by its first tag
                     if (isset($expls['expl']['children'])) {
                         foreach ($expls['expl']['children'] as $example) {
                             if (isset($example['example']['value'])) {
                                 $writtenForm .= " {$example['example']['value']}.";
                             }
                             if (isset($example['example']['children'][0]['exampletag']['value'])) {
                                 $writtenForm .= " " . $example['example']['children'][0]['exampletag']['value'] . ".";
                             }
                         }
                     }
                 }
             }
             // TODO make real links between lemmas
             if (isset($attr['links'])) {
                 foreach ($attr['links'] as $link) {
                     if (isset($link['link']['value'])) {
                         if (isset($link['link']['children'][0]['linktag']['value'])) {
                             $writtenForm .= "<i>{$link['link']['children'][0]['linktag']['value']}</i> ";
                         }
                         $writtenForm .= $link['link']['value'];
                     }
                 }
             }
         }
         $writtenForm .= "]]>";

         $lmfTextRepresentation->setWrittenForm($writtenForm);
         $lmfDefintion->addTextRepresentation($lmfTextRepresentation);
         $lmfSense->setDefinition($lmfDefintion);
         $lexicalEntry->addSense($lmfSense);

         // Word forms get a running rank appended to the record id in their URI
         if (!empty($arr['metadata']['wordForms'])) {
             $rank = 1;
             foreach ($arr['metadata']['wordForms'] as $wordForm) {
                 $lmfWordForm = new Owl\LmfWordForm();
                 $lmfWordForm->setUri($this->getUriFactory()->create('WordForm', $wordForm, $id . '-' . $rank++));
                 $lmfWordForm->setWrittenForm($wordForm);
                 $lexicalEntry->addWordForm($lmfWordForm);
             }
         }
         fwrite($fileIndividuals, $lexicalEntry->toLmfString());
     }
     fclose($fileIndividuals);
 }
Beispiel #2
0
 /**
  * Converts an exported card-index XML file into LMF individuals.
  *
  * Every <return> element of the input is one record whose children are
  * id, header, metadata and status. The text of <metadata> is itself an XML
  * document; its first <record> is read as follows (numbers in parentheses
  * are the instance counts noted by the original author):
  *
  *   child elements of <record>
  *     writer (4641)                     * provenance -> recorders
  *     imageURLs (48822)                 * image 126x166
  *     sourcelink (27369)                * provenance -> sources
  *     gramref_header (26)
  *   attributes of <record>
  *     - kartoteka (48822)               - card index name, always "Kartoteka 1"
  *     - word (48822)                    * heading: accented word or phrase
  *     - word_header (48718)             - used by the LKIIS search
  *     - cardno (48822)                  * card number
  *     - box (48822)                     - not used
  *     - images (48822)                  - image numbers
  *     - sourcelocation (16140)          * provenance -> location (given with the source)
  *     - word_subtitle (8326)            * subheading
  *     - note (328)                      * note
  *     - sourceauthor (483)              * provenance -> informant
  *     - writedate (1580)                * provenance -> year of recording
  *     - wordvariant_subtitle (38)       - not displayed
  *     - sourcelocation_free (837)       * provenance -> location as written on the card
  *     - sourcelocation_geocode (2733)   - opens an interactive window
  *     - wordvariant_header (459)        * heading -> word variant
  *     - femineform_header (2530)        * heading -> feminine form
  *     - repeatable_forms3 (3009)        * heading -> third form
  *     - repeatable_forms2 (3015)        * heading -> second form
  *     - sourcelink_free (2647)          * provenance -> source as written on the card
  *     - writer_free (424)               * provenance -> recorder as written on the card
  *     - repeatable_forms3_subtitle (30)
  *     - repeatable_forms2_subtitle (30)
  *     - repeatable_forms4 (12)
  *     - femineform_subtitle (49)
  *     - gram (2234)                     - not displayed; accentuation class (e.g. 3b)
  *     - gram_subtitle (51)
  *     - writercomment (4)
  *     - sourceauthoryears (4)           * provenance -> informant's age / year of birth
  *     - bugacard (18), corrections (1), homonym (4), unusable (1),
  *       explanation_header (1), confidence (1), explanation_subtitle (1), content (2)
  *
  * Only word, cardno, sourcelocation, sourcelink and the first image URL are
  * used for the output. Records with status '-1' or without a word are skipped.
  *
  * @param string $filename          Path of the XML export to read.
  * @param string $fileOfIndividuals Path of the output file; it is truncated first.
  *
  * @return void
  *
  * @throws \RuntimeException If the input cannot be read or parsed, or the
  *                           output file cannot be opened.
  */
 protected function buildLmfIndividuals($filename, $fileOfIndividuals)
 {
     $resourceName = $this->getResourceName();

     // Fail loudly instead of continuing with an invalid handle or document.
     $xml = file_get_contents($filename);
     if ($xml === false || $xml === '') {
         throw new \RuntimeException("Unable to read XML source file: {$filename}");
     }
     $dom = new \DOMDocument('1.0', 'UTF-8');
     if (!$dom->loadXML($xml)) {
         throw new \RuntimeException("Unable to parse XML source file: {$filename}");
     }
     $fileIndividuals = fopen($fileOfIndividuals, 'w+');
     if ($fileIndividuals === false) {
         throw new \RuntimeException("Unable to open output file: {$fileOfIndividuals}");
     }

     foreach ($dom->getElementsByTagName('return') as $domRecord) {
         /* @var $domRecord \DOMElement */
         // Step 1: flatten the record into an array keyed by node name.
         $arr = array();
         foreach ($domRecord->childNodes as $node) {
             if ($node->nodeName == 'metadata' && $node->nodeValue) {
                 $ins = array();
                 $metadata = new \DOMDocument('1.0', 'UTF-8');
                 // A broken or record-less metadata payload yields an empty $ins,
                 // so the record is skipped below instead of causing a fatal error.
                 $record = $metadata->loadXML($node->nodeValue)
                     ? $metadata->getElementsByTagName('record')->item(0)
                     : null;
                 if ($record instanceof \DOMElement) {
                     // Keep only the non-empty attributes of the record
                     foreach ($record->attributes as $attributeName => $attributeNode) {
                         if ($attributeNode->nodeValue) {
                             $ins[$attributeName] = $attributeNode->nodeValue;
                         }
                     }
                     // Non-empty child nodes are stored next to id/header/status
                     foreach ($record->childNodes as $childNode) {
                         if (!$childNode->nodeValue) {
                             continue;
                         }
                         if ($childNode->nodeName == 'imageURLs') {
                             // Only the first image URL is used; it may be missing
                             $imageUrlNode = $childNode->getElementsByTagName('imageURL')->item(0);
                             if ($imageUrlNode instanceof \DOMElement) {
                                 $arr['imageUrl'] = $imageUrlNode->getAttribute('value');
                             }
                         } else {
                             $arr[$childNode->nodeName] = $childNode->nodeValue;
                         }
                     }
                 }
                 $arr[$node->nodeName] = $ins;
             } else {
                 $arr[$node->nodeName] = $node->nodeValue;
             }
         }

         // Step 2: convert the array (id, header, status, metadata) to a lexical entry.
         // Loose comparison is kept on purpose; a missing status does not skip the record.
         $status = isset($arr['status']) ? $arr['status'] : null;
         if ($status == '-1' || empty($arr['metadata']['word'])) {
             continue;
         }
         $id = isset($arr['id']) ? $arr['id'] : null;
         $word = $arr['metadata']['word'];
         $cardno = isset($arr['metadata']['cardno']) ? $arr['metadata']['cardno'] : '';
         // null (not '') when absent, which is what setImage() received before
         $imageUrl = isset($arr['imageUrl']) ? $arr['imageUrl'] : null;
         // Word and card number together identify the entry in every URI
         $uriLabel = $word . '-' . $cardno;

         $lexicalEntry = new Owl\LmfLexicalEntry($resourceName);
         $lexicalEntry->setUri($this->getUriFactory()->create('LexicalEntry', $uriLabel, $id));
         $lmfLemma = new Owl\LmfLemma();
         $lmfLemma->setWrittenForm($word);
         $lmfLemma->setUri($this->getUriFactory()->create('Lemma', $uriLabel, $id));
         $lmfLemma->setImage($imageUrl);
         $lexicalEntry->setLemma($lmfLemma);
         $lmfSense = new Owl\LmfSense();
         $lmfSense->setLemmaWrittenForm($lmfLemma->getWrittenForm());
         $lmfSense->setUri($this->getUriFactory()->create('Sense', $uriLabel, $id));
         $lmfDefintion = new Owl\LmfDefinition();
         $lmfDefintion->setUri($this->getUriFactory()->create('Definition', $uriLabel, $id));
         $lmfTextRepresentation = new Owl\LmfTextRepresentation();
         $lmfTextRepresentation->setUri($this->getUriFactory()->create('TextRepresentation', $uriLabel, $id));

         // The definition text is an HTML fragment wrapped in a CDATA section.
         $rowSpacer = "<div style=\"height: 5px;\"></div>";
         $writtenForm = "<![CDATA[";
         $writtenForm .= "<div><em>Kortelės numeris:</em> {$cardno}</div>";
         $writtenForm .= $rowSpacer;
         $writtenForm .= "<div><img width=\"238\" alt=\"\" src=\"{$imageUrl}\"></div>";
         $writtenForm .= $rowSpacer;
         $hasLocation = !empty($arr['metadata']['sourcelocation']);
         $hasSourceLink = !empty($arr['sourcelink']);
         if ($hasLocation || $hasSourceLink) {
             $writtenForm .= "<div>Metrika</div>";
             if ($hasLocation) {
                 $writtenForm .= "<div><em>Vietovė:</em> {$arr['metadata']['sourcelocation']}</div>";
                 $writtenForm .= $rowSpacer;
             }
             if ($hasSourceLink) {
                 $writtenForm .= "<div><em>Šaltiniai:</em> {$arr['sourcelink']}</div>";
                 $writtenForm .= $rowSpacer;
             }
         }
         $writtenForm .= "]]>";

         $lmfTextRepresentation->setWrittenForm($writtenForm);
         $lmfDefintion->addTextRepresentation($lmfTextRepresentation);
         $lmfSense->setDefinition($lmfDefintion);
         $lexicalEntry->addSense($lmfSense);
         fwrite($fileIndividuals, $lexicalEntry->toLmfString());
     }
     fclose($fileIndividuals);
 }
Beispiel #3
0
 /**
  * Converts an exported antonym-dictionary XML file into LMF individuals.
  *
  * Every <return> element of the input is one record whose children are
  * id, header, metadata and status. The text of <metadata> is itself an XML
  * document; the <el name=".." value=".."> nodes of its first <record> are
  * read as follows:
  *
  *   - homonym
  *   - word                 the first one is the lemma, further ones are word forms
  *       - grammar
  *       - wordtag
  *   - antonym    [1..n]    all child elements are collected per antonym
  *       - antgramar
  *       - anttag
  *       - antremote
  *   - valcontext [1..n]
  *       - example [1..n]
  *           - exampletag [1..n]
  *       - expl             word explanation
  *       - antexpl          antonym explanation
  *   - entryfulltext        (not used)
  *
  * Records with status '-1' or without a lemma are skipped. Each remaining
  * record contributes one sense. Records carrying a homonym number are merged
  * into one lexical entry per lemma and written after the loop; the other
  * entries are written as soon as they are built.
  *
  * @param string $filename          Path of the XML export to read.
  * @param string $fileOfIndividuals Path of the output file; it is truncated first.
  *
  * @return void
  *
  * @throws \RuntimeException If the input cannot be read or parsed, or the
  *                           output file cannot be opened.
  */
 protected function buildLmfIndividuals($filename, $fileOfIndividuals)
 {
     $resourceName = $this->getResourceName();

     // Fail loudly instead of continuing with an invalid handle or document.
     $xml = file_get_contents($filename);
     if ($xml === false || $xml === '') {
         throw new \RuntimeException("Unable to read XML source file: {$filename}");
     }
     $dom = new \DOMDocument('1.0', 'UTF-8');
     if (!$dom->loadXML($xml)) {
         throw new \RuntimeException("Unable to parse XML source file: {$filename}");
     }
     $fileIndividuals = fopen($fileOfIndividuals, 'w+');
     if ($fileIndividuals === false) {
         throw new \RuntimeException("Unable to open output file: {$fileOfIndividuals}");
     }

     // Lexical entries that collect several homonym senses, keyed by lemma
     $lexEntries = array();
     foreach ($dom->getElementsByTagName('return') as $domRecord) {
         /* @var $domRecord \DOMElement */
         // Step 1: flatten the record into an array keyed by child node name.
         $arr = array();
         foreach ($domRecord->childNodes as $node) {
             if ($node->nodeName == 'metadata' && $node->nodeValue) {
                 $ins = array();
                 $metadata = new \DOMDocument('1.0', 'UTF-8');
                 // A broken or record-less metadata payload yields an empty $ins,
                 // so the record is skipped below instead of causing a fatal error.
                 $record = $metadata->loadXML($node->nodeValue)
                     ? $metadata->getElementsByTagName('record')->item(0)
                     : null;
                 $elements = $record instanceof \DOMElement
                     ? $record->getElementsByTagName('el')
                     : array();
                 foreach ($elements as $el) {
                     /* @var $el \DOMElement */
                     $name = $el->getAttribute('name');
                     $value = $el->getAttribute('value');
                     // Elements without a value are ignored, except value contexts
                     if (!$value && $name != 'valcontext') {
                         continue;
                     }
                     switch ($name) {
                         case 'homonym':
                             $ins['homonym'] = $value;
                             break;
                         case 'word':
                             $word = htmlspecialchars($value);
                             if (!isset($ins['lemma'])) {
                                 // First word is the lemma
                                 $ins['lemma'] = $word;
                             } else {
                                 // Second and following words are word forms
                                 $ins['wordForms'][] = $word;
                             }
                             break;
                         case 'antonym':
                             $ant = array('antonym' => $value);
                             foreach ($el->childNodes as $param) {
                                 // Text nodes between the elements are ignored
                                 if ($param instanceof \DOMElement) {
                                     $ant[$param->getAttribute('name')][] = $param->getAttribute('value');
                                 }
                             }
                             $ins['antonyms'][] = $ant;
                             break;
                         case 'valcontext':
                             $valcontextArr = array();
                             // All child elements belong to the same value context
                             foreach ($el->childNodes as $child) {
                                 if (!$child instanceof \DOMElement) {
                                     continue;
                                 }
                                 $childName = $child->getAttribute('name');
                                 if ($childName == 'example') {
                                     $valcontextArr['examples'][] = array($childName => $this->getChildNodesArray($child));
                                 } else {
                                     $valcontextArr[$childName][] = $child->getAttribute('value');
                                 }
                             }
                             $ins['valcontexts'][] = $valcontextArr;
                             break;
                     }
                 }
                 $arr[$node->nodeName] = $ins;
             } else {
                 $arr[$node->nodeName] = $node->nodeValue;
             }
         }

         // Step 2: convert the array (id, header, status, metadata) to a lexical entry.
         // Loose comparison is kept on purpose; a missing status does not skip the record.
         $status = isset($arr['status']) ? $arr['status'] : null;
         if ($status == '-1' || empty($arr['metadata']['lemma'])) {
             continue;
         }
         $id = isset($arr['id']) ? $arr['id'] : null;
         $lemma = $arr['metadata']['lemma'];
         $homonym = isset($arr['metadata']['homonym']) ? $arr['metadata']['homonym'] : '';
         // Both lists are optional in the source data
         $antonyms = isset($arr['metadata']['antonyms']) ? $arr['metadata']['antonyms'] : array();
         $valcontexts = isset($arr['metadata']['valcontexts']) ? $arr['metadata']['valcontexts'] : array();

         // Reuse the buffered entry when this lemma already has one
         $isBuffered = isset($lexEntries[$lemma]);
         if ($isBuffered) {
             $lexicalEntry = $lexEntries[$lemma];
             $lmfLemma = $lexicalEntry->getLemma();
         } else {
             $lexicalEntry = new Owl\LmfLexicalEntry($resourceName);
             $lexicalEntry->setUri($this->getUriFactory()->create('LexicalEntry', $lemma, 0));
             $lmfLemma = new Owl\LmfLemma();
             $lmfLemma->setWrittenForm($lemma);
             $lmfLemma->setUri($this->getUriFactory()->create('Lemma', $lemma, $id));
             $lexicalEntry->setLemma($lmfLemma);
         }

         $senseId = $id . '-' . $homonym;
         $lmfSense = new Owl\LmfSense();
         $lmfSense->setLemmaWrittenForm($lmfLemma->getWrittenForm());
         if ($homonym) {
             $lmfSense->setRank($homonym);
         }
         $lmfSense->setUri($this->getUriFactory()->create('Sense', $lemma, $senseId));
         $lmfDefintion = new Owl\LmfDefinition();
         $lmfDefintion->setUri($this->getUriFactory()->create('Definition', $lemma, $senseId));
         $lmfTextRepresentation = new Owl\LmfTextRepresentation();
         $lmfTextRepresentation->setUri($this->getUriFactory()->create('TextRepresentation', $lemma, $senseId));

         // The antonym list is identical for every value context of the record.
         // It is joined with implode() so that nothing has to be trimmed
         // afterwards; trimming used to damage the preceding "<br />" when a
         // record had no antonyms.
         $antonymSpans = array();
         foreach ($antonyms as $antonym) {
             $antonymSpans[] = "\n<span style=\"font-weight: bold;\">{$antonym['antonym']}</span>";
         }
         $antonymList = empty($antonymSpans) ? '' : implode(', ', $antonymSpans) . ' ';

         // The definition text is an HTML fragment wrapped in a CDATA section.
         $numbered = count($valcontexts) > 1;
         $writtenForm = "<![CDATA[";
         foreach ($valcontexts as $contextNr => $attr) {
             if ($numbered) {
                 $writtenForm .= "\n<span style=\"font-weight: bold;\">" . ($contextNr + 1) . "</span>";
             }
             // Explanation, followed by the antonyms
             if (isset($attr['expl'])) {
                 $writtenForm .= "\n<em>{$attr['expl'][0]}</em> <br />";
                 $writtenForm .= $antonymList;
             }
             // Antonym explanation
             if (isset($attr['antexpl'])) {
                 $writtenForm .= "\n<br/> <em>{$attr['antexpl'][0]}</em>";
             }
             // Examples with their tags
             if (isset($attr['examples'])) {
                 foreach ($attr['examples'] as $expls) {
                     if (!isset($expls['example']['value'])) {
                         continue;
                     }
                     $writtenForm .= "\n<br />{$expls['example']['value']}";
                     if (isset($expls['example']['children'])) {
                         foreach ($expls['example']['children'] as $example) {
                             if (isset($example['exampletag']['value'])) {
                                 $writtenForm .= " {$example['exampletag']['value']}.";
                             }
                         }
                     }
                 }
             }
         }
         $writtenForm .= " ]]>";

         $lmfTextRepresentation->setWrittenForm($writtenForm);
         $lmfDefintion->addTextRepresentation($lmfTextRepresentation);
         $lmfSense->setDefinition($lmfDefintion);

         // One sense relation per antonym, ranked in source order
         foreach ($antonyms as $antonymNr => $antonym) {
             $senseRelation = new Owl\LmfSenseRelation();
             $senseRelation->setUri($this->getUriFactory()->create('SenseRelation', $lemma, $id . '-' . $antonymNr));
             $senseRelation->setType('Antonimas');
             $senseRelation->setRank($antonymNr + 1);
             $senseRelation->setWrittenForm($antonym['antonym']);
             $lmfSense->addSenseRelation($senseRelation);
         }
         $lexicalEntry->addSense($lmfSense);

         // Word forms get a running rank appended to the record id in their URI
         if (!empty($arr['metadata']['wordForms'])) {
             $rank = 1;
             foreach ($arr['metadata']['wordForms'] as $wordForm) {
                 $lmfWordForm = new Owl\LmfWordForm();
                 $lmfWordForm->setUri($this->getUriFactory()->create('WordForm', $wordForm, $id . '-' . $rank++));
                 $lmfWordForm->setWrittenForm($wordForm);
                 $lexicalEntry->addWordForm($lmfWordForm);
             }
         }

         // Homonym entries are buffered until all their senses are collected.
         // An entry that is already buffered stays buffered even if this record
         // has no homonym number; otherwise it would be written here and again
         // after the loop.
         if ($homonym || $isBuffered) {
             $lexEntries[$lemma] = $lexicalEntry;
         } else {
             fwrite($fileIndividuals, $lexicalEntry->toLmfString());
         }
     }

     // Write the buffered homonym entries
     foreach ($lexEntries as $lexEntry) {
         fwrite($fileIndividuals, $lexEntry->toLmfString());
     }
     fclose($fileIndividuals);
 }
Beispiel #4
0
 protected function buildLmfIndividuals($filename, $fileOfIndividuals)
 {
     $resourceName = $this->getResourceName();
     $file = fopen($filename, 'r');
     $xml = fread($file, filesize($filename));
     fclose($file);
     $dom = new \DOMDocument('1.0', 'UTF-8');
     $dom->loadXML($xml);
     //$data = array();
     $fileIndividuals = fopen($fileOfIndividuals, "w+");
     $recordNr = 1;
     // Get record ids
     $attributes = array();
     /**
     * Data structure
     * id 
     * header
     * metadata
        [dc:identifier]             - 
        [dc:title] => 8274       * Pavadinimas
        [dc:description] => 6730 * Redaguotas tekstas 
        [dc:description] => 8222 * Originalus tekstas
        [dc:description] => 3703 * Pastabos 
        [dc:date] => 6818        * Užrašymo laikas
        [dc:source] => 7658      * Signatūra 
        [dc:source] => 7994      * Signatūros nuoroda 
        [dc:creator] => 5934     * Pateikėjas
        [dc:publisher] => 9373   * Fiksuotojas
        [dc:coverage] => 7612    * Vieta
        [dc:subject] => 17573    * Mokslinė— klasifikacija
        [dc:description] => *    * Mokslinis komentaras
     * status
     */
     $n = array();
     foreach ($dom->getElementsByTagName('return') as $domRecord) {
         /* @var $domRecord \DOMElement */
         $nodes = $domRecord->childNodes;
         $arr = array();
         foreach ($nodes as $node) {
             /* @var $node \DOMElement */
             if ($node->nodeName == 'metadata' && $node->nodeValue) {
                 $metadata = new \DOMDocument('1.0', 'UTF-8');
                 $metadata->loadXML($node->nodeValue);
                 $ins = array();
                 $num = 1;
                 // Taking a dc records
                 /* @var $record \DOMElement */
                 $record = $metadata->childNodes->item(0);
                 foreach ($record->childNodes as $childNode) {
                     /* @var $childNode \DOMElement */
                     if ($childNode->nodeValue) {
                         // Receive other metadata nodes, but ignore identifiers
                         if ($childNode->nodeName != 'dc:identifier') {
                             // a lot of dublication
                             $tmpIns = array();
                             $tmpIns[$childNode->nodeName]['label'] = $childNode->getAttribute('label');
                             $tmpIns[$childNode->nodeName]['value'] = $childNode->nodeValue;
                             $ins[] = $tmpIns;
                         }
                     }
                 }
                 $arr[$node->nodeName] = $ins;
             } else {
                 $arr[$node->nodeName] = $node->nodeValue;
             }
         }
         $recordNr++;
         // Counting if posible attributes
         /*
         foreach ($arr['metadata'] as $nr => $keys) {
             $key = array_keys($keys)[0];
             
             $key = $key . ' - ' . $keys[$key]['label'];
             if (isset($n[$key])) {
                 $n[$key]++;
             } else {
                 $n[$key] = 1; 
             }
         }
         */
         // Concert the array to lexical entry
         /* array contains all atributes of data structure
          * - id
          * - header
          * - status
          * - metadata
          *      * all feeld of data structure with is presented upper
          */
         // Looking for lemma = the Song title
         // Looking for lemma = the Song title
         $recordTitle = '';
         foreach ($arr['metadata'] as $nr => $keys) {
             $key = array_keys($keys)[0];
             // BUG in data sometime song title can be in one of these tags
             if ($key == 'dc:title') {
                 //echo $recordTitle = $keys[$key]['value'];
                 // Record title is between [] take it
                 $recordTitle = $keys[$key]['value'];
                 $startPos = strpos($recordTitle, '[');
                 $endPos = strpos($recordTitle, ']');
                 $recordTitle = substr($recordTitle, $startPos + 1, $endPos - $startPos - 1);
             }
         }
         // For debuging
         if (!$recordTitle) {
             print_r($arr);
         }
         if ($arr['status'] != '-1' && $recordTitle) {
             $lexicalEntry = new Owl\LmfLexicalEntry($resourceName);
             $lexicalEntry->setUri($this->getUriFactory()->create('LexicalEntry', $recordTitle, $arr['id']));
             $lmfLemma = new Owl\LmfLemma();
             $lmfLemma->setWrittenForm($recordTitle);
             $lmfLemma->setUri($this->getUriFactory()->create('Lemma', $recordTitle, $arr['id']));
             $lexicalEntry->setLemma($lmfLemma);
             $lmfSense = new Owl\LmfSense();
             $lmfSense->setLemmaWrittenForm($lmfLemma->getWrittenForm());
             $lmfSense->setUri($this->getUriFactory()->create('Sense', $recordTitle, $arr['id']));
             $lmfDefintion = new Owl\LmfDefinition();
             $lmfDefintion->setUri($this->getUriFactory()->create('Definition', $recordTitle, $arr['id']));
             $lmfTextRepresentation = new Owl\LmfTextRepresentation();
             $lmfTextRepresentation->setUri($this->getUriFactory()->create('TextRepresentation', $recordTitle, $arr['id']));
             $writtenForm = "<![CDATA[";
             foreach ($arr['metadata'] as $nr => $keys) {
                 $key = array_keys($keys)[0];
                 $attr = $keys[$key];
                 if ($key != 'dc:title') {
                     // Image
                     if ($key == 'dc:source' && stristr($attr['label'], "nuoroda")) {
                         // Get image name
                         $imageSrc = preg_replace('/(.*)\\?foto=(.*)\\&id=(.*)/i', "\\2", $attr['value']);
                         // Preparing new url
                         $imageSrc = "http://www.tautosakos-rankrastynas.lt/failai/vaizdas/" . urldecode($imageSrc);
                         $val = "<img width=\"238\" alt=\"\" src=\"{$imageSrc}\">";
                         $writtenForm .= "<div><em>Signatūros iliustracija:</em><br/> {$val} </div>";
                     } else {
                         $writtenForm .= "<div><em>{$attr['label']}:</em> {$attr['value']} </div>";
                     }
                     // Spacing between rows
                     $writtenForm .= "<div style=\"height: 5px;\"></div>";
                 }
             }
             $writtenForm .= "]]>";
             $lmfTextRepresentation->setWrittenForm($writtenForm);
             $lmfDefintion->addTextRepresentation($lmfTextRepresentation);
             $lmfSense->setDefinition($lmfDefintion);
             $lexicalEntry->addSense($lmfSense);
             fwrite($fileIndividuals, $lexicalEntry->toLmfString());
         }
     }
     fclose($fileIndividuals);
     if (!empty($n)) {
         print_r($n);
     }
 }
Beispiel #5
0
 /**
  * Builds LMF lexical entries from a harvested XML dump and writes them to a file.
  *
  * Each <return> element of the dump is flattened into an array keyed by child
  * node name (the code reads "id", "status" and "metadata" from it). The XML
  * document carried as text inside the <metadata> child is parsed separately:
  * every non-empty child element except dc:identifier is stored under its node
  * name as a label/value pair, and a node name that occurs again is stored
  * under "<name>_<n>" so that it does not overwrite the first occurrence.
  *
  * A record is exported only when its status is not "-1" and it has a
  * non-empty dc:description value. That value is used as the lemma and as the
  * seed for every URI; all other fields except dc:title are rendered as HTML
  * rows inside one CDATA section that becomes the definition text.
  *
  * Field labels noted by the original author for this data set
  * (NOTE(review): the English glosses are translations, confirm with the data owner):
  *   dc:identifier  - ignored, values duplicate each other and are not informative
  *   dc:title       - "Pavadinimas" (title)
  *   dc:description - "Transponuotas užminimas" (transposed riddle text)
  *   dc:description - "Fiksacijos tekstas" (text as recorded), stored as dc:description_<n>
  *   dc:subject     - "Įminimas" (answer)
  *   dc:source      - "Signatūra" (shelf mark)
  *   tm:type        - "Tipas" (type)
  *   dc:publisher   - "Fiksuotojas" (recorder)
  *   dc:coverage    - "Vieta" (place)
  *   tm:version     - "Versija" (version)
  *   dc:creator     - "Pateikėjas" (informant)
  *
  * @param string $filename          Path of the XML dump to read.
  * @param string $fileOfIndividuals Path of the output file; opened with "w+", so it is truncated.
  *
  * @return void
  *
  * @throws \RuntimeException When the dump cannot be read or the output file cannot be opened.
  */
 protected function buildLmfIndividuals($filename, $fileOfIndividuals)
 {
     $resourceName = $this->getResourceName();

     if (!is_readable($filename) || ($xml = file_get_contents($filename)) === false) {
         throw new \RuntimeException("Unable to read source file: {$filename}");
     }

     $dom = new \DOMDocument('1.0', 'UTF-8');
     // loadXML() rejects an empty string; an empty dump simply yields no records.
     if ($xml !== '') {
         $dom->loadXML($xml);
     }

     $fileIndividuals = fopen($fileOfIndividuals, 'w+');
     if ($fileIndividuals === false) {
         throw new \RuntimeException("Unable to open output file: {$fileOfIndividuals}");
     }

     foreach ($dom->getElementsByTagName('return') as $domRecord) {
         /* @var $domRecord \DOMElement */
         $arr = array();
         foreach ($domRecord->childNodes as $node) {
             if ($node->nodeName != 'metadata' || !$node->nodeValue) {
                 $arr[$node->nodeName] = $node->nodeValue;
                 continue;
             }

             // The metadata payload is an XML document serialized as text.
             $metadata = new \DOMDocument('1.0', 'UTF-8');
             $metadata->loadXML($node->nodeValue);
             $ins = array();
             $num = 1;
             // documentElement is null when the payload failed to parse; it also
             // skips any comment or processing instruction preceding the root.
             $root = $metadata->documentElement;
             if ($root !== null) {
                 foreach ($root->childNodes as $childNode) {
                     // Text and comment children (e.g. indentation) carry no
                     // attributes; only elements with a value are data fields.
                     if (!($childNode instanceof \DOMElement) || !$childNode->nodeValue) {
                         continue;
                     }
                     if ($childNode->nodeName == 'dc:identifier') {
                         continue;
                     }
                     $name = $childNode->nodeName;
                     if (isset($ins[$name])) {
                         // Repeated field: store under a numbered key instead of overwriting.
                         $name .= '_' . $num;
                         $num++;
                     }
                     $ins[$name] = array(
                         'label' => $childNode->getAttribute('label'),
                         'value' => $childNode->nodeValue,
                     );
                 }
             }
             $arr[$node->nodeName] = $ins;
         }

         // Records may lack any of these children; read them defensively.
         $status = isset($arr['status']) ? $arr['status'] : null;
         $id = isset($arr['id']) ? $arr['id'] : null;
         $fields = (isset($arr['metadata']) && is_array($arr['metadata'])) ? $arr['metadata'] : array();
         $lemmaText = isset($fields['dc:description']['value']) ? $fields['dc:description']['value'] : '';

         if ($status == '-1' || empty($lemmaText)) {
             continue;
         }

         $lexicalEntry = new Owl\LmfLexicalEntry($resourceName);
         $lexicalEntry->setUri($this->getUriFactory()->create('LexicalEntry', $lemmaText, $id));

         $lmfLemma = new Owl\LmfLemma();
         $lmfLemma->setWrittenForm($lemmaText);
         $lmfLemma->setUri($this->getUriFactory()->create('Lemma', $lemmaText, $id));
         $lexicalEntry->setLemma($lmfLemma);

         $lmfSense = new Owl\LmfSense();
         $lmfSense->setLemmaWrittenForm($lmfLemma->getWrittenForm());
         $lmfSense->setUri($this->getUriFactory()->create('Sense', $lemmaText, $id));

         $lmfDefinition = new Owl\LmfDefinition();
         $lmfDefinition->setUri($this->getUriFactory()->create('Definition', $lemmaText, $id));

         $lmfTextRepresentation = new Owl\LmfTextRepresentation();
         $lmfTextRepresentation->setUri($this->getUriFactory()->create('TextRepresentation', $lemmaText, $id));

         // Definition text: one HTML row per field, followed by a spacer row.
         $writtenForm = "<![CDATA[";
         foreach ($fields as $key => $attr) {
             if ($key == 'dc:title') {
                 continue;
             }
             $writtenForm .= "<div><em>{$attr['label']}:</em> {$attr['value']} </div>";
             $writtenForm .= "<div style=\"height: 5px;\"></div>";
         }
         $writtenForm .= "]]>";

         $lmfTextRepresentation->setWrittenForm($writtenForm);
         $lmfDefinition->addTextRepresentation($lmfTextRepresentation);
         $lmfSense->setDefinition($lmfDefinition);
         $lexicalEntry->addSense($lmfSense);

         fwrite($fileIndividuals, $lexicalEntry->toLmfString());
     }

     fclose($fileIndividuals);
 }
Beispiel #6
0
 /**
  * Builds LMF lexical entries from a harvested XML dump and writes them to a file.
  *
  * Each <return> element of the dump is flattened into an array keyed by child
  * node name (the code reads "id", "status" and "metadata" from it). The XML
  * document carried as text inside the <metadata> child is parsed separately:
  * every non-empty child element except dc:identifier is appended, in document
  * order, to a list of single-key arrays (node name => label/value). A list is
  * used because the same node name occurs several times per record.
  *
  * The lemma is the value of the last dc:subject or dc:title field of the
  * record. A record is exported only when its status is not "-1" and a lemma
  * was found. All remaining fields are rendered as HTML rows inside one CDATA
  * section that becomes the definition text; line breaks in dc:description
  * are turned into <br /> tags.
  *
  * Field labels noted by the original author for this data set
  * (NOTE(review): the English glosses are translations, confirm with the data owner):
  *   dc:identifier  - ignored
  *   dc:subject     - "Tipas" (type)
  *   dc:description - "Daina" (song)
  *   dc:publisher   - "Fiksuotojas" (recorder)
  *   dc:coverage    - "Vieta" (place)
  *   dc:source      - "Archyvinis šaltinis" (archival source)
  *   dc:source      - "Spaudinys" (printed publication)
  *   dc:source      - "Signatūros nuoroda" (shelf-mark link)
  *   dc:date        - "Fiksavimo laikas" (time of recording)
  *   dc:creator     - "Pateikėjas" (informant)
  *   dc:title       - "Versija" (version), used in place of dc:subject
  *
  * @param string $filename          Path of the XML dump to read.
  * @param string $fileOfIndividuals Path of the output file; opened with "w+", so it is truncated.
  *
  * @return void
  *
  * @throws \RuntimeException When the dump cannot be read or the output file cannot be opened.
  */
 protected function buildLmfIndividuals($filename, $fileOfIndividuals)
 {
     $resourceName = $this->getResourceName();

     if (!is_readable($filename) || ($xml = file_get_contents($filename)) === false) {
         throw new \RuntimeException("Unable to read source file: {$filename}");
     }

     $dom = new \DOMDocument('1.0', 'UTF-8');
     // loadXML() rejects an empty string; an empty dump simply yields no records.
     if ($xml !== '') {
         $dom->loadXML($xml);
     }

     $fileIndividuals = fopen($fileOfIndividuals, 'w+');
     if ($fileIndividuals === false) {
         throw new \RuntimeException("Unable to open output file: {$fileOfIndividuals}");
     }

     foreach ($dom->getElementsByTagName('return') as $domRecord) {
         /* @var $domRecord \DOMElement */
         $arr = array();
         foreach ($domRecord->childNodes as $node) {
             if ($node->nodeName != 'metadata' || !$node->nodeValue) {
                 $arr[$node->nodeName] = $node->nodeValue;
                 continue;
             }

             // The metadata payload is an XML document serialized as text.
             $metadata = new \DOMDocument('1.0', 'UTF-8');
             $metadata->loadXML($node->nodeValue);
             $ins = array();
             // documentElement is null when the payload failed to parse; it also
             // skips any comment or processing instruction preceding the root.
             $root = $metadata->documentElement;
             if ($root !== null) {
                 foreach ($root->childNodes as $childNode) {
                     // Text and comment children (e.g. indentation) carry no
                     // attributes; only elements with a value are data fields.
                     if (!($childNode instanceof \DOMElement) || !$childNode->nodeValue) {
                         continue;
                     }
                     if ($childNode->nodeName == 'dc:identifier') {
                         continue;
                     }
                     // Node names repeat within a record, so keep an ordered list.
                     $ins[] = array(
                         $childNode->nodeName => array(
                             'label' => $childNode->getAttribute('label'),
                             'value' => $childNode->nodeValue,
                         ),
                     );
                 }
             }
             $arr[$node->nodeName] = $ins;
         }

         // Records may lack any of these children; read them defensively.
         $status = isset($arr['status']) ? $arr['status'] : null;
         $id = isset($arr['id']) ? $arr['id'] : null;
         $fields = (isset($arr['metadata']) && is_array($arr['metadata'])) ? $arr['metadata'] : array();

         // The song title serves as lemma. The data is inconsistent about which
         // tag holds it, so both are accepted and the last occurrence wins.
         $songTitle = '';
         foreach ($fields as $keys) {
             $key = array_keys($keys)[0];
             if ($key == 'dc:subject' || $key == 'dc:title') {
                 $songTitle = $keys[$key]['value'];
             }
         }

         // Diagnostic output kept from the original: dump records without a title.
         if (!$songTitle) {
             print_r($arr);
         }

         if ($status == '-1' || !$songTitle) {
             continue;
         }

         $lexicalEntry = new Owl\LmfLexicalEntry($resourceName);
         $lexicalEntry->setUri($this->getUriFactory()->create('LexicalEntry', $songTitle, $id));

         $lmfLemma = new Owl\LmfLemma();
         $lmfLemma->setWrittenForm($songTitle);
         $lmfLemma->setUri($this->getUriFactory()->create('Lemma', $songTitle, $id));
         $lexicalEntry->setLemma($lmfLemma);

         $lmfSense = new Owl\LmfSense();
         $lmfSense->setLemmaWrittenForm($lmfLemma->getWrittenForm());
         $lmfSense->setUri($this->getUriFactory()->create('Sense', $songTitle, $id));

         $lmfDefinition = new Owl\LmfDefinition();
         $lmfDefinition->setUri($this->getUriFactory()->create('Definition', $songTitle, $id));

         $lmfTextRepresentation = new Owl\LmfTextRepresentation();
         $lmfTextRepresentation->setUri($this->getUriFactory()->create('TextRepresentation', $songTitle, $id));

         // Definition text: one HTML row per field, followed by a spacer row.
         $writtenForm = "<![CDATA[";
         foreach ($fields as $keys) {
             $key = array_keys($keys)[0];
             $attr = $keys[$key];
             // Title fields already form the lemma and are not repeated here.
             if ($key == 'dc:title' || $key == 'dc:subject') {
                 continue;
             }
             if ($key == 'dc:description') {
                 // Multi-line text: keep its line breaks visible in HTML.
                 $val = str_replace("\n", '<br />', $attr['value']);
                 $writtenForm .= "<div><em>{$attr['label']}:</em><br/> {$val} </div>";
             } else {
                 $writtenForm .= "<div><em>{$attr['label']}:</em> {$attr['value']} </div>";
             }
             $writtenForm .= "<div style=\"height: 5px;\"></div>";
         }
         $writtenForm .= "]]>";

         $lmfTextRepresentation->setWrittenForm($writtenForm);
         $lmfDefinition->addTextRepresentation($lmfTextRepresentation);
         $lmfSense->setDefinition($lmfDefinition);
         $lexicalEntry->addSense($lmfSense);

         fwrite($fileIndividuals, $lexicalEntry->toLmfString());
     }

     fclose($fileIndividuals);
 }