Пример #1
0
 /**
  * Parse Edina Geoparser output and store one database record per word token.
  *
  * Nothing is parsed or stored when Tokens::checkDocumentDone() returns a
  * truthy value for $this->docID, or when $xmlString is not parseable XML.
  * Otherwise every <s> element is visited in document order and each of its
  * <w> children is passed to Tokens::addRecord() together with the current
  * page number, the sentence id, the token id, the "pws" flag and, for words
  * carrying a "locname" attribute, the id of the matching <ent> element (or
  * the literal "[place]" when no entity ends on that word).
  *
  * Side effects on $this:
  *  - gazSourceRefs: set to the list of distinct "source-gazref" values seen,
  *    updated each time a new value is encountered.
  *  - gazRbIDs: set to a map of <ent> id => addRecord() result for the first
  *    stored token resolved to that entity.
  * Neither property is touched when no such value is encountered.
  *
  * @param string $xmlString lemmatized XML result from the Edina Geoparser
  *
  * @return array|false the addRecord() results, one per <w> in document
  *                     order; false when the document was already stored or
  *                     the XML could not be parsed
  */
 function storeParsedTokens($xmlString)
 {
     $sentencesPerPage = $this->sentencesPerPage();
     $tokensObj = new Tokens();
     //class for manipulating tokens in the database
     if ($tokensObj->checkDocumentDone($this->docID)) {
         return false;
     }

     // Collect libxml parse errors internally instead of silencing every
     // diagnostic with "@"; the caller's error mode is restored afterwards.
     $previousErrorMode = libxml_use_internal_errors(true);
     $xml = simplexml_load_string($xmlString);
     if (!$previousErrorMode) {
         libxml_clear_errors();
         libxml_use_internal_errors(false);
     }
     unset($xmlString);
     // Compare against false explicitly: the PHP manual warns that a
     // successfully parsed element may itself evaluate as false.
     if ($xml === false) {
         return false;
     }

     // Index every <ent> by the token ids found in parts/part/@ew once, rather
     // than scanning the whole document again for each location token. Lists
     // keep document order so that the last matching entity still wins below.
     $entsByEndToken = array();
     foreach ($xml->xpath("//ent") as $ent) {
         foreach ($ent->xpath("parts/part/@ew") as $endToken) {
             $entsByEndToken[(string) $endToken][] = $ent;
         }
     }

     $gazSourceRefs = array();
     $gazRbIDs = array();
     $output = array();
     $pageNumber = 1;
     $sentencesOnPage = 0;
     foreach ($xml->xpath("//s") as $sentence) {
         $sentID = isset($sentence["id"]) ? (string) $sentence["id"] : false;
         // NOTE(review): the page counter advances before this sentence's
         // tokens are stored, so the sentence that fills a page is recorded on
         // the following page (page 1 holds one sentence fewer than the rest).
         // Kept as-is because stored pageID values depend on it -- confirm
         // whether this is intended.
         $sentencesOnPage++;
         if ($sentencesOnPage >= $sentencesPerPage) {
             $sentencesOnPage = 0;
             $pageNumber++;
         }
         foreach ($sentence->xpath("w") as $word) {
             $tokenID = isset($word["id"]) ? (string) $word["id"] : false;
             $token = false;
             foreach ($word->xpath("text()") as $xres) {
                 $token = (string) $xres;
             }
             // null when the attribute is absent, otherwise true only for "yes"
             $pws = isset($word["pws"]) ? ((string) $word["pws"] === "yes") : null;
             $location = isset($word["locname"]) ? (string) $word["locname"] : false;
             $gazRbID = false;
             if ($location != false) {
                 $entKey = (string) $tokenID;
                 $matchingEnts = isset($entsByEndToken[$entKey]) ? $entsByEndToken[$entKey] : array();
                 foreach ($matchingEnts as $ent) {
                     if (isset($ent["id"])) {
                         $gazRbID = (string) $ent["id"];
                     }
                     if (isset($ent["source-gazref"])) {
                         $gazSource = (string) $ent["source-gazref"];
                         // strict comparison so numeric-looking refs such as
                         // "01" and "1" are not treated as duplicates
                         if (!in_array($gazSource, $gazSourceRefs, true)) {
                             $gazSourceRefs[] = $gazSource;
                             $this->gazSourceRefs = $gazSourceRefs;
                         }
                     }
                 }
                 //when no entity was found for this token we still note it is a place
                 $location = $gazRbID ? $gazRbID : "[place]";
             }
             //end case of a place reference
             $data = array(
                 "docID" => $this->docID,
                 "batchID" => $this->batchID,
                 "pageID" => $pageNumber,
                 "sentID" => $sentID,
                 "tokenID" => $tokenID,
                 "pws" => $pws,
                 "gazRef" => $location,
                 "token" => $token
             );
             $DBtokenID = $tokensObj->addRecord($data);
             if ($gazRbID != false && $DBtokenID != false && !array_key_exists($gazRbID, $gazRbIDs)) {
                 //associate the place identification with the database token id
                 $gazRbIDs[$gazRbID] = $DBtokenID;
                 $this->gazRbIDs = $gazRbIDs;
             }
             $output[] = $DBtokenID;
         }
         //end loop through tokens in a sentence
     }
     //end loop through sentences
     return $output;
 }