/**
 * Stores every token of a geoparsed document in the database.
 *
 * Walks each sentence (<s>) and word (<w>) of the XML, writes one record per
 * word through Tokens::addRecord(), and for words flagged with a "locname"
 * attribute looks up the <ent> elements whose parts/part/@ew equals the
 * word id.
 *
 * Side effects on $this:
 *  - gazSourceRefs: list of distinct "source-gazref" values of the matched
 *    entities (only assigned when a new value is found).
 *  - gazRbIDs: map of entity id => result of addRecord() for the first token
 *    stored for that entity (only assigned when a new entity is found).
 *
 * @param string $xmlString the lemmatized XML result from the Edina Geoparser
 *
 * @return array|false list of the values returned by addRecord() for each
 *                     token, in document order; false when the document is
 *                     already stored or the XML cannot be parsed
 */
function storeParsedTokens($xmlString)
{
    $sentencesPerPage = $this->sentencesPerPage();
    $tokensObj = new Tokens(); // class for manipulating tokens in the database

    // Nothing to do when the tokens of this document are already stored.
    if ($tokensObj->checkDocumentDone($this->docID)) {
        return false;
    }

    // Collect libxml parse errors internally instead of silencing with "@",
    // then restore whatever error mode the caller had configured.
    $previousErrorMode = libxml_use_internal_errors(true);
    $xml = simplexml_load_string($xmlString);
    libxml_clear_errors();
    libxml_use_internal_errors($previousErrorMode);
    unset($xmlString); // the raw string is no longer needed once parsed

    // Strict test: a valid document with an empty root element is falsy.
    if ($xml === false) {
        return false;
    }

    // Returns the string value of the last node matched by $expression
    // relative to $node, or $default when nothing matches.
    $lastValue = function ($node, $expression, $default) {
        $matches = $node->xpath($expression);
        if (!is_array($matches) || count($matches) === 0) {
            return $default;
        }

        return (string) end($matches);
    };

    // Index the entities once by the token ids found in parts/part/@ew.
    // This replaces a whole-document XPath query per place token and avoids
    // interpolating the token id into an XPath string literal, which broke
    // for ids containing an apostrophe.
    $entitiesByToken = array();
    $allEntities = $xml->xpath("//ent");
    if (is_array($allEntities)) {
        foreach ($allEntities as $ent) {
            $partTokenIDs = $ent->xpath("parts/part/@ew");
            if (!is_array($partTokenIDs) || count($partTokenIDs) === 0) {
                continue;
            }

            $entity = array(
                "id" => $lastValue($ent, "@id", false),
                "source" => $lastValue($ent, "@source-gazref", false),
            );

            // List an entity only once per token id, even when several of
            // its parts carry the same @ew value.
            $seenTokenIDs = array();
            foreach ($partTokenIDs as $partTokenID) {
                $key = (string) $partTokenID;
                if (isset($seenTokenIDs[$key])) {
                    continue;
                }
                $seenTokenIDs[$key] = true;
                $entitiesByToken[$key][] = $entity;
            }
        }
    }

    $output = array();
    $gazSourceRefs = array();
    $gazRbIDs = array();
    $pageNumber = 1;
    $sentencesOnPage = 0;

    $sentences = $xml->xpath("//s");
    if (!is_array($sentences)) {
        $sentences = array();
    }

    foreach ($sentences as $sentence) {
        $sentID = $lastValue($sentence, "@id", false);

        $words = $sentence->xpath("w");
        if (!is_array($words)) {
            $words = array();
        }

        foreach ($words as $word) {
            $tokenID = $lastValue($word, "@id", false);
            $token = $lastValue($word, "text()", false);
            $location = $lastValue($word, "@locname", false);

            // pws stays null when the attribute is absent, otherwise it is
            // true only for the literal value "yes".
            $pwsValue = $lastValue($word, "@pws", null);
            $pws = ($pwsValue === null) ? null : ($pwsValue === "yes");

            $gazRbID = false;

            // Loose comparison kept on purpose: an empty locname is treated
            // like a missing one.
            if ($location != false) {
                $key = (string) $tokenID;
                $matchedEntities = isset($entitiesByToken[$key]) ? $entitiesByToken[$key] : array();

                foreach ($matchedEntities as $entity) {
                    // When several entities end on this token the last one
                    // in document order that has an id wins.
                    if ($entity["id"] !== false) {
                        $gazRbID = $entity["id"];
                    }

                    if ($entity["source"] !== false && !in_array($entity["source"], $gazSourceRefs, true)) {
                        $gazSourceRefs[] = $entity["source"];
                        $this->gazSourceRefs = $gazSourceRefs;
                    }
                }

                if (!$gazRbID) {
                    // we couldn't find an entity for this token, but we want to note it is a place
                    $location = "[place]";
                } else {
                    $location = $gazRbID;
                }
            } // end case of a place reference

            $data = array(
                "docID" => $this->docID,
                "batchID" => $this->batchID,
                "pageID" => $pageNumber,
                "sentID" => $sentID,
                "tokenID" => $tokenID,
                "pws" => $pws,
                "gazRef" => $location,
                "token" => $token,
            );

            $DBtokenID = $tokensObj->addRecord($data);

            if ($gazRbID != false && $DBtokenID != false) {
                if (!array_key_exists($gazRbID, $gazRbIDs)) {
                    // associate the place identification with the database token id
                    $gazRbIDs[$gazRbID] = $DBtokenID;
                    $this->gazRbIDs = $gazRbIDs;
                }
            }

            $output[] = $DBtokenID;
        } // end loop through tokens in a sentence

        // Advance the page only after the sentence's tokens are stored, so
        // the sentence that fills a page is still written to that page.
        // (Advancing before the token loop left page 1 one sentence short.)
        $sentencesOnPage++;
        if ($sentencesOnPage >= $sentencesPerPage) {
            $sentencesOnPage = 0;
            $pageNumber++;
        }
    } // end loop through sentences

    return $output;
}