예제 #1
0
/**
 * Demonstrates the lifecycle of a TextRazor entity dictionary: drop any
 * previous copy, create it, add one entry, run an analysis that uses it,
 * read the stored data back and finally delete it again.
 *
 * Every API result is dumped with print_r().
 */
function testEntityDictionary()
{
    $dictionaryId = 'test_ents_php';
    $manager = new DictionaryManager();

    // Start from a clean slate. A failing delete (for example because the
    // dictionary does not exist yet) is deliberately ignored.
    try {
        $deleted = $manager->deleteDictionary($dictionaryId);
        print_r($deleted);
    } catch (Exception $ignored) {
    }

    // Create the dictionary and register a single test entry.
    $created = $manager->createDictionary($dictionaryId, 'STEM', true, "eng");
    print_r($created);

    $entries = array(
        array("id" => "TV_1", "text" => "BBC Panorama"),
    );
    $added = $manager->addEntries($dictionaryId, $entries);
    print_r($added);

    // An analysis request picks the dictionary up through its ID.
    $client = new TextRazor();
    $client->addEntityDictionary($dictionaryId);
    $analysis = $client->analyze('Barclays misled shareholders and the public about one of the biggest investments in the banks history, a BBC Panorama investigation has found.');

    // Entities matched during the analysis are listed in the response.
    print_r($analysis['response']['entities']);

    // Read back what is stored: one entry, a page of entries, the
    // dictionary itself and the list of all dictionaries.
    $storedEntry = $manager->getEntry($dictionaryId, "TV_1");
    print_r($storedEntry);
    $entryPage = $manager->allEntries($dictionaryId, 10);
    print_r($entryPage);
    $dictionary = $manager->getDictionary($dictionaryId);
    print_r($dictionary);
    $dictionaries = $manager->allDictionaries();
    print_r($dictionaries);

    // Clean up after the demo.
    $removed = $manager->deleteDictionary($dictionaryId);
    print_r($removed);
}
예제 #2
0
파일: index.php 프로젝트: xelaus/NewsPulse
/**
 * Fetches a Twitter user timeline, keeps the tweets that mention one of the
 * given tags, runs those tweets through TextRazor and appends a tweet object
 * for every tweet that has an entity with enrichment data.
 *
 * At most the first 20 tweets of the timeline are considered. '#' characters
 * are stripped from the tweet text before tag matching. A tweet whose
 * URL-stripped text is already present in $tweetArray (compared against the
 * objects' stweet property) is not added again.
 *
 * @param string     $getfield   Query string handed to TwitterAPIExchange::setGetfield().
 * @param mixed      $newsSource Passed through as first constructor argument of each new tweet.
 * @param array      $settings   Constructor argument for TwitterAPIExchange.
 * @param string     $api_key    Constructor argument for TextRazor.
 * @param array      $tweetArray Tweet objects collected so far.
 * @param array|null $tags_array Substrings to look for in the tweets; when not set nothing matches.
 *
 * @return array $tweetArray with the newly created tweet objects appended.
 */
function getNews($getfield, $newsSource, $settings, $api_key, $tweetArray, $tags_array)
{
    $latitudeKey = "fbase:/location/location/geolocation>/location/geocode/latitude";
    $longitudeKey = "fbase:/location/location/geolocation>/location/geocode/longitude";
    $urlPattern = '#\\bhttps?://[^\\s()<>]+(?:\\([\\w\\d]+\\)|([^[:punct:]\\s]|/))#';

    $url = 'https://api.twitter.com/1.1/statuses/user_timeline.json';
    $requestMethod = 'GET';
    $twitter = new TwitterAPIExchange($settings);
    $timelineJson = $twitter->setGetfield($getfield)->buildOauth($url, $requestMethod)->performRequest();
    $obj = json_decode($timelineJson, true);

    // Nothing to do when the timeline could not be decoded or no tags were given.
    if (!is_array($obj) || !isset($tags_array)) {
        return $tweetArray;
    }

    // Collect the text of up to 20 tweets with the '#' characters removed.
    // Missing entries (short timeline, error payload) are skipped instead of
    // being read as undefined offsets.
    $stringArray = array();
    for ($x = 0; $x < 20; $x++) {
        if (!isset($obj[$x]['text'])) {
            continue;
        }
        $stringArray[] = str_replace('#', '', $obj[$x]['text']);
    }

    foreach ($stringArray as $tweetText) {
        // strpos() returns 0 for a match at the very start of the text, so
        // the result has to be compared against false explicitly.
        $check = false;
        foreach ($tags_array as $tag) {
            $needle = (string) $tag;
            if ($needle !== '' && strpos($tweetText, $needle) !== false) {
                $check = true;
                break;
            }
        }
        if (!$check) {
            continue;
        }

        $textrazor = new TextRazor($api_key);
        $textrazor->addExtractor('entities');
        $textrazor->addExtractor('words');
        $textrazor->addEnrichmentQuery($latitudeKey);
        $textrazor->addEnrichmentQuery($longitudeKey);
        $analysis = $textrazor->analyze($tweetText);
        if (!isset($analysis['response']['entities'])) {
            continue;
        }

        // The URL handling depends only on the tweet, so it is done once per
        // tweet rather than once per entity.
        // NOTE(review): the search starts at byte offset 3 and, because of
        // PREG_OFFSET_CAPTURE, $matches[0] is array(url, offset) rather than a
        // plain string. Both are kept as they were because the tweet class is
        // not visible here - confirm what it expects.
        $matches = array();
        preg_match($urlPattern, $tweetText, $matches, PREG_OFFSET_CAPTURE, 3);
        $urlcontent = isset($matches[0]) ? $matches[0] : null;
        $stringArray2 = preg_replace($urlPattern, '', $tweetText);

        foreach ($analysis['response']['entities'] as $entity) {
            // Only entities that carry enrichment data produce a tweet.
            if (!isset($entity['data'])) {
                continue;
            }
            $entity_data = $entity['data'];
            $latitude = isset($entity_data[$latitudeKey][0]) ? $entity_data[$latitudeKey][0] : null;
            $longitude = isset($entity_data[$longitudeKey][0]) ? $entity_data[$longitudeKey][0] : null;

            // Add the tweet only when no stored tweet has the same text. The
            // earlier check appended as soon as any stored tweet differed,
            // which let duplicates through.
            $alreadyStored = false;
            foreach ($tweetArray as $tw) {
                if ($tw->stweet === $stringArray2) {
                    $alreadyStored = true;
                    break;
                }
            }
            if (!$alreadyStored) {
                array_push($tweetArray, new tweet($newsSource, $stringArray2, $latitude, $longitude, $urlcontent));
            }
        }
    }

    return $tweetArray;
}
 /**
  * Sends the description of the given entity to TextRazor and maps the
  * returned entities and coarse topics onto plain arrays.
  *
  * The result may contain these keys:
  *  - "initialEntities":      every accepted entity with "DBpedia::" / "Freebase::"
  *                            prefixed types taken straight from the response;
  *  - "intermediateEntities": the same entities, with type entries whose entityURI and
  *                            wikiURI come from the Dutch/English lookup helpers of this class;
  *  - "cleanedUpEntities":    the intermediate entities that pass the stop-word,
  *                            abbreviation and last-word checks, re-labelled where needed;
  *  - "topics":               label, wikiLink and score of every coarse topic.
  * The three entity keys are only present when the response has an "entities"
  * section, "topics" only when it has "coarseTopics".
  *
  * An entity is skipped entirely when the text found at its start offset does
  * not equal its matched text (case-insensitively).
  *
  * @param object $entity Object whose content["description"] holds the text to analyse
  *                       (presumably UTF-8, since it is passed through utf8_decode()).
  *
  * @return array Result structure described above.
  */
 public function processTextRazorApi($entity)
 {
     require_once 'TextRazor.php';
     set_time_limit(5200);
     \DB::connection()->disableQueryLog();
     $text = $entity->content["description"];
     // Single-byte copy of the text; entity offsets are checked against it with substr().
     $initialDescription = utf8_decode($text);
     // NOTE(review): the API credential is hard-coded in source; consider loading it from configuration.
     $textrazor = new TextRazor("e6438f10fc2f974ff0a5b97969a4296fe637da043237aba49569bd58");
     $textrazor->addExtractor('entities,topics,words,phrases,dependency-trees,relations,entailments,senses');
     $response = $textrazor->analyze($text);
     $result = array();
     // Confidence scores are rescaled from [$initialMin, $initialMax] to 0 .. 1.
     $initialMin = 0.5;
     $initialMax = 10;
     // No per-type confidence is available, so every type entry gets this placeholder.
     $noConfidence = array("score" => null, "bounds" => null);

     if (isset($response['response']['entities'])) {
         $result["intermediateEntities"] = array();
         $result["initialEntities"] = array();
         $result["cleanedUpEntities"] = array();

         foreach ($response['response']['entities'] as $raw) {
             $label = iconv('UTF-8', 'UTF-8//IGNORE', $raw["matchedText"]);
             $found = array(
                 "label" => $label,
                 "startOffset" => $raw["startingPos"],
                 "endOffset" => $raw["endingPos"],
                 "confidence" => ($raw["confidenceScore"] - $initialMin) / ($initialMax - $initialMin),
                 "provenance" => "textrazor",
                 "timestamp" => "00:00",
                 "description" => "",
                 "types" => array(),
             );

             // Sanity check: the text at the reported offset must be the matched text.
             // Both sides are compared in the single-byte encoding. The length used to
             // be taken from the UTF-8 label, which is longer than the decoded text for
             // any non-ASCII character and made such labels fail this check.
             $latinLabel = utf8_decode($label);
             $textAtOffset = substr($initialDescription, (int) $found["startOffset"], strlen($latinLabel));
             if (strtolower($textAtOffset) !== strtolower($latinLabel)) {
                 continue;
             }

             // Starts as a copy of the accepted entity; the two diverge only in their "types".
             $initialEntity = $found;

             // Optional response fields, read once.
             $wikiLink = isset($raw["wikiLink"]) ? $raw["wikiLink"] : null;
             $englishId = isset($raw["entityEnglishId"]) ? $raw["entityEnglishId"] : null;
             $hasWikiLink = !($wikiLink == "" || $wikiLink == null);
             $isDutchLink = strpos((string) $wikiLink, "nl.wikipedia") !== false;
             $isEnglishLink = strpos((string) $wikiLink, "en.wikipedia") !== false;
             // English Wikipedia URL built from the English id, when one is given.
             $englishWikiFromId = ($englishId == null || $englishId == "")
                 ? null
                 : "http://en.wikipedia.org/wiki/" . $englishId;

             if (isset($raw["type"])) {
                 foreach ($raw["type"] as $dbpediaType) {
                     $typeURI = str_replace(" ", "", "DBpedia::" . $dbpediaType);
                     $type = array("typeURI" => $typeURI);
                     if (!$hasWikiLink) {
                         $type["entityURI"] = null;
                         $type["wikiURI"] = array("nl" => null, "en" => $englishWikiFromId);
                     } elseif ($isDutchLink) {
                         $type["entityURI"] = iconv('UTF-8', 'UTF-8//IGNORE', $this->getDutchResourceFromDutchWikipediaLink(utf8_encode($wikiLink)));
                         $type["wikiURI"] = array(
                             "nl" => iconv('UTF-8', 'UTF-8//IGNORE', $wikiLink),
                             "en" => $englishWikiFromId,
                         );
                     } else {
                         $englishEntityResource = $this->getEnglishResourceFromEnglishWikipediaLink($wikiLink);
                         $type["entityURI"] = iconv('UTF-8', 'UTF-8//IGNORE', $this->getDutchResourceFromEnglishResource($englishEntityResource));
                         $type["wikiURI"] = array(
                             "nl" => iconv('UTF-8', 'UTF-8//IGNORE', $this->getDutchWikipediaLinkFromDutchResource(utf8_encode($type["entityURI"]))),
                             "en" => $wikiLink,
                         );
                     }
                     $type["confidence"] = $noConfidence;
                     $found["types"][] = $type;

                     $initialEntity["types"][] = array(
                         "typeURI" => $typeURI,
                         "entityURI" => iconv('UTF-8', 'UTF-8//IGNORE', (string) $wikiLink),
                         "wikiURI" => array("nl" => null, "en" => null),
                         "confidence" => $noConfidence,
                     );
                 }
             }

             if (isset($raw["freebaseTypes"])) {
                 foreach ($raw["freebaseTypes"] as $freebaseType) {
                     $initialType = array(
                         "typeURI" => str_replace(" ", "", "Freebase::" . $freebaseType),
                         "entityURI" => isset($raw["freebaseId"])
                             ? iconv('UTF-8', 'UTF-8//IGNORE', "http://www.freebase.com" . $raw["freebaseId"])
                             : null,
                         "wikiURI" => array("en" => null, "nl" => null),
                         "confidence" => $noConfidence,
                     );

                     // The intermediate entity gets an untyped entry per Wikipedia link;
                     // identical entries are collapsed right after each insertion.
                     if ($isDutchLink) {
                         $type = array(
                             "typeURI" => null,
                             "entityURI" => iconv('UTF-8', 'UTF-8//IGNORE', $this->getDutchResourceFromDutchWikipediaLink(utf8_encode($wikiLink))),
                             "wikiURI" => array(
                                 "nl" => iconv('UTF-8', 'UTF-8//IGNORE', $wikiLink),
                                 "en" => $englishWikiFromId,
                             ),
                             "confidence" => $noConfidence,
                         );
                         $found["types"][] = $type;
                         $found["types"] = array_map("unserialize", array_unique(array_map("serialize", $found["types"])));
                     }
                     if ($isEnglishLink) {
                         $englishResource = $this->getEnglishResourceFromEnglishWikipediaLink($wikiLink);
                         $dutchResource = iconv('UTF-8', 'UTF-8//IGNORE', $this->getDutchResourceFromEnglishResource($englishResource));
                         $type = array(
                             "typeURI" => null,
                             "entityURI" => $dutchResource,
                             "wikiURI" => array(
                                 "en" => utf8_decode($wikiLink),
                                 "nl" => iconv('UTF-8', 'UTF-8//IGNORE', $this->getDutchWikipediaLinkFromDutchResource(utf8_encode($dutchResource))),
                             ),
                             "confidence" => $noConfidence,
                         );
                         $found["types"][] = $type;
                         $found["types"] = array_map("unserialize", array_unique(array_map("serialize", $found["types"])));
                     }

                     $initialEntity["types"][] = $initialType;
                 }
             }

             if (!isset($raw["freebaseTypes"]) && !isset($raw["type"])) {
                 // No type information at all: still record one untyped entry.
                 $type = array("typeURI" => null);
                 if (!$hasWikiLink) {
                     $type["entityURI"] = null;
                     $type["wikiURI"] = array("nl" => null, "en" => $englishWikiFromId);
                 } elseif ($isDutchLink) {
                     $type["entityURI"] = iconv('UTF-8', 'UTF-8//IGNORE', $this->getDutchResourceFromDutchWikipediaLink(utf8_encode($wikiLink)));
                     // NOTE(review): unlike the typed branch above, the link is passed through
                     // utf8_encode() here; kept as found - confirm which variant is intended.
                     $type["wikiURI"] = array(
                         "nl" => iconv('UTF-8', 'UTF-8//IGNORE', utf8_encode($wikiLink)),
                         "en" => $englishWikiFromId,
                     );
                 } else {
                     $englishEntityResource = $this->getEnglishResourceFromEnglishWikipediaLink($wikiLink);
                     $type["entityURI"] = iconv('UTF-8', 'UTF-8//IGNORE', $this->getDutchResourceFromEnglishResource($englishEntityResource));
                     $type["wikiURI"] = array(
                         "nl" => iconv('UTF-8', 'UTF-8//IGNORE', $this->getDutchWikipediaLinkFromDutchResource(utf8_encode($type["entityURI"]))),
                         "en" => $wikiLink,
                     );
                 }
                 $type["confidence"] = $noConfidence;
                 $found["types"][] = $type;
                 $found["types"] = array_map("unserialize", array_unique(array_map("serialize", $found["types"])));

                 $initialEntity["types"][] = array(
                     "typeURI" => null,
                     "entityURI" => iconv('UTF-8', 'UTF-8//IGNORE', (string) $wikiLink),
                     "wikiURI" => array("en" => null, "nl" => null),
                     "confidence" => $noConfidence,
                 );
             }

             // Clean-up pass: decide whether the entity is kept unchanged, kept under a
             // corrected label (provenance "thd"), or left out of "cleanedUpEntities".
             if ($this->searchForStopWords($label) == false) {
                 $cleanedLabel = null;
                 $keepAsIs = false;
                 $stopWordTrimmed = $this->searchForStopWordsEndLabel($label);
                 if (strtolower($stopWordTrimmed) == strtolower($label)) {
                     if ($this->searchAbbrWords($label) == false) {
                         $endsWithHyphen = substr($label, -1) === "-";
                         $lastWordChecked = $this->searchForLastWordLength($label);
                         if (strtolower($lastWordChecked) == strtolower($label)) {
                             if (strlen($label) > 2) {
                                 if ($endsWithHyphen) {
                                     $cleanedLabel = trim(utf8_decode($lastWordChecked), "-");
                                 } else {
                                     $keepAsIs = true;
                                 }
                             } elseif (strtoupper($label) == $label) {
                                 // Labels of one or two characters are kept only when all upper-case.
                                 $keepAsIs = true;
                             }
                         } else {
                             $cleanedLabel = utf8_decode($lastWordChecked);
                             if ($endsWithHyphen) {
                                 $cleanedLabel = trim($cleanedLabel, "-");
                             }
                         }
                     }
                 } else {
                     $cleanedLabel = utf8_decode($stopWordTrimmed);
                 }

                 if ($cleanedLabel !== null) {
                     $result["cleanedUpEntities"][] = array(
                         "label" => $cleanedLabel,
                         "startOffset" => $found["startOffset"],
                         "endOffset" => (int) $found["startOffset"] + strlen($cleanedLabel),
                         "confidence" => null,
                         "provenance" => "thd",
                         "types" => $found["types"],
                     );
                 } elseif ($keepAsIs) {
                     $result["cleanedUpEntities"][] = $found;
                 }
             }

             $result["intermediateEntities"][] = $found;
             $result["initialEntities"][] = $initialEntity;
         }
     }

     if (isset($response['response']['coarseTopics'])) {
         $result["topics"] = array();
         foreach ($response['response']['coarseTopics'] as $coarseTopic) {
             $result["topics"][] = array(
                 "label" => $coarseTopic["label"],
                 "wikiLink" => $coarseTopic["wikiLink"],
                 "score" => $coarseTopic["score"],
             );
         }
     }

     return $result;
 }