/**
 * Demonstrates the life cycle of a custom entity dictionary: the dictionary is
 * (re)created, filled with a single entry, referenced from an analysis request,
 * inspected through the manager and finally deleted again.
 *
 * Every API result is dumped with print_r().
 */
function testEntityDictionary()
{
    $manager = new DictionaryManager();
    $dictionaryName = 'test_ents_php';

    // Start from a clean slate. A failing delete (the original note mentions a
    // missing dictionary) is deliberately ignored.
    try {
        print_r($manager->deleteDictionary($dictionaryName));
    } catch (Exception $ignored) {
    }

    // Create the dictionary and register one test entry.
    print_r($manager->createDictionary($dictionaryName, 'STEM', true, "eng"));

    $entries = array(
        array("id" => "TV_1", "text" => "BBC Panorama"),
    );
    print_r($manager->addEntries($dictionaryName, $entries));

    // A dictionary is activated for a request by adding its ID to the client.
    $client = new TextRazor();
    $client->addEntityDictionary($dictionaryName);

    $sample = 'Barclays misled shareholders and the public about one of the biggest investments in the banks history, a BBC Panorama investigation has found.';
    $analysis = $client->analyze($sample);

    // Entities matched during the analysis.
    print_r($analysis['response']['entities']);

    // Read-only helpers for looking at what is stored, followed by the clean-up.
    print_r($manager->getEntry($dictionaryName, "TV_1"));
    print_r($manager->allEntries($dictionaryName, 10));
    print_r($manager->getDictionary($dictionaryName));
    print_r($manager->allDictionaries());
    print_r($manager->deleteDictionary($dictionaryName));
}
/**
 * Requests a Twitter user timeline, keeps the tweets that mention at least one of
 * the given tags, analyses those with TextRazor and appends one `tweet` object per
 * distinct tweet text to $tweetArray.
 *
 * Only the first 20 timeline entries are inspected. '#' characters are removed from
 * the tweet text before tag matching and analysis. A tweet is only added when
 * TextRazor returns an entity carrying enrichment data; the latitude/longitude are
 * taken from the first such entity (null when the entity has no value for them).
 *
 * @param string     $getfield   Query string handed to TwitterAPIExchange::setGetfield().
 * @param mixed      $newsSource Passed through unchanged as first argument of `tweet`.
 * @param array      $settings   Constructor argument for TwitterAPIExchange.
 * @param string     $api_key    TextRazor API key.
 * @param array      $tweetArray Tweets collected so far (objects exposing ->stweet).
 * @param array|null $tags_array Substrings to look for; when null nothing is processed.
 *
 * @return array $tweetArray with the newly found tweets appended.
 */
function getNews($getfield, $newsSource, $settings, $api_key, $tweetArray, $tags_array)
{
    $url = 'https://api.twitter.com/1.1/statuses/user_timeline.json';
    $requestMethod = 'GET';
    $maxTweets = 20;
    $latitudeQuery = "fbase:/location/location/geolocation>/location/geocode/latitude";
    $longitudeQuery = "fbase:/location/location/geolocation>/location/geocode/longitude";
    $urlPattern = '#\\bhttps?://[^\\s()<>]+(?:\\([\\w\\d]+\\)|([^[:punct:]\\s]|/))#';

    // Without tags no tweet can qualify, so there is no point in calling any API.
    if (!isset($tags_array)) {
        return $tweetArray;
    }
    if (!is_array($tweetArray)) {
        $tweetArray = array();
    }

    $twitter = new TwitterAPIExchange($settings);
    $timelineJson = $twitter->setGetfield($getfield)->buildOauth($url, $requestMethod)->performRequest();
    $timeline = json_decode($timelineJson, true);

    // Invalid JSON decodes to null; bail out instead of indexing into it.
    if (!is_array($timeline)) {
        return $tweetArray;
    }

    for ($x = 0; $x < $maxTweets; $x++) {
        // Short timelines and error payloads have no text at this position.
        if (!isset($timeline[$x]['text'])) {
            continue;
        }
        $tweetText = str_replace('#', '', $timeline[$x]['text']);

        $matchesTag = false;
        foreach ($tags_array as $tag) {
            if (!is_scalar($tag)) {
                continue;
            }
            $tag = (string) $tag;
            // strpos() returns 0 for a hit at the very start, hence the strict
            // comparison. Empty tags are skipped because they would match everything.
            if ($tag !== '' && strpos($tweetText, $tag) !== false) {
                $matchesTag = true;
                break;
            }
        }
        if (!$matchesTag) {
            continue;
        }

        $strippedText = preg_replace($urlPattern, '', $tweetText);

        // A tweet counts as known when ANY stored tweet has the same URL-free text.
        $alreadyKnown = false;
        foreach ($tweetArray as $tw) {
            if ($tw->stweet === $strippedText) {
                $alreadyKnown = true;
                break;
            }
        }
        if ($alreadyKnown) {
            continue;
        }

        // NOTE(review): because of PREG_OFFSET_CAPTURE the value handed to `tweet`
        // is array(url, byteOffset), and the search starts at byte 3. Both are kept
        // as they were since consumers of `tweet` may rely on that shape - confirm.
        $matches = array();
        preg_match($urlPattern, $tweetText, $matches, PREG_OFFSET_CAPTURE, 3);
        $urlcontent = isset($matches[0]) ? $matches[0] : null;

        $textrazor = new TextRazor($api_key);
        $textrazor->addExtractor('entities');
        $textrazor->addExtractor('words');
        $textrazor->addEnrichmentQuery($latitudeQuery);
        $textrazor->addEnrichmentQuery($longitudeQuery);
        $analysis = $textrazor->analyze($tweetText);

        if (!isset($analysis['response']['entities'])) {
            continue;
        }

        foreach ($analysis['response']['entities'] as $entity) {
            if (!isset($entity['data'])) {
                continue;
            }
            $entity_data = $entity['data'];
            $latitude = isset($entity_data[$latitudeQuery][0]) ? $entity_data[$latitudeQuery][0] : null;
            $longitude = isset($entity_data[$longitudeQuery][0]) ? $entity_data[$longitudeQuery][0] : null;

            $tweetArray[] = new tweet($newsSource, $strippedText, $latitude, $longitude, $urlcontent);

            // One tweet per text: further entities of the same tweet would only
            // produce duplicates.
            break;
        }
    }

    return $tweetArray;
}
/**
 * Sends the description of $entity to TextRazor and converts the response into
 * entity lists and topics.
 *
 * Result keys (each only present when the response contains the matching section):
 *  - "intermediateEntities": entities with their resolved types,
 *  - "initialEntities":      the same entities with the unresolved type information,
 *  - "cleanedUpEntities":    entities that passed (or were relabelled by) the
 *                            stop-word / abbreviation / last-word filters,
 *  - "topics":               label, wikiLink and score of every coarse topic.
 *
 * Entities whose label is not found at the reported offset of the text are dropped.
 *
 * @param object $entity Object whose content["description"] holds the text to analyse.
 *
 * @return array
 */
public function processTextRazorApi($entity)
{
    require_once 'TextRazor.php';
    set_time_limit(5200);
    \DB::connection()->disableQueryLog();

    $text = $entity->content["description"];
    // utf8_decode() yields ISO-8859-1, i.e. one byte per character, which is what
    // the substr() offset check below works on.
    // NOTE(review): presumably the API offsets count characters - confirm.
    $initialDescription = utf8_decode($text);

    // NOTE(review): the API key is hard-coded in source; it should come from
    // configuration. Left unchanged here to keep the method's behaviour.
    $textrazor = new TextRazor("e6438f10fc2f974ff0a5b97969a4296fe637da043237aba49569bd58");
    $textrazor->addExtractor('entities,topics,words,phrases,dependency-trees,relations,entailments,senses');
    $response = $textrazor->analyze($text);

    $result = array();

    // the confidence score needs to be normalized 0 .. 1
    $initialMin = 0.5;
    $initialMax = 10;

    if (isset($response['response']['entities'])) {
        $result["intermediateEntities"] = array();
        $result["initialEntities"] = array();
        $result["cleanedUpEntities"] = array();

        foreach ($response['response']['entities'] as $raw) {
            $score = isset($raw["confidenceScore"]) ? $raw["confidenceScore"] : null;
            $wikiLink = isset($raw["wikiLink"]) ? $raw["wikiLink"] : null;

            $annotation = array(
                "label" => iconv('UTF-8', 'UTF-8//IGNORE', $raw["matchedText"]),
                "startOffset" => $raw["startingPos"],
                "endOffset" => $raw["endingPos"],
                "confidence" => ($score - $initialMin) / ($initialMax - $initialMin),
                "provenance" => "textrazor",
                "timestamp" => "00:00",
                "description" => "",
                "types" => array(),
            );
            $label = $annotation["label"];

            // Skip annotations whose label does not appear at the reported position.
            if (strtolower(substr($initialDescription, (int) $annotation["startOffset"], strlen($label))) != utf8_decode(strtolower($label))) {
                continue;
            }

            // Same base fields; only the attached types differ from $annotation.
            $initialEntity = $annotation;

            $hasDbpediaTypes = isset($raw["type"]);
            $hasFreebaseTypes = isset($raw["freebaseTypes"]);

            if ($hasDbpediaTypes) {
                foreach ($raw["type"] as $dbpediaType) {
                    $typeUri = str_replace(" ", "", "DBpedia::" . $dbpediaType);

                    $annotation["types"][] = $this->textRazorLinkedType($raw, $typeUri, false);
                    $initialEntity["types"][] = $this->textRazorInitialType(
                        $typeUri,
                        iconv('UTF-8', 'UTF-8//IGNORE', (string) $wikiLink),
                        array("nl" => null, "en" => null)
                    );
                }
            }

            if ($hasFreebaseTypes) {
                foreach ($raw["freebaseTypes"] as $freebaseType) {
                    $typeUri = str_replace(" ", "", "Freebase::" . $freebaseType);
                    $freebaseUri = isset($raw["freebaseId"])
                        ? iconv('UTF-8', 'UTF-8//IGNORE', "http://www.freebase.com" . $raw["freebaseId"])
                        : null;

                    if (strpos((string) $wikiLink, "nl.wikipedia") !== false) {
                        // Freebase types contribute an untyped entry that only carries the links.
                        $annotation["types"][] = $this->textRazorLinkedType($raw, null, false);
                        $annotation["types"] = $this->textRazorUniqueTypes($annotation["types"]);
                    }

                    if (strpos((string) $wikiLink, "en.wikipedia") !== false) {
                        $type = array();
                        $type["typeURI"] = null;
                        $englishResource = $this->getEnglishResourceFromEnglishWikipediaLink($wikiLink);
                        $type["entityURI"] = iconv('UTF-8', 'UTF-8//IGNORE', $this->getDutchResourceFromEnglishResource($englishResource));
                        $type["wikiURI"] = array();
                        $type["wikiURI"]["en"] = utf8_decode($wikiLink);
                        $type["wikiURI"]["nl"] = iconv('UTF-8', 'UTF-8//IGNORE', $this->getDutchWikipediaLinkFromDutchResource(utf8_encode($type["entityURI"])));
                        $type["confidence"]["score"] = null;
                        $type["confidence"]["bounds"] = null;

                        $annotation["types"][] = $type;
                        $annotation["types"] = $this->textRazorUniqueTypes($annotation["types"]);
                    }

                    $initialEntity["types"][] = $this->textRazorInitialType(
                        $typeUri,
                        $freebaseUri,
                        array("en" => null, "nl" => null)
                    );
                }
            }

            if (!$hasFreebaseTypes && !$hasDbpediaTypes) {
                // No type information at all: still record one untyped entry with the links.
                $annotation["types"][] = $this->textRazorLinkedType($raw, null, true);
                $annotation["types"] = $this->textRazorUniqueTypes($annotation["types"]);
                $initialEntity["types"][] = $this->textRazorInitialType(
                    null,
                    iconv('UTF-8', 'UTF-8//IGNORE', (string) $wikiLink),
                    array("en" => null, "nl" => null)
                );
            }

            $cleanedUpEntity = $this->textRazorCleanedUpEntity($annotation);
            if ($cleanedUpEntity !== null) {
                $result["cleanedUpEntities"][] = $cleanedUpEntity;
            }

            $result["intermediateEntities"][] = $annotation;
            $result["initialEntities"][] = $initialEntity;
        }
    }

    if (isset($response['response']['coarseTopics'])) {
        $result["topics"] = array();
        foreach ($response['response']['coarseTopics'] as $rawTopic) {
            $result["topics"][] = array(
                "label" => isset($rawTopic["label"]) ? $rawTopic["label"] : null,
                "wikiLink" => isset($rawTopic["wikiLink"]) ? $rawTopic["wikiLink"] : null,
                "score" => isset($rawTopic["score"]) ? $rawTopic["score"] : null,
            );
        }
    }

    return $result;
}

/**
 * Builds a resolved type entry (typeURI, entityURI, wikiURI{nl,en}, confidence) for
 * one raw TextRazor entity, based on its "wikiLink" and "entityEnglishId" fields.
 *
 * @param array       $raw             Raw entity from the TextRazor response.
 * @param string|null $typeUri         Value stored under "typeURI".
 * @param bool        $encodeDutchLink Whether a Dutch wiki link is passed through
 *                                     utf8_encode() before being stored (the
 *                                     untyped fallback did this, the typed path did not).
 *
 * @return array
 */
private function textRazorLinkedType($raw, $typeUri, $encodeDutchLink)
{
    $wikiLink = isset($raw["wikiLink"]) ? $raw["wikiLink"] : null;
    $englishId = isset($raw["entityEnglishId"]) ? $raw["entityEnglishId"] : null;

    // Loose comparisons on purpose: same blank test as before.
    $englishWikiUri = ($englishId == null || $englishId == "")
        ? null
        : "http://en.wikipedia.org/wiki/" . $englishId;

    $type = array();
    $type["typeURI"] = $typeUri;

    if ($wikiLink == "" || $wikiLink == null) {
        $type["entityURI"] = null;
        $type["wikiURI"] = array();
        $type["wikiURI"]["nl"] = null;
        $type["wikiURI"]["en"] = $englishWikiUri;
    } elseif (strpos($wikiLink, "nl.wikipedia") !== false) {
        $type["entityURI"] = iconv('UTF-8', 'UTF-8//IGNORE', $this->getDutchResourceFromDutchWikipediaLink(utf8_encode($wikiLink)));
        $type["wikiURI"] = array();
        $type["wikiURI"]["nl"] = iconv('UTF-8', 'UTF-8//IGNORE', $encodeDutchLink ? utf8_encode($wikiLink) : $wikiLink);
        $type["wikiURI"]["en"] = $englishWikiUri;
    } else {
        // Any other link is treated as an English Wikipedia link.
        $englishEntityResource = $this->getEnglishResourceFromEnglishWikipediaLink($wikiLink);
        $type["entityURI"] = iconv('UTF-8', 'UTF-8//IGNORE', $this->getDutchResourceFromEnglishResource($englishEntityResource));
        $type["wikiURI"] = array();
        $type["wikiURI"]["nl"] = iconv('UTF-8', 'UTF-8//IGNORE', $this->getDutchWikipediaLinkFromDutchResource(utf8_encode($type["entityURI"])));
        $type["wikiURI"]["en"] = $wikiLink;
    }

    $type["confidence"]["score"] = null;
    $type["confidence"]["bounds"] = null;

    return $type;
}

/**
 * Builds an unresolved ("initial") type entry with an empty confidence.
 *
 * @param string|null $typeUri   Value stored under "typeURI".
 * @param mixed       $entityUri Value stored under "entityURI".
 * @param array       $wikiUri   Value stored under "wikiURI" (key order is kept).
 *
 * @return array
 */
private function textRazorInitialType($typeUri, $entityUri, array $wikiUri)
{
    return array(
        "typeURI" => $typeUri,
        "entityURI" => $entityUri,
        "wikiURI" => $wikiUri,
        "confidence" => array("score" => null, "bounds" => null),
    );
}

/**
 * Removes structurally identical type entries by comparing their serialized form.
 * array_unique() keeps the original keys, so the returned list may have gaps.
 *
 * @param array $types
 *
 * @return array
 */
private function textRazorUniqueTypes(array $types)
{
    return array_map("unserialize", array_unique(array_map("serialize", $types)));
}

/**
 * Applies the stop-word, abbreviation and last-word filters to an annotation.
 *
 * @param array $entity Annotation with "label", "startOffset" and "types".
 *
 * @return array|null The annotation itself, a relabelled copy, or null when the
 *                    annotation is filtered out.
 */
private function textRazorCleanedUpEntity(array $entity)
{
    $label = $entity["label"];

    if ($this->searchForStopWords($label) != false) {
        return null;
    }

    if (strtolower($this->searchForStopWordsEndLabel($label)) != strtolower($label)) {
        return $this->textRazorRelabel($entity, utf8_decode($this->searchForStopWordsEndLabel($label)));
    }

    if ($this->searchAbbrWords($label) != false) {
        return null;
    }

    $endsWithHyphen = substr($label, -1) == "-";

    if (strtolower($this->searchForLastWordLength($label)) != strtolower($label)) {
        $newLabel = utf8_decode($this->searchForLastWordLength($label));

        return $this->textRazorRelabel($entity, $endsWithHyphen ? trim($newLabel, "-") : $newLabel);
    }

    if (strlen($label) > 2) {
        if ($endsWithHyphen) {
            return $this->textRazorRelabel($entity, trim(utf8_decode($this->searchForLastWordLength($label)), "-"));
        }

        return $entity;
    }

    // Labels of at most two bytes are only kept when they are all upper case.
    return (strtoupper($label) == $label) ? $entity : null;
}

/**
 * Creates the cleaned-up variant of an annotation for a new label: the end offset
 * is recomputed from the start offset and the byte length of the new label, the
 * confidence is cleared and the provenance is set to "thd".
 *
 * @param array  $entity Annotation providing "startOffset" and "types".
 * @param string $label  New label.
 *
 * @return array
 */
private function textRazorRelabel(array $entity, $label)
{
    return array(
        "label" => $label,
        "startOffset" => $entity["startOffset"],
        "endOffset" => (int) $entity["startOffset"] + strlen($label),
        "confidence" => null,
        "provenance" => "thd",
        "types" => $entity["types"],
    );
}