예제 #1
0
/**
 * Builds the {"nodes": [...], "links": [...]} structure for the D3 ontology graph.
 *
 * Works in two passes:
 *  1. Each input word is mapped to a concept ID; when that ID exists in the
 *     "CONCEPTS" index of MODEL_QA_ONTOLOGY it becomes a node (created with 1 as
 *     the last createNewConceptObj() argument).
 *  2. For every node collected in pass 1, the relations where it is the source
 *     and where it is the target are turned into links. The concept at the other
 *     end is added as a node (last createNewConceptObj() argument 2) unless
 *     $mainConceptsOnly or the text input type forbids extending the graph.
 *     Several relations between the same two nodes share one link whose
 *     "link_verb" is a comma separated list of verbs.
 *
 * NOTE: `$minFreq = 0` is declared before required parameters, so the default can
 * never actually be used (and PHP 8 reports a deprecation). The signature is left
 * untouched so existing callers keep working.
 *
 * @param mixed  $MODEL_QA_ONTOLOGY   Not used; model data is read through getModelEntryFromMemory().
 * @param string $inputType           "SEARCH_RESULTS_TEXT_ARRAY" => $searchResultTextArr holds space separated
 *                                    texts; any other value => its keys are the words (or $query is used
 *                                    as-is when $isPhraseSearch is true).
 * @param array  $searchResultTextArr Texts or word-keyed array, depending on $inputType.
 * @param int    $minFreq             Concepts whose 'frequency' is below this value are skipped in pass 1.
 * @param array  $widthHeigthArr      [width, height] of the drawing area; new nodes are placed randomly
 *                                    within +/-100 of its centre.
 * @param string $lang                "EN" selects English labels/verbs, anything else the Arabic ones.
 * @param bool   $mainConceptsOnly    When true, relations to concepts that are not already nodes are ignored.
 * @param bool   $isPhraseSearch      When true (and input is not text), $query is looked up as one phrase.
 * @param bool   $isQuestion          Not used by this function.
 * @param string $query               The phrase used when $isPhraseSearch is true.
 *
 * @return array array("nodes" => list of node arrays, "links" => list of link arrays with the keys
 *               "source", "target", "link_verb" and "link_frequency").
 */
function ontologyTextToD3Graph($MODEL_QA_ONTOLOGY, $inputType, $searchResultTextArr, $minFreq = 0, $widthHeigthArr, $lang, $mainConceptsOnly = false, $isPhraseSearch = false, $isQuestion = false, $query = "")
{
    global $thing_class_name_ar, $is_a_relation_name_ar;

    $isTextInput = $inputType == "SEARCH_RESULTS_TEXT_ARRAY";
    $graphNodes = array();
    $graphLinks = array();

    // Start positions: a 200x200 box around the centre of the drawing area.
    // Cast to int so rand() never receives a fractional float (odd width/height).
    $width = $widthHeigthArr[0];
    $height = $widthHeigthArr[1];
    $startLocationXMin = (int) ($width / 2 - 100);
    $startLocationXMax = (int) ($width / 2 + 100);
    $startLocationYMin = (int) ($height / 2 - 100);
    $startLocationYMax = (int) ($height / 2 + 100);

    // Node ids must be zero based for D3 (otherwise o.target.weight is NULL).
    // NOTE(review): the counter is never incremented here, so createNewConceptObj()
    // presumably receives it by reference and advances it - confirm.
    $nodeSerialNumber = 0;

    ////// PASS 1: concepts found directly in the input
    foreach ($searchResultTextArr as $text) {
        if ($isTextInput) {
            $textWordsArr = preg_split("/ /", $text);
        } elseif (!$isPhraseSearch) {
            // extended query: the words are the keys of the input array
            $textWordsArr = array_keys($searchResultTextArr);
        } else {
            // phrase should be checked as is
            $textWordsArr = array($query);
        }

        foreach ($textWordsArr as $word) {
            if ($lang == "EN") {
                $word = strtolower(cleanAndTrim($word));
                // translate English name to arabic concept name/id
                $wordConveretedToConceptID = getModelEntryFromMemory("ALL", "MODEL_QA_ONTOLOGY", "CONCEPTS_EN_AR_NAME_MAP", $word);
            } else {
                $wordConveretedToConceptID = convertWordToConceptID($word);
            }

            if (!modelEntryExistsInMemory("ALL", "MODEL_QA_ONTOLOGY", "CONCEPTS", $wordConveretedToConceptID)) {
                continue;
            }

            $mainConceptArr = getModelEntryFromMemory("ALL", "MODEL_QA_ONTOLOGY", "CONCEPTS", $wordConveretedToConceptID);

            if ($mainConceptArr['frequency'] < $minFreq) {
                continue;
            }
            // the root "thing" class is never drawn
            if ($wordConveretedToConceptID == $thing_class_name_ar) {
                continue;
            }
            if (isset($graphNodes[$wordConveretedToConceptID])) {
                continue;
            }

            $finalNodeLabel = $lang == "EN" ? $mainConceptArr['label_en'] : $mainConceptArr['label_ar'];
            $randomXLocation = rand($startLocationXMin, $startLocationXMax);
            $randomYLocation = rand($startLocationYMin, $startLocationYMax);
            $graphNodes[$wordConveretedToConceptID] = createNewConceptObj($nodeSerialNumber, $lang, $finalNodeLabel, $mainConceptArr, $randomXLocation, $randomYLocation, 1);
        }

        // For non-text input the word list does not depend on the current element,
        // so one pass already covers everything; further passes would only repeat
        // the same lookups and find every node already present.
        if (!$isTextInput) {
            break;
        }
    }

    $tooManyConcepts = count($graphNodes) > 200;
    $ONTOLOGY_RELATIONS = getModelEntryFromMemory("ALL", "MODEL_QA_ONTOLOGY", "RELATIONS", "");
    $linksHashLookupTable = array();

    ////// PASS 2: relations of the pass-1 concepts
    // Iterate over a snapshot of the keys: nodes added below must not be expanded themselves.
    foreach (array_keys($graphNodes) as $concept) {
        $conceptID = convertWordToConceptID($concept);
        $relationsOfConceptAsSource = getModelEntryFromMemory("ALL", "MODEL_QA_ONTOLOGY", "GRAPH_INDEX_SOURCES", $conceptID);
        $relationsOfConceptAsTarget = getModelEntryFromMemory("ALL", "MODEL_QA_ONTOLOGY", "GRAPH_INDEX_TARGETS", $conceptID);

        // A concept may have no entry in either index; treat that as "no relations"
        // instead of handing a non-array to foreach.
        if (empty($relationsOfConceptAsSource)) {
            $relationsOfConceptAsSource = array();
        }
        if (empty($relationsOfConceptAsTarget)) {
            $relationsOfConceptAsTarget = array();
        }

        // --- outgoing relations: $concept --verb--> $object
        foreach ($relationsOfConceptAsSource as $relArr) {
            $verb = $relArr["link_verb"];
            $object = $relArr["target"];

            // ignore is-a thing relations
            if ($verb == $is_a_relation_name_ar && $object == $thing_class_name_ar) {
                continue;
            }
            // on crowded graphs drop all is-a relations
            if ($tooManyConcepts && $verb == $is_a_relation_name_ar) {
                continue;
            }
            // Main-concepts-only mode and search-result text never extend the graph:
            // only relations to concepts that are already nodes are kept.
            if (($mainConceptsOnly || $isTextInput) && !isset($graphNodes[$object])) {
                continue;
            }

            $randomXLocation = rand($startLocationXMin, $startLocationXMax);
            $randomYLocation = rand($startLocationYMin, $startLocationYMax);

            $relHashID = buildRelationHashID($conceptID, $verb, $object);
            $fullRelationArr = isset($ONTOLOGY_RELATIONS[$relHashID]) ? $ONTOLOGY_RELATIONS[$relHashID] : array();

            $objectConceptArr = getModelEntryFromMemory("ALL", "MODEL_QA_ONTOLOGY", "CONCEPTS", $object);
            $finalNodeLabel = $objectConceptArr['label_ar'];
            if ($lang == "EN") {
                $finalNodeLabel = formatEnglishConcept($objectConceptArr['label_en']);
                $verb = isset($fullRelationArr['VERB_TRANSLATION_EN']) ? $fullRelationArr['VERB_TRANSLATION_EN'] : null;
            }

            if (!isset($graphNodes[$object])) {
                $graphNodes[$object] = createNewConceptObj($nodeSerialNumber, $lang, $finalNodeLabel, $objectConceptArr, $randomXLocation, $randomYLocation, 2);
            }

            $linkArr = array(
                "source" => $graphNodes[$concept]["id"],
                "target" => $graphNodes[$object]["id"],
                "link_verb" => $verb,
                "link_frequency" => isset($fullRelationArr['FREQUENCY']) ? $fullRelationArr['FREQUENCY'] : null,
            );

            //////// HANDLING MULTIPLE LINKS BETWEEN SAME NODES BEFORE ASSIGNING LINK
            $arrHash = getArrayHashForFields($linkArr, array('source', 'target'));
            if (!isset($linksHashLookupTable[$arrHash])) {
                $graphLinks[] = $linkArr;
                $linksHashLookupTable[$arrHash] = count($graphLinks) - 1;
            } else {
                // same node pair already linked: append the verb unless it is listed already
                $linkIndex = $linksHashLookupTable[$arrHash];
                if (strpos($graphLinks[$linkIndex]['link_verb'], "{$verb}") === false) {
                    $graphLinks[$linkIndex]['link_verb'] .= "," . $verb;
                }
            }
        }

        // --- incoming relations: $subject --verb--> $concept
        foreach ($relationsOfConceptAsTarget as $relArr) {
            $verb = $relArr["link_verb"];
            $subject = $relArr["source"];

            if ($tooManyConcepts && $verb == $is_a_relation_name_ar) {
                continue;
            }
            // Same "do not extend" rule as above. The other end of an incoming
            // relation is $subject (the previous code tested a stale $object
            // left over from the outgoing-relations loop).
            if (($mainConceptsOnly || $isTextInput) && !isset($graphNodes[$subject])) {
                continue;
            }

            $relHashID = buildRelationHashID($subject, $verb, $concept);
            $fullRelationArr = isset($ONTOLOGY_RELATIONS[$relHashID]) ? $ONTOLOGY_RELATIONS[$relHashID] : array();

            $randomXLocation = rand($startLocationXMin, $startLocationXMax);
            $randomYLocation = rand($startLocationYMin, $startLocationYMax);

            $subjectConceptArr = getModelEntryFromMemory("ALL", "MODEL_QA_ONTOLOGY", "CONCEPTS", $subject);
            $finalNodeLabel = $subjectConceptArr['label_ar'];
            if ($lang == "EN") {
                $finalNodeLabel = formatEnglishConcept($subjectConceptArr['label_en']);
                $verb = isset($fullRelationArr['VERB_TRANSLATION_EN']) ? $fullRelationArr['VERB_TRANSLATION_EN'] : null;
            }

            if (!isset($graphNodes[$subject])) {
                $graphNodes[$subject] = createNewConceptObj($nodeSerialNumber, $lang, $finalNodeLabel, $subjectConceptArr, $randomXLocation, $randomYLocation, 2);
            }

            // Relation entries expose their frequency as 'FREQUENCY' (as read for
            // outgoing relations); lower-case 'frequency' is the concept key.
            $linkArr = array(
                "source" => $graphNodes[$subject]["id"],
                "target" => $graphNodes[$concept]["id"],
                "link_verb" => $verb,
                "link_frequency" => isset($fullRelationArr['FREQUENCY']) ? $fullRelationArr['FREQUENCY'] : null,
            );

            //////// HANDLING MULTIPLE LINKS BETWEEN SAME NODES BEFORE ASSIGNING LINK
            $arrHash = getArrayHashForFields($linkArr, array('source', 'target'));
            if (!isset($linksHashLookupTable[$arrHash])) {
                $graphLinks[] = $linkArr;
                $linksHashLookupTable[$arrHash] = count($graphLinks) - 1;
            } else {
                $linkIndex = $linksHashLookupTable[$arrHash];
                if (strpos($graphLinks[$linkIndex]['link_verb'], "{$verb}") === false) {
                    $graphLinks[$linkIndex]['link_verb'] .= "," . $verb;
                }
            }
        }
    }

    // D3 expects plain zero-based lists, so the concept-ID keys are dropped.
    return array(
        "nodes" => array_values($graphNodes),
        "links" => $graphLinks,
    );
}
예제 #2
0
/**
 * Returns the ontology concepts mentioned in a text, keyed by concept ID.
 *
 * The text is split on single spaces; every word is mapped to a concept ID
 * (through the English->Arabic name map for "EN", through
 * convertWordToConceptID() otherwise) and kept when that ID exists in the
 * "CONCEPTS" index of MODEL_QA_ONTOLOGY. The root "thing" class is skipped.
 * A concept that occurs more than once is overwritten by its latest occurrence.
 *
 * @param string $text Space separated words.
 * @param string $lang "EN" for English input/labels, anything else for Arabic.
 *
 * @return array Map of concept ID => concept object built by createNewConceptObj().
 */
function getConceptsFoundInText($text, $lang)
{
    global $thing_class_name_ar;

    $conceptsInTextArr = array();

    // These were previously passed to createNewConceptObj() without ever being
    // defined. The serial number is zero based, as in ontologyTextToD3Graph();
    // NOTE(review): createNewConceptObj() presumably advances it by reference - confirm.
    $nodeSerialNumber = 0;
    // No drawing area is known here, so no position is assigned (null is what the
    // undefined variables evaluated to before).
    $randomXLocation = null;
    $randomYLocation = null;

    foreach (preg_split("/ /", $text) as $word) {
        if ($lang == "EN") {
            $word = strtolower(cleanAndTrim($word));
            // translate English name to arabic concept name/id
            $wordConveretedToConceptID = getModelEntryFromMemory("ALL", "MODEL_QA_ONTOLOGY", "CONCEPTS_EN_AR_NAME_MAP", $word);
        } else {
            $wordConveretedToConceptID = convertWordToConceptID($word);
        }

        if (!modelEntryExistsInMemory("ALL", "MODEL_QA_ONTOLOGY", "CONCEPTS", $wordConveretedToConceptID)) {
            continue;
        }
        // the root "thing" class is not reported as a concept of the text
        if ($wordConveretedToConceptID == $thing_class_name_ar) {
            continue;
        }

        $mainConceptArr = getModelEntryFromMemory("ALL", "MODEL_QA_ONTOLOGY", "CONCEPTS", $wordConveretedToConceptID);
        $finalNodeLabel = $lang == "EN" ? $mainConceptArr['label_en'] : $mainConceptArr['label_ar'];

        $conceptsInTextArr[$wordConveretedToConceptID] = createNewConceptObj($nodeSerialNumber, $lang, $finalNodeLabel, $mainConceptArr, $randomXLocation, $randomYLocation, 1);
    }

    return $conceptsInTextArr;
}
예제 #3
0
/**
 * Suggests similar words for query words that are unknown to the models.
 *
 * A query word (the keys of $queryWordsWithoutDerivation; the values are
 * ignored) counts as unknown when it is longer than two characters and is
 * found neither in the "INVERTED_INDEX" of MODEL_SEARCH for $lang nor, after
 * conversion to a concept ID, in the "CONCEPTS" index of MODEL_QA_ONTOLOGY.
 *
 * @param string $lang                        Language passed to the search model lookup and to getSimilarWords().
 * @param array  $queryWordsWithoutDerivation Map whose keys are the query words.
 *
 * @return mixed Whatever getSimilarWords() returns for the unknown words.
 */
function postResultSuggestions($lang, $queryWordsWithoutDerivation)
{
    $unknownWordSet = array();

    foreach ($queryWordsWithoutDerivation as $candidate => $ignored) {
        // very short words are not worth a suggestion
        if (mb_strlen($candidate) <= 2) {
            continue;
        }

        // known to the search index?
        if (modelEntryExistsInMemory($lang, "MODEL_SEARCH", "INVERTED_INDEX", $candidate)) {
            continue;
        }

        // known as an ontology concept?
        $candidateConceptID = convertWordToConceptID($candidate);
        if (modelEntryExistsInMemory("ALL", "MODEL_QA_ONTOLOGY", "CONCEPTS", $candidateConceptID)) {
            continue;
        }

        $unknownWordSet[$candidate] = 1;
    }

    // look up similar words by min-edit-distance
    $unknownWords = array_keys($unknownWordSet);

    return getSimilarWords($lang, $unknownWords);
}