コード例 #1
0
/**
 * Collects additional query terms by following ontology relations of the query words.
 *
 * Only words whose PoS tag matches NN, VB or a digit run are considered (the digit
 * case lets plain, untagged phrase queries through). For each such word:
 *  - if the word maps to a known concept and the query is not a question that
 *    already contains a verb, the sources of inbound is-a relations are added;
 *    for questions, the targets of outbound is-a relations are added as well;
 *  - for non-question queries, a concept for which the word is a registered
 *    synonym is added;
 *  - for questions, verb-index statements for the word are inspected and, when
 *    one side of a statement is already part of the query, the other side is added.
 *
 * @param array  $extendedQueryArr Query words mapped to their PoS tags (word => tag).
 * @param string $lang             "EN" for English; any other value is handled as Arabic.
 * @param bool   $isQuestion       Whether the query is a question.
 *
 * @return array De-duplicated related concept names. Keys are those kept by
 *               array_unique(), so they are not guaranteed to be contiguous.
 */
function extendQueryWordsByConceptTaxRelations($extendedQueryArr, $lang, $isQuestion = false)
{
    global $is_a_relation_name_ar, $thing_class_name_en, $thing_class_name_ar;
    $conceptsFromTaxRelations = array();
    if ($lang == "EN") {
        $thing_class_name = strtolower($thing_class_name_en);
    } else {
        $thing_class_name = $thing_class_name_ar;
    }
    $questionIncludesVerb = $isQuestion && doesQuestionIncludesVerb($extendedQueryArr);
    foreach ($extendedQueryArr as $word => $pos) {
        // Ignore any term which is neither a noun nor a verb.
        // [0-9]+ lets plain, non-PoS-tagged queries through (phrase search).
        if (!preg_match("/NN|VB|[0-9]+/", $pos)) {
            continue;
        }
        // Keep the word and the concept ID apart: in English the unmapped word
        // is still needed further down for the verb search.
        if ($lang == "EN") {
            // Corresponding Arabic concept name - only set if the word is a concept.
            $conceptIDStr = getModelEntryFromMemory("ALL", "MODEL_QA_ONTOLOGY", "CONCEPTS_EN_AR_NAME_MAP", $word);
        } else {
            $conceptIDStr = $word;
        }
        // If the question includes a verb, the user is not looking for an is-a relation.
        if (!$questionIncludesVerb && modelEntryExistsInMemory("ALL", "MODEL_QA_ONTOLOGY", "CONCEPTS", $conceptIDStr)) {
            $inboundRelationsArr = getModelEntryFromMemory("ALL", "MODEL_QA_ONTOLOGY", "GRAPH_INDEX_TARGETS", $conceptIDStr);
            $outboundRelationsArr = getModelEntryFromMemory("ALL", "MODEL_QA_ONTOLOGY", "GRAPH_INDEX_SOURCES", $conceptIDStr);
            // A concept may have no entry in one of the graph indexes; iterate
            // over an empty list then instead of handing a non-iterable value to foreach.
            if (!is_array($inboundRelationsArr) && !($inboundRelationsArr instanceof Traversable)) {
                $inboundRelationsArr = array();
            }
            if (!is_array($outboundRelationsArr) && !($outboundRelationsArr instanceof Traversable)) {
                $outboundRelationsArr = array();
            }
            // Inbound is-a relations, e.g. "X is an animal" where "animal" is $word.
            foreach ($inboundRelationsArr as $relationArr) {
                $subject = $relationArr['source'];
                $verbAR = $relationArr['link_verb'];
                $subjectConceptArr = getModelEntryFromMemory("ALL", "MODEL_QA_ONTOLOGY", "CONCEPTS", $subject);
                if ($lang == "EN") {
                    $subject = trim(removeBasicEnglishStopwordsNoNegation($subjectConceptArr['label_en']));
                }
                // Clean and replace the concept name.
                $subject = cleanWordnetCollocation($subject);
                //TODO: check if the $subject != $thing_class_name test is needed
                if ($verbAR == $is_a_relation_name_ar && $subject != $thing_class_name) {
                    // Ignore phrase concepts that merely contain the current concept,
                    // e.g. "punishment" inside "punishment of God".
                    if (strpos($subject, $conceptIDStr) === false) {
                        $conceptsFromTaxRelations[] = $subject;
                    }
                }
            }
            if ($isQuestion) {
                // Outbound is-a relations, e.g. "X is a person" where X is $word.
                foreach ($outboundRelationsArr as $relationArr) {
                    $verbAR = $relationArr['link_verb'];
                    $object = $relationArr['target'];
                    $objectConceptArr = getModelEntryFromMemory("ALL", "MODEL_QA_ONTOLOGY", "CONCEPTS", $object);
                    if ($lang == "EN") {
                        $object = trim(removeBasicEnglishStopwordsNoNegation($objectConceptArr['label_en']));
                    }
                    // Clean and replace the concept name.
                    $object = cleanWordnetCollocation($object);
                    if ($verbAR == $is_a_relation_name_ar && $object != $thing_class_name) {
                        // Ignore phrase concepts that merely contain the current concept,
                        // e.g. "punishment" inside "punishment of God".
                        if (strpos($object, $conceptIDStr) === false) {
                            $conceptsFromTaxRelations[] = $object;
                        }
                    }
                }
            }
        }
        if (!$isQuestion) {
            // Add the concept name to the query if the current query word is
            // registered as a synonym of that concept.
            if (modelEntryExistsInMemory("ALL", "MODEL_QA_ONTOLOGY", "SYNONYMS_INDEX", $word)) {
                $conceptNameAR = getModelEntryFromMemory("ALL", "MODEL_QA_ONTOLOGY", "SYNONYMS_INDEX", $word);
                $finalConceptName = $conceptNameAR;
                if ($lang == "EN") {
                    $conceptArr = getModelEntryFromMemory("ALL", "MODEL_QA_ONTOLOGY", "CONCEPTS", $conceptNameAR);
                    $finalConceptName = $conceptArr['label_en'];
                }
                $conceptsFromTaxRelations[] = $finalConceptName;
            }
        }
        // The $lang == "AR" test is needed because Arabic words are not PoS tagged yet.
        if ($isQuestion && ($lang == "AR" || posIsVerb($pos))) {
            if (($verbArr = getModelEntryFromMemory("ALL", "MODEL_QA_ONTOLOGY", "VERB_INDEX", $word)) != null || ($verbArr = isWordPartOfAVerbInVerbIndex($word, $lang))) {
                foreach ($verbArr as $verbSTArr) {
                    $subject = $verbSTArr['SUBJECT'];
                    $object = $verbSTArr['OBJECT'];
                    if ($lang == "EN") {
                        // Resolve each side of the statement from its own entry.
                        // NOTE(review): assumes SUBJECT/OBJECT hold concept IDs, as
                        // 'source'/'target' do in the is-a loops - confirm against
                        // the code that builds VERB_INDEX.
                        $subjectConceptArr = getModelEntryFromMemory("ALL", "MODEL_QA_ONTOLOGY", "CONCEPTS", $subject);
                        $objectConceptArr = getModelEntryFromMemory("ALL", "MODEL_QA_ONTOLOGY", "CONCEPTS", $object);
                        $object = trim(removeBasicEnglishStopwordsNoNegation($objectConceptArr['label_en']));
                        $subject = trim(removeBasicEnglishStopwordsNoNegation($subjectConceptArr['label_en']));
                    }
                    // "X is_a Thing" adds no value, so skip it.
                    if ($object == $thing_class_name) {
                        continue;
                    }
                    // When one side of the statement is already in the query,
                    // contribute the other side.
                    if (isset($extendedQueryArr[$subject])) {
                        $conceptsFromTaxRelations[] = $object;
                    } else {
                        if (isset($extendedQueryArr[$object])) {
                            $conceptsFromTaxRelations[] = $subject;
                        }
                    }
                }
            }
        }
    }
    $conceptsFromTaxRelations = array_unique($conceptsFromTaxRelations);
    return $conceptsFromTaxRelations;
}
コード例 #2
0
/**
 * Gathers what the loaded WordNet model stores for a single word.
 *
 * The word is lower-cased and looked up in $MODEL_WORDNET['INDEX']. For every
 * part of speech listed there, each synset contributes its words (synonyms),
 * its semantic type, its glossary and one relationship record per pointer.
 *
 * @param string $wordToSearchFor            Word to look up.
 * @param string $includeOnlyRelationsOfType Accepted but not used by this function.
 *
 * @return array|false False when the word is empty or not in the index, otherwise
 *                     an array with the keys SYNONYMS, SEMANTIC_TYPES, RELATIONSHIPS,
 *                     WORD, POS and GLOSSARY. All but WORD are keyed by part of
 *                     speech; GLOSSARY holds the glossary of the last synset seen
 *                     for that part of speech.
 *
 * @throws Exception When $MODEL_WORDNET is empty.
 */
function getWordnetEntryByWordString($wordToSearchFor, $includeOnlyRelationsOfType = "")
{
    global $MODEL_WORDNET;

    if (empty($MODEL_WORDNET)) {
        throw new Exception("Wordnet module is not loaded!");
    }
    if (empty($wordToSearchFor)) {
        return false;
    }

    $wordToSearchFor = strtolower($wordToSearchFor);

    // Word is not in WordNet at all.
    if (!isset($MODEL_WORDNET['INDEX'][$wordToSearchFor])) {
        return false;
    }

    $wordnetInfoArr = array(
        'SYNONYMS' => array(),
        'SEMANTIC_TYPES' => array(),
        'RELATIONSHIPS' => array(),
        'WORD' => $wordToSearchFor,
    );

    foreach ($MODEL_WORDNET['INDEX'][$wordToSearchFor] as $pos => $indexEntry) {
        $wordnetInfoArr['POS'][$pos] = 1;

        // Walk every synset the index lists for this part of speech.
        foreach ($indexEntry['SYNSETS'] as $synsetNo => $synsetOffset) {
            $synset = $MODEL_WORDNET['DATA'][$pos][$synsetOffset];

            // Words of later synsets are placed in front of those already collected.
            $synsetWords = array_keys($synset['WORDS']);
            $wordnetInfoArr['SYNONYMS'][$pos] = isset($wordnetInfoArr['SYNONYMS'][$pos])
                ? array_merge($synsetWords, $wordnetInfoArr['SYNONYMS'][$pos])
                : $synsetWords;

            // Keep only the part of the category name after the first dot, capitalised.
            $categoryName = $MODEL_WORDNET['LEXICO_SEMANTIC_CATEGORIES'][$synset['SEMANTIC_CATEGORY_ID']];
            $dotAt = strpos($categoryName, ".");
            $wordnetInfoArr['SEMANTIC_TYPES'][$pos][$synsetNo] = ucfirst(substr($categoryName, $dotAt + 1));

            $wordnetInfoArr['GLOSSARY'][$pos] = $synset['GLOSSARY'];

            // One relationship record per pointer of the current synset.
            foreach ($synset['POINTERS'] as $pointer) {
                $targetOffset = $pointer['SYNSET_OFFSET'];
                $targetPos = getLongPoSName($pointer['POS']);
                $targetSynset = $MODEL_WORDNET['DATA'][$targetPos][$targetOffset];

                $cleanedTargetWords = array();
                foreach ($targetSynset['WORDS'] as $rawWord => $unused) {
                    $cleanedTargetWords[cleanWordnetCollocation(ucfirst($rawWord))] = 1;
                }

                $wordnetInfoArr['RELATIONSHIPS'][$pos][] = array(
                    "RELATION" => $pointer['SYMBOL_DESC'],
                    "WORDS" => $cleanedTargetWords,
                    "SEMANTIC_CATEGORY_ID" => $targetSynset['SEMANTIC_CATEGORY_ID'],
                    "GLOSSARY" => $targetSynset['GLOSSARY'],
                );
            }
        }

        // Drop repeated synonyms; array_unique() keeps the key of the first occurrence.
        $wordnetInfoArr['SYNONYMS'][$pos] = array_unique($wordnetInfoArr['SYNONYMS'][$pos]);
    }

    return $wordnetInfoArr;
}
コード例 #3
0
         $tentitaveTranslation = translateText($glossary);
         /*	echoN($glossary);
         			echoN("==".("(plural) any group of human beings (men or women or children) collectively"==$glossary));
         			//showHiddenChars(removeUnacceptedChars(cleanAndTrim("(plural) any group of human beings (men or women or children) collectively")),"EN");
         			//showHiddenChars("someone who leads you to believe something that is not true","EN");
         			isFoundInTranslationTable($glossary,"DESC");
         			preprint_r($CUSTOM_TRANSLATION_TABLE_EN_AR["(plural) any group of human beings (men or women or children) collectively"]);
         			preprint_r($CUSTOM_TRANSLATION_TABLE_EN_AR);
         			exit;*/
         addTranslationEntry($glossary, "DESC", $tentitaveTranslation);
         $glossaryAR = $tentitaveTranslation;
     }
     $enrichedFinalConcepts[$finalConceptName]['EXTRA']['MEANING_AR']['WORDNET'] = $glossaryAR;
     foreach ($wordsArr as $synonym => $dummy) {
         if ($synonym != $parentConceptName) {
             $enrichedFinalConcepts[$finalConceptName]['EXTRA']['AKA']['EN']['WORDNET'] = cleanWordnetCollocation($synonym);
         }
     }
 } else {
     //echoN("##$finalConceptName|T-BOX");
     //echoN("$concept,$is_a_relation_name_ar,$finalConceptName");
     /*
      * WILL NOT DO IT HERE SINCE SOME RELATIONS ARE EXCLUDED LATER, SO OPERATIONS DONE HERE
      * CAN'T BE REVERTED BACK, WILL BE MOVED AFTER EXCLUSION INSTEAD
      */
     //$enrichedFinalConcepts[$finalConceptName]['CONCEPT_TYPE']='T-BOX';
 }
 echoN("XPP: 2  {$finalConceptName}");
 $relationType = "TAXONOMIC";
 $res = addRelation($relationsArr, $relationType, $concept, "{$is_a_relation_name_ar}", $finalConceptName, "{$is_a_relation_name_en}");
 if ($res == true) {