Exemplo n.º 1
0
/**
 * Collects additional query terms for a (PoS-tagged) query by consulting the
 * QA ontology held in memory.
 *
 * Three sources are used, depending on the flags:
 *  - is-a relations pointing at a query concept (always), and is-a relations
 *    starting from it (questions only) - skipped entirely when the question
 *    contains a verb;
 *  - the synonyms index (non-question queries only);
 *  - the verb index (questions only): when one side of a verb relation is
 *    already in the query, the other side is added.
 *
 * @param array  $extendedQueryArr Map of query word => PoS tag (or a numeric value
 *                                 for untagged / phrase-search queries).
 * @param string $lang             "EN" for English; any other value takes the
 *                                 Arabic code path.
 * @param bool   $isQuestion       Whether the query is a question.
 *
 * @return array De-duplicated terms. Keys are preserved by array_unique(), so
 *               they are not guaranteed to be contiguous.
 */
function extendQueryWordsByConceptTaxRelations($extendedQueryArr, $lang, $isQuestion = false)
{
    global $is_a_relation_name_ar, $thing_class_name_en, $thing_class_name_ar;

    $conceptsFromTaxRelations = array();

    if ($lang == "EN") {
        $thing_class_name = strtolower($thing_class_name_en);
    } else {
        $thing_class_name = $thing_class_name_ar;
    }

    $questionIncludesVerb = $isQuestion && doesQuestionIncludesVerb($extendedQueryArr);

    foreach ($extendedQueryArr as $word => $pos) {
        // Ignore any term which is not a noun or a verb.
        // [0-9]+ lets through normal, non-PoS-tagged queries (phrase search).
        if (!preg_match("/NN|VB|[0-9]+/", $pos)) {
            continue;
        }

        // Differentiate between the word and its concept ID: in English the
        // unmapped word is still needed below for the verb search.
        if ($lang == "EN") {
            // Corresponding Arabic concept ID - only set if the word is a concept.
            $conceptIDStr = getModelEntryFromMemory("ALL", "MODEL_QA_ONTOLOGY", "CONCEPTS_EN_AR_NAME_MAP", $word);
        } else {
            $conceptIDStr = $word;
        }

        // If the question includes a verb, the user is not looking for an is-a relation.
        if (!$questionIncludesVerb && modelEntryExistsInMemory("ALL", "MODEL_QA_ONTOLOGY", "CONCEPTS", $conceptIDStr)) {
            $inboundRelationsArr = getModelEntryFromMemory("ALL", "MODEL_QA_ONTOLOGY", "GRAPH_INDEX_TARGETS", $conceptIDStr);
            $outboundRelationsArr = getModelEntryFromMemory("ALL", "MODEL_QA_ONTOLOGY", "GRAPH_INDEX_SOURCES", $conceptIDStr);

            // A concept may have no entry in one of the graph indexes; normalise
            // to an empty array so foreach does not receive a non-array value.
            if (!is_array($inboundRelationsArr)) {
                $inboundRelationsArr = array();
            }
            if (!is_array($outboundRelationsArr)) {
                $outboundRelationsArr = array();
            }

            // Inbound is-a relations, e.g. "X is an ANIMAL" where ANIMAL is $word.
            foreach ($inboundRelationsArr as $relationArr) {
                $subject = $relationArr['source'];
                $verbAR = $relationArr['link_verb'];
                $subjectConceptArr = getModelEntryFromMemory("ALL", "MODEL_QA_ONTOLOGY", "CONCEPTS", $subject);
                if ($lang == "EN") {
                    $subject = trim(removeBasicEnglishStopwordsNoNegation($subjectConceptArr['label_en']));
                }
                // Clean and replace the concept name.
                $subject = cleanWordnetCollocation($subject);

                //TODO: check if the $subject != $thing_class_name test is needed
                if ($verbAR == $is_a_relation_name_ar && $subject != $thing_class_name) {
                    // Ignore phrase concepts that merely contain the query concept
                    // (e.g. "torment" vs. "torment of Allah").
                    // NOTE(review): for "EN" $subject is an English label while
                    // $conceptIDStr is the mapped concept ID, so this containment
                    // test may never match in English - confirm intended operand.
                    if (strpos($subject, $conceptIDStr) === false) {
                        $conceptsFromTaxRelations[] = $subject;
                    }
                }
            }

            if ($isQuestion) {
                // Outbound is-a relations, e.g. "X is a PERSON" where X is $word.
                foreach ($outboundRelationsArr as $relationArr) {
                    $verbAR = $relationArr['link_verb'];
                    $object = $relationArr['target'];
                    $objectConceptArr = getModelEntryFromMemory("ALL", "MODEL_QA_ONTOLOGY", "CONCEPTS", $object);
                    if ($lang == "EN") {
                        $object = trim(removeBasicEnglishStopwordsNoNegation($objectConceptArr['label_en']));
                    }
                    // Clean and replace the concept name.
                    $object = cleanWordnetCollocation($object);

                    if ($verbAR == $is_a_relation_name_ar && $object != $thing_class_name) {
                        // Ignore phrase concepts that merely contain the query concept
                        // (same caveat for "EN" as in the inbound loop above).
                        if (strpos($object, $conceptIDStr) === false) {
                            $conceptsFromTaxRelations[] = $object;
                        }
                    }
                }
            }
        }

        if (!$isQuestion) {
            // Add the concept name to the query if the current query word is a
            // known synonym of that concept.
            if (modelEntryExistsInMemory("ALL", "MODEL_QA_ONTOLOGY", "SYNONYMS_INDEX", $word)) {
                $conceptNameAR = getModelEntryFromMemory("ALL", "MODEL_QA_ONTOLOGY", "SYNONYMS_INDEX", $word);
                $finalConceptName = $conceptNameAR;
                if ($lang == "EN") {
                    $conceptArr = getModelEntryFromMemory("ALL", "MODEL_QA_ONTOLOGY", "CONCEPTS", $conceptNameAR);
                    $finalConceptName = $conceptArr['label_en'];
                }
                $conceptsFromTaxRelations[] = $finalConceptName;
            }
        }

        // The $lang == "AR" check is needed because AR words are not PoS tagged yet.
        if ($isQuestion && ($lang == "AR" || posIsVerb($pos))) {
            if (($verbArr = getModelEntryFromMemory("ALL", "MODEL_QA_ONTOLOGY", "VERB_INDEX", $word)) != null || ($verbArr = isWordPartOfAVerbInVerbIndex($word, $lang))) {
                foreach ($verbArr as $verbSTArr) {
                    $subject = $verbSTArr['SUBJECT'];
                    $object = $verbSTArr['OBJECT'];
                    if ($lang == "EN") {
                        // Look up each side by its own concept ID. The previous code
                        // used an undefined variable for both lookups, so neither
                        // side ever resolved to its English label.
                        $subjectConceptArr = getModelEntryFromMemory("ALL", "MODEL_QA_ONTOLOGY", "CONCEPTS", $subject);
                        $objectConceptArr = getModelEntryFromMemory("ALL", "MODEL_QA_ONTOLOGY", "CONCEPTS", $object);
                        $object = trim(removeBasicEnglishStopwordsNoNegation($objectConceptArr['label_en']));
                        $subject = trim(removeBasicEnglishStopwordsNoNegation($subjectConceptArr['label_en']));
                    }
                    // We are not interested in "X is_a Thing" - it adds no value.
                    if ($object == $thing_class_name) {
                        continue;
                    }
                    // If one side of the verb relation is already a query word,
                    // extend the query with the other side.
                    if (isset($extendedQueryArr[$subject])) {
                        $conceptsFromTaxRelations[] = $object;
                    } elseif (isset($extendedQueryArr[$object])) {
                        $conceptsFromTaxRelations[] = $subject;
                    }
                }
            }
        }
    }

    return array_unique($conceptsFromTaxRelations);
}
Exemplo n.º 2
0
/**
 * Passes an English concept label through removeBasicEnglishStopwordsNoNegation()
 * and upper-cases the first character of the result.
 *
 * @param string $conceptEN English concept label.
 *
 * @return string The processed label with its first character upper-cased.
 */
function formatEnglishConcept($conceptEN)
{
    $withoutStopwords = removeBasicEnglishStopwordsNoNegation($conceptEN);

    return ucfirst($withoutStopwords);
}
 $updatedFinalConcept[$thing_class_name_ar] = $thingArr;
 unset($finalConcepts[$thing_class_name_ar]);
 $updatedFinalConcept = array_merge($updatedFinalConcept, $finalConcepts);
 $finalConcepts = $updatedFinalConcept;
 //////////////////////////////////////////
 //preprint_r($finalConcepts);exit;
 $counter++;
 foreach ($finalConcepts as $concept => $coneptArr) {
     $conceptType = $coneptArr['CONCEPT_TYPE'];
     $conceptNameEn = $coneptArr['EXTRA']['TRANSLATION_EN'];
     //echoN($concept." ".$conceptNameEn." ".$coneptArr['EXTRA']['IS_QURANA_NGRAM_CONCEPT']);
     // Qurana concept should be left as is to match concepts in inverted index
     if ($coneptArr['EXTRA']['IS_QURANA_NGRAM_CONCEPT'] !== true) {
         //	echoN($coneptArr['EXTRA']['IS_QURANA_NGRAM_CONCEPT']);
         //echoN($conceptNameEn);
         $conceptNameEn = removeBasicEnglishStopwordsNoNegation($conceptNameEn);
         //echoN($conceptNameEn);
     } else {
         $conceptNameEn = strtolower($conceptNameEn);
     }
     $conceptNameAr = $concept;
     $classID = getXMLFriendlyString($conceptNameAr);
     $classOrInstanceName = $classID;
     if ($conceptType == "T-BOX") {
         $ontology->createClass($classOrInstanceName);
     } else {
         $classOrInstanceName = $classID;
         if (!conceptHasParentClasses($relationsArr, $classOrInstanceName)) {
             $ontology->addInstance($classOrInstanceName, $thingClassName, $properties);
         } else {
             $ontology->createClass($classOrInstanceName);