/**
 * Collects ontology concept names that can be used to extend a query.
 *
 * Only query words whose PoS tag matches /NN|VB|[0-9]+/ are considered
 * (the numeric alternative lets non-PoS-tagged phrase queries through).
 * For each such word the following are gathered:
 *  - sources of inbound is-a relations of the word's concept
 *    (X is-a <word>), skipped when the query is a question containing a verb;
 *  - for questions, targets of outbound is-a relations (<word> is-a X);
 *  - for non-questions, the concept the word is listed under in SYNONYMS_INDEX;
 *  - for questions, the opposite side of a VERB_INDEX relation when one side
 *    of that relation is already a key of the query.
 *
 * @param array  $extendedQueryArr map of query word => PoS tag
 * @param string $lang             "EN" or "AR"
 * @param bool   $isQuestion       whether the query is treated as a question
 *
 * @return array de-duplicated concept names (array_unique keeps original keys,
 *               so the result is not necessarily a 0..n-1 list)
 */
function extendQueryWordsByConceptTaxRelations($extendedQueryArr, $lang, $isQuestion = false)
{
    global $thing_class_name_en, $thing_class_name_ar;

    $conceptsFromTaxRelations = array();

    if ($lang == "EN") {
        $thing_class_name = strtolower($thing_class_name_en);
    } else {
        $thing_class_name = $thing_class_name_ar;
    }

    $questionIncludesVerb = $isQuestion && doesQuestionIncludesVerb($extendedQueryArr);

    foreach ($extendedQueryArr as $word => $pos) {
        // ignore any term which is not a noun or a verb
        // [0-9]+ allows normal non-PoS-tagged queries - in case of phrase search
        if (!preg_match("/NN|VB|[0-9]+/", $pos)) {
            continue;
        }

        /*
         * Differentiate between word and concept ID: in English the 'word'
         * is still needed unmapped for the 'verb' search further down.
         */
        if ($lang == "EN") {
            // corresponding Arabic concept - only if the word is a concept
            $conceptIDStr = getModelEntryFromMemory("ALL", "MODEL_QA_ONTOLOGY", "CONCEPTS_EN_AR_NAME_MAP", $word);
        } else {
            $conceptIDStr = $word;
        }

        // if the question includes a verb then the user is not looking for an is-a relation
        if (!$questionIncludesVerb
            && modelEntryExistsInMemory("ALL", "MODEL_QA_ONTOLOGY", "CONCEPTS", $conceptIDStr)
        ) {
            // INBOUND IS-A RELATIONS, EX: X IS AN ANIMAL($word)
            $inboundRelationsArr = getModelEntryFromMemory("ALL", "MODEL_QA_ONTOLOGY", "GRAPH_INDEX_TARGETS", $conceptIDStr);
            $conceptsFromTaxRelations = array_merge(
                $conceptsFromTaxRelations,
                collectIsARelationEndpoints($inboundRelationsArr, 'source', $conceptIDStr, $lang, $thing_class_name)
            );

            if ($isQuestion) {
                // OUTBOUND IS-A RELATIONS, EX: X($word) IS A PERSON
                $outboundRelationsArr = getModelEntryFromMemory("ALL", "MODEL_QA_ONTOLOGY", "GRAPH_INDEX_SOURCES", $conceptIDStr);
                $conceptsFromTaxRelations = array_merge(
                    $conceptsFromTaxRelations,
                    collectIsARelationEndpoints($outboundRelationsArr, 'target', $conceptIDStr, $lang, $thing_class_name)
                );
            }
        }

        if (!$isQuestion) {
            // add the concept name to the query if the current word is a synonym of that concept
            if (modelEntryExistsInMemory("ALL", "MODEL_QA_ONTOLOGY", "SYNONYMS_INDEX", $word)) {
                $conceptNameAR = getModelEntryFromMemory("ALL", "MODEL_QA_ONTOLOGY", "SYNONYMS_INDEX", $word);
                $finalConceptName = $conceptNameAR;

                if ($lang == "EN") {
                    $conceptArr = getModelEntryFromMemory("ALL", "MODEL_QA_ONTOLOGY", "CONCEPTS", $conceptNameAR);
                    $finalConceptName = $conceptArr['label_en'];
                }

                $conceptsFromTaxRelations[] = $finalConceptName;
            }
        }

        // $lang == "AR" check since AR words are not PoS tagged yet
        if ($isQuestion && ($lang == "AR" || posIsVerb($pos))) {
            $verbArr = getModelEntryFromMemory("ALL", "MODEL_QA_ONTOLOGY", "VERB_INDEX", $word);
            if ($verbArr == null) {
                // no direct hit: fall back to the partial-verb lookup
                $verbArr = isWordPartOfAVerbInVerbIndex($word, $lang);
            }

            // a failed lookup yields a non-array value; nothing to iterate then
            if (is_array($verbArr)) {
                foreach ($verbArr as $verbSTArr) {
                    $subject = $verbSTArr['SUBJECT'];
                    $object = $verbSTArr['OBJECT'];

                    if ($lang == "EN") {
                        // Resolve each side of the relation through its OWN concept entry.
                        // (Previously both lookups used an undefined variable, so the
                        // English labels were always empty and nothing could match.)
                        $subjectConceptArr = getModelEntryFromMemory("ALL", "MODEL_QA_ONTOLOGY", "CONCEPTS", $subject);
                        $objectConceptArr = getModelEntryFromMemory("ALL", "MODEL_QA_ONTOLOGY", "CONCEPTS", $object);

                        $object = trim(removeBasicEnglishStopwordsNoNegation($objectConceptArr['label_en']));
                        $subject = trim(removeBasicEnglishStopwordsNoNegation($subjectConceptArr['label_en']));
                    }

                    // we are not interested in X is_a Thing - does not add value
                    if ($object == $thing_class_name) {
                        continue;
                    }

                    // whichever side is already in the query pulls in the other side
                    if (isset($extendedQueryArr[$subject])) {
                        $conceptsFromTaxRelations[] = $object;
                    } elseif (isset($extendedQueryArr[$object])) {
                        $conceptsFromTaxRelations[] = $subject;
                    }
                }
            }
        }
    }

    return array_unique($conceptsFromTaxRelations);
}

/**
 * Private helper: returns the names found at one end of the is-a relations in a relation list.
 *
 * @param mixed  $relationsArr   list of relation arrays ('source', 'target', 'link_verb');
 *                               any non-array value is treated as "no relations"
 * @param string $endpointKey    'source' for inbound relations, 'target' for outbound ones
 * @param string $conceptIDStr   concept ID of the query word the relations belong to
 * @param string $lang           "EN" to translate endpoints to their 'label_en'
 * @param string $thingClassName name of the root "thing" class, which is never returned
 *
 * @return array endpoint names in relation order (may contain duplicates)
 */
function collectIsARelationEndpoints($relationsArr, $endpointKey, $conceptIDStr, $lang, $thingClassName)
{
    global $is_a_relation_name_ar;

    $endpointNames = array();

    if (!is_array($relationsArr)) {
        return $endpointNames;
    }

    foreach ($relationsArr as $relationArr) {
        // only taxonomic (is-a) links are of interest
        if ($relationArr['link_verb'] != $is_a_relation_name_ar) {
            continue;
        }

        $endpoint = $relationArr[$endpointKey];

        if ($lang == "EN") {
            $endpointConceptArr = getModelEntryFromMemory("ALL", "MODEL_QA_ONTOLOGY", "CONCEPTS", $endpoint);
            $endpoint = trim(removeBasicEnglishStopwordsNoNegation($endpointConceptArr['label_en']));
        }

        // clean and replace concept
        $endpoint = cleanWordnetCollocation($endpoint);

        if ($endpoint == $thingClassName) {
            continue;
        }

        // ignore phrase parent concepts, e.g. عذاب + عذاب الله
        // NOTE(review): for "EN" the endpoint is an English label while $conceptIDStr
        // comes from CONCEPTS_EN_AR_NAME_MAP - confirm this comparison is intended.
        if (strpos($endpoint, (string) $conceptIDStr) === false) {
            $endpointNames[] = $endpoint;
        }
    }

    return $endpointNames;
}
/**
 * Builds a summary of everything the loaded WordNet model holds for one word.
 *
 * The word is lower-cased and looked up in $MODEL_WORDNET['INDEX']. For every
 * part of speech listed there, the result collects the synonyms, semantic
 * types, glossary and pointer relationships of each referenced synset.
 *
 * Result keys: 'WORD', 'POS' (pos => 1), 'SYNONYMS' (pos => unique words),
 * 'SEMANTIC_TYPES' (pos => synset index => type), 'GLOSSARY' (pos => glossary of
 * the last synset processed for that pos) and 'RELATIONSHIPS' (pos => list of
 * arrays with 'RELATION', 'WORDS', 'SEMANTIC_CATEGORY_ID', 'GLOSSARY').
 *
 * @param string $wordToSearchFor            word to look up (case-insensitive)
 * @param string $includeOnlyRelationsOfType accepted for interface compatibility;
 *                                           this implementation never reads it
 *
 * @return array|false the summary array, or false when the word is empty or
 *                     not present in the WordNet index
 *
 * @throws Exception when $MODEL_WORDNET is empty (model not loaded)
 */
function getWordnetEntryByWordString($wordToSearchFor, $includeOnlyRelationsOfType = "")
{
    global $MODEL_WORDNET;

    if (empty($MODEL_WORDNET)) {
        throw new Exception("Wordnet module is not loaded!");
    }

    if (empty($wordToSearchFor)) {
        return false;
    }

    $wordToSearchFor = strtolower($wordToSearchFor);

    // Not found in Wordnet
    if (!isset($MODEL_WORDNET['INDEX'][$wordToSearchFor])) {
        return false;
    }

    $wordnetInfoArr = array();
    $wordnetInfoArr['SYNONYMS'] = array();
    $wordnetInfoArr['SEMANTIC_TYPES'] = array();
    $wordnetInfoArr['RELATIONSHIPS'] = array();
    $wordnetInfoArr['WORD'] = $wordToSearchFor;

    foreach ($MODEL_WORDNET['INDEX'][$wordToSearchFor] as $pos => $wordIndexEntryArr) {
        $wordnetInfoArr['POS'][$pos] = 1;

        // Always start from an array so a PoS entry without synsets still yields
        // an (empty) synonym list instead of handing null to array_unique().
        $wordnetInfoArr['SYNONYMS'][$pos] = array();

        $synsetOffsetsArr = (isset($wordIndexEntryArr['SYNSETS']) && is_array($wordIndexEntryArr['SYNSETS']))
            ? $wordIndexEntryArr['SYNSETS']
            : array();

        // each synset in INDEX
        foreach ($synsetOffsetsArr as $index => $fileOffset) {
            // index entry pointing at a synset that is not in DATA: nothing to read
            if (!isset($MODEL_WORDNET['DATA'][$pos][$fileOffset])) {
                continue;
            }
            $entryArr = $MODEL_WORDNET['DATA'][$pos][$fileOffset];

            $synsetWordsArr = (isset($entryArr['WORDS']) && is_array($entryArr['WORDS']))
                ? array_keys($entryArr['WORDS'])
                : array();

            // words of later synsets are placed before the ones gathered so far
            $wordnetInfoArr['SYNONYMS'][$pos] = array_merge($synsetWordsArr, $wordnetInfoArr['SYNONYMS'][$pos]);

            $lexicoSemanticCategoryID = isset($entryArr['SEMANTIC_CATEGORY_ID']) ? $entryArr['SEMANTIC_CATEGORY_ID'] : null;
            $semanticType = '';
            if ($lexicoSemanticCategoryID !== null
                && isset($MODEL_WORDNET['LEXICO_SEMANTIC_CATEGORIES'][$lexicoSemanticCategoryID])
            ) {
                $semanticType = (string) $MODEL_WORDNET['LEXICO_SEMANTIC_CATEGORIES'][$lexicoSemanticCategoryID];
            }

            // keep only the part after the first "." (e.g. "noun.animal" => "animal");
            // a name without a dot is kept whole instead of losing its first character
            $dotPosition = strpos($semanticType, ".");
            if ($dotPosition !== false) {
                $semanticType = (string) substr($semanticType, $dotPosition + 1);
            }

            $wordnetInfoArr['SEMANTIC_TYPES'][$pos][$index] = ucfirst($semanticType);
            $wordnetInfoArr['GLOSSARY'][$pos] = isset($entryArr['GLOSSARY']) ? $entryArr['GLOSSARY'] : null;

            $synsetPointersArr = (isset($entryArr['POINTERS']) && is_array($entryArr['POINTERS']))
                ? $entryArr['POINTERS']
                : array();

            // EACH POINTER IN THE CURRENT SYNSET
            foreach ($synsetPointersArr as $pointerArr) {
                $pointerOffset = $pointerArr['SYNSET_OFFSET'];
                $relationName = $pointerArr['SYMBOL_DESC'];
                $pointerPoS = getLongPoSName($pointerArr['POS']);

                // a dangling pointer still produces a relationship, just with no words
                $pointerEntryArr = isset($MODEL_WORDNET['DATA'][$pointerPoS][$pointerOffset])
                    ? $MODEL_WORDNET['DATA'][$pointerPoS][$pointerOffset]
                    : array();

                $pointerWordsEditedArr = array();
                if (isset($pointerEntryArr['WORDS']) && is_array($pointerEntryArr['WORDS'])) {
                    foreach ($pointerEntryArr['WORDS'] as $pointerWord => $dummy) {
                        $pointerWordsEditedArr[cleanWordnetCollocation(ucfirst($pointerWord))] = 1;
                    }
                }

                $wordnetInfoArr['RELATIONSHIPS'][$pos][] = array(
                    "RELATION" => $relationName,
                    "WORDS" => $pointerWordsEditedArr,
                    "SEMANTIC_CATEGORY_ID" => isset($pointerEntryArr['SEMANTIC_CATEGORY_ID']) ? $pointerEntryArr['SEMANTIC_CATEGORY_ID'] : null,
                    "GLOSSARY" => isset($pointerEntryArr['GLOSSARY']) ? $pointerEntryArr['GLOSSARY'] : null,
                );
            }
        }

        $wordnetInfoArr['SYNONYMS'][$pos] = array_unique($wordnetInfoArr['SYNONYMS'][$pos]);
    }

    return $wordnetInfoArr;
}
$tentitaveTranslation = translateText($glossary); /* echoN($glossary); echoN("==".("(plural) any group of human beings (men or women or children) collectively"==$glossary)); //showHiddenChars(removeUnacceptedChars(cleanAndTrim("(plural) any group of human beings (men or women or children) collectively")),"EN"); //showHiddenChars("someone who leads you to believe something that is not true","EN"); isFoundInTranslationTable($glossary,"DESC"); preprint_r($CUSTOM_TRANSLATION_TABLE_EN_AR["(plural) any group of human beings (men or women or children) collectively"]); preprint_r($CUSTOM_TRANSLATION_TABLE_EN_AR); exit;*/ addTranslationEntry($glossary, "DESC", $tentitaveTranslation); $glossaryAR = $tentitaveTranslation; } $enrichedFinalConcepts[$finalConceptName]['EXTRA']['MEANING_AR']['WORDNET'] = $glossaryAR; foreach ($wordsArr as $synonym => $dummy) { if ($synonym != $parentConceptName) { $enrichedFinalConcepts[$finalConceptName]['EXTRA']['AKA']['EN']['WORDNET'] = cleanWordnetCollocation($synonym); } } } else { //echoN("##$finalConceptName|T-BOX"); //echoN("$concept,$is_a_relation_name_ar,$finalConceptName"); /* * WILL NOT DO IT HERE SINCE SOME RELATIONS ARE EXCLUDED LATER, SO OPERATIONS DONE HERE * CAN'T BE REVERTED BACK, WILL BE MOVED AFTER EXCLUSION INSTEAD */ //$enrichedFinalConcepts[$finalConceptName]['CONCEPT_TYPE']='T-BOX'; } echoN("XPP: 2 {$finalConceptName}"); $relationType = "TAXONOMIC"; $res = addRelation($relationsArr, $relationType, $concept, "{$is_a_relation_name_ar}", $finalConceptName, "{$is_a_relation_name_en}"); if ($res == true) {