/**
 * Converts a relation's subject, verb and object to their simple form,
 * resolves an English translation for the verb when the caller did not supply
 * one, and hands the result to addNewRelation().
 *
 * @param array  $relationsArr       Relations collection, passed by reference to addNewRelation().
 * @param string $type               Relation type, forwarded unchanged.
 * @param string $subject            Subject word (simple or Uthmani form).
 * @param string $verb               Verb word or space-separated verb phrase.
 * @param string $object             Object word (simple or Uthmani form).
 * @param string $joinedPattern      Pattern label, forwarded unchanged.
 * @param string $verbEngTranslation Optional ready-made English translation of the verb;
 *                                   when empty it is looked up / translated here.
 * @param string $fullVerbQuranWord  Optional full word used as the translation lookup key for a
 *                                   single-word verb; when it yields nothing the verb itself is used.
 *
 * @return mixed false when the subject or the object is empty, otherwise the
 *               return value of addNewRelation().
 */
function addRelation(&$relationsArr, $type, $subject, $verb, $object, $joinedPattern, $verbEngTranslation = "", $fullVerbQuranWord = "") {
    if (empty($subject) || empty($object)) {
        return false;
    }

    $subjectSimple = addRelationToSimpleWord($subject);
    $objectSimple = addRelationToSimpleWord($object);

    // Keep the verb exactly as received; it is forwarded as the last argument.
    $verbUthmani = $verb;
    $verbSimple = "";

    if (empty($verbEngTranslation)) {
        if (!isMultiWordStr($verb)) {
            // Single-word verb.
            $verb = trim($verb);
            $translatableVerb = $fullVerbQuranWord;

            if (isSimpleQuranWord($verb)) {
                $translatableVerb = getItemFromUthmaniToSimpleMappingTable($fullVerbQuranWord);
            } else {
                $verbSimple = getItemFromUthmaniToSimpleMappingTable($verb);
            }

            // No full word supplied, or no mapping for it: fall back to the verb
            // itself so an empty keyword is never looked up, translated or stored.
            if (empty($translatableVerb)) {
                $translatableVerb = $verb;
            }

            $verbEngTranslation = addRelationTranslateVerbWord($translatableVerb, $translatableVerb);
        } else {
            // Verb phrase: convert and translate word by word.
            $simpleParts = array();
            $translatedParts = array();

            // PREG_SPLIT_NO_EMPTY: consecutive spaces must not produce empty words.
            foreach (preg_split("/ +/", $verb, -1, PREG_SPLIT_NO_EMPTY) as $verbPart) {
                if (isSimpleQuranWord($verbPart)) {
                    $translatableVerb = getItemFromUthmaniToSimpleMappingTable($verbPart);
                } else {
                    // The simple form is both stored in the relation and used as lookup key.
                    $simplePart = getItemFromUthmaniToSimpleMappingTable($verbPart);
                    if (empty($simplePart)) {
                        $simplePart = shallowUthmaniToSimpleConversion($verbPart);
                    }
                    $simpleParts[] = $simplePart;
                    $translatableVerb = $simplePart;
                }

                // Lookup uses the converted word; the fallback translation is keyed
                // by the original phrase word.
                $translatedParts[] = addRelationTranslateVerbWord($translatableVerb, $verbPart);
            }

            $verbSimple = implode(" ", $simpleParts);
            // trim(): the joined phrase must not carry leading/trailing blanks.
            $verbEngTranslation = trim(implode(" ", $translatedParts));
        }
    }

    $verbSimple = trim((string) $verbSimple);
    if (empty($verbSimple)) {
        // Nothing came out of the mapping table (or a translation was supplied by
        // the caller): derive the simple verb from the verb as received.
        $verbSimple = removeTashkeel(shallowUthmaniToSimpleConversion($verbUthmani));
    }

    return addNewRelation($relationsArr, $type, $subjectSimple, $verbSimple, $objectSimple, $joinedPattern, $verbEngTranslation, $verbUthmani);
}

/**
 * Returns the simple form of a word: the word itself when isSimpleQuranWord()
 * accepts it, else its mapping-table entry, else the shallow conversion
 * (last resort).
 */
function addRelationToSimpleWord($word) {
    if (isSimpleQuranWord($word)) {
        return $word;
    }

    $simpleWord = getItemFromUthmaniToSimpleMappingTable($word);
    if (empty($simpleWord)) {
        $simpleWord = shallowUthmaniToSimpleConversion($word);
    }

    return $simpleWord;
}

/**
 * Resolves the English translation of one verb word.
 *
 * Tries $WORDS_TRANSLATIONS_AR_EN[$lookupKey] first; when that gives nothing it
 * uses the custom translation table entry for $keyword, or, if there is none,
 * calls translateText() and registers the result with addTranslationEntry().
 */
function addRelationTranslateVerbWord($lookupKey, $keyword) {
    global $WORDS_TRANSLATIONS_AR_EN;

    // isset() guard: a missing key must not raise an "undefined array key" warning.
    $rawTranslation = null;
    if (!empty($lookupKey) && isset($WORDS_TRANSLATIONS_AR_EN[$lookupKey])) {
        $rawTranslation = $WORDS_TRANSLATIONS_AR_EN[$lookupKey];
    }

    $translation = cleanEnglishTranslation($rawTranslation);
    if (!empty($translation)) {
        return $translation;
    }

    // Nothing to translate; do not call the translator or store an empty keyword.
    if (empty($keyword)) {
        return "";
    }

    if (!isFoundInTranslationTable($keyword, "VERB")) {
        $translation = translateText($keyword, "ar", "en");
        addTranslationEntry($translation, "VERB", $keyword, "AR");

        return $translation;
    }

    $customTranslationEntryArr = getTranlationEntryByEntryKeyword($keyword);

    return isset($customTranslationEntryArr['EN_TEXT']) ? $customTranslationEntryArr['EN_TEXT'] : null;
}
// CONVERT ALL WORDS TO SIMPLE $concept = getModelEntryFromMemory("AR", "OTHERS", "UTHMANI_TO_SIMPLE_WORD_MAP", $biGramWords[0]); $adj1 = getModelEntryFromMemory("AR", "OTHERS", "UTHMANI_TO_SIMPLE_WORD_MAP", $biGramWords[1]); $adj2 = getModelEntryFromMemory("AR", "OTHERS", "UTHMANI_TO_SIMPLE_WORD_MAP", $biGramWords[2]); $hasAttribute = "من صفاتة"; $type = "NON-TAXONOMIC"; // HANDLE ADJECTIVE 1 // BOTH SUBJECT AND OBJECT EXISTS IN THE CONCEPTS LIST if (isset($finalConcepts[$concept]) && isset($finalConcepts[$adj1])) { //ADD RELATION: CONCEPT( PN ) has attribute ($adj1) addNewRelation($relationsArr, $type, $concept, $hasAttribute, $adj1, "ADJ", "has attribute"); } // HANDLE ADJECTIVE 2 if (isset($finalConcepts[$concept]) && isset($finalConcepts[$adj2])) { //ADD RELATION: CONCEPT ( PN ) has attribute ($adj2) addNewRelation($relationsArr, $type, $concept, $hasAttribute, $adj2, "ADJ", "has attribute"); } /* produced 13 relations*/ } preprint_r($relationsArr); echoN("FINAL NONTAXONOMIC RELATIONS :" . count($relationsArr)); file_put_contents("{$ONTOLOGY_EXTRACTION_FOLDER}/temp.final.relations", serialize($relationsArr)); } if ($EXTRACT_NEWCONCEPTS_FROM_RELATIONS) { // LOAD CACHED RESULTS FROM LAST STAGE $relationsArr = unserialize(file_get_contents("{$ONTOLOGY_EXTRACTION_FOLDER}/temp.final.relations")); $finalConcepts = unserialize(file_get_contents("{$ONTOLOGY_EXTRACTION_FOLDER}/temp.final.concepts.stage1")); $finalTerms = unserialize(file_get_contents("{$ONTOLOGY_EXTRACTION_FOLDER}/temp.all.terms")); echoN("BA-AA:" . count($finalConcepts)); $notInCounceptsCounter = 0; $handled = array();