Ejemplo n.º 1
0
/**
 * Normalises a (subject, verb, object) triple and registers it through addNewRelation().
 *
 * Subject and object are converted to their "simple" spelling. The verb is kept in the
 * form it was passed (handed on as the Uthmani verb) and is also given a simple form
 * and an English translation. When no English translation is supplied it is resolved,
 * in order, from:
 *   1. the global $WORDS_TRANSLATIONS_AR_EN table,
 *   2. the custom translation table (getTranlationEntryByEntryKeyword()),
 *   3. translateText(), whose result is then stored with addTranslationEntry().
 *
 * @param array  $relationsArr       Relations collection, passed by reference and forwarded to addNewRelation().
 * @param mixed  $type               Relation type, forwarded unchanged.
 * @param string $subject            Subject word; an empty value aborts the call.
 * @param string $verb               Verb word, or a space-separated verb phrase.
 * @param string $object             Object word; an empty value aborts the call.
 * @param mixed  $joinedPattern      Forwarded unchanged to addNewRelation().
 * @param string $verbEngTranslation Optional ready-made English translation; when non-empty no lookup is done.
 * @param string $fullVerbQuranWord  Word used as the translation key for single-word verbs.
 *
 * @return mixed false when subject or object is empty, otherwise whatever addNewRelation() returns.
 */
function addRelation(&$relationsArr, $type, $subject, $verb, $object, $joinedPattern, $verbEngTranslation = "", $fullVerbQuranWord = "")
{
    global $WORDS_TRANSLATIONS_AR_EN;

    if (empty($subject) || empty($object)) {
        return false;
    }

    // Mapping-table lookup first; the shallow conversion is only a last resort because
    // (per the original author's note) it spoils words and leads to duplicate concepts.
    $uthmaniToSimple = function ($word) {
        $simple = getItemFromUthmaniToSimpleMappingTable($word);
        if (empty($simple)) {
            $simple = shallowUthmaniToSimpleConversion($word);
        }
        return $simple;
    };

    // Guarded dictionary lookup: a missing key is an expected situation here (e.g. a
    // word whose segments were trimmed), so it must not raise an undefined-index
    // notice/warning. A miss still hands null to cleanEnglishTranslation(), exactly
    // as the unguarded lookup did. Captured by reference so that changes made to the
    // global table during this call remain visible.
    $dictionaryTranslation = function ($word) use (&$WORDS_TRANSLATIONS_AR_EN) {
        $raw = isset($WORDS_TRANSLATIONS_AR_EN[$word]) ? $WORDS_TRANSLATIONS_AR_EN[$word] : null;
        return cleanEnglishTranslation($raw);
    };

    // Fallback used when the dictionary has nothing: reuse the custom translation
    // entry if one exists, otherwise translate with translateText() and record the
    // result in the custom table so that it can be found next time.
    $customOrMachineTranslation = function ($keyword) {
        if (!isFoundInTranslationTable($keyword, "VERB")) {
            $translated = translateText($keyword, "ar", "en");
            addTranslationEntry($translated, "VERB", $keyword, "AR");
            return $translated;
        }
        $entry = getTranlationEntryByEntryKeyword($keyword);
        return $entry['EN_TEXT'];
    };

    $subjectSimple = isSimpleQuranWord($subject) ? $subject : $uthmaniToSimple($subject);
    $objectSimple = isSimpleQuranWord($object) ? $object : $uthmaniToSimple($object);

    // Remember the verb exactly as passed in; $verb itself may be trimmed below.
    $verbUthmani = $verb;
    $verbSimple = "";

    if (empty($verbEngTranslation)) {
        $verbEngTranslation = "";

        if (!isMultiWordStr($verb)) {
            // Single-word verb: the translation key is derived from $fullVerbQuranWord.
            $verb = trim($verb);
            if (isSimpleQuranWord($verb)) {
                $translatableVerb = getItemFromUthmaniToSimpleMappingTable($fullVerbQuranWord);
            } else {
                $translatableVerb = $fullVerbQuranWord;
                $verbSimple = getItemFromUthmaniToSimpleMappingTable($verb);
            }
            // NOTE(review): with the default $fullVerbQuranWord ("") the key is empty;
            // callers presumably always pass it when no translation is given - confirm.

            $verbEngTranslation = $dictionaryTranslation($translatableVerb);
            if (empty($verbEngTranslation)) {
                $verbEngTranslation = $customOrMachineTranslation($translatableVerb);
            }
        } else {
            // Verb phrase: handle every space-separated part on its own.
            $simpleParts = array();
            $translatedParts = array();

            foreach (explode(" ", $verb) as $verbPart) {
                if (isSimpleQuranWord($verbPart)) {
                    // Already simple: only the translation key is taken from the mapping table.
                    $translatableVerb = getItemFromUthmaniToSimpleMappingTable($verbPart);
                } else {
                    // The simple form goes into the relation's simple verb and is
                    // also used as the translation key.
                    $simplePart = $uthmaniToSimple($verbPart);
                    $simpleParts[] = $simplePart;
                    $translatableVerb = $simplePart;
                }

                $partTranslation = $dictionaryTranslation($translatableVerb);
                if (empty($partTranslation)) {
                    // The fallback is keyed by the original part, not by the derived key.
                    $partTranslation = $customOrMachineTranslation($verbPart);
                }
                $translatedParts[] = $partTranslation;
            }

            $verbSimple = implode(" ", $simpleParts);
            // Trimmed so the phrase no longer starts with the stray separator that
            // the previous " " . $part accumulation left in front of the first part.
            $verbEngTranslation = trim(implode(" ", $translatedParts));
        }
    }

    $verbSimple = trim($verbSimple);
    if (empty($verbSimple)) {
        // No simple form was found (or a translation was supplied by the caller):
        // derive one from the verb as originally passed.
        $verbSimple = removeTashkeel(shallowUthmaniToSimpleConversion($verbUthmani));
    }

    return addNewRelation($relationsArr, $type, $subjectSimple, $verbSimple, $objectSimple, $joinedPattern, $verbEngTranslation, $verbUthmani);
}
        // CONVERT ALL WORDS TO SIMPLE
        $concept = getModelEntryFromMemory("AR", "OTHERS", "UTHMANI_TO_SIMPLE_WORD_MAP", $biGramWords[0]);
        $adj1 = getModelEntryFromMemory("AR", "OTHERS", "UTHMANI_TO_SIMPLE_WORD_MAP", $biGramWords[1]);
        $adj2 = getModelEntryFromMemory("AR", "OTHERS", "UTHMANI_TO_SIMPLE_WORD_MAP", $biGramWords[2]);
        $hasAttribute = "من صفاتة";
        $type = "NON-TAXONOMIC";
        // HANDLE ADJECTIVE 1
        // BOTH SUBJECT AND OBJECT EXISTS IN THE CONCEPTS LIST
        if (isset($finalConcepts[$concept]) && isset($finalConcepts[$adj1])) {
            //ADD RELATION: CONCEPT( PN ) has attribute ($adj1)
            addNewRelation($relationsArr, $type, $concept, $hasAttribute, $adj1, "ADJ", "has attribute");
        }
        // HANDLE ADJECTIVE 2
        if (isset($finalConcepts[$concept]) && isset($finalConcepts[$adj2])) {
            //ADD RELATION: CONCEPT ( PN ) has attribute ($adj2)
            addNewRelation($relationsArr, $type, $concept, $hasAttribute, $adj2, "ADJ", "has attribute");
        }
        /* produced 13 relations*/
    }
    preprint_r($relationsArr);
    echoN("FINAL NONTAXONOMIC RELATIONS :" . count($relationsArr));
    file_put_contents("{$ONTOLOGY_EXTRACTION_FOLDER}/temp.final.relations", serialize($relationsArr));
}
if ($EXTRACT_NEWCONCEPTS_FROM_RELATIONS) {
    // LOAD CACHED RESULTS FROM LAST STAGE
    $relationsArr = unserialize(file_get_contents("{$ONTOLOGY_EXTRACTION_FOLDER}/temp.final.relations"));
    $finalConcepts = unserialize(file_get_contents("{$ONTOLOGY_EXTRACTION_FOLDER}/temp.final.concepts.stage1"));
    $finalTerms = unserialize(file_get_contents("{$ONTOLOGY_EXTRACTION_FOLDER}/temp.all.terms"));
    echoN("BA-AA:" . count($finalConcepts));
    $notInCounceptsCounter = 0;
    $handled = array();