/**
 * Adds the subject or object of a relation to the concepts list as an
 * "A-BOX" concept whose source is "POPULATION_FROM_RELATIONS".
 *
 * Steps:
 *  1. If the term is not yet a key of $finalConcepts, log it, bump the
 *     counter and record it in $statsUniqueSubjects.
 *  2. Look up the term entry (FREQ / WEIGHT) via getTermArrBySimpleWord().
 *  3. Resolve an English translation: multi-word terms use the Qurana
 *     concept entry, single words go through the Uthmani mapping table and
 *     the $WORDS_TRANSLATIONS_AR_EN global.
 *  4. Call addNewConcept() and fill the concept's EXTRA fields.
 *
 * @param array  $finalConcepts             Concepts list keyed by concept name; modified in place.
 * @param string $subjectOrObject           The term to register (presumably a simple-script Arabic word or phrase - confirm with callers).
 * @param string $conceptLocationInRelation "SUBJECT" logs the term as a subject; any other value logs it as an object.
 * @param int    $notInCounceptsCounter     Incremented when the term was not already in $finalConcepts.
 * @param array  $statsUniqueSubjects       Receives $subjectOrObject => 1 when the term was not already in $finalConcepts.
 *
 * @return void
 */
function handleNewConceptFromRelation(&$finalConcepts, $subjectOrObject, $conceptLocationInRelation, &$notInCounceptsCounter, &$statsUniqueSubjects)
{
    global $WORDS_TRANSLATIONS_AR_EN;
    // FIX: $finalTerms was used below without being in scope, so the term
    // lookup always received null. It is a script-level variable elsewhere in
    // this file; if it does not exist the behaviour is the same as before.
    global $finalTerms;

    // FIX: the original tested $finalConcepts[$subject], but $subject does not
    // exist in this function, so the check never looked at the actual term.
    if (!isset($finalConcepts[$subjectOrObject])) {
        $locationTag = ($conceptLocationInRelation === "SUBJECT") ? "S" : "O";
        echoN("NOT IN CONCEPTS:{$locationTag}:{$subjectOrObject}");

        $notInCounceptsCounter++;
        $statsUniqueSubjects[$subjectOrObject] = 1;
    }

    $termsArr = getTermArrBySimpleWord($finalTerms, $subjectOrObject);
    $freq = $termsArr['FREQ'];

    $isQuranaPhraseConcept = isMultiWordStr($subjectOrObject);

    if ($isQuranaPhraseConcept) {
        // Phrases are translated through their Qurana concept entry.
        $quranaConceptArr = getQuranaConceptEntryByARWord($subjectOrObject);
        $engTranslation = ucfirst($quranaConceptArr['EN']);

        echoN("^^{$subjectOrObject}");
    } else {
        // Single words: map to the Uthmani form, then use the word-level
        // translation table.
        $uthmaniWord = getItemFromUthmaniToSimpleMappingTable($subjectOrObject);
        $engTranslation = ucfirst(cleanEnglishTranslation($WORDS_TRANSLATIONS_AR_EN[$uthmaniWord]));
    }

    addNewConcept($finalConcepts, $subjectOrObject, "A-BOX", "POPULATION_FROM_RELATIONS", $freq, $engTranslation);

    // No part-of-speech is determined in this function, so POS is stored as null.
    $finalConcepts[$subjectOrObject]['EXTRA']['POS'] = null;
    $finalConcepts[$subjectOrObject]['EXTRA']['WEIGHT'] = $termsArr['WEIGHT'];

    if ($isQuranaPhraseConcept) {
        echoN($isQuranaPhraseConcept . "||||{$subjectOrObject}");
        $finalConcepts[$subjectOrObject]['EXTRA']['IS_QURANA_NGRAM_CONCEPT'] = true;
    }
}
} } $maxConceptFreq = $amxConceptFreq; //max(array_values($commonBiGramsConceptsWithQurana)); foreach ($commonBiGramsConceptsWithQurana as $biGramConcept => $freq) { $pos = $filteredBiGramsPOS[$biGramConcept]; // phrase weight = average weight of inner terms $biGramWords = preg_split("/ /", $biGramConcept); $weight = 0; foreach ($biGramWords as $biGramTerm) { $weight += floatval($WORDS_FREQUENCY['WORDS_TFIDF'][$biGramTerm]['TFIDF']); } $weight = $weight / 2; ////// //$weight = round($freq/$maxConceptFreq,2); $quranaConceptArr = getQuranaConceptEntryByARWord($biGramConcept); // ADD QURANA TRANSLATION FOR QURANA BIGRAMS $engTranslation = ucfirst($quranaConceptArr['EN']); addNewConcept($finalConcepts, $biGramConcept, "A-BOX", "PHRASE", $freq, $engTranslation); $finalConcepts[$biGramConcept]['EXTRA']['POS'] = $pos; $finalConcepts[$biGramConcept]['EXTRA']['WEIGHT'] = $weight; $finalConcepts[$biGramConcept]['EXTRA']['IS_QURANA_NGRAM_CONCEPT'] = true; } rsortBy($finalConcepts, "FREQ"); echoN("FINAL CONCEPTS COUNT:" . count($finalConcepts)); //preprint_r($finalConcepts); file_put_contents("{$ONTOLOGY_EXTRACTION_FOLDER}/temp.final.concepts.stage1", serialize($finalConcepts)); file_put_contents("{$ONTOLOGY_EXTRACTION_FOLDER}/temp.all.terms", serialize($finalTerms)); } if ($GENERATE_NONTAXONOMIC_RELATIONS) { $finalConcepts = unserialize(file_get_contents("{$ONTOLOGY_EXTRACTION_FOLDER}/temp.final.concepts.stage1"));