PHP getAPCIterator Examples

Programming Language: PHP

Method/Function: getAPCIterator

Examples at hotexamples.com: 7

PHP getAPCIterator - 7 examples found. These are the top rated real world PHP examples of getAPCIterator extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

File: generate-stopwords-ar-strict-level2.php Project: AhmedAMohamed/qurananalysis

#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
#    You can use Quran Analysis code, framework or corpora in your website
#	 or application (commercial/non-commercial) provided that you link
#    back to www.qurananalysis.com and sufficient credits are given.
#
#  ====================================================================
require_once "../global.settings.php";
require_once "../libs/core.lib.php";
loadModels("core,qac", "AR");
$LEMMA_TO_SIMPLE_WORD_MAP = loadLemmaToSimpleMappingTable();
printHTMLPageHeader();
//$qacMasterTableEntryArr2 = getModelEntryFromMemory("AR","MODEL_QAC","QAC_POS",$qacLocation);
$qacPoSTagsIterator = getAPCIterator("AR\\/MODEL_QAC\\/QAC_POS\\/.*");
$QURAN_TEXT = getModelEntryFromMemory("AR", "MODEL_CORE", "QURAN_TEXT", "");
$TOTALS = getModelEntryFromMemory("AR", "MODEL_CORE", "TOTALS", "");
$PAUSEMARKS = $TOTALS['PAUSEMARKS'];
preprint_r($PAUSEMARKS);
foreach ($qacPoSTagsIterator as $qacPoSTagsIteratorCursor) {
    $POS_ARR = $qacPoSTagsIteratorCursor['value'];
    $key = $qacPoSTagsIteratorCursor['key'];
    $POS = getEntryKeyFromAPCKey($key);
    if ($POS == "N" || $POS == "PN" || $POS == "ADJ") {
        continue;
    }
    //echoN("|$POS|");
    foreach ($POS_ARR as $location => $segmentId) {
        $qacMasterTableEntry = getModelEntryFromMemory("AR", "MODEL_QAC", "QAC_MASTERTABLE", $location);
        // get Word, Lema and root

Example #2

Show file

File: uthmani-to-simple.php Project: AhmedAMohamed/qurananalysis

			  			
			  		</div>
			  		<table id='uts-mapping-table'>
							<tr>
							<th>
								Uthmani
							</th>
							<th>
								Simple
							</th>
							</tr>
							
							<?php 
$uthmaniCounter = 0;
$simpleCounter = 0;
$qaOntologyConceptsIterator = getAPCIterator("AR\\/OTHERS\\/UTHMANI_TO_SIMPLE_WORD_MAP\\/.*");
foreach ($qaOntologyConceptsIterator as $conceptsCursor) {
    $mapTermKey = getEntryKeyFromAPCKey($conceptsCursor['key']);
    $mapTermVal = $conceptsCursor['value'];
    if (isSimpleQuranWord($mapTermKey)) {
        $simpleCounter++;
        //echoN("##".$mapTermKey);
        continue;
    }
    $uthmaniCounter++;
    ?>
								<tr>
									<td><?php 
    echo $mapTermKey;
    ?>
</td>

Example #3

Show file

File: codescratch.php Project: AhmedAMohamed/qurananalysis

#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
#    You can use Quran Analysis code, framework or corpora in your website
#	 or application (commercial/non-commercial) provided that you link
#    back to www.qurananalysis.com and sufficient credits are given.
#
#  ====================================================================
require_once "../global.settings.php";
$lang = "AR";
loadModels("", $lang);
$QURAN_TEXT = getModelEntryFromMemory($lang, "MODEL_CORE", "QURAN_TEXT", "");
preprint_r($QURAN_TEXT[1][1]);
$location = "1:1:1";
$qacMasterTableEntry = getModelEntryFromMemory("AR", "MODEL_QAC", "QAC_MASTERTABLE", $location);
preprint_r($qacMasterTableEntry);
$qaOntologyConceptsIterator = getAPCIterator("ALL\\/MODEL_QA_ONTOLOGY\\/CONCEPTS\\/.*");
foreach ($qaOntologyConceptsIterator as $conceptsCursor) {
    $conceptNameID = getEntryKeyFromAPCKey($conceptsCursor['key']);
    $conceptArr = $conceptsCursor['value'];
    $conceptLabelAR = $conceptArr['label_ar'];
    $conceptLabelEN = $conceptArr['label_en'];
    $conceptFrequency = $conceptArr['frequency'];
    $conceptWeight = $conceptArr['weight'];
    preprint_r($conceptArr);
    break;
    //only one concept
}
// print all words in wordnet
preprint_r(array_keys($MODEL_WORDNET['INDEX']));
// get all information about "Egypt" from wordnet
$wordNetEntry = getWordnetEntryByWordString("egypt");

Example #4

Show file

File: ontology.lib.php Project: AhmedAMohamed/qurananalysis

function isWordPartOfAVerbInVerbIndex($word, $lang)
{
    $verbIndexIterator = getAPCIterator("ALL\\/MODEL_QA_ONTOLOGY\\/VERB_INDEX\\/.*");
    foreach ($verbIndexIterator as $verbIndexCursor) {
        $verbWord = getEntryKeyFromAPCKey($verbIndexCursor['key']);
        $verbArr = $verbIndexCursor['value'];
        if ($lang == "EN") {
            $verbWord = strtolower($verbWord);
        }
        if (mb_strpos($verbWord, $word) !== false) {
            //echoN("|$verbWord| |$word|".( mb_strpos($verbWord, $word)!==false));
            return $verbArr;
        }
    }
    return false;
}

Example #5

Show file

File: graph.lib.php Project: AhmedAMohamed/qurananalysis

function ontologyToD3TreemapHierarchical($MODEL_QA_ONTOLOGY, $minFreq = 0, $lang)
{
    global $lang;
    $alreadyInLevel1 = array();
    $treeRootObj = array();
    $treeRootObj["name"] = "قرآن";
    if ($lang == "EN") {
        $treeRootObj["name"] = "Quran";
    }
    $treeRootObj["children"] = array();
    /** SHOULD BE ZERO BASED FOR D3 TO WORK - o.target.weight = NULL**/
    $nodeSerialNumber = 0;
    $qaOntologyConceptsIterator = getAPCIterator("ALL\\/MODEL_QA_ONTOLOGY\\/CONCEPTS\\/.*");
    foreach ($qaOntologyConceptsIterator as $conceptsCursor) {
        $conceptNameID = getEntryKeyFromAPCKey($conceptsCursor['key']);
        $conceptArr = $conceptsCursor['value'];
        $conceptLabelAR = $conceptArr['label_ar'];
        $conceptLabelEN = $conceptArr['label_en'];
        $conceptFrequency = $conceptArr['frequency'];
        $conceptWeight = $conceptArr['weight'];
        $type = $conceptArr['type'];
        if ($conceptFrequency < $minFreq) {
            continue;
        }
        //ONLY CLASSES (CLUSTERS)
        if ($type == "class") {
            $alreadyInLevel1[$conceptNameID] = 1;
            //echoN($conceptLabelAR);
            //echoN($conceptNameID);
            if ($lang == "EN") {
                $conceptNameClean = convertConceptIDtoGraphLabel($conceptLabelEN);
            } else {
                $conceptNameClean = convertConceptIDtoGraphLabel($conceptLabelAR);
            }
            /*= array("id"=>$nodeSerialNumber++,"word"=>$conceptLabelAR,
            	 "size"=>$conceptWeight,"x"=>rand($startLocationXMin,$startLocationXMax),
            			"y"=>rand($startLocationYMin,$startLocationYMax));*/
            $treeRootObj["children"][] = array("name" => $conceptNameClean, "size" => $conceptFrequency, "children" => getTreeNodeChildren($MODEL_QA_ONTOLOGY, $conceptNameID, $minFreq, $lang, 1, $alreadyInLevel1));
        }
    }
    /*
    	foreach($MODEL_QA_ONTOLOGY['RELATIONS'] as $index => $relArr)
    	{
    
    		$subject = $relArr['subject'];
    		$verbAR = $relArr['verb'];
    		$verbEN = $relArr['VERB_TRANSLATION_EN'];
    		$verbUthmani = $relArr['verb_uthmani'];
    		$relFreq = $relArr['frequency'];
    		$object = $relArr['object'];
    			
    
    		//$treeRootObj[$subject]["children"][]["name"]=$object;
    
    		$objectConceptArr = $MODEL_QA_ONTOLOGY['CONCEPTS'][$object];
    
    			
    		$index = search2DArrayForValue($currentArr,$subject);
    
    
    		$isObjectIncludedBefore = search2DArrayForValue($currentArr[$index]["children"],$object);
    
    		if ( $isObjectIncludedBefore===false)
    		{
    			//$currentArr[$index]["children"][] = array("name"=>$object,"size"=>$objectConceptArr['frequency'],"children"=>array());
    		}
    
    	}*/
    //echoN(count($treeRootObj["children"]));
    return $treeRootObj;
}

Example #6

Show file

File: model.loader.php Project: AhmedAMohamed/qurananalysis

function loadModels($modelsToBeLoaded, $lang)
{
    global $modelSources, $serializedModelFile, $quranMetaDataFile, $META_DATA, $MODEL_CORE, $MODEL_SEARCH, $MODEL_QAC, $MODEL_QURANA;
    global $UTHMANI_TO_SIMPLE_WORD_MAP, $numberOfSuras, $pauseMarksFile;
    global $TRANSLATION_MAP_EN_TO_AR, $TRANSLATION_MAP_AR_TO_EN, $TRANSLITERATION_WORDS_MAP, $TRANSLITERATION_VERSES_MAP;
    global $wordByWordTranslationFile, $transliterationFile;
    global $MODEL_WORDNET, $qaOntologyNamespace, $qaOntologyFile, $is_a_relation_name_ar, $is_a_relation_name_en;
    global $thing_class_name_ar, $thing_class_name_en;
    global $MODEL_QA_ONTOLOGY, $arabicStopWordsFileL2;
    global $TRANSLITERATION_WORDS_LOCATION_MAP, $TRANSLITERATION_WORDS_INDEX;
    //not working
    gc_enable();
    if (!function_exists("apc_exists")) {
        throw new Exception("APC not found!");
    }
    //echoN("MODEL EXISTS IN CACHE?:".apc_exists("EN/MODEL_CORE/TOTALS/"));
    ##### CHECK MODEL IN CACHE ##### #####
    if (TRUE && apc_exists("EN/MODEL_CORE/TOTALS/") !== false) {
        // split list by comma
        $modelListArr = preg_split("/,/", trim($modelsToBeLoaded));
        /**
         * TODO: CHANGE THE CODE TO REFERENCE APC MEMORY DIRECTLY INSTEAD OF LOADING DATA IN EACH SCRIPT
         */
        foreach ($modelListArr as $modelName) {
            //echoN("$modelName $lang ".time());
            //	echoN(memory_get_peak_usage());
            //echoN($modelName);
            if ($modelName == "ontology") {
                /*$MODEL_QA_ONTOLOGY =  apc_fetch("MODEL_QA_ONTOLOGY");
                		
                		
                		
                		if ($MODEL_QA_ONTOLOGY===false )
                		{
                			echo "$MODEL_QA_ONTOLOGY NOT CACHED";exit;
                		}
                		*/
            }
            if ($modelName == "wordnet") {
            }
            if ($modelName == "core") {
                //$MODEL_CORE = json_decode((file_get_contents("$serializedModelFile.core")),TRUE);
                /*$MODEL_CORE  = apc_fetch("MODEL_CORE[$lang]");
                		
                		
                		if ($MODEL_CORE===false )
                		{
                			echo "CORE MODEL [$lang] NOT CACHED";exit;
                		}*/
            } else {
                if ($modelName == "search") {
                    //$MODEL_SEARCH = json_decode((file_get_contents("$serializedModelFile.search")),TRUE);
                    //$MODEL_SEARCH  = apc_fetch("MODEL_SEARCH[$lang]");
                    /*if ($MODEL_SEARCH===false )
                    		{
                    			echo "SEARCH MODEL [$lang] NOT CACHED";exit;
                    		}*/
                } else {
                    if ($modelName == "qac") {
                        //$MODEL_QAC = json_decode((file_get_contents("$serializedModelFile.qac")),TRUE);
                        /*$MODEL_QAC  = apc_fetch("MODEL_QAC");
                        		
                        		
                        		if ($MODEL_QAC===false )
                        		{
                        			echo "QAC MODEL NOT CACHED";exit;
                        		}
                        		*/
                    }
                }
            }
        }
        $MODEL_WORDNET['INDEX'] = apc_fetch("WORDNET_INDEX");
        if ($MODEL_WORDNET['INDEX'] === false) {
            echo "MODEL_WORDNET['INDEX'] NOT CACHED";
            exit;
        }
        $MODEL_WORDNET['LEXICO_SEMANTIC_CATEGORIES'] = apc_fetch("WORDNET_LEXICO_SEMANTIC_CATEGORIES");
        if ($MODEL_WORDNET['LEXICO_SEMANTIC_CATEGORIES'] === false) {
            echo " MODEL MODEL_WORDNET['LEXICO_SEMANTIC_CATEGORIES'] NOT CACHED";
            exit;
        }
        $MODEL_WORDNET['DATA'] = apc_fetch("WORDNET_DATA");
        if ($MODEL_WORDNET['DATA'] === false) {
            echo "MODEL MODEL_WORDNET['DATA'] NOT CACHED";
            exit;
        }
        //else if ( ($modelName=="qurana"))
        //{
        //$MODEL_QURANA = json_decode((file_get_contents("$serializedModelFile.qurana")),TRUE);
        $MODEL_QURANA = apc_fetch("MODEL_QURANA");
        if ($MODEL_QURANA === false) {
            echo "QURANA MODEL NOT CACHED";
            exit;
        }
        //}
        return;
    }
    ########## ##### ##### ##### ##### #####
    //$quran = file($quranMetaDataFile);
    $quranMetaDataXMLObj = simplexml_load_file($quranMetaDataFile);
    ###### CONVERT META XML STRUCUTURE TO OUR STRUCTURE
    foreach ($quranMetaDataXMLObj->suras as $index => $surasArr) {
        foreach ($surasArr->sura as $suraMetaArr) {
            $tempArr = array();
            $tempArr['index'] = (string) $suraMetaArr['index'];
            $tempArr['ayas'] = (string) $suraMetaArr['ayas'];
            $tempArr['name_ar'] = (string) $suraMetaArr['name'];
            $tempArr['name_trans'] = (string) $suraMetaArr['tname'];
            $tempArr['name_en'] = (string) $suraMetaArr['ename'];
            $tempArr['type'] = (string) $suraMetaArr['type'];
            $tempArr['order'] = (string) $suraMetaArr['order'];
            $META_DATA['SURAS'][] = $tempArr;
        }
    }
    ##############################################
    /////////// LOAD ONTOLOGY
    $reader = new OWLReader();
    $ontology = new OWLMemoryOntology();
    $thingClassName = "{$thing_class_name_ar}";
    $ontology->setNamespace($qaOntologyNamespace);
    $reader->readFromFile($qaOntologyFile, $ontology);
    //preprint_r($ontology->{'owl_data'}['classes']);
    //preprint_r($ontology->{'owl_data'}['properties']);
    //preprint_r($ontology->{'owl_data'}['labels']);
    //preprint_r($ontology->{'owl_data'}['annotations']);
    //preprint_r($ontology->{'owl_data'}['instances']);
    $classes = $ontology->{'owl_data'}['classes'];
    $instances = $ontology->{'owl_data'}['instances'];
    $qaOntologyConceptsArr = array();
    $qaOntologyRelationsArr = array();
    $relationsCount = 0;
    foreach ($classes as $className => $infoArr) {
        $className = stripOntologyNamespace($className);
        $qaOntologyConceptsArr[$className] = array("type" => "class");
        //echoN($className);
        //preprint_r($infoArr);
        foreach ($infoArr[0]['properties'] as $index => $propertiesArr) {
            /** INCASE THIS INSTANCE HAS MULTIPLE PROPERTIES WITH SAME VERB **/
            foreach ($propertiesArr as $index2 => $onePropertyArr) {
                if (empty($onePropertyArr)) {
                    continue;
                }
                $verb = key($onePropertyArr);
                $objectClassArr = current($onePropertyArr);
                $objectConceptName = stripOntologyNamespace($objectClassArr[0]);
                //echoN("CLASS:***** $className => $verb -> $objectConceptName");
                $attributedArr = next($onePropertyArr);
                $freq = $attributedArr['frequency'];
                $engTranslation = $attributedArr['verb_translation_en'];
                $verbUthmani = $attributedArr['verb_uthmani'];
                $relHashID = buildRelationHashID($className, $verb, $objectConceptName);
                $qaOntologyRelationsArr[$relHashID] = array("SUBJECT" => $className, "VERB" => $verb, "OBJECT" => $objectConceptName, "FREQUENCY" => $freq, "VERB_TRANSLATION_EN" => $engTranslation, "VERB_UTHMANI" => $verbUthmani);
                //preprint_r($qaOntologyRelationsArr[$relHashID]);
                $relationsCount++;
            }
        }
    }
    foreach ($instances as $instanceName => $intancesArr) {
        foreach ($intancesArr as $index => $infoArr) {
            $subjectConceptName = stripOntologyNamespace($instanceName);
            $parent = stripOntologyNamespace($infoArr['class']);
            //echoN("$subjectConceptName $parent");
            $relHashID = buildRelationHashID($subjectConceptName, $is_a_relation_name_ar, $parent);
            $qaOntologyRelationsArr[$relHashID] = array("SUBJECT" => $subjectConceptName, "VERB" => "{$is_a_relation_name_ar}", "OBJECT" => $parent, "VERB_TRANSLATION_EN" => "{$is_a_relation_name_en}");
            if ($parent != $thing_class_name_ar) {
                $relationsCount++;
            }
            $propertiesArr = $infoArr['properties'];
            //echoN($instanceName);
            //echoN("$instanceName:@@@");
            //preprint_r($propertiesArr);
            /** INCASE THIS INSTANCE HAS MULTIPLE PROPERTIES WITH SAME VERB **/
            foreach ($propertiesArr as $index2 => $onePropertyArr) {
                if (empty($onePropertyArr)) {
                    continue;
                }
                $verb = key($onePropertyArr);
                $objectClassArr = current($onePropertyArr);
                $objectConceptName = stripOntologyNamespace($objectClassArr[0]);
                //echoN("***** $verb -> $objectConceptName");
                $attributedArr = next($onePropertyArr);
                $freq = $attributedArr['frequency'];
                $engTranslation = $attributedArr['verb_translation_en'];
                $verbUthmani = $attributedArr['verb_uthmani'];
                $relHashID = buildRelationHashID($subjectConceptName, $verb, $objectConceptName);
                $qaOntologyRelationsArr[$relHashID] = array("SUBJECT" => $subjectConceptName, "VERB" => $verb, "OBJECT" => $objectConceptName, "FREQUENCY" => $freq, "VERB_TRANSLATION_EN" => $engTranslation, "VERB_UTHMANI" => $verbUthmani);
                $relationsCount++;
            }
            // if it is class dont make it instance even if it is a subject (subclass of another class
            // BUG: animal was not apearing on ontology graph page since it was instance
            if (empty($qaOntologyConceptsArr[$subjectConceptName]) || $qaOntologyConceptsArr[$subjectConceptName][type] != 'class') {
                $qaOntologyConceptsArr[$subjectConceptName] = array("type" => "instance");
            }
        }
    }
    foreach ($qaOntologyConceptsArr as $conceptName => $infoArr) {
        $fullConceptName = $qaOntologyNamespace . $conceptName;
        $labelsArr = $ontology->{'owl_data'}['labels'][$fullConceptName];
        foreach ($labelsArr as $labelLang => $label) {
            /*if ( mb_strlen($label)==1)
            		{
            			echon($fullConceptName);
            			preprint_r($ontology->{'owl_data'}['labels'][$fullConceptName]);
            		}*/
            $qaOntologyConceptsArr[$conceptName]['label_' . strtolower($labelLang)] = $label;
        }
        // "Thing" does not have annotations
        if (isset($ontology->{'owl_data'}['annotations'][$fullConceptName])) {
            $annotationsArr = $ontology->{'owl_data'}['annotations'][$fullConceptName];
            foreach ($annotationsArr as $index => $annotArr) {
                $key = $annotArr['KEY'];
                $val = $annotArr['VAL'];
                $qaOntologyConceptsArr[$conceptName][$key] = $val;
                //echoN("[$conceptName][$key] = $val");
            }
        }
    }
    ////////// OUTPUT STATS
    /*echoN("INSTANCES COUNT:".count($ontology->{'owl_data'}['instances']));
    	echoN("CLASSES COUNT:".count($ontology->{'owl_data'}['classes']));
    	echoN("PROPERTIES COUNT - DECLERATIONS ONLY:".count($ontology->{'owl_data'}['properties']));;
    	echoN("CONCEPTS COUNT:".count($qaOntologyConceptsArr));
    	echoN("RELATIONS COUNT:".$relationsCount);
    	preprint_r($qaOntologyRelationsArr);*/
    //////////////////
    ///////////// QUALITY CHECK CONCEPTS
    $qaOntologyConceptsArr2 = array();
    foreach ($qaOntologyConceptsArr as $key => $val) {
        $newKey = strtr($key, "_", " ");
        $qaOntologyConceptsArr2[$newKey] = $value;
    }
    $ONTOLOGY_EXTRACTION_FOLDER = "../data/ontology/extraction/";
    $finalConcepts = unserialize(file_get_contents("{$ONTOLOGY_EXTRACTION_FOLDER}/temp.final.concepts.final"));
    $diffArr = array_diff(array_keys($qaOntologyConceptsArr2), array_keys($finalConcepts));
    $conceptsDiffCount = count($matchingTable);
    if ($relationsDiffCount > 0) {
        echoN("<b>### OWL-PROPRIETARY-CONCEPTS-DIFF-COUNT:</b>" . $conceptsDiffCount);
    }
    //preprint_r($diffArr);
    //////////////////////////////////////////////////////////////
    //////// quality check relations
    $relationsArr = unserialize(file_get_contents("{$ONTOLOGY_EXTRACTION_FOLDER}/temp.final.relations"));
    $matchingTable = array();
    foreach ($qaOntologyRelationsArr as $index => $relArr) {
        $trippleStr = $relArr['SUBJECT'] . "->" . $relArr['VERB'] . "->" . $relArr['OBJECT'];
        //since Thing relations are not in the list we are comparing with
        if ($relArr['OBJECT'] == $thing_class_name_ar) {
            continue;
        }
        //echoN($trippleStr);
        $trippleStr = trim($trippleStr);
        $matchingTable[$trippleStr]++;
    }
    foreach ($relationsArr as $index => $relArr) {
        $relArr['SUBJECT'] = strtr($relArr['SUBJECT'], " ", "_");
        $relArr['VERB'] = strtr($relArr['VERB'], " ", "_");
        $relArr['OBJECT'] = strtr($relArr['OBJECT'], " ", "_");
        $trippleStr = $relArr['SUBJECT'] . "->" . $relArr['VERB'] . "->" . $relArr['OBJECT'];
        $trippleStr = trim($trippleStr);
        $matchingTable[$trippleStr]++;
    }
    function filterFunc($v)
    {
        return $v <= 1;
    }
    $matchingTable = array_filter($matchingTable, 'filterFunc');
    $relationsDiffCount = count($matchingTable);
    if ($relationsDiffCount > 0) {
        echoN("<b>### OWL-PROPRIETARY-RELATIONS-DIFF-COUNT:</b>" . $relationsDiffCount);
        preprint_r($matchingTable);
    }
    //////////////////////////////////////////////
    //echoN( join("<br>",array_keys($qaOntologyConceptsArr)));
    $qaOntologyVerbIndex = array();
    $qaOntologyGraphSourcesIndex = array();
    $qaOntologyGraphTargetsIndex = array();
    //preprint_r($qaOntologyRelationsArr);
    //exit;
    foreach ($qaOntologyRelationsArr as $index => $relArr) {
        $subject = $relArr['SUBJECT'];
        $verb = $relArr['VERB'];
        $verb_translation_en = $relArr['VERB_TRANSLATION_EN'];
        $object = $relArr['OBJECT'];
        //$qaOntologyVerbIndex[$verb][]=array("SUBJECT"=>$subject,"OBJECT"=>$object);
        //$qaOntologyVerbIndex[$verb_translation_en][]=array("SUBJECT"=>$subject,"OBJECT"=>$object);
        addValueToMemoryModel("ALL", "MODEL_QA_ONTOLOGY", "VERB_INDEX", $verb, array("SUBJECT" => $subject, "OBJECT" => $object));
        addValueToMemoryModel("ALL", "MODEL_QA_ONTOLOGY", "VERB_INDEX", $verb_translation_en, array("SUBJECT" => $subject, "OBJECT" => $object));
        //$qaOntologyGraphSourcesIndex[$subject][]=array("link_verb"=>$verb,"target"=>$object,"relation_index"=>$index);
        //$qaOntologyGraphTargetsIndex[$object][]=array("source"=>$subject,"link_verb"=>$verb,"relation_index"=>$index);
        addToMemoryModelList("ALL", "MODEL_QA_ONTOLOGY", "GRAPH_INDEX_SOURCES", $subject, array("link_verb" => $verb, "target" => $object, "relation_index" => $index));
        addToMemoryModelList("ALL", "MODEL_QA_ONTOLOGY", "GRAPH_INDEX_TARGETS", $object, array("source" => $subject, "link_verb" => $verb, "relation_index" => $index));
    }
    $qaOntologyConceptsENtoARMapArr = array();
    foreach ($qaOntologyConceptsArr as $arName => $conceptArr) {
        $enLabel = trim(strtolower($conceptArr['label_en']));
        //$qaOntologyConceptsENtoARMapArr[$enLabel]=$arName;
        //$qaOntologyConceptsENtoARMapArr[$enLabel]=$arName;
        addValueToMemoryModel("ALL", "MODEL_QA_ONTOLOGY", "CONCEPTS_EN_AR_NAME_MAP", $enLabel, $arName);
    }
    $qaSynonymsIndex = array();
    foreach ($qaOntologyConceptsArr as $arName => $conceptArr) {
        addValueToMemoryModel("ALL", "MODEL_QA_ONTOLOGY", "CONCEPTS", $arName, $conceptArr);
        $i = 1;
        while (isset($conceptArr['synonym_' . $i])) {
            if (empty($conceptArr['synonym_' . $i])) {
                $i++;
                continue;
            }
            $synonymLabel = trim(strtolower($conceptArr['synonym_' . $i]));
            $qaSynonymsIndex[$synonymLabel] = $arName;
            addValueToMemoryModel("ALL", "MODEL_QA_ONTOLOGY", "SYNONYMS_INDEX", $synonymLabel, $arName);
            $i++;
        }
    }
    //preprint_r($qaOntologyConceptsArr);exit;
    //$MODEL_QA_ONTOLOGY['CONCEPTS'] = $qaOntologyConceptsArr;
    //$MODEL_QA_ONTOLOGY['RELATIONS'] = $qaOntologyRelationsArr;
    addValueToMemoryModel("ALL", "MODEL_QA_ONTOLOGY", "RELATIONS", "", $qaOntologyRelationsArr);
    //$MODEL_QA_ONTOLOGY['GRAPH_INDEX_SOURCES'] = $qaOntologyGraphSourcesIndex;
    //$MODEL_QA_ONTOLOGY['GRAPH_INDEX_TARGETS'] = $qaOntologyGraphTargetsIndex;
    //$MODEL_QA_ONTOLOGY['CONCEPTS_EN_AR_NAME_MAP'] = $qaOntologyConceptsENtoARMapArr;
    //$MODEL_QA_ONTOLOGY['VERB_INDEX']  = $qaOntologyVerbIndex;
    //$MODEL_QA_ONTOLOGY['SYNONYMS_INDEX']  = $qaSynonymsIndex;
    //$res = apc_store("MODEL_QA_ONTOLOGY",$MODEL_QA_ONTOLOGY);
    //if ( $res===false){ throw new Exception("Can't cache MODEL_QA_ONTOLOGY"); }
    //preprint_r($MODEL_QA_ONTOLOGY);exit;
    //////// END ONTOLOGY LOADING
    ////////////////////////////
    /// WORDNET
    loadWordnet($MODEL_WORDNET);
    /////////////
    //free resources
    $quranMetaDataXMLObj = null;
    unset($quranMetaDataXMLObj);
    foreach ($modelSources as $supportedLang => $modelSourceArr) {
        $type = $modelSourceArr['type'];
        $file = $modelSourceArr['file'];
        //echoN("$lang $type $file");
        loadModel($supportedLang, $type, $file);
        //not working
        $gced = gc_collect_cycles();
        //echoN($gced);
    }
    //echoN(json_encode($MODEL));
    ############ Uthmani/Simple mapping table #################
    ############ AND WORD-WORD TRANSLATION AND TRANSLITERATION #################
    $pauseMarksArr = getPauseMarksArrByFile($pauseMarksFile);
    $wordByWordFileArr = file($wordByWordTranslationFile, FILE_SKIP_EMPTY_LINES | FILE_IGNORE_NEW_LINES);
    $translitertationArr = file($transliterationFile, FILE_SKIP_EMPTY_LINES | FILE_IGNORE_NEW_LINES);
    $WORD_SENSES_EN = array();
    $WORD_SENSES_AR = array();
    $quranTextEntryFromAPC_AR = getModelEntryFromMemory("AR", "MODEL_CORE", "QURAN_TEXT", "");
    $quranTextEntryFromAPC_UTH = getModelEntryFromMemory("AR_UTH", "MODEL_CORE", "QURAN_TEXT", "");
    /* SURA'S LOOP **/
    for ($s = 0; $s < $numberOfSuras; $s++) {
        $suraSize = count($quranTextEntryFromAPC_AR[$s]);
        /* VERSES LOOP **/
        for ($a = 0; $a < $suraSize; $a++) {
            $i++;
            $verseTextSimple = $quranTextEntryFromAPC_AR[$s][$a];
            $simpleWordsArr = preg_split("/ /", $verseTextSimple);
            $verseTextUthmani = $quranTextEntryFromAPC_UTH[$s][$a];
            $uthmaniWordsArr = preg_split("/ /", $verseTextUthmani);
            $simpleWordsArr = removePauseMarksFromArr($pauseMarksArr, $simpleWordsArr);
            $uthmaniWordsArr = removePauseMarksFromArr($pauseMarksArr, $uthmaniWordsArr);
            $verseLocation = $s + 1 . ":" . ($a + 1);
            $UTHMANI_TO_SIMPLE_LOCATION_MAP[$verseLocation] = array();
            ///////// Transliteration /////////////
            $transliterationLine = current($translitertationArr);
            next($translitertationArr);
            $lineParts = preg_split("/\\|/", $transliterationLine);
            $verseTransliteration = $lineParts[2];
            //echoN($transliterationLine);
            $TRANSLITERATION_VERSES_MAP[$verseLocation] = $verseTransliteration;
            $wordsTransliterationArr = preg_split("/ /", $verseTransliteration);
            // preprint_r($wordsTransliterationArr);exit;
            /////////////////////////////////////////////////
            $wtwIndex = 0;
            foreach ($uthmaniWordsArr as $index => $wordUthmani) {
                $qacMasterID = $s + 1 . ":" . ($a + 1) . ":" . ($index + 1);
                $qacMasterTableEntry = getModelEntryFromMemory("AR", "MODEL_QAC", "QAC_MASTERTABLE", $qacMasterID);
                $lemma = $qacMasterTableEntry[0]['FEATURES']['LEM'];
                // to handle multi segment words such as الدنيا
                if (empty($lemma)) {
                    $lemma = $qacMasterTableEntry[1]['FEATURES']['LEM'];
                }
                //echoN("|$lemma|$wordUthmani");
                //$wtwIndex (INDEX_IN_AYA_EMLA2Y) needs to be 1 based  ( UTHMANI=IMLA2Y )
                $UTHMANI_TO_SIMPLE_LOCATION_MAP[$s + 1 . ":" . ($a + 1)][$index + 1] = $wtwIndex + 1;
                $wordSimple = $simpleWordsArr[$wtwIndex++];
                //$UTHMANI_TO_SIMPLE_LOCATION_MAP[($s+1).":".($a+1)][($index+1)."-".$wordUthmani]=($wtwIndex)."-".$wordSimple;
                /* for ayas which are different in size, do the following
                 * if the current word is  ويا  or  ها or   يا
                 * then join it with the next word and make them one word
                 */
                if (count($uthmaniWordsArr) != count($simpleWordsArr) && ($wordSimple == "يا" || $wordSimple == "ها" || $wordSimple == "ويا" || $wordUthmani == "وَأَلَّوِ")) {
                    if ($wordUthmani == "يَبْنَؤُمَّ") {
                        // example 0 => 1
                        $UTHMANI_TO_SIMPLE_LOCATION_MAP[$s + 1 . ":" . ($a + 1)][$index + 1] = $wtwIndex + 1;
                        //[($index+1)."-".$wordUthmani]=($wtwIndex+1)."-".$wordSimple;
                        $wordSimple = $wordSimple . " " . $simpleWordsArr[$wtwIndex++] . " " . $simpleWordsArr[$wtwIndex++];
                    } else {
                        // example 0 => 1
                        $UTHMANI_TO_SIMPLE_LOCATION_MAP[$s + 1 . ":" . ($a + 1)][$index + 1] = $wtwIndex + 1;
                        //[($index+1)."-".$wordUthmani]=($wtwIndex+1)."-".$wordSimple;
                        $wordSimple = $wordSimple . " " . $simpleWordsArr[$wtwIndex++];
                    }
                    //echoN("$wordUthmani:$wordSimple");
                }
                //	printHTMLPageHeader();
                //	echoN("$wordSimple|$wordUthmani");
                ///////// english translation ////////
                $wordByWordTranslationLine = current($wordByWordFileArr);
                next($wordByWordFileArr);
                $linePartsArr = preg_split("/\\|/", $wordByWordTranslationLine);
                $englishTranslationForCurrentWord = $linePartsArr[5];
                /////////////////////////////////////////////////
                $WORD_SENSES_EN[$englishTranslationForCurrentWord][$wordUthmani]++;
                $WORD_SENSES_AR[$wordUthmani][$englishTranslationForCurrentWord]++;
                $TRANSLATION_MAP_EN_TO_AR[$englishTranslationForCurrentWord] = $wordUthmani;
                $TRANSLATION_MAP_AR_TO_EN[$wordUthmani] = $englishTranslationForCurrentWord;
                $TRANSLITERATION_WORDS_MAP[$wordUthmani] = $wordsTransliterationArr[$index];
                $clenaedTranliteration = cleanTransliteratedText($wordsTransliterationArr[$index]);
                $TRANSLITERATION_WORDS_INDEX[$clenaedTranliteration] = 1;
                $TRANSLITERATION_WORDS_LOCATION_MAP["{$s}:{$a}:{$index}"] = $wordsTransliterationArr[$index];
                //preprint_r($TRANSLITERATION_WORDS_LOCATION_MAP);
                // preprint_r($TRANSLATION_MAP_AR_TO_EN);
                // preprint_r($TRANSLITERATION_WORDS_MAP);
                $UTHMANI_TO_SIMPLE_WORD_MAP[$wordUthmani] = $wordSimple;
                addValueToMemoryModel("AR", "OTHERS", "UTHMANI_TO_SIMPLE_WORD_MAP", $wordUthmani, $wordSimple);
                $UTHMANI_TO_SIMPLE_WORD_MAP[$wordSimple] = $wordUthmani;
                addValueToMemoryModel("AR", "OTHERS", "UTHMANI_TO_SIMPLE_WORD_MAP", $wordSimple, $wordUthmani);
                if (!empty($lemma)) {
                    if (!isset($LEMMA_TO_SIMPLE_WORD_MAP[$lemma])) {
                        $LEMMA_TO_SIMPLE_WORD_MAP[$lemma] = $wordSimple;
                    } else {
                        $oldSimple = $LEMMA_TO_SIMPLE_WORD_MAP[$lemma];
                        if (myLevensteinEditDistance($oldSimple, $lemma) > myLevensteinEditDistance($wordSimple, $lemma)) {
                            $LEMMA_TO_SIMPLE_WORD_MAP[$lemma] = $wordSimple;
                        }
                    }
                }
            }
        }
    }
    /////// ADD UTHMANI TO SIMPLE LOCATION MAP TO MEMORY
    foreach ($UTHMANI_TO_SIMPLE_LOCATION_MAP as $verseLocation => $verseMappingArr) {
        /*foreach($mappingArr as $uhtmaniIndex=>$imal2yIndex)
        		{
        			
        		}*/
        addValueToMemoryModel("AR", "OTHERS", "UTHMANI_TO_SIMPLE_LOCATION_MAP", $verseLocation, $verseMappingArr);
    }
    ///////////////////////////////////////////////////////
    //preprint_r($TRANSLATION_MAP_EN_TO_AR);exit;
    //preprint_r($WORD_SENSES_AR);exit;
    // CAN'T BE ADDED IN THE CORE_MODEL since the mapping happens after loadModel
    //$res = apc_store("UTHMANI_TO_SIMPLE_WORD_MAP",$UTHMANI_TO_SIMPLE_WORD_MAP);
    //if ( $res===false){ throw new Exception("Can't cache UTHMANI_TO_SIMPLE_WORD_MAP"); }
    //$res = apc_store("UTHMANI_TO_SIMPLE_LOCATION_MAP",$UTHMANI_TO_SIMPLE_LOCATION_MAP);
    //if ( $res===false){ throw new Exception("Can't cache UTHMANI_TO_SIMPLE_LOCATION_MAP"); }
    $res = apc_store("LEMMA_TO_SIMPLE_WORD_MAP", $LEMMA_TO_SIMPLE_WORD_MAP);
    if ($res === false) {
        throw new Exception("Can't cache LEMMA_TO_SIMPLE_WORD_MAP");
    }
    $res = apc_store("WORDS_TRANSLATIONS_EN_AR", $TRANSLATION_MAP_EN_TO_AR);
    if ($res === false) {
        throw new Exception("Can't cache WORDS_TRANSLATIONS_EN_AR");
    }
    $res = apc_store("WORDS_TRANSLATIONS_AR_EN", $TRANSLATION_MAP_AR_TO_EN);
    if ($res === false) {
        throw new Exception("Can't cache WORDS_TRANSLATIONS_AR_EN");
    }
    $res = apc_store("WORDS_TRANSLITERATION", $TRANSLITERATION_WORDS_MAP);
    if ($res === false) {
        throw new Exception("Can't cache WORDS_TRANSLITERATION");
    }
    $res = apc_store("TRANSLITERATION_WORDS_LOCATION_MAP", $TRANSLITERATION_WORDS_LOCATION_MAP);
    if ($res === false) {
        throw new Exception("Can't cache TRANSLITERATION_WORDS_LOCATION_MAP");
    }
    $res = apc_store("TRANSLITERATION_VERSES_MAP", $TRANSLITERATION_VERSES_MAP);
    if ($res === false) {
        throw new Exception("Can't cache TRANSLITERATION_VERSES_MAP");
    }
    $res = apc_store("TRANSLITERATION_WORDS_INDEX", $TRANSLITERATION_WORDS_INDEX);
    if ($res === false) {
        throw new Exception("Can't cache TRANSLITERATION_WORDS_INDEX");
    }
    $res = apc_store("WORD_SENSES_EN", $WORD_SENSES_EN);
    if ($res === false) {
        throw new Exception("Can't cache WORD_SENSES_EN");
    }
    $res = apc_store("WORD_SENSES_AR", $WORD_SENSES_AR);
    if ($res === false) {
        throw new Exception("Can't cache {$WORD_SENSES_AR}");
    }
    //// ENRICH INVERTED INDEX BY UTHMANI-EMLA2Y INDEXES
    //echoN(count($MODEL_SEARCH['AR']['INVERTED_INDEX']));
    foreach (getAPCIterator("AR\\/MODEL_SEARCH\\/INVERTED_INDEX\\/.*") as $invertedIndexCursor) {
        $wordDataArr = $invertedIndexCursor['value'];
        $key = $invertedIndexCursor['key'];
        $word = getEntryKeyFromAPCKey($key);
        foreach ($wordDataArr as $index => $documentArrInIndex) {
            $WORD_TYPE = $documentArrInIndex['WORD_TYPE'];
            $SURA = $documentArrInIndex['SURA'];
            $AYA = $documentArrInIndex['AYA'];
            //echoN($word." ".$WORD_TYPE);
            if ($WORD_TYPE == "NORMAL_WORD") {
                $INDEX_IN_AYA_EMLA2Y = $documentArrInIndex['INDEX_IN_AYA_EMLA2Y'];
                foreach ($UTHMANI_TO_SIMPLE_LOCATION_MAP[$SURA + 1 . ":" . ($AYA + 1)] as $uhtmaniIndex => $imal2yIndex) {
                    if ($imal2yIndex == $INDEX_IN_AYA_EMLA2Y) {
                        $INDEX_IN_AYA_UTHMANI = $uhtmaniIndex;
                        break;
                    }
                }
                //echoN($INDEX_IN_AYA_UTHMANI);
                $wordDataArr[$index]['INDEX_IN_AYA_UTHMANI'] = $INDEX_IN_AYA_UTHMANI;
            } else {
                // needed for highlighting pronoun charcters in search
                $INDEX_IN_AYA_UTHMANI = $documentArrInIndex['INDEX_IN_AYA_UTHMANI'];
                $INDEX_IN_AYA_EMLA2Y = getSimpleWordIndexByUthmaniWordIndex($SURA + 1 . ":" . ($AYA + 1), $INDEX_IN_AYA_UTHMANI);
                $wordDataArr[$index]['INDEX_IN_AYA_EMLA2Y'] = $INDEX_IN_AYA_EMLA2Y;
            }
        }
        //UPDATE
        updateModelData($key, $wordDataArr);
    }
    //$res = apc_store("MODEL_SEARCH[AR]",$MODEL_SEARCH['AR']);
    //if ( $res===false){ throw new Exception("Can't cache MODEL_SEARCH[AR]"); }
    //preprint_r($TRANSLITERATION_WORDS_LOCATION_MAP);
    /// ADD TRANSLITERATION TO INVERETD INDEX WWITH ENGLISH WORDS
    if ($lang == "EN") {
        $invertedIndexBatchApcArr = array();
        foreach ($TRANSLITERATION_WORDS_LOCATION_MAP as $location => $transliteratedWord) {
            $locationArr = explode(":", $location);
            $s = $locationArr[0];
            $a = $locationArr[1];
            $wordIndex = $locationArr[2];
            //echoN("$transliteratedWord,$s,$a,$wordIndex");
            $transliteratedWord = strtolower(strip_tags($transliteratedWord));
            //$MODEL_SEARCH['EN']['INVERTED_INDEX'][$word]
            addToInvertedIndex($invertedIndexBatchApcArr, $lang, $transliteratedWord, $s, $a, $wordIndex, "NORMAL_WORD");
        }
        addToMemoryModelBatch($invertedIndexBatchApcArr);
        //$res = apc_store("MODEL_SEARCH[EN]",$MODEL_SEARCH['EN']);
    }
    //if ( $res===false){ throw new Exception("Can't cache MODEL_SEARCH[EN]"); }
    /////////////////////////////////////////////////////////
    //preprint_r($UTHMANI_TO_SIMPLE_WORD_MAP);
    //preprint_r($MODEL_CORE["AR_UTH"]['QURAN_TEXT']);exit;
    ##############################################################
    // get memory usage
    $debug = memory_get_usage(true) / 1024 / 1024 . "/" . memory_get_peak_usage(true) / 1024 / 1024 . "Memory <br>";
    //echoN($debug);
    //needed to be set here after both languages has been loaded
    // reload all models from memory to set all variables (WORDNET) - after model generation
    /* needed to reload all generated models from memory specialy model_core since 
     * it has 3 languages, if this line is removed: all 3 langauges are loaded although only one language 
     * is requested, also it caused a bug in getPoSTaggedSubsentences
     */
    //loadModels($modelsToBeLoaded,$lang);
}

Example #7

Show file

File: search.lib.php Project: AhmedAMohamed/qurananalysis

function extendQueryWordsByDerivations($taggedSignificantWords, $lang)
{
    foreach ($taggedSignificantWords as $word => $posTag) {
        // avoid small words, will lead to too many iirelevant derivations
        if (mb_strlen($word) <= 2) {
            continue;
        }
        if ($lang == "EN") {
            if ($posTag == "NN") {
                $plural = $word . "s";
                $taggedSignificantWords[$plural] = "{$posTag}" . "S";
            } else {
                if ($posTag == "NNS") {
                    $single = substr($word, 0, -1);
                    $taggedSignificantWords[$single] = substr($posTag, 0, -1);
                }
            }
        } else {
            $simmlarWords = array();
            $qaOntologyConceptsIterator = getAPCIterator("ALL\\/MODEL_QA_ONTOLOGY\\/CONCEPTS\\/.*");
            foreach ($qaOntologyConceptsIterator as $conceptsCursor) {
                $conceptID = getEntryKeyFromAPCKey($conceptsCursor['key']);
                $mainConceptArr = $conceptsCursor['value'];
                $conceptLabelAR = $mainConceptArr['label_ar'];
                $dist = myLevensteinEditDistance($word, $conceptLabelAR);
                if ($dist <= 5) {
                    $dist = getDistanceByCommonUniqueChars($word, $conceptLabelAR);
                    $simmlarWords[$conceptLabelAR] = $dist;
                }
                $i = 1;
                while (isset($mainConceptArr['synonym_' . $i]) && isArabicString($mainConceptArr['synonym_' . $i])) {
                    $synonym = $mainConceptArr['synonym_' . $i];
                    $dist = myLevensteinEditDistance($word, $synonym);
                    if ($dist <= 5) {
                        $dist = getDistanceByCommonUniqueChars($word, $synonym);
                        $simmlarWords[$synonym] = $dist;
                    }
                    $i++;
                }
            }
            foreach ($simmlarWords as $conceptWord => $distBySimChars) {
                $diff = mb_strlen($conceptWord) - mb_strlen($word);
                $absDiffSize = abs($diff);
                // $word is bigger
                if ($diff < 0) {
                    $absDiffSize = abs($diff);
                    $diffStr = getStringDiff($conceptWord, $word);
                    //echoN($diffStr);
                    //حيوان => الحيوانات
                    // the bigger word should not contain space الله => سبيل الله
                    // $diffStr=="الات" for حيوان = الحياوانات
                    if (mb_strpos($word, $conceptWord) !== false && strpos($word, " ") === false && ($diffStr == "ات" || $diffStr == "ال" || $diffStr == "الات")) {
                        //echoN("$word, $conceptWord");
                        /// convert word to noun plular
                        $taggedSignificantWords[$word] = "NNS";
                        // concept word is singular
                        $taggedSignificantWords[$conceptWord] = "NN";
                    } else {
                        if ($diff == 1) {
                            $wordLastCharTrimmed = mb_substr($conceptWord, 0, -1);
                            if ($wordLastCharTrimmed . "ات" == $word) {
                                /// convert word to noun plular
                                $taggedSignificantWords[$word] = "NNS";
                                /// convert word to noun plular
                                $taggedSignificantWords[$conceptWord] = "NN";
                            }
                        }
                    }
                } else {
                    $diffStr = getStringDiff($conceptWord, $word);
                    //  الحيوانات => حيوان
                    // the bigger word should not contain space الله => سبيل الله
                    if ($diff != 0 && mb_strpos($conceptWord, $word) !== false && strpos($conceptWord, " ") === false && ($diffStr == "ات" || $diffStr == "ال" || $diffStr == "الات")) {
                        //echoN("$word,$conceptWord");
                        /// convert word to noun plular
                        $taggedSignificantWords[$conceptWord] = "NNS";
                    } else {
                        if ($diff == 1) {
                            $wordLastCharTrimmed = mb_substr($word, 0, -1);
                            if ($wordLastCharTrimmed . "ات" == $conceptWord) {
                                /// convert word to noun plular
                                $taggedSignificantWords[$conceptWord] = "NNS";
                            }
                        }
                    }
                }
            }
            // limit the number of derivations+original terms to 15
            $taggedSignificantWords = array_slice($taggedSignificantWords, 0, 10);
            //arsort($simmlarWords);
            //preprint_r($taggedSignificantWords);
            //preprint_r($simmlarWords);
            //exit;
        }
    }
    //preprint_r($taggedSignificantWords);
    return $taggedSignificantWords;
}