/**
 * Stem every non-stopword search term with the Porter stemmer.
 *
 * Stopwords are dropped first via stopwordRemoval(); each remaining word is
 * passed through PorterStemmer::Stem() and duplicate stems are removed.
 *
 * @param array $searchArray Search terms to stem.
 *
 * @return array Unique stems, each terminated by "\n". Keys are the ones
 *               preserved by array_unique(), so they may be non-contiguous.
 *               An empty array is returned when no term survives stopword
 *               removal.
 */
function applyPorterStemming(array $searchArray)
{
    // Start from an empty list so array_unique() always receives an array,
    // even when every search term is removed as a stopword.
    $stems = array();

    foreach (stopwordRemoval($searchArray) as $word) {
        // The trailing newline has always been part of each returned stem;
        // it is kept so existing callers see the same values.
        $stems[] = PorterStemmer::Stem($word) . "\n";
    }

    // Remove duplicate stems (first occurrence wins, keys are preserved)
    return array_unique($stems);
}
/**
 * Build an HTML fragment of search links for the synonyms of each search term.
 *
 * Stopwords are removed via stopwordRemoval(); for every remaining word the
 * Altervista thesaurus service is queried over HTTP. Every synonym list in a
 * successful (HTTP 200) JSON response is lower-cased, stripped of the
 * "(antonym)" marker, split on "|" and turned into a link back to search.php
 * that reuses the RadioGroup1 / RadioGroup2 settings of the current request.
 * Words whose lookup fails, or whose response cannot be parsed, contribute
 * nothing.
 *
 * @param array $searchArray Search terms to look up.
 *
 * @return string Concatenated "<a ...>term</a>&nbsp&nbsp" links, or an empty
 *                string when no synonyms were found.
 */
function getSynonyms(array $searchArray)
{
    // Request constants (loop-invariant).
    // NOTE(review): the API key is hard-coded in source; consider moving it
    // to configuration that is kept out of version control.
    $apikey = "yoNJkbNlb7pQEYhSflTz";
    $language = "en_US";
    $endpoint = "http://thesaurus.altervista.org/thesaurus/v1";

    // Carry over the settings of the current request. They come from the
    // query string, so treat them as untrusted and tolerate their absence.
    $radioGroup1 = (isset($_GET['RadioGroup1']) && is_string($_GET['RadioGroup1'])) ? $_GET['RadioGroup1'] : '';
    $radioGroup2 = (isset($_GET['RadioGroup2']) && is_string($_GET['RadioGroup2'])) ? $_GET['RadioGroup2'] : '';

    $synonyms = "";

    // Remove stopwords: do not want to find synonyms for these
    foreach (stopwordRemoval($searchArray) as $searchWord) {
        // Invoke the remote service
        $session = curl_init();
        curl_setopt($session, CURLOPT_URL, $endpoint . "?word=" . urlencode((string) $searchWord) . "&language=" . $language . "&key=" . $apikey . "&output=json");
        curl_setopt($session, CURLOPT_RETURNTRANSFER, 1);
        $data = curl_exec($session);
        $info = curl_getinfo($session);
        curl_close($session);

        // Skip this word if the transfer failed or the service reported an error
        if ($data === false || (int) $info['http_code'] !== 200) {
            continue;
        }

        // Skip this word if the body is not the expected JSON structure
        $result = json_decode($data, true);
        if (!is_array($result) || !isset($result['response']) || !is_array($result['response'])) {
            continue;
        }

        // Gather the synonyms of EVERY list in the response. The token list
        // is rebuilt per word so nothing leaks over from a previous lookup.
        $termsArray = array();
        foreach ($result['response'] as $entry) {
            if (!isset($entry['list']['synonyms']) || !is_string($entry['list']['synonyms'])) {
                continue;
            }
            $terms = strtolower(str_replace("(antonym)", " ", $entry['list']['synonyms']));
            foreach (explode("|", $terms) as $term) {
                // Stripping "(antonym)" leaves padding behind; drop it and
                // ignore tokens that end up empty.
                $term = trim($term);
                if ($term !== '') {
                    $termsArray[] = $term;
                }
            }
        }

        // One link per synonym; the link searches with the last-used settings
        foreach ($termsArray as $term) {
            $href = "http://localhost/MetaSearchEngine/search.php?RadioGroup2=" . urlencode($radioGroup2)
                . "&searchText=" . urlencode($term)
                . "&submit=+Search+&RadioGroup1=" . urlencode($radioGroup1);
            // Escape for the HTML context: both the request parameters and
            // the remote service's text are outside our control.
            $synonyms .= "<a href='" . htmlspecialchars($href, ENT_QUOTES, 'UTF-8') . "'>"
                . htmlspecialchars($term, ENT_QUOTES, 'UTF-8') . "</a>" . "&nbsp&nbsp";
        }
    }

    return $synonyms;
}
/**
 * Build a small inverted index over the snippets of the top-K results.
 *
 * Each snippet is reduced to lower-case alphabetic tokens. Tokens that survive
 * stopwordRemoval() and are at least three characters long become dictionary
 * terms. For every term the index records in how many documents it occurs
 * (docFrequency) and how often it occurs in each of them (termFrequency).
 *
 * @param array $arrayTopK List of results; each entry is read through its
 *                         'docID' and 'snippet' keys. Entries sharing a docID
 *                         overwrite one another (the last one wins).
 *
 * @return array array(
 *                   'docCount'   => number of distinct docIDs,
 *                   'dictionary' => array(term => array(
 *                       'docFrequency'  => int,
 *                       'termFrequency' => array(docID => int),
 *                   )), sorted by term
 *               ).
 *               For empty input docCount is 0 and dictionary is an empty array.
 */
function getIndex(array $arrayTopK)
{
    // Map each docID to its snippet
    $snippetCollection = array();
    foreach ($arrayTopK as $value) {
        $snippetCollection[$value['docID']] = $value['snippet'];
    }

    // Total number of documents (to use in tf-idf); 0 for empty input
    $docCount = count($snippetCollection);

    // Initialised up front so ksort() always receives an array
    $dictionary = array();

    foreach ($snippetCollection as $docID => $snippet) {
        // Collapse every run of non-letters to a single space, then tokenise
        $terms = explode(" ", strtolower(preg_replace("/[^a-zA-Z]+/", " ", $snippet)));

        // Count the term occurrences within this document (before stopword
        // removal and de-duplication)
        $termCounts = array_count_values($terms);

        // Remove stopwords: do not want to include these in the dictionary.
        // De-duplicate as well so that docFrequency grows by at most one per
        // document; this is a no-op if stopwordRemoval() already returns
        // unique terms.
        foreach (array_unique(stopwordRemoval($terms)) as $term) {
            // Skip strings shorter than 3 letters (take care of us, ie, au, etc)
            if (strlen($term) < 3) {
                continue;
            }

            if (!isset($dictionary[$term])) {
                $dictionary[$term] = array('docFrequency' => 0, 'termFrequency' => array());
            }

            // One more document contains this term
            $dictionary[$term]['docFrequency']++;
            // Posting: docID => number of occurrences of the term in that document
            $dictionary[$term]['termFrequency'][$docID] = $termCounts[$term];
        }
    }

    // Sort the dictionary by term
    ksort($dictionary);

    // Return the inverted index: document count plus dictionary with postings
    return array('docCount' => $docCount, 'dictionary' => $dictionary);
}