Exemple #1
0
/**
 * Recursively walks a directory tree and hands every HTML file to indexer().
 *
 * Every entry other than "." and ".." is inspected: sub-directories are
 * explored recursively, and any other entry whose own name ends in ".htm"
 * or ".html" (case-insensitive) is passed to indexer() along with
 * $tab_mot_vide.
 *
 * @param string $path         Directory to explore; "/" plus the entry name is appended to it.
 * @param mixed  $tab_mot_vide Forwarded unchanged to indexer(). Presumably the stop-word
 *                             table (the name reads "empty-word table") - confirm against indexer().
 * @return void
 */
function explorerDir($path, $tab_mot_vide)
{
    $folder = opendir($path);
    if ($folder === false) {
        // opendir() has already raised a warning; there is nothing to iterate.
        return;
    }

    // Compare against false explicitly: an entry literally named "0" is falsy
    // and would otherwise terminate the loop before the directory is exhausted.
    while (false !== ($entree = readdir($folder))) {
        // Skip the current/parent directory pseudo-entries.
        if ($entree === "." || $entree === "..") {
            continue;
        }

        $path_source = $path . "/" . $entree;

        if (is_dir($path_source)) {
            // Descend into the sub-directory.
            explorerDir($path_source, $tab_mot_vide);
        } elseif (preg_match('/\.html?$/i', $entree) === 1) {
            // Test the entry name only (not the full path), so that a parent
            // directory called e.g. "docs.html" does not make every file match.
            indexer($path_source, $tab_mot_vide);
        }
    }

    closedir($folder);
}
Exemple #2
0
/**
 * Builds an extractive summary of a corpus file by ranking sentences on
 * their summed tf.idf weight.
 *
 * The text is split into sentences on runs of ".", each sentence is
 * tokenised (lower-cased, split on digits / non-word characters, stop words
 * removed), and its weight is the sum of term-frequency * idf over its
 * tokens. The highest-weighted sentences are kept and emitted in their
 * original order, each followed by ". ".
 *
 * Relies on indexer() (defined elsewhere) returning a map of
 * token => array containing an 'idf' entry; tokens missing from that map
 * contribute nothing to the weight.
 *
 * NOTE(review): $filename is concatenated into the path as-is; if it can
 * come from user input it should be validated by the caller.
 *
 * @param string    $filename File name relative to "./corpus/".
 * @param int|float $compress Percentage (0-100) of sentences to keep.
 * @return array Array with keys 'original' (the file contents, or false if
 *               the file could not be read) and 'summary' (the summary text).
 */
function summarize($filename, $compress)
{
    // Build / fetch the inverted index.
    $inv_index = indexer();

    // Load the document.
    $load_file = file_get_contents("./corpus/" . $filename);
    if ($load_file === false) {
        // Unreadable file: keep the output shape, with an empty summary.
        return array('original' => $load_file, 'summary' => "");
    }

    $sentence = preg_split("/[.]+/", $load_file);
    // Text ending in "." leaves a blank trailing fragment; drop it. Only drop
    // it when it really is blank, so that a final sentence without a closing
    // period is not thrown away.
    $last = count($sentence) - 1;
    if ($last >= 0 && trim($sentence[$last]) === '') {
        array_pop($sentence);
    }

    // Load the stop-word list (whitespace separated); a missing list means no stop words.
    $stopwords_raw = file_get_contents("./stopwords.txt");
    $stopwords = ($stopwords_raw === false)
        ? array()
        : preg_split("/[\\s]+/", $stopwords_raw, -1, PREG_SPLIT_NO_EMPTY);

    // Number of sentences to keep; never negative, always an integer.
    $compression_rate = $compress / 100;
    $max_sentence = max(0, (int) floor(count($sentence) * $compression_rate));

    // Compute the tf.idf weight of every sentence (same order/keys as $sentence).
    $sentence_weight = array();
    foreach ($sentence as $value) {
        // Tokenise, discarding empty tokens and stop words.
        $word = preg_split("/[\\d\\W\\s]+/", strtolower($value), -1, PREG_SPLIT_NO_EMPTY);
        $word = array_diff($word, $stopwords);

        $tf_idf = 0;
        foreach (array_count_values($word) as $token => $tf) {
            // Tokens absent from the index have no idf and add no weight.
            if (isset($inv_index[$token]['idf'])) {
                $tf_idf += $tf * $inv_index[$token]['idf'];
            }
        }
        $sentence_weight[] = $tf_idf;
    }

    // Highest weights first -> keep the top N -> restore document order.
    arsort($sentence_weight);
    $sorted = array_slice($sentence_weight, 0, $max_sentence, true);
    ksort($sorted);

    // Join the selected sentences.
    $summary = "";
    foreach ($sorted as $key => $value) {
        $summary .= $sentence[$key] . ". ";
    }

    // Return the original text together with the summary.
    return array(
        'original' => $load_file,
        'summary' => $summary,
    );
}
Exemple #3
0
// The index array structure should be (but is not confirmed yet) as below:
// $index = array(
// 				'slug-after-hashbang'=>array('timestamp'=>23234234, 'title'=>'Title from that mada', 'excerpt'=>'first para without img'),
// 				'slug-after-hashbang'=>array('timestamp'=>23234234, 'title'=>'Title from that mada', 'excerpt'=>'first para without img'),
// 				'slug-after-hashbang'=>array('timestamp'=>23234234, 'title'=>'Title from that mada', 'excerpt'=>'first para without img'),
// 				'slug-after-hashbang'=>array('timestamp'=>23234234, 'title'=>'Title from that mada', 'excerpt'=>'first para without img'),
// 			);
// In JSON, mind you. :)
## Configurations:
// Directory name for the markdown posts; not referenced in the statements
// below (presumably read by get_mds()/indexer() - confirm).
const MARKDOWN_DIR = 'posts';
error_reporting(E_ALL);
## MAIN
// Build the index: one entry per markdown file, keyed by the entry's slug.
// Start from an empty array so that an empty post list still produces a
// defined value instead of an undefined-variable warning and a "null" file.
$index = array();
$mds = get_mds();
foreach ($mds as $filename) {
    // Skip the macOS Finder metadata file.
    if ($filename !== '.DS_Store') {
        $ind = indexer($filename);
        $index[$ind['slug']] = $ind;
    }
}
dumper($index);

// Serialise and persist the index; stop with a message rather than reporting
// success when either step fails.
$data = json_encode($index);
if ($data === false) {
    exit('json_encode failed (error code ' . json_last_error() . ')');
}
if (file_put_contents('assets/index.json', $data) === false) {
    exit('could not write assets/index.json');
}
echo 'done!';
#################################
########### FUNCTIONS ###########
/**
 * Debug helper: writes a var_dump() of the given value to the output,
 * wrapped in <pre> ... </pre> so the layout survives in a browser.
 *
 * @param mixed $multi Value to dump.
 * @return void
 */
function dumper($multi)
{
    print '<pre>';
    var_dump($multi);
    print '</pre>';
}