Ejemplo n.º 1
0
        // Read the page's <meta> tags by exposing the fetched markup to
        // get_meta_tags() through a base64 data: URI built from $mime and $content.
        $tmp = get_meta_tags("data://{$mime};base64," . base64_encode($content));
        // Keep only bytes in the \x20-\x7F range of the trimmed description.
        // A missing description tag (or a failed parse, where $tmp is false)
        // yields an empty string. The pattern uses "+" so it never matches the
        // empty string, and has no stray parentheses inside the character class.
        $desc = isset($tmp['description'])
            ? preg_replace('/[^\x20-\x7F]+/', '', trim($tmp['description']))
            : '';
        // Reduce the page body to plain text: html2txt() (a helper defined outside
        // this excerpt) makes the first pass, strip_tags() removes any markup it
        // leaves behind, and the regex drops every byte outside \x20-\x7F.
        $content = preg_replace('/[^\x20-\x7F]+/', '', trim(strip_tags(html2txt($content))));
        // Backfill missing values: the description falls back to the first 200
        // bytes of the content, then the title falls back to the first 50 bytes
        // of the description. The ellipsis is only appended when text was
        // actually cut off. The loose comparisons are kept on purpose so that a
        // null value is treated the same as an empty string.
        if ($desc == '' && $content != '') {
            $desc = substr($content, 0, 200) . (strlen($content) > 200 ? '...' : '');
        }
        if ($title == '' && $desc != '') {
            $title = substr($desc, 0, 50) . (strlen($desc) > 50 ? '...' : '');
        }
        $count++;
        // An empty title at this point means neither a description nor any
        // content was available either, so the page is reported as skipped
        // instead of being queued for the index.
        if ($title == '') {
            echo 'SKIP ', $count, "\r\n";
        } else {
            $toindex[] = array($url, $title, $desc, $rank);
            echo 'INDEXING ', $count, "\r\n";
        }
    }
}
// Announce the start of the indexing pass, then hand the queued rows to the
// indexer in one call. Each $toindex entry is an array($url, $title, $desc, $rank)
// row appended earlier in this script.
echo "Starting Index\r\n";
$indexer->index($toindex);
Ejemplo n.º 2
0
                // Metadata line: drop the one-character prefix and split it into
                // a "key = value" pair. The limit of 2 keeps any later " = "
                // inside the value instead of silently discarding it.
                $arr = explode(" = ", substr($line, 1), 2);
                // Store the pair on the document. A line without the separator
                // is recorded with a null value, without reading the undefined
                // index 1.
                $document[$arr[0]] = isset($arr[1]) ? $arr[1] : null;
                // Advance the byte offset past this line; the offset is used
                // later as the starting point for reading the document content.
                $offset += strlen($line);
            }
            // Leave the loop once $counter has gone past 3; until then, count this pass.
            // NOTE(review): presumably this caps how many lines are scanned per file -
            // confirm against the loop header, which is outside this excerpt.
            if ($counter > 3) {
                break;
            }
            $counter++;
        }
        // Read the stream from byte $offset through to its end (-1 = no length
        // limit) and keep that text as the document's "content" field.
        // NOTE(review): no fclose($fp) is visible in this excerpt - confirm the
        // handle is released elsewhere.
        $document["content"] = stream_get_contents($fp, -1, $offset);
        // Append the finished document to the list that is passed to the indexer.
        $documents[] = $document;
    }
}
// Hand every collected document to the indexer in a single call.
$indexer->index($documents);

// Delete the regular files found directly inside SOURCE_DIR.
// NOTE(review): presumably this is the directory the documents were read
// from - confirm against the part of the script outside this excerpt.
$fp = opendir(SOURCE_DIR);
// opendir() returns false on failure; passing that to readdir() would loop
// forever on older PHP versions or raise a TypeError on PHP 8, so cleanup is
// skipped when the directory cannot be opened.
if ($fp !== false) {
    while (false !== ($file = readdir($fp))) {
        // SOURCE_DIR is concatenated as-is, so it must already end with a
        // directory separator. is_file() also filters out "." and "..".
        if (is_file(SOURCE_DIR . $file)) {
            unlink(SOURCE_DIR . $file);
        }
    }
    // Release the directory handle once the scan is finished.
    closedir($fp);
}
?>
<h1> documents added successfully! </h1>
<a href="admin.php">back</a>