function test_index_object_with_same_words_in_db()
	{
		// Seed sys_word with words that already exist, each referenced by one object.
		$existing_words = array('this', 'is', 'test', 'title', 'content', 'a');
		foreach ($existing_words as $existing_word)
		{
			$seed_row = array('word' => $existing_word, 'object_count' => 1);
			$this->db->sql_insert('sys_word', $seed_row);
		}

		// Index the fixture object on top of the pre-existing words.
		indexer :: add($this->site_object);

		// After indexing, a sys_word row for 'test' with object_count = 3 must exist.
		// NOTE(review): the expected count of 3 depends on the fixture's content,
		// which is set up outside this method - confirm against setUp().
		$expected_criteria = array('word' => 'test', 'object_count' => 3);
		$this->db->sql_select('sys_word', '*', $expected_criteria);
		$matched_row = $this->db->fetch_row();
		$this->assertNotEqual($matched_row, array());
	}
<?php

session_start();
if (isset($_GET['function'])) {
    $i = new indexer();
    require_once "mysql.class.php";
    $mysql = new mySQL();
    if ($_GET['function'] == 'init') {
        $i->init();
    }
    if ($_GET['function'] == 'indexIt') {
        $result = mysql_query("select count(*) as c from `test`.`crawler_tree`");
        $row = mysql_fetch_array($result);
        $no_of_links = $row['c'];
        //echo $no_of_links;
        $result = mysql_query("select id,url from `test`.`crawler` where ftch=1");
        if ($row = mysql_fetch_array($result)) {
            $id = $row['id'];
            $url = $row['url'];
            $xml = simplexml_load_file("indexData/" . $id . ".xml");
            $title = preg_replace('/^( )*$/', "No title", $xml->title);
            $linksCount = (int) $xml->out->attributes() + (int) $xml->links->attributes();
            $output = mysql_fetch_array(mysql_query("select count(*) as count from test.crawler_tree where child_id={$id}"));
            $inLinks = $output['count'];
            $outXML = simplexml_load_file("../data/links/" . $_SESSION['domainID'] . ".xml");
            if (!($link = $outXML->XPath("/domain/link[url = '{$url}']"))) {
                $link = $outXML->addChild('link');
                $_SESSION['linkID'] = count($outXML);
                $link->addAttribute('id', count($outXML));
                $link->url = $url;
                $link->title = $title;
示例#3
0
    Vosbox is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with Vosbox.  If not, see <http://www.gnu.org/licenses/>.

    Vosbox copyright Callan Bryant 2011-2012 <*****@*****.**> http://callanbryant.co.uk/
*/
// Search endpoint: reads the 'keywords' request parameter, runs it through the
// indexer and prints the result (or an error object) as JSON.

// Bound by reference, so a missing 'keywords' key is created as null rather
// than triggering an undefined-index notice.
$keywords =& $_REQUEST['keywords'];
require_once __DIR__ . '/../../VSE/indexer.class.php';
// include original class to reconstruct each item
require_once __DIR__ . '/../../audioFile.class.php';
try {
    if (!extension_loaded('json')) {
        throw new Exception('json extension not loaded');
    }
    // NOTE(review): a loose falsy check - the literal search term "0" is
    // rejected as empty as well.
    if (!$keywords) {
        throw new Exception('Erm, please search for something!');
    }
    $i = indexer::getInstance();
    $response = $i->search($keywords);
    header('Content-Type:application/json');
    echo json_encode($response);
} catch (Exception $e) {
    header('Content-Type:application/json');
    $message = $e->getMessage();
    // Prefer the real encoder so quotes, backslashes and control characters in
    // the message cannot break the JSON document.
    $payload = function_exists('json_encode') ? json_encode(array('error' => $message)) : false;
    if ($payload === false) {
        // The json extension may be missing (or the message is not encodable):
        // build the object by hand, escaping what would terminate or corrupt
        // the string literal.
        $escaped = strtr($message, array('\\' => '\\\\', '"' => '\\"'));
        $escaped = preg_replace('/[\x00-\x1f]/', ' ', $escaped);
        $payload = '{"error":"' . $escaped . '"}';
    }
    echo $payload;
}
 /**
  * Removes a site object from the word index.
  *
  * Looks up the words linked to the object in sys_word_link, decrements the
  * object_count of each of those words in sys_word, deletes words whose
  * count has reached zero and finally deletes the object's links. Does
  * nothing when the object has no linked words.
  *
  * @param object $site_object object exposing get_id()
  */
 function remove(&$site_object)
 {
     $indexer =& indexer::instance();
     $indexer->db =& db_factory::instance();

     $target_id = $site_object->get_id();

     // Collect the ids of every word currently linked to this object.
     $indexer->db->sql_exec("SELECT word_id FROM sys_word_link WHERE object_id='{$target_id}'");
     $link_rows =& $indexer->db->get_array();
     $linked_word_ids = complex_array::get_column_values('word_id', $link_rows);

     // Nothing indexed for this object: leave the tables untouched.
     if (count($linked_word_ids) < 1) {
         return;
     }

     $id_list = implode(',', $linked_word_ids);

     // One fewer object now references each of these words.
     $indexer->db->sql_exec('UPDATE sys_word SET object_count=( object_count - 1 ) WHERE id in ( ' . $id_list . ' )');
     // Drop words that no object references any more.
     $indexer->db->sql_exec("DELETE FROM sys_word WHERE object_count='0'");
     // Finally remove the object's own word links.
     $indexer->db->sql_exec("DELETE FROM sys_word_link WHERE object_id='{$target_id}'");
 }
示例#5
0
文件: add.php 项目: Ramaniks/Phindex
<?php

// Bootstrap for the indexing script: enable full error output, lift the
// execution time limit, define the storage locations and wire up the indexer.

// Report every error level and print errors to the output.
error_reporting(E_ALL);
//error_reporting(0);
ini_set("display_errors", 1);
// 0 disables the maximum execution time limit for this run.
set_time_limit(0);
// Directories (relative to this file) named for the index and the documents.
// NOTE(review): presumably read by the included classes - confirm.
define('INDEXLOCATION', dirname(__FILE__) . '/index/');
define('DOCUMENTLOCATION', dirname(__FILE__) . '/documents/');
include_once './classes/ranker.class.php';
include_once './classes/indexer.class.php';
include_once './classes/multifolderindex.class.php';
include_once './classes/multifolderdocumentstore.class.php';
// The indexer is composed from an index, a document store and a ranker.
$ranker = new ranker();
$index = new multifolderindex();
$docstore = new multifolderdocumentstore();
$indexer = new indexer($index, $docstore, $ranker);
/**
 * Reduces an HTML document to a plain sequence of space-separated words.
 *
 * The patterns are applied in order, each match being replaced by a single
 * space: script blocks, style blocks and comments/declarations are removed
 * together with their content BEFORE the generic tag pattern runs. If the
 * generic tag pattern ran first it would consume the opening/closing tags
 * and leave the CSS or comment text behind in the result. Finally every run
 * of non-word characters is collapsed to one space.
 *
 * @param string $document raw HTML
 * @return string text with markup removed and non-word runs collapsed
 */
function html2txt($document)
{
    $search = array(
        '@<script[^>]*?>.*?</script>@si',   // script blocks, including their code
        '@<style[^>]*?>.*?</style>@si',     // style blocks, including their CSS
        '@<![\\s\\S]*?--[ \\t\\n\\r]*>@',   // comments / declarations ending in "-->"
        '@<[\\/\\!]*?[^<>]*?>@si',          // any remaining tag
        '@\\W+@si',                         // collapse non-word runs
    );
    $text = preg_replace($search, ' ', $document);
    return $text;
}
$toindex = array();
$count = 0;
foreach (new RecursiveIteratorIterator(new RecursiveDirectoryIterator('./crawler/documents/')) as $x) {
    $filename = $x->getPathname();
    if (is_file($filename)) {
        $handle = fopen($filename, 'r');
        $contents = fread($handle, filesize($filename));
        fclose($handle);
        $unserialized = unserialize($contents);
示例#6
0
<?php

// Bootstrap for the source-file indexing script: load dependencies, build the
// indexer and list the files waiting in the source directory.
include_once 'connect.php';
include_once 'classes/storage.class.php';
include_once 'classes/index.class.php';
include_once 'classes/indexer.class.php';
// Directory (relative to the working directory) that is scanned for input files.
define("SOURCE_DIR", "source/");
$storage = new storage();
$index = new index();
$indexer = new indexer($index, $storage);
// Second argument 1 requests descending alphabetical order.
$file_names = scandir(SOURCE_DIR, 1);
// scandir() returns false on failure; an empty list is falsy too. Either way
// there is nothing to index, so stop silently.
if (!$file_names) {
    die;
}
$documents = array();
foreach ($file_names as $file_name) {
    if ($file_name[0] != '.') {
        //copy file from source to storage
        $fp = fopen(SOURCE_DIR . $file_name, "r");
        //scan file
        if (!$fp) {
            continue;
        }
        $document = array();
        $offset = 0;
        $counter = 0;
        while (($line = fgets($fp)) !== false) {
            //if the line start with # it is document data
            if ($line[0] == '#') {
                //add document to db and get the docID
                $arr = explode(" = ", substr($line, 1));