示例#1
0
 /**
  * Lazily create the Solr service client and verify that it responds.
  *
  * Nothing happens when a service instance is already stored on this
  * object. Otherwise a new Apache_Solr_Service is built from the stored
  * host, port and path; if its ping is falsy, an error is triggered and
  * the script terminates.
  *
  * @return void
  */
 protected function _setSolrService()
 {
     // An already created service is reused untouched.
     if (null !== $this->_solrService) {
         return;
     }

     $service = new Apache_Solr_Service($this->_host, $this->_port, $this->_path);
     $this->_solrService = $service;

     if ($service->ping()) {
         return;
     }

     // The server did not answer: report it and stop the script.
     trigger_error("the Apache Solr Service is unavaliable");
     exit;
 }
示例#2
0
 /**
  * Fetch the logger and configuration from the registry, obtain the Solr
  * server client and make sure the server answers a ping.
  *
  * @throws Opus_SolrSearch_Exception If connection to Solr server could not be established.
  */
 public function __construct()
 {
     $this->log = Zend_Registry::get('Zend_Log');
     $this->config = Zend_Registry::get('Zend_Config');
     $this->solr_server = $this->getSolrServer();

     // Only a strict boolean false from ping() counts as "unreachable".
     $isReachable = false !== $this->solr_server->ping();
     if ($isReachable) {
         $this->log->info('Connection to Solr server ' . $this->solr_server_url . ' was successfully established.');
         return;
     }

     $this->log->err('Connection to Solr server ' . $this->solr_server_url . ' could not be established.');
     throw new Opus_SolrSearch_Exception('Solr server ' . $this->solr_server_url . ' is not responding.', Opus_SolrSearch_Exception::SERVER_UNREACHABLE);
 }
示例#3
0
 /**
  * Return a client for the first configured Solr server that answers a ping.
  *
  * All SolrServer entities are loaded from the entity manager and tried in
  * the order the repository returns them.
  *
  * @return \Apache_Solr_Service|false The first responding client, or false
  *                                    when no server is configured or none
  *                                    of them responds.
  */
 public function getSolr()
 {
     require_once __DIR__ . '/../' . 'Lib/Apache/Solr/Apache_Solr_Service.php';
     $em = $this->em;
     $solrservers = $em->getRepository('EcoInformaticaAnalizadorBundle:SolrServer')->findAll();
     foreach ($solrservers as $solrserver) {
         $USR = $solrserver->getUsername();
         $PSWD = $solrserver->getPassword();
         $HOST = $solrserver->getHost();
         $PORT = $solrserver->getPort();
         $SOLRPATH = $solrserver->getPath();
         // Credentials are passed as part of the host ("user:pass@host").
         $solr = new \Apache_Solr_Service("{$USR}:{$PSWD}@{$HOST}", $PORT, $SOLRPATH);
         if ($solr->ping()) {
             return $solr;
         }
     }
     // Either no server is configured or none answered. Previously the
     // client of the last (unresponsive) server was returned in the latter
     // case, so callers could not tell a dead client from a live one.
     return false;
 }
 /**
  * This is a singleton class, thus the constructor should be private/protected
  *
  * Builds the Solr connection for the given core from the extension
  * configuration and pings it. Core name and ready flag are only set when
  * the ping does not return FALSE.
  *
  * @access	protected
  *
  * @param	string		$core: The name of the core to use
  *
  * @return	void
  */
 protected function __construct($core)
 {
     // Load class.
     if (!class_exists('Apache_Solr_Service')) {
         require_once t3lib_div::getFileAbsFileName('EXT:' . self::$extKey . '/lib/SolrPhpClient/Apache/Solr/Service.php');
     }
     // Get Solr credentials.
     $conf = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf'][self::$extKey]);
     $host = $conf['solrHost'] ? $conf['solrHost'] : 'localhost';
     // Prepend username and password to hostname.
     if ($conf['solrUser'] && $conf['solrPass']) {
         $host = $conf['solrUser'] . ':' . $conf['solrPass'] . '@' . $host;
     }
     // Set port if not set.
     $port = tx_dlf_helper::intInRange($conf['solrPort'], 1, 65535, 8180);
     // Append core name to path.
     $path = trim($conf['solrPath'], '/') . '/' . $core;
     // Instantiate Apache_Solr_Service class.
     $this->service = t3lib_div::makeInstance('Apache_Solr_Service', $host, $port, $path);
     // Check if connection is established.
     if ($this->service->ping() !== FALSE) {
         // Do not collapse single value arrays. This must be a method call:
         // assigning to a property named "setCollapseSingleValueArrays"
         // only creates a dynamic property and leaves the setting untouched.
         $this->service->setCollapseSingleValueArrays(FALSE);
         // Set core name.
         $this->core = $core;
         // Instantiation successful!
         $this->ready = TRUE;
     }
 }
 */
/**
 * Print a message to standard output, prefixed with the current local time.
 *
 * The timestamp has the form "dd.mm.YYYY HH:MM:SS" and is followed directly
 * by the message (no separator is inserted) and a newline.
 *
 * @param string $message Text to print after the timestamp.
 *
 * @return void
 */
function printMessage($message)
{
    // date('d.m.Y H:i:s') yields the same digits as
    // strftime('%d.%m.%Y %H:%M:%S', time()) did, without calling strftime(),
    // which is deprecated since PHP 8.1.
    echo date('d.m.Y H:i:s') . $message . "\n";
}
// Force the production environment before the bootstrap file is loaded.
define('APPLICATION_ENV', 'production');
// basic bootstrapping
require_once dirname(__FILE__) . '/../common/bootstrap.php';
// Read the Solr connection settings from the registered Zend_Config object.
$config = Zend_Registry::get('Zend_Config');
$host = $config->searchengine->solr->host;
$port = $config->searchengine->solr->port;
$baseUri = $config->searchengine->solr->path;
$EOL = "\n";
// NOTE(review): presumably the number of documents per commit; it is not
// used in the visible part of the script - confirm against the remainder.
$commitRange = 100;
$solr = new Apache_Solr_Service($host, $port, $baseUri);
// Stop the script when ping() returns exactly false.
if (false === $solr->ping()) {
    echo 'Could not connect to solr service.' . $EOL;
    return;
}
$startTime = time();
$docIds = Opus_Document::getAllIds();
$documents = array();
// Same registry entry as $config above, fetched a second time.
$conf = Zend_Registry::get('Zend_Config');
// Use the configured file destination path only if it is set and is an
// existing directory; otherwise $baseFilePath stays null.
$baseFilePath = null;
if (true === isset($conf->file->destinationPath) and true === is_dir($conf->file->destinationPath)) {
    $baseFilePath = $conf->file->destinationPath;
}
foreach ($docIds as $docId) {
    printMessage(' Indexing document : ' . $docId);
    $opusDoc = new Opus_Document($docId);
    $solrDocument = Qucosa_Search_Solr_Document_OpusDocument::loadOpusDocument($opusDoc);
示例#6
0
 /**
  * Ping the server through the parent implementation while PHP errors are
  * routed to this class's ping_error callback.
  *
  * The previous error handler is restored as soon as the parent call
  * returns. The ping counts as failed when the error flag is set or the
  * parent result is falsy; the error flag is reset in that case.
  *
  * @param mixed $timeout Passed through unchanged to the parent ping().
  *
  * @return bool True when the ping succeeded, false otherwise.
  */
 public function ping($timeout = self::DEFAULT_PING_TIMEOUT)
 {
     $handler = array(get_class($this), 'ping_error');
     set_error_handler($handler, E_ALL);
     $result = parent::ping($timeout);
     restore_error_handler();

     $succeeded = !$this->_error && $result;
     if ($succeeded) {
         // Remember the value the parent returned for the last good ping.
         $this->_lastPing = $result;
         return true;
     }

     $this->_error = false;
     $this->_lastPing = NULL;
     return false;
 }
示例#7
0
文件: db.php 项目: rdmpage/bioguid
/**
 * Store an article (reference) in the database, index it in Solr, record a
 * version snapshot, store its authors and page range, and optionally tweet it.
 *
 * Uses the globals $db (database connection) and $config.
 *
 * Terminates the script with die() when any SQL statement fails, and with
 * exit when the Solr service does not answer the ping.
 *
 * @param object $article  Reference object; the properties read here include
 *                         reference_id, title, secondary_title, genre, date,
 *                         year, url, volume, issue, spage, epage and authors.
 * @param int    $PageID   BHL PageID; 0 means there is none.
 * @param bool   $updating When true, an article that already exists is
 *                         updated; when false, its id is returned untouched.
 *
 * @return int The reference id, or 0 when a new article has no (non-empty)
 *             title.
 */
function db_store_article($article, $PageID = 0, $updating = false)
{
    global $db;
    global $config;
    $update = false;
    $id = 0;
    // If we are editing an existing reference then we already know its id
    if (isset($article->reference_id)) {
        $id = $article->reference_id;
    } else {
        $id = db_find_article($article);
    }
    // Existing article: either switch to update mode or return its id as is.
    if ($id != 0) {
        if ($updating) {
            $update = true;
        } else {
            return $id;
        }
    }
    // Try and trap empty references
    if ($id == 0) {
        $ok = false;
        if (isset($article->title)) {
            $ok = $article->title != '';
        }
        if (!$ok) {
            return 0;
        }
    }
    // Default genre. Note that this modifies the caller's object.
    if (!isset($article->genre)) {
        $article->genre = 'article';
    }
    // Parallel lists of column names and already quoted SQL values.
    $keys = array();
    $values = array();
    // Article metadata
    foreach ($article as $k => $v) {
        switch ($k) {
            // Ignore as it's an array
            case 'authors':
                break;
            // Store the date and, if no year is set, a year derived from it
            case 'date':
                $keys[] = 'date';
                $values[] = $db->qstr($v);
                if (!isset($article->year)) {
                    $keys[] = 'year';
                    $values[] = $db->qstr(year_from_date($v));
                }
                break;
            // Don't store BHL URL here
            case 'url':
                if (preg_match('/^http:\\/\\/(www\\.)?biodiversitylibrary.org\\/page\\/(?<pageid>[0-9]+)/', $v)) {
                } else {
                    // extract Handle if it exists
                    if (preg_match('/^http:\\/\\/hdl.handle.net\\/(?<hdl>.*)$/', $v, $m)) {
                        $keys[] = 'hdl';
                        $values[] = $db->qstr($m['hdl']);
                    } else {
                        $keys[] = $k;
                        $values[] = $db->qstr($v);
                    }
                }
                break;
            // Things we store as is
            case 'title':
            case 'secondary_title':
            case 'volume':
            case 'series':
            case 'issue':
            case 'spage':
            case 'epage':
            case 'year':
            // NOTE(review): this second 'date' label can never match, because
            // the earlier 'date' case above is tried first.
            case 'date':
            case 'issn':
            case 'genre':
            case 'doi':
            case 'hdl':
            case 'lsid':
            case 'oclc':
            case 'pdf':
            case 'abstract':
            case 'pmid':
                $keys[] = $k;
                $values[] = $db->qstr($v);
                break;
            // Things we ignore
            default:
                break;
        }
    }
    // Date: when only a year is known, store it as "<year>-00-00"
    if (!isset($article->date) && isset($article->year)) {
        $keys[] = 'date';
        $values[] = $db->qstr($article->year . '-00-00');
    }
    // BHL PageID
    // NOTE(review): $PageID is placed into the SQL without quoting - confirm
    // that callers always pass an integer.
    if ($PageID != 0) {
        $keys[] = 'PageID';
        $values[] = $PageID;
    }
    // SICI
    $s = new Sici();
    $sici = $s->create($article);
    if ($sici != '') {
        $keys[] = 'sici';
        $values[] = $db->qstr($sici);
    }
    if ($update) {
        // Versioning?
        // Delete links	(author, pages, etc)
        // Don't delete page range as we may loose plates, etc. outside range
        /*
        $sql = 'DELETE FROM rdmp_reference_page_joiner WHERE reference_id=' . $id;
        $result = $db->Execute($sql);
        if ($result == false) die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
        */
        // Author links are removed here and re-created further below.
        // NOTE(review): $id is concatenated into the SQL unquoted here and in
        // the statements below - confirm it is always an integer.
        $sql = 'DELETE FROM rdmp_author_reference_joiner WHERE reference_id = ' . $id;
        $result = $db->Execute($sql);
        if ($result == false) {
            die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
        }
        // update (updated timestamp will be automatically updated)
        $sql = 'UPDATE rdmp_reference SET ';
        $num_values = count($keys);
        for ($i = 0; $i < $num_values; $i++) {
            if ($i > 0) {
                $sql .= ', ';
            }
            $sql .= $keys[$i] . '=' . $values[$i];
        }
        $sql .= ' WHERE reference_id=' . $id;
        /*		$cache_file = @fopen('/tmp/update.sql', "w+") or die("could't open file");
        		@fwrite($cache_file, $sql);
        		fclose($cache_file);
        */
        $result = $db->Execute($sql);
        if ($result == false) {
            die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
        }
    } else {
        // Adding article for first time so add 'created' and 'updated' timestamp
        $keys[] = 'created';
        $values[] = 'NOW()';
        $keys[] = 'updated';
        $values[] = 'NOW()';
        $sql = 'INSERT INTO rdmp_reference (' . implode(",", $keys) . ') VALUES (' . implode(",", $values) . ')';
        $result = $db->Execute($sql);
        if ($result == false) {
            die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
        }
        $id = $db->Insert_ID();
        // Store reference_cluster_id which we can use to group duplicates, by default
        // reference_cluster_id = reference_id
        $sql = 'UPDATE rdmp_reference SET reference_cluster_id=' . $id . ' WHERE reference_id=' . $id;
        $result = $db->Execute($sql);
        if ($result == false) {
            die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
        }
    }
    // Indexing-------------------------------------------------------------------------------------
    // The condition is the constant 1, so the Solr branch always runs and the
    // rdmp_text_index branch in the else part is never executed.
    if (1) {
        // solr
        // this code is redundant with code in reference.php but I use different objects
        // here and there (doh!). Also once we've added old stuff to solr this is the only place we
        // should be calling solr
        $solr = new Apache_Solr_Service('localhost', '8983', '/solr');
        if (!$solr->ping()) {
            echo 'Solr service not responding.';
            exit;
        }
        // Build the field list of the Solr document.
        $item = array();
        $item['id'] = 'reference/' . $id;
        $item['title'] = $article->title;
        $item['publication_outlet'] = $article->secondary_title;
        $item['year'] = $article->year;
        // NOTE(review): $article->authors is iterated here without the isset()
        // check used in the author section further below - confirm it is
        // always set at this point.
        $authors = array();
        foreach ($article->authors as $a) {
            $authors[] = $a->forename . ' ' . $a->surname;
        }
        $item['authors'] = $authors;
        // Citation tail: " year title journal volume(issue): spage-epage"
        $citation = '';
        $citation .= ' ' . $article->year;
        $citation .= ' ' . $article->title;
        $citation .= ' ' . $article->secondary_title;
        $citation .= ' ' . $article->volume;
        if (isset($article->issue)) {
            $citation .= '(' . $article->issue . ')';
        }
        $citation .= ':';
        $citation .= ' ';
        $citation .= $article->spage;
        if (isset($article->epage)) {
            $citation .= '-' . $article->epage;
        }
        // This value is replaced below once the author text has been built.
        $item['citation'] = $citation;
        // Author text: names joined with ", " and " and "; cut off with
        // " et al." after the second author when there are more than three.
        $text = '';
        $num_authors = count($article->authors);
        $count = 0;
        if ($num_authors > 0) {
            foreach ($article->authors as $author) {
                // NOTE(review): this reads ->lastname while the loop above
                // reads ->surname - confirm which property authors carry.
                $text .= $author->forename . ' ' . $author->lastname;
                if (isset($author->suffix)) {
                    $text .= ' ' . $author->suffix;
                }
                $count++;
                if ($count == 2 && $num_authors > 3) {
                    $text .= ' et al.';
                    break;
                }
                if ($count < $num_authors - 1) {
                    $text .= ', ';
                } else {
                    if ($count < $num_authors) {
                        $text .= ' and ';
                    }
                }
            }
        }
        $item['citation'] = $text . ' ' . $citation;
        $parts = array();
        $parts[] = $item;
        //print_r($parts);
        // add to solr
        // Array values become multi-valued fields, everything else is set as
        // a plain document property.
        $documents = array();
        foreach ($parts as $item => $fields) {
            $part = new Apache_Solr_Document();
            foreach ($fields as $key => $value) {
                if (is_array($value)) {
                    foreach ($value as $datum) {
                        $part->setMultiValue($key, $datum);
                    }
                } else {
                    $part->{$key} = $value;
                }
            }
            $documents[] = $part;
        }
        //
        //
        // Load the documents into the index
        //
        // Exceptions from Solr are only echoed; the function then carries on.
        try {
            $solr->addDocuments($documents);
            $solr->commit();
            $solr->optimize();
        } catch (Exception $e) {
            echo $e->getMessage();
        }
    } else {
        $sql = 'DELETE FROM rdmp_text_index WHERE (object_uri=' . $db->qstr($config['web_root'] . 'reference/' . $id) . ')';
        $result = $db->Execute($sql);
        if ($result == false) {
            die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
        }
        // Only do this if we have a title, as sometimes we don't (e.g. CrossRef lacks metadata)
        if (isset($article->title)) {
            $sql = 'INSERT INTO rdmp_text_index(object_type, object_id, object_uri, object_text)
			VALUES ("title"' . ', ' . $id . ', ' . $db->qstr($config['web_root'] . 'reference/' . $id) . ', ' . $db->qstr($article->title) . ')';
            $result = $db->Execute($sql);
            if ($result == false) {
                die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
            }
        }
    }
    // Versioning-----------------------------------------------------------------------------------
    // Store this object in version table so we can recover it if we overwrite item
    // NOTE(review): $ip is wrapped in quotes by hand rather than with qstr() -
    // confirm that getip() can only return a safe address string.
    $ip = getip();
    $sql = 'INSERT INTO rdmp_reference_version(reference_id, ip, json) VALUES(' . $id . ', ' . 'INET_ATON(\'' . $ip . '\')' . ',' . $db->qstr(json_encode($article)) . ')';
    $result = $db->Execute($sql);
    if ($result == false) {
        die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
    }
    // Author(s)------------------------------------------------------------------------------------
    // Store author as and link to the article
    if (isset($article->authors)) {
        db_store_authors($id, $article->authors);
    }
    // Store page range (only if not updating, otherwise we may loose plates, etc.
    // that aren't in page range)
    if ($PageID != 0 && !$update) {
        $page_range = array();
        if (isset($article->spage) && isset($article->epage)) {
            $page_range = bhl_page_range($PageID, $article->epage - $article->spage + 1);
        } else {
            // No epage, so just get spage (to do: how do we tell user we don't have page range?)
            $page_range = bhl_page_range($PageID, 0);
        }
        //print_r($page_range);
        // page_order is the zero-based position of the page within the range.
        $count = 0;
        foreach ($page_range as $page) {
            $sql = 'INSERT INTO rdmp_reference_page_joiner (reference_id, PageID, page_order) 
			VALUES (' . $id . ',' . $page . ',' . $count++ . ')';
            $result = $db->Execute($sql);
            if ($result == false) {
                die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
            }
        }
    }
    // Tweet----------------------------------------------------------------------------------------
    // Only newly added articles are tweeted, and only if $config['twitter'] is truthy.
    if (!$update) {
        if ($config['twitter']) {
            $url = $config['web_root'] . 'reference/' . $id . ' ' . '#bhlib';
            // url + hashtag
            $url_len = strlen($url);
            $status = '';
            if (isset($article->title)) {
                $status = $article->title;
                $status_len = strlen($status);
                // Room left within 140 after title, separating space and URL;
                // a negative value means the title has to be shortened.
                // NOTE(review): strlen()/substr() count bytes, so a multi-byte
                // title may be cut in the middle of a character.
                $extra = 140 - $status_len - $url_len - 1;
                if ($extra < 0) {
                    $status_len += $extra;
                    $status_len -= 1;
                    $status = substr($status, 0, $status_len);
                    $status .= '…';
                }
            }
            $status .= ' ' . $url;
            tweet($status);
        }
    }
    return $id;
}
示例#8
0
 /**
  * Report whether the Solr server described by this object answers a ping.
  *
  * The credentials are passed to the client as part of the host string
  * ("username:password@host").
  *
  * @return bool True when the ping result is truthy, false otherwise.
  */
 public function isActive()
 {
     require_once __DIR__ . '/../' . 'Lib/Apache/Solr/Apache_Solr_Service.php';

     $authority = "{$this->username}:{$this->password}@{$this->host}";
     $client = new \Apache_Solr_Service($authority, $this->port, $this->path);

     return (bool) $client->ping();
 }
示例#9
0
<?php

require_once dirname(dirname(__FILE__)) . '/Apache/Solr/Service.php';
require_once dirname(dirname(__FILE__)) . '/db.php';
require_once dirname(dirname(__FILE__)) . '/reference.php';
//
// Try to connect to the named server, port, and url; stop the script when
// the ping result is falsy.
//
$solr = new Apache_Solr_Service('localhost', '8983', '/solr');
if (!$solr->ping()) {
    echo 'Solr service not responding.';
    exit;
}
// Count the references that have a PageID.
// NOTE(review): $db is not defined in this script; presumably db.php (required
// above) provides it - confirm.
$sql = "SELECT COUNT(reference_id) as c FROM rdmp_reference WHERE (PageID <> 0)";
$result = $db->Execute($sql);
if ($result == false) {
    die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
}
$num = $result->fields['c'];
// The references are processed in pages of $page_size rows. $pages is a plain
// quotient and may be fractional.
$page_size = 100;
$pages = $num / $page_size;
for ($page = 0; $page < $pages; $page++) {
    $parts = array();
    $sql = "SELECT reference_id FROM rdmp_reference  WHERE (PageID <> 0) LIMIT " . $page * $page_size . "," . $page_size;
    echo $sql . "\n";
    $result = $db->Execute($sql);
    if ($result == false) {
        die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
    }
    $ids = array();
示例#10
0
 /**
  * Remove a fixed list of catalog entries from the search index.
  *
  * View rendering is switched off and a heading is echoed directly. The
  * script stops when the Solr server does not answer a ping.
  *
  * @return void
  */
 function deleteAction()
 {
     $this->_helper->viewRenderer->setNoRender(TRUE);

     echo "<h4>HUKUMONLINE INDONESIA: <small>search</small></h4><hr/>" . '<br>';

     // NOTE(review): server address and credentials are hard-coded here;
     // consider moving them to configuration.
     $solr = new Apache_Solr_Service('nihki:sirkulasi@202.153.129.35', '8983', '/solr/core-catalog');
     if (!$solr->ping()) {
         echo 'Solr service not responding.';
         exit;
     }

     // The manager is requested once per entry, then asked to delete it.
     foreach (array('fl17956', 'fl1131') as $catalogId) {
         Pandamp_Search::manager()->deleteCatalogFromIndex($catalogId);
     }
 }
示例#11
0
 /**
  * Return the write service to use, selecting a new one when necessary.
  *
  * The currently selected service is reused while its id is truthy and
  * still present in the list of writeable services. Otherwise the list is
  * walked in order: array definitions are replaced by client objects, and
  * the first service whose ping (using the configured write ping timeout)
  * is not false becomes the current one.
  *
  * @return Apache_Solr_Service
  *
  * @throws Exception If there are no write services that meet requirements
  */
 private function _selectWriteService()
 {
     $currentId = $this->_currentWriteService;
     if ($currentId && isset($this->_writeableServices[$currentId])) {
         return $this->_writeableServices[$currentId];
     }

     foreach ($this->_writeableServices as $id => $candidate) {
         if (is_array($candidate)) {
             // Turn the array definition into a client object and keep it.
             $candidate = new Apache_Solr_Service($candidate['host'], $candidate['port'], $candidate['path']);
             $this->_writeableServices[$id] = $candidate;
         }

         // Skip services that do not answer in time.
         if ($candidate->ping($this->_writePingTimeout) === false) {
             continue;
         }

         $this->_currentWriteService = $id;
         return $this->_writeableServices[$id];
     }

     throw new Exception('No write services were available');
 }
 /**
  * Test if Solr server is reachable within a given period.
  *
  * When the server is addressed with a protocol other than plain HTTP, the
  * configured URL is fetched directly; otherwise the parent implementation
  * is used.
  *
  * @param int $iTimeout Seconds to wait for reply from server.
  * @return float|false For the direct fetch: elapsed time in seconds (a
  *                     microtime(true) difference), or false if the fetch
  *                     failed or returned an empty body. Otherwise whatever
  *                     the parent ping() returns.
  */
 public function ping($iTimeout = 2)
 {
     wfProfileIn('BS::' . __METHOD__);
     if ($this->bUseDifferentProtocolThanHttp) {
         // PHP reads the stream options for both http:// and https:// URLs
         // from the "http" context key. An "https" key is ignored, which
         // meant the timeout never applied.
         // NOTE(review): assumes the other protocol is HTTPS, as the former
         // "https" key suggests - confirm.
         $ctx = stream_context_create(array('http' => array('timeout' => $iTimeout)));
         $start = microtime(true);
         // Warnings are suppressed on purpose: a failed fetch is reported
         // through the false return value.
         $res = @file_get_contents($this->sUrl, false, $ctx);
         wfProfileOut('BS::' . __METHOD__);
         return $res ? microtime(true) - $start : false;
     }
     wfProfileOut('BS::' . __METHOD__);
     return parent::ping($iTimeout);
 }
 /**
  * This is a singleton class, thus the constructor should be private/protected
  *
  * Builds the Solr connection for the given core from the connection info
  * returned by getSolrConnectionInfo() and pings it. Core name and ready
  * flag are only set when the ping does not return FALSE.
  *
  * @access	protected
  *
  * @param	string		$core: The name of the core to use
  *
  * @return	void
  */
 protected function __construct($core)
 {
     // Load class.
     if (!class_exists('Apache_Solr_Service')) {
         require_once \TYPO3\CMS\Core\Utility\GeneralUtility::getFileAbsFileName('EXT:' . self::$extKey . '/lib/SolrPhpClient/Apache/Solr/Service.php');
     }
     $solrInfo = self::getSolrConnectionInfo($core);
     // Instantiate Apache_Solr_Service class.
     $this->service = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('Apache_Solr_Service', $solrInfo['host'], $solrInfo['port'], $solrInfo['path']);
     // Check if connection is established.
     if ($this->service->ping() !== FALSE) {
         // Do not collapse single value arrays. This must be a method call:
         // assigning to a property named "setCollapseSingleValueArrays"
         // only creates a dynamic property and leaves the setting untouched.
         $this->service->setCollapseSingleValueArrays(FALSE);
         // Set core name.
         $this->core = $core;
         // Instantiation successful!
         $this->ready = TRUE;
     }
 }