/**
 * Empties the Solr index and reports success to the view.
 *
 * Connects to the Solr core at localhost:8983/solr/core0, deletes every
 * document matching the query "*:*", commits the deletion and then sets
 * the view's `success` flag to true.
 */
function emptyAction()
{
    $matchEverything = '*:*';

    $indexService = new Apache_Solr_Service('localhost', '8983', '/solr/core0');
    $indexService->deleteByQuery($matchEverything);
    $indexService->commit();

    $this->view->success = true;
}
/**
 * Commit all pushed documents to the Solr Server.
 *
 * Adds the queued documents, commits, optimizes and then clears the local
 * queue. If the queue is empty a notice is raised and nothing is sent.
 * A failure while talking to Solr is reported as an E_USER_WARNING carrying
 * the exception message; the queue is left untouched in that case.
 */
public function commit()
{
    $this->_setSolrService();

    if (!count($this->documents)) {
        trigger_error("There is not document for committing");
        return;
    }

    try {
        $this->_solrService->addDocuments($this->documents);
        $this->_solrService->commit();
        $this->_solrService->optimize();
        $this->_clear();
    } catch (Exception $e) {
        // The second argument of trigger_error() must be an E_USER_* level.
        // An exception code (often 0 or an HTTP status) is not one, so the
        // message was previously dropped or rejected instead of reported.
        trigger_error($e->getMessage(), E_USER_WARNING);
    }
}
default: printMessage(' Skipped file "' . $filePath . '". Reason: Mime type "' . $mimeType . '" has no processor.'); break; } } else { printMessage(' Skipped file "' . $filePath . '". Reason: File not found.'); } } } else { printMessage(' Skipped indexing of document files. Reason: no base path to files or document is not published.'); } $documents[] = $solrDocument; if (0 === count($documents) % $commitRange) { printMessage(' Committing data set of ' . $commitRange . ' values.'); $solr->addDocuments($documents); $solr->commit(); $documents = array(); printMessage(' Committing done.'); } } if (count($documents) > 0) { printMessage(' Committing data set of ' . count($documents) . ' values.'); $solr->addDocuments($documents); $solr->commit(); printMessage(' Committing done.'); } printMessage(' Optimizing.'); $solr->optimize(); printMessage(' Optimizing done.'); $stopTime = time(); $time = $stopTime - $startTime;
/**
 * Clear the cache whenever we commit changes.
 *
 * Delegates to the parent commit with the same arguments, clears the cache
 * once the parent call has returned, and hands the parent's result back to
 * the caller (previously the result was discarded).
 *
 * @see Apache_Solr_Service::commit()
 *
 * @return mixed Whatever the parent commit() returns.
 *               NOTE(review): presumably an Apache_Solr_Response - confirm
 *               against the installed client version.
 */
public function commit($optimize = TRUE, $waitFlush = TRUE, $waitSearcher = TRUE, $timeout = 3600)
{
    $response = parent::commit($optimize, $waitFlush, $waitSearcher, $timeout);

    // Only reached when the parent did not throw.
    $this->_clearCache();

    return $response;
}
/**
 * Store an article in rdmp_reference and its satellite tables.
 *
 * Steps, in order:
 *  1. Resolve the reference id from $article->reference_id, else via
 *     db_find_article(). An existing reference is returned untouched unless
 *     $updating is true. A new reference without a non-empty title is
 *     rejected.
 *  2. INSERT or UPDATE the row in rdmp_reference (on update the author links
 *     are deleted first so they can be re-created).
 *  3. Index the reference in Solr.
 *  4. Record a JSON snapshot in rdmp_reference_version.
 *  5. Store authors, the BHL page range (new references only) and, if
 *     enabled in $config, send a tweet (new references only).
 *
 * Any failed SQL statement terminates the script with die().
 *
 * @param object $article  Article metadata; public properties are iterated.
 * @param int    $PageID   BHL PageID of the first page, 0 when unknown.
 * @param bool   $updating When true an existing reference is overwritten.
 *
 * @return int The reference id, or 0 when a new article has no title.
 */
function db_store_article($article, $PageID = 0, $updating = false)
{
    global $db;
    global $config;

    $update = false;

    // If we are editing an existing reference then we already know its id.
    // The id is concatenated unquoted into several SQL statements below, so
    // force it to an integer here.
    if (isset($article->reference_id)) {
        $id = (int) $article->reference_id;
    } else {
        $id = (int) db_find_article($article);
    }

    if ($id != 0) {
        if (!$updating) {
            return $id;
        }
        $update = true;
    }

    // Try and trap empty references
    if ($id == 0) {
        if (!isset($article->title) || $article->title == '') {
            return 0;
        }
    }

    if (!isset($article->genre)) {
        $article->genre = 'article';
    }

    $keys = array();
    $values = array();

    // Article metadata
    foreach ($article as $k => $v) {
        switch ($k) {
            // Ignore as it's an array
            case 'authors':
                break;

            case 'date':
                $keys[] = 'date';
                $values[] = $db->qstr($v);
                if (!isset($article->year)) {
                    $keys[] = 'year';
                    $values[] = $db->qstr(year_from_date($v));
                }
                break;

            case 'url':
                if (preg_match('/^http:\\/\\/(www\\.)?biodiversitylibrary\\.org\\/page\\/(?<pageid>[0-9]+)/', $v)) {
                    // Don't store BHL URL here
                } elseif (preg_match('/^http:\\/\\/hdl\\.handle\\.net\\/(?<hdl>.*)$/', $v, $m)) {
                    // extract Handle if it exists
                    $keys[] = 'hdl';
                    $values[] = $db->qstr($m['hdl']);
                } else {
                    $keys[] = $k;
                    $values[] = $db->qstr($v);
                }
                break;

            // Things we store as is ('date' is handled by its own case above)
            case 'title':
            case 'secondary_title':
            case 'volume':
            case 'series':
            case 'issue':
            case 'spage':
            case 'epage':
            case 'year':
            case 'issn':
            case 'genre':
            case 'doi':
            case 'hdl':
            case 'lsid':
            case 'oclc':
            case 'pdf':
            case 'abstract':
            case 'pmid':
                $keys[] = $k;
                $values[] = $db->qstr($v);
                break;

            // Things we ignore
            default:
                break;
        }
    }

    // Date
    if (!isset($article->date) && isset($article->year)) {
        $keys[] = 'date';
        $values[] = $db->qstr($article->year . '-00-00');
    }

    // BHL PageID (goes into the SQL unquoted, hence the cast)
    if ($PageID != 0) {
        $keys[] = 'PageID';
        $values[] = (int) $PageID;
    }

    // SICI
    $s = new Sici();
    $sici = $s->create($article);
    if ($sici != '') {
        $keys[] = 'sici';
        $values[] = $db->qstr($sici);
    }

    if ($update) {
        // Delete author links so they can be re-created further down.
        // Page links are deliberately kept: deleting the page range could
        // lose plates etc. that lie outside the range.
        $sql = 'DELETE FROM rdmp_author_reference_joiner WHERE reference_id = ' . $id;
        $result = $db->Execute($sql);
        if ($result == false) {
            die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
        }

        // update (updated timestamp will be automatically updated)
        $assignments = array();
        foreach ($keys as $i => $column) {
            $assignments[] = $column . '=' . $values[$i];
        }
        $sql = 'UPDATE rdmp_reference SET ' . implode(', ', $assignments) . ' WHERE reference_id=' . $id;
        $result = $db->Execute($sql);
        if ($result == false) {
            die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
        }
    } else {
        // Adding article for first time so add 'created' and 'updated' timestamp
        $keys[] = 'created';
        $values[] = 'NOW()';
        $keys[] = 'updated';
        $values[] = 'NOW()';

        $sql = 'INSERT INTO rdmp_reference (' . implode(",", $keys) . ') VALUES (' . implode(",", $values) . ')';
        $result = $db->Execute($sql);
        if ($result == false) {
            die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
        }
        $id = $db->Insert_ID();

        // Store reference_cluster_id which we can use to group duplicates, by default
        // reference_cluster_id = reference_id
        $sql = 'UPDATE rdmp_reference SET reference_cluster_id=' . $id . ' WHERE reference_id=' . $id;
        $result = $db->Execute($sql);
        if ($result == false) {
            die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
        }
    }

    // Indexing-------------------------------------------------------------------------------------
    if (1) {
        // solr
        // this code is redundant with code in reference.php but I use different objects
        // here and there (doh!). Also once we've added old stuff to solr this is the only place we
        // should be calling solr
        $solr = new Apache_Solr_Service('localhost', '8983', '/solr');
        if (!$solr->ping()) {
            echo 'Solr service not responding.';
            exit;
        }

        // Optional properties: read them once, guarded, so a missing one
        // yields null (as before) without raising an undefined-property error.
        $title = isset($article->title) ? $article->title : null;
        $secondary_title = isset($article->secondary_title) ? $article->secondary_title : null;
        $year = isset($article->year) ? $article->year : null;
        $volume = isset($article->volume) ? $article->volume : null;
        $spage = isset($article->spage) ? $article->spage : null;
        // Authors are optional too (see the isset() check before
        // db_store_authors() below); count() on a missing property is fatal
        // on PHP 8.
        $article_authors = isset($article->authors) ? $article->authors : array();

        $item = array();
        $item['id'] = 'reference/' . $id;
        $item['title'] = $title;
        $item['publication_outlet'] = $secondary_title;
        $item['year'] = $year;

        // The original code read the family name as `surname` here but as
        // `lastname` when building the citation. Which property the author
        // objects actually carry is not visible from this function, so each
        // loop keeps its original property and falls back to the other one.
        $authors = array();
        foreach ($article_authors as $a) {
            $family = isset($a->surname) ? $a->surname : (isset($a->lastname) ? $a->lastname : '');
            $authors[] = $a->forename . ' ' . $family;
        }
        $item['authors'] = $authors;

        // "<year> <title> <journal> <volume>(<issue>): <spage>-<epage>"
        $citation = ' ' . $year . ' ' . $title . ' ' . $secondary_title . ' ' . $volume;
        if (isset($article->issue)) {
            $citation .= '(' . $article->issue . ')';
        }
        $citation .= ': ' . $spage;
        if (isset($article->epage)) {
            $citation .= '-' . $article->epage;
        }

        // Author list: "A, B and C"; with more than three authors it is cut
        // after the second one and " et al." is appended.
        $text = '';
        $num_authors = count($article_authors);
        $count = 0;
        foreach ($article_authors as $author) {
            $family = isset($author->lastname) ? $author->lastname : (isset($author->surname) ? $author->surname : '');
            $text .= $author->forename . ' ' . $family;
            if (isset($author->suffix)) {
                $text .= ' ' . $author->suffix;
            }
            $count++;
            if ($count == 2 && $num_authors > 3) {
                $text .= ' et al.';
                break;
            }
            if ($count < $num_authors - 1) {
                $text .= ', ';
            } elseif ($count < $num_authors) {
                $text .= ' and ';
            }
        }
        $item['citation'] = $text . ' ' . $citation;

        // Build the Solr document; array values become multi-valued fields.
        $part = new Apache_Solr_Document();
        foreach ($item as $key => $value) {
            if (is_array($value)) {
                foreach ($value as $datum) {
                    $part->setMultiValue($key, $datum);
                }
            } else {
                $part->{$key} = $value;
            }
        }
        $documents = array($part);

        // Load the documents into the index
        try {
            $solr->addDocuments($documents);
            $solr->commit();
            $solr->optimize();
        } catch (Exception $e) {
            echo $e->getMessage();
        }
    } else {
        // Legacy SQL text index, disabled by the constant condition above.
        $sql = 'DELETE FROM rdmp_text_index WHERE (object_uri=' . $db->qstr($config['web_root'] . 'reference/' . $id) . ')';
        $result = $db->Execute($sql);
        if ($result == false) {
            die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
        }

        // Only do this if we have a title, as sometimes we don't (e.g. CrossRef lacks metadata)
        if (isset($article->title)) {
            $sql = 'INSERT INTO rdmp_text_index(object_type, object_id, object_uri, object_text) VALUES ("title"' . ', ' . $id . ', ' . $db->qstr($config['web_root'] . 'reference/' . $id) . ', ' . $db->qstr($article->title) . ')';
            $result = $db->Execute($sql);
            if ($result == false) {
                die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
            }
        }
    }

    // Versioning-----------------------------------------------------------------------------------
    // Store this object in version table so we can recover it if we overwrite item.
    // The IP string is escaped with qstr() instead of being pasted between
    // hand-written quotes, so its content cannot break out of the literal.
    $ip = getip();
    $sql = 'INSERT INTO rdmp_reference_version(reference_id, ip, json) VALUES(' . $id . ', ' . 'INET_ATON(' . $db->qstr($ip) . ')' . ',' . $db->qstr(json_encode($article)) . ')';
    $result = $db->Execute($sql);
    if ($result == false) {
        die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
    }

    // Author(s)------------------------------------------------------------------------------------
    // Store authors and link them to the article
    if (isset($article->authors)) {
        db_store_authors($id, $article->authors);
    }

    // Store page range (only if not updating, otherwise we may lose plates, etc.
    // that aren't in page range)
    if ($PageID != 0 && !$update) {
        if (isset($article->spage) && isset($article->epage)) {
            $page_range = bhl_page_range($PageID, $article->epage - $article->spage + 1);
        } else {
            // No epage, so just get spage (to do: how do we tell user we don't have page range?)
            $page_range = bhl_page_range($PageID, 0);
        }

        $count = 0;
        foreach ($page_range as $page) {
            $sql = 'INSERT INTO rdmp_reference_page_joiner (reference_id, PageID, page_order) VALUES (' . $id . ',' . $page . ',' . $count++ . ')';
            $result = $db->Execute($sql);
            if ($result == false) {
                die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
            }
        }
    }

    // Tweet----------------------------------------------------------------------------------------
    if (!$update) {
        if ($config['twitter']) {
            $url = $config['web_root'] . 'reference/' . $id . ' ' . '#bhlib'; // url + hashtag
            $url_len = strlen($url);
            $status = '';
            if (isset($article->title)) {
                $status = $article->title;
                $status_len = strlen($status);
                // Keep title + space + url within 140.
                // NOTE(review): strlen()/substr() count bytes, so a title
                // with multi-byte characters may be cut mid-character.
                $extra = 140 - $status_len - $url_len - 1;
                if ($extra < 0) {
                    $status_len += $extra;
                    $status_len -= 1;
                    $status = substr($status, 0, $status_len);
                    $status .= '…';
                }
            }
            $status .= ' ' . $url;
            tweet($status);
        }
    }

    return $id;
}
/**
 * Splits a raw "k=v&k=v" query string into Solr parameters and the main query.
 *
 * Keys that Solr accepts several times are collected into arrays. The `q`
 * value is returned separately, and a `core` pair is consumed without being
 * forwarded. Empty pairs (e.g. from "a=1&&b=2") are skipped.
 *
 * @param string $query_string Raw query string; values are still URL-encoded.
 *
 * @return array array($params, $keys): the parameter map and the `q` value.
 */
function _solr_proxy_parse_query($query_string)
{
    // The names of Solr parameters that may be specified multiple times.
    $multivalue_keys = array('bf', 'bq', 'facet.date', 'facet.date.other', 'facet.field', 'facet.query', 'fq', 'pf', 'qf');

    $params = array('q' => '*:*');
    $keys = '';

    foreach (explode('&', $query_string) as $pair) {
        if ($pair == '') {
            continue;
        }
        $parts = explode('=', $pair, 2);
        $key = $parts[0];
        $value = isset($parts[1]) ? urldecode($parts[1]) : '';

        if (in_array($key, $multivalue_keys, true)) {
            $params[$key][] = $value;
        } elseif ($key === 'q') {
            $keys = $value;
        } elseif ($key === 'core') {
            // Recognised but intentionally not forwarded to Solr.
            continue;
        } else {
            $params[$key] = $value;
        }
    }

    return array($params, $keys);
}

/**
 * Extracts the user-entered search terms and facet filters from a parsed query.
 *
 * The comma-separated `q` value is split, the first occurrence of the default
 * query and all empty terms are removed. From `fq`, every filter mentioning
 * the tag facet and every empty filter is removed.
 *
 * @param string $keys      The `q` value of the query.
 * @param array  $params    Parameter map from _solr_proxy_parse_query().
 * @param string $q_default Default query that must not be reported.
 * @param string $fq_tag    Facet name whose filters must not be reported.
 *
 * @return array array($q, $fq), both re-indexed lists of strings.
 */
function _solr_proxy_extract_terms($keys, array $params, $q_default, $fq_tag)
{
    $q = array();
    if ($keys != '') {
        $q = explode(",", $keys);
        // remove default 'q' value
        $default_pos = array_search($q_default, $q, true);
        if ($default_pos !== false) {
            unset($q[$default_pos]);
        }
        // The original called array_filter() but threw the result away.
        $q = array_values(array_filter($q, function ($term) {
            return $term !== '';
        }));
    }

    $fq = array();
    if (isset($params['fq'])) {
        $tag_pattern = "/\\b{$fq_tag}\\b/i";
        // remove 'tag' facet and empty filters
        $fq = array_values(array_filter($params['fq'], function ($filter) use ($tag_pattern) {
            return $filter !== '' && !preg_match($tag_pattern, $filter);
        }));
    }

    return array($q, $fq);
}

/**
 * Executes the Solr query and returns the JSON response.
 *
 * Reads from $_GET:
 *  - solrUrl:    URL of the Solr core to query; nothing happens without it.
 *  - query:      query string forwarded to Solr; the raw response is echoed
 *                as `callback(response)`.
 *  - callback:   JSONP callback name; must look like a (dotted) identifier.
 *  - prev_query: previous query string, only used for the search log.
 *
 * Unless the request carries a `start` parameter (i.e. it is a pagination
 * request), the search terms and facets are written to the search-log core
 * that lives next to the queried core.
 *
 * NOTE(review): the Solr host is taken from the client-supplied solrUrl, so
 * this endpoint can be pointed at arbitrary hosts. Consider restricting it to
 * an allow-list of known Solr servers.
 */
function solr_proxy_main()
{
    if (!isset($_GET['solrUrl'])) {
        return;
    }

    $spliturl = parse_url($_GET['solrUrl']);
    if (!is_array($spliturl) || empty($spliturl['host'])) {
        header('HTTP/1.1 400 Bad Request');
        return;
    }

    // The public production host is mapped onto the internal Solr server.
    $is_production = $spliturl['host'] == 'solr.smk.dk';
    $host = $is_production ? 'solr-02.smk.dk' : $spliturl['host'];
    if ($is_production) {
        $port = '8080';
    } else {
        // A URL without an explicit port used to pass an undefined value on.
        $port = isset($spliturl['port']) ? $spliturl['port'] : 80;
    }
    $core_log = $is_production ? 'prod_search_log' : 'preprod_search_log';

    // The last path segment is the core; the search-log core shares its parent path.
    $path_segments = explode("/", trim(isset($spliturl['path']) ? $spliturl['path'] : '', '/'));
    $core = array_pop($path_segments);
    $path = implode("/", $path_segments);

    $solr = new Apache_Solr_Service($host, $port, '/' . $path . '/' . $core . '/');
    $solr_search_log = new Apache_Solr_Service($host, $port, '/' . $path . '/' . $core_log . '/');

    $q_default = "-(id_s:(*/*) AND category:collections) -(id_s:(*verso) AND category:collections)";
    $fq_tag = "tag";

    // Terms of the previous search, logged alongside a detail-view request.
    $q_prev = array();
    $fq_prev = array();
    if (isset($_GET['prev_query'])) {
        list($prev_params, $prev_keys) = _solr_proxy_parse_query($_GET['prev_query']);
        list($q_prev, $fq_prev) = _solr_proxy_extract_terms($prev_keys, $prev_params, $q_default, $fq_tag);
    }

    if (!isset($_GET['query'])) {
        return;
    }

    // The callback is echoed in front of the response, so only accept
    // something that is a plain (optionally dotted) JavaScript identifier.
    $callback = isset($_GET['callback']) ? $_GET['callback'] : '';
    if (!is_string($callback)
        || ($callback !== '' && !preg_match('/^[A-Za-z_$][A-Za-z0-9_$]*(\\.[A-Za-z_$][A-Za-z0-9_$]*)*$/', $callback))
    ) {
        header('HTTP/1.1 400 Bad Request');
        return;
    }

    list($params, $keys) = _solr_proxy_parse_query($_GET['query']);

    $picture_url = '';
    $numfound = 0;
    try {
        $response = $solr->search(
            $keys,
            isset($params['start']) ? $params['start'] : null,
            isset($params['rows']) ? $params['rows'] : null,
            $params
        );
        $numfound = $response->response->numFound;
        // Remember the image of the last document that has one.
        foreach ($response->response->docs as $doc) {
            foreach ($doc as $field => $value) {
                if ($field == "medium_image_url") {
                    $picture_url = $value;
                    break;
                }
            }
        }
    } catch (Exception $e) {
        die($e->__toString());
    }

    // proceed only if 'start' param was null ('start' is set when the user uses pagination
    // in website, and we want to avoid duplication on search string)
    if (!isset($params['start'])) {
        list($q, $fq) = _solr_proxy_extract_terms($keys, $params, $q_default, $fq_tag);

        if (count($q) + count($fq) > 0) {
            $document = new Apache_Solr_Document();
            $document->id = uniqid();
            $document->q = $q;
            $document->facet = $fq;
            // String concatenation needs '.'; the original used '+', which
            // is arithmetic in PHP and never produced "addr-forwarded".
            $document->ip = isset($_SERVER['HTTP_X_FORWARDED_FOR'])
                ? $_SERVER['REMOTE_ADDR'] . "-" . $_SERVER['HTTP_X_FORWARDED_FOR']
                : $_SERVER['REMOTE_ADDR'];
            $document->last_update = gmdate('Y-m-d\\TH:i:s\\Z', strtotime("now"));
            $document->numfound = $numfound;

            // user called for detailed view of an artwork?
            $artwork = "id_s";
            $matches = array_filter($q, function ($var) use ($artwork) {
                return preg_match("/\\b{$artwork}\\b/i", $var);
            });
            if (count($matches) > 0) {
                if (count($q_prev) > 0) {
                    $document->prev_q = $q_prev;
                }
                if (count($fq_prev) > 0) {
                    $document->prev_facet = $fq_prev;
                }
                if ($picture_url != '') {
                    $document->picture_url = $picture_url;
                }
            }

            $solr_search_log->addDocument($document);
            $solr_search_log->commit();
        }
    }

    echo $callback . '(' . $response->getRawResponse() . ')';
}
/**
 * commit() called with explicit arguments must POST exactly one commit
 * message to the update URL, carrying the matching attributes and timeout.
 */
public function testCommitWithNonDefaultParameters()
{
    // stand-in transport that records the outgoing request
    $transport = $this->getMockHttpTransportInterface();

    // one POST is expected, with these exact arguments; answer it with a 200
    $transport
        ->expects($this->once())
        ->method('performPostRequest')
        ->with(
            $this->equalTo('http://localhost:8180/solr/update?wt=json'),
            $this->equalTo('<commit expungeDeletes="true" waitFlush="false" waitSearcher="false" />'),
            $this->equalTo('text/xml; charset=UTF-8'),
            $this->equalTo(7200)
        )
        ->will($this->returnValue(Apache_Solr_HttpTransport_ResponseTest::get200Response()));

    $service = new Apache_Solr_Service();
    $service->setHttpTransport($transport);

    $service->commit(true, false, false, 7200);
}
/**
 * Commits and optimizes newly created Solr index
 *
 * Builds a service for the core "/solr/{$dataset}" on SOLR_MASTER_HOST using
 * a non-reusing cURL transport and calls commit(true) on it.
 * NOTE(review): the `true` argument is presumably the optimize flag of the
 * installed client - confirm, since no separate optimize() call is made.
 *
 * @param String $dataset Dataset name that equals the core name to identify the index to optimize/commit
 * @return void
 * @throws Exception When the commit fails; the original exception is
 *                   attached as the previous exception.
 * @access private
 */
private function commitAndOptimize($dataset)
{
    $transportInstance = new Apache_Solr_HttpTransport_CurlNoReuse();
    $service = new Apache_Solr_Service(SOLR_MASTER_HOST, SOLR_PORT, "/solr/{$dataset}", $transportInstance);

    try {
        //commit and optimize
        $service->commit(true);
    } catch (Exception $e) {
        // The original passed the exception object itself as the message,
        // which stringified the whole trace into the message and dropped the
        // exception chain. Wrap it properly instead.
        throw new Exception(
            sprintf('Solr commit for dataset "%s" failed: %s', $dataset, $e->getMessage()),
            (int) $e->getCode(),
            $e
        );
    }
}
/**
 * commit() with default arguments must issue exactly one POST request
 * through the configured transport.
 */
public function testCommit()
{
    // stand-in transport: expect a single POST and answer it with a 200
    $transport = $this->getMockHttpTransportInterface();
    $transport
        ->expects($this->once())
        ->method('performPostRequest')
        ->will($this->returnValue(Apache_Solr_HttpTransport_ResponseTest::get200Response()));

    $service = new Apache_Solr_Service();
    $service->setHttpTransport($transport);

    $service->commit();
}
/**
 * Forwards a commit request to the wrapped service.
 *
 * The result of the underlying call is not returned.
 */
public function commit()
{
    $wrapped = $this->service;
    $wrapped->commit();
}