/**
 * Remove every document from the local "core0" Solr core.
 *
 * Sends a match-all delete followed by a commit, then flags the view as
 * successful. Exceptions raised by the Solr client are not caught here.
 */
function emptyAction()
 {
     // Connection settings of the core this action wipes.
     $host = 'localhost';
     $port = '8983';
     $corePath = '/solr/core0';

     $indexService = new Apache_Solr_Service($host, $port, $corePath);

     // "*:*" is the match-all query, so this delete targets the whole index.
     $matchAll = '*:*';
     $indexService->deleteByQuery($matchAll);
     $indexService->commit();

     // Only reached when none of the calls above threw.
     $this->view->success = true;
 }
Example #2
0
 /**
  * Commit all pushed documents to the Solr Server
  *
  * Adds every document held in $this->documents, then commits and optimizes
  * the index and finally calls _clear(). Exceptions thrown by the Solr service
  * are not rethrown: their message is reported as an E_USER_WARNING. When no
  * document is queued a notice is raised and nothing is sent.
  *
  * @return void
  */
 public function commit()
 {
     $this->_setSolrService();

     if (count($this->documents) === 0) {
         trigger_error("There is not document for committing");
         return;
     }

     try {
         $this->_solrService->addDocuments($this->documents);
         $this->_solrService->commit();
         $this->_solrService->optimize();
         $this->_clear();
     } catch (Exception $e) {
         // trigger_error() only accepts E_USER_* levels. An exception code
         // (0, an HTTP status, ...) is not one of them: PHP 5/7 then drops the
         // message with an "Invalid error type" warning and PHP 8 throws a
         // ValueError, so the real failure was never reported.
         trigger_error($e->getMessage(), E_USER_WARNING);
     }
 }
                    default:
                        printMessage(' Skipped file "' . $filePath . '". Reason: Mime type "' . $mimeType . '" has no processor.');
                        break;
                }
            } else {
                printMessage(' Skipped file "' . $filePath . '". Reason: File not found.');
            }
        }
    } else {
        printMessage(' Skipped indexing of document files. Reason: no base path to files or document is not published.');
    }
    $documents[] = $solrDocument;
    if (0 === count($documents) % $commitRange) {
        printMessage(' Committing data set of ' . $commitRange . ' values.');
        $solr->addDocuments($documents);
        $solr->commit();
        $documents = array();
        printMessage(' Committing done.');
    }
}
// Flush the last, partially filled batch: the loop above only commits when the
// batch size reaches a multiple of $commitRange, so leftovers end up here.
if (count($documents) > 0) {
    printMessage(' Committing data set of ' . count($documents) . ' values.');
    $solr->addDocuments($documents);
    $solr->commit();
    printMessage(' Committing done.');
}
// Optimize once, after every batch has been committed.
printMessage(' Optimizing.');
$solr->optimize();
printMessage(' Optimizing done.');
// Elapsed run time in whole seconds ($startTime is set outside this excerpt).
$stopTime = time();
$time = $stopTime - $startTime;
 /**
  * Clear the cache whenever we commit changes.
  *
  * Forwards all arguments unchanged to the parent implementation, clears the
  * cache once the parent call has returned, and hands the parent's return
  * value back to the caller. If the parent throws, the cache is left as is.
  *
  * @param mixed $optimize     Forwarded to the parent commit().
  * @param mixed $waitFlush    Forwarded to the parent commit().
  * @param mixed $waitSearcher Forwarded to the parent commit().
  * @param mixed $timeout      Forwarded to the parent commit().
  *
  * @return mixed Whatever the parent commit() returned.
  *
  * @see Apache_Solr_Service::commit()
  */
 public function commit($optimize = TRUE, $waitFlush = TRUE, $waitSearcher = TRUE, $timeout = 3600)
 {
     // Keep the parent's result: the override used to drop it, so callers of
     // this subclass could not inspect the commit response.
     $response = parent::commit($optimize, $waitFlush, $waitSearcher, $timeout);
     $this->_clearCache();

     return $response;
 }
Example #5
0
/**
 * Store an article reference and everything derived from it.
 *
 * Steps, in order:
 *  1. resolve the reference id (given on the article, or looked up);
 *  2. INSERT or UPDATE the rdmp_reference row;
 *  3. index the reference in Solr;
 *  4. store a JSON snapshot in rdmp_reference_version;
 *  5. store the authors;
 *  6. store the BHL page range (new references only);
 *  7. optionally tweet a link (new references only).
 *
 * Any failing SQL statement ends the script through die(); an unreachable
 * Solr server ends it through exit. The article object is modified: a missing
 * genre is set to 'article'.
 *
 * @param object $article  Reference metadata. Properties read here: reference_id,
 *                         title, secondary_title, genre, date, year, url, volume,
 *                         issue, spage, epage, authors and the identifier fields
 *                         listed in the switch below.
 * @param int    $PageID   BHL PageID of the first page, 0 when unknown.
 * @param bool   $updating When true an already stored reference is overwritten;
 *                         when false its id is returned and nothing is written.
 *
 * @return int The reference id, or 0 when a new reference has no title.
 */
function db_store_article($article, $PageID = 0, $updating = false)
{
    global $db;
    global $config;

    // $PageID and $id are concatenated into SQL unquoted further down, so both
    // are pinned to integers before any statement is built.
    $PageID = (int) $PageID;

    // If we are editing an existing reference then we already know its id
    if (isset($article->reference_id)) {
        $id = (int) $article->reference_id;
    } else {
        $id = (int) db_find_article($article);
    }

    $update = false;
    if ($id != 0) {
        if (!$updating) {
            return $id;
        }
        $update = true;
    } else {
        // Try and trap empty references
        if (!isset($article->title) || $article->title == '') {
            return 0;
        }
    }

    if (!isset($article->genre)) {
        $article->genre = 'article';
    }

    // Column name => SQL value. Keyed by column so that a column can never be
    // listed twice (e.g. 'hdl' coming both from the url and from the hdl
    // property), which would make the INSERT fail; the last value wins.
    $columns = array();

    // Article metadata
    foreach ($article as $k => $v) {
        switch ($k) {
            // Ignore as it's an array
            case 'authors':
                break;
            case 'date':
                $columns['date'] = $db->qstr($v);
                if (!isset($article->year)) {
                    $columns['year'] = $db->qstr(year_from_date($v));
                }
                break;
            case 'url':
                // Don't store BHL URL here
                if (!preg_match('/^http:\\/\\/(www\\.)?biodiversitylibrary.org\\/page\\/(?<pageid>[0-9]+)/', $v)) {
                    // extract Handle if it exists
                    if (preg_match('/^http:\\/\\/hdl.handle.net\\/(?<hdl>.*)$/', $v, $m)) {
                        $columns['hdl'] = $db->qstr($m['hdl']);
                    } else {
                        $columns[$k] = $db->qstr($v);
                    }
                }
                break;
            // Things we store as is
            case 'title':
            case 'secondary_title':
            case 'volume':
            case 'series':
            case 'issue':
            case 'spage':
            case 'epage':
            case 'year':
            case 'issn':
            case 'genre':
            case 'doi':
            case 'hdl':
            case 'lsid':
            case 'oclc':
            case 'pdf':
            case 'abstract':
            case 'pmid':
                $columns[$k] = $db->qstr($v);
                break;
            // Things we ignore
            default:
                break;
        }
    }

    // Date: fall back to a year-only date when no full date was supplied
    if (!isset($article->date) && isset($article->year)) {
        $columns['date'] = $db->qstr($article->year . '-00-00');
    }

    // BHL PageID
    if ($PageID != 0) {
        $columns['PageID'] = $PageID;
    }

    // SICI
    $s = new Sici();
    $sici = $s->create($article);
    if ($sici != '') {
        $columns['sici'] = $db->qstr($sici);
    }

    if ($update) {
        // Delete author links; they are stored again further down.
        // The page range is deliberately kept, as deleting it may lose
        // plates etc. that lie outside the range.
        $sql = 'DELETE FROM rdmp_author_reference_joiner WHERE reference_id = ' . $id;
        $result = $db->Execute($sql);
        if ($result == false) {
            die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
        }

        // update (updated timestamp will be automatically updated)
        $assignments = array();
        foreach ($columns as $column => $value) {
            $assignments[] = $column . '=' . $value;
        }
        $sql = 'UPDATE rdmp_reference SET ' . implode(', ', $assignments) . ' WHERE reference_id=' . $id;
        $result = $db->Execute($sql);
        if ($result == false) {
            die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
        }
    } else {
        // Adding article for first time so add 'created' and 'updated' timestamp
        $columns['created'] = 'NOW()';
        $columns['updated'] = 'NOW()';
        $sql = 'INSERT INTO rdmp_reference (' . implode(",", array_keys($columns)) . ') VALUES (' . implode(",", $columns) . ')';
        $result = $db->Execute($sql);
        if ($result == false) {
            die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
        }
        $id = (int) $db->Insert_ID();

        // Store reference_cluster_id which we can use to group duplicates, by default
        // reference_cluster_id = reference_id
        $sql = 'UPDATE rdmp_reference SET reference_cluster_id=' . $id . ' WHERE reference_id=' . $id;
        $result = $db->Execute($sql);
        if ($result == false) {
            die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
        }
    }

    // Indexing-------------------------------------------------------------------------------------
    // Solr is the active index; the SQL text index below is the legacy path,
    // kept behind this switch.
    $use_solr = true;
    if ($use_solr) {
        $solr = new Apache_Solr_Service('localhost', '8983', '/solr');
        if (!$solr->ping()) {
            // NOTE(review): at this point the reference row is already written,
            // but version, authors and page range are not - confirm that
            // stopping here is intended.
            echo 'Solr service not responding.';
            exit;
        }

        // Optional metadata is read as '' when absent instead of through an
        // undefined property.
        $title = isset($article->title) ? $article->title : '';
        $outlet = isset($article->secondary_title) ? $article->secondary_title : '';
        $year = isset($article->year) ? $article->year : '';
        $volume = isset($article->volume) ? $article->volume : '';
        $spage = isset($article->spage) ? $article->spage : '';

        // Authors are optional (see the isset() check before db_store_authors);
        // count() on a missing property is a TypeError on PHP 8.
        $author_list = isset($article->authors) ? array_values((array) $article->authors) : array();

        // "Forename Surname" per author. The original code read ->surname in
        // one loop and ->lastname in the other; accept whichever is present.
        // NOTE(review): confirm which property name the author objects really carry.
        $author_names = array();
        foreach ($author_list as $a) {
            $forename = isset($a->forename) ? $a->forename : '';
            if (isset($a->lastname)) {
                $family = $a->lastname;
            } elseif (isset($a->surname)) {
                $family = $a->surname;
            } else {
                $family = '';
            }
            $author_names[] = $forename . ' ' . $family;
        }

        // Citation tail: " year title outlet volume(issue): spage-epage"
        $citation = ' ' . $year . ' ' . $title . ' ' . $outlet . ' ' . $volume;
        if (isset($article->issue)) {
            $citation .= '(' . $article->issue . ')';
        }
        $citation .= ': ' . $spage;
        if (isset($article->epage)) {
            $citation .= '-' . $article->epage;
        }

        // Author list for the citation: "A, B and C", or "A, B et al." when
        // there are more than three authors.
        $text = '';
        $num_authors = count($author_list);
        $count = 0;
        foreach ($author_list as $i => $author) {
            $text .= $author_names[$i];
            if (isset($author->suffix)) {
                $text .= ' ' . $author->suffix;
            }
            $count++;
            if ($count == 2 && $num_authors > 3) {
                $text .= ' et al.';
                break;
            }
            if ($count < $num_authors - 1) {
                $text .= ', ';
            } elseif ($count < $num_authors) {
                $text .= ' and ';
            }
        }

        $part = new Apache_Solr_Document();
        $part->id = 'reference/' . $id;
        $part->title = $title;
        $part->publication_outlet = $outlet;
        $part->year = $year;
        foreach ($author_names as $name) {
            $part->setMultiValue('authors', $name);
        }
        $part->citation = $text . ' ' . $citation;

        // Load the document into the index. Index errors are printed, not fatal.
        try {
            $solr->addDocuments(array($part));
            $solr->commit();
            $solr->optimize();
        } catch (Exception $e) {
            echo $e->getMessage();
        }
    } else {
        $sql = 'DELETE FROM rdmp_text_index WHERE (object_uri=' . $db->qstr($config['web_root'] . 'reference/' . $id) . ')';
        $result = $db->Execute($sql);
        if ($result == false) {
            die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
        }
        // Only do this if we have a title, as sometimes we don't (e.g. CrossRef lacks metadata)
        if (isset($article->title)) {
            $sql = 'INSERT INTO rdmp_text_index(object_type, object_id, object_uri, object_text) VALUES ("title"' . ', ' . $id . ', ' . $db->qstr($config['web_root'] . 'reference/' . $id) . ', ' . $db->qstr($article->title) . ')';
            $result = $db->Execute($sql);
            if ($result == false) {
                die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
            }
        }
    }

    // Versioning-----------------------------------------------------------------------------------
    // Store this object in version table so we can recover it if we overwrite item
    $ip = getip();
    // The address is quoted through the driver rather than pasted between
    // hand-written quotes, in case getip() returns client-supplied text.
    $sql = 'INSERT INTO rdmp_reference_version(reference_id, ip, json) VALUES(' . $id . ', ' . 'INET_ATON(' . $db->qstr($ip) . ')' . ',' . $db->qstr(json_encode($article)) . ')';
    $result = $db->Execute($sql);
    if ($result == false) {
        die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
    }

    // Author(s)------------------------------------------------------------------------------------
    // Store authors and link them to the article
    if (isset($article->authors)) {
        db_store_authors($id, $article->authors);
    }

    // Store page range (only if not updating, otherwise we may lose plates, etc.
    // that aren't in page range)
    if ($PageID != 0 && !$update) {
        if (isset($article->spage) && isset($article->epage)) {
            $page_range = bhl_page_range($PageID, $article->epage - $article->spage + 1);
        } else {
            // No epage, so just get spage (to do: how do we tell user we don't have page range?)
            $page_range = bhl_page_range($PageID, 0);
        }
        $count = 0;
        foreach ($page_range as $page) {
            $sql = 'INSERT INTO rdmp_reference_page_joiner (reference_id, PageID, page_order) VALUES (' . $id . ',' . $page . ',' . $count++ . ')';
            $result = $db->Execute($sql);
            if ($result == false) {
                die("failed [" . __FILE__ . ":" . __LINE__ . "]: " . $sql);
            }
        }
    }

    // Tweet----------------------------------------------------------------------------------------
    if (!$update && !empty($config['twitter'])) {
        // url + hashtag
        $url = $config['web_root'] . 'reference/' . $id . ' ' . '#bhlib';
        $url_len = strlen($url);
        $status = '';
        if (isset($article->title)) {
            $status = $article->title;
            // Measure and cut in characters when mbstring is available: a
            // byte-wise substr() can split a multi-byte character in half.
            // Assumes UTF-8 titles (json_encode() above requires that too).
            $multibyte = function_exists('mb_strlen') && function_exists('mb_substr');
            $status_len = $multibyte ? mb_strlen($status, 'UTF-8') : strlen($status);
            if ($status_len + $url_len + 1 > 140) {
                // 140 = kept title + ellipsis + space + url; never negative
                $keep = max(0, 140 - $url_len - 2);
                $status = $multibyte ? mb_substr($status, 0, $keep, 'UTF-8') : substr($status, 0, $keep);
                $status .= '…';
            }
        }
        $status .= ' ' . $url;
        tweet($status);
    }

    return $id;
}
Example #6
0
/**
 * Executes the Solr query and returns the JSON response.
 *
 * Request parameters (all read from $_GET):
 *  - solrUrl:    URL of the Solr core to query; nothing happens without it.
 *  - query:      raw Solr query string ("q=...&fq=...&start=..."), executed
 *                against the core and echoed back as "callback(json)".
 *  - prev_query: the previous query string; only its search terms are kept so
 *                that they can be logged next to a detail-view request.
 *  - callback:   JSONP callback name.
 *
 * Unless the request is a pagination request ('start' present), the search
 * terms and facets entered by the user are written to the search-log core
 * that sits next to the queried core.
 *
 * NOTE(review): solrUrl is supplied by the client and decides which host this
 * server connects to. If this endpoint is publicly reachable, restrict the
 * accepted hosts - left unchanged here because the allowed set is not known.
 *
 * @return void
 */
function solr_proxy_main()
{
    if (!isset($_GET['solrUrl'])) {
        return;
    }

    $spliturl = parse_url($_GET['solrUrl']);
    if ($spliturl === false || !isset($spliturl['host'])) {
        // Without a host there is nothing to connect to.
        if (!headers_sent()) {
            header('HTTP/1.1 400 Bad Request');
        }
        echo 'Invalid solrUrl parameter.';
        return;
    }

    $is_production = $spliturl['host'] == 'solr.smk.dk';
    $host = $is_production ? 'solr-02.smk.dk' : $spliturl['host'];
    if ($is_production) {
        $port = '8080';
    } else {
        // A URL without an explicit port means the default HTTP port.
        $port = isset($spliturl['port']) ? $spliturl['port'] : 80;
    }
    $core_log = $is_production ? 'prod_search_log' : 'preprod_search_log';

    // The last path segment is the core; the rest is the Solr base path.
    $path = explode("/", trim(isset($spliturl['path']) ? $spliturl['path'] : '', '/'));
    $core = array_pop($path);
    $path = implode("/", $path);

    $solr = new Apache_Solr_Service($host, $port, '/' . $path . '/' . $core . '/');
    $solr_search_log = new Apache_Solr_Service($host, $port, '/' . $path . '/' . $core_log . '/');

    $q_default = "-(id_s:(*/*) AND category:collections) -(id_s:(*verso) AND category:collections)";
    $fq_tag = "tag";

    // Terms of the previous search, kept for the log entry only (never executed).
    $q_prev = array();
    $fq_prev = array();
    if (isset($_GET['prev_query'])) {
        list($prev_keys, $prev_params) = _solr_proxy_parse_query($_GET['prev_query']);
        list($q_prev, $fq_prev) = _solr_proxy_user_terms($prev_keys, $prev_params, $q_default, $fq_tag);
    }

    if (!isset($_GET['query'])) {
        return;
    }

    list($keys, $params) = _solr_proxy_parse_query($_GET['query']);

    // Absent paging values are passed on as null, exactly what the unguarded
    // array reads used to yield.
    $start = isset($params['start']) ? $params['start'] : null;
    $rows = isset($params['rows']) ? $params['rows'] : null;

    $picture_url = '';
    $numfound = 0;
    try {
        $response = $solr->search($keys, $start, $rows, $params);
        $numfound = $response->response->numFound;
        // Remember the image url of the last document that has one.
        foreach ($response->response->docs as $doc) {
            foreach ($doc as $field => $value) {
                if ($field == "medium_image_url") {
                    $picture_url = $value;
                    break;
                }
            }
        }
    } catch (Exception $e) {
        die($e->__toString());
    }

    // Log only if 'start' is absent ('start' is set when the user paginates,
    // and we want to avoid logging the same search string twice).
    if (!isset($params['start'])) {
        list($q, $fq) = _solr_proxy_user_terms($keys, $params, $q_default, $fq_tag);

        if (count($q) + count($fq) > 0) {
            $remote_addr = isset($_SERVER['REMOTE_ADDR']) ? $_SERVER['REMOTE_ADDR'] : '';

            $document = new Apache_Solr_Document();
            $document->id = uniqid();
            $document->q = $q;
            $document->facet = $fq;
            // String concatenation: the original used "+", which is numeric
            // addition in PHP and never produced "address-forwarded".
            $document->ip = isset($_SERVER['HTTP_X_FORWARDED_FOR'])
                ? $remote_addr . "-" . $_SERVER['HTTP_X_FORWARDED_FOR']
                : $remote_addr;
            $document->last_update = gmdate('Y-m-d\\TH:i:s\\Z');
            $document->numfound = $numfound;

            // user called for detailed view of an artwork?
            $is_detail_view = false;
            foreach ($q as $term) {
                if (preg_match('/\\bid_s\\b/i', $term)) {
                    $is_detail_view = true;
                    break;
                }
            }
            if ($is_detail_view) {
                if (count($q_prev) > 0) {
                    $document->prev_q = $q_prev;
                }
                if (count($fq_prev) > 0) {
                    $document->prev_facet = $fq_prev;
                }
                if ($picture_url != '') {
                    $document->picture_url = $picture_url;
                }
            }

            $solr_search_log->addDocument($document);
            $solr_search_log->commit();
        }
    }

    // The callback name is echoed into a script response, so only characters
    // that can appear in a JavaScript identifier path are let through.
    $callback = isset($_GET['callback']) && is_string($_GET['callback']) ? $_GET['callback'] : '';
    $callback = preg_replace('/[^A-Za-z0-9_$.]/', '', $callback);
    echo $callback . '(' . $response->getRawResponse() . ')';
}

/**
 * Split a raw Solr query string into the main query and the other parameters.
 *
 * Empty pairs are skipped and a pair without "=" gets an empty value. The
 * 'core' parameter is consumed and not forwarded. Only values are URL-decoded.
 *
 * @param string $query_string e.g. "q=foo&fq=a&fq=b&rows=10"
 *
 * @return array array($keys, $params): the decoded 'q' value ('' if absent)
 *               and the parameter map, in which repeatable Solr parameters
 *               are lists.
 */
function _solr_proxy_parse_query($query_string)
{
    // The names of Solr parameters that may be specified multiple times.
    $multivalue_keys = array('bf', 'bq', 'facet.date', 'facet.date.other', 'facet.field', 'facet.query', 'fq', 'pf', 'qf');

    $params = array('q' => '*:*');
    $keys = '';
    foreach (explode('&', $query_string) as $pair) {
        if ($pair == '') {
            continue;
        }
        $parts = explode('=', $pair, 2);
        $key = $parts[0];
        $value = isset($parts[1]) ? urldecode($parts[1]) : '';

        if (in_array($key, $multivalue_keys, true)) {
            $params[$key][] = $value;
        } elseif ($key === 'q') {
            $keys = $value;
        } elseif ($key === 'core') {
            // Not a Solr parameter: swallowed here, as before.
            continue;
        } else {
            $params[$key] = $value;
        }
    }

    return array($keys, $params);
}

/**
 * Extract the search terms and facets that the user actually entered.
 *
 * Drops the default query clause, empty terms and every filter query that
 * mentions the tag facet.
 *
 * @param string $keys      Comma separated 'q' value.
 * @param array  $params    Parameter map from _solr_proxy_parse_query().
 * @param string $q_default Default query clause to remove from the terms.
 * @param string $fq_tag    Word identifying filter queries to remove.
 *
 * @return array array($q, $fq), both re-indexed lists of strings.
 */
function _solr_proxy_user_terms($keys, array $params, $q_default, $fq_tag)
{
    $q = array();
    if ($keys != '') {
        $q = explode(",", $keys);
        // remove default 'q' value
        $default_at = array_search($q_default, $q, true);
        if ($default_at !== false) {
            unset($q[$default_at]);
        }
        // array_filter() returns the filtered copy; the original discarded it,
        // so empty terms were never removed. Compared against '' explicitly so
        // that a literal "0" search term survives.
        $q = array_values(array_filter($q, function ($term) {
            return $term !== '';
        }));
    }

    $fq = array();
    if (isset($params['fq'])) {
        $tag_pattern = '/\\b' . preg_quote($fq_tag, '/') . '\\b/i';
        foreach ((array) $params['fq'] as $filter) {
            // remove 'tag' facet and empty filters
            if ($filter === '' || preg_match($tag_pattern, $filter)) {
                continue;
            }
            $fq[] = $filter;
        }
    }

    return array($q, $fq);
}
Example #7
0
 /**
  * commit(true, false, false, 7200) must POST the matching <commit/> message
  * to the update handler exactly once, using the given timeout.
  */
 public function testCommitWithNonDefaultParameters()
 {
     // The single POST request the service is expected to perform.
     $expectedUrl = 'http://localhost:8180/solr/update?wt=json';
     $expectedBody = '<commit expungeDeletes="true" waitFlush="false" waitSearcher="false" />';
     $expectedContentType = 'text/xml; charset=UTF-8';
     $expectedTimeout = 7200;

     // Mock transport that verifies the request and answers with a 200.
     $transport = $this->getMockHttpTransportInterface();
     $transport
         ->expects($this->once())
         ->method('performPostRequest')
         ->with(
             $this->equalTo($expectedUrl),
             $this->equalTo($expectedBody),
             $this->equalTo($expectedContentType),
             $this->equalTo($expectedTimeout)
         )
         ->will($this->returnValue(Apache_Solr_HttpTransport_ResponseTest::get200Response()));

     $service = new Apache_Solr_Service();
     $service->setHttpTransport($transport);
     $service->commit(true, false, false, $expectedTimeout);
 }
Example #8
0
 /**
  * Commits and optimizes newly created Solr index
  *
  * Connects to the core named after the dataset on the master host, using a
  * transport of its own, and issues a single commit(true).
  *
  * @param String $dataset Dataset name that equals the core name to identify the index to optimize/commit
  * @return void
  * @throws Exception When the commit fails; the original exception is
  *                   available through getPrevious().
  * @access private
  */
 private function commitAndOptimize($dataset)
 {
     $transportInstance = new Apache_Solr_HttpTransport_CurlNoReuse();
     $service = new Apache_Solr_Service(SOLR_MASTER_HOST, SOLR_PORT, "/solr/{$dataset}", $transportInstance);
     try {
         // commit and optimize in one call; a separate optimize() is not issued.
         // NOTE(review): what the first commit() argument means depends on the
         // client version in use - confirm it still requests an optimize.
         $service->commit(true);
     } catch (Exception $e) {
         // Chain the original instead of passing the exception object as the
         // message: that stringified the whole exception (trace included)
         // into the message and dropped its code.
         throw new Exception($e->getMessage(), (int) $e->getCode(), $e);
     }
 }
Example #9
0
 /**
  * A default commit() must perform exactly one POST request on the transport.
  */
 public function testCommit()
 {
     // Mock transport: expect a single POST and answer it with a 200 response.
     $transport = $this->getMockHttpTransportInterface();
     $postExpectation = $transport->expects($this->once())->method('performPostRequest');
     $postExpectation->will(
         $this->returnValue(Apache_Solr_HttpTransport_ResponseTest::get200Response())
     );

     $service = new Apache_Solr_Service();
     $service->setHttpTransport($transport);
     $service->commit();
 }
Example #10
0
 /**
  * Delegate to commit() on the service held in $this->service.
  *
  * Called without arguments; the service's return value is not passed on.
  *
  * @return void
  */
 public function commit()
 {
     $service = $this->service;
     $service->commit();
 }