/**
 * Store a document in the Elasticsearch index.
 *
 * Logs the URL, stamps the current GMT time (ISO 8601) into
 * $esDoc->status->processed and then PUTs the JSON-encoded document to
 * "document/<id>" below the configured Elasticsearch base URL.
 *
 * @param string $url   URL of the document; it is logged and passed to
 *                      ElasticSearch::getDocId() to obtain the document ID
 * @param object $esDoc Document to store; its "status" property is modified
 *
 * @return void
 */
public function storeDoc($url, $esDoc)
{
    Log::info("Store {$url}");

    // Record when this document was handled before it gets serialized.
    $esDoc->status->processed = gmdate('c');

    $docId    = ElasticSearch::getDocId($url);
    $endpoint = $GLOBALS['phinde']['elasticsearch'] . 'document/' . $docId;

    $request = new Elasticsearch_Request(
        $endpoint,
        \HTTP_Request2::METHOD_PUT
    );
    $request->setBody(json_encode($esDoc));
    $request->send();
}
/**
 * Search the "document" type and return the decoded Elasticsearch response.
 *
 * Some keywords inside the query are handled before the remaining text is
 * passed on as a query_string query:
 * - "nick:NAME"   is rewritten to "author.name:NAME"
 * - "after:DATE"  keeps documents with status.modate >  DATE (day precision)
 * - "before:DATE" keeps documents with status.modate <  DATE (day precision)
 * - "date:DATE"   keeps documents with status.modate on DATE (day precision)
 *
 * Only documents with status.findable = true are returned.
 *
 * @param string  $query   User query including the optional keywords above
 * @param array   $filters Map of field name => value; each entry becomes a
 *                         "term" clause that must match
 * @param string  $site    Prefix that "schemalessUrl" must start with;
 *                         empty string to search all sites
 * @param integer $page    Zero-based page number ("from" = $page * $perPage)
 * @param integer $perPage Number of hits per page
 * @param string  $sort    "date" sorts by status.modate, newest first.
 *                         An empty value falls back to "date" when the
 *                         configured defaultSort is "date". Any other value
 *                         sends an empty sort configuration.
 *
 * @return mixed Response body as returned by json_decode()
 */
public function search($query, $filters, $site, $page, $perPage, $sort)
{
    if (preg_match_all('#nick:([^ ]*)#', $query, $matches)) {
        foreach ($matches[1] as $authorName) {
            $query = str_replace(
                'nick:' . $authorName,
                'author.name:' . $authorName,
                $query
            );
        }
    }

    //query parts for the MUST section
    $qMust = array();

    //modification date filters: keyword => range operators to apply.
    //The modification date is stored as "status.modate" - the same field
    //this method sorts on and requests in "_source" - so the range clauses
    //have to use that name as well.
    $dateKeywords = array(
        'after'  => array('gt'),
        'before' => array('lt'),
        'date'   => array('gte', 'lte'),
    );
    foreach ($dateKeywords as $keyword => $operators) {
        if (!preg_match('#' . $keyword . ':([^ ]+)#', $query, $matches)) {
            continue;
        }
        //the keyword is not part of the full text query
        $query = trim(str_replace($matches[0], '', $query));

        $range = array();
        foreach ($operators as $operator) {
            //"||/d" is Elasticsearch date math: round to the day
            $range[$operator] = $matches[1] . '||/d';
        }
        $qMust[] = array('range' => array('status.modate' => $range));
    }

    $qMust[] = array(
        'query_string' => array(
            'default_field'    => '_all',
            'default_operator' => 'AND',
            'query'            => $query
        )
    );
    $qMust[] = array('term' => array('status.findable' => true));

    foreach ($filters as $type => $value) {
        $qMust[] = array('term' => array($type => $value));
    }
    if ($site != '') {
        $qMust[] = array(
            'prefix' => array('schemalessUrl' => array('value' => $site))
        );
    }

    if ($sort == '' && $GLOBALS['phinde']['defaultSort'] == 'date') {
        $sort = 'date';
    }
    if ($sort == 'date') {
        $sortCfg = array('status.modate' => array('order' => 'desc'));
    } else {
        $sortCfg = array();
    }

    //length of the highlighted text excerpt
    $contentMatchSize = $GLOBALS['phinde']['showFullContent'] ? 999999 : 100;

    //title and url are highlighted as a whole instead of in fragments
    $wholeField = array(
        'require_field_match' => false,
        'number_of_fragments' => 0
    );

    $doc = array(
        '_source' => array('url', 'title', 'author', 'status.modate'),
        'query' => array(
            'bool' => array('must' => $qMust)
        ),
        'highlight' => array(
            'pre_tags' => array('<em class="hl">'),
            'order'    => 'score',
            'encoder'  => 'html',
            'fields'   => array(
                'title' => $wholeField,
                'url'   => $wholeField,
                'text'  => array(
                    'require_field_match' => false,
                    'number_of_fragments' => 1,
                    'fragment_size'       => $contentMatchSize,
                    'no_match_size'       => $contentMatchSize
                )
            )
        ),
        'aggregations' => array(
            'tags'     => array('terms' => array('field' => 'tags')),
            'language' => array('terms' => array('field' => 'language')),
            'domain'   => array('terms' => array('field' => 'domain')),
            'type'     => array('terms' => array('field' => 'type'))
        ),
        'from' => $page * $perPage,
        'size' => $perPage,
        'sort' => $sortCfg
    );

    $r = new Elasticsearch_Request(
        $this->baseUrl . 'document/_search',
        \HTTP_Request2::METHOD_GET
    );
    $r->setBody(json_encode($doc));
    $res = $r->send();
    return json_decode($res->getBody());
}
#!/usr/bin/env php
<?php
namespace phinde;
/**
 * Configure the elasticsearch index.
 * Throws away all data.
 *
 * Steps:
 * 1. Load data/elasticsearch-mapping.json and make sure it is valid JSON.
 * 2. Delete the existing index (a missing index is not an error).
 * 3. Create the index again with the mapping as request body.
 *
 * Exits with status 1 when the mapping file cannot be read or parsed.
 */
require_once __DIR__ . '/../src/init.php';

$mappingFile = __DIR__ . '/../data/elasticsearch-mapping.json';

$json = file_get_contents($mappingFile);
if ($json === false) {
    //an unreadable file is a different problem than broken JSON;
    //report it as such and do not run json_pp on it
    Log::error("Error: Cannot read schema file " . $mappingFile);
    exit(1);
}

if (json_decode($json) === null) {
    Log::error("Error: Schema JSON is broken");
    //let json_pp parse the file so that its error message is shown;
    //"-t null" suppresses the formatted output
    chdir(__DIR__ . '/../');
    passthru('json_pp -t null < data/elasticsearch-mapping.json');
    exit(1);
}

//delete old index
$r = new Elasticsearch_Request(
    $GLOBALS['phinde']['elasticsearch'],
    \HTTP_Request2::METHOD_DELETE
);
$r->allow404 = true;
$r->send();

//recreate it
$r = new Elasticsearch_Request(
    $GLOBALS['phinde']['elasticsearch'],
    \HTTP_Request2::METHOD_PUT
);
$r->setBody($json);
$r->send();