Example #1
0
 /**
  * Call the global remote_open() function and wrap a resource result in an object.
  *
  * The connect timeout is only forwarded when the caller passed something
  * other than two or three arguments; otherwise the three-argument form of
  * the global function is used.
  *
  * @param mixed $host_or_program Passed through unchanged to remote_open().
  * @param mixed $port_or_args    Passed through unchanged to remote_open().
  * @param int   $timeout         Passed through unchanged to remote_open().
  * @param mixed $connect_timeout Forwarded only in the four-argument form.
  *
  * @return mixed An instance of the class named after the resource type when
  *               such a class exists, a XapianDatabase otherwise, or the raw
  *               return value when remote_open() did not yield a resource.
  */
 static function remote_open($host_or_program, $port_or_args, $timeout = 10000, $connect_timeout = null)
 {
     $argc = func_num_args();
     if ($argc === 2 || $argc === 3) {
         $handle = remote_open($host_or_program, $port_or_args, $timeout);
     } else {
         $handle = remote_open($host_or_program, $port_or_args, $timeout, $connect_timeout);
     }

     // Anything that is not a resource is handed back to the caller untouched.
     if (!is_resource($handle)) {
         return $handle;
     }

     // Build the wrapper class name from the part of the resource type that
     // follows the first "__" found past offset 0; without such a separator
     // the first three characters are dropped instead.
     $resource_type = get_resource_type($handle);
     $separator_at = strpos($resource_type, '__');
     $suffix_start = $separator_at ? $separator_at + 2 : 3;
     $wrapper = 'Xapian' . substr($resource_type, $suffix_start);

     if (class_exists($wrapper)) {
         return new $wrapper($handle);
     }

     return new XapianDatabase($handle);
 }
 /**
  * Parse a search query string, hand it to Xapian and build a readable description.
  *
  * Steps performed, in order:
  *  - bail out with null when the XAPIANDB constant is undefined or falsy;
  *  - open the global $xapiandb handle on first use (remote when XAPIANDB
  *    contains a colon, local otherwise);
  *  - lazily create the stemmer, enquire and query parser objects on $this;
  *  - split the query into words, quoted phrases and prefixed items, rewriting
  *    section:, groupby:, from: and to: items inside $this->query;
  *  - expand speaker: ids listed in the gidredirect table into OR pairs;
  *  - parse the query with Xapian and turn the parsed description back into
  *    display text stored in $this->query_desc.
  *
  * On success $this->valid is set to true. When Xapian rejects the query the
  * escaped exception message is stored in $this->error and null is returned.
  *
  * NOTE(review): the name and the absence of a return value on success suggest
  * this is a PHP 4 style constructor -- confirm against the enclosing class.
  *
  * @param string $query Raw search string.
  *
  * @return null|void Null on the early-exit paths, nothing otherwise.
  */
 public function SEARCHENGINE($query)
 {
     if (!defined('XAPIANDB') || !XAPIANDB) {
         return null;
     }
     global $xapiandb, $PAGE, $hansardmajors, $parties;
     if (!$xapiandb) {
         if (strstr(XAPIANDB, ":")) {
             //ini_set('display_errors', 'On');
             list($xapian_host, $xapian_port) = explode(":", XAPIANDB);
             twfy_debug("SEARCH", "Using Xapian remote backend: " . $xapian_host . " port " . $xapian_port);
             $xapiandb_remote = remote_open($xapian_host, intval($xapian_port));
             $xapiandb = new XapianDatabase($xapiandb_remote);
         } else {
             $xapiandb = new XapianDatabase(XAPIANDB);
         }
     }
     $this->query = $query;
     if (!isset($this->stemmer)) {
         $this->stemmer = new XapianStem('english');
     }
     if (!isset($this->enquire)) {
         $this->enquire = new XapianEnquire($xapiandb);
     }
     if (!isset($this->queryparser)) {
         $this->queryparser = new XapianQueryParser();
         $this->datevaluerange = new XapianDateValueRangeProcessor(1);
         $this->queryparser->set_stemmer($this->stemmer);
         $this->queryparser->set_stemming_strategy(XapianQueryParser::STEM_SOME);
         $this->queryparser->set_database($xapiandb);
         $this->queryparser->set_default_op(Query_OP_AND);
         $this->queryparser->add_boolean_prefix('speaker', 'S');
         $this->queryparser->add_boolean_prefix('major', 'M');
         $this->queryparser->add_boolean_prefix('date', 'D');
         $this->queryparser->add_boolean_prefix('batch', 'B');
         $this->queryparser->add_boolean_prefix('segment', 'U');
         $this->queryparser->add_boolean_prefix('department', 'G');
         $this->queryparser->add_boolean_prefix('party', 'P');
         $this->queryparser->add_boolean_prefix('column', 'C');
         $this->queryparser->add_boolean_prefix('gid', 'Q');
         $this->queryparser->add_valuerangeprocessor($this->datevaluerange);
     }
     # Force words to lower case
     # (A callback is used because the /e modifier was removed in PHP 7.0, where
     # preg_replace() would return NULL here and wipe the query.)
     $this->query = preg_replace_callback('#(department|party):.+?\\b#i', function ($match) {
         return strtolower($match[0]);
     }, $this->query);
     // Any characters other than this are treated as, basically, white space
     // (apart from quotes and minuses, special case below)
     // The colon is in here for prefixes speaker:10043 and so on.
     $this->wordchars = "A-Za-z0-9,.'&:_À-ÿ";
     $this->wordcharsnodigit = "A-Za-z0-9'&_À-ÿ";
     // An array of normal words.
     $this->words = array();
     // All quoted phrases, as an (array of (arrays of words in each phrase)).
     $this->phrases = array();
     // Items prefixed with a colon (speaker:10024) as an (array of (name, value))
     $this->prefixed = array();
     // Split words up into individual words, and quoted phrases
     preg_match_all('/(' . '"|' . '(?:(?<![' . $this->wordchars . '])-)?' . '[' . $this->wordchars . ']+' . ')/', $this->query, $all_words);
     if ($all_words) {
         $all_words = $all_words[0];
     } else {
         $all_words = array();
     }
     $in_quote = false;
     $from = '';
     $to = '';
     foreach ($all_words as $word) {
         if ($word == '"') {
             // Toggle quote state; open a new phrase, or drop the phrase just
             // closed when it collected no words.
             $in_quote = !$in_quote;
             if ($in_quote) {
                 array_push($this->phrases, array());
             }
             if (!$in_quote && !count($this->phrases[count($this->phrases) - 1])) {
                 array_pop($this->phrases);
             }
             continue;
         }
         if ($word == '') {
             continue;
         }
         if (strpos($word, ':') !== false) {
             $items = explode(":", strtolower($word));
             $type = $items[0];
             // A leading minus (exclusion) is not part of the prefix name.
             if (substr($type, 0, 1) == '-') {
                 $type = substr($type, 1);
             }
             $value = strtolower(join(":", array_slice($items, 1)));
             if ($type == 'section') {
                 // Translate section names into major: terms.
                 $newv = $value;
                 if ($value == 'debates' || $value == 'debate') {
                     $newv = 1;
                 } elseif ($value == 'whall' || $value == 'westminster' || $value == 'westminhall') {
                     $newv = 2;
                 } elseif ($value == 'wrans' || $value == 'wran') {
                     $newv = 3;
                 } elseif ($value == 'wms' || $value == 'statements' || $value == 'statement') {
                     $newv = 4;
                 } elseif ($value == 'lordsdebates' || $value == 'lords') {
                     $newv = 101;
                 } elseif ($value == 'ni' || $value == 'nidebates') {
                     $newv = 5;
                 } elseif ($value == 'pbc' || $value == 'standing') {
                     $newv = 6;
                 } elseif ($value == 'sp') {
                     $newv = 7;
                 } elseif ($value == 'spwrans' || $value == 'spwran') {
                     $newv = 8;
                 } elseif ($value == 'uk') {
                     $newv = array(1, 2, 3, 4, 6, 101);
                 } elseif ($value == 'scotland') {
                     $newv = array(7, 8);
                 } elseif ($value == 'future') {
                     $newv = 'F';
                 }
                 if (is_array($newv)) {
                     $newv = 'major:' . join(' major:', $newv);
                 } else {
                     $newv = "major:{$newv}";
                 }
                 $this->query = str_ireplace("{$type}:{$value}", $newv, $this->query);
             } elseif ($type == 'groupby') {
                 // groupby: is removed from the query and remembered in
                 // $this->prefixed for the description built further down.
                 $newv = $value;
                 if ($value == 'debates' || $value == 'debate') {
                     $newv = 'debate';
                 }
                 if ($value == 'speech' || $value == 'speeches') {
                     $newv = 'speech';
                 }
                 $this->query = str_ireplace("{$type}:{$value}", '', $this->query);
                 array_push($this->prefixed, array($type, $newv));
             } elseif ($type == 'from') {
                 $from = $value;
             } elseif ($type == 'to') {
                 $to = $value;
             }
         } elseif (strpos($word, '-') !== false) {
             // Words containing a minus are not recorded (checked before the
             // quote state, so this applies inside phrases as well).
         } elseif ($in_quote) {
             array_push($this->phrases[count($this->phrases) - 1], strtolower($word));
         } elseif (strpos($word, '..') !== false) {
             // Range expressions are not recorded as words.
         } elseif ($word == 'OR' || $word == 'AND' || $word == 'XOR' || $word == 'NEAR') {
             // Upper-case boolean operators are not recorded as words.
         } else {
             array_push($this->words, strtolower($word));
         }
     }
     // Turn from:/to: into a single "start..end" range appended to the query;
     // a missing end defaults to today, a missing start to 19990101.
     if ($from && $to) {
         $this->query = str_ireplace("from:{$from}", '', $this->query);
         $this->query = str_ireplace("to:{$to}", '', $this->query);
         $this->query .= " {$from}..{$to}";
     } elseif ($from) {
         $this->query = str_ireplace("from:{$from}", '', $this->query);
         $this->query .= " {$from}.." . date('Ymd');
     } elseif ($to) {
         $this->query = str_ireplace("to:{$to}", '', $this->query);
         $this->query .= " 19990101..{$to}";
     }
     # Merged people
     $db = new ParlDB();
     $merged = $db->query('SELECT * FROM gidredirect WHERE gid_from LIKE :gid_from', array(':gid_from' => "uk.org.publicwhip/person/%"));
     for ($n = 0; $n < $merged->rows(); $n++) {
         $from_id = str_replace('uk.org.publicwhip/person/', '', $merged->field($n, 'gid_from'));
         $to_id = str_replace('uk.org.publicwhip/person/', '', $merged->field($n, 'gid_to'));
         $this->query = preg_replace("#speaker:({$from_id}|{$to_id})#i", "(speaker:{$from_id} OR speaker:{$to_id})", $this->query);
     }
     twfy_debug("SEARCH", "prefixed: " . var_export($this->prefixed, true));
     twfy_debug("SEARCH", "query -- " . $this->query);
     $flags = XapianQueryParser::FLAG_BOOLEAN | XapianQueryParser::FLAG_LOVEHATE | XapianQueryParser::FLAG_WILDCARD | XapianQueryParser::FLAG_SPELLING_CORRECTION;
     $flags = $flags | XapianQueryParser::FLAG_PHRASE;
     try {
         $query = $this->queryparser->parse_query($this->query, $flags);
     } catch (Exception $e) {
         # Nothing we can really do with a bad query
         $this->error = _htmlspecialchars($e->getMessage());
         return null;
     }
     $this->enquire->set_query($query);
     # Now parse the parsed query back into a query string, yummy
     $qd = $query->get_description();
     twfy_debug("SEARCH", "queryparser original description -- " . $qd);
     # Strip Xapian::Query()
     $qd = substr($qd, 14, -1);
     # Don't need pos or weight
     $qd = preg_replace('#:\\(.*?\\)#', '', $qd);
     # Date range
     # Each YYYYMMDD bound is shown as DD/MM/YYYY, joined by "..".
     $qd = preg_replace_callback('#VALUE_RANGE 1 (\\d+) (\\d+)#', function ($match) {
         $ymd = '#(\\d{4})(\\d\\d)(\\d\\d)#';
         return preg_replace($ymd, '$3/$2/$1', $match[1]) . '..' . preg_replace($ymd, '$3/$2/$1', $match[2]);
     }, $qd);
     # Replace phrases with the phrase in quotes
     preg_match_all('#\\(([^(]*? PHRASE [^(]*?)\\)#', $qd, $m);
     foreach ($m[1] as $phrase) {
         $phrase_new = preg_replace('# PHRASE \\d+#', '', $phrase);
         #$this->phrases[] = preg_split('#\s+#', $phrase_new);
         $qd = str_replace("({$phrase})", '"' . $phrase_new . '"', $qd);
     }
     preg_match_all('#\\(([^(]*? NEAR [^(]*?)\\)#', $qd, $m);
     foreach ($m[1] as $mm) {
         $mmn = preg_replace('# NEAR \\d+ #', ' NEAR ', $mm);
         $qd = str_replace("({$mm})", "({$mmn})", $qd);
     }
     # Awesome regexes to get rid of superfluous matching brackets
     $qd = preg_replace('/( \\( ( (?: (?>[^ ()]+) | (?1) ) (?: [ ](?:AND|OR|XOR|FILTER|NEAR[ ]\\d+|PHRASE[ ]\\d+)[ ] (?: (?>[^ ()]+) | (?1) ) )*  ) \\) ) [ ] (FILTER|AND_NOT)/x', '$2 $3', $qd);
     $qd = preg_replace('/(?:FILTER | 0 [ ] \\* ) [ ] ( \\( ( (?: (?>[^ ()]+) | (?1) ) (?: [ ](?:AND|OR|XOR)[ ] (?: (?>[^ ()]+) | (?1) ) )*  ) \\) )/x', '$2', $qd);
     $qd = preg_replace('/(?:FILTER | 0 [ ] \\* ) [ ] ( [^()] )/x', '$1', $qd);
     # AND is the default
     $qd = str_replace('AND ', '', $qd);
     $qd = preg_replace('/^ ( \\( ( (?: (?>[^()]+) | (?1) )* ) \\) ) $/x', '$2', $qd);
     # Other prefixes
     $qd = preg_replace('#\\bU(\\d+)\\b#', 'segment:$1', $qd);
     $qd = preg_replace('#\\bC(\\d+)\\b#', 'column:$1', $qd);
     $qd = preg_replace('#\\bQ(.*?)\\b#', 'gid:$1', $qd);
     # Party terms are looked up in the global $parties map when a
     # capitalised key exists; otherwise the raw term is shown.
     $qd = preg_replace_callback('#\\bP(.*?)\\b#', function ($match) use ($parties) {
         $key = ucfirst($match[1]);
         return 'party:' . (isset($parties[$key]) ? $parties[$key] : $match[1]);
     }, $qd);
     $qd = preg_replace('#\\bD(.*?)\\b#', 'date:$1', $qd);
     # XXX Lookup to show proper name of dept
     $qd = preg_replace('#\\bG(.*?)\\b#', 'department:$1', $qd);
     if (strstr($qd, 'M1 OR M2 OR M3 OR M4 OR M6 OR M101')) {
         $qd = str_replace('M1 OR M2 OR M3 OR M4 OR M6 OR M101', 'section:uk', $qd);
     } elseif (strstr($qd, 'M7 OR M8')) {
         $qd = str_replace('M7 OR M8', 'section:scotland', $qd);
     }
     # Major ids are replaced by their title from the global $hansardmajors.
     # NOTE(review): as before, an unknown id yields "in the '<id>" with no
     # closing quote -- kept unchanged, confirm whether that is intended.
     $qd = preg_replace_callback('#\\bM(\\d+)\\b#', function ($match) use ($hansardmajors) {
         return "in the '" . (isset($hansardmajors[$match[1]]['title']) ? $hansardmajors[$match[1]]['title'] . "'" : $match[1]);
     }, $qd);
     $qd = preg_replace('#\\bMF\\b#', 'in Future Business', $qd);
     # Replace stemmed things with their unstemmed terms from the query
     $used = array();
     preg_match_all('#Z[^\\s()]+#', $qd, $m);
     foreach ($m[0] as $mm) {
         $iter = $this->queryparser->unstem_begin($mm);
         $end = $this->queryparser->unstem_end($mm);
         // Pick the first unstemmed term not used yet; when every term was
         // already used, $tt is left holding the last one seen.
         // NOTE(review): if the iterator is empty, $tt keeps its value from a
         // previous pass (or is undefined on the first) -- confirm this cannot
         // happen for terms taken from the parsed description.
         while (!$iter->equals($end)) {
             $tt = $iter->get_term();
             if (!in_array($tt, $used)) {
                 break;
             }
             $iter->next();
         }
         $used[] = $tt;
         $qd = preg_replace('#' . preg_quote($mm, '#') . '#', $tt, $qd, 1);
     }
     # Speakers
     # Collapse the OR pairs introduced for merged people back to one id.
     for ($n = 0; $n < $merged->rows(); $n++) {
         $from_id = str_replace('uk.org.publicwhip/person/', '', $merged->field($n, 'gid_from'));
         $to_id = str_replace('uk.org.publicwhip/person/', '', $merged->field($n, 'gid_to'));
         $qd = str_replace("(S{$from_id} OR S{$to_id})", "S{$to_id}", $qd);
         $qd = str_replace("S{$from_id} OR S{$to_id}", "S{$to_id}", $qd);
     }
     preg_match_all('#S(\\d+)#', $qd, $m);
     foreach ($m[1] as $mm) {
         $member = new MEMBER(array('person_id' => $mm));
         # Names are currently in ISO-8859-1
         $name = iconv('iso-8859-1', 'utf-8//TRANSLIT', $member->full_name());
         $qd = str_replace("S{$mm}", "speaker:{$name}", $qd);
     }
     # Simplify display of excluded words
     $qd = preg_replace('#AND_NOT ([a-z0-9"]+)#', '-$1', $qd);
     preg_match_all('#AND_NOT \\((.*?)\\)#', $qd, $m);
     foreach ($m[1] as $mm) {
         $mmn = '-' . join(' -', explode(' OR ', $mm));
         $qd = str_replace("AND_NOT ({$mm})", $mmn, $qd);
     }
     foreach ($this->prefixed as $items) {
         if ($items[0] == 'groupby') {
             if ($items[1] == 'debate') {
                 $qd .= ' grouped by debate';
             } elseif ($items[1] == 'speech') {
                 $qd .= ' showing all speeches';
             } else {
                 $PAGE->error_message("Unknown group by '{$items['1']}' ignored");
             }
         }
     }
     # Xapian is UTF-8, site is ISO8859-1
     $qd = iconv('utf-8', 'iso-8859-1//TRANSLIT', $qd);
     $this->query_desc = trim($qd);
     #print 'DEBUG: ' . $query->get_description();
     twfy_debug("SEARCH", "words: " . var_export($this->words, true));
     twfy_debug("SEARCH", "phrases: " . var_export($this->phrases, true));
     twfy_debug("SEARCH", "queryparser description -- " . $this->query_desc);
     $this->valid = true;
 }
Example #3
0
 /**
  * Call the global remote_open() function and wrap a resource result.
  *
  * With two or three arguments the three-argument form of the global
  * function is used; otherwise the connect timeout is forwarded as well.
  *
  * @param mixed $host_or_program Passed through unchanged to remote_open().
  * @param mixed $port_or_args    Passed through unchanged to remote_open().
  * @param int   $timeout         Passed through unchanged to remote_open().
  * @param mixed $connect_timeout Forwarded only in the four-argument form.
  *
  * @return mixed A XapianDatabase built from the resource, or the raw return
  *               value when remote_open() did not yield a resource.
  */
 static function remote_open($host_or_program, $port_or_args, $timeout = 10000, $connect_timeout = null)
 {
     $argc = func_num_args();
     if ($argc === 2 || $argc === 3) {
         $handle = remote_open($host_or_program, $port_or_args, $timeout);
     } else {
         $handle = remote_open($host_or_program, $port_or_args, $timeout, $connect_timeout);
     }

     if (is_resource($handle)) {
         return new XapianDatabase($handle);
     }

     return $handle;
 }