Example #1
0
 /**
  * Creates a fairly standard $data structure for the search function.
  * Will probably be rendered by the hansard_search.php template.
  *
  * The query itself is run through the global $SEARCHENGINE; each matching
  * gid is then looked up in the hansard/epobject tables and decorated with
  * a highlighted extract, a link URL, the speaker and its parent
  * (sub)section title.
  *
  * @param array $args Associative array with the keys:
  *   's'         => the search term (required). It should have been tidied
  *                  up by the caller, eg with filter_user_input($s, 'strict').
  *   'p'         => (optional) page number of results to show; defaults to 1.
  *   'num'       => (optional) results per page; defaults to 20, capped at
  *                  1000. Mainly for glossary term adding.
  *   'pop'       => (optional) 1 if this is a "popular" search link, in
  *                  which case the query is not logged.
  *   'e'         => (optional) if set, list URLs are built with 'url'
  *                  encoding instead of 'html'.
  *   'o'         => (optional) sort order: 'd' date (default), 'c' created,
  *                  'r' relevance.
  *   'threshold' => (optional) only used when sorting by created: results
  *                  stop at the first item whose created value is below it.
  *
  * @return array|false False (after reporting through $PAGE) when no search
  *   string was given; otherwise an array with the keys 'info',
  *   'searchdescription' and 'rows'.
  */
 function _get_data_by_search($args)
 {
     global $PAGE, $hansardmajors;

     if (!isset($args['s'])) {
         $PAGE->error_message("No search string");
         return false;
     }
     $searchstring = $args['s'];

     // What we'll return.
     $data = array();
     $data['info']['s'] = $args['s'];

     // Allows us to specify how many results we want (mainly for glossary
     // term adding). Anything that is not a positive number falls back to
     // the default so the search engine is never handed a bogus limit.
     $results_per_page = 20;
     if (isset($args['num']) && is_numeric($args['num']) && (int) $args['num'] > 0) {
         $results_per_page = min((int) $args['num'], 1000);
     }
     $data['info']['results_per_page'] = $results_per_page;

     // What page are we on? Forced to a whole number >= 1 so the offset
     // computed below can never be negative or fractional.
     $page = 1;
     if (isset($args['p']) && is_numeric($args['p'])) {
         $page = max(1, (int) $args['p']);
     }
     $data['info']['page'] = $page;

     $encode = isset($args['e']) ? 'url' : 'html';

     // Fetch count of number of matches
     global $SEARCHENGINE;
     $data['searchdescription'] = $SEARCHENGINE->query_description_long();
     $count = $SEARCHENGINE->run_count();
     $data['info']['total_results'] = $count;

     // Log this query so we can improve them - if it wasn't a "popular
     // query" link
     if (!isset($args['pop']) || $args['pop'] != 1) {
         global $SEARCHLOG;
         $SEARCHLOG->add(array('query' => $searchstring, 'page' => $page, 'hits' => $count));
     }

     // No results.
     if ($count <= 0) {
         $data['rows'] = array();
         return $data;
     }

     // For Xapian's equivalent of an SQL LIMIT clause.
     $first_result = ($page - 1) * $results_per_page;
     // Take account of LIMIT's 0 base: the displayed position is 1-based.
     $data['info']['first_result'] = $first_result + 1;

     // Work out the sort order; anything unrecognised means 'date'.
     $sort_order = 'date';
     if (isset($args['o'])) {
         if ($args['o'] == 'd') {
             $sort_order = 'date';
         } elseif ($args['o'] == 'c') {
             $sort_order = 'created';
         } elseif ($args['o'] == 'r') {
             $sort_order = 'relevance';
         }
     }

     // Get the gids from Xapian
     $SEARCHENGINE->run_search($first_result, $results_per_page, $sort_order);
     $gids = $SEARCHENGINE->get_gids();
     if ($sort_order == 'created') {
         $createds = $SEARCHENGINE->get_createds();
     }
     $relevances = $SEARCHENGINE->get_relevances();

     $num_gids = count($gids);
     if ($num_gids <= 0) {
         // No results.
         $data['rows'] = array();
         return $data;
     }

     // We'll put all the data in here before giving it to a template.
     $rows = array();

     // Cycle through each result, munge the data, get more, and put it all in $data.
     for ($n = 0; $n < $num_gids; $n++) {
         $gid = $gids[$n];

         // When sorting by created and a threshold was supplied, stop at the
         // first item that falls below it and report how many were kept.
         if ($sort_order == 'created' && isset($args['threshold'])) {
             // The created value is whatever precedes the first ':'.
             $created = substr($createds[$n], 0, strpos($createds[$n], ':'));
             if ($created < $args['threshold']) {
                 $data['info']['total_results'] = $n;
                 break;
             }
         }

         // Get the data for the gid from the database.
         // NOTE(review): $gid is interpolated straight into the SQL. It comes
         // from the search index rather than from the request, so it is
         // presumably trusted - confirm, or escape it with whatever quoting
         // facility $this->db provides.
         $q = $this->db->query("SELECT hansard.gid,\n                                    hansard.hdate,\n                                    hansard.section_id,\n                                    hansard.subsection_id,\n                                    hansard.htype,\n                                    hansard.major,\n                                    hansard.speaker_id,\n\t\t\t\t    hansard.hpos,\n                                    epobject.body\n                            FROM hansard, epobject\n                            WHERE hansard.gid = '{$gid}'\n                            AND hansard.epobject_id = epobject.epobject_id");
         if ($q->rows() > 1) {
             $PAGE->error_message("Got more than one row getting data for {$gid}");
         }
         if ($q->rows() == 0) {
             # This error message is totally spurious, so don't show it
             # $PAGE->error_message("Unexpected missing gid $gid while searching");
             continue;
         }

         $itemdata = array();
         $itemdata['gid'] = fix_gid_from_db($q->field(0, 'gid'));
         $itemdata['hdate'] = $q->field(0, 'hdate');
         $itemdata['htype'] = $q->field(0, 'htype');
         $itemdata['major'] = $q->field(0, 'major');
         $itemdata['section_id'] = $q->field(0, 'section_id');
         $itemdata['subsection_id'] = $q->field(0, 'subsection_id');
         $itemdata['relevance'] = $relevances[$n];
         $itemdata['speaker_id'] = $q->field(0, 'speaker_id');
         $itemdata['hpos'] = $q->field(0, 'hpos');

         //////////////////////////
         // 1. Trim and highlight the body text.
         $body = $q->field(0, 'body');
         // We want to trim the body to an extract that is centered
         // around the position of the first search word.
         // We don't use strip_tags as it doesn't replace tags with spaces,
         // which means some words end up stuck together.
         $extract = strip_tags_tospaces($body);
         // $bestpos is the position of the first search word
         $bestpos = $SEARCHENGINE->position_of_first_word($extract);
         // Length of the extract, in characters.
         $length_of_extract = 400;
         // Start half an extract before the first hit, but never before
         // the beginning of the text.
         $startpos = $bestpos - $length_of_extract / 2;
         if ($startpos < 0) {
             $startpos = 0;
         }
         // Trim it to length and position, adding ellipses.
         $extract = trim_characters($extract, $startpos, $length_of_extract);
         // Highlight search words
         $extract = $SEARCHENGINE->highlight($extract);
         $itemdata['body'] = $extract;

         //////////////////////////
         // 2. Create the URL to link to this bit of text.
         $id_data = array(
             'major' => $itemdata['major'],
             'htype' => $itemdata['htype'],
             'gid' => $itemdata['gid'],
             'section_id' => $itemdata['section_id'],
             'subsection_id' => $itemdata['subsection_id'],
         );
         // We append the query onto the end of the URL as variable 's'
         // so we can highlight them on the debate/wrans list page.
         $url_args = array('s' => $searchstring);
         $itemdata['listurl'] = $this->_get_listurl($id_data, $url_args, $encode);

         //////////////////////////
         // 3. Get the speaker for this item, if applicable.
         if ($itemdata['speaker_id'] != 0) {
             $itemdata['speaker'] = $this->_get_speaker($itemdata['speaker_id'], $itemdata['hdate']);
         }

         //////////////////////////
         // 4. Get data about the parent (sub)section. TODO: CHECK THIS for major==4
         if ($itemdata['major'] && $hansardmajors[$itemdata['major']]['type'] == 'debate') {
             // Debate
             if ($itemdata['htype'] != 10) {
                 $section = $this->_get_section($itemdata);
                 $itemdata['parent']['body'] = $section['body'];
                 if ($itemdata['section_id'] != $itemdata['subsection_id']) {
                     $subsection = $this->_get_subsection($itemdata);
                     $itemdata['parent']['body'] .= ': ' . $subsection['body'];
                 }
                 if ($itemdata['major'] == 5) {
                     $itemdata['parent']['body'] = 'NIA: ' . $itemdata['parent']['body'];
                 }
             } else {
                 // It's a section, so it will be its own title.
                 $itemdata['parent']['body'] = $itemdata['body'];
                 $itemdata['body'] = '';
             }
         } else {
             // Wrans or WMS
             $section = $this->_get_section($itemdata);
             $subsection = $this->_get_subsection($itemdata);
             $body = $hansardmajors[$itemdata['major']]['title'] . ' &#8212; ';
             if (isset($section['body'])) {
                 $body .= $section['body'];
             }
             if (isset($subsection['body'])) {
                 $body .= ': ' . $subsection['body'];
             }
             if (isset($subsection['listurl'])) {
                 $listurl = $subsection['listurl'];
             } else {
                 $listurl = '';
             }
             $itemdata['parent'] = array('body' => $body, 'listurl' => $listurl);
         }

         // Add this item's data onto the main array we'll be returning.
         $rows[] = $itemdata;
     }

     $data['rows'] = $rows;
     return $data;
 }
Example #2
0
/**
 * Strip HTML from a string and trim it to an extract, adding ellipses.
 *
 * If $start is greater than 0 the text is cut at that offset and then
 * advanced to the next word boundary. If the result is longer than $length
 * characters it is cut back to a word boundary so that, with the trailing
 * ellipsis, it is no longer than $length (for $length >= 3).
 *
 * @param string $text   Text to trim; any HTML tags are replaced by spaces.
 * @param int    $start  Offset to start the extract from. It is applied with
 *                       substr(), i.e. as a byte offset into the stripped text.
 * @param int    $length Maximum length of the returned string, in characters.
 *
 * @return string The trimmed text, with '...' on whichever ends were cut.
 */
function trim_characters($text, $start, $length)
{
    $text = (string) strip_tags_tospaces($text);

    // preg_*() with the /u modifier fail outright (preg_replace() returns
    // null) when the subject is not valid UTF-8, which used to blank the
    // whole extract. Only ask for UTF-8 matching when the text really is
    // valid; otherwise fall back to byte-wise matching.
    $u = (preg_match('//u', $text) === 1) ? 'u' : '';

    // Split long strings up so they don't go too long.
    // Mainly for URLs which are displayed, but aren't links when trimmed.
    # http://bugs.php.net/bug.php?id=42298 for why \S is repeated 60 times
    # instead of being written as \S{60}.
    $text = rtrim(preg_replace('/' . str_repeat('\\S', 60) . '/' . $u, '$0 ', $text));

    // Otherwise the word boundary matching goes odd...
    $text = preg_replace("/[\n\r]/", " ", $text);

    // Trim start.
    if ($start > 0) {
        $text = (string) substr($text, (int) $start);
        if ($u !== '') {
            // A byte offset can land inside a multibyte character. Drop the
            // orphaned continuation bytes so the text stays valid UTF-8 for
            // the /u patterns and mb_*() calls below.
            $text = preg_replace('/^[\x80-\xBF]+/', '', $text);
        }
        // Word boundary: skip ahead so we don't start part-way through a word.
        if (preg_match("/.+?\\b(.*)/" . $u, $text, $matches)) {
            // Strip spare space at the start.
            $text = ltrim($matches[1]);
        }
        $text = '...' . $text;
    }

    // Trim end.
    if (mb_strlen($text, 'UTF-8') > $length) {
        // Allow space for ellipsis.
        $text = mb_substr($text, 0, $length - 3, 'UTF-8');
        // Word boundary: back up so we don't end part-way through a word.
        if (preg_match("/(.*)\\b.+/" . $u, $text, $matches)) {
            // Strip spare space at the end.
            $text = rtrim($matches[1]);
        }
        // We don't want to use the HTML entity for an ellipsis (&#8230;), because then
        // it screws up when we subsequently use htmlentities() to print the returned
        // string!
        $text .= '...';
    }

    return $text;
}
 /**
  * Turn the body of a search hit into a short, highlighted extract.
  *
  * The extract is at most 400 characters long (as enforced by
  * trim_characters()) and is positioned so that the first search word
  * reported by the global $SEARCHENGINE sits roughly in the middle.
  *
  * @param string $body Body text of the matching item; may contain HTML.
  *
  * @return string The trimmed extract with the search words highlighted.
  */
 function prepare_search_result_for_display($body)
 {
     global $SEARCHENGINE;

     // Size of the extract, in characters.
     $extract_length = 400;

     // strip_tags() would glue neighbouring words together because it does
     // not leave a space where a tag was, so use our own variant.
     $plain = strip_tags_tospaces($body);

     // Begin half an extract before the first search word, clamped so we
     // never start before the beginning of the text.
     $first_hit = $SEARCHENGINE->position_of_first_word($plain);
     $offset = $first_hit - $extract_length / 2;
     $offset = $offset < 0 ? 0 : $offset;

     // Cut to position and length (ellipses are added for us), then mark
     // up the search words.
     $trimmed = trim_characters($plain, $offset, $extract_length);

     return $SEARCHENGINE->highlight($trimmed);
 }
Example #4
0
/**
 * Strip HTML from a string and trim it to an extract, adding ellipses.
 *
 * Pass it a string, a numeric start position and a numeric length.
 * If the start position is > 0, the string will be trimmed to start at the
 * nearest word boundary after that position.
 * If the string is then longer than $length, it will be trimmed back to the
 * last whitespace below that length.
 * If either end is trimmed, ellipses will be added.
 * The modified string is then returned - its *maximum* length is $length.
 * HTML is always stripped (must be for trimming to prevent broken tags).
 *
 * $start and $length are measured in bytes (substr()/strlen()); when the
 * text is valid UTF-8 the cuts are tidied up so that no multibyte character
 * is ever left half-present in the result.
 *
 * @param string $text       Text to trim; any HTML tags are replaced by spaces.
 * @param int    $start      Byte offset to start the extract from.
 * @param int    $length     Maximum length of the returned string, in bytes.
 * @param int    $url_length Runs of non-space characters this long get a
 *                           space inserted after them. Values below 1
 *                           disable the splitting.
 *
 * @return string The trimmed text, with '...' on whichever ends were cut.
 */
function trim_characters($text, $start, $length, $url_length = 60)
{
    $text = (string) strip_tags_tospaces($text);

    // Use UTF-8 aware matching whenever the text is valid UTF-8, so that
    // \S, \s and \b work on whole characters instead of single bytes. With
    // invalid input the /u modifier would make every preg_*() call fail, so
    // fall back to byte-wise matching there.
    $u = (preg_match('//u', $text) === 1) ? 'u' : '';

    // Split long strings up so they don't go too long.
    // Mainly for URLs which are displayed, but aren't links when trimmed.
    // The cast keeps anything but a plain number out of the pattern.
    $url_length = (int) $url_length;
    if ($url_length > 0) {
        $text = preg_replace('/(\\S{' . $url_length . '})/' . $u, "\$1 ", $text);
    }

    // Otherwise the word boundary matching goes odd...
    $text = preg_replace("/[\n\r]/", " ", $text);

    // Trim start.
    if ($start > 0) {
        $text = (string) substr($text, (int) $start);
        if ($u !== '') {
            // The byte offset may have landed inside a multibyte character;
            // drop its orphaned continuation bytes.
            $text = preg_replace('/^[\x80-\xBF]+/', '', $text);
        }
        // Word boundary.
        if (preg_match("/.+?\\b(.*)/" . $u, $text, $matches)) {
            // Strip spare space at the start.
            $text = preg_replace("/^\\s/" . $u, '', $matches[1]);
        }
        $text = '...' . $text;
    }

    // Trim end.
    if (strlen($text) > $length) {
        // Allow space for ellipsis.
        $text = (string) substr($text, 0, $length - 3);
        if ($u !== '') {
            // Likewise remove a multibyte character that was cut short at
            // the end: a lead byte followed by too few continuation bytes.
            $text = preg_replace(
                '/(?:[\xC0-\xDF]|[\xE0-\xEF][\x80-\xBF]?|[\xF0-\xF7][\x80-\xBF]{0,2})\z/',
                '',
                $text
            );
        }
        // Word boundary.
        if (preg_match("/(.*)\\s.+/" . $u, $text, $matches)) {
            // Strip spare space at the end.
            $text = preg_replace("/\\s\$/" . $u, '', $matches[1]);
        }
        // We don't want to use the HTML entity for an ellipsis (&#8230;), because then
        // it screws up when we subsequently use htmlentities() to print the returned
        // string!
        $text .= '...';
    }

    return $text;
}