/**
  * Test that highlighting a search term wraps the matching word in the link
  * text while leaving the same word inside the link's title attribute alone.
  *
  * @group xapian
  */
 public function testSearchLink()
 {
     // The word "Test" occurs twice in the title attribute and once in the link text.
     $input = '<a href="/mp/?m=40584" title="Our page on Mr Test - \'the Member for Birmingham (Mr Test)\'">Mr Test</a>';

     // Only the occurrence in the link text should gain the highlight span.
     $expected = '<a href="/mp/?m=40584" title="Our page on Mr Test - \'the Member for Birmingham (Mr Test)\'">Mr <span class="hi">Test</span></a>';

     $engine = new SEARCHENGINE('test');
     $this->assertEquals($expected, $engine->highlight($input));
 }
Esempio n. 2
0
 /**
  * Test that highlighting a quoted (phrase) search term marks the word in the
  * visible text but skips the same word inside link title attributes.
  *
  * @group xapian
  */
 public function testSearchPhraseHighlightingInTags()
 {
     // Source markup: "Shabana" appears twice in a title attribute and once in link text.
     $input = '<p pid="b.893.4/1">On a point of order, Mr <a href="/glossary/?gl=21" title="The Speaker is an MP who has been elected to act as Chairman during debates..." class="glossary">Speaker</a>. In yesterday&#8217;s Finance Bill debate, <a href="/mp/?m=40084" title="Our page on Shabana Mahmood - \'the hon. Member for Birmingham, Ladywood (Shabana Mahmood)\'">Shabana Mahmood</a> said that the tax gap was 32 billion when the previous Government left office and that it has now gone up to 35 billion. Official Her Majesty&#8217;s Revenue and Customs figures show the tax gap was actually 42 billion when Labour left office, so there has been a fall of 7 billion under this Government';

     // Expected markup: only the link-text occurrence is wrapped in the highlight span.
     $expected = '<p pid="b.893.4/1">On a point of order, Mr <a href="/glossary/?gl=21" title="The Speaker is an MP who has been elected to act as Chairman during debates..." class="glossary">Speaker</a>. In yesterday&#8217;s Finance Bill debate, <a href="/mp/?m=40084" title="Our page on Shabana Mahmood - \'the hon. Member for Birmingham, Ladywood (Shabana Mahmood)\'"><span class="hi">Shabana</span> Mahmood</a> said that the tax gap was 32 billion when the previous Government left office and that it has now gone up to 35 billion. Official Her Majesty&#8217;s Revenue and Customs figures show the tax gap was actually 42 billion when Labour left office, so there has been a fall of 7 billion under this Government';

     $engine = new SEARCHENGINE('"Shabana"');
     $this->assertEquals($expected, $engine->highlight($input));
 }
 /**
  * Build the template data for a section (or single speech) page.
  *
  * Fetches the rows through $this->list->display(), rewrites <phrase> markup
  * into links, applies glossary links and search-term highlighting to the
  * bodies, then decorates each speech row (votes, annotation link, mentions,
  * video) and fills in page-level fields such as heading, intro, location,
  * email alert text and the link to the debate day.
  *
  * Side effects visible here: may call redirect() when the list raises a
  * RedirectException, and sends a 404 header and exits when the list
  * returns no 'info'.
  *
  * @param array $args Optional overrides for 'gid', 's' and 'member_id';
  *                    any key not supplied is read from the HTTP request.
  *
  * @return array The data array from the list, extended with the keys set below.
  */
 protected function display_section_or_speech($args = array())
 {
     global $DATA, $this_page, $THEUSER;
     # += as we *don't* want to override any already supplied argument
     $args += array('gid' => get_http_var('id'), 's' => get_http_var('s'), 'member_id' => get_http_var('m'));
     if (preg_match('/speaker:(\\d+)/', get_http_var('s'), $mmm)) {
         $args['person_id'] = $mmm[1];
     }
     try {
         $data = $this->list->display('gid', $args, 'none');
     } catch (\RedirectException $e) {
         $URL = new \URL($this->major_data['page_all']);
         if ($this->major == 6) {
             # Magically (as in I can't remember quite why), pbc_clause will
             # contain the new URL without any change...
             $URL->remove(array('id'));
         } else {
             $URL->insert(array('id' => $e->getMessage()));
         }
         # put the search term back in so highlighting works.
         # NB: as we don't see the # part of the URL we lose this :(
         if ($args['s'] !== '') {
             $URL->insert(array('s' => $args['s']));
         }
         # NOTE(review): presumably redirect() does not return; if it did,
         # $data would be undefined below - confirm.
         redirect($URL->generate('none'));
     }
     $data['individual_item'] = $this->list->commentspage == $this_page;
     if ($data['individual_item']) {
         $COMMENTLIST = new \COMMENTLIST();
         $args['user_id'] = get_http_var('u');
         $args['epobject_id'] = $this->list->epobject_id();
         $data['comments']['object'] = $COMMENTLIST;
         $data['comments']['args'] = $args;
         $data['comments']['commentdata'] = array('epobject_id' => $this->list->epobject_id(), 'gid' => get_http_var('id'), 'return_page' => $this_page);
     }
     if (!isset($data['info'])) {
         header("HTTP/1.0 404 Not Found");
         exit;
         # XXX
     }
     # Okay, let's set up highlighting and glossarisation
     $SEARCHENGINE = null;
     if (isset($data['info']['searchstring']) && $data['info']['searchstring'] != '') {
         $SEARCHENGINE = new \SEARCHENGINE($data['info']['searchstring']);
     }
     // Before we print the body text we need to insert glossary links
     // and highlight search string words.
     $speeches = 0;
     $bodies = array();
     foreach ($data['rows'] as $row) {
         $htype = $row['htype'];
         // htype 12 and 13 are the row types counted as speeches.
         if ($htype == 12 || $htype == 13) {
             $speeches++;
         }
         $body = $row['body'];
         // "Hon. friend" references become links to the member's page.
         $body = preg_replace('#<phrase class="honfriend" id="uk.org.publicwhip/member/(\\d+)" name="([^"]*?)">(.*?\\s*\\((.*?)\\))</phrase>#', '<a href="/mp/?m=$1" title="Our page on $2 - \'$3\'">$4</a>', $body);
         // Official Report references become search links, relabelled as Hansard.
         $body = preg_replace_callback('#<phrase class="offrep" id="(.*?)/(\\d+)-(\\d+)-(\\d+)\\.(.*?)">(.*?)</phrase>#', function ($matches) {
             return '<a href="/search/?pop=1&s=date:' . $matches[2] . $matches[3] . $matches[4] . '+column:' . $matches[5] . '+section:' . $matches[1] . '">' . str_replace("Official Report", "Hansard", $matches[6]) . '</a>';
         }, $body);
         $bodies[] = $body;
     }
     // Do all this unless the glossary is turned off in the URL
     if (get_http_var('ug') != 1) {
         // And glossary phrases
         twfy_debug_timestamp('Before glossarise');
         $args['sort'] = "regexp_replace";
         $GLOSSARY = new \GLOSSARY($args);
         $bodies = $GLOSSARY->glossarise($bodies, 1);
         twfy_debug_timestamp('After glossarise');
     }
     if ($SEARCHENGINE) {
         // We have some search terms to highlight.
         twfy_debug_timestamp('Before highlight');
         $bodies = $SEARCHENGINE->highlight($bodies);
         twfy_debug_timestamp('After highlight');
     }
     $first_speech = null;
     $data['section_title'] = '';
     $subsection_title = '';
     // The number of rows does not change inside the loop, so count once.
     $row_count = count($data['rows']);
     for ($i = 0; $i < $row_count; $i++) {
         $row = $data['rows'][$i];
         $htype = $row['htype'];
         // HPOS should be defined below if it's needed; otherwise default to 0
         // NOTE(review): this is reset on every iteration and only set for
         // heading rows (htype 10/11), so the speech branch below always
         // passes 0 to get_video_html() - confirm that is intended.
         $heading_hpos = 0;
         if ($htype == 10) {
             $data['section_title'] = $row['body'];
             $heading_hpos = $row['hpos'];
         } elseif ($htype == 11) {
             $subsection_title = $row['body'];
             $heading_hpos = $row['hpos'];
         } elseif ($htype == 12) {
             # Splitting out highlighting results back into individual bits
             $data['rows'][$i]['body'] = $bodies[$i];
         }
         if ($htype == 12 || $htype == 13) {
             if (!$first_speech) {
                 $first_speech = $data['rows'][$i];
             }
             # Voting links
             $data['rows'][$i]['voting_data'] = '';
             if (isset($row['votes'])) {
                 $data['rows'][$i]['voting_data'] = $this->generate_votes($row['votes'], $row['epobject_id'], $row['gid']);
             }
             # Annotation link
             if ($this->is_debate_section_page()) {
                 // Build the 'Add an annotation' link.
                 if (!$THEUSER->isloggedin()) {
                     $URL = new \URL('userprompt');
                     $URL->insert(array('ret' => $row['commentsurl']));
                     $data['rows'][$i]['annotation_url'] = $URL->generate();
                 } else {
                     $data['rows'][$i]['annotation_url'] = $row['commentsurl'];
                 }
                 $data['rows'][$i]['commentteaser'] = $this->generate_commentteaser($row);
             }
             if (isset($row['mentions'])) {
                 $data['rows'][$i]['mentions'] = $this->get_question_mentions_html($row['mentions']);
             }
             if ($this->major == 1) {
                 $data['rows'][$i]['video'] = $this->get_video_html($row, $heading_hpos, $speeches);
             }
         }
     }
     // The last subsection title wins as heading, with the section title as
     // intro; otherwise the section title is the heading and there is no intro.
     if ($subsection_title) {
         $data['heading'] = $subsection_title;
         $data['intro'] = "{$data['section_title']}";
     } else {
         $data['heading'] = $data['section_title'];
         $data['intro'] = "";
     }
     $country = 'UK';
     if ($this->major == 1) {
         $data['location'] = '&ndash; in the House of Commons';
     } elseif ($this->major == 2) {
         $data['location'] = '&ndash; in Westminster Hall';
     } elseif ($this->major == 3) {
         $data['location'] = 'written question &ndash; answered';
     } elseif ($this->major == 4) {
         $data['location'] = 'written statement &ndash; made';
     } elseif ($this->major == 5) {
         $country = 'NORTHERN IRELAND';
         $data['location'] = '&ndash; in the Northern Ireland Assembly';
     } elseif ($this->major == 6) {
         $data['location'] = '&ndash; in a Public Bill Committee';
     } elseif ($this->major == 7) {
         $country = 'SCOTLAND';
         $data['location'] = '&ndash; in the Scottish Parliament';
     } elseif ($this->major == 8) {
         $country = 'SCOTLAND';
         $data['location'] = '&ndash; Scottish Parliament written question &ndash; answered';
     } elseif ($this->major == 101) {
         $data['location'] = '&ndash; in the House of Lords';
     }
     $data['current_assembly'] = "westminster--debate";
     switch ($country) {
         case "UK":
             $data['current_assembly'] = "westminster--debate";
             break;
         case "SCOTLAND":
             $data['current_assembly'] = "scotland";
             break;
         case "NORTHERN IRELAND":
             $data['current_assembly'] = "ni";
             break;
     }
     if (array_key_exists('text_heading', $data['info'])) {
         // avoid having Clause 1 etc as the alert text search string on PBC pages as it's
         // almost certainly not what the person wants
         if ($this->major == 6) {
             $data['email_alert_text'] = $data['section_title'];
         } else {
             $data['email_alert_text'] = $data['info']['text_heading'];
         }
     } else {
         // The user has requested only part of a debate, so find a suitable title
         if ($subsection_title) {
             $data['intro'] = "Part of {$data['section_title']}";
         } else {
             $data['intro'] = "Part of the debate";
         }
         foreach ($data['rows'] as $row) {
             if ($row['htype'] == 10 || $row['htype'] == 11) {
                 $data['email_alert_text'] = $row['body'];
                 $data['full_debate_url'] = $row['listurl'];
                 break;
             }
         }
     }
     // A partial debate with no heading row leaves the alert text unset;
     // default it so the cleanup below never reads an undefined index.
     if (!isset($data['email_alert_text'])) {
         $data['email_alert_text'] = '';
     }
     // strip a couple of common characters that result in encode junk in the
     // search string
     $data['email_alert_text'] = preg_replace('/(?:[:()\\[\\]]|&#\\d+;)/', '', $data['email_alert_text']);
     // $first_speech stays null when no speech rows exist; read its fields
     // through isset() so we pass null on instead of indexing into null.
     $first_htime = isset($first_speech['htime']) ? $first_speech['htime'] : null;
     $first_hdate = isset($first_speech['hdate']) ? $first_speech['hdate'] : null;
     $data['debate_time_human'] = format_time($first_htime, 'g:i a');
     $data['debate_day_human'] = format_date($first_hdate, 'jS F Y');
     $URL = new \URL($this->list->listpage);
     $URL->insert(array('d' => $first_hdate));
     $URL->remove(array('id'));
     $data['debate_day_link'] = $URL->generate();
     $data['nextprev'] = $DATA->page_metadata($this_page, 'nextprev');
     return $data;
 }
Esempio n. 4
0
	// and highlight search string words.
	
	$bodies = array();
	foreach ($data['rows'] as $row) {
		$bodies[] = $row['body'];
	}
	if (isset($data['info']['glossarise']) && $data['info']['glossarise']) {
		// And glossary phrases
		twfy_debug_timestamp('Before glossarise');
		$bodies = $GLOSSARY->glossarise($bodies, $data['info']['glossarise']);
		twfy_debug_timestamp('After glossarise');
	}
	if ($SEARCHENGINE) {
		// We have some search terms to highlight.
		twfy_debug_timestamp('Before highlight');
		$bodies = $SEARCHENGINE->highlight($bodies);
		twfy_debug_timestamp('After highlight');
	}
	if (isset($data['info']['glossarise']) && ($data['info']['glossarise'] == 1)) {
		// Now we replace the title attributes for the glossarised links
		// to avoid words being highlighted within them.
		twfy_debug_timestamp('Before glossarise_titletags');
		$bodies = $GLOSSARY->glossarise_titletags($bodies, 1);
		twfy_debug_timestamp('After glossarise_titletags');
	}

	$speeches = 0;
	for ($i=0; $i<count($data['rows']); $i++) {
		if ($data['rows'][$i]['htype'] == 12)
			$data['rows'][$i]['body'] = $bodies[$i];
		if ($data['rows'][$i]['htype'] == 12 || $data['rows'][$i]['htype'] == 13)
Esempio n. 5
0
 /**
  * Prepare the row bodies for display: turn <phrase> markup into links,
  * highlight search terms, and add glossary links.
  *
  * Highlighting only runs when $data['info']['searchstring'] is set and
  * non-empty; glossarising runs unless the 'ug' HTTP variable equals 1.
  *
  * @param array $data Must contain 'rows', each with 'htype' and 'body';
  *                    may contain ['info']['searchstring'].
  *
  * @return array Two elements: the processed bodies in row order, and the
  *               number of rows whose htype is 12 or 13.
  */
 private function highlightSpeeches($data)
 {
     $search_terms = isset($data['info']['searchstring']) ? $data['info']['searchstring'] : '';
     $highlighter = $search_terms != '' ? new \SEARCHENGINE($search_terms) : null;

     // "Hon. friend" phrase patterns mapped to their link replacements,
     // applied one after another in this order.
     $honfriend_rewrites = array(
         '#<phrase class="honfriend" id="uk.org.publicwhip/member/(\\d+)" name="([^"]*?)">(.*?\\s*\\((.*?)\\))</phrase>#' => '<a href="/mp/?m=$1" title="Our page on $2 - \'$3\'">$4</a>',
         '#<phrase class="honfriend" name="([^"]*?)" person_id="uk.org.publicwhip/person/(\\d+)">(.*?\\s*\\((.*?)\\))</phrase>#' => '<a href="/mp/?p=$2" title="Our page on $1 - \'$3\'">$4</a>',
     );

     // Official Report references become search links, relabelled as Hansard.
     $offrep_pattern = '#<phrase class="offrep" id="(.*?)/(\\d+)-(\\d+)-(\\d+)\\.(.*?)">(.*?)</phrase>#';
     $offrep_link = function ($m) {
         return sprintf(
             '<a href="/search/?pop=1&s=date:%s%s%s+column:%s+section:%s">%s</a>',
             $m[2],
             $m[3],
             $m[4],
             $m[5],
             $m[1],
             str_replace("Official Report", "Hansard", $m[6])
         );
     };

     $speech_count = 0;
     $texts = array();
     foreach ($data['rows'] as $entry) {
         $type = $entry['htype'];
         $speech_count += ($type == 12 || $type == 13) ? 1 : 0;

         $text = $entry['body'];
         foreach ($honfriend_rewrites as $pattern => $replacement) {
             $text = preg_replace($pattern, $replacement, $text);
         }
         $texts[] = preg_replace_callback($offrep_pattern, $offrep_link, $text);
     }

     // Search terms are highlighted first...
     if ($highlighter !== null) {
         twfy_debug_timestamp('Before highlight');
         $texts = $highlighter->highlight($texts);
         twfy_debug_timestamp('After highlight');
     }

     // ...then glossary links are added, unless switched off in the URL.
     if (get_http_var('ug') != 1) {
         twfy_debug_timestamp('Before glossarise');
         $glossary_args = array('sort' => "regexp_replace");
         $glossary = new \GLOSSARY($glossary_args);
         $texts = $glossary->glossarise($texts, 1);
         twfy_debug_timestamp('After glossarise');
     }

     return array($texts, $speech_count);
 }