/**
 * Legacy filter search over the MVD index table.
 *
 * Turns the filter list into a MySQL boolean full-text query plus optional
 * category / stream / date-range restrictions, runs it, and groups the
 * matching rows per stream into time ranges.
 *
 * Two query shapes are used:
 *  - "top range" mode (only category filters, no full-text terms): a first
 *    query collects up to 200 annotation ranges in the category, a second
 *    query then fetches every row that falls inside one of those ranges;
 *  - base mode: a single query with MATCH ... AGAINST conditions.
 *
 * Side effects: sets $this->limit and $this->offset; once a result query has
 * run it also sets $this->numResults and $this->numResultsFound.
 *
 * @param array|string $filters list of filters, each an array with
 *   't' => filter type ('spoken_by', 'match', 'category', 'date_range',
 *          'stream_name', 'smw_property'),
 *   'a' => 'and' | 'or' | 'not' (anything else / missing is treated as 'and'),
 *   'v' => value, or 'vs' / 've' (m/d/Y start and end day) for 'date_range'.
 *   An empty string yields an empty result.
 * @return array stream_id => list of ranges (arrays with 's', 'e' and 'rows'),
 *   as maintained by MV_Index::insert_merge_range(); empty array on no match.
 */
function doFiltersQuery(&$filters) {
	// $mvStreamFilesTable must be imported here: it is interpolated into the
	// stream-files JOIN below and would otherwise expand to an empty name.
	global $mvIndexTableName, $mvMediaFilesTable, $mvStreamFilesTable, $wgRequest,
		$mvDo_SQL_CALC_FOUND_ROWS, $mvSpokenByInSearchResult, $mvMediaSearchResultsLimit;

	$dbr = wfGetDB(DB_SLAVE);

	// $ftq: full-text query, $ftq_match: free-text terms, $snq: stream-name
	// clause, $toplq / $toplq_cat: category as full-text term / as SQL clause.
	$last_person_aon = $ftq_match = $ftq = $snq = $toplq = $toplq_cat = '';
	$date_range_join = $date_range_where = $date_range_andor = '';

	if ($filters == '') {
		return array();
	}

	$selOpt = $mvDo_SQL_CALC_FOUND_ROWS ? 'SQL_CALC_FOUND_ROWS' : '';
	list($this->limit, $this->offset) = $wgRequest->getLimitOffset(20, 'searchlimit');
	if ($this->limit > $mvMediaSearchResultsLimit) {
		$this->limit = $mvMediaSearchResultsLimit;
	}

	$group_spoken = true;
	$categoryTable = $dbr->tableName('categorylinks');

	foreach ($filters as $f) {
		// Map the filter operator to its boolean-mode prefix ($aon) and its
		// SQL keyword ($asql). Unknown operators fall back to 'and' so both
		// variables are always defined.
		switch (isset($f['a']) ? $f['a'] : 'and') {
			case 'or':
				$aon = '';
				$asql = 'OR';
				break;
			case 'not':
				$aon = '-';
				$asql = 'NOT';
				break;
			default:
				$aon = '+';
				$asql = 'AND';
				break;
		}

		switch (isset($f['t']) ? $f['t'] : '') {
			case 'spoken_by':
				// An OR following a required (+) speaker: relax the earlier
				// speaker terms so the speakers are alternatives.
				if ($last_person_aon == '+' && $aon == '') {
					$ftq = str_replace('+"spoken by', '"spoken by', $ftq);
					$group_spoken = false;
				}
				$ftq .= ' ' . $aon . '"spoken by ' . $dbr->strencode($f['v']) . '"';
				$last_person_aon = $aon;
				break;

			case 'match':
				$ftq_match .= ' ' . $aon . '"' . $dbr->strencode($f['v']) . '"';
				break;

			case 'category':
				// full-text form (used by the base query):
				$toplq .= ' ' . $aon . '"category ' . $dbr->strencode($f['v']) . '" ';
				// table form (used by the top range query).
				// NOTE(review): each category filter replaces the previous
				// clause, and a bare 'NOT' / 'OR' connector is appended after
				// the mvd_type condition as-is — confirm intended semantics.
				$toplq_cat = $asql . " {$categoryTable}.`cl_to`=" . $dbr->addQuotes($f['v']);
				break;

			case 'date_range':
				$date_range_join = ' JOIN `mv_streams` ' .
					'ON `' . $mvIndexTableName . '`.`stream_id` =`mv_streams`.`id` ';
				list($month, $day, $year) = array_pad(explode('/', $f['vs']), 3, 0);
				$sts = mktime(0, 0, 0, (int)$month, (int)$day, (int)$year);
				list($month, $day, $year) = array_pad(explode('/', $f['ve']), 3, 0);
				// end bound is the start of the day after 've'
				$ets = mktime(0, 0, 0, (int)$month, (int)$day + 1, (int)$year);
				$date_range_where .= '( `mv_streams`.`date_start_time` > ' . intval($sts) .
					' AND `mv_streams`.`date_start_time` < ' . intval($ets) . ')';
				$date_range_andor = ' ' . $asql . ' ';
				break;

			case 'stream_name':
				// NOTE(review): a second stream_name filter replaces the
				// accumulated clause with just its connector, so only the last
				// stream is kept; the clause is also placed next to other
				// conditions without a connector of its own. Left as-is until
				// the intended AND/OR semantics are confirmed.
				if ($snq != '') {
					$snq = $asql;
				}
				$stream = mvGetMVStream($f['v']);
				// qualified: the joined stream-files table also has `stream_id`
				$snq .= " `{$mvIndexTableName}`.`stream_id` = " .
					$dbr->addQuotes($stream->getStreamId()) . ' ';
				break;

			case 'smw_property':
				// not implemented
				break;
		}
	}

	$searchindexTable = $dbr->tableName('searchindex');
	$ret_ary = array();

	// join that restricts results to streams that have a low-quality ogg file
	$join_streams_with_low_ogg_sql = "JOIN `{$mvStreamFilesTable}` ON (`{$mvIndexTableName}`.`stream_id` = `{$mvStreamFilesTable}`.`stream_id`) " .
		"JOIN `{$mvMediaFilesTable}` ON (`{$mvStreamFilesTable}`.`file_id`= `{$mvMediaFilesTable}`.`id` AND `{$mvMediaFilesTable}`.`file_desc_msg`='mv_ogg_low_quality') ";

	// optional "spoken by" column and its join, shared by both query shapes
	$spoken_by_select = '';
	$spoken_by_join = '';
	if ($mvSpokenByInSearchResult) {
		$spoken_by_select = ",`smw_relations`.`object_title` as `spoken_by` ";
		$spoken_by_join = "LEFT JOIN `smw_relations` ON (`mv_mvd_index`.`mv_page_id`=`smw_relations`.`subject_id` " .
			"AND `smw_relations`.`relation_title`='Spoken_By') ";
	}

	if ($toplq_cat != '' && $ftq == '') {
		// ---- top range mode: category filter without full-text terms ----
		// @@todo unify top query with ranged query; paging for top level queries
		$sql = "SELECT `mv_page_id` as `id`, `{$mvIndexTableName}`.`stream_id`,`start_time`,`end_time`, `wiki_title`, {$searchindexTable}.`si_text` as `text` " .
			"FROM `{$mvIndexTableName}` " .
			"{$date_range_join} " .
			"JOIN {$categoryTable} ON `{$mvIndexTableName}`.`mv_page_id` = {$categoryTable}.`cl_from` " .
			"{$join_streams_with_low_ogg_sql} " .
			"LEFT JOIN {$searchindexTable} ON `{$mvIndexTableName}`.`mv_page_id` = {$searchindexTable}.`si_page` " .
			"WHERE `mvd_type`='Anno_en' " .
			" {$toplq_cat} " .
			" {$snq} " .
			"{$date_range_andor} {$date_range_where} " .
			"LIMIT 0, 200";
		$top_result = $dbr->query($sql, 'MV_Index:doFiltersQuery_topQ');
		if ($dbr->numRows($top_result) == 0) {
			return array();
		}

		// second query: every row lying inside one of the top-level ranges
		$sql = "SELECT {$selOpt} `mv_page_id` as `id`, `{$mvIndexTableName}`.`stream_id`,`start_time`,`end_time`, `wiki_title`, {$searchindexTable}.`si_text` as `text` " .
			$spoken_by_select .
			"FROM `{$mvIndexTableName}` " .
			$join_streams_with_low_ogg_sql .
			"JOIN {$searchindexTable} ON `{$mvIndexTableName}`.`mv_page_id` = {$searchindexTable}.`si_page` " .
			$spoken_by_join .
			'WHERE ( ';
		$or = '';
		while ($row = $dbr->fetchObject($top_result)) {
			if (!isset($ret_ary[$row->stream_id])) {
				$ret_ary[$row->stream_id] = array();
			}
			// $ftq is always '' in this branch, so the row itself is inserted
			$insertRow = ($ftq == '');
			// mark the row as coming from the top level query
			$row->toplq = true;
			MV_Index::insert_merge_range($ret_ary[$row->stream_id], $ret_ary, $row, $insertRow);
			$sql .= $or . " (`{$mvIndexTableName}`.`stream_id`='{$row->stream_id}' AND " .
				'`start_time`>=' . $row->start_time . ' AND ' .
				'`end_time`<=' . $row->end_time . ' ) ';
			$or = ' OR ';
		}
		$sql .= ') ';
		$sql .= "LIMIT {$this->offset}, {$this->limit} ";
	} else {
		// ---- base mode: full-text query ----
		// category terms become part of the full-text query
		$ftq .= $toplq;

		// No usable condition at all (e.g. empty filter list or only
		// unsupported filter types) would produce "WHERE  LIMIT ...".
		if ($snq == '' && $ftq == '' && $ftq_match == '' && $date_range_where == '') {
			return array();
		}

		$sql = "SELECT {$selOpt} `mv_page_id` as `id`,`{$mvIndexTableName}`.`stream_id`,`start_time`,`end_time`, `wiki_title`, {$searchindexTable}.`si_text` AS `text` " .
			$spoken_by_select .
			"FROM `{$mvIndexTableName}` " .
			"JOIN {$searchindexTable} ON `{$mvIndexTableName}`.`mv_page_id` = {$searchindexTable}.`si_page` " .
			"{$join_streams_with_low_ogg_sql} " .
			"{$date_range_join} " .
			$spoken_by_join .
			"WHERE {$snq} ";

		if ($group_spoken) {
			// single speaker group: free-text terms join the same MATCH
			$ftq .= $ftq_match;
		} elseif ($ftq_match != '') {
			// speakers are alternatives: free-text terms get their own MATCH
			$sql .= "MATCH ( {$searchindexTable}.`si_text` ) " .
				"AGAINST('{$ftq_match}' IN BOOLEAN MODE) ";
			if ($ftq != '') {
				$sql .= ' AND ';
			}
		}
		if ($ftq != '') {
			$sql .= "MATCH ( {$searchindexTable}.`si_text` ) " .
				"AGAINST('{$ftq}' IN BOOLEAN MODE) ";
		}
		// the date range only needs a connector when something precedes it
		if ($date_range_andor != '' && ($snq != '' || $ftq != '')) {
			$sql .= $date_range_andor;
		}
		$sql .= " {$date_range_where} ";
		$sql .= "LIMIT {$this->offset}, {$this->limit} ";
	}

	$result = $dbr->query($sql, 'MV_Index:doFiltersQuery_base');
	$this->numResults = $dbr->numRows($result);
	if ($this->numResults == 0) {
		return array();
	}
	if ($mvDo_SQL_CALC_FOUND_ROWS) {
		$resFound = $dbr->query('SELECT FOUND_ROWS() as `count`;');
		$found = $dbr->fetchObject($resFound);
		$this->numResultsFound = $found->count;
	} else {
		$this->numResultsFound = null;
	}

	// @@TODO hide empty categories (if limit > rows found)
	// group rows by stream and time range
	while ($row = $dbr->fetchObject($result)) {
		if (!isset($ret_ary[$row->stream_id])) {
			$ret_ary[$row->stream_id] = array();
		}
		if (count($ret_ary[$row->stream_id]) == 0) {
			$ret_ary[$row->stream_id][] = array(
				's' => $row->start_time,
				'e' => $row->end_time,
				'rows' => array($row)
			);
		} else {
			MV_Index::insert_merge_range($ret_ary[$row->stream_id], $ret_ary, $row);
		}
	}

	// throw out top level ranges that received no rows
	foreach (array_keys($ret_ary) as $stream_id) {
		foreach ($ret_ary[$stream_id] as $k => $srange) {
			if (count($srange['rows']) == 0) {
				unset($ret_ary[$stream_id][$k]);
			}
		}
	}
	return $ret_ary;
}
/**
 * Unified filter search over the MVD index.
 *
 * Builds one $dbr->select() call from the filter list: speaker / bill /
 * category / free-text filters become a MySQL boolean full-text query against
 * searchindex.si_text, date-range and stream-name filters become plain
 * conditions. Matching rows are then optionally decorated with their
 * categories (categorylinks) and with speaker / bill properties (SMW store).
 *
 * Side effects: sets $this->limit, $this->offset and $this->order; once the
 * query has run it also sets $this->numResults and $this->numResultsFound.
 *
 * @param array|string $filters list of filters, each an array with
 *   't' => filter type ('speech_by', 'spoken_by', 'bill', 'match', 'category',
 *          'date_range', 'stream_name', 'smw_property'),
 *   'a' => 'and' | 'or' | 'not' (anything else / missing is treated as 'and'),
 *   'v' => value, or 'vs' / 've' (m/d/Y start and end day) for 'date_range'.
 *   Filters with an empty value are skipped. An empty string yields an empty
 *   result.
 * @param mixed $metaDataIncludes currently unused
 * @return array mv_page_id => row object (id, stream_id, start_time, end_time,
 *   wiki_title, text, ... plus optional categories / spoken_by / bills);
 *   empty array when no filter was usable or nothing matched.
 */
function doUnifiedFiltersQuery(&$filters, $metaDataIncludes = null) {
	global $wgRequest, $mvDo_SQL_CALC_FOUND_ROWS, $mvMediaSearchResultsLimit;
	global $mvSpokenByInSearchResult, $mvCategoryInSearchResult, $mvBillInSearchResult;
	global $mvDefaultVideoQualityKey;

	$dbr = wfGetDB(DB_SLAVE);

	$conds = $options = array();
	// $ftq: full-text query, $ftq_match: free-text terms, $toplq: category terms
	$last_person_aon = $ftq_match = $ftq = $toplq = '';

	if ($filters == '') {
		return array();
	}

	if ($mvDo_SQL_CALC_FOUND_ROWS) {
		$options[] = 'SQL_CALC_FOUND_ROWS';
	}

	// limit / offset, capped by the configured maximum
	list($this->limit, $this->offset) = $wgRequest->getLimitOffset(20, 'searchlimit');
	if ($this->limit > $mvMediaSearchResultsLimit) {
		$this->limit = $mvMediaSearchResultsLimit;
	}

	// result order; anything unknown falls back to 'recent'
	$this->order = strtolower((string)$wgRequest->getVal('order'));
	if (!in_array($this->order, array('relevant', 'recent', 'viewed'), true)) {
		$this->order = 'recent';
	}

	$mvdIndexTable = $dbr->tableName('mv_mvd_index');
	$searchindexTable = $dbr->tableName('searchindex');
	$streamsTable = $dbr->tableName('mv_streams');
	$streamFilesTable = $dbr->tableName('mv_stream_files');
	$categoryTable = $dbr->tableName('categorylinks');

	$group_spoken = true;
	$valid_filter_count = 0;

	foreach ($filters as $f) {
		// Map the filter operator to its boolean-mode prefix ($aon) and its
		// SQL keyword ($asql). Unknown operators fall back to 'and' so both
		// variables are always defined.
		switch (isset($f['a']) ? $f['a'] : 'and') {
			case 'or':
				$aon = '';
				$asql = 'OR';
				break;
			case 'not':
				$aon = '-';
				$asql = 'NOT';
				break;
			default:
				$aon = '+';
				$asql = 'AND';
				break;
		}
		$value = isset($f['v']) ? $f['v'] : '';

		// "continue 2" is required below: a bare "continue" inside a switch
		// behaves like "break" and raises a warning on PHP >= 7.3.
		switch (isset($f['t']) ? $f['t'] : '') {
			case 'speech_by':
			case 'spoken_by':
				if (trim($value) == '') {
					continue 2;
				}
				$skey = str_replace('_', ' ', $f['t']);
				// An OR following a required (+) speaker: relax the earlier
				// speaker terms so the speakers are alternatives.
				if ($last_person_aon == '+' && $aon == '') {
					$ftq = str_replace('+"' . $skey, '"' . $skey, $ftq);
					$group_spoken = false;
				}
				$ftq .= ' ' . $aon . '"' . $skey . ' ' . $dbr->strencode($value) . '" ';
				$last_person_aon = $aon;
				$valid_filter_count++;
				break;

			case 'bill':
				if (trim($value) == '') {
					continue 2;
				}
				$value = str_replace(array('.', '_', ':'), ' ', $value);
				$ftq .= ' ' . $aon . '"bill ' . $dbr->strencode($value) . '" ';
				$last_person_aon = $aon;
				$valid_filter_count++;
				break;

			case 'match':
				if (trim($value) == '') {
					continue 2;
				}
				foreach (explode(' ', $value) as $word) {
					// repeated spaces yield empty words; a bare operator
					// would otherwise end up in the query
					if ($word === '') {
						continue;
					}
					// always space-separated so terms of consecutive match
					// filters are not glued together
					$ftq_match .= ' ' . $aon . $dbr->strencode($word);
				}
				$valid_filter_count++;
				break;

			case 'category':
				if (trim($value) == '') {
					continue 2;
				}
				// full text based category query
				$toplq .= ' ' . $aon . '"category ' . $dbr->strencode($value) . '" ';
				$valid_filter_count++;
				break;

			case 'date_range':
				if (!isset($f['vs'], $f['ve'])) {
					continue 2;
				}
				list($month, $day, $year) = array_pad(explode('/', $f['vs']), 3, 0);
				$sts = mktime(0, 0, 0, (int)$month, (int)$day, (int)$year);
				list($month, $day, $year) = array_pad(explode('/', $f['ve']), 3, 0);
				// end bound is the start of the day after 've'
				$ets = mktime(0, 0, 0, (int)$month, (int)$day + 1, (int)$year);
				// note: and/or/not is ignored for date ranges for now
				$conds[] = ' ( `mv_streams`.`date_start_time` > ' . $dbr->addQuotes($sts) .
					' AND `mv_streams`.`date_start_time` < ' . $dbr->addQuotes($ets) . ') ';
				$valid_filter_count++;
				break;

			case 'stream_name':
				if (trim($value) == '') {
					continue 2;
				}
				$stream = mvGetMVStream($value);
				// $conds entries are AND-ed together by select(), so a leading
				// AND / OR keyword is invalid here; only NOT is meaningful.
				// The column is qualified because mv_stream_files, joined
				// below, has a stream_id column too.
				$conds[] = ($asql == 'NOT' ? 'NOT ' : '') .
					$mvdIndexTable . '.stream_id = ' . $dbr->addQuotes($stream->getStreamId());
				$valid_filter_count++;
				break;

			case 'smw_property':
				// @@todo merge doUnifiedFiltersQuery with SMW Ask; more complicated query work needed
				break;
		}
	}
	if ($valid_filter_count == 0) {
		return array();
	}

	// category terms become part of the full-text query
	$ftq .= $toplq;

	$vars = "mv_page_id as id," . $mvdIndexTable . '.stream_id, (' .
		$streamsTable . '.date_start_time+' . $mvdIndexTable . '.start_time) AS mvd_date_start_time, ' .
		'start_time, end_time, view_count, wiki_title,' .
		$searchindexTable . '.si_text AS `text` ';

	$from_tables = $mvdIndexTable . ' ';
	$from_tables .= 'JOIN ' . $searchindexTable .
		' ON (' . $mvdIndexTable . '.mv_page_id = ' . $searchindexTable . '.si_page ) ';
	// mv_streams supplies date_start_time for the select list and date filters
	$from_tables .= 'LEFT JOIN ' . $streamsTable .
		' ON (' . $mvdIndexTable . '.stream_id = ' . $streamsTable . '.id ) ';
	// restrict to streams that have a file of the default video quality
	$from_tables .= 'JOIN ' . $streamFilesTable .
		' ON ' . '( ' . $mvdIndexTable . '.stream_id = ' . $streamFilesTable . '.stream_id ' .
		' AND (' . $streamFilesTable . '.file_desc_msg = ' . $dbr->addQuotes($mvDefaultVideoQualityKey) . ')' .
		') ';

	// limit to ht_en & anno_en layers (for now)
	$conds[] = ' ' . $mvdIndexTable . '.mvd_type = \'ht_en\' ' .
		' OR ' . $mvdIndexTable . '.mvd_type=\'anno_en\' ';

	if ($group_spoken) {
		// single speaker group: free-text terms join the same MATCH
		$ftq .= $ftq_match;
	} elseif ($ftq_match != '') {
		// speakers are alternatives: free-text terms get their own MATCH
		$conds[] = ' MATCH ( ' . $searchindexTable . '.si_text )' .
			' AGAINST(\'' . $ftq_match . '\' IN BOOLEAN MODE) ';
	}
	if ($ftq != '') {
		$conds[] = "\tMATCH ( " . $searchindexTable . '.si_text ) ' .
			' AGAINST(\'' . $ftq . '\' IN BOOLEAN MODE) ';
	}

	switch ($this->order) {
		case 'relevant':
			// @@todo need to add in some relevance metrics
			break;
		case 'recent':
			$options['ORDER BY'] = 'mvd_date_start_time DESC ';
			break;
		case 'viewed':
			$options['ORDER BY'] = 'view_count DESC ';
			break;
	}
	$options['LIMIT'] = $this->limit;
	$options['OFFSET'] = $this->offset;
	$options['GROUP BY'] = 'mv_page_id';

	$result = $dbr->select($from_tables, $vars, $conds, __METHOD__, $options);

	$this->numResults = $dbr->numRows($result);
	if ($this->numResults == 0) {
		return array();
	}
	if ($mvDo_SQL_CALC_FOUND_ROWS) {
		$resFound = $dbr->query('SELECT FOUND_ROWS() as `count`;');
		$found = $dbr->fetchObject($resFound);
		$this->numResultsFound = $found->count;
	} else {
		$this->numResultsFound = null;
	}

	// @@TODO hide empty categories (if limit > rows found)
	// Index rows by page id. The per-stream range grouping is still computed
	// through MV_Index::insert_merge_range() but is not part of the return
	// value. NOTE(review): kept in case that helper has effects this method
	// relies on — confirm and drop if not.
	$ret_ary = $stream_groups = array();
	while ($row = $dbr->fetchObject($result)) {
		$ret_ary[$row->id] = $row;
		if (!isset($stream_groups[$row->stream_id])) {
			$stream_groups[$row->stream_id] = array();
		}
		if (count($stream_groups[$row->stream_id]) == 0) {
			$stream_groups[$row->stream_id][] = array(
				's' => $row->start_time,
				'e' => $row->end_time,
				'rows' => array($row)
			);
		} else {
			MV_Index::insert_merge_range($stream_groups[$row->stream_id], $stream_groups, $row);
		}
	}

	if ($mvCategoryInSearchResult) {
		// one lookup for the categories of all result pages
		$or = '';
		$cat_conds = '';
		foreach ($ret_ary as $row) {
			if (!isset($row->categories)) {
				$row->categories = array();
			}
			$cat_conds .= $or . ' cl_from =' . $dbr->addQuotes($row->id);
			$or = ' OR ';
		}
		$cat_tables = $categoryTable;
		$cat_tables .= ' LEFT JOIN ' . $mvdIndexTable .
			' ON ( ' . $categoryTable . '.cl_from = ' . $mvdIndexTable . '.mv_page_id' . ' ) ';
		// max average 5 categories per page
		$cat_options = array('LIMIT' => 2000);
		$result_cat = $dbr->select($cat_tables, array('cl_from', 'cl_to'), $cat_conds, __METHOD__, $cat_options);
		while ($cat_row = $dbr->fetchObject($result_cat)) {
			if (isset($ret_ary[$cat_row->cl_from])) {
				$ret_ary[$cat_row->cl_from]->categories[$cat_row->cl_to] = true;
			}
		}
	}

	if ($mvSpokenByInSearchResult || $mvBillInSearchResult) {
		// One SMW store lookup per row and property: slow for large result
		// sets, but a joined query is very complicated for SMW >= 1.2.
		// @@todo merge with semantic wiki Ask plus keywords for fulltext search
		$smwStore = smwfGetStore();
		// rows are objects, so they can be updated without iterating by reference
		foreach ($ret_ary as $row) {
			$smw_properties = array();
			$title_lc = strtolower($row->wiki_title);
			// the speaker property name depends on the layer prefix of the title
			if ($mvSpokenByInSearchResult && substr($title_lc, 0, 2) == 'ht') {
				$smw_properties[] = 'Spoken_By';
			}
			if ($mvSpokenByInSearchResult && substr($title_lc, 0, 4) == 'anno') {
				$smw_properties[] = 'Speech_by';
			}
			if ($mvBillInSearchResult) {
				$smw_properties[] = 'Bill';
			}
			$rowTitle = Title::newFromText($row->wiki_title, MV_NS_MVD);
			foreach ($smw_properties as $propKey) {
				$propTitle = Title::newFromText($propKey, SMW_NS_PROPERTY);
				$smwProps = $smwStore->getPropertyValues($rowTitle, $propTitle);
				if (count($smwProps) == 0) {
					continue;
				}
				if ($propKey == 'Bill') {
					$row->bills = array();
					foreach ($smwProps as $prop) {
						$row->bills[$prop->getXSDValue()] = true;
					}
				} else {
					// Spoken_By / Speech_by: only the first value is used
					$first = current($smwProps);
					$row->spoken_by = $first->getXSDValue();
				}
			}
		}
	}

	// strip the 'u800' / 'u82e' sequences from the indexed text
	// (presumably artifacts of the searchindex text encoding — TODO confirm)
	foreach ($ret_ary as $k => $v) {
		$ret_ary[$k]->text = str_replace(array('u800', 'u82e'), '', $v->text);
	}
	return $ret_ary;
}