/**
 * Calculates and stores the correlated tags of all tags.
 * The correlations are stored in the 'tag_correlation' table.
 *
 * Two tags are correlated if they appear together a lot.
 * Ex.: Users tagged with "computers" will probably also be tagged with "algorithms".
 *
 * The rationale for the 'tag_correlation' table is performance.
 * It works as a cache for a potentially heavy load query done at the 'tag_instance' table.
 * So, the 'tag_correlation' table stores redundant information derived from the 'tag_instance' table.
 *
 * @global moodle_database $DB
 * @param int $mincorrelation Two tags are only reported as correlated when they are
 *                            attached to the same item more than $mincorrelation times.
 * @return void
 */
function tag_compute_correlations($mincorrelation = 2) {
    global $DB;

    // This query fetches one row for every individual tag correlation, ordered by
    // tag id. We then walk the rows, collecting the correlations of each tag id.
    //
    // The inner query pairs every tag with each other tag attached to the same item
    // (same itemtype and itemid) and keeps only the pairs that occur together more
    // than :mincorrelation times. The aggregation lives in a subquery so that the
    // outer query can pick up co.id without placing a non-grouped column next to a
    // GROUP BY, which strict databases (e.g. PostgreSQL) reject.
    //
    // The fields returned are as follows:
    //   tagid         : The id of the tag the correlation belongs to.
    //   correlation   : The id of the tag that correlates to the above tagid.
    //   occurrences   : How many items carry both tags; only used for ordering, so
    //                   the strongest correlations of a tag come first.
    //   correlationid : The id of the row in the tag_correlation table that relates
    //                   to tagid; NULL if there is no existing correlation row.
    $sql = 'SELECT pairs.tagid, pairs.correlation, pairs.occurrences, co.id AS correlationid
              FROM (
                   SELECT ta.tagid, tb.tagid AS correlation, COUNT(*) AS occurrences
                     FROM {tag_instance} ta
                     JOIN {tag_instance} tb ON (ta.itemtype = tb.itemtype AND ta.itemid = tb.itemid AND ta.tagid <> tb.tagid)
                 GROUP BY ta.tagid, tb.tagid
                   HAVING COUNT(*) > :mincorrelation
                   ) pairs
         LEFT JOIN {tag_correlation} co ON co.tagid = pairs.tagid
          ORDER BY pairs.tagid ASC, pairs.occurrences DESC, pairs.correlation ASC';
    $rs = $DB->get_recordset_sql($sql, array('mincorrelation' => $mincorrelation));

    // Set up an empty tag correlation object. Its null tagid guarantees that the
    // very first row is treated as the start of a new tag.
    $tagcorrelation = new stdClass();
    $tagcorrelation->id = null;
    $tagcorrelation->tagid = null;
    $tagcorrelation->correlatedtags = array();

    // Iterate each row of the result set and build them into tag correlations.
    foreach ($rs as $row) {
        if ($row->tagid != $tagcorrelation->tagid) {
            // The tag id has changed, so every correlation of the previous tag has
            // been collected; hand it over for processing. On the first row this
            // passes the empty placeholder object.
            // NOTE(review): tag_process_computed_correlation() is defined elsewhere
            // and is expected to ignore an object without a tagid - confirm.
            tag_process_computed_correlation($tagcorrelation);

            // Start a fresh correlation object for the tag of the current record.
            $tagcorrelation = new stdClass();
            $tagcorrelation->id = $row->correlationid;
            $tagcorrelation->tagid = $row->tagid;
            $tagcorrelation->correlatedtags = array();
        }
        $tagcorrelation->correlatedtags[] = $row->correlation;
    }

    // The loop only processes a tag when the next tag starts, so the last tag
    // still has to be processed here.
    tag_process_computed_correlation($tagcorrelation);

    // Close the recordset.
    $rs->close();
}
/**
 * Rebuilds the cached tag correlations held in the 'tag_correlation' table.
 *
 * Two tags count as correlated when they are frequently attached to the same
 * item, e.g. users tagged with "computers" will probably also be tagged with
 * "algorithms".
 *
 * The 'tag_correlation' table exists purely for performance: it caches the
 * outcome of a potentially heavy query against 'tag_instance', so everything
 * stored in it is redundant information derived from 'tag_instance'.
 *
 * @global moodle_database $DB
 * @param int $mincorrelation A pair of tags is only picked up when the two tags
 *                            occur together more than $mincorrelation times.
 * @return void
 */
function tag_compute_correlations($mincorrelation = 2) {
    global $DB;

    // One row comes back per (tag, correlated tag) pair, sorted by tag id so that
    // all pairs of a tag are adjacent. Columns:
    //   tagid         : the tag the pair belongs to
    //   correlation   : the tag that correlates with tagid
    //   ocurrences    : number of items carrying both tags (drives the ordering)
    //   correlationid : id of the existing tag_correlation row for tagid, or NULL
    //                   when the tag has no row yet
    $sql = 'SELECT pairs.tagid, pairs.correlation, pairs.ocurrences, co.id AS correlationid FROM ( SELECT ta.tagid, tb.tagid AS correlation, COUNT(*) AS ocurrences FROM {tag_instance} ta JOIN {tag_instance} tb ON (ta.itemtype = tb.itemtype AND ta.itemid = tb.itemid AND ta.tagid <> tb.tagid) GROUP BY ta.tagid, tb.tagid HAVING COUNT(*) > :mincorrelation ) pairs LEFT JOIN {tag_correlation} co ON co.tagid = pairs.tagid ORDER BY pairs.tagid ASC, pairs.ocurrences DESC, pairs.correlation ASC';
    $pairs = $DB->get_recordset_sql($sql, array('mincorrelation' => $mincorrelation));

    // Ids of the tag_correlation rows confirmed during this run; any row whose id
    // is not in this list gets deleted at the end.
    $keepids = array();

    // Accumulator for the tag currently being collected. It starts out empty, with
    // a null tagid, so the first row always opens a new tag.
    $current = (object) array(
        'id' => null,
        'tagid' => null,
        'correlatedtags' => array(),
    );

    foreach ($pairs as $pair) {
        if ($pair->tagid != $current->tagid) {
            // A new tag begins: everything gathered for the previous one is
            // complete, so pass it to tag_process_computed_correlation() (defined
            // elsewhere) and remember the id it reports, if any.
            $storedid = tag_process_computed_correlation($current);
            if ($storedid) {
                $keepids[] = $storedid;
            }

            // Fresh accumulator for the tag of this row.
            $current = (object) array(
                'id' => $pair->correlationid,
                'tagid' => $pair->tagid,
                'correlatedtags' => array(),
            );
        }

        // Record this row's correlated tag against the current tag.
        $current->correlatedtags[] = $pair->correlation;
    }

    // The loop only hands a tag over when the next one starts, so the final tag
    // is still pending at this point.
    $storedid = tag_process_computed_correlation($current);
    if ($storedid) {
        $keepids[] = $storedid;
    }

    $pairs->close();

    // Drop every cached correlation that was not identified in this run.
    if (!empty($keepids)) {
        // Passing false as the last argument asks for the negated (not in / not
        // equal) form of the condition.
        list($notinsql, $notinparams) = $DB->get_in_or_equal($keepids, SQL_PARAMS_NAMED, 'param0000', false);
        $DB->delete_records_select('tag_correlation', 'id ' . $notinsql, $notinparams);
    } else {
        // Nothing was identified at all, so the whole table is stale.
        $DB->delete_records('tag_correlation');
    }
}
/**
 * Calculates and stores the correlated tags of all tags.
 * The correlations are stored in the 'tag_correlation' table.
 *
 * Two tags are correlated if they appear together a lot.
 * Ex.: Users tagged with "computers" will probably also be tagged with "algorithms".
 *
 * The rationale for the 'tag_correlation' table is performance.
 * It works as a cache for a potentially heavy load query done at the 'tag_instance' table.
 * So, the 'tag_correlation' table stores redundant information derived from the 'tag_instance' table.
 *
 * @param number $mincorrelation Two tags are only treated as correlated when they are
 *                               attached to the same item more than this many times
 *                               (optional, default is 2). The value is cast to an integer.
 */
function tag_compute_correlations($mincorrelation = 2) {
    global $CFG;

    // The value is interpolated straight into the SQL below, so force it to an integer.
    $mincorrelation = (int) $mincorrelation;

    // This query fetches a row from the database for every individual tag
    // correlation. We then need to process the rows collecting the correlations
    // for each tag id.
    // The fields used by this query are as follows:
    //   tagid         : This is the tag id the correlation belongs to.
    //   correlation   : This is the tag id that correlates to the above tagid field.
    //   ocurrences    : How many items carry both tags; a pair is only returned when
    //                   this is greater than $mincorrelation.
    //   correlationid : This is the id of the row in the tag_correlation table that
    //                   relates to the tagid field and will be NULL if there are no
    //                   existing correlations
    $sql = "SELECT pairs.tagid, pairs.correlation, pairs.ocurrences, co.id AS correlationid
              FROM (
                   SELECT ta.tagid, tb.tagid AS correlation, COUNT(*) AS ocurrences
                     FROM {$CFG->prefix}tag_instance ta
                     JOIN {$CFG->prefix}tag_instance tb ON (ta.itemtype = tb.itemtype AND ta.itemid = tb.itemid AND ta.tagid <> tb.tagid)
                 GROUP BY ta.tagid, tb.tagid
                   HAVING COUNT(*) > {$mincorrelation}
                   ) pairs
         LEFT JOIN {$CFG->prefix}tag_correlation co ON co.tagid = pairs.tagid
          ORDER BY pairs.tagid ASC, pairs.ocurrences DESC, pairs.correlation ASC";
    $rs = get_recordset_sql($sql);

    if (!$rs) {
        // The query itself failed, which is different from "no correlations
        // found". Bail out without touching the table: carrying on would leave
        // $correlations empty and delete every cached correlation.
        return;
    }

    // Set up an empty tag correlation object. Its null tagid guarantees that the
    // very first row is treated as the start of a new tag.
    $tagcorrelation = new stdClass();
    $tagcorrelation->id = null;
    $tagcorrelation->tagid = null;
    $tagcorrelation->correlatedtags = array();

    // We store each correlation id in this array so we can remove any correlations
    // that no longer exist.
    $correlations = array();

    // Iterate each row of the result set and build them into tag correlations.
    while ($row = rs_fetch_next_record($rs)) {
        if ($row->tagid != $tagcorrelation->tagid) {
            // The tag id has changed so it is now time to process the tag
            // correlation information we have. On the first row this passes the
            // empty placeholder object.
            // NOTE(review): tag_process_computed_correlation() is defined elsewhere;
            // it is expected to return a false value for the placeholder - confirm.
            $tagcorrelationid = tag_process_computed_correlation($tagcorrelation);
            if ($tagcorrelationid) {
                $correlations[] = $tagcorrelationid;
            }

            // Now we reset the tag correlation object so we can reuse it and set it
            // up for the current record.
            $tagcorrelation = new stdClass();
            $tagcorrelation->id = $row->correlationid;
            $tagcorrelation->tagid = $row->tagid;
            $tagcorrelation->correlatedtags = array();
        }
        $tagcorrelation->correlatedtags[] = $row->correlation;
    }

    // Update the current correlation after the last record: the loop only
    // processes a tag once the next one starts.
    $tagcorrelationid = tag_process_computed_correlation($tagcorrelation);
    if ($tagcorrelationid) {
        $correlations[] = $tagcorrelationid;
    }

    // Close the recordset
    rs_close($rs);

    // Remove any correlations that weren't just identified
    if (empty($correlations)) {
        // The query succeeded but found no correlations, so delete any in the database
        delete_records('tag_correlation');
    } else {
        delete_records_select('tag_correlation', 'id NOT IN (' . join(',', $correlations) . ')');
    }
}