foreach ($nc as $n) { $edit->userid = $n->firstChild->nodeValue; if (!in_array($edit->userid, $result->user_id_list)) { array_push($result->user_id_list, $edit->userid); } } // name $nc = $xpath->query("wiki:contributor/wiki:username", $node); foreach ($nc as $n) { $edit->username = $n->firstChild->nodeValue; $result->user_name_list[$edit->userid] = $edit->username; } // text $nc = $xpath->query("wiki:text", $node); foreach ($nc as $n) { $edit->text = $n->firstChild->nodeValue; } if ($limit == 0 or $revision_count < $limit) { array_push($result->edits, $edit); } else { break; } $revision_count++; } return $result; } // test if (0) { $e = xml_edits($xml); print_r($e); }
/**
 * Render a "history flow" HTML page for one Wikipedia article.
 *
 * The revision history is fetched from Special:Export via get(), parsed by
 * xml_edits(), and each pair of adjacent revisions is compared line by line
 * with pairwise_diff(). The result is drawn as an inline SVG: one column per
 * revision, one coloured segment per line of text (coloured by author), and
 * translucent polygons joining lines that pairwise_diff() matched between
 * neighbouring revisions. The finished page is echoed to the output.
 *
 * @param string $page_title Title of the Wikipedia page; spaces are replaced
 *                           by underscores before it is used.
 *
 * @return bool false when no XML was retrieved or it contained no edits
 *              (nothing is echoed in that case), true once the page has
 *              been echoed.
 */
function display_svg($page_title)
{
    $user_agent = '*****@*****.**';

    // Get XML history of Wikipedia page.
    // NOTE(review): the title is inserted into the request URL as given;
    // titles containing '?' or '#' will not survive unless the caller has
    // already URL-encoded them - confirm against the caller.
    $page_title = str_replace(' ', '_', $page_title);
    $xml = get('http://en.wikipedia.org/wiki/Special:Export/' . $page_title . '?history', $user_agent);
    if ($xml == '') {
        return false;
    }

    // Extract metadata about each revision of this page.
    $history = xml_edits($xml);
    if (count($history->edits) == 0) {
        return false;
    }

    // Texts to compare: every revision split into lines. A revision without
    // a text property is treated as a single empty line.
    $texts = array();
    foreach ($history->edits as $e) {
        $texts[] = explode("\n", isset($e->text) ? (string) $e->text : '');
    }

    // First revision: every line belongs to the author of that revision.
    $start = new stdClass();
    $start->authors = array_fill(0, count($texts[0]), $history->edits[0]->userid);
    $start->match = array();
    $start->id = $history->edits[0]->id;

    // Get differences between subsequent pairs of revisions. The authors of
    // the newer revision and the match map of the older one are handed to
    // pairwise_diff(), which is expected to fill them in (presumably by
    // reference - confirm against pairwise_diff()).
    $revisions = array();
    $revisions[0] = $start;
    $edit_count = count($history->edits);
    for ($i = 1; $i < $edit_count; $i++) {
        $revision = new stdClass();
        $revision->authors = array();
        $revision->match = array();
        $revision->id = $history->edits[$i]->id;
        $revisions[$i] = $revision;

        pairwise_diff(
            $texts[$i - 1],
            $texts[$i],
            $revisions[$i - 1]->authors,
            $revisions[$i]->authors,
            $history->edits[$i]->userid,
            $revisions[$i - 1]->match
        );
    }

    // Generate SVG.
    $author_colour = array();
    $view_width = 1000;
    $view_height = 500;
    $x = 10;
    $y = 20;

    // Scale the drawing to the number of revisions and the longest revision.
    // Both divisors are at least 1: there is at least one edit, and explode()
    // always returns at least one element.
    $num_rev = count($revisions);
    $max_lines = 0;
    foreach ($texts as $text) {
        $max_lines = max($max_lines, count($text));
    }

    // Column and row spacing are clamped to the range 1..30.
    $x_gap = max(1, min(30, $view_width / $num_rev));
    $y_gap = max(1, min(30, $view_height / $max_lines));

    $stroke_width = max(1, $x_gap / 2);
    $half_stroke = $stroke_width / 2;

    $svg = '<?xml version="1.0" encoding="UTF-8"?> '
        . '<svg xmlns:xlink="http://www.w3.org/1999/xlink" xmlns="http://www.w3.org/2000/svg" width="'
        . $view_width . 'px" height="' . $view_height . 'px" >';

    foreach ($revisions as $rev) {
        // Give every author not seen before a random colour.
        foreach ($rev->authors as $author_id) {
            if (!isset($author_colour[$author_id])) {
                $author_colour[$author_id] = 'rgb(' . rand(0, 255) . ',' . rand(0, 255) . ',' . rand(0, 255) . ')';
            }
        }

        // Revision ids come from the downloaded XML; escape them before
        // placing them inside attribute values.
        $rev_id = htmlspecialchars((string) $rev->id, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

        // Column representing this edit: one vertical stroke per line of text.
        $svg .= '<a xlink:href="http://en.wikipedia.org/w/index.php?oldid=' . $rev_id
            . '" title="Revision ' . $rev_id . '" target="_new" >';
        foreach ($rev->authors as $k => $v) {
            $y_pos = $y + $k * $y_gap;
            $svg .= '<path style="stroke:' . $author_colour[$v] . ';stroke-width:' . $stroke_width
                . ';stroke-linecap:butt;" d="M ' . $x . ' ' . $y_pos . ' ' . $x . ' ' . ($y_pos + $y_gap) . '" />';
        }
        $svg .= '</a>';

        // Draw polygon linking each segment of text in two adjacent revisions.
        // As used here, match maps a line index in this revision ($k) to a
        // line index in the next one ($v).
        $start_x = $x;
        $end_x = $x + $x_gap;
        foreach ($rev->match as $k => $v) {
            $start_y = $y + $k * $y_gap;
            $end_y = $y + $v * $y_gap;
            $author_id = $rev->authors[$k];

            // NOTE(review): this if/else was garbled in the source (the
            // user-name expression and the else keyword were missing). It has
            // been reconstructed from the surrounding condition: link to the
            // user page when a name is known, otherwise to the contributions
            // page for the raw author id - confirm against the original.
            if (isset($history->user_name_list[$author_id])) {
                $user_url = 'http://en.wikipedia.org/wiki/User:'
                    . rawurlencode(str_replace(' ', '_', (string) $history->user_name_list[$author_id]));
            } else {
                $user_url = 'http://en.wikipedia.org/wiki/Special:Contributions/'
                    . rawurlencode((string) $author_id);
            }

            $svg .= '<a xlink:href="' . htmlspecialchars($user_url, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8')
                . '" title="Revision ' . $rev_id . '" target="_new" >';
            $svg .= '<polygon style="fill:' . $author_colour[$author_id] . ';stroke-width:0;opacity:0.4;" points="'
                . ($start_x + $half_stroke) . ', ' . $start_y . ' '
                . ($end_x - $half_stroke) . ', ' . $end_y . ' '
                . ($end_x - $half_stroke) . ', ' . ($end_y + $y_gap) . ' '
                . ($start_x + $half_stroke) . ', ' . ($start_y + $y_gap) . ' '
                . ($start_x + $half_stroke) . ', ' . $start_y . '" />';
            $svg .= '</a>';
        }

        $x += $x_gap;
    }
    $svg .= '</svg>';

    // The title is caller-supplied text: escape it for both the attribute
    // and the element content so it cannot inject markup into the page.
    $title_html = htmlspecialchars($page_title, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

    echo '<!DOCTYPE html> <html> <head> <meta charset="utf-8" /> <title>Wikipedia History Flow</title>'
        . ' <style type="text/css" title="text/css"> body { font-family: sans-serif; } </style>'
        . ' </head> <body> <h1>Wikipedia History Flow</h1>';
    echo '<p>History flow for Wikipedia page <a href="http://en.wikipedia.org/wiki/' . $title_html
        . '" target="_new">' . $title_html . '</a> (<a href=".">try another</a>)</p>';
    echo '<div style="background-color:rgb(128,128,128);">';
    echo $svg;
    echo '</div>';
    echo '<p>Key:</p> <ul>'
        . ' <li>Columns represent revisions of the page (click on a column to see that revision)</li>'
        . ' <li>Rows of text that are the same between revisions are connected by lines coloured by author</li>'
        . ' <li>Authors are distinguished by colour, click on a line between a column to go to that user\'s Wikipedia page</li>'
        . ' </ul>';
    // Close the body element properly (the source emitted a second opening tag).
    echo ' </body> </html>';

    return true;
}