Example #1
0
        foreach ($nc as $n) {
            $edit->userid = $n->firstChild->nodeValue;
            if (!in_array($edit->userid, $result->user_id_list)) {
                array_push($result->user_id_list, $edit->userid);
            }
        }
        // name
        $nc = $xpath->query("wiki:contributor/wiki:username", $node);
        foreach ($nc as $n) {
            $edit->username = $n->firstChild->nodeValue;
            $result->user_name_list[$edit->userid] = $edit->username;
        }
        // text
        $nc = $xpath->query("wiki:text", $node);
        foreach ($nc as $n) {
            $edit->text = $n->firstChild->nodeValue;
        }
        if ($limit == 0 or $revision_count < $limit) {
            array_push($result->edits, $edit);
        } else {
            break;
        }
        $revision_count++;
    }
    return $result;
}
// Disabled manual check: the guard is always false, so nothing below runs.
// Switch the condition to true to dump the structure returned by xml_edits().
if (false) {
    print_r(xml_edits($xml));
}
Example #2
0
/**
 * Echo a "history flow" HTML page (with an inline SVG) for a Wikipedia article.
 *
 * The article's history is fetched through Special:Export and parsed with
 * xml_edits(). Each revision's text is split into lines, and consecutive
 * revisions are handed to pairwise_diff(), which is expected to fill in the
 * per-line author list of the newer revision and the line-to-line match map
 * of the older one. Every revision is then drawn as a column of coloured
 * segments (one colour per author), and matched lines in adjacent revisions
 * are joined by translucent polygons.
 *
 * @param string $page_title Article title; spaces are replaced by underscores.
 *
 * @return bool False (with nothing echoed) when the export is empty or holds
 *              no edits; true once the page has been echoed.
 */
function display_svg($page_title)
{
    $user_agent = '*****@*****.**';
    // Get XML history of Wikipedia page
    $page_title = str_replace(' ', '_', $page_title);
    $xml = get('http://en.wikipedia.org/wiki/Special:Export/' . $page_title . '?history', $user_agent);
    if ($xml == '') {
        return false;
    }
    // Extract metadata about each revision of this page
    $history = xml_edits($xml);
    if (count($history->edits) == 0) {
        return false;
    }
    // Texts to compare: one array of lines per revision
    $texts = array();
    foreach ($history->edits as $e) {
        array_push($texts, explode("\n", $e->text));
    }
    // First revision: every line belongs to the author of that revision
    $start = new stdclass();
    $start->authors = array_fill(0, count($texts[0]), $history->edits[0]->userid);
    $start->match = array();
    $start->id = $history->edits[0]->id;
    // Get differences between subsequent pairs of revisions
    $revisions = array();
    $revisions[0] = $start;
    $n = count($history->edits);
    for ($i = 1; $i < $n; $i++) {
        $revision = new stdclass();
        $revision->authors = array();
        $revision->match = array();
        $revision->id = $history->edits[$i]->id;
        $revisions[$i] = $revision;
        // Results are written into the newer revision's authors and the older
        // revision's match map (presumably by reference; see pairwise_diff()).
        pairwise_diff($texts[$i - 1], $texts[$i], $revisions[$i - 1]->authors, $revisions[$i]->authors, $history->edits[$i]->userid, $revisions[$i - 1]->match);
    }
    // Generate SVG
    $author_colour = array();
    $view_width = 1000;
    $view_height = 500;
    $x = 10;
    $y = 20;
    // How many revisions?
    $num_rev = count($revisions);
    $max_lines = 0;
    foreach ($texts as $text) {
        $max_lines = max($max_lines, count($text));
    }
    // Scale columns and rows to the viewport, clamped to the range 1..30
    $x_gap = max(1, min(30, $view_width / $num_rev));
    $y_gap = max(1, min(30, $view_height / $max_lines));
    $stroke_width = max(1, $x_gap / 2);
    $half_stroke = $stroke_width / 2;
    $svg = '<?xml version="1.0" encoding="UTF-8"?>
	<svg xmlns:xlink="http://www.w3.org/1999/xlink" 
	xmlns="http://www.w3.org/2000/svg"
	width="' . $view_width . 'px" 
	height="' . $view_height . 'px" 
	>';
    foreach ($revisions as $rev) {
        // Authors present in this revision that have no colour yet
        $current_authors = array_unique(array_values($rev->authors));
        $new_authors = array_diff($current_authors, array_keys($author_colour));
        // Random colour for each new author
        foreach ($new_authors as $author_id) {
            $author_colour[$author_id] = 'rgb(' . rand(0, 255) . ',' . rand(0, 255) . ',' . rand(0, 255) . ')';
        }
        // Column representing this edit
        $svg .= '<a xlink:href="http://en.wikipedia.org/w/index.php?oldid=' . $rev->id . '" title="Revision ' . $rev->id . '" target="_new" >';
        foreach ($rev->authors as $k => $v) {
            $y_pos = $y + $k * $y_gap;
            $svg .= '<path style="stroke:' . $author_colour[$v] . ';stroke-width:' . $stroke_width . ';stroke-linecap:butt;"  d="M ' . $x . ' ' . $y_pos . ' ' . $x . ' ' . ($y_pos + $y_gap) . '" />';
        }
        $svg .= '</a>';
        // Draw polygon linking each segment of text in two adjacent revisions
        $start_x = $x;
        $end_x = $x + $x_gap;
        foreach ($rev->match as $k => $v) {
            $start_y = $y + $k * $y_gap;
            $end_y = $y + $v * $y_gap;
            $line_author = $rev->authors[$k];
            // NOTE(review): this branch was corrupted in the source and has been
            // reconstructed: authors with a known user name link to their user
            // page, all others to their contributions listing. Please confirm.
            if (isset($history->user_name_list[$line_author])) {
                $user_url = 'http://en.wikipedia.org/wiki/User:' . $history->user_name_list[$line_author];
            } else {
                $user_url = 'http://en.wikipedia.org/wiki/Special:Contributions/' . $line_author;
            }
            // Names may contain & or quotes, which would break the XML attribute
            $user_url = htmlspecialchars($user_url, ENT_QUOTES, 'UTF-8');
            $svg .= '<a xlink:href="' . $user_url . '" title="Revision ' . $rev->id . '" target="_new" >';
            $svg .= '<polygon style="fill:' . $author_colour[$line_author] . ';stroke-width:0;opacity:0.4;"  points="' . ($start_x + $half_stroke) . ', ' . $start_y . '  ' . ($end_x - $half_stroke) . ', ' . $end_y . '  ' . ($end_x - $half_stroke) . ', ' . ($end_y + $y_gap) . '  ' . ($start_x + $half_stroke) . ', ' . ($start_y + $y_gap) . '  ' . ($start_x + $half_stroke) . ', ' . $start_y . '" />';
            $svg .= '</a>';
        }
        $x += $x_gap;
    }
    $svg .= '</svg>';
    // The title is caller-supplied, so escape it before placing it in markup
    $safe_title = htmlspecialchars($page_title, ENT_QUOTES, 'UTF-8');
    echo '<!DOCTYPE html>
<html>
	<head>
		<meta charset="utf-8" />
		<title>Wikipedia History Flow</title>
		
		<style type="text/css" title="text/css">
			body { font-family: sans-serif; }
		</style>
	</head>
	<body>
		<h1>Wikipedia History Flow</h1>';
    echo '<p>History flow for Wikipedia page <a href="http://en.wikipedia.org/wiki/' . $safe_title . '" target="_new">' . $safe_title . '</a> (<a href=".">try another</a>)</p>';
    echo '<div style="background-color:rgb(128,128,128);">';
    echo $svg;
    echo '</div>';
    echo '<p>Key:</p>
	<ul>
	<li>Columns represent revisions of the page (click on a column to see that revision)</li>
	<li>Rows of text that are the same between revisions are connected by lines coloured by author</li>
	<li>Authors are distinguished by colour, click on a line between a column to go to that user\'s Wikipedia page</li>
	</ul>';
    echo '
	</body>
</html>';
    return true;
}