Example #1
0
 /**
  * Iteratively computes the PageRank of every node in $graph.
  *
  * Every edge is turned into a pair of PageRankNodes that are linked to each other in both
  * directions; nodes that are not part of any edge are registered afterwards. In each round the
  * new values are buffered and only written back once all nodes have been processed (presumably so
  * that every node is calculated against the values of the previous round).
  * The loop ends when $this->maxRounds is reached, or when the largest per-node change of a round
  * is non-zero and not greater than $this->maxDistance.
  *
  * @param Graph $graph
  * @param bool $keepAllRoundData [optional]. Default: false. If true, the history of every round is kept; otherwise only the most recent round is kept.
  * @return PageRankResult
  */
 public function calculatePagerank(Graph $graph, $keepAllRoundData = false)
 {
     $graphNodes = $graph->getNodes();
     $graphEdges = $graph->getEdges();

     /** @var PageRankNode[] $nodes */
     $nodes = [];
     // edges first: both end points become PageRankNodes that know about each other
     foreach ($graphEdges as $edge) {
         $source = $this->getPageRankNode($edge->getFrom(), $nodes);
         $target = $this->getPageRankNode($edge->getTo(), $nodes);
         $source->addLinkTo($target);
         $target->addLinkFrom($source);
     }
     // then every node of the graph, which covers nodes that no edge refers to
     foreach ($graphNodes as $graphNode) {
         $this->getPageRankNode($graphNode, $nodes);
     }

     // the node list does not change any more, so its size can be determined once
     $nodeCount = count($nodes);
     $this->getLogger()->debug("Got " . $nodeCount . " nodes in total");

     $history = [];
     $roundNumber = 0;
     do {
         $maxChange = 0;
         $roundHistory = new PageRankNodeHistory($roundNumber);
         $pendingValues = [];
         foreach ($nodes as $key => $node) {
             $node->setOldPr($node->getPr());
             $newPr = $node->calculatePageRank($this->dampingFactor, $nodeCount);
             $oldPr = $node->getOldPr();
             $change = abs($oldPr - $newPr);
             if ($change > $maxChange) {
                 $maxChange = $change;
             }
             // buffer the value; it is applied after the whole round has been calculated
             $pendingValues[$key] = $newPr;
             $roundHistory->addEntry(new PageRankNodeHistoryEntry($node, $oldPr, $newPr));
         }
         foreach ($pendingValues as $key => $pendingValue) {
             $nodes[$key]->setPr($pendingValue);
         }

         if ($keepAllRoundData) {
             $history[$roundNumber] = $roundHistory;
         } else {
             // only the latest round survives, still stored under its round index
             $history = [$roundNumber => $roundHistory];
         }
         $roundNumber++;
         $this->getLogger()->debug("Calculating round {$roundNumber}. Max rounds: {$this->maxRounds}. Last max distance: {$maxChange}. Required max distance: {$this->maxDistance}");

         // a change of exactly 0 does not count as converged; only a non-zero change within the limit does
         $keepIterating = $maxChange == 0 || $maxChange > $this->maxDistance;
     } while ($roundNumber < $this->maxRounds && $keepIterating);

     return new PageRankResult($graph, $history);
 }
Example #2
0
 /**
  * Builds a Graph from the rows of the CSV file at $pathToFile.
  *
  * Every usable row adds one edge from the value in the source column to the value in the
  * destination column. A row is skipped when one of the two columns is missing or when
  * parse_url() yields a falsy result for one of the raw values. Values are trimmed, and rows that
  * mention the same trimmed value share a single Node instance.
  *
  * @param string $pathToFile
  * @return Graph
  */
 public function import($pathToFile)
 {
     $rows = IOUtil::readCsvFile($pathToFile, $this->hasHeader, $this->encoding, $this->delimiter, $this->enclosure, null, null, $this->offset);
     $nodesByUrl = [];
     $graph = new Graph();
     foreach ($rows as $row) {
         $hasBothColumns = array_key_exists($this->sourceColumn, $row) && array_key_exists($this->destinationColumn, $row);
         if (!$hasBothColumns) {
             continue;
         }
         $rawFrom = $row[$this->sourceColumn];
         $rawTo = $row[$this->destinationColumn];
         if (!parse_url($rawFrom) || !parse_url($rawTo)) {
             continue;
         }
         // resolve source first, then destination, reusing nodes that were already created
         $endpoints = [];
         foreach ([$rawFrom, $rawTo] as $rawUrl) {
             $url = trim($rawUrl);
             if (!isset($nodesByUrl[$url])) {
                 $nodesByUrl[$url] = new Node($url);
             }
             $endpoints[] = $nodesByUrl[$url];
         }
         $graph->addEdge(new Edge($endpoints[0], $endpoints[1]));
     }
     return $graph;
 }
Example #3
0
 /**
  * Checks the calculated PageRank values against the example image at http://de.wikipedia.org/wiki/PageRank
  * >> http://de.wikipedia.org/wiki/PageRank#/media/File:PageRank-Beispiel.png
  * Difference to the English example: No normalization is used - that means PageRankNode "a" has no outlinks at all instead of links to any other node including itself.
  *
  * The history returned by the calculation is converted to arrays, "oldPr"/"newPr" are rounded to
  * 4 decimals and the result is compared with the expected history, which consists of the single round with index 92.
  */
 public function test_ShouldYieldSameResultsAsTheDeWikiExample()
 {
     $a = new Node("a");
     $b = new Node("b");
     $c = new Node("c");
     $d = new Node("d");
     $e = new Node("e");
     $f = new Node("f");
     $x1 = new Node("x1");
     $x2 = new Node("x2");
     $x3 = new Node("x3");
     $x4 = new Node("x4");
     $x5 = new Node("x5");
     $graph = new Graph();
     $graph->addEdge(new Edge($b, $c));
     $graph->addEdge(new Edge($c, $b));
     $graph->addEdge(new Edge($d, $a));
     $graph->addEdge(new Edge($d, $b));
     $graph->addEdge(new Edge($e, $b));
     $graph->addEdge(new Edge($e, $d));
     $graph->addEdge(new Edge($e, $f));
     $graph->addEdge(new Edge($f, $b));
     $graph->addEdge(new Edge($f, $e));
     $graph->addEdge(new Edge($x1, $b));
     $graph->addEdge(new Edge($x1, $e));
     $graph->addEdge(new Edge($x2, $b));
     $graph->addEdge(new Edge($x2, $e));
     $graph->addEdge(new Edge($x3, $b));
     $graph->addEdge(new Edge($x3, $e));
     $graph->addEdge(new Edge($x4, $e));
     $graph->addEdge(new Edge($x5, $e));
     $damping = 0.85;
     $maxRounds = 1000;
     $maxDistance = 1.0E-6;
     $pageRank = new PageRank($damping, $maxRounds, $maxDistance);
     $result = $pageRank->calculatePagerank($graph);
     $history = $result->getHistory();
     $actual = [];
     foreach ($history as $key => $historyElement) {
         $actual[$key] = $historyElement->toArray();
         $round = $actual[$key];
         // round to 4 decimals so the values can be compared with the literals below
         foreach ($round as $node => $nodeValue) {
             $actual[$key][$node]["oldPr"] = round($nodeValue["oldPr"], 4);
             $actual[$key][$node]["newPr"] = round($nodeValue["newPr"], 4);
         }
     }
     $expected = [
         92 => [
             "b" => ["node" => "b", "oldPr" => 0.3242, "newPr" => 0.3242],
             "c" => ["node" => "c", "oldPr" => 0.2892, "newPr" => 0.2892],
             "d" => ["node" => "d", "oldPr" => 0.033, "newPr" => 0.033],
             "a" => ["node" => "a", "oldPr" => 0.0276, "newPr" => 0.0276],
             "e" => ["node" => "e", "oldPr" => 0.0682, "newPr" => 0.0682],
             "f" => ["node" => "f", "oldPr" => 0.033, "newPr" => 0.033],
             "x1" => ["node" => "x1", "oldPr" => 0.0136, "newPr" => 0.0136],
             "x2" => ["node" => "x2", "oldPr" => 0.0136, "newPr" => 0.0136],
             "x3" => ["node" => "x3", "oldPr" => 0.0136, "newPr" => 0.0136],
             "x4" => ["node" => "x4", "oldPr" => 0.0136, "newPr" => 0.0136],
             "x5" => ["node" => "x5", "oldPr" => 0.0136, "newPr" => 0.0136],
         ],
     ];
     // expected value goes first, so that a failure message labels both sides correctly
     $this->assertEquals($expected, $actual);
 }
Example #4
0
// Edge is instantiated below, so it has to be imported next to Graph and Node
use paslandau\PageRank\Edge;
use paslandau\PageRank\Graph;
use paslandau\PageRank\Node;
require_once __DIR__ . "/bootstrap.php";
// define the nodes
$a = new Node("a");
$b = new Node("b");
$c = new Node("c");
$d = new Node("d");
$e = new Node("e");
$f = new Node("f");
$x1 = new Node("x1");
$x2 = new Node("x2");
$x3 = new Node("x3");
$x4 = new Node("x4");
$x5 = new Node("x5");
$graph = new Graph();
// the optional edges below link "a" to every node, including itself
// uncomment to get the results of the english example http://en.wikipedia.org/wiki/PageRank
// leave commented for the results of the german one http://de.wikipedia.org/wiki/PageRank (there "a" has no outlinks at all)
//$graph->addEdge(new Edge($a,$a));
//$graph->addEdge(new Edge($a,$b));
//$graph->addEdge(new Edge($a,$c));
//$graph->addEdge(new Edge($a,$d));
//$graph->addEdge(new Edge($a,$e));
//$graph->addEdge(new Edge($a,$f));
//$graph->addEdge(new Edge($a,$x1));
//$graph->addEdge(new Edge($a,$x2));
//$graph->addEdge(new Edge($a,$x3));
//$graph->addEdge(new Edge($a,$x4));
//$graph->addEdge(new Edge($a,$x5));
// define the links between the nodes
$graph->addEdge(new Edge($b, $c));