예제 #1
0
파일: Crawler.php 프로젝트: Wufe/Scraper
 /**
  * Runs the whole scraping pipeline over a single source document.
  *
  * Steps, in order: wrap the source in a DomCrawler, parse it into a node
  * tree, augment the first entry of that tree, mark parent paths, dump the
  * marked tree to the file "a-tree" (relative to the current working
  * directory) and finally hand the marked tree to Scanner::scan().
  *
  * Progress is reported through Log::log() before each stage. Nothing is
  * returned.
  *
  * @param mixed $source Passed straight to the DomCrawler constructor
  *                      (presumably HTML markup - confirm against callers).
  */
 public static function crawl($source)
 {
     $dom = new DomCrawler($source);

     Log::log("Generating tree..");
     $parsed = Parser::parse($dom, 0, "", false);

     Log::log("Calculating components..");
     // Only the first entry of the parsed result is processed further.
     $augmented = Parser::augment($parsed[0]);

     Log::log("Calculating path..");
     $marked = Parser::identify_parent_path($augmented);

     // Human-readable snapshot of the marked tree, written for inspection.
     $dump = print_r($marked, true);
     file_put_contents("a-tree", $dump);

     Log::log("Scanning the tree for a pattern..");
     Scanner::scan($marked);

     // Manual smoke test for get_node_from_path and get_parent_from_path:
     //Log::log( Tree::get_node_from_path( $marked, "0,0" )[ 'tag' ] );
     //Log::log( Tree::get_parent_from_path( $marked, "0,0" )[ 'tag' ] );
 }
예제 #2
0
파일: Scanner.php 프로젝트: Wufe/Scraper
 /**
  * Recursively walks a node tree and collects every node whose tagged
  * children pass the eligibility check.
  *
  * For each visited node the tagged children are fetched through
  * Tree::get_tagged_children(). When Scanner::are_eligible() accepts them and
  * the node's path has not been prioritized yet, a copy of the node is
  * appended to $priority, enriched with:
  *   - 'count': number of tagged children,
  *   - 'id':    "#" . id, only when Node::get_id() does not return false,
  *   - 'class': "." . class, only when Node::get_class() does not return false.
  * The node's path is also appended to the static Scanner::$been_prioritized
  * list, so that state accumulates across calls.
  *
  * The walk then descends into every tagged child, whether or not the current
  * node was collected. A node without tagged children ends the recursion.
  *
  * @param mixed $root     Root of the tree; forwarded unchanged to
  *                        Scanner::are_eligible() and to each recursive call.
  * @param mixed $node     Node to inspect; any falsy value (the default "")
  *                        means "start from $root". Accessed as an array
  *                        with a 'path' key.
  * @param array $priority Nodes collected so far.
  *
  * @return array $priority extended with the eligible nodes of this subtree.
  */
 public static function scan_for_pattern($root, $node = "", $priority = [])
 {
     // Plain truthiness tests cannot raise diagnostics here, so no "@" is needed.
     if (!$node) {
         $node = $root;
     }

     $children = Tree::get_tagged_children($node);
     if (!$children) {
         // No tagged children: nothing to evaluate and nothing to descend into.
         return $priority;
     }

     if (Scanner::are_eligible($root, $node, $children) && !Scanner::has_been_prioritized($node['path'])) {
         $node['count'] = count($children);

         $id = Node::get_id($node);
         if ($id !== false) {
             $node['id'] = "#" . $id;
         }

         $class = Node::get_class($node);
         if ($class !== false) {
             $node['class'] = "." . $class;
         }

         $priority[] = $node;
         // Remember the path; presumably this is what has_been_prioritized()
         // consults to keep the same node from being collected twice.
         Scanner::$been_prioritized[] = $node['path'];
     }

     foreach ($children as $child) {
         $priority = Scanner::scan_for_pattern($root, $child, $priority);
     }

     return $priority;
 }