If the $selector is a node or a list of nodes, all descendants that
match that node/node list are returned.
self::CONTEXT_DOCUMENT will use the document element as context. Otherwise the current
nodes are used as contexts.
self::FIND_MODE_FILTER will use the $selector only as a filter and not execute it directly.
This is like the jQuery specification, but a lot slower and needs more memory.
Additionally in this mode, it will find only element nodes. *
/**
 * Rank candidate collection-item xpaths, most likely candidate first.
 *
 * Candidates are ordered primarily by depth (the number of '/' separators
 * in the path, fewest first) and, within one depth, by their count in
 * descending order. Paths with a count below 2 are dropped.
 *
 * @param Nodes $nodes Node list to analyse.
 * @return array Map of xpath => count, in ranked order.
 */
public function discoverScores(Nodes $nodes)
{
    // Elements that have no text node as a direct child.
    $candidates = $nodes->find('//*[not(text())]');

    // NOTE(review): presumably yields a map of xpath => max sibling count;
    // confirm against Utils::getMaxSibCount().
    $utils = new Utils();
    $siblingCounts = $utils->getMaxSibCount($nodes->getDocument(), $candidates);

    // Highest counts first, so each depth bucket below is filled in
    // descending-count order.
    arsort($siblingCounts);

    $byDepth = [];
    foreach ($siblingCounts as $xpath => $siblings) {
        if ($siblings < 2) {
            continue;
        }

        // Nested assignment creates the depth bucket on first use.
        $byDepth[substr_count($xpath, '/')][$xpath] = $siblings;
    }

    // Shallowest paths first.
    ksort($byDepth);

    // Flatten the buckets. Every xpath lives in exactly one bucket, so the
    // union only appends and never overwrites or renumbers keys.
    $ranked = [];
    foreach ($byDepth as $scoresAtDepth) {
        $ranked += $scoresAtDepth;
    }

    return $ranked;
}