/**
 * Builds the list of pages referenced from a table-of-contents document.
 *
 * The document at $url is parsed as HTML and is also handed to download()
 * with the target BUILD_DIR . '/downloaded-pages/toc.html' (presumably to
 * keep a local copy -- confirm against download()'s implementation).
 * Every <h3> node returned by nodes() that contains at least one <a>
 * element yields one page entry.
 *
 * @param string $url Location of the table-of-contents HTML document.
 *
 * @return array List of objects with the properties:
 *               - name:     text content of the <h3>
 *               - url:      href attribute of the first <a> inside the <h3>
 *               - sections: result of extractSections() for that <h3>
 *               Empty array when no <h3> contains a link.
 */
function extractPages($url)
{
    $dom = new DOMDocument();
    $dom->loadHTMLFile($url);
    download($url, BUILD_DIR . '/downloaded-pages/toc.html');

    // Start from an empty list so callers always receive an array, even
    // when the document has no linked headings.
    $pages = array();

    foreach (nodes($dom, 'h3') as $h3) {
        $links = $h3->getElementsByTagName('a');
        if (!$links->length) {
            // Headings without a link do not describe a page.
            continue;
        }

        $pages[] = (object) array(
            'name'     => $h3->textContent,
            'url'      => $links->item(0)->getAttribute('href'),
            'sections' => extractSections($h3),
        );
    }

    return $pages;
}
/**
 * Reduces page content to the parts selected by the plugin arguments.
 *
 * Handles the arguments: section, sections, sectionhead, quiet, lines,
 * words, bytes, for UnfoldSubpages, IncludePage, ...
 *
 * The filters are applied in this order: section, sections, lines, words,
 * bytes. Arguments that are missing or falsy are skipped.
 *
 * @param array  $c        Page content as an array of lines.
 * @param string $pagename Page name, passed through to extractSection() /
 *                         extractSections().
 * @param array  $args     Plugin arguments; only the keys listed above are
 *                         read, any other key is ignored.
 *
 * @return string The remaining content joined with "\n".
 */
function extractParts($c, $pagename, $args)
{
    // Read the options explicitly instead of extract()ing them: a missing
    // key must not leave a variable undefined, and $args must not be able
    // to overwrite $c or $pagename.
    $defaults = array(
        'section'     => null,
        'sections'    => null,
        'sectionhead' => null,
        'quiet'       => null,
        'lines'       => null,
        'words'       => null,
        'bytes'       => null,
    );
    $opts = array_merge($defaults, (array) $args);

    $section     = $opts['section'];
    $sections    = $opts['sections'];
    $sectionhead = $opts['sectionhead'];
    $quiet       = $opts['quiet'];
    $lines       = $opts['lines'];
    $words       = $opts['words'];
    $bytes       = $opts['bytes'];

    if ($section) {
        // When "sections" is given as well, the section head is always
        // requested (1) instead of honouring "sectionhead".
        $head = $sections ? 1 : $sectionhead;
        $c = extractSection($section, $c, $pagename, $quiet, $head);
    }

    if ($sections) {
        $c = extractSections($sections, $c, $pagename, $quiet, 1);
    }

    if ($lines) {
        $c = array_slice($c, 0, $lines);
        // The marker is appended whenever "lines" is set, regardless of
        // how many lines the content actually had.
        $c[] = sprintf(_(" ... first %d lines"), $lines);
    }

    if ($words) {
        $c = firstNWordsOfContent($words, $c);
    }

    if ($bytes) {
        $ct = implode("\n", $c); // one string
        if (strlen($ct) > $bytes) {
            // Truncate the joined string; $c itself is an array and cannot
            // be passed to substr().
            $ct = substr($ct, 0, $bytes);
            $c = array($ct, sprintf(_(" ... first %d bytes"), $bytes));
        }
    }

    return implode("\n", $c); // one string
}