// If we make it this far, push the link on the array $postURls[] = $link; echo $link . "\n"; } } echo "-----------------------------------\r\n"; include "htmlparser.inc"; foreach ($postURls as $url) { $entryID = 0; $ljParser = HtmlParser_ForFile($url); while ($ljParser->parse()) { if ($ljParser->iNodeName == "a") { if (strpos($ljParser->iNodeAttributes["href"], $archivesBase) !== false) { $archiveURL = $ljParser->iNodeAttributes["href"]; echo "\nArchive URL: " . $archiveURL . "\n"; $archiveParser = HtmlParser_ForFile($archiveURL); while ($archiveParser->parse()) { if (strtolower($archiveParser->iNodeName) == "input") { if ($archiveParser->iNodeAttributes["name"] == "entry_id") { $entryID = trim($archiveParser->iNodeAttributes["value"]); break 2; } } } break; } } } while ($ljParser->parse()) { if (strtolower($ljParser->iNodeName) == "table") { if ($ljParser->iNodeAttributes["class"] == "talk-comment") {
<?php
// Example:
// Walks testfile.html with the HTML parser and prints every node it yields.
// To run: php < ex_dumpurl.php

include "htmlparser.inc";

$parser = HtmlParser_ForFile("testfile.html");
// Alternative input source, kept disabled:
//$parser = HtmlParser_ForURL ("http://yahoo.com");

// Each successful parse() call advances the parser to the next node.
while ($parser->parse()) {
    // Node header: separator line, then the node's name and type.
    echo "-----------------------------------\r\n";
    echo "Name=", $parser->iNodeName, ";";
    echo "Type=", $parser->iNodeType, ";";

    // Only text and comment nodes get their value printed.
    // (switch compares loosely, same as the == checks it stands in for.)
    switch ($parser->iNodeType) {
        case NODE_TYPE_TEXT:
        case NODE_TYPE_COMMENT:
            echo "Value='", $parser->iNodeValue, "'";
            break;
    }
    echo "\r\n";

    // Element nodes additionally list their attributes as name="value" pairs.
    if ($parser->iNodeType == NODE_TYPE_ELEMENT) {
        echo "ATTRIBUTES: ";
        $attributes = $parser->iNodeAttributes;
        foreach (array_keys($attributes) as $attrName) {
            echo $attrName, "=\"", $attributes[$attrName], "\" ";
        }
    }

    // Blank terminator line after every node, element or not.
    echo "\r\n";
}