/**
 * Parses the HTML of one forum thread page and appends one RSSItem per post to the feed.
 *
 * Side effects on $rssFeed: sets ->title when the page has a thread title, calls
 * setLastBuildDate() for every post found, and calls addRSSItem() once per post.
 * An empty page is ignored and leaves the feed untouched.
 *
 * @param string  $html    Raw HTML of the thread page.
 * @param RSSFeed $rssFeed Feed that receives the title, build date and items.
 *
 * @return void
 */
function addPageToRSSFeed($html, RSSFeed $rssFeed)
{
    # Strip out <script> tags so loadHTML() parses the page correctly for $xpath->query()
    $withoutScripts = preg_replace("#<script.*?</script>#is", "", $html);
    if ($withoutScripts !== null) {
        $html = $withoutScripts;
    }
    # else: preg_replace() failed (e.g. PCRE backtrack limit), so fall back to the unstripped page

    if (trim((string) $html) === '') {
        # Nothing to parse; loadHTML() rejects an empty string
        return;
    }

    $dom = new DOMDocument();
    # Real-world markup is rarely valid: collect libxml warnings internally instead of
    # silencing every error with "@", then restore the caller's error mode
    $previousErrorMode = libxml_use_internal_errors(true);
    $dom->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previousErrorMode);

    $xpath = new DOMXPath($dom);

    # Get the post wrapper divs
    $postDivs = $xpath->query('/descendant::div[@id="posts"]/div[starts-with(@id,"edit") and @class="postbit-wrapper "]');

    # Thread URL (null when the page has no canonical link)
    $canonicalNodes = $xpath->query('/html/head/link[@rel="canonical"]/@href');
    $pageURL = $canonicalNodes->length > 0 ? $canonicalNodes->item(0)->nodeValue : null;

    # Title (only overwrite the feed title when the page actually has one)
    $titleNodes = $xpath->query('//div[@id = "thread-header-bloglike"]//h1');
    if ($titleNodes->length > 0) {
        $rssFeed->title = $titleNodes->item(0)->nodeValue;
    }

    # Get the post element divs
    foreach ($postDivs as $postDiv) {
        $rssItem = new RSSItem();

        # Title (author)
        $rssItem->title = '[Post]'; // Default to "[Post]" on first post
        $authorNodes = $xpath->query('.//a[starts-with(@class, "bigfusername")]', $postDiv);
        if ($authorNodes->length > 0) {
            $authorName = trim($authorNodes->item(0)->nodeValue);
            $rssItem->title = $authorName;
            $rssItem->author = $authorName;
        }

        # Link, GUID
        $rssItem->link = $pageURL; // Default to page URL on first post
        $rssItem->guid = $rssItem->link;
        $postLinkNodes = $xpath->query('.//a[@class="postCount"]/@href', $postDiv);
        if ($postLinkNodes->length > 0) {
            $parsedURL = parse_url($postLinkNodes->item(0)->nodeValue);
            # parse_url() returns false on seriously malformed URLs; keep the default link then
            if (is_array($parsedURL)) {
                $queryParams = [];
                if (isset($parsedURL['query'])) {
                    parse_str($parsedURL['query'], $queryParams);
                }
                # Strip the 's' parameter out since it changes every so often....
                unset($queryParams['s']);

                $path = isset($parsedURL['path']) ? $parsedURL['path'] : '';
                # The URL format is kept exactly as before so previously published GUIDs stay stable
                $rssItem->link = 'http://forum.xda-developers.com/' . $path . '?' . http_build_query($queryParams);
                $rssItem->guid = $rssItem->link;
            }
        }

        # Description
        $postMsgDivs = $xpath->query('.//div[starts-with(@id, "post_message") and starts-with(@class, "post-text")]', $postDiv);
        if ($postMsgDivs->length > 0) {
            # Strip ad (searched in the whole post div, before the message is serialised)
            foreach ($xpath->query('.//div[@class="purchad"]', $postDiv) as $postAd) {
                $postAd->parentNode->removeChild($postAd);
            }
            $rssItem->description = cleanPostMessageHTML($dom->saveXML($postMsgDivs->item(0)));
        }

        # Publication Date
        $rssItem->setPubDate(new DateTime('1900-01-01')); // Default to 1st JAN 1900 on first post... oh well...
        $postDateSpans = $xpath->query('.//span[@class="time"]', $postDiv);
        if ($postDateSpans->length > 0) {
            $rssItem->setPubDate(getXDADate(trim($postDateSpans->item(0)->nodeValue)));
        }
        # Set the feed's lastBuildDate to the last post's date.
        # NOTE(review): a post without a time span resets the feed's lastBuildDate to the
        # 1900 placeholder, as it always has. Confirm whether that is intended.
        $rssFeed->setLastBuildDate($rssItem->getPubDate());

        $rssFeed->addRSSItem($rssItem);
    }
}