Example #1
0
/**
 * Parses the HTML of one thread page and appends one RSS item per post to the feed.
 *
 * For every post wrapper found under div#posts an RSSItem is built with:
 *  - title/author: text of the first "bigfusername" link (title defaults to "[Post]"),
 *  - link/guid:    the post's "postCount" link with the volatile "s" query parameter
 *                  removed (defaults to the page's canonical URL),
 *  - description:  the post message markup passed through cleanPostMessageHTML(),
 *  - pubDate:      the post's "time" span parsed by getXDADate() (defaults to 1900-01-01).
 *
 * The feed title is set from the thread header <h1> when one is present, and the
 * feed's lastBuildDate is updated to the publication date of each processed post,
 * so it ends up holding the date of the last post on the page.
 *
 * @param string  $html    Markup of the thread page; empty input is ignored.
 * @param RSSFeed $rssFeed Feed that receives the title, lastBuildDate and items.
 *
 * @return void
 */
function addPageToRSSFeed($html, RSSFeed $rssFeed)
{
    # Strip out <script> tags so loadHTML() parses the page correctly for $xpath->query()
    $stripped = preg_replace("#<script.*?</script>#is", "", $html);
    if ($stripped !== null) {
        $html = $stripped;
    }
    # else: preg_replace() reported a PCRE error (null); fall back to the unstripped markup

    if ($html === null || $html === '') {
        # Nothing to parse; DOMDocument::loadHTML() rejects empty input
        return;
    }

    # Collect libxml parse warnings internally instead of silencing everything with @
    $dom = new DOMDocument();
    $previousErrorMode = libxml_use_internal_errors(true);
    $dom->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previousErrorMode);

    $xpath = new DOMXPath($dom);

    # Get the post wrapper divs
    $postDivs = $xpath->query('/descendant::div[@id="posts"]/div[starts-with(@id,"edit") and @class="postbit-wrapper "]');

    # Thread URL (stays null when the page has no canonical link)
    $canonicalAttr = $xpath->query('/html/head/link[@rel="canonical"]/@href')->item(0);
    $pageURL = $canonicalAttr !== null ? $canonicalAttr->nodeValue : null;

    # Title (an existing feed title is kept when the page has no thread header)
    $titleNode = $xpath->query('//div[@id = "thread-header-bloglike"]//h1')->item(0);
    if ($titleNode !== null) {
        $rssFeed->title = $titleNode->nodeValue;
    }

    # Build one RSS item per post
    foreach ($postDivs as $postDiv) {
        $rssItem = new RSSItem();

        # Title (author); default to "[Post]" when no author link is found
        $rssItem->title = '[Post]';
        $postAuthor = $xpath->query('.//a[starts-with(@class, "bigfusername")]', $postDiv)->item(0);
        if ($postAuthor !== null) {
            $authorName = trim($postAuthor->nodeValue);
            $rssItem->title = $authorName;
            $rssItem->author = $authorName;
        }

        # Link, GUID; default to the page URL when the post has no usable permalink
        $rssItem->link = $pageURL;
        $rssItem->guid = $rssItem->link;
        $postLink = $xpath->query('.//a[@class="postCount"]/@href', $postDiv)->item(0);
        if ($postLink !== null) {
            $parsedURL = parse_url($postLink->nodeValue);
            # parse_url() returns false for seriously malformed URLs; keep the default then
            if (is_array($parsedURL)) {
                # Strip the 's' parameter out since it changes every so often....
                $queryParams = array();
                parse_str(isset($parsedURL['query']) ? $parsedURL['query'] : '', $queryParams);
                unset($queryParams['s']);
                $path = isset($parsedURL['path']) ? $parsedURL['path'] : '';
                # NOTE(review): a path with a leading "/" yields "…com//path"; the format is
                # left untouched so previously published GUIDs stay stable - confirm intent.
                $rssItem->link = 'http://forum.xda-developers.com/' . $path . '?' . http_build_query($queryParams);
                $rssItem->guid = $rssItem->link;
            }
        }

        # Description
        $postMsgDiv = $xpath->query('.//div[starts-with(@id, "post_message") and starts-with(@class, "post-text")]', $postDiv)->item(0);
        if ($postMsgDiv !== null) {
            # Strip ads anywhere inside the post before serialising the message
            foreach ($xpath->query('.//div[@class="purchad"]', $postDiv) as $postAd) {
                $postAd->parentNode->removeChild($postAd);
            }
            $rssItem->description = cleanPostMessageHTML($dom->saveXML($postMsgDiv));
        }

        # Publication Date; default to 1st JAN 1900 when the post carries no time span
        $rssItem->setPubDate(new DateTime('1900-01-01'));
        $postDateSpan = $xpath->query('.//span[@class="time"]', $postDiv)->item(0);
        if ($postDateSpan !== null) {
            $rssItem->setPubDate(getXDADate(trim($postDateSpan->nodeValue)));
        }
        # Set the feed's lastBuildDate to the last post's date
        $rssFeed->setLastBuildDate($rssItem->getPubDate());

        $rssFeed->addRSSItem($rssItem);
    }
}