Exemplo n.º 1
0
/**
 * Loads the news articles listed on a forum news page.
 *
 * Fetches the page at the given url, walks over every thread entry matching
 * "ol#threads .threadbit", skips entries whose class contains "moved" and
 * creates a NewsEntry for each remaining thread until the limit is reached.
 * Only entries whose NewsEntry::loadFromUrl() call succeeds are returned.
 *
 * @param string $strNewsPageAbsoluteUrl absolute url of the news overview page
 * @param int    $max_articles_to_load   maximum number of articles to return
 *
 * @return array list of NewsEntry objects; empty if the page could not be
 *               fetched or no article could be loaded
 */
function loadArticlesFromUrl($strNewsPageAbsoluteUrl, $max_articles_to_load = 10)
{
    //list of all articles found on this page
    $lstArticles = array();

    //fetch first page on ngb news
    $html = file_get_html($strNewsPageAbsoluteUrl);
    if (!is_object($html)) {
        //fetching/parsing failed, calling find() on the result would be a fatal error
        return $lstArticles;
    }

    //returns the match number $index of $selector below $node, or null if there is none
    $findNode = function ($node, $selector, $index = 0) {
        $matches = $node->find($selector);
        return isset($matches[$index]) ? $matches[$index] : null;
    };

    //removes the session id from an url s=xxxxxx0000x0x0x00
    //NOTE(review): the pattern also matches any other "...s=value" parameter and
    //leaves the surrounding "?"/"&" behind; kept unchanged so stored urls stay comparable.
    $stripSessionId = function ($url) {
        return preg_replace('/s=[a-zA-Z0-9]+/', '', (string) $url);
    };

    // Find all news threads on first page
    foreach ($html->find("ol#threads .threadbit") as $element) {
        //maximum number of articles to load each run
        if (count($lstArticles) >= $max_articles_to_load) {
            break;
        }

        //check if this is a MOVED article and ignore it
        if (strpos((string) $element->class, "moved") !== false) {
            continue;
        }

        //get (relative) url to article; without it there is nothing to load
        $titleLink = $findNode($element, ".threadtitle .title");
        if ($titleLink === null || (string) $titleLink->href === '') {
            continue;
        }

        //make the url absolute and remove the session id
        $strUrl = $stripSessionId('https://ngb.to/' . $titleLink->href);

        //create new news entry object and try to load article details from this url
        $objNews = new NewsEntry();
        if (!$objNews->loadFromUrl($strUrl)) {
            continue;
        }

        //remember date of last update/reply. we gotta fetch this here, not inside
        //of loadFromUrl. other approach would have to go through all pages to find
        //the last post date.
        $lastPostDate = $findNode($element, ".threadlastpost dd", 1);
        $objNews->setLastUpdateDate($lastPostDate !== null ? $lastPostDate->plaintext : null);

        //get url to last comment on this article, without session id
        $lastPostLink = $findNode($element, ".threadlastpost a.lastpostdate");
        $objNews->setLastCommentUrl($stripSessionId($lastPostLink !== null ? $lastPostLink->href : ''));

        //set author of the last comment. name and url to profile (without session id)
        $authorName = $findNode($element, ".threadlastpost a.username strong");
        $authorLink = $findNode($element, ".threadlastpost a.username");
        $objNews->setLastCommentAuthor(
            $authorName !== null ? $authorName->plaintext : null,
            $stripSessionId($authorLink !== null ? $authorLink->href : '')
        );

        //get number of comments
        $commentCount = $findNode($element, "ul.threadstats a.understate");
        $objNews->setNumberOfComments($commentCount !== null ? $commentCount->plaintext : null);

        //save the object to our list of articles.
        $lstArticles[] = $objNews;
    }

    //returns list of all articles fetched from the url passed to this function
    return $lstArticles;
}
 /**
  * Delegates to the parent implementation, then creates the BASHUser for
  * this object's authorID and stores it in $this->user.
  *
  * @see DatabaseObject::handleData()
  *
  * @param mixed $data passed through unchanged to parent::handleData()
  */
 protected function handleData($data)
 {
     // parent runs first; authorID is presumably populated by it — TODO confirm
     parent::handleData($data);

     $authorID = $this->authorID;
     $this->user = new BASHUser($authorID);
 }