/**
 * Scrapes a news listing page and builds a NewsEntry for each thread found on it.
 *
 * Threads are taken in page order from the "ol#threads .threadbit" nodes.
 * Skipped are: threads whose class attribute contains "moved", threads without
 * a title link, and threads whose details NewsEntry::loadFromUrl() cannot load.
 * Only successfully loaded articles count towards $max_articles_to_load.
 *
 * @param string $strNewsPageAbsoluteUrl Absolute url of the listing page to fetch.
 * @param int    $max_articles_to_load   Maximum number of articles to return.
 *
 * @return NewsEntry[] Loaded articles; an empty array when the page could not be fetched.
 */
function loadArticlesFromUrl($strNewsPageAbsoluteUrl, $max_articles_to_load = 10)
{
    $lstArticles = array();

    // file_get_html() can hand back a non-object (false) when the page cannot be
    // loaded; calling find() on that would abort the whole run with a fatal error.
    $html = file_get_html($strNewsPageAbsoluteUrl);
    if (!is_object($html)) {
        return $lstArticles;
    }

    // Removes the session id parameter (s=xxxxxx0000x0x0x00) from a url.
    // The look-behind restricts the match to a parameter that is really named
    // "s" (preceded by "?", "&" or the ";" of "&amp;"), so parameters whose
    // name merely ends in "s" are left untouched.
    $stripSessionId = function ($url) {
        return preg_replace('/(?<=[?&;])s=[a-zA-Z0-9]+/', '', $url);
    };

    // Returns the $index-th node matching $selector below $node, or null if there is none.
    $nodeAt = function ($node, $selector, $index = 0) {
        $matches = $node->find($selector);

        return isset($matches[$index]) ? $matches[$index] : null;
    };

    // dom selector for the basic thread infos on the listing page
    $threadInfoSelector = "ol#threads .threadbit";

    foreach ($html->find($threadInfoSelector) as $element) {
        // maximum number of articles to load each run
        if (count($lstArticles) >= $max_articles_to_load) {
            break;
        }

        // a MOVED article lives somewhere else now, ignore it
        if (strpos((string) $element->class, "moved") !== false) {
            continue;
        }

        // Without a title link there is no article url; prefixing an empty href
        // would make us load the bare site root as if it were an article.
        $titleLink = $nodeAt($element, ".threadtitle .title");
        if ($titleLink === null) {
            continue;
        }

        // the href is relative, so turn it into an absolute url before cleaning it
        $strUrl = $stripSessionId('https://ngb.to/' . $titleLink->href);

        // try to load article details from this url
        $objNews = new NewsEntry();
        if (!$objNews->loadFromUrl($strUrl)) {
            continue;
        }

        // Date of the last update/reply. It is read from the listing here because
        // finding it inside loadFromUrl would mean walking all pages of the thread.
        $lastPostDate = $nodeAt($element, ".threadlastpost dd", 1);
        if ($lastPostDate !== null) {
            $objNews->setLastUpdateDate($lastPostDate->plaintext);
        }

        // url to the last comment on this article
        // NOTE(review): unlike the article url, this href is not prefixed with the
        // site root - confirm that setLastCommentUrl() expects it that way.
        $lastPostLink = $nodeAt($element, ".threadlastpost a.lastpostdate");
        if ($lastPostLink !== null) {
            $objNews->setLastCommentUrl($stripSessionId($lastPostLink->href));
        }

        // author of the last comment: display name and url to the profile
        $authorName = $nodeAt($element, ".threadlastpost a.username strong");
        $authorLink = $nodeAt($element, ".threadlastpost a.username");
        if ($authorName !== null && $authorLink !== null) {
            $objNews->setLastCommentAuthor(
                $authorName->plaintext,
                $stripSessionId($authorLink->href)
            );
        }

        // number of comments
        $commentCount = $nodeAt($element, "ul.threadstats a.understate");
        if ($commentCount !== null) {
            $objNews->setNumberOfComments($commentCount->plaintext);
        }

        // save the object to our list of articles
        $lstArticles[] = $objNews;
    }

    // list of all articles fetched from the url passed to this function
    return $lstArticles;
}
/**
 * Runs the parent's data handling and then attaches the author's user object.
 *
 * After parent::handleData() has processed $data, a BASHUser is created from
 * this object's authorID and stored in $this->user.
 *
 * @see DatabaseObject::handleData()
 *
 * @param mixed $data Passed through unchanged to parent::handleData().
 */
protected function handleData($data)
{
    parent::handleData($data);

    // authorID is read only after the parent call has run
    $authorID = $this->authorID;
    $this->user = new BASHUser($authorID);
}