PHP fetchArticle Examples

Programming Language: PHP

Method/Function: fetchArticle

Examples at hotexamples.com: 2

PHP fetchArticle - 2 examples found. These are the top-rated real-world PHP examples of fetchArticle, extracted from open source projects. You can rate examples to help us improve their quality.

Example #1

Show file

File: CourrierInternationalBridge.php Project: j0k3r/rss-bridge

 /**
  * Scrapes the Courrier International article listing and appends up to
  * five items (uri, content, title, timestamp) to $this->items.
  *
  * @param array $param Not read by this method.
  */
 public function collectData(array $param)
 {
     $baseUrl = "http://www.courrierinternational.com";

     // A closure instead of a nested named function: a named function declared
     // inside a method is (re)declared every time the method body runs, so a
     // second call to collectData() would die with "Cannot redeclare".
     $fetchArticle = function ($link) {
         $page = file_get_html($link);
         if (!$page) {
             // Article page could not be loaded; fall back to empty content.
             return '';
         }
         $contenu = $page->find(".article-text");
         if (empty($contenu)) {
             // No article body on the page.
             return '';
         }
         return strip_tags($contenu[0]);
     };

     // NOTE(review): returnError() is defined elsewhere; presumably it aborts
     // processing when the listing page cannot be loaded — confirm.
     $html = file_get_html('http://www.courrierinternational.com/article') or $this->returnError('Error.', 500);
     $element = $html->find(".type-normal");
     $article_count = 1;
     foreach ($element as $article) {
         $anchors = $article->find("a");
         if (empty($anchors)) {
             // Without a link there is nothing to fetch or point to.
             continue;
         }
         // Build the absolute article URL once and reuse it for uri and content.
         $link = $baseUrl . $anchors[0]->getAttribute("href");

         $item = new \Item();
         $item->uri = $link;
         $item->content = $fetchArticle($link);
         $item->title = strip_tags($article->find("h2")[0]);
         $dateTime = date_parse($article->find("time")[0]);
         $item->timestamp = mktime($dateTime['hour'], $dateTime['minute'], $dateTime['second'], $dateTime['month'], $dateTime['day'], $dateTime['year']);
         $this->items[] = $item;

         // Stop once five items have been collected.
         $article_count++;
         if ($article_count > 5) {
             break;
         }
     }
 }

Example #2

Show file

File: novel.php Project: kevandotorg/nanogenmo-2015

/**
 * Fetches the wikitext of an English Wikipedia page and produces a
 * plain-text version with most wiki markup stripped.
 *
 * @param string $pagename Page title; it is URL-encoded before use.
 * @param int    $recurse  When greater than zero, a redirect page is followed
 *                         (the recursive call passes 0, so at most one hop).
 *
 * @return array Two elements: [0] the raw page text (of the redirect target
 *               when a redirect was followed, or a placeholder sentence when
 *               nothing usable was retrieved), [1] the cleaned text.
 */
function fetchArticle($pagename,$recurse = 1){

	$pagename = urlencode($pagename);
	$url = "https://en.wikipedia.org/w/api.php?action=query&prop=revisions&format=json&rvprop=content&titles=".$pagename."&utf8=";

	$ch = curl_init();
	curl_setopt($ch, CURLOPT_URL, $url);
	curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
	curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.2) Gecko/20090729 Firefox/3.5.2 GTB5');
	$result = curl_exec($ch);
	curl_close($ch);

	// curl_exec() returns false on failure; only decode an actual response body.
	$json_array = is_string($result) ? json_decode($result, true) : null;

	$extract = "There was nothing here.";

	// Guard the structure so a failed request, invalid JSON, or a page without
	// revisions (e.g. a missing page) leaves the placeholder in place instead
	// of raising warnings from foreach over a non-array.
	if (isset($json_array['query']['pages']) && is_array($json_array['query']['pages'])) {
		foreach($json_array['query']['pages'] as $page){
			if (!isset($page['revisions']) || !is_array($page['revisions'])) {
				continue;
			}
			foreach($page['revisions'] as $revision){
				if (isset($revision['*'])) { $extract = $revision['*']; } else { $extract = "You see nothing special."; }
			}
		}
	}

	// If it's a redirect, follow it (but only once). The keyword is matched
	// case-insensitively and with optional whitespace before the link.
	if ($recurse>0 && preg_match("/#REDIRECT\s*\[\[([^\]]+)\]\]/i",$extract,$matches))
	{
		// Only the raw text of the target is kept; it is cleaned again below.
		list($extract,$temparticle) = fetchArticle($matches[1],0);
	}

	if (strlen($extract)<5) { $extract = "You see nothing special here."; }

	$original = $extract;

	// Replace station templates with just the station name
	$extract = preg_replace("/{{([^|])+ stations\|station=([^{}]+)}}/","$2",$extract);

	// Unpack conversion templates
	$extract = preg_replace("/{{convert\|([^|]+)\|([^|]+)\|[^{}]+}}/","$1 $2",$extract);

	// Strip wiki template markup three times for good measure: each pass only
	// removes innermost templates, so repeated passes peel nested ones.
	$extract = preg_replace("/{{[^{}]+}}/","",$extract);
	$extract = preg_replace("/{{[^{}]+}}/","",$extract);
	$extract = preg_replace("/{{[^{}]+}}/","",$extract);

	// Strip ref tags and their contents
	$extract = preg_replace("/<ref>[^<]+<\/ref>/","",$extract);

	// NOTE(review): cleanpunctuation() is defined elsewhere in the project.
	$extract = cleanpunctuation($extract);
	$extract = strip_tags($extract); // strip all remaining HTML
	$extract = preg_replace("/'''?/","",$extract); // strip bold/italic markup
	// Strip headings, deepest level first so shorter patterns don't leave stray '='
	$extract = preg_replace("/=====[^=]+=====/","",$extract);
	$extract = preg_replace("/====[^=]+====/","",$extract);
	$extract = preg_replace("/===[^=]+===/","",$extract);
	$extract = preg_replace("/==[^=]+==/","",$extract);

	// Cleanup: newlines and &nbsp; become spaces, then runs of spaces collapse
	$extract = preg_replace("/\n/"," ",$extract);
	$extract = preg_replace("/&nbsp;/"," ",$extract);
	$extract = preg_replace("/ +/"," ",$extract);

	$extract = preg_replace("/\[\[([^|\]]+)\]\]/","$1",$extract); // strip brackets from unpiped links
	$extract = preg_replace("/\[\[([^|]+)\|([^]]+)\]\]/","$2",$extract); // replace piped links with second term
	$extract = preg_replace("/\[([^\]]+)\]/","$1",$extract); // strip anything left in a bracket (probably just external links)
	$extract = preg_replace("/\/[^ ]+\//","",$extract); // strip IPA pronunciation

	// Avoid problematic abbreviations that look like they end sentences:
	// their full stop is replaced by the literal text "&period;", and the
	// thousands separator in numbers by "&comma;".
	$extract = preg_replace("/(St|Dr|Mr|Mrs|Ms|Rt Hon|pp|\(b|\(ca?|No)\./","$1&period;",$extract);
	$extract = preg_replace("/\b([A-Za-z])\./","$1&period;",$extract);
	$extract = preg_replace("/(\d),(\d\d\d)/","$1&comma;$2",$extract);

	return array($original,$extract);
}