<?php
/**
 * Index page of the Webpage Parser application.
 *
 * Name:    Venkatesh
 * Created: 2/12/2016
 *
 * The user lands here first and either enters a URL or selects one of the
 * existing URLs from the list. The list of articles is generated by parsing
 * the CNN world page on every request, so it changes over time as the CNN
 * website changes.
 *
 * This header lives in a PHP comment (not an HTML comment) so that nothing is
 * sent to the browser before the doctype below.
 */

// Some articles load slowly, so allow the script at least 60 seconds.
set_time_limit(60);

// MyParser contains the parsing functions used below and must be loaded
// first. require_once stops with a clear error if the file is missing,
// instead of failing later on the `new MyParser` line.
require_once 'MyParser.php';

// Create the parser object for the CNN world page.
$myobj = new MyParser("http://www.cnn.com/world/");

// Feed the links found on that page to getArticles(); per the original
// author's note it extracts the articles from those links.
$articles = $myobj->getArticles($myobj->getLinks());
?>
<!DOCTYPE html>
<!-- HTML code starts from here -->
<html lang="en">
<head>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <title>Webpage Parser</title>
    <!-- Loaded over HTTPS so the stylesheet is not blocked as mixed content -->
    <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/css/bootstrap.min.css">
</head>
<body>
    <h2 class="text-center"> PHP - Webpage Parser </h2>
    <!-- The user may copy/paste a CNN article URL into the text field or select an article from the given list -->
// NOTE(review): each statement below was originally on the same physical line
// as a preceding `//` comment, which turns the statement itself into comment
// text. One statement per line restores the intended execution.

// getTitle() is called and the result is copied to $title.
$title = $myobj->getTitle();

// getMetaContent() is called and the result is copied to $content.
$content = $myobj->getMetaContent();

// getStory() is called and the result is copied to $story.
$story = $myobj->getStory();

// getImages() is called and the result is copied to the $images array.
$images = $myobj->getImages();

// array_unique() removes any duplicate images (original keys are preserved).
$images = array_unique($images);

// getStoryImage() is called and the result is copied to $storyimage.
$storyimage = getStoryImage($images);

// getJpgs() is called and the result is copied to $jpgs.
$jpgs = getJpgs($images);

// getLinks() is called and the result is copied to $links.
$links = $myobj->getLinks();

// getH1() is called and the result is copied to $h1s.
$h1s = $myobj->getH1();

// getH2() is called and the result is copied to $h2s.
$h2s = $myobj->getH2();

// getH3() is called and the result is copied to $h3s.
$h3s = $myobj->getH3();

// Takes images as input and, per the original author's note, returns only
// the JPGs among those images.
function getJpgs($images) {
    $i = 0;
    // Iterate through each image.
    foreach ($images as $image) {
        // JPG images contain ".jpg"; based on this condition the JPGs are
        // separated from the rest of the images.
        // NOTE(review): stripos() returns 0 for a match at offset 0, which
        // this truthiness test treats the same as "not found"; comparing
        // with `!== false` would be stricter. Left unchanged here because
        // the remainder of this function is outside the visible chunk.
        if (stripos($image, ".jpg")) {
            // JPG images are copied to the array $jpgs