<!--
    Name: Venkatesh
    Created: 2/12/2016
    Description: This is the index page of the Webpage Parser application. The user lands here
    first and either enters a URL or selects an existing URL from the given list. The list of
    articles on this page is generated by parsing the "http://cnn.com/world/" page, so it changes
    over time because it is loaded directly from the CNN website.
-->
<!-- Start of PHP code -->
<?php
// Raise the execution time limit to 60 seconds, because some articles may load slowly.
set_time_limit(60);

// MyParser.php has to be included before the MyParser class is instantiated below.
include 'MyParser.php';

// Create the parser object by passing the CNN world page URL to the MyParser constructor.
$myobj = new MyParser('http://www.cnn.com/world/');

// Feed the result of getLinks() into getArticles() and store what it returns in $articles.
// Per the original author's note, this parses the initial CNN page and extracts the articles
// behind the links found on it.
$articles = $myobj->getArticles($myobj->getLinks());
?>
<!-- HTML code starts from here -->
<html>
<head>
    <title> Webpage Parser</title>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <link rel="stylesheet" href="http://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/css/bootstrap.min.css">
</head>
<body>
    <center>
        <h2> PHP - Webpage Parser </h2>
    </center>
    <!-- The user may copy/paste a CNN article URL into the text field or select an article from the given list -->
/**
 * Builds the placeholder map for an act document and renders the template body with it.
 *
 * Values are read from $data['act'], $data['works'], $data['contract'], $data['client'] and
 * $data['settings']. Falsy dates, names and signatory fields are replaced with underscore
 * placeholders; multi-line fields are passed through nl2br().
 *
 * @param mixed $data Source data exposing the keys 'act', 'works', 'contract', 'client' and
 *                    'settings' (each settings entry is read through its "value" property).
 * @param mixed $tmpl Template object; only its "body" property is read.
 * @return mixed The result of MyParser::parse() for the template body and the built map.
 */
private function _tmpl_body($data, $tmpl)
{
    $blank      = '____________________';
    $blankDate  = '"___" _________ 20 __ г.';
    $dateFormat = 'd MMMM yyyy';

    // Fields shared by the client and the organisation, in output order.
    // true  => multi-line text, converted with nl2br()
    // false => single value, replaced by the blank placeholder when falsy
    $partyFields = array(
        'name'        => false,
        'fullname'    => false,
        'requisite'   => true,
        'address'     => true,
        'contactdata' => true,
        'headpost'    => false,
        'headfio'     => false,
        'headbasis'   => false,
    );

    $numstr = new num2str();
    $map = array();

    // Act section: the sum is exposed both as-is and converted by num2str.
    $map['act_sum_num'] = $data['act']['sum'];
    $map['act_sum'] = $numstr->convert($data['act']['sum']);
    if ($data['act']['date']) {
        $map['act_date'] = Yii::app()->dateFormatter->format($dateFormat, $data['act']['date'], true);
    } else {
        $map['act_date'] = $blankDate;
    }
    $map['act_num'] = $data['act']['num'];
    $map['works'] = $data['works'];

    // Contract section: presence is tested through array access, the value is read as a property.
    if ($data['contract']['date']) {
        $map['contract_date'] = Yii::app()->dateFormatter->format($dateFormat, $data['contract']->date, true);
    } else {
        $map['contract_date'] = $blankDate;
    }
    if ($data['contract']['num']) {
        $map['contract_num'] = $data['contract']->num;
    } else {
        $map['contract_num'] = '___';
    }

    // Client section.
    foreach ($partyFields as $field => $isMultiline) {
        $value = $data['client'][$field];
        if ($isMultiline) {
            $map['client_' . $field] = nl2br($value);
        } else {
            $map['client_' . $field] = $value ? $value : $blank;
        }
    }

    // Organisation section, read from the "org.*" settings entries.
    foreach ($partyFields as $field => $isMultiline) {
        $value = $data['settings']['org.' . $field]->value;
        if ($isMultiline) {
            $map['org_' . $field] = nl2br($value);
        } else {
            $map['org_' . $field] = $value ? $value : $blank;
        }
    }

    $parser = new MyParser();

    return $parser->parse($tmpl->body, $map);
}
/**
 * Builds the placeholder map for an invoice document and renders the template body with it.
 *
 * Values are read from $data['invoice_fkt'], $data['works'], $data['client'] and
 * $data['settings']. Falsy dates, names, bank details and signatory fields are replaced with
 * underscore placeholders; multi-line fields are passed through nl2br().
 *
 * @param mixed $data Source data exposing the keys 'invoice_fkt', 'works', 'client' and
 *                    'settings' (each settings entry is read through its "value" property).
 * @param mixed $tmpl Template object; only its "body" property is read.
 * @return mixed The result of MyParser::parse() for the template body and the built map.
 */
private function _tmpl_body($data, $tmpl)
{
    $blank      = '____________________';
    $blankDate  = '"___" _________ 20 __ г.';
    $dateFormat = 'd MMMM yyyy';

    // Client fields in output order.
    // true  => multi-line text, converted with nl2br()
    // false => single value, replaced by the blank placeholder when falsy
    $clientFields = array(
        'name'        => false,
        'fullname'    => false,
        'requisite'   => true,
        'address'     => true,
        'contactdata' => true,
        'headpost'    => false,
        'headfio'     => false,
        'headbasis'   => false,
    );

    // Organisation fields in output order, same flag meaning as above.
    $orgFields = array(
        'name'              => false,
        'fullname'          => false,
        'requisite'         => true,
        'address'           => true,
        'contactdata'       => true,
        'bank'              => false,
        'inn'               => false,
        'kpp'               => false,
        'bik'               => false,
        'sett_acc'          => false,
        'correspondent_acc' => false,
        'vat'               => false,
        'vat_value'         => false,
        'glavbuh'           => false,
        'headpost'          => false,
        'headfio'           => false,
        'headbasis'         => false,
    );

    $numstr = new num2str();

    // Resolve the cargo address / sender: 'self' maps to a fixed label, 'other' to the
    // accompanying "*_info" text, anything else to a dash placeholder.
    $cargo = array();
    foreach (array('cargo_addr', 'cargo_send') as $key) {
        $mode = $data['invoice_fkt'][$key];
        if ($mode == 'self') {
            $cargo[$key] = 'Он же';
        } elseif ($mode == 'other') {
            $cargo[$key] = $data['invoice_fkt'][$key . '_info'];
        } else {
            $cargo[$key] = '---';
        }
    }

    $map = array();

    // Invoice section: the sum is exposed both as-is and converted by num2str.
    $map['inv_sum_num'] = $data['invoice_fkt']['sum'];
    $map['inv_sum'] = $numstr->convert($data['invoice_fkt']['sum']);
    if ($data['invoice_fkt']['date']) {
        $map['inv_date'] = Yii::app()->dateFormatter->format($dateFormat, $data['invoice_fkt']['date'], true);
    } else {
        $map['inv_date'] = $blankDate;
    }
    $map['inv_num'] = $data['invoice_fkt']['num'];
    $map['works'] = $data['works'];
    $map['works_num'] = count($data['works']);
    $map['cargo_addr'] = $cargo['cargo_addr'];
    $map['cargo_send'] = $cargo['cargo_send'];

    // Client section.
    foreach ($clientFields as $field => $isMultiline) {
        $value = $data['client'][$field];
        if ($isMultiline) {
            $map['client_' . $field] = nl2br($value);
        } else {
            $map['client_' . $field] = $value ? $value : $blank;
        }
    }

    // Organisation section, read from the "org.*" settings entries.
    // Each field is tested against its OWN value. The previous code tested 'org.name' when
    // filling 'org_bank', so an empty bank with a non-empty name produced an empty value
    // instead of the blank placeholder (and vice versa).
    // Note: the falsy test also treats the string '0' as empty.
    foreach ($orgFields as $field => $isMultiline) {
        $value = $data['settings']['org.' . $field]->value;
        if ($isMultiline) {
            $map['org_' . $field] = nl2br($value);
        } else {
            $map['org_' . $field] = $value ? $value : $blank;
        }
    }

    $parser = new MyParser();

    return $parser->parse($tmpl->body, $map);
}
<!--
    Name: Venkatesh
    Created: 2/12/2016
    Description: This file is executed when the user submits a URL or selects a URL from the list.
    It displays the complete results obtained after parsing the web page.
-->
<!-- Start of PHP code -->
<?php
// MyParser.php has to be included before the MyParser class is instantiated below.
include 'MyParser.php';

// Read the URL from the query string first (GET).
// The isset() guard avoids an "undefined index / array key" diagnostic being written into
// the page when the parameter is absent; the resulting value (null) is the same as before.
$pageurl = isset($_GET['url']) ? $_GET['url'] : null;

// If nothing usable came through GET, the form was submitted with the POST method.
if (!$pageurl) {
    $pageurl = isset($_POST['url']) ? $_POST['url'] : null;
}

// NOTE(review): $pageurl is user-controlled and is handed to MyParser unchanged. Confirm that
// MyParser only accepts http/https URLs, so local paths or other stream wrappers cannot be read.

// Instantiate the parser object with the given URL.
$myobj = new MyParser($pageurl);

// Store the result of getTitle() in $title.
$title = $myobj->getTitle();

// Store the result of getMetaContent() in $content.
$content = $myobj->getMetaContent();

// Store the result of getStory() in $story.
$story = $myobj->getStory();

// Store the result of getImages() in $images, then drop duplicate entries.
$images = $myobj->getImages();
$images = array_unique($images);

// NOTE(review): getStoryImage() and getJpgs() are called as plain functions, not as methods
// of $myobj. Confirm they are defined as global functions (presumably in MyParser.php).

// Store the result of getStoryImage() in $storyimage.
$storyimage = getStoryImage($images);

// Store the result of getJpgs() in $jpgs.
$jpgs = getJpgs($images);

//getLinks() function is called and the result is copied to the $links variable