/**
 * Open the AlboUnict web page and retrieve the entries inside it.
 *
 * The page is parsed as HTML, handed to retrieveTable(), and the "tr"
 * elements of the node it returns are stored in $this->rows.
 *
 * @param string $uri location of the page to load
 *
 * @throws Exception when the page cannot be loaded
 */
public function __construct($uri)
{
    $src = new DOMDocument();

    // loadHTMLFile() returns false on failure; without this check the
    // constructor would carry on with an empty document.
    if (!$src->loadHTMLFile($uri)) {
        throw new Exception("Unable to download page {$uri}");
    }

    $table = $this->retrieveTable($src);
    $this->rows = $table->getElementsByTagName("tr");
}
/**
 * Load the page at the given URI and wrap it in a table parser.
 *
 * @param $uri location of the page to read; also passed to the row parser
 *
 * @return AlboTableParser parser built from the loaded document and an
 *         AlboUnitoRowParser created for the same URI
 *
 * @throws Exception when the page cannot be loaded
 */
private function readPage($uri)
{
    $document = new DOMDocument();
    $parserForRows = new AlboUnitoRowParser($uri);

    $loaded = $document->loadHTMLFile($uri);
    if ($loaded) {
        return new AlboTableParser($document, $parserForRows);
    }

    throw new Exception("Unable to download page {$uri}");
}
/**
 * Look up the English translation of a Tamil word on a dictionary page and
 * record it in the "testing" table.
 *
 * The page is parsed as HTML and every <div> inside the element with id
 * "bodyContent" is scanned, except the first and the last one. A row that
 * contains the Tamil marker for "noun" switches the part of speech from the
 * default "verb" to "noun" for this and all following rows. A row that
 * contains the Tamil marker for "English" yields the translation: the text
 * after the marker, up to the first comma, else the first space, else the end.
 * Each translation found is echoed and inserted into the database.
 *
 * NOTE(review): this relies on the legacy ext/mysql functions and on a
 * connection opened elsewhere; ext/mysql was removed in PHP 7, so the
 * persistence step should be migrated to PDO or mysqli with bound parameters.
 *
 * @param string $url      Address (or local path) of the dictionary page.
 * @param string $tam_word Tamil word the page was requested for; stored next
 *                         to the translation.
 *
 * @return void
 */
function getEnglishWords($url, $tam_word)
{
    echo " The english word is ";

    $dom = new DOMDocument("1.0");
    // Scraped pages are rarely valid HTML; parser warnings are deliberately
    // silenced and a failed load is detected through the missing container.
    @$dom->loadHTMLFile($url);

    $table = $dom->getElementById("bodyContent");
    if ($table === null) {
        // Page not loaded or container absent: nothing to scan.
        return;
    }

    $englishMarker = 'ஆங்கிலம்';
    $nounMarker = 'பெயர்ச்சொல்';
    $pos = "verb";

    $tablerow = $table->getElementsByTagName("div");
    for ($i = 1; $i < $tablerow->length - 1; $i++) {
        $str = $tablerow->item($i)->nodeValue;

        // Strict comparison so that a marker at offset 0 is still a match.
        if (strpos($str, $nounMarker) !== false) {
            $pos = "noun";
        }

        $markerAt = strpos($str, $englishMarker);
        if ($markerAt === false) {
            // No translation in this row, so there is nothing to store.
            continue;
        }

        // Skip the marker itself (byte length, the text is UTF-8) and the
        // "- " separator, then drop leading blanks so they are not mistaken
        // for the end of the first word.
        $a = substr($str, $markerAt + strlen($englishMarker));
        $a = ltrim(str_replace("- ", "", $a));

        // A delimiter at offset 0 is treated like "not found" so that the
        // result is never cut down to an empty string.
        $end = strpos($a, ',');
        if (!$end) {
            $end = strpos($a, ' ');
        }
        echo $end;
        if (!$end) {
            $end = strlen($a);
        }
        $english = substr($a, 0, $end);

        echo "For" . $tam_word . "===" . $english . "=== with POS" . $pos . "==";

        // Values come from a scraped page: escape them before building SQL.
        $sql = sprintf(
            "insert into testing values ('%s','%s','%s',0)",
            mysql_real_escape_string($tam_word),
            mysql_real_escape_string($pos),
            mysql_real_escape_string($english)
        );
        mysql_query($sql);
    }
}
/**
 * Prepare the parser from the rows of the table found in an Albo Pretorio page.
 *
 * The page is loaded as HTML. With exactly one <table>, its "tr" elements
 * become the row list and the index starts at 1 (presumably to skip a header
 * row; confirm against the iteration code). With no table at all the row
 * list is empty and the index is -1.
 *
 * @param $uri of the sub page to be parsed
 * @param $category category of the retrieved notices
 * @param $linkUriPrefix prefix for links
 *
 * @throws Exception when the page cannot be loaded or holds several tables
 */
public function __construct($uri, $category, $linkUriPrefix)
{
    $this->uri = $uri;
    $this->linkUriPrefix = $linkUriPrefix;

    $document = new DOMDocument();
    $loaded = $document->loadHTMLFile($uri);
    if (!$loaded) {
        throw new Exception("Unable to download page {$uri}");
    }
    $this->category = $category;

    $tableList = $document->getElementsByTagName("table");
    $tableCount = $tableList->length;

    if ($tableCount > 1) {
        throw new Exception("Multiple table elements found");
    }

    if ($tableCount < 1) {
        $this->rows = new DOMNodeList();
        $this->index = -1;
        return;
    }

    $this->rows = $tableList->item(0)->getElementsByTagName('tr');
    $this->index = 1;
}
$request->closeCursor(); $queryInCache = $nbResults > 0; //update lastQueried value $request = $db->prepare('UPDATE cachingTable SET lastQueried = :lastQueried WHERE queryText = :queryText'); $request->execute(array('queryText' => $dbQuery, 'lastQueried' => date("Y-m-d"))); } catch (PDOException $e) { $queryInCache = false; } } if (!$databaseConnected || !$queryInCache) { //the database could not be accessed, or did not contain this query in cache, therefore we must load to populate the cache $catImgUrls = array(); $url = 'https://lite.qwant.com/?q=' . $urlQuery . '&t=images'; libxml_use_internal_errors(true); $dom = new DOMDocument(); $dom->loadHTMLfile($url); $a = new DOMXPath($dom); $results = $a->query('//*[@class="resultimgs"]'); $catimgs = array(); for ($i = 0; $i < $results->length; ++$i) { $resultimg = $results->item($i); $metadata = array(); $img = $resultimg->getElementsByTagName('img'); $metadata['src'] = $img->item(0)->getAttribute('src'); $url = $resultimg->getElementsByTagName('a'); $metadata['url'] = $url->item(0)->getAttribute('href'); $imgmeta = $resultimg->getElementsByTagName('div')->item(0); $metadata['title'] = $imgmeta->getElementsByTagName('h2')->item(0)->nodeValue; $metadata['domainname'] = $imgmeta->getElementsByTagName('p')->item(0)->nodeValue; $metadata['size'] = $imgmeta->getElementsByTagName('p')->item(1)->nodeValue; $catImgUrls[] = $metadata;
//$your_google_calendar="https://www.google.com/calendar/embed?src=usa__en@holiday.calendar.google.com&gsessionid=OK";
/*
$your_google_calendar="https://www.google.com/calendar/embed?src=pki27nglipcm11crmei6t1nh5c%40group.calendar.google.com&ctz=Europe/Warsaw&gsessionid=OK";
$your_google_calendar="https://www.google.com/calendar/embed?"
    ."showTitle=0&showNav=0&showTabs=0&showCalendars=0&showTz=0&mode=WEEK&wkst=2&ctz=Europe/Warsaw&gsessionid=OK"
    ."&src=odc2lc1jagou97t36j4k8eaabo@group.calendar.google.com"
    ."&src=rorgf24jk2fg3bu1u209b60h88@group.calendar.google.com"
    ."&color=%23A32929";
*/

// Embed URL of the calendar page to fetch: week view, two source calendars,
// most of Google's chrome switched off through the show* parameters.
$src = 'https://www.google.com/calendar/embed?'
    . 'showTitle=0&showNav=0&showTabs=0&showCalendars=0&showTz=0&mode=WEEK&wkst=2&bgcolor=%23FFFFFF&showPrint=0&'
    . 'src=bq1lelqa523bsino05r9p3njkg%40group.calendar.google.com&color=%230D7813&'
    . 'src=q6rg7mgh7t6ro73hutbqire2lk%40group.calendar.google.com&color=%23BE6D00&'
    . 'ctz=Europe%2FWarsaw';
$your_google_calendar = $src;

// Base URL (scheme, host and directory of the embed path) for resolving
// relative resources of the fetched page.
$url = parse_url($your_google_calendar);
$google_domain = $url['scheme'] . '://' . $url['host'] . dirname($url['path']) . '/';

// Load and parse Google's raw calendar
$dom = new DOMDocument();
$dom->loadHTMLFile($your_google_calendar);

// Drop the footer if the page has one. The node has to be removed through its
// own parent: calling removeChild() on the document fails for a nested
// element, and the result must not replace $dom, which is still needed below.
$footer = $dom->getElementById('footer1');
if ($footer !== null && $footer->parentNode !== null) {
    $footer->parentNode->removeChild($footer);
}

// Clear any inline style on <body>.
$body = $dom->getElementsByTagName('body')->item(0);
$body->setAttribute('style', "");

// Point the stylesheet link at the local custom CSS (assumes there's only one
// <link> element). The original href is kept in $css_href.
$css = $dom->getElementsByTagName('link')->item(0);
$css_href = $css->getAttribute('href');
$css->setAttribute('href', "./custom_calendar.acid.css");
/*
// Change Google's JS file to use absolute URLs
$scripts = $dom->getElementsByTagName('script');
/**
 * Create a parser holding the sole entry with the specified year and number,
 * if it exists; the parser is empty otherwise.
 *
 * The selection form URI is extended with the registration number and year,
 * the resulting page is loaded as HTML and handed to a new parser together
 * with this object's entry parser.
 */
public function createByYearAndNumber($year, $number)
{
    $document = new DOMDocument();

    $filter = "&numeroRegistrazioneDa={$number}&annoRegistrazioneDa={$year}";
    $address = $this->selectionFormUri . $filter;
    $document->loadHTMLFile($address);

    return new AlbojCityGovParser($document, $this->entryParser);
}
<body> <b>Proszę podać link do pliku</b> <form action="parser.php" method="post"> <input name="s" type="text"/> <input type="Submit" name="submit" value="Akceptuj" /> </form> </body> <?php $s = @$_POST['s']; if ("{$s}" != false) { $dom = new DOMDocument(); $dom->loadHTMLfile($s); $images = $dom->getElementsByTagName('img'); $i = 0; $j = 0; foreach ($images as $image) { if ($image->hasAttribute('width') && $image->hasAttribute('height')) { $width = $image->getAttribute('width'); $height = $image->getAttribute('height'); $src = $image->getAttribute('src'); $image_info = getimagesize($src); $thumb = imagecreatetruecolor($width, $height); switch ($image_info[2]) { case 1: $i++; $imageoriginal = imagecreatefromgif($src); imagecopyresampled($thumb, $imageoriginal, 0, 0, 0, 0, $width, $height, $image_info[0], $image_info[1]);
/**
 * Look up the English translation of a Tamil word on a dictionary page and
 * record the first one found in the "testing" table.
 *
 * The page is parsed as HTML and every <div> inside the element with id
 * "bodyContent" is scanned, except the first and the last one. A row that
 * contains one of the Tamil markers for "noun" switches the part of speech
 * from the default "verb" to "noun" for this and all following rows. The
 * translation is searched after the marker for "English" or, failing that,
 * after the marker for "translations": known separators are stripped and the
 * first run of ASCII letters is taken as the English word. Scanning stops at
 * the first word found; it is echoed and inserted into the database.
 *
 * NOTE(review): this relies on the legacy ext/mysql functions and on a
 * connection opened elsewhere; ext/mysql was removed in PHP 7, so the
 * persistence step should be migrated to PDO or mysqli with bound parameters.
 *
 * @param string $url      Address (or local path) of the dictionary page.
 * @param string $tam_word Tamil word the page was requested for; stored next
 *                         to the translation.
 *
 * @return void
 */
function getEnglishWords($url, $tam_word)
{
    try {
        echo " The english word is ";

        $dom = new DOMDocument("1.0");
        // Scraped pages are rarely valid HTML; parser warnings are
        // deliberately silenced and a failed load shows up as a missing
        // container below.
        @$dom->loadHTMLFile($url);

        $table = $dom->getElementById("bodyContent");
        if (!$table) {
            return;
        }

        $englishMarker = 'ஆங்கிலம்';
        $translationsMarker = 'மொழிபெயர்ப்புகள்';
        $nounMarkers = array('பெயர்ச்சொல்', 'பெயர்ச்சொற்கள்', '(பெ)');
        // Separators and labels removed, in this order, before looking for
        // the word.
        $noise = array("- ", "ஆங்கிலம் ", ":", "(ஆங்)", "1.", "2.");

        $pos = "verb";
        $tablerow = $table->getElementsByTagName("div");
        for ($i = 1; $i < $tablerow->length - 1; $i++) {
            $str = $tablerow->item($i)->nodeValue;

            // Strict comparisons so that a marker at offset 0 still counts.
            foreach ($nounMarkers as $nounMarker) {
                if (strpos($str, $nounMarker) !== false) {
                    $pos = "noun";
                    break;
                }
            }

            $marker = $englishMarker;
            $id = strpos($str, $marker);
            if ($id === false) {
                $marker = $translationsMarker;
                $id = strpos($str, $marker);
            }
            if ($id === false) {
                continue;
            }

            // Skip the marker itself; strlen() is the byte length, which is
            // what the byte-based substr() needs for this UTF-8 text.
            $a = substr($str, $id + strlen($marker));
            $a = str_replace($noise, "", $a);

            // First run of ASCII letters. A single regex replaces the two
            // character-by-character loops, which never terminated when the
            // text had no letters or ended in one.
            if (!preg_match('/[A-Za-z]+/', $a, $match)) {
                continue;
            }
            $english1 = $match[0];

            echo "For " . $tam_word . " the english we get is " . $english1 . " with POS (pos may be wrong) " . $pos . "<br/>";

            // Values come from a scraped page: escape them before building SQL.
            $sql = sprintf(
                "insert into testing values ('%s','%s','%s',0)",
                mysql_real_escape_string($tam_word),
                mysql_real_escape_string($pos),
                mysql_real_escape_string($english1)
            );
            mysql_query($sql);

            // Only the first translation is recorded.
            return;
        }
    } catch (Exception $e) {
        echo "FIle not found for " . $tam_word;
    }
}
<?php
/**
 * Extracts all "sub-organization names" expressed in the Diário Oficial do Município de São Paulo, of 2015 and 2016.
 *
 * Every table row of the input HTML becomes one CSV line: the class attribute
 * of the first cell whose text is not purely digits, followed by the text of
 * every cell whose text is not purely digits.
 *
 * @version v0.1 2016-04-04
 * @usage php src/php/pubnetHtml2csv.php > lix.csv
 */

//CONFIGS:
$fout_file = 'php://output';
$fin_file = realpath(__DIR__ . '/../../_docs/PUBNET-retrancas_Dicio_2-Alef/secretaria-div.html');

// realpath() returns false when the file does not exist; stop with a message
// on stderr so that nothing misleading ends up in the CSV.
if ($fin_file === false) {
    fwrite(STDERR, "Input HTML file not found\n");
    exit(1);
}

// Collect parser diagnostics internally: the CSV goes to standard output and
// warnings printed there would be mixed into the data.
libxml_use_internal_errors(true);
$dom = new DOMDocument();
if (!$dom->loadHTMLFile($fin_file)) {
    fwrite(STDERR, "Unable to parse {$fin_file}\n");
    exit(1);
}
libxml_clear_errors();

$xp = new DOMXPath($dom);
$fout = fopen($fout_file, 'w');

foreach ($xp->query('//tr') as $tr) {
    $line = [];
    foreach ($xp->query('.//th|.//td', $tr) as $cel) {
        // Cells holding only digits are skipped.
        if (ctype_digit($cel->nodeValue)) {
            continue;
        }
        // The class of the first kept cell opens the line.
        if ($line === []) {
            $line[] = $cel->getAttribute('class');
        }
        $line[] = $cel->nodeValue;
    }
    fputcsv($fout, $line);
}

fclose($fout);
?>