Example #1
0
 /**
  *  Open the AlboUnict web page and retrieve the entries inside it.
  *  @param string $uri
  */
 public function __construct($uri)
 {
     $src = new DOMDocument();
     // loadHTMLFile() returns false when the page cannot be read. Fail loudly
     // (same exception and message as the other loaders in this code base)
     // instead of continuing with an empty document.
     if (!$src->loadHTMLFile($uri)) {
         throw new Exception("Unable to download page {$uri}");
     }
     // retrieveTable() is defined elsewhere in this class; presumably it
     // returns the table element that holds the entries — TODO confirm.
     $table = $this->retrieveTable($src);
     // Keep every <tr> of that table for later parsing.
     $this->rows = $table->getElementsByTagName("tr");
 }
 /**
  * Read all the entries from the page with the specified uri.
  *
  * @return AlboTableParser the parser obtained by parsing the specified page.
  */
 private function readPage($uri)
 {
     // The row parser is created before the download is attempted, so its
     // constructor always runs, even when the page turns out to be unreachable.
     $document = new DOMDocument();
     $entryParser = new AlboUnitoRowParser($uri);

     $loaded = $document->loadHTMLfile($uri);
     if ($loaded) {
         // Hand the parsed page and the row parser over to the table parser.
         return new AlboTableParser($document, $entryParser);
     }

     throw new Exception("Unable to download page {$uri}");
 }
Example #3
0
/**
 * Scrape a dictionary page for the English translation of a Tamil word and
 * store every translation found in the `testing` table.
 *
 * The page is expected to contain an element with id "bodyContent"; its
 * descendant <div> elements (except the first and the last one) are scanned
 * for the Tamil marker meaning "English". The text following the marker, up to
 * the first comma (or, failing that, the first space), is taken as the
 * translation. The part of speech defaults to "verb" and switches to "noun"
 * once the Tamil marker for "noun" has been seen.
 *
 * Progress is echoed to the output. Rows are written through the legacy
 * mysql_* API on the connection opened by the caller.
 *
 * @param string $url      address (or path) of the HTML page to scrape
 * @param string $tam_word the Tamil word the page describes
 * @return void
 */
function getEnglishWords($url, $tam_word)
{
    echo " The english word is ";

    $dom = new DOMDocument("1.0");
    // Scraped pages are rarely valid HTML; "@" keeps the parser and I/O
    // warnings out of the output, exactly as before.
    @$dom->loadHTMLFile($url);

    $table = $dom->getElementById("bodyContent");
    if ($table === null) {
        // Page missing or laid out differently: nothing to scan.
        return;
    }

    $englishMarker = 'ஆங்கிலம்';
    $nounMarker = 'பெயர்ச்சொல்';
    $pos = "verb";

    $tablerow = $table->getElementsByTagName("div");
    // The first and the last <div> are deliberately skipped.
    for ($i = 1; $i < $tablerow->length - 1; $i++) {
        $str = $tablerow->item($i)->nodeValue;

        // Strict comparison: a marker at offset 0 is still a match.
        if (strpos($str, $nounMarker) !== false) {
            $pos = "noun";
        }

        $markerAt = strpos($str, $englishMarker);
        if ($markerAt === false) {
            continue;
        }

        // Skip past the marker itself (strlen() is its byte length, the
        // former hard-coded 24) and drop the "- " separator and any
        // leading whitespace.
        $a = substr($str, $markerAt + strlen($englishMarker));
        $a = ltrim(str_replace("- ", "", $a));

        // The translation ends at the first comma, else at the first space,
        // else at the end of the text. An offset of 0 would yield an empty
        // word, so it is treated like "not found".
        $end = strpos($a, ',');
        if (!$end) {
            $end = strpos($a, ' ');
        }
        echo $end;
        if (!$end) {
            $end = strlen($a);
        }

        $english = substr($a, 0, $end);
        if ($english === '' || $english === false) {
            continue;
        }

        echo "For" . $tam_word . "===" . $english . "=== with POS" . $pos . "==";

        // Insert only when a translation was actually found, and escape the
        // values that come from the caller and from the scraped page.
        mysql_query(sprintf(
            "insert into testing values ('%s','%s','%s',0)",
            mysql_real_escape_string($tam_word),
            mysql_real_escape_string($pos),
            mysql_real_escape_string($english)
        ));
    }
}
 /**
  * Parse the entries of the Albo from the rows of the table in the Albo Pretorio page.
  *
  * @param $uri of the sub page to be parsed
  * @param $category category of the retrieved notices
  * @param $linkUriPrefix prefix for links
  */
 public function __construct($uri, $category, $linkUriPrefix)
 {
     $this->uri = $uri;
     $this->linkUriPrefix = $linkUriPrefix;

     $page = new DOMDocument();
     if (!$page->loadHTMLFile($uri)) {
         throw new Exception("Unable to download page {$uri}");
     }
     $this->category = $category;

     $tables = $page->getElementsByTagName("table");

     // More than one table is ambiguous: refuse to guess which one to read.
     if ($tables->length > 1) {
         throw new Exception("Multiple table elements found");
     }

     // No table at all: expose an empty row list and a negative index.
     if ($tables->length < 1) {
         $this->rows = new DOMNodeList();
         $this->index = -1;
         return;
     }

     // Exactly one table: keep its rows. The index starts at 1, presumably
     // to skip a header row at position 0 — TODO confirm against the iterator.
     $this->rows = $tables->item(0)->getElementsByTagName('tr');
     $this->index = 1;
 }
Example #5
0
         // Done with the previous statement; $request is reused for the UPDATE below.
         $request->closeCursor();
         // $nbResults is computed outside this excerpt; presumably it is the
         // number of cached rows matching the query — TODO confirm.
         $queryInCache = $nbResults > 0;
         // Stamp the row for this query text with today's date.
         $request = $db->prepare('UPDATE cachingTable SET lastQueried = :lastQueried WHERE queryText = :queryText');
         $request->execute(array('queryText' => $dbQuery, 'lastQueried' => date("Y-m-d")));
     } catch (PDOException $e) {
         // Any PDO failure is handled as a cache miss.
         $queryInCache = false;
     }
 }
 if (!$databaseConnected || !$queryInCache) {
     // The database could not be accessed, or did not contain this query in
     // cache, so the results are scraped from the search page instead.
     $catImgUrls = array();
     // NOTE(review): $urlQuery is concatenated as-is; it is presumably
     // URL-encoded where it is built — confirm.
     $url = 'https://lite.qwant.com/?q=' . $urlQuery . '&t=images';
     // Collect libxml parse errors internally instead of emitting PHP warnings.
     libxml_use_internal_errors(true);
     $dom = new DOMDocument();
     $dom->loadHTMLfile($url);
     $a = new DOMXPath($dom);
     // Every element whose class attribute is exactly "resultimgs" is one image result.
     $results = $a->query('//*[@class="resultimgs"]');
     // NOTE(review): $catimgs is not used anywhere in this excerpt.
     $catimgs = array();
     for ($i = 0; $i < $results->length; ++$i) {
         $resultimg = $results->item($i);
         $metadata = array();
         // NOTE(review): each item(0)/item(1) below assumes the element exists;
         // a result with a different structure would fail on null here.
         $img = $resultimg->getElementsByTagName('img');
         $metadata['src'] = $img->item(0)->getAttribute('src');
         // $url is reused: from here on it holds the <a> node list, not the search URL.
         $url = $resultimg->getElementsByTagName('a');
         $metadata['url'] = $url->item(0)->getAttribute('href');
         // The first <div> of a result carries the title (<h2>) and two <p>
         // elements, stored as 'domainname' and 'size'.
         $imgmeta = $resultimg->getElementsByTagName('div')->item(0);
         $metadata['title'] = $imgmeta->getElementsByTagName('h2')->item(0)->nodeValue;
         $metadata['domainname'] = $imgmeta->getElementsByTagName('p')->item(0)->nodeValue;
         $metadata['size'] = $imgmeta->getElementsByTagName('p')->item(1)->nodeValue;
         $catImgUrls[] = $metadata;
//$your_google_calendar="https://www.google.com/calendar/embed?src=usa__en@holiday.calendar.google.com&gsessionid=OK";
/*
$your_google_calendar="https://www.google.com/calendar/embed?src=pki27nglipcm11crmei6t1nh5c%40group.calendar.google.com&ctz=Europe/Warsaw&gsessionid=OK";
$your_google_calendar="https://www.google.com/calendar/embed?"
."showTitle=0&showNav=0&showTabs=0&showCalendars=0&showTz=0&mode=WEEK&wkst=2&ctz=Europe/Warsaw&gsessionid=OK"
."&src=odc2lc1jagou97t36j4k8eaabo@group.calendar.google.com"
."&src=rorgf24jk2fg3bu1u209b60h88@group.calendar.google.com"
."&color=%23A32929";
*/
// Address of the embedded calendar: week view, two source calendars, Warsaw time zone.
$src = 'https://www.google.com/calendar/embed?'
    . 'showTitle=0&showNav=0&showTabs=0&showCalendars=0&showTz=0&mode=WEEK&wkst=2&bgcolor=%23FFFFFF&showPrint=0&'
    . 'src=bq1lelqa523bsino05r9p3njkg%40group.calendar.google.com&color=%230D7813&'
    . 'src=q6rg7mgh7t6ro73hutbqire2lk%40group.calendar.google.com&color=%23BE6D00&'
    . 'ctz=Europe%2FWarsaw';
$your_google_calendar = $src;
$url = parse_url($your_google_calendar);
// Base address used to turn Google's relative resource paths into absolute ones.
$google_domain = $url['scheme'] . '://' . $url['host'] . dirname($url['path']) . '/';
// Load and parse Google's raw calendar
$dom = new DOMDocument();
$dom->loadHTMLFile($your_google_calendar);
// Drop the footer, if present. A node has to be removed through its own
// parent, and $dom must keep pointing at the document: removeChild() returns
// the removed node, so assigning its result to $dom would lose the document.
$footer = $dom->getElementById('footer1');
if ($footer !== null && $footer->parentNode !== null) {
    $footer->parentNode->removeChild($footer);
}
// Clear any inline style on <body>.
$body = $dom->getElementsByTagName('body')->item(0);
if ($body !== null) {
    $body->setAttribute('style', "");
}
// Replace Google's stylesheet with the local one (assumes the first <link>
// is the stylesheet). The original href is kept in $css_href.
$css_href = '';
$css = $dom->getElementsByTagName('link')->item(0);
if ($css !== null) {
    $css_href = $css->getAttribute('href');
    // The href was previously set to $google_domain . $css_href and then
    // immediately overwritten; only the final value is kept.
    $css->setAttribute('href', "./custom_calendar.acid.css");
}
/*
// Change Google's JS file to use absolute URLs
$scripts = $dom->getElementsByTagName('script');
 /**
  * Create a parser containing only the entry with the specified year and number,
  * if it exists; otherwise the parser is empty.
  */
 public function createByYearAndNumber($year, $number)
 {
     // Filter appended to the selection form address: registration number and year.
     $filter = "&numeroRegistrazioneDa={$number}&annoRegistrazioneDa={$year}";

     $document = new DOMDocument();
     $document->loadHTMLfile($this->selectionFormUri . $filter);

     // The loaded page is handed to the parser together with the shared entry parser.
     return new AlbojCityGovParser($document, $this->entryParser);
 }
Example #8
0

<body>
  <b>Proszę podać link do pliku</b>
  <form action="parser.php" method="post">
  <input name="s" type="text"/>
  <input type="Submit" name="submit" value="Akceptuj" />
  </form>
  </body>


<?php 
// Read the submitted link; "@" hides the notice raised when the form has not been posted yet.
$s = @$_POST['s'];
// Loose comparison on purpose: the block is skipped when $s is null, '' or '0'.
if ("{$s}" != false) {
    // NOTE(review): $s is user-supplied and passed straight to loadHTMLfile();
    // it can name local files as well as remote URLs — validate before use.
    $dom = new DOMDocument();
    $dom->loadHTMLfile($s);
    $images = $dom->getElementsByTagName('img');
    // $i is incremented for every GIF handled below; $j is not used in this excerpt.
    $i = 0;
    $j = 0;
    foreach ($images as $image) {
        // Only images that declare both dimensions in the markup are processed.
        if ($image->hasAttribute('width') && $image->hasAttribute('height')) {
            $width = $image->getAttribute('width');
            $height = $image->getAttribute('height');
            // NOTE(review): src is used verbatim; a relative src is not resolved against $s.
            $src = $image->getAttribute('src');
            // getimagesize(): [0] = real width, [1] = real height, [2] = IMAGETYPE_* constant.
            $image_info = getimagesize($src);
            // Canvas with the dimensions declared in the HTML attributes.
            $thumb = imagecreatetruecolor($width, $height);
            switch ($image_info[2]) {
                // 1 is IMAGETYPE_GIF.
                case 1:
                    $i++;
                    $imageoriginal = imagecreatefromgif($src);
                    // Scale the full original image onto the canvas.
                    imagecopyresampled($thumb, $imageoriginal, 0, 0, 0, 0, $width, $height, $image_info[0], $image_info[1]);
Example #9
0
/**
 * Scrape a dictionary page for the first English translation of a Tamil word
 * and store it in the `testing` table.
 *
 * The page is expected to contain an element with id "bodyContent"; its
 * descendant <div> elements (except the first and the last one) are scanned
 * in order. In each one the text after the Tamil marker for "English" — or,
 * failing that, the marker for "translations" — is cleaned of separators and
 * the first run of ASCII letters in it is taken as the translation. Scanning
 * stops at the first translation found. The part of speech defaults to "verb"
 * and switches to "noun" once one of the Tamil noun markers has been seen.
 *
 * Progress is echoed to the output. The row is written through the legacy
 * mysql_* API on the connection opened by the caller.
 *
 * @param string $url      address (or path) of the HTML page to scrape
 * @param string $tam_word the Tamil word the page describes
 * @return void
 */
function getEnglishWords($url, $tam_word)
{
    try {
        echo " The english word is ";

        $dom = new DOMDocument("1.0");
        // Scraped pages are rarely valid HTML; "@" keeps the parser and I/O
        // warnings out of the output, exactly as before.
        @$dom->loadHTMLFile($url);

        $table = $dom->getElementById("bodyContent");
        if (!$table) {
            return;
        }

        // Markers are tried in this order; the first one present wins.
        $nounMarkers = array('பெயர்ச்சொல்', 'பெயர்ச்சொற்கள்', '(பெ)');
        $englishMarkers = array('ஆங்கிலம்', 'மொழிபெயர்ப்புகள்');
        // Separators and numbering removed before looking for the word;
        // str_replace() applies them one after another in this order.
        $noise = array("- ", "ஆங்கிலம் ", ":", "(ஆங்)", "1.", "2.");
        $pos = "verb";

        $tablerow = $table->getElementsByTagName("div");
        // The first and the last <div> are deliberately skipped.
        for ($i = 1; $i < $tablerow->length - 1; $i++) {
            $str = $tablerow->item($i)->nodeValue;

            // Strict comparisons: a marker at offset 0 is still a match.
            foreach ($nounMarkers as $marker) {
                if (strpos($str, $marker) !== false) {
                    $pos = "noun";
                    break;
                }
            }

            $a = null;
            foreach ($englishMarkers as $marker) {
                $markerAt = strpos($str, $marker);
                if ($markerAt !== false) {
                    // strlen() is the marker's byte length (the former
                    // hard-coded 24 and 48).
                    $a = substr($str, $markerAt + strlen($marker));
                    break;
                }
            }
            if ($a === null || $a === false) {
                continue;
            }

            $a = str_replace($noise, "", $a);

            // First run of ASCII letters = the English word. This replaces two
            // character-by-character loops that never terminated when the
            // text held no letter at all or ended in letters.
            if (!preg_match('/[A-Za-z]+/', $a, $match)) {
                continue;
            }
            $english1 = $match[0];

            echo "For " . $tam_word . " the english we get is " . $english1 . " with POS (pos may be wrong) " . $pos . "<br/>";
            // $pos is a fixed literal and $english1 is letters only; the
            // caller-supplied word is the value that needs escaping.
            mysql_query(sprintf(
                "insert into testing values ('%s','%s','%s',0)",
                mysql_real_escape_string($tam_word),
                $pos,
                $english1
            ));
            // Only the first translation is stored.
            break;
        }
    } catch (Exception $e) {
        echo "FIle not found for " . $tam_word;
    }
}
Example #10
0
<?php

/**
 * Extracts all "sub-organization names" expressed in the Diário Oficial do Município de São Paulo, of 2015 and 2016.
 * @version v0.1 2016-04-04
 * @usage php src/php/pubnetHtml2csv.php > lix.csv
 */
//CONFIGS:
// Input page and output stream.
$fin_file = realpath(__DIR__ . '/../../_docs/PUBNET-retrancas_Dicio_2-Alef/secretaria-div.html');
$fout_file = 'php://output';

$dom = new DOMDocument();
$dom->loadHTMLfile($fin_file);
$xp = new DOMXpath($dom);
$fout = fopen($fout_file, 'w');

// One CSV line per table row.
$rows = $xp->query('//tr');
foreach ($rows as $row) {
    $fields = [];
    // The class attribute is emitted once per row, in front of the first
    // cell that is kept.
    $classPending = true;

    foreach ($xp->query('.//th|.//td', $row) as $cell) {
        $text = $cell->nodeValue;
        // Cells consisting only of digits are left out.
        if (ctype_digit($text)) {
            continue;
        }
        if ($classPending) {
            $fields[] = $cell->getAttribute('class');
            $classPending = false;
        }
        $fields[] = $text;
    }

    fputcsv($fout, $fields);
}

fclose($fout);
?>