}
// Retrieves data about the voting members of the assembly listed in
// https://scraperwiki.com/scrapers/cz_praha_voting_records_retrieval/
// (term 2010-2014) and stores one row per member in the 'info' table.
require 'scraperwiki/simple_html_dom.php';
scraperwiki::attach("cz_praha_voting_records_retrieval", "src");
$rows = scraperwiki::select("distinct(mp_id) from src.mp_vote");
// Start from an empty list so that an empty result set does not leave $data undefined.
$data = array();
foreach ($rows as $row) {
    // Member detail page, addressed by the member id taken from the vote records.
    $url = "http://www.praha.eu/jnp/cz/home/volene_organy/zastupitelstvo_hmp/slozeni_zastupitelstva/index.html?memberId=" . $row['mp_id'];
    $html = scraperwiki::scrape($url);
    $dom = new simple_html_dom();
    $dom->load($html);
    // The first <h2> holds the member's name; without it the page is not a usable
    // detail page, so skip this member instead of aborting the whole run.
    $heading = $dom->find('h2', 0);
    if (!$heading) {
        continue;
    }
    $name = trim($heading->plaintext);
    // Everything between the heading and the next <div> is the member's detail block.
    // NOTE(review): this block may also carry a mailto: link; the e-mail address is
    // not part of the saved record.
    $part = get_first_string($html, '</h2>', '<div>');
    $party = trim(get_first_string($part, 'Strana:</span>', '<br'));
    // The club is the text of the link following "Klub:"; the '::' sentinel is appended
    // so that the inner extraction has a closing marker to stop at.
    $club_link = get_first_string($part, 'Klub:</span>', '</a') . '::';
    $club = trim(get_first_string($club_link, '">', '::'));
    $data[] = array('id' => $row['mp_id'], 'name' => $name, 'party' => $party, 'club' => $club);
}
// Nothing to store when no member could be read.
if (count($data) > 0) {
    scraperwiki::save_sqlite(array('id'), $data, 'info');
}
/**
 * finds substrings between opening and closing markers
 * @return result array of the substrings
 */
function returnSubstrings($text, $openingMarker, $closingMarker)
{
    $openingMarkerLength = strlen($openingMarker);
    $closingMarkerLength = strlen($closingMarker);
    $result = array();
    $position = 0;
    while (($position = strpos($text, $openingMarker, $position)) !== false) {
        $position += $openingMarkerLength;
    $html = '<html><body>' . $r['html'] . '</body></html>';
    $dom = new simple_html_dom();
    $dom->load($html);
    $info = array('id' => $r['id'], 'decision_number' => $r['decision_number'], 'date' => $r['date'], 'document_number' => $r['document_number'], 'name' => $r['name'], 'passed' => $r['passed'], 'link' => $r['link']);
    $part = get_first_string($html, '</h2>', '<div>');
    $info['for'] = trim(get_first_string($part, 'pro:</span>', '<br')) == '' ? 0 : trim(get_first_string($part, 'pro:</span>', '<br'));
    $info['against'] = trim(get_first_string($part, 'proti:</span>', '<br')) == '' ? 0 : trim(get_first_string($part, 'proti:</span>', '<br'));
    $info['abstain'] = trim(get_first_string($part, 'zdržel se:</span>', '<br')) == '' ? 0 : trim(get_first_string($part, 'zdržel se:</span>', '<br'));
    $info['number_representatives'] = trim(get_first_string($part, 'Počet zastupitelů:</span>', '<br')) == '' ? 0 : trim(get_first_string($part, 'Počet zastupitelů:</span>', '<br'));
    $info['present'] = trim(get_first_string($part, 'přítomno:</span>', '<br')) == '' ? 0 : trim(get_first_string($part, 'přítomno:</span>', '<br'));
    $trs = $dom->find("table[class=data-grid]", 0)->find("tr");
    array_shift($trs);
    $data = array();
    foreach ($trs as $tr) {
        $tds = $tr->find("td");
        $mp_id = get_first_string($tds[0]->find("a", 0)->href . "&", "memberId=", "&");
        $data[] = array('division_id' => $info['id'], 'mp_id' => $mp_id, 'vote' => trim($tds[1]->plaintext), 'mp_name' => $tds[0]->plaintext);
    }
    //one division done
    scraperwiki::save_sqlite(array('id'), $info, 'division');
    scraperwiki::save_sqlite(array('division_id', 'mp_id'), $data, 'mp_vote');
    scraperwiki::save_var('last_id', $info['id']);
}
/**
 * finds substrings between opening and closing markers
 * @return result array of the substrings
 */
function returnSubstrings($text, $openingMarker, $closingMarker)
{
    $openingMarkerLength = strlen($openingMarker);
    $closingMarkerLength = strlen($closingMarker);
     //preg_match('/id=([0-9]{1,})/',$as[0]->href,$matches);  //simple_html_dom.php
     preg_match('/id=([0-9]{1,})/', $as[0], $matches);
     $data_mp['mp_unique_id'] = $matches[1];
     //$tmp = explode ('-',$as[0]->plaintext);  //simple_html_dom.php
     $tmp = explode('-', get_first_string($as[0], '">', '<'));
     $tmp2 = explode('/', trim(end($tmp)));
     $data_mp['state'] = $tmp2[1];
     $data_mp['party'] = $tmp2[0];
     array_pop($tmp);
     $data_mp['name'] = trim(implode('-', $tmp));
 }
 scraperwiki::save_sqlite(array('term', 'mp_id'), $data_mp, 'mp');
 //votes
 $data = array();
 //$trs0 = $dom->find('table[class=tabela-1]',0);  //simple_html_dom.php
 $trs0 = get_first_string($html[0]['html'], '<table class="tabela-1"', '<!--Fim Código-->');
 $trs = returnSubstrings($trs0, '<tr', '</tr>');
 if (count($trs) > 0) {
     array_shift($trs);
     //first row is the header
     foreach ($trs as $tr) {
         //$tds = $tr->find('td');  //simple_html_dom.php;
         $tds = returnSubstrings($tr, '<td', '</td>');
         //if ($tr->class == 'even') { //session  //simple_html_dom.php
         if (strpos($tr, 'even') > 0) {
             //session
             //$da = explode('/',trim($tds[0]->plaintext));  //simple_html_dom.php
             $da = explode('/', trim(strip_tags('<td' . $tds[0])));
             $date = $da[2] . '-' . $da[1] . '-' . $da[0];
             //$session = trim($tds[1]->plaintext);  //simple_html_dom.php
             $session = trim(strip_tags('<td' . $tds[1]));
        if ($number > 500) {
            echo $url1 . "** has more than 500 divisions -> problem -> needs to solve pagination";
            die;
        }
        $trs = $dom1->find("table[class=data-grid]", 0)->find("tr");
        array_shift($trs);
        $data = array();
        foreach ($trs as $tr) {
            $tds = $tr->find('td');
            $datum = array('decision_number' => $tds[0]->plaintext, 'date' => convert_date($tds[1]->plaintext), 'document_number' => $tds[2]->plaintext, 'name' => $tds[3]->plaintext, 'passed' => $tds[4]->plaintext, 'link' => htmlspecialchars_decode($tds[4]->find('a', 0)->href));
            $datum['id'] = get_first_string($datum['link'] . "&", "votingId=", "&");
            $url = "http://www.praha.eu" . $datum['link'];
            $html = scraperwiki::scrape($url);
            $dom = new simple_html_dom();
            $dom->load($html);
            $datum['html'] = '<h1>' . get_first_string($dom->innertext, '<h1>', '</table>') . '</table>';
            $data[] = $datum;
        }
        //one session done:
        scraperwiki::save_sqlite(array('id'), $data, 'division');
        scraperwiki::save_sqlite(array('id'), $s, 'session');
    }
}
/**
 * converts dates formats between Central European and ISO (ISO 8601)
 * @return converted date
 * examples:
 * convert_date('2010-02-15','to euro')
 *    returns '15.2.2010'
 * convert_date('15.2.2010')
 *    returns '2010-02-15'
コード例 #5
0
/**
 * Downloads one division (roll-call vote) detail page from camara.cl, stores the
 * division and every individual vote through the MySQL helper, and returns the
 * parsed data.
 *
 * Relies on names defined outside this function: Grabber(), get_first_string(),
 * returnSubstrings(), the MySQL class made available by ./db.inc.php, and the
 * global $mes lookup used to translate the month token of the page's date.
 *
 * @param mixed $division_id identifier appended to the detail URL (prmID) and
 *                           written to the id_parlamento column
 * @return array with keys:
 *   'division_id'   the given id,
 *   'original_html' the downloaded page,
 *   'info'          topic, article, session, step, division_type, quorum, result,
 *                   name, date, time, enSala,
 *   'total'         counted votes: yes, no, abstain, dispensed, paired,
 *   'mp'            'mp_<id>' => array(mp_id, vote [y|n|a|d|p], name, sex [f|m])
 *                   (only present when at least one vote was found),
 *   'error'         only present when the counted sums differ from the page's
 *                   summary table or the page looks too small
 */
function cl_camara_division2array($division_id)
{
    include "./db.inc.php";
    $db = new MySQL();
    $url = "http://www.camara.cl/trabajamos/sala_votacion_detalle.aspx?prmID=";
    $out = array();
    $out['division_id'] = $division_id;
    $html = Grabber($url . $division_id);
    $out['original_html'] = $html;

    // HTML sections listing the deputies of each vote category; each one runs from
    // its own heading to the heading of the next category.
    $a_favor_sub = get_first_string($html, 'A favor</h2>', 'En contra</h2>');
    $en_contra_sub = get_first_string($html, 'En contra</h2>', 'Abstención</h2>');
    $abstencion_sub = get_first_string($html, 'Abstención</h2>', 'Dispensados Art. 5°</h2>');
    $dispensados_sub = get_first_string($html, 'Dispensados Art. 5°</h2>', 'Pareos</h2>');
    $pareos_sub = get_first_string($html, 'Pareos</h2>', '</div>');

    // Cells of the summary table; indexes 0..3 are compared below with the counted
    // yes / no / abstain / dispensed votes.
    $table_sub = get_first_string($html, '<table class="tabla resumenvotacion">', '</table>');
    $table_control_number = returnSubstrings($table_sub, '<td>', '</td>');

    // Labelled fields of the page. Single quotes are backslash-escaped because the
    // values are interpolated into the INSERT statement further down.
    $fecha = trim(get_first_string($html, 'Fecha:</strong>', '</p>'));
    $materia = str_replace("'", "\\'", trim(get_first_string($html, 'Materia:</strong>', '</p>')));
    $out['info']['topic'] = $materia;
    if ($materia == "") {
        // No "Materia" on the page: fall back to "Observaciones".
        $materia = str_replace("'", "\\'", trim(get_first_string($html, 'Observaciones:</strong>', '</p>')));
        $out['info']['topic'] = $materia;
    }
    $articulo = str_replace("'", "\\'", trim(get_first_string($html, 'Artículo:</strong>', '</p>')));
    $out['info']['article'] = $articulo;
    $sesion = str_replace("'", "\\'", trim(get_first_string($html, 'Sesión:</strong>', '</p>')));
    $out['info']['session'] = $sesion;
    $tramite = str_replace("'", "\\'", trim(get_first_string($html, 'Trámite:</strong>', '</p>')));
    $out['info']['step'] = $tramite;
    $tipo_de_votacion = str_replace("'", "\\'", strtolower(trim(get_first_string($html, 'Tipo de votación:</strong>', '</p>'))));
    $out['info']['division_type'] = $tipo_de_votacion;
    $quorum = str_replace("'", "\\'", trim(get_first_string($html, 'Quorum:</strong>', '</p>')));
    $out['info']['quorum'] = $quorum;
    $resultado = str_replace("'", "\\'", trim(get_first_string($html, 'Resultado:</strong>', '</p>')));
    $out['info']['result'] = $resultado;
    $name_sub = trim(get_first_string($html, '<div id ="detail">', '<p>'));
    $name = trim(get_first_string($name_sub, '<h2>', '</h2>'));
    $out['info']['name'] = $name;

    // Date and time: the space-separated tokens 0, 2 and 4 are used as day, month
    // token and year, token 5 as the time; the month token is translated via $mes.
    // NOTE(review): presumably the text looks like "12 de Ene. de 2011 10:45" — confirm.
    $fecha_db_ar = explode(' ', $fecha);
    global $mes;
    $fecha_db_date = $fecha_db_ar[4] . '-' . $mes[trim($fecha_db_ar[2], '.')] . '-' . $fecha_db_ar[0];
    $fecha_db_time = $fecha_db_ar[5];
    $out['info']['date'] = $fecha_db_date;
    $out['info']['time'] = $fecha_db_time;

    $camara = 'C.Diputados';
    $en_sala = '1';
    $out['info']['enSala'] = 'true';

    // Only titles that START with "Bolet" carry a bulletin number. The comparison
    // must be strict: strpos() returns false when the needle is absent, and
    // false == 0 would wrongly treat every other title as a bulletin.
    $nro_boletin = null;
    if (strpos($name, 'Bolet') === 0) {
        $nro_boletin = substr($name, 12);
    }
    if ($nro_boletin != null) {
        $id_proyecto_ley = $db->getIdProyectoLey($nro_boletin);
    } else {
        $id_proyecto_ley = 0;
    }
    $id_sesion = $db->getIdSesion($sesion);

    // Text columns are converted from UTF-8 to ISO-8859-1 before being stored.
    // $name is escaped here like the other text columns were above, so that a
    // quote in the title cannot break the statement.
    $name = str_replace("'", "\\'", utf8_decode($name));
    $tipo_de_votacion = utf8_decode($tipo_de_votacion);
    $articulo = utf8_decode($articulo);
    $materia = utf8_decode($materia);
    $quorum = utf8_decode($quorum);

    // 'i' is the minutes format character; 'm' would insert the month instead.
    $now = date('Y-m-d H:i:s');

    // NOTE(review): the statement is assembled by string interpolation from scraped
    // page content, and the numeric columns are not validated; switch to a
    // parameterised query if the MySQL helper offers one.
    $query = "INSERT INTO Votacion (name,camara,en_sala,tipo,articulo,materia,fecha,hora,voto_si,voto_no,voto_abs,voto_disp,voto_pareos,voto_aus,resultado,quorum,id_proyecto_ley,id_sesion,id_parlamento,created_at,updated_at) VALUES ('{$name}', '{$camara}', {$en_sala}, '{$tipo_de_votacion}', '{$articulo}', '{$materia}', '{$fecha_db_date}', '{$fecha_db_time}', {$table_control_number[0]}, {$table_control_number[1]}, {$table_control_number[2]}, {$table_control_number[3]}, 0, 0, '{$resultado}', '{$quorum}', {$id_proyecto_ley}, {$id_sesion}, {$division_id}, '{$now}', '{$now}')";
    $id_votacion = $db->insert($query);

    // One entry per vote category: key in $out['total'], vote code stored with each
    // deputy, and the HTML section that lists the deputies of that category.
    $vote_sections = array(
        array('yes', 'y', $a_favor_sub),
        array('no', 'n', $en_contra_sub),
        array('abstain', 'a', $abstencion_sub),
        array('dispensed', 'd', $dispensados_sub),
        array('paired', 'p', $pareos_sub),
    );
    $out['total'] = array('yes' => 0, 'no' => 0, 'abstain' => 0, 'dispensed' => 0, 'paired' => 0);
    foreach ($vote_sections as $section) {
        list($total_key, $vote_code, $section_html) = $section;
        // Deputy ids are the values between 'ID=' and the end of the link's href.
        foreach (returnSubstrings($section_html, 'ID=', '">') as $mp_row) {
            $db->insertVoto($id_votacion, $mp_row, $vote_code);
            // Link text of this deputy, e.g. title, surname and given name.
            $name_pom = str_replace("'", "\\'", trim(get_first_string($section_html, 'prmID=' . $mp_row . '">', '</a>')));
            // Split on '.': part 0 is the title ("Sra" marks a woman), parts 1 and 2
            // form the stored name. Missing parts are treated as empty strings.
            $name_pom2 = explode('.', $name_pom);
            $name_first = isset($name_pom2[1]) ? $name_pom2[1] : '';
            $name_rest = isset($name_pom2[2]) ? $name_pom2[2] : '';
            $mp_key = 'mp_' . $mp_row;
            $out['mp'][$mp_key]['mp_id'] = $mp_row;
            $out['mp'][$mp_key]['vote'] = $vote_code;
            $out['mp'][$mp_key]['name'] = trim($name_first) . '.' . $name_rest;
            $out['mp'][$mp_key]['sex'] = ($name_pom2[0] === 'Sra') ? 'f' : 'm';
            $out['total'][$total_key]++;
        }
    }

    // Cross-check the counted votes against the page's summary table. This runs
    // once after all categories are counted, so it also covers pages without any
    // paired deputies.
    $sums_match = $table_control_number[0] == $out['total']['yes']
        && $table_control_number[1] == $out['total']['no']
        && $table_control_number[2] == $out['total']['abstain']
        && $table_control_number[3] == $out['total']['dispensed'];
    if (!$sums_match) {
        $out['error'] = 'wrong sums: yes:' . $table_control_number[0] . ' vs. ' . $out['total']['yes'] . ', no:' . $table_control_number[1] . ' vs. ' . $out['total']['no'] . ', abstain:' . $table_control_number[2] . ' vs. ' . $out['total']['abstain'] . ', dispensed:' . $table_control_number[3] . ' vs. ' . $out['total']['dispensed'];
    }

    // Update the stored division with the paired and absent counts.
    // NOTE(review): 120 is presumably the number of seats in the chamber, and
    // dispensed deputies are not subtracted — confirm both are intended.
    $ausentes = 120 - $table_control_number[0] - $table_control_number[1] - $table_control_number[2] - $out['total']['paired'];
    $db->updatePareosAusentes($id_votacion, $out['total']['paired'], $ausentes);

    // A very short page takes precedence over a sum mismatch as the reported error.
    if (strlen($html) < 8300) {
        $out['error'] = 'small file; might have not been downloaded correctly or wrong id';
    }
    return $out;
}
        }
        foreach ($htmls as $key => $html) {
            $dom->load($html);
            $divs = $dom->find('div[class=votacionesResultado]');
            foreach ($divs as $div) {
                $as = $div->find('a');
                if (count($as) > 1) {
                    $link = $div->find('a', 1)->href;
                    $td_ar = explode("<br>", $div->find('td', 0));
                    $si_ar = explode(':', $td_ar[0]);
                    $si = trim($si_ar[1]);
                    $no_ar = explode(':', $td_ar[1]);
                    $no = trim($no_ar[1]);
                    $abst_ar = explode(':', $td_ar[2]);
                    $abst = trim($abst_ar[1]);
                    $number = get_first_string($div->innertext, 'votacion=', '&');
                    $url = "http://www.congreso.es" . $link;
                    $xml = str_replace("ISO-8859-1", "UTF-8", iconv("ISO-8859-1", "UTF-8", scraperwiki::scrape($url)));
                    $data = array('date' => $right_date->format('Y-m-d'), 'number' => $number, 'yes' => $si, 'no' => $no, 'abstain' => $abst, 'link' => $link, 'xml' => $xml);
                    scraperwiki::save_sqlite(array('date', 'number'), $data, 'division');
                }
            }
        }
        // /foreach $htmls
    }
    $date = $right_date->modify('+1 day');
    scraperwiki::save_var('last_date', $date->format('Y-m-d'));
}
/*print_r($data);

if (isset($data)) {
$dom = new simple_html_dom();
$dom->load($html);
//selects
$selects = $dom->find("select");
//options from 2nd select
$regs = $selects[1]->find("option");
foreach ((array) $regs as $reg) {
    $regions[] = $reg->value;
}
//foreach region
foreach ((array) $regions as $key => $region) {
    if ($key >= $last_region) {
        //get number of records
        $url = "http://wwwinfo.mfcr.cz/cgi-bin/ufisreg/vyber1.pl?Viewico=1&zkokraj={$region}&uzemcelek=2&Viewnao=0&useZko=0&typ=1&pocet=0";
        $html = iconv("cp1250", "UTF-8//TRANSLIT", scraperwiki::scrape($url));
        $total = trim(get_first_string($html, 'Celkem nalezeno', 'záznamů'));
        //up to number of records
        for ($i = $last_i; $i < $total; $i = $i + 20) {
            //get the html
            $url = "http://wwwinfo.mfcr.cz/cgi-bin/ufisreg/vyber1.pl?Viewico=1&zkokraj={$region}&uzemcelek=2&Viewnao=0&useZko=0&typ=1&pocet={$i}";
            $html = iconv("cp1250", "UTF-8//TRANSLIT", scraperwiki::scrape($url));
            //get dom
            $dom = new simple_html_dom();
            $dom->load($html);
            //extract the table with data
            $tables = $dom->find("table");
            $out = array('region' => $region, 'i' => $i, 'html' => $tables[2]->innertext);
            //save it
            scraperwiki::save_sqlite(array('region', 'i'), $out);
            scraperwiki::save_var('last_i', $i);
            scraperwiki::save_var('last_region', $key);
コード例 #8
0
     $item['interpelation_oral'] = 0;
 }
 //interpelations written
 $url = "http://www.psp.cz/sqw/tisky.sqw?o={$term}&pi=" . $row['id'];
 $html = iconv("cp1250", "UTF-8//TRANSLIT", scraperwiki::scrape($url));
 if (strpos($html, 'Celkem nalezen')) {
     $ar = explode(' ', trim(get_first_string($html, 'Celkem nalezen', 'tisk')));
     $item['interpelation_written'] = $ar[count($ar) - 1];
 } else {
     $item['interpelation_written'] = 0;
 }
 //law proposals / návrhy zákonů
 $url = "http://www.psp.cz/sqw/tisky.sqw?o={$term}&nz=" . $row['id'];
 $html = iconv("cp1250", "UTF-8//TRANSLIT", scraperwiki::scrape($url));
 if (strpos($html, 'Celkem nalezen')) {
     $ar = explode(' ', trim(get_first_string($html, 'Celkem nalezen', 'tisk')));
     $item['proposal'] = end($ar);
 } else {
     $item['proposal'] = 0;
 }
 //speeches (number of sessions)
 $url = "http://www.psp.cz/eknih/2010ps/rejstrik/jmenny/{$row['id']}.html";
 //**********
 $html = iconv("cp1250", "UTF-8//TRANSLIT", scraperwiki::scrape($url));
 preg_match_all('/#sx/', $html, $matches);
 $item['speech_session'] = count($matches[0]);
 /*print_r($item);
   if ($i > 3)
     die();*/
 scraperwiki::save_var('last_id', $row['id']);
 scraperwiki::save_sqlite(array('id'), $item);