if (empty($_FILES['newfile'])) { // file was just uploaded notify(get_string("uploadproblem")); } if (!is_uploaded_file($_FILES['newfile']['tmp_name']) or $_FILES['newfile']['size'] == 0) { notify(get_string("uploadnofilefound")); } else { // Valid file is found if (readdata($course->id, 'game', $dirtemp, $r_levels, $r_titles, $r_texts, $dirfordelete)) { // first try to reall all of the data in $subchapter = $_POST['subchapter'] != 0; $overwrite = (int) $_POST['overwrite'] != 0; if ($overwrite) { game_bookquiz_deletebook($course->id, $bookid); } $pageobjects = extract_data($course->id, 'book', $bookid, $dirtemp, $subchapter, $r_levels, $r_titles, $r_texts); // parse all the html files into objects clean_temp($dirfordelete); // all done with files so dump em $objects = game_bookquiz_create_objects($pageobjects, $bookid); // function to preps the data to be sent to DB if (!game_bookquiz_save_objects($objects)) { // sends it to DB error("could not save"); } } else { error('could not get data'); } print_continue("{$CFG->wwwroot}/mod/game/view.php?id={$cm->id}"); print_footer($course); exit;
$strimportppt = get_string("importppt", "lesson"); $strlessons = get_string("modulenameplural", "lesson"); print_header_simple("{$strimportppt}", " {$strimportppt}", "<a href=\"index.php?id={$course->id}\">{$strlessons}</a> -> <a href=\"{$CFG->wwwroot}/mod/{$modname}/view.php?id={$cm->id}\">" . format_string($mod->name, true) . "</a>-> {$strimportppt}"); if ($form = data_submitted()) { /// Filename if (empty($_FILES['newfile'])) { // file was just uploaded notify(get_string("uploadproblem")); } if (!is_uploaded_file($_FILES['newfile']['tmp_name']) or $_FILES['newfile']['size'] == 0) { notify(get_string("uploadnofilefound")); } else { // Valid file is found if ($rawpages = readdata($_FILES, $course->id, $modname)) { // first try to reall all of the data in $pageobjects = extract_data($rawpages, $course->id, $mod->name, $modname); // parse all the html files into objects clean_temp(); // all done with files so dump em $mod_create_objects = $modname . '_create_objects'; $mod_save_objects = $modname . '_save_objects'; $objects = $mod_create_objects($pageobjects, $mod->id); // function to preps the data to be sent to DB if (!$mod_save_objects($objects, $mod->id, $pageid)) { // sends it to DB error("could not save"); } } else { error('could not get data'); } echo "<hr>";
/**
 * Fetches the detail page for one planning application and stores the
 * fields read from it in the global $applications array under $AppNo.
 *
 * The page is cut down to the markup between the "lblSearchDetails" span and
 * the "linkborder" div, split on "</li>", and selected list items are passed
 * through extract_data() (defined elsewhere in this file).
 *
 * @param string $AppNo Application reference; appended to the query string as-is.
 * @return void
 */
function parse_detail($AppNo)
{
    global $applications;

    $page = fetch_page(
        'http://www.planning.wealden.gov.uk/aspxpages/ResultsDetail.aspx?appref=' . $AppNo . '&Category=DC'
    );

    // Keep only the section between the two markers, then break it into list items.
    list($discard, $section) = explode('<span id="lblSearchDetails">', $page, 2);
    list($section, $discard) = explode('<div class="linkborder">', $section, 2);
    $items = explode('</li>', $section);

    // List-item position => field name, in the order the fields are stored.
    $fields = array(
        1  => 'AppType',
        2  => 'DateRec',
        3  => 'DateExp',
        6  => 'Parish',
        7  => 'GridRef',
        8  => 'UPRN',
        9  => 'Status',
        10 => 'DateConExp',
        11 => 'DateComDel',
        12 => 'Decision',
        13 => 'DateDec',
        14 => 'CaseOfficer',
    );

    foreach ($fields as $position => $field) {
        $text = extract_data($items[$position]);

        if ($field === 'Status') {
            // Only the part before " - " is kept for the status.
            list($status, $discard) = explode(' - ', $text);
            $text = trim($status);
        }

        $applications[$AppNo][$field] = $text;
    }
}
$item = trim($item); if ($item == '') { exit("Empty string"); } if ($item[1] == ' ' && 1 == preg_match('/[A-Za-z]/', $item[0])) { $item = trim(substr($item, 1)); } } // sanitizes time foreach ($matches[2] as &$item) { $item = trim($item); } return $matches; } $perimeter_data = extract_data($perimeter_data); $central_data = extract_data($perimeter_data); $times_raw = $perimeter_data[2]; $times_array = array(); $i = 0; foreach ($times_raw as $times) { $times_array[$i++] = explode('M', $times); } foreach ($times_array as &$times) { foreach ($times as &$time) { $time = trim($time) . 'M'; if (strlen($time) < 5) { $time = 'INVALID'; } } } $output = "";
<?php //ini_set( "display_errors", 0); $connection = connect_to_database('127.0.0.1', 'root', ''); //Read all search results //Hardcoded page size! for ($i = 0; $i < 27; $i++) { extract_data('http://meklesanas-rezultats.zl.lv/?p=' . $i . '&QProdukts=%22Viesn%C4%ABcas%22'); } function extract_data($page) { $html = new DOMDocument('1.0', 'UTF-8'); //Turn off validation for html $html->validateOnParse = false; libxml_use_internal_errors(true); //Load html data , skip error handling $html->loadHTML('<?xml encoding="UTF-8">' . file_get_contents($page)); libxml_clear_errors(); $html->preserveWhiteSpace = true; //Get list element $list = $html->getElementById("List"); //return if result is empty if ($list == NULL) { return; } //Dig deeper, to get all list <li> elements $list = $list->childNodes; //Process each li element for ($i = 0, $il = $list->length; $i < $il; $i++) { $result = array('title' => '', 'adress' => '', 'telephone' => '', 'map' => '', 'mail' => '', 'web' => ''); //Check if this is really a <li> element if ($list->item($i)->localName == 'li') {
/**
 * Reads the PagSeguro statement page for a period and returns the rows of its
 * three tables, accumulated over all result pages.
 *
 * Side effects: may call login() when the expected table is missing from the
 * first response, and calls cache_update_index() once per day in the range
 * (including days without rows), keyed by md5(day . $_SESSION["__ps_user"]).
 *
 * Each returned row is array(date as "Ymd\THi" or "", first ";"-separated part
 * of column 1, second part of column 1, id captured from "id=...", trimmed
 * column 2, column 3 as number, column 4 as number). The first and last row
 * of every table on every page are dropped (the original code names them
 * previous/final balance).
 *
 * @param string $inicio  Start date in "Ymd" format.
 * @param string $termino End date in "Ymd" format.
 * @return array|false array("disponivel" => rows, "receber" => rows,
 *                     "bloqueado" => rows), or false when a date is invalid,
 *                     the end precedes the start, or the expected table is
 *                     still missing after a re-login.
 */
function extrato_financeiro($inicio, $termino)
{
    // Parse both bounds up front. The original compared two variables that
    // were not defined yet, so an inverted range was never rejected.
    // "!" resets unparsed fields, putting both dates at midnight.
    $dt_inicio = \DateTime::createFromFormat("!Ymd", $inicio);
    $dt_termino = \DateTime::createFromFormat("!Ymd", $termino);
    if (!$dt_inicio or !$dt_termino or $dt_termino < $dt_inicio) {
        return false;
    }

    // Request the page and read its HTML.
    $url = "https://pagseguro.uol.com.br/statement/period.jhtml";
    $form = array(
        "comboPeriod" => "120",
        "finalDate" => $dt_termino->format("d/m/Y"),
        "finalDateHid" => $dt_termino->format("d/m/Y"),
        "initialDate" => $dt_inicio->format("d/m/Y"),
        "sendfilter" => "Filtrar",
    );
    $html = http_read($url, http_build_query($form));

    // If the expected table is missing, log in again and retry once.
    $available_pattern = '#table.*id="available_extract"([^>]*)>(.*?)</table>#s';
    if (!preg_match($available_pattern, $html, $matches)) {
        login();
        $html = http_read($url, http_build_query($form));
        // Still not what we expect: give up.
        if (!preg_match($available_pattern, $html, $matches)) {
            return false;
        }
    }

    // Three tables are read from the HTML:
    // available (available_extract), receivable (escrow_extract), blocked (contest_extract).
    $tables = array(
        "disponivel" => "available_extract",
        "receber" => "escrow_extract",
        "bloqueado" => "contest_extract",
    );
    $dados_fim = array();
    $pagina = 1;
    do {
        $dados = array();
        foreach ($tables as $ntable => $table) {
            // Isolate this table's HTML.
            $ok = preg_match('#table.*id="' . $table . '"([^>]*)>(.*?)</table>#s', $html, $matches);
            if (!$ok) {
                $dados[$ntable] = array();
                continue;
            }

            // Strip formatting markup, then turn the link's title attribute into a ";" separator.
            $tabela_html = preg_replace("#(<b>|</b>|<a href='|</a>|<font.*?>|</font>| class=\"[^\"]*\"|<span.*?>|</span>)#s", "", $matches[2]);
            $tabela_html = preg_replace("#(' title=\"[^\"]*\">)#s", ";", $tabela_html);
            $tabela = extract_data($tabela_html);

            // A single row comes back as one record instead of a list: wrap it.
            if (array_key_exists("td", $tabela["tbody"]["tr"])) {
                $tabela["tbody"]["tr"] = array($tabela["tbody"]["tr"]);
            }
            // Closure instead of create_function() (removed in PHP 8); the
            // original body also used the bare constant `td` as the key.
            $body = array_map(
                function ($linha) {
                    return $linha["td"];
                },
                $tabela["tbody"]["tr"]
            );

            // Normalise dates (to ISO) and numbers, writing the result back into $body.
            foreach ($body as $k => $v) {
                $id_chave = explode(";", trim($v[1]));
                // Extract the id from the link; null when the pattern is absent.
                $id = preg_match("#id=(.*)#", $id_chave[0], $id_match) ? $id_match[1] : null;
                $ts = \DateTime::createFromFormat("d/m/Y H:i", $v[0]);
                $dia = $ts ? $ts->format("Ymd\\THi") : "";
                $body[$k] = array(
                    $dia,
                    $id_chave[0],
                    isset($id_chave[1]) ? $id_chave[1] : null,
                    $id,
                    trim($v[2]),
                    str_replace(",", ".", str_replace(".", "", $v[3])) + 0,
                    str_replace(",", ".", str_replace(".", "", $v[4])) + 0,
                );
            }

            // The first and last rows are summaries (previous / final balance),
            // not statement entries. TODO: return them to the caller.
            array_shift($body);
            array_pop($body);
            $dados[$ntable] = $body;
        }

        // Merge this page into the running total before moving on.
        foreach ($dados as $k => $v) {
            if (!isset($dados_fim[$k])) {
                $dados_fim[$k] = array();
            }
            $dados_fim[$k] = array_merge($dados_fim[$k], $v);
        }

        // Next page.
        $pagina++;
        $form["page"] = $pagina;
        $form["pageCmd"] = "page";
        $form["paginatorSize"] = 1651;

        // Pagination ends when two consecutive pages are identical.
        $old_md5 = md5($html);
        $html = http_read($url, http_build_query($form));
        $ok = $old_md5 != md5($html);

        // Guard against an infinite loop if the page layout changes.
        if ($pagina > 40) {
            $ok = false;
        }
    } while ($ok);

    // Feed the id cache: flatten the three tables into one list...
    $lst_itens = array();
    foreach ($dados_fim as $table) {
        $lst_itens = array_merge($lst_itens, $table);
    }

    // ...and index it by day.
    $lst_index = array();
    foreach ($lst_itens as $item) {
        $dia = substr($item[0], 0, 8);
        $lst_index[$dia][$item[2]] = $item[3];
    }

    // Days without any entries must be cached too. Stepping by calendar day
    // avoids skipping or repeating a day across a daylight-saving change.
    $cursor = clone $dt_inicio;
    while ($cursor <= $dt_termino) {
        $dia = $cursor->format("Ymd");
        if (!array_key_exists($dia, $lst_index)) {
            $lst_index[$dia] = array();
        }
        $cursor->modify("+1 day");
    }

    // Store each day's index; the logged-in user is part of the key to avoid cache clashes.
    foreach ($lst_index as $dia => $vetor) {
        $index_name = md5($dia . $_SESSION["__ps_user"]);
        cache_update_index($index_name, $vetor, 0);
    }

    // Rows of the three tables across all pages.
    return $dados_fim;
}
{
    print $when->format(DATE_ISO8601) . "\n";
}

/**
 * Parses one listing fragment and saves it through scraperwiki::save().
 *
 * The anchor inside li[class="first last"] supplies the link, the title and
 * the description; the last 10 characters of the "date-display-single" span
 * are parsed as d/m/Y. A fragment that cannot be parsed, or that lacks the
 * anchor, the date span or a parseable date, is skipped instead of causing a
 * fatal error on a null/false value.
 *
 * @param mixed $value Fragment handed to str_get_html().
 * @return void
 */
function extract_data($value)
{
    $htmlvalue = str_get_html($value);
    if (!$htmlvalue) {
        return;
    }

    // One lookup serves link, title and description: the original ran the
    // same selector three times.
    $anchor = $htmlvalue->find('li[class="first last"] a', 0);
    $date = $htmlvalue->find('span[class="date-display-single"]', 0);
    if (!$anchor || !$date) {
        return;
    }

    $processdate = substr($date->plaintext, -10);
    $when = date_create_from_format('d/m/Y', $processdate);
    if ($when === false) {
        return;
    }
    print_date($when);

    $data = array(
        'link' => $anchor->href,
        'title' => $anchor->plaintext,
        'description' => $anchor->plaintext,
        'date' => $when,
    );
    scraperwiki::save(array('title'), $data);
}

require 'scraperwiki/simple_html_dom.php';

$html_content = scraperWiki::scrape("http://www.aagbi.org/education/educational-resources/tutorial-week/my-events/tutorial");
$html = str_get_html($html_content);
$html_el = $html->find(".view-content", 0);

foreach ($html_el->children() as $child1) {
    // Only children whose markup mentions the site are processed.
    if (preg_match("/www.aagbi.org/i", $child1)) {
        extract_data($child1);
    }
}
"><?php echo $acomodacao->hos_nome; ?> </a>.</td> </tr> <?php } ?> <?php } ?> <?php foreach ($acomodacoesSaida as $acomodacao) { ?> <?php if (extract_data($acomodacao->aco_saida) == $dia->selected_date) { ?> <tr> <td><span class="glyphicon glyphicon-home" aria-hidden="true"></span> <a href="<?php echo base_url(); ?> planejamento/showAcomodacao/<?php echo $acomodacao->aco_id; ?> ">Check-out</a> da atração <a href="<?php echo base_url(); ?> evento/atracao/<?php echo $acomodacao->atracao->atr_id; ?> "><?php
<?php
// $HTTP_RAW_POST_DATA was removed in PHP 7.0. Use it when the runtime still
// provides it, otherwise read the raw request body from php://input.
extract_data(isset($HTTP_RAW_POST_DATA) ? $HTTP_RAW_POST_DATA : file_get_contents('php://input'));
}
    $reader->close();
    return $person;
}

/**
 * Builds the URL of the XML file for a short code.
 *
 * @param string $code Short code inserted into the file name.
 * @return string URL of the form http://localhost/~iainemsley/text/F-{code}.xml
 */
function open_file($code)
{
    return "http://localhost/~iainemsley/text/F-{$code}.xml";
    #return "http://firstfolio.bodleian.ox.ac.uk/download/xml/F-$code.xml";
}

/**
 * Load the data into Couch.
 *
 * POSTs each element of $data, JSON-encoded, to the "hamlet" database on the
 * local CouchDB server and dumps the response of every request.
 *
 * @param array $data Documents to store; keys are ignored.
 * @return void
 */
function load_couch($data)
{
    $url = 'http://127.0.0.1:5984/hamlet';

    foreach ($data as $value) {
        // Post the document into the db.
        $context = stream_context_create(array(
            'http' => array(
                'method' => 'POST',
                'header' => "Content-Type: application/json\r\n",
                'content' => json_encode($value),
            ),
        ));
        $result = file_get_contents($url, false, $context);
        var_dump($result);
    }
}

// $argv[0] is always the script name, so a missing short code means fewer
// than two entries. The original "< 1" check could never trigger.
if (count($argv) < 2) {
    die('Usage: xml_transform.php <shortcode here>');
}

$code = $argv[1];
echo "Extracting the data from {$code}. \n";
$drama_coords = extract_data($code);
echo "Writing the data to file";
load_couch($drama_coords);
/**
 * Reads the Redecard online statement for a period and returns its table as
 * header, summary and normalised data rows.
 *
 * The first table row becomes the header. The last three rows are treated as
 * summaries, keyed by the last word of their first cell. In the remaining
 * rows, columns 0 and 1 are converted from d/m/Y to Ymd and columns 7-10 from
 * "1.234,56" notation to numbers.
 *
 * @param string $data_inicial Start date, placed into the query string as-is.
 * @param string $data_final   End date, placed into the query string as-is.
 * @return array array("resumo" => ..., "cabecalho" => ..., "dados" => ...),
 *               or an empty array when the statement table is not found.
 */
function hack_movimentacao_vendas($data_inicial, $data_final)
{
    $url = "https://services.redecard.com.br";
    $url .= "/novoportal/portals/servicoSharepoint/extratoonline/IS_ExtratosRedecard_Extrato.aspx";
    $url .= "?BankLine=&txtnu_pdv=&box_sel=010&data_inicial={$data_inicial}&data_final={$data_final}&moeda=R&Selpvs=0&Box=010&flgTrava=0";
    $html = http_read($url, "");

    // Keep only the statement table.
    if (!preg_match('#table class="frm_INS"([^>]*)>(.*?)</table>#s', $html, $matches)) {
        return array();
    }

    // Strip formatting markup before parsing.
    $html = preg_replace("#(<b>|</b>|<a.*?>|</a>|<font.*?>|</font>)#s", "", $matches[2]);
    $dados = extract_data($html);

    // Closure instead of create_function(), which was removed in PHP 8.
    $dados = array_map(
        function ($linha) {
            return $linha["td"];
        },
        $dados["tr"]
    );

    // "1.234,56" -> 1234.56: drop thousands separators, use "." for decimals.
    $to_number = function ($texto) {
        return str_replace(",", ".", str_replace(".", "", $texto)) + 0;
    };

    // Header / summary.
    $header = array_shift($dados);
    $resumo = array();
    for ($i = 0; $i <= 2; $i++) {
        $resumo_ = array_pop($dados);
        // array_pop() takes its argument by reference, so the exploded words
        // need a variable of their own.
        $palavras = explode(" ", $resumo_[0]);
        $nome = array_pop($palavras);
        // The four amounts are read from the end of the row, last cell first.
        $resumo[$nome]["VALOR_LIQUIDO"] = $to_number(array_pop($resumo_));
        $resumo[$nome]["DESCONTO_TAXAS"] = $to_number(array_pop($resumo_));
        $resumo[$nome]["VALOR_CORRECOES"] = $to_number(array_pop($resumo_));
        $resumo[$nome]["VALOR_VENDAS"] = $to_number(array_pop($resumo_));
    }

    // Normalise dates (to ISO) and numbers.
    foreach ($dados as $k => $v) {
        $dados[$k][0] = \DateTime::createFromFormat("d/m/Y", $v[0])->format("Ymd");
        $dados[$k][1] = \DateTime::createFromFormat("d/m/Y", $v[1])->format("Ymd");
        for ($coluna = 7; $coluna <= 10; $coluna++) {
            $dados[$k][$coluna] = $to_number($v[$coluna]);
        }
    }

    return array("resumo" => $resumo, "cabecalho" => $header, "dados" => $dados);
}