/**
 * Runs one domParser command over a file and returns the post-processed text.
 *
 * Reads the global $io_options and overwrites the global $dayFilter with
 * $io_options['day'] (or '' when that key is not set). A trace line is always
 * written to php://stderr; when $isRELAT is truthy a banner is also printed.
 *
 * @param $cmd       Command forwarded to domParser::output().
 * @param $file      File forwarded to domParser::getHtmlBody().
 * @param $isRELAT   Truthy: print the banner and skip header removal and trim.
 * @param $rmHeader  Truthy: strip XML_HEADER1 from the output (only when $isRELAT is falsy).
 * @param $finalUTF8 Forwarded to domParser::output().
 * @return string The processed output followed by a single "\n".
 */
function exec_cmd($cmd, $file, $isRELAT, $rmHeader = 1, $finalUTF8 = true)
{
    global $io_options, $dayFilter;

    file_put_contents('php://stderr', "\n -- ({$cmd}) {$file}\n");
    if ($isRELAT) {
        print "\n=== {$cmd} {$file} ===";
    }

    if (isset($io_options['day'])) {
        $dayFilter = $io_options['day'];
    } else {
        $dayFilter = '';
    }

    $parser = new domParser();
    $wantsUtf8 = isset($io_options['utf8']) && $io_options['utf8'];
    $parser->getHtmlBody($file, $wantsUtf8);
    $result = $parser->output($cmd, $finalUTF8, $dayFilter);

    if (!$isRELAT) {
        if ($rmHeader) {
            $result = str_replace(XML_HEADER1, '', $result);
        }
        $result = trim($result);
    }

    // Line breaks are inserted when the 'breaklines' key is ABSENT, i.e. the
    // option actually behaves as "no-breaklines".
    if (!isset($io_options['breaklines'])) {
        $lineBreaks = [
            '<p'       => "\n<p",
            '<div'     => "\n<div",
            '<article' => "\n<article",
            '<sec'     => "\n<sec",
            '<keys'    => "\n<keys",
            '<days'    => "\n<days",
        ];
        $result = str_replace(array_keys($lineBreaks), array_values($lineBreaks), $result);
    }

    // Normalizes spacing in the author's text around "±" and around
    // comparison operators placed between digits or the letter "p".
    if (isset($io_options['normaliza'])) {
        $spacingRules = [
            '/(\\d)\\s+±\\s+(\\d)/us'                 => '$1 ± $2',
            '/([\\dp])\\s*(<|>|=)\\s*([\\dp])/ius' => '$1 $2 $3',
        ];
        foreach ($spacingRules as $pattern => $replacement) {
            $result = preg_replace($pattern, $replacement, $result);
        }
    }

    if (isset($io_options['entnum'])) {
        $result = utf2html($result);
    }

    return $result . "\n";
}
/**
 * Runs one domParser command over a file and returns the post-processed text.
 *
 * Reads the global $io_options and overwrites the global $dayFilter with
 * $io_options['day'] (or '' when that key is not set). A trace line is always
 * written to php://stderr; when $isRELAT is truthy a banner is also printed.
 *
 * @param $cmd        Command forwarded to domParser::output().
 * @param $file       File forwarded to domParser::getHtmlBody().
 * @param $isRELAT    Truthy: print the banner and skip header removal and trim.
 * @param $isMultiSec Forwarded to domParser::output() as its fourth argument.
 * @param $rmHeader   Truthy: strip XML_HEADER1 from the output (only when $isRELAT is falsy).
 * @param $finalUTF8  Forwarded to domParser::output().
 * @return string The processed output followed by a single "\n".
 */
function exec_cmd($cmd, $file, $isRELAT, $isMultiSec = FALSE, $rmHeader = 1, $finalUTF8 = TRUE)
{
    global $io_options, $dayFilter;

    file_put_contents('php://stderr', "\n -- ({$cmd}) {$file}\n");
    if ($isRELAT) {
        print "\n=== {$cmd} {$file} ===";
    }

    if (isset($io_options['day'])) {
        $dayFilter = $io_options['day'];
    } else {
        $dayFilter = '';
    }

    // TODO (from the original author): $io_options['normaliza'] still has to
    // be applied to the XML inside lib.php.
    $parser = new domParser();
    $wantsUtf8 = isset($io_options['utf8']) && $io_options['utf8'];
    $parser->getHtmlBody($file, $wantsUtf8);
    $result = $parser->output($cmd, $finalUTF8, $dayFilter, $isMultiSec);

    if (!$isRELAT) {
        if ($rmHeader) {
            $result = str_replace(XML_HEADER1, '', $result);
        }
        $result = trim($result);
    }

    // Line breaks are inserted when the 'breaklines' key is ABSENT, i.e. the
    // option actually behaves as "no-breaklines".
    if (!isset($io_options['breaklines'])) {
        $lineBreaks = [
            '<p'       => "\n\n\n<p",
            '<div'     => "\n\n<div",
            '<article' => "\n\n<article",
            '<sec'     => "\n\n<sec",
            '<keys'    => "\n<keys",
            '<days'    => "\n<days",
        ];
        $result = str_replace(array_keys($lineBreaks), array_values($lineBreaks), $result);
        // Trim spaces and tabs on both sides of every line break.
        $result = preg_replace("/[ \\t]*\n[ \\t]*/s", "\n", $result);
    }

    // Repeats a clean-up that, per the original note, was already done on the raw input.
    $result = rmClosedFormatters($result);

    if (isset($io_options['entnum'])) {
        $result = utf2html($result);
    }

    return $result . "\n";
}
} } } // for if ($isXML) { print "\n\n</root>\n"; } else { print "\n\n--- TOTAIS ---\n" . count(array_keys($lista)) . " arquivos analisados:"; foreach ($lista as $k => $v) { print "\n\t {$k} = " . ($v ? "sucesso" : "FALHOU NA LEITURA DO ARQUIVO HTML"); } print "\n"; } } else { file_put_contents('php://stderr', "\n-- rodando sem o TUDO, modo {$MODO}\n"); $dom = new domParser(); if ($dom->getHtmlBody($file)) { print $dom->output($MODO, $finalUTF8); } } /** * ASSERTS. * USO: * $ php assert.php * $ php assert.php -gen */ $VERSAO = '1.0'; // v1.0 de 2014-08-03 assert_options(ASSERT_CALLBACK, 'comunicaErro'); echo "---- ASSERT (v{$VERSAO}): COMPARANDO EXECUSSAO COM HOMOLOGADOS ---\n pwd = {$baseDir}\n"; die("DEBUG");
/**
 * Serializes this document as XML.
 *
 * When $this->isXML_step1 is truthy the document is returned unchanged through
 * saveXML(). Otherwise:
 *
 *  1. every <p> whose first child's text starts with 2-3 uppercase letters gets
 *     a "sec" attribute holding that prefix;
 *  2. for each distinct prefix, every tagged <p> whose first child's text starts
 *     with an id (uppercase letters, optional "-" and/or one lowercase letter,
 *     then 3-4 digits) becomes one <article>. The paragraph's children are split
 *     at <br>: each segment fills the next element of
 *     pubid, title, contribs, aff, corresp, abstract, conclusion; any further
 *     segment goes into an <ERRO> element;
 *  3. the articles of a prefix are wrapped in a <sec> element, and all <sec>
 *     elements are returned inside an <html> wrapper.
 *
 * Reads the globals $DESCR_byResumo, $LocHora_byResumo, $FILTRO, $PNgrupo and
 * $SECAO. Terminates the script with die() when an id's letter prefix differs
 * from the section being processed.
 *
 * @param $dayFilter  Not used by this method (the day filter is only mentioned
 *                    in disabled code).
 * @param $isMultiSec Not used by this method.
 * @return string XML text.
 */
function asXML($dayFilter = '', $isMultiSec = FALSE)
{
    global $DESCR_byResumo;
    global $LocHora_byResumo;
    global $FILTRO;
    global $PNgrupo;
    global $SECAO;

    if ($this->isXML_step1) {
        // Already XML: nothing to convert.
        return $this->saveXML();
    }

    $XML_FINAL = '';
    $this->newDom = new DOMDocument();
    $this->preserveWhiteSpace = false; // original note: XML_PARSE_NOBLANKS
    $xp = new DOMXpath($this);

    // Element names assigned, in order, to the <br>-separated parts of a paragraph.
    $replacElements = ['pubid', 'title', 'contribs', 'aff', 'corresp', 'abstract', 'conclusion', 'ERRO'];

    // Pass 1: tag paragraphs with their section prefix and collect one XPath per prefix.
    $SECLOOP = [];
    foreach ($xp->query('//p') as $node) {
        // An empty <p> has no first child; treat it as empty text instead of
        // dereferencing null.
        $lead = $node->firstChild !== null ? (string) $node->firstChild->nodeValue : '';
        if (preg_match('/^\\s*([A-Z]{2,3})/s', $lead, $m)) {
            $node->setAttribute('sec', $m[1]);
            if (!isset($SECLOOP[$m[1]])) {
                $SECLOOP[$m[1]] = "//p[@sec='{$m[1]}']";
            }
        }
    }

    // Pass 2: one <sec> per prefix.
    foreach ($SECLOOP as $sec => $xqSec) {
        $XML = "\n"; // original note: still has to go through an XSLT normalizer afterwards
        $xp = new DOMXpath($this);
        $n = 0;      // paragraphs read
        $nOk = 0;    // paragraphs converted into an article
        $secs = [];
        $subsecs = [];
        $dias = [];
        $locais = [];

        foreach ($xp->query($xqSec) as $node) {
            $id = '';
            $n++;
            $lead = $node->firstChild !== null ? (string) $node->firstChild->nodeValue : '';
            if (preg_match('/^\\s*(([A-Z]+)(\\-?[a-z]?)\\d{3,4})/s', $lead, $m) && ($id = $m[1])) {
                if ($m[2] != $sec) {
                    die("\nERRO 3472: {$sec} nao corresponde ao prefixo de {$id}.\n");
                }
                $secs[$sec] = 1;
                $subsecs[$m[3]] = 1;
                $nOk++;

                $DESCR = isset($DESCR_byResumo[$id])
                    ? $DESCR_byResumo[$id]
                    : ["(sem descritor de assunto)"];

                // Tuple layout: 0=day, 1=start time, 2=end time, 3=location.
                if (!isset($LocHora_byResumo[$id])) {
                    // FIX: the placeholders were previously a no-op expression, so
                    // these four variables stayed undefined or kept the values of
                    // the previous article/section.
                    list($dia, $hini, $hfim, $local) = ['err', 'err', 'err', 'err'];
                } else {
                    list($dia, $hini, $hfim, $local) = $LocHora_byResumo[$id];
                }
                $dias[$dia] = 1;
                $locais[$local] = 1;

                $nEle = $ntexts = 0;
                $nEle_name = $replacElements[0];
                $auxDom = new DOMDocument();
                $art = $auxDom->createElement('article');
                $ele = $auxDom->createElement($nEle_name);
                $art->appendChild($ele); // the first element is opened up front

                // A ";" in the start time marks a second period.
                $per2 = '';
                if (strpos($hini, ';') !== false) {
                    list($hini, $hini2) = explode(';', $hini);
                    list($hfim, $hfim2) = explode(';', $hfim);
                    $per2 = "<period><start day=\"{$dia}\">{$hini2}</start><end>{$hfim2}</end></period>";
                }

                $event2 = '';
                $idloc = domParser::setIdname('loc', $local, TRUE);
                if ($sec == 'PN') { // PN uses two locations
                    if (isset($PNgrupo[$id])) {
                        $dia2 = $PNgrupo[$id]['dia'];
                        $hora1 = $PNgrupo[$id]['h1'];
                        $hora2 = $PNgrupo[$id]['h2'];
                        $local2 = $PNgrupo[$id]['local'];
                        $idloc2 = $PNgrupo[$id]['idloc'];
                        $event2 = "\n\t\t\t\t\t\t\t<event2>\n\t\t\t\t\t\t\t\t<summary>Reunião de Grupo</summary>\n\t\t\t\t\t\t\t\t<period><start day=\"{$dia2}\">{$hora1}</start><end>{$hora2}</end></period>\n\t\t\t\t\t\t\t\t<location idref='{$idloc2}'>{$local2}</location>\n\t\t\t\t\t\t\t</event2>";
                    } else {
                        $event2 = "\n\t\t\t\t\t\t\t<event2>\n\t\t\t\t\t\t\t\t<summary>Reunião de Grupo</summary>\n\t\t\t\t\t\t\t\t<period>ERRO334 em {$id}</period><location>ERRO335</location>\n\t\t\t\t\t\t\t</event2>";
                    }
                    // NOTE(review): repeats the call made just above; kept because
                    // setIdname()'s side effects are not visible from here.
                    $idloc = domParser::setIdname('loc', $local, TRUE);
                }

                $ele2 = $auxDom->createDocumentFragment();
                $ele2->appendXML("<vcalendar><components>"
                    . "<period><start day=\"{$dia}\">{$hini}</start><end>{$hfim}</end></period>"
                    . $per2
                    . "<location idref='{$idloc}'>{$local}</location>"
                    . $event2
                    . "</components></vcalendar>");
                $art->appendChild($ele2);

                $ele2 = $auxDom->createDocumentFragment();
                $ele2->appendXML('<keys>' . domParser::joinMarkId($DESCR, 'k', 'key', 0) . '</keys>');
                $art->appendChild($ele2);

                // Parser: split the paragraph at <br>, filter the text and add elements.
                foreach ($node->childNodes as $subnode) {
                    $nname = $subnode->nodeName;
                    if ($nname != 'br') {
                        if ($nname == '#text') {
                            $text = $subnode->nodeValue;
                            if (in_array($nEle_name, ['abstract', 'conclusion'], true)) {
                                $text = $FILTRO['func']($text, $nEle_name == 'conclusion' ? ['SBPqO-bugs2' => 1] : NULL, 1);
                            } elseif ($nEle_name == 'title') {
                                $text = $FILTRO['func']($text, ['SBPqO-raiosx' => 1, 'norm-sps' => 1, 'reset' => 1], 1);
                            }
                            if ($ntexts) {
                                // Second or later text node of the current element.
                                $ele->appendChild($auxDom->createTextNode($text));
                            } else {
                                // First text node of the current element: right-trimmed.
                                $ntexts = 1;
                                $ele->appendChild($auxDom->createTextNode(rtrim($text)));
                            }
                        } else {
                            // Any other node is copied over with its subtree.
                            $imp = $auxDom->importNode($subnode, true);
                            $ele->appendChild($imp);
                        }
                    } else {
                        // <br>: move on to the next target element.
                        $nEle++;
                        $ntexts = 0;
                        if (!isset($replacElements[$nEle]) || $replacElements[$nEle] == 'ERRO') {
                            // More <br> separators than expected.
                            $nEle_name = 'ERRO';
                            $ele = $auxDom->createElement($nEle_name);
                            $ele->setAttribute('linha', (string) $n);
                            $ele->setAttribute('tipo', "BR {$nEle} imprevisto");
                        } else {
                            $nEle_name = $replacElements[$nEle];
                            $ele = $auxDom->createElement($nEle_name);
                        }
                        $art->appendChild($ele);
                    }
                }

                $P = $auxDom->saveXML($art);
                // Move an inline "###funding-source: ...#_##" marker into its own element.
                if ($FILTRO['regrasDefault']['SBPqO-apoio'] && preg_match('/###funding-source: (.+?)#_##/s', $P, $m)) {
                    $fund = trim($m[1]);
                    $P = preg_replace('/###funding-source: .+?#_##/s', '', $P);
                    $P = str_replace('</article>', "<funding-source>{$fund}</funding-source></article>", $P);
                }
                $XML .= "\n\n{$P}";
            }
        }

        // Section header data gathered from the articles above.
        $locais = array_keys($locais);
        $local = domParser::joinMarkId($locais, 'loc', 'location', 1);
        $dias = array_keys($dias);
        $ndias = count($dias);
        // Original note: no sequential id is needed here because the value is an ISO reference.
        $dia = domParser::joinMarkId($dias, 'd', 'day', 1);
        $secs = array_keys($secs);
        $sec = !count($secs) || count($secs) > 1 ? 'ERROR' : $secs[0];
        $subsecs = array_keys($subsecs);
        $subsec = count($subsecs) ? $subsecs[0] : '';

        // Position of each section key inside $SECAO, e.g. $SECAO_ordem['PR'] == 2.
        $SECAO_ordem = array_flip(array_keys($SECAO));
        $title = isset($SECAO[$sec]) ? $SECAO[$sec] : 'ERROR';
        $sord = isset($SECAO_ordem[$sec]) ? $SECAO_ordem[$sec] : 'ERROR';

        // FIX: the closing tag now matches the opening one, so the marker stays
        // well-formed XML.
        $err = $n != $nOk ? "<ERRO-GRAVE>lidos {$n} paragrafos, usados {$nOk}!</ERRO-GRAVE>" : '';

        $XML = "<sec id=\"{$sec}{$subsec}\" label=\"{$sec}\" sec-order=\"{$sord}\" subsec=\"{$subsec}\" sec-type=\"modalidade\">\n\t\t\t\t{$err}\n\t\t\t\t<title>{$sec}{$subsec} - {$title}</title>\n\t\t\t\t<days n='{$ndias}'>{$dia}</days>\n\t\t\t\t<locations>{$local}</locations>\n\t\t\t\t\n{$XML}\n\n\t\t\t</sec>\n";
        $XML_FINAL .= $XML;
    }

    // Original note: an <html> wrapper may be needed so later processing does not break.
    return "<html>{$XML_FINAL}</html>";
}