Exemplo n.º 1
0
/**
 * Loads the CSV datasets of each project into the database, using the project's
 * datapackage.json to split every CSV row into SQL columns plus one JSON "info" string.
 *
 * Every entry of $items[$prj] is a list whose first element is the SQL statement to
 * prepare; the remaining elements are dataset file names relative to "data/".
 * A file name may carry the suffixes "::bind" and/or "::strict": when either is present
 * the row values are bound by field name (":field", ":json_info"); otherwise the row is
 * passed positionally to execute(). ("::strict" currently has no additional effect.)
 * The dataset named "samples" is not read as CSV: each second-level sub-folder of
 * "data/samples" is handed to intoDb_XMLs().
 *
 * The first CSV line is treated as a header and compared against the SQL field names;
 * a mismatch terminates the script with die().
 *
 * @param array  $items    project => list of datasets, each array(sql, file1, file2, ...).
 * @param array  $projects project => folder that contains datapackage.json.
 * @param object $db       connection object exposing prepare() (presumably PDO; not checked here).
 * @param string $SEP      CSV field delimiter.
 * @param int    $nmax     maximum number of CSV lines scanned per file (0 = no limit).
 * @param int    $verbose  when >= 2 the SQL statement is included in the report.
 * @return array array(records used, lines scanned, report text); when execute() reports a
 *               failure, returns array(-1, 0, report text) immediately.
 */
function jsonCsv_to_sql(&$items, &$projects, &$db, $SEP = ',', $nmax = 0, $verbose = 2)
{
    $OUT_report = '';
    $n = $N = $N2 = 0;
    foreach ($items as $prj => $r) {
        foreach ($r as $dataset) {
            $folder = $projects[$prj];
            $sql = array_shift($dataset);
            $OUT_report .= "\n\n---- PRJ {$prj}" . ($verbose >= 2 ? " {{ {$sql} }}" : '');
            $stmt = $db->prepare($sql);
            $jpack = json_decode(file_get_contents("{$folder}/datapackage.json"), true);
            // A missing/invalid package yields no resources; its datasets are then reported as "not used".
            $resources = is_array($jpack) && isset($jpack['resources']) && is_array($jpack['resources'])
                ? $jpack['resources']
                : array();

            // Map "data/<file>" => binding mode (1 = ::bind, 2 = ::strict, 3 = both, 0 = positional).
            $ds = array();
            foreach ($dataset as $i) {
                $i = str_replace('::bind', '', $i, $bind);
                $i = str_replace('::strict', '', $i, $aux);
                $ds["data/{$i}"] = $bind + 2 * $aux;
            }
            $ds_keys = array_keys($ds);

            foreach ($resources as $pack) {
                if (!isset($pack['path']) || !in_array($pack['path'], $ds_keys, true)) {
                    continue;
                }
                $path = $pack['path'];
                // The entry is removed after its first use, so a repeated resource falls back to mode 0.
                $bindByName = isset($ds[$path]) ? $ds[$path] : 0;
                $OUT_report .= "\n\t-- reding dataset '{$path}' ";
                $fields = $pack['schema']['fields'];
                list($sql_fields, $json_fields, $json_types) = fields_to_parts($fields, false, true);
                $has_jfields = count($json_fields);
                if ($has_jfields) {
                    // administrative field: records which resource the row came from
                    $json_fields[] = 'source';
                }
                $n = $n2 = 0;
                $nsql = count($sql_fields);
                // Loop-invariant: target types for the JSON part of each row.
                $types = array_slice($json_types, $nsql);
                $ntypes = count($types);
                $file = "{$folder}/{$path}";
                $h = fopen($file, 'r');
                while ($h && !feof($h) && (!$nmax || $n < $nmax)) {
                    $lin0 = $lin = fgetcsv($h, 0, $SEP);
                    if ($lin && $n++ > 0 && isset($lin[0])) {
                        // Data line: the first $nsql columns go to SQL, the rest into the JSON info.
                        $jsons = array_slice($lin, $nsql);
                        for ($t_i = 0; $t_i < $ntypes; $t_i++) {
                            // cast to string or to the type selected by the package
                            settype($jsons[$t_i], $types[$t_i]);
                        }
                        $sqls = array_slice($lin, 0, $nsql);
                        if ($has_jfields) {
                            $jsons[] = $path;
                        }
                        // TODO: catch the failure raised here when package and CSV column counts disagree.
                        $info = json_encode(array_combine($json_fields, $jsons));
                        if ($bindByName) {
                            $tmp = $sqls;
                            foreach ($sql_fields as $i) {
                                // bindValue(): the value is a temporary, which bindParam() cannot take by reference.
                                $stmt->bindValue(":{$i}", array_shift($tmp));
                            }
                            if ($has_jfields && $info) {
                                $stmt->bindValue(":json_info", $info);
                            }
                            $ok = $stmt->execute();
                        } else {
                            // implicit bind (by array order and no datatype parsing)
                            $ok = $stmt->execute(array_merge($sqls, array($info)));
                        }
                        if (!$ok) {
                            $OUT_report .= "\n ---- ERROR (at csv line-{$n} (rec. {$n2}) with error info) ----\n";
                            $OUT_report .= var_export($lin0, true);
                            $OUT_report .= var_export($stmt->errorInfo(), true);
                            fclose($h);
                            return array(-1, 0, $OUT_report);
                        }
                        $n2++;
                    } elseif ($nsql > 0 && is_array($lin) && isset($lin[0])) {
                        // Header line (first non-empty read). is_array() guards the false returned at EOF.
                        $lin_check = fields_to_parts(array_slice($lin, 0, $nsql));
                        if ($sql_fields != $lin_check) {
                            var_dump($lin_check);
                            var_dump($sql_fields);
                            die("\n --- ERROR: CSV header-basic not matches SQL/datapackage field names ---\n");
                        }
                    }
                }
                if ($h) {
                    fclose($h);
                }
                $OUT_report .= " {$n} lines scanned, {$n2} used.";
                $N += $n;
                $N2 += $n2;
                unset($ds[$path]);
            }

            $sampath = "data/samples";
            if (isset($ds[$sampath])) {
                // a kind of XML dataset: scan folder-types (sci, law) and then each repository folder
                unset($ds[$sampath]);
                $folder2 = "{$folder}/{$sampath}";
                foreach (scandir($folder2) as $ft) {
                    if (strlen($ft) > 2) {
                        // names of length <= 2 (including "." and "..") are skipped
                        foreach (scandir("{$folder2}/{$ft}") as $rpfolder) {
                            if (strlen($rpfolder) > 2) {
                                intoDb_XMLs("{$folder2}/{$ft}/{$rpfolder}", $db, $rpfolder);
                            }
                        }
                    }
                }
            }

            // Whatever is left was requested but never found in the package.
            foreach ($ds as $k => $v) {
                $OUT_report .= "\n --WARNING: pack '{$k}' (bind={$v}) not used";
            }
        }
    }
    return array($N2, $N, $OUT_report);
}
Exemplo n.º 2
0
                     $ok = $stmt->execute();
                 } else {
                     // implicit bind (by array order and no datatype parsing)
                     $ok = $stmt->execute(array_merge($sqls, array($info)));
                 }
                 if (!$ok) {
                     print "\n ---- ERROR (at line-{$n} with error info) ----\n";
                     print_r($lin0);
                     print_r($stmt->errorInfo());
                     die("\n");
                 } else {
                     $n2++;
                 }
             } elseif ($nsql > 0 && count($sql_fields) && isset($lin) && count($lin) && $lin != array()) {
                 //debug print "\n-pk-$n2...0-$nsql \nlin=".count($lin);print_r($lin);
                 $lin_check = fields_to_parts(array_slice($lin, 0, $nsql));
                 if ($sql_fields != $lin_check) {
                     var_dump($lin_check);
                     var_dump($sql_fields);
                     die("\n --- ERROR: CSV header-basic not matches SQL/datapackage field names ---\n");
                 }
             }
         }
         print " {$n} lines scanned, {$n2} used.";
         unset($ds[$pack['path']]);
     }
 }
 // if $pack
 $sampath = "data/samples";
 if (isset($ds[$sampath])) {
     // a kind of XML dataset
Exemplo n.º 3
0
/**
 * Get all basic information for CSV interpretation from {$folder}/datapackage.json.
 * See also http://data.okfn.org/doc/tabular-data-package
 *
 * Only resources that declare a "path" are considered. Terminates the script with die()
 * when datapackage.json is missing or is not valid JSON, or when a considered resource
 * has no schema/fields. Prints one progress line per analysed resource.
 *
 * @param string $folder  folder that contains datapackage.json and the resource files.
 * @param string $dftSEP  separator used when the resource does not declare "sep".
 * @param bool   $prefURL when true, the resource "url" (if declared) is preferred over the local path.
 * @return array resource name (or its path when unnamed) => array with the keys
 *               sep, fields_sql, fields_json, fields_json_types, file
 */
function unpack_datapackage($folder, $dftSEP = ',', $prefURL = false)
{
    $p = "{$folder}/datapackage.json";
    if (!file_exists($p)) {
        die("\n\t-- ERROR: check folder ({$folder}) or add datapackage.json");
    }
    $jpack = json_decode(file_get_contents($p), true);
    if (!is_array($jpack)) {
        // json_decode() gives null on a syntax error; iterating it would only emit warnings.
        die("\n\t-- ERROR: invalid JSON at {$p}\n");
    }
    // A package without a usable "resources" list simply yields no entries.
    $resources = isset($jpack['resources']) && is_array($jpack['resources']) ? $jpack['resources'] : [];

    $ret = [];
    foreach ($resources as $pack) {
        if (!isset($pack['path'])) {
            continue;
        }
        $r = [];
        $rpath = $pack['path'];
        // unnamed resources are keyed by their path
        $name = isset($pack['name']) ? $pack['name'] : $rpath;
        print "\n\t -- analysing pack-resource '{$name}'";
        $r['sep'] = isset($pack['sep']) ? $pack['sep'] : $dftSEP;
        if (!isset($pack['schema']['fields'])) {
            die("\n\t -- ERROR at datapackage.json: no schema/fields in {$name}\n");
        }
        list($r['fields_sql'], $r['fields_json'], $r['fields_json_types']) = fields_to_parts($pack['schema']['fields'], false, true);
        // Use the URL when the path is empty or URLs are preferred; otherwise the resolved local file
        // (realpath() yields false when the file does not exist).
        $useUrl = (!$rpath || $prefURL) && isset($pack['url']);
        $r['file'] = $useUrl ? $pack['url'] : realpath("{$folder}/{$rpath}");
        $ret[$name] = $r;
    }
    return $ret;
}