// Converts every row of cleaned.csv into an XML document under types/.
// Relies on $labels (CSV column index => field name) and generate_nuds(),
// both of which are defined outside this fragment.
$count = 0;
if (($handle = fopen("cleaned.csv", "r")) !== FALSE) {
    while (($data = fgetcsv($handle, 2500, ',', '"')) !== FALSE) {
        // Map positional CSV columns onto their labels, collapsing every run
        // of whitespace into a single space.
        $row = array();
        foreach ($labels as $key => $label) {
            // A row shorter than $labels yields an empty field rather than an
            // undefined-offset notice (preg_replace produced '' for it anyway).
            $value = isset($data[$key]) ? $data[$key] : '';
            $row[$label] = preg_replace('/\\s+/', ' ', $value);
        }

        // 'published' is a pipe-separated list; its first entry is used as the ID.
        $citations = array_filter(explode('|', $row['published']));
        if (!isset($citations[0])) {
            echo "{$row['accnum']} does not have an ID.\n";
        } else {
            $id = $citations[0];

            // Drop the last dot-separated segment of the ID; the remainder
            // names the output file.
            $pieces = explode('.', $id);
            array_pop($pieces);
            $broader = implode('.', $pieces);

            $xml = generate_nuds($row, $count);

            // Both formatting flags must be set BEFORE the XML is parsed:
            // whitespace-only text nodes kept during loading prevent
            // formatOutput from re-indenting the saved document.
            $dom = new DOMDocument('1.0', 'UTF-8');
            $dom->preserveWhiteSpace = FALSE;
            $dom->formatOutput = TRUE;
            if ($dom->loadXML($xml) === FALSE) {
                echo "{$id} failed to validate.\n";
            } else {
                echo "Processing {$id}\n";
                $dom->save('types/' . $broader . '.xml');
            }
        }
        $count++;
    }
    // Release the CSV handle once every row has been read.
    fclose($handle);
}
<?php //CSV arrays $data = generate_json('https://docs.google.com/spreadsheets/d/1CIfUKDeN6G3QWVjpOgbnCSxzhLFTeBBMBf6ZRVo0UHY/pub?gid=481635933&single=true&output=csv'); $stylesheet = generate_json('https://docs.google.com/spreadsheets/d/1KITdoa7W5jpu0lgCqLCQs70WpNFfFPTHjfQlTGt1sts/pub?output=csv'); $deities = generate_json('https://docs.google.com/spreadsheet/pub?hl=en_US&hl=en_US&key=0Avp6BVZhfwHAdHk2ZXBuX0RYMEZzUlNJUkZOLXRUTmc&single=true&gid=0&output=csv'); $nomismaUris = array(); foreach ($data as $row) { generate_nuds($row); //format XML output /*$dom = new DOMDocument('1.0', 'UTF-8'); $dom->preserveWhiteSpace = FALSE; $dom->formatOutput = TRUE; $dom->loadXML($xml); echo $dom->saveXML();*/ } //functions function generate_nuds($row) { global $stylesheet; global $deities; $recordId = 'price.' . $row['Price no.']; if ($row['Material'] != 'vacat') { $doc = new XMLWriter(); //$doc->openUri('php://output'); $doc->openUri('nuds/' . $recordId . '.xml'); $doc->setIndent(true); //now we need to define our Indent string,which is basically how many blank spaces we want to have for the indent $doc->setIndentString(" "); $doc->startDocument('1.0', 'UTF-8'); $doc->startElement('nuds');
<?php $findspots = generate_json('https://docs.google.com/spreadsheet/pub?hl=en_US&hl=en_US&key=0Avp6BVZhfwHAdEgzSnpGVEdDb0dzZHVCdDJoZ09IS2c&single=true&gid=0&output=csv'); $denominations = generate_json('https://docs.google.com/spreadsheet/pub?hl=en_US&hl=en_US&key=0Avp6BVZhfwHAdGZfOERpTDBiZFo3aWEtZ2pORnpZeVE&single=true&gid=0&output=csv'); $refs = generate_json('https://docs.google.com/spreadsheet/pub?hl=en_US&hl=en_US&key=0Avp6BVZhfwHAdERobDMwa05TQi1feFpyeVBVckNhTGc&single=true&gid=0&output=csv'); $nomismaURIs = generate_json('https://docs.google.com/spreadsheet/pub?hl=en_US&hl=en_US&key=0Avp6BVZhfwHAdFFSdVRwZEFmams0THV2cnJkVzQxNmc&single=true&gid=0&output=csv'); if ($handle = opendir('igch')) { while (false !== ($entry = readdir($handle))) { if (strstr($entry, '.txt')) { $id = strstr($entry, '.txt', true); echo "Processing {$id}\n"; generate_nuds($id); } } } function generate_nuds($id) { global $findspots; global $denominations; global $refs; global $nomismaURIs; $doc = new DOMDocument(); $doc->load('igch/' . $id . '.txt'); $xpath = new DOMXPath($doc); $xpath->registerNamespace('xhtml', "http://www.w3.org/1999/xhtml"); //extract findspot information $findspot_uri = ''; $findspot_text = ''; $discovery_text = ''; $date_text = ''; foreach ($findspots as $findspot) {
/**
 * Reads a CSV file, converts each data row to XML via generate_nuds() and
 * PUTs the resulting document to the local eXist REST endpoint.
 *
 * Each record gets a 9-character pseudo-random ID, is written to
 * /tmp/<id>.xml, uploaded, and the temporary file is then removed.
 * Progress and failures are reported with echo. Nothing happens (beyond
 * PHP's own warning) when the CSV file cannot be opened.
 *
 * @param string $filename path of the CSV file to read
 * @param array  $labels   CSV column index => field name used as the row key
 * @return void
 */
function process_file($filename, $labels) {
    if (($handle = fopen($filename, "r")) === FALSE) {
        return;
    }

    echo "Processing {$filename}\n";
    $count = -1;
    while (($data = fgetcsv($handle, 1000, ",", '"')) !== FALSE) {
        // NOTE(review): the counter starts at -1 and rows are handled only
        // while it is > 0, so the first TWO rows are skipped. The original
        // comment mentions a single label row; confirm against the input
        // files before changing this.
        if ($count > 0) {
            // Map positional CSV columns onto their labels. A row shorter
            // than $labels yields null without an undefined-offset notice.
            // NOTE(review): no XML escaping happens here; presumably
            // generate_nuds() takes care of it - confirm.
            $row = array();
            foreach ($labels as $key => $label) {
                $row[$label] = isset($data[$key]) ? $data[$key] : null;
            }

            $recordId = substr(md5(rand()), 0, 9);
            $xml = generate_nuds($row, $recordId);
            $xmlFile = '/tmp/' . $recordId . '.xml';

            // Both formatting flags must be set BEFORE the XML is parsed,
            // otherwise formatOutput cannot re-indent the saved document.
            $dom = new DOMDocument('1.0', 'UTF-8');
            $dom->preserveWhiteSpace = FALSE;
            $dom->formatOutput = TRUE;
            if ($dom->loadXML($xml) === FALSE) {
                echo "{$recordId} failed to validate.\n";
            } else {
                $dom->save($xmlFile);
                if (($readFile = fopen($xmlFile, 'r')) === FALSE) {
                    echo "Unable to read {$recordId}.xml\n";
                } else {
                    // PUT the XML file to eXist
                    $putToExist = curl_init();
                    curl_setopt($putToExist, CURLOPT_URL, 'http://localhost:8080/exist/rest/db/electrum/objects/' . $recordId . '.xml');
                    curl_setopt($putToExist, CURLOPT_HTTPHEADER, array("Content-Type: text/xml; charset=utf-8"));
                    curl_setopt($putToExist, CURLOPT_CONNECTTIMEOUT, 2);
                    curl_setopt($putToExist, CURLOPT_RETURNTRANSFER, 1);
                    curl_setopt($putToExist, CURLOPT_PUT, 1);
                    curl_setopt($putToExist, CURLOPT_INFILESIZE, filesize($xmlFile));
                    curl_setopt($putToExist, CURLOPT_INFILE, $readFile);
                    // user "admin" with an empty password
                    curl_setopt($putToExist, CURLOPT_USERPWD, "admin:");
                    $response = curl_exec($putToExist);
                    $http_code = curl_getinfo($putToExist, CURLINFO_HTTP_CODE);

                    // curl_error() always returns a string, so transport
                    // failure is detected through curl_exec() returning FALSE.
                    if ($response === FALSE) {
                        echo "{$recordId} failed to write to eXist.\n";
                    } elseif ($http_code == '201') {
                        echo "{$recordId} written.\n";
                    } else {
                        // The request completed but eXist did not create the resource.
                        echo "{$recordId} failed to write to eXist (HTTP {$http_code}).\n";
                    }

                    curl_close($putToExist);

                    // close the upload stream and delete the file from /tmp
                    fclose($readFile);
                    unlink($xmlFile);
                }
            }
        }
        $count++;
    }
    // Release the CSV handle once every row has been read.
    fclose($handle);
}
<?php $data = generate_json('collection-final.csv'); $files = scandir('/e/egypt-images/media/reference'); $nomismaUris = array(); foreach ($data as $row) { generate_nuds($row, $files); //format XML output /*$dom = new DOMDocument('1.0', 'UTF-8'); $dom->preserveWhiteSpace = FALSE; $dom->formatOutput = TRUE; $dom->loadXML($xml); echo $dom->saveXML();*/ } //functions function generate_nuds($row, $files) { $recordId = $row['recordId']; $orgs = array(); //parse references if (strlen($row['reference']) > 0) { //strip obverse and reverse descriptions out if possible preg_match('/(Obverse:?.*)Reverse/', $row['reference'], $obverse); preg_match('/(Reverse:?.*)$/', $row['reference'], $reverse); if (isset($obverse[1]) || isset($reverse[1])) { $refText = trim(str_replace($reverse[1], '', str_replace($obverse[1], '', $row['reference']))); } else { $refText = trim($row['reference']); } } $doc = new XMLWriter();