/** * @param $task * @param $args * * Arg[0] is one of "schema" (element set), "vocab" or "vocabulary" * arg[1] is the vocabulary name. * The file type is determined by the extension and must be one of "json", "rdf", "csv", "xml" * arg[2] is the vocabulary id * arg[3] is the batch id * arg[4] [optional] is -d * * @throws Exception */ function run_import_vocabulary($task, $args) { //xdebug_break(); //check the argument counts if (count($args) < 1) { throw new Exception('You must provide a vocabulary type.'); } if (count($args) < 2) { throw new Exception('You must provide a file name.'); } if (count($args) < 3) { throw new Exception('You must provide a vocabulary id.'); } //set the arguments $type = strtolower($args[0]); $filePath = $args[1]; $vocabId = $args[2]; $batchId = isset($args[3]) ? $args[3] : ""; $deleteMissing = (isset($args[4]) && ("-d" == $args[4])); //do some basic validity checks if (! in_array( $type, array( "schema", "vocab", "vocabulary" ) ) ) { throw new Exception('You must import into a schema or a vocab'); } if ("vocabulary" == $type) { $type = "vocab"; } if (! is_numeric($vocabId)) { throw new Exception('You must provide a valid ID'); } //does the file exist? if (! file_exists($filePath)) { //default to the site upload path $filePath = $GLOBALS['uploadPath'] . $filePath; if (! file_exists($filePath)) { throw new Exception('You must supply a valid file to import: ' . $filePath); } } //is the file a valid type? if (preg_match('/^.+\.([[:alpha:]]{2,4})$/', $filePath, $matches)) { if (! in_array( strtolower($matches[1]), array( "json", "rdf", "csv", "xml" ) ) ) { throw new Exception('You must provide a valid file type based on the extension'); } } else { throw new Exception("File type cannot be determined from the file extension"); } $fileType = $matches[1]; //is the object a valid object? if ('vocab' == $type) { $vocabObj = VocabularyPeer::retrieveByPK($vocabId); if (is_null($vocabObj)) { throw new Exception('Invalid vocabulary ID'); } //set some defaults $baseDomain = $vocabObj->getBaseDomain(); $language = $vocabObj->getLanguage(); $statusId = $vocabObj->getStatusId(); $userId = $vocabObj->getCreatedUserId(); $agentId = $vocabObj->getAgentId(); //get a skos property id map $skosMap = SkosPropertyPeer::getPropertyNames(); //there has to be a hash or a slash $tSlash = preg_match('@(/$)@i', $vocabObj->getUri()) ? '' : '/'; $tSlash = preg_match('/#$/', $vocabObj->getUri()) ? '' : $tSlash; } else { $import = new ImportVocab($type, $filePath, $vocabId); } /* From here on the process is the same regardless of UI */ // check to see if file has been uploaded before // check import history for file name $importHistory = FileImportHistoryPeer::retrieveByLastFilePath($filePath); // if reimport // get last import history for filename // unserialize column map // match column names to AP based on map // look for matches in unmatched field/column names to AP (ideal) // csv table of data -- // row1: parsed field names/column headers // row2: select dropdown with available fields from object AP (pre-select known matches) // each select identified by column number // row3: display datatype of selected field (updated dynamically when field selected) // row4-13: first 10 rows of parsed data from file // require a column that can match to 'URI' (maybe we'll allow an algorithm later) // require columns that are required by AP // on reimport there should be a flag to 'delete missing properties' from the current data // note: at some point there will be a reimport process that allows URI changing // this will require that there be an OMR identifier embedded in the incoming data switch ($fileType) { case "csv": if ('vocab' == $type) { // Get array of heading names found $headings = $reader->getHeadings(); $fields = ConceptPeer::getFieldNames(); //set the map // $map[] = array("property" => "Uri", "column" => "URILocal"); // $map[] = array("property" => "prefLabel", "column" => "skos:prefLabel"); // $map[] = array("property" => "definition", "column" => "skos:definition"); // $map[] = array("property" => "notation", "column" => "skos:notation"); // $map[] = array("property" => "scopeNote", "column" => "skos:scopeNote"); $map = array( "uri" => "URILocal", "prefLabel" => "skos:prefLabel", "definition" => "skos:definition", "notation" => "skos:notation", "scopeNote" => "skos:scopeNote" ); $rows = 0; //executeImport: // serialize the column map try { while ($row = $reader->getRow()) { $rows ++; // lookup the URI (or the OMR ID if available) for a match $uri = $baseDomain . $row[$map["uri"]]; $concept = ConceptPeer::getConceptByUri($uri); $updateTime = time(); $language = (isset($map['language'])) ? $row[$map['language']] : $vocabObj->getLanguage(); if (! $concept) { // create a new concept or element $concept = new Concept(); $concept->setVocabulary($vocabObj); $concept->setUri($uri); /** * @todo Need to handle updates for topconcept here, just like language **/ $concept->setIsTopConcept(false); $concept->updateFromRequest( $userId, fixEncoding(rtrim($row[$map['prefLabel']])), $language, $statusId ); } //don't update the concept if the preflabel matches else if ($row[$map['prefLabel']] != $concept->getPrefLabel()) { $concept->updateFromRequest($userId, fixEncoding(rtrim($row[$map['prefLabel']]))); } //there needs to be a language to lookup the properties unless it's an objectProperty $rowLanguage = (isset($map['language'])) ? $row[$map['language']] : $concept->getLanguage(); foreach ($map as $key => $value) { //we skip because we already did them if (! in_array( $key, array( 'uri', 'prefLabel', 'language' ) ) ) { $skosId = $skosMap[$key]; //check to see if the property already exists $property = ConceptPropertyPeer::lookupProperty($concept->getId(), $skosId, $rowLanguage); //create a new property for each unmatched column if (! empty($row[$value])) { if (! $property) { $property = new ConceptProperty(); $property->setCreatedUserId($userId); $property->setConceptId($concept->getId()); $property->setCreatedAt($updateTime); $property->setSkosPropertyId($skosId); } if (($row[$value] != $property->getObject()) || ($rowLanguage != $property->getLanguage()) ) { /** * @todo We need a check here for skos objectproperties and handle differently **/ if ($rowLanguage != $property->getLanguage()) { $property->setLanguage($rowLanguage); } if ($row[$value] != $property->getObject()) { $property->setObject(fixEncoding(rtrim($row[$value]))); } $property->setUpdatedUserId($userId); $property->setUpdatedAt($updateTime); $property->save(); } } //the row value is empty else if ($deleteMissing && $property) { $property->delete(); } } } // else // lookup and update concept or element // lookup and update each property // update the history for each property, action is 'import', should be a single timestamp for all (this should be automatic) // if 'delete missing properties' is true // delete each existing, non-required property that wasn't updated by the import } } catch(Exception $e) { // catch // if there's an error of any kind, write to error log and continue echo "Error on row: " . $rows . ", " . $uri . "\n" . $e . "\n"; continue; } $objects = $vocabObj->countConcepts(); } else //it's an element set { $import->setCsvReader($import->file); $import->processProlog(); $import->getDataColumnIds(); $import->processData(); //todo: $results should be a class $results[$vocabId] = $import->results; $bacthId = $import->saveResults($batchId); } break; case "json": break; case "rdf": break; case "xml": break; default: } /* output to stdout*/ // number of objects imported (link to history, filtered on timestamp of import) echo " Rows imported: " . count($results[$vocabId]['success']['rows']) . "\n From File:" . $filePath . "\nUse this ID for more in this batch: " . $bacthId; // number of errors (link to error log) }
/** * @param \ConceptProperty | \ConceptProperty[] $dbElement * @param $value * @param $rowStatus * @param \Concept $concept * @param \ProfileProperty $profileProperty * @param $language * * @param $key * * @return bool * @throws \PropelException */ private function upsertConceptFromRow( &$dbElement, $value, $rowStatus, &$concept, $profileProperty, $language, $key ) { //set the keys to skip, because concepts don't maintain uri and status separately $skipKeys = [ 59, 62 ]; if (in_array($key, $skipKeys)) { return true; } if ( ! is_array($dbElement)) { if (empty( $dbElement ) and $value) { $dbElement = new \ConceptProperty(); $dbElement->setSkosPropertyId($profileProperty->getSkosId()); $dbElement->setConceptId($concept->getId()); $dbElement->setLanguage($language); $dbElement->setCreatedUserId($this->userId); $concept->addConceptPropertyRelatedByConceptId($dbElement); } if ($dbElement and $value !== $dbElement->getObject()) { if (empty( $value )) { $dbElement->delete(); } else { $dbElement->setStatusId($rowStatus); $dbElement->setObject($value); $dbElement->setUpdatedUserId($this->userId); $dbElement->importId = $this->importId; $dbElement->setDeletedAt(null); } //$dbElement->save(); if ($profileProperty->getIsInForm()) { if (( $profileProperty->getHasLanguage() and $concept->getLanguage() == $dbElement->getLanguage() ) or ! $profileProperty->getHasLanguage()) { $this->setConceptValue($value, $concept, $profileProperty->getName(), ! $profileProperty->getIsObjectProp(), $key); $dbElement->setIsConceptProperty(true); } } return true; } } else { $foundOne = false; foreach ($dbElement as &$oneElement) { if ( ! $foundOne) { $foundOne = $this->upsertConceptFromRow($oneElement, $value, $rowStatus, $concept, $profileProperty, $language, $key); } } } return true; }
function run_import_marc_vocabulary($task, $args) { //check the argument counts if (count($args) < 1) { throw new Exception('You must provide a vocabulary type.'); } if (count($args) < 2) { throw new Exception('You must provide a file name.'); } if (count($args) < 3) { throw new Exception('You must provide a vocabulary id.'); } //set the arguments $type = strtolower($args[0]); $filePath = $args[1]; $id = $args[2]; $deleteMissing = (isset($args[3]) && ("-d" == $args[3])); //do some basic validity checks if (! in_array($type, array("schema", "vocab", "vocabulary"))) { throw new Exception('You must import into a schema or a vocab'); } if ("vocabulary" == $type) { $type = "vocab"; } if (! is_numeric($id)) { throw new Exception('You must provide a valid ID'); } //does the file exist? if (! file_exists($filePath)) { throw new Exception('You must supply a valid file to import: ' . $filePath); } //is the file a valid type? if (preg_match('/^.+\.([[:alpha:]]{2,4})$/', $filePath, $matches)) { if (! in_array(strtolower($matches[1]), array("json", "rdf", "csv", "xml"))) { throw new Exception('You must provide a valid file type based on the extension'); } } else { throw new Exception("File type cannot be determined from the file extension"); } $fileType = $matches[1]; //is the object a valid object? if ('vocab' == $type) { $vocabObj = VocabularyPeer::retrieveByPK($id); if (is_null($vocabObj)) { throw new Exception('Invalid vocabulary ID'); } //set some defaults $baseDomain = $vocabObj->getBaseDomain(); $language = $vocabObj->getLanguage(); $statusId = $vocabObj->getStatusId(); //get a skos property id map $skosMap = SkosPropertyPeer::getPropertyNames(); //there has to be a hash or a slash $tSlash = preg_match('@(/$)@i', $vocabObj->getUri()) ? '' : '/'; $tSlash = preg_match('/#$/', $vocabObj->getUri()) ? '' : $tSlash; } else { $schemaObj = SchemaPeer::retrieveByPK($id); if (is_null($schemaObj)) { throw new Exception('Invalid schema ID'); } //set some defaults $baseDomain = $schemaObj->getUri(); $language = $schemaObj->getLanguage(); $statusId = $schemaObj->getStatusId(); //get a element set property id map $profileId = 1; $profile = ProfilePeer::retrieveByPK($profileId); $elementMap = $profile->getAllProperties(); //there has to be a hash or a slash $tSlash = preg_match('@(/$)@i', $baseDomain) ? '' : '/'; $tSlash = preg_match('/#$/', $baseDomain) ? '' : $tSlash; } // insert jon's user id $userId = 36; /* From here on the process is the same regardless of UI */ //execute // parse file to get the fields/columns and data $file = fopen($filePath, "r"); if (! $file) { throw new Exception("Can't read supplied file"); } // check to see if file has been uploaded before // check import history for file name $importHistory = FileImportHistoryPeer::retrieveByLastFilePath($filePath); // if reimport // get last import history for filename // unserialize column map // match column names to AP based on map // look for matches in unmatched field/column names to AP (ideal) // csv table of data -- // row1: parsed field names/column headers // row2: select dropdown with available fields from object AP (pre-select known matches) // each select identified by column number // row3: display datatype of selected field (updated dynamically when field selected) // row4-13: first 10 rows of parsed data from file // require a column that can match to 'URI' (maybe we'll allow an algorithm later) // require columns that are required by AP // on reimport there should be a flag to 'delete missing properties' from the current data // note: at some point there will be a reimport process that allows URI changing // this will require that there be an OMR identifier embedded in the incoming data switch ($fileType) { case "csv": try { $reader = new aCsvReader($filePath); } catch(Exception $e) { throw new Exception("Not a happy CSV file!"); } if ('vocab' == $type) { // Get array of heading names found $headings = $reader->getHeadings(); $fields = ConceptPeer::getFieldNames(); //set the map // $map[] = array("property" => "Uri", "column" => "URILocal"); // $map[] = array("property" => "prefLabel", "column" => "skos:prefLabel"); // $map[] = array("property" => "definition", "column" => "skos:definition"); // $map[] = array("property" => "notation", "column" => "skos:notation"); // $map[] = array("property" => "scopeNote", "column" => "skos:scopeNote"); $map = array( "uri" => "URILocal", "prefLabel" => "skos:prefLabel", "definition" => "skos:definition", "notation" => "skos:notation", "scopeNote" => "skos:scopeNote" ); $rows = 0; //executeImport: // serialize the column map try { while ($row = $reader->getRow()) { $rows ++; // lookup the URI (or the OMR ID if available) for a match $uri = $baseDomain . $row[$map["uri"]]; $concept = ConceptPeer::getConceptByUri($uri); $updateTime = time(); $language = (isset($map['language'])) ? $row[$map['language']] : $vocabObj->getLanguage(); if (! $concept) { // create a new concept or element $concept = new Concept(); $concept->setVocabulary($vocabObj); $concept->setUri($uri); /** * @todo Need to handle updates for topconcept here, just like language **/ $concept->setIsTopConcept(false); $concept->updateFromRequest( $userId, fixMarcEncoding(rtrim($row[$map['prefLabel']])), $language, $statusId ); } //don't update the concept if the preflabel matches else if ($row[$map['prefLabel']] != $concept->getPrefLabel()) { $concept->updateFromRequest($userId, fixMarcEncoding(rtrim($row[$map['prefLabel']]))); } //there needs to be a language to lookup the properties unless it's an objectProperty $rowLanguage = (isset($map['language'])) ? $row[$map['language']] : $concept->getLanguage(); foreach ($map as $key => $value) { //we skip because we already did them if (! in_array($key, array('uri', 'prefLabel', 'language'))) { $skosId = $skosMap[$key]; //check to see if the property already exists $property = ConceptPropertyPeer::lookupProperty($concept->getId(), $skosId, $rowLanguage); //create a new property for each unmatched column if (! empty($row[$value])) { if (! $property) { $property = new ConceptProperty(); $property->setCreatedUserId($userId); $property->setConceptId($concept->getId()); $property->setCreatedAt($updateTime); $property->setSkosPropertyId($skosId); } if (($row[$value] != $property->getObject()) || ($rowLanguage != $property->getLanguage()) ) { /** * @todo We need a check here for skos objectproperties and handle differently **/ if ($rowLanguage != $property->getLanguage()) { $property->setLanguage($rowLanguage); } if ($row[$value] != $property->getObject()) { $property->setObject(fixMarcEncoding(rtrim($row[$value]))); } $property->setUpdatedUserId($userId); $property->setUpdatedAt($updateTime); $property->save(); } } //the row value is empty else if ($deleteMissing && $property) { $property->delete(); } } } // else // lookup and update concept or element // lookup and update each property // update the history for each property, action is 'import', should be a single timestamp for all (this should be automatic) // if 'delete missing properties' is true // delete each existing, non-required property that wasn't updated by the import } } catch(Exception $e) { // catch // if there's an error of any kind, write to error log and continue echo "Error on row: " . $rows . ", " . $uri . "\n" . $e . "\n"; continue; } $objects = $vocabObj->countConcepts(); } else //it's an element set { $map = array( "uri" => "uriLocalPart", "name" => "reg:name", "definition" => "skos:definition", "label" => "rdfs:label", "note" => array("tag" => "tagCap", "ind1" => "ind1Cap", "ind2" => "ind2Cap", "sub" => "subCap") ); $rows = 0; //executeImport: // serialize the column map try { while ($row = $reader->getRow()) { // lookup the URI (or the OMR ID if available) for a match //There always has to be a URI on either update or create if (! isset($row[$map["uri"]])) { throw new Exception('Missing URI for row: ' . $reader->getRowCount()); continue; } $rows ++; $uri = $baseDomain . $tSlash . $row[$map["uri"]]; $property = SchemaPropertyPeer::retrieveByUri($uri); $updateTime = time(); $rowLanguage = (isset($map['language'])) ? $row[$map['language']] : $language; $rowStatusId = (isset($map['status'])) ? $row[$map['status']] : $statusId; if (! $property) { // create a new property /** @var SchemaProperty * */ $property = new SchemaProperty(); $property->setSchema($schemaObj); $property->setUri($uri); $property->setCreatedUserId($userId); $property->setCreatedAt($updateTime); } $property->setLanguage($rowLanguage); $property->setStatusId($rowStatusId); $property->setUpdatedUserId($userId); $property->setUpdatedAt($updateTime); if (isset($row[$map["label"]])) { $property->setLabel($row[$map["label"]]); } if (isset($row[$map["name"]])) { $property->setName($row[$map["name"]]); } if (isset($row[$map["definition"]])) { $property->setDefinition($row[$map["definition"]]); } if (is_array($map["note"])) { $note = ''; foreach ($map["note"] as $key => $value) { $caption = ! empty($row[$value]) ? " (" . $row[$value] . ")" : ' (no caption)'; $note .= ! empty($row[$key]) ? $key . ": " . $row[$key] . $caption . "<br />" : ""; } $property->setNote($note); } else { if (isset($row[$map["note"]])) { $property->setNote($row[$map["note"]]); } } $property->saveSchemaProperty($userId); /** * @todo Need to handle domain and range **/ foreach ($map as $key => $value) { //we skip because we already did them if (! in_array( $key, array('uri', 'status', 'language', 'label', 'name', 'definition', 'comment', 'note') ) ) { $elementId = $elementMap[$key]; //check to see if the property already exists //note that this also checks the object value as well, so there's no way to update or delete an existing triple //the sheet would have to conatin the identifier for the triple $element = SchemaPropertyElementPeer::lookupElement( $schemaObj->getId(), $elementId, $map[$value] ); //create a new property for each unmatched column if (! empty($row[$value])) { if (! $element) { $element = new SchemaPropertyElement(); $element->setCreatedUserId($userId); $element->setCreatedAt($updateTime); $element->setProfilePropertyId($elementId); } if (($row[$value] != $element->getObject()) || ($rowLanguage != $element->getLanguage()) ) { /** * @todo We need a check here for objectproperties and handle differently **/ if ($rowLanguage != $element->getLanguage()) { $element->setLanguage($rowLanguage); } if ($row[$value] != $element->getObject()) { $element->setObject($row[$value]); } $element->setUpdatedUserId($userId); $element->setUpdatedAt($updateTime); $element->save(); } } //the row value is empty else if ($deleteMissing && $element) { $element->delete(); } } } // else // lookup and update concept or element // lookup and update each property // update the history for each property, action is 'import', should be a single timestamp for all (this should be automatic) // if 'delete missing properties' is true // delete each existing, non-required property that wasn't updated by the import } } catch(Exception $e) { // catch // if there's an error of any kind, write to error log and continue echo "Error on row: " . $rows . ", " . $uri . "\n" . $e . "\n"; continue; } $objects = $schemaObj->countSchemaPropertys(); } // save the import history file (match timestamp to history entries) break; case "json": break; case "rdf": break; case "xml": break; default: } /* output to stdout*/ // number of objects imported (link to history, filtered on timestamp of import) echo "File:" . $filePath . ";\n Objects imported: " . $objects . "; Rows read: " . $rows . "\n"; // number of errors (link to error log) }
public function executeEdit() { $this->setFlash('vocabID', $this->getVocabularyId()); if ($this->getRequest()->getMethod() == sfRequest::POST) { //before the save... $concept_property = $this->getRequestParameter('concept_property'); /** * @todo the list of skos property types that require a related concept should be in a master configuration array * this applies to the template too **/ //check to see if the skosproperty requires a related concept if (!in_array($concept_property['skos_property_id'], array('3', '16', '21', '32', '33', '34', '35', '36', '37'))) { $concept_property['related_concept_id'] = null; $concept_property['scheme_id'] = null; } $conceptPropertyId = $this->getRequestParameter('id'); $userId = $this->getUser()->getSubscriberId(); //does the user have editorial rights to the reciprocal... $permission = false; if (isset($concept_property['related_concept_id']) and $concept_property['related_concept_id']) { //we want to lookup the URI of the related term $related_concept = ConceptPeer::retrieveByPK($concept_property['related_concept_id']); if ($related_concept) { if ($this->getUser()->hasCredential(array(0 => 'administrator'))) { $permission = true; } else { //get the maintainers of the reciprocal property $maintainers = $related_concept->getVocabulary()->getVocabularyHasUsers(); /** @var VocabularyHasUser $maintainer */ foreach ($maintainers as $maintainer) { if ($userId === $maintainer->getUserId() and $maintainer->getIsMaintainerFor()) { $permission = true; break; } } } } } if ($permission) { if (isset($conceptPropertyId) && $conceptPropertyId) { $this->deleteReciprocalProperty($conceptPropertyId, $concept_property['related_concept_id']); } if (isset($concept_property['related_concept_id']) and $concept_property['related_concept_id']) { //we want to lookup the URI of the related term $related_concept = ConceptPeer::retrieveByPK($concept_property['related_concept_id']); if ($related_concept) { //and overwrite whatever is in the current object TODO: move this into an javascript action in the user interface $concept_property['object'] = $related_concept->getUri(); $this->getRequest()->getParameterHolder()->set('concept_property', $concept_property); } //lookup the inverse id $InverseProfileId = ProfilePropertyPeer::retrieveBySkosID($concept_property['skos_property_id'])->getInverseProfilePropertyId(); $InverseSkosId = ProfilePropertyPeer::retrieveByPK($InverseProfileId)->getSkosId(); //then we create a new reciprocal property in the related term $newProp = new ConceptProperty(); $newProp->setConceptId($concept_property['related_concept_id']); $newProp->setSkosPropertyId($InverseSkosId); $newProp->setSchemeId($this->concept->getVocabularyId()); $newProp->setRelatedConceptId($this->concept->GetId()); $newProp->setObject($this->concept->getUri()); $newProp->setStatusId($concept_property['status_id']); $newProp->setIsGenerated(true); $newProp->setCreatedUserId($this->getUser()->getSubscriberId()); $newProp->setUpdatedUserId($this->getUser()->getSubscriberId()); //TODO: make this the user's default language (actually the language is not relevant when defining relationships) //$newProp->setLanguage($this->concept->getLanguage()); $newProp->setLanguage(''); $concept_property['language'] = ''; $newProp->save(); } } //save the array back to the request parameter $this->requestParameterHolder->set('concept_property', $concept_property); } parent::executeEdit(); }