Пример #1
0
    /**
     * @param $key
     * @param $value
     * @param $propertyId
     * @param $updateTime
     * @param $rowStatusId
     * @param $cellLanguage
     * @param $cellType
     * @param $schemaId
     *
     * @return array $results
     * @throws \PropelException
     */
    private function SetPropertyElement(
        $key,
        $value,
        $propertyId,
        $updateTime,
        $rowStatusId,
        $cellLanguage,
        $cellType
    ) {
        $profilePropertyId = $this->prolog['columns'][$key]['id'];
        if (is_array($profilePropertyId)) {
            $profilePropertyId = $profilePropertyId[0];
        }

        //check to see if the property already exists
        //note that this also checks the object value as well, so there's no way to update or delete an existing triple
        //the sheet would have to contain the identifier for the triple

        //actually there is. We look for exact matches and skip the update of an exact match,
        //but then we look at just the old properties that are left -- the ones where the property matches but the object doesn't
        //and we update those. If we have any properties left after that, we add them as new.
        //We need to add an instruction to the spreadsheet to treat an empty property cell as a skip or a delete
        //and if an empty cell means delete, then we delete those

        //get the language for this thing
        //if there's a prolog set for the language for this column, use it
        //use the default for the import (already set above)
        //fall back to the default language of the vocabulary

        //get the fqn if using curies
        if ($cellType and $this->useCuries) {
            $value = $this->getFqn($value);
        }

        $StatementCounter['status'] = 'skipped';

        /** @var \SchemaPropertyElement[] $element */
        $element = \SchemaPropertyElementPeer::lookupElement($propertyId, $profilePropertyId, $value, $cellLanguage);

        if ($element) {
            if (1 === count($element)) {
                $element = $element[0];
                //make sure we handle the special case of subproperty and subclass
                if (empty( $value ) && $element->getIsSchemaProperty() && in_array($element->getProfilePropertyId(),
                                                                                   [ 6, 9 ])
                ) {
                    return $StatementCounter;
                }
            } else {
                //it's ambiguous and we stop processing, logging an error to be dealt with later
                $error['Message']           = "Ambiguous update";
                $error['PropertyId']        = $propertyId;
                $error['ProfilePropertyId'] = $profilePropertyId;
                $error['UpdateValue']       = $value;

                $this->results['errors'][] = $error;
            }
        }

        //create a new propertyelement for each unmatched column
        //we didn't find an existing element, make a new one
        if ( ! $element && ! empty( $value )) {
            $element = new \SchemaPropertyElement;
            $element->setCreatedUserId($this->userId);
            $element->setCreatedAt($updateTime);
            $element->setProfilePropertyId($profilePropertyId);
            $element->setSchemaPropertyId($propertyId);
            $StatementCounter['status'] = 'created';
        }

        if ( ! $element) {
            return $StatementCounter;
        }

        if (( $value != $element->getObject() ) || ( $cellLanguage != $element->getLanguage() )) {
            /**
             * @todo We need a check here for objectproperties and handle differently
             *       if it's a URI, and it uses namespaces, and we have the namespace, do the substitution
             **/
            if ( ! $cellType and $cellLanguage) {
                if ($cellLanguage != $element->getLanguage()) {
                    $element->setLanguage($cellLanguage);
                }
            }
        }
        if ($value != $element->getObject()) {
            if ( ! empty( $value )) {
                $element->setObject($value);
            } else {
                $element->setDeletedAt($updateTime);
            }
        }

        if ($element->isNew() or $element->isModified()) {
            $element->setUpdatedUserId($this->userId);
            $element->setUpdatedAt($updateTime);
            $element->setStatusId($rowStatusId);
            $element->importId = $this->importId;
            $element->save();

            if ($StatementCounter['status'] != 'created') {
                $StatementCounter['status'] = 'modified';
            }

            if (is_array($element->getProfilePropertyId())) {
                $profilePropertyId = $element->getProfilePropertyId()[0];
            } else {
                $profilePropertyId = $element->getProfilePropertyId();
            }
            /** @var \ProfileProperty $profileProperty */
            $profileProperty                = $this->profileProperties[$profilePropertyId];
            $StatementCounter['column']     = $key;
            $StatementCounter['id']         = $element->getId();
            $StatementCounter['propertyId'] = $element->getProfilePropertyId();
            $StatementCounter['object']     = $element->getObject();
            $StatementCounter['type']       = $cellType;
            $StatementCounter['language']   = $cellLanguage;
        }

        unset( $element );

        return $StatementCounter;
    }
function run_import_marc_vocabulary($task, $args)
{


    //check the argument counts
    if (count($args) < 1) {
        throw new Exception('You must provide a vocabulary type.');
    }

    if (count($args) < 2) {
        throw new Exception('You must provide a file name.');
    }

    if (count($args) < 3) {
        throw new Exception('You must provide a vocabulary id.');
    }

    //set the arguments
    $type          = strtolower($args[0]);
    $filePath      = $args[1];
    $id            = $args[2];
    $deleteMissing = (isset($args[3]) && ("-d" == $args[3]));

    //do some basic validity checks

    if (! in_array($type, array("schema", "vocab", "vocabulary"))) {
        throw new Exception('You must import into a schema or a vocab');
    }

    if ("vocabulary" == $type) {
        $type = "vocab";
    }

    if (! is_numeric($id)) {
        throw new Exception('You must provide a valid ID');
    }

    //does the file exist?
    if (! file_exists($filePath)) {
        throw new Exception('You must supply a valid file to import: ' . $filePath);
    }

    //is the file a valid type?
    if (preg_match('/^.+\.([[:alpha:]]{2,4})$/', $filePath, $matches)) {
        if (! in_array(strtolower($matches[1]), array("json", "rdf", "csv", "xml"))) {
            throw new Exception('You must provide a valid file type based on the extension');
        }
    } else {
        throw new Exception("File type cannot be determined from the file extension");
    }

    $fileType = $matches[1];

    //is the object a valid object?
    if ('vocab' == $type) {
        $vocabObj = VocabularyPeer::retrieveByPK($id);
        if (is_null($vocabObj)) {
            throw new Exception('Invalid vocabulary ID');
        }

        //set some defaults
        $baseDomain = $vocabObj->getBaseDomain();
        $language   = $vocabObj->getLanguage();
        $statusId   = $vocabObj->getStatusId();

        //get a skos property id map
        $skosMap = SkosPropertyPeer::getPropertyNames();

        //there has to be a hash or a slash
        $tSlash = preg_match('@(/$)@i', $vocabObj->getUri()) ? '' : '/';
        $tSlash = preg_match('/#$/', $vocabObj->getUri()) ? '' : $tSlash;
    } else {
        $schemaObj = SchemaPeer::retrieveByPK($id);
        if (is_null($schemaObj)) {
            throw new Exception('Invalid schema ID');
        }

        //set some defaults
        $baseDomain = $schemaObj->getUri();
        $language   = $schemaObj->getLanguage();
        $statusId   = $schemaObj->getStatusId();

        //get a element set property id map
        $profileId  = 1;
        $profile    = ProfilePeer::retrieveByPK($profileId);
        $elementMap = $profile->getAllProperties();

        //there has to be a hash or a slash
        $tSlash = preg_match('@(/$)@i', $baseDomain) ? '' : '/';
        $tSlash = preg_match('/#$/', $baseDomain) ? '' : $tSlash;
    }

    //     insert jon's user id
    $userId = 36;

    /* From here on the process is the same regardless of UI */

    //execute
    //     parse file to get the fields/columns and data
    $file = fopen($filePath, "r");
    if (! $file) {
        throw new Exception("Can't read supplied file");
    }

    //     check to see if file has been uploaded before
    //          check import history for file name
    $importHistory = FileImportHistoryPeer::retrieveByLastFilePath($filePath);
    //          if reimport
    //               get last import history for filename
    //               unserialize column map
    //               match column names to AP based on map
    //     look for matches in unmatched field/column names to AP (ideal)
    //     csv table of data --
    //          row1: parsed field names/column headers
    //          row2: select dropdown with available fields from object AP (pre-select known matches)
    //                each select identified by column number
    //          row3: display datatype of selected field (updated dynamically when field selected)
    //          row4-13: first 10 rows of parsed data from file
    //     require a column that can match to 'URI' (maybe we'll allow an algorithm later)
    //     require columns that are required by AP
    //     on reimport there should be a flag to 'delete missing properties' from the current data
    //     note: at some point there will be a reimport process that allows URI changing
    //          this will require that there be an OMR identifier embedded in the incoming data

    switch ($fileType) {
        case "csv":
            try {
                $reader = new aCsvReader($filePath);
            } catch(Exception $e) {
                throw new Exception("Not a happy CSV file!");
            }

            if ('vocab' == $type) {
                // Get array of heading names found
                $headings = $reader->getHeadings();
                $fields   = ConceptPeer::getFieldNames();

                //set the map
                //      $map[] = array("property" => "Uri", "column" => "URILocal");
                //      $map[] = array("property" => "prefLabel", "column" => "skos:prefLabel");
                //      $map[] = array("property" => "definition", "column" => "skos:definition");
                //      $map[] = array("property" => "notation", "column" => "skos:notation");
                //      $map[] = array("property" => "scopeNote", "column" => "skos:scopeNote");

                $map = array(
                  "uri"        => "URILocal",
                  "prefLabel"  => "skos:prefLabel",
                  "definition" => "skos:definition",
                  "notation"   => "skos:notation",
                  "scopeNote"  => "skos:scopeNote"
                );

                $rows = 0;

                //executeImport:

                //    serialize the column map
                try {
                    while ($row = $reader->getRow()) {
                        $rows ++;
                        //        lookup the URI (or the OMR ID if available) for a match
                        $uri        = $baseDomain . $row[$map["uri"]];
                        $concept    = ConceptPeer::getConceptByUri($uri);
                        $updateTime = time();
                        $language   = (isset($map['language'])) ? $row[$map['language']] : $vocabObj->getLanguage();

                        if (! $concept) {
                            //          create a new concept or element
                            $concept = new Concept();
                            $concept->setVocabulary($vocabObj);
                            $concept->setUri($uri);
                            /**
                             * @todo Need to handle updates for topconcept here, just like language
                             **/
                            $concept->setIsTopConcept(false);
                            $concept->updateFromRequest(
                                    $userId,
                                      fixMarcEncoding(rtrim($row[$map['prefLabel']])),
                                      $language,
                                      $statusId
                            );
                        } //don't update the concept if the preflabel matches
                        else if ($row[$map['prefLabel']] != $concept->getPrefLabel()) {
                            $concept->updateFromRequest($userId, fixMarcEncoding(rtrim($row[$map['prefLabel']])));
                        }

                        //there needs to be a language to lookup the properties unless it's an objectProperty
                        $rowLanguage = (isset($map['language'])) ? $row[$map['language']] : $concept->getLanguage();

                        foreach ($map as $key => $value) {
                            //we skip because we already did them
                            if (! in_array($key, array('uri', 'prefLabel', 'language'))) {
                                $skosId = $skosMap[$key];
                                //check to see if the property already exists
                                $property =
                                  ConceptPropertyPeer::lookupProperty($concept->getId(), $skosId, $rowLanguage);

                                //create a new property for each unmatched column
                                if (! empty($row[$value])) {
                                    if (! $property) {
                                        $property = new ConceptProperty();
                                        $property->setCreatedUserId($userId);
                                        $property->setConceptId($concept->getId());
                                        $property->setCreatedAt($updateTime);
                                        $property->setSkosPropertyId($skosId);
                                    }

                                    if (($row[$value] != $property->getObject()) ||
                                        ($rowLanguage != $property->getLanguage())
                                    ) {
                                        /**
                                         * @todo We need a check here for skos objectproperties and handle differently
                                         **/
                                        if ($rowLanguage != $property->getLanguage()) {
                                            $property->setLanguage($rowLanguage);
                                        }
                                        if ($row[$value] != $property->getObject()) {
                                            $property->setObject(fixMarcEncoding(rtrim($row[$value])));
                                        }
                                        $property->setUpdatedUserId($userId);
                                        $property->setUpdatedAt($updateTime);
                                        $property->save();
                                    }
                                } //the row value is empty
                                else if ($deleteMissing && $property) {
                                    $property->delete();
                                }
                            }
                        }

                        //          else
                        //               lookup and update concept or element
                        //               lookup and update each property
                        //          update the history for each property, action is 'import', should be a single timestamp for all (this should be automatic)
                        //          if 'delete missing properties' is true
                        //               delete each existing, non-required property that wasn't updated by the import
                    }
                } catch(Exception $e) {
                    //          catch
                    //            if there's an error of any kind, write to error log and continue
                    echo "Error on row: " . $rows . ", " . $uri . "\n" . $e . "\n";
                    continue;
                }
                $objects = $vocabObj->countConcepts();
            } else //it's an element set
            {
                $map  = array(
                  "uri"        => "uriLocalPart",
                  "name"       => "reg:name",
                  "definition" => "skos:definition",
                  "label"      => "rdfs:label",
                  "note"       => array("tag" => "tagCap", "ind1" => "ind1Cap", "ind2" => "ind2Cap", "sub" => "subCap")
                );
                $rows = 0;

                //executeImport:
                //    serialize the column map
                try {
                    while ($row = $reader->getRow()) {
                        //        lookup the URI (or the OMR ID if available) for a match

                        //There always has to be a URI on either update or create
                        if (! isset($row[$map["uri"]])) {
                            throw new Exception('Missing URI for row: ' . $reader->getRowCount());
                            continue;
                        }

                        $rows ++;
                        $uri         = $baseDomain . $tSlash . $row[$map["uri"]];
                        $property    = SchemaPropertyPeer::retrieveByUri($uri);
                        $updateTime  = time();
                        $rowLanguage = (isset($map['language'])) ? $row[$map['language']] : $language;
                        $rowStatusId = (isset($map['status'])) ? $row[$map['status']] : $statusId;

                        if (! $property) {
                            //          create a new property
                            /** @var SchemaProperty * */
                            $property = new SchemaProperty();
                            $property->setSchema($schemaObj);
                            $property->setUri($uri);
                            $property->setCreatedUserId($userId);
                            $property->setCreatedAt($updateTime);
                        }

                        $property->setLanguage($rowLanguage);
                        $property->setStatusId($rowStatusId);
                        $property->setUpdatedUserId($userId);
                        $property->setUpdatedAt($updateTime);

                        if (isset($row[$map["label"]])) {
                            $property->setLabel($row[$map["label"]]);
                        }

                        if (isset($row[$map["name"]])) {
                            $property->setName($row[$map["name"]]);
                        }

                        if (isset($row[$map["definition"]])) {
                            $property->setDefinition($row[$map["definition"]]);
                        }

                        if (is_array($map["note"])) {
                            $note = '';
                            foreach ($map["note"] as $key => $value) {
                                $caption = ! empty($row[$value]) ? " (" . $row[$value] . ")" : ' (no caption)';
                                $note .= ! empty($row[$key]) ? $key . ": " . $row[$key] . $caption . "<br />" : "";
                            }
                            $property->setNote($note);
                        } else {
                            if (isset($row[$map["note"]])) {
                                $property->setNote($row[$map["note"]]);
                            }
                        }
                        $property->saveSchemaProperty($userId);

                        /**
                         * @todo Need to handle domain and range
                         **/

                        foreach ($map as $key => $value) {
                            //we skip because we already did them
                            if (! in_array(
                              $key,
                              array('uri', 'status', 'language', 'label', 'name', 'definition', 'comment', 'note')
                            )
                            ) {
                                $elementId = $elementMap[$key];
                                //check to see if the property already exists
                                //note that this also checks the object value as well, so there's no way to update or delete an existing triple
                                //the sheet would have to conatin the identifier for the triple
                                $element = SchemaPropertyElementPeer::lookupElement(
                                                                    $schemaObj->getId(),
                                                                      $elementId,
                                                                      $map[$value]
                                );

                                //create a new property for each unmatched column
                                if (! empty($row[$value])) {
                                    if (! $element) {
                                        $element = new SchemaPropertyElement();
                                        $element->setCreatedUserId($userId);
                                        $element->setCreatedAt($updateTime);
                                        $element->setProfilePropertyId($elementId);
                                    }

                                    if (($row[$value] != $element->getObject()) ||
                                        ($rowLanguage != $element->getLanguage())
                                    ) {
                                        /**
                                         * @todo We need a check here for objectproperties and handle differently
                                         **/
                                        if ($rowLanguage != $element->getLanguage()) {
                                            $element->setLanguage($rowLanguage);
                                        }
                                        if ($row[$value] != $element->getObject()) {
                                            $element->setObject($row[$value]);
                                        }
                                        $element->setUpdatedUserId($userId);
                                        $element->setUpdatedAt($updateTime);
                                        $element->save();
                                    }
                                } //the row value is empty
                                else if ($deleteMissing && $element) {
                                    $element->delete();
                                }
                            }
                        }

                        //          else
                        //               lookup and update concept or element
                        //               lookup and update each property
                        //          update the history for each property, action is 'import', should be a single timestamp for all (this should be automatic)
                        //          if 'delete missing properties' is true
                        //               delete each existing, non-required property that wasn't updated by the import
                    }
                } catch(Exception $e) {
                    //          catch
                    //            if there's an error of any kind, write to error log and continue
                    echo "Error on row: " . $rows . ", " . $uri . "\n" . $e . "\n";
                    continue;
                }
                $objects = $schemaObj->countSchemaPropertys();
            }
            //     save the import history file (match timestamp to history entries)
            break;
        case "json":
            break;
        case "rdf":
            break;
        case "xml":
            break;
        default:
    }

    /* output to stdout*/
    //          number of objects imported (link to history, filtered on timestamp of import)
    echo "File:" . $filePath . ";\n     Objects imported: " . $objects . "; Rows read: " . $rows . "\n";
    //          number of errors (link to error log)

}
 /**
  * create a new element
  *
  * @param  SchemaPropertyElement $element
  * @param integer                $userId
  * @param                        $field
  * @param string                 $object
  * @param \Connection            $con
  *
  * @return bool|\SchemaPropertyElement
  */
 public function updateElement(SchemaPropertyElement $element, $userId, $field, $object, $con)
 {
     if ($element) {
         $element->setIsSchemaProperty(true);
         $element->setUpdatedUserId($userId);
         //SchemaPropertyElementPeer::updateElement($schema_property, $element, $userId, $field, $con);
         if ('is_subproperty_of' == $field || 'is_subclass_of' == $field) {
             if ($this->getIsSubpropertyOf() && empty($object)) {
                 $element->setRelatedSchemaPropertyId($this->getIsSubpropertyOf());
                 $object = $this->getParentUri();
             }
         }
         $element->setObject($object ? $object : '');
         $element->save($con);
     }
     return $element;
 }