Exemplo n.º 1
0
 /**
  * Runs a complete import of the fixture file and checks both the parsed prolog
  * and the resulting record counts in the database.
  *
  * The expected figures (14 columns, 6 prefixes, 10 meta entries, 12 rows,
  * 138 elements, 12 properties) are tied to the fixture file configured on
  * $this->import.
  */
 public function testProcessProlog()
 {
     $I = $this->tester;
     $I->wantToTest("processing the prolog and importing the file into the database");

     $this->import->setCsvReader($this->import->file);
     $this->import->processProlog();
     $prolog = $this->import->prolog;
     $this->assertEquals(14, count($prolog['columns']), "There are the correct number of columns");
     $this->assertEquals(6, count($prolog['prefix']), "There are the correct number of prefix entries");
     $this->assertEquals(10, count($prolog['meta']), "There are the correct number of meta entries");

     $this->import->getDataColumnIds();
     $this->import->processData();
     $results = $this->import->results['success'];
     // The description must state the same count the assertion checks (12);
     // it previously claimed 8, which made a failure report misleading.
     verify(
       "There were 12 rows processed",
       count($results['rows'])
     )->equals(12);

     $this->import->processParents();
     $I->seeRecordCountInDatabaseTable("SchemaPropertyElement", 138);
     $I->seeRecordCountInDatabaseTable("SchemaProperty", 12);

     // Still to be covered:
     //prolog namespace entries are readable
     //prolog headers are actually in row 1
     //prolog headers not in row 1 produce fatal error (logged)
     //prolog entries can be matched to database (column uri matched to profile id)
     //prolog entries that can't be matched produce fatal error (logged)
 }
    /**
     * Imports the fixture file and verifies that the "creator" property row in
     * reg_schema_property still holds the expected column values afterwards.
     *
     * The expected row is built from a CSV header/value pair by the tester's
     * getArrayFromCsv() helper; the third argument presumably lists columns to
     * leave out of the comparison -- TODO confirm against the helper.
     */
    public function testSparseUpdate()
    {
        $tester   = $this->tester;
        $importer = $this->import;

        $headerLine = "id,created_at,updated_at,deleted_at,created_user_id,updated_user_id,schema_id,name,label,definition,comment,type,is_subproperty_of,parent_uri,uri,status_id,language,note,domain,orange,is_deprecated,url,lexical_alias";
        $valueLine  = '"15368","2014-01-19 03:48:16","2015-05-29 18:47:56",,"422","422","77","creator","has creator","Relates a work to a person, family, or corporate body responsible for the creation of a work.",,"property","15039","http://rdaregistry.info/Elements/u/P60447","http://rdaregistry.info/Elements/w/P10065","1","en",,"http://rdaregistry.info/Elements/c/C10001","http://rdaregistry.info/Elements/c/C10002",,,';

        $expectedRow = $tester->getArrayFromCsv(
            $headerLine,
            $valueLine,
            ['updated_at', 'deleted_at', 'created_user_id']
        );
        $tester->wantToTest("if nothing in the main table gets changed");

        // Run the import pipeline up to (but not including) parent processing.
        $importer->setCsvReader($importer->file);
        $importer->processProlog();
        $importer->getDataColumnIds();
        $outcome = $importer->processData();
        verify("There were 1 rows processed", $outcome->getSuccessCount())->equals(1);

        // Each expected column is checked on its own so a failure names the column.
        foreach (array_keys($expectedRow) as $column) {
            $tester->canSeeInDatabase('reg_schema_property', [$column => $expectedRow[$column]]);
        }

        $tester->canSeeInDatabase('reg_schema_property', ['name' => 'creator', 'label' => "has creator"]);
    }
/**
 * Command-line task that imports a data file into a vocabulary or an element set (schema).
 *
 * Expected contents of $args:
 *  - $args[0] target type: "schema" (element set), "vocab" or "vocabulary" (case-insensitive)
 *  - $args[1] path of the file to import. When the path does not exist as given it is
 *             retried relative to $GLOBALS['uploadPath']. The file type is taken from the
 *             extension, which must be one of "json", "rdf", "csv", "xml" (case-insensitive).
 *  - $args[2] numeric id of the vocabulary / element set
 *  - $args[3] [optional] batch id, defaults to ""
 *  - $args[4] [optional] "-d": delete existing properties whose cell is empty in the file
 *
 * Only "csv" files are processed at present; the other accepted types are no-ops.
 * A short summary is echoed to stdout when the import finishes.
 *
 * @param mixed $task not used by this function
 * @param array $args positional arguments as described above
 *
 * @throws Exception when an argument is missing or invalid, the file cannot be found,
 *                   the file type is unsupported, or the vocabulary id is unknown
 */
function run_import_vocabulary($task, $args)
{
    //check the argument counts
    if (count($args) < 1) {
        throw new Exception('You must provide a vocabulary type.');
    }

    if (count($args) < 2) {
        throw new Exception('You must provide a file name.');
    }

    if (count($args) < 3) {
        throw new Exception('You must provide a vocabulary id.');
    }

    //set the arguments
    $type          = strtolower($args[0]);
    $filePath      = $args[1];
    $vocabId       = $args[2];
    $batchId       = isset($args[3]) ? $args[3] : "";
    $deleteMissing = (isset($args[4]) && ("-d" == $args[4]));

    //do some basic validity checks
    if (! in_array($type, array("schema", "vocab", "vocabulary"), true)) {
        throw new Exception('You must import into a schema or a vocab');
    }

    //"vocabulary" is only an alias
    if ("vocabulary" == $type) {
        $type = "vocab";
    }

    if (! is_numeric($vocabId)) {
        throw new Exception('You must provide a valid ID');
    }

    //does the file exist?
    if (! file_exists($filePath)) {
        //default to the site upload path
        $filePath = $GLOBALS['uploadPath'] . $filePath;
        if (! file_exists($filePath)) {
            throw new Exception('You must supply a valid file to import: ' . $filePath);
        }
    }

    //is the file a valid type?
    if (! preg_match('/^.+\.([[:alpha:]]{2,4})$/', $filePath, $matches)) {
        throw new Exception("File type cannot be determined from the file extension");
    }

    //normalise the extension once, so that the validation here and the dispatch
    //further down agree on upper-case extensions such as ".CSV"
    $fileType = strtolower($matches[1]);
    if (! in_array($fileType, array("json", "rdf", "csv", "xml"), true)) {
        throw new Exception('You must provide a valid file type based on the extension');
    }

    //is the object a valid object?
    if ('vocab' == $type) {
        $vocabObj = VocabularyPeer::retrieveByPK($vocabId);
        if (is_null($vocabObj)) {
            throw new Exception('Invalid vocabulary ID');
        }

        //set some defaults
        $baseDomain = $vocabObj->getBaseDomain();
        $statusId   = $vocabObj->getStatusId();
        $userId     = $vocabObj->getCreatedUserId();

        //get a skos property id map
        $skosMap = SkosPropertyPeer::getPropertyNames();
    } else {
        $import = new ImportVocab($type, $filePath, $vocabId);
    }

    //values reported in the summary; initialised here because not every branch below sets them
    $importedCount = 0;
    $resultBatchId = $batchId;

    /* From here on the process is the same regardless of UI */
    //     check to see if file has been uploaded before
    //          check import history for file name
    //     (the lookup result is not used yet; see the planned steps below)
    $importHistory = FileImportHistoryPeer::retrieveByLastFilePath($filePath);
    //          if reimport
    //               get last import history for filename
    //               unserialize column map
    //               match column names to AP based on map
    //     look for matches in unmatched field/column names to AP (ideal)
    //     csv table of data --
    //          row1: parsed field names/column headers
    //          row2: select dropdown with available fields from object AP (pre-select known matches)
    //                each select identified by column number
    //          row3: display datatype of selected field (updated dynamically when field selected)
    //          row4-13: first 10 rows of parsed data from file
    //     require a column that can match to 'URI' (maybe we'll allow an algorithm later)
    //     require columns that are required by AP
    //     on reimport there should be a flag to 'delete missing properties' from the current data
    //     note: at some point there will be a reimport process that allows URI changing
    //          this will require that there be an OMR identifier embedded in the incoming data

    switch ($fileType) {
        case "csv":
            if ('vocab' == $type) {
                // NOTE(review): $reader is never initialised anywhere in this function, so this
                // branch cannot run as written. A CSV reader for $filePath has to be created
                // here; the reader class is not visible from this file -- TODO restore it.
                // The headings are not used below; the call is kept in case the reader
                // consumes the header row when asked for it -- TODO confirm.
                $reader->getHeadings();

                //maps a concept/skos property name to the column heading holding its value
                $map = array(
                  "uri"        => "URILocal",
                  "prefLabel"  => "skos:prefLabel",
                  "definition" => "skos:definition",
                  "notation"   => "skos:notation",
                  "scopeNote"  => "skos:scopeNote"
                );

                $rows = 0;

                while ($row = $reader->getRow()) {
                    $rows ++;
                    $uri = '';

                    //the try/catch lives inside the loop so that a failing row is reported
                    //and the import carries on with the next row
                    try {
                        //lookup the URI (or the OMR ID if available) for a match
                        $uri        = $baseDomain . $row[$map["uri"]];
                        $concept    = ConceptPeer::getConceptByUri($uri);
                        $updateTime = time();
                        $language   = (isset($map['language'])) ? $row[$map['language']] : $vocabObj->getLanguage();

                        if (! $concept) {
                            //create a new concept or element
                            $concept = new Concept();
                            $concept->setVocabulary($vocabObj);
                            $concept->setUri($uri);
                            /**
                             * @todo Need to handle updates for topconcept here, just like language
                             **/
                            $concept->setIsTopConcept(false);
                            $concept->updateFromRequest(
                              $userId,
                              fixEncoding(rtrim($row[$map['prefLabel']])),
                              $language,
                              $statusId
                            );
                        } elseif ($row[$map['prefLabel']] != $concept->getPrefLabel()) {
                            //don't update the concept if the preflabel matches
                            $concept->updateFromRequest($userId, fixEncoding(rtrim($row[$map['prefLabel']])));
                        }

                        //there needs to be a language to lookup the properties unless it's an objectProperty
                        $rowLanguage = (isset($map['language'])) ? $row[$map['language']] : $concept->getLanguage();

                        foreach ($map as $key => $value) {
                            //these columns were already handled on the concept itself
                            if (in_array($key, array('uri', 'prefLabel', 'language'), true)) {
                                continue;
                            }

                            $skosId = $skosMap[$key];
                            //check to see if the property already exists
                            $property =
                              ConceptPropertyPeer::lookupProperty($concept->getId(), $skosId, $rowLanguage);

                            if (! empty($row[$value])) {
                                //create a new property for each unmatched column
                                if (! $property) {
                                    $property = new ConceptProperty();
                                    $property->setCreatedUserId($userId);
                                    $property->setConceptId($concept->getId());
                                    $property->setCreatedAt($updateTime);
                                    $property->setSkosPropertyId($skosId);
                                }

                                //only touch (and save) the property when something really changed
                                if (($row[$value] != $property->getObject()) ||
                                    ($rowLanguage != $property->getLanguage())
                                ) {
                                    /**
                                     * @todo We need a check here for skos objectproperties and handle differently
                                     **/
                                    if ($rowLanguage != $property->getLanguage()) {
                                        $property->setLanguage($rowLanguage);
                                    }
                                    if ($row[$value] != $property->getObject()) {
                                        $property->setObject(fixEncoding(rtrim($row[$value])));
                                    }
                                    $property->setUpdatedUserId($userId);
                                    $property->setUpdatedAt($updateTime);
                                    $property->save();
                                }
                            } elseif ($deleteMissing && $property) {
                                //the row value is empty
                                $property->delete();
                            }
                        }

                        //the row went through without an exception
                        $importedCount ++;

                        //          else
                        //               lookup and update concept or element
                        //               lookup and update each property
                        //          update the history for each property, action is 'import', should be a single timestamp for all (this should be automatic)
                        //          if 'delete missing properties' is true
                        //               delete each existing, non-required property that wasn't updated by the import
                    } catch (Exception $e) {
                        //if there's an error of any kind, report it and continue with the next row
                        echo "Error on row: " . $rows . ", " . $uri . "\n" . $e . "\n";
                    }
                }
            } else {
                //it's an element set
                $import->setCsvReader($import->file);
                $import->processProlog();
                $import->getDataColumnIds();
                $import->processData();
                //todo: $results should be a class
                $importedCount = count($import->results['success']['rows']);
                $resultBatchId = $import->saveResults($batchId);
            }
            break;
        case "json":
        case "rdf":
        case "xml":
            //accepted file types that are not implemented yet
            break;
        default:
    }

    /* output to stdout*/
    //          number of objects imported (link to history, filtered on timestamp of import)
    echo " Rows imported: " . $importedCount . "\n From File:" . $filePath . "\nUse this ID for more in this batch: " . $resultBatchId;
    //          number of errors (link to error log)
}
    /**
     * Imports "updatedata_nochange.CSV" into schema 77 (import id 41) and hands the
     * expected rows of the property, element and element-history tables to
     * $this->TestData() for comparison.
     *
     * The $dateFields maps passed to TestData() associate a date column name with a
     * list of row ids; how TestData() treats them is defined elsewhere -- presumably
     * those cells are excluded from an exact match, TODO confirm.
     *
     * @param ImportTester $I
     */
    public function TestUnchanged(ImportTester $I)
    {
        $importer = new ImportVocab("schema", "updatedata_nochange.CSV", 77);
        $importer->importFolder = Fixtures::get("importFolder");
        $importer->importId = 41;
        $importer->setCsvReader($importer->file);
        $importer->processProlog();
        $importer->getDataColumnIds();
        $outcome = $importer->processData();
        verify("There were 1 rows processed", $outcome->getSuccessCount())->equals(1);
        //$importer->processParents();

        // --- reg_schema_property ---------------------------------------------
        $propertyRows = [
            '"15536","2014-01-19 11:29:58","2015-06-12 16:06:08",,"422","422","81","respondentOf","is respondent of","Relates a candidate for a degree who defends or opposes a thesis provided by the praeses in an academic disputation to the work.",,"Property","14069","http://rdaregistry.info/Elements/a/P50204","http://rdaregistry.info/Elements/a/P50001","1","en",,"http://rdaregistry.info/Elements/c/C10004","http://rdaregistry.info/Elements/c/C10001",,,"http://rdaregistry.info/Elements/a/respondentOf.en"',
        ];
        $propertyExcludes = ['created_at', 'updated_at', 'deleted_at', 'created_user_id'];
        $this->TestData($I, 'property', $propertyRows, $propertyExcludes);

        // --- reg_schema_property_element --------------------------------------
        $elementRows = [
            '"121276","2014-01-19 11:29:58","2014-01-19 06:29:58",,"422","422","15536","1","1","respondentOf",,"en","1"',
            '"121277","2014-01-19 11:29:58","2014-01-19 06:29:58",,"422","422","15536","2","1","is respondent of",,"en","1"',
            '"121278","2014-01-19 11:29:58","2014-01-19 06:29:58",,"422","422","15536","3","1","Relates a candidate for a degree who defends or opposes a thesis provided by the praeses in an academic disputation to the work.",,"en","1"',
            '"121279","2014-01-19 11:29:58","2015-06-12 16:06:08",,"422","422","15536","4","1","property",,,"1"',
            '"121280","2014-01-19 11:29:58","2014-01-19 06:29:58",,"422","422","15536","11","1","http://rdaregistry.info/Elements/c/C10004","14331",,"1"',
            '"121281","2014-01-19 11:29:58","2014-01-19 06:29:58",,"422","422","15536","12","1","http://rdaregistry.info/Elements/c/C10001","14328",,"1"',
            '"121282","2014-01-19 11:29:58","2014-01-19 06:29:58",,"422","422","15536","13","1","http://rdaregistry.info/Elements/a/P50001","15536",,"1"',
            '"121283","2014-01-19 11:29:58","2014-01-19 06:29:58",,"422","422","15536","14","1","1",,,"1"',
            '"121284","2014-01-19 11:29:58","2015-06-12 16:06:06",,"422","422","15536","6","1","http://rdaregistry.info/Elements/a/P50204","14069",,"1"',
            '"121285","2014-01-19 11:31:45","2015-06-12 16:06:08","2015-06-12 16:06:08","422","422","15536","6",,"http://rdaregistry.info/Elements/u/P60001","14603",,"1"',
            '"121286","2014-01-19 11:33:14","2014-01-19 06:33:14",,"422","422","15536","15",,"http://rdaregistry.info/Elements/w/P10001","15304",,"1"',
            '"122794","2014-04-26 06:27:36","2015-06-12 16:06:08","2015-06-12 16:06:08","422","422","15536","16",,"http://rdaregistry.info/Elements/a/respondentOf",,,"1"',
            '"122968","2015-06-12 16:06:08","2015-06-12 16:06:08",,"422","422","15536","27",,"http://rdaregistry.info/Elements/a/respondentOf.en",,"en","1"',
            '"122969","2015-06-12 16:06:08","2015-06-12 16:06:08",,"422","422","15536","26",,"http://rdaregistry.info/Elements/u/P60001",,"","1"',
        ];
        $elementDateFields = [
            'created_at' => [122968, 122969],
            'updated_at' => [121285, 122794, 122968, 122969],
            'deleted_at' => [121285, 122794],
        ];
        $this->TestData($I, 'element', $elementRows, [], $elementDateFields);

        // --- reg_schema_property_element_history ------------------------------
        $historyRows = [
            '"141929","2015-06-13 20:18:09","422","deleted","121285","15536","81","6","http://rdaregistry.info/Elements/u/P60001","14603",,"1",,"41"',
            '"141930","2015-06-13 20:10:39","422","deleted","122794","15536","81","16","http://rdaregistry.info/Elements/a/respondentOf",,,"1",,"41"',
            '"141931","2015-06-12 19:24:43","422","added","122968","15536","81","27","http://rdaregistry.info/Elements/a/respondentOf.en",,"en","1",,"41"',
            '"141932","2015-06-12 19:24:43","422","added","122969","15536","81","26","http://rdaregistry.info/Elements/u/P60001",,,"1",,"41"',
        ];
        $historyDateFields = [
            'created_at' => [141929, 141930, 141931, 141932],
        ];
        $this->TestData($I, 'history', $historyRows, [], $historyDateFields);

        // This history entry must NOT exist: the element should not have been updated,
        // because the imported value only differs in case ("property" vs. "Property").
        // Columns: id,created_at,created_user_id,action,schema_property_element_id,schema_property_id,
        //          schema_id,profile_property_id,object,related_schema_property_id,language,status_id,change_note,import_id
        // "141932","2015-06-12 22:38:40","422","updated","121279","15536","81","4","property",,,"1",,"41"
        $I->dontSeeInDatabase(
            'reg_schema_property_element_history',
            ['id' => 141932, 'object' => 'property']
        );

        //todo: count of schema_property_element schema_property_id = 14603 should be 9
    }
    /**
     * Imports the fixture file over existing data and checks that changed, added and
     * deleted cells are reflected in the property table, in the element (statement)
     * table and in the element history table.
     *
     * All ids, counts and literal values are tied to the database fixture and to the
     * import file configured on $this->import.
     */
    public function testSparseUpdate()
    {
        $tester = $this->tester;

        $propertyTable = 'reg_schema_property';
        $elementTable  = 'reg_schema_property_element';
        $historyTable  = 'reg_schema_property_element_history';

        // --- state before the import -------------------------------------------
        $tester->wantToTest("if a changed cell in the main table gets changed");
        $tester->seeRecordCountInDatabaseTable("SchemaPropertyElement", 138);
        $tester->seeRecordCountInDatabaseTable("SchemaProperty", 12);
        $tester->seeRecordCountInDatabaseTable("SchemaPropertyElementHistory", 144);
        $tester->canSeeInDatabase(
            $propertyTable,
            [
                'id'         => 1,
                "definition" => "This property associates a publication, i.e. an instance of F3 Manifestation Product Type, with an instance of E30 Right, which applies to all exemplars of that publication, as long as they are recognised as exemplars of that publication.",
            ]
        );

        // --- run the import -----------------------------------------------------
        $this->import->setCsvReader($this->import->file);
        $this->import->processProlog();
        $parsedProlog = $this->import->prolog;
        $this->assertEquals(14, count($parsedProlog['columns']), "There are the correct number of columns");
        $this->assertEquals(6, count($parsedProlog['prefix']), "There are the correct number of prefix entries");
        $this->assertEquals(10, count($parsedProlog['meta']), "There are the correct number of meta entries");
        $this->import->getDataColumnIds();
        $this->import->processData();
        $succeeded = $this->import->results['success'];
        verify("There were 12 rows processed", count($succeeded['rows']))->equals(12);
        $this->import->processParents();

        // --- a changed cell -----------------------------------------------------
        $tester->canSeeInDatabase($propertyTable, ['id' => 1, "definition" => "fubar, baby"]);
        //the change is mirrored in the statement table
        $changedElement = ['id' => 3, "object" => "fubar, baby"];
        $tester->canSeeInDatabase($elementTable, $changedElement);
        //and recorded in the history
        $changedHistory = ['schema_property_element_id' => 3, 'schema_property_id' => 1, "object" => "fubar, baby"];
        $tester->canSeeInDatabase($historyTable, $changedHistory);
        $historyStamp = $tester->grabFromDatabase($historyTable, 'created_at', $changedHistory);

        //rows that were not part of the change keep their older timestamps
        $propertyStamp = $tester->grabFromDatabase($propertyTable, 'updated_at', ['id' => 3]);
        verify("another property row hasn't been updated", $historyStamp)->greaterThan($propertyStamp);
        $elementStamp = $tester->grabFromDatabase($elementTable, 'updated_at', ['id' => 3]);
        verify("the element row has been updated", $historyStamp)->equals($elementStamp);
        $inverseStamp = $tester->grabFromDatabase($elementTable, 'updated_at', ['id' => 12]);
        verify("the inverse property statement has not been updated", $historyStamp)->greaterThan($inverseStamp);
        $tester->canSeeInDatabase($elementTable, $changedElement);

        // --- a new cell ---------------------------------------------------------
        //a NEW cell in the main table gets added to the statement table
        $tester->canSeeInDatabase($elementTable, ["object" => "New definition"]);
        //and to the main table
        $tester->canSeeInDatabase($propertyTable, ['id' => 2, "definition" => "New definition"]);
        //and the history is updated
        $tester->canSeeInDatabase(
            $historyTable,
            ['profile_property_id' => 3, 'schema_property_id' => 2, "object" => "New definition", "action" => "added"]
        );

        // --- a cell that only exists in the statement table ------------------------
        $statementOnlyUri = "http://iflastandards.info/ns/fr/frbr/frbroo/CLP105TestMe";
        $tester->canSeeInDatabase($elementTable, ["object" => $statementOnlyUri]);
        //and the history is updated
        $tester->canSeeInDatabase(
            $historyTable,
            ['profile_property_id' => 15, 'schema_property_id' => 4, "object" => $statementOnlyUri, "action" => "added"]
        );
        //the parent update date matches the statement's when the statement is the only thing changed
        $parentStamp    = $tester->grabFromDatabase($propertyTable, 'updated_at', ['id' => 4]);
        $statementStamp = $tester->grabFromDatabase($elementTable, 'updated_at', ['id' => 140]);
        verify("the element row has been updated", $parentStamp)->equals($statementStamp);

        // --- deleted cells --------------------------------------------------------
        $removedComment = "Inverse of CLP104_subject_to.";
        $tester->wantTo('see if a deleted cell is removed from the schema_property record');
        $tester->dontSeeInDatabase($propertyTable, ['id' => 2, "comment" => $removedComment]);
        $tester->wantTo('see if a deleted cell that was removed from the schema_property record, was also marked as deleted in the schema_property_element table');
        $tester->dontSeeInDatabase(
            $elementTable,
            ['schema_property_id' => 2, "object" => $removedComment, "deleted_at" => null]
        );
        $tester->wantTo('see if a deleted cell, marked as deleted in the schema_property_element table, is also marked as deleted in the history table');
        $tester->canSeeInDatabase(
            $historyTable,
            ['profile_property_id' => 5, 'schema_property_id' => 2, "object" => $removedComment, "action" => "deleted"]
        );

        $removedUri = "http://iflastandards.info/ns/fr/frbr/frbroo/CLP104";
        $tester->wantTo('see if a deleted cell, not in the main property record, deletes the schema_property_element record');
        $tester->dontSeeInDatabase(
            $elementTable,
            ['schema_property_id' => 2, "object" => $removedUri, "deleted_at" => null]
        );
        $tester->wantTo('see if a deleted cell, marked as deleted in the schema_property_element table, is also marked as deleted in the history table');
        $tester->canSeeInDatabase(
            $historyTable,
            ['profile_property_id' => 15, 'schema_property_id' => 2, "object" => $removedUri, "action" => "deleted"]
        );

        // --- subproperties --------------------------------------------------------
        $tester->wantTo('see if a subproperty was inappropriately deleted');
        $tester->canSeeInDatabase(
            $elementTable,
            ['schema_property_id' => 7, "object" => "http://www.cidoc-crm.org/cidoc-crm/P130_shows_features_of", "deleted_at" => null]
        );

        //row 9 is converted to property from subproperty
        $tester->wantTo('see if a subproperty was appropriately deleted');
        $tester->canSeeInDatabase(
            $historyTable,
            ['object' => 'http://iflastandards.info/ns/fr/frbr/frbroo/R3', "action" => "deleted"]
        );

        //row 12 is converted to subproperty from property
        $newParentUri = 'http://iflastandards.info/ns/fr/frbr/frbroo/R3i';
        $tester->canSeeInDatabase($propertyTable, ['id' => 12, 'type' => 'subproperty', 'parent_uri' => $newParentUri]);
        $tester->canSeeInDatabase(
            $historyTable,
            ['profile_property_id' => 6, 'schema_property_id' => 12, 'object' => $newParentUri, "action" => "added"]
        );

        //row 5 removes one subclass, but keeps the parent_class and the second subclass
        $tester->canSeeInDatabase(
            $propertyTable,
            ['id' => 5, 'parent_uri' => 'http://iflastandards.info/ns/fr/frbr/frbroo/F2']
        );
        $tester->canSeeInDatabase(
            $historyTable,
            ['profile_property_id' => 9, 'schema_property_id' => 5, 'object' => 'http://www.cidoc-crm.org/cidoc-crm/E32_Authority_Document', "action" => "deleted"]
        );
    }