コード例 #1
0
ファイル: BaseRecord.php プロジェクト: grharry/RecordManager
 /**
  * Pass a date string through only if it is accepted as an ISO 8601 date.
  *
  * The check is delegated to MetadataUtils::validateISO8601Date(); any result
  * other than boolean false is treated as "valid".
  *
  * @param string $dateString Date string to check
  *
  * @return string The unchanged input when valid, otherwise an empty string
  */
 protected function validateDate($dateString)
 {
     $isValid = MetadataUtils::validateISO8601Date($dateString) !== false;

     return $isValid ? $dateString : '';
 }
コード例 #2
0
 /**
  * Attempt to parse a string (in finnish) into a normalized date range.
  *
  * The input is lower-cased and trimmed, checked against a table of named
  * periods, cut at the first comma, and then run through an ordered chain of
  * regular expressions. The order of the chain matters: more specific patterns
  * must be tried before the generic "any 2-4 digit number" fallbacks.
  *
  * Branches that produce complete timestamps set $noprocess; all other
  * branches produce bare years that are padded to four digits and expanded to
  * "<year>-01-01T00:00:00Z" / "<year>-12-31T23:59:59Z" afterwards.
  *
  * TODO: complicated normalizations like this should preferably reside within
  * their own, separate component which should allow modification of the algorithm
  * by methods other than hard-coding rules into source.
  *
  * @param string $input Date range
  *
  * @return string[]|null Two ISO 8601 dates (start, end), or null when the
  * input cannot be parsed, names an undated period, or lies after the current
  * year
  */
 protected function parseDateRange($input)
 {
     $input = trim(strtolower($input));
     // Named periods map directly to a fixed range (or to null for "undated").
     $dateMappings = ['kivikausi' => ['-8600-01-01T00:00:00Z', '-1501-12-31T23:59:59Z'], 'pronssikausi' => ['-1500-01-01T00:00:00Z', '-0501-12-31T23:59:59Z'], 'rautakausi' => ['-0500-01-01T00:00:00Z', '1299-12-31T23:59:59Z'], 'keskiaika' => ['1300-01-01T00:00:00Z', '1550-12-31T23:59:59Z'], 'ajoittamaton' => null, 'tuntematon' => null];
     foreach ($dateMappings as $str => $value) {
         if (strstr($input, $str)) {
             return $value;
         }
     }
     // Finnish month names to two-digit month numbers
     $k = ['tammikuu' => '01', 'helmikuu' => '02', 'maaliskuu' => '03', 'huhtikuu' => '04', 'toukokuu' => '05', 'kesäkuu' => '06', 'heinäkuu' => '07', 'elokuu' => '08', 'syyskuu' => '09', 'lokakuu' => '10', 'marraskuu' => '11', 'joulukuu' => '12'];
     $imprecise = false;
     // Only the part before the first comma is parsed
     list($input) = explode(',', $input, 2);
     if (preg_match('/(\\d\\d?)\\s*.\\s*(\\d\\d?)\\s*.\\s*(\\d\\d\\d\\d)\\s*-\\s*(\\d\\d?)\\s*.\\s*(\\d\\d?)\\s*.\\s*(\\d\\d\\d\\d)/', $input, $matches) > 0) {
         // d.m.yyyy - d.m.yyyy
         $startDate = sprintf('%04d-%02d-%02dT00:00:00Z', $matches[3], $matches[2], $matches[1]);
         $endDate = sprintf('%04d-%02d-%02dT23:59:59Z', $matches[6], $matches[5], $matches[4]);
         $noprocess = true;
     } elseif (preg_match('/(\\d\\d\\d\\d)\\s*-\\s*(\\d\\d?)\\s*.\\s*(\\d\\d?)\\s*.\\s*(\\d\\d\\d\\d)/', $input, $matches) > 0) {
         // yyyy - d.m.yyyy
         $startDate = sprintf('%04d-01-01T00:00:00Z', $matches[1]);
         $endDate = sprintf('%04d-%02d-%02dT23:59:59Z', $matches[4], $matches[3], $matches[2]);
         $noprocess = true;
     } elseif (preg_match('/(\\d\\d?)\\s*.\\s*(\\d\\d?)\\s*.\\s*(\\d\\d\\d\\d)\\s*-\\s*(\\d\\d\\d\\d)/', $input, $matches) > 0) {
         // d.m.yyyy - yyyy
         $startDate = sprintf('%04d-%02d-%02dT00:00:00Z', $matches[3], $matches[2], $matches[1]);
         $endDate = sprintf('%04d-12-31T23:59:59Z', $matches[4]);
         $noprocess = true;
     } elseif (preg_match('/(\\d\\d\\d\\d)\\s*.\\s*(\\d\\d?)\\s*.\\s*(\\d\\d?)\\s*-\\s*(\\d\\d\\d\\d)\\s*.\\s*(\\d\\d?)\\s*.\\s*(\\d\\d?)/', $input, $matches) > 0) {
         // yyyy.m.d - yyyy.m.d
         $startDate = sprintf('%04d-%02d-%02dT00:00:00Z', $matches[1], $matches[2], $matches[3]);
         $endDate = sprintf('%04d-%02d-%02dT23:59:59Z', $matches[4], $matches[5], $matches[6]);
         $noprocess = true;
     } elseif (preg_match('/(\\d\\d\\d\\d)(\\d\\d?)(\\d\\d?)\\s*-\\s*(\\d\\d\\d\\d)(\\d\\d?)(\\d\\d?)/', $input, $matches) > 0) {
         // yyyymmdd - yyyymmdd
         $startDate = sprintf('%04d-%02d-%02dT00:00:00Z', $matches[1], $matches[2], $matches[3]);
         $endDate = sprintf('%04d-%02d-%02dT23:59:59Z', $matches[4], $matches[5], $matches[6]);
         $noprocess = true;
     } elseif (preg_match('/(\\d\\d\\d\\d)(\\d\\d?)\\s*-\\s*(\\d\\d\\d\\d)(\\d\\d?)/', $input, $matches) > 0) {
         // yyyymm - yyyymm; the end is the last day of the end month
         $startDate = sprintf('%04d-%02d-01T00:00:00Z', $matches[1], $matches[2]);
         $endDate = sprintf('%04d-%02d-01', $matches[3], $matches[4]);
         try {
             $d = new DateTime($endDate);
         } catch (Exception $e) {
             global $logger;
             $logger->log('NdlLidoRecord', "Failed to parse date {$endDate}, record {$this->source}." . $this->getID(), Logger::ERROR);
             return null;
         }
         // 't' formats as the number of days in the month
         $endDate = $d->format('Y-m-t') . 'T23:59:59Z';
         $noprocess = true;
     } elseif (preg_match('/(\\d\\d\\d\\d)-(\\d\\d?)-(\\d\\d?)/', $input, $matches) > 0) {
         // This one needs to be before the lazy matcher below
         $year = $matches[1];
         $month = sprintf('%02d', $matches[2]);
         $day = sprintf('%02d', $matches[3]);
         $startDate = $year . '-' . $month . '-' . $day . 'T00:00:00Z';
         $endDate = $year . '-' . $month . '-' . $day . 'T23:59:59Z';
         $noprocess = true;
     } elseif (preg_match('/(\\d\\d\\d\\d)\\s*-\\s*(\\d\\d\\d\\d)\\s*(-luvun|-l)\\s+(loppupuoli|loppu)/', $input, $matches) > 0) {
         // yyyy-yyyy-luvun loppu: widen the end to the full decade/century
         $startDate = $matches[1];
         $endDate = $matches[2];
         if ($endDate % 100 == 0) {
             // Century
             $endDate += 99;
         } elseif ($endDate % 10 == 0) {
             // Decade
             $endDate += 9;
         }
     } elseif (preg_match('/(\\d?\\d?\\d\\d)\\s*(-|~)\\s*(\\d?\\d?\\d\\d)\\s*(-luku|-l)?\\s*(\\(?\\?\\)?)?/', $input, $matches) > 0) {
         // 1940-1960-luku
         // 1940-1960-l
         // 1940-60-l
         // 1930 - 1970-luku
         // 30-40-luku
         $startDate = $matches[1];
         $endDate = $matches[3];
         // Group 4 is present as an empty string when only the trailing "?"
         // group matched, so test for a non-empty suffix rather than isset().
         if (!empty($matches[4])) {
             if ($endDate % 10 == 0) {
                 $endDate += 9;
             }
         }
         $imprecise = isset($matches[5]);
     } elseif (preg_match('/(\\d?\\d?\\d\\d)\\s+(tammikuu|helmikuu|maaliskuu|huhtikuu|toukokuu|kesäkuu|heinäkuu|elokuu|syyskuu|lokakuu|marraskuu|joulukuu)/', $input, $matches) > 0) {
         // Year followed by a month name; covers the whole month
         $year = $matches[1];
         $month = $k[$matches[2]];
         $startDate = $year . '-' . $month . '-01T00:00:00Z';
         $endDate = $year . '-' . $month . '-01';
         try {
             $d = new DateTime($endDate);
             $endDate = $d->format('Y-m-t') . 'T23:59:59Z';
         } catch (Exception $e) {
             global $logger;
             $logger->log('NdlLidoRecord', "Failed to parse date {$endDate}, record {$this->source}." . $this->getID(), Logger::ERROR);
             return null;
         }
         $noprocess = true;
     } elseif (preg_match('/(\\d\\d\\d\\d)(\\d\\d)(\\d\\d)/', $input, $matches) > 0) {
         // yyyymmdd
         $year = $matches[1];
         $month = sprintf('%02d', $matches[2]);
         $day = sprintf('%02d', $matches[3]);
         $startDate = $year . '-' . $month . '-' . $day . 'T00:00:00Z';
         $endDate = $year . '-' . $month . '-' . $day . 'T23:59:59Z';
         $noprocess = true;
     } elseif (preg_match('/(\\d\\d\\d\\d)(\\d\\d)/', $input, $matches) > 0) {
         // yyyymm; covers the whole month
         $year = $matches[1];
         $month = sprintf('%02d', $matches[2]);
         $startDate = $year . '-' . $month . '-01T00:00:00Z';
         $endDate = $year . '-' . $month . '-01';
         try {
             $d = new DateTime($endDate);
         } catch (Exception $e) {
             global $logger;
             $logger->log('NdlLidoRecord', "Failed to parse date {$endDate}, record {$this->source}." . $this->getID(), Logger::ERROR);
             return null;
         }
         $endDate = $d->format('Y-m-t') . 'T23:59:59Z';
         $noprocess = true;
     } elseif (preg_match('/(\\d\\d?)\\s*\\.\\s*(\\d\\d?)\\s*\\.\\s*(\\d\\d\\d\\d)/', $input, $matches) > 0) {
         // d.m.yyyy
         $year = $matches[3];
         $month = sprintf('%02d', $matches[2]);
         $day = sprintf('%02d', $matches[1]);
         $startDate = $year . '-' . $month . '-' . $day . 'T00:00:00Z';
         $endDate = $year . '-' . $month . '-' . $day . 'T23:59:59Z';
         $noprocess = true;
     } elseif (preg_match('/(\\d\\d?)\\s*\\.\\s*(\\d\\d\\d\\d)/', $input, $matches) > 0) {
         // m.yyyy; covers the whole month
         $year = $matches[2];
         $month = sprintf('%02d', $matches[1]);
         $startDate = $year . '-' . $month . '-01' . 'T00:00:00Z';
         $endDate = $year . '-' . $month . '-01';
         try {
             $d = new DateTime($endDate);
             $endDate = $d->format('Y-m-t') . 'T23:59:59Z';
         } catch (Exception $e) {
             global $logger;
             $logger->log('NdlLidoRecord', "Failed to parse date {$endDate}, record {$this->source}." . $this->getID(), Logger::ERROR);
             return null;
         }
         $noprocess = true;
     } elseif (preg_match('/(\\d?\\d?\\d\\d)\\s*-(luvun|luku)\\s+(alkupuolelta|alkupuoli|alku|alusta)/', $input, $matches) > 0) {
         // Early part of a century (first 30 years) or decade (first 4 years)
         $year = $matches[1];
         if ($year % 100 == 0) {
             // Century
             $startDate = $year;
             $endDate = $year + 29;
         } elseif ($year % 10 == 0) {
             // Decade
             $startDate = $year;
             $endDate = $year + 3;
         } else {
             // Uhh?
             $startDate = $year;
             $endDate = $year;
         }
     } elseif (preg_match('/(\\d?\\d?\\d\\d)\\s*-(luvun|luku)\\s+(puoliväli)/', $input, $matches) > 0) {
         // Middle part of a century or decade
         $year = $matches[1];
         if ($year % 100 == 0) {
             // Century
             $startDate = $year + 29;
             $endDate = $year + 70;
         } elseif ($year % 10 == 0) {
             // Decade
             $startDate = $year + 3;
             $endDate = $year + 7;
         } else {
             // Uhh?
             $startDate = $year;
             $endDate = $year;
         }
     } elseif (preg_match('/(\\d?\\d?\\d\\d)\\s*(-luvun|-l)\\s+(loppupuoli|loppu|lopulta|loppupuolelta)/', $input, $matches) > 0) {
         // Late part of a century or decade
         $year = $matches[1];
         if ($year % 100 == 0) {
             // Century
             $startDate = $year + 70;
             $endDate = $year + 99;
         } elseif ($year % 10 == 0) {
             // Decade
             $startDate = $year + 7;
             $endDate = $year + 9;
         } else {
             $startDate = $year;
             $endDate = $year;
         }
     } elseif (preg_match('/(-?\\d?\\d?\\d\\d)\\s*-(luku|luvulta|l)/', $input, $matches) > 0) {
         // Whole century or decade
         $year = $matches[1];
         $startDate = $year;
         if ($year % 100 == 0) {
             $endDate = $year + 99;
         } elseif ($year % 10 == 0) {
             $endDate = $year + 9;
         } else {
             $endDate = $year;
         }
     } elseif (preg_match('/(\\d?\\d?\\d\\d)\\s*ekr.?\\s*\\-\\s*(\\d?\\d?\\d\\d)\\s*ekr.?/', $input, $matches) > 0) {
         // Both years carry the "ekr" marker and are negated
         $startDate = -$matches[1];
         $endDate = -$matches[2];
     } elseif (preg_match('/(\\d?\\d?\\d\\d)\\s*ekr.?\\s*\\-\\s*(\\d?\\d?\\d\\d)\\s*jkr.?/', $input, $matches) > 0) {
         // Start year carries "ekr" (negated), end year carries "jkr"
         $startDate = -$matches[1];
         $endDate = $matches[2];
     } elseif (preg_match('/(-?\\d?\\d?\\d\\d) jälkeen/', $input, $matches) > 0) {
         // "<year> jälkeen": the year and the nine following years
         $year = $matches[1];
         $startDate = $year;
         $endDate = $year + 9;
     } elseif (preg_match('/(-?\\d\\d\\d\\d)\\s*-\\s*(-?\\d\\d\\d\\d)/', $input, $matches) > 0) {
         $startDate = $matches[1];
         $endDate = $matches[2];
     } elseif (preg_match('/(-?\\d{1,4})\\s+-\\s+(-?\\d{1,4})/', $input, $matches) > 0) {
         // Range of 1-4 digit, optionally negative, years separated by a
         // space-padded dash. The quantifier must be written {1,4}; the
         // form {1-4} is not a quantifier and is matched as literal text.
         $startDate = $matches[1];
         $endDate = $matches[2];
     } elseif (preg_match('/(-?\\d?\\d?\\d\\d)\\s*\\?/', $input, $matches) > 0) {
         $year = $matches[1];
         $startDate = $year;
         $endDate = $year;
         $imprecise = true;
     } elseif (preg_match('/(-?\\d?\\d?\\d\\d)/', $input, $matches) > 0) {
         $year = $matches[1];
         $startDate = $year;
         $endDate = $year;
     } else {
         return null;
     }
     // Normalize negative years to a sign followed by four digits
     if ($startDate < 0) {
         $startDate = '-' . substr('0000', 0, 5 - strlen($startDate)) . substr($startDate, 1);
     } elseif ($startDate == 0) {
         $startDate = '0000';
     }
     if ($endDate < 0) {
         $endDate = '-' . substr('0000', 0, 5 - strlen($endDate)) . substr($endDate, 1);
     } elseif ($endDate == 0) {
         $endDate = '0000';
     }
     // Pad short years; a two-digit start year is placed in the 1900s
     switch (strlen($startDate)) {
         case 1:
             $startDate = "000{$startDate}";
             break;
         case 2:
             $startDate = "19{$startDate}";
             break;
         case 3:
             $startDate = "0{$startDate}";
             break;
     }
     switch (strlen($endDate)) {
         case 1:
             $endDate = "000{$endDate}";
             break;
         case 2:
             // Take into account possible negative sign
             $endDate = substr($startDate, 0, -2) . $endDate;
             break;
         case 3:
             $endDate = "0{$endDate}";
             break;
     }
     if ($imprecise) {
         // This is way arbitrary, so disabled for now..
         //$startDate -= 2;
         //$endDate += 2;
     }
     // Bare years still need to be expanded into full timestamps
     if (empty($noprocess)) {
         $startDate = $startDate . '-01-01T00:00:00Z';
         $endDate = $endDate . '-12-31T23:59:59Z';
     }
     // Trying to index dates into the future? I don't think so...
     // Compare the leading (possibly negative) year numerically. Comparing
     // the full timestamp string against the year string is lexicographic
     // and would also reject every date within the current year.
     $yearNow = (int) date('Y');
     if ((int) $startDate > $yearNow || (int) $endDate > $yearNow) {
         return null;
     }
     $start = MetadataUtils::validateISO8601Date($startDate);
     $end = MetadataUtils::validateISO8601Date($endDate);
     if ($start === false || $end === false) {
         global $logger;
         $logger->log('NdlLidoRecord', "Invalid date range {$startDate} - {$endDate} parsed from " . "'{$input}', record {$this->source}." . $this->getID(), Logger::WARNING);
         // Salvage a one-year range from whichever end is valid
         if ($start !== false) {
             $endDate = substr($startDate, 0, 4) . '-12-31T23:59:59Z';
         } elseif ($end !== false) {
             $startDate = substr($endDate, 0, 4) . '-01-01T00:00:00Z';
         } else {
             return null;
         }
     } elseif ($start > $end) {
         global $logger;
         $logger->log('NdlLidoRecord', "Invalid date range {$startDate} - {$endDate} parsed from '{$input}', " . "record {$this->source}." . $this->getID(), Logger::WARNING);
         // Reversed range: clamp the end to the end of the start year
         $endDate = substr($startDate, 0, 4) . '-12-31T23:59:59Z';
     }
     return [$startDate, $endDate];
 }
コード例 #3
0
ファイル: LidoRecord.php プロジェクト: grharry/RecordManager
 /**
  * Attempt to parse a string (in finnish) into a normalized date range.
  *
  * Recognized forms, tried in this order: "yyyy - yyyy", "yyyy-m-d",
  * "d.m.yyyy" (any single character is accepted as the separator), a 2-4 digit
  * year followed by "?" (widened by three years in both directions) and a
  * plain 2-4 digit year. Two-digit years are placed in the 1900s.
  *
  * TODO: complicated normalizations like this should preferably reside within
  * their own, separate component which should allow modification of the
  * algorithm by methods other than hard-coding rules into source.
  *
  * @param string $input Date range
  *
  * @return string|null Two ISO 8601 dates separated with a comma on success,
  * and null on failure (unparseable input, a year after the current year, or
  * a date rejected by MetadataUtils::validateISO8601Date())
  */
 protected function parseDateRange($input)
 {
     $input = trim(strtolower($input));
     if (preg_match('/(\\d\\d\\d\\d) ?- (\\d\\d\\d\\d)/', $input, $matches) > 0) {
         $startDate = $matches[1];
         $endDate = $matches[2];
     } elseif (preg_match('/(\\d\\d\\d\\d)-(\\d\\d?)-(\\d\\d?)/', $input, $matches) > 0) {
         $year = $matches[1];
         $month = sprintf('%02d', $matches[2]);
         $day = sprintf('%02d', $matches[3]);
         $startDate = $year . '-' . $month . '-' . $day . 'T00:00:00Z';
         $endDate = $year . '-' . $month . '-' . $day . 'T23:59:59Z';
         // Already a complete timestamp; skip the year expansion below
         $noprocess = true;
     } elseif (preg_match('/(\\d\\d?)\\s*.\\s*(\\d\\d?)\\s*.\\s*(\\d\\d\\d\\d)/', $input, $matches) > 0) {
         $year = $matches[3];
         $month = sprintf('%02d', $matches[2]);
         $day = sprintf('%02d', $matches[1]);
         $startDate = $year . '-' . $month . '-' . $day . 'T00:00:00Z';
         $endDate = $year . '-' . $month . '-' . $day . 'T23:59:59Z';
         $noprocess = true;
     } elseif (preg_match('/(\\d?\\d?\\d\\d) ?\\?/', $input, $matches) > 0) {
         // Uncertain year: widen by three years in both directions
         $year = $matches[1];
         $startDate = $year - 3;
         $endDate = $year + 3;
     } elseif (preg_match('/(\\d?\\d?\\d\\d)/', $input, $matches) > 0) {
         $year = $matches[1];
         $startDate = $year;
         $endDate = $year;
     } else {
         return null;
     }
     // Two-digit start years are placed in the 1900s
     if (strlen($startDate) == 2) {
         $startDate = 1900 + (int) $startDate;
     }
     // A two-digit end year inherits the century of the start year
     if (strlen($endDate) == 2) {
         $century = substr($startDate, 0, 2) . '00';
         $endDate = (int) $century + (int) $endDate;
     }
     if (empty($noprocess)) {
         $startDate = $startDate . '-01-01T00:00:00Z';
         $endDate = $endDate . '-12-31T23:59:59Z';
     }
     // Trying to index dates into the future? I don't think so...
     // Compare the leading year numerically. Comparing the full timestamp
     // string against the year string is lexicographic and would also reject
     // every date within the current year.
     $yearNow = (int) date('Y');
     if ((int) $startDate > $yearNow || (int) $endDate > $yearNow) {
         return null;
     }
     if (MetadataUtils::validateISO8601Date($startDate) === false || MetadataUtils::validateISO8601Date($endDate) === false) {
         return null;
     }
     return "{$startDate},{$endDate}";
 }
コード例 #4
0
 /**
  * Return publication year/date range
  *
  * Sources are consulted in this order, each one only if no valid range has
  * been found so far:
  *  1. field 008: a one-character code at offset 6 selects how the characters
  *     at offsets 7-14 are read (single year, two years, or year + month + day);
  *  2. field 260, subfield c: the first four-digit run is used as the year;
  *  3. fields 264 whose second indicator is '1', subfield c: same as above,
  *     first match wins.
  *
  * If the resulting end precedes the start, a warning is logged and the end
  * is replaced with the last second of the start year.
  *
  * @return array|string Array of two ISO 8601 dates (start, end), or an empty
  * string when no valid range could be determined
  */
 protected function getPublicationDateRange()
 {
     $startDate = null;
     $endDate = null;

     // True only when both ends are present and pass ISO 8601 validation
     $isUsable = function ($from, $to) {
         return $from !== null
             && $to !== null
             && MetadataUtils::validateISO8601Date($from) !== false
             && MetadataUtils::validateISO8601Date($to) !== false;
     };

     // First choice: the coded dates in field 008
     $field008 = $this->getField('008');
     if ($field008) {
         $dateType = substr($field008, 6, 1);
         if ($dateType == 'c') {
             // Open-ended range starting at the given year
             $firstYear = substr($field008, 7, 4);
             $startDate = "{$firstYear}-01-01T00:00:00Z";
             $endDate = '9999-12-31T23:59:59Z';
         } elseif (in_array($dateType, ['d', 'i', 'k', 'm', 'q'])) {
             // Two separate years
             $firstYear = substr($field008, 7, 4);
             $secondYear = substr($field008, 11, 4);
             $startDate = "{$firstYear}-01-01T00:00:00Z";
             $endDate = "{$secondYear}-12-31T23:59:59Z";
         } elseif ($dateType == 'e') {
             // A single day given as year, month and day
             $firstYear = substr($field008, 7, 4);
             $monthPart = substr($field008, 11, 2);
             $dayPart = substr($field008, 13, 2);
             $startDate = "{$firstYear}-{$monthPart}-{$dayPart}T00:00:00Z";
             $endDate = "{$firstYear}-{$monthPart}-{$dayPart}T23:59:59Z";
         } elseif (in_array($dateType, ['s', 't', 'u'])) {
             // A single year
             $firstYear = substr($field008, 7, 4);
             $startDate = "{$firstYear}-01-01T00:00:00Z";
             $endDate = "{$firstYear}-12-31T23:59:59Z";
         }
     }

     // Second choice: first four-digit year in 260$c
     if (!$isUsable($startDate, $endDate)) {
         $field260 = $this->getField('260');
         if ($field260) {
             $yearText = $this->getSubfield($field260, 'c');
             $found = [];
             if ($yearText && preg_match('/(\\d{4})/', $yearText, $found)) {
                 $startDate = "{$found[1]}-01-01T00:00:00Z";
                 $endDate = "{$found[1]}-12-31T23:59:59Z";
             }
         }
     }

     // Third choice: first 264 field with second indicator '1' that has a
     // four-digit year in subfield c
     if (!$isUsable($startDate, $endDate)) {
         foreach ($this->getFields('264') as $field264) {
             if ($this->getIndicator($field264, 2) != '1') {
                 continue;
             }
             $yearText = $this->getSubfield($field264, 'c');
             $found = [];
             if ($yearText && preg_match('/(\\d{4})/', $yearText, $found)) {
                 $startDate = "{$found[1]}-01-01T00:00:00Z";
                 $endDate = "{$found[1]}-12-31T23:59:59Z";
                 break;
             }
         }
     }

     if (!$isUsable($startDate, $endDate)) {
         return '';
     }

     if ($endDate < $startDate) {
         // Reversed range: report it and clamp the end to the start year
         global $logger;
         $logger->log('NdlMarcRecord', "Invalid date range {$startDate} - {$endDate}, record " . "{$this->source}." . $this->getID(), Logger::WARNING);
         $endDate = substr($startDate, 0, 4) . '-12-31T23:59:59Z';
     }

     return [$startDate, $endDate];
 }