/**
 * Pass a date string through only if it is a valid ISO 8601 date.
 *
 * Validity is decided by MetadataUtils::validateISO8601Date(); any result
 * other than boolean false counts as valid.
 *
 * @param string $dateString Date string
 *
 * @return string The unchanged date string, or an empty string if invalid
 */
protected function validateDate($dateString)
{
    $isValid = MetadataUtils::validateISO8601Date($dateString) !== false;

    return $isValid ? $dateString : '';
}
/**
 * Attempt to parse a string (in Finnish) into a normalized date range.
 *
 * The input is lower-cased and trimmed, checked against a list of named
 * periods, cut at the first comma and then matched against a prioritised
 * list of patterns. The first matching pattern wins, so the order of the
 * branches below is significant.
 *
 * TODO: complicated normalizations like this should preferably reside within
 * their own, separate component which should allow modification of the algorithm
 * by methods other than hard-coding rules into source.
 *
 * @param string $input Date range
 *
 * @return string[]|null Two ISO 8601 timestamps (start, end), or null if the
 *                       input is a period mapped to null ("ajoittamaton",
 *                       "tuntematon"), cannot be parsed, or lies in a
 *                       future year
 */
protected function parseDateRange($input)
{
    $input = trim(strtolower($input));

    // Named periods: Stone Age, Bronze Age, Iron Age, Middle Ages,
    // "undated" and "unknown" (the last two deliberately yield null).
    $dateMappings = [
        'kivikausi' => ['-8600-01-01T00:00:00Z', '-1501-12-31T23:59:59Z'],
        'pronssikausi' => ['-1500-01-01T00:00:00Z', '-0501-12-31T23:59:59Z'],
        'rautakausi' => ['-0500-01-01T00:00:00Z', '1299-12-31T23:59:59Z'],
        'keskiaika' => ['1300-01-01T00:00:00Z', '1550-12-31T23:59:59Z'],
        'ajoittamaton' => null,
        'tuntematon' => null
    ];

    foreach ($dateMappings as $str => $value) {
        if (strpos($input, $str) !== false) {
            return $value;
        }
    }

    // Finnish month names => two-digit month numbers
    $k = [
        'tammikuu' => '01',
        'helmikuu' => '02',
        'maaliskuu' => '03',
        'huhtikuu' => '04',
        'toukokuu' => '05',
        'kesäkuu' => '06',
        'heinäkuu' => '07',
        'elokuu' => '08',
        'syyskuu' => '09',
        'lokakuu' => '10',
        'marraskuu' => '11',
        'joulukuu' => '12'
    ];

    $imprecise = false;
    // Set to true by branches that already produce complete timestamps;
    // the other branches produce bare years that are expanded later.
    $noprocess = false;

    // Only the part before the first comma is parsed
    list($input) = explode(',', $input, 2);

    if (preg_match(
        '/(\\d\\d?)\\s*.\\s*(\\d\\d?)\\s*.\\s*(\\d\\d\\d\\d)\\s*-\\s*(\\d\\d?)\\s*.\\s*(\\d\\d?)\\s*.\\s*(\\d\\d\\d\\d)/',
        $input,
        $matches
    ) > 0) {
        // d.m.yyyy - d.m.yyyy (the separator may be any single character)
        $startDate = sprintf('%04d-%02d-%02dT00:00:00Z', $matches[3], $matches[2], $matches[1]);
        $endDate = sprintf('%04d-%02d-%02dT23:59:59Z', $matches[6], $matches[5], $matches[4]);
        $noprocess = true;
    } elseif (preg_match(
        '/(\\d\\d\\d\\d)\\s*-\\s*(\\d\\d?)\\s*.\\s*(\\d\\d?)\\s*.\\s*(\\d\\d\\d\\d)/',
        $input,
        $matches
    ) > 0) {
        // yyyy - d.m.yyyy
        $startDate = sprintf('%04d-01-01T00:00:00Z', $matches[1]);
        $endDate = sprintf('%04d-%02d-%02dT23:59:59Z', $matches[4], $matches[3], $matches[2]);
        $noprocess = true;
    } elseif (preg_match(
        '/(\\d\\d?)\\s*.\\s*(\\d\\d?)\\s*.\\s*(\\d\\d\\d\\d)\\s*-\\s*(\\d\\d\\d\\d)/',
        $input,
        $matches
    ) > 0) {
        // d.m.yyyy - yyyy
        $startDate = sprintf('%04d-%02d-%02dT00:00:00Z', $matches[3], $matches[2], $matches[1]);
        $endDate = sprintf('%04d-12-31T23:59:59Z', $matches[4]);
        $noprocess = true;
    } elseif (preg_match(
        '/(\\d\\d\\d\\d)\\s*.\\s*(\\d\\d?)\\s*.\\s*(\\d\\d?)\\s*-\\s*(\\d\\d\\d\\d)\\s*.\\s*(\\d\\d?)\\s*.\\s*(\\d\\d?)/',
        $input,
        $matches
    ) > 0) {
        // yyyy.m.d - yyyy.m.d (the separator may be any single character)
        $startDate = sprintf('%04d-%02d-%02dT00:00:00Z', $matches[1], $matches[2], $matches[3]);
        $endDate = sprintf('%04d-%02d-%02dT23:59:59Z', $matches[4], $matches[5], $matches[6]);
        $noprocess = true;
    } elseif (preg_match(
        '/(\\d\\d\\d\\d)(\\d\\d?)(\\d\\d?)\\s*-\\s*(\\d\\d\\d\\d)(\\d\\d?)(\\d\\d?)/',
        $input,
        $matches
    ) > 0) {
        // yyyymmdd - yyyymmdd
        $startDate = sprintf('%04d-%02d-%02dT00:00:00Z', $matches[1], $matches[2], $matches[3]);
        $endDate = sprintf('%04d-%02d-%02dT23:59:59Z', $matches[4], $matches[5], $matches[6]);
        $noprocess = true;
    } elseif (preg_match(
        '/(\\d\\d\\d\\d)(\\d\\d?)\\s*-\\s*(\\d\\d\\d\\d)(\\d\\d?)/',
        $input,
        $matches
    ) > 0) {
        // yyyymm - yyyymm; the end is the last day of the end month
        $startDate = sprintf('%04d-%02d-01T00:00:00Z', $matches[1], $matches[2]);
        $endDate = sprintf('%04d-%02d-01', $matches[3], $matches[4]);
        try {
            $d = new DateTime($endDate);
        } catch (Exception $e) {
            global $logger;
            $logger->log(
                'NdlLidoRecord',
                "Failed to parse date {$endDate}, record {$this->source}." . $this->getID(),
                Logger::ERROR
            );
            return null;
        }
        $endDate = $d->format('Y-m-t') . 'T23:59:59Z';
        $noprocess = true;
    } elseif (preg_match('/(\\d\\d\\d\\d)-(\\d\\d?)-(\\d\\d?)/', $input, $matches) > 0) {
        // yyyy-m-d, a single day.
        // This one needs to be before the lazy matcher below
        $year = $matches[1];
        $month = sprintf('%02d', $matches[2]);
        $day = sprintf('%02d', $matches[3]);
        $startDate = $year . '-' . $month . '-' . $day . 'T00:00:00Z';
        $endDate = $year . '-' . $month . '-' . $day . 'T23:59:59Z';
        $noprocess = true;
    } elseif (preg_match(
        '/(\\d\\d\\d\\d)\\s*-\\s*(\\d\\d\\d\\d)\\s*(-luvun|-l)\\s+(loppupuoli|loppu)/',
        $input,
        $matches
    ) > 0) {
        // yyyy - yyyy-luvun loppu ("end of the century/decade")
        $startDate = $matches[1];
        $endDate = $matches[2];

        if ($endDate % 100 == 0) {
            // Century
            $endDate += 99;
        } elseif ($endDate % 10 == 0) {
            // Decade
            $endDate += 9;
        }
    } elseif (preg_match(
        '/(\\d?\\d?\\d\\d)\\s*(-|~)\\s*(\\d?\\d?\\d\\d)\\s*(-luku|-l)?\\s*(\\(?\\?\\)?)?/',
        $input,
        $matches
    ) > 0) {
        // 1940-1960-luku
        // 1940-1960-l
        // 1940-60-l
        // 1930 - 1970-luku
        // 30-40-luku
        $startDate = $matches[1];
        $endDate = $matches[3];

        // Group 4 is present but empty when only the trailing "?" group
        // matched, so test for a non-empty "-luku"/"-l" suffix rather than
        // mere existence before widening the end to cover the decade.
        if (!empty($matches[4])) {
            if ($endDate % 10 == 0) {
                $endDate += 9;
            }
        }

        $imprecise = isset($matches[5]);
    } elseif (preg_match(
        '/(\\d?\\d?\\d\\d)\\s+(tammikuu|helmikuu|maaliskuu|huhtikuu|toukokuu|kesäkuu|heinäkuu|elokuu|syyskuu|lokakuu|marraskuu|joulukuu)/',
        $input,
        $matches
    ) > 0) {
        // Year followed by a Finnish month name; covers the whole month
        $year = $matches[1];
        $month = $k[$matches[2]];
        $startDate = $year . '-' . $month . '-01T00:00:00Z';
        $endDate = $year . '-' . $month . '-01';
        try {
            $d = new DateTime($endDate);
            $endDate = $d->format('Y-m-t') . 'T23:59:59Z';
        } catch (Exception $e) {
            global $logger;
            $logger->log(
                'NdlLidoRecord',
                "Failed to parse date {$endDate}, record {$this->source}." . $this->getID(),
                Logger::ERROR
            );
            return null;
        }
        $noprocess = true;
    } elseif (preg_match('/(\\d\\d\\d\\d)(\\d\\d)(\\d\\d)/', $input, $matches) > 0) {
        // yyyymmdd, a single day
        $year = $matches[1];
        $month = sprintf('%02d', $matches[2]);
        $day = sprintf('%02d', $matches[3]);
        $startDate = $year . '-' . $month . '-' . $day . 'T00:00:00Z';
        $endDate = $year . '-' . $month . '-' . $day . 'T23:59:59Z';
        $noprocess = true;
    } elseif (preg_match('/(\\d\\d\\d\\d)(\\d\\d)/', $input, $matches) > 0) {
        // yyyymm, a whole month
        $year = $matches[1];
        $month = sprintf('%02d', $matches[2]);
        $startDate = $year . '-' . $month . '-01T00:00:00Z';
        $endDate = $year . '-' . $month . '-01';
        try {
            $d = new DateTime($endDate);
        } catch (Exception $e) {
            global $logger;
            $logger->log(
                'NdlLidoRecord',
                "Failed to parse date {$endDate}, record {$this->source}." . $this->getID(),
                Logger::ERROR
            );
            return null;
        }
        $endDate = $d->format('Y-m-t') . 'T23:59:59Z';
        $noprocess = true;
    } elseif (preg_match(
        '/(\\d\\d?)\\s*\\.\\s*(\\d\\d?)\\s*\\.\\s*(\\d\\d\\d\\d)/',
        $input,
        $matches
    ) > 0) {
        // d.m.yyyy, a single day
        $year = $matches[3];
        $month = sprintf('%02d', $matches[2]);
        $day = sprintf('%02d', $matches[1]);
        $startDate = $year . '-' . $month . '-' . $day . 'T00:00:00Z';
        $endDate = $year . '-' . $month . '-' . $day . 'T23:59:59Z';
        $noprocess = true;
    } elseif (preg_match('/(\\d\\d?)\\s*\\.\\s*(\\d\\d\\d\\d)/', $input, $matches) > 0) {
        // m.yyyy, a whole month
        $year = $matches[2];
        $month = sprintf('%02d', $matches[1]);
        $startDate = $year . '-' . $month . '-01' . 'T00:00:00Z';
        $endDate = $year . '-' . $month . '-01';
        try {
            $d = new DateTime($endDate);
            $endDate = $d->format('Y-m-t') . 'T23:59:59Z';
        } catch (Exception $e) {
            global $logger;
            $logger->log(
                'NdlLidoRecord',
                "Failed to parse date {$endDate}, record {$this->source}." . $this->getID(),
                Logger::ERROR
            );
            return null;
        }
        $noprocess = true;
    } elseif (preg_match(
        '/(\\d?\\d?\\d\\d)\\s*-(luvun|luku)\\s+(alkupuolelta|alkupuoli|alku|alusta)/',
        $input,
        $matches
    ) > 0) {
        // Beginning of a century or decade
        $year = $matches[1];

        if ($year % 100 == 0) {
            // Century
            $startDate = $year;
            $endDate = $year + 29;
        } elseif ($year % 10 == 0) {
            // Decade
            $startDate = $year;
            $endDate = $year + 3;
        } else {
            // Uhh?
            $startDate = $year;
            $endDate = $year;
        }
    } elseif (preg_match(
        '/(\\d?\\d?\\d\\d)\\s*-(luvun|luku)\\s+(puoliväli)/',
        $input,
        $matches
    ) > 0) {
        // Middle of a century or decade
        $year = $matches[1];

        if ($year % 100 == 0) {
            // Century
            $startDate = $year + 29;
            $endDate = $year + 70;
        } elseif ($year % 10 == 0) {
            // Decade
            $startDate = $year + 3;
            $endDate = $year + 7;
        } else {
            // Uhh?
            $startDate = $year;
            $endDate = $year;
        }
    } elseif (preg_match(
        '/(\\d?\\d?\\d\\d)\\s*(-luvun|-l)\\s+(loppupuoli|loppu|lopulta|loppupuolelta)/',
        $input,
        $matches
    ) > 0) {
        // End of a century or decade
        $year = $matches[1];

        if ($year % 100 == 0) {
            // Century
            $startDate = $year + 70;
            $endDate = $year + 99;
        } elseif ($year % 10 == 0) {
            // Decade
            $startDate = $year + 7;
            $endDate = $year + 9;
        } else {
            $startDate = $year;
            $endDate = $year;
        }
    } elseif (preg_match('/(-?\\d?\\d?\\d\\d)\\s*-(luku|luvulta|l)/', $input, $matches) > 0) {
        // A whole century or decade, e.g. "1900-luku"
        $year = $matches[1];
        $startDate = $year;

        if ($year % 100 == 0) {
            $endDate = $year + 99;
        } elseif ($year % 10 == 0) {
            $endDate = $year + 9;
        } else {
            $endDate = $year;
        }
    } elseif (preg_match(
        '/(\\d?\\d?\\d\\d)\\s*ekr.?\\s*\\-\\s*(\\d?\\d?\\d\\d)\\s*ekr.?/',
        $input,
        $matches
    ) > 0) {
        // Both years marked "ekr" (BC): both are negated
        $startDate = -$matches[1];
        $endDate = -$matches[2];
    } elseif (preg_match(
        '/(\\d?\\d?\\d\\d)\\s*ekr.?\\s*\\-\\s*(\\d?\\d?\\d\\d)\\s*jkr.?/',
        $input,
        $matches
    ) > 0) {
        // From an "ekr" (BC) year to a "jkr" (AD) year
        $startDate = -$matches[1];
        $endDate = $matches[2];
    } elseif (preg_match('/(-?\\d?\\d?\\d\\d) jälkeen/', $input, $matches) > 0) {
        // "<year> jälkeen" (after <year>): a ten-year window from that year
        $year = $matches[1];

        $startDate = $year;
        $endDate = $year + 9;
    } elseif (preg_match('/(-?\\d\\d\\d\\d)\\s*-\\s*(-?\\d\\d\\d\\d)/', $input, $matches) > 0) {
        $startDate = $matches[1];
        $endDate = $matches[2];
    } elseif (preg_match('/(-?\\d{1,4})\\s+-\\s+(-?\\d{1,4})/', $input, $matches) > 0) {
        // Range of 1-4 digit, optionally negative years separated by a
        // spaced dash. The quantifier used to be written "{1-4}", which
        // PCRE treats as literal text, so this branch never matched.
        $startDate = $matches[1];
        $endDate = $matches[2];
    } elseif (preg_match('/(-?\\d?\\d?\\d\\d)\\s*\\?/', $input, $matches) > 0) {
        // Single uncertain year, e.g. "1950?"
        $year = $matches[1];

        $startDate = $year;
        $endDate = $year;
        $imprecise = true;
    } elseif (preg_match('/(-?\\d?\\d?\\d\\d)/', $input, $matches) > 0) {
        // Fallback: any 2-4 digit year
        $year = $matches[1];

        $startDate = $year;
        $endDate = $year;
    } else {
        return null;
    }

    // Normalise bare years: zero-pad negative years to "-YYYY" and
    // represent year zero as "0000".
    if ($startDate < 0) {
        $startDate = '-' . substr('0000', 0, 5 - strlen($startDate))
            . substr($startDate, 1);
    } elseif ($startDate == 0) {
        $startDate = '0000';
    }

    if ($endDate < 0) {
        $endDate = '-' . substr('0000', 0, 5 - strlen($endDate))
            . substr($endDate, 1);
    } elseif ($endDate == 0) {
        $endDate = '0000';
    }

    // Pad short positive years; a two-digit start year is taken to be 19xx
    switch (strlen($startDate)) {
    case 1:
        $startDate = "000{$startDate}";
        break;
    case 2:
        $startDate = "19{$startDate}";
        break;
    case 3:
        $startDate = "0{$startDate}";
        break;
    }

    switch (strlen($endDate)) {
    case 1:
        $endDate = "000{$endDate}";
        break;
    case 2:
        // A two-digit end year borrows everything but the last two
        // characters of the start year.
        // Take into account possible negative sign
        $endDate = substr($startDate, 0, -2) . $endDate;
        break;
    case 3:
        $endDate = "0{$endDate}";
        break;
    }

    if ($imprecise) {
        // This is way arbitrary, so disabled for now..
        //$startDate -= 2;
        //$endDate += 2;
    }

    // Expand bare years into full-year timestamps
    if (!$noprocess) {
        $startDate = $startDate . '-01-01T00:00:00Z';
        $endDate = $endDate . '-12-31T23:59:59Z';
    }

    // Trying to index dates into the future? I don't think so...
    // Compare the leading year as an integer. Comparing the whole timestamp
    // string with the year string made every date in the current year look
    // like it was in the future.
    $yearNow = (int) date('Y');
    if ((int) $startDate > $yearNow || (int) $endDate > $yearNow) {
        return null;
    }

    $start = MetadataUtils::validateISO8601Date($startDate);
    $end = MetadataUtils::validateISO8601Date($endDate);
    if ($start === false || $end === false) {
        global $logger;
        $logger->log(
            'NdlLidoRecord',
            "Invalid date range {$startDate} - {$endDate} parsed from "
            . "'{$input}', record {$this->source}." . $this->getID(),
            Logger::WARNING
        );
        // Salvage whichever end is valid by covering its whole year
        if ($start !== false) {
            $endDate = substr($startDate, 0, 4) . '-12-31T23:59:59Z';
        } elseif ($end !== false) {
            $startDate = substr($endDate, 0, 4) . '-01-01T00:00:00Z';
        } else {
            return null;
        }
    } elseif ($start > $end) {
        global $logger;
        $logger->log(
            'NdlLidoRecord',
            "Invalid date range {$startDate} - {$endDate} parsed from '{$input}', "
            . "record {$this->source}." . $this->getID(),
            Logger::WARNING
        );
        // Reversed range: keep the start and end at the close of its year
        $endDate = substr($startDate, 0, 4) . '-12-31T23:59:59Z';
    }

    return [$startDate, $endDate];
}
/**
 * Attempt to parse a string (in Finnish) into a normalized date range.
 *
 * Recognised forms, in priority order: "yyyy - yyyy", "yyyy-m-d",
 * "d.m.yyyy" (the separator may be any single character), an uncertain
 * year followed by "?" (widened by three years in both directions) and a
 * plain 2-4 digit year. Two-digit years are placed in the 1900s.
 *
 * TODO: complicated normalizations like this should preferably reside within
 * their own, separate component which should allow modification of the
 * algorithm by methods other than hard-coding rules into source.
 *
 * @param string $input Date range
 *
 * @return string|null Two ISO 8601 dates separated with a comma on success,
 *                     and null on failure
 */
protected function parseDateRange($input)
{
    $input = trim(strtolower($input));

    // Set to true by branches that already produce complete timestamps
    $noprocess = false;

    if (preg_match('/(\\d\\d\\d\\d) ?- (\\d\\d\\d\\d)/', $input, $matches) > 0) {
        // yyyy - yyyy
        $startDate = $matches[1];
        $endDate = $matches[2];
    } elseif (preg_match('/(\\d\\d\\d\\d)-(\\d\\d?)-(\\d\\d?)/', $input, $matches) > 0) {
        // yyyy-m-d, a single day
        $year = $matches[1];
        $month = sprintf('%02d', $matches[2]);
        $day = sprintf('%02d', $matches[3]);
        $startDate = $year . '-' . $month . '-' . $day . 'T00:00:00Z';
        $endDate = $year . '-' . $month . '-' . $day . 'T23:59:59Z';
        $noprocess = true;
    } elseif (preg_match(
        '/(\\d\\d?)\\s*.\\s*(\\d\\d?)\\s*.\\s*(\\d\\d\\d\\d)/',
        $input,
        $matches
    ) > 0) {
        // d.m.yyyy, a single day
        $year = $matches[3];
        $month = sprintf('%02d', $matches[2]);
        $day = sprintf('%02d', $matches[1]);
        $startDate = $year . '-' . $month . '-' . $day . 'T00:00:00Z';
        $endDate = $year . '-' . $month . '-' . $day . 'T23:59:59Z';
        $noprocess = true;
    } elseif (preg_match('/(\\d?\\d?\\d\\d) ?\\?/', $input, $matches) > 0) {
        // Uncertain year: widen by three years in both directions
        $year = $matches[1];

        $startDate = $year - 3;
        $endDate = $year + 3;
    } elseif (preg_match('/(\\d?\\d?\\d\\d)/', $input, $matches) > 0) {
        // Fallback: any 2-4 digit year
        $year = $matches[1];

        $startDate = $year;
        $endDate = $year;
    } else {
        return null;
    }

    // A two-digit start year is taken to be 19xx
    if (strlen($startDate) == 2) {
        $startDate = 1900 + (int) $startDate;
    }
    // A two-digit end year is placed in the same century as the start year
    if (strlen($endDate) == 2) {
        $century = substr($startDate, 0, 2) . '00';
        $endDate = (int) $century + (int) $endDate;
    }

    // Expand bare years into full-year timestamps
    if (!$noprocess) {
        $startDate = $startDate . '-01-01T00:00:00Z';
        $endDate = $endDate . '-12-31T23:59:59Z';
    }

    // Trying to index dates into the future? I don't think so...
    // Compare the leading year as an integer. Comparing the whole timestamp
    // string with the year string made every date in the current year look
    // like it was in the future.
    $yearNow = (int) date('Y');
    if ((int) $startDate > $yearNow || (int) $endDate > $yearNow) {
        return null;
    }

    if (MetadataUtils::validateISO8601Date($startDate) === false
        || MetadataUtils::validateISO8601Date($endDate) === false
    ) {
        return null;
    }

    return "{$startDate},{$endDate}";
}
/**
 * Work out the publication year/date range of the record.
 *
 * Sources are tried in order until one yields a range where both ends pass
 * MetadataUtils::validateISO8601Date():
 *   1. field 008: the character at offset 6 selects how the characters at
 *      offsets 7-14 are read,
 *   2. the first four-digit number in field 260, subfield c,
 *   3. the first four-digit number in subfield c of a 264 field whose
 *      second indicator is '1'.
 * A range whose end sorts before its start is logged and its end is
 * replaced with the last second of the start year.
 *
 * @return array|string Start and end timestamps, or an empty string when no
 *                      valid range could be determined
 */
protected function getPublicationDateRange()
{
    $startDate = null;
    $endDate = null;

    // True when both ends are set and pass ISO 8601 validation
    $rangeIsValid = static function ($from, $to) {
        return isset($from) && isset($to)
            && MetadataUtils::validateISO8601Date($from) !== false
            && MetadataUtils::validateISO8601Date($to) !== false;
    };

    // 1. Fixed-length field 008
    $field008 = $this->getField('008');
    if ($field008) {
        $dateType = substr($field008, 6, 1);
        if ($dateType === 'c') {
            // Open-ended range starting from the given year
            $year = substr($field008, 7, 4);
            $startDate = "{$year}-01-01T00:00:00Z";
            $endDate = '9999-12-31T23:59:59Z';
        } elseif (in_array($dateType, ['d', 'i', 'k', 'm', 'q'], true)) {
            // Two years: start and end
            $year1 = substr($field008, 7, 4);
            $year2 = substr($field008, 11, 4);
            $startDate = "{$year1}-01-01T00:00:00Z";
            $endDate = "{$year2}-12-31T23:59:59Z";
        } elseif ($dateType === 'e') {
            // Year, month and day of a single date
            $year = substr($field008, 7, 4);
            $mon = substr($field008, 11, 2);
            $day = substr($field008, 13, 2);
            $startDate = "{$year}-{$mon}-{$day}T00:00:00Z";
            $endDate = "{$year}-{$mon}-{$day}T23:59:59Z";
        } elseif (in_array($dateType, ['s', 't', 'u'], true)) {
            // A single year
            $year = substr($field008, 7, 4);
            $startDate = "{$year}-01-01T00:00:00Z";
            $endDate = "{$year}-12-31T23:59:59Z";
        }
    }

    // 2. Field 260, subfield c
    if (!$rangeIsValid($startDate, $endDate)) {
        $field260 = $this->getField('260');
        if ($field260) {
            $subfieldC = $this->getSubfield($field260, 'c');
            $matches = [];
            if ($subfieldC && preg_match('/(\\d{4})/', $subfieldC, $matches)) {
                $startDate = "{$matches[1]}-01-01T00:00:00Z";
                $endDate = "{$matches[1]}-12-31T23:59:59Z";
            }
        }
    }

    // 3. First usable 264 field with second indicator '1'
    if (!$rangeIsValid($startDate, $endDate)) {
        foreach ($this->getFields('264') as $field264) {
            if ($this->getIndicator($field264, 2) != '1') {
                continue;
            }
            $subfieldC = $this->getSubfield($field264, 'c');
            $matches = [];
            if ($subfieldC && preg_match('/(\\d{4})/', $subfieldC, $matches)) {
                $startDate = "{$matches[1]}-01-01T00:00:00Z";
                $endDate = "{$matches[1]}-12-31T23:59:59Z";
                break;
            }
        }
    }

    if (!$rangeIsValid($startDate, $endDate)) {
        return '';
    }

    if ($endDate < $startDate) {
        global $logger;
        $logger->log(
            'NdlMarcRecord',
            "Invalid date range {$startDate} - {$endDate}, record "
            . "{$this->source}." . $this->getID(),
            Logger::WARNING
        );
        $endDate = substr($startDate, 0, 4) . '-12-31T23:59:59Z';
    }

    return [$startDate, $endDate];
}