/**
 * Parse a journal citation string and return the page ids of matching search hits.
 *
 * The citation is tried against an ordered list of regular expressions; the
 * first one that matches supplies the named captures (journal, volume, page and,
 * depending on the pattern, series, issue, year). Those captures are turned into
 * a reference object by reference_from_matches(), the reference is given an ISSN
 * (issn_from_title) or, failing that, an OCLC number (oclc_for_title), and the
 * article is then searched for with bhl_find_article(), falling back to
 * bhl_find_article_from_article_title() when the first search has no hits.
 *
 * Side effects: prints the regex captures, the citation text and the resulting
 * page list to standard output on every call.
 *
 * @param string $publication Citation text, e.g. "Spixiana 7 (2): 125."
 * @param mixed  $year        Not read by this function.
 *                            NOTE(review): only a year captured from the citation
 *                            itself is forwarded to the search - confirm whether
 *                            this argument was meant to be a fallback.
 *
 * @return array PageID values of the search hits; empty when the citation could
 *               not be parsed or the searches returned nothing.
 */
function matching_pages($publication, $year)
{
    // NOTE(review): $debug is imported but never read; the print_r()/echo calls
    // below run unconditionally. Kept as-is so callers see the same output.
    global $debug;

    $pages = array();
    $matches = array();
    $matched = false;

    // Citation patterns, tried in order; the first match wins.
    //
    // Every pattern carries the /U modifier, which makes all quantifiers lazy.
    // A lazy \d+ only grows as far as the rest of the pattern forces it to, so a
    // capture must always be followed by something that pins its end.
    $patterns = array(
        // <journal> <volume>: <page>[.]
        '/(?<journal>.*)\\s+(?<volume>\\d+):\\s*(?<page>\\d+)\\.?$/Uu',

        // <journal>, [(<series>) ]<volume>,[ (<issue>)] <page>.
        // Both bracketed parts are optional, so this also covers the plain
        // "<journal>, <volume>, <page>." form.
        '/(?<journal>.*),\\s+(\\((?<series>.*)\\)\\s+)?(?<volume>\\d+),(\\s+\\((?<issue>\\d+)\\))?\\s+(?<page>\\d+)\\.$/Uu',

        // <journal> <volume>: <page> <year>
        '/(?<journal>.*)\\s+(?<volume>\\d+):\\s*(?<page>\\d+)\\s+(?<year>[0-9]{4})/Uu',

        // <journal>, <volume>, <page> followed by anything (or nothing).
        // The (?!\d) lookahead forces the lazy page capture to take the whole
        // digit run; without it only the first digit of the page is captured.
        // Because this pattern is unanchored it also covers citations where the
        // page is followed by a comma.
        '/(?<journal>.*),\\s+(?<volume>\\d+),\\s+(?<page>\\d+)(?!\\d)/Uu',

        // Proc. U.S. nat. Mus., 99, no. 3247, 475.
        '/(?<journal>.*),\\s+(?<volume>\\d+), no. (?<issue>\\d+),\\s+(?<page>\\d+)\\.$/Uu',

        // <journal>, <volume> (<issue>), <page>.
        '/(?<journal>.*),\\s+(?<volume>\\d+) \\((?<issue>\\d+)\\),\\s+(?<page>\\d+)\\.$/Uu',

        // Spixiana 7 (2): 125.
        '/(?<journal>.*)\\s+(?<volume>\\d+) \\((?<issue>\\d+)\\):\\s+(?<page>\\d+)\\.$/Uu',

        // Ann. Mag. Nat. Hist. , ser. 8 vol. 13 p. 436 <year>
        '/(?<journal>.*)\\s*,\\s+ser.\\s+(?<series>\\d+)\\s+vol.\\s*(?<volume>\\d+)\\s+p.\\s+(?<page>\\d+)\\s+(?<year>[0-9]{4})/Uu',
    );

    foreach ($patterns as $pattern) {
        // preg_match() resets $matches on every call, so after a full miss it
        // is left as an empty array.
        if (preg_match($pattern, $publication, $matches)) {
            $matched = true;
            break;
        }
    }

    print_r($matches);
    echo $publication;

    if ($matched) {
        $reference = reference_from_matches($matches);

        if (!isset($reference->issn)) {
            // Try and get ISSN from bioGUID
            $issn = issn_from_title($reference->secondary_title);
            if ($issn != '') {
                $reference->issn = $issn;
            } elseif (!isset($reference->oclc)) {
                // No luck with ISSN, look for OCLC
                $oclc = oclc_for_title($reference->secondary_title);
                if ($oclc != 0) {
                    $reference->oclc = $oclc;
                }
            }
        }

        // Optional fields are passed to the search as empty strings when absent.
        $atitle = isset($reference->title) ? $reference->title : '';
        $series = isset($reference->series) ? $reference->series : '';
        $ref_year = isset($reference->year) ? $reference->year : '';

        $search_hits = bhl_find_article(
            $atitle,
            $reference->secondary_title,
            $reference->volume,
            $reference->spage,
            $series,
            $ref_year
        );

        if (count($search_hits) == 0) {
            // try alternative way of searching using article title
            $search_hits = bhl_find_article_from_article_title(
                $atitle,
                $reference->secondary_title,
                $reference->volume,
                $reference->spage,
                $series
            );
        }

        foreach ($search_hits as $hit) {
            $pages[] = $hit->PageID;
        }
    }

    print_r($pages);

    return $pages;
}
<?php require_once dirname(__FILE__) . '/utils.php'; $filename = 'notparsed.txt'; $file_handle = fopen($filename, "r"); $failed = array(); while (!feof($file_handle)) { $line = fgets($file_handle); $parts = explode("\t", $line); $parts[1] = trim($parts[1]); if (preg_match('/(?<authorstring>.*)\\s*(?<year>[0-9]{4})\\.\\s+(?<title>.*)\\s+<em>(?<journal>.*)<\\/em>\\s+(?<series>.*)?\\s*<strong>(?<volume>\\d+)<\\/strong>(\\((?<issue>\\d+)\\))?:\\s+(?<spage>\\d+)([–|-](?<epage>\\d+))\\b/Uu', $parts[1], $m)) { //print_r($m); $matched = true; $reference = reference_from_matches($m); //print_r($reference); $reference->id = $parts[0]; $reference->keywords[] = $parts[0]; $openurl = reference2openurl($reference); //echo $openurl . "\n"; bioguid($reference); if (isset($reference->epage)) { $biostor_id = import_from_openurl($openurl); if ($biostor_id != 0) { $found = true; $reference->url = 'http://biostor.org/reference/' . $biostor_id; } } //print_r($reference); echo reference2ris($reference); } else { /*