コード例 #1
0
ファイル: parseutils.php プロジェクト: horrabin/opendb
/**
 * Expand a comma-separated list of numbers and number ranges into a flat,
 * comma-separated list of numbers.
 *
 * Accepted input looks like:
 * 	1-15,10,1,12,423,312312,123-124
 *
 * Each comma-separated segment is parsed independently, so a malformed
 * segment (for example "5-" or an empty segment) is ignored and never
 * affects its neighbours.  Characters other than digits and '-' inside a
 * segment are skipped, which makes input such as "1 - 5" acceptable.
 *
 * Ranges are expanded by expand_range(), which is defined elsewhere.
 * NOTE(review): expand_range() is presumed to return a comma-separated
 * string (its result is concatenated as text here) -- confirm against its
 * definition.
 *
 * @param string $range List of numbers and "left-right" ranges.
 * @return string Comma-separated result, without empty entries or a
 *                leading/trailing comma; '' when nothing valid was found.
 */
function expand_number_range($range)
{
    $pieces = array();

    foreach (explode(',', (string) $range) as $segment) {
        // Fresh parser state per segment: nothing may leak across a comma.
        $number = '';
        $left_number = '';
        $right_number = '';

        $length = strlen($segment);
        for ($i = 0; $i < $length; $i++) {
            $char = $segment[$i];

            if (is_numeric($char)) {
                // Once a left bound exists, digits belong to the right bound.
                if (is_numeric($left_number)) {
                    $right_number .= $char;
                } else {
                    $number .= $char;
                }
            } elseif ($char === '-') {
                // The digits collected so far become the left bound; any
                // previously collected right bound is discarded.
                $left_number = $number;
                $number = '';
                $right_number = '';
            }
            // Every other character is ignored.
        }

        if (is_numeric($left_number) && is_numeric($right_number)) {
            $piece = (string) expand_range($left_number, $right_number);
        } else {
            // Plain number, or '' when the segment was empty or malformed.
            $piece = $number;
        }

        // Skip segments that produced nothing so no stray commas are emitted.
        if ($piece !== '') {
            $pieces[] = $piece;
        }
    }

    return implode(',', $pieces);
}
コード例 #2
0
ファイル: extract_specimens.php プロジェクト: rdmpage/bioguid
/**
 * Extract museum specimen codes (e.g. "AMNH 12345", "BMNH1947.2.26.89")
 * from a piece of text.
 *
 * The text is scanned with a generic "[acronym] [number]" pattern and then
 * with a handful of institution-specific patterns.  Every hit is returned as
 * "<code> <number>".  When a hit carries a range end (e.g. "12345-48") the
 * range is expanded with expand_range(), which is defined elsewhere.
 * NOTE(review): expand_range() is presumed to return an iterable list of
 * numbers (its result is iterated with foreach) -- confirm against its
 * definition.
 *
 * @param string $t Text to scan.
 * @return array List of specimen identifier strings, grouped by pattern and
 *               in text order within each pattern; duplicates are kept.
 */
function extract_specimen_codes($t)
{
    // Standard acronyms that have simple [Acronym] [number] specimen codes
    // (allowing for a prefix before [number]).  Entries are regex fragments.
    $acronyms = array('ABTC', 'ADT-CRBMUV', 'AM M', 'AMCC', 'AMNH', 'ANSP', 'ANWC', 'AMS', 'AMS\\. [1I]\\.', 'ANSP', 'ASIZB', 'ASU', 'BBM', 'BNHS', 'BPBM', 'CAS', 'CASENT', 'CAS-SU', 'CFBH', 'CM', 'CMK', 'CRBMUV', 'CWM', 'DHMECN', 'FMNH', 'HKU', 'IBUNAM-EM', 'ICN', 'ICN-MHN-CR', 'ILPLA', 'INHS', 'IRSNB', 'IZUA', 'JAC', 'JCV', 'JM', 'KFBG', 'KU', 'KUHE', 'LACM', 'LSUMZ', 'MACN', 'MACN-Ict', 'MCP', 'MCNU', 'MCSN', 'MCZ', 'MFA-ZV-I', 'MHNCI', 'MNCN', 'MHNG', 'MHNUC', 'MNRJ', 'MPEG', 'MRAC', 'MRT', 'MUJ', 'MVUP', 'MVZ', 'MZUC', 'MZUFV', 'MZUSP', 'NHMW', 'NRM', 'NSV', 'NT', 'NTM', 'OMNH', 'QCAZ', 'QM', 'QMJ', 'RAN', 'RMNH', 'ROM', 'SAMA', 'SIUC', 'TNHC', 'THNHM', 'UAZ', 'UCR', 'UFMG', 'UMFS', 'UMMZ', 'UNT', 'USNM', 'USNM\\.', 'USNMENT', 'USNM\\sENT', 'UTA', 'UWBM', 'WAM', 'WHT', 'YPM', 'ZFMK', 'ZMA', 'ZMB', 'ZMH', 'ZRC', 'ZSI F', 'ZUFRJ');

    // The patterns below use the /x modifier, which silently discards
    // unescaped whitespace.  Escape the literal spaces in the acronyms so
    // that e.g. 'AM M' matches "AM M" rather than "AMM".
    $acronym_pattern = str_replace(' ', '\\ ', join('|', $acronyms));

    // In the range separators below, the third alternative is an invisible
    // character (it appears to be a soft hyphen, U+00AD); keep it when editing.
    $patterns = array(
        // Typical code [A-Z] \d+, allowing for some quirks such as letter
        // prefixes for the number, with optional range support.
        '/
		(?<code>
		' . $acronym_pattern . '
		)
		\\s*
		(:|_|\\-)?
		(?<number>((?<prefix>(J|R|A[.\\s]?|A\\-))?[0-9]{3,}))
		
		(
			(\\-|–|­|—)
			(?<end>[0-9]{2,})
		)?		
		
		/x',
        // BMNH, e.g. BMNH1947.2.26.89
        '/
		(?<code>BMNH)
		\\s*
		(?<number>([0-9]{2,4}(\\.[0-9]+)+) )
		
		(
			(\\-|–|­|—)
			(?<end>[0-9]+)
		)?		
		
		/x',
        // MNHN
        '/
		(?<code>MNHN)
		\\s*
		(?<number>([0-9]{4}\\.[0-9]+) )

		(
			(\\-|–|­|—)
			(?<end>[0-9]+)
		)?		

		/x',
        // NCA / QVM / ZSM: two numbers joined by ":" or "/"
        '/
		(?<code>NCA|QVM|ZSM)
		\\s*
		(?<number>([0-9]+(:|\\/)[0-9]+))
		/x',
        // NHM: "R"-prefixed numbers
        '/
		(?<code>NHM)
		\\s+
		(?<number>(R\\.?[0-9]+))
		/x',
    );

    $specimens = array();
    foreach ($patterns as $pattern) {
        // preg_match_all() yields 0 when nothing matched and false on error.
        if (!preg_match_all($pattern, $t, $out, PREG_PATTERN_ORDER)) {
            continue;
        }
        $count = count($out[0]);
        for ($i = 0; $i < $count; $i++) {
            $s = new stdClass();
            $s->code = $out['code'][$i];
            // Not every pattern defines these groups; default to '' so the
            // post-processing below never reads an undefined property.
            $s->prefix = isset($out['prefix'][$i]) ? $out['prefix'][$i] : '';
            $s->number = $out['number'][$i];
            $s->end = isset($out['end'][$i]) ? $out['end'][$i] : '';
            $specimens[] = $s;
        }
    }

    // Post process to handle lists of specimens
    $ids = array();
    foreach ($specimens as $z) {
        // Fix any codes that seem broken
        if ($z->code == 'USNM ENT') {
            $z->code = 'USNMENT';
        }
        if ($z->code == 'USNM.') {
            $z->code = 'USNM';
        }

        if ($z->end == '') {
            $ids[] = $z->code . ' ' . $z->number;
        } else {
            // A range such as "12345-48": emit one id per expanded number.
            $range = expand_range($z->number, $z->end);
            foreach ($range as $r) {
                $ids[] = $z->code . ' ' . $r;
            }
        }
    }
    return $ids;
}