/**
 * Parse given street address string in to street_name,
 * street_unit, street_number and street_number_suffix
 * eg "54A Excelsior Ave. Apt 1C", or "917 1/2 Elm Street"
 *
 * NB: civic street formats for en_CA and fr_CA are used when one of those
 * locales is requested (or is the configured lcMessages locale); otherwise
 * the en_US format is used.
 *
 * The string is consumed left to right:
 *  1. (en_CA / fr_CA only) a leading "unit - " prefix, e.g. "12B - 100 Main St";
 *  2. leading digits become street_number, the rest of that token the suffix;
 *  3. a leading fraction such as "1/2" is appended to the suffix;
 *  4. a unit designator (APT, SUITE, #, ...) plus everything after it
 *     becomes street_unit;
 *  5. whatever is left is the street name.
 *
 * @param string $streetAddress
 *   Street address including number and apt.
 * @param string $locale
 *   Locale used to parse address. When empty the configured lcMessages value
 *   is used; when unsupported a status message is set and en_US is used.
 *
 * @return array
 *   Parsed field values keyed by street_name, street_unit, street_number and
 *   street_number_suffix. Every value is an empty string when the input is
 *   empty, or when any parsed value is longer than the maxlength declared for
 *   the corresponding address field.
 */
public static function parseStreetAddress($streetAddress, $locale = NULL) {
  $config = CRM_Core_Config::singleton();

  /* locales supported include:
   *  en_US - http://pe.usps.com/cpim/ftp/pubs/pub28/pub28.pdf
   *  en_CA - http://www.canadapost.ca/tools/pg/manual/PGaddress-e.asp
   *  fr_CA - http://www.canadapost.ca/tools/pg/manual/PGaddress-f.asp
   *          NB: common use of comma after street number also supported
   *  default is en_US
   */
  $supportedLocalesForParsing = array('en_US', 'en_CA', 'fr_CA');
  if (!$locale) {
    $locale = $config->lcMessages;
  }

  // A locale we cannot parse was requested (or configured): warn and fall
  // back to en_US rather than failing.
  if (!in_array($locale, $supportedLocalesForParsing, TRUE)) {
    CRM_Core_Session::setStatus(ts('Unsupported locale specified to parseStreetAddress: %1. Proceeding with en_US locale.', array(1 => $locale)), ts('Unsupported Locale'), 'alert');
    $locale = 'en_US';
  }
  $isCanadianLocale = in_array($locale, array('en_CA', 'fr_CA'), TRUE);

  $emptyParseFields = $parseFields = array(
    'street_name' => '',
    'street_unit' => '',
    'street_number' => '',
    'street_number_suffix' => '',
  );

  if (empty($streetAddress)) {
    return $parseFields;
  }

  $streetAddress = trim($streetAddress);

  // Canadian civic format: the unit may come first, separated by a hyphen.
  $unitPrefixPreg = '/^([A-Za-z0-9]+)[ ]*\\-[ ]*/';
  $matches = array();
  if ($isCanadianLocale && preg_match($unitPrefixPreg, $streetAddress, $matches)) {
    $parseFields['street_unit'] = $matches[1];
    // unset from rest of street address
    $streetAddress = preg_replace($unitPrefixPreg, '', $streetAddress);
  }

  // Street number and suffix: the leading digits are the number and the
  // remainder of that whitespace-delimited token ("A", "-3", ",") is the
  // suffix. A token that does not start with a digit is left in place.
  $matches = array();
  if (preg_match('/^(\\d+)(\\S*)/', $streetAddress, $matches)) {
    $parseFields['street_number'] = $matches[1];
    $parseFields['street_number_suffix'] = trim($matches[2]);
    // unset from main street address.
    $streetAddress = trim((string) substr($streetAddress, strlen($matches[0])));
  }

  // Suffix might be a fraction like 1/2. The same pattern is used to detect
  // and to remove it, so what is stored always equals what is stripped
  // (previously "3/16" was stored as "3/1" and "11/2" was not detected).
  $fractionPreg = '/^\\d+\\/\\d+/';
  $matches = array();
  if (preg_match($fractionPreg, $streetAddress, $matches)) {
    $parseFields['street_number_suffix'] .= $matches[0];
    // unset from main street address.
    $streetAddress = trim(preg_replace($fractionPreg, '', $streetAddress));
  }

  // now get the street unit.
  // supportable street unit formats.
  $streetUnitFormats = array(
    'APT', 'APARTMENT', 'BSMT', 'BASEMENT', 'BLDG', 'BUILDING',
    'DEPT', 'DEPARTMENT', 'FL', 'FLOOR', 'FRNT', 'FRONT',
    'HNGR', 'HANGER', 'LBBY', 'LOBBY', 'LOWR', 'LOWER',
    'OFC', 'OFFICE', 'PH', 'PENTHOUSE', 'TRLR', 'TRAILER',
    'UPPR', 'RM', 'ROOM', 'SIDE', 'SLIP', 'KEY',
    'LOT', 'PIER', 'REAR', 'SPC', 'SPACE', 'STOP',
    'STE', 'SUITE', 'UNIT', '#',
  );

  // overwriting $streetUnitFormats for 'en_CA' and 'fr_CA' locale
  if ($isCanadianLocale) {
    $streetUnitFormats = array('APT', 'APP', 'SUITE', 'BUREAU', 'UNIT');
  }

  // Designators are literals, so quote them before building the pattern.
  $quotedUnitFormats = array();
  foreach ($streetUnitFormats as $streetUnitFormat) {
    $quotedUnitFormats[] = preg_quote($streetUnitFormat, '/');
  }

  // CRM-14459: every alternative except the first is preceded by \s, so only
  // the first one (APT) could match in the middle of a word such as
  // "Captain". The lookbehind rejects a match that directly follows a letter
  // while still allowing it at the start of the string or after punctuation.
  // NB: designators are still not required to end on a word boundary, so a
  // word that merely starts with one after whitespace is treated as a unit.
  $streetUnitPreg = '/((?<![A-Za-z])' . implode('|\\s', $quotedUnitFormats) . ')(.+)?/i';
  $matches = array();
  if (preg_match($streetUnitPreg, $streetAddress, $matches)) {
    // The designator and everything after it is the unit; this replaces any
    // unit taken from a Canadian-style prefix above.
    $parseFields['street_unit'] = trim($matches[0]);
    $streetAddress = str_replace($matches[0], '', $streetAddress);
    $streetAddress = trim($streetAddress);
  }

  // consider remaining string as street name.
  $parseFields['street_name'] = $streetAddress;

  // run parsed fields through stripSpaces to clean
  foreach ($parseFields as $parseField => $value) {
    $parseFields[$parseField] = CRM_Utils_String::stripSpaces($value);
  }

  // CRM-14459 if a field is too long we should assume parsing got it wrong and
  // skip rather than allow the DB to fatal. Only the parsed fields can exceed
  // their limit, so those are the only ones checked.
  $fields = CRM_Core_BAO_Address::fields();
  foreach ($parseFields as $parseField => $value) {
    if (!empty($fields[$parseField]['maxlength']) && strlen((string) $value) > $fields[$parseField]['maxlength']) {
      return $emptyParseFields;
    }
  }

  return $parseFields;
}