/**
 * Checks StringUtility::containsPhoneNumber() against a set of short sample
 * strings and against a larger sample text file.
 *
 * Strings that embed a complete number (with or without dashes, with or
 * without surrounding text) must be accepted; strings that hold only nine
 * digits must be rejected.
 */
function testContainsPhoneNumber()
{
    /* Sample string => whether a phone number is expected to be detected.
     * Accepted samples are listed first so the assertions run in the same
     * order as before.
     */
    $expectations = array(
        '770-667-5085 (Cell)'              => true,
        'My phone number is 770-667-5085.' => true,
        'Cell:770-667-5085.'               => true,
        'ph7706675085'                     => true,
        'My phone number is 770-667-508.'  => false,
        'ph770667509'                      => false
    );

    foreach ($expectations as $sample => $shouldMatch) {
        if ($shouldMatch) {
            $this->assertTrue(
                StringUtility::containsPhoneNumber($sample),
                sprintf("'%s' should be recognized as containing a phone number", $sample)
            );
        } else {
            $this->assertFalse(
                StringUtility::containsPhoneNumber($sample),
                sprintf("'%s' should not be recognized as containing a phone number", $sample)
            );
        }
    }

    /* Larger body of sample text: Grimm's fairy tales, which are known not
     * to contain any phone numbers.
     */
    $sampleLines = file('./modules/tests/SampleText.txt');
    $sampleText  = implode('', $sampleLines);

    $this->assertFalse(StringUtility::containsPhoneNumber($sampleText));
}
/**
 * Extracts the phone numbers found in the address block and labels each
 * one with a type.
 *
 * Every line of $this->_addressBlock that StringUtility::containsPhoneNumber()
 * accepts is tested against keyword patterns in a fixed order: cell, home,
 * work, general, fax, tty, pager. The first pattern that matches decides the
 * type. Two kinds of lines are collected as "unlabeled" instead:
 *
 *   - lines on which no keyword pattern matches, and
 *   - lines matching the 'general' pattern while the parser mode is not
 *     ADDRESSPARSER_MODE_COMPANY.
 *
 * Unlabeled numbers are then resolved as follows:
 *
 *   - Exactly one unlabeled number: its type is guessed from which of
 *     home / work / cell were already found and from the parser mode.
 *   - More than one unlabeled number: no guess is attempted; each one is
 *     returned with type 'unknown' so that it is not lost.
 *
 * @return array List of rows of the form
 *               array('number' => <extracted number>, 'type' => <string>),
 *               where the type is one of 'cell', 'home', 'work', 'general',
 *               'fax', 'tty', 'pager' or 'unknown'. Empty when the address
 *               block is empty.
 */
protected function _getPhoneNumbers()
{
    /* Sanity check. It is possible that the only line of the address
     * block has been removed during e-mail address extraction.
     */
    if (empty($this->_addressBlock)) {
        return array();
    }

    /* Keyword patterns that identify phone number types, in the order in
     * which they are tried; the first matching pattern wins.
     *
     * \x28 is a '(', \x5b is a '[', \x29 is a ')', \x5d is a ']'.
     */
    $typePatterns = array(
        'cell'    => '/cell|[\\x28\\x5b][CM][\\x29\\x5d]|mob(?:ile|\\b)|\\bc[:\\x5d]|\\bm[:\\x5d]/i',
        'home'    => '/[\\x28\\x5b]H[\\x29\\x5d]|home|evening|night|house/i',
        'work'    => '/work|off(?:ice|\\b)|[\\x28\\x5b][WO][\\x29\\x5d]|direct|day(?:time)?|job/i',
        'general' => '/[\\x28\\x5b]PH?[\\x29\\x5d]|primary|voice|main|toll|ph(?:one|\\b)/i',
        'fax'     => '/[\\x28\\x5b]FX?[\\x29\\x5d]|fax|facsimile|\\bFX?[:\\x5d]/i',
        'tty'     => '/\\bTT[YD]\\b/i',
        'pager'   => '/pager|beeper/i'
    );

    $numbers        = array();
    $unknownNumbers = array();

    /* Loop through each line of the address block and attempt to extract
     * and identify phone numbers.
     */
    foreach ($this->_addressBlock as $line) {
        /* Skip lines that don't contain phone numbers. */
        if (!StringUtility::containsPhoneNumber($line)) {
            continue;
        }

        /* Look for a keyword that tells us what type of number it is. */
        $type = false;
        foreach ($typePatterns as $candidateType => $pattern) {
            if (preg_match($pattern, $line)) {
                $type = $candidateType;
                break;
            }
        }

        $number = StringUtility::extractPhoneNumber($line);

        /* Outside of company mode a 'general' keyword ("phone", "main",
         * ...) does not tell us whether the number is a home, work or cell
         * number, so it is handled like a line without any keyword.
         */
        $isAmbiguousGeneral = (
            $type === 'general' && $this->_mode != ADDRESSPARSER_MODE_COMPANY
        );

        if ($type === false || $isAmbiguousGeneral) {
            $unknownNumbers[] = $number;
        } else {
            $numbers[] = array('number' => $number, 'type' => $type);
        }
    }

    $unknownCount = count($unknownNumbers);

    /* Nothing left to guess. */
    if ($unknownCount == 0) {
        return $numbers;
    }

    /* With several unlabeled numbers there is no sound basis for guessing
     * which one is which. Keep them, marked as 'unknown', rather than
     * dropping them.
     */
    if ($unknownCount > 1) {
        foreach ($unknownNumbers as $unknownNumber) {
            $numbers[] = array('number' => $unknownNumber, 'type' => 'unknown');
        }

        return $numbers;
    }

    /* Exactly one unlabeled number. Figure out which of the three main
     * phone number types we've already found; a lookup result of false is
     * treated as "not found".
     */
    $found = array(
        'home' => (ResultSetUtility::findRowByColumnValue($numbers, 'type', 'home') !== false),
        'work' => (ResultSetUtility::findRowByColumnValue($numbers, 'type', 'work') !== false),
        'cell' => (ResultSetUtility::findRowByColumnValue($numbers, 'type', 'cell') !== false)
    );
    $missingTypes = array_keys($found, false, true);

    if (count($missingTypes) == 1) {
        /* Only one of the three types is missing, so the extra number is
         * assumed to be that one (e.g. home and cell are known => work).
         */
        $type = $missingTypes[0];
    } elseif (count($missingTypes) == 0) {
        /* We already know all the phone numbers we need to know, and it's
         * probably not a fax number, as fax numbers are usually labeled.
         * Nothing to do except mark it as unknown.
         */
        $type = 'unknown';
    } else {
        /* More than one type is missing. Make a "best guess" according to
         * the mode we are in: take the first missing type in the mode's
         * order of preference.
         */
        $preference = array();
        $type       = 'unknown';

        switch ($this->_mode) {
            case ADDRESSPARSER_MODE_PERSON:
                $preference = array('home', 'cell', 'work');
                break;

            case ADDRESSPARSER_MODE_CONTACT:
                /* 'Contacts' are more likely to list a work or cell
                 * number than a home number.
                 */
                $preference = array('work', 'cell', 'home');
                break;

            case ADDRESSPARSER_MODE_COMPANY:
                $type = 'general';
                break;

            default:
                /* Error! Invalid mode; the type stays 'unknown'. */
                break;
        }

        foreach ($preference as $candidateType) {
            if (!$found[$candidateType]) {
                $type = $candidateType;
                break;
            }
        }
    }

    $numbers[] = array('number' => $unknownNumbers[0], 'type' => $type);

    return $numbers;
}