/**
 * Convenience wrapper: normalize a string with the Normalize class,
 * with its debug flag explicitly switched off.
 *
 * @param string $string text handed to Normalize::normalize()
 * @return mixed the value produced by Normalize::normalize()
 */
function normalize($string)
{
    // Loaded lazily so the class file is only pulled in when this helper is used.
    require_once 'classes/class.normalize.php';

    $normalizer = new Normalize();
    $normalizer->set('debug_flag', false);

    return $normalizer->normalize($string);
}
$search_genus_name = $norm->normalize($genus_desc); $near_match_genus = $nm->near_match($genus_desc); $query = sprintf("INSERT INTO `genlist%s` (`GENUS_ID`, `GENUS`, `AUTHORITY`, `GEN_LENGTH`, `NEAR_MATCH_GENUS`, `SEARCH_GENUS_NAME`) VALUES ('%s','%s','%s',%s,'%s','%s')", mysql_escape_string($postfix), mysql_escape_string($genus_id_desc), mysql_escape_string($genus_desc), mysql_escape_string($authority_desc), mysql_escape_string($genus_length), mysql_escape_string($near_match_genus), mysql_escape_string($search_genus_name)); $db->query($query); } elseif (trim($data[3]) == '') { // Used to slow down the script for shared hosted sites usleep(20000); // Create Species $species_id_desc = $data[$species_id]; $species_desc = $data[$species]; $species_length = strlen($species_desc); $genus_id_desc = $data[1]; $genus_desc = $master[$data[1]]; $norm = new Normalize(); $nm = new NearMatch(); $search_species_name = $norm->normalize($species_desc); $near_match_species = $nm->near_match($species_desc); $authority_desc = $data[$authority]; $query = sprintf("INSERT INTO `splist%s` (`SPECIES_ID`, `GENUS_ORIG`, `SPECIES`, `GENUS_ID`, `AUTHORITY`, `SP_LENGTH`, `NEAR_MATCH_SPECIES`, `SEARCH_SPECIES_NAME`) VALUES ('%s','%s','%s','%s','%s',%s,'%s','%s') ", mysql_escape_string($postfix), mysql_escape_string($species_id_desc), mysql_escape_string($genus_desc), mysql_escape_string($species_desc), mysql_escape_string($genus_id_desc), mysql_escape_string($authority_desc), mysql_escape_string($species_length), mysql_escape_string($near_match_species), mysql_escape_string($search_species_name)); $db->query($query); } unset($query); } // end while fclose($handle); print round(memory_get_usage() * 0.0009) . "KB - Final Memory Used<br>"; } } } else { print ' A valid name parameter has to be supplied.'; }
// import data from the csv $first = true; $sp_index = 1; $handle = fopen('../authorities/' . $sourcefile, "r"); while (($data = fgetcsv($handle, 1000, ",")) !== FALSE) { if ($first) { $first = false; continue; } $norm = new Normalize(); $nm = new NearMatch(); $genus_desc = $data[$genus]; $gen_length_desc = strlen($genus_desc); # $genus_array = json_decode(file_get_contents(TAXAMATCH_URL . '?cmd=normalize&str=' . $genus_desc),true); # $search_genus_name_desc = $genus_array['data']; $search_genus_name_desc = $norm->normalize($genus_desc); # $near_match_genus_array = json_decode(file_get_contents(TAXAMATCH_URL . '?cmd=near_match&str=' . $genus_desc),true); # $near_match_genus_desc = $near_match_genus_array['data']; $near_match_genus_desc = $nm->near_match($genus_desc); $species_desc = $data[$species]; $sp_length_desc = strlen($species_desc); # $species_array = json_decode(file_get_contents(TAXAMATCH_URL . '?cmd=normalize&str=' . $species_desc),true); # $search_species_name_desc = $species_array['data']; $search_species_name_desc = $norm->normalize($species_desc); # $near_match_species_array = json_decode(file_get_contents(TAXAMATCH_URL . '?cmd=near_match&str=' . $species_desc),true); # $near_match_species_desc = $near_match_species_array['data']; $near_match_species_desc = $nm->near_match($species_desc); if ($authority_abbr) { $authority_abbr_desc = $data[$authority_abbr]; $authority_desc = $data[$authority]; $query = sprintf(" INSERT INTO auth_abbrev%s (`AUTH_ABBR`, `AUTH_FULL`) VALUES ('%s','%s') ", mysql_escape_string($postfix), mysql_escape_string($authority_abbr_desc), mysql_escape_string($authority_desc));
/**
 * Reduce a word to a simplified "soundalike" key.
 *
 * Steps, in order:
 *   1. optionally run the word through Normalize::normalize();
 *   2. rewrite a known leading letter pair (first matching rule only);
 *   3. keep the first character and apply the soundalike substitutions
 *      to the remainder (case-insensitive search, upper-case output);
 *   4. collapse runs of identical characters to a single character;
 *   5. optionally fold the endings IS / IM / AS into A (only for results
 *      longer than four characters).
 *
 * Side effects: sets $this->input and $this->output, and appends trace
 * entries to $this->debug['TW'] (and $this->debug['Normalize'] when
 * normalization is requested).
 *
 * @param string  $str2         word to treat
 * @param integer $strip_ending truthy -> fold variant endings into "A"
 * @param integer $normalize    1 -> normalize the word first, 0 -> use it as given
 * @return string the treated word, or '' when the input is empty/blank
 */
public function treat_word($str2, $strip_ending = 0, $normalize = 1)
{
    $this->input = $str2;
    $this->output = '';

    // Loose comparison kept on purpose: callers passing null/false/'' all get ''.
    if ($str2 == null || trim($str2) == '') {
        return '';
    }

    if ($normalize) {
        $n = new Normalize();
        $temp2 = $n->normalize($str2);
        $this->debug['Normalize'][] = $n->debug;
    } else {
        $temp2 = $str2;
    }
    $this->debug['TW'][] = "1 (temp2:{$temp2})";

    // Selective replacement on the leading letter/s only ('soundalikes').
    // Matching is case-sensitive and only the first matching rule applies.
    // NOTE(review): 'ph' is the only lower-case rule here; confirm whether
    // that is intended given the other rules are upper-case.
    $leading_rules = array(
        'AE' => 'E',
        'CN' => 'N',
        'CT' => 'Z',
        'CZ' => 'V',
        'DJ' => 'J',
        'EA' => 'E',
        'EU' => 'U',
        'GN' => 'N',
        'KN' => 'N',
        'MC' => 'MAC',
        'MN' => 'N',
        'OE' => 'E',
        'QU' => 'Q',
        'PS' => 'S',
        'PT' => 'T',
        'TS' => 'S',
        'WR' => 'R',
        'X'  => 'Z',
        'ph' => 'f',
    );
    foreach ($leading_rules as $prefix => $replacement) {
        $prefix_length = strlen($prefix);
        if (strncmp($temp2, $prefix, $prefix_length) === 0) {
            $temp2 = $replacement . substr($temp2, $prefix_length);
            break;
        }
    }
    $this->debug['TW'][] = "2 (temp2:{$temp2})";

    // Now keep the leading character, then do selected "soundalike" replacements. The
    // following letters are equated: AE, OE, E, U, Y and I; IA and A are equated;
    // K and C; Z and S; and H is dropped. Also, A and O are equated, and SC and S.
    $start_letter = substr($temp2, 0, 1); // quarantine the leading letter
    $temp2 = substr($temp2, 1);           // snip off the leading letter
    $this->debug['TW'][] = "3 (start_letter:{$start_letter}) (temp2:{$temp2})";

    // With array arguments str_ireplace() applies the pairs one after another,
    // each on the result of the previous one, so the order below is significant
    // (digraphs must be handled before their individual letters).
    $temp2 = str_ireplace(
        array('AE', 'IA', 'OE', 'OI', 'SC', 'E', 'O', 'U', 'Y', 'K', 'Z', 'H'),
        array('I',  'A',  'I',  'A',  'S',  'I', 'A', 'I', 'I', 'C', 'S', ''),
        $temp2
    );
    $this->debug['TW'][] = "4 (temp2:{$temp2})";

    // add back the leading letter
    $temp2 = $start_letter . $temp2;
    $this->debug['TW'][] = "5 (temp2:{$temp2})";

    // Now drop any repeated characters (AA becomes A, BB or BBB becomes B, etc.).
    // The bound is strictly "<": reading at index strlen() is past the end of the string.
    $result2 = '';
    $length = strlen($temp2);
    for ($i = 0; $i < $length; $i++) {
        $next_char = $temp2[$i];
        if ($i === 0 || $next_char !== substr($result2, -1)) {
            $result2 .= $next_char;
        }
    }
    $this->debug['TW'][] = "6 (result2:{$result2}) (temp2:{$temp2})";

    if (strlen($result2) > 4 && $strip_ending) {
        $this->debug['TW'][] = "7 (result2:{$result2})";
        // Deal with variant endings -is (includes -us, -ys, -es), -im (was -um), -as (-os)
        // at end of string: translate all to -a.
        if (in_array(substr($result2, -2), array('IS', 'IM', 'AS'), true)) {
            $result2 = substr($result2, 0, -2) . 'A';
        }
        $this->debug['TW'][] = "7a (result2:{$result2})";
    }

    $this->debug['TW'][] = "Return: ({$result2})";
    $this->output = $result2;

    return $this->output;
}
foreach ($expected as $formvar) { ${$formvar} = isset(${"_{$_SERVER['REQUEST_METHOD']}"}[$formvar]) ? urldecode(${"_{$_SERVER['REQUEST_METHOD']}"}[$formvar]) : NULL; } //$source = (trim($source) == '' ? 'test1' : $source); $source = explode(",", $source); //$classification = isset($classification) ? $classification : DEFAULT_CLASSIFICATION; $cache = trim($cache) == 'true' ? true : false; $parse_only = trim($parse_only) == 'true' ? true : false; require_once 'classes/class.mysqli_database.php'; require_once 'classes/class.misc.php'; switch ($cmd) { case 'normalize': require_once 'classes/class.normalize.php'; $norm = new Normalize(); $norm->set('debug_flag', $_REQUEST['debug']); $data = $norm->normalize($str); if ($output == 'xml') { $data = $norm->getXML(); } $debug = $norm->debug; break; case 'normalize_auth': require_once 'classes/class.normalize.php'; $db = select_source($source, $classification); $norm = new Normalize($db); $norm->set('post_fix', '_' . $source); $norm->set('source', $source); $data = $norm->normalize_auth($str); if ($output == 'xml') { $data = $norm->getXML(); }