/**
 * Discover the format of a CSV file: the number of fields, the separator
 * and the character (if any) used to quote string fields.
 *
 * Only the first 30 lines (read in chunks of at most 4095 bytes) are
 * inspected. The separator that occurs the same number of times on the
 * largest number of lines wins.
 *
 * @param string $file      the CSV file name
 * @param array  $extraSeps extra separators that should be checked for
 *
 * @return mixed associative array with the keys 'fields', 'sep' and 'quote'
 *               ('quote' is null when no quoting was detected), or the result
 *               of File_CSV::raiseError() when the file cannot be opened
 */
function discoverFormat($file, $extraSeps = array())
{
    if (!($fp = @fopen($file, 'rb'))) {
        return File_CSV::raiseError("Could not open file: {$file}");
    }

    // Set auto detect line ending for Mac EOL support
    $oldini = ini_get('auto_detect_line_endings');
    if ($oldini != '1') {
        ini_set('auto_detect_line_endings', '1');
    }

    // Take the first 30 lines; the number of occurrences of each
    // separator is counted per line further below.
    // Compare against false so that a final line consisting of "0" is kept.
    $lines = '';
    for ($i = 0; $i < 30 && !feof($fp) && ($line = fgets($fp, 4096)) !== false; $i++) {
        $lines .= $line;
    }
    fclose($fp);

    if ($oldini != '1') {
        ini_set('auto_detect_line_endings', $oldini);
    }

    // De-duplicate the candidates: a separator listed twice would be counted
    // twice per line and get an unfair score.
    $seps    = array_values(array_unique(array_merge(array("\t", ';', ':', ','), $extraSeps)));
    $matches = array();
    $quotes  = '"\'';

    // Drop doubled double-quotes, then replace the content of every quoted
    // string with a single underscore until nothing changes any more.
    $lines = str_replace('""', '', $lines);
    while ($lines != ($newLines = preg_replace('|((["\'])[^"]*(\\2))|', '\\2_\\2', $lines))) {
        $lines = $newLines;
    }

    $eol   = strpos($lines, "\r") ? "\r" : "\n";
    $lines = explode($eol, $lines);

    foreach ($lines as $line) {
        $orgLine = $line;
        foreach ($seps as $sep) {
            // The separator is caller-supplied text, not a regex: escape it
            // (including the '|' pattern delimiter) before interpolating it.
            $sepClass = preg_quote($sep, '|');
            // Grouped form so that quantifiers apply to the whole separator.
            $sepRx = '(?:' . $sepClass . ')';

            $line = preg_replace(
                "|^[^{$quotes}{$sepClass}]*{$sepRx}*([{$quotes}][^{$quotes}]*[{$quotes}])|sm",
                '_',
                $orgLine
            );

            // Find all separators that are within quotes
            // FIXME: counts legitimate lines as bad ones

            // In case there is whitespace in front of the field
            $regex = '|\\s*?';
            // Match the first quote, optionally preceded by "=" (Excel style)
            $regex .= "(?:\\=?[{$quotes}])";
            $regex .= '(.*';
            // Don't match a separator if we are inside a quote, and don't
            // accept the separator if it has a quote on either side
            // FIXME: has to be possible if we are inside a quote!
            // (tests fail because of this)
            $regex .= "(?:[^{$quotes}]){$sepRx}(?:[^{$quotes}])";
            $regex .= '.*)';
            // Closing quote, followed by an optional separator (could be end of line)
            $regex .= "(?:[{$quotes}](?:{$sepRx}?))|Ums";
            preg_match_all($regex, $line, $match);

            // All separators, within quotes or not
            $sep_count = substr_count($line, $sep);
            // Real count
            $matches[$sep][] = $sep_count - count($match[0]);
        }
    }

    // Group the results by amount of equal occurrences
    $fields = array();
    $amount = array();
    foreach ($matches as $sep => $res) {
        $times = array(0 => 0);
        foreach ($res as $num) {
            if ($num > 0) {
                $times[$num] = isset($times[$num]) ? $times[$num] + 1 : 1;
            }
        }
        arsort($times);

        // Use max fields count.
        $fields[$sep] = max(array_flip($times));
        $amount[$sep] = $times[key($times)];
    }

    arsort($amount);
    $sep = key($amount);

    $conf           = array();
    $conf['fields'] = $fields[$sep] + 1;
    $conf['sep']    = $sep;

    // Test if there are fields with quotes around them in the sampled lines
    $sepRx  = '(?:' . preg_quote($sep, '|') . ')';
    $quote  = null;
    $string = implode('', $lines);
    foreach (array('"', '\'') as $q) {
        if (preg_match_all("|{$sepRx}(?:\\s*?)(\\=?[{$q}]).*([{$q}]){$sepRx}|Us", $string, $match)) {
            if ($match[1][0] == $match[2][0]) {
                $quote = $match[1][0];
                break;
            }
        }

        if (preg_match_all("|^(\\=?[{$q}]).*([{$q}]){$sepRx}{0,1}|Ums", $string, $match)
            || preg_match_all("|(\\=?[{$q}]).*([{$q}]){$sepRx}\\s\$|Ums", $string, $match)
        ) {
            if ($match[1][0] == $match[2][0]) {
                $quote = $match[1][0];
                break;
            }
        }
    }

    $conf['quote'] = $quote;
    return $conf;
}
/**
 * Discover the format of a CSV file: the number of fields, the separator
 * and the character (if any) used to quote string fields.
 *
 * The separator is guessed from the first 10 lines, the quote character
 * from the first 5 lines.
 *
 * @param string $file      the CSV file name
 * @param array  $extraSeps extra separators that should be checked for
 *
 * @return mixed associative array with the keys 'fields', 'sep' and 'quote'
 *               ('quote' is null when no quoting was detected), or the result
 *               of File_CSV::raiseError() when the file cannot be opened
 */
function discoverFormat($file, $extraSeps = array())
{
    if (!($fp = @fopen($file, 'r'))) {
        return File_CSV::raiseError("Could not open file: {$file}");
    }
    // The handle only serves as an "is it readable" check; the content is
    // read with file() below, so release it right away.
    fclose($fp);

    // De-duplicate the candidates: a separator listed twice would be counted
    // twice per line and get an unfair score.
    $seps = array_values(array_unique(array_merge(array("\t", ';', ':', ','), $extraSeps)));

    // Seed every candidate so that an empty file still produces a result
    // instead of failing on undefined $amount / $fields further down.
    $matches = array();
    foreach ($seps as $sep) {
        $matches[$sep] = array();
    }

    // Set auto detect line ending for Mac EOL support. This is attempted
    // whenever the running PHP is newer than 4.1.0.
    $phpver = version_compare('4.1.0', phpversion(), '<');
    if ($phpver) {
        $oldini = ini_get('auto_detect_line_endings');
        ini_set('auto_detect_line_endings', '1');
    }

    // Take the first 10 lines and store the number of occurrences
    // of each separator in each line
    $lines = file($file);
    if (count($lines) > 10) {
        $lines = array_slice($lines, 0, 10);
    }

    if ($phpver) {
        ini_set('auto_detect_line_endings', $oldini);
    }

    foreach ($lines as $line) {
        foreach ($seps as $sep) {
            $matches[$sep][] = substr_count($line, $sep);
        }
    }

    // Group the results by amount of equal occurrences
    $fields = array();
    $amount = array();
    foreach ($matches as $sep => $res) {
        $times = array(0 => 0);
        foreach ($res as $num) {
            if ($num > 0) {
                $times[$num] = isset($times[$num]) ? $times[$num] + 1 : 1;
            }
        }
        arsort($times);

        // Use max fields count.
        $fields[$sep] = max(array_flip($times));
        $amount[$sep] = $times[key($times)];
    }

    arsort($amount);
    $sep = key($amount);

    $conf           = array();
    $conf['fields'] = $fields[$sep] + 1;
    $conf['sep']    = $sep;

    // Test if there are fields with quotes around them in the first 5 lines
    $quotes = '"\'';
    $quote  = null;
    // The separator is plain text, not a regex: escape it (including the
    // '|' pattern delimiter) before interpolating it into the patterns.
    $sepRx = preg_quote($sep, '|');
    if (count($lines) > 5) {
        $lines = array_slice($lines, 0, 5);
    }

    foreach ($lines as $line) {
        if (preg_match("|{$sepRx}([{$quotes}]).*([{$quotes}]){$sepRx}|U", $line, $match)) {
            if ($match[1] == $match[2]) {
                $quote = $match[1];
                break;
            }
        }
        if (preg_match("|^([{$quotes}]).*([{$quotes}]){$sepRx}|", $line, $match)
            || preg_match("|([{$quotes}]).*([{$quotes}]){$sepRx}\\s\$|Us", $line, $match)
        ) {
            if ($match[1] == $match[2]) {
                $quote = $match[1];
                break;
            }
        }
    }

    $conf['quote'] = $quote;
    // XXX What about trying to discover the "header"?
    return $conf;
}
/**
 * Discover the format of a CSV file: the number of fields, the separator
 * and the character (if any) used to quote string fields.
 *
 * The separator is guessed from the first 10 lines, the quote character
 * from the first 5 lines.
 *
 * @param string $file the CSV file name
 *
 * @return mixed associative array with the keys 'fields', 'sep' and 'quote'
 *               ('quote' is null when no quoting was detected), or the result
 *               of File_CSV::raiseError() when the file cannot be opened or
 *               no separator can be discovered
 */
function discoverFormat($file)
{
    if (!($fp = @fopen($file, 'r'))) {
        return File_CSV::raiseError("Could not open file: {$file}");
    }

    $seps = array("\t", ';', ':', ',');

    // Seed every candidate so that an empty file ends up in the regular
    // "could not discover" error below instead of failing on undefined
    // variables.
    $matches = array();
    foreach ($seps as $sep) {
        $matches[$sep] = array();
    }

    // Take the first 10 lines and store the number of occurrences
    // of each separator in each line.
    // Compare against false so that a final line consisting of "0" is kept.
    for ($i = 0; $i < 10 && ($line = fgets($fp, 4096)) !== false; $i++) {
        foreach ($seps as $sep) {
            $matches[$sep][$i] = substr_count($line, $sep);
        }
    }

    // Group the results by amount of equal occurrences
    $fields = array();
    $amount = array();
    foreach ($matches as $sep => $res) {
        $times = array(0 => 0);
        foreach ($res as $num) {
            if ($num > 0) {
                $times[$num] = isset($times[$num]) ? $times[$num] + 1 : 1;
            }
        }
        arsort($times);

        // Most frequent per-line count and how many lines showed it
        $fields[$sep] = key($times);
        $amount[$sep] = $times[key($times)];
    }

    arsort($amount);
    $sep        = key($amount);
    $fieldCount = $fields[$sep];
    if (empty($fieldCount)) {
        // Do not leak the handle on the error path
        fclose($fp);
        return File_CSV::raiseError('Could not discover the separator');
    }

    $conf           = array();
    $conf['fields'] = $fieldCount + 1;
    $conf['sep']    = $sep;

    // Test if there are fields with quotes around them in the first 5 lines
    $quotes = '"\'';
    $quote  = null;
    rewind($fp);
    for ($i = 0; $i < 5 && ($line = fgets($fp, 4096)) !== false; $i++) {
        if (preg_match("|{$sep}([{$quotes}]).*([{$quotes}]){$sep}|U", $line, $match)) {
            if ($match[1] == $match[2]) {
                $quote = $match[1];
                break;
            }
        }
        if (preg_match("|^([{$quotes}]).*([{$quotes}]){$sep}|", $line, $match)
            || preg_match("|([{$quotes}]).*([{$quotes}]){$sep}\\s\$|Us", $line, $match)
        ) {
            if ($match[1] == $match[2]) {
                $quote = $match[1];
                break;
            }
        }
    }

    $conf['quote'] = $quote;
    fclose($fp);
    // XXX What about trying to discover the "header"?
    return $conf;
}
/**
 * Formats a struct (array) as one CSV record and appends it to the
 * in-memory buffer $this->filedata.
 *
 * Non-numeric fields are wrapped in $conf['quote'] when a quote character
 * is configured; quote characters inside such a field are doubled so the
 * record stays parseable.
 *
 * @param array $fields Ordered (0-based) array with the data
 * @param array &$conf  The configuration of the dest CSV; the keys
 *                      'fields', 'quote', 'sep' and 'crlf' are read
 *
 * @return bool True on success, false when the number of fields does not
 *              match $conf['fields'] (an error is raised as well)
 */
function append($fields, &$conf)
{
    $fieldCount = count($fields);
    if ($fieldCount != $conf['fields']) {
        File_CSV::raiseError("Wrong fields number count: '" . $fieldCount . "' expected " . $conf['fields']);
        // Nothing was appended, so this must not be reported as a success
        return false;
    }

    $write = '';
    for ($i = 0; $i < $fieldCount; $i++) {
        if (!is_numeric($fields[$i]) && $conf['quote']) {
            // Escape embedded quote characters by doubling them
            $escaped = str_replace($conf['quote'], $conf['quote'] . $conf['quote'], $fields[$i]);
            $write .= $conf['quote'] . $escaped . $conf['quote'];
        } else {
            $write .= $fields[$i];
        }

        // Separator between fields, line terminator after the last one
        if ($i < $fieldCount - 1) {
            $write .= $conf['sep'];
        } else {
            $write .= $conf['crlf'];
        }
    }

    $this->filedata .= $write;
    return true;
}