示例#1
0
文件: CSV.php 项目: villos/tree_admin
 /**
  * Discover the format of a CSV file: the number of fields, the separator
  * and the quote character wrapped around string fields (if any).
  *
  * At most 30 fgets($fp, 4096) reads from the start of the file are sampled.
  *
  * @param string $file      the CSV file name
  * @param array  $extraSeps extra separators that should be checked for, in
  *                          addition to tab, ';', ':' and ','
  *
  * @return mixed array with the keys 'fields', 'sep' and 'quote' ('quote' is
  *               null when no quoting was detected), or the result of
  *               File_CSV::raiseError() when the file cannot be opened
  */
 function discoverFormat($file, $extraSeps = array())
 {
     if (!($fp = @fopen($file, 'rb'))) {
         return File_CSV::raiseError("Could not open file: {$file}");
     }
     // Set auto detect line ending for Mac EOL support
     // NOTE(review): the stream is already open when the setting is switched
     // on -- confirm that it still affects the fgets() calls below.
     $oldini = ini_get('auto_detect_line_endings');
     if ($oldini != '1') {
         ini_set('auto_detect_line_endings', '1');
     }
     // Take the first 30 lines as one sample string. The read result is
     // compared against false explicitly so that a line holding only "0"
     // does not end the sampling early.
     $lines = '';
     for ($i = 0; $i < 30 && !feof($fp); $i++) {
         $line = fgets($fp, 4096);
         if ($line === false) {
             break;
         }
         $lines .= $line;
     }
     fclose($fp);
     if ($oldini != '1') {
         ini_set('auto_detect_line_endings', $oldini);
     }

     $seps = array_merge(array("\t", ';', ':', ','), $extraSeps);
     $quotes = '"\'';

     // Drop doubled double-quotes, then collapse every quoted run to q_q
     // until the text no longer changes.
     $lines = str_replace('""', '', $lines);
     do {
         $before = $lines;
         $collapsed = preg_replace('|((["\'])[^"]*(\\2))|', '\\2_\\2', $lines);
         if ($collapsed !== null) {
             $lines = $collapsed;
         }
     } while ($lines !== $before);

     // strpos() returns 0 for a match at the very first byte, so the result
     // has to be compared against false rather than tested for truthiness.
     $eol = strpos($lines, "\r") !== false ? "\r" : "\n";
     $lines = explode($eol, $lines);

     // Store the number of occurrences of each separator in each line
     $matches = array();
     foreach ($lines as $orgLine) {
         foreach ($seps as $sep) {
             // Separators are caller supplied and end up inside |-delimited
             // patterns: quote them so that '|', '.', '^', ']' ... are literal.
             $qsep = preg_quote($sep, '|');
             $line = preg_replace("|^[^{$quotes}{$qsep}]*{$qsep}*([{$quotes}][^{$quotes}]*[{$quotes}])|sm", '_', $orgLine);
             if ($line === null) {
                 // Pattern failure: fall back to the untouched line
                 $line = $orgLine;
             }
             // Find all seps that are within quotes
             ///FIXME ... counts legitimate lines as bad ones
             // In case there's a whitespace in front of the field
             $regex = '|\\s*?';
             // Match the first quote (optional), also optionally match = since it's excel stuff
             $regex .= "(?:\\=?[{$quotes}])";
             $regex .= '(.*';
             // Don't match a sep if we are inside a quote
             // also don't accept the sep if it has a quote on the either side
             ///FIXME has to be possible if we are inside a quote! (tests fail because of this)
             $regex .= "(?:[^{$quotes}]){$qsep}(?:[^{$quotes}])";
             $regex .= '.*)';
             // Close quote (if it's present) and the sep (optional, could be end of line)
             $regex .= "(?:[{$quotes}](?:{$qsep}?))|Ums";
             $match = array(array());
             preg_match_all($regex, $line, $match);
             $quoted = isset($match[0]) ? count($match[0]) : 0;
             // All seps (within quotes or not) minus the quoted ones
             $matches[$sep][] = substr_count($line, $sep) - $quoted;
         }
     }

     // Group the results by amount of equal occurrences
     $fields = array();
     $amount = array();
     foreach ($matches as $sep => $res) {
         // $times maps "separators seen in a line" => "number of such lines"
         $times = array(0 => 0);
         foreach ($res as $num) {
             if ($num > 0) {
                 $times[$num] = isset($times[$num]) ? $times[$num] + 1 : 1;
             }
         }
         arsort($times);
         // Use max fields count: flipping turns the separator counts into
         // values, of which the largest is kept.
         $fields[$sep] = max(array_flip($times));
         // Number of lines sharing the most frequent separator count
         $amount[$sep] = $times[key($times)];
     }
     arsort($amount);
     $sep = (string) key($amount);
     $qsep = preg_quote($sep, '|');

     $conf = array();
     $conf['fields'] = $fields[$sep] + 1;
     $conf['sep'] = $sep;

     // Test if there are fields with quotes around in the sampled lines
     $quote = null;
     $string = implode('', $lines);
     foreach (array('"', '\'') as $q) {
         // Quoted field enclosed by two separators
         if (preg_match_all("|{$qsep}(?:\\s*?)(\\=?[{$q}]).*([{$q}]){$qsep}|Us", $string, $match)) {
             if ($match[1][0] == $match[2][0]) {
                 $quote = $match[1][0];
                 break;
             }
         }
         // Quoted field at the start of a line, or right before a trailing separator
         if (preg_match_all("|^(\\=?[{$q}]).*([{$q}]){$qsep}{0,1}|Ums", $string, $match) || preg_match_all("|(\\=?[{$q}]).*([{$q}]){$qsep}\\s\$|Ums", $string, $match)) {
             if ($match[1][0] == $match[2][0]) {
                 $quote = $match[1][0];
                 break;
             }
         }
     }
     $conf['quote'] = $quote;
     return $conf;
 }
示例#2
0
 /**
  * Discover the format of a CSV file: the number of fields, the separator
  * and the quote character wrapped around string fields (if any).
  *
  * The separator is guessed from the first 10 lines, the quote character
  * from the first 5 lines.
  *
  * @param string $file      the CSV file name
  * @param array  $extraSeps extra separators that should be checked for, in
  *                          addition to tab, ';', ':' and ','
  *
  * @return mixed array with the keys 'fields', 'sep' and 'quote' ('quote' is
  *               null when no quoting was detected), or the result of
  *               File_CSV::raiseError() when the file cannot be read
  */
 function discoverFormat($file, $extraSeps = array())
 {
     // The handle only serves as an "is it readable" probe; the content is
     // read with file() below, so it can be released right away.
     if (!($fp = @fopen($file, 'r'))) {
         return File_CSV::raiseError("Could not open file: {$file}");
     }
     fclose($fp);

     $seps = array_merge(array("\t", ';', ':', ','), $extraSeps);

     // Set auto detect line ending for Mac EOL support. The switch is only
     // touched when the running PHP version is newer than 4.1.0.
     $phpver = version_compare('4.1.0', phpversion(), '<');
     if ($phpver) {
         $oldini = ini_get('auto_detect_line_endings');
         ini_set('auto_detect_line_endings', '1');
     }
     $lines = file($file);
     if ($phpver) {
         ini_set('auto_detect_line_endings', $oldini);
     }
     if ($lines === false) {
         return File_CSV::raiseError("Could not open file: {$file}");
     }

     // Take the first 10 lines and store the number of occurrences
     // for each separator in each line
     $lines = array_slice($lines, 0, 10);
     $matches = array();
     foreach ($seps as $sep) {
         // Every candidate gets an entry, so an empty file is handled like
         // a file that contains no separator at all.
         $matches[$sep] = array();
     }
     foreach ($lines as $line) {
         foreach ($seps as $sep) {
             $matches[$sep][] = substr_count($line, $sep);
         }
     }

     // Group the results by amount of equal occurrences
     $fields = array();
     $amount = array();
     foreach ($matches as $sep => $res) {
         // $times maps "separators seen in a line" => "number of such lines"
         $times = array(0 => 0);
         foreach ($res as $num) {
             if ($num > 0) {
                 $times[$num] = isset($times[$num]) ? $times[$num] + 1 : 1;
             }
         }
         arsort($times);
         // Use max fields count: flipping turns the separator counts into
         // values, of which the largest is kept.
         $fields[$sep] = max(array_flip($times));
         // Number of lines sharing the most frequent separator count
         $amount[$sep] = $times[key($times)];
     }
     arsort($amount);
     $sep = (string) key($amount);

     $conf = array();
     $conf['fields'] = $fields[$sep] + 1;
     $conf['sep'] = $sep;

     // Test if there are fields with quotes around in the first 5 lines.
     // The separator may come from the caller and is embedded in |-delimited
     // patterns, so it has to be quoted to be matched literally.
     $qsep = preg_quote($sep, '|');
     $quotes = '"\'';
     $quote = null;
     foreach (array_slice($lines, 0, 5) as $line) {
         // Quoted field enclosed by two separators
         if (preg_match("|{$qsep}([{$quotes}]).*([{$quotes}]){$qsep}|U", $line, $match)) {
             if ($match[1] == $match[2]) {
                 $quote = $match[1];
                 break;
             }
         }
         // Quoted field at the start of the line, or right before a trailing separator
         if (preg_match("|^([{$quotes}]).*([{$quotes}]){$qsep}|", $line, $match) || preg_match("|([{$quotes}]).*([{$quotes}]){$qsep}\\s\$|Us", $line, $match)) {
             if ($match[1] == $match[2]) {
                 $quote = $match[1];
                 break;
             }
         }
     }
     $conf['quote'] = $quote;
     // XXX What about trying to discover the "header"?
     return $conf;
 }
示例#3
0
 /**
  * Discover the format of a CSV file: the number of fields, the separator
  * and the quote character wrapped around string fields (if any).
  *
  * The separator (one of tab, ';', ':' or ',') is guessed from the first 10
  * lines, the quote character from the first 5 lines.
  *
  * @param string $file the CSV file name
  *
  * @return mixed array with the keys 'fields', 'sep' and 'quote' ('quote' is
  *               null when no quoting was detected), or the result of
  *               File_CSV::raiseError() when the file cannot be opened or
  *               no separator could be discovered
  */
 function discoverFormat($file)
 {
     if (!($fp = @fopen($file, 'r'))) {
         return File_CSV::raiseError("Could not open file: {$file}");
     }
     $seps = array("\t", ';', ':', ',');
     $matches = array();
     foreach ($seps as $sep) {
         // Every candidate gets an entry, so an empty file ends up in the
         // "could not discover" branch below instead of failing in arsort().
         $matches[$sep] = array();
     }
     // Take the first 10 lines and store the number of occurrences
     // for each separator in each line. The read result is compared against
     // false so that a line holding only "0" does not stop the loop.
     for ($i = 0; $i < 10 && ($line = fgets($fp, 4096)) !== false; $i++) {
         foreach ($seps as $sep) {
             $matches[$sep][$i] = substr_count($line, $sep);
         }
     }

     // Group the results by amount of equal occurrences
     $fields = array();
     $amount = array();
     foreach ($matches as $sep => $res) {
         // $times maps "separators seen in a line" => "number of such lines"
         $times = array(0 => 0);
         foreach ($res as $num) {
             if ($num > 0) {
                 $times[$num] = isset($times[$num]) ? $times[$num] + 1 : 1;
             }
         }
         arsort($times);
         // Most frequent separator count and how many lines share it
         $fields[$sep] = key($times);
         $amount[$sep] = $times[key($times)];
     }
     arsort($amount);
     $sep = key($amount);
     $fieldCount = $fields[$sep];
     if (empty($fieldCount)) {
         // Release the handle before bailing out
         fclose($fp);
         return File_CSV::raiseError('Could not discover the separator');
     }

     $conf = array();
     $conf['fields'] = $fieldCount + 1;
     $conf['sep'] = $sep;

     // Test if there are fields with quotes around in the first 5 lines
     $quotes = '"\'';
     $quote = null;
     rewind($fp);
     for ($i = 0; $i < 5 && ($line = fgets($fp, 4096)) !== false; $i++) {
         // Quoted field enclosed by two separators
         if (preg_match("|{$sep}([{$quotes}]).*([{$quotes}]){$sep}|U", $line, $match)) {
             if ($match[1] == $match[2]) {
                 $quote = $match[1];
                 break;
             }
         }
         // Quoted field at the start of the line, or right before a trailing separator
         if (preg_match("|^([{$quotes}]).*([{$quotes}]){$sep}|", $line, $match) || preg_match("|([{$quotes}]).*([{$quotes}]){$sep}\\s\$|Us", $line, $match)) {
             if ($match[1] == $match[2]) {
                 $quote = $match[1];
                 break;
             }
         }
     }
     $conf['quote'] = $quote;
     fclose($fp);
     // XXX What about trying to discover the "header"?
     return $conf;
 }
示例#4
0
 /**
  * Appends one row, formatted as a CSV line, to the in-memory buffer
  * $this->filedata.
  *
  * Non-numeric values are wrapped in the configured quote character (when
  * one is set); quote characters inside such a value are doubled so that the
  * resulting line stays parseable.
  *
  * @param array $fields Ordered array with the data
  * @param array &$conf  The configuration of the dest CSV. Reads the keys
  *                      'fields' (expected number of values), 'sep',
  *                      'quote' and 'crlf' (line terminator, "\n" is used
  *                      when the key is missing)
  *
  * @return bool True on success, false when the number of values differs
  *              from $conf['fields'] (an error is raised through
  *              File_CSV::raiseError() in that case)
  */
 function append($fields, &$conf)
 {
     // Work on a 0-based list so that arrays with other keys are accepted too
     $fields = array_values($fields);
     $count = count($fields);
     if ($count != $conf['fields']) {
         File_CSV::raiseError("Wrong fields number count: '" . $count . "' expected " . $conf['fields']);
         return false;
     }

     $quote = empty($conf['quote']) ? '' : $conf['quote'];
     $eol = isset($conf['crlf']) ? $conf['crlf'] : "\n";

     $cells = array();
     foreach ($fields as $value) {
         if ($quote !== '' && !is_numeric($value)) {
             // Escape embedded quote characters by doubling them
             $value = $quote . str_replace($quote, $quote . $quote, (string) $value) . $quote;
         }
         $cells[] = $value;
     }

     // An empty row adds nothing to the buffer
     if ($count > 0) {
         $this->filedata .= implode($conf['sep'], $cells) . $eol;
     }
     return true;
 }