/**
 * Guesses whether a CSV sample begins with a header row.
 *
 * This is a heuristic and is not 100% accurate. The first row is taken as the
 * candidate header; up to 20 of the following rows are sampled. Each column
 * then casts a vote:
 *   - if every sampled value in the column has the same type and that type
 *     differs from the type of the header cell, the column votes "header";
 *   - if the types match and every sampled value has the same length, the
 *     column votes "header" when the header cell's length differs and
 *     "no header" when it is the same;
 *   - otherwise the column abstains.
 * A header is reported when the "header" votes outnumber the "no header" votes.
 *
 * Another possible signal (not implemented) would be checking whether all rows
 * but the first contain certain characters.
 *
 * @param string $data CSV sample; passed to detect() and to Csv_Reader_String
 * @return bool true if the first row is likely a header, false otherwise
 *              (always false when the reader reports two lines or fewer)
 */
public function hasHeader($data)
{
    $reader = new Csv_Reader_String($data, $this->detect($data));
    $total_lines = $reader->count();
    // candidate header row; presumably getRow() also advances the reader past it
    $headers = $reader->getRow();

    if ($total_lines <= 2) {
        // not enough rows to compare against; a larger sample is needed
        return false;
    }

    // One empty bucket per header column. array_fill() is guarded because a
    // zero count is rejected by older PHP versions, and range(0, -1) would
    // have produced the two bogus columns 0 and -1.
    $total_columns = count($headers);
    $types = $total_columns > 0 ? array_fill(0, $total_columns, array()) : array();
    $lengths = $types;

    // sample at most 20 of the remaining rows
    $checked = 0;
    while ($checked < 20 && ($row = $reader->current())) {
        $checked++;
        // record the type and length of every cell, grouped by column
        foreach ($row as $col => $val) {
            $types[$col][] = $this->getType($val);
            $lengths[$col][] = strlen($val);
        }
        $reader->next();
    }

    // take a vote: each column can vote for (+1) or against (-1) a header
    $has_headers = 0;
    foreach ($types as $key => $column) {
        // a column with no sampled values or no header cell cannot be compared
        if (empty($column) || !isset($headers[$key])) {
            continue;
        }

        $unique = array_unique($column);
        if (count($unique) != 1) {
            // mixed types in this column: no opinion
            continue;
        }

        $header = $headers[$key];
        if (reset($unique) != $this->getType($header)) {
            // data rows share one type and the first row differs: looks like a header
            $has_headers++;
            continue;
        }

        // types match the first row, so fall back to comparing lengths
        $unique = array_unique($lengths[$key]);
        if (count($unique) == 1) {
            if (reset($unique) === strlen($header)) {
                $has_headers--;
            } else {
                $has_headers++;
            }
        }
    }

    return $has_headers > 0;
}
/**
 * Builds a ten-line, CRLF-terminated CSV sample and checks that a
 * Csv_Reader_String created from it reports a count of ten.
 */
public function test_Reader_String()
{
    $lines = array();
    foreach (range(0, 9) as $i) {
        $lines[] = "this,is,some,test,data,{$i}\r\n";
    }

    $reader = new Csv_Reader_String(implode('', $lines));

    $this->assertEqual($reader->count(), 10);
}