/**
 * Parse this content
 *
 * The content is converted to UTF-8, a leading BOM is removed and mac/dos line
 * endings are normalised to "\n". The first line is treated as the header; every
 * following line must split into the same number of fields as the header. The
 * header and each data line are written to the import cache file as one
 * serialized array per line.
 *
 * @global object
 * @global object
 * @param string $content passed by ref for memory reasons, unset after return
 * @param string $encoding content encoding
 * @param string $delimiter_name separator (comma, semicolon, colon, cfg)
 * @param string $column_validation name of function for columns validation, must have one param $columns
 * @return bool false if error, count of data lines if ok; use get_error() to get error string
 */
function load_csv_content(&$content, $encoding, $delimiter_name, $column_validation = null) {
    global $USER, $CFG;

    $this->close();
    $this->_error = null;

    $content = textlib::convert($content, $encoding, 'utf-8');
    // remove Unicode BOM from first line
    $content = textlib::trim_utf8_bom($content);
    // Fix mac/dos newlines
    $content = preg_replace('!\\r\\n?!', "\n", $content);

    // is there anything in file?
    $columns = strtok($content, "\n");
    if ($columns === false) {
        $this->_error = get_string('csvemptyfile', 'error');
        return false;
    }

    $csv_delimiter = csv_import_reader::get_delimiter($delimiter_name);
    $csv_encode = csv_import_reader::get_encoded_delimiter($delimiter_name);

    // process header - list of columns
    // explode() always returns at least one element, so the header can never
    // have zero columns once a first line has been found.
    $columns = explode($csv_delimiter, $columns);
    $col_count = count($columns);
    foreach ($columns as $key => $value) {
        // Restore delimiters that were stored in their encoded form inside a field.
        $columns[$key] = str_replace($csv_encode, $csv_delimiter, trim($value));
    }

    if ($column_validation) {
        // The validator returns true on success, anything else is used as the error.
        $result = $column_validation($columns);
        if ($result !== true) {
            $this->_error = $result;
            return false;
        }
    }
    $this->_columns = $columns; // cached columns

    // open file for writing
    $filename = $CFG->tempdir . '/csvimport/' . $this->_type . '/' . $USER->id . '/' . $this->_iid;
    $fp = fopen($filename, "w");
    if ($fp === false) {
        // Without this check every fwrite() below would fail and the caller
        // would still be told that the import had been cached.
        $this->_error = get_string('cannotopenfile', 'error', $filename);
        return false;
    }
    fwrite($fp, serialize($columns) . "\n");

    // again - do we have any data for processing?
    $data_count = 0;
    // strtok() keeps its position from the header call above, so each call
    // with only the token argument yields the next line of $content.
    $line = strtok("\n");
    while ($line !== false) {
        $line = explode($csv_delimiter, $line);
        foreach ($line as $key => $value) {
            $line[$key] = str_replace($csv_encode, $csv_delimiter, trim($value));
        }
        if (count($line) !== $col_count) {
            // this is critical!!
            $this->_error = get_string('csvweirdcolumns', 'error');
            fclose($fp);
            $this->cleanup();
            return false;
        }
        fwrite($fp, serialize($line) . "\n");
        $data_count++;
        $line = strtok("\n");
    }

    fclose($fp);

    return $data_count;
}
/**
 * Parse this content
 *
 * The content is converted to UTF-8, a leading BOM is removed, mac/dos line
 * endings are normalised to "\n" and trailing whitespace is trimmed. The rows
 * are then read with fgetcsv(), checked against the header's column count and
 * written to the import cache file in CSV format.
 *
 * @global object
 * @global object
 * @param string $content passed by ref for memory reasons, unset after return
 * @param string $encoding content encoding
 * @param string $delimiter_name separator (comma, semicolon, colon, cfg)
 * @param string $column_validation name of function for columns validation, must have one param $columns
 * @param string $enclosure field wrapper. One character only.
 * @return bool false if error, number of rows read if ok (the header row is included in the count);
 *              use get_error() to get error string
 */
function load_csv_content(&$content, $encoding, $delimiter_name, $column_validation = null, $enclosure = '"') {
    global $USER, $CFG;

    $this->close();
    $this->_error = null;

    $content = textlib::convert($content, $encoding, 'utf-8');
    // remove Unicode BOM from first line
    $content = textlib::trim_utf8_bom($content);
    // Fix mac/dos newlines
    $content = preg_replace('!\\r\\n?!', "\n", $content);
    // Remove any spaces or new lines at the end of the file.
    if ($delimiter_name === 'tab') {
        // trim() by default removes tabs from the end of content which is undesirable in a tab separated file.
        // This is the default trim() character list minus the tab character.
        $content = trim($content, chr(0x20) . chr(0xa) . chr(0xd) . chr(0x0) . chr(0xb));
    } else {
        $content = trim($content);
    }

    $csv_delimiter = csv_import_reader::get_delimiter($delimiter_name);

    // create a temporary file and store the csv file there.
    $fp = tmpfile();
    if ($fp === false) {
        $this->_error = get_string('cannotopenfile', 'error', 'tmpfile()');
        return false;
    }
    fwrite($fp, $content);
    fseek($fp, 0);

    // Create an array to store the imported data for error checking.
    $columns = array();
    // str_getcsv doesn't iterate through the csv data properly. It has
    // problems with line returns.
    while (is_array($row = fgetcsv($fp, 0, $csv_delimiter, $enclosure))) {
        // fgetcsv() reports a blank line as an array holding a single null; skip those.
        if ($row !== array(null)) {
            $columns[] = $row;
        }
    }
    // Everything needed is now in memory, so release the temporary file right
    // away instead of closing it separately on every exit path.
    fclose($fp);

    // process header - list of columns
    if (!isset($columns[0])) {
        $this->_error = get_string('csvemptyfile', 'error');
        return false;
    }
    $col_count = count($columns[0]);

    // Column validation.
    if ($column_validation) {
        // The validator returns true on success, anything else is used as the error.
        $result = $column_validation($columns[0]);
        if ($result !== true) {
            $this->_error = $result;
            return false;
        }
    }

    $this->_columns = $columns[0]; // cached columns

    // check to make sure that the data columns match up with the headers.
    foreach ($columns as $rowdata) {
        if (count($rowdata) !== $col_count) {
            $this->_error = get_string('csvweirdcolumns', 'error');
            $this->cleanup();
            return false;
        }
    }

    $filename = $CFG->tempdir . '/csvimport/' . $this->_type . '/' . $USER->id . '/' . $this->_iid;
    $filepointer = fopen($filename, "w");
    if ($filepointer === false) {
        // Without this check the fwrite() below would fail and the caller
        // would still be told that the import had been cached.
        $this->_error = get_string('cannotopenfile', 'error', $filename);
        return false;
    }
    // The information has been stored in csv format, as serialized data has issues
    // with special characters and line returns.
    $storedata = csv_export_writer::print_array($columns, ',', '"', true);
    fwrite($filepointer, $storedata);
    fclose($filepointer);

    return count($columns);
}
/**
 * Tests the static trim_utf8_bom method
 *
 * The input carries a BOM at both ends; the expected result keeps the
 * trailing one, i.e. only the leading BOM is removed.
 *
 * @return void
 */
public function test_trim_utf8_bom() {
    // Spell the BOM out as escaped bytes. A raw U+FEFF typed into the literal
    // is invisible and easily dropped by editors or tooling, which would leave
    // an empty string and a test that verifies nothing.
    $bom = "\xef\xbb\xbf";
    $str = "Žluťoučký koníček";
    $this->assertSame($str . $bom, textlib::trim_utf8_bom($bom . $str . $bom));
}
/**
 * Check a CSV input line format for empty or commented lines
 * Ensures compatibility to UTF-8 BOM or unBOM formats
 *
 * The line is normalised in place: a leading UTF-8 BOM and all CR/LF
 * characters are removed before the check is made.
 *
 * @param string $text the input line, passed by reference and rewritten with the normalised text
 * @param bool $resetfirst kept for backward compatibility; it has no effect, because the BOM
 *                         is stripped on every call rather than only on the first line
 * @return bool true if the line is empty or starts with one of ( [ - # / or a space
 */
function vchamilo_is_empty_line_or_format(&$text, $resetfirst = false) {
    static $textlib;

    if (!isset($textlib)) {
        $textlib = new textlib(); // Singleton
    }

    $text = $textlib->trim_utf8_bom($text);
    $text = str_replace(array("\r", "\n"), '', $text);

    // last chance
    if ('ASCII' == mb_detect_encoding($text)) {
        // Same ISO-8859-1 to UTF-8 conversion as utf8_encode(), which is deprecated since PHP 8.2.
        $text = mb_convert_encoding($text, 'UTF-8', 'ISO-8859-1');
    }

    // Check the text is empty or comment line and answer true if it is.
    return preg_match('/^$/', $text) || preg_match('/^(\\(|\\[|-|#|\\/| )/', $text);
}
/**
 * Return complete file within an array, one item per line
 *
 * A UTF-8 BOM at the start of the first line is removed. If the first line
 * contains "\r" but no "\n" (Macintosh line endings, so the whole file came
 * back as a single line), that line is split on "\r" instead.
 *
 * @param string filename name of file
 * @return mixed contents array or false on failure
 */
protected function readdata($filename) {
    if (!is_readable($filename)) {
        return false;
    }

    $filearray = file($filename);
    if ($filearray === false) {
        // is_readable() is also true for directories and the file may vanish
        // between the check and the read, so file() can still fail here.
        return false;
    }
    if (count($filearray) === 0) {
        // Empty file: there is no first line to inspect.
        return array();
    }

    // If the first line of the file starts with a UTF-8 BOM, remove it.
    $filearray[0] = textlib::trim_utf8_bom($filearray[0]);

    // Check for Macintosh OS line returns (ie file on one line), and fix.
    $firstline = $filearray[0];
    if (strpos($firstline, "\r") !== false && strpos($firstline, "\n") === false) {
        return explode("\r", $firstline);
    }

    return $filearray;
}
// if import form is submitted if ($formdata = $mform->get_data()) { // Large files are likely to take their time and memory. Let PHP know // that we'll take longer, and that the process should be recycled soon // to free up memory. @set_time_limit(0); raise_memory_limit(MEMORY_EXTRA); // use current (non-conflicting) time stamp $importcode = get_new_importcode(); $filename = make_temp_directory('gradeimport/cvs/' . $USER->id); $filename = $filename . '/' . $importcode; $text = $mform->get_file_content('userfile'); // trim utf-8 bom /// normalize line endings and do the encoding conversion $text = textlib::convert($text, $formdata->encoding); $text = textlib::trim_utf8_bom($text); // Fix mac/dos newlines $text = preg_replace('!\\r\\n?!', "\n", $text); $fp = fopen($filename, "w"); fwrite($fp, $text); fclose($fp); if (!($fp = fopen($filename, "r"))) { print_error('cannotopenfile'); } // --- get header (field names) --- $header = explode($csv_delimiter, fgets($fp, GRADE_CSV_LINE_LENGTH)); // print some preview $numlines = 0; // 0 preview lines displayed echo $OUTPUT->heading(get_string('importpreview', 'grades')); echo '<table>';
/**
 * trim_utf8_bom() must strip the BOM that starts the string and leave the
 * one at the end untouched.
 */
public function test_trim_utf8_bom() {
    $marker = "\xef\xbb\xbf";
    $text = "Žluťoučký koníček";

    $input = $marker . $text . $marker;
    $expected = $text . $marker;

    $this->assertIdentical(textlib::trim_utf8_bom($input), $expected);
}
/// If a file has been uploaded, then process it $um = new upload_manager('userfile', false, false, null, false, 0); if ($um->preprocess_files() && confirm_sesskey()) { $filename = $um->files['userfile']['tmp_name']; // Large files are likely to take their time and memory. Let PHP know // that we'll take longer, and that the process should be recycled soon // to free up memory. @set_time_limit(0); @raise_memory_limit("192M"); if (function_exists('apache_child_terminate')) { @apache_child_terminate(); } $text = my_file_get_contents($filename); //trim utf-8 bom $textlib = new textlib(); $text = $textlib->trim_utf8_bom($text); //Fix mac/dos newlines $text = preg_replace('!\\r\\n?!', "\n", $text); $fp = fopen($filename, "w"); fwrite($fp, $text); fclose($fp); $fp = fopen($filename, "r"); // make arrays of valid fields for error checking $required = array("username" => 1, "password" => !$createpassword, "firstname" => 1, "lastname" => 1, "email" => 1); $optionalDefaults = array("mnethostid" => 1, "institution" => 1, "department" => 1, "city" => 1, "country" => 1, "lang" => 1, "auth" => 1, "timezone" => 1); $optional = array("idnumber" => 1, "icq" => 1, "phone1" => 1, "phone2" => 1, "address" => 1, "url" => 1, "description" => 1, "mailformat" => 1, "maildisplay" => 1, "htmleditor" => 1, "autosubscribe" => 1, "course1" => 1, "course2" => 1, "course3" => 1, "course4" => 1, "course5" => 1, "group1" => 1, "group2" => 1, "group3" => 1, "group4" => 1, "group5" => 1, "type1" => 1, "type2" => 1, "type3" => 1, "type4" => 1, "type5" => 1, "role1" => 1, "role2" => 1, "role3" => 1, "role4" => 1, "role5" => 1, "password" => $createpassword, "oldusername" => $allowrenames); // --- get header (field names) --- $header = split($csv_delimiter, fgets($fp, 1024)); // check for valid field names foreach ($header as $i => $h) { $h = trim($h);