/**
 * Parse the uploaded file with CsvParser and store the result.
 *
 * Logs one event before and one after parsing. The parser output is run
 * through array_filter() without a callback, so falsy entries are dropped
 * and the remaining entries keep their original keys. The filtered result
 * is written to $this->parsedFile.
 */
public function parseFile()
{
    $this->logEvent("Parsing file..");

    require_once 'CsvParser.php';

    $csvParser = new CsvParser($this->uploadedFile);
    $rows = $csvParser->getParsedFile();
    $this->parsedFile = array_filter($rows);

    $this->logEvent("File parsed!");
}
/**
 * Parse the uploaded file using the configured parser instructions.
 *
 * Reads the 'separator' and 'headlineDetection' entries from
 * $this->parserInstructions, hands them to CsvParser together with the
 * uploaded file, and stores the parser output in $this->parsedFile after
 * dropping falsy entries via array_filter().
 */
public function parseFile()
{
    require_once 'CsvParser.php';

    $fieldSeparator = $this->parserInstructions['separator'];
    $detectHeadline = $this->parserInstructions['headlineDetection'];

    $csvParser = new CsvParser($this->uploadedFile, $fieldSeparator, $detectHeadline);
    $rows = $csvParser->getParsedFile();

    $this->parsedFile = array_filter($rows);
}
/**
 * Checks CsvParser::parse() against two inputs: a plain two-row string and
 * a header-plus-two-records string that contains quoted fields, doubled
 * quotes and commas inside quoted fields.
 */
function test_parse()
{
    // Plain input: two rows of three unquoted fields.
    $simpleRows = CsvParser::parse("One,Two,Three\nFour,Five,Six");

    $this->assertEquals(2, count($simpleRows));
    $this->assertEquals('Two', $simpleRows[0][1]);
    $this->assertEquals('Six', $simpleRows[1][2]);

    // Quoted input: header row followed by two records.
    $quotedCsv = "Title,Date,Author,Published,Content,Tags\nThis is a test,2012-01-16 3:23 AM,jbroadway,Yes,\"<p>Testing \"\"one\"\" two 'three'<br />\\nfour five.</p>\",\"one, two\"\nThis is also a test,2012-01-20 12:23 PM,jbroadway,No,\"<p>Testing \"\"one\"\" two 'three'<br />\\nfour five.</p>\",\"three, four\"";
    $quotedRows = CsvParser::parse($quotedCsv);

    $this->assertEquals(3, count($quotedRows));

    // Header row.
    $this->assertEquals('Content', $quotedRows[0][4]);

    // First record: doubled quotes collapse to one, quoted commas survive.
    $this->assertEquals('2012-01-16 3:23 AM', $quotedRows[1][1]);
    $this->assertEquals("<p>Testing \"one\" two 'three'<br />\\nfour five.</p>", $quotedRows[1][4]);
    $this->assertEquals("one, two", $quotedRows[1][5]);

    // Second record.
    $this->assertEquals('This is also a test', $quotedRows[2][0]);
    $this->assertEquals("three, four", $quotedRows[2][5]);
}
/**
 * Load records from the CSV file $f into self::$messages.
 *
 * The first record decides the header: if it contains the value "name" it
 * is used as the header itself; otherwise the header defaults to
 * ("name", "html") and the first record is pushed back so it is read as
 * data. Every following record is stored, cast to an object, under its
 * "name" field.
 *
 * @param string $f path of the file to load
 * @return bool false when the file does not exist or the parser yields no
 *              first record; true otherwise
 */
private static function load_one($f)
{
    if (!file_exists($f)) {
        return false;
    }

    $parser = new CsvParser(file_get_contents($f), CsvParser::TYPE_GUESS);
    $parser->set_comment_chars("%#");

    $first = $parser->next();
    if (!$first) {
        return false;
    }

    $hasNameColumn = array_search("name", $first) !== false;
    if ($hasNameColumn) {
        $parser->set_header($first);
    } else {
        $parser->set_header(array("name", "html"));
        $parser->unshift($first);
    }

    for ($row = $parser->next(); $row !== false; $row = $parser->next()) {
        self::$messages[$row["name"]] = (object) $row;
    }

    return true;
}
/**
 * Import grades for $pset from CSV text.
 *
 * The first CSV record is used as the header. For every following record
 * the user is looked up by the first usable column, tried in this order:
 * github_username, seascode_username, huid, username, email, name. For all
 * columns except "name" the value "-" counts as absent. A name match is
 * discarded unless the stored first name equals the parsed first name or
 * starts with it followed by a space. Records with no usable column are
 * skipped silently.
 *
 * Problems are reported through $Conf->errorMsg(): an unknown user is
 * reported with the file name and CSV line number, and a record for which
 * save_grades() returns a falsy value is reported as "no grades set".
 *
 * @param object $pset  problem set; asserted to have gitless_grades set
 * @param string $text  CSV contents
 * @param string $fname file name used in error messages
 * @return bool always true
 */
function upload_grades($pset, $text, $fname)
{
    global $Conf;
    assert($pset->gitless_grades);

    $csv = new CsvParser($text);
    $csv->set_header($csv->next());

    while ($row = $csv->next()) {
        if (($who = get($row, "github_username")) && $who !== "-") {
            $user = $Conf->user_by_query("github_username=?", [$who]);
        } elseif (($who = get($row, "seascode_username")) && $who !== "-") {
            $user = $Conf->user_by_query("seascode_username=?", [$who]);
        } elseif (($who = get($row, "huid")) && $who !== "-") {
            $user = $Conf->user_by_query("huid=?", [$who]);
        } elseif (($who = get($row, "username")) && $who !== "-") {
            $user = $Conf->user_by_query("github_username=? or seascode_username=? order by github_username=? desc limit 1", [$who, $who, $who]);
        } elseif (($who = get($row, "email")) && $who !== "-") {
            $user = $Conf->user_by_email($who);
        } elseif ($who = get($row, "name")) {
            list($first, $last) = Text::split_name($who);
            $user = $Conf->user_by_query("firstName like '?s%' and lastName=?", [$first, $last]);
            // The LIKE prefix match is loose; only accept an exact first
            // name or one followed by further space-separated names.
            if ($user && $user->firstName != $first && !str_starts_with($user->firstName, "{$first} ")) {
                $user = null;
            }
        } else {
            continue;
        }

        if (!$user) {
            $Conf->errorMsg(htmlspecialchars($fname) . ":" . $csv->lineno() . ": unknown user “" . htmlspecialchars($who) . "”");
            continue;
        }

        if (!save_grades($user, $pset, null, $row, true)) {
            $Conf->errorMsg("no grades set for “" . htmlspecialchars($who) . "”");
        }
    }

    return true;
}
/**
 * Reset this object and rebuild it from a line-oriented text description.
 *
 * Each line is matched against the following forms, first match wins:
 *   - "n ID (EXCESS|s|t)"        declares the source or sink node
 *   - "c ninfo ID A [B]"         forwarded to dimacs_node() with extra arguments
 *   - "c nprice ID PRICE"        sets the node's price (numeric PRICE only)
 *   - "a SRC DST CAP"            adds an edge with cost 0
 *   - "a SRC DST X CAP COST"     adds an edge; X is passed as the fifth argument
 *   - "c finfo CAP [COST]"       constrains which edge the next "f" line selects
 *   - "f SRC DST FLOW"           assigns flow to a matching existing edge
 *   - "s FLOW"                   sets source/sink excess and maxflow
 *                                (only while the source excess is 0)
 *   - "c min_epsilon VALUE"      sets epsilon (numeric VALUE only)
 * Any other line starting with "a" or "f" is logged as a parse error;
 * everything else is ignored. Afterwards the collected nodes are sorted
 * numerically by id and stored in $this->v.
 *
 * @param string $str text to parse
 */
public function parse_dimacs($str)
{
    $this->reset();
    $vnames = array();
    $ismax = null; // not read anywhere in this method
    $next_cap = $next_cost = null;
    $has_edges = false;

    foreach (CsvParser::split_lines($str) as $lineno => $line) {
        // A pending "c finfo" constraint only applies to an immediately
        // following "f" line.
        if ($line[0] !== "f") {
            $next_cap = $next_cost = null;
        }

        if (preg_match('/\\An (\\d+) (-?\\d+|s|t)\\s*\\z/', $line, $m)) {
            $issink = $m[2] === "t" || $m[2] < 0;
            assert(!get($vnames, $m[1]));
            $vnames[$m[1]] = $v = $issink ? $this->sink : $this->source;
            if ($m[2] !== "s" && $m[2] !== "t") {
                $v->excess = (int) $m[2];
                $this->maxflow = abs($v->excess);
            }
        } elseif (preg_match('/\\Ac ninfo (\\d+) (\\S+)\\s*(\\S*)\\s*\\z/', $line, $m)) {
            $this->dimacs_node($vnames, $m[1], $m[2], $m[3]);
        } elseif (preg_match('/\\Ac nprice (\\d+) (\\S+)\\s*\\z/', $line, $m) && is_numeric($m[2])) {
            $v = $this->dimacs_node($vnames, $m[1]);
            $v->price = (float) $m[2];
        } elseif (preg_match('/\\Aa (\\d+) (\\d+) (\\d+)\\s*\\z/', $line, $m)) {
            assert(!$has_edges);
            $this->add_edge($this->dimacs_node($vnames, $m[1]), $this->dimacs_node($vnames, $m[2]), (int) $m[3], 0);
        } elseif (preg_match('/\\Aa (\\d+) (\\d+) (\\d+) (\\d+) (-?\\d+)\\s*\\z/', $line, $m)) {
            assert(!$has_edges);
            $this->add_edge($this->dimacs_node($vnames, $m[1]), $this->dimacs_node($vnames, $m[2]), (int) $m[4], (int) $m[5], (int) $m[3]);
        } elseif (preg_match('/\\Ac finfo (\\d+)\\s*(|-?\\d+)\\s*\\z/', $line, $m)) {
            $next_cap = (int) $m[1];
            $next_cost = (int) $m[2];
        } elseif (preg_match('/\\Af (\\d+) (\\d+) (-?\\d+)\\s*\\z/', $line, $m)) {
            // The first flow line finalizes the edge set.
            if (!$has_edges) {
                $this->initialize_edges();
                $has_edges = true;
            }
            $src = $this->dimacs_node($vnames, $m[1]);
            $dst = $this->dimacs_node($vnames, $m[2]);

            $found = false;
            foreach ($src->e as $e) {
                $cap_ok = $next_cap === null || $e->cap === $next_cap;
                $cost_ok = $next_cost === null || $e->cost === $next_cost;
                if ($e->dst === $dst && $cap_ok && $cost_ok) {
                    $e->flow = (int) $m[3];
                    $src->excess -= $e->flow;
                    $dst->excess += $e->flow;
                    $found = true;
                    break;
                }
            }
            if (!$found) {
                error_log("MinCostMaxFlow::parse_dimacs: line " . ($lineno + 1) . ": no such edge");
            }
            $next_cap = $next_cost = null;
        } elseif (preg_match('/\\As (\\d+)\\s*\\z/', $line, $m) && $this->source->excess === 0) {
            $this->source->excess = -(int) $m[1];
            $this->sink->excess = (int) $m[1];
            $this->maxflow = (int) $m[1];
        } elseif (preg_match('/\\Ac min_epsilon (\\S+)\\s*\\z/', $line, $m) && is_numeric($m[1])) {
            $this->epsilon = (float) $m[1];
        } elseif ($line[0] === "a" || $line[0] === "f") {
            error_log("MinCostMaxFlow::parse_dimacs: line " . ($lineno + 1) . ": parse error");
        }
    }

    ksort($vnames, SORT_NUMERIC);
    $this->v = array_values($vnames);
}
/**
 * Round-trips an ExportTestModelItem through CSV export and CSV parsing and
 * reports whether the parsed row matches the adapter output.
 *
 * A test item is saved with fixed attribute values plus the given textArea
 * content, reloaded by id, exported to CSV and parsed back. The first
 * parsed row is then compared (loosely, with ==) against the data and
 * header data produced by ModelToExportAdapter for the same item.
 *
 * @param mixed $textAreaContent value assigned to the item's textArea attribute
 * @return bool true when both the values and the keys of the first parsed
 *              row equal the adapter's data and header data
 */
protected function isValidCsvConversion($textAreaContent)
{
    $super = User::getByUsername('super');
    Yii::app()->user->userModel = $super;

    // Build and persist the fixture item.
    $testItem = new ExportTestModelItem();
    $testItem->firstName = 'Bob3';
    $testItem->lastName  = 'Bob3';
    $testItem->boolean   = true;
    $testItem->date      = '2002-04-03';
    $testItem->dateTime  = '2002-04-03 02:00:43';
    $testItem->float     = 54.22;
    $testItem->integer   = 10;
    $testItem->phone     = '21313213';
    $testItem->string    = 'aString';
    $testItem->textArea  = $textAreaContent;
    $testItem->url       = 'http://www.asite.com';
    $testItem->email     = '*****@*****.**';
    $testItem->save();

    $id = $testItem->id;
    $testItem->forget();
    unset($testItem);

    // Reload the item and collect what the adapter would export.
    $data       = array();
    $testItem   = ExportTestModelItem::getById($id);
    $adapter    = new ModelToExportAdapter($testItem);
    $data[]     = $adapter->getData();
    $headerData = $adapter->getHeaderData();

    // Export data to csv, and then revert csv back to array, so we compare data
    $csvData      = ExportItemToCsvFileUtil::export($data, $headerData, 'exports.csv', false);
    $revertedData = CsvParser::parseFromString($csvData);

    // ModelToExportAdapter is tested in detail elsewhere, so here its
    // getData() output is taken as the expected result.
    $adapter           = new ModelToExportAdapter($testItem);
    $compareData       = $adapter->getData();
    $compareHeaderData = $adapter->getHeaderData();

    // Using === here would fail as we are not setting all keys part of getData()'s return array
    $firstRow = $revertedData[0];
    return $compareData == array_values($firstRow)
        && $compareHeaderData == array_keys($firstRow);
}
/**
 * Import grades for $pset from CSV text.
 *
 * The first CSV record is used as the header. For every following record
 * the user is looked up by the first non-empty column among
 * seascode_username, email and name. A name match is discarded unless the
 * stored first name equals the parsed first name or starts with it followed
 * by a space. Records with none of these columns set are skipped silently.
 *
 * Problems are reported through $Conf->errorMsg(): an unknown user is
 * reported with the file name and CSV line number, and a record for which
 * save_grades() returns a falsy value is reported as "no grades set".
 *
 * @param object $pset  problem set; asserted to have gitless_grades set
 * @param string $text  CSV contents
 * @param string $fname file name used in error messages
 * @return bool always true
 */
function upload_grades($pset, $text, $fname)
{
    global $Conf;
    assert($pset->gitless_grades);

    $csv = new CsvParser($text);
    $csv->set_header($csv->next());

    while ($line = $csv->next()) {
        // Columns may be absent from the header; test with !empty() instead
        // of the `@` operator so unrelated diagnostics are not suppressed.
        if (!empty($line["seascode_username"])) {
            $who = $line["seascode_username"];
            $user = Contact::find_by_username($who);
        } elseif (!empty($line["email"])) {
            $who = $line["email"];
            $user = Contact::find_by_email($who);
        } elseif (!empty($line["name"])) {
            $who = $line["name"];
            list($first, $last) = Text::split_name($who);
            // NOTE(review): this query is built by string concatenation and
            // relies entirely on sqlqtrim() for escaping; LIKE wildcards in
            // $first do not appear to be neutralized. Confirm, and prefer a
            // parameterized query if the Contact API offers one.
            $user = Contact::find_by_query("firstName like '" . sqlqtrim($first) . "%' and lastName='" . sqlqtrim($last) . "'");
            // The LIKE prefix match is loose; only accept an exact first
            // name or one followed by further space-separated names.
            if ($user && $user->firstName != $first && !str_starts_with($user->firstName, "{$first} ")) {
                $user = null;
            }
        } else {
            continue;
        }

        if (!$user) {
            $Conf->errorMsg(htmlspecialchars($fname) . ":" . $csv->lineno() . ": unknown user “" . htmlspecialchars($who) . "”");
            continue;
        }

        if (!save_grades($user, $pset, null, $line, true)) {
            $Conf->errorMsg("no grades set for “" . htmlspecialchars($who) . "”");
        }
    }

    return true;
}
/**
 * Parse CSV text and apply every record in it.
 *
 * Works in two passes. The first pass reads all records, remembering each
 * one with its line number and passing it to collect_papers(); if that
 * produced any paper ids they are fetched in one call. The second pass
 * applies the records in order. Every 100th record (starting with the
 * first) the optional alert callback is invoked and the script time limit
 * is reset to 30 seconds. After the last record the callback is invoked
 * once more with false as its third argument, then finish() is called.
 *
 * @param string        $text     CSV contents
 * @param string|null   $filename stored in $this->filename
 * @param array|null    $defaults stored in $this->astate->defaults (empty array when falsy)
 * @param callable|null $alertf   called as ($this, line number, record or false)
 * @return mixed the result of error() for an empty file, false when the
 *               header cannot be installed, otherwise nothing
 */
function parse($text, $filename = null, $defaults = null, $alertf = null)
{
    global $Conf;

    $this->filename = $filename;
    $this->astate->defaults = $defaults ?: array();

    $csv = new CsvParser($text, CsvParser::TYPE_GUESS);
    $csv->set_comment_chars("%#");
    $csv->set_comment_function(array($this, "parse_csv_comment"));

    $header = $csv->next();
    if (!$header) {
        return $this->error($csv->lineno(), "empty file");
    }
    if (!$this->install_csv_header($csv, $header)) {
        return false;
    }

    // parse file, load papers all at once
    $queued = $pids = [];
    for ($row = $csv->next(); $row !== false; $row = $csv->next()) {
        $queued[] = [$csv->lineno(), $row];
        $this->collect_papers($row, $pids, false);
    }
    if (count($pids)) {
        $this->astate->lineno = $csv->lineno();
        $this->astate->fetch_prows(array_keys($pids));
    }

    // now parse assignment
    foreach ($queued as $index => $entry) {
        list($lineno, $row) = $entry;
        $this->astate->lineno = $lineno;

        if ($index % 100 == 0) {
            if ($alertf) {
                call_user_func($alertf, $this, $lineno, $row);
            }
            set_time_limit(30);
        }

        $this->apply($row);
    }

    if ($alertf) {
        call_user_func($alertf, $this, $csv->lineno(), false);
    }
    $this->finish();
}
/**
 * Detect the character encoding of a CSV file from one sample record.
 *
 * When the 'hasHeader' option is set, the first 'skipHeaderCount' records
 * are skipped; the next record read by CsvParser::parseCsvLine() is the
 * sample. Its raw line is tested against byte patterns for UTF-8, SJIS-win,
 * eucJP-win and ISO-2022-JP-MS, in that order; if none matches, the
 * decision is delegated to mb_detect_encoding().
 *
 * @param string $filePath path of the file to inspect
 * @return string|false|null encoding name; false when mb_detect_encoding()
 *                           cannot decide; null when the file cannot be
 *                           opened or has no record after the skipped ones
 */
public function detectEncodingFromFile($filePath)
{
    $d = preg_quote($this->options['delimiter']);
    $e = preg_quote($this->options['enclosure']);

    // for skip header
    $parseLimit = 0;
    if ($this->options['hasHeader']) {
        $parseLimit = $this->options['skipHeaderCount'];
    }

    $handle = fopen($filePath, "r");
    if ($handle === false) {
        // fopen() has already raised its warning; never hand a non-resource
        // to the line reader.
        return null;
    }

    // Read up to the first record past the header, then release the handle
    // on every path before doing any detection work.
    $found = false;
    $sample = null;
    $dataCount = 0;
    while (($result = CsvParser::parseCsvLine($handle, $d, $e)) !== false) {
        $dataCount++;
        if ($dataCount > $parseLimit) {
            $sample = $result['line'];
            $found = true;
            break;
        }
    }
    fclose($handle);

    if (!$found) {
        return null;
    }

    // @see http://d.hatena.ne.jp/t_komura/20090615/1245078430
    // Whole-string byte patterns, tried in order; the first match wins.
    $signatures = array(
        'UTF-8' => '/\\A([\\x00-\\x7f]|[\\xc0-\\xdf][\\x80-\\xbf]|[\\xe0-\\xef][\\x80-\\xbf]{2}|[\\xf0-\\xf7][\\x80-\\xbf]{3}|[\\xf8-\\xfb][\\x80-\\xbf]{4}|[\\xfc-\\xfd][\\x80-\\xbf]{5})*\\z/',
        'SJIS-win' => '/\\A([\\x00-\\x7f]|[\\xa1-\\xdf]|[\\x81-\\x9f\\xe0-\\xfc][\\x40-\\x7e\\x80-\\xfc])*\\z/',
        'eucJP-win' => '/\\A([\\x00-\\x7f]|[\\xa1-\\xfe][\\xa1-\\xfe]|\\x8e[\\xa1-\\xdf]|\\x8f[\\xa1-\\xfe][\\xa1-\\xfe])*\\z/',
        'ISO-2022-JP-MS' => '/\\A([\\x00-\\x1a\\x1c-\\x7f]|\\x1b\\x24[\\x40\\x42](?:[\\x21-\\x7e][\\x21-\\x7e])+|\\x1b\\x24\\x28[\\x40\\x42\\x44](?:[\\x21-\\x7e][\\x21-\\x7e])+|\\x1b\\x28\\x42|\\x1b\\x28\\x4a[\\x00-\\x1a\\x1c-\\x7f]+|\\x1b\\x28\\x49[\\x00-\\x1a\\x1c-\\x7f]+\\x1b\\x28\\x42)*\\z/',
    );
    foreach ($signatures as $encoding => $pattern) {
        if (preg_replace($pattern, '', $sample) === '') {
            return $encoding;
        }
    }

    return mb_detect_encoding($sample, array('UTF-8', 'eucJP-win', 'SJIS-win', 'ISO-2022-JP'));
}
/**
 * Create user accounts in bulk from uploaded text.
 *
 * Input handling:
 *  - The text is passed through cleannl() and, if it is not valid UTF-8,
 *    through windows_1252_to_utf8().
 *  - If the first line neither contains a "user"/"email" field nor at least
 *    two commas, every non-comment line is quoted as a single CSV field.
 *  - The first record is used as the header when it contains "email" or
 *    "user"; otherwise the header defaults to ("user") and the record is
 *    pushed back to be read as data.
 *
 * For each record a request object is built from the columns, a fixed set
 * of column aliases is applied (an alias never overrides a canonical
 * column that is already set), password fields are removed, a "name"
 * column is split into first/last name when neither is given, and
 * "topic: NAME" columns are turned into topic interest values
 * ("low" => -2, "high" => 4, other non-numeric => 0). The request is then
 * saved with save_user().
 *
 * Results are reported through $Conf message methods.
 *
 * @param string $text     uploaded text
 * @param string $filename file name used as prefix in per-line error
 *                         messages ("line " is used when empty)
 * @return bool true when no errors were recorded
 */
function parseBulkFile($text, $filename)
{
    global $Conf;

    $text = cleannl($text);
    if (!is_valid_utf8($text)) {
        $text = windows_1252_to_utf8($text);
    }
    $filename = $filename ? "{$filename}:" : "line ";
    $success = array();

    // Not recognizably CSV: treat each line as one quoted field.
    if (!preg_match('/\\A[^\\r\\n]*(?:,|\\A)(?:user|email)(?:[,\\r\\n]|\\z)/', $text)
        && !preg_match('/\\A[^\\r\\n]*,[^\\r\\n]*,/', $text)) {
        $tarr = CsvParser::split_lines($text);
        foreach ($tarr as &$t) {
            if (($t = trim($t)) && $t[0] !== "#" && $t[0] !== "%") {
                $t = CsvGenerator::quote($t);
            }
            $t .= "\n";
        }
        unset($t);
        $text = join("", $tarr);
    }

    $csv = new CsvParser($text);
    $csv->set_comment_chars("#%");
    $line = $csv->next();
    if ($line && (array_search("email", $line) !== false || array_search("user", $line) !== false)) {
        $csv->set_header($line);
    } else {
        $csv->set_header(array("user"));
        $csv->unshift($line);
    }

    $cj_template = (object) array();

    // Lower-cased topic name => topic id.
    $topic_revmap = array();
    foreach ($Conf->topic_map() as $id => $name) {
        $topic_revmap[strtolower($name)] = $id;
    }

    // Accepted column alias => canonical property name.
    $aliases = array(
        "firstname" => "firstName", "first" => "firstName",
        "lastname" => "lastName", "last" => "lastName",
        "fullname" => "name", "fullName" => "name",
        "voice" => "voicePhoneNumber", "phone" => "voicePhoneNumber",
        "address1" => "addressLine1",
        "province" => "state", "region" => "state",
        "address2" => "addressLine2",
        "postalcode" => "zipCode", "zip" => "zipCode",
        "tags" => "contactTags",
    );

    $unknown_topics = array(); // topic name => true
    $errors = array();

    while (($line = $csv->next()) !== false) {
        $cj = clone $cj_template;
        foreach ($line as $k => $v) {
            $cj->{$k} = $v;
        }
        foreach ($aliases as $k => $x) {
            if (isset($cj->{$k}) && !isset($cj->{$x})) {
                $cj->{$x} = $cj->{$k};
            }
        }

        // thou shalt not set passwords by bulk update
        unset($cj->password, $cj->password_plaintext, $cj->new_password);

        if (isset($cj->name) && !isset($cj->firstName) && !isset($cj->lastName)) {
            list($cj->firstName, $cj->lastName) = Text::split_name($cj->name);
        }

        if (count($topic_revmap)) {
            foreach (array_keys($line) as $k) {
                if (!preg_match('/^topic:\\s*(.*?)\\s*$/i', $k, $m)) {
                    continue;
                }
                $topic_key = strtolower($m[1]);
                if (!isset($topic_revmap[$topic_key])) {
                    $unknown_topics[$m[1]] = true;
                    continue;
                }
                $ti = $topic_revmap[$topic_key];

                $x = $line[$k];
                $level = strtolower($x);
                if ($level === "low") {
                    $x = -2;
                } elseif ($level === "high") {
                    $x = 4;
                } elseif (!is_numeric($x)) {
                    $x = 0;
                }

                if (empty($cj->topics)) {
                    $cj->topics = (object) array();
                }
                $cj->topics->{$ti} = $x;
            }
        }

        $cj->id = "new";
        $ustatus = new UserStatus(array("send_email" => true, "no_deprivilege_self" => true));
        if ($saved_user = save_user($cj, $ustatus, null, true)) {
            $success[] = "<a href=\"" . hoturl("profile", "u=" . urlencode($saved_user->email)) . "\">" . Text::user_html_nolink($saved_user) . "</a>";
        }
        foreach ($ustatus->error_messages() as $e) {
            $errors[] = "<span class='lineno'>" . $filename . $csv->lineno() . ":</span> " . $e;
        }
    }

    if (count($unknown_topics)) {
        // The topic names are the array KEYS (the values are all `true`),
        // so the keys are what must be listed in the message.
        $errors[] = "There were unrecognized topics (" . htmlspecialchars(commajoin(array_keys($unknown_topics))) . ").";
    }

    if (count($success) == 1) {
        $successMsg = "Saved account " . $success[0] . ".";
    } elseif (count($success)) {
        $successMsg = "Saved " . plural($success, "account") . ": " . commajoin($success) . ".";
    }
    if (count($errors)) {
        $errorMsg = "<div class='parseerr'><p>" . join("</p>\n<p>", $errors) . "</p></div>";
    }

    if (count($success) && count($errors)) {
        $Conf->confirmMsg($successMsg . "<br />{$errorMsg}");
    } elseif (count($success)) {
        $Conf->confirmMsg($successMsg);
    } elseif (count($errors)) {
        Conf::msg_error($errorMsg);
    } else {
        $Conf->warnMsg("Nothing to do.");
    }

    return count($errors) == 0;
}
// Dbl::format_query tests xassert_eqq(Dbl::format_query("Hello"), "Hello"); xassert_eqq(Dbl::format_query("Hello??"), "Hello?"); xassert_eqq(Dbl::format_query("Hello????"), "Hello??"); xassert_eqq(Dbl::format_query("select ?, ?, ?, ?s, ?s, ?s, ?", 1, "a", null, 2, "b", null, 3), "select 1, 'a', NULL, 2, b, , 3"); xassert_eqq(Dbl::format_query_apply("select ?, ?, ?, ?s, ?s, ?s, ?", array(1, "a", null, 2, "b", null, 3)), "select 1, 'a', NULL, 2, b, , 3"); xassert_eqq(Dbl::format_query_apply("select ?{2}, ?{1}, ?, ?s, ?s, ?s, ?", array(1, "a", null, 2, "b", null, 3)), "select 'a', 1, NULL, 2, b, , 3"); xassert_eqq(Dbl::format_query_apply("select ?{2}, ?{1}, ?{ab}, ?{2}s, ?{1}s, ?{ab}s, ?", array(1, "a", "ab" => "Woah", "Leftover")), "select 'a', 1, 'Woah', a, 1, Woah, 'Leftover'"); // Csv::split_lines tests xassert_array_eqq(CsvParser::split_lines(""), array()); xassert_array_eqq(CsvParser::split_lines("\r"), array("\r")); xassert_array_eqq(CsvParser::split_lines("\n"), array("\n")); xassert_array_eqq(CsvParser::split_lines("\r\n"), array("\r\n")); xassert_array_eqq(CsvParser::split_lines("\r\r\n"), array("\r", "\r\n")); xassert_array_eqq(CsvParser::split_lines("\r\naaa"), array("\r\n", "aaa")); xassert_array_eqq(CsvParser::split_lines("\na\r\nb\rc\n"), array("\n", "a\r\n", "b\r", "c\n")); // random PHP behavior tests if (PHP_MAJOR_VERSION >= 7) { xassert_eqq(substr("", 0, 1), ""); } else { xassert_eqq(substr("", 0, 1), false); } $s = ""; xassert_eqq(@$s[0], ""); // Json tests xassert_eqq(json_encode(Json::decode("{}")), "{}"); xassert_eqq(json_encode(Json::decode('"\\u0030"')), '"0"'); xassert_eqq(Json::encode("\n"), '"\\n"'); xassert_eqq(Json::encode(""), '"\\u0007"'); xassert_eqq(json_encode(Json::decode('{"1":"1"}')), '{"1":"1"}'); $x = Json::decode_landmarks('{