Skip to content

Commit

Permalink
Unquote csv strings (#18)
Browse files Browse the repository at this point in the history
* Unescape csv strings

* Add test to make sure we don't crash on invalid csv
  • Loading branch information
jbruechert authored May 2, 2024
1 parent 4c1503a commit 81821f8
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 4 deletions.
22 changes: 20 additions & 2 deletions include/utl/parser/csv.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,19 @@ inline void parse_column(cstr& s, T& arg) {
adjust_for_quote + adjust_for_cr));
}

/// Collapses every doubled quote (`""`) in `arg` into a single quote (`"`),
/// modifying the string in place.
///
/// Pairs are matched greedily from left to right. A quote that is not
/// immediately followed by another quote (including a trailing one) is kept
/// unchanged, so malformed input is tolerated rather than rejected.
///
/// Runs in a single pass: characters are compacted towards the front and the
/// string is shrunk once at the end, instead of erasing (and shifting the
/// tail) once per escaped quote.
///
/// @param arg  string to unescape; shrinks by one character per `""` pair.
inline void unescape_quoted_string(std::string& arg) {
  auto const size = arg.size();
  std::string::size_type write = 0;
  for (std::string::size_type read = 0; read < size; ++read, ++write) {
    if (arg[read] == '"' && read + 1 < size && arg[read + 1] == '"') {
      ++read;  // Drop the first quote of the pair; the second one is copied.
    }
    arg[write] = arg[read];
  }
  arg.resize(write);
}

template <typename IntType,
std::enable_if_t<std::is_integral<IntType>::value, int> = 0>
inline void parse_value(cstr& s, IntType& arg) {
Expand All @@ -71,8 +84,13 @@ inline void parse_value(cstr& s, bool& arg) {
s = s.skip_whitespace_front();
parse_arg(s, arg);
}
/// Reads the next value from `s` into the string `arg` by forwarding to
/// parse_arg.
inline void parse_value(cstr& s, std::string& arg) {
  parse_arg(s, arg);
}
/// Reads the next value from `s` into the cstr `arg` by forwarding to
/// parse_arg.
inline void parse_value(cstr& s, cstr& arg) {
  parse_arg(s, arg);
}
/// Reads the next value from `s` into the string `arg` via parse_arg, then
/// collapses doubled quotes (`""`) in the result into single quotes.
inline void parse_value(cstr& s, std::string& arg) {
  parse_arg(s, arg);
  // CSV escapes a quote inside a quoted field by doubling it; undo that here.
  unescape_quoted_string(arg);
}
/// Reads the next value from `s` into the cstr `arg` by forwarding to
/// parse_arg. Unlike the std::string overload, no quote unescaping is applied.
inline void parse_value(cstr& s, cstr& arg) {
  parse_arg(s, arg);
}

template <int Index, typename... Args>
typename std::enable_if<Index == sizeof...(Args)>::type read(
Expand Down
2 changes: 1 addition & 1 deletion include/utl/parser/csv_range.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ struct csv_range : public LineRange {
T t{};
cista::for_each_field(t, [&, i = 0u](auto& f) mutable {
if (row[i]) {
parse_arg(row[i], f.val());
parse_value(row[i], f.val());
}
++i;
});
Expand Down
23 changes: 22 additions & 1 deletion test/parser/pipe_csv_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,28 @@ TEST(pipe_csv, csv_escaped_string) {
| vec();

ASSERT_TRUE(result.size() == 1);
EXPECT_TRUE(result[0].foo_.val() == R"([""asd"", ""bsd""])");
EXPECT_TRUE(result[0].foo_.val() == R"(["asd", "bsd"])");
EXPECT_TRUE(result[0].bar_.val() == "asd");
EXPECT_TRUE(result[0].baz_.val() == "xxx");
}

TEST(pipe_csv, csv_invalid_escaped_string) {
  // Struct fields are declared as FOO, BAR, BAZ while the header below lists
  // BAR, FOO, BAZ; the expectations rely on columns being matched by name.
  struct dat {
    csv_col<std::string, UTL_NAME("FOO")> foo_;
    csv_col<std::string, UTL_NAME("BAR")> bar_;
    csv_col<std::string, UTL_NAME("BAZ")> baz_;
  };

  // Deliberately malformed CSV: the last field ends with an extra quote.
  // Parsing must not crash, and the stray quote is expected to survive.
  constexpr auto const csv_text = R"(BAR,FOO,BAZ
"asd","[""asd"", ""bsd""]","xxx""
)";
  auto const rows = line_range{make_buf_reader(csv_text, {})} //
                    | csv<dat, ','>() //
                    | vec();

  ASSERT_TRUE(rows.size() == 1);
  EXPECT_TRUE(rows[0].foo_.val() == R"(["asd", "bsd"])");
  EXPECT_TRUE(rows[0].bar_.val() == "asd");
  EXPECT_TRUE(rows[0].baz_.val() == R"(xxx")");
}

0 comments on commit 81821f8

Please sign in to comment.