-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathjson_parser_example.cpp
More file actions
237 lines (210 loc) · 9.38 KB
/
json_parser_example.cpp
File metadata and controls
237 lines (210 loc) · 9.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
// Example JSON parser using parser-combinators2
#include "parser_combinators2.hpp"
#include <map>
#include <print>
namespace pc = parser_combinators;
// Tracing hook: prints one line per parser event, indented by the current
// nesting depth (two columns per level).
// The three handlers override virtuals of pc::details::parser_hook; when the
// library invokes each of them is defined by that header, not by this file.
struct hook_impl : pc::details::parser_hook {
    // Current nesting depth; incremented in on_enter, decremented in on_exit.
    int indent_level = 0;

    // Prints the "entering" line at the current depth, then goes one level deeper.
    void on_enter(std::string_view name, std::string_view input) override {
        const int pad = indent_level * 2;
        // "{:>{}}" with an empty string argument emits `pad` spaces.
        std::println("{:>{}}--> Entering parser '{}' with input '{}'", "", pad, name, input);
        indent_level += 1;
    }

    // Steps back out one level first so the "exiting" line lines up with the
    // matching "entering" line.
    void on_exit(std::string_view name, std::string_view input) override {
        indent_level -= 1;
        const int pad = indent_level * 2;
        std::println("{:>{}}<-- Exiting parser '{}' with remaining input '{}'", "", pad, name, input);
    }

    // Prints the failure line at the current depth; the depth itself is not changed here.
    void on_failure(std::string_view name, std::string_view remaining) override {
        const int pad = indent_level * 2;
        std::println("{:>{}}!! Parser '{}' failed at remaining input '{}'", "", pad, name, remaining);
    }
};
// In-memory representation of a parsed JSON document.
// json_value is recursive: arrays and objects contain further json_values,
// hence the forward declaration ahead of the container aliases.
struct json_value;
using json_array = std::vector<json_value>;
using json_object = std::map<std::string, json_value>;

struct json_value {
    // Exactly one alternative is active: null, boolean, number, string,
    // array or object.
    std::variant<std::nullptr_t, bool, double, std::string, json_array, json_object> data;

    // A default-constructed value is JSON null.
    json_value() : data(nullptr) {}

    // Copy and move are the compiler-generated member-wise operations.
    json_value(const json_value&) = default;
    json_value(json_value&&) = default;
    json_value& operator=(const json_value&) = default;
    json_value& operator=(json_value&&) = default;

    // Implicit converting constructor: forwards any non-json_value argument
    // to the variant. The constraint keeps this template from competing with
    // the copy/move constructors above.
    template<typename Alt>
        requires (!std::is_same_v<std::remove_cvref_t<Alt>, json_value>)
    json_value(Alt&& alt) : data(std::forward<Alt>(alt)) {}
};
// Recursively pretty-prints `val` to stdout, one element per line.
//
// @param val    value to print
// @param indent nesting depth of `val`; each level is rendered as two spaces
//
// Layout:
//   - scalars are printed on a single line at `indent`
//   - array items are printed at `indent + 1`
//   - object keys are printed at `indent + 1` and their values at `indent + 2`
//   - empty arrays and empty objects are printed compactly as `[]` / `{}`
// Strings and keys are emitted verbatim between double quotes (no escaping).
void walk_json(const json_value& val, int indent = 0) {
    const std::string indent_str(indent * 2, ' ');
    std::visit([&](const auto& inner_val) {
        using T = std::decay_t<decltype(inner_val)>;
        if constexpr (std::is_same_v<T, std::nullptr_t>) {
            std::println("{}null", indent_str);
        } else if constexpr (std::is_same_v<T, bool>) {
            std::println("{}{}", indent_str, inner_val ? "true" : "false");
        } else if constexpr (std::is_same_v<T, double>) {
            std::println("{}{}", indent_str, inner_val);
        } else if constexpr (std::is_same_v<T, std::string>) {
            std::println("{}\"{}\"", indent_str, inner_val);
        } else if constexpr (std::is_same_v<T, json_array>) {
            if (inner_val.empty()) {
                std::println("{}[]", indent_str);
                return;
            }
            std::println("{}[", indent_str);
            for (const auto& item : inner_val) {
                walk_json(item, indent + 1);
            }
            std::println("{}]", indent_str);
        } else if constexpr (std::is_same_v<T, json_object>) {
            // Mirror the array branch: an empty object gets the compact form.
            if (inner_val.empty()) {
                std::println("{}{{}}", indent_str);
                return;
            }
            // Keys sit exactly one level below the braces, the same column an
            // array item would use; computed instead of hard-coded so it
            // always follows the two-spaces-per-level rule.
            const std::string key_indent((indent + 1) * 2, ' ');
            std::println("{}{{", indent_str);
            for (const auto& [key, value] : inner_val) {
                std::println("{}\"{}\":", key_indent, key);
                walk_json(value, indent + 2);
            }
            std::println("{}}}", indent_str);
        }
    }, val.data);
}
// example JSON parser
// lexeme<P>: wraps parser P so that whitespace on both sides of it is skipped.
// Both whitespace runs are discarded, so only P's own result is kept.
// P is a parser_wrapper from the library's detail namespace
// NOTE(review): this file spells the namespace `pc::details` elsewhere - confirm which spelling is right
template <auto P>
constexpr auto lexeme =
pc::dsl::combine<{
pc::dsl::whitespace().many().discard(), // leading whitespace: many() accepts zero or more, discard() drops the result
P, // the wrapped parser itself
pc::dsl::whitespace().many().discard(), // trailing whitespace: discarded results are not mapped into the result tuple/struct
}>();
// symbol parsers for JSON syntax
// every token is wrapped in lexeme, so whitespace around it is skipped
// lparen / rparen are not referenced anywhere else in the visible part of this file
constexpr auto lparen = lexeme<pc::dsl::symbol<"(">()>;
constexpr auto rparen = lexeme<pc::dsl::symbol<")">()>;
// array delimiters
constexpr auto lbracket = lexeme<pc::dsl::symbol<"[">()>;
constexpr auto rbracket = lexeme<pc::dsl::symbol<"]">()>;
// object delimiters
constexpr auto lbrace = lexeme<pc::dsl::symbol<"{">()>;
constexpr auto rbrace = lexeme<pc::dsl::symbol<"}">()>;
// key/value separator inside objects
constexpr auto colon = lexeme<pc::dsl::symbol<":">()>;
// element separator for arrays and objects
constexpr auto comma = lexeme<pc::dsl::symbol<",">()>;
// helper parser for sequences separated by a specific separator
//
// sepby<Sep>(P) matches one P followed by zero or more (Sep, P) pairs and
// yields a std::vector holding every P result in input order.
// Sep's own result is discarded. Wrap the returned parser in .optional()
// when an empty sequence must be accepted as well.
template <auto Sep>
consteval auto sepby(auto P) {
    return pc::dsl::combine({           // combine first element and rest
        P,                              // first element
        (pc::dsl::combine({             // combine separator and element
            Sep.discard(),              // separator
            P,                          // element
        }).many()),                     // takes zero or more
    }) | pc::dsl::fmap<[](auto&& first, auto&& rest) {
        // we use fmap to transform the result into a vector
        // note that the type of rest is std::vector<std::tuple<element_type>>
        // this is because each element is wrapped in a tuple due to combine,
        // and the Sep is discarded, so we just get<0> to extract the element
        // if you want to map the result tuple to a specific type, use fmap_struct
        using T = std::decay_t<decltype(first)>;
        std::vector<T> result;
        // the final size is known up front, so allocate exactly once
        result.reserve(rest.size() + 1);
        // forward instead of copy: steals `first` when it was passed as an rvalue
        result.push_back(std::forward<decltype(first)>(first));
        for (auto&& val : rest) {
            if constexpr (std::is_rvalue_reference_v<decltype(rest)>) {
                // `rest` was handed over as an rvalue, so its elements may be moved out
                result.push_back(std::move(std::get<0>(val)));
            } else {
                // `rest` is still owned by the caller: copy and leave it intact
                result.push_back(std::get<0>(val));
            }
        }
        return result;
    }>;
}
// the main JSON parser
// as JSON is recursive, we use a Y-combinator via dsl::lazy
// and the recursive self (a detail::parser_wrapper) is passed as an argument to the lambda
// every alternative is mapped to json_value, which is also the result type given to fix<>
constexpr auto json_parser = pc::dsl::lazy<[](auto self) {
    // parse JSON arrays: '[' value (',' value)* ']'
    auto array = pc::dsl::combine({
        lbracket.discard(),
        sepby<comma>(self).optional(), // use optional to allow empty arrays
        rbracket.discard(),
    }) | pc::dsl::fmap<[](std::optional<json_array> opt) -> json_value {
        // `opt` is our own by-value copy, so move the elements out of it
        // instead of copying the whole array a second time
        return std::move(opt).value_or(json_array{});
    }>;
    // string and numeric literals
    // string literal: alphanumeric characters between double quotes
    // (this grammar has no escapes, spaces or punctuation inside strings)
    auto string_lit = pc::dsl::combine({
        pc::dsl::symbol<"\"">().discard(),
        pc::dsl::alphanumeric().many().collect(),
        pc::dsl::symbol<"\"">().discard(),
    }) | pc::dsl::fmap<[](std::string_view s) -> json_value {
        return std::string(s);
    }>;
    // numeric literal: digits with an optional ".digits" fractional part
    // (this grammar has no sign and no exponent)
    auto numeric_lit = pc::dsl::combine({
        pc::dsl::numeric().many().collect(),
        pc::dsl::combine({
            pc::dsl::symbol<".">().discard(), // decimal point
            pc::dsl::numeric().many().collect(),
        }).optional(), // optional fractional part
    }).collect().guard<pc::preds::not_empty>() | pc::dsl::fmap<[](std::string_view s) -> json_value {
        // the collect() will just ignore all underlying structures and produce a single string_view
        // and then we use guard to ensure it's not empty, or it will break std::stod
        // finally, we convert to double
        // NOTE(review): both digit runs use many() (zero or more), so a lone "." looks like it
        // would pass the not_empty guard and make std::stod throw - confirm against the library
        return std::stod(std::string(s));
    }>;
    // one object member: "key" ':' value
    auto kwpair = pc::dsl::combine({
        string_lit,
        colon.commit().discard(), // commit to avoid backtracking after colon
        self,
    }) | pc::dsl::fmap<[](auto&& key_val, auto&& value) {
        // string_lit always yields a json_value holding a std::string
        // forward both parts so rvalue results are moved into the pair rather than copied
        return std::make_pair(
            std::get<std::string>(std::forward<decltype(key_val)>(key_val).data),
            std::forward<decltype(value)>(value));
    }>;
    // parse JSON objects: '{' member (',' member)* '}'
    auto object = pc::dsl::combine({
        lbrace.discard(),
        sepby<comma>(kwpair).optional(), // use optional to allow empty objects
        rbrace.discard(),
    }) | pc::dsl::fmap<[](std::optional<std::vector<std::pair<std::string, json_value>>> opt) -> json_value {
        json_object obj;
        if (opt) {
            // emplace does not overwrite: the first occurrence of a duplicated key is kept
            for (auto& p : *opt) obj.emplace(std::move(p));
        }
        return obj;
    }>;
    // finally, we use choice to combine all possible JSON value types
    return pc::dsl::choice({
        object,
        array,
        string_lit,
        numeric_lit,
        pc::dsl::symbol<"true">() | pc::dsl::fmap<[](...) -> json_value { return true; }>,
        pc::dsl::symbol<"false">() | pc::dsl::fmap<[](...) -> json_value { return false; }>,
        pc::dsl::symbol<"null">() | pc::dsl::fmap<[](...) -> json_value { return nullptr; }>,
    }) | pc::dsl::fmap<[](auto&& v) -> json_value {
        // choice() produces a variant (though all the inner types are json_value already)
        // we need to flatten it into json_value
        return std::visit([](auto&& inner_val) -> json_value {
            return std::forward<decltype(inner_val)>(inner_val);
        }, std::forward<decltype(v)>(v));
    }>;
}>.fix<json_value>();
int main() {
    // Create the tracing hook and register it on a fresh parse context.
    hook_impl tracer;
    auto ctx = pc::context();
    ctx.set_hook(&tracer);

    // Wrap json_parser with hook calls.
    // Because of the Y-combinator the hook only reaches the outermost level.
    // To trace every level, define the parsers without the Y-combinator first,
    // run apply_hook on those, and only then wrap them with dsl::lazy.
    // For a demonstration the outermost level is enough.
    constexpr auto traced_parser = pc::apply_hook<json_parser>();

    auto input = R"({"key1": "value1", "key2": [true, false, null], "key3": {"nestedKey": 123.4, "anotherKey": [114, 514]}})";
    auto outcome = pc::parser<traced_parser>::parse(input, ctx);

    if (outcome.success) {
        std::println("Parse succeeded");
        // Dump the parsed JSON value.
        walk_json(outcome.parsed);
        return 0;
    }

    // On failure, report the error message and where it happened.
    // The error position is stored as a pointer into the original input,
    // so use diff() to turn it into an offset.
    std::println("Parse failed: {}, pos={}", ctx.error.message, ctx.error.diff(input));
    return 1;
}