rapidyaml 0.14.0
parse and emit YAML, and do it fast
Loading...
Searching...
No Matches
parser_state.hpp
Go to the documentation of this file.
1#ifndef _C4_YML_PARSER_STATE_HPP_
2#define _C4_YML_PARSER_STATE_HPP_
3
4#ifndef _C4_YML_ERROR_HPP_
5#include "c4/yml/error.hpp"
6#endif
7
8// NOLINTBEGIN(hicpp-signed-bitwise)
9
10namespace c4 {
11namespace yml {
12
13/** data type for @ref ParserState_e */
14using ParserFlag_t = int;
15
16/** Enumeration of the state flags for the parser */
17typedef enum : ParserFlag_t { // NOLINT
18 RTOP = 0x01 << 0, ///< reading at top level
19 RUNK = 0x01 << 1, ///< reading unknown state (when starting): must determine whether scalar, map or seq
20 RMAP = 0x01 << 2, ///< reading a map
21 RSEQ = 0x01 << 3, ///< reading a seq
22 RFLOW = 0x01 << 4, ///< reading is inside explicit flow chars: [] or {}
23 RBLCK = 0x01 << 5, ///< reading in block mode
24 QMRK = 0x01 << 6, ///< reading an explicit key (`? key`)
25 RKEY = 0x01 << 7, ///< reading a key
26 RVAL = 0x01 << 9, ///< reading a val
27 RKCL = 0x01 << 8, ///< reading the key colon (ie the : after the key in the map)
28 RNXT = 0x01 << 10, ///< read next sibling
29 SSCL = 0x01 << 11, ///< there's a stored scalar
30 QSCL = 0x01 << 12, ///< stored scalar was quoted
31 RSET = 0x01 << 13, ///< the (implicit) map being read is a !!set. @see https://yaml.org/type/set.html
32 RDOC = 0x01 << 14, ///< reading a document
33 NDOC = 0x01 << 15, ///< no document mode. a document has ended and another has not started yet.
34 USTY = 0x01 << 16, ///< reading in unknown style mode - must determine FLOW or BLCK
35 //! reading an implicit map nested in an explicit seq.
36 //! eg, {key: [key2: value2, key3: value3]}
37 //! is parsed as {key: [{key2: value2}, {key3: value3}]}
38 RSEQIMAP = 0x01 << 17,
40
41
42/** @cond dev */
43#ifdef RYML_DBG
44namespace detail {
45RYML_EXPORT csubstr _parser_flags_to_str(substr buf, ParserFlag_t flags);
46} // namespace detail
47#endif
48/** @endcond */
49
50
51//-----------------------------------------------------------------------------
52//-----------------------------------------------------------------------------
53//-----------------------------------------------------------------------------
54
55/** Helper to control the line contents while parsing a buffer */
57{
58 substr rem; ///< current line remainder, without newline characters
59 substr full; ///< full line, including newline characters `\n` and `\r`
60 size_t num_cols; ///< number of columns in the line, excluding newline
61 ///< characters (ie the initial size of rem)
62 size_t indentation; ///< number of spaces on the beginning of the line.
63
65
67 {
68 _RYML_ASSERT_BASIC(start <= buf.len);
69 size_t end = start;
70 // get the current line stripped of newline chars
71 while((end < buf.len) && (buf.str[end] != '\n'))
72 ++end;
73 if(end < buf.len)
74 {
75 _RYML_ASSERT_BASIC(buf[end] == '\n');
76 full = buf.range(start, end + 1);
77 rem = buf.range(start, end);
78 }
79 else
80 {
81 // buffer ends without newline
82 full = rem = buf.sub(start);
83 }
84 size_t pos = rem.last_not_of('\r');
85 rem.len = (pos != npos) ? pos + 1 : 0;
86 num_cols = rem.len;
87 _RYML_ASSERT_BASIC(rem.find('\r') == npos);
88 // TODO move this to the parser state
89 indentation = rem.first_not_of(' '); // find the first column where the character is not a space
90 }
91
92 C4_ALWAYS_INLINE size_t current_col() const RYML_NOEXCEPT
93 {
94 _RYML_ASSERT_BASIC(rem.str >= full.str);
95 return static_cast<size_t>(rem.str - full.str);
96 }
97
98 C4_ALWAYS_INLINE size_t current_col(csubstr s) const RYML_NOEXCEPT
99 {
100 _RYML_ASSERT_BASIC(s.str >= full.str);
101 _RYML_ASSERT_BASIC(s.str <= rem.end());
102 return static_cast<size_t>(s.str - full.str);
103 }
104};
105static_assert(std::is_standard_layout<LineContents>::value, "LineContents not standard");
106
107
108//-----------------------------------------------------------------------------
109//-----------------------------------------------------------------------------
110//-----------------------------------------------------------------------------
111
113{
117 size_t indref; ///< the reference indentation in the current block scope
119 id_type node_id; ///< don't hold a pointer to the node as it will be relocated during tree resizes
120 size_t scalar_col; ///< the column where the scalar (or its quotes) begins
123
124 ParserState() = default;
125
126 void start_parse(const char *file, id_type node_id_)
127 {
128 level = 0;
129 pos.name = to_csubstr(file);
130 pos.offset = 0;
131 pos.line = 1;
132 pos.col = 1;
133 node_id = node_id_;
134 more_indented = false;
135 scalar_col = 0;
136 indref = 0;
137 has_children = false;
138 }
139
141 {
142 node_id = NONE;
143 indref = npos;
144 more_indented = false;
145 ++level;
146 has_children = false;
147 }
148
149 C4_ALWAYS_INLINE void reset_before_pop(ParserState const& to_pop)
150 {
151 pos = to_pop.pos;
153 }
154
155public:
156
157 C4_ALWAYS_INLINE bool at_line_beginning() const noexcept
158 {
159 return line_contents.rem.str == line_contents.full.str;
160 }
161 C4_ALWAYS_INLINE bool at_first_token() const noexcept
162 {
163 _RYML_ASSERT_BASIC(line_contents.indentation != npos);
164 return pos.col == line_contents.indentation + 1;
165 }
166 C4_ALWAYS_INLINE bool indentation_eq() const noexcept
167 {
168 _RYML_ASSERT_BASIC(indref != npos);
169 return line_contents.indentation != npos
170 && line_contents.indentation == indref;
171 }
172 C4_ALWAYS_INLINE bool indentation_eq_extra() const noexcept
173 {
174 _RYML_ASSERT_BASIC(indref != npos);
175 return line_contents.indentation != npos
176 && line_contents.indentation == indref + 1u;
177 }
178 C4_ALWAYS_INLINE bool indentation_ge() const noexcept
179 {
180 _RYML_ASSERT_BASIC(indref != npos);
181 return line_contents.indentation != npos
182 && line_contents.indentation >= indref;
183 }
184 C4_ALWAYS_INLINE bool indentation_ge_extra() const noexcept
185 {
186 _RYML_ASSERT_BASIC(indref != npos);
187 return line_contents.indentation != npos
188 && line_contents.indentation >= indref + 1u;
189 }
190 C4_ALWAYS_INLINE bool indentation_gt() const noexcept
191 {
192 _RYML_ASSERT_BASIC(indref != npos);
193 return line_contents.indentation != npos
194 && line_contents.indentation > indref;
195 }
196 C4_ALWAYS_INLINE bool indentation_gt_extra() const noexcept
197 {
198 _RYML_ASSERT_BASIC(indref != npos);
199 return line_contents.indentation != npos
200 && line_contents.indentation > indref + 1u;
201 }
202 C4_ALWAYS_INLINE bool indentation_lt() const noexcept
203 {
204 _RYML_ASSERT_BASIC(indref != npos);
205 return line_contents.indentation != npos && line_contents.indentation < indref;
206 }
207 C4_ALWAYS_INLINE bool indentation_lt_extra() const noexcept
208 {
209 _RYML_ASSERT_BASIC(indref != npos);
210 return line_contents.indentation != npos
211 && line_contents.indentation < indref + 1u;
212 }
213};
214static_assert(std::is_standard_layout<ParserState>::value, "ParserState not standard");
215
216
217} // namespace yml
218} // namespace c4
219
220// NOLINTEND(hicpp-signed-bitwise)
221
222#endif /* _C4_YML_PARSER_STATE_HPP_ */
#define RYML_NOEXCEPT
Conditionally expands to noexcept when RYML_USE_ASSERT is 0 and is empty otherwise.
Definition common.hpp:197
Error utilities used by ryml.
#define RYML_EXPORT
Definition export.hpp:18
csubstr to_csubstr(const char(&s)[N]) noexcept
Definition substr.hpp:2381
basic_substring< char > substr
a mutable string view
Definition substr.hpp:2356
basic_substring< const char > csubstr
an immutable string view
Definition substr.hpp:2357
int ParserFlag_t
data type for ParserState_e
ParserState_e
Enumeration of the state flags for the parser.
@ RTOP
reading at top level
@ RSET
the (implicit) map being read is a !!set.
@ RSEQ
reading a seq
@ RNXT
read next sibling
@ RUNK
reading unknown state (when starting): must determine whether scalar, map or seq
@ RKEY
reading a key
@ RKCL
reading the key colon (ie the : after the key in the map)
@ NDOC
no document mode. a document has ended and another has not started yet.
@ RDOC
reading a document
@ QSCL
stored scalar was quoted
@ RBLCK
reading in block mode
@ RMAP
reading a map
@ USTY
reading in unknown style mode - must determine FLOW or BLCK. (The text fused onto this entry, "reading an implicit map nested in an explicit seq", describes RSEQIMAP rather than USTY.)
@ QMRK
reading an explicit key (? key)
@ SSCL
there's a stored scalar
@ RVAL
reading a val
@ RFLOW
reading is inside explicit flow chars: [] or {}
RYML_ID_TYPE id_type
The type of a node id in the YAML tree; to override the default type, define the macro RYML_ID_TYPE t...
Definition common.hpp:249
@ npos
a null string position
Definition common.hpp:263
@ NONE
an index to none
Definition common.hpp:256
(Undefined by default) Use shorter error message from checks/asserts: do not show the check condition...
Definition common.cpp:14
Helper to control the line contents while parsing a buffer.
substr rem
current line remainder, without newline characters
substr full
full line, including newline characters \n and \r
void reset_with_next_line(substr buf, size_t start) RYML_NOEXCEPT
size_t num_cols
number of columns in the line, excluding newline characters (ie the initial size of rem)
LineContents() RYML_NOEXCEPT=default
size_t current_col(csubstr s) const RYML_NOEXCEPT
size_t indentation
number of spaces on the beginning of the line.
size_t current_col() const RYML_NOEXCEPT
holds a source or yaml file position, for example when an error is detected; See also location_format...
Definition common.hpp:289
bool indentation_eq_extra() const noexcept
bool at_line_beginning() const noexcept
void start_parse(const char *file, id_type node_id_)
bool indentation_lt() const noexcept
bool indentation_gt_extra() const noexcept
bool indentation_ge_extra() const noexcept
bool indentation_lt_extra() const noexcept
bool at_first_token() const noexcept
LineContents line_contents
bool indentation_eq() const noexcept
size_t indref
the reference indentation in the current block scope
id_type node_id
don't hold a pointer to the node as it will be relocated during tree resizes
bool indentation_ge() const noexcept
bool indentation_gt() const noexcept
void reset_before_pop(ParserState const &to_pop)