rapidyaml  0.13.0
parse and emit YAML, and do it fast
parser_state.hpp
Go to the documentation of this file.
1 #ifndef _C4_YML_PARSER_STATE_HPP_
2 #define _C4_YML_PARSER_STATE_HPP_
3 
4 #ifndef _C4_YML_ERROR_HPP_
5 #include "c4/yml/error.hpp"
6 #endif
7 
8 // NOLINTBEGIN(hicpp-signed-bitwise)
9 
10 namespace c4 {
11 namespace yml {
12 
/** data type for @ref ParserState_e */
using ParserFlag_t = int;

/** Enumeration of the state flags for the parser.
 *
 * Every enumerator is a distinct single bit of @ref ParserFlag_t, so
 * several of them can be combined in one flags value with bitwise
 * operators. The values are explicit; the declaration order does not
 * follow the bit order everywhere (RVAL is bit 9, RKCL is bit 8). */
typedef enum : ParserFlag_t { // NOLINT
    RTOP = 0x01 << 0,   ///< reading at top level
    RUNK = 0x01 << 1,   ///< reading unknown state (when starting): must determine whether scalar, map or seq
    RMAP = 0x01 << 2,   ///< reading a map
    RSEQ = 0x01 << 3,   ///< reading a seq
    RFLOW = 0x01 << 4,  ///< reading is inside explicit flow chars: [] or {}
    RBLCK = 0x01 << 5,  ///< reading in block mode
    QMRK = 0x01 << 6,   ///< reading an explicit key (`? key`)
    RKEY = 0x01 << 7,   ///< reading a key
    RVAL = 0x01 << 9,   ///< reading a val
    RKCL = 0x01 << 8,   ///< reading the key colon (ie the : after the key in the map)
    RNXT = 0x01 << 10,  ///< read next sibling
    SSCL = 0x01 << 11,  ///< there's a stored scalar
    QSCL = 0x01 << 12,  ///< stored scalar was quoted
    RSET = 0x01 << 13,  ///< the (implicit) map being read is a !!set. @see https://yaml.org/type/set.html
    RDOC = 0x01 << 14,  ///< reading a document
    NDOC = 0x01 << 15,  ///< no document mode. a document has ended and another has not started yet.
    USTY = 0x01 << 16,  ///< reading in unknown style mode - must determine RFLOW or RBLCK

    // The description below is a separate comment block, kept apart from
    // the trailing comment of USTY so that it documents RSEQIMAP only.
    /** reading an implicit map nested in an explicit seq.
     * eg, `{key: [key2: value2, key3: value3]}`
     * is parsed as `{key: [{key2: value2}, {key3: value3}]}` */
    RSEQIMAP = 0x01 << 17,
} ParserState_e;
40 
41 
42 /** @cond dev */
// Debug-only declaration: it is visible only when RYML_DBG is defined.
// The function takes a writable buffer and a set of parser flags and
// returns a csubstr. NOTE(review): presumably the returned string is a
// textual rendering of `flags` written into `buf` - the definition is
// not in this file, confirm there.
43 #ifdef RYML_DBG
44 namespace detail {
45 RYML_EXPORT csubstr _parser_flags_to_str(substr buf, ParserFlag_t flags);
46 } // namespace detail
47 #endif
48 /** @endcond */
49 
50 
51 //-----------------------------------------------------------------------------
52 //-----------------------------------------------------------------------------
53 //-----------------------------------------------------------------------------
54 
55 /** Helper to control the line contents while parsing a buffer */
57 {
58  substr rem; ///< current line remainder, without newline characters
59  substr full; ///< full line, including newline characters `\n` and `\r`
60  size_t num_cols; ///< number of columns in the line, excluding newline
61  ///< characters (ie the initial size of rem)
62  size_t indentation; ///< number of spaces on the beginning of the line.
63 
65 
66  void reset_with_next_line(substr buf, size_t start) RYML_NOEXCEPT
67  {
68  _RYML_ASSERT_BASIC(start <= buf.len);
69  size_t end = start;
70  // get the current line stripped of newline chars
71  while((end < buf.len) && (buf.str[end] != '\n'))
72  ++end;
73  if(end < buf.len)
74  {
75  _RYML_ASSERT_BASIC(buf[end] == '\n');
76  full = buf.range(start, end + 1);
77  rem = buf.range(start, end);
78  }
79  else
80  {
81  // buffer ends without newline
82  full = rem = buf.sub(start);
83  }
84  size_t pos = rem.last_not_of('\r');
85  rem.len = (pos != npos) ? pos + 1 : 0;
86  num_cols = rem.len;
87  _RYML_ASSERT_BASIC(rem.find('\r') == npos);
88  // TODO move this to the parser state
89  indentation = rem.first_not_of(' '); // find the first column where the character is not a space
90  }
91 
92  C4_ALWAYS_INLINE size_t current_col() const RYML_NOEXCEPT
93  {
94  _RYML_ASSERT_BASIC(rem.str >= full.str);
95  return static_cast<size_t>(rem.str - full.str);
96  }
97 
98  C4_ALWAYS_INLINE size_t current_col(csubstr s) const RYML_NOEXCEPT
99  {
100  _RYML_ASSERT_BASIC(s.str >= full.str);
101  _RYML_ASSERT_BASIC(s.str <= rem.end());
102  return static_cast<size_t>(s.str - full.str);
103  }
104 };
105 static_assert(std::is_standard_layout<LineContents>::value, "LineContents not standard");
106 
107 
108 //-----------------------------------------------------------------------------
109 //-----------------------------------------------------------------------------
110 //-----------------------------------------------------------------------------
111 
113 {
117  size_t indref; ///< the reference indentation in the current block scope
119  id_type node_id; ///< don't hold a pointer to the node as it will be relocated during tree resizes
120  size_t scalar_col; // the column where the scalar (or its quotes) begin
123 
124  ParserState() = default;
125 
126  void start_parse(const char *file, id_type node_id_)
127  {
128  level = 0;
129  pos.name = to_csubstr(file);
130  pos.offset = 0;
131  pos.line = 1;
132  pos.col = 1;
133  node_id = node_id_;
134  more_indented = false;
135  scalar_col = 0;
136  indref = 0;
137  has_children = false;
138  }
139 
141  {
142  node_id = NONE;
143  indref = npos;
144  more_indented = false;
145  ++level;
146  has_children = false;
147  }
148 
149  C4_ALWAYS_INLINE void reset_before_pop(ParserState const& to_pop)
150  {
151  pos = to_pop.pos;
152  line_contents = to_pop.line_contents;
153  }
154 
155 public:
156 
157  C4_ALWAYS_INLINE bool at_line_beginning() const noexcept
158  {
159  return line_contents.rem.str == line_contents.full.str;
160  }
161  C4_ALWAYS_INLINE bool at_first_token() const noexcept
162  {
163  _RYML_ASSERT_BASIC(line_contents.indentation != npos);
164  return pos.col == line_contents.indentation + 1;
165  }
166  C4_ALWAYS_INLINE bool indentation_eq() const noexcept
167  {
168  _RYML_ASSERT_BASIC(indref != npos);
169  return line_contents.indentation != npos
171  }
172  C4_ALWAYS_INLINE bool indentation_eq_extra() const noexcept
173  {
174  _RYML_ASSERT_BASIC(indref != npos);
175  return line_contents.indentation != npos
176  && line_contents.indentation == indref + 1u;
177  }
178  C4_ALWAYS_INLINE bool indentation_ge() const noexcept
179  {
180  _RYML_ASSERT_BASIC(indref != npos);
181  return line_contents.indentation != npos
183  }
184  C4_ALWAYS_INLINE bool indentation_ge_extra() const noexcept
185  {
186  _RYML_ASSERT_BASIC(indref != npos);
187  return line_contents.indentation != npos
188  && line_contents.indentation >= indref + 1u;
189  }
190  C4_ALWAYS_INLINE bool indentation_gt() const noexcept
191  {
192  _RYML_ASSERT_BASIC(indref != npos);
193  return line_contents.indentation != npos
195  }
196  C4_ALWAYS_INLINE bool indentation_gt_extra() const noexcept
197  {
198  _RYML_ASSERT_BASIC(indref != npos);
199  return line_contents.indentation != npos
200  && line_contents.indentation > indref + 1u;
201  }
202  C4_ALWAYS_INLINE bool indentation_lt() const noexcept
203  {
204  _RYML_ASSERT_BASIC(indref != npos);
206  }
207  C4_ALWAYS_INLINE bool indentation_lt_extra() const noexcept
208  {
209  _RYML_ASSERT_BASIC(indref != npos);
210  return line_contents.indentation != npos
211  && line_contents.indentation < indref + 1u;
212  }
213 };
214 static_assert(std::is_standard_layout<ParserState>::value, "ParserState not standard");
215 
216 
217 } // namespace yml
218 } // namespace c4
219 
220 // NOLINTEND(hicpp-signed-bitwise)
221 
222 #endif /* _C4_YML_PARSER_STATE_HPP_ */
#define RYML_NOEXCEPT
Conditionally expands to noexcept when RYML_USE_ASSERT is 0 and is empty otherwise.
Definition: common.hpp:192
Error utilities used by ryml.
#define RYML_EXPORT
Definition: export.hpp:18
csubstr to_csubstr(substr s) noexcept
neutral version for use in generic code
Definition: substr.hpp:2204
RYML_ID_TYPE id_type
The type of a node id in the YAML tree; to override the default type, define the macro RYML_ID_TYPE t...
Definition: common.hpp:244
@ npos
a null string position
Definition: common.hpp:258
ParserState_e
Enumeration of the state flags for the parser.
@ RTOP
reading at top level
@ RSET
the (implicit) map being read is a !!set.
@ RSEQ
reading a seq
@ RNXT
read next sibling
@ RUNK
reading unknown state (when starting): must determine whether scalar, map or seq
@ RKEY
reading a key
@ RKCL
reading the key colon (ie the : after the key in the map)
@ NDOC
no document mode. a document has ended and another has not started yet.
@ RDOC
reading a document
@ QSCL
stored scalar was quoted
@ RBLCK
reading in block mode
@ RMAP
reading a map
@ USTY
reading in unknown style mode - must determine RFLOW or RBLCK
@ QMRK
reading an explicit key (? key)
@ SSCL
there's a stored scalar
@ RVAL
reading a val
@ RFLOW
reading is inside explicit flow chars: [] or {}
int ParserFlag_t
data type for ParserState_e
@ NONE
an index to none
Definition: common.hpp:251
(Undefined by default) Use shorter error message from checks/asserts: do not show the check condition...
Definition: common.cpp:14
Helper to control the line contents while parsing a buffer.
substr rem
current line remainder, without newline characters
substr full
full line, including newline characters \n and \r
void reset_with_next_line(substr buf, size_t start) RYML_NOEXCEPT
size_t num_cols
number of columns in the line, excluding newline characters (ie the initial size of rem)
LineContents() RYML_NOEXCEPT=default
size_t current_col(csubstr s) const RYML_NOEXCEPT
size_t indentation
number of spaces on the beginning of the line.
size_t current_col() const RYML_NOEXCEPT
holds a source or yaml file position, for example when an error is detected; See also location_format...
Definition: common.hpp:284
size_t col
column
Definition: common.hpp:287
size_t line
line
Definition: common.hpp:286
size_t offset
number of bytes from the beginning of the source buffer
Definition: common.hpp:285
csubstr name
name of the file
Definition: common.hpp:288
bool indentation_eq_extra() const noexcept
bool at_line_beginning() const noexcept
void start_parse(const char *file, id_type node_id_)
bool indentation_lt() const noexcept
bool indentation_gt_extra() const noexcept
bool indentation_ge_extra() const noexcept
bool indentation_lt_extra() const noexcept
bool at_first_token() const noexcept
LineContents line_contents
bool indentation_eq() const noexcept
size_t indref
the reference indentation in the current block scope
id_type node_id
don't hold a pointer to the node as it will be relocated during tree resizes
bool indentation_ge() const noexcept
bool indentation_gt() const noexcept
void reset_before_pop(ParserState const &to_pop)