rapidyaml  0.11.0
parse and emit YAML, and do it fast
parser_state.hpp
Go to the documentation of this file.
1 #ifndef _C4_YML_PARSER_STATE_HPP_
2 #define _C4_YML_PARSER_STATE_HPP_
3 
4 #ifndef _C4_YML_ERROR_HPP_
5 #include "c4/yml/error.hpp"
6 #endif
7 
8 // NOLINTBEGIN(hicpp-signed-bitwise)
9 
10 namespace c4 {
11 namespace yml {
12 
13 /** data type for @ref ParserState_e */
14 using ParserFlag_t = int;
15 
16 /** Enumeration of the state flags for the parser */
17 typedef enum : ParserFlag_t {
// Each enumerator below is a distinct single-bit mask (bits 0 through 17),
// so the flags can be combined and tested with bitwise operators.
// NOTE: RVAL (bit 9) is declared before RKCL (bit 8); only the declaration
// order is swapped, the bit values do not collide.
18  RTOP = 0x01 << 0, ///< reading at top level
19  RUNK = 0x01 << 1, ///< reading unknown state (when starting): must determine whether scalar, map or seq
20  RMAP = 0x01 << 2, ///< reading a map
21  RSEQ = 0x01 << 3, ///< reading a seq
22  RFLOW = 0x01 << 4, ///< reading is inside explicit flow chars: [] or {}
23  RBLCK = 0x01 << 5, ///< reading in block mode
24  QMRK = 0x01 << 6, ///< reading an explicit key (`? key`)
25  RKEY = 0x01 << 7, ///< reading a scalar as key
26  RVAL = 0x01 << 9, ///< reading a scalar as val
27  RKCL = 0x01 << 8, ///< reading the key colon (ie the : after the key in the map)
28  RNXT = 0x01 << 10, ///< read next val or keyval
29  SSCL = 0x01 << 11, ///< there's a stored scalar
30  QSCL = 0x01 << 12, ///< stored scalar was quoted
31  RSET = 0x01 << 13, ///< the (implicit) map being read is a !!set. @see https://yaml.org/type/set.html
32  RDOC = 0x01 << 14, ///< reading a document
33  NDOC = 0x01 << 15, ///< no document mode. a document has ended and another has not started yet.
34  USTY = 0x01 << 16, ///< reading in unknown style mode - must determine FLOW or BLCK
35  //! reading an implicit map nested in an explicit seq.
36  //! eg, {key: [key2: value2, key3: value3]}
37  //! is parsed as {key: [{key2: value2}, {key3: value3}]}
38  RSEQIMAP = 0x01 << 17,
40 
41 
42 /** @cond dev */
43 #ifdef RYML_DBG
44 namespace detail {
45 csubstr _parser_flags_to_str(substr buf, ParserFlag_t flags);
46 } // namespace detail
47 #endif
48 /** @endcond */
49 
50 
51 //-----------------------------------------------------------------------------
52 //-----------------------------------------------------------------------------
53 //-----------------------------------------------------------------------------
54 
55 /** Helper to control the line contents while parsing a buffer */
57 {
58  substr rem; ///< current line remainder, without newline characters
59  substr full; ///< full line, including newline characters `\n` and `\r`
60  size_t num_cols; ///< number of columns in the line, excluding newline
61  ///< characters (ie the initial size of rem)
62  size_t indentation; ///< number of spaces on the beginning of the line.
63  ///< TODO this should not be a member of this object.
64  ///< We only care about indentation in block mode, so
65  ///< this should be moved to the parser state.
66 
68 
// Re-point this object at the line of `buf` that begins at offset `start`.
//
// After the call:
//  - `full` spans from `start` up to and including the terminating '\n',
//    or to the end of `buf` when no '\n' follows;
//  - `rem` spans the same line without the '\n' and with any trailing
//    '\r' characters trimmed;
//  - `num_cols` is the length of the trimmed `rem`;
//  - `indentation` is the result of `rem.first_not_of(' ')`.
//
// `start` must not exceed `buf.len` (asserted).
69  void reset_with_next_line(substr buf, size_t start) RYML_NOEXCEPT
70  {
71  _RYML_ASSERT_BASIC(start <= buf.len);
72  size_t end = start;
73  // get the current line stripped of newline chars
74  while((end < buf.len) && (buf.str[end] != '\n'))
75  ++end;
76  if(end < buf.len)
77  {
78  _RYML_ASSERT_BASIC(buf[end] == '\n');
79  full = buf.range(start, end + 1); // keep the '\n' in the full line
80  rem = buf.range(start, end); // the remainder stops right before the '\n'
81  }
82  else
83  {
84  // buffer ends without newline
85  full = rem = buf.sub(start);
86  }
87  size_t pos = rem.last_not_of('\r'); // used below to drop trailing '\r' (eg from "\r\n" endings)
88  rem.len = (pos != npos) ? pos + 1 : 0; // npos here leaves rem empty
89  num_cols = rem.len;
// after trimming, no '\r' may remain: a '\r' that is not at the end of
// the line trips this assertion
90  _RYML_ASSERT_BASIC(rem.find('\r') == npos);
91  // TODO move this to the parser state
// NOTE(review): when rem is empty or contains only spaces this presumably
// yields npos -- confirm against the substr API.
92  indentation = rem.first_not_of(' '); // find the first column where the character is not a space
93  }
94 
// Return how far the start of `rem` is from the start of `full`, in
// characters (0 when `rem` begins at the start of the line).
// Asserts that `rem` does not begin before `full`.
95  C4_ALWAYS_INLINE size_t current_col() const RYML_NOEXCEPT
96  {
97  _RYML_ASSERT_BASIC(rem.str >= full.str);
98  return static_cast<size_t>(rem.str - full.str);
99  }
100 
// Return how far the start of `s` is from the start of `full`, in
// characters. Asserts that `s` begins no earlier than `full` and no later
// than the end of `rem`.
101  C4_ALWAYS_INLINE size_t current_col(csubstr s) const RYML_NOEXCEPT
102  {
103  _RYML_ASSERT_BASIC(s.str >= full.str);
104  _RYML_ASSERT_BASIC(s.str <= rem.end());
105  return static_cast<size_t>(s.str - full.str);
106  }
107 };
108 static_assert(std::is_standard_layout<LineContents>::value, "LineContents not standard");
109 
110 
111 //-----------------------------------------------------------------------------
112 //-----------------------------------------------------------------------------
113 //-----------------------------------------------------------------------------
114 
116 {
120  size_t indref; ///< the reference indentation in the current block scope
122  id_type node_id; ///< don't hold a pointer to the node as it will be relocated during tree resizes
123  size_t scalar_col; // the column where the scalar (or its quotes) begin
126 
127  ParserState() = default;
128 
// Initialize this state for a new parse.
//
// @param file     name stored (via to_csubstr) as the position's file name
// @param node_id_ id of the tree node this state refers to
//
// The position is reset to offset 0, line 1, column 1; level, indref and
// scalar_col are zeroed and the more_indented/has_children flags cleared.
// NOTE(review): the declarations of level, pos, more_indented and
// has_children are not visible in this listing; any members not assigned
// here are left untouched by this function.
129  void start_parse(const char *file, id_type node_id_)
130  {
131  level = 0;
132  pos.name = to_csubstr(file);
133  pos.offset = 0;
134  pos.line = 1;
135  pos.col = 1;
136  node_id = node_id_;
137  more_indented = false;
138  scalar_col = 0;
139  indref = 0;
140  has_children = false;
141  }
142 
144  {
145  node_id = NONE;
146  indref = npos;
147  more_indented = false;
148  ++level;
149  has_children = false;
150  }
151 
// Copy the position and line contents from `to_pop` into this state; no
// other member is changed here.
// NOTE(review): presumably called on the parent state just before the
// child state `to_pop` is popped -- confirm against the caller.
152  C4_ALWAYS_INLINE void reset_before_pop(ParserState const& to_pop)
153  {
154  pos = to_pop.pos;
155  line_contents = to_pop.line_contents;
156  }
157 
158 public:
159 
// True when the line remainder starts at the same address as the full
// line.
160  C4_ALWAYS_INLINE bool at_line_beginning() const noexcept
161  {
162  return line_contents.rem.str == line_contents.full.str;
163  }
// Requires indref to be set (asserted to differ from npos).
// NOTE(review): the return statement is elided from this listing;
// presumably tests whether the current line's indentation equals indref
// -- confirm against the real header.
164  C4_ALWAYS_INLINE bool indentation_eq() const noexcept
165  {
166  _RYML_ASSERT_BASIC(indref != npos);
168  }
// Requires indref to be set (asserted to differ from npos).
// NOTE(review): the return statement is elided from this listing;
// presumably tests whether the current line's indentation is greater than
// or equal to indref -- confirm against the real header.
169  C4_ALWAYS_INLINE bool indentation_ge() const noexcept
170  {
171  _RYML_ASSERT_BASIC(indref != npos);
173  }
// Requires indref to be set (asserted to differ from npos).
// NOTE(review): the return statement is elided from this listing;
// presumably tests whether the current line's indentation is strictly
// greater than indref -- confirm against the real header.
174  C4_ALWAYS_INLINE bool indentation_gt() const noexcept
175  {
176  _RYML_ASSERT_BASIC(indref != npos);
178  }
// Requires indref to be set (asserted to differ from npos).
// NOTE(review): the return statement is elided from this listing;
// presumably tests whether the current line's indentation is strictly
// less than indref -- confirm against the real header.
179  C4_ALWAYS_INLINE bool indentation_lt() const noexcept
180  {
181  _RYML_ASSERT_BASIC(indref != npos);
183  }
184 };
185 static_assert(std::is_standard_layout<ParserState>::value, "ParserState not standard");
186 
187 
188 } // namespace yml
189 } // namespace c4
190 
191 // NOLINTEND(hicpp-signed-bitwise)
192 
193 #endif /* _C4_YML_PARSER_STATE_HPP_ */
#define RYML_NOEXCEPT
Conditionally expands to noexcept when RYML_USE_ASSERT is 0 and is empty otherwise.
Definition: common.hpp:192
Error utilities used by ryml.
csubstr to_csubstr(substr s) noexcept
neutral version for use in generic code
Definition: substr.hpp:2210
RYML_ID_TYPE id_type
The type of a node id in the YAML tree; to override the default type, define the macro RYML_ID_TYPE t...
Definition: common.hpp:244
@ npos
a null string position
Definition: common.hpp:258
ParserState_e
Enumeration of the state flags for the parser.
@ RTOP
reading at top level
@ RSET
the (implicit) map being read is a !!set.
@ RSEQ
reading a seq
@ RNXT
read next val or keyval
@ RUNK
reading unknown state (when starting): must determine whether scalar, map or seq
@ RKEY
reading a scalar as key
@ RKCL
reading the key colon (ie the : after the key in the map)
@ NDOC
no document mode. a document has ended and another has not started yet.
@ RDOC
reading a document
@ QSCL
stored scalar was quoted
@ RBLCK
reading in block mode
@ RMAP
reading a map
@ USTY
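reading in unknown style mode - must determine FLOW or BLCK
@ RSEQIMAP
reading an implicit map nested in an explicit seq. eg, {key: [key2: value2, key3: value3]} is parsed as {key: [{key2: value2}, {key3: value3}]}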
@ QMRK
reading an explicit key (? key)
@ SSCL
there's a stored scalar
@ RVAL
reading a scalar as val
@ RFLOW
reading is inside explicit flow chars: [] or {}
int ParserFlag_t
data type for ParserState_e
@ NONE
an index to none
Definition: common.hpp:251
(Undefined by default) Use shorter error message from checks/asserts: do not show the check condition...
Definition: common.cpp:14
Helper to control the line contents while parsing a buffer.
substr rem
current line remainder, without newline characters
substr full
full line, including newline characters \n and \r
void reset_with_next_line(substr buf, size_t start) RYML_NOEXCEPT
size_t num_cols
number of columns in the line, excluding newline characters (ie the initial size of rem)
LineContents() RYML_NOEXCEPT=default
size_t current_col(csubstr s) const RYML_NOEXCEPT
size_t indentation
number of spaces on the beginning of the line. TODO this should not be a member of this object. We only care about indentation in block mode, so this should be moved to the parser state.
size_t current_col() const RYML_NOEXCEPT
holds a source or yaml file position, for example when an error is detected; See also location_format...
Definition: common.hpp:283
size_t col
column
Definition: common.hpp:286
size_t line
line
Definition: common.hpp:285
size_t offset
number of bytes from the beginning of the source buffer
Definition: common.hpp:284
csubstr name
name of the file
Definition: common.hpp:287
bool at_line_beginning() const noexcept
void start_parse(const char *file, id_type node_id_)
bool indentation_lt() const noexcept
LineContents line_contents
bool indentation_eq() const noexcept
size_t indref
the reference indentation in the current block scope
id_type node_id
don't hold a pointer to the node as it will be relocated during tree resizes
bool indentation_ge() const noexcept
bool indentation_gt() const noexcept
void reset_before_pop(ParserState const &to_pop)