rapidyaml  0.8.0
parse and emit YAML, and do it fast
parser_state.hpp
Go to the documentation of this file.
1 #ifndef _C4_YML_PARSER_STATE_HPP_
2 #define _C4_YML_PARSER_STATE_HPP_
3 
4 #ifndef _C4_YML_COMMON_HPP_
5 #include "c4/yml/common.hpp"
6 #endif
7 
8 // NOLINTBEGIN(hicpp-signed-bitwise)
9 
10 namespace c4 {
11 namespace yml {
12 
/** data type for @ref ParserState_e */
using ParserFlag_t = int;

/** Enumeration of the state flags for the parser.
 *
 * Every enumerator is a single distinct bit of @ref ParserFlag_t, so
 * several states can be combined into (and tested against) one integer
 * using the bitwise operators. */
typedef enum : ParserFlag_t {
    RTOP = 0x01 <<  0,   ///< reading at top level
    RUNK = 0x01 <<  1,   ///< reading unknown state (when starting): must determine whether scalar, map or seq
    RMAP = 0x01 <<  2,   ///< reading a map
    RSEQ = 0x01 <<  3,   ///< reading a seq
    FLOW = 0x01 <<  4,   ///< reading is inside explicit flow chars: [] or {}
    BLCK = 0x01 <<  5,   ///< reading in block mode
    QMRK = 0x01 <<  6,   ///< reading an explicit key (`? key`)
    RKEY = 0x01 <<  7,   ///< reading a scalar as key
    // note: RVAL and RKCL are declared out of bit order (9 before 8);
    // the values are kept exactly as they are.
    RVAL = 0x01 <<  9,   ///< reading a scalar as val
    RKCL = 0x01 <<  8,   ///< reading the key colon (ie the : after the key in the map)
    RNXT = 0x01 << 10,   ///< read next val or keyval
    SSCL = 0x01 << 11,   ///< there's a stored scalar
    QSCL = 0x01 << 12,   ///< stored scalar was quoted
    RSET = 0x01 << 13,   ///< the (implicit) map being read is a !!set. @see https://yaml.org/type/set.html
    RDOC = 0x01 << 14,   ///< reading a document
    NDOC = 0x01 << 15,   ///< no document mode. a document has ended and another has not started yet.
    USTY = 0x01 << 16,   ///< reading in unknown style mode - must determine FLOW or BLCK

    /** reading an implicit map nested in an explicit seq.
     * eg, {key: [key2: value2, key3: value3]}
     * is parsed as {key: [{key2: value2}, {key3: value3}]} */
    RSEQIMAP = 0x01 << 17,
} ParserState_e;
40 
41 #ifdef RYML_DBG
42 /** @cond dev */
43 namespace detail {
44 csubstr _parser_flags_to_str(substr buf, ParserFlag_t flags);
45 } // namespace
46 /** @endcond */
47 #endif
48 
49 
50 /** Helper to control the line contents while parsing a buffer */
52 {
53  substr rem; ///< the stripped line remainder; initially starts at the first non-space character
54  size_t indentation; ///< the number of spaces on the beginning of the line
55  substr full; ///< the full line, including newlines on the right
56  substr stripped; ///< the stripped line, excluding newlines on the right
57 
58  LineContents() = default;
59 
60  void reset_with_next_line(substr buf, size_t offset)
61  {
62  RYML_ASSERT(offset <= buf.len);
63  size_t e = offset;
64  // get the current line stripped of newline chars
65  while(e < buf.len && (buf.str[e] != '\n' && buf.str[e] != '\r'))
66  ++e;
67  RYML_ASSERT(e >= offset);
68  const substr stripped_ = buf.range(offset, e);
69  #if defined(__GNUC__) && __GNUC__ == 11
70  C4_DONT_OPTIMIZE(stripped_);
71  #endif
72  // advance pos to include the first line ending
73  if(e < buf.len && buf.str[e] == '\r')
74  ++e;
75  if(e < buf.len && buf.str[e] == '\n')
76  ++e;
77  const substr full_ = buf.range(offset, e);
78  reset(full_, stripped_);
79  }
80 
81  void reset(substr full_, substr stripped_)
82  {
83  rem = stripped_;
84  indentation = stripped_.first_not_of(' '); // find the first column where the character is not a space
85  full = full_;
86  stripped = stripped_;
87  }
88 
89  C4_ALWAYS_INLINE size_t current_col() const RYML_NOEXCEPT
90  {
91  // WARNING: gcc x86 release builds were wrong (eg returning 0
92  // when the result should be 4 ) when this function was like
93  // this:
94  //
95  //return current_col(rem);
96  //
97  // (see below for the full definition of the called overload
98  // of current_col())
99  //
100  // ... so we explicitly inline the code in here:
101  RYML_ASSERT(rem.str >= full.str);
102  size_t col = static_cast<size_t>(rem.str - full.str);
103  return col;
104  //
105  // this was happening only on builds specifically with (gcc
106  // AND x86 AND release); no other builds were having the
107  // problem: not in debug, not in x64, not in other
108  // architectures, not in clang, not in visual studio. WTF!?
109  //
110  // Enabling debug prints with RYML_DBG made the problem go
111  // away, so these could not be used to debug the
112  // problem. Adding prints inside the called current_col() also
113  // made the problem go away! WTF!???
114  //
115  // a prize will be offered to anybody able to explain why this
116  // was happening.
117  }
118 
119  C4_ALWAYS_INLINE size_t current_col(csubstr s) const RYML_NOEXCEPT
120  {
121  RYML_ASSERT(s.str >= full.str);
122  RYML_ASSERT(full.is_super(s));
123  size_t col = static_cast<size_t>(s.str - full.str);
124  return col;
125  }
126 };
127 static_assert(std::is_standard_layout<LineContents>::value, "LineContents not standard");
128 
129 
130 //-----------------------------------------------------------------------------
131 //-----------------------------------------------------------------------------
132 //-----------------------------------------------------------------------------
133 
135 {
139  size_t indref; ///< the reference indentation in the current block scope
141  id_type node_id; ///< don't hold a pointer to the node as it will be relocated during tree resizes
142  size_t scalar_col; // the column where the scalar (or its quotes) begin
145 
146  ParserState() = default;
147 
148  void start_parse(const char *file, id_type node_id_)
149  {
150  level = 0;
151  pos.name = to_csubstr(file);
152  pos.offset = 0;
153  pos.line = 1;
154  pos.col = 1;
155  node_id = node_id_;
156  more_indented = false;
157  scalar_col = 0;
158  indref = 0;
159  has_children = false;
160  }
161 
163  {
164  node_id = NONE;
165  indref = npos;
166  more_indented = false;
167  ++level;
168  has_children = false;
169  }
170 
171  C4_ALWAYS_INLINE void reset_before_pop(ParserState const& to_pop)
172  {
173  pos = to_pop.pos;
174  line_contents = to_pop.line_contents;
175  }
176 
177 public:
178 
179  C4_ALWAYS_INLINE bool at_line_beginning() const noexcept
180  {
181  return line_contents.rem.str == line_contents.full.str;
182  }
183  C4_ALWAYS_INLINE bool indentation_eq() const noexcept
184  {
185  RYML_ASSERT(indref != npos);
187  }
188  C4_ALWAYS_INLINE bool indentation_ge() const noexcept
189  {
190  RYML_ASSERT(indref != npos);
192  }
193  C4_ALWAYS_INLINE bool indentation_gt() const noexcept
194  {
195  RYML_ASSERT(indref != npos);
197  }
198  C4_ALWAYS_INLINE bool indentation_lt() const noexcept
199  {
200  RYML_ASSERT(indref != npos);
202  }
203 };
204 static_assert(std::is_standard_layout<ParserState>::value, "ParserState not standard");
205 
206 
207 } // namespace yml
208 } // namespace c4
209 
210 // NOLINTEND(hicpp-signed-bitwise)
211 
212 #endif /* _C4_YML_PARSER_STATE_HPP_ */
Common utilities and infrastructure used by ryml.
#define RYML_NOEXCEPT
Conditionally expands to noexcept when RYML_USE_ASSERT is 0 and is empty otherwise.
Definition: common.hpp:167
csubstr to_csubstr(substr s) noexcept
neutral version for use in generic code
Definition: substr.hpp:2186
RYML_ID_TYPE id_type
The type of a node id in the YAML tree; to override the default type, define the macro RYML_ID_TYPE t...
Definition: common.hpp:253
@ npos
a null string position
Definition: common.hpp:267
ParserState_e
Enumeration of the state flags for the parser.
@ RTOP
reading at top level
@ BLCK
reading in block mode
@ RSET
the (implicit) map being read is a !!set.
@ RSEQ
reading a seq
@ RNXT
read next val or keyval
@ FLOW
reading is inside explicit flow chars: [] or {}
@ RUNK
reading unknown state (when starting): must determine whether scalar, map or seq
@ RKEY
reading a scalar as key
@ RKCL
reading the key colon (ie the : after the key in the map)
@ NDOC
no document mode. a document has ended and another has not started yet.
@ RDOC
reading a document
@ QSCL
stored scalar was quoted
@ RMAP
reading a map
@ USTY
reading in unknown style mode - must determine FLOW or BLCK
@ RSEQIMAP
reading an implicit map nested in an explicit seq.
@ QMRK
reading an explicit key (? key)
@ SSCL
there's a stored scalar
@ RVAL
reading a scalar as val
int ParserFlag_t
data type for ParserState_e
@ NONE
an index to none
Definition: common.hpp:260
Definition: common.cpp:12
Helper to control the line contents while parsing a buffer.
substr rem
the stripped line remainder; initially starts at the first non-space character
substr full
the full line, including newlines on the right
void reset(substr full_, substr stripped_)
size_t current_col(csubstr s) const RYML_NOEXCEPT
void reset_with_next_line(substr buf, size_t offset)
substr stripped
the stripped line, excluding newlines on the right
size_t indentation
the number of spaces on the beginning of the line
size_t current_col() const RYML_NOEXCEPT
a source file position
Definition: common.hpp:297
size_t col
column
Definition: common.hpp:303
size_t line
line
Definition: common.hpp:301
size_t offset
number of bytes from the beginning of the source buffer
Definition: common.hpp:299
csubstr name
file name
Definition: common.hpp:305
bool at_line_beginning() const noexcept
void start_parse(const char *file, id_type node_id_)
bool indentation_lt() const noexcept
LineContents line_contents
bool indentation_eq() const noexcept
size_t indref
the reference indentation in the current block scope
id_type node_id
don't hold a pointer to the node as it will be relocated during tree resizes
bool indentation_ge() const noexcept
bool indentation_gt() const noexcept
void reset_before_pop(ParserState const &to_pop)