rapidyaml  0.7.0
parse and emit YAML, and do it fast
parser_state.hpp
Go to the documentation of this file.
1 #ifndef _C4_YML_PARSER_STATE_HPP_
2 #define _C4_YML_PARSER_STATE_HPP_
3 
4 #ifndef _C4_YML_COMMON_HPP_
5 #include "c4/yml/common.hpp"
6 #endif
7 
8 namespace c4 {
9 namespace yml {
10 
11 /** data type for @ref ParserState_e */
12 using ParserFlag_t = int;
13 
14 /** Enumeration of the state flags for the parser.
 *
 * Every enumerator is a distinct single bit of ParserFlag_t (bits 0
 * through 17). Note that the declaration order is not strictly by bit
 * position: RVAL (bit 9) is listed before RKCL (bit 8). */
15 typedef enum : ParserFlag_t {
16  RTOP = 0x01 << 0, ///< reading at top level
17  RUNK = 0x01 << 1, ///< reading unknown state (when starting): must determine whether scalar, map or seq
18  RMAP = 0x01 << 2, ///< reading a map
19  RSEQ = 0x01 << 3, ///< reading a seq
20  FLOW = 0x01 << 4, ///< reading is inside explicit flow chars: [] or {}
21  BLCK = 0x01 << 5, ///< reading in block mode
22  QMRK = 0x01 << 6, ///< reading an explicit key (`? key`)
23  RKEY = 0x01 << 7, ///< reading a scalar as key
24  RVAL = 0x01 << 9, ///< reading a scalar as val (bit 9; declared before RKCL, which is bit 8)
25  RKCL = 0x01 << 8, ///< reading the key colon (i.e. the : after the key in the map)
26  RNXT = 0x01 << 10, ///< read next val or keyval
27  SSCL = 0x01 << 11, ///< there's a stored scalar
28  QSCL = 0x01 << 12, ///< stored scalar was quoted
29  RSET = 0x01 << 13, ///< the (implicit) map being read is a !!set. @see https://yaml.org/type/set.html
30  RDOC = 0x01 << 14, ///< reading a document
31  NDOC = 0x01 << 15, ///< no document mode. a document has ended and another has not started yet.
32  USTY = 0x01 << 16, ///< reading in unknown style mode - must determine FLOW or BLCK
33  //! reading an implicit map nested in an explicit seq.
34  //! e.g. {key: [key2: value2, key3: value3]}
35  //! is parsed as {key: [{key2: value2}, {key3: value3}]}
36  RSEQIMAP = 0x01 << 17,
38 
39 #ifdef RYML_DBG
40 /** @cond dev */
41 namespace detail {
// Debug-only helper: this declaration is visible only when RYML_DBG is
// defined. It takes a caller-provided buffer and a set of parser flags
// and returns a csubstr.
// NOTE(review): presumably the returned csubstr is a textual rendering
// of `flags` written into `buf` - confirm against the definition, which
// is not part of this header.
42 csubstr _parser_flags_to_str(substr buf, ParserFlag_t flags);
43 } // namespace detail
44 /** @endcond */
45 #endif
46 
47 
48 /** Helper to control the line contents while parsing a buffer */
50 {
51  substr rem; ///< the remainder of the stripped line; reset() initializes it to the whole stripped line (leading spaces included)
52  size_t indentation; ///< index of the first non-space character of the stripped line, as computed by reset()
53  substr full; ///< the full line, including newlines on the right
54  substr stripped; ///< the stripped line, excluding newlines on the right
55 
// Defaulted constructor; the members receive their meaningful values
// from reset() / reset_with_next_line().
56  LineContents() = default;
57 
/** Point this object at the line of @p buf that begins at @p offset.
 *
 * Scans forward from @p offset to the first '\n' or '\r' (or to the
 * end of the buffer) to obtain the stripped line, then extends the
 * range over at most one '\r' followed by at most one '\n' to obtain
 * the full line, and finally forwards both ranges to reset().
 *
 * @param buf the buffer being parsed
 * @param offset position in @p buf where the line starts; asserted to
 *        be <= buf.len
 *
 * NOTE(review): the assertion accepts offset == buf.len, but the next
 * statement forms &buf[offset]. Whether substr::operator[] tolerates an
 * index equal to len is not visible here - confirm, or consider
 * computing the pointer without operator[]. */
58  void reset_with_next_line(substr buf, size_t offset)
59  {
60  RYML_ASSERT(offset <= buf.len);
61  char const* C4_RESTRICT b = &buf[offset];
62  char const* C4_RESTRICT e = b;
63  // get the current line stripped of newline chars
64  while(e < buf.end() && (*e != '\n' && *e != '\r'))
65  ++e;
66  RYML_ASSERT(e >= b);
67  const substr stripped_ = buf.sub(offset, static_cast<size_t>(e - b));
68  // advance pos to include the first line ending: an optional '\r'
    // and then an optional '\n', so "\r\n" is consumed as one ending
69  if(e != buf.end() && *e == '\r')
70  ++e;
71  if(e != buf.end() && *e == '\n')
72  ++e;
73  RYML_ASSERT(e >= b);
74  const substr full_ = buf.sub(offset, static_cast<size_t>(e - b));
75  reset(full_, stripped_);
76  }
77 
/** Install a new current line.
 *
 * Stores @p full_ and @p stripped_, restarts `rem` at the whole
 * stripped line, and recomputes `indentation` from the stripped line.
 *
 * @param full_ the line including its line-ending characters
 * @param stripped_ the same line without the line-ending characters
 *
 * NOTE(review): for an empty line or a line made only of spaces the
 * value of `indentation` is whatever first_not_of(' ') returns when
 * nothing matches (presumably npos) - confirm against substr. */
78  void reset(substr full_, substr stripped_)
79  {
80  full = full_;
81  stripped = stripped_;
82  rem = stripped_;
83  // find the first column where the character is not a space
84  indentation = stripped.first_not_of(' ');
85  }
86 
/** Zero-based column of the current read position: the distance from
 * the start of `full` to the start of `rem`. Asserts that `rem` does
 * not start before `full`. */
87  C4_ALWAYS_INLINE size_t current_col() const RYML_NOEXCEPT
88  {
89  // WARNING: gcc x86 release builds returned wrong results (eg 0
90  // when the result should be 4) when this function simply
91  // forwarded to the other overload, like this:
92  //
93  //return current_col(rem);
94  //
95  // (see below for the full definition of the called overload
96  // of current_col())
97  //
98  // ... so the computation is explicitly inlined here:
99  RYML_ASSERT(rem.str >= full.str);
100  size_t col = static_cast<size_t>(rem.str - full.str);
101  return col;
102  //
103  // The wrong results happened only on builds specifically with
104  // (gcc AND x86 AND release); no other builds had the problem:
105  // not debug, not x64, not other architectures, not clang, not
106  // visual studio.
107  //
108  // Enabling debug prints with RYML_DBG made the problem go
109  // away, so these could not be used to debug the
110  // problem. Adding prints inside the called current_col() also
111  // made the problem go away.
112  //
113  // a prize will be offered to anybody able to explain why this
114  // was happening.
115  }
116 
/** Zero-based column of an arbitrary position inside the current line.
 *
 * @param s a string whose start lies in the current line; asserted not
 *        to start before `full` and to satisfy full.is_super(s)
 * @return the distance from the start of `full` to the start of @p s */
117  C4_ALWAYS_INLINE size_t current_col(csubstr s) const RYML_NOEXCEPT
118  {
119  RYML_ASSERT(s.str >= full.str);
120  RYML_ASSERT(full.is_super(s));
121  size_t col = static_cast<size_t>(s.str - full.str);
122  return col;
123  }
124 };
125 static_assert(std::is_standard_layout<LineContents>::value, "LineContents not standard");
126 
127 
128 //-----------------------------------------------------------------------------
129 //-----------------------------------------------------------------------------
130 //-----------------------------------------------------------------------------
131 
133 {
137  size_t indref; ///< the reference indentation in the current block scope
139  id_type node_id; ///< id of the node; an id is stored instead of a pointer because the node will be relocated during tree resizes
140  size_t scalar_col; ///< the column where the scalar (or its quotes) begin
143 
144  ParserState() = default;
145 
/** Initialize this state for the beginning of a parse.
 *
 * Sets the level to 0, sets the position to offset 0 / line 1 /
 * column 1 of the named file, records the given node id, and clears
 * the indentation reference, the scalar column and the
 * more_indented / has_children markers. `line_contents` is not
 * touched here.
 *
 * @param file name of the source being parsed; stored in pos.name via
 *        to_csubstr(). NOTE(review): presumably this stores a view
 *        rather than a copy, so the string would need to outlive the
 *        state - confirm.
 * @param node_id_ id of the node to associate with this state */
146  void start_parse(const char *file, id_type node_id_)
147  {
148  level = 0;
149  pos.name = to_csubstr(file);
150  pos.offset = 0;
151  pos.line = 1;
152  pos.col = 1;
153  node_id = node_id_;
154  more_indented = false;
155  scalar_col = 0;
156  indref = 0;
157  has_children = false;
158  }
159 
161  {
162  node_id = NONE;
163  indref = npos;
164  more_indented = false;
165  ++level;
166  has_children = false;
167  }
168 
/** Copy the source position and the current line contents from
 * @p to_pop into this state; no other member is modified.
 * NOTE(review): presumably called on the parent state just before
 * @p to_pop is removed from the state stack, so that the parent
 * continues from where the popped state stopped - confirm at the
 * call site. */
169  C4_ALWAYS_INLINE void reset_before_pop(ParserState const& to_pop)
170  {
171  pos = to_pop.pos;
172  line_contents = to_pop.line_contents;
173  }
174 
175 public:
176 
/** @return true when the remainder of the current line starts at the
 * same address as the full line, i.e. the read position is still at
 * the very first character of the line. */
177  C4_ALWAYS_INLINE bool at_line_beginning() const noexcept
178  {
179  return line_contents.rem.str == line_contents.full.str;
180  }
181  C4_ALWAYS_INLINE bool indentation_eq() const noexcept
182  {
183  RYML_ASSERT(indref != npos);
185  }
186  C4_ALWAYS_INLINE bool indentation_ge() const noexcept
187  {
188  RYML_ASSERT(indref != npos);
190  }
191  C4_ALWAYS_INLINE bool indentation_gt() const noexcept
192  {
193  RYML_ASSERT(indref != npos);
195  }
196  C4_ALWAYS_INLINE bool indentation_lt() const noexcept
197  {
198  RYML_ASSERT(indref != npos);
200  }
201 };
202 static_assert(std::is_standard_layout<ParserState>::value, "ParserState not standard");
203 
204 
205 } // namespace yml
206 } // namespace c4
207 
208 #endif /* _C4_YML_PARSER_STATE_HPP_ */
Common utilities and infrastructure used by ryml.
#define RYML_NOEXCEPT
Conditionally expands to noexcept when RYML_USE_ASSERT is 0 and is empty otherwise.
Definition: common.hpp:166
csubstr to_csubstr(substr s) noexcept
neutral version for use in generic code
Definition: substr.hpp:2189
RYML_ID_TYPE id_type
The type of a node id in the YAML tree; to override the default type, define the macro RYML_ID_TYPE t...
Definition: common.hpp:252
@ npos
a null string position
Definition: common.hpp:266
ParserState_e
Enumeration of the state flags for the parser.
@ RTOP
reading at top level
@ BLCK
reading in block mode
@ RSET
the (implicit) map being read is a !!set.
@ RSEQ
reading a seq
@ RNXT
read next val or keyval
@ FLOW
reading is inside explicit flow chars: [] or {}
@ RUNK
reading unknown state (when starting): must determine whether scalar, map or seq
@ RKEY
reading a scalar as key
@ RKCL
reading the key colon (ie the : after the key in the map)
@ NDOC
no document mode. a document has ended and another has not started yet.
@ RDOC
reading a document
@ QSCL
stored scalar was quoted
@ RMAP
reading a map
@ USTY
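reading in unknown style mode - must determine FLOW or BLCK
@ RSEQIMAP
reading an implicit map nested in an explicit seq. eg, {key: [key2: value2, key3: value3]} is parsed as {key: [{key2: value2}, {key3: value3}]}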
@ QMRK
reading an explicit key (? key)
@ SSCL
there's a stored scalar
@ RVAL
reading a scalar as val
int ParserFlag_t
data type for ParserState_e
@ NONE
an index to none
Definition: common.hpp:259
Definition: common.cpp:12
Helper to control the line contents while parsing a buffer.
substr rem
the stripped line remainder; initially starts at the first non-space character
substr full
the full line, including newlines on the right
void reset(substr full_, substr stripped_)
size_t current_col(csubstr s) const RYML_NOEXCEPT
void reset_with_next_line(substr buf, size_t offset)
substr stripped
the stripped line, excluding newlines on the right
size_t indentation
the number of spaces on the beginning of the line
size_t current_col() const RYML_NOEXCEPT
a source file position
Definition: common.hpp:296
size_t col
column
Definition: common.hpp:302
size_t line
line
Definition: common.hpp:300
size_t offset
number of bytes from the beginning of the source buffer
Definition: common.hpp:298
csubstr name
file name
Definition: common.hpp:304
bool at_line_beginning() const noexcept
void start_parse(const char *file, id_type node_id_)
bool indentation_lt() const noexcept
LineContents line_contents
bool indentation_eq() const noexcept
size_t indref
the reference indentation in the current block scope
id_type node_id
don't hold a pointer to the node as it will be relocated during tree resizes
bool indentation_ge() const noexcept
bool indentation_gt() const noexcept
void reset_before_pop(ParserState const &to_pop)