rapidyaml  0.7.1
parse and emit YAML, and do it fast
parser_state.hpp
Go to the documentation of this file.
1 #ifndef _C4_YML_PARSER_STATE_HPP_
2 #define _C4_YML_PARSER_STATE_HPP_
3 
4 #ifndef _C4_YML_COMMON_HPP_
5 #include "c4/yml/common.hpp"
6 #endif
7 
8 namespace c4 {
9 namespace yml {
10 
11 /** data type for @ref ParserState_e */
12 using ParserFlag_t = int;
13 
14 /** Enumeration of the state flags for the parser */
15 typedef enum : ParserFlag_t {
16  RTOP = 0x01 << 0, ///< reading at top level
17  RUNK = 0x01 << 1, ///< reading unknown state (when starting): must determine whether scalar, map or seq
18  RMAP = 0x01 << 2, ///< reading a map
19  RSEQ = 0x01 << 3, ///< reading a seq
20  FLOW = 0x01 << 4, ///< reading is inside explicit flow chars: [] or {}
21  BLCK = 0x01 << 5, ///< reading in block mode
22  QMRK = 0x01 << 6, ///< reading an explicit key (`? key`)
23  RKEY = 0x01 << 7, ///< reading a scalar as key
24  RVAL = 0x01 << 9, ///< reading a scalar as val
25  RKCL = 0x01 << 8, ///< reading the key colon (ie the : after the key in the map)
26  RNXT = 0x01 << 10, ///< read next val or keyval
27  SSCL = 0x01 << 11, ///< there's a stored scalar
28  QSCL = 0x01 << 12, ///< stored scalar was quoted
29  RSET = 0x01 << 13, ///< the (implicit) map being read is a !!set. @see https://yaml.org/type/set.html
30  RDOC = 0x01 << 14, ///< reading a document
31  NDOC = 0x01 << 15, ///< no document mode. a document has ended and another has not started yet.
32  USTY = 0x01 << 16, ///< reading in unknown style mode - must determine FLOW or BLCK
33  //! reading an implicit map nested in an explicit seq.
34  //! eg, {key: [key2: value2, key3: value3]}
35  //! is parsed as {key: [{key2: value2}, {key3: value3}]}
36  RSEQIMAP = 0x01 << 17,
38 
39 #ifdef RYML_DBG
40 /** @cond dev */
41 namespace detail {
42 csubstr _parser_flags_to_str(substr buf, ParserFlag_t flags);
43 } // namespace
44 /** @endcond */
45 #endif
46 
47 
48 /** Helper to control the line contents while parsing a buffer */
50 {
51  substr rem; ///< the stripped line remainder; initially starts at the first non-space character
52  size_t indentation; ///< the number of spaces on the beginning of the line
53  substr full; ///< the full line, including newlines on the right
54  substr stripped; ///< the stripped line, excluding newlines on the right
55 
56  LineContents() = default;
57 
58  void reset_with_next_line(substr buf, size_t offset)
59  {
60  RYML_ASSERT(offset <= buf.len);
61  size_t e = offset;
62  // get the current line stripped of newline chars
63  while(e < buf.len && (buf.str[e] != '\n' && buf.str[e] != '\r'))
64  ++e;
65  RYML_ASSERT(e >= offset);
66  const substr stripped_ = buf.range(offset, e);
67  // advance pos to include the first line ending
68  if(e < buf.len && buf.str[e] == '\r')
69  ++e;
70  if(e < buf.len && buf.str[e] == '\n')
71  ++e;
72  const substr full_ = buf.range(offset, e);
73  reset(full_, stripped_);
74  }
75 
76  void reset(substr full_, substr stripped_)
77  {
78  rem = stripped_;
79  indentation = stripped_.first_not_of(' '); // find the first column where the character is not a space
80  full = full_;
81  stripped = stripped_;
82  }
83 
84  C4_ALWAYS_INLINE size_t current_col() const RYML_NOEXCEPT
85  {
86  // WARNING: gcc x86 release builds were wrong (eg returning 0
87  // when the result should be 4 ) when this function was like
88  // this:
89  //
90  //return current_col(rem);
91  //
92  // (see below for the full definition of the called overload
93  // of current_col())
94  //
95  // ... so we explicitly inline the code in here:
96  RYML_ASSERT(rem.str >= full.str);
97  size_t col = static_cast<size_t>(rem.str - full.str);
98  return col;
99  //
100  // this was happening only on builds specifically with (gcc
101  // AND x86 AND release); no other builds were having the
102  // problem: not in debug, not in x64, not in other
103  // architectures, not in clang, not in visual studio. WTF!?
104  //
105  // Enabling debug prints with RYML_DBG made the problem go
106  // away, so these could not be used to debug the
107  // problem. Adding prints inside the called current_col() also
108  // made the problem go away! WTF!???
109  //
110  // a prize will be offered to anybody able to explain why this
111  // was happening.
112  }
113 
114  C4_ALWAYS_INLINE size_t current_col(csubstr s) const RYML_NOEXCEPT
115  {
116  RYML_ASSERT(s.str >= full.str);
117  RYML_ASSERT(full.is_super(s));
118  size_t col = static_cast<size_t>(s.str - full.str);
119  return col;
120  }
121 };
122 static_assert(std::is_standard_layout<LineContents>::value, "LineContents not standard");
123 
124 
125 //-----------------------------------------------------------------------------
126 //-----------------------------------------------------------------------------
127 //-----------------------------------------------------------------------------
128 
130 {
134  size_t indref; ///< the reference indentation in the current block scope
136  id_type node_id; ///< don't hold a pointer to the node as it will be relocated during tree resizes
137  size_t scalar_col; // the column where the scalar (or its quotes) begin
140 
141  ParserState() = default;
142 
143  void start_parse(const char *file, id_type node_id_)
144  {
145  level = 0;
146  pos.name = to_csubstr(file);
147  pos.offset = 0;
148  pos.line = 1;
149  pos.col = 1;
150  node_id = node_id_;
151  more_indented = false;
152  scalar_col = 0;
153  indref = 0;
154  has_children = false;
155  }
156 
158  {
159  node_id = NONE;
160  indref = npos;
161  more_indented = false;
162  ++level;
163  has_children = false;
164  }
165 
166  C4_ALWAYS_INLINE void reset_before_pop(ParserState const& to_pop)
167  {
168  pos = to_pop.pos;
169  line_contents = to_pop.line_contents;
170  }
171 
172 public:
173 
174  C4_ALWAYS_INLINE bool at_line_beginning() const noexcept
175  {
176  return line_contents.rem.str == line_contents.full.str;
177  }
178  C4_ALWAYS_INLINE bool indentation_eq() const noexcept
179  {
180  RYML_ASSERT(indref != npos);
182  }
183  C4_ALWAYS_INLINE bool indentation_ge() const noexcept
184  {
185  RYML_ASSERT(indref != npos);
187  }
188  C4_ALWAYS_INLINE bool indentation_gt() const noexcept
189  {
190  RYML_ASSERT(indref != npos);
192  }
193  C4_ALWAYS_INLINE bool indentation_lt() const noexcept
194  {
195  RYML_ASSERT(indref != npos);
197  }
198 };
199 static_assert(std::is_standard_layout<ParserState>::value, "ParserState not standard");
200 
201 
202 } // namespace yml
203 } // namespace c4
204 
205 #endif /* _C4_YML_PARSER_STATE_HPP_ */
Common utilities and infrastructure used by ryml.
#define RYML_NOEXCEPT
Conditionally expands to noexcept when RYML_USE_ASSERT is 0 and is empty otherwise.
Definition: common.hpp:166
csubstr to_csubstr(substr s) noexcept
neutral version for use in generic code
Definition: substr.hpp:2189
RYML_ID_TYPE id_type
The type of a node id in the YAML tree; to override the default type, define the macro RYML_ID_TYPE to a suitable integer type.
Definition: common.hpp:252
@ npos
a null string position
Definition: common.hpp:266
ParserState_e
Enumeration of the state flags for the parser.
@ RTOP
reading at top level
@ BLCK
reading in block mode
@ RSET
the (implicit) map being read is a !!set.
@ RSEQ
reading a seq
@ RNXT
read next val or keyval
@ FLOW
reading is inside explicit flow chars: [] or {}
@ RUNK
reading unknown state (when starting): must determine whether scalar, map or seq
@ RKEY
reading a scalar as key
@ RKCL
reading the key colon (ie the : after the key in the map)
@ NDOC
no document mode. a document has ended and another has not started yet.
@ RDOC
reading a document
@ QSCL
stored scalar was quoted
@ RMAP
reading a map
@ USTY
reading in unknown style mode - must determine FLOW or BLCK. (The text that followed here belongs to the next flag, RSEQIMAP: reading an implicit map nested in an explicit seq.)
@ QMRK
reading an explicit key (? key)
@ SSCL
there's a stored scalar
@ RVAL
reading a scalar as val
int ParserFlag_t
data type for ParserState_e
@ NONE
an index to none
Definition: common.hpp:259
Definition: common.cpp:12
Helper to control the line contents while parsing a buffer.
substr rem
the stripped line remainder; initially starts at the first non-space character
substr full
the full line, including newlines on the right
void reset(substr full_, substr stripped_)
size_t current_col(csubstr s) const RYML_NOEXCEPT
void reset_with_next_line(substr buf, size_t offset)
substr stripped
the stripped line, excluding newlines on the right
size_t indentation
the number of spaces on the beginning of the line
size_t current_col() const RYML_NOEXCEPT
a source file position
Definition: common.hpp:296
size_t col
column
Definition: common.hpp:302
size_t line
line
Definition: common.hpp:300
size_t offset
number of bytes from the beginning of the source buffer
Definition: common.hpp:298
csubstr name
file name
Definition: common.hpp:304
bool at_line_beginning() const noexcept
void start_parse(const char *file, id_type node_id_)
bool indentation_lt() const noexcept
LineContents line_contents
bool indentation_eq() const noexcept
size_t indref
the reference indentation in the current block scope
id_type node_id
don't hold a pointer to the node as it will be relocated during tree resizes
bool indentation_ge() const noexcept
bool indentation_gt() const noexcept
void reset_before_pop(ParserState const &to_pop)