rapidyaml  0.9.0
parse and emit YAML, and do it fast
tag.cpp
Go to the documentation of this file.
1 #include "c4/yml/tag.hpp"
2 #include "c4/yml/tree.hpp"
3 #include "c4/yml/detail/parser_dbg.hpp"
4 
5 
6 namespace c4 {
7 namespace yml {
8 
9 bool is_custom_tag(csubstr tag)
10 {
11  if((tag.len > 2) && (tag.str[0] == '!'))
12  {
13  size_t pos = tag.find('!', 1);
14  return pos != npos && pos > 1 && tag.str[1] != '<';
15  }
16  return false;
17 }
18 
19 csubstr normalize_tag(csubstr tag)
20 {
21  YamlTag_e t = to_tag(tag);
22  if(t != TAG_NONE)
23  return from_tag(t);
24  if(tag.begins_with("!<"))
25  tag = tag.sub(1);
26  if(tag.begins_with("<!"))
27  return tag;
28  return tag;
29 }
30 
31 csubstr normalize_tag_long(csubstr tag)
32 {
33  YamlTag_e t = to_tag(tag);
34  if(t != TAG_NONE)
35  return from_tag_long(t);
36  if(tag.begins_with("!<"))
37  tag = tag.sub(1);
38  if(tag.begins_with("<!"))
39  return tag;
40  return tag;
41 }
42 
43 csubstr normalize_tag_long(csubstr tag, substr output)
44 {
45  csubstr result = normalize_tag_long(tag);
46  if(result.begins_with("!!"))
47  {
48  tag = tag.sub(2);
49  const csubstr pfx = "<tag:yaml.org,2002:";
50  const size_t len = pfx.len + tag.len + 1;
51  if(len <= output.len)
52  {
53  memcpy(output.str , pfx.str, pfx.len);
54  memcpy(output.str + pfx.len, tag.str, tag.len);
55  output[pfx.len + tag.len] = '>';
56  result = output.first(len);
57  }
58  else
59  {
60  result.str = nullptr;
61  result.len = len;
62  }
63  }
64  return result;
65 }
66 
67 YamlTag_e to_tag(csubstr tag)
68 {
69  if(tag.begins_with("!<"))
70  tag = tag.sub(1);
71  if(tag.begins_with("!!"))
72  tag = tag.sub(2);
73  else if(tag.begins_with('!'))
74  return TAG_NONE;
75  else if(tag.begins_with("tag:yaml.org,2002:"))
76  {
77  RYML_ASSERT(csubstr("tag:yaml.org,2002:").len == 18);
78  tag = tag.sub(18);
79  }
80  else if(tag.begins_with("<tag:yaml.org,2002:"))
81  {
82  RYML_ASSERT(csubstr("<tag:yaml.org,2002:").len == 19);
83  tag = tag.sub(19);
84  if(!tag.len)
85  return TAG_NONE;
86  tag = tag.offs(0, 1);
87  }
88 
89  if(tag == "map")
90  return TAG_MAP;
91  else if(tag == "omap")
92  return TAG_OMAP;
93  else if(tag == "pairs")
94  return TAG_PAIRS;
95  else if(tag == "set")
96  return TAG_SET;
97  else if(tag == "seq")
98  return TAG_SEQ;
99  else if(tag == "binary")
100  return TAG_BINARY;
101  else if(tag == "bool")
102  return TAG_BOOL;
103  else if(tag == "float")
104  return TAG_FLOAT;
105  else if(tag == "int")
106  return TAG_INT;
107  else if(tag == "merge")
108  return TAG_MERGE;
109  else if(tag == "null")
110  return TAG_NULL;
111  else if(tag == "str")
112  return TAG_STR;
113  else if(tag == "timestamp")
114  return TAG_TIMESTAMP;
115  else if(tag == "value")
116  return TAG_VALUE;
117  else if(tag == "yaml")
118  return TAG_YAML;
119 
120  return TAG_NONE;
121 }
122 
124 {
125  switch(tag)
126  {
127  case TAG_MAP:
128  return {"<tag:yaml.org,2002:map>"};
129  case TAG_OMAP:
130  return {"<tag:yaml.org,2002:omap>"};
131  case TAG_PAIRS:
132  return {"<tag:yaml.org,2002:pairs>"};
133  case TAG_SET:
134  return {"<tag:yaml.org,2002:set>"};
135  case TAG_SEQ:
136  return {"<tag:yaml.org,2002:seq>"};
137  case TAG_BINARY:
138  return {"<tag:yaml.org,2002:binary>"};
139  case TAG_BOOL:
140  return {"<tag:yaml.org,2002:bool>"};
141  case TAG_FLOAT:
142  return {"<tag:yaml.org,2002:float>"};
143  case TAG_INT:
144  return {"<tag:yaml.org,2002:int>"};
145  case TAG_MERGE:
146  return {"<tag:yaml.org,2002:merge>"};
147  case TAG_NULL:
148  return {"<tag:yaml.org,2002:null>"};
149  case TAG_STR:
150  return {"<tag:yaml.org,2002:str>"};
151  case TAG_TIMESTAMP:
152  return {"<tag:yaml.org,2002:timestamp>"};
153  case TAG_VALUE:
154  return {"<tag:yaml.org,2002:value>"};
155  case TAG_YAML:
156  return {"<tag:yaml.org,2002:yaml>"};
157  case TAG_NONE:
158  default:
159  return {""};
160  }
161 }
162 
163 csubstr from_tag(YamlTag_e tag)
164 {
165  switch(tag)
166  {
167  case TAG_MAP:
168  return {"!!map"};
169  case TAG_OMAP:
170  return {"!!omap"};
171  case TAG_PAIRS:
172  return {"!!pairs"};
173  case TAG_SET:
174  return {"!!set"};
175  case TAG_SEQ:
176  return {"!!seq"};
177  case TAG_BINARY:
178  return {"!!binary"};
179  case TAG_BOOL:
180  return {"!!bool"};
181  case TAG_FLOAT:
182  return {"!!float"};
183  case TAG_INT:
184  return {"!!int"};
185  case TAG_MERGE:
186  return {"!!merge"};
187  case TAG_NULL:
188  return {"!!null"};
189  case TAG_STR:
190  return {"!!str"};
191  case TAG_TIMESTAMP:
192  return {"!!timestamp"};
193  case TAG_VALUE:
194  return {"!!value"};
195  case TAG_YAML:
196  return {"!!yaml"};
197  case TAG_NONE:
198  default:
199  return {""};
200  }
201 }
202 
203 
204 bool TagDirective::create_from_str(csubstr directive_)
205 {
206  csubstr directive = directive_;
207  directive = directive.sub(4);
208  if(!directive.begins_with(' '))
209  return false;
210  directive = directive.triml(' ');
211  size_t pos = directive.find(' ');
212  if(pos == npos)
213  return false;
214  handle = directive.first(pos);
215  directive = directive.sub(handle.len).triml(' ');
216  pos = directive.find(' ');
217  if(pos != npos)
218  directive = directive.first(pos);
219  prefix = directive;
220  next_node_id = NONE;
221  _c4dbgpf("%TAG: handle={} prefix={}", handle, prefix);
222  return true;
223 }
224 
225 bool TagDirective::create_from_str(csubstr directive_, Tree *tree)
226 {
227  _RYML_CB_CHECK(tree->callbacks(), directive_.begins_with("%TAG "));
228  if(!create_from_str(directive_))
229  {
230  _RYML_CB_ERR(tree->callbacks(), "invalid tag directive");
231  }
232  next_node_id = tree->size();
233  if(!tree->empty())
234  {
235  const id_type prev = tree->size() - 1;
236  if(tree->is_root(prev) && tree->type(prev) != NOTYPE && !tree->is_stream(prev))
237  ++next_node_id;
238  }
239  _c4dbgpf("%TAG: handle={} prefix={} next_node={}", handle, prefix, next_node_id);
240  return true;
241 }
242 
243 size_t TagDirective::transform(csubstr tag, substr output, Callbacks const& callbacks) const
244 {
245  _c4dbgpf("%TAG: handle={} prefix={} next_node={}. tag={}", handle, prefix, next_node_id, tag);
246  _RYML_CB_ASSERT(callbacks, tag.len >= handle.len);
247  csubstr rest = tag.sub(handle.len);
248  _c4dbgpf("%TAG: rest={}", rest);
249  if(rest.begins_with('<'))
250  {
251  _c4dbgpf("%TAG: begins with <. rest={}", rest);
252  if(C4_UNLIKELY(!rest.ends_with('>')))
253  _RYML_CB_ERR(callbacks, "malformed tag");
254  rest = rest.offs(1, 1);
255  if(rest.begins_with(prefix))
256  {
257  _c4dbgpf("%TAG: already transformed! actual={}", rest.sub(prefix.len));
258  return 0; // return 0 to signal that the tag is local and cannot be resolved
259  }
260  }
261  size_t len = 1u + prefix.len + rest.len + 1u;
262  size_t numpc = rest.count('%');
263  if(numpc == 0)
264  {
265  if(len <= output.len)
266  {
267  output.str[0] = '<';
268  memcpy(1u + output.str, prefix.str, prefix.len);
269  memcpy(1u + output.str + prefix.len, rest.str, rest.len);
270  output.str[1u + prefix.len + rest.len] = '>';
271  }
272  }
273  else
274  {
275  // need to decode URI % sequences
276  size_t pos = rest.find('%');
277  _RYML_CB_ASSERT(callbacks, pos != npos);
278  do {
279  size_t next = rest.first_not_of("0123456789abcdefABCDEF", pos+1);
280  if(next == npos)
281  next = rest.len;
282  _RYML_CB_CHECK(callbacks, pos+1 < next);
283  _RYML_CB_CHECK(callbacks, pos+1 + 2 <= next);
284  size_t delta = next - (pos+1);
285  len -= delta;
286  pos = rest.find('%', pos+1);
287  } while(pos != npos);
288  if(len <= output.len)
289  {
290  size_t prev = 0, wpos = 0;
291  auto appendstr = [&](csubstr s) { memcpy(output.str + wpos, s.str, s.len); wpos += s.len; };
292  auto appendchar = [&](char c) { output.str[wpos++] = c; };
293  appendchar('<');
294  appendstr(prefix);
295  pos = rest.find('%');
296  _RYML_CB_ASSERT(callbacks, pos != npos);
297  do {
298  size_t next = rest.first_not_of("0123456789abcdefABCDEF", pos+1);
299  if(next == npos)
300  next = rest.len;
301  _RYML_CB_CHECK(callbacks, pos+1 < next);
302  _RYML_CB_CHECK(callbacks, pos+1 + 2 <= next);
303  uint8_t val;
304  if(C4_UNLIKELY(!read_hex(rest.range(pos+1, next), &val) || val > 127))
305  _RYML_CB_ERR(callbacks, "invalid URI character");
306  appendstr(rest.range(prev, pos));
307  appendchar(static_cast<char>(val));
308  prev = next;
309  pos = rest.find('%', pos+1);
310  } while(pos != npos);
311  _RYML_CB_ASSERT(callbacks, pos == npos);
312  _RYML_CB_ASSERT(callbacks, prev > 0);
313  _RYML_CB_ASSERT(callbacks, rest.len >= prev);
314  appendstr(rest.sub(prev));
315  appendchar('>');
316  _RYML_CB_ASSERT(callbacks, wpos == len);
317  }
318  }
319  return len;
320 }
321 
322 } // namespace yml
323 } // namespace c4
bool is_stream(id_type node) const
Definition: tree.hpp:363
NodeType type(id_type node) const
Definition: tree.hpp:337
bool is_root(id_type node) const
Definition: tree.hpp:415
Callbacks const & callbacks() const
Definition: tree.hpp:241
bool empty() const
Definition: tree.hpp:235
id_type size() const
Definition: tree.hpp:237
@ NOTYPE
no node type or style is set
Definition: node_type.hpp:32
bool read_hex(csubstr s, I *v) noexcept
read an hexadecimal integer from a string.
Definition: charconv.hpp:890
csubstr from_tag_long(YamlTag_e tag)
Definition: tag.cpp:123
bool is_custom_tag(csubstr tag)
Definition: tag.cpp:9
csubstr normalize_tag_long(csubstr tag)
Definition: tag.cpp:31
YamlTag_e
a bit mask for marking tags for types
Definition: tag.hpp:26
csubstr normalize_tag(csubstr tag)
Definition: tag.cpp:19
csubstr from_tag(YamlTag_e tag)
Definition: tag.cpp:163
YamlTag_e to_tag(csubstr tag)
Definition: tag.cpp:67
@ TAG_SET
!!set Unordered set of non-equal values.
Definition: tag.hpp:32
@ TAG_MERGE
!!merge Specify one or more mapping to be merged with the current one.
Definition: tag.hpp:39
@ TAG_INT
!!int Mathematical integers.
Definition: tag.hpp:38
@ TAG_SEQ
!!seq Sequence of arbitrary values.
Definition: tag.hpp:33
@ TAG_NULL
!!null Devoid of value.
Definition: tag.hpp:40
@ TAG_YAML
!!yaml Keys for encoding YAML in YAML https://yaml.org/type/yaml.html
Definition: tag.hpp:44
@ TAG_TIMESTAMP
!!timestamp A point in time https://yaml.org/type/timestamp.html
Definition: tag.hpp:42
@ TAG_NONE
Definition: tag.hpp:27
@ TAG_STR
!!str A sequence of zero or more Unicode characters.
Definition: tag.hpp:41
@ TAG_BOOL
!!bool Mathematical Booleans.
Definition: tag.hpp:36
@ TAG_MAP
!!map Unordered set of key: value pairs without duplicates.
Definition: tag.hpp:29
@ TAG_BINARY
!!binary A sequence of zero or more octets (8 bit values).
Definition: tag.hpp:35
@ TAG_PAIRS
!!pairs Ordered sequence of key: value pairs allowing duplicates.
Definition: tag.hpp:31
@ TAG_VALUE
!!value Specify the default value of a mapping https://yaml.org/type/value.html
Definition: tag.hpp:43
@ TAG_OMAP
!!omap Ordered sequence of key: value pairs without duplicates.
Definition: tag.hpp:30
@ TAG_FLOAT
!!float Floating-point approximation to real numbers.
Definition: tag.hpp:37
RYML_ID_TYPE id_type
The type of a node id in the YAML tree; to override the default type, define the macro RYML_ID_TYPE t...
Definition: common.hpp:253
@ npos
a null string position
Definition: common.hpp:267
@ NONE
an index to none
Definition: common.hpp:260
Definition: common.cpp:12
a c-style callbacks class.
Definition: common.hpp:376
bool create_from_str(csubstr directive_)
leaves next_node_id unfilled
Definition: tag.cpp:204
csubstr handle
Eg.
Definition: tag.hpp:60
id_type next_node_id
The next node to which this tag directive applies.
Definition: tag.hpp:64
size_t transform(csubstr tag, substr output, Callbacks const &callbacks) const
Definition: tag.cpp:243
csubstr prefix
Eg.
Definition: tag.hpp:62