rapidyaml  0.11.0
parse and emit YAML, and do it fast
tag.cpp
Go to the documentation of this file.
1 #include "c4/yml/tag.hpp"
2 #include "c4/yml/error.hpp"
3 #include "c4/yml/detail/dbgprint.hpp"
4 
5 
6 namespace c4 {
7 namespace yml {
8 
9 bool is_custom_tag(csubstr tag)
10 {
11  if((tag.len > 2) && (tag.str[0] == '!'))
12  {
13  size_t pos = tag.find('!', 1);
14  return pos != npos && pos > 1 && tag.str[1] != '<';
15  }
16  return false;
17 }
18 
19 csubstr normalize_tag(csubstr tag)
20 {
21  YamlTag_e t = to_tag(tag);
22  if(t != TAG_NONE)
23  return from_tag(t);
24  if(tag.begins_with("!<"))
25  tag = tag.sub(1);
26  if(tag.begins_with("<!"))
27  return tag;
28  return tag;
29 }
30 
31 csubstr normalize_tag_long(csubstr tag)
32 {
33  YamlTag_e t = to_tag(tag);
34  if(t != TAG_NONE)
35  return from_tag_long(t);
36  if(tag.begins_with("!<"))
37  tag = tag.sub(1);
38  if(tag.begins_with("<!"))
39  return tag;
40  return tag;
41 }
42 
43 csubstr normalize_tag_long(csubstr tag, substr output)
44 {
45  csubstr result = normalize_tag_long(tag);
46  if(result.begins_with("!!"))
47  {
48  tag = tag.sub(2);
49  const csubstr pfx = "<tag:yaml.org,2002:";
50  const size_t len = pfx.len + tag.len + 1;
51  if(len <= output.len)
52  {
53  memcpy(output.str , pfx.str, pfx.len);
54  memcpy(output.str + pfx.len, tag.str, tag.len);
55  output[pfx.len + tag.len] = '>';
56  result = output.first(len);
57  }
58  else
59  {
60  result.str = nullptr;
61  result.len = len;
62  }
63  }
64  return result;
65 }
66 
67 YamlTag_e to_tag(csubstr tag)
68 {
69  if(tag.begins_with("!<"))
70  tag = tag.sub(1);
71  if(tag.begins_with("!!"))
72  tag = tag.sub(2);
73  else if(tag.begins_with('!'))
74  {
75  return TAG_NONE;
76  }
77  else
78  {
79  csubstr pfx = "<tag:yaml.org,2002:";
80  csubstr pfx2 = pfx.sub(1);
81  if(tag.begins_with(pfx2))
82  {
83  tag = tag.sub(pfx2.len);
84  }
85  else if(tag.begins_with(pfx))
86  {
87  tag = tag.sub(pfx.len);
88  if(!tag.len)
89  return TAG_NONE;
90  tag = tag.offs(0, 1);
91  }
92  }
93 
94  if(tag == "map")
95  return TAG_MAP;
96  else if(tag == "omap")
97  return TAG_OMAP;
98  else if(tag == "pairs")
99  return TAG_PAIRS;
100  else if(tag == "set")
101  return TAG_SET;
102  else if(tag == "seq")
103  return TAG_SEQ;
104  else if(tag == "binary")
105  return TAG_BINARY;
106  else if(tag == "bool")
107  return TAG_BOOL;
108  else if(tag == "float")
109  return TAG_FLOAT;
110  else if(tag == "int")
111  return TAG_INT;
112  else if(tag == "merge")
113  return TAG_MERGE;
114  else if(tag == "null")
115  return TAG_NULL;
116  else if(tag == "str")
117  return TAG_STR;
118  else if(tag == "timestamp")
119  return TAG_TIMESTAMP;
120  else if(tag == "value")
121  return TAG_VALUE;
122  else if(tag == "yaml")
123  return TAG_YAML;
124 
125  return TAG_NONE;
126 }
127 
129 {
130  switch(tag)
131  {
132  case TAG_MAP:
133  return {"<tag:yaml.org,2002:map>"};
134  case TAG_OMAP:
135  return {"<tag:yaml.org,2002:omap>"};
136  case TAG_PAIRS:
137  return {"<tag:yaml.org,2002:pairs>"};
138  case TAG_SET:
139  return {"<tag:yaml.org,2002:set>"};
140  case TAG_SEQ:
141  return {"<tag:yaml.org,2002:seq>"};
142  case TAG_BINARY:
143  return {"<tag:yaml.org,2002:binary>"};
144  case TAG_BOOL:
145  return {"<tag:yaml.org,2002:bool>"};
146  case TAG_FLOAT:
147  return {"<tag:yaml.org,2002:float>"};
148  case TAG_INT:
149  return {"<tag:yaml.org,2002:int>"};
150  case TAG_MERGE:
151  return {"<tag:yaml.org,2002:merge>"};
152  case TAG_NULL:
153  return {"<tag:yaml.org,2002:null>"};
154  case TAG_STR:
155  return {"<tag:yaml.org,2002:str>"};
156  case TAG_TIMESTAMP:
157  return {"<tag:yaml.org,2002:timestamp>"};
158  case TAG_VALUE:
159  return {"<tag:yaml.org,2002:value>"};
160  case TAG_YAML:
161  return {"<tag:yaml.org,2002:yaml>"};
162  case TAG_NONE:
163  default:
164  return {""};
165  }
166 }
167 
168 csubstr from_tag(YamlTag_e tag)
169 {
170  switch(tag)
171  {
172  case TAG_MAP:
173  return {"!!map"};
174  case TAG_OMAP:
175  return {"!!omap"};
176  case TAG_PAIRS:
177  return {"!!pairs"};
178  case TAG_SET:
179  return {"!!set"};
180  case TAG_SEQ:
181  return {"!!seq"};
182  case TAG_BINARY:
183  return {"!!binary"};
184  case TAG_BOOL:
185  return {"!!bool"};
186  case TAG_FLOAT:
187  return {"!!float"};
188  case TAG_INT:
189  return {"!!int"};
190  case TAG_MERGE:
191  return {"!!merge"};
192  case TAG_NULL:
193  return {"!!null"};
194  case TAG_STR:
195  return {"!!str"};
196  case TAG_TIMESTAMP:
197  return {"!!timestamp"};
198  case TAG_VALUE:
199  return {"!!value"};
200  case TAG_YAML:
201  return {"!!yaml"};
202  case TAG_NONE:
203  default:
204  return {""};
205  }
206 }
207 
208 
209 bool TagDirective::create_from_str(csubstr directive)
210 {
211  _RYML_CHECK_BASIC(directive.begins_with("%TAG "));
212  directive = directive.sub(4);
213  if(!directive.begins_with(' '))
214  return false;
215  directive = directive.triml(' ');
216  size_t pos = directive.find(' ');
217  if(pos == npos)
218  return false;
219  handle = directive.first(pos);
220  directive = directive.sub(handle.len).triml(' ');
221  pos = directive.find(' ');
222  if(pos != npos)
223  directive = directive.first(pos);
224  prefix = directive;
225  next_node_id = NONE;
226  _c4dbgpf("%TAG: handle={} prefix={}", handle, prefix);
227  return true;
228 }
229 
230 size_t TagDirective::transform(csubstr tag, substr output, Callbacks const& callbacks, bool with_brackets) const
231 {
232  _c4dbgpf("%TAG: handle={} prefix={} next_node={}. tag={}", handle, prefix, next_node_id, tag);
233  _RYML_ASSERT_BASIC_(callbacks, tag.len >= handle.len);
234  csubstr rest = tag.sub(handle.len);
235  _c4dbgpf("%TAG: rest={}", rest);
236  if(rest.begins_with('<'))
237  {
238  _c4dbgpf("%TAG: begins with <. rest={}", rest);
239  if(C4_UNLIKELY(!rest.ends_with('>')))
240  _RYML_ERR_BASIC_(callbacks, "malformed tag");
241  rest = rest.offs(1, 1);
242  if(rest.begins_with(prefix))
243  {
244  _c4dbgpf("%TAG: already transformed! actual={}", rest.sub(prefix.len));
245  return 0; // return 0 to signal that the tag is local and cannot be resolved
246  }
247  }
248  size_t len = prefix.len + rest.len;
249  if(with_brackets)
250  len += 2;
251  size_t numpc = rest.count('%');
252  if(numpc == 0)
253  {
254  if(len <= output.len)
255  {
256  if(with_brackets)
257  {
258  output.str[0] = '<';
259  memcpy(1u + output.str, prefix.str, prefix.len);
260  memcpy(1u + output.str + prefix.len, rest.str, rest.len);
261  output.str[1u + prefix.len + rest.len] = '>';
262  }
263  else
264  {
265  memcpy(output.str, prefix.str, prefix.len);
266  memcpy(output.str + prefix.len, rest.str, rest.len);
267  }
268  }
269  }
270  else
271  {
272  // need to decode URI % sequences
273  size_t pos = rest.find('%');
274  _RYML_ASSERT_BASIC_(callbacks, pos != npos);
275  do {
276  size_t next = rest.first_not_of("0123456789abcdefABCDEF", pos+1);
277  if(next == npos)
278  next = rest.len;
279  _RYML_CHECK_BASIC_(callbacks, pos+1 < next);
280  _RYML_CHECK_BASIC_(callbacks, pos+1 + 2 <= next);
281  size_t delta = next - (pos+1);
282  len -= delta;
283  pos = rest.find('%', pos+1);
284  } while(pos != npos);
285  if(len <= output.len)
286  {
287  size_t prev = 0, wpos = 0;
288  auto appendstr = [&](csubstr s) { memcpy(output.str + wpos, s.str, s.len); wpos += s.len; };
289  auto appendchar = [&](char c) { output.str[wpos++] = c; };
290  if(with_brackets)
291  appendchar('<');
292  appendstr(prefix);
293  pos = rest.find('%');
294  _RYML_ASSERT_BASIC_(callbacks, pos != npos);
295  do {
296  size_t next = rest.first_not_of("0123456789abcdefABCDEF", pos+1);
297  if(next == npos)
298  next = rest.len;
299  _RYML_CHECK_BASIC_(callbacks, pos+1 < next);
300  _RYML_CHECK_BASIC_(callbacks, pos+1 + 2 <= next);
301  uint8_t val;
302  if(C4_UNLIKELY(!read_hex(rest.range(pos+1, next), &val) || val > 127))
303  _RYML_ERR_BASIC_(callbacks, "invalid URI character");
304  appendstr(rest.range(prev, pos));
305  appendchar(static_cast<char>(val));
306  prev = next;
307  pos = rest.find('%', pos+1);
308  } while(pos != npos);
309  _RYML_ASSERT_BASIC_(callbacks, pos == npos);
310  _RYML_ASSERT_BASIC_(callbacks, prev > 0);
311  _RYML_ASSERT_BASIC_(callbacks, rest.len >= prev);
312  appendstr(rest.sub(prev));
313  if(with_brackets)
314  appendchar('>');
315  _RYML_ASSERT_BASIC_(callbacks, wpos == len);
316  }
317  }
318  return len;
319 }
320 
321 } // namespace yml
322 } // namespace c4
Error utilities used by ryml.
bool read_hex(csubstr s, I *v) noexcept
read a hexadecimal integer from a string.
Definition: charconv.hpp:889
csubstr from_tag_long(YamlTag_e tag)
Definition: tag.cpp:128
bool is_custom_tag(csubstr tag)
Definition: tag.cpp:9
csubstr normalize_tag_long(csubstr tag)
Definition: tag.cpp:31
YamlTag_e
a bit mask for marking tags for types
Definition: tag.hpp:26
csubstr normalize_tag(csubstr tag)
Definition: tag.cpp:19
csubstr from_tag(YamlTag_e tag)
Definition: tag.cpp:168
YamlTag_e to_tag(csubstr tag)
Definition: tag.cpp:67
@ TAG_SET
!!set Unordered set of non-equal values.
Definition: tag.hpp:32
@ TAG_MERGE
!!merge Specify one or more mappings to be merged with the current one.
Definition: tag.hpp:39
@ TAG_INT
!!int Mathematical integers.
Definition: tag.hpp:38
@ TAG_SEQ
!!seq Sequence of arbitrary values.
Definition: tag.hpp:33
@ TAG_NULL
!!null Devoid of value.
Definition: tag.hpp:40
@ TAG_YAML
!!yaml Keys for encoding YAML in YAML https://yaml.org/type/yaml.html
Definition: tag.hpp:44
@ TAG_TIMESTAMP
!!timestamp A point in time https://yaml.org/type/timestamp.html
Definition: tag.hpp:42
@ TAG_NONE
Definition: tag.hpp:27
@ TAG_STR
!!str A sequence of zero or more Unicode characters.
Definition: tag.hpp:41
@ TAG_BOOL
!!bool Mathematical Booleans.
Definition: tag.hpp:36
@ TAG_MAP
!!map Unordered set of key: value pairs without duplicates.
Definition: tag.hpp:29
@ TAG_BINARY
!!binary A sequence of zero or more octets (8 bit values).
Definition: tag.hpp:35
@ TAG_PAIRS
!!pairs Ordered sequence of key: value pairs allowing duplicates.
Definition: tag.hpp:31
@ TAG_VALUE
!!value Specify the default value of a mapping https://yaml.org/type/value.html
Definition: tag.hpp:43
@ TAG_OMAP
!!omap Ordered sequence of key: value pairs without duplicates.
Definition: tag.hpp:30
@ TAG_FLOAT
!!float Floating-point approximation to real numbers.
Definition: tag.hpp:37
@ npos
a null string position
Definition: common.hpp:258
@ NONE
an index to none
Definition: common.hpp:251
(Undefined by default) Use shorter error message from checks/asserts: do not show the check condition...
Definition: common.cpp:14
A c-style callbacks class to customize behavior on errors or allocation.
Definition: common.hpp:511
bool create_from_str(csubstr directive_)
leaves next_node_id unfilled (it is set to NONE)
Definition: tag.cpp:209
csubstr handle
Eg.
Definition: tag.hpp:60
id_type next_node_id
The next node to which this tag directive applies.
Definition: tag.hpp:64
csubstr prefix
Eg.
Definition: tag.hpp:62
size_t transform(csubstr tag, substr output, Callbacks const &callbacks, bool with_brackets=true) const
Definition: tag.cpp:230