rapidyaml  0.10.0
parse and emit YAML, and do it fast
tag.cpp
Go to the documentation of this file.
1 #include "c4/yml/tag.hpp"
2 #include "c4/yml/detail/dbgprint.hpp"
3 
4 
5 namespace c4 {
6 namespace yml {
7 
8 bool is_custom_tag(csubstr tag)
9 {
10  if((tag.len > 2) && (tag.str[0] == '!'))
11  {
12  size_t pos = tag.find('!', 1);
13  return pos != npos && pos > 1 && tag.str[1] != '<';
14  }
15  return false;
16 }
17 
18 csubstr normalize_tag(csubstr tag)
19 {
20  YamlTag_e t = to_tag(tag);
21  if(t != TAG_NONE)
22  return from_tag(t);
23  if(tag.begins_with("!<"))
24  tag = tag.sub(1);
25  if(tag.begins_with("<!"))
26  return tag;
27  return tag;
28 }
29 
30 csubstr normalize_tag_long(csubstr tag)
31 {
32  YamlTag_e t = to_tag(tag);
33  if(t != TAG_NONE)
34  return from_tag_long(t);
35  if(tag.begins_with("!<"))
36  tag = tag.sub(1);
37  if(tag.begins_with("<!"))
38  return tag;
39  return tag;
40 }
41 
42 csubstr normalize_tag_long(csubstr tag, substr output)
43 {
44  csubstr result = normalize_tag_long(tag);
45  if(result.begins_with("!!"))
46  {
47  tag = tag.sub(2);
48  const csubstr pfx = "<tag:yaml.org,2002:";
49  const size_t len = pfx.len + tag.len + 1;
50  if(len <= output.len)
51  {
52  memcpy(output.str , pfx.str, pfx.len);
53  memcpy(output.str + pfx.len, tag.str, tag.len);
54  output[pfx.len + tag.len] = '>';
55  result = output.first(len);
56  }
57  else
58  {
59  result.str = nullptr;
60  result.len = len;
61  }
62  }
63  return result;
64 }
65 
66 YamlTag_e to_tag(csubstr tag)
67 {
68  if(tag.begins_with("!<"))
69  tag = tag.sub(1);
70  if(tag.begins_with("!!"))
71  tag = tag.sub(2);
72  else if(tag.begins_with('!'))
73  return TAG_NONE;
74  else if(tag.begins_with("tag:yaml.org,2002:"))
75  {
76  RYML_ASSERT(csubstr("tag:yaml.org,2002:").len == 18);
77  tag = tag.sub(18);
78  }
79  else if(tag.begins_with("<tag:yaml.org,2002:"))
80  {
81  RYML_ASSERT(csubstr("<tag:yaml.org,2002:").len == 19);
82  tag = tag.sub(19);
83  if(!tag.len)
84  return TAG_NONE;
85  tag = tag.offs(0, 1);
86  }
87 
88  if(tag == "map")
89  return TAG_MAP;
90  else if(tag == "omap")
91  return TAG_OMAP;
92  else if(tag == "pairs")
93  return TAG_PAIRS;
94  else if(tag == "set")
95  return TAG_SET;
96  else if(tag == "seq")
97  return TAG_SEQ;
98  else if(tag == "binary")
99  return TAG_BINARY;
100  else if(tag == "bool")
101  return TAG_BOOL;
102  else if(tag == "float")
103  return TAG_FLOAT;
104  else if(tag == "int")
105  return TAG_INT;
106  else if(tag == "merge")
107  return TAG_MERGE;
108  else if(tag == "null")
109  return TAG_NULL;
110  else if(tag == "str")
111  return TAG_STR;
112  else if(tag == "timestamp")
113  return TAG_TIMESTAMP;
114  else if(tag == "value")
115  return TAG_VALUE;
116  else if(tag == "yaml")
117  return TAG_YAML;
118 
119  return TAG_NONE;
120 }
121 
123 {
124  switch(tag)
125  {
126  case TAG_MAP:
127  return {"<tag:yaml.org,2002:map>"};
128  case TAG_OMAP:
129  return {"<tag:yaml.org,2002:omap>"};
130  case TAG_PAIRS:
131  return {"<tag:yaml.org,2002:pairs>"};
132  case TAG_SET:
133  return {"<tag:yaml.org,2002:set>"};
134  case TAG_SEQ:
135  return {"<tag:yaml.org,2002:seq>"};
136  case TAG_BINARY:
137  return {"<tag:yaml.org,2002:binary>"};
138  case TAG_BOOL:
139  return {"<tag:yaml.org,2002:bool>"};
140  case TAG_FLOAT:
141  return {"<tag:yaml.org,2002:float>"};
142  case TAG_INT:
143  return {"<tag:yaml.org,2002:int>"};
144  case TAG_MERGE:
145  return {"<tag:yaml.org,2002:merge>"};
146  case TAG_NULL:
147  return {"<tag:yaml.org,2002:null>"};
148  case TAG_STR:
149  return {"<tag:yaml.org,2002:str>"};
150  case TAG_TIMESTAMP:
151  return {"<tag:yaml.org,2002:timestamp>"};
152  case TAG_VALUE:
153  return {"<tag:yaml.org,2002:value>"};
154  case TAG_YAML:
155  return {"<tag:yaml.org,2002:yaml>"};
156  case TAG_NONE:
157  default:
158  return {""};
159  }
160 }
161 
162 csubstr from_tag(YamlTag_e tag)
163 {
164  switch(tag)
165  {
166  case TAG_MAP:
167  return {"!!map"};
168  case TAG_OMAP:
169  return {"!!omap"};
170  case TAG_PAIRS:
171  return {"!!pairs"};
172  case TAG_SET:
173  return {"!!set"};
174  case TAG_SEQ:
175  return {"!!seq"};
176  case TAG_BINARY:
177  return {"!!binary"};
178  case TAG_BOOL:
179  return {"!!bool"};
180  case TAG_FLOAT:
181  return {"!!float"};
182  case TAG_INT:
183  return {"!!int"};
184  case TAG_MERGE:
185  return {"!!merge"};
186  case TAG_NULL:
187  return {"!!null"};
188  case TAG_STR:
189  return {"!!str"};
190  case TAG_TIMESTAMP:
191  return {"!!timestamp"};
192  case TAG_VALUE:
193  return {"!!value"};
194  case TAG_YAML:
195  return {"!!yaml"};
196  case TAG_NONE:
197  default:
198  return {""};
199  }
200 }
201 
202 
203 bool TagDirective::create_from_str(csubstr directive_)
204 {
205  csubstr directive = directive_;
206  directive = directive.sub(4);
207  if(!directive.begins_with(' '))
208  return false;
209  directive = directive.triml(' ');
210  size_t pos = directive.find(' ');
211  if(pos == npos)
212  return false;
213  handle = directive.first(pos);
214  directive = directive.sub(handle.len).triml(' ');
215  pos = directive.find(' ');
216  if(pos != npos)
217  directive = directive.first(pos);
218  prefix = directive;
219  next_node_id = NONE;
220  _c4dbgpf("%TAG: handle={} prefix={}", handle, prefix);
221  return true;
222 }
223 
224 size_t TagDirective::transform(csubstr tag, substr output, Callbacks const& callbacks, bool with_brackets) const
225 {
226  _c4dbgpf("%TAG: handle={} prefix={} next_node={}. tag={}", handle, prefix, next_node_id, tag);
227  _RYML_CB_ASSERT(callbacks, tag.len >= handle.len);
228  csubstr rest = tag.sub(handle.len);
229  _c4dbgpf("%TAG: rest={}", rest);
230  if(rest.begins_with('<'))
231  {
232  _c4dbgpf("%TAG: begins with <. rest={}", rest);
233  if(C4_UNLIKELY(!rest.ends_with('>')))
234  _RYML_CB_ERR(callbacks, "malformed tag");
235  rest = rest.offs(1, 1);
236  if(rest.begins_with(prefix))
237  {
238  _c4dbgpf("%TAG: already transformed! actual={}", rest.sub(prefix.len));
239  return 0; // return 0 to signal that the tag is local and cannot be resolved
240  }
241  }
242  size_t len = prefix.len + rest.len;
243  if(with_brackets)
244  len += 2;
245  size_t numpc = rest.count('%');
246  if(numpc == 0)
247  {
248  if(len <= output.len)
249  {
250  if(with_brackets)
251  {
252  output.str[0] = '<';
253  memcpy(1u + output.str, prefix.str, prefix.len);
254  memcpy(1u + output.str + prefix.len, rest.str, rest.len);
255  output.str[1u + prefix.len + rest.len] = '>';
256  }
257  else
258  {
259  memcpy(output.str, prefix.str, prefix.len);
260  memcpy(output.str + prefix.len, rest.str, rest.len);
261  }
262  }
263  }
264  else
265  {
266  // need to decode URI % sequences
267  size_t pos = rest.find('%');
268  _RYML_CB_ASSERT(callbacks, pos != npos);
269  do {
270  size_t next = rest.first_not_of("0123456789abcdefABCDEF", pos+1);
271  if(next == npos)
272  next = rest.len;
273  _RYML_CB_CHECK(callbacks, pos+1 < next);
274  _RYML_CB_CHECK(callbacks, pos+1 + 2 <= next);
275  size_t delta = next - (pos+1);
276  len -= delta;
277  pos = rest.find('%', pos+1);
278  } while(pos != npos);
279  if(len <= output.len)
280  {
281  size_t prev = 0, wpos = 0;
282  auto appendstr = [&](csubstr s) { memcpy(output.str + wpos, s.str, s.len); wpos += s.len; };
283  auto appendchar = [&](char c) { output.str[wpos++] = c; };
284  if(with_brackets)
285  appendchar('<');
286  appendstr(prefix);
287  pos = rest.find('%');
288  _RYML_CB_ASSERT(callbacks, pos != npos);
289  do {
290  size_t next = rest.first_not_of("0123456789abcdefABCDEF", pos+1);
291  if(next == npos)
292  next = rest.len;
293  _RYML_CB_CHECK(callbacks, pos+1 < next);
294  _RYML_CB_CHECK(callbacks, pos+1 + 2 <= next);
295  uint8_t val;
296  if(C4_UNLIKELY(!read_hex(rest.range(pos+1, next), &val) || val > 127))
297  _RYML_CB_ERR(callbacks, "invalid URI character");
298  appendstr(rest.range(prev, pos));
299  appendchar(static_cast<char>(val));
300  prev = next;
301  pos = rest.find('%', pos+1);
302  } while(pos != npos);
303  _RYML_CB_ASSERT(callbacks, pos == npos);
304  _RYML_CB_ASSERT(callbacks, prev > 0);
305  _RYML_CB_ASSERT(callbacks, rest.len >= prev);
306  appendstr(rest.sub(prev));
307  if(with_brackets)
308  appendchar('>');
309  _RYML_CB_ASSERT(callbacks, wpos == len);
310  }
311  }
312  return len;
313 }
314 
315 } // namespace yml
316 } // namespace c4
bool read_hex(csubstr s, I *v) noexcept
read a hexadecimal integer from a string.
Definition: charconv.hpp:890
csubstr from_tag_long(YamlTag_e tag)
Definition: tag.cpp:122
bool is_custom_tag(csubstr tag)
Definition: tag.cpp:8
csubstr normalize_tag_long(csubstr tag)
Definition: tag.cpp:30
YamlTag_e
a bit mask for marking tags for types
Definition: tag.hpp:26
csubstr normalize_tag(csubstr tag)
Definition: tag.cpp:18
csubstr from_tag(YamlTag_e tag)
Definition: tag.cpp:162
YamlTag_e to_tag(csubstr tag)
Definition: tag.cpp:66
@ TAG_SET
!!set Unordered set of non-equal values.
Definition: tag.hpp:32
@ TAG_MERGE
!!merge Specify one or more mapping to be merged with the current one.
Definition: tag.hpp:39
@ TAG_INT
!!int Mathematical integers.
Definition: tag.hpp:38
@ TAG_SEQ
!!seq Sequence of arbitrary values.
Definition: tag.hpp:33
@ TAG_NULL
!!null Devoid of value.
Definition: tag.hpp:40
@ TAG_YAML
!!yaml Keys for encoding YAML in YAML https://yaml.org/type/yaml.html
Definition: tag.hpp:44
@ TAG_TIMESTAMP
!!timestamp A point in time https://yaml.org/type/timestamp.html
Definition: tag.hpp:42
@ TAG_NONE
Definition: tag.hpp:27
@ TAG_STR
!!str A sequence of zero or more Unicode characters.
Definition: tag.hpp:41
@ TAG_BOOL
!!bool Mathematical Booleans.
Definition: tag.hpp:36
@ TAG_MAP
!!map Unordered set of key: value pairs without duplicates.
Definition: tag.hpp:29
@ TAG_BINARY
!!binary A sequence of zero or more octets (8 bit values).
Definition: tag.hpp:35
@ TAG_PAIRS
!!pairs Ordered sequence of key: value pairs allowing duplicates.
Definition: tag.hpp:31
@ TAG_VALUE
!!value Specify the default value of a mapping https://yaml.org/type/value.html
Definition: tag.hpp:43
@ TAG_OMAP
!!omap Ordered sequence of key: value pairs without duplicates.
Definition: tag.hpp:30
@ TAG_FLOAT
!!float Floating-point approximation to real numbers.
Definition: tag.hpp:37
@ npos
a null string position
Definition: common.hpp:267
@ NONE
an index to none
Definition: common.hpp:260
Definition: common.cpp:12
a c-style callbacks class.
Definition: common.hpp:377
bool create_from_str(csubstr directive_)
leaves next_node_id unfilled
Definition: tag.cpp:203
csubstr handle
Eg.
Definition: tag.hpp:60
id_type next_node_id
The next node to which this tag directive applies.
Definition: tag.hpp:64
csubstr prefix
Eg.
Definition: tag.hpp:62
size_t transform(csubstr tag, substr output, Callbacks const &callbacks, bool with_brackets=true) const
Definition: tag.cpp:224