rapidyaml 0.15.2
parse and emit YAML, and do it fast
Loading...
Searching...
No Matches
tag.cpp
Go to the documentation of this file.
1#include "c4/yml/tag.hpp"
2#include "c4/yml/error.hpp"
3#include "c4/yml/detail/dbgprint.hpp"
4
5
6namespace c4 {
7namespace yml {
8
10{
11 if((tag.len > 2) && (tag.str[0] == '!'))
12 {
13 size_t pos = tag.find('!', 1);
14 return pos != npos && pos > 1 && tag.str[1] != '<';
15 }
16 return false;
17}
18
20{
21 YamlTag_e t = to_tag(tag);
22 if(t != TAG_NONE)
23 return from_tag(t);
24 if(tag.begins_with("!<"))
25 tag = tag.sub(1);
26 if(tag.begins_with("<!"))
27 return tag;
28 return tag;
29}
30
32{
33 YamlTag_e t = to_tag(tag);
34 if(t != TAG_NONE)
35 return from_tag_long(t);
36 if(tag.begins_with("!<"))
37 tag = tag.sub(1);
38 if(tag.begins_with("<!"))
39 return tag;
40 return tag;
41}
42
44{
45 csubstr result = normalize_tag_long(tag);
46 if(result.begins_with("!!"))
47 {
48 RYML_CHECK_BASIC_(!output.overlaps(tag));
49 tag = tag.sub(2);
50 const csubstr pfx = "<tag:yaml.org,2002:";
51 const size_t len = pfx.len + tag.len + 1;
52 if(len <= output.len)
53 {
54 memcpy(output.str , pfx.str, pfx.len);
55 memcpy(output.str + pfx.len, tag.str, tag.len);
56 output[pfx.len + tag.len] = '>';
57 result = output.first(len);
58 }
59 else
60 {
61 result.str = nullptr;
62 result.len = len;
63 }
64 }
65 return result;
66}
67
69{
70 if(tag.begins_with("!<"))
71 tag = tag.sub(1);
72 if(tag.begins_with("!!"))
73 {
74 tag = tag.sub(2);
75 }
76 else if(tag.begins_with('!'))
77 {
78 return TAG_NONE;
79 }
80 else
81 {
82 csubstr pfx = "<tag:yaml.org,2002:";
83 csubstr pfx2 = pfx.sub(1);
84 if(tag.begins_with(pfx2))
85 {
86 tag = tag.sub(pfx2.len);
87 }
88 else if(tag.begins_with(pfx))
89 {
90 tag = tag.sub(pfx.len);
91 if(!tag.len)
92 return TAG_NONE;
93 tag = tag.offs(0, 1);
94 }
95 }
96 if(tag == "map")
97 return TAG_MAP;
98 else if(tag == "omap")
99 return TAG_OMAP;
100 else if(tag == "pairs")
101 return TAG_PAIRS;
102 else if(tag == "set")
103 return TAG_SET;
104 else if(tag == "seq")
105 return TAG_SEQ;
106 else if(tag == "binary")
107 return TAG_BINARY;
108 else if(tag == "bool")
109 return TAG_BOOL;
110 else if(tag == "float")
111 return TAG_FLOAT;
112 else if(tag == "int")
113 return TAG_INT;
114 else if(tag == "merge")
115 return TAG_MERGE;
116 else if(tag == "null")
117 return TAG_NULL;
118 else if(tag == "str")
119 return TAG_STR;
120 else if(tag == "timestamp")
121 return TAG_TIMESTAMP;
122 else if(tag == "value")
123 return TAG_VALUE;
124 else if(tag == "yaml")
125 return TAG_YAML;
126
127 return TAG_NONE;
128}
129
131{
132 switch(tag)
133 {
134 case TAG_MAP:
135 return {"<tag:yaml.org,2002:map>"};
136 case TAG_OMAP:
137 return {"<tag:yaml.org,2002:omap>"};
138 case TAG_PAIRS:
139 return {"<tag:yaml.org,2002:pairs>"};
140 case TAG_SET:
141 return {"<tag:yaml.org,2002:set>"};
142 case TAG_SEQ:
143 return {"<tag:yaml.org,2002:seq>"};
144 case TAG_BINARY:
145 return {"<tag:yaml.org,2002:binary>"};
146 case TAG_BOOL:
147 return {"<tag:yaml.org,2002:bool>"};
148 case TAG_FLOAT:
149 return {"<tag:yaml.org,2002:float>"};
150 case TAG_INT:
151 return {"<tag:yaml.org,2002:int>"};
152 case TAG_MERGE:
153 return {"<tag:yaml.org,2002:merge>"};
154 case TAG_NULL:
155 return {"<tag:yaml.org,2002:null>"};
156 case TAG_STR:
157 return {"<tag:yaml.org,2002:str>"};
158 case TAG_TIMESTAMP:
159 return {"<tag:yaml.org,2002:timestamp>"};
160 case TAG_VALUE:
161 return {"<tag:yaml.org,2002:value>"};
162 case TAG_YAML:
163 return {"<tag:yaml.org,2002:yaml>"};
164 case TAG_NONE:
165 default:
166 return {""};
167 }
168}
169
171{
172 switch(tag)
173 {
174 case TAG_MAP:
175 return {"!!map"};
176 case TAG_OMAP:
177 return {"!!omap"};
178 case TAG_PAIRS:
179 return {"!!pairs"};
180 case TAG_SET:
181 return {"!!set"};
182 case TAG_SEQ:
183 return {"!!seq"};
184 case TAG_BINARY:
185 return {"!!binary"};
186 case TAG_BOOL:
187 return {"!!bool"};
188 case TAG_FLOAT:
189 return {"!!float"};
190 case TAG_INT:
191 return {"!!int"};
192 case TAG_MERGE:
193 return {"!!merge"};
194 case TAG_NULL:
195 return {"!!null"};
196 case TAG_STR:
197 return {"!!str"};
198 case TAG_TIMESTAMP:
199 return {"!!timestamp"};
200 case TAG_VALUE:
201 return {"!!value"};
202 case TAG_YAML:
203 return {"!!yaml"};
204 case TAG_NONE:
205 default:
206 return {""};
207 }
208}
209
211{
212 if(handle.begins_with('!') && handle.ends_with('!'))
213 {
214 _c4dbgpf("handle={}", prs_(handle, true));
215 csubstr trimmed = handle.sub(1);
216 if(trimmed.ends_with('!'))
217 trimmed = trimmed.offs(0, 1);
218 _c4dbgpf("handle_trimmed={}", prs_(trimmed, true));
219 // https://yaml.org/spec/1.2.2/#rule-ns-word-char
220 for(char c : trimmed)
221 {
222 bool ok = (c >= '0' && c <= '9')
223 || (c >= 'a' && c <= 'z')
224 || (c >= 'A' && c <= 'Z')
225 || c == '-';
226 if(!ok)
227 {
228 _c4dbgpf("invalid handle character: '{}'", _c4prc(c));
229 return false;
230 }
231 }
232 return true;
233 }
234 return false;
235}
236
237namespace {
/** Tell whether @p c may appear in the part of a tag that follows the
 * handle: an ASCII letter, an ASCII digit, or one of the punctuation
 * characters listed below. Note that '%' is accepted here; the caller
 * is responsible for checking the hex digits that must follow it.
 * @see https://yaml.org/spec/1.2.2/#691-node-tags */
bool is_valid_tag_char(char c)
{
    if(c >= '0' && c <= '9')
        return true;
    if(c >= 'a' && c <= 'z')
        return true;
    if(c >= 'A' && c <= 'Z')
        return true;
    // accepted punctuation; the scan stops at the terminating zero, so
    // a zero character is never accepted
    const char accepted[] = "-#;/?:@&=+$_.~*'()%";
    for(const char *p = accepted; *p != '\0'; ++p)
    {
        if(*p == c)
            return true;
    }
    return false;
}
272bool read_hex_char(csubstr suffix, size_t pos, char *out)
273{
274 // must be succeeded by 2 hex digits
275 if(pos + 3 > suffix.len)
276 return false;
277 suffix = suffix.range(pos + 1, pos + 3);
278 uint8_t val = 0;
279 if C4_UNLIKELY(!read_hex(suffix, &val) || val > 127)
280 return false;
281 *out = static_cast<char>(val);
282 return true;
283}
284} // namespace
285
286
287size_t transform_tag(substr output, csubstr handle, csubstr prefix, csubstr tag,
288 Callbacks const& callbacks, Location const& ymlloc,
289 bool with_brackets)
290{
291 RYML_ASSERT_BASIC_CB_(callbacks, tag.len >= handle.len);
292 RYML_ASSERT_BASIC_CB_(callbacks, !output.overlaps(tag));
293 RYML_ASSERT_BASIC_CB_(callbacks, prefix.len > 0);
294 csubstr rest = tag.sub(handle.len);
295 _c4dbgpf("%TAG: rest={}", prs_(rest));
296 size_t rpos = 0, wpos = 0;
297 auto appendstr = [&](csubstr s) {
298 if(s.len && wpos + s.len <= output.len)
299 memcpy(output.str + wpos, s.str, s.len);
300 wpos += s.len;
301 };
302 auto appendchar = [&](char c) {
303 if(wpos < output.len)
304 output.str[wpos] = c;
305 ++wpos;
306 };
307 if(with_brackets)
308 appendchar('<');
309 appendstr(prefix);
310 const char *errmsg = nullptr;
311 for(size_t pos = 0; pos < rest.len; ++pos)
312 {
313 char c = rest.str[pos];
314 if C4_LIKELY(is_valid_tag_char(c))
315 {
316 if(c != '%')
317 {
318 continue;
319 }
320 else if(read_hex_char(rest, pos, &c))
321 {
322 appendstr(rest.range(rpos, pos));
323 appendchar(c);
324 pos += 2;
325 rpos = pos + 1;
326 continue;
327 }
328 }
329 errmsg = "invalid tag";
330 goto err; // NOLINT
331 }
332 appendstr(rest.sub(rpos));
333 if(with_brackets)
334 appendchar('>');
335 return wpos;
336err:
337 if(ymlloc)
338 {
339 RYML_ERR_PARSE_CB_(callbacks, ymlloc, errmsg);
340 }
341 else
342 {
343 RYML_ERR_BASIC_CB_(callbacks, errmsg);
344 }
345}
346
347
348//-----------------------------------------------------------------------------
349
351{
352 // this assumes we have a very small number of tag directives
353 id_type i = 0;
354 for(; i < RYML_MAX_TAG_DIRECTIVES; ++i)
355 if(m_directives[i].handle.empty())
356 break;
357 return i;
358}
359
360TagDirective const* TagDirectives::add(csubstr handle, csubstr prefix, id_type doc_id) noexcept
361{
362 id_type pos = size();
363 TagDirective *C4_RESTRICT td = nullptr;
365 {
366 td = &m_directives[pos];
367 td->handle = handle;
368 td->prefix = prefix;
369 td->doc_id = doc_id;
370 _c4dbgpf("tagd[{}]: added! handle={} prefix={} doc={}", pos, td->handle, td->prefix, td->doc_id);
371 }
372 return td;
373}
374
375void TagDirectives::clear() noexcept
376{
377 for(TagDirective &td : m_directives)
378 {
379 td.handle = {};
380 td.prefix = {};
381 td.doc_id = NONE;
382 }
383}
384
386{
387 TagDirective const* first = nullptr;
388 TagDirective const* last = nullptr;
389 for(id_type i = 0; i < RYML_MAX_TAG_DIRECTIVES; ++i)
390 {
391 TagDirective const& C4_RESTRICT td = m_directives[i];
392 if(doc_id == td.doc_id)
393 {
394 first = m_directives + i;
395 break;
396 }
397 else if(td.handle.empty())
398 {
399 break;
400 }
401 }
402 if(first)
403 {
405 for(TagDirective const* C4_RESTRICT td = first; td < last; ++td)
406 {
407 if(doc_id != td->doc_id || td->handle.empty())
408 {
409 last = td;
410 break;
411 }
412 }
413 }
414 else
415 {
416 first = last = m_directives;
417 }
418 return TagDirectiveRange{first, last};
419}
420
421TagDirective const* TagDirectives::lookup(csubstr tag, id_type doc_id) const noexcept
422{
423 _c4dbgpf("tagd: searching for {}, doc_id={}", prs_(tag), doc_id);
424 for(id_type i = 0; i < RYML_MAX_TAG_DIRECTIVES; ++i)
425 {
426 TagDirective const& C4_RESTRICT td = m_directives[i];
427 if(td.handle.empty())
428 {
429 continue;
430 }
431 _c4dbgpf("tagd[{}]: handle={} prefix={} doc_id={}", i, td.handle, td.prefix, td.doc_id);
432 if(tag.begins_with(td.handle))
433 {
434 if(td.handle == '!' && (
435 tag.begins_with("!!")
436 || tag.begins_with('<')
437 || tag.begins_with("!<")
438 || is_custom_tag(tag)))
439 continue;
440 _c4dbgpf("tagd[{}]: matches handle!", i);
441 if(doc_id == td.doc_id)
442 {
443 _c4dbgpf("tagd[{}]: matches doc={}!", i, doc_id);
444 return &td;
445 }
446 }
447 }
448 return nullptr;
449}
450
451csubstr TagDirectives::resolve(substr buf, size_t *bufsz, csubstr tag, id_type id, Location const& ymlloc, Callbacks const& callbacks, bool with_brackets) const
452{
453 RYML_ASSERT_BASIC_CB_(callbacks, !buf.overlaps(tag));
454 TagDirective const* C4_RESTRICT td = lookup(tag, id);
455 *bufsz = 0;
456 csubstr handle, prefix, ret;
457 const char *errmsg = nullptr;
458 size_t len;
459 if(td)
460 {
461 handle = td->handle;
462 prefix = td->prefix;
463 }
464 else
465 {
466 _c4dbgp("tagd: no directive found");
467 if(tag.begins_with('<'))
468 {
469 _c4dbgp("tagd: already resolved");
470 if C4_UNLIKELY(!tag.ends_with('>'))
471 {
472 errmsg = "malformed tag";
473 goto err; // NOLINT
474 }
475 return tag;
476 }
477 else if(tag.begins_with("!<"))
478 {
479 _c4dbgp("tagd: already resolved");
480 if C4_UNLIKELY(!tag.ends_with('>'))
481 {
482 errmsg = "malformed tag";
483 goto err; // NOLINT
484 }
485 return tag.sub(1);
486 }
487 else if(tag.begins_with("!!"))
488 {
489 _c4dbgp("tagd: !!");
490 YamlTag_e tagenum = to_tag(tag);
491 if(tagenum != TAG_NONE)
492 {
493 _c4dbgpf("tagd: standard tag: {} -> {}", tag, from_tag_long(tagenum));
494 tag = from_tag_long(tagenum);
495 return with_brackets ? tag : tag.offs(1, 1);
496 }
497 handle = "!!";
498 prefix = "tag:yaml.org,2002:";
499 }
500 else if C4_UNLIKELY(is_custom_tag(tag))
501 {
502 _c4dbgp("tagd: custom_tag");
503 _c4dbgpf("tag '{}' at id={}: no matching directive was found", tag, id);
504 errmsg = "tag without matching directive";
505 goto err; // NOLINT
506 }
507 else
508 {
509 _c4dbgp("tagd: !");
510 handle = prefix = "!";
511 }
512 }
513 len = transform_tag(buf, handle, prefix, tag, callbacks, ymlloc, with_brackets);
514 *bufsz = len;
515 if(len <= buf.len)
516 {
517 ret = buf.first(len);
518 }
519 else
520 {
521 _c4dbgp("tagd: not enough room");
522 ret.str = nullptr;
523 ret.len = len;
524 }
525 return ret;
526err:
527 if(ymlloc)
528 {
529 RYML_ERR_PARSE_CB_(callbacks, ymlloc, errmsg);
530 }
531 else
532 {
533 RYML_ERR_BASIC_CB_(callbacks, errmsg);
534 }
535}
536
537
538//-----------------------------------------------------------------------------
539TagCache::LookupResult TagCache::find(csubstr tag, id_type doc_id, id_type linear_threshold) const noexcept
540{
541 LookupResult ret = {};
542 id_type sz = m_entries.size();
543 if(sz < linear_threshold) // do a linear search on small size
544 {
545 for(size_t i = 0; i < sz; ++i)
546 {
547 Entry const& C4_RESTRICT e = m_entries[i];
548 if(e.tag == tag && e.doc_id == doc_id)
549 {
550 ret.resolved = e.resolved;
551 ret.pos = i;
552 return ret;
553 }
554 else if(e.tag > tag || ((e.tag == tag) && e.doc_id > doc_id))
555 {
556 ret.pos = i;
557 return ret;
558 }
559 }
560 ret.pos = sz;
561 }
562 else // do a binary search on larger size
563 {
564 id_type first = 0;
565 id_type count = sz;
566 while(count)
567 {
568 id_type halfsz = count / id_type(2); // NOLINT(*avoid-c-style-cast)
569 id_type mid = first + halfsz;
570 RYML_ASSERT_BASIC_CB_(m_entries.m_callbacks, mid < sz);
571 Entry const& C4_RESTRICT e = m_entries[mid];
572 if(e.tag < tag || (e.tag == tag && e.doc_id < doc_id))
573 {
574 first = mid + 1;
575 RYML_ASSERT_BASIC_CB_(m_entries.m_callbacks, count >= halfsz + 1);
576 count -= halfsz + 1;
577 }
578 else
579 {
580 count = halfsz;
581 }
582 }
583 ret.pos = first;
584 if(first < sz)
585 {
586 Entry const& C4_RESTRICT e = m_entries[first];
587 if(e.tag == tag && e.doc_id == doc_id)
588 {
589 ret.resolved = m_entries[first].resolved;
590 }
591 }
592 }
593 return ret;
594}
595
597{
598 const id_type sz = m_entries.size();
599 RYML_ASSERT_BASIC_CB_(m_entries.m_callbacks, pos <= sz);
600 RYML_ASSERT_BASIC_CB_(m_entries.m_callbacks, pos == sz || tag < m_entries[pos].tag || (tag == m_entries[pos].tag && doc_id < m_entries[pos].doc_id));
601 m_entries.resize(sz + 1);
602 if(pos < sz)
603 memmove(m_entries.m_stack + pos + 1, m_entries.m_stack + pos, (sz - pos) * sizeof(Entry));
604 m_entries.m_stack[pos].tag = tag;
605 m_entries.m_stack[pos].resolved = resolved;
606 m_entries.m_stack[pos].doc_id = doc_id;
607 _c4dbgpf("tagcache: add entry @pos={}: docid={} {} -> {}", pos, doc_id, tag, maybe_null_str_(resolved));
608}
609
610} // namespace yml
611} // namespace c4
#define RYML_NOEXCEPT
Conditionally expands to noexcept when RYML_USE_ASSERT is 0 and is empty otherwise.
Error utilities used by ryml.
bool read_hex(csubstr s, I *v) noexcept
read an hexadecimal integer from a string.
Definition charconv.hpp:902
basic_substring< char > substr
a mutable string view
Definition substr.hpp:2355
basic_substring< const char > csubstr
an immutable string view
Definition substr.hpp:2356
csubstr from_tag_long(YamlTag_e tag)
Definition tag.cpp:130
bool is_valid_tag_handle(csubstr handle)
Definition tag.cpp:210
bool is_custom_tag(csubstr tag)
is a tag of the form !handle!tag?
Definition tag.cpp:9
csubstr normalize_tag_long(csubstr tag)
Definition tag.cpp:31
YamlTag_e
a bit mask for marking tags for types
Definition tag.hpp:33
size_t transform_tag(substr output, csubstr handle, csubstr prefix, csubstr tag, Callbacks const &callbacks, Location const &ymlloc, bool with_brackets)
returns the length of the transformed tag, or 0 to signal that the tag is local and cannot be resolve...
Definition tag.cpp:287
csubstr normalize_tag(csubstr tag)
Definition tag.cpp:19
csubstr from_tag(YamlTag_e tag)
Definition tag.cpp:170
YamlTag_e to_tag(csubstr tag)
Definition tag.cpp:68
#define RYML_MAX_TAG_DIRECTIVES
the maximum number of tag directives in a Tree
Definition tag.hpp:26
@ TAG_SET
!!set
Definition tag.hpp:39
@ TAG_MERGE
!!merge
Definition tag.hpp:46
@ TAG_INT
!!int
Definition tag.hpp:45
@ TAG_SEQ
!!seq
Definition tag.hpp:40
@ TAG_NULL
!!null
Definition tag.hpp:47
@ TAG_YAML
!!yaml
Definition tag.hpp:51
@ TAG_TIMESTAMP
!!timestamp
Definition tag.hpp:49
@ TAG_NONE
Definition tag.hpp:34
@ TAG_STR
!!str
Definition tag.hpp:48
@ TAG_BOOL
!!bool
Definition tag.hpp:43
@ TAG_MAP
!!map
Definition tag.hpp:36
@ TAG_BINARY
!!binary
Definition tag.hpp:42
@ TAG_PAIRS
!!pairs
Definition tag.hpp:38
@ TAG_VALUE
!!value
Definition tag.hpp:50
@ TAG_OMAP
!!omap
Definition tag.hpp:37
@ TAG_FLOAT
!!float
Definition tag.hpp:44
@ npos
a null string position
Definition common.hpp:138
RYML_ID_TYPE id_type
The type of a node id in the YAML tree; to override the default type, define the macro RYML_ID_TYPE t...
Definition common.hpp:124
@ NONE
an index to none
Definition common.hpp:131
basic_substring range(size_t first, size_t last=npos) const noexcept
return [first,last[.
Definition substr.hpp:519
bool begins_with(const C c) const noexcept
true if the first character of the string is c
Definition substr.hpp:850
basic_substring offs(size_t left, size_t right) const noexcept
offset from the ends: return [left,len-right[ ; ie, trim a number of characters from the left and rig...
Definition substr.hpp:547
size_t len
the length of the substring
Definition substr.hpp:218
bool ends_with(const C c) const noexcept
true if the last character of the string is c
Definition substr.hpp:894
size_t find(const C c, size_t start_pos=0) const
Definition substr.hpp:713
bool overlaps(ro_substr const that) const noexcept
true if there is overlap of at least one element between that and *this
Definition substr.hpp:493
basic_substring first(size_t num) const noexcept
return the first num elements: [0,num[
Definition substr.hpp:529
basic_substring sub(size_t first) const noexcept
return [first,len[
Definition substr.hpp:502
C * str
a restricted pointer to the first character of the substring
Definition substr.hpp:216
A c-style callbacks class to customize behavior on errors or allocation.
Definition common.hpp:374
holds a source or yaml file position, for example when an error is detected; See also location_format...
Definition common.hpp:229
LookupResult find(csubstr tag, id_type doc_id, id_type linear_threshold=Entries::sso_size) const noexcept
Definition tag.cpp:539
id_type const_iterator
Definition tag.hpp:79
void add(csubstr tag, csubstr resolved, id_type doc_id, const_iterator pos) RYML_NOEXCEPT
Definition tag.cpp:596
void clear() noexcept
Definition tag.cpp:375
TagDirective m_directives[RYML_MAX_TAG_DIRECTIVES]
Definition tag.hpp:127
id_type size() const noexcept
Definition tag.cpp:350
TagDirectiveRange lookup_range(id_type doc_id) const noexcept
Definition tag.cpp:385
csubstr resolve(substr buf, size_t *bufsz, csubstr tag, id_type doc_id, Location const &ymlloc, Callbacks const &callbacks, bool with_brackets=true) const
Definition tag.cpp:451
TagDirective const * add(csubstr handle, csubstr prefix, id_type doc_id) noexcept
Definition tag.cpp:360
TagDirective const * lookup(csubstr tag, id_type id) const noexcept
Definition tag.cpp:421