1 #ifndef _C4_YML_PARSE_ENGINE_HPP_
2 #define _C4_YML_PARSE_ENGINE_HPP_
4 #ifndef _C4_YML_PARSER_STATE_HPP_
10 # pragma warning(push)
11 # pragma warning(disable: 4251)
206 struct FilterResultExtending;
209 typedef enum BlockChomp_ {
258 template<
class EventHandler>
304 _RYML_ASSERT_BASIC(m_evt_handler);
305 m_evt_handler->m_stack.reserve(capacity);
312 _resize_locations(num_source_lines);
326 Callbacks const&
callbacks()
const { _RYML_ASSERT_BASIC(m_evt_handler);
return m_evt_handler->m_stack.m_callbacks; }
329 csubstr
filename()
const {
return m_evt_handler->m_curr ? m_evt_handler->m_curr->pos.name : csubstr{}; }
332 csubstr
source()
const {
return m_evt_handler ? m_evt_handler->m_src : csubstr{}; }
421 bool _is_doc_begin(csubstr s);
422 bool _is_doc_end(csubstr s);
// Scalar scanners (declarations only; the definitions are not part of this listing).
// Every overload receives its output ScannedScalar through a restrict-qualified
// pointer and reports through its bool return value.
// NOTE(review): presumably `true` means a scalar was scanned into *sc -- confirm
// against the definitions.
// Only _scan_scalar_plain_blck takes an explicit indentation argument.
424 bool _scan_scalar_plain_blck(ScannedScalar *C4_RESTRICT sc,
size_t indentation);
425 bool _scan_scalar_plain_seq_flow(ScannedScalar *C4_RESTRICT sc);
426 bool _scan_scalar_plain_seq_blck(ScannedScalar *C4_RESTRICT sc);
427 bool _scan_scalar_plain_map_flow(ScannedScalar *C4_RESTRICT sc);
428 bool _scan_scalar_plain_map_blck(ScannedScalar *C4_RESTRICT sc);
429 bool _scan_scalar_map_json(ScannedScalar *C4_RESTRICT sc);
430 bool _scan_scalar_seq_json(ScannedScalar *C4_RESTRICT sc);
431 bool _scan_scalar_plain_unk(ScannedScalar *C4_RESTRICT sc);
432 bool _is_valid_start_scalar_plain_flow(csubstr s);
433 bool _is_valid_start_scalar_plain_flow_check_block_token(csubstr s);
434 bool _is_valid_start_scalar_plain_flow_check_qmrk(csubstr s);
435 bool _scan_scalar_plain_handle_newline(csubstr s,
size_t offs);
436 void _check_valid_newline_in_quoted_scalar();
438 ScannedScalar _scan_scalar_squot();
439 ScannedScalar _scan_scalar_dquot();
441 void _scan_block(ScannedBlock *C4_RESTRICT sb,
size_t indref);
442 csubstr _scan_anchor();
443 csubstr _scan_ref_seq();
444 csubstr _scan_ref_map();
446 csubstr _scan_tag(csubstr *orig);
451 csubstr _filter_scalar_plain(substr s,
size_t indentation);
452 csubstr _filter_scalar_squot(substr s);
453 csubstr _filter_scalar_dquot(substr s);
454 csubstr _filter_scalar_literal(substr s,
size_t indentation, BlockChomp_e chomp);
455 csubstr _filter_scalar_folded(substr s,
size_t indentation, BlockChomp_e chomp);
456 csubstr _move_scalar_left_and_add_newline(substr s);
458 csubstr _maybe_filter_key_scalar_plain(ScannedScalar
const& sc,
size_t indendation);
459 csubstr _maybe_filter_val_scalar_plain(ScannedScalar
const& sc,
size_t indendation);
460 csubstr _maybe_filter_key_scalar_squot(ScannedScalar
const& sc);
461 csubstr _maybe_filter_val_scalar_squot(ScannedScalar
const& sc);
462 csubstr _maybe_filter_key_scalar_dquot(ScannedScalar
const& sc);
463 csubstr _maybe_filter_val_scalar_dquot(ScannedScalar
const& sc);
464 csubstr _maybe_filter_key_scalar_literal(ScannedBlock
const& sb);
465 csubstr _maybe_filter_val_scalar_literal(ScannedBlock
const& sb);
466 csubstr _maybe_filter_key_scalar_folded(ScannedBlock
const& sb);
467 csubstr _maybe_filter_val_scalar_folded(ScannedBlock
const& sb);
472 void _handle_map_block();
473 bool _handle_map_block_qmrk();
474 bool _handle_map_block_rkcl();
475 void _handle_seq_block();
476 void _handle_map_flow();
477 void _handle_seq_flow();
478 void _handle_seq_imap();
479 void _handle_map_json();
480 void _handle_seq_json();
483 void _handle_unk_json();
487 void _handle_flow_skip_whitespace();
488 void _handle_flow_line_beginning();
490 size_t _handle_unk_check_left_tokens(
size_t realindent,
size_t col,
bool skip_annotations=
true);
491 void _handle_unk_get_first_non_pending_token_pos(csubstr s,
size_t *indent,
size_t *first_non_token_pos);
492 void _handle_unk_begin_doc();
494 size_t _handle_block_skip_leading_whitespace();
496 size_t _handle_block_get_whitespace_mark() const noexcept {
return m_evt_handler->m_curr->pos.offset; }
497 void _handle_block_check_leading_tabs(
size_t prev_mark) {
return _handle_block_check_leading_tabs(prev_mark, m_evt_handler->m_curr->pos.offset); }
498 void _handle_block_check_leading_tabs(
size_t start_mark,
size_t end_mark);
500 void _end_map_flow();
501 void _end_seq_flow();
502 void _end_map_blck();
503 void _end_seq_blck();
506 void _end_flow_container(
size_t orig_indent,
bool multiline);
507 void _flow_container_was_a_key(
size_t orig_indent);
510 void _begin2_doc_expl();
512 void _end2_doc_expl();
513 void _check_doc_end_tokens()
const;
515 void _maybe_begin_doc();
516 void _maybe_end_doc();
518 void _start_doc_suddenly();
519 void _end_doc_suddenly();
520 void _end_doc_suddenly__pop();
521 void _check_trailing_doc_token();
524 void _set_indentation(
size_t indentation) noexcept;
525 void _save_indentation();
526 void _mark_seqflow_val_end() noexcept;
527 void _handle_indentation_pop_from_block_seq();
528 void _handle_indentation_pop_from_block_map();
529 void _handle_indentation_pop(ParserState const* dst);
531 void _maybe_skip_comment();
532 void _maybe_skip_comment_strict();
533 void _skip_comment();
534 void _maybe_skip_whitespace_tokens();
535 void _maybe_skipchars(
char c);
537 void _skipchars(const
char (&chars)[N]);
538 bool _maybe_scan_following_colon() noexcept;
// Filter entry points, one per scalar style (declarations only; the definitions
// are not part of this listing).
// Each is a template on the FilterProcessor type and takes the processor by
// restrict-qualified reference. The trailing return type is whatever
// proc.result() yields, so the result type follows the processor.
// The plain and block variants also take an indentation; the two block variants
// additionally take the BlockChomp_e chomping mode.
543 template<class FilterProcessor> auto _filter_plain(FilterProcessor &C4_RESTRICT proc,
size_t indentation) -> decltype(proc.result());
544 template<class FilterProcessor> auto _filter_squoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result());
545 template<class FilterProcessor> auto _filter_dquoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result());
546 template<class FilterProcessor> auto _filter_block_literal(FilterProcessor &C4_RESTRICT proc,
size_t indentation, BlockChomp_e chomp) -> decltype(proc.result());
547 template<class FilterProcessor> auto _filter_block_folded(FilterProcessor &C4_RESTRICT proc,
size_t indentation, BlockChomp_e chomp) -> decltype(proc.result());
553 template<class FilterProcessor>
void _filter_nl_plain(FilterProcessor &C4_RESTRICT proc,
size_t indentation);
554 template<class FilterProcessor>
void _filter_nl_squoted(FilterProcessor &C4_RESTRICT proc);
555 template<class FilterProcessor>
void _filter_nl_dquoted(FilterProcessor &C4_RESTRICT proc);
557 template<class FilterProcessor>
bool _filter_ws_handle_to_first_non_space(FilterProcessor &C4_RESTRICT proc);
558 template<class FilterProcessor>
void _filter_ws_copy_trailing(FilterProcessor &C4_RESTRICT proc);
559 template<class FilterProcessor>
void _filter_ws_skip_trailing(FilterProcessor &C4_RESTRICT proc);
561 template<class FilterProcessor>
void _filter_dquoted_backslash(FilterProcessor &C4_RESTRICT proc);
562 template<class FilterProcessor>
void _filter_dquoted_backslash_decode(FilterProcessor &C4_RESTRICT proc,
size_t sz);
564 template<class FilterProcessor>
void _filter_chomp(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp,
size_t indentation);
565 template<class FilterProcessor>
size_t _handle_all_whitespace(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp);
566 template<class FilterProcessor>
size_t _extend_to_chomp(FilterProcessor &C4_RESTRICT proc,
size_t contents_len);
567 template<class FilterProcessor>
void _filter_block_indentation(FilterProcessor &C4_RESTRICT proc,
size_t indentation);
568 template<class FilterProcessor>
void _filter_block_folded_newlines(FilterProcessor &C4_RESTRICT proc,
size_t indentation,
size_t len);
569 template<class FilterProcessor>
size_t _filter_block_folded_newlines_compress(FilterProcessor &C4_RESTRICT proc,
size_t num_newl,
size_t wpos_at_first_newl);
570 template<class FilterProcessor>
void _filter_block_folded_newlines_leading(FilterProcessor &C4_RESTRICT proc,
size_t indentation,
size_t len);
571 template<class FilterProcessor>
void _filter_block_folded_indented_block(FilterProcessor &C4_RESTRICT proc,
size_t indentation,
size_t len,
size_t curr_indentation) noexcept;
573 substr _alloc_arena(
size_t len, substr *relocated=
nullptr);
574 substr _alloc_arena(
size_t len, csubstr *relocated) {
return _alloc_arena(len,
reinterpret_cast<substr*
>(relocated)); }
580 void _line_progressed(
size_t ahead);
582 void _line_ended_undo();
584 bool _finished_file()
const;
585 bool _finished_line()
const;
588 substr _peek_next_line(
size_t pos=
npos)
const;
590 void _relocate_arena(csubstr prev_arena, substr next_arena, substr *other_string=
nullptr);
594 C4_ALWAYS_INLINE substr _buf() const noexcept {
return m_evt_handler->m_src; }
596 C4_ALWAYS_INLINE
bool has_all(
ParserFlag_t f)
const noexcept {
return (m_evt_handler->m_curr->flags & f) == f; }
597 C4_ALWAYS_INLINE
bool has_any(
ParserFlag_t f)
const noexcept {
return (m_evt_handler->m_curr->flags & f) != 0; }
598 C4_ALWAYS_INLINE
bool has_none(
ParserFlag_t f)
const noexcept {
return (m_evt_handler->m_curr->flags & f) == 0; }
599 static C4_ALWAYS_INLINE
bool has_all(
ParserFlag_t f, ParserState
const* C4_RESTRICT s) noexcept {
return (s->flags & f) == f; }
600 static C4_ALWAYS_INLINE
bool has_any(
ParserFlag_t f, ParserState
const* C4_RESTRICT s) noexcept {
return (s->flags & f) != 0; }
601 static C4_ALWAYS_INLINE
bool has_none(
ParserFlag_t f, ParserState
const* C4_RESTRICT s) noexcept {
return (s->flags & f) == 0; }
604 C4_ALWAYS_INLINE
void add_flags(
ParserFlag_t on) noexcept { m_evt_handler->m_curr->flags |= on; }
605 C4_ALWAYS_INLINE
void addrem_flags(
ParserFlag_t on,
ParserFlag_t off) noexcept { m_evt_handler->m_curr->flags &= ~off; m_evt_handler->m_curr->flags |= on; }
606 C4_ALWAYS_INLINE
void rem_flags(
ParserFlag_t off) noexcept { m_evt_handler->m_curr->flags &= ~off; }
615 void _prepare_locations();
616 void _resize_locations(
size_t sz);
617 bool _locations_dirty()
const;
// Error helpers (declarations only; the definitions are not part of this listing).
// Both overloads are marked C4_NORETURN, so control does not come back to the
// caller, and C4_NO_INLINE. Both are const members taking a format string
// followed by a variadic argument pack.
// NOTE(review): cpploc presumably identifies the C++ call site -- confirm
// against the definition.
625 template<
class ...Args> C4_NORETURN C4_NO_INLINE
void _err(Location
const& cpploc,
const char *fmt, Args
const& ...args)
const;
// Same as above, with an additional location argument.
// NOTE(review): ymlloc presumably identifies the position in the YAML source --
// confirm against the definition.
626 template<
class ...Args> C4_NORETURN C4_NO_INLINE
void _err(Location
const& cpploc, Location
const& ymlloc,
const char *fmt, Args
const& ...args)
const;
628 template<
class ...Args> C4_NO_INLINE
void _dbg(csubstr fmt, Args
const& ...args)
const;
629 template<
class DumpFn> C4_NO_INLINE
void _fmt_msg(DumpFn &&dumpfn)
const;
630 C4_NO_INLINE
void _print_state_stack()
const;
631 C4_NO_INLINE
void _print_state_stack(substr buf)
const;
647 Entry annotations[2];
651 void _handle_colon();
652 void _add_annotation(Annotation *C4_RESTRICT dst, csubstr str,
size_t indentation,
size_t line);
653 void _add_annotation(Annotation *C4_RESTRICT dst, csubstr str,
size_t indentation,
size_t line, csubstr orig);
654 void _add_annotation(Annotation *C4_RESTRICT dst, csubstr str);
655 C4_ALWAYS_INLINE
void _clear_annotations(Annotation *C4_RESTRICT dst) noexcept { dst->num_entries = 0; }
656 bool _annotations_require_key_container()
const;
657 bool _handle_annotations_before_unexpected_flow_token_rkey();
658 void _handle_annotations_before_blck_key_scalar();
659 void _handle_annotations_before_blck_val_scalar();
660 void _handle_annotations_before_start_mapblck(
size_t current_line);
661 void _handle_annotations_before_start_mapblck_as_key();
662 void _handle_annotations_and_indentation_after_start_mapblck(
size_t key_indentation,
size_t key_line);
663 size_t _select_indentation_from_annotations(
size_t val_indentation,
size_t val_line);
664 uint32_t _get_annotations_same_line(csubstr token_soup, csubstr * first, csubstr * second)
const;
665 void _handle_keyref(csubstr alias);
666 void _handle_valref(csubstr alias);
667 csubstr _resolve_tag(csubstr tag);
668 void _handle_directive(csubstr rem);
669 bool _validate_directive_yaml(csubstr *C4_RESTRICT directive, csubstr *C4_RESTRICT
version)
const;
670 bool _validate_directive_tag(csubstr *C4_RESTRICT directive, csubstr *C4_RESTRICT handle, csubstr *C4_RESTRICT prefix)
const;
// Event handler used by this engine. The accessors in this class read the
// source buffer (m_src), the current state (m_curr) and the stack (m_stack)
// through this pointer.
681 EventHandler *C4_RESTRICT m_evt_handler;
// Pending annotation sets; presumably the anchors and tags scanned but not
// yet attached to a node -- TODO confirm.
686 Annotation m_pending_anchors;
687 Annotation m_pending_tags;
// NOTE(review): look like flags recording whether directives were seen
// (%YAML specifically, and any directive) -- confirm against their uses.
689 bool m_has_directives_yaml;
690 bool m_has_directives;
693 size_t m_prev_val_end;
// Array of size_t with separate size and capacity counters; presumably the
// offsets of newlines in the source, used for location lookup -- TODO confirm.
703 size_t *m_newline_offsets;
704 size_t m_newline_offsets_size;
705 size_t m_newline_offsets_capacity;
712 RYML_DEPRECATED(
"filter arena no longer needed") size_t filter_arena_capacity()
const {
return 0u; }
713 RYML_DEPRECATED(
"filter arena no longer needed") void reserve_filter_arena(
size_t) {}
715 template<
class U=EventHandler> RYML_DEPRECATED(
"removed, deliberately undefined. use the function in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_place(csubstr
filename, substr yaml, Tree *t,
size_t node_id);
716 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the function in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_place( substr yaml, Tree *t,
size_t node_id);
717 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the function in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_place(csubstr
filename, substr yaml, Tree *t );
718 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the function in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_place( substr yaml, Tree *t );
719 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the function in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_place(csubstr
filename, substr yaml, NodeRef node );
720 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the function in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_place( substr yaml, NodeRef node );
721 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the function in parse.hpp.") typename std::enable_if<U::is_wtree, Tree>::type
parse_in_place(csubstr
filename, substr yaml );
722 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the function in parse.hpp.") typename std::enable_if<U::is_wtree, Tree>::type
parse_in_place( substr yaml );
723 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the function in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_arena(csubstr
filename, csubstr yaml, Tree *t,
size_t node_id);
724 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the function in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_arena( csubstr yaml, Tree *t,
size_t node_id);
725 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the function in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_arena(csubstr
filename, csubstr yaml, Tree *t );
726 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the function in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_arena( csubstr yaml, Tree *t );
727 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the function in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_arena(csubstr
filename, csubstr yaml, NodeRef node );
728 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the function in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_arena( csubstr yaml, NodeRef node );
729 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the function in parse.hpp.") typename std::enable_if<U::is_wtree, Tree>::type
parse_in_arena(csubstr
filename, csubstr yaml );
730 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the function in parse.hpp.") typename std::enable_if<U::is_wtree, Tree>::type
parse_in_arena( csubstr yaml );
731 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the csubstr
version in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_arena(csubstr
filename, substr yaml, Tree *t,
size_t node_id);
732 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the csubstr
version in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_arena( substr yaml, Tree *t,
size_t node_id);
733 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the csubstr
version in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_arena(csubstr
filename, substr yaml, Tree *t );
734 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the csubstr
version in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_arena( substr yaml, Tree *t );
735 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the csubstr
version in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_arena(csubstr
filename, substr yaml, NodeRef node );
736 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the csubstr
version in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_arena( substr yaml, NodeRef node );
737 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the csubstr
version in parse.hpp.") typename std::enable_if<U::is_wtree, Tree>::type
parse_in_arena(csubstr
filename, substr yaml );
738 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the csubstr
version in parse.hpp.") typename std::enable_if<U::is_wtree, Tree>::type
parse_in_arena( substr yaml );
741 RYML_DEPRECATED("moved to Tree::location(
Parser const&). deliberately undefined here.")
742 auto location(Tree const&,
id_type node) const -> typename std::enable_if<U::is_wtree, Location>::type;
745 RYML_DEPRECATED("moved to ConstNodeRef::location(
Parser const&), deliberately undefined here.")
746 auto location(ConstNodeRef const&) const -> typename std::enable_if<U::is_wtree, Location>::type;
759 #if defined(_MSC_VER)
760 # pragma warning(pop)
This is the main driver of parsing logic: it scans the YAML or JSON source for tokens,...
void reserve_stack(id_type capacity)
Reserve a certain capacity for the parsing stack.
FilterResult filter_scalar_plain(csubstr scalar, substr dst, size_t indentation)
filter a plain scalar
csubstr location_contents(Location const &loc) const
Get the string starting at a particular location, to the end of the parsed source buffer.
FilterResult filter_scalar_squoted(csubstr scalar, substr dst)
filter a single-quoted scalar
ParseEngine(EventHandler *evt_handler, ParserOptions opts={})
FilterResult filter_scalar_dquoted(csubstr scalar, substr dst)
filter a double-quoted scalar
void parse_json_in_place_ev(csubstr filename, substr src)
parse JSON in place, emitting events to the current handler
Location val_location(const char *val) const
Given a pointer to a buffer position, get the location.
FilterResult filter_scalar_plain_in_place(substr scalar, size_t cap, size_t indentation)
filter a plain scalar in place
FilterResult filter_scalar_squoted_in_place(substr scalar, size_t cap)
filter a single-quoted scalar in place
FilterResultExtending filter_scalar_dquoted_in_place(substr scalar, size_t cap)
filter a double-quoted scalar in place
Encoding_e encoding() const
Get the encoding of the latest YAML buffer parsed by this object.
size_t locations_capacity() const
void parse_in_place_ev(csubstr filename, substr src)
parse YAML in place, emitting events to the current handler
csubstr source() const
Get the latest YAML buffer parsed by this object.
FilterResult filter_scalar_block_literal_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar in place
ParserOptions const & options() const
Get the options used to build this parser object.
FilterResult filter_scalar_block_literal(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar
id_type stack_capacity() const
Callbacks const & callbacks() const
Get the current callbacks in the parser.
EventHandler handler_type
FilterResult filter_scalar_block_folded_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar in place
csubstr filename() const
Get the name of the latest file parsed by this object.
void reserve_locations(size_t num_source_lines)
Reserve a certain capacity for the array used to track node locations in the source buffer.
FilterResult filter_scalar_block_folded(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar
void parse_in_arena(Parser *parser, csubstr filename, csubstr yaml, Tree *t, id_type node_id)
(1) parse YAML into an existing tree node. The filename will be used in any error messages arising during the parse.
void parse_in_place(Parser *parser, csubstr filename, substr yaml, Tree *t, id_type node_id)
(1) parse YAML into an existing tree node.
ParseEngine< EventHandlerTree > Parser
This is the main ryml parser, where the parser events are handled to create a ryml tree.
id_type estimate_tree_capacity(csubstr src)
Quickly inspect the source to estimate the number of nodes the resulting tree is likely to have.
RYML_ID_TYPE id_type
The type of a node id in the YAML tree; to override the default type, define the macro RYML_ID_TYPE t...
@ npos
a null string position
int ParserFlag_t
data type for ParserState_e
@ NOBOM
No Byte Order Mark was found.
enum c4::yml::Encoding_ Encoding_e
(Undefined by default) Use shorter error message from checks/asserts: do not show the check condition...
A c-style callbacks class to customize behavior on errors or allocation.
Abstracts the fact that a scalar filter result may not fit in the intended memory.
Abstracts the fact that a scalar filter result may not fit in the intended memory.
Holds a source or YAML file position, for example when an error is detected; see also location_format...
Options to give to the parser to control its behavior.