1 #ifndef _C4_YML_PARSE_ENGINE_HPP_
2 #define _C4_YML_PARSE_ENGINE_HPP_
4 #ifndef _C4_YML_DETAIL_PARSER_DBG_HPP_
5 #include "c4/yml/detail/parser_dbg.hpp"
8 #ifndef _C4_YML_PARSER_STATE_HPP_
14 # pragma warning(push)
15 # pragma warning(disable: 4251)
216 typedef enum : uint32_t {
217 SCALAR_FILTERING = (1u << 0),
218 LOCATIONS = (1u << 1),
219 DEFAULTS = SCALAR_FILTERING,
222 uint32_t flags = DEFAULTS;
/** Query source location tracking status: reports whether the LOCATIONS
 * bit is set in `flags`. */
243 C4_ALWAYS_INLINE
bool locations() const noexcept {
return (flags & LOCATIONS); } // a nonzero masked value converts to true
256 flags |= SCALAR_FILTERING;
258 flags &= ~SCALAR_FILTERING;
/** Query scalar filtering status: reports whether the SCALAR_FILTERING
 * bit is set in `flags`. */
262 C4_ALWAYS_INLINE
bool scalar_filtering() const noexcept {
return (flags & SCALAR_FILTERING); } // a nonzero masked value converts to true
290 template<
class EventHandler>
336 m_evt_handler->m_stack.reserve(capacity);
343 _resize_locations(num_source_lines);
346 RYML_DEPRECATED(
"filter arena no longer needed")
/** Get the current callbacks in the parser.
 * Checks via RYML_ASSERT that an event handler is attached, then returns a
 * const reference to the callbacks held by the handler's stack. */
360 Callbacks const&
callbacks()
const { RYML_ASSERT(m_evt_handler);
return m_evt_handler->m_stack.m_callbacks; }
371 RYML_DEPRECATED(
"filter arena no longer needed")
395 template<
class U=EventHandler> RYML_DEPRECATED(
"removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_place(csubstr
filename, substr yaml,
Tree *t,
size_t node_id);
396 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_place( substr yaml,
Tree *t,
size_t node_id);
397 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_place(csubstr
filename, substr yaml,
Tree *t );
398 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_place( substr yaml,
Tree *t );
399 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_place(csubstr
filename, substr yaml,
NodeRef node );
400 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_place( substr yaml,
NodeRef node );
401 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if<U::is_wtree,
Tree>::type
parse_in_place(csubstr
filename, substr yaml );
402 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if<U::is_wtree,
Tree>::type
parse_in_place( substr yaml );
403 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_arena(csubstr
filename, csubstr yaml,
Tree *t,
size_t node_id);
404 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_arena( csubstr yaml,
Tree *t,
size_t node_id);
405 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_arena(csubstr
filename, csubstr yaml,
Tree *t );
406 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_arena( csubstr yaml,
Tree *t );
407 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_arena(csubstr
filename, csubstr yaml,
NodeRef node );
408 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_arena( csubstr yaml,
NodeRef node );
409 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if<U::is_wtree,
Tree>::type
parse_in_arena(csubstr
filename, csubstr yaml );
410 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if<U::is_wtree,
Tree>::type
parse_in_arena( csubstr yaml );
411 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding csubstr version in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_arena(csubstr
filename, substr yaml,
Tree *t,
size_t node_id);
412 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding csubstr version in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_arena( substr yaml,
Tree *t,
size_t node_id);
413 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding csubstr version in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_arena(csubstr
filename, substr yaml,
Tree *t );
414 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding csubstr version in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_arena( substr yaml,
Tree *t );
415 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding csubstr version in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_arena(csubstr
filename, substr yaml,
NodeRef node );
416 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding csubstr version in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_arena( substr yaml,
NodeRef node );
417 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding csubstr version in parse.hpp.") typename std::enable_if<U::is_wtree,
Tree>::type
parse_in_arena(csubstr
filename, substr yaml );
418 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding csubstr version in parse.hpp.") typename std::enable_if<U::is_wtree,
Tree>::type
parse_in_arena( substr yaml );
489 bool _is_doc_begin(csubstr s);
490 bool _is_doc_end(csubstr s);
492 bool _scan_scalar_plain_blck(ScannedScalar *C4_RESTRICT sc,
size_t indentation);
493 bool _scan_scalar_plain_seq_flow(ScannedScalar *C4_RESTRICT sc);
494 bool _scan_scalar_plain_seq_blck(ScannedScalar *C4_RESTRICT sc);
495 bool _scan_scalar_plain_map_flow(ScannedScalar *C4_RESTRICT sc);
496 bool _scan_scalar_plain_map_blck(ScannedScalar *C4_RESTRICT sc);
497 bool _scan_scalar_map_json(ScannedScalar *C4_RESTRICT sc);
498 bool _scan_scalar_seq_json(ScannedScalar *C4_RESTRICT sc);
499 bool _scan_scalar_plain_unk(ScannedScalar *C4_RESTRICT sc);
500 bool _is_valid_start_scalar_plain_flow(csubstr s);
502 ScannedScalar _scan_scalar_squot();
503 ScannedScalar _scan_scalar_dquot();
505 void _scan_block(ScannedBlock *C4_RESTRICT sb,
size_t indref);
507 csubstr _scan_anchor();
508 csubstr _scan_ref_seq();
509 csubstr _scan_ref_map();
515 csubstr _filter_scalar_plain(substr s,
size_t indentation);
516 csubstr _filter_scalar_squot(substr s);
517 csubstr _filter_scalar_dquot(substr s);
518 csubstr _filter_scalar_literal(substr s,
size_t indentation, BlockChomp_e chomp);
519 csubstr _filter_scalar_folded(substr s,
size_t indentation, BlockChomp_e chomp);
/* Plain-scalar key/val variants: each takes the scanned scalar plus an
 * indentation and returns a csubstr. Declarations only; no bodies here. */
521 csubstr _maybe_filter_key_scalar_plain(ScannedScalar
const& sc,
size_t indentation);
522 csubstr _maybe_filter_val_scalar_plain(ScannedScalar
const& sc,
size_t indentation);
523 csubstr _maybe_filter_key_scalar_squot(ScannedScalar
const& sc);
524 csubstr _maybe_filter_val_scalar_squot(ScannedScalar
const& sc);
525 csubstr _maybe_filter_key_scalar_dquot(ScannedScalar
const& sc);
526 csubstr _maybe_filter_val_scalar_dquot(ScannedScalar
const& sc);
527 csubstr _maybe_filter_key_scalar_literal(ScannedBlock
const& sb);
528 csubstr _maybe_filter_val_scalar_literal(ScannedBlock
const& sb);
529 csubstr _maybe_filter_key_scalar_folded(ScannedBlock
const& sb);
530 csubstr _maybe_filter_val_scalar_folded(ScannedBlock
const& sb);
535 void _handle_map_block();
536 void _handle_seq_block();
537 void _handle_map_flow();
538 void _handle_seq_flow();
539 void _handle_seq_imap();
540 void _handle_map_json();
541 void _handle_seq_json();
544 void _handle_unk_json();
547 void _handle_flow_skip_whitespace();
549 void _end_map_blck();
550 void _end_seq_blck();
555 void _begin2_doc_expl();
557 void _end2_doc_expl();
559 void _maybe_begin_doc();
560 void _maybe_end_doc();
562 void _start_doc_suddenly();
563 void _end_doc_suddenly();
564 void _end_doc_suddenly__pop();
567 void _set_indentation(
size_t indentation);
568 void _save_indentation();
569 void _handle_indentation_pop_from_block_seq();
570 void _handle_indentation_pop_from_block_map();
571 void _handle_indentation_pop(ParserState
const* dst);
573 void _maybe_skip_comment();
574 void _skip_comment();
575 void _maybe_skip_whitespace_tokens();
576 void _maybe_skipchars(
char c);
577 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
578 void _maybe_skipchars_up_to(
char c,
size_t max_to_skip);
581 void _skipchars(
const char (&chars)[N]);
582 bool _maybe_scan_following_colon() noexcept;
583 bool _maybe_scan_following_comma() noexcept;
588 template<class FilterProcessor> auto _filter_plain(FilterProcessor &C4_RESTRICT proc,
size_t indentation) -> decltype(proc.result());
589 template<class FilterProcessor> auto _filter_squoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result());
590 template<class FilterProcessor> auto _filter_dquoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result());
591 template<class FilterProcessor> auto _filter_block_literal(FilterProcessor &C4_RESTRICT proc,
size_t indentation, BlockChomp_e chomp) -> decltype(proc.result());
592 template<class FilterProcessor> auto _filter_block_folded(FilterProcessor &C4_RESTRICT proc,
size_t indentation, BlockChomp_e chomp) -> decltype(proc.result());
598 template<class FilterProcessor>
void _filter_nl_plain(FilterProcessor &C4_RESTRICT proc,
size_t indentation);
599 template<class FilterProcessor>
void _filter_nl_squoted(FilterProcessor &C4_RESTRICT proc);
600 template<class FilterProcessor>
void _filter_nl_dquoted(FilterProcessor &C4_RESTRICT proc);
602 template<class FilterProcessor>
bool _filter_ws_handle_to_first_non_space(FilterProcessor &C4_RESTRICT proc);
603 template<class FilterProcessor>
void _filter_ws_copy_trailing(FilterProcessor &C4_RESTRICT proc);
604 template<class FilterProcessor>
void _filter_ws_skip_trailing(FilterProcessor &C4_RESTRICT proc);
606 template<class FilterProcessor>
void _filter_dquoted_backslash(FilterProcessor &C4_RESTRICT proc);
608 template<class FilterProcessor>
void _filter_chomp(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp,
size_t indentation);
609 template<class FilterProcessor>
size_t _handle_all_whitespace(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp);
610 template<class FilterProcessor>
size_t _extend_to_chomp(FilterProcessor &C4_RESTRICT proc,
size_t contents_len);
611 template<class FilterProcessor>
void _filter_block_indentation(FilterProcessor &C4_RESTRICT proc,
size_t indentation);
612 template<class FilterProcessor>
void _filter_block_folded_newlines(FilterProcessor &C4_RESTRICT proc,
size_t indentation,
size_t len);
613 template<class FilterProcessor>
size_t _filter_block_folded_newlines_compress(FilterProcessor &C4_RESTRICT proc,
size_t num_newl,
size_t wpos_at_first_newl);
614 template<class FilterProcessor>
void _filter_block_folded_newlines_leading(FilterProcessor &C4_RESTRICT proc,
size_t indentation,
size_t len);
615 template<class FilterProcessor>
void _filter_block_folded_indented_block(FilterProcessor &C4_RESTRICT proc,
size_t indentation,
size_t len,
size_t curr_indentation) noexcept;
621 void _line_progressed(
size_t ahead);
623 void _line_ended_undo();
625 bool _finished_file() const;
626 bool _finished_line() const;
629 substr _peek_next_line(
size_t pos=
npos) const;
631 inline
bool _at_line_begin()
const
633 return m_evt_handler->m_curr->line_contents.rem.begin() == m_evt_handler->m_curr->line_contents.full.begin();
636 void _relocate_arena(csubstr prev_arena, substr next_arena);
637 static void _s_relocate_arena(
void*, csubstr prev_arena, substr next_arena);
/** True when every bit of `f` is set in the flags of the handler's current
 * state (`m_evt_handler->m_curr`); trivially true when `f` is 0. */
641 C4_ALWAYS_INLINE
bool has_all(
ParserFlag_t f)
const noexcept {
return (m_evt_handler->m_curr->flags & f) == f; }
/** True when at least one bit of `f` is set in the flags of the handler's
 * current state (`m_evt_handler->m_curr`). */
642 C4_ALWAYS_INLINE
bool has_any(
ParserFlag_t f)
const noexcept {
return (m_evt_handler->m_curr->flags & f) != 0; }
/** True when no bit of `f` is set in the flags of the handler's current
 * state (`m_evt_handler->m_curr`). */
643 C4_ALWAYS_INLINE
bool has_none(
ParserFlag_t f)
const noexcept {
return (m_evt_handler->m_curr->flags & f) == 0; }
/** Static overload: true when every bit of `f` is set in `s->flags`;
 * trivially true when `f` is 0. `s` is dereferenced without a null check. */
644 static C4_ALWAYS_INLINE
bool has_all(
ParserFlag_t f, ParserState
const* C4_RESTRICT s) noexcept {
return (s->flags & f) == f; }
/** Static overload: true when at least one bit of `f` is set in `s->flags`.
 * `s` is dereferenced without a null check. */
645 static C4_ALWAYS_INLINE
bool has_any(
ParserFlag_t f, ParserState
const* C4_RESTRICT s) noexcept {
return (s->flags & f) != 0; }
/** Static overload: true when no bit of `f` is set in `s->flags`.
 * `s` is dereferenced without a null check. */
646 static C4_ALWAYS_INLINE
bool has_none(
ParserFlag_t f, ParserState
const* C4_RESTRICT s) noexcept {
return (s->flags & f) == 0; }
/** Static overload: set the bits of `on` in `s->flags`, leaving all other
 * bits untouched. */
649 C4_ALWAYS_INLINE
static void add_flags(
ParserFlag_t on, ParserState *C4_RESTRICT s) noexcept { s->flags |= on; }
/** Static overload: clear the bits of `off` in `s->flags`, then set the bits
 * of `on`. Because the set happens last, a bit present in both ends up set. */
650 C4_ALWAYS_INLINE
static void addrem_flags(
ParserFlag_t on,
ParserFlag_t off, ParserState *C4_RESTRICT s) noexcept { s->flags &= ~off; s->flags |= on; }
/** Static overload: clear the bits of `off` in `s->flags`, leaving all other
 * bits untouched. */
651 C4_ALWAYS_INLINE
static void rem_flags(
ParserFlag_t off, ParserState *C4_RESTRICT s) noexcept { s->flags &= ~off; }
/** Set the bits of `on` in the flags of the handler's current state
 * (`m_evt_handler->m_curr`). */
652 C4_ALWAYS_INLINE
void add_flags(
ParserFlag_t on) noexcept { m_evt_handler->m_curr->flags |= on; }
/** On the handler's current state (`m_evt_handler->m_curr`): clear the bits
 * of `off`, then set the bits of `on`. A bit present in both ends up set. */
653 C4_ALWAYS_INLINE
void addrem_flags(
ParserFlag_t on,
ParserFlag_t off) noexcept { m_evt_handler->m_curr->flags &= ~off; m_evt_handler->m_curr->flags |= on; }
/** Clear the bits of `off` in the flags of the handler's current state
 * (`m_evt_handler->m_curr`). */
654 C4_ALWAYS_INLINE
void rem_flags(
ParserFlag_t off) noexcept { m_evt_handler->m_curr->flags &= ~off; }
/* Declarations only (no bodies here) of the static flag mutators taking an
 * explicit ParserState.
 * NOTE(review): inline static definitions with the same parameter lists also
 * appear in this class; presumably the two sets sit on opposite branches of a
 * preprocessor conditional that is not visible in this listing -- confirm. */
656 static void add_flags(
ParserFlag_t on, ParserState *C4_RESTRICT s);
658 static void rem_flags(
ParserFlag_t off, ParserState *C4_RESTRICT s);
/** Forward to the static `add_flags(on, s)` overload, passing the handler's
 * current state (`m_evt_handler->m_curr`) as `s`. */
659 C4_ALWAYS_INLINE
void add_flags(
ParserFlag_t on) noexcept { add_flags(on, m_evt_handler->m_curr); }
/** Forward to the static `addrem_flags(on, off, s)` overload, passing the
 * handler's current state (`m_evt_handler->m_curr`) as `s`. */
660 C4_ALWAYS_INLINE
void addrem_flags(
ParserFlag_t on,
ParserFlag_t off) noexcept { addrem_flags(on, off, m_evt_handler->m_curr); }
/** Forward to the static `rem_flags(off, s)` overload, passing the handler's
 * current state (`m_evt_handler->m_curr`) as `s`. */
661 C4_ALWAYS_INLINE
void rem_flags(
ParserFlag_t off) noexcept { rem_flags(off, m_evt_handler->m_curr); }
666 void _prepare_locations();
667 void _resize_locations(
size_t sz);
668 bool _locations_dirty()
const;
670 bool _location_from_cont(Tree
const& tree,
id_type node, Location *C4_RESTRICT loc)
const;
671 bool _location_from_node(Tree
const& tree,
id_type node, Location *C4_RESTRICT loc,
id_type level)
const;
680 template<
class ...Args>
void _dbg(csubstr fmt, Args
const& C4_RESTRICT ...args)
const;
682 template<
class ...Args>
void _err(csubstr fmt, Args
const& C4_RESTRICT ...args)
const;
683 template<
class ...Args>
void _errloc(csubstr fmt, Location
const& loc, Args
const& C4_RESTRICT ...args)
const;
685 template<
class DumpFn>
void _fmt_msg(DumpFn &&dumpfn)
const;
698 Entry annotations[2];
702 void _add_annotation(Annotation *C4_RESTRICT dst, csubstr str,
size_t indentation,
size_t line);
703 void _clear_annotations(Annotation *C4_RESTRICT dst);
/** True when either `m_pending_tags` or `m_pending_anchors` has a nonzero
 * `num_entries`. */
704 bool _has_pending_annotations()
const {
return m_pending_tags.num_entries || m_pending_anchors.num_entries; }
705 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
706 bool _handle_indentation_from_annotations();
708 bool _annotations_require_key_container()
const;
709 void _handle_annotations_before_blck_key_scalar();
710 void _handle_annotations_before_blck_val_scalar();
711 void _handle_annotations_before_start_mapblck(
size_t current_line);
712 void _handle_annotations_before_start_mapblck_as_key();
713 void _handle_annotations_and_indentation_after_start_mapblck(
size_t key_indentation,
size_t key_line);
714 size_t _select_indentation_from_annotations(
size_t val_indentation,
size_t val_line);
715 void _handle_directive(csubstr rem);
717 void _check_tag(csubstr tag);
729 EventHandler *C4_RESTRICT m_evt_handler;
734 Annotation m_pending_anchors;
735 Annotation m_pending_tags;
737 bool m_was_inside_qmrk;
738 bool m_doc_empty =
true;
742 size_t *m_newline_offsets;
743 size_t m_newline_offsets_size;
744 size_t m_newline_offsets_capacity;
745 csubstr m_newline_offsets_buf;
774 #if defined(_MSC_VER)
775 # pragma warning(pop)
Holds a pointer to an existing tree, and a node id.
A reference to a node in an existing yaml tree, offering a more convenient API than the index-based A...
This is the main driver of parsing logic: it scans the YAML or JSON source for tokens,...
Location location(Tree const &tree, id_type node_id) const
Get the location of a node of the last tree to be parsed by this parser.
void reserve_stack(id_type capacity)
Reserve a certain capacity for the parsing stack.
FilterResult filter_scalar_plain(csubstr scalar, substr dst, size_t indentation)
filter a plain scalar
csubstr location_contents(Location const &loc) const
Get the string starting at a particular location, to the end of the parsed source buffer.
FilterResult filter_scalar_squoted(csubstr scalar, substr dst)
filter a single-quoted scalar
ParseEngine(EventHandler *evt_handler, ParserOptions opts={})
FilterResult filter_scalar_dquoted(csubstr scalar, substr dst)
filter a double-quoted scalar
void reserve_filter_arena(size_t)
void parse_json_in_place_ev(csubstr filename, substr src)
parse JSON in place, emitting events to the current handler
Location val_location(const char *val) const
Given a pointer to a buffer position, get the location.
FilterResult filter_scalar_plain_in_place(substr scalar, size_t cap, size_t indentation)
filter a plain scalar in place
FilterResult filter_scalar_squoted_in_place(substr scalar, size_t cap)
filter a single-quoted scalar in place
FilterResultExtending filter_scalar_dquoted_in_place(substr scalar, size_t cap)
filter a double-quoted scalar in place
size_t locations_capacity() const
void parse_in_place_ev(csubstr filename, substr src)
parse YAML in place, emitting events to the current handler
csubstr source() const
Get the latest YAML buffer parsed by this object.
FilterResult filter_scalar_block_literal_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar in place
ParserOptions const & options() const
Get the options used to build this parser object.
size_t filter_arena_capacity() const
FilterResult filter_scalar_block_literal(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar
id_type stack_capacity() const
Callbacks const & callbacks() const
Get the current callbacks in the parser.
EventHandler handler_type
FilterResult filter_scalar_block_folded_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar in place
ParseEngine & operator=(ParseEngine &&)
csubstr filename() const
Get the name of the latest file parsed by this object.
void reserve_locations(size_t num_source_lines)
Reserve a certain capacity for the array used to track node locations in the source buffer.
FilterResult filter_scalar_block_folded(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar
void parse_in_arena(Parser *parser, csubstr filename, csubstr yaml, Tree *t, id_type node_id)
(1) parse YAML into an existing tree node. The filename will be used in any error messages arising du...
void parse_in_place(Parser *parser, csubstr filename, substr yaml, Tree *t, id_type node_id)
(1) parse YAML into an existing tree node.
id_type estimate_tree_capacity(csubstr src)
Quickly inspect the source to estimate the number of nodes the resulting tree is likely to have.
RYML_ID_TYPE id_type
The type of a node id in the YAML tree; to override the default type, define the macro RYML_ID_TYPE t...
@ npos
a null string position
size_t _find_last_newline_and_larger_indentation(csubstr s, size_t indentation) noexcept
int ParserFlag_t
data type for ParserState_e
a c-style callbacks class.
Options to give to the parser to control its behavior.
ParserOptions & scalar_filtering(bool enabled) noexcept
enable/disable scalar filtering while parsing
bool scalar_filtering() const noexcept
query scalar filtering status
bool locations() const noexcept
query source location tracking status
ParserOptions & locations(bool enabled) noexcept
enable/disable source location tracking