1 #ifndef _C4_YML_PARSE_ENGINE_HPP_
2 #define _C4_YML_PARSE_ENGINE_HPP_
4 #ifndef _C4_YML_DETAIL_PARSER_DBG_HPP_
5 #include "c4/yml/detail/parser_dbg.hpp"
8 #ifndef _C4_YML_PARSER_STATE_HPP_
14 # pragma warning(push)
15 # pragma warning(disable: 4251)
217 typedef enum : uint32_t {
218 SCALAR_FILTERING = (1u << 0u),
219 LOCATIONS = (1u << 1u),
220 DEFAULTS = SCALAR_FILTERING,
223 uint32_t flags = DEFAULTS;
/** query source location tracking status
 * @return true when the LOCATIONS bit is set in `flags` */
244 C4_ALWAYS_INLINE
bool locations() const noexcept {
return (flags & LOCATIONS); }
257 flags |= SCALAR_FILTERING;
259 flags &= ~SCALAR_FILTERING;
/** query scalar filtering status
 * @return true when the SCALAR_FILTERING bit is set in `flags` */
263 C4_ALWAYS_INLINE
bool scalar_filtering() const noexcept {
return (flags & SCALAR_FILTERING); }
291 template<
class EventHandler>
337 m_evt_handler->m_stack.reserve(capacity);
344 _resize_locations(num_source_lines);
347 RYML_DEPRECATED(
"filter arena no longer needed")
/** Get the current callbacks in the parser.
 * Asserts (RYML_ASSERT) that an event handler is set, then returns a
 * const reference to the callbacks stored in the handler's stack. */
361 Callbacks const&
callbacks()
const { RYML_ASSERT(m_evt_handler);
return m_evt_handler->m_stack.m_callbacks; }
376 RYML_DEPRECATED(
"filter arena no longer needed")
/** @name deprecated parse_in_place() member overloads
 * Declarations only: per the RYML_DEPRECATED message they are removed
 * and deliberately left undefined, and callers should use the
 * freestanding functions in parse.hpp instead.
 * Each overload is enabled (std::enable_if) only when U::is_wtree is
 * true, where U defaults to EventHandler. The Tree-returning overloads
 * take no destination; the void ones take a Tree* (optionally with a
 * node id) or a NodeRef. */
400 template<
class U=EventHandler> RYML_DEPRECATED(
"removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_place(csubstr
filename, substr yaml,
Tree *t,
size_t node_id);
401 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_place( substr yaml,
Tree *t,
size_t node_id);
402 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_place(csubstr
filename, substr yaml,
Tree *t );
403 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_place( substr yaml,
Tree *t );
404 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_place(csubstr
filename, substr yaml,
NodeRef node );
405 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_place( substr yaml,
NodeRef node );
406 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if<U::is_wtree,
Tree>::type
parse_in_place(csubstr
filename, substr yaml );
407 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if<U::is_wtree,
Tree>::type
parse_in_place( substr yaml );
/** @name deprecated parse_in_arena() member overloads taking a csubstr
 * Declarations only: per the RYML_DEPRECATED message they are removed
 * and deliberately left undefined, and callers should use the
 * freestanding functions in parse.hpp instead.
 * Each overload is enabled (std::enable_if) only when U::is_wtree is
 * true, where U defaults to EventHandler. */
408 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_arena(csubstr
filename, csubstr yaml,
Tree *t,
size_t node_id);
409 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_arena( csubstr yaml,
Tree *t,
size_t node_id);
410 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_arena(csubstr
filename, csubstr yaml,
Tree *t );
411 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_arena( csubstr yaml,
Tree *t );
412 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_arena(csubstr
filename, csubstr yaml,
NodeRef node );
413 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_arena( csubstr yaml,
NodeRef node );
414 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if<U::is_wtree,
Tree>::type
parse_in_arena(csubstr
filename, csubstr yaml );
415 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if<U::is_wtree,
Tree>::type
parse_in_arena( csubstr yaml );
416 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding csubstr
version in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_arena(csubstr
filename, substr yaml,
Tree *t,
size_t node_id);
417 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding csubstr
version in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_arena( substr yaml,
Tree *t,
size_t node_id);
418 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding csubstr
version in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_arena(csubstr
filename, substr yaml,
Tree *t );
419 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding csubstr
version in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_arena( substr yaml,
Tree *t );
420 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding csubstr
version in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_arena(csubstr
filename, substr yaml,
NodeRef node );
421 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding csubstr
version in parse.hpp.") typename std::enable_if<U::is_wtree,
void>::type
parse_in_arena( substr yaml,
NodeRef node );
422 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding csubstr
version in parse.hpp.") typename std::enable_if<U::is_wtree,
Tree>::type
parse_in_arena(csubstr
filename, substr yaml );
423 template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding csubstr
version in parse.hpp.") typename std::enable_if<U::is_wtree,
Tree>::type
parse_in_arena( substr yaml );
494 bool _is_doc_begin(csubstr s);
495 bool _is_doc_end(csubstr s);
497 bool _scan_scalar_plain_blck(ScannedScalar *C4_RESTRICT sc,
size_t indentation);
498 bool _scan_scalar_plain_seq_flow(ScannedScalar *C4_RESTRICT sc);
499 bool _scan_scalar_plain_seq_blck(ScannedScalar *C4_RESTRICT sc);
500 bool _scan_scalar_plain_map_flow(ScannedScalar *C4_RESTRICT sc);
501 bool _scan_scalar_plain_map_blck(ScannedScalar *C4_RESTRICT sc);
502 bool _scan_scalar_map_json(ScannedScalar *C4_RESTRICT sc);
503 bool _scan_scalar_seq_json(ScannedScalar *C4_RESTRICT sc);
504 bool _scan_scalar_plain_unk(ScannedScalar *C4_RESTRICT sc);
505 bool _is_valid_start_scalar_plain_flow(csubstr s);
507 ScannedScalar _scan_scalar_squot();
508 ScannedScalar _scan_scalar_dquot();
510 void _scan_block(ScannedBlock *C4_RESTRICT sb,
size_t indref);
512 csubstr _scan_anchor();
513 csubstr _scan_ref_seq();
514 csubstr _scan_ref_map();
520 csubstr _filter_scalar_plain(substr s,
size_t indentation);
521 csubstr _filter_scalar_squot(substr s);
522 csubstr _filter_scalar_dquot(substr s);
523 csubstr _filter_scalar_literal(substr s,
size_t indentation, BlockChomp_e chomp);
524 csubstr _filter_scalar_folded(substr s,
size_t indentation, BlockChomp_e chomp);
/** Plain-scalar key/val members of the _maybe_filter_* family: each
 * takes the scanned scalar and an indentation and returns a csubstr.
 * NOTE(review): presumably the scalar is filtered only when filtering
 * is required/enabled -- confirm against the definitions. */
526 csubstr _maybe_filter_key_scalar_plain(ScannedScalar
const& sc,
size_t indentation);
527 csubstr _maybe_filter_val_scalar_plain(ScannedScalar
const& sc,
size_t indentation);
528 csubstr _maybe_filter_key_scalar_squot(ScannedScalar
const& sc);
529 csubstr _maybe_filter_val_scalar_squot(ScannedScalar
const& sc);
530 csubstr _maybe_filter_key_scalar_dquot(ScannedScalar
const& sc);
531 csubstr _maybe_filter_val_scalar_dquot(ScannedScalar
const& sc);
532 csubstr _maybe_filter_key_scalar_literal(ScannedBlock
const& sb);
533 csubstr _maybe_filter_val_scalar_literal(ScannedBlock
const& sb);
534 csubstr _maybe_filter_key_scalar_folded(ScannedBlock
const& sb);
535 csubstr _maybe_filter_val_scalar_folded(ScannedBlock
const& sb);
540 void _handle_map_block();
541 void _handle_seq_block();
542 void _handle_map_flow();
543 void _handle_seq_flow();
544 void _handle_seq_imap();
545 void _handle_map_json();
546 void _handle_seq_json();
549 void _handle_unk_json();
552 void _handle_flow_skip_whitespace();
554 void _end_map_blck();
555 void _end_seq_blck();
560 void _begin2_doc_expl();
562 void _end2_doc_expl();
564 void _maybe_begin_doc();
565 void _maybe_end_doc();
567 void _start_doc_suddenly();
568 void _end_doc_suddenly();
569 void _end_doc_suddenly__pop();
572 void _set_indentation(
size_t indentation);
573 void _save_indentation();
574 void _handle_indentation_pop_from_block_seq();
575 void _handle_indentation_pop_from_block_map();
576 void _handle_indentation_pop(ParserState
const* dst);
578 void _maybe_skip_comment();
579 void _skip_comment();
580 void _maybe_skip_whitespace_tokens();
581 void _maybe_skipchars(
char c);
582 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
583 void _maybe_skipchars_up_to(
char c,
size_t max_to_skip);
586 void _skipchars(
const char (&chars)[N]);
587 bool _maybe_scan_following_colon() noexcept;
588 bool _maybe_scan_following_comma() noexcept;
/** @name scalar filtering implementations, templated on the processor
 * Each function takes the FilterProcessor by (restrict-qualified)
 * reference and its return type is whatever `proc.result()` yields for
 * that processor type (trailing decltype). The plain and block variants
 * additionally take the indentation; the block variants also take the
 * chomp mode. */
593 template<class FilterProcessor> auto _filter_plain(FilterProcessor &C4_RESTRICT proc,
size_t indentation) -> decltype(proc.result());
594 template<class FilterProcessor> auto _filter_squoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result());
595 template<class FilterProcessor> auto _filter_dquoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result());
596 template<class FilterProcessor> auto _filter_block_literal(FilterProcessor &C4_RESTRICT proc,
size_t indentation, BlockChomp_e chomp) -> decltype(proc.result());
597 template<class FilterProcessor> auto _filter_block_folded(FilterProcessor &C4_RESTRICT proc,
size_t indentation, BlockChomp_e chomp) -> decltype(proc.result());
603 template<class FilterProcessor>
void _filter_nl_plain(FilterProcessor &C4_RESTRICT proc,
size_t indentation);
604 template<class FilterProcessor>
void _filter_nl_squoted(FilterProcessor &C4_RESTRICT proc);
605 template<class FilterProcessor>
void _filter_nl_dquoted(FilterProcessor &C4_RESTRICT proc);
607 template<class FilterProcessor>
bool _filter_ws_handle_to_first_non_space(FilterProcessor &C4_RESTRICT proc);
608 template<class FilterProcessor>
void _filter_ws_copy_trailing(FilterProcessor &C4_RESTRICT proc);
609 template<class FilterProcessor>
void _filter_ws_skip_trailing(FilterProcessor &C4_RESTRICT proc);
611 template<class FilterProcessor>
void _filter_dquoted_backslash(FilterProcessor &C4_RESTRICT proc);
613 template<class FilterProcessor>
void _filter_chomp(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp,
size_t indentation);
614 template<class FilterProcessor>
size_t _handle_all_whitespace(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp);
615 template<class FilterProcessor>
size_t _extend_to_chomp(FilterProcessor &C4_RESTRICT proc,
size_t contents_len);
616 template<class FilterProcessor>
void _filter_block_indentation(FilterProcessor &C4_RESTRICT proc,
size_t indentation);
617 template<class FilterProcessor>
void _filter_block_folded_newlines(FilterProcessor &C4_RESTRICT proc,
size_t indentation,
size_t len);
618 template<class FilterProcessor>
size_t _filter_block_folded_newlines_compress(FilterProcessor &C4_RESTRICT proc,
size_t num_newl,
size_t wpos_at_first_newl);
619 template<class FilterProcessor>
void _filter_block_folded_newlines_leading(FilterProcessor &C4_RESTRICT proc,
size_t indentation,
size_t len);
620 template<class FilterProcessor>
void _filter_block_folded_indented_block(FilterProcessor &C4_RESTRICT proc,
size_t indentation,
size_t len,
size_t curr_indentation) noexcept;
626 void _line_progressed(
size_t ahead);
628 void _line_ended_undo();
630 bool _finished_file() const;
631 bool _finished_line() const;
634 substr _peek_next_line(
size_t pos=
npos) const;
636 bool _at_line_begin()
const
638 return m_evt_handler->m_curr->line_contents.rem.begin() == m_evt_handler->m_curr->line_contents.full.begin();
641 void _relocate_arena(csubstr prev_arena, substr next_arena);
642 static void _s_relocate_arena(
void*, csubstr prev_arena, substr next_arena);
/** @name flag queries on the current parser state
 * All three test `m_evt_handler->m_curr->flags` against the mask f. */
/** true when every bit of f is set in the current state's flags */
646 C4_ALWAYS_INLINE
bool has_all(
ParserFlag_t f)
const noexcept {
return (m_evt_handler->m_curr->flags & f) == f; }
/** true when at least one bit of f is set in the current state's flags */
647 C4_ALWAYS_INLINE
bool has_any(
ParserFlag_t f)
const noexcept {
return (m_evt_handler->m_curr->flags & f) != 0; }
/** true when no bit of f is set in the current state's flags */
648 C4_ALWAYS_INLINE
bool has_none(
ParserFlag_t f)
const noexcept {
return (m_evt_handler->m_curr->flags & f) == 0; }
/** @name flag queries on an explicitly given parser state
 * Same tests as the single-argument overloads, but applied to
 * `s->flags`. s is dereferenced unconditionally, so it must not be null. */
/** true when every bit of f is set in s->flags */
649 static C4_ALWAYS_INLINE
bool has_all(
ParserFlag_t f, ParserState
const* C4_RESTRICT s) noexcept {
return (s->flags & f) == f; }
/** true when at least one bit of f is set in s->flags */
650 static C4_ALWAYS_INLINE
bool has_any(
ParserFlag_t f, ParserState
const* C4_RESTRICT s) noexcept {
return (s->flags & f) != 0; }
/** true when no bit of f is set in s->flags */
651 static C4_ALWAYS_INLINE
bool has_none(
ParserFlag_t f, ParserState
const* C4_RESTRICT s) noexcept {
return (s->flags & f) == 0; }
/** @name flag mutators on an explicitly given parser state
 * s is dereferenced unconditionally, so it must not be null. */
/** set the bits of `on` in s->flags */
654 C4_ALWAYS_INLINE
static void add_flags(
ParserFlag_t on, ParserState *C4_RESTRICT s) noexcept { s->flags |= on; }
/** clear the bits of `off`, then set the bits of `on`; because the set
 * happens last, a bit present in both masks ends up set */
655 C4_ALWAYS_INLINE
static void addrem_flags(
ParserFlag_t on,
ParserFlag_t off, ParserState *C4_RESTRICT s) noexcept { s->flags &= ~off; s->flags |= on; }
/** clear the bits of `off` in s->flags */
656 C4_ALWAYS_INLINE
static void rem_flags(
ParserFlag_t off, ParserState *C4_RESTRICT s) noexcept { s->flags &= ~off; }
/** @name flag mutators on the current parser state
 * These operate directly on `m_evt_handler->m_curr->flags`. */
/** set the bits of `on` in the current state's flags */
657 C4_ALWAYS_INLINE
void add_flags(
ParserFlag_t on) noexcept { m_evt_handler->m_curr->flags |= on; }
/** clear the bits of `off`, then set the bits of `on`; because the set
 * happens last, a bit present in both masks ends up set */
658 C4_ALWAYS_INLINE
void addrem_flags(
ParserFlag_t on,
ParserFlag_t off) noexcept { m_evt_handler->m_curr->flags &= ~off; m_evt_handler->m_curr->flags |= on; }
/** clear the bits of `off` in the current state's flags */
659 C4_ALWAYS_INLINE
void rem_flags(
ParserFlag_t off) noexcept { m_evt_handler->m_curr->flags &= ~off; }
/** @name flag mutators, out-of-line variant
 * The static overloads are only declared here, and the instance
 * overloads forward to them with the current state
 * (`m_evt_handler->m_curr`).
 * NOTE(review): these signatures repeat the inline overloads declared
 * just before them, so presumably the two sets are selected by a
 * preprocessor conditional that is not visible here; the matching
 * static addrem_flags declaration is also not visible -- confirm. */
661 static void add_flags(
ParserFlag_t on, ParserState *C4_RESTRICT s);
663 static void rem_flags(
ParserFlag_t off, ParserState *C4_RESTRICT s);
/** forwards to the static add_flags() with the current state */
664 C4_ALWAYS_INLINE
void add_flags(
ParserFlag_t on) noexcept { add_flags(on, m_evt_handler->m_curr); }
/** forwards to the static addrem_flags() with the current state */
665 C4_ALWAYS_INLINE
void addrem_flags(
ParserFlag_t on,
ParserFlag_t off) noexcept { addrem_flags(on, off, m_evt_handler->m_curr); }
/** forwards to the static rem_flags() with the current state */
666 C4_ALWAYS_INLINE
void rem_flags(
ParserFlag_t off) noexcept { rem_flags(off, m_evt_handler->m_curr); }
671 void _prepare_locations();
672 void _resize_locations(
size_t sz);
673 bool _locations_dirty()
const;
675 bool _location_from_cont(Tree
const& tree,
id_type node, Location *C4_RESTRICT loc)
const;
676 bool _location_from_node(Tree
const& tree,
id_type node, Location *C4_RESTRICT loc,
id_type level)
const;
685 template<
class ...Args>
void _dbg(csubstr fmt, Args
const& C4_RESTRICT ...args)
const;
687 template<
class ...Args>
void _err(csubstr fmt, Args
const& C4_RESTRICT ...args)
const;
688 template<
class ...Args>
void _errloc(csubstr fmt, Location
const& loc, Args
const& C4_RESTRICT ...args)
const;
690 template<
class DumpFn>
void _fmt_msg(DumpFn &&dumpfn)
const;
703 Entry annotations[2];
707 void _add_annotation(Annotation *C4_RESTRICT dst, csubstr str,
size_t indentation,
size_t line);
708 void _clear_annotations(Annotation *C4_RESTRICT dst);
/** true when at least one pending tag or pending anchor is recorded,
 * i.e. when either m_pending_tags.num_entries or
 * m_pending_anchors.num_entries is nonzero */
709 bool _has_pending_annotations()
const {
return m_pending_tags.num_entries || m_pending_anchors.num_entries; }
710 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
711 bool _handle_indentation_from_annotations();
713 bool _annotations_require_key_container()
const;
714 void _handle_annotations_before_blck_key_scalar();
715 void _handle_annotations_before_blck_val_scalar();
716 void _handle_annotations_before_start_mapblck(
size_t current_line);
717 void _handle_annotations_before_start_mapblck_as_key();
718 void _handle_annotations_and_indentation_after_start_mapblck(
size_t key_indentation,
size_t key_line);
719 size_t _select_indentation_from_annotations(
size_t val_indentation,
size_t val_line);
720 void _handle_directive(csubstr rem);
724 void _check_tag(csubstr tag);
736 EventHandler *C4_RESTRICT m_evt_handler;
741 Annotation m_pending_anchors;
742 Annotation m_pending_tags;
744 bool m_was_inside_qmrk;
745 bool m_doc_empty =
true;
751 size_t *m_newline_offsets;
752 size_t m_newline_offsets_size;
753 size_t m_newline_offsets_capacity;
754 csubstr m_newline_offsets_buf;
785 #if defined(_MSC_VER)
786 # pragma warning(pop)
Holds a pointer to an existing tree, and a node id.
A reference to a node in an existing yaml tree, offering a more convenient API than the index-based A...
This is the main driver of parsing logic: it scans the YAML or JSON source for tokens,...
Location location(Tree const &tree, id_type node_id) const
Get the location of a node of the last tree to be parsed by this parser.
void reserve_stack(id_type capacity)
Reserve a certain capacity for the parsing stack.
FilterResult filter_scalar_plain(csubstr scalar, substr dst, size_t indentation)
filter a plain scalar
csubstr location_contents(Location const &loc) const
Get the string starting at a particular location, to the end of the parsed source buffer.
FilterResult filter_scalar_squoted(csubstr scalar, substr dst)
filter a single-quoted scalar
ParseEngine(EventHandler *evt_handler, ParserOptions opts={})
FilterResult filter_scalar_dquoted(csubstr scalar, substr dst)
filter a double-quoted scalar
void reserve_filter_arena(size_t)
void parse_json_in_place_ev(csubstr filename, substr src)
parse JSON in place, emitting events to the current handler
Location val_location(const char *val) const
Given a pointer to a buffer position, get the location.
FilterResult filter_scalar_plain_in_place(substr scalar, size_t cap, size_t indentation)
filter a plain scalar in place
FilterResult filter_scalar_squoted_in_place(substr scalar, size_t cap)
filter a single-quoted scalar in place
FilterResultExtending filter_scalar_dquoted_in_place(substr scalar, size_t cap)
filter a double-quoted scalar in place
Encoding_e encoding() const
Get the encoding of the latest YAML buffer parsed by this object.
size_t locations_capacity() const
void parse_in_place_ev(csubstr filename, substr src)
parse YAML in place, emitting events to the current handler
csubstr source() const
Get the latest YAML buffer parsed by this object.
FilterResult filter_scalar_block_literal_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar in place
ParserOptions const & options() const
Get the options used to build this parser object.
size_t filter_arena_capacity() const
FilterResult filter_scalar_block_literal(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar
id_type stack_capacity() const
Callbacks const & callbacks() const
Get the current callbacks in the parser.
EventHandler handler_type
FilterResult filter_scalar_block_folded_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar in place
csubstr filename() const
Get the name of the latest file parsed by this object.
void reserve_locations(size_t num_source_lines)
Reserve a certain capacity for the array used to track node locations in the source buffer.
FilterResult filter_scalar_block_folded(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar
void parse_in_arena(Parser *parser, csubstr filename, csubstr yaml, Tree *t, id_type node_id)
(1) parse YAML into an existing tree node. The filename will be used in any error messages arising du...
void parse_in_place(Parser *parser, csubstr filename, substr yaml, Tree *t, id_type node_id)
(1) parse YAML into an existing tree node.
id_type estimate_tree_capacity(csubstr src)
Quickly inspect the source to estimate the number of nodes the resulting tree is likely to have.
RYML_ID_TYPE id_type
The type of a node id in the YAML tree; to override the default type, define the macro RYML_ID_TYPE t...
@ npos
a null string position
size_t _find_last_newline_and_larger_indentation(csubstr s, size_t indentation) noexcept
int ParserFlag_t
data type for ParserState_e
a c-style callbacks class.
Options to give to the parser to control its behavior.
ParserOptions & scalar_filtering(bool enabled) noexcept
enable/disable scalar filtering while parsing
bool scalar_filtering() const noexcept
query scalar filtering status
bool locations() const noexcept
query source location tracking status
ParserOptions & locations(bool enabled) noexcept
enable/disable source location tracking