rapidyaml  0.12.0
parse and emit YAML, and do it fast
substr.hpp
Go to the documentation of this file.
1 #ifndef _C4_SUBSTR_HPP_
2 #define _C4_SUBSTR_HPP_
3 
4 /** @file substr.hpp read+write string views */
5 
6 #include <string.h>
7 #include <ctype.h>
8 #include <type_traits>
9 
10 #include "c4/export.hpp"
11 #include "c4/language.hpp"
12 #include "c4/error.hpp"
13 #include "c4/substr_fwd.hpp"
14 
15 #ifdef __clang__
16 # pragma clang diagnostic push
17 # pragma clang diagnostic ignored "-Wold-style-cast"
18 #elif defined(__GNUC__)
19 # pragma GCC diagnostic push
20 # pragma GCC diagnostic ignored "-Wtype-limits" // disable warnings on size_t>=0, used heavily in assertions below. These assertions are a preparation step for providing the index type as a template parameter.
21 # pragma GCC diagnostic ignored "-Wuseless-cast"
22 # pragma GCC diagnostic ignored "-Wold-style-cast"
23 #endif
24 
25 
26 namespace c4 {
27 
28 /** @defgroup doc_substr Substring: read/write string views
29  * @{ */
30 
31 //-----------------------------------------------------------------------------
32 //-----------------------------------------------------------------------------
33 //-----------------------------------------------------------------------------
34 
35 /** @cond dev */
36 namespace detail {
37 template<typename C>
38 static inline void _do_reverse(C *C4_RESTRICT first, C *C4_RESTRICT last)
39 {
40  while(last > first)
41  {
42  C tmp = *last;
43  *last-- = *first;
44  *first++ = tmp;
45  }
46 }
47 } // namespace detail
48 /** @endcond */
49 
50 //-----------------------------------------------------------------------------
51 //-----------------------------------------------------------------------------
52 //-----------------------------------------------------------------------------
53 
54 /** @cond dev */
55 // utility macros to deuglify SFINAE code; undefined after the class.
56 // https://stackoverflow.com/questions/43051882/how-to-disable-a-class-member-funrtion-for-certain-template-types
57 #define C4_REQUIRE_RW(ret_type) \
58  template <typename U=C> \
59  typename std::enable_if< ! std::is_const<U>::value, ret_type>::type
60 /** @endcond */
61 
62 
63 /** a non-owning string-view, consisting of a character pointer
64  * and a length.
65  *
66  * @note The pointer is explicitly restricted.
67  *
68  * @see a [quickstart
69  * sample](https://rapidyaml.readthedocs.io/latest/doxygen/group__doc__quickstart.html#ga43e253da0692c13967019446809c1113)
70  * in rapidyaml's documentation.
71  */
72 template<class C>
73 struct C4CORE_EXPORT basic_substring // NOLINT(cppcoreguidelines-special-member-functions,hicpp-special-member-functions)
74 {
75 public:
76 
77  /** a restricted pointer to the first character of the substring */
78  C * C4_RESTRICT str;
79  /** the length of the substring */
80  size_t len;
81 
82 public:
83 
84  /** @name Types */
85  /** @{ */
86 
87  using CC = typename std::add_const<C>::type; //!< CC=const char
88  using NCC_ = typename std::remove_const<C>::type; //!< NCC_=non const char
89 
92 
93  using char_type = C;
94  using size_type = size_t;
95 
96  using iterator = C*;
97  using const_iterator = CC*;
98 
99  enum : size_t { npos = (size_t)-1, NONE = (size_t)-1 };
100 
101  /// convert automatically to substring of const C
102  template<class U=C>
103  C4_ALWAYS_INLINE operator typename std::enable_if<!std::is_const<U>::value, ro_substr const&>::type () const noexcept
104  {
105  return *(ro_substr const*)this; // don't call the str+len ctor because it does a check
106  }
107 
108  /** @} */
109 
110 public:
111 
112  /** @name Default construction and assignment */
113  /** @{ */
114 
115  C4_ALWAYS_INLINE constexpr basic_substring() noexcept : str(), len() {}
116 
117  C4_ALWAYS_INLINE basic_substring(basic_substring const&) noexcept = default;
118  C4_ALWAYS_INLINE basic_substring(basic_substring &&) noexcept = default;
119  C4_ALWAYS_INLINE basic_substring(std::nullptr_t) noexcept : str(nullptr), len(0) {}
120 
121  C4_ALWAYS_INLINE basic_substring& operator= (basic_substring const&) noexcept = default;
122  C4_ALWAYS_INLINE basic_substring& operator= (basic_substring &&) noexcept = default;
123  C4_ALWAYS_INLINE basic_substring& operator= (std::nullptr_t) noexcept { str = nullptr; len = 0; return *this; }
124 
125  C4_ALWAYS_INLINE void clear() noexcept { str = nullptr; len = 0; }
126 
127  /** @} */
128 
129 public:
130 
131  /** @name Construction and assignment from characters with the same type */
132  /** @{ */
133 
134  /** Construct from an array.
135  * @warning the input string need not be zero terminated, but the
136  * length is taken as if the string was zero terminated */
137  template<size_t N>
138  C4_ALWAYS_INLINE constexpr basic_substring(C (&s_)[N]) noexcept : str(s_), len(N-1) {}
139  /** Construct from a pointer and length.
140  * @warning the input string need not be zero terminated. */
141  C4_ALWAYS_INLINE basic_substring(C *s_, size_t len_) noexcept : str(s_), len(len_) { C4_ASSERT(str || !len_); }
142  /** Construct from two pointers.
143  * @warning the end pointer MUST BE larger than or equal to the begin pointer
144  * @warning the input string need not be zero terminated */
145  C4_ALWAYS_INLINE basic_substring(C *beg_, C *end_) noexcept : str(beg_), len(static_cast<size_t>(end_ - beg_)) { C4_ASSERT(end_ >= beg_); }
146  /** Construct from a C-string (zero-terminated string)
147  * @warning the input string MUST BE zero terminated.
148  * @warning will call strlen()
149  * @note this overload uses SFINAE to prevent it from overriding the array ctor
150  * @see For a more detailed explanation on why the plain overloads cannot
151  * coexist, see http://cplusplus.bordoon.com/specializeForCharacterArrays.html */
152  template<class U, typename std::enable_if<std::is_same<U, C*>::value || std::is_same<U, NCC_*>::value, int>::type=0>
153  C4_ALWAYS_INLINE basic_substring(U s_) noexcept : str(s_), len(s_ ? strlen(s_) : 0) {}
154 
155  /** Assign from an array.
156  * @warning the input string need not be zero terminated, but the
157  * length is taken as if the string was zero terminated */
158  template<size_t N>
159  C4_ALWAYS_INLINE void assign(C (&s_)[N]) noexcept { str = (s_); len = (N-1); }
160  /** Assign from a pointer and length.
161  * @warning the input string need not be zero terminated. */
162  C4_ALWAYS_INLINE void assign(C *s_, size_t len_) noexcept { str = s_; len = len_; C4_ASSERT(str || !len_); }
163  /** Assign from two pointers.
164  * @warning the end pointer MUST BE larger than or equal to the begin pointer
165  * @warning the input string need not be zero terminated. */
166  C4_ALWAYS_INLINE void assign(C *beg_, C *end_) noexcept { C4_ASSERT(end_ >= beg_); str = (beg_); len = static_cast<size_t>(end_ - beg_); }
167  /** Assign from a C-string (zero-terminated string)
168  * @warning the input string must be zero terminated.
169  * @warning will call strlen()
170  * @note this overload uses SFINAE to prevent it from overriding the array ctor
171  * @see For a more detailed explanation on why the plain overloads cannot
172  * coexist, see http://cplusplus.bordoon.com/specializeForCharacterArrays.html */
173  template<class U, typename std::enable_if<std::is_same<U, C*>::value || std::is_same<U, NCC_*>::value, int>::type=0>
174  C4_ALWAYS_INLINE void assign(U s_) noexcept { str = (s_); len = (s_ ? strlen(s_) : 0); }
175 
176  /** Assign from an array.
177  * @warning the input string need not be zero terminated. */
178  template<size_t N>
179  C4_ALWAYS_INLINE basic_substring& operator= (C (&s_)[N]) noexcept { str = (s_); len = (N-1); return *this; }
180  /** Assign from a C-string (zero-terminated string)
181  * @warning the input string MUST BE zero terminated.
182  * @warning will call strlen()
183  * @note this overload uses SFINAE to prevent it from overriding the array ctor
184  * @see For a more detailed explanation on why the plain overloads cannot
185  * coexist, see http://cplusplus.bordoon.com/specializeForCharacterArrays.html */
186  template<class U, typename std::enable_if<std::is_same<U, C*>::value || std::is_same<U, NCC_*>::value, int>::type=0>
187  C4_ALWAYS_INLINE basic_substring& operator= (U s_) noexcept { str = s_; len = s_ ? strlen(s_) : 0; return *this; }
188 
189  /** @} */
190 
191 public:
192 
193  /** @name Standard accessor methods */
194  /** @{ */
195 
196  C4_ALWAYS_INLINE C4_PURE bool has_str() const noexcept { return ! empty() && str[0] != C(0); }
197  C4_ALWAYS_INLINE C4_PURE bool empty() const noexcept { return (len == 0 || str == nullptr); }
198  C4_ALWAYS_INLINE C4_PURE bool not_empty() const noexcept { return (len != 0 && str != nullptr); }
199  C4_ALWAYS_INLINE C4_PURE size_t size() const noexcept { return len; }
200 
201  C4_ALWAYS_INLINE C4_PURE iterator begin() noexcept { return str; }
202  C4_ALWAYS_INLINE C4_PURE iterator end () noexcept { return str + len; }
203 
204  C4_ALWAYS_INLINE C4_PURE const_iterator begin() const noexcept { return str; }
205  C4_ALWAYS_INLINE C4_PURE const_iterator end () const noexcept { return str + len; }
206 
207  C4_ALWAYS_INLINE C4_PURE C * data() noexcept { return str; }
208  C4_ALWAYS_INLINE C4_PURE C const* data() const noexcept { return str; }
209 
210  C4_ALWAYS_INLINE C4_PURE C & operator[] (size_t i) noexcept { C4_ASSERT(i >= 0 && i < len); return str[i]; }
211  C4_ALWAYS_INLINE C4_PURE C const& operator[] (size_t i) const noexcept { C4_ASSERT(i >= 0 && i < len); return str[i]; }
212 
213  C4_ALWAYS_INLINE C4_PURE C & front() noexcept { C4_ASSERT(len > 0 && str != nullptr); return *str; }
214  C4_ALWAYS_INLINE C4_PURE C const& front() const noexcept { C4_ASSERT(len > 0 && str != nullptr); return *str; }
215 
216  C4_ALWAYS_INLINE C4_PURE C & back() noexcept { C4_ASSERT(len > 0 && str != nullptr); return *(str + len - 1); }
217  C4_ALWAYS_INLINE C4_PURE C const& back() const noexcept { C4_ASSERT(len > 0 && str != nullptr); return *(str + len - 1); }
218 
219  /** @} */
220 
221 public:
222 
223  /** @name Comparison methods */
224  /** @{ */
225 
226  C4_PURE int compare(C const c) const noexcept
227  {
228  C4_XASSERT((str != nullptr) || len == 0);
229  if(C4_LIKELY(str != nullptr && len > 0))
230  return (*str != c) ? *str - c : (static_cast<int>(len) - 1);
231  else
232  return -1;
233  }
234 
235  C4_PURE int compare(C const* C4_RESTRICT that, size_t sz) const noexcept
236  {
237  #if defined(__GNUC__) && (__GNUC__ >= 6)
238  C4_SUPPRESS_WARNING_GCC_WITH_PUSH("-Wnull-dereference")
239  #endif
240  C4_XASSERT(that || sz == 0);
241  C4_XASSERT(str || len == 0);
242  if(C4_LIKELY(str && that))
243  {
244  {
245  const size_t min = len < sz ? len : sz;
246  for(size_t i = 0; i < min; ++i)
247  if(str[i] != that[i])
248  return str[i] < that[i] ? -1 : 1;
249  }
250  if(len < sz)
251  return -1;
252  else if(len == sz)
253  return 0;
254  else
255  return 1;
256  }
257  else if(len == sz)
258  {
259  C4_XASSERT(len == 0 && sz == 0);
260  return 0;
261  }
262  return len < sz ? -1 : 1;
263  #if defined(__GNUC__) && (__GNUC__ >= 6)
264  C4_SUPPRESS_WARNING_GCC_POP
265  #endif
266  }
267 
268  C4_ALWAYS_INLINE C4_PURE int compare(ro_substr const that) const noexcept { return this->compare(that.str, that.len); }
269 
270  C4_ALWAYS_INLINE C4_PURE bool operator== (std::nullptr_t) const noexcept { return str == nullptr; }
271  C4_ALWAYS_INLINE C4_PURE bool operator!= (std::nullptr_t) const noexcept { return str != nullptr; }
272 
273  C4_ALWAYS_INLINE C4_PURE bool operator== (C const c) const noexcept { return this->compare(c) == 0; }
274  C4_ALWAYS_INLINE C4_PURE bool operator!= (C const c) const noexcept { return this->compare(c) != 0; }
275  C4_ALWAYS_INLINE C4_PURE bool operator< (C const c) const noexcept { return this->compare(c) < 0; }
276  C4_ALWAYS_INLINE C4_PURE bool operator> (C const c) const noexcept { return this->compare(c) > 0; }
277  C4_ALWAYS_INLINE C4_PURE bool operator<= (C const c) const noexcept { return this->compare(c) <= 0; }
278  C4_ALWAYS_INLINE C4_PURE bool operator>= (C const c) const noexcept { return this->compare(c) >= 0; }
279 
280  template<class U> C4_ALWAYS_INLINE C4_PURE bool operator== (basic_substring<U> const that) const noexcept { return this->compare(that) == 0; }
281  template<class U> C4_ALWAYS_INLINE C4_PURE bool operator!= (basic_substring<U> const that) const noexcept { return this->compare(that) != 0; }
282  template<class U> C4_ALWAYS_INLINE C4_PURE bool operator< (basic_substring<U> const that) const noexcept { return this->compare(that) < 0; }
283  template<class U> C4_ALWAYS_INLINE C4_PURE bool operator> (basic_substring<U> const that) const noexcept { return this->compare(that) > 0; }
284  template<class U> C4_ALWAYS_INLINE C4_PURE bool operator<= (basic_substring<U> const that) const noexcept { return this->compare(that) <= 0; }
285  template<class U> C4_ALWAYS_INLINE C4_PURE bool operator>= (basic_substring<U> const that) const noexcept { return this->compare(that) >= 0; }
286 
287  template<size_t N> C4_ALWAYS_INLINE C4_PURE bool operator== (const char (&that)[N]) const noexcept { return this->compare(that, N-1) == 0; }
288  template<size_t N> C4_ALWAYS_INLINE C4_PURE bool operator!= (const char (&that)[N]) const noexcept { return this->compare(that, N-1) != 0; }
289  template<size_t N> C4_ALWAYS_INLINE C4_PURE bool operator< (const char (&that)[N]) const noexcept { return this->compare(that, N-1) < 0; }
290  template<size_t N> C4_ALWAYS_INLINE C4_PURE bool operator> (const char (&that)[N]) const noexcept { return this->compare(that, N-1) > 0; }
291  template<size_t N> C4_ALWAYS_INLINE C4_PURE bool operator<= (const char (&that)[N]) const noexcept { return this->compare(that, N-1) <= 0; }
292  template<size_t N> C4_ALWAYS_INLINE C4_PURE bool operator>= (const char (&that)[N]) const noexcept { return this->compare(that, N-1) >= 0; }
293 
294  /** @} */
295 
296 public:
297 
298  /** @name Sub-selection methods */
299  /** @{ */
300 
301  /** true if *this is a substring of that (ie, from the same buffer) */
302  C4_ALWAYS_INLINE C4_PURE bool is_sub(ro_substr const that) const noexcept
303  {
304  return that.is_super(*this);
305  }
306 
307  /** true if that is a substring of *this (ie, from the same buffer) */
308  C4_ALWAYS_INLINE C4_PURE bool is_super(ro_substr const that) const noexcept
309  {
310  if(C4_LIKELY(len > 0))
311  return that.str >= str && that.str+that.len <= str+len;
312  else
313  return that.len == 0 && that.str == str && str != nullptr;
314  }
315 
316  /** true if there is overlap of at least one element between that and *this */
317  C4_ALWAYS_INLINE C4_PURE bool overlaps(ro_substr const that) const noexcept
318  {
319  // thanks @timwynants
320  return that.str+that.len > str && that.str < str+len;
321  }
322 
323 public:
324 
325  /** return [first,len[ */
326  C4_ALWAYS_INLINE C4_PURE basic_substring sub(size_t first) const noexcept
327  {
328  C4_ASSERT(first >= 0 && first <= len);
329  return basic_substring(str + first, len - first);
330  }
331 
332  /** return [first,first+num[. If num==npos, return [first,len[ */
333  C4_ALWAYS_INLINE C4_PURE basic_substring sub(size_t first, size_t num) const noexcept
334  {
335  C4_ASSERT(first >= 0 && first <= len);
336  C4_ASSERT((num >= 0 && num <= len) || (num == npos));
337  size_t rnum = num != npos ? num : len - first;
338  C4_ASSERT((first >= 0 && first + rnum <= len) || (num == 0));
339  return basic_substring(str + first, rnum);
340  }
341 
342  /** return [first,last[. If last==npos, return [first,len[ */
343  C4_ALWAYS_INLINE C4_PURE basic_substring range(size_t first, size_t last=npos) const noexcept
344  {
345  C4_ASSERT(first >= 0 && first <= len);
346  last = last != npos ? last : len;
347  C4_ASSERT(first <= last);
348  C4_ASSERT(last >= 0 && last <= len);
349  return basic_substring(str + first, last - first);
350  }
351 
352  /** return the first @p num elements: [0,num[*/
353  C4_ALWAYS_INLINE C4_PURE basic_substring first(size_t num) const noexcept
354  {
355  C4_ASSERT(num <= len || num == npos);
356  return basic_substring(str, num != npos ? num : len);
357  }
358 
359  /** return the last @p num elements: [len-num,len[*/
360  C4_ALWAYS_INLINE C4_PURE basic_substring last(size_t num) const noexcept
361  {
362  C4_ASSERT(num <= len || num == npos);
363  return num != npos ?
364  basic_substring(str + len - num, num) :
365  *this;
366  }
367 
368  /** offset from the ends: return [left,len-right[ ; ie, trim a
369  number of characters from the left and right. This is
370  equivalent to python's negative list indices. */
371  C4_ALWAYS_INLINE C4_PURE basic_substring offs(size_t left, size_t right) const noexcept
372  {
373  C4_ASSERT(left >= 0 && left <= len);
374  C4_ASSERT(right >= 0 && right <= len);
375  C4_ASSERT(left <= len - right + 1);
376  return basic_substring(str + left, len - right - left);
377  }
378 
379  /** return [0, pos[ . Same as .first(pos), but provided for compatibility with .right_of() */
380  C4_ALWAYS_INLINE C4_PURE basic_substring left_of(size_t pos) const noexcept
381  {
382  C4_ASSERT(pos <= len || pos == npos);
383  return (pos != npos) ?
384  basic_substring(str, pos) :
385  *this;
386  }
387 
388  /** return [0, pos+include_pos[ . Same as .first(pos+1), but provided for compatibility with .right_of() */
389  C4_ALWAYS_INLINE C4_PURE basic_substring left_of(size_t pos, bool include_pos) const noexcept
390  {
391  C4_ASSERT(pos <= len || pos == npos);
392  return (pos != npos) ?
393  basic_substring(str, pos+include_pos) :
394  *this;
395  }
396 
397  /** return [pos+1, len[ */
398  C4_ALWAYS_INLINE C4_PURE basic_substring right_of(size_t pos) const noexcept
399  {
400  C4_ASSERT(pos <= len || pos == npos);
401  return (pos != npos) ?
402  basic_substring(str + (pos + 1), len - (pos + 1)) :
403  basic_substring(str + len, size_t(0));
404  }
405 
406  /** return [pos+!include_pos, len[ */
407  C4_ALWAYS_INLINE C4_PURE basic_substring right_of(size_t pos, bool include_pos) const noexcept
408  {
409  C4_ASSERT(pos <= len || pos == npos);
410  return (pos != npos) ?
411  basic_substring(str + (pos + !include_pos), len - (pos + !include_pos)) :
412  basic_substring(str + len, size_t(0));
413  }
414 
415 public:
416 
417  /** given @p subs a substring of the current string, get the
418  * portion of the current string to the left of it */
419  C4_ALWAYS_INLINE C4_PURE basic_substring left_of(ro_substr const subs) const noexcept
420  {
421  C4_ASSERT(is_super(subs) || subs.empty());
422  auto ssb = subs.begin();
423  auto b = begin();
424  auto e = end();
425  if(ssb >= b && ssb <= e)
426  return sub(0, static_cast<size_t>(ssb - b));
427  else
428  return sub(0, 0);
429  }
430 
431  /** given @p subs a substring of the current string, get the
432  * portion of the current string to the right of it */
433  C4_ALWAYS_INLINE C4_PURE basic_substring right_of(ro_substr const subs) const noexcept
434  {
435  C4_ASSERT(is_super(subs) || subs.empty());
436  auto sse = subs.end();
437  auto b = begin();
438  auto e = end();
439  if(sse >= b && sse <= e)
440  return sub(static_cast<size_t>(sse - b), static_cast<size_t>(e - sse));
441  else
442  return sub(0, 0);
443  }
444 
445  /** @} */
446 
447 public:
448 
449  /** @name Removing characters (trim()) / patterns (strip()) from the tips of the string */
450  /** @{ */
451 
452  /** trim left */
453  basic_substring triml(const C c) const
454  {
455  if( ! empty())
456  {
457  size_t pos = first_not_of(c);
458  if(pos != npos)
459  return sub(pos);
460  }
461  return sub(0, 0);
462  }
463  /** trim left ANY of the characters.
464  * @see stripl() to remove a pattern from the left */
466  {
467  if( ! empty())
468  {
469  size_t pos = first_not_of(chars);
470  if(pos != npos)
471  return sub(pos);
472  }
473  return sub(0, 0);
474  }
475 
476  /** trim the character c from the right */
477  basic_substring trimr(const C c) const
478  {
479  if( ! empty())
480  {
481  size_t pos = last_not_of(c, npos);
482  if(pos != npos)
483  return sub(0, pos+1);
484  }
485  return sub(0, 0);
486  }
487  /** trim right ANY of the characters
488  * @see stripr() to remove a pattern from the right */
490  {
491  if( ! empty())
492  {
493  size_t pos = last_not_of(chars, npos);
494  if(pos != npos)
495  return sub(0, pos+1);
496  }
497  return sub(0, 0);
498  }
499 
500  /** trim the character c left and right */
501  basic_substring trim(const C c) const
502  {
503  return triml(c).trimr(c);
504  }
505  /** trim left and right ANY of the characters
506  * @see strip() to remove a pattern from the left and right */
507  basic_substring trim(ro_substr const chars) const
508  {
509  return triml(chars).trimr(chars);
510  }
511 
512  /** remove a pattern from the left
513  * @see triml() to remove characters*/
515  {
516  if( ! begins_with(pattern))
517  return *this;
518  return sub(pattern.len < len ? pattern.len : len);
519  }
520 
521  /** remove a pattern from the right
522  * @see trimr() to remove characters*/
524  {
525  if( ! ends_with(pattern))
526  return *this;
527  return left_of(len - (pattern.len < len ? pattern.len : len));
528  }
529 
530  /** @} */
531 
532 public:
533 
534  /** @name Lookup methods */
535  /** @{ */
536 
537  size_t find(const C c, size_t start_pos=0) const
538  {
539  return first_of(c, start_pos);
540  }
541  size_t find(ro_substr pattern, size_t start_pos=0) const
542  {
543  C4_ASSERT(start_pos == npos || (start_pos >= 0 && start_pos <= len));
544  if(len < pattern.len) return npos;
545  for(size_t i = start_pos, e = len - pattern.len + 1; i < e; ++i)
546  {
547  bool gotit = true;
548  for(size_t j = 0; j < pattern.len; ++j)
549  {
550  C4_ASSERT(i + j < len);
551  if(str[i + j] != pattern.str[j])
552  {
553  gotit = false;
554  break;
555  }
556  }
557  if(gotit)
558  {
559  return i;
560  }
561  }
562  return npos;
563  }
564 
565 public:
566 
567  /** count the number of occurrences of c */
568  size_t count(const C c, size_t pos=0) const
569  {
570  C4_ASSERT(pos >= 0 && pos <= len);
571  size_t num = 0;
572  pos = find(c, pos);
573  while(pos != npos)
574  {
575  ++num;
576  pos = find(c, pos + 1);
577  }
578  return num;
579  }
580 
581  /** count the number of occurrences of s */
582  size_t count(ro_substr c, size_t pos=0) const
583  {
584  C4_ASSERT(pos >= 0 && pos <= len);
585  size_t num = 0;
586  pos = find(c, pos);
587  while(pos != npos)
588  {
589  ++num;
590  pos = find(c, pos + c.len);
591  }
592  return num;
593  }
594 
595  /** get the substr consisting of the first occurrence of @p c after @p pos, or an empty substr if none occurs */
596  basic_substring select(const C c, size_t pos=0) const
597  {
598  pos = find(c, pos);
599  return pos != npos ? sub(pos, 1) : basic_substring();
600  }
601 
602  /** get the substr consisting of the first occurrence of @p pattern after @p pos, or an empty substr if none occurs */
603  basic_substring select(ro_substr pattern, size_t pos=0) const
604  {
605  pos = find(pattern, pos);
606  return pos != npos ? sub(pos, pattern.len) : basic_substring();
607  }
608 
609 public:
610 
612  {
613  size_t which;
614  size_t pos;
615  operator bool() const { return which != NONE && pos != npos; }
616  };
617 
619  {
620  ro_substr s[2] = {s0, s1};
621  return first_of_any_iter(&s[0], &s[0] + 2);
622  }
623 
625  {
626  ro_substr s[3] = {s0, s1, s2};
627  return first_of_any_iter(&s[0], &s[0] + 3);
628  }
629 
631  {
632  ro_substr s[4] = {s0, s1, s2, s3};
633  return first_of_any_iter(&s[0], &s[0] + 4);
634  }
635 
637  {
638  ro_substr s[5] = {s0, s1, s2, s3, s4};
639  return first_of_any_iter(&s[0], &s[0] + 5);
640  }
641 
642  template<class It>
643  first_of_any_result first_of_any_iter(It first_span, It last_span) const
644  {
645  for(size_t i = 0; i < len; ++i)
646  {
647  size_t curr = 0;
648  for(It it = first_span; it != last_span; ++curr, ++it)
649  {
650  auto const& chars = *it;
651  if((i + chars.len) > len) continue;
652  bool gotit = true;
653  for(size_t j = 0; j < chars.len; ++j)
654  {
655  C4_ASSERT(i + j < len);
656  if(str[i + j] != chars[j])
657  {
658  gotit = false;
659  break;
660  }
661  }
662  if(gotit)
663  {
664  return {curr, i};
665  }
666  }
667  }
668  return {NONE, npos};
669  }
670 
671 public:
672 
673  /** true if the first character of the string is @p c */
674  bool begins_with(const C c) const
675  {
676  #if defined(__GNUC__) && (__GNUC__ >= 6)
677  C4_SUPPRESS_WARNING_GCC_WITH_PUSH("-Wnull-dereference")
678  #endif
679  return len > 0 ? str[0] == c : false;
680  #if defined(__GNUC__) && (__GNUC__ >= 6)
681  C4_SUPPRESS_WARNING_GCC_POP
682  #endif
683  }
684 
685  /** true if the first @p num characters of the string are @p c */
686  bool begins_with(const C c, size_t num) const
687  {
688  if(len < num)
689  {
690  return false;
691  }
692  for(size_t i = 0; i < num; ++i)
693  {
694  if(str[i] != c)
695  {
696  return false;
697  }
698  }
699  return true;
700  }
701 
702  /** true if the string begins with the given @p pattern */
703  bool begins_with(ro_substr pattern) const
704  {
705  if(len < pattern.len)
706  {
707  return false;
708  }
709  for(size_t i = 0; i < pattern.len; ++i)
710  {
711  if(str[i] != pattern[i])
712  {
713  return false;
714  }
715  }
716  return true;
717  }
718 
719  /** true if the first character of the string is any of the given @p chars */
720  bool begins_with_any(ro_substr chars) const
721  {
722  if(len == 0)
723  {
724  return false;
725  }
726  for(size_t i = 0; i < chars.len; ++i)
727  {
728  if(str[0] == chars.str[i])
729  {
730  return true;
731  }
732  }
733  return false;
734  }
735 
736  /** true if the last character of the string is @p c */
737  bool ends_with(const C c) const
738  {
739  return len > 0 ? str[len-1] == c : false;
740  }
741 
742  /** true if the last @p num characters of the string are @p c */
743  bool ends_with(const C c, size_t num) const
744  {
745  if(len < num)
746  {
747  return false;
748  }
749  for(size_t i = len - num; i < len; ++i)
750  {
751  if(str[i] != c)
752  {
753  return false;
754  }
755  }
756  return true;
757  }
758 
759  /** true if the string ends with the given @p pattern */
760  bool ends_with(ro_substr pattern) const
761  {
762  if(len < pattern.len)
763  {
764  return false;
765  }
766  for(size_t i = 0, s = len-pattern.len; i < pattern.len; ++i)
767  {
768  if(str[s+i] != pattern[i])
769  {
770  return false;
771  }
772  }
773  return true;
774  }
775 
776  /** true if the last character of the string is any of the given @p chars */
777  bool ends_with_any(ro_substr chars) const
778  {
779  if(len == 0)
780  {
781  return false;
782  }
783  for(size_t i = 0; i < chars.len; ++i)
784  {
785  if(str[len - 1] == chars[i])
786  {
787  return true;
788  }
789  }
790  return false;
791  }
792 
793 public:
794 
795  /** @return the first position where c is found in the string, or npos if none is found */
796  size_t first_of(const C c, size_t start=0) const
797  {
798  C4_ASSERT(start == npos || (start >= 0 && start <= len));
799  for(size_t i = start; i < len; ++i)
800  {
801  if(str[i] == c)
802  return i;
803  }
804  return npos;
805  }
806 
807  /** @return the last position where c is found in the string, or npos if none is found */
808  size_t last_of(const C c, size_t start=npos) const
809  {
810  C4_ASSERT(start == npos || (start >= 0 && start <= len));
811  if(start == npos)
812  start = len;
813  for(size_t i = start-1; i != size_t(-1); --i)
814  {
815  if(str[i] == c)
816  return i;
817  }
818  return npos;
819  }
820 
821  /** @return the first position where ANY of the chars is found in the string, or npos if none is found */
822  size_t first_of(ro_substr chars, size_t start=0) const
823  {
824  C4_ASSERT(start == npos || (start >= 0 && start <= len));
825  for(size_t i = start; i < len; ++i)
826  {
827  for(size_t j = 0; j < chars.len; ++j)
828  {
829  if(str[i] == chars[j])
830  return i;
831  }
832  }
833  return npos;
834  }
835 
836  /** @return the last position where ANY of the chars is found in the string, or npos if none is found */
837  size_t last_of(ro_substr chars, size_t start=npos) const
838  {
839  C4_ASSERT(start == npos || (start >= 0 && start <= len));
840  if(start == npos)
841  start = len;
842  for(size_t i = start-1; i != size_t(-1); --i)
843  {
844  for(size_t j = 0; j < chars.len; ++j)
845  {
846  if(str[i] == chars[j])
847  return i;
848  }
849  }
850  return npos;
851  }
852 
853 public:
854 
855  size_t first_not_of(const C c) const
856  {
857  for(size_t i = 0; i < len; ++i)
858  {
859  if(str[i] != c)
860  return i;
861  }
862  return npos;
863  }
864 
865  size_t first_not_of(const C c, size_t start) const
866  {
867  C4_ASSERT((start >= 0 && start <= len) || (start == len && len == 0));
868  for(size_t i = start; i < len; ++i)
869  {
870  if(str[i] != c)
871  return i;
872  }
873  return npos;
874  }
875 
876  size_t last_not_of(const C c) const
877  {
878  for(size_t i = len-1; i != size_t(-1); --i)
879  {
880  if(str[i] != c)
881  return i;
882  }
883  return npos;
884  }
885 
886  size_t last_not_of(const C c, size_t start) const
887  {
888  C4_ASSERT(start == npos || (start >= 0 && start <= len));
889  if(start == npos)
890  start = len;
891  for(size_t i = start-1; i != size_t(-1); --i)
892  {
893  if(str[i] != c)
894  return i;
895  }
896  return npos;
897  }
898 
899  size_t first_not_of(ro_substr chars) const
900  {
901  for(size_t i = 0; i < len; ++i)
902  {
903  bool gotit = true;
904  for(size_t j = 0; j < chars.len; ++j)
905  {
906  if(str[i] == chars.str[j])
907  {
908  gotit = false;
909  break;
910  }
911  }
912  if(gotit)
913  {
914  return i;
915  }
916  }
917  return npos;
918  }
919 
920  size_t first_not_of(ro_substr chars, size_t start) const
921  {
922  C4_ASSERT((start >= 0 && start <= len) || (start == len && len == 0));
923  for(size_t i = start; i < len; ++i)
924  {
925  bool gotit = true;
926  for(size_t j = 0; j < chars.len; ++j)
927  {
928  if(str[i] == chars.str[j])
929  {
930  gotit = false;
931  break;
932  }
933  }
934  if(gotit)
935  {
936  return i;
937  }
938  }
939  return npos;
940  }
941 
942  size_t last_not_of(ro_substr chars) const
943  {
944  for(size_t i = len-1; i != size_t(-1); --i)
945  {
946  bool gotit = true;
947  for(size_t j = 0; j < chars.len; ++j)
948  {
949  if(str[i] == chars.str[j])
950  {
951  gotit = false;
952  break;
953  }
954  }
955  if(gotit)
956  {
957  return i;
958  }
959  }
960  return npos;
961  }
962 
963  size_t last_not_of(ro_substr chars, size_t start) const
964  {
965  C4_ASSERT(start == npos || (start >= 0 && start <= len));
966  if(start == npos)
967  start = len;
968  for(size_t i = start-1; i != size_t(-1); --i)
969  {
970  bool gotit = true;
971  for(size_t j = 0; j < chars.len; ++j)
972  {
973  if(str[i] == chars.str[j])
974  {
975  gotit = false;
976  break;
977  }
978  }
979  if(gotit)
980  {
981  return i;
982  }
983  }
984  return npos;
985  }
986 
987  /** @} */
988 
989 public:
990 
991  /** @name Range lookup methods */
992  /** @{ */
993 
994  /** get the range delimited by an open-close pair of characters.
995  * @note There must be no nested pairs.
996  * @note No checks for escapes are performed. */
997  basic_substring pair_range(CC open, CC close) const
998  {
999  size_t b = find(open);
1000  if(b == npos)
1001  return basic_substring();
1002  size_t e = find(close, b+1);
1003  if(e == npos)
1004  return basic_substring();
1005  basic_substring ret = range(b, e+1);
1006  C4_ASSERT(ret.sub(1).find(open) == npos);
1007  return ret;
1008  }
1009 
1010  /** get the range delimited by a single open-close character (eg, quotes).
1011  * @note The open-close character can be escaped. */
1012  basic_substring pair_range_esc(CC open_close, CC escape=CC('\\'))
1013  {
1014  size_t b = find(open_close);
1015  if(b == npos) return basic_substring();
1016  for(size_t i = b+1; i < len; ++i)
1017  {
1018  CC c = str[i];
1019  if(c == open_close)
1020  {
1021  if(str[i-1] != escape)
1022  {
1023  return range(b, i+1);
1024  }
1025  }
1026  }
1027  return basic_substring();
1028  }
1029 
1030  /** get the range delimited by an open-close pair of characters,
1031  * with possibly nested occurrences. No checks for escapes are
1032  * performed. */
1034  {
1035  size_t b = find(open);
1036  if(b == npos) return basic_substring();
1037  size_t e, curr = b+1, count = 0;
1038  const char both[] = {open, close, '\0'};
1039  while((e = first_of(both, curr)) != npos)
1040  {
1041  if(str[e] == open)
1042  {
1043  ++count;
1044  curr = e+1;
1045  }
1046  else if(str[e] == close)
1047  {
1048  if(count == 0) return range(b, e+1);
1049  --count;
1050  curr = e+1;
1051  }
1052  }
1053  return basic_substring();
1054  }
1055 
1057  {
1058  constexpr const C dq('"'), sq('\'');
1059  if(len >= 2 && (str[len - 2] != C('\\')) &&
1060  ((begins_with(sq) && ends_with(sq))
1061  ||
1062  (begins_with(dq) && ends_with(dq))))
1063  {
1064  return range(1, len -1);
1065  }
1066  return *this;
1067  }
1068 
1069  /** @} */
1070 
1071 public:
1072 
1073  /** @name Number-matching query methods */
1074  /** @{ */
1075 
1076  /** @return true if the substring contents are a floating-point or integer number.
1077  * @note any leading or trailing whitespace will return false. */
1078  bool is_number() const
1079  {
1080  if(empty() || (first_non_empty_span().empty()))
1081  return false;
1082  if(first_uint_span() == *this)
1083  return true;
1084  if(first_int_span() == *this)
1085  return true;
1086  if(first_real_span() == *this)
1087  return true;
1088  return false;
1089  }
1090 
1091  /** @return true if the substring contents are a real number.
1092  * @note any leading or trailing whitespace will return false. */
1093  bool is_real() const
1094  {
1095  if(empty() || (first_non_empty_span().empty()))
1096  return false;
1097  if(first_real_span() == *this)
1098  return true;
1099  return false;
1100  }
1101 
1102  /** @return true if the substring contents are an integer number.
1103  * @note any leading or trailing whitespace will return false. */
1104  bool is_integer() const
1105  {
1106  if(empty() || (first_non_empty_span().empty()))
1107  return false;
1108  if(first_uint_span() == *this)
1109  return true;
1110  if(first_int_span() == *this)
1111  return true;
1112  return false;
1113  }
1114 
1115  /** @return true if the substring contents are an unsigned integer number.
1116  * @note any leading or trailing whitespace will return false. */
1117  bool is_unsigned_integer() const
1118  {
1119  if(empty() || (first_non_empty_span().empty()))
1120  return false;
1121  if(first_uint_span() == *this)
1122  return true;
1123  return false;
1124  }
1125 
1126  /** get the first span consisting exclusively of non-empty characters */
1128  {
1129  constexpr const ro_substr empty_chars(" \n\r\t");
1130  size_t pos = first_not_of(empty_chars);
1131  if(pos == npos)
1132  return first(0);
1133  auto ret = sub(pos);
1134  pos = ret.first_of(empty_chars);
1135  return ret.first(pos);
1136  }
1137 
1138  /** get the first span which can be interpreted as an unsigned integer */
1140  {
1141  basic_substring ne = first_non_empty_span();
1142  if(ne.empty())
1143  return ne;
1144  if(ne.str[0] == '-')
1145  return first(0);
1146  size_t skip_start = size_t(ne.str[0] == '+');
1147  return ne._first_integral_span(skip_start);
1148  }
1149 
1150  /** get the first span which can be interpreted as a signed integer */
1152  {
1153  basic_substring ne = first_non_empty_span();
1154  if(ne.empty())
1155  return ne;
1156  size_t skip_start = size_t(ne.str[0] == '+' || ne.str[0] == '-');
1157  return ne._first_integral_span(skip_start);
1158  }
1159 
1160  basic_substring _first_integral_span(size_t skip_start) const
1161  {
1162  C4_ASSERT(!empty());
1163  if(skip_start == len)
1164  return first(0);
1165  C4_ASSERT(skip_start < len);
1166  if(len >= skip_start + 3)
1167  {
1168  if(str[skip_start] != '0')
1169  {
1170  for(size_t i = skip_start; i < len; ++i)
1171  {
1172  char c = str[i];
1173  if(c < '0' || c > '9')
1174  return i > skip_start && _is_delim_char(c) ? first(i) : first(0);
1175  }
1176  }
1177  else
1178  {
1179  char next = str[skip_start + 1];
1180  if(next == 'x' || next == 'X')
1181  {
1182  skip_start += 2;
1183  for(size_t i = skip_start; i < len; ++i)
1184  {
1185  const char c = str[i];
1186  if( ! _is_hex_char(c))
1187  return i > skip_start && _is_delim_char(c) ? first(i) : first(0);
1188  }
1189  return *this;
1190  }
1191  else if(next == 'b' || next == 'B')
1192  {
1193  skip_start += 2;
1194  for(size_t i = skip_start; i < len; ++i)
1195  {
1196  const char c = str[i];
1197  if(c != '0' && c != '1')
1198  return i > skip_start && _is_delim_char(c) ? first(i) : first(0);
1199  }
1200  return *this;
1201  }
1202  else if(next == 'o' || next == 'O')
1203  {
1204  skip_start += 2;
1205  for(size_t i = skip_start; i < len; ++i)
1206  {
1207  const char c = str[i];
1208  if(c < '0' || c > '7')
1209  return i > skip_start && _is_delim_char(c) ? first(i) : first(0);
1210  }
1211  return *this;
1212  }
1213  }
1214  }
1215  // must be a decimal, or it is not a an number
1216  for(size_t i = skip_start; i < len; ++i)
1217  {
1218  const char c = str[i];
1219  if(c < '0' || c > '9')
1220  return i > skip_start && _is_delim_char(c) ? first(i) : first(0);
1221  }
1222  return *this;
1223  }
1224 
1225  /** get the first span which can be interpreted as a real (floating-point) number */
1227  {
1228  basic_substring ne = first_non_empty_span();
1229  if(ne.empty())
1230  return ne;
1231  const size_t skip_start = (ne.str[0] == '+' || ne.str[0] == '-');
1232  C4_ASSERT(skip_start == 0 || skip_start == 1);
1233  // if we have at least three digits after the leading sign, it
1234  // can be decimal, or hex, or bin or oct. Ex:
1235  // non-decimal: 0x0, 0b0, 0o0
1236  // decimal: 1.0, 10., 1e1, 100, inf, nan, infinity
1237  if(ne.len >= skip_start+3)
1238  {
1239  // if it does not have leading 0, it must be decimal, or it is not a real
1240  if(ne.str[skip_start] != '0')
1241  {
1242  if(ne.str[skip_start] == 'i') // is it infinity or inf?
1243  {
1244  basic_substring word = ne._word_follows(skip_start + 1, "nfinity");
1245  if(word.len)
1246  return word;
1247  return ne._word_follows(skip_start + 1, "nf");
1248  }
1249  else if(ne.str[skip_start] == 'n') // is it nan?
1250  {
1251  return ne._word_follows(skip_start + 1, "an");
1252  }
1253  else // must be a decimal, or it is not a real
1254  {
1255  return ne._first_real_span_dec(skip_start);
1256  }
1257  }
1258  else // starts with 0. is it 0x, 0b or 0o?
1259  {
1260  const char next = ne.str[skip_start + 1];
1261  // hexadecimal
1262  if(next == 'x' || next == 'X')
1263  return ne._first_real_span_hex(skip_start + 2);
1264  // binary
1265  else if(next == 'b' || next == 'B')
1266  return ne._first_real_span_bin(skip_start + 2);
1267  // octal
1268  else if(next == 'o' || next == 'O')
1269  return ne._first_real_span_oct(skip_start + 2);
1270  // none of the above. may still be a decimal.
1271  else
1272  return ne._first_real_span_dec(skip_start); // do not skip the 0.
1273  }
1274  }
1275  // less than 3 chars after the leading sign. It is either a
1276  // decimal or it is not a real. (cannot be any of 0x0, etc).
1277  return ne._first_real_span_dec(skip_start);
1278  }
1279 
1280  /** true if the character is a delimiter character *at the end* */
1281  static constexpr C4_ALWAYS_INLINE C4_CONST bool _is_delim_char(char c) noexcept
1282  {
1283  return c == ' ' || c == '\n'
1284  || c == ']' || c == ')' || c == '}'
1285  || c == ',' || c == ';' || c == '\r' || c == '\t' || c == '\0';
1286  }
1287 
1288  /** true if the character is in [0-9a-fA-F] */
1289  static constexpr C4_ALWAYS_INLINE C4_CONST bool _is_hex_char(char c) noexcept
1290  {
1291  return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
1292  }
1293 
1294  C4_NO_INLINE C4_PURE basic_substring _word_follows(size_t pos, csubstr word) const noexcept
1295  {
1296  size_t posend = pos + word.len;
1297  if(len >= posend && sub(pos, word.len) == word)
1298  if(len == posend || _is_delim_char(str[posend]))
1299  return first(posend);
1300  return first(0);
1301  }
1302 
1303  // this function is declared inside the class to avoid a VS error with __declspec(dllimport)
1304  C4_NO_INLINE C4_PURE basic_substring _first_real_span_dec(size_t pos) const noexcept
1305  {
1306  bool intchars = false;
1307  bool fracchars = false;
1308  bool powchars;
1309  // integral part
1310  for( ; pos < len; ++pos)
1311  {
1312  const char c = str[pos];
1313  if(c >= '0' && c <= '9')
1314  {
1315  intchars = true;
1316  }
1317  else if(c == '.')
1318  {
1319  ++pos;
1320  goto fractional_part_dec; // NOLINT
1321  }
1322  else if(c == 'e' || c == 'E')
1323  {
1324  ++pos;
1325  goto power_part_dec; // NOLINT
1326  }
1327  else if(_is_delim_char(c))
1328  {
1329  return intchars ? first(pos) : first(0);
1330  }
1331  else
1332  {
1333  return first(0);
1334  }
1335  }
1336  // no . or p were found; this is either an integral number
1337  // or not a number at all
1338  return intchars ?
1339  *this :
1340  first(0);
1341  fractional_part_dec:
1342  C4_ASSERT(pos > 0);
1343  C4_ASSERT(str[pos - 1] == '.');
1344  for( ; pos < len; ++pos)
1345  {
1346  const char c = str[pos];
1347  if(c >= '0' && c <= '9')
1348  {
1349  fracchars = true;
1350  }
1351  else if(c == 'e' || c == 'E')
1352  {
1353  ++pos;
1354  goto power_part_dec; // NOLINT
1355  }
1356  else if(_is_delim_char(c))
1357  {
1358  return intchars || fracchars ? first(pos) : first(0);
1359  }
1360  else
1361  {
1362  return first(0);
1363  }
1364  }
1365  return intchars || fracchars ?
1366  *this :
1367  first(0);
1368  power_part_dec:
1369  C4_ASSERT(pos > 0);
1370  C4_ASSERT(str[pos - 1] == 'e' || str[pos - 1] == 'E');
1371  // either digits, or +, or - are expected here, followed by more digits.
1372  if((len == pos) || ((!intchars) && (!fracchars)))
1373  return first(0);
1374  if(str[pos] == '-' || str[pos] == '+')
1375  ++pos; // skip the sign
1376  powchars = false;
1377  for( ; pos < len; ++pos)
1378  {
1379  const char c = str[pos];
1380  if(c >= '0' && c <= '9')
1381  powchars = true;
1382  else if(powchars && _is_delim_char(c))
1383  return first(pos);
1384  else
1385  return first(0);
1386  }
1387  return powchars ? *this : first(0);
1388  }
1389 
1390  // this function is declared inside the class to avoid a VS error with __declspec(dllimport)
1391  C4_NO_INLINE C4_PURE basic_substring _first_real_span_hex(size_t pos) const noexcept
1392  {
1393  bool intchars = false;
1394  bool fracchars = false;
1395  bool powchars;
1396  // integral part
1397  for( ; pos < len; ++pos)
1398  {
1399  const char c = str[pos];
1400  if(_is_hex_char(c))
1401  {
1402  intchars = true;
1403  }
1404  else if(c == '.')
1405  {
1406  ++pos;
1407  goto fractional_part_hex; // NOLINT
1408  }
1409  else if(c == 'p' || c == 'P')
1410  {
1411  ++pos;
1412  goto power_part_hex; // NOLINT
1413  }
1414  else if(_is_delim_char(c))
1415  {
1416  return intchars ? first(pos) : first(0);
1417  }
1418  else
1419  {
1420  return first(0);
1421  }
1422  }
1423  // no . or p were found; this is either an integral number
1424  // or not a number at all
1425  return intchars ?
1426  *this :
1427  first(0);
1428  fractional_part_hex:
1429  C4_ASSERT(pos > 0);
1430  C4_ASSERT(str[pos - 1] == '.');
1431  for( ; pos < len; ++pos)
1432  {
1433  const char c = str[pos];
1434  if(_is_hex_char(c))
1435  {
1436  fracchars = true;
1437  }
1438  else if(c == 'p' || c == 'P')
1439  {
1440  ++pos;
1441  goto power_part_hex; // NOLINT
1442  }
1443  else if(_is_delim_char(c))
1444  {
1445  return intchars || fracchars ? first(pos) : first(0);
1446  }
1447  else
1448  {
1449  return first(0);
1450  }
1451  }
1452  return intchars || fracchars ?
1453  *this :
1454  first(0);
1455  power_part_hex:
1456  C4_ASSERT(pos > 0);
1457  C4_ASSERT(str[pos - 1] == 'p' || str[pos - 1] == 'P');
1458  // either a + or a - is expected here, followed by more chars.
1459  // also, using (pos+1) in this check will cause an early
1460  // return when no more chars follow the sign.
1461  if(len <= (pos+1) || (str[pos] != '+' && str[pos] != '-') || ((!intchars) && (!fracchars)))
1462  return first(0);
1463  ++pos; // this was the sign.
1464  // ... so the (pos+1) ensures that we enter the loop and
1465  // hence that there exist chars in the power part
1466  powchars = false;
1467  for( ; pos < len; ++pos)
1468  {
1469  const char c = str[pos];
1470  if(c >= '0' && c <= '9')
1471  powchars = true;
1472  else if(powchars && _is_delim_char(c))
1473  return first(pos);
1474  else
1475  return first(0);
1476  }
1477  return *this;
1478  }
1479 
1480  // this function is declared inside the class to avoid a VS error with __declspec(dllimport)
1481  C4_NO_INLINE C4_PURE basic_substring _first_real_span_bin(size_t pos) const noexcept
1482  {
1483  bool intchars = false;
1484  bool fracchars = false;
1485  bool powchars;
1486  // integral part
1487  for( ; pos < len; ++pos)
1488  {
1489  const char c = str[pos];
1490  if(c == '0' || c == '1')
1491  {
1492  intchars = true;
1493  }
1494  else if(c == '.')
1495  {
1496  ++pos;
1497  goto fractional_part_bin; // NOLINT
1498  }
1499  else if(c == 'p' || c == 'P')
1500  {
1501  ++pos;
1502  goto power_part_bin; // NOLINT
1503  }
1504  else if(_is_delim_char(c))
1505  {
1506  return intchars ? first(pos) : first(0);
1507  }
1508  else
1509  {
1510  return first(0);
1511  }
1512  }
1513  // no . or p were found; this is either an integral number
1514  // or not a number at all
1515  return intchars ?
1516  *this :
1517  first(0);
1518  fractional_part_bin:
1519  C4_ASSERT(pos > 0);
1520  C4_ASSERT(str[pos - 1] == '.');
1521  for( ; pos < len; ++pos)
1522  {
1523  const char c = str[pos];
1524  if(c == '0' || c == '1')
1525  {
1526  fracchars = true;
1527  }
1528  else if(c == 'p' || c == 'P')
1529  {
1530  ++pos;
1531  goto power_part_bin; // NOLINT
1532  }
1533  else if(_is_delim_char(c))
1534  {
1535  return intchars || fracchars ? first(pos) : first(0);
1536  }
1537  else
1538  {
1539  return first(0);
1540  }
1541  }
1542  return intchars || fracchars ?
1543  *this :
1544  first(0);
1545  power_part_bin:
1546  C4_ASSERT(pos > 0);
1547  C4_ASSERT(str[pos - 1] == 'p' || str[pos - 1] == 'P');
1548  // either a + or a - is expected here, followed by more chars.
1549  // also, using (pos+1) in this check will cause an early
1550  // return when no more chars follow the sign.
1551  if(len <= (pos+1) || (str[pos] != '+' && str[pos] != '-') || ((!intchars) && (!fracchars)))
1552  return first(0);
1553  ++pos; // this was the sign.
1554  // ... so the (pos+1) ensures that we enter the loop and
1555  // hence that there exist chars in the power part
1556  powchars = false;
1557  for( ; pos < len; ++pos)
1558  {
1559  const char c = str[pos];
1560  if(c >= '0' && c <= '9')
1561  powchars = true;
1562  else if(powchars && _is_delim_char(c))
1563  return first(pos);
1564  else
1565  return first(0);
1566  }
1567  return *this;
1568  }
1569 
1570  // this function is declared inside the class to avoid a VS error with __declspec(dllimport)
1571  C4_NO_INLINE C4_PURE basic_substring _first_real_span_oct(size_t pos) const noexcept
1572  {
1573  bool intchars = false;
1574  bool fracchars = false;
1575  bool powchars;
1576  // integral part
1577  for( ; pos < len; ++pos)
1578  {
1579  const char c = str[pos];
1580  if(c >= '0' && c <= '7')
1581  {
1582  intchars = true;
1583  }
1584  else if(c == '.')
1585  {
1586  ++pos;
1587  goto fractional_part_oct; // NOLINT
1588  }
1589  else if(c == 'p' || c == 'P')
1590  {
1591  ++pos;
1592  goto power_part_oct; // NOLINT
1593  }
1594  else if(_is_delim_char(c))
1595  {
1596  return intchars ? first(pos) : first(0);
1597  }
1598  else
1599  {
1600  return first(0);
1601  }
1602  }
1603  // no . or p were found; this is either an integral number
1604  // or not a number at all
1605  return intchars ?
1606  *this :
1607  first(0);
1608  fractional_part_oct:
1609  C4_ASSERT(pos > 0);
1610  C4_ASSERT(str[pos - 1] == '.');
1611  for( ; pos < len; ++pos)
1612  {
1613  const char c = str[pos];
1614  if(c >= '0' && c <= '7')
1615  {
1616  fracchars = true;
1617  }
1618  else if(c == 'p' || c == 'P')
1619  {
1620  ++pos;
1621  goto power_part_oct; // NOLINT
1622  }
1623  else if(_is_delim_char(c))
1624  {
1625  return intchars || fracchars ? first(pos) : first(0);
1626  }
1627  else
1628  {
1629  return first(0);
1630  }
1631  }
1632  return intchars || fracchars ?
1633  *this :
1634  first(0);
1635  power_part_oct:
1636  C4_ASSERT(pos > 0);
1637  C4_ASSERT(str[pos - 1] == 'p' || str[pos - 1] == 'P');
1638  // either a + or a - is expected here, followed by more chars.
1639  // also, using (pos+1) in this check will cause an early
1640  // return when no more chars follow the sign.
1641  if(len <= (pos+1) || (str[pos] != '+' && str[pos] != '-') || ((!intchars) && (!fracchars)))
1642  return first(0);
1643  ++pos; // this was the sign.
1644  // ... so the (pos+1) ensures that we enter the loop and
1645  // hence that there exist chars in the power part
1646  powchars = false;
1647  for( ; pos < len; ++pos)
1648  {
1649  const char c = str[pos];
1650  if(c >= '0' && c <= '9')
1651  powchars = true;
1652  else if(powchars && _is_delim_char(c))
1653  return first(pos);
1654  else
1655  return first(0);
1656  }
1657  return *this;
1658  }
1659 
1660  /** @} */
1661 
1662 public:
1663 
1664  /** @name Splitting methods */
1665  /** @{ */
1666 
1667  /** returns true if the string has not been exhausted yet, meaning
1668  * it's ok to call next_split() again. When no instance of sep
1669  * exists in the string, returns the full string. When the input
1670  * is an empty string, the output string is the empty string. */
1671  bool next_split(C sep, size_t *C4_RESTRICT start_pos, basic_substring *C4_RESTRICT out) const
1672  {
1673  if(C4_LIKELY(*start_pos < len))
1674  {
1675  for(size_t i = *start_pos; i < len; i++)
1676  {
1677  if(str[i] == sep)
1678  {
1679  out->assign(str + *start_pos, i - *start_pos);
1680  *start_pos = i+1;
1681  return true;
1682  }
1683  }
1684  out->assign(str + *start_pos, len - *start_pos);
1685  *start_pos = len + 1;
1686  return true;
1687  }
1688  else
1689  {
1690  bool valid = len > 0 && (*start_pos == len);
1691  if(valid && str && str[len-1] == sep)
1692  {
1693  out->assign(str + len, size_t(0)); // the cast is needed to prevent overload ambiguity
1694  }
1695  else
1696  {
1697  out->assign(str + len + 1, size_t(0)); // the cast is needed to prevent overload ambiguity
1698  }
1699  *start_pos = len + 1;
1700  return valid;
1701  }
1702  }
1703 
1704 private:
1705 
1706  struct split_proxy_impl
1707  {
1709  {
1710  split_proxy_impl const* m_proxy;
1712  size_t m_pos;
1714 
1715  split_iterator_impl(split_proxy_impl const* proxy, size_t pos, C sep)
1716  : m_proxy(proxy), m_pos(pos), m_sep(sep)
1717  {
1718  _tick();
1719  }
1720 
1721  void _tick()
1722  {
1723  m_proxy->m_str.next_split(m_sep, &m_pos, &m_str);
1724  }
1725 
1726  split_iterator_impl& operator++ () { _tick(); return *this; }
1727  split_iterator_impl operator++ (int) { split_iterator_impl it = *this; _tick(); return it; } // NOLINT
1728 
1729  basic_substring& operator* () { return m_str; }
1730  basic_substring* operator-> () { return &m_str; }
1731 
1732  bool operator!= (split_iterator_impl const& that) const
1733  {
1734  return !(this->operator==(that));
1735  }
1736  bool operator== (split_iterator_impl const& that) const
1737  {
1738  C4_XASSERT((m_sep == that.m_sep) && "cannot compare split iterators with different separators");
1739  if(m_str.size() != that.m_str.size())
1740  return false;
1741  if(m_str.data() != that.m_str.data())
1742  return false;
1743  return m_pos == that.m_pos;
1744  }
1745  };
1746 
1747  basic_substring m_str;
1748  size_t m_start_pos;
1749  C m_sep;
1750 
1751  split_proxy_impl(basic_substring str_, size_t start_pos, C sep)
1752  : m_str(str_), m_start_pos(start_pos), m_sep(sep)
1753  {
1754  }
1755 
1756  split_iterator_impl begin() const
1757  {
1758  auto it = split_iterator_impl(this, m_start_pos, m_sep);
1759  return it;
1760  }
1761  split_iterator_impl end() const
1762  {
1763  size_t pos = m_str.size() + 1;
1764  auto it = split_iterator_impl(this, pos, m_sep);
1765  return it;
1766  }
1767  };
1768 
1769 public:
1770 
1771  using split_proxy = split_proxy_impl;
1772 
1773  /** a view into the splits */
1774  split_proxy split(C sep, size_t start_pos=0) const
1775  {
1776  C4_XASSERT((start_pos >= 0 && start_pos < len) || empty());
1777  auto ss = sub(0, len);
1778  auto it = split_proxy(ss, start_pos, sep);
1779  return it;
1780  }
1781 
1782 public:
1783 
1784  /** pop right: return the first split from the right. Use
1785  * gpop_left() to get the reciprocal part.
1786  */
1787  basic_substring pop_right(C sep=C('/'), bool skip_empty=false) const
1788  {
1789  if(C4_LIKELY(len > 1))
1790  {
1791  auto pos = last_of(sep);
1792  if(pos != npos)
1793  {
1794  if(pos + 1 < len) // does not end with sep
1795  {
1796  return sub(pos + 1); // return from sep to end
1797  }
1798  else // the string ends with sep
1799  {
1800  if( ! skip_empty)
1801  {
1802  return sub(pos + 1, 0);
1803  }
1804  auto ppos = last_not_of(sep); // skip repeated seps
1805  if(ppos == npos) // the string is all made of seps
1806  {
1807  return sub(0, 0);
1808  }
1809  // find the previous sep
1810  auto pos0 = last_of(sep, ppos);
1811  if(pos0 == npos) // only the last sep exists
1812  {
1813  return sub(0); // return the full string (because skip_empty is true)
1814  }
1815  ++pos0;
1816  return sub(pos0);
1817  }
1818  }
1819  else // no sep was found, return the full string
1820  {
1821  return *this;
1822  }
1823  }
1824  else if(len == 1)
1825  {
1826  if(begins_with(sep))
1827  {
1828  return sub(0, 0);
1829  }
1830  return *this;
1831  }
1832  else // an empty string
1833  {
1834  return basic_substring();
1835  }
1836  }
1837 
1838  /** return the first split from the left. Use gpop_right() to get
1839  * the reciprocal part. */
1840  basic_substring pop_left(C sep = C('/'), bool skip_empty=false) const
1841  {
1842  if(C4_LIKELY(len > 1))
1843  {
1844  auto pos = first_of(sep);
1845  if(pos != npos)
1846  {
1847  if(pos > 0) // does not start with sep
1848  {
1849  return sub(0, pos); // return everything up to it
1850  }
1851  else // the string starts with sep
1852  {
1853  if( ! skip_empty)
1854  {
1855  return sub(0, 0);
1856  }
1857  auto ppos = first_not_of(sep); // skip repeated seps
1858  if(ppos == npos) // the string is all made of seps
1859  {
1860  return sub(0, 0);
1861  }
1862  // find the next sep
1863  auto pos0 = first_of(sep, ppos);
1864  if(pos0 == npos) // only the first sep exists
1865  {
1866  return sub(0); // return the full string (because skip_empty is true)
1867  }
1868  C4_XASSERT(pos0 > 0);
1869  // return everything up to the second sep
1870  return sub(0, pos0);
1871  }
1872  }
1873  else // no sep was found, return the full string
1874  {
1875  return sub(0);
1876  }
1877  }
1878  else if(len == 1)
1879  {
1880  if(begins_with(sep))
1881  {
1882  return sub(0, 0);
1883  }
1884  return sub(0);
1885  }
1886  else // an empty string
1887  {
1888  return basic_substring();
1889  }
1890  }
1891 
1892 public:
1893 
1894  /** greedy pop left. eg, csubstr("a/b/c").gpop_left('/')="a/b" */
1895  basic_substring gpop_left(C sep = C('/'), bool skip_empty=false) const
1896  {
1897  auto ss = pop_right(sep, skip_empty);
1898  ss = left_of(ss);
1899  if(ss.find(sep) != npos)
1900  {
1901  if(ss.ends_with(sep))
1902  {
1903  if(skip_empty)
1904  {
1905  ss = ss.trimr(sep);
1906  }
1907  else
1908  {
1909  ss = ss.sub(0, ss.len-1); // safe to subtract because ends_with(sep) is true
1910  }
1911  }
1912  }
1913  return ss;
1914  }
1915 
1916  /** greedy pop right. eg, csubstr("a/b/c").gpop_right('/')="b/c" */
1917  basic_substring gpop_right(C sep = C('/'), bool skip_empty=false) const
1918  {
1919  auto ss = pop_left(sep, skip_empty);
1920  ss = right_of(ss);
1921  if(ss.find(sep) != npos)
1922  {
1923  if(ss.begins_with(sep))
1924  {
1925  if(skip_empty)
1926  {
1927  ss = ss.triml(sep);
1928  }
1929  else
1930  {
1931  ss = ss.sub(1);
1932  }
1933  }
1934  }
1935  return ss;
1936  }
1937 
1938  /** @} */
1939 
1940 public:
1941 
1942  /** @name Path-like manipulation methods */
1943  /** @{ */
1944 
1945  basic_substring basename(C sep=C('/')) const
1946  {
1947  auto ss = pop_right(sep, /*skip_empty*/true);
1948  ss = ss.trimr(sep);
1949  return ss;
1950  }
1951 
1952  basic_substring dirname(C sep=C('/')) const
1953  {
1954  auto ss = basename(sep);
1955  ss = ss.empty() ? *this : left_of(ss);
1956  return ss;
1957  }
1958 
1959  C4_ALWAYS_INLINE basic_substring name_wo_extshort() const
1960  {
1961  return gpop_left('.');
1962  }
1963 
1964  C4_ALWAYS_INLINE basic_substring name_wo_extlong() const
1965  {
1966  return pop_left('.');
1967  }
1968 
1969  C4_ALWAYS_INLINE basic_substring extshort() const
1970  {
1971  return pop_right('.');
1972  }
1973 
1974  C4_ALWAYS_INLINE basic_substring extlong() const
1975  {
1976  return gpop_right('.');
1977  }
1978 
1979  /** @} */
1980 
1981 public:
1982 
1983  /** @name Content-modification methods (only for non-const C) */
1984  /** @{ */
1985 
1986  /** convert the string to upper-case
1987  * @note this method requires that the string memory is writeable and is SFINAEd out for const C */
1988  C4_REQUIRE_RW(void) toupper()
1989  {
1990  for(size_t i = 0; i < len; ++i)
1991  {
1992  str[i] = static_cast<C>(::toupper(str[i]));
1993  }
1994  }
1995 
1996  /** convert the string to lower-case
1997  * @note this method requires that the string memory is writeable and is SFINAEd out for const C */
1998  C4_REQUIRE_RW(void) tolower()
1999  {
2000  for(size_t i = 0; i < len; ++i)
2001  {
2002  str[i] = static_cast<C>(::tolower(str[i]));
2003  }
2004  }
2005 
2006 public:
2007 
2008  /** fill the entire contents with the given @p val
2009  * @note this method requires that the string memory is writeable and is SFINAEd out for const C */
2010  C4_REQUIRE_RW(void) fill(C val)
2011  {
2012  for(size_t i = 0; i < len; ++i)
2013  str[i] = val;
2014  }
2015 
2016 public:
2017 
2018  /** copy a string to this substr, starting at 0
2019  * @note this method requires that the string memory is writeable and is SFINAEd out for const C */
2020  C4_REQUIRE_RW(void) copy_from(ro_substr that)
2021  {
2022  C4_ASSERT(!overlaps(that));
2023  size_t num = that.len <= len ? that.len : len;
2024  // calling memcpy with zero len is undefined behavior
2025  // and will wreak havoc in calling code's branches.
2026  // see https://github.com/biojppm/rapidyaml/pull/264#issuecomment-1262133637
2027  if(num)
2028  memcpy(str, that.str, sizeof(C) * num);
2029  }
2030 
2031  /** copy a string to this substr, starting at a specified given position
2032  * @note this method requires that the string memory is writeable and is SFINAEd out for const C */
2033  C4_REQUIRE_RW(void) copy_from(ro_substr that, size_t ifirst, size_t num=npos)
2034  {
2035  C4_ASSERT(ifirst >= 0 && ifirst <= len);
2036  num = num != npos ? num : len - ifirst;
2037  num = num < that.len ? num : that.len;
2038  C4_ASSERT(ifirst + num >= 0 && ifirst + num <= len);
2039  // calling memcpy with zero len is undefined behavior
2040  // and will wreak havoc in calling code's branches.
2041  // see https://github.com/biojppm/rapidyaml/pull/264#issuecomment-1262133637
2042  if(num)
2043  memcpy(str + (sizeof(C) * ifirst), that.str, sizeof(C) * num);
2044  }
2045 
2046 public:
2047 
2048  /** reverse in place
2049  * @note this method requires that the string memory is writeable and is SFINAEd out for const C */
2050  C4_REQUIRE_RW(void) reverse()
2051  {
2052  if(len == 0) return;
2053  detail::_do_reverse(str, str + len - 1);
2054  }
2055 
2056  /** reverse a subpart in place
2057  * @note this method requires that the string memory is writeable and is SFINAEd out for const C */
2058  C4_REQUIRE_RW(void) reverse_sub(size_t ifirst, size_t num)
2059  {
2060  C4_ASSERT(ifirst >= 0 && ifirst <= len);
2061  C4_ASSERT(ifirst + num >= 0 && ifirst + num <= len);
2062  if(num == 0) return;
2063  detail::_do_reverse(str + ifirst, str + ifirst + num - 1);
2064  }
2065 
2066  /** reverse a range in place
2067  * @note this method requires that the string memory is writeable and is SFINAEd out for const C */
2068  C4_REQUIRE_RW(void) reverse_range(size_t ifirst, size_t ilast)
2069  {
2070  C4_ASSERT(ifirst >= 0 && ifirst <= len);
2071  C4_ASSERT(ilast >= 0 && ilast <= len);
2072  if(ifirst == ilast) return;
2073  detail::_do_reverse(str + ifirst, str + ilast - 1);
2074  }
2075 
2076 public:
2077 
2078  /** erase part of the string. eg, with char s[] = "0123456789",
2079  * substr(s).erase(3, 2) = "01256789", and s is now "0125678989"
2080  * @note this method requires that the string memory is writeable and is SFINAEd out for const C */
2081  C4_REQUIRE_RW(basic_substring) erase(size_t pos, size_t num)
2082  {
2083  C4_ASSERT(pos >= 0 && pos+num <= len);
2084  size_t num_to_move = len - pos - num;
2085  memmove(str + pos, str + pos + num, sizeof(C) * num_to_move);
2086  return basic_substring{str, len - num};
2087  }
2088 
2089  /** @note this method requires that the string memory is writeable and is SFINAEd out for const C */
2090  C4_REQUIRE_RW(basic_substring) erase_range(size_t first, size_t last)
2091  {
2092  C4_ASSERT(first <= last);
2093  return erase(first, static_cast<size_t>(last-first)); // NOLINT
2094  }
2095 
2096  /** erase a part of the string.
2097  * @note @p sub must be a substring of this string
2098  * @note this method requires that the string memory is writeable and is SFINAEd out for const C */
2099  C4_REQUIRE_RW(basic_substring) erase(ro_substr sub)
2100  {
2101  C4_ASSERT(is_super(sub));
2102  C4_ASSERT(sub.str >= str);
2103  return erase(static_cast<size_t>(sub.str - str), sub.len);
2104  }
2105 
2106 public:
2107 
2108  /** replace every occurrence of character @p value with the character @p repl
2109  * @return the number of characters that were replaced
2110  * @note this method requires that the string memory is writeable and is SFINAEd out for const C */
2111  C4_REQUIRE_RW(size_t) replace(C value, C repl, size_t pos=0)
2112  {
2113  C4_ASSERT((pos >= 0 && pos <= len) || pos == npos);
2114  size_t did_it = 0;
2115  while((pos = find(value, pos)) != npos)
2116  {
2117  str[pos++] = repl;
2118  ++did_it;
2119  }
2120  return did_it;
2121  }
2122 
2123  /** replace every occurrence of each character in @p chars with
2124  * the character @p repl.
2125  * @return the number of characters that were replaced
2126  * @note this method requires that the string memory is writeable and is SFINAEd out for const C */
2127  C4_REQUIRE_RW(size_t) replace(ro_substr chars, C repl, size_t pos=0)
2128  {
2129  C4_ASSERT((pos >= 0 && pos <= len) || pos == npos);
2130  size_t did_it = 0;
2131  while((pos = first_of(chars, pos)) != npos)
2132  {
2133  str[pos++] = repl;
2134  ++did_it;
2135  }
2136  return did_it;
2137  }
2138 
2139  /** replace @p pattern with @p repl, and write the result into
2140  * @p dst. pattern and repl don't need equal sizes.
2141  *
2142  * @return the required size for dst. No overflow occurs if
2143  * dst.len is smaller than the required size; this can be used to
2144  * determine the required size for an existing container. */
2145  size_t replace_all(rw_substr dst, ro_substr pattern, ro_substr repl, size_t pos=0) const
2146  {
2147  C4_ASSERT( ! pattern.empty()); //!< @todo relax this precondition
2148  C4_ASSERT( ! this ->overlaps(dst)); //!< @todo relax this precondition
2149  C4_ASSERT( ! pattern.overlaps(dst));
2150  C4_ASSERT( ! repl .overlaps(dst));
2151  C4_ASSERT((pos >= 0 && pos <= len) || pos == npos);
2152  C4_SUPPRESS_WARNING_GCC_PUSH
2153  C4_SUPPRESS_WARNING_GCC("-Warray-bounds") // gcc11 has a false positive here
2154  #if (!defined(__clang__)) && (defined(__GNUC__) && (__GNUC__ >= 7))
2155  C4_SUPPRESS_WARNING_GCC("-Wstringop-overflow") // gcc11 has a false positive here
2156  #endif
2157  #define _c4append(first, last) \
2158  { \
2159  C4_ASSERT((last) >= (first)); \
2160  size_t num = static_cast<size_t>((last) - (first)); \
2161  if(num > 0 && sz + num <= dst.len) \
2162  { \
2163  memcpy(dst.str + sz, first, num * sizeof(C)); \
2164  } \
2165  sz += num; \
2166  }
2167  size_t sz = 0;
2168  size_t b = pos;
2169  _c4append(str, str + pos);
2170  do {
2171  size_t e = find(pattern, b);
2172  if(e == npos)
2173  {
2174  _c4append(str + b, str + len);
2175  break;
2176  }
2177  _c4append(str + b, str + e);
2178  _c4append(repl.begin(), repl.end());
2179  b = e + pattern.size();
2180  } while(b < len && b != npos);
2181  return sz;
2182  #undef _c4append
2183  C4_SUPPRESS_WARNING_GCC_POP
2184  }
2185 
2186  /** @} */
2187 
2188 }; // template class basic_substring
2189 
2190 
2191 #undef C4_REQUIRE_RW
2192 
2193 
2194 //-----------------------------------------------------------------------------
2195 //-----------------------------------------------------------------------------
2196 //-----------------------------------------------------------------------------
2197 
2198 
2199 /** @defgroup doc_substr_adapters substr adapters
2200  *
2201  * to_substr() and to_csubstr() are used in generic code like
2202  * format(), and allow adding construction of substrings from new
2203  * types like containers.
2204  * @{ */
2205 
2206 
2207 /** neutral version for use in generic code */
2208 C4_ALWAYS_INLINE substr to_substr(substr s) noexcept { return s; }
2209 /** neutral version for use in generic code */
2210 C4_ALWAYS_INLINE csubstr to_csubstr(substr s) noexcept { return csubstr{s.str, s.len}; }
2211 /** neutral version for use in generic code */
2212 C4_ALWAYS_INLINE csubstr to_csubstr(csubstr s) noexcept { return s; }
2213 
2214 
2215 template<size_t N> C4_ALWAYS_INLINE substr to_substr(char (&s)[N]) noexcept
2216 {
2217  return substr(s, N-1);
2218 }
2219 template<size_t N> C4_ALWAYS_INLINE csubstr to_csubstr(const char (&s)[N]) noexcept
2220 {
2221  return csubstr(s, N-1);
2222 }
2223 
2224 
2225 /** @note this overload uses SFINAE to prevent it from overriding the array overload
2226  * @see For a more detailed explanation on why the plain overloads cannot
2227  * coexist, see http://cplusplus.bordoon.com/specializeForCharacterArrays.html */
2228 template<class U> C4_ALWAYS_INLINE auto to_substr(U s) noexcept
2229  -> typename std::enable_if<std::is_same<U, char*>::value, substr>::type
2230 {
2231  return substr(s);
2232 }
2233 /** @note this overload uses SFINAE to prevent it from overriding the array overload
2234  * @see For a more detailed explanation on why the plain overloads cannot
2235  * coexist, see http://cplusplus.bordoon.com/specializeForCharacterArrays.html */
2236 template<class U> C4_ALWAYS_INLINE auto to_csubstr(U s) noexcept
2237  -> typename std::enable_if<std::is_same<U, const char*>::value || std::is_same<U, char*>::value, csubstr>::type
2238 {
2239  return csubstr(s);
2240 }
2241 
2242 
/** trait flagging @p T as a string type (meaning @ref c4::to_csubstr()
 * can be used directly). The primary template answers false; string
 * types opt in through specializations. */
template<class T>
struct is_string : std::false_type
{
};

/** trait flagging @p T as a writeable string type (meaning
 * @ref c4::to_substr() can be used directly). The primary template
 * answers false; writeable string types opt in through
 * specializations. */
template<class T>
struct is_writeable_string : std::false_type
{
};
2249 
2250 template<typename C> struct is_string<basic_substring<C>> : public std::true_type {};
2251 template<> struct is_writeable_string<basic_substring<char>> : public std::true_type {};
2252 template<> struct is_writeable_string<basic_substring<const char>> : public std::false_type {};
2253 
2254 template<> struct is_string<const char*> : public std::true_type {};
2255 template<> struct is_writeable_string<const char*> : public std::false_type {};
2256 
2257 template<> struct is_string<char*> : public std::true_type {};
2258 template<> struct is_writeable_string<char*> : public std::true_type {};
2259 
2260 template<size_t N> struct is_string<const char[N]> : public std::true_type {};
2261 template<size_t N> struct is_writeable_string<const char[N]> : public std::false_type {};
2262 
2263 template<size_t N> struct is_string<char[N]> : public std::true_type {};
2264 template<size_t N> struct is_writeable_string<char[N]> : public std::true_type {};
2265 
2266 template<size_t N> struct is_string<const char (&)[N]> : public std::true_type {};
2267 template<size_t N> struct is_writeable_string<const char (&)[N]> : public std::false_type {};
2268 
2269 template<size_t N> struct is_string<char (&)[N]> : public std::true_type {};
2270 template<size_t N> struct is_writeable_string<char (&)[N]> : public std::true_type {};
2271 
2272 template<size_t N> struct is_string<const char (&&)[N]> : public std::true_type {};
2273 template<size_t N> struct is_writeable_string<const char (&&)[N]> : public std::false_type {};
2274 
2275 template<size_t N> struct is_string<char (&&)[N]> : public std::true_type {};
2276 template<size_t N> struct is_writeable_string<char (&&)[N]> : public std::true_type {};
2277 
2278 /** @} */
2279 
2280 
2281 //-----------------------------------------------------------------------------
2282 //-----------------------------------------------------------------------------
2283 //-----------------------------------------------------------------------------
2284 
2285 /** @defgroup doc_substr_cmp substr comparison operators
2286  * @{ */
2287 
2288 template<typename C, size_t N> inline bool operator== (const char (&s)[N], basic_substring<C> const that) noexcept { return that.compare(s, N-1) == 0; }
2289 template<typename C, size_t N> inline bool operator!= (const char (&s)[N], basic_substring<C> const that) noexcept { return that.compare(s, N-1) != 0; }
2290 template<typename C, size_t N> inline bool operator< (const char (&s)[N], basic_substring<C> const that) noexcept { return that.compare(s, N-1) > 0; }
2291 template<typename C, size_t N> inline bool operator> (const char (&s)[N], basic_substring<C> const that) noexcept { return that.compare(s, N-1) < 0; }
2292 template<typename C, size_t N> inline bool operator<= (const char (&s)[N], basic_substring<C> const that) noexcept { return that.compare(s, N-1) >= 0; }
2293 template<typename C, size_t N> inline bool operator>= (const char (&s)[N], basic_substring<C> const that) noexcept { return that.compare(s, N-1) <= 0; }
2294 
2295 template<typename C> inline bool operator== (const char c, basic_substring<C> const that) noexcept { return that.compare(c) == 0; }
2296 template<typename C> inline bool operator!= (const char c, basic_substring<C> const that) noexcept { return that.compare(c) != 0; }
2297 template<typename C> inline bool operator< (const char c, basic_substring<C> const that) noexcept { return that.compare(c) > 0; }
2298 template<typename C> inline bool operator> (const char c, basic_substring<C> const that) noexcept { return that.compare(c) < 0; }
2299 template<typename C> inline bool operator<= (const char c, basic_substring<C> const that) noexcept { return that.compare(c) >= 0; }
2300 template<typename C> inline bool operator>= (const char c, basic_substring<C> const that) noexcept { return that.compare(c) <= 0; }
2301 
2302 /** @} */
2303 
2304 
2305 //-----------------------------------------------------------------------------
2306 //-----------------------------------------------------------------------------
2307 //-----------------------------------------------------------------------------
2308 
2309 /* Define C4_SUBSTR_NO_OSTREAM_LSHIFT to disable the operator<< below:
2310  * doctest does not deal well with a template operator<<
2311  * @see https://github.com/onqtam/doctest/pull/431 */
2312 #ifndef C4_SUBSTR_NO_OSTREAM_LSHIFT
2313 #ifdef __clang__
2314 # pragma clang diagnostic push
2315 # pragma clang diagnostic ignored "-Wsign-conversion"
2316 #elif defined(__GNUC__)
2317 # pragma GCC diagnostic push
2318 # pragma GCC diagnostic ignored "-Wsign-conversion"
2319 #endif
2320 
2321 /** output the string to a stream */
2322 template<class OStream, class C>
2323 inline OStream& operator<< (OStream& os, basic_substring<C> s)
2324 {
2325  os.write(s.str, s.len);
2326  return os;
2327 }
2328 
2329 // this causes ambiguity
2330 ///** this is used by google test */
2331 //template<class OStream, class C>
2332 //inline void PrintTo(basic_substring<C> s, OStream* os)
2333 //{
2334 // os->write(s.str, s.len);
2335 //}
2336 
2337 #ifdef __clang__
2338 # pragma clang diagnostic pop
2339 #elif defined(__GNUC__)
2340 # pragma GCC diagnostic pop
2341 #endif
2342 #endif // !C4_SUBSTR_NO_OSTREAM_LSHIFT
2343 
2344 /** @} */
2345 
2346 } // namespace c4
2347 
2348 
2349 #ifdef __clang__
2350 # pragma clang diagnostic pop
2351 #elif defined(__GNUC__)
2352 # pragma GCC diagnostic pop
2353 #endif
2354 
2355 #endif /* _C4_SUBSTR_HPP_ */
left_< T > left(T val, size_t width, char padchar=' ')
mark an argument to be aligned left
Definition: format.hpp:525
right_< T > right(T val, size_t width, char padchar=' ')
mark an argument to be aligned right
Definition: format.hpp:532
csubstr to_csubstr(substr s) noexcept
neutral version for use in generic code
Definition: substr.hpp:2210
substr to_substr(substr s) noexcept
neutral version for use in generic code
Definition: substr.hpp:2208
bool operator!=(const char(&s)[N], basic_substring< C > const that) noexcept
Definition: substr.hpp:2289
bool operator>(const char(&s)[N], basic_substring< C > const that) noexcept
Definition: substr.hpp:2291
bool operator>=(const char(&s)[N], basic_substring< C > const that) noexcept
Definition: substr.hpp:2293
bool operator<=(const char(&s)[N], basic_substring< C > const that) noexcept
Definition: substr.hpp:2292
bool operator==(const char(&s)[N], basic_substring< C > const that) noexcept
Definition: substr.hpp:2288
bool operator<(const char(&s)[N], basic_substring< C > const that) noexcept
Definition: substr.hpp:2290
OStream & operator<<(OStream &os, basic_substring< C > s)
output the string to a stream
Definition: substr.hpp:2323
@ npos
a null string position
Definition: common.hpp:258
@ NONE
an index to none
Definition: common.hpp:251
(Undefined by default) Use shorter error message from checks/asserts: do not show the check condition...
Definition: common.cpp:14
split_iterator_impl(split_proxy_impl const *proxy, size_t pos, C sep)
Definition: substr.hpp:1715
a non-owning string-view, consisting of a character pointer and a length.
Definition: substr.hpp:74
basic_substring _first_real_span_hex(size_t pos) const noexcept
Definition: substr.hpp:1391
void reverse()
reverse in place
Definition: substr.hpp:2050
basic_substring first_uint_span() const
get the first span which can be interpreted as an unsigned integer
Definition: substr.hpp:1139
first_of_any_result first_of_any_iter(It first_span, It last_span) const
Definition: substr.hpp:643
int compare(ro_substr const that) const noexcept
Definition: substr.hpp:268
basic_substring(U s_) noexcept
Construct from a C-string (zero-terminated string)
Definition: substr.hpp:153
size_t first_not_of(ro_substr chars) const
Definition: substr.hpp:899
basic_substring gpop_right(C sep=C('/'), bool skip_empty=false) const
greedy pop right.
Definition: substr.hpp:1917
basic_substring trim(const C c) const
trim the character c left and right
Definition: substr.hpp:501
C const & front() const noexcept
Definition: substr.hpp:214
size_t count(const C c, size_t pos=0) const
count the number of occurrences of c
Definition: substr.hpp:568
basic_substring _first_real_span_oct(size_t pos) const noexcept
Definition: substr.hpp:1571
bool begins_with(const C c) const
true if the first character of the string is c
Definition: substr.hpp:674
first_of_any_result first_of_any(ro_substr s0, ro_substr s1) const
Definition: substr.hpp:618
basic_substring sub(size_t first, size_t num) const noexcept
return [first,first+num[.
Definition: substr.hpp:333
basic_substring pair_range(CC open, CC close) const
get the range delimited by an open-close pair of characters.
Definition: substr.hpp:997
basic_substring pop_right(C sep=C('/'), bool skip_empty=false) const
pop right: return the first split from the right.
Definition: substr.hpp:1787
basic_substring range(size_t first, size_t last=npos) const noexcept
return [first,last[.
Definition: substr.hpp:343
int compare(C const c) const noexcept
Definition: substr.hpp:226
C const & back() const noexcept
Definition: substr.hpp:217
size_t first_not_of(const C c) const
Definition: substr.hpp:855
const_iterator begin() const noexcept
Definition: substr.hpp:204
size_t last_not_of(const C c, size_t start) const
Definition: substr.hpp:886
basic_substring left_of(ro_substr const subs) const noexcept
given subs a substring of the current string, get the portion of the current string to the left of it
Definition: substr.hpp:419
basic_substring triml(const C c) const
trim left
Definition: substr.hpp:453
bool ends_with(const C c) const
true if the last character of the string is c
Definition: substr.hpp:737
size_t last_of(const C c, size_t start=npos) const
Definition: substr.hpp:808
bool is_integer() const
Definition: substr.hpp:1104
void tolower()
convert the string to lower-case
Definition: substr.hpp:1998
basic_substring trimr(ro_substr chars) const
trim right ANY of the characters
Definition: substr.hpp:489
basic_substring(basic_substring const &) noexcept=default
basic_substring _first_real_span_bin(size_t pos) const noexcept
Definition: substr.hpp:1481
void toupper()
convert the string to upper-case
Definition: substr.hpp:1988
first_of_any_result first_of_any(ro_substr s0, ro_substr s1, ro_substr s2, ro_substr s3, ro_substr s4) const
Definition: substr.hpp:636
basic_substring offs(size_t left, size_t right) const noexcept
offset from the ends: return [left,len-right[ ; ie, trim a number of characters from the left and rig...
Definition: substr.hpp:371
basic_substring first_real_span() const
get the first span which can be interpreted as a real (floating-point) number
Definition: substr.hpp:1226
basic_substring unquoted() const
Definition: substr.hpp:1056
C & front() noexcept
Definition: substr.hpp:213
split_proxy_impl split_proxy
Definition: substr.hpp:1771
basic_substring first_int_span() const
get the first span which can be interpreted as a signed integer
Definition: substr.hpp:1151
basic_substring select(ro_substr pattern, size_t pos=0) const
get the substr consisting of the first occurrence of pattern after pos, or an empty substr if none oc...
Definition: substr.hpp:603
basic_substring pair_range_esc(CC open_close, CC escape=CC('\\'))
get the range delimited by a single open-close character (eg, quotes).
Definition: substr.hpp:1012
size_t replace_all(rw_substr dst, ro_substr pattern, ro_substr repl, size_t pos=0) const
replace pattern with repl, and write the result into dst.
Definition: substr.hpp:2145
size_t count(ro_substr c, size_t pos=0) const
count the number of occurrences of s
Definition: substr.hpp:582
basic_substring name_wo_extshort() const
Definition: substr.hpp:1959
split_proxy split(C sep, size_t start_pos=0) const
a view into the splits
Definition: substr.hpp:1774
basic_substring name_wo_extlong() const
Definition: substr.hpp:1964
basic_substring erase(ro_substr sub)
erase a part of the string.
Definition: substr.hpp:2099
C & back() noexcept
Definition: substr.hpp:216
size_t last_not_of(ro_substr chars) const
Definition: substr.hpp:942
bool is_real() const
Definition: substr.hpp:1093
constexpr basic_substring() noexcept
Definition: substr.hpp:115
basic_substring triml(ro_substr chars) const
trim left ANY of the characters.
Definition: substr.hpp:465
size_t len
the length of the substring
Definition: substr.hpp:80
basic_substring left_of(size_t pos, bool include_pos) const noexcept
return [0, pos+include_pos[ .
Definition: substr.hpp:389
size_t last_not_of(ro_substr chars, size_t start) const
Definition: substr.hpp:963
basic_substring last(size_t num) const noexcept
return the last num elements: [len-num,len[
Definition: substr.hpp:360
size_t first_of(const C c, size_t start=0) const
Definition: substr.hpp:796
basic_substring stripl(ro_substr pattern) const
remove a pattern from the left
Definition: substr.hpp:514
bool ends_with(ro_substr pattern) const
true if the string ends with the given pattern
Definition: substr.hpp:760
basic_substring right_of(size_t pos, bool include_pos) const noexcept
return [pos+!include_pos, len[
Definition: substr.hpp:407
size_t find(const C c, size_t start_pos=0) const
Definition: substr.hpp:537
basic_substring trim(ro_substr const chars) const
trim left and right ANY of the characters
Definition: substr.hpp:507
const_iterator end() const noexcept
Definition: substr.hpp:205
typename std::add_const< C >::type CC
CC=const char.
Definition: substr.hpp:87
basic_substring(C *s_, size_t len_) noexcept
Construct from a pointer and length.
Definition: substr.hpp:141
basic_substring erase_range(size_t first, size_t last)
Definition: substr.hpp:2090
basic_substring extshort() const
Definition: substr.hpp:1969
void assign(U s_) noexcept
Assign from a C-string (zero-terminated string)
Definition: substr.hpp:174
basic_substring stripr(ro_substr pattern) const
remove a pattern from the right
Definition: substr.hpp:523
size_t first_of(ro_substr chars, size_t start=0) const
Definition: substr.hpp:822
size_t find(ro_substr pattern, size_t start_pos=0) const
Definition: substr.hpp:541
void assign(C *s_, size_t len_) noexcept
Assign from a pointer and length.
Definition: substr.hpp:162
iterator begin() noexcept
Definition: substr.hpp:201
void assign(C(&s_)[N]) noexcept
Assign from an array.
Definition: substr.hpp:159
int compare(C const *that, size_t sz) const noexcept
Definition: substr.hpp:235
bool ends_with_any(ro_substr chars) const
true if the last character of the string is any of the given chars
Definition: substr.hpp:777
void fill(C val)
fill the entire contents with the given val
Definition: substr.hpp:2010
size_t size() const noexcept
Definition: substr.hpp:199
basic_substring basename(C sep=C('/')) const
Definition: substr.hpp:1945
basic_substring(C *beg_, C *end_) noexcept
Construct from two pointers.
Definition: substr.hpp:145
bool is_unsigned_integer() const
Definition: substr.hpp:1117
bool overlaps(ro_substr const that) const noexcept
true if there is overlap of at least one element between that and *this
Definition: substr.hpp:317
basic_substring _first_integral_span(size_t skip_start) const
Definition: substr.hpp:1160
iterator end() noexcept
Definition: substr.hpp:202
bool not_empty() const noexcept
Definition: substr.hpp:198
basic_substring first(size_t num) const noexcept
return the first num elements: [0,num[
Definition: substr.hpp:353
basic_substring left_of(size_t pos) const noexcept
return [0, pos[ .
Definition: substr.hpp:380
void copy_from(ro_substr that, size_t ifirst, size_t num=npos)
copy a string to this substr, starting at a specified given position
Definition: substr.hpp:2033
void reverse_range(size_t ifirst, size_t ilast)
revert a range in place
Definition: substr.hpp:2068
basic_substring pair_range_nested(CC open, CC close) const
get the range delimited by an open-close pair of characters, with possibly nested occurrences.
Definition: substr.hpp:1033
C const * data() const noexcept
Definition: substr.hpp:208
size_t replace(C value, C repl, size_t pos=0)
replace every occurrence of character value with the character repl
Definition: substr.hpp:2111
bool has_str() const noexcept
Definition: substr.hpp:196
bool begins_with(ro_substr pattern) const
true if the string begins with the given pattern
Definition: substr.hpp:703
bool is_number() const
Definition: substr.hpp:1078
basic_substring sub(size_t first) const noexcept
return [first,len[
Definition: substr.hpp:326
basic_substring _first_real_span_dec(size_t pos) const noexcept
Definition: substr.hpp:1304
first_of_any_result first_of_any(ro_substr s0, ro_substr s1, ro_substr s2) const
Definition: substr.hpp:624
void copy_from(ro_substr that)
copy a string to this substr, starting at 0
Definition: substr.hpp:2020
bool begins_with_any(ro_substr chars) const
true if the first character of the string is any of the given chars
Definition: substr.hpp:720
bool next_split(C sep, size_t *start_pos, basic_substring *out) const
returns true if the string has not been exhausted yet, meaning it's ok to call next_split() again.
Definition: substr.hpp:1671
basic_substring(basic_substring &&) noexcept=default
bool empty() const noexcept
Definition: substr.hpp:197
basic_substring right_of(ro_substr const subs) const noexcept
given subs a substring of the current string, get the portion of the current string to the right of i...
Definition: substr.hpp:433
size_t first_not_of(const C c, size_t start) const
Definition: substr.hpp:865
void assign(C *beg_, C *end_) noexcept
Assign from two pointers.
Definition: substr.hpp:166
basic_substring trimr(const C c) const
trim the character c from the right
Definition: substr.hpp:477
first_of_any_result first_of_any(ro_substr s0, ro_substr s1, ro_substr s2, ro_substr s3) const
Definition: substr.hpp:630
constexpr basic_substring(C(&s_)[N]) noexcept
Construct from an array.
Definition: substr.hpp:138
basic_substring dirname(C sep=C('/')) const
Definition: substr.hpp:1952
size_t replace(ro_substr chars, C repl, size_t pos=0)
replace every occurrence of each character in chars with the character repl.
Definition: substr.hpp:2127
bool is_super(ro_substr const that) const noexcept
true if that is a substring of *this (ie, from the same buffer)
Definition: substr.hpp:308
size_t last_not_of(const C c) const
Definition: substr.hpp:876
bool ends_with(const C c, size_t num) const
true if the last num characters of the string are c
Definition: substr.hpp:743
bool begins_with(const C c, size_t num) const
true if the first num characters of the string are c
Definition: substr.hpp:686
static constexpr C4_CONST bool _is_hex_char(char c) noexcept
true if the character is in [0-9a-fA-F]
Definition: substr.hpp:1289
basic_substring erase(size_t pos, size_t num)
erase part of the string.
Definition: substr.hpp:2081
C * data() noexcept
Definition: substr.hpp:207
basic_substring _word_follows(size_t pos, csubstr word) const noexcept
Definition: substr.hpp:1294
void reverse_sub(size_t ifirst, size_t num)
revert a subpart in place
Definition: substr.hpp:2058
basic_substring gpop_left(C sep=C('/'), bool skip_empty=false) const
greedy pop left.
Definition: substr.hpp:1895
size_t first_not_of(ro_substr chars, size_t start) const
Definition: substr.hpp:920
void clear() noexcept
Definition: substr.hpp:125
size_t last_of(ro_substr chars, size_t start=npos) const
Definition: substr.hpp:837
typename std::remove_const< C >::type NCC_
NCC_=non const char.
Definition: substr.hpp:88
basic_substring first_non_empty_span() const
get the first span consisting exclusively of non-empty characters
Definition: substr.hpp:1127
C * str
a restricted pointer to the first character of the substring
Definition: substr.hpp:78
basic_substring right_of(size_t pos) const noexcept
return [pos+1, len[
Definition: substr.hpp:398
basic_substring pop_left(C sep=C('/'), bool skip_empty=false) const
return the first split from the left.
Definition: substr.hpp:1840
static constexpr C4_CONST bool _is_delim_char(char c) noexcept
true if the character is a delimiter character at the end
Definition: substr.hpp:1281
bool is_sub(ro_substr const that) const noexcept
true if *this is a substring of that (ie, from the same buffer)
Definition: substr.hpp:302
basic_substring select(const C c, size_t pos=0) const
get the substr consisting of the first occurrence of c after pos, or an empty substr if none occurs
Definition: substr.hpp:596
basic_substring extlong() const
Definition: substr.hpp:1974
a traits class to mark a type as a string type (meaning c4::to_csubstr() can be used directly).
Definition: substr.hpp:2245
a traits class to mark a type as a writeable string type (meaning c4::to_substr() can be used directl...
Definition: substr.hpp:2248
#define _c4append(first, last)