Sane C++ Libraries
C++ Platform Abstraction Libraries
Loading...
Searching...
No Matches
StringView.h
1// Copyright (c) Stefano Cristiano
2// SPDX-License-Identifier: MIT
3#pragma once
4#include "../Foundation/Span.h"
5#include "../Strings/StringIterator.h"
6
7namespace SC
8{
9struct SC_COMPILER_EXPORT StringView;
10struct SC_COMPILER_EXPORT StringViewTokenizer;
11struct SC_COMPILER_EXPORT StringAlgorithms;
12
13} // namespace SC
14
17
20
44
46{
47 StringView() : StringSpan() {}
48
50
51 constexpr StringView(StringSpan ssv) : StringSpan(ssv) {}
52
53 static constexpr StringView fromNullTerminated(const char* text, StringEncoding encoding)
54 {
55 return StringSpan::fromNullTerminated(text, encoding);
56 }
57
58#if SC_PLATFORM_WINDOWS
59 static constexpr StringView fromNullTerminated(const wchar_t* text, StringEncoding encoding)
60 {
61 return StringSpan::fromNullTerminated(text, encoding);
62 }
63#endif
64
67 Span<const uint8_t> toBytesSpan() const SC_LANGUAGE_LIFETIME_BOUND
68 {
69 return Span<const uint8_t>::reinterpret_bytes(text, textSizeInBytes);
70 }
71
78 template <typename Func>
79 [[nodiscard]] constexpr auto withIterator(Func&& func) const;
80
89 template <typename Func>
90 [[nodiscard]] static constexpr auto withIterators(StringView s1, StringView s2, Func&& func);
91
95 template <typename StringIterator>
96 constexpr StringIterator getIterator() const;
97
101 [[nodiscard]] constexpr bool operator!=(StringView other) const { return not operator==(other); }
102
106 [[nodiscard]] constexpr bool operator==(StringSpan other) const;
107
120 [[nodiscard]] constexpr bool fullyOverlaps(StringView other, size_t& commonOverlappingPoints) const;
121
124 [[nodiscard]] constexpr bool isEmpty() const { return text == nullptr or textSizeInBytes == 0; }
125
128 [[nodiscard]] constexpr bool isNullTerminated() const { return hasNullTerm; }
129
132 [[nodiscard]] constexpr size_t sizeInBytes() const { return textSizeInBytes; }
133
142 [[nodiscard]] bool endsWithAnyOf(Span<const StringCodePoint> codePoints) const;
143
152 [[nodiscard]] bool startsWithAnyOf(Span<const StringCodePoint> codePoints) const;
153
162 [[nodiscard]] bool startsWith(const StringView str) const;
163
172 [[nodiscard]] bool endsWith(const StringView str) const;
173
186 [[nodiscard]] bool containsString(const StringView str) const;
187
200 [[nodiscard]] bool splitAfter(const StringView stringToMatch, StringView& remainingAfterSplit) const;
201
214 [[nodiscard]] bool splitBefore(const StringView stringToMatch, StringView& stringBeforeSplit) const;
215
219 [[nodiscard]] bool containsCodePoint(StringCodePoint c) const;
220
225 [[nodiscard]] constexpr bool hasCompatibleEncoding(StringView str) const;
226
235 template <typename StringIterator>
237 StringEncoding encoding = StringIterator::getEncoding());
238
244 template <typename StringIterator>
245 static StringView fromIteratorUntilEnd(StringIterator it, StringEncoding encoding = StringIterator::getEncoding());
246
252 template <typename StringIterator>
254 StringEncoding encoding = StringIterator::getEncoding());
255
267 [[nodiscard]] StringView sliceStartEnd(size_t start, size_t end) const;
268
280 [[nodiscard]] StringView sliceStartLength(size_t start, size_t length) const;
281
291 [[nodiscard]] StringView sliceStart(size_t offset) const;
292
302 [[nodiscard]] StringView sliceEnd(size_t offset) const;
303
313 [[nodiscard]] StringView trimEndAnyOf(Span<const StringCodePoint> codePoints) const;
314
325
335 [[nodiscard]] StringView trimAnyOf(Span<const StringCodePoint> codePoints) const;
336
345 [[nodiscard]] StringView trimWhiteSpaces() const;
346
348
358 [[nodiscard]] bool isIntegerNumber() const;
359
372 [[nodiscard]] bool isFloatingNumber() const;
373
387 [[nodiscard]] bool parseInt32(int32_t& value) const;
388
402 [[nodiscard]] bool parseFloat(float& value) const;
403
417 [[nodiscard]] bool parseDouble(double& value) const;
418
419 private:
420 template <typename T>
421 struct identity
422 {
423 };
424 template <typename Type>
425 constexpr StringIteratorASCII getIterator(identity<Type>) const;
426 constexpr StringIteratorUTF8 getIterator(identity<StringIteratorUTF8>) const;
427 constexpr StringIteratorUTF16 getIterator(identity<StringIteratorUTF16>) const;
428 template <typename StringIterator1, typename StringIterator2>
429 static constexpr bool equalsIterator(StringIterator1 t1, StringIterator2 t2, size_t& points);
430
431 template <typename StringIterator>
432 constexpr bool equalsIterator(StringView other, size_t& points) const;
433};
434
437{
439
440 size_t numSplitsNonEmpty = 0;
441 size_t numSplitsTotal = 0;
442
446
452
454 StringViewTokenizer(StringView text) : remaining(text), originalText(text) {}
455
469 [[nodiscard]] bool tokenizeNext(Span<const StringCodePoint> separators, Options options = Options::SkipEmpty);
470
486 [[nodiscard]] bool tokenizeNextLine() { return tokenizeNext({'\n'}); }
487
500
502 [[nodiscard]] bool isFinished() const;
503
504 private:
505 StringView originalText; // Original text as passed in the constructor
506};
507
527{
528 [[nodiscard]] static bool matchWildcard(StringView s1, StringView s2);
529
530 private:
531 template <typename StringIterator1, typename StringIterator2>
532 [[nodiscard]] static bool matchWildcardIterator(StringIterator1 pattern, StringIterator2 text);
533};
534
536
537//-----------------------------------------------------------------------------------------------------------------------
538// Implementation Details
539//-----------------------------------------------------------------------------------------------------------------------
540namespace SC
541{
542constexpr SC::StringView operator""_a8(const char* txt, size_t sz)
543{
544 return StringView({txt, sz}, true, StringEncoding::Ascii);
545}
546constexpr StringView operator""_u8(const char* txt, size_t sz)
547{
548 return StringView({txt, sz}, true, StringEncoding::Utf8);
549}
550constexpr StringView operator""_u16(const char* txt, size_t sz)
551{
552 const bool isNullTerminated = sz > 0 and sz % 2 == 1 and txt[sz - 1] == 0;
553 return StringView({txt, isNullTerminated ? sz - 1 : sz}, isNullTerminated, StringEncoding::Utf16);
554}
555} // namespace SC
556
557template <typename StringIterator>
559{
560 // For GCC complaining about specialization in non-namespace scope
561 return getIterator(identity<StringIterator>());
562}
563
564template <typename Type>
565constexpr SC::StringIteratorASCII SC::StringView::getIterator(identity<Type>) const
566{
567 return StringIteratorASCII(text, text + textSizeInBytes);
568}
569constexpr SC::StringIteratorUTF8 SC::StringView::getIterator(identity<StringIteratorUTF8>) const
570{
571 return StringIteratorUTF8(text, text + textSizeInBytes);
572}
573constexpr SC::StringIteratorUTF16 SC::StringView::getIterator(identity<StringIteratorUTF16>) const
574{
575 return StringIteratorUTF16(text, text + textSizeInBytes);
576}
577
578template <typename StringIterator1, typename StringIterator2>
579constexpr bool SC::StringView::equalsIterator(StringIterator1 t1, StringIterator2 t2, size_t& points)
580{
581 StringCodePoint c1 = 0;
582 StringCodePoint c2 = 0;
583 while (t1.advanceRead(c1) and t2.advanceRead(c2))
584 {
585 if (c1 != c2)
586 {
587 return false;
588 }
589 points++;
590 }
591 return t1.isAtEnd() and t2.isAtEnd();
592}
593
594template <typename StringIterator>
595constexpr bool SC::StringView::equalsIterator(StringView other, size_t& points) const
596{
597 auto it = getIterator<StringIterator>();
598 switch (other.getEncoding())
599 {
600 case StringEncoding::Ascii: return equalsIterator(it, other.getIterator<StringIteratorASCII>(), points);
601 case StringEncoding::Utf8: return equalsIterator(it, other.getIterator<StringIteratorUTF8>(), points);
602 case StringEncoding::Utf16: return equalsIterator(it, other.getIterator<StringIteratorUTF16>(), points);
603 }
604 Assert::unreachable();
605}
606
607[[nodiscard]] constexpr bool SC::StringView::operator==(StringSpan other) const
608{
609#if defined(__clang__)
610#pragma clang diagnostic push
611#pragma clang diagnostic ignored "-Wunreachable-code"
612#endif
613 if (__builtin_is_constant_evaluated())
614 {
615 if (not hasCompatibleEncoding(other))
616 return false;
617 auto it1 = text;
618 auto it2 = other.text;
619 auto sz = textSizeInBytes;
620 for (size_t idx = 0; idx < sz; ++idx)
621 if (it1[idx] != it2[idx])
622 return false;
623 return true;
624 }
625 else
626 {
627 return StringSpan::operator==(other);
628 }
629#if defined(__clang__)
630#pragma clang diagnostic pop
631#endif
632}
633
634constexpr bool SC::StringView::fullyOverlaps(StringView other, size_t& commonOverlappingPoints) const
635{
636 commonOverlappingPoints = 0;
637 switch (getEncoding())
638 {
639 case StringEncoding::Ascii: return equalsIterator<StringIteratorASCII>(other, commonOverlappingPoints);
640 case StringEncoding::Utf8: return equalsIterator<StringIteratorUTF8>(other, commonOverlappingPoints);
641 case StringEncoding::Utf16: return equalsIterator<StringIteratorUTF16>(other, commonOverlappingPoints);
642 }
643 Assert::unreachable();
644}
645
646template <typename Func>
647constexpr auto SC::StringView::withIterator(Func&& func) const
648{
649 switch (getEncoding())
650 {
651 case StringEncoding::Ascii: return func(getIterator<StringIteratorASCII>());
652 case StringEncoding::Utf8: return func(getIterator<StringIteratorUTF8>());
653 case StringEncoding::Utf16: return func(getIterator<StringIteratorUTF16>());
654 }
655 Assert::unreachable();
656}
657
658template <typename Func>
659constexpr auto SC::StringView::withIterators(StringView s1, StringView s2, Func&& func)
660{
661 return s1.withIterator([&s2, &func](auto it1)
662 { return s2.withIterator([&it1, &func](auto it2) { return func(it1, it2); }); });
663}
664
666{
667 return StringEncodingAreBinaryCompatible(getEncoding(), str.getEncoding());
668}
669
670template <typename StringIterator>
671inline SC::StringView SC::StringView::fromIterators(StringIterator from, StringIterator to, StringEncoding encoding)
672{
673 const ssize_t numBytes = to.bytesDistanceFrom(from);
674 if (numBytes >= 0)
675 {
676 StringIterator fromEnd = from;
677 fromEnd.setToEnd();
678 if (fromEnd.bytesDistanceFrom(to) >= 0) // If current iterator of to is inside from range
679 return StringView({from.it, static_cast<size_t>(numBytes)}, false, encoding);
680 }
681 return StringView(encoding); // TODO: Make StringView::fromIterators return bool to make it fallible
682}
683
684template <typename StringIterator>
685inline SC::StringView SC::StringView::fromIteratorUntilEnd(StringIterator it, StringEncoding encoding)
686{
687 StringIterator endIt = it;
688 endIt.setToEnd();
689 const size_t numBytes = static_cast<size_t>(endIt.bytesDistanceFrom(it));
690 return StringView({it.it, numBytes}, false, encoding);
691}
692
693template <typename StringIterator>
694constexpr SC::StringView SC::StringView::fromIteratorFromStart(StringIterator it, StringEncoding encoding)
695{
696 StringIterator start = it;
697 start.setToStart();
698 const size_t numBytes = static_cast<size_t>(it.bytesDistanceFrom(start));
699 return StringView({start.it, numBytes}, false, encoding);
700}
signed long ssize_t
Platform independent signed size type.
Definition PrimitiveTypes.h:57
int int32_t
Platform independent (4) bytes signed int.
Definition PrimitiveTypes.h:46
constexpr bool StringEncodingAreBinaryCompatible(StringEncoding encoding1, StringEncoding encoding2)
Checks if two encodings have the same utf unit size.
Definition StringIterator.h:20
uint32_t StringCodePoint
UTF code point (32 bit)
Definition StringIterator.h:14
View over a contiguous sequence of items (pointer + size in elements).
Definition Span.h:29
static Span< Type > reinterpret_bytes(VoidType *rawMemory, SizeType sizeInBytes)
Construct a span reinterpreting raw memory (void* or const void*) to Type or const Type
Definition Span.h:80
Algorithms operating on strings (glob / wildcard).
Definition StringView.h:527
A string iterator for ASCII strings.
Definition StringIterator.h:239
A string iterator for UTF16 strings.
Definition StringIterator.h:262
A string iterator for UTF8 strings.
Definition StringIterator.h:280
A position inside a fixed range [start, end) of UTF code points.
Definition StringIterator.h:36
constexpr ssize_t bytesDistanceFrom(StringIterator other) const
Get distance in bytes from current position to another StringIterator current position.
Definition StringIterator.h:406
constexpr void setToEnd()
Set current position to end of iterator range.
Definition StringIterator.h:46
A read-only view over a string (to avoid including Strings library when parsing is not needed).
Definition StringSpan.h:37
static constexpr StringSpan fromNullTerminated(const char *text, StringEncoding encoding)
Constructs a StringView from a null terminated string.
Definition StringSpan.h:54
constexpr StringEncoding getEncoding() const
Get encoding of this StringView.
Definition StringSpan.h:98
constexpr StringSpan(StringEncoding encoding=StringEncoding::Ascii)
Construct an empty StringView.
Definition StringSpan.h:41
Splits a StringView in tokens according to separators.
Definition StringView.h:437
StringViewTokenizer(StringView text)
Build a tokenizer operating on the given text string view.
Definition StringView.h:454
StringView component
Current component that has been tokenized by tokenizeNext.
Definition StringView.h:443
bool isFinished() const
Check if the tokenizer has processed the entire string view passed in the constructor.
bool tokenizeNextLine()
Tokenizes from current position to first newline.
Definition StringView.h:486
StringViewTokenizer & countTokens(Span< const StringCodePoint > separators)
Count the number of tokens that exist in the string view passed in the constructor, when split along t...
size_t numSplitsNonEmpty
How many non-empty splits have occurred in current tokenization.
Definition StringView.h:440
StringView processed
Substring of original string passed in constructor processed so far.
Definition StringView.h:444
size_t numSplitsTotal
How many total splits have occurred in current tokenization.
Definition StringView.h:441
bool tokenizeNext(Span< const StringCodePoint > separators, Options options=Options::SkipEmpty)
Splits the string along a list of separators.
StringView remaining
Substring from current position until the end of original text.
Definition StringView.h:445
Options
Definition StringView.h:448
@ IncludeEmpty
If tokenizeNext should also return empty tokens.
Definition StringView.h:449
@ SkipEmpty
If tokenizeNext should NOT return empty tokens.
Definition StringView.h:450
StringCodePoint splittingCharacter
The last splitting character matched in current tokenization.
Definition StringView.h:438
Non-owning view over a range of characters with UTF Encoding.
Definition StringView.h:46
bool startsWithAnyOf(Span< const StringCodePoint > codePoints) const
Check if StringView starts with any utf code point in the given span.
constexpr bool isNullTerminated() const
Check if StringView is immediately followed by a null termination character.
Definition StringView.h:128
constexpr StringIterator getIterator() const
Returns a StringIterator from current StringView.
Definition StringView.h:558
StringView trimEndAnyOf(Span< const StringCodePoint > codePoints) const
Returns a shortened StringView removing ending utf code points matching the codePoints span.
bool startsWith(const StringView str) const
Check if StringView starts with another StringView.
static constexpr StringView fromIteratorFromStart(StringIterator it, StringEncoding encoding=StringIterator::getEncoding())
Returns a section of a string, from start of StringView to it.
Span< const uint8_t > toBytesSpan() const SC_LANGUAGE_LIFETIME_BOUND
Obtain a const uint8_t Span from this StringView.
Definition StringView.h:67
bool parseFloat(float &value) const
Try parsing current StringView as a floating point number.
constexpr bool operator!=(StringView other) const
Compare this StringView with another StringView for inequality.
Definition StringView.h:101
bool containsCodePoint(StringCodePoint c) const
Check if StringView contains given utf code point.
constexpr bool hasCompatibleEncoding(StringView str) const
Check if current StringView has compatible encoding with str.
Definition StringView.h:665
static constexpr auto withIterators(StringView s1, StringView s2, Func &&func)
Call given lambda with one of StringIteratorASCII, StringIteratorUTF8, StringIteratorUTF16 depending ...
Definition StringView.h:659
constexpr StringSpan(StringEncoding encoding=StringEncoding::Ascii)
Construct an empty StringView.
Definition StringSpan.h:41
StringView sliceStartEnd(size_t start, size_t end) const
Get slice [start, end) starting at offset start and ending at end (measured in utf code points)
bool endsWith(const StringView str) const
Check if StringView ends with another StringView.
static StringView fromIteratorUntilEnd(StringIterator it, StringEncoding encoding=StringIterator::getEncoding())
Returns a section of a string, from it to end of StringView.
StringView sliceStartLength(size_t start, size_t length) const
Get slice [start, start+length] starting at offset start and of length code points.
StringView sliceStart(size_t offset) const
Get slice [offset, end] measured in utf code points.
bool endsWithAnyOf(Span< const StringCodePoint > codePoints) const
Check if StringView ends with any utf code point in the given span.
StringView trimWhiteSpaces() const
Returns a shortened StringView without starting/ending utf code points inside {'\r',...
bool parseDouble(double &value) const
Try parsing current StringView as a double precision floating point number.
constexpr bool fullyOverlaps(StringView other, size_t &commonOverlappingPoints) const
Check if this StringView is equal to other StringView (operates on code points, not on utf graphemes)...
Definition StringView.h:634
StringView trimStartAnyOf(Span< const StringCodePoint > codePoints) const
Returns a shortened StringView removing starting utf code points matching the codePoints span.
bool isIntegerNumber() const
If the current view is an integer number, returns true.
bool splitBefore(const StringView stringToMatch, StringView &stringBeforeSplit) const
Returns the part of the string before matching stringToMatch.
StringView sliceEnd(size_t offset) const
Get slice [end-offset, end] measured in utf code points.
bool splitAfter(const StringView stringToMatch, StringView &remainingAfterSplit) const
Returns the remaining part of the string after matching stringToMatch.
constexpr auto withIterator(Func &&func) const
Call given lambda with one of StringIteratorASCII, StringIteratorUTF8, StringIteratorUTF16 depending ...
Definition StringView.h:647
constexpr bool isEmpty() const
Check if StringView is empty.
Definition StringView.h:124
bool containsString(const StringView str) const
Check if StringView contains another StringView.
constexpr size_t sizeInBytes() const
Get size of the StringView in bytes.
Definition StringView.h:132
bool parseInt32(int32_t &value) const
Try parsing current StringView as a 32 bit integer.
StringView trimAnyOf(Span< const StringCodePoint > codePoints) const
Returns a shortened StringView removing starting and ending utf code points inside the codePoints spa...
bool isFloatingNumber() const
Check if StringView can be parsed as a floating point number.
constexpr bool operator==(StringSpan other) const
Compare this StringView with another StringView for equality.
Definition StringView.h:607
static StringView fromIterators(StringIterator from, StringIterator to, StringEncoding encoding=StringIterator::getEncoding())
Returns a StringView starting at from and ending at to.