Sane C++ Libraries
C++ Platform Abstraction Libraries
Loading...
Searching...
No Matches
StringView.h
1// Copyright (c) Stefano Cristiano
2// SPDX-License-Identifier: MIT
3#pragma once
4#include "../Foundation/Span.h"
5#include "../Strings/StringIterator.h"
6
7namespace SC
8{
9struct SC_COMPILER_EXPORT StringView;
10struct SC_COMPILER_EXPORT StringViewTokenizer;
11struct SC_COMPILER_EXPORT StringAlgorithms;
12
13} // namespace SC
14
17
20
44
46{
47 StringView() : StringSpan() {}
48
50
51 constexpr StringView(StringSpan ssv) : StringSpan(ssv) {}
52
53 static constexpr StringView fromNullTerminated(const char* text, StringEncoding encoding)
54 {
55 return StringSpan::fromNullTerminated(text, encoding);
56 }
57
58#if SC_PLATFORM_WINDOWS
59 static constexpr StringView fromNullTerminated(const wchar_t* text, StringEncoding encoding)
60 {
61 return StringSpan::fromNullTerminated(text, encoding);
62 }
63#endif
64
72 auto getNullTerminatedNative() const;
73
76 Span<const uint8_t> toBytesSpan() const SC_LANGUAGE_LIFETIME_BOUND
77 {
78 return Span<const uint8_t>::reinterpret_bytes(text, textSizeInBytes);
79 }
80
82 enum class Comparison
83 {
84 Smaller = -1,
85 Equals = 0,
86 Bigger = 1
87 };
88
108 [[nodiscard]] Comparison compare(StringView other) const;
109
126 [[nodiscard]] bool operator<(StringView other) const { return compare(other) == Comparison::Smaller; }
127
134 template <typename Func>
135 [[nodiscard]] constexpr auto withIterator(Func&& func) const;
136
145 template <typename Func>
146 [[nodiscard]] static constexpr auto withIterators(StringView s1, StringView s2, Func&& func);
147
151 template <typename StringIterator>
152 constexpr StringIterator getIterator() const;
153
157 [[nodiscard]] constexpr bool operator!=(StringView other) const { return not operator==(other); }
158
162 [[nodiscard]] constexpr bool operator==(StringSpan other) const;
163
176 [[nodiscard]] constexpr bool fullyOverlaps(StringView other, size_t& commonOverlappingPoints) const;
177
180 [[nodiscard]] constexpr bool isEmpty() const { return text == nullptr or textSizeInBytes == 0; }
181
184 [[nodiscard]] constexpr bool isNullTerminated() const { return hasNullTerm; }
185
188 [[nodiscard]] constexpr size_t sizeInBytes() const { return textSizeInBytes; }
189
198 [[nodiscard]] bool endsWithAnyOf(Span<const StringCodePoint> codePoints) const;
199
208 [[nodiscard]] bool startsWithAnyOf(Span<const StringCodePoint> codePoints) const;
209
218 [[nodiscard]] bool startsWith(const StringView str) const;
219
228 [[nodiscard]] bool endsWith(const StringView str) const;
229
242 [[nodiscard]] bool containsString(const StringView str) const;
243
256 [[nodiscard]] bool splitAfter(const StringView stringToMatch, StringView& remainingAfterSplit) const;
257
270 [[nodiscard]] bool splitBefore(const StringView stringToMatch, StringView& stringBeforeSplit) const;
271
275 [[nodiscard]] bool containsCodePoint(StringCodePoint c) const;
276
281 [[nodiscard]] constexpr bool hasCompatibleEncoding(StringView str) const;
282
290 template <typename StringIterator>
292
297 template <typename StringIterator>
299
304 template <typename StringIterator>
306
318 [[nodiscard]] StringView sliceStartEnd(size_t start, size_t end) const;
319
331 [[nodiscard]] StringView sliceStartLength(size_t start, size_t length) const;
332
342 [[nodiscard]] StringView sliceStart(size_t offset) const;
343
353 [[nodiscard]] StringView sliceEnd(size_t offset) const;
354
364 [[nodiscard]] StringView trimEndAnyOf(Span<const StringCodePoint> codePoints) const;
365
376
386 [[nodiscard]] StringView trimAnyOf(Span<const StringCodePoint> codePoints) const;
387
396 [[nodiscard]] StringView trimWhiteSpaces() const;
397
399
409 [[nodiscard]] bool isIntegerNumber() const;
410
423 [[nodiscard]] bool isFloatingNumber() const;
424
438 [[nodiscard]] bool parseInt32(int32_t& value) const;
439
453 [[nodiscard]] bool parseFloat(float& value) const;
454
468 [[nodiscard]] bool parseDouble(double& value) const;
469
470 private:
471 template <typename T>
472 struct identity
473 {
474 };
475 template <typename Type>
476 constexpr StringIteratorASCII getIterator(identity<Type>) const;
477 constexpr StringIteratorUTF8 getIterator(identity<StringIteratorUTF8>) const;
478 constexpr StringIteratorUTF16 getIterator(identity<StringIteratorUTF16>) const;
479 template <typename StringIterator1, typename StringIterator2>
480 static constexpr bool equalsIterator(StringIterator1 t1, StringIterator2 t2, size_t& points);
481
482 template <typename StringIterator>
483 constexpr bool equalsIterator(StringView other, size_t& points) const;
484};
485
488{
490
491 size_t numSplitsNonEmpty = 0;
492 size_t numSplitsTotal = 0;
493
497
503
505 StringViewTokenizer(StringView text) : remaining(text), originalText(text) {}
506
520 [[nodiscard]] bool tokenizeNext(Span<const StringCodePoint> separators, Options options = Options::SkipEmpty);
521
537 [[nodiscard]] bool tokenizeNextLine() { return tokenizeNext({'\n'}); }
538
551
553 [[nodiscard]] bool isFinished() const;
554
555 private:
556 StringView originalText; // Original text as passed in the constructor
557};
558
578{
579 [[nodiscard]] static bool matchWildcard(StringView s1, StringView s2);
580
581 private:
582 template <typename StringIterator1, typename StringIterator2>
583 [[nodiscard]] static bool matchWildcardIterator(StringIterator1 pattern, StringIterator2 text);
584};
585
587
588//-----------------------------------------------------------------------------------------------------------------------
589// Implementation Details
590//-----------------------------------------------------------------------------------------------------------------------
591namespace SC
592{
593constexpr SC::StringView operator""_a8(const char* txt, size_t sz)
594{
595 return StringView({txt, sz}, true, StringEncoding::Ascii);
596}
597constexpr StringView operator""_u8(const char* txt, size_t sz)
598{
599 return StringView({txt, sz}, true, StringEncoding::Utf8);
600}
601constexpr StringView operator""_u16(const char* txt, size_t sz)
602{
603 const bool isNullTerminated = sz > 0 and sz % 2 == 1 and txt[sz - 1] == 0;
604 return StringView({txt, isNullTerminated ? sz - 1 : sz}, isNullTerminated, StringEncoding::Utf16);
605}
606} // namespace SC
607
608template <typename StringIterator>
610{
611 // For GCC complaining about specialization in non-namespace scope
612 return getIterator(identity<StringIterator>());
613}
614
615template <typename Type>
616constexpr SC::StringIteratorASCII SC::StringView::getIterator(identity<Type>) const
617{
618 return StringIteratorASCII(text, text + textSizeInBytes);
619}
620constexpr SC::StringIteratorUTF8 SC::StringView::getIterator(identity<StringIteratorUTF8>) const
621{
622 return StringIteratorUTF8(text, text + textSizeInBytes);
623}
624constexpr SC::StringIteratorUTF16 SC::StringView::getIterator(identity<StringIteratorUTF16>) const
625{
626 return StringIteratorUTF16(text, text + textSizeInBytes);
627}
628
629template <typename StringIterator1, typename StringIterator2>
630constexpr bool SC::StringView::equalsIterator(StringIterator1 t1, StringIterator2 t2, size_t& points)
631{
632 StringCodePoint c1 = 0;
633 StringCodePoint c2 = 0;
634 while (t1.advanceRead(c1) and t2.advanceRead(c2))
635 {
636 if (c1 != c2)
637 {
638 return false;
639 }
640 points++;
641 }
642 return t1.isAtEnd() and t2.isAtEnd();
643}
644
645template <typename StringIterator>
646constexpr bool SC::StringView::equalsIterator(StringView other, size_t& points) const
647{
648 auto it = getIterator<StringIterator>();
649 switch (other.getEncoding())
650 {
651 case StringEncoding::Ascii: return equalsIterator(it, other.getIterator<StringIteratorASCII>(), points);
652 case StringEncoding::Utf8: return equalsIterator(it, other.getIterator<StringIteratorUTF8>(), points);
653 case StringEncoding::Utf16: return equalsIterator(it, other.getIterator<StringIteratorUTF16>(), points);
654 }
655 Assert::unreachable();
656}
657
/// Returns the raw character pointer of this view, typed for the current platform.
/// When SC_PLATFORM_WINDOWS is set the view must be null-terminated with Utf16 encoding and the
/// pointer is returned as `const wchar_t*`; otherwise it must be null-terminated with Utf8 or
/// Ascii encoding and `text` is returned as is. The precondition is checked with SC_ASSERT_RELEASE.
658[[nodiscard]] inline auto SC::StringView::getNullTerminatedNative() const
659{
660#if SC_PLATFORM_WINDOWS
661 SC_ASSERT_RELEASE(hasNullTerm && (getEncoding() == StringEncoding::Utf16));
// Same bytes, reinterpreted as wide characters (return type is deduced as const wchar_t*)
662 return reinterpret_cast<const wchar_t*>(text);
663#else
664 SC_ASSERT_RELEASE(hasNullTerm && (getEncoding() == StringEncoding::Utf8 || getEncoding() == StringEncoding::Ascii));
665 return text;
666#endif
667}
668
669[[nodiscard]] constexpr bool SC::StringView::operator==(StringSpan other) const
670{
671#if defined(__clang__)
672#pragma clang diagnostic push
673#pragma clang diagnostic ignored "-Wunreachable-code"
674#endif
675 if (hasCompatibleEncoding(other))
676 {
677 if (textSizeInBytes != other.textSizeInBytes)
678 return false;
679 if (__builtin_is_constant_evaluated())
680 {
681 auto it1 = text;
682 auto it2 = other.text;
683 auto sz = textSizeInBytes;
684 for (size_t idx = 0; idx < sz; ++idx)
685 if (it1[idx] != it2[idx])
686 return false;
687 }
688 else
689 {
690 if (text == nullptr)
691 {
692 return other.textSizeInBytes == 0;
693 }
694 if (other.text == nullptr)
695 {
696 return textSizeInBytes == 0;
697 }
698 return memcmp(text, other.text, textSizeInBytes) == 0;
699 }
700 }
701 size_t commonOverlappingPoints = 0;
702 return fullyOverlaps(other, commonOverlappingPoints);
703#if defined(__clang__)
704#pragma clang diagnostic pop
705#endif
706}
707
708constexpr bool SC::StringView::fullyOverlaps(StringView other, size_t& commonOverlappingPoints) const
709{
710 commonOverlappingPoints = 0;
711 switch (getEncoding())
712 {
713 case StringEncoding::Ascii: return equalsIterator<StringIteratorASCII>(other, commonOverlappingPoints);
714 case StringEncoding::Utf8: return equalsIterator<StringIteratorUTF8>(other, commonOverlappingPoints);
715 case StringEncoding::Utf16: return equalsIterator<StringIteratorUTF16>(other, commonOverlappingPoints);
716 }
717 Assert::unreachable();
718}
719
720template <typename Func>
721constexpr auto SC::StringView::withIterator(Func&& func) const
722{
723 switch (getEncoding())
724 {
725 case StringEncoding::Ascii: return func(getIterator<StringIteratorASCII>());
726 case StringEncoding::Utf8: return func(getIterator<StringIteratorUTF8>());
727 case StringEncoding::Utf16: return func(getIterator<StringIteratorUTF16>());
728 }
729 Assert::unreachable();
730}
731
732template <typename Func>
733constexpr auto SC::StringView::withIterators(StringView s1, StringView s2, Func&& func)
734{
735 return s1.withIterator([&s2, &func](auto it1)
736 { return s2.withIterator([&it1, &func](auto it2) { return func(it1, it2); }); });
737}
738
740{
741 return StringEncodingAreBinaryCompatible(getEncoding(), str.getEncoding());
742}
743
744template <typename StringIterator>
746{
747 const ssize_t numBytes = to.bytesDistanceFrom(from);
748 if (numBytes >= 0)
749 {
750 StringIterator fromEnd = from;
751 fromEnd.setToEnd();
752 if (fromEnd.bytesDistanceFrom(to) >= 0) // If current iterator of to is inside from range
753 return StringView({from.it, static_cast<size_t>(numBytes)}, false, StringIterator::getEncoding());
754 }
755 return StringView(); // TODO: Make StringView::fromIterators return bool to make it fallible
756}
757
758template <typename StringIterator>
760{
761 StringIterator endIt = it;
762 endIt.setToEnd();
763 const size_t numBytes = static_cast<size_t>(endIt.bytesDistanceFrom(it));
764 return StringView({it.it, numBytes}, false, StringIterator::getEncoding());
765}
766
767template <typename StringIterator>
768constexpr SC::StringView SC::StringView::fromIteratorFromStart(StringIterator it)
769{
770 StringIterator start = it;
771 start.setToStart();
772 const size_t numBytes = static_cast<size_t>(it.bytesDistanceFrom(start));
773 return StringView({start.it, numBytes}, false, StringIterator::getEncoding());
774}
#define SC_ASSERT_RELEASE(e)
Assert expression e to be true.
Definition Assert.h:42
signed long ssize_t
Platform independent signed size type.
Definition PrimitiveTypes.h:57
int int32_t
Platform independent (4) bytes signed int.
Definition PrimitiveTypes.h:46
constexpr bool StringEncodingAreBinaryCompatible(StringEncoding encoding1, StringEncoding encoding2)
Checks if two encodings have the same utf unit size.
Definition StringIterator.h:20
uint32_t StringCodePoint
UTF code point (32 bit)
Definition StringIterator.h:14
View over a contiguous sequence of items (pointer + size in elements).
Definition Span.h:29
static Span< Type > reinterpret_bytes(VoidType *rawMemory, SizeType sizeInBytes)
Construct a span reinterpreting raw memory (void* or const void*) to Type or const Type
Definition Span.h:80
Algorithms operating on strings (glob / wildcard).
Definition StringView.h:578
A string iterator for ASCII strings.
Definition StringIterator.h:233
A string iterator for UTF16 strings.
Definition StringIterator.h:253
A string iterator for UTF8 strings.
Definition StringIterator.h:272
A position inside a fixed range [start, end) of UTF code points.
Definition StringIterator.h:36
constexpr ssize_t bytesDistanceFrom(StringIterator other) const
Get distance in bytes from current position to another StringIterator current position.
Definition StringIterator.h:399
constexpr void setToEnd()
Set current position to end of iterator range.
Definition StringIterator.h:46
A read-only view over a string (to avoid including Strings library when parsing is not needed).
Definition StringSpan.h:37
static constexpr StringSpan fromNullTerminated(const char *text, StringEncoding encoding)
Constructs a StringView from a null terminated string.
Definition StringSpan.h:54
constexpr StringEncoding getEncoding() const
Get encoding of this StringView.
Definition StringSpan.h:83
constexpr StringSpan(StringEncoding encoding=StringEncoding::Ascii)
Construct an empty StringView.
Definition StringSpan.h:41
Splits a StringView in tokens according to separators.
Definition StringView.h:488
StringViewTokenizer(StringView text)
Build a tokenizer operating on the given text string view.
Definition StringView.h:505
StringView component
Current component that has been tokenized by tokenizeNext.
Definition StringView.h:494
bool isFinished() const
Check if the tokenizer has processed the entire string view passed in the constructor.
bool tokenizeNextLine()
Tokenizes from current position to first newline.
Definition StringView.h:537
StringViewTokenizer & countTokens(Span< const StringCodePoint > separators)
Count the number of tokens that exist in the string view passed in constructor, when split along t...
size_t numSplitsNonEmpty
How many non-empty splits have occurred in current tokenization.
Definition StringView.h:491
StringView processed
Substring of original string passed in constructor processed so far.
Definition StringView.h:495
size_t numSplitsTotal
How many total splits have occurred in current tokenization.
Definition StringView.h:492
bool tokenizeNext(Span< const StringCodePoint > separators, Options options=Options::SkipEmpty)
Splits the string along a list of separators.
StringView remaining
Substring from current position until the end of original text.
Definition StringView.h:496
Options
Definition StringView.h:499
@ IncludeEmpty
If tokenizeNext should also return empty tokens.
Definition StringView.h:500
@ SkipEmpty
If tokenizeNext should NOT return empty tokens.
Definition StringView.h:501
StringCodePoint splittingCharacter
The last splitting character matched in current tokenization.
Definition StringView.h:489
Non-owning view over a range of characters with UTF Encoding.
Definition StringView.h:46
bool startsWithAnyOf(Span< const StringCodePoint > codePoints) const
Check if StringView starts with any utf code point in the given span.
Comparison
Result of ordering comparison done by StringView::compare.
Definition StringView.h:83
constexpr bool isNullTerminated() const
Check if StringView is immediately followed by a null termination character.
Definition StringView.h:184
Comparison compare(StringView other) const
Ordering comparison between non-normalized StringView (operates on code points, not on utf graphemes)
constexpr StringIterator getIterator() const
Returns a StringIterator from current StringView.
Definition StringView.h:609
StringView trimEndAnyOf(Span< const StringCodePoint > codePoints) const
Returns a shortened StringView removing ending utf code points matching the codePoints span.
bool startsWith(const StringView str) const
Check if StringView starts with another StringView.
static constexpr StringView fromIteratorFromStart(StringIterator it)
Returns a section of a string, from start of StringView to it.
Span< const uint8_t > toBytesSpan() const SC_LANGUAGE_LIFETIME_BOUND
Obtain a const uint8_t Span from this StringView.
Definition StringView.h:76
bool parseFloat(float &value) const
Try parsing current StringView as a floating point number.
constexpr bool operator!=(StringView other) const
Compare this StringView with another StringView for inequality.
Definition StringView.h:157
bool containsCodePoint(StringCodePoint c) const
Check if StringView contains given utf code point.
constexpr bool hasCompatibleEncoding(StringView str) const
Check if current StringView has compatible encoding with str.
Definition StringView.h:739
static StringView fromIterators(StringIterator from, StringIterator to)
Returns a StringView starting at from and ending at to.
static constexpr auto withIterators(StringView s1, StringView s2, Func &&func)
Call given lambda with one of StringIteratorASCII, StringIteratorUTF8, StringIteratorUTF16 depending ...
Definition StringView.h:733
constexpr StringSpan(StringEncoding encoding=StringEncoding::Ascii)
Construct an empty StringView.
Definition StringSpan.h:41
StringView sliceStartEnd(size_t start, size_t end) const
Get slice [start, end) starting at offset start and ending at end (measured in utf code points)
bool endsWith(const StringView str) const
Check if StringView ends with another StringView.
StringView sliceStartLength(size_t start, size_t length) const
Get slice [start, start+length] starting at offset start and of length code points.
StringView sliceStart(size_t offset) const
Get slice [offset, end] measured in utf code points.
auto getNullTerminatedNative() const
Directly access the memory of this null terminated-StringView.
Definition StringView.h:658
bool endsWithAnyOf(Span< const StringCodePoint > codePoints) const
Check if StringView ends with any utf code point in the given span.
StringView trimWhiteSpaces() const
Returns a shortened StringView without starting/ending utf code points inside {'\r',...
bool parseDouble(double &value) const
Try parsing current StringView as a double precision floating point number.
constexpr bool fullyOverlaps(StringView other, size_t &commonOverlappingPoints) const
Check if this StringView is equal to other StringView (operates on code points, not on utf graphemes)...
Definition StringView.h:708
StringView trimStartAnyOf(Span< const StringCodePoint > codePoints) const
Returns a shortened StringView removing starting utf code points matching the codePoints span.
bool isIntegerNumber() const
If the current view is an integer number, returns true.
bool splitBefore(const StringView stringToMatch, StringView &stringBeforeSplit) const
Returns the part of the string before matching stringToMatch.
StringView sliceEnd(size_t offset) const
Get slice [end-offset, end] measured in utf code points.
bool splitAfter(const StringView stringToMatch, StringView &remainingAfterSplit) const
Returns the remaining part of the string after matching stringToMatch.
static StringView fromIteratorUntilEnd(StringIterator it)
Returns a section of a string, from it to end of StringView.
constexpr auto withIterator(Func &&func) const
Call given lambda with one of StringIteratorASCII, StringIteratorUTF8, StringIteratorUTF16 depending ...
Definition StringView.h:721
constexpr bool isEmpty() const
Check if StringView is empty.
Definition StringView.h:180
bool containsString(const StringView str) const
Check if StringView contains another StringView.
bool operator<(StringView other) const
Ordering operator for StringView using StringView::compare.
Definition StringView.h:126
constexpr size_t sizeInBytes() const
Get size of the StringView in bytes.
Definition StringView.h:188
bool parseInt32(int32_t &value) const
Try parsing current StringView as a 32 bit integer.
StringView trimAnyOf(Span< const StringCodePoint > codePoints) const
Returns a shortened StringView removing starting and ending utf code points inside the codePoints spa...
bool isFloatingNumber() const
Check if StringView can be parsed as a floating point number.
constexpr bool operator==(StringSpan other) const
Compare this StringView with another StringView for equality.
Definition StringView.h:669