Sane C++ Libraries
C++ Platform Abstraction Libraries
Loading...
Searching...
No Matches
StringView.h
1// Copyright (c) Stefano Cristiano
2// SPDX-License-Identifier: MIT
3#pragma once
4#include "../Common/CompilerMacrosLifetimeBound.h"
5#include "../Common/PlatformMacrosType.h"
6#include "../Common/Span.h"
7#include "../Strings/StringIterator.h"
8#include "StringsExport.h"
9
10namespace SC
11{
12struct SC_STRINGS_EXPORT StringView;
13struct SC_STRINGS_EXPORT StringViewTokenizer;
14struct SC_STRINGS_EXPORT StringAlgorithms;
15
16} // namespace SC
17
20
23
47
48struct SC::StringView : public StringSpan
49{
50 StringView() : StringSpan() {}
51
52 using StringSpan::StringSpan;
53
54 constexpr StringView(StringSpan ssv) : StringSpan(ssv) {}
55
56 static StringView fromNullTerminated(const char* text, StringEncoding encoding)
57 {
58 return StringSpan::fromNullTerminated(text, encoding);
59 }
60
61#if SC_PLATFORM_WINDOWS
62 static StringView fromNullTerminated(const wchar_t* text, StringEncoding encoding)
63 {
64 return StringSpan::fromNullTerminated(text, encoding);
65 }
66#endif
67
70 Span<const uint8_t> toBytesSpan() const SC_LANGUAGE_LIFETIME_BOUND
71 {
72 return Span<const uint8_t>::reinterpret_bytes(text, textSizeInBytes);
73 }
74
81 template <typename Func>
82 [[nodiscard]] constexpr auto withIterator(Func&& func) const;
83
92 template <typename Func>
93 [[nodiscard]] static constexpr auto withIterators(StringView s1, StringView s2, Func&& func);
94
98 template <typename StringIterator>
99 constexpr StringIterator getIterator() const;
100
104 [[nodiscard]] constexpr bool operator!=(StringView other) const { return not operator==(other); }
105
109 [[nodiscard]] constexpr bool operator==(StringView other) const;
110
123 [[nodiscard]] constexpr bool fullyOverlaps(StringView other, size_t& commonOverlappingPoints) const;
124
127 [[nodiscard]] constexpr bool isEmpty() const { return text == nullptr or textSizeInBytes == 0; }
128
131 [[nodiscard]] constexpr bool isNullTerminated() const { return hasNullTerm; }
132
135 [[nodiscard]] constexpr size_t sizeInBytes() const { return textSizeInBytes; }
136
145 [[nodiscard]] bool endsWithAnyOf(Span<const StringCodePoint> codePoints) const;
146
155 [[nodiscard]] bool startsWithAnyOf(Span<const StringCodePoint> codePoints) const;
156
165 [[nodiscard]] bool startsWith(const StringView str) const;
166
169 [[nodiscard]] bool startsWithIgnoreCaseASCII(const StringView str) const;
170
179 [[nodiscard]] bool endsWith(const StringView str) const;
180
193 [[nodiscard]] bool containsString(const StringView str) const;
194
197 [[nodiscard]] bool containsStringIgnoreCaseASCII(const StringView str) const;
198
201 [[nodiscard]] bool equalsIgnoreCaseASCII(const StringView str) const;
202
215 [[nodiscard]] bool splitAfter(const StringView stringToMatch, StringView& remainingAfterSplit) const;
216
229 [[nodiscard]] bool splitBefore(const StringView stringToMatch, StringView& stringBeforeSplit) const;
230
234 [[nodiscard]] bool containsCodePoint(StringCodePoint c) const;
235
240 [[nodiscard]] constexpr bool hasCompatibleEncoding(StringView str) const;
241
250 template <typename StringIterator>
252 StringEncoding encoding = StringIterator::getEncoding());
253
259 template <typename StringIterator>
260 static StringView fromIteratorUntilEnd(StringIterator it, StringEncoding encoding = StringIterator::getEncoding());
261
267 template <typename StringIterator>
269 StringEncoding encoding = StringIterator::getEncoding());
270
282 [[nodiscard]] StringView sliceStartEnd(size_t start, size_t end) const;
283
295 [[nodiscard]] StringView sliceStartLength(size_t start, size_t length) const;
296
306 [[nodiscard]] StringView sliceStart(size_t offset) const;
307
317 [[nodiscard]] StringView sliceEnd(size_t offset) const;
318
328 [[nodiscard]] StringView trimEndAnyOf(Span<const StringCodePoint> codePoints) const;
329
339 [[nodiscard]] StringView trimStartAnyOf(Span<const StringCodePoint> codePoints) const;
340
350 [[nodiscard]] StringView trimAnyOf(Span<const StringCodePoint> codePoints) const;
351
360 [[nodiscard]] StringView trimWhiteSpaces() const;
361
363
373 [[nodiscard]] bool isIntegerNumber() const;
374
387 [[nodiscard]] bool isFloatingNumber() const;
388
402 [[nodiscard]] bool parseInt32(int32_t& value) const;
403
417 [[nodiscard]] bool parseFloat(float& value) const;
418
432 [[nodiscard]] bool parseDouble(double& value) const;
433
434 private:
435 template <typename T>
436 struct identity
437 {
438 };
439 template <typename Type>
440 constexpr StringIteratorASCII getIterator(identity<Type>) const;
441 constexpr StringIteratorUTF8 getIterator(identity<StringIteratorUTF8>) const;
442 constexpr StringIteratorUTF16 getIterator(identity<StringIteratorUTF16>) const;
443 template <typename StringIterator1, typename StringIterator2>
444 static constexpr bool equalsIterator(StringIterator1 t1, StringIterator2 t2, size_t& points);
445
446 template <typename StringIterator>
447 constexpr bool equalsIterator(StringView other, size_t& points) const;
448};
449
452{
454
455 size_t numSplitsNonEmpty = 0;
456 size_t numSplitsTotal = 0;
457
461
467
469 StringViewTokenizer(StringView text) : remaining(text), originalText(text) {}
470
484 [[nodiscard]] bool tokenizeNext(Span<const StringCodePoint> separators, Options options = Options::SkipEmpty);
485
501 [[nodiscard]] bool tokenizeNextLine() { return tokenizeNext({'\n'}); }
502
514 StringViewTokenizer& countTokens(Span<const StringCodePoint> separators);
515
517 [[nodiscard]] bool isFinished() const;
518
519 private:
520 StringView originalText; // Original text as passed in the constructor
521};
522
542{
543 [[nodiscard]] static bool matchWildcard(StringView s1, StringView s2);
544
545 private:
546 template <typename StringIterator1, typename StringIterator2>
547 [[nodiscard]] static bool matchWildcardIterator(StringIterator1 pattern, StringIterator2 text);
548};
549
551
552//-----------------------------------------------------------------------------------------------------------------------
553// Implementation Details
554//-----------------------------------------------------------------------------------------------------------------------
555namespace SC
556{
557constexpr SC::StringView operator""_a8(const char* txt, size_t sz)
558{
559 return StringView({txt, sz}, true, StringEncoding::Ascii);
560}
561constexpr StringView operator""_u8(const char* txt, size_t sz)
562{
563 return StringView({txt, sz}, true, StringEncoding::Utf8);
564}
565constexpr StringView operator""_u16(const char* txt, size_t sz)
566{
567 const bool isNullTerminated = sz > 0 and sz % 2 == 1 and txt[sz - 1] == 0;
568 return StringView({txt, isNullTerminated ? sz - 1 : sz}, isNullTerminated, StringEncoding::Utf16);
569}
570} // namespace SC
571
572template <typename StringIterator>
574{
575 // For GCC complaining about specialization in non-namespace scope
576 return getIterator(identity<StringIterator>());
577}
578
579template <typename Type>
580constexpr SC::StringIteratorASCII SC::StringView::getIterator(identity<Type>) const
581{
582 return StringIteratorASCII(text, text + textSizeInBytes);
583}
584constexpr SC::StringIteratorUTF8 SC::StringView::getIterator(identity<StringIteratorUTF8>) const
585{
586 return StringIteratorUTF8(text, text + textSizeInBytes);
587}
588constexpr SC::StringIteratorUTF16 SC::StringView::getIterator(identity<StringIteratorUTF16>) const
589{
590 return StringIteratorUTF16(text, text + textSizeInBytes);
591}
592
593template <typename StringIterator1, typename StringIterator2>
594constexpr bool SC::StringView::equalsIterator(StringIterator1 t1, StringIterator2 t2, size_t& points)
595{
596 StringCodePoint c1 = 0;
597 StringCodePoint c2 = 0;
598 while (t1.advanceRead(c1) and t2.advanceRead(c2))
599 {
600 if (c1 != c2)
601 {
602 return false;
603 }
604 points++;
605 }
606 return t1.isAtEnd() and t2.isAtEnd();
607}
608
609template <typename StringIterator>
610constexpr bool SC::StringView::equalsIterator(StringView other, size_t& points) const
611{
612 auto it = getIterator<StringIterator>();
613 switch (other.getEncoding())
614 {
615 case StringEncoding::Ascii: return equalsIterator(it, other.getIterator<StringIteratorASCII>(), points);
616 case StringEncoding::Utf8: return equalsIterator(it, other.getIterator<StringIteratorUTF8>(), points);
617 case StringEncoding::Utf16: return equalsIterator(it, other.getIterator<StringIteratorUTF16>(), points);
618 }
619 StringsAssert::unreachable();
620}
621
622[[nodiscard]] constexpr bool SC::StringView::operator==(StringView other) const
623{
624#if defined(__clang__)
625#pragma clang diagnostic push
626#pragma clang diagnostic ignored "-Wunreachable-code"
627#endif
628 if (__builtin_is_constant_evaluated())
629 {
630 if (not hasCompatibleEncoding(other))
631 return false;
632 auto it1 = text;
633 auto it2 = other.text;
634 auto sz = textSizeInBytes;
635 for (size_t idx = 0; idx < sz; ++idx)
636 if (it1[idx] != it2[idx])
637 return false;
638 return true;
639 }
640 else
641 {
642 return StringSpan::operator==(other);
643 }
644#if defined(__clang__)
645#pragma clang diagnostic pop
646#endif
647}
648
649constexpr bool SC::StringView::fullyOverlaps(StringView other, size_t& commonOverlappingPoints) const
650{
651 commonOverlappingPoints = 0;
652 switch (getEncoding())
653 {
654 case StringEncoding::Ascii: return equalsIterator<StringIteratorASCII>(other, commonOverlappingPoints);
655 case StringEncoding::Utf8: return equalsIterator<StringIteratorUTF8>(other, commonOverlappingPoints);
656 case StringEncoding::Utf16: return equalsIterator<StringIteratorUTF16>(other, commonOverlappingPoints);
657 }
658 StringsAssert::unreachable();
659}
660
661template <typename Func>
662constexpr auto SC::StringView::withIterator(Func&& func) const
663{
664 switch (getEncoding())
665 {
666 case StringEncoding::Ascii: return func(getIterator<StringIteratorASCII>());
667 case StringEncoding::Utf8: return func(getIterator<StringIteratorUTF8>());
668 case StringEncoding::Utf16: return func(getIterator<StringIteratorUTF16>());
669 }
670 StringsAssert::unreachable();
671}
672
673template <typename Func>
674constexpr auto SC::StringView::withIterators(StringView s1, StringView s2, Func&& func)
675{
676 return s1.withIterator([&s2, &func](auto it1)
677 { return s2.withIterator([&it1, &func](auto it2) { return func(it1, it2); }); });
678}
679
681{
682 return StringEncodingAreBinaryCompatible(getEncoding(), str.getEncoding());
683}
684
685template <typename StringIterator>
686inline SC::StringView SC::StringView::fromIterators(StringIterator from, StringIterator to, StringEncoding encoding)
687{
688 const ssize_t numBytes = to.bytesDistanceFrom(from);
689 if (numBytes >= 0)
690 {
691 StringIterator fromEnd = from;
692 fromEnd.setToEnd();
693 if (fromEnd.bytesDistanceFrom(to) >= 0) // If current iterator of to is inside from range
694 return StringView({from.it, static_cast<size_t>(numBytes)}, false, encoding);
695 }
696 return StringView(encoding); // TODO: Make StringView::fromIterators return bool to make it fallible
697}
698
699template <typename StringIterator>
700inline SC::StringView SC::StringView::fromIteratorUntilEnd(StringIterator it, StringEncoding encoding)
701{
702 StringIterator endIt = it;
703 endIt.setToEnd();
704 const size_t numBytes = static_cast<size_t>(endIt.bytesDistanceFrom(it));
705 return StringView({it.it, numBytes}, false, encoding);
706}
707
708template <typename StringIterator>
709constexpr SC::StringView SC::StringView::fromIteratorFromStart(StringIterator it, StringEncoding encoding)
710{
711 StringIterator start = it;
712 start.setToStart();
713 const size_t numBytes = static_cast<size_t>(it.bytesDistanceFrom(start));
714 return StringView({start.it, numBytes}, false, encoding);
715}
constexpr bool StringEncodingAreBinaryCompatible(StringEncoding encoding1, StringEncoding encoding2)
Checks if two encodings have the same utf unit size.
Definition StringIterator.h:20
uint32_t StringCodePoint
UTF code point (32 bit)
Definition StringIterator.h:14
Algorithms operating on strings (glob / wildcard).
Definition StringView.h:542
A string iterator for ASCII strings.
Definition StringIterator.h:239
A string iterator for UTF16 strings.
Definition StringIterator.h:262
A string iterator for UTF8 strings.
Definition StringIterator.h:280
A position inside a fixed range [start, end) of UTF code points.
Definition StringIterator.h:36
constexpr ssize_t bytesDistanceFrom(StringIterator other) const
Get distance in bytes from current position to another StringIterator current position.
Definition StringIterator.h:406
constexpr void setToEnd()
Set current position to end of iterator range.
Definition StringIterator.h:46
Splits a StringView in tokens according to separators.
Definition StringView.h:452
StringViewTokenizer(StringView text)
Build a tokenizer operating on the given text string view.
Definition StringView.h:469
StringView component
Current component that has been tokenized by tokenizeNext.
Definition StringView.h:458
bool isFinished() const
Check if the tokenizer has processed the entire string view passed in the constructor.
bool tokenizeNextLine()
Tokenizes from current position to first newline.
Definition StringView.h:501
StringViewTokenizer & countTokens(Span< const StringCodePoint > separators)
Count the number of tokens that exist in the string view passed in the constructor, when split along the given separators.
size_t numSplitsNonEmpty
How many non-empty splits have occurred in current tokenization.
Definition StringView.h:455
StringView processed
Substring of original string passed in constructor processed so far.
Definition StringView.h:459
size_t numSplitsTotal
How many total splits have occurred in current tokenization.
Definition StringView.h:456
bool tokenizeNext(Span< const StringCodePoint > separators, Options options=Options::SkipEmpty)
Splits the string along a list of separators.
StringView remaining
Substring from current position until the end of original text.
Definition StringView.h:460
Options
Definition StringView.h:463
@ IncludeEmpty
tokenizeNext should also return empty tokens.
Definition StringView.h:464
@ SkipEmpty
tokenizeNext should NOT return empty tokens.
Definition StringView.h:465
StringCodePoint splittingCharacter
The last splitting character matched in current tokenization.
Definition StringView.h:453
Non-owning view over a range of characters with UTF Encoding.
Definition StringView.h:49
bool startsWithAnyOf(Span< const StringCodePoint > codePoints) const
Check if StringView starts with any utf code point in the given span.
constexpr bool isNullTerminated() const
Check if StringView is immediately followed by a null termination character.
Definition StringView.h:131
constexpr bool operator==(StringView other) const
Compare this StringView with another StringView for equality.
Definition StringView.h:622
bool equalsIgnoreCaseASCII(const StringView str) const
Check if StringView is equal to another StringView using ASCII-only case-insensitive comparison.
constexpr StringIterator getIterator() const
Returns a StringIterator from current StringView.
Definition StringView.h:573
StringView trimEndAnyOf(Span< const StringCodePoint > codePoints) const
Returns a shortened StringView removing ending utf code points matching the codePoints span.
bool startsWith(const StringView str) const
Check if StringView starts with another StringView.
static constexpr StringView fromIteratorFromStart(StringIterator it, StringEncoding encoding=StringIterator::getEncoding())
Returns a section of a string, from start of StringView to it.
bool containsStringIgnoreCaseASCII(const StringView str) const
Check if StringView contains another StringView using ASCII-only case-insensitive comparison.
Span< const uint8_t > toBytesSpan() const SC_LANGUAGE_LIFETIME_BOUND
Obtain a const uint8_t Span from this StringView.
Definition StringView.h:70
bool parseFloat(float &value) const
Try parsing current StringView as a floating point number.
constexpr bool operator!=(StringView other) const
Compare this StringView with another StringView for inequality.
Definition StringView.h:104
bool containsCodePoint(StringCodePoint c) const
Check if StringView contains given utf code point.
constexpr bool hasCompatibleEncoding(StringView str) const
Check if current StringView has compatible encoding with str.
Definition StringView.h:680
static constexpr auto withIterators(StringView s1, StringView s2, Func &&func)
Call given lambda with one of StringIteratorASCII, StringIteratorUTF8, StringIteratorUTF16 depending ...
Definition StringView.h:674
StringView sliceStartEnd(size_t start, size_t end) const
Get slice [start, end) starting at offset start and ending at end (measured in utf code points)
bool endsWith(const StringView str) const
Check if StringView ends with another StringView.
static StringView fromIteratorUntilEnd(StringIterator it, StringEncoding encoding=StringIterator::getEncoding())
Returns a section of a string, from it to end of StringView.
StringView sliceStartLength(size_t start, size_t length) const
Get slice [start, start+length] starting at offset start and of length code points.
StringView sliceStart(size_t offset) const
Get slice [offset, end] measured in utf code points.
bool endsWithAnyOf(Span< const StringCodePoint > codePoints) const
Check if StringView ends with any utf code point in the given span.
StringView trimWhiteSpaces() const
Returns a shortened StringView without starting/ending utf code points inside {'\r',...
bool parseDouble(double &value) const
Try parsing current StringView as a double precision floating point number.
constexpr bool fullyOverlaps(StringView other, size_t &commonOverlappingPoints) const
Check if this StringView is equal to other StringView (operates on code points, not on utf graphemes)...
Definition StringView.h:649
StringView trimStartAnyOf(Span< const StringCodePoint > codePoints) const
Returns a shortened StringView removing starting utf code points matching the codePoints span.
bool isIntegerNumber() const
If the current view is an integer number, returns true.
bool splitBefore(const StringView stringToMatch, StringView &stringBeforeSplit) const
Returns the part of the string before matching stringToMatch.
StringView sliceEnd(size_t offset) const
Get slice [end-offset, end] measured in utf code points.
bool splitAfter(const StringView stringToMatch, StringView &remainingAfterSplit) const
Returns the remaining part of the string after matching stringToMatch.
bool startsWithIgnoreCaseASCII(const StringView str) const
Check if StringView starts with another StringView using ASCII-only case-insensitive comparison.
constexpr auto withIterator(Func &&func) const
Call given lambda with one of StringIteratorASCII, StringIteratorUTF8, StringIteratorUTF16 depending ...
Definition StringView.h:662
constexpr bool isEmpty() const
Check if StringView is empty.
Definition StringView.h:127
bool containsString(const StringView str) const
Check if StringView contains another StringView.
constexpr size_t sizeInBytes() const
Get size of the StringView in bytes.
Definition StringView.h:135
bool parseInt32(int32_t &value) const
Try parsing current StringView as a 32 bit integer.
StringView trimAnyOf(Span< const StringCodePoint > codePoints) const
Returns a shortened StringView removing starting and ending utf code points inside the codePoints spa...
bool isFloatingNumber() const
Check if StringView can be parsed as a floating point number.
static StringView fromIterators(StringIterator from, StringIterator to, StringEncoding encoding=StringIterator::getEncoding())
Returns a StringView starting at from and ending at to.