Sane C++ Libraries
C++ Platform Abstraction Libraries
Loading...
Searching...
No Matches
StringIterator.h
1// Copyright (c) Stefano Cristiano
2// SPDX-License-Identifier: MIT
3#pragma once
4#include "../Foundation/Assert.h" //Assert::unreachable
5#include "../Foundation/Span.h"
6#include "../Foundation/StringSpan.h"
7
8namespace SC
9{
12
15
20constexpr bool StringEncodingAreBinaryCompatible(StringEncoding encoding1, StringEncoding encoding2)
21{
22 return (encoding1 == encoding2) or (encoding2 == StringEncoding::Ascii and encoding1 == StringEncoding::Utf8) or
23 (encoding2 == StringEncoding::Utf8 and encoding1 == StringEncoding::Ascii);
24}
25
/// A position inside a fixed range [start, end) of UTF code points.
/// Encoding specific operations (getEncoding / decode / getNextOf / getPreviousOf) are forwarded to
/// the CharIterator template argument.
/// NOTE(review): source line 35 (the struct header) and all original doc comments are absent from
/// this listing; confirm the summaries below against the real header.
34template <typename CharIterator>
36{
    /// Encoding reported by the CharIterator argument.
37 static constexpr StringEncoding getEncoding() { return CharIterator::getEncoding(); }
38
    // Positions are `const char*` for every iterator type, so pointer arithmetic on them is in bytes.
39 using CodeUnit = char;
40 using CodePoint = StringCodePoint;
41
    /// Rewind current position to start of iterator range.
43 constexpr void setToStart() { it = start; }
44
    /// Set current position to end of iterator range.
46 constexpr void setToEnd() { it = end; }
47
    /// Check if current position is at end of iterator range.
50 [[nodiscard]] constexpr bool isAtEnd() const { return it >= end; }
51
    /// Check if current position is at start of iterator range.
54 [[nodiscard]] constexpr bool isAtStart() const { return it <= start; }
55
    /// Advances position towards end until it matches CodePoint c or position == end.
59 [[nodiscard]] constexpr bool advanceUntilMatches(CodePoint c);
60
    /// Moves position towards start until CodePoint c is found (defined outside this listing).
64 [[nodiscard]] bool reverseAdvanceUntilMatches(CodePoint c);
65
    /// Advances position towards end until a range of characters equal to `other` is found.
    /// NOTE(review): presumably leaves the position after the match - defined outside this listing, confirm.
70 [[nodiscard]] bool advanceAfterFinding(StringIterator other);
71
    /// Advances position towards end until a range of characters equal to `other` is found.
    /// NOTE(review): presumably leaves the position before the match - defined outside this listing, confirm.
76 [[nodiscard]] bool advanceBeforeFinding(StringIterator other);
77
    /// Advances position by the number of bytes between other's current position and other's end.
81 [[nodiscard]] bool advanceByLengthOf(StringIterator other) { return advanceOfBytes(other.end - other.it); }
82
    /// Advances position until any CodePoint in the given Span is found.
    /// NOTE(review): `matched` presumably receives the code point that was found - confirm.
87 [[nodiscard]] bool advanceUntilMatchesAny(Span<const CodePoint> items, CodePoint& matched);
88
    /// Moves position towards start until any CodePoint in the given Span is found.
93 [[nodiscard]] bool reverseAdvanceUntilMatchesAny(Span<const CodePoint> items, CodePoint& matched);
94
    /// Advances position until a code point different from c is found or end is reached.
99 [[nodiscard]] bool advanceUntilDifferentFrom(CodePoint c, CodePoint* optionalReadChar = nullptr);
100
    /// Advance position only if next code point matches c.
104 [[nodiscard]] constexpr bool advanceIfMatches(CodePoint c);
105
    /// Move position by one code point towards start if previous code point matches c.
109 [[nodiscard]] bool advanceBackwardIfMatches(CodePoint c);
110
    // NOTE(review): line 115 is empty in this listing, while its index documents
    // `bool advanceIfMatchesAny(Span<const CodePoint> items)` ("Advance position only if any of the
    // code points in given Span is matched") - presumably declared here; confirm against the header.
115
    /// Advance position if any code point in the range [first, last] is matched.
120 [[nodiscard]] bool advanceIfMatchesRange(CodePoint first, CodePoint last);
121
    /// Check if the code point at current position equals c; false when position is at end.
126 [[nodiscard]] bool match(CodePoint c) { return it < end and CharIterator::decode(it) == c; }
127
    /// Decode code point at current position into c and advance.
131 [[nodiscard]] constexpr bool advanceRead(CodePoint& c);
132
    /// Read code point at current position (defined outside this listing).
136 [[nodiscard]] bool read(CodePoint& c);
137
    /// Move to previous position and read it (defined outside this listing).
141 [[nodiscard]] bool advanceBackwardRead(CodePoint& c);
142
    /// Move position to next code point.
145 [[nodiscard]] constexpr bool stepForward();
146
    /// Move position to previous code point.
149 [[nodiscard]] constexpr bool stepBackward();
150
    /// Move position forward (towards end) by variable number of code points.
154 [[nodiscard]] constexpr bool advanceCodePoints(size_t numCodePoints);
155
    /// Move position backwards (towards start) by variable number of code points.
159 [[nodiscard]] bool reverseAdvanceCodePoints(size_t numCodePoints);
160
    /// Check if next code point is c.
164 [[nodiscard]] constexpr bool isFollowedBy(CodePoint c);
165
    /// Check if previous code point is c.
169 [[nodiscard]] constexpr bool isPrecededBy(CodePoint c);
170
    /// Returns another StringIterator range ending at otherPoint's current position.
174 [[nodiscard]] constexpr StringIterator sliceFromStartUntil(StringIterator otherPoint) const;
175
    /// Get distance in bytes from current position to another StringIterator current position.
179 [[nodiscard]] constexpr ssize_t bytesDistanceFrom(StringIterator other) const;
180
    /// Check if this Iterator ends with any code point in the given span.
184 [[nodiscard]] bool endsWithAnyOf(Span<const CodePoint> codePoints) const;
185
    /// Check if this Iterator starts with any code point in the given span.
189 [[nodiscard]] bool startsWithAnyOf(Span<const CodePoint> codePoints) const;
190
    /// Check if this Iterator at its end matches entirely another Iterator's range.
194 template <typename IteratorType>
195 [[nodiscard]] bool endsWith(IteratorType other) const;
196
    /// Check if this Iterator at its start matches entirely another Iterator's range.
200 template <typename IteratorType>
201 [[nodiscard]] bool startsWith(IteratorType other) const;
202
203 protected:
    // Byte-wise advance used by advanceByLengthOf (defined outside this listing).
204 [[nodiscard]] bool advanceOfBytes(ssize_t bytesLength);
205
206 friend struct StringView;
    // Stepping between code points is delegated to the encoding specific CharIterator.
207 static constexpr const CodeUnit* getNextOf(const CodeUnit* src) { return CharIterator::getNextOf(src); }
208 static constexpr const CodeUnit* getPreviousOf(const CodeUnit* src) { return CharIterator::getPreviousOf(src); }
    // Builds the range [it, end); both the current position and `start` begin at `it`.
209 constexpr StringIterator(const CodeUnit* it, const CodeUnit* end) : it(it), start(it), end(end) {}
210 const CodeUnit* it;    // current position
211 const CodeUnit* start; // first position of the range
212 const CodeUnit* end;   // one past the last position of the range
213};
214
/// A string iterator for ASCII strings.
/// Supplies the encoding specific primitives used by the StringIterator base: one code point per char.
216struct SC_COMPILER_EXPORT StringIteratorASCII : public StringIterator<StringIteratorASCII>
217{
    /// ASCII specific overload; see the definition further down, which picks between the base
    /// template implementation and advanceUntilMatchesNonConstexpr.
218 [[nodiscard]] constexpr bool advanceUntilMatches(CodePoint c);
219
220 private:
    // Runtime-only search used by advanceUntilMatches (defined outside this listing).
221 [[nodiscard]] bool advanceUntilMatchesNonConstexpr(CodePoint c);
222 using StringIterator::StringIterator;
223 constexpr StringIteratorASCII(const CodeUnit* it, const CodeUnit* end) : StringIterator(it, end) {}
    // NOTE(review): `Parent` is not defined in the visible lines (source line 224 is absent from this
    // listing) - presumably an alias of the StringIterator<StringIteratorASCII> base; confirm.
225 friend Parent;
226 friend struct StringView;
227
228 [[nodiscard]] static constexpr StringEncoding getEncoding() { return StringEncoding::Ascii; }
229
    // Every code point occupies exactly one char, so stepping is plain pointer +/- 1.
230 [[nodiscard]] static constexpr const char* getNextOf(const char* src) { return src + 1; }
231 [[nodiscard]] static constexpr const char* getPreviousOf(const char* src) { return src - 1; }
    // NOTE(review): where `char` is signed, a byte >= 0x80 converts to a very large CodePoint value.
232 [[nodiscard]] static constexpr CodePoint decode(const char* src) { return static_cast<CodePoint>(*src); }
233};
234
/// A string iterator for UTF16 strings.
/// Declares the encoding specific primitives used by the StringIterator base; their definitions are
/// outside this listing.
236struct SC_COMPILER_EXPORT StringIteratorUTF16 : public StringIterator<StringIteratorUTF16>
237{
238 private:
239 using StringIterator::StringIterator;
240 constexpr StringIteratorUTF16(const CodeUnit* it, const CodeUnit* end) : StringIterator(it, end) {}
    // NOTE(review): `Parent` is not defined in the visible lines (source line 241 is absent from this
    // listing) - presumably an alias of the StringIterator<StringIteratorUTF16> base; confirm.
242 friend Parent;
243 friend struct StringView;
244
    // NOTE(review): not constexpr, unlike StringIteratorASCII::getEncoding - confirm this is intended.
245 [[nodiscard]] static StringEncoding getEncoding() { return StringEncoding::Utf16; }
246
    // Returns the position of the code point following the one at `bytes`.
247 [[nodiscard]] static const char* getNextOf(const char* bytes);
248
    // Returns the position of the code point preceding `bytes`.
249 [[nodiscard]] static const char* getPreviousOf(const char* bytes);
250
    // Decodes the code point stored at `bytes`.
251 [[nodiscard]] static uint32_t decode(const char* bytes);
252};
253
/// A string iterator for UTF8 strings.
/// Declares the encoding specific primitives used by the StringIterator base; their definitions are
/// outside this listing.
255struct SC_COMPILER_EXPORT StringIteratorUTF8 : public StringIterator<StringIteratorUTF8>
256{
257 private:
    // NOTE(review): `Parent` is not defined in the visible lines (source line 258 is absent from this
    // listing) - presumably an alias of the StringIterator<StringIteratorUTF8> base; confirm.
259 friend Parent;
260 friend struct StringView;
261 using StringIterator::StringIterator;
262 constexpr StringIteratorUTF8(const CodeUnit* it, const CodeUnit* end) : StringIterator(it, end) {}
263
    // NOTE(review): not constexpr, unlike StringIteratorASCII::getEncoding - confirm this is intended.
264 [[nodiscard]] static StringEncoding getEncoding() { return StringEncoding::Utf8; }
265
    // Returns the position of the code point following the one at `src`.
266 [[nodiscard]] static const char* getNextOf(const char* src);
267
    // Returns the position of the code point preceding `src`.
268 [[nodiscard]] static const char* getPreviousOf(const char* src);
269
    // Decodes the code point stored at `src`.
270 [[nodiscard]] static uint32_t decode(const char* src);
271};
272
// Builds a bool skip table of 256 entries used in some parsers.
// NOTE(review): the struct header (source line 274) and the constructor signature (source line 277)
// are absent from this listing, so the type name and the type of `chars` cannot be seen here.
275{
    // One flag per possible index 0..255; all entries start as false.
276 bool matches[256] = {false};
278 {
    // Flag every character contained in `chars`.
279 for (auto c : chars)
280 {
    // NOTE(review): if `c` is a plain `char` and `char` is signed, values >= 0x80 become negative
    // after this cast and would index before the array - confirm the element type of `chars`.
281 matches[static_cast<int>(c)] = true;
282 }
283 }
284};
286
287//-----------------------------------------------------------------------------------------------------------------------
288// Implementation Details
289//-----------------------------------------------------------------------------------------------------------------------
290template <typename CharIterator>
292{
293 while (it < end)
294 {
295 if (CharIterator::decode(it) == c)
296 return true;
297 it = getNextOf(it);
298 }
299 return false;
300}
301
302template <typename CharIterator>
304{
305 if (it < end and CharIterator::decode(it) == c)
306 {
307 it = getNextOf(it);
308 return true;
309 }
310 return false;
311}
312
313template <typename CharIterator>
315{
316 if (it < end)
317 {
318 c = CharIterator::decode(it);
319 it = getNextOf(it);
320 return true;
321 }
322 return false;
323}
324
325template <typename CharIterator>
327{
328 if (it < end)
329 {
330 it = getNextOf(it);
331 return true;
332 }
333 return false;
334}
335
336template <typename CharIterator>
338{
339 if (it > start)
340 {
341 it = getPreviousOf(it);
342 return true;
343 }
344 return false;
345}
346
347template <typename CharIterator>
348constexpr bool StringIterator<CharIterator>::advanceCodePoints(size_t numCodePoints)
349{
350 while (numCodePoints > 0)
351 {
352 numCodePoints -= 1;
353 if (it >= end)
354 {
355 return false;
356 }
357 it = getNextOf(it);
358 }
359 return true;
360}
361
362template <typename CharIterator>
364{
365 return it < end ? CharIterator::decode(getNextOf(it)) == c : false;
366}
367
368template <typename CharIterator>
370{
371 return it > start ? CharIterator::decode(getPreviousOf(it)) == c : false;
372}
373
374template <typename CharIterator>
376 StringIterator otherPoint) const
377{
378 SC_ASSERT_RELEASE(it <= otherPoint.it);
379 return StringIterator(it, otherPoint.it);
380}
381
382template <typename CharIterator>
384{
385 return (it - other.it) * static_cast<ssize_t>(sizeof(CodeUnit));
386}
387
388// StringIteratorASCII
389[[nodiscard]] constexpr bool StringIteratorASCII::advanceUntilMatches(CodePoint c)
390{
391#if defined(__clang__)
392#pragma clang diagnostic push
393#pragma clang diagnostic ignored "-Wunreachable-code"
394#endif
395 return __builtin_is_constant_evaluated() ? StringIterator::advanceUntilMatches(c)
396 : advanceUntilMatchesNonConstexpr(c);
397#if defined(__clang__)
398#pragma clang diagnostic pop
399#endif
400}
401
402} // namespace SC
#define SC_COMPILER_EXPORT
Macro for symbol visibility in non-MSVC compilers.
Definition Compiler.h:78
#define SC_ASSERT_RELEASE(e)
Assert expression e to be true.
Definition Assert.h:66
unsigned int uint32_t
Platform independent (4) bytes unsigned int.
Definition PrimitiveTypes.h:38
signed long ssize_t
Platform independent signed size type.
Definition PrimitiveTypes.h:57
constexpr bool StringEncodingAreBinaryCompatible(StringEncoding encoding1, StringEncoding encoding2)
Checks if two encodings have the same utf unit size.
Definition StringIterator.h:20
uint32_t StringCodePoint
UTF code point (32 bit)
Definition StringIterator.h:14
View over a contiguous sequence of items (pointer + size in elements).
Definition Span.h:29
A string iterator for ASCII strings.
Definition StringIterator.h:217
Builds a constexpr bool skip table of 256 entries used in some parsers.
Definition StringIterator.h:275
A string iterator for UTF16 strings.
Definition StringIterator.h:237
A string iterator for UTF8 strings.
Definition StringIterator.h:256
A position inside a fixed range [start, end) of UTF code points.
Definition StringIterator.h:36
constexpr void setToStart()
Rewind current position to start of iterator range.
Definition StringIterator.h:43
constexpr ssize_t bytesDistanceFrom(StringIterator other) const
Get distance in bytes from current position to another StringIterator current position.
Definition StringIterator.h:383
constexpr void setToEnd()
Set current position to end of iterator range.
Definition StringIterator.h:46
bool advanceUntilMatchesAny(Span< const CodePoint > items, CodePoint &matched)
Advances position until any CodePoint in the given Span is found.
bool advanceBackwardIfMatches(CodePoint c)
Move position by one code point towards start if previous code point matches c
constexpr bool advanceIfMatches(CodePoint c)
Advance position only if next code point matches c.
Definition StringIterator.h:303
constexpr bool isAtStart() const
Check if current position is at start of iterator range.
Definition StringIterator.h:54
bool reverseAdvanceUntilMatches(CodePoint c)
Moves position towards start until CodePoint c is found or position == start
bool advanceBeforeFinding(StringIterator other)
Advances position towards end until a matching range of character equal to other[it,...
bool advanceUntilDifferentFrom(CodePoint c, CodePoint *optionalReadChar=nullptr)
Advances position until a code point different from c is found or end is reached.
bool advanceAfterFinding(StringIterator other)
Advances position towards end until a matching range of character equal to other[it,...
bool startsWith(IteratorType other) const
Check if this Iterator at its start matches entirely another Iterator's range.
bool reverseAdvanceCodePoints(size_t numCodePoints)
Move position backwards (towards start) by variable number of code points.
constexpr bool stepBackward()
Move position to previous code point.
Definition StringIterator.h:337
constexpr bool advanceCodePoints(size_t numCodePoints)
Move position forward (towards end) by variable number of code points.
Definition StringIterator.h:348
bool reverseAdvanceUntilMatchesAny(Span< const CodePoint > items, CodePoint &matched)
Moves position towards start until any CodePoint in the given Span is found.
constexpr bool stepForward()
Move position to next code point.
Definition StringIterator.h:326
bool endsWithAnyOf(Span< const CodePoint > codePoints) const
Check if this Iterator ends with any code point in the given span.
bool advanceIfMatchesAny(Span< const CodePoint > items)
Advance position only if any of the code points in given Span is matched.
constexpr bool isFollowedBy(CodePoint c)
Check if next code point is c
Definition StringIterator.h:363
bool advanceBackwardRead(CodePoint &c)
Move to previous position and read code unit.
bool startsWithAnyOf(Span< const CodePoint > codePoints) const
Check if this Iterator starts with any code point in the given span.
constexpr StringIterator sliceFromStartUntil(StringIterator otherPoint) const
Returns another StringIterator range, from the current position to otherPoint's current position.
Definition StringIterator.h:375
constexpr bool isPrecededBy(CodePoint c)
Check if previous code point is c
Definition StringIterator.h:369
constexpr bool advanceUntilMatches(CodePoint c)
Advances position towards end until it matches CodePoint c or position == end
Definition StringIterator.h:291
bool read(CodePoint &c)
Read code unit at current position.
constexpr bool advanceRead(CodePoint &c)
Decode code unit at current position and advance.
Definition StringIterator.h:314
bool match(CodePoint c)
Check if code unit at current position matches CodePoint c
Definition StringIterator.h:126
constexpr bool isAtEnd() const
Check if current position is at end of iterator range.
Definition StringIterator.h:50
bool advanceIfMatchesRange(CodePoint first, CodePoint last)
Advance position if any code point in the range [first, last] is matched.
bool endsWith(IteratorType other) const
Check if this Iterator at its end matches entirely another Iterator's range.
bool advanceByLengthOf(StringIterator other)
Advances position by the number of bytes remaining in other (from its current position to its end).
Definition StringIterator.h:81
Non-owning view over a range of characters with UTF Encoding.
Definition StringView.h:46