GCC Code Coverage Report


Directory: libs/url/
File: include/boost/url/grammar/lut_chars.hpp
Date: 2025-11-10 19:06:22
Exec Total Coverage
Lines: 47 47 100.0%
Functions: 16 16 100.0%
Branches: 14 14 100.0%

Line Branch Exec Source
1 //
2 // Copyright (c) 2021 Vinnie Falco (vinnie dot falco at gmail dot com)
3 //
4 // Distributed under the Boost Software License, Version 1.0. (See accompanying
5 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 //
7 // Official repository: https://github.com/boostorg/url
8 //
9
10 #ifndef BOOST_URL_GRAMMAR_LUT_CHARS_HPP
11 #define BOOST_URL_GRAMMAR_LUT_CHARS_HPP
12
13 #include <boost/url/detail/config.hpp>
14 #include <boost/url/grammar/detail/charset.hpp>
15 #include <cstdint>
16 #include <type_traits>
17
18 // Credit to Peter Dimov for ideas regarding
19 // SIMD constexpr, and character set masks.
20
21 namespace boost {
22 namespace urls {
23 namespace grammar {
24
25 #ifndef BOOST_URL_DOCS
26 namespace detail {
27 template<class T, class = void>
28 struct is_pred : std::false_type {};
29
30 template<class T>
31 struct is_pred<T, void_t<
32 decltype(
33 std::declval<bool&>() =
34 std::declval<T const&>().operator()(
35 std::declval<char>())
36 ) > > : std::true_type
37 {
38 };
39 } // detail
40 #endif
41
42 /** A set of characters
43
44 The characters defined by instances of
45 this set are provided upon construction.
46 The `constexpr` implementation allows
47 these to become compile-time constants.
48
49 @par Example
50 Character sets are used with rules and the
51 functions @ref find_if and @ref find_if_not.
52 @code
53 constexpr lut_chars vowel_chars = "AEIOU" "aeiou";
54
55 system::result< core::string_view > rv = parse( "Aiea", token_rule( vowel_chars ) );
56 @endcode
57
58 @see
59 @ref find_if,
60 @ref find_if_not,
61 @ref parse,
62 @ref token_rule.
63 */
64 class lut_chars
65 {
66 std::uint64_t mask_[4] = {};
67
68 constexpr
69 static
70 std::uint64_t
71 157924 lo(char c) noexcept
72 {
73 157924 return static_cast<
74 157924 unsigned char>(c) & 3;
75 }
76
77 constexpr
78 static
79 std::uint64_t
80 138901 hi(char c) noexcept
81 {
82 138901 return 1ULL << (static_cast<
83 138901 unsigned char>(c) >> 2);
84 }
85
86 constexpr
87 static
88 lut_chars
89 construct(
90 char const* s) noexcept
91 {
92 return *s
93 ? lut_chars(*s) +
94 construct(s+1)
95 : lut_chars();
96 }
97
98 constexpr
99 static
100 lut_chars
101 34048 construct(
102 unsigned char ch,
103 bool b) noexcept
104 {
105 return b
106
2/2
✓ Branch 0 taken 5824 times.
✓ Branch 1 taken 28224 times.
34048 ? lut_chars(ch)
107
2/2
✓ Branch 1 taken 84672 times.
✓ Branch 2 taken 28224 times.
118720 : lut_chars();
108 }
109
110 template<class Pred>
111 constexpr
112 static
113 lut_chars
114 68096 construct(
115 Pred pred,
116 unsigned char ch) noexcept
117 {
118 return ch == 255
119
2/2
✓ Branch 0 taken 133 times.
✓ Branch 1 taken 33915 times.
68096 ? construct(ch, pred(static_cast<char>(ch)))
120 67830 : construct(ch, pred(static_cast<char>(ch))) +
121 135926 construct(pred, ch + 1);
122 }
123
124 constexpr
125 28224 lut_chars() = default;
126
127 constexpr
128 34109 lut_chars(
129 std::uint64_t m0,
130 std::uint64_t m1,
131 std::uint64_t m2,
132 std::uint64_t m3) noexcept
133 34109 : mask_{ m0, m1, m2, m3 }
134 {
135 34109 }
136
137 public:
138 /** Constructor
139
140 This function constructs a character
141 set which has as a single member,
142 the character `ch`.
143
144 @par Example
145 @code
146 constexpr lut_chars asterisk( '*' );
147 @endcode
148
149 @par Complexity
150 Constant.
151
152 @par Exception Safety
153 Throws nothing.
154
155 @param ch A character.
156 */
157 constexpr
158 6341 lut_chars(char ch) noexcept
159 31705 : mask_ {
160
2/2
✓ Branch 1 taken 1362 times.
✓ Branch 2 taken 4979 times.
6341 lo(ch) == 0 ? hi(ch) : 0,
161
2/2
✓ Branch 1 taken 1901 times.
✓ Branch 2 taken 4440 times.
6341 lo(ch) == 1 ? hi(ch) : 0,
162
2/2
✓ Branch 1 taken 1742 times.
✓ Branch 2 taken 4599 times.
6341 lo(ch) == 2 ? hi(ch) : 0,
163
2/2
✓ Branch 1 taken 1336 times.
✓ Branch 2 taken 5005 times.
6341 lo(ch) == 3 ? hi(ch) : 0 }
164 {
165 6341 }
166
167 /** Constructor
168
169 This function constructs a character
170 set which has as members, all of the
171 characters present in the null-terminated
172 string `s`.
173
174 @par Example
175 @code
176 constexpr lut_chars digits = "0123456789";
177 @endcode
178
179 @par Complexity
180 Linear in `::strlen(s)`, or constant
181 if `s` is a constant expression.
182
183 @par Exception Safety
184 Throws nothing.
185
186 @param s A null-terminated string.
187 */
188 constexpr
189 lut_chars(
190 char const* s) noexcept
191 : lut_chars(construct(s))
192 {
193 }
194
195 /** Constructor.
196
197 This function constructs a character
198 set which has as members, every value
199 of `char ch` for which the expression
200 `pred(ch)` returns `true`.
201
202 @par Example
203 @code
204 struct is_digit
205 {
206 constexpr bool
207 operator()(char c ) const noexcept
208 {
209 return c >= '0' && c <= '9';
210 }
211 };
212
213 constexpr lut_chars digits( is_digit{} );
214 @endcode
215
216 @par Complexity
217 Linear in `pred`, or constant if
218 `pred(ch)` is a constant expression.
219
220 @par Exception Safety
221 Throws nothing.
222
223 @param pred The function object to
224 use for determining membership in
225 the character set.
226 */
227 template<class Pred
228 #ifndef BOOST_URL_DOCS
229 ,class = typename std::enable_if<
230 detail::is_pred<Pred>::value &&
231 ! std::is_base_of<
232 lut_chars, Pred>::value>::type
233 #endif
234 >
235 constexpr
236 266 lut_chars(Pred const& pred) noexcept
237 : lut_chars(
238 266 construct(pred, 0))
239 {
240 266 }
241
242 /** Return true if ch is in the character set.
243
244 This function returns true if the
245 character `ch` is in the set, otherwise
246 it returns false.
247
248 @par Complexity
249 Constant.
250
251 @par Exception Safety
252 Throws nothing.
253
254 @param ch The character to test.
255 @return `true` if `ch` is in the set.
256 */
257 constexpr
258 bool
259 1280 operator()(
260 unsigned char ch) const noexcept
261 {
262 1280 return operator()(static_cast<char>(ch));
263 }
264
265 /// @copydoc operator()(unsigned char) const
266 constexpr
267 bool
268 132560 operator()(char ch) const noexcept
269 {
270 132560 return mask_[lo(ch)] & hi(ch);
271 }
272
273 /** Return the union of two character sets.
274
275 This function returns a new character
276 set which contains all of the characters
277 in `cs0` as well as all of the characters
278 in `cs1`.
279
280 @par Example
281 This creates a character set which
282 includes all letters and numbers
283 @code
284 constexpr lut_chars alpha_chars(
285 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
286 "abcdefghijklmnopqrstuvwxyz");
287
288 constexpr lut_chars alnum_chars = alpha_chars + "0123456789";
289 @endcode
290
291 @par Complexity
292 Constant.
293
294 @return The new character set.
295
296 @param cs0 A character set to join
297
298 @param cs1 A character set to join
299 */
300 friend
301 constexpr
302 lut_chars
303 33919 operator+(
304 lut_chars const& cs0,
305 lut_chars const& cs1) noexcept
306 {
307 return lut_chars(
308 33919 cs0.mask_[0] | cs1.mask_[0],
309 33919 cs0.mask_[1] | cs1.mask_[1],
310 33919 cs0.mask_[2] | cs1.mask_[2],
311 33919 cs0.mask_[3] | cs1.mask_[3]);
312 }
313
314 /** Return a new character set by subtracting one set from another
315
316 This function returns a new character
317 set which is formed from all of the
318 characters in `cs0` which are not in `cs1`.
319
320 @par Example
321 This statement declares a character set
322 containing all the lowercase letters
323 which are not vowels:
324 @code
325 constexpr lut_chars consonants = lut_chars("abcdefghijklmnopqrstuvwxyz") - "aeiou";
326 @endcode
327
328 @par Complexity
329 Constant.
330
331 @return The new character set.
332
333 @param cs0 The character set to subtract from.
334
335 @param cs1 The character set whose members are removed from `cs0`.
336 */
337 friend
338 constexpr
339 lut_chars
340 190 operator-(
341 lut_chars const& cs0,
342 lut_chars const& cs1) noexcept
343 {
344 return lut_chars(
345 190 cs0.mask_[0] & ~cs1.mask_[0],
346 190 cs0.mask_[1] & ~cs1.mask_[1],
347 190 cs0.mask_[2] & ~cs1.mask_[2],
348 190 cs0.mask_[3] & ~cs1.mask_[3]);
349 }
350
351 /** Return a new character set which is the complement of another character set.
352
353 This function returns a new character
354 set which contains all of the characters
355 that are not in `*this`.
356
357 @par Example
358 This statement declares a character set
359 containing everything but vowels:
360 @code
361 constexpr lut_chars not_vowels = ~lut_chars( "AEIOU" "aeiou" );
362 @endcode
363
364 @par Complexity
365 Constant.
366
367 @par Exception Safety
368 Throws nothing.
369
370 @return The new character set.
371 */
372 constexpr
373 lut_chars
374 operator~() const noexcept
375 {
376 return lut_chars(
377 ~mask_[0],
378 ~mask_[1],
379 ~mask_[2],
380 ~mask_[3]
381 );
382 }
383
384 #ifndef BOOST_URL_DOCS
385 #ifdef BOOST_URL_USE_SSE2
386 char const*
387 1603 find_if(
388 char const* first,
389 char const* last) const noexcept
390 {
391 1603 return detail::find_if_pred(
392 1603 *this, first, last);
393 }
394
395 char const*
396 14075 find_if_not(
397 char const* first,
398 char const* last) const noexcept
399 {
400 14075 return detail::find_if_not_pred(
401 14075 *this, first, last);
402 }
403 #endif
404 #endif
405 };
406
407 } // grammar
408 } // urls
409 } // boost
410
411 #endif
412