GCC Code Coverage Report


Directory: libs/url/
File: src/detail/normalize.cpp
Date: 2025-11-10 19:06:22
Exec Total Coverage
Lines: 427 430 99.3%
Functions: 21 21 100.0%
Branches: 231 244 94.7%

Line Branch Exec Source
1 //
2 // Copyright (c) 2016-2019 Vinnie Falco (vinnie dot falco at gmail dot com)
3 // Copyright (c) 2022 Alan de Freitas (alandefreitas@gmail.com)
4 //
5 // Distributed under the Boost Software License, Version 1.0. (See accompanying
6 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7 //
8 // Official repository: https://github.com/boostorg/url
9 //
10
11
12 #include <boost/url/detail/config.hpp>
13 #include <boost/url/decode_view.hpp>
14 #include "decode.hpp"
15 #include <boost/url/segments_encoded_view.hpp>
16 #include <boost/url/grammar/ci_string.hpp>
17 #include <boost/url/grammar/lut_chars.hpp>
18 #include <boost/assert.hpp>
19 #include <boost/core/ignore_unused.hpp>
20 #include <cstring>
21 #include "normalize.hpp"
22
23 namespace boost {
24 namespace urls {
25 namespace detail {
26
27 void
28 7466 pop_encoded_front(
29 core::string_view& s,
30 char& c,
31 std::size_t& n) noexcept
32 {
33
2/2
✓ Branch 1 taken 7314 times.
✓ Branch 2 taken 152 times.
7466 if(s.front() != '%')
34 {
35 7314 c = s.front();
36 7314 s.remove_prefix(1);
37 }
38 else
39 {
40 152 detail::decode_unsafe(
41 &c,
42 &c + 1,
43 s.substr(0, 3));
44 152 s.remove_prefix(3);
45 }
46 7466 ++n;
47 7466 }
48
49 int
50 60 compare_encoded(
51 core::string_view lhs,
52 core::string_view rhs) noexcept
53 {
54 60 std::size_t n0 = 0;
55 60 std::size_t n1 = 0;
56 60 char c0 = 0;
57 60 char c1 = 0;
58 60 while(
59
4/4
✓ Branch 1 taken 218 times.
✓ Branch 2 taken 26 times.
✓ Branch 3 taken 205 times.
✓ Branch 4 taken 39 times.
462 !lhs.empty() &&
60
2/2
✓ Branch 1 taken 205 times.
✓ Branch 2 taken 13 times.
218 !rhs.empty())
61 {
62 205 pop_encoded_front(lhs, c0, n0);
63 205 pop_encoded_front(rhs, c1, n1);
64
2/2
✓ Branch 0 taken 18 times.
✓ Branch 1 taken 187 times.
205 if (c0 < c1)
65 18 return -1;
66
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 184 times.
187 if (c1 < c0)
67 3 return 1;
68 }
69 39 n0 += detail::decode_bytes_unsafe(lhs);
70 39 n1 += detail::decode_bytes_unsafe(rhs);
71
2/2
✓ Branch 0 taken 18 times.
✓ Branch 1 taken 21 times.
39 if (n0 == n1)
72 18 return 0;
73
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 13 times.
21 if (n0 < n1)
74 8 return -1;
75 13 return 1;
76 }
77
78 int
79 24 compare_encoded_query(
80 core::string_view lhs,
81 core::string_view rhs) noexcept
82 {
83 static constexpr
84 grammar::lut_chars
85 query_compare_exception_lut = "&=+";
86
87 24 std::size_t n0 = 0;
88 24 std::size_t n1 = 0;
89 24 char c0 = 0;
90 24 char c1 = 0;
91 24 while(
92
4/4
✓ Branch 1 taken 108 times.
✓ Branch 2 taken 6 times.
✓ Branch 3 taken 107 times.
✓ Branch 4 taken 7 times.
222 !lhs.empty() &&
93
2/2
✓ Branch 1 taken 107 times.
✓ Branch 2 taken 1 times.
108 !rhs.empty())
94 {
95 107 bool const lhs_was_decoded = lhs.front() != '%';
96 107 bool const rhs_was_decoded = rhs.front() != '%';
97 107 pop_encoded_front(lhs, c0, n0);
98 107 pop_encoded_front(rhs, c1, n1);
99
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 105 times.
107 if (c0 < c1)
100 2 return -1;
101
2/2
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 93 times.
105 if (c1 < c0)
102 12 return 1;
103 // The decoded chars are the same, but
104 // are these query exceptions that have
105 // different meanings when decoded?
106
2/2
✓ Branch 1 taken 38 times.
✓ Branch 2 taken 55 times.
93 if (query_compare_exception_lut(c0))
107 {
108 // If so, we only continue if both
109 // chars were decoded or encoded
110 // the same way.
111
2/2
✓ Branch 0 taken 35 times.
✓ Branch 1 taken 3 times.
38 if (lhs_was_decoded == rhs_was_decoded)
112 35 continue;
113 // Otherwise, we return a value != 0
114 // because these chars are not equal.
115 // If rhs was the decoded one, it contains
116 // an ascii char higher than '%'
117
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
3 if (rhs_was_decoded)
118 2 return -1;
119 else
120 1 return 1;
121 }
122 }
123 7 n0 += detail::decode_bytes_unsafe(lhs);
124 7 n1 += detail::decode_bytes_unsafe(rhs);
125
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 2 times.
7 if (n0 == n1)
126 5 return 0;
127
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 if (n0 < n1)
128 1 return -1;
129 1 return 1;
130 }
131
132 void
133 1216 digest_encoded(
134 core::string_view s,
135 fnv_1a& hasher) noexcept
136 {
137 1216 char c = 0;
138 1216 std::size_t n = 0;
139
2/2
✓ Branch 1 taken 508 times.
✓ Branch 2 taken 1216 times.
1724 while(!s.empty())
140 {
141 508 pop_encoded_front(s, c, n);
142 508 hasher.put(c);
143 }
144 1216 }
145
146 int
147 167 ci_compare_encoded(
148 core::string_view lhs,
149 core::string_view rhs) noexcept
150 {
151 167 std::size_t n0 = 0;
152 167 std::size_t n1 = 0;
153 167 char c0 = 0;
154 167 char c1 = 0;
155 167 while (
156
4/4
✓ Branch 1 taken 2142 times.
✓ Branch 2 taken 151 times.
✓ Branch 3 taken 2136 times.
✓ Branch 4 taken 157 times.
4435 !lhs.empty() &&
157
2/2
✓ Branch 1 taken 2136 times.
✓ Branch 2 taken 6 times.
2142 !rhs.empty())
158 {
159 2136 pop_encoded_front(lhs, c0, n0);
160 2136 pop_encoded_front(rhs, c1, n1);
161 2136 c0 = grammar::to_lower(c0);
162 2136 c1 = grammar::to_lower(c1);
163
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 2128 times.
2136 if (c0 < c1)
164 8 return -1;
165
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2126 times.
2128 if (c1 < c0)
166 2 return 1;
167 }
168 157 n0 += detail::decode_bytes_unsafe(lhs);
169 157 n1 += detail::decode_bytes_unsafe(rhs);
170
2/2
✓ Branch 0 taken 150 times.
✓ Branch 1 taken 7 times.
157 if (n0 == n1)
171 150 return 0;
172
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 6 times.
7 if (n0 < n1)
173 1 return -1;
174 6 return 1;
175 }
176
177 void
178 304 ci_digest_encoded(
179 core::string_view s,
180 fnv_1a& hasher) noexcept
181 {
182 304 char c = 0;
183 304 std::size_t n = 0;
184
2/2
✓ Branch 1 taken 2062 times.
✓ Branch 2 taken 304 times.
2366 while(!s.empty())
185 {
186 2062 pop_encoded_front(s, c, n);
187 2062 c = grammar::to_lower(c);
188 2062 hasher.put(c);
189 }
190 304 }
191
192 int
193 46 compare(
194 core::string_view lhs,
195 core::string_view rhs) noexcept
196 {
197 46 auto rlen = (std::min)(lhs.size(), rhs.size());
198
2/2
✓ Branch 0 taken 79 times.
✓ Branch 1 taken 25 times.
104 for (std::size_t i = 0; i < rlen; ++i)
199 {
200 79 char c0 = lhs[i];
201 79 char c1 = rhs[i];
202
2/2
✓ Branch 0 taken 13 times.
✓ Branch 1 taken 66 times.
79 if (c0 < c1)
203 13 return -1;
204
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 58 times.
66 if (c1 < c0)
205 8 return 1;
206 }
207
2/2
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 21 times.
25 if ( lhs.size() == rhs.size() )
208 4 return 0;
209
2/2
✓ Branch 2 taken 8 times.
✓ Branch 3 taken 13 times.
21 if ( lhs.size() < rhs.size() )
210 8 return -1;
211 13 return 1;
212 }
213
214 int
215 204 ci_compare(
216 core::string_view lhs,
217 core::string_view rhs) noexcept
218 {
219 204 auto rlen = (std::min)(lhs.size(), rhs.size());
220
2/2
✓ Branch 0 taken 839 times.
✓ Branch 1 taken 197 times.
1036 for (std::size_t i = 0; i < rlen; ++i)
221 {
222 839 char c0 = grammar::to_lower(lhs[i]);
223 839 char c1 = grammar::to_lower(rhs[i]);
224
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 833 times.
839 if (c0 < c1)
225 6 return -1;
226
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 832 times.
833 if (c1 < c0)
227 1 return 1;
228 }
229
2/2
✓ Branch 2 taken 190 times.
✓ Branch 3 taken 7 times.
197 if ( lhs.size() == rhs.size() )
230 190 return 0;
231
2/2
✓ Branch 2 taken 6 times.
✓ Branch 3 taken 1 times.
7 if ( lhs.size() < rhs.size() )
232 6 return -1;
233 1 return 1;
234 }
235
236 void
237 304 ci_digest(
238 core::string_view s,
239 fnv_1a& hasher) noexcept
240 {
241
2/2
✓ Branch 2 taken 730 times.
✓ Branch 3 taken 304 times.
1034 for (char c: s)
242 {
243 730 c = grammar::to_lower(c);
244 730 hasher.put(c);
245 }
246 304 }
247
248 /* Check if a string ends with the specified suffix (decoded comparison)
249
250 This function determines if a string ends with the specified suffix
251 when the string and suffix are compared after percent-decoding.
252
253 @param str The string to check (percent-encoded)
254 @param suffix The suffix to check for (percent-decoded)
255 @return The number of encoded chars consumed in the string
256 */
257 std::size_t
258 2136 path_ends_with(
259 core::string_view str,
260 core::string_view suffix) noexcept
261 {
262
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 2136 times.
2136 BOOST_ASSERT(!str.empty());
263
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 2136 times.
2136 BOOST_ASSERT(!suffix.empty());
264
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 2136 times.
2136 BOOST_ASSERT(!suffix.contains("%2F"));
265
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 2136 times.
2136 BOOST_ASSERT(!suffix.contains("%2f"));
266 5848 auto consume_last = [](
267 core::string_view::iterator& it,
268 core::string_view::iterator& end,
269 char& c)
270 {
271
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 5848 times.
5848 BOOST_ASSERT(end > it);
272
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 5848 times.
5848 BOOST_ASSERT(it != end);
273
4/4
✓ Branch 0 taken 3960 times.
✓ Branch 1 taken 1888 times.
✓ Branch 2 taken 5800 times.
✓ Branch 3 taken 48 times.
9808 if ((end - it) < 3 ||
274
2/2
✓ Branch 0 taken 3912 times.
✓ Branch 1 taken 48 times.
7920 *(std::prev(end, 3)) != '%')
275 {
276 5800 c = *--end;
277 5800 return false;
278 }
279 96 detail::decode_unsafe(
280 &c,
281 &c + 1,
282 core::string_view(std::prev(
283 end, 3), 3));
284 48 end -= 3;
285 48 return true;
286 };
287
288 2136 auto it0 = str.begin();
289 2136 auto end0 = str.end();
290 2136 auto it1 = suffix.begin();
291 2136 auto end1 = suffix.end();
292 2136 char c0 = 0;
293 2136 char c1 = 0;
294 2136 while(
295
2/2
✓ Branch 0 taken 3006 times.
✓ Branch 1 taken 242 times.
3248 it0 < end0 &&
296
2/2
✓ Branch 0 taken 2932 times.
✓ Branch 1 taken 74 times.
3006 it1 < end1)
297 {
298 2932 bool const is_encoded = consume_last(it0, end0, c0);
299 // The suffix never contains an encoded slash (%2F), and a decoded
300 // slash is not equivalent to an encoded slash
301
4/4
✓ Branch 0 taken 48 times.
✓ Branch 1 taken 2884 times.
✓ Branch 2 taken 16 times.
✓ Branch 3 taken 32 times.
2932 if (is_encoded && c0 == '/')
302 16 return 0;
303 2916 consume_last(it1, end1, c1);
304
2/2
✓ Branch 0 taken 1804 times.
✓ Branch 1 taken 1112 times.
2916 if (c0 != c1)
305 1804 return 0;
306 }
307 316 bool const consumed_suffix = it1 == end1;
308
2/2
✓ Branch 0 taken 110 times.
✓ Branch 1 taken 206 times.
316 if (consumed_suffix)
309 {
310 110 std::size_t const consumed_encoded = str.end() - end0;
311 110 return consumed_encoded;
312 }
313 206 return 0;
314 }
315
316 std::size_t
317 836 remove_dot_segments(
318 char* dest0,
319 char const* end,
320 core::string_view input) noexcept
321 {
322 // 1. The input buffer `s` is initialized with
323 // the now-appended path components and the
324 // output buffer `dest0` is initialized to
325 // the empty string.
326 836 char* dest = dest0;
327 836 bool const is_absolute = input.starts_with('/');
328
329 // Step 2 is a loop through 5 production rules:
330 // https://www.rfc-editor.org/rfc/rfc3986#section-5.2.4
331 //
332 // There are no transitions between all rules,
333 // which enables some optimizations.
334 //
335 // Initial:
336 // - Rule A: handle initial dots
337 // If the input buffer begins with a
338 // prefix of "../" or "./", then remove
339 // that prefix from the input buffer.
340 // Rule A can only happen at the beginning.
341 // Errata 4547: Keep "../" in the beginning
342 // https://www.rfc-editor.org/errata/eid4547
343 //
344 // Then:
345 // - Rule D: ignore a final ".." or "."
346 // if the input buffer consists only of "."
347 // or "..", then remove that from the input
348 // buffer.
349 // Rule D can only happen after Rule A because:
350 // - B and C write "/" to the input
351 // - E writes "/" to input or returns
352 //
353 // Then:
354 // - Rule B: ignore ".": write "/" to the input
355 // - Rule C: apply "..": remove seg and write "/"
356 // - Rule E: copy complete segment
357 auto append =
358 1497 [](char*& first, char const* last, core::string_view in)
359 {
360 // append `in` to `dest`
361
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 1497 times.
1497 BOOST_ASSERT(in.size() <= std::size_t(last - first));
362 1497 std::memmove(first, in.data(), in.size());
363 1497 first += in.size();
364 ignore_unused(last);
365 1497 };
366
367 9599 auto dot_starts_with = [](
368 core::string_view str, core::string_view dots, std::size_t& n)
369 {
370 // starts_with for encoded/decoded dots
371 // or decoded otherwise. return how many
372 // chars in str match the dots
373 9599 n = 0;
374
2/2
✓ Branch 2 taken 16424 times.
✓ Branch 3 taken 550 times.
16974 for (char c: dots)
375 {
376
2/2
✓ Branch 1 taken 7375 times.
✓ Branch 2 taken 9049 times.
16424 if (str.starts_with(c))
377 {
378 7375 str.remove_prefix(1);
379 7375 ++n;
380 7375 continue;
381 }
382
383 // In the general case, we would need to
384 // check if the next char is an encoded
385 // dot.
386 // However, an encoded dot in `str`
387 // would have already been decoded in
388 // url_base::normalize_path().
389 // This needs to be undone if
390 // `remove_dot_segments` is used in a
391 // different context.
392 // if (str.size() > 2 &&
393 // c == '.'
394 // &&
395 // str[0] == '%' &&
396 // str[1] == '2' &&
397 // (str[2] == 'e' ||
398 // str[2] == 'E'))
399 // {
400 // str.remove_prefix(3);
401 // n += 3;
402 // continue;
403 // }
404
405 9049 n = 0;
406 9049 return false;
407 }
408 550 return true;
409 };
410
411 4795 auto dot_equal = [&dot_starts_with](
412 core::string_view str, core::string_view dots)
413 {
414 4795 std::size_t n = 0;
415 4795 dot_starts_with(str, dots, n);
416 4795 return n == str.size();
417 836 };
418
419 // Rule A
420 std::size_t n;
421
2/2
✓ Branch 1 taken 771 times.
✓ Branch 2 taken 81 times.
852 while (!input.empty())
422 {
423
2/2
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 767 times.
771 if (dot_starts_with(input, "../", n))
424 {
425 // Errata 4547
426 4 append(dest, end, "../");
427 4 input.remove_prefix(n);
428 4 continue;
429 }
430
2/2
✓ Branch 2 taken 755 times.
✓ Branch 3 taken 12 times.
767 else if (!dot_starts_with(input, "./", n))
431 {
432 755 break;
433 }
434 12 input.remove_prefix(n);
435 }
436
437 // Rule D
438
2/2
✓ Branch 2 taken 82 times.
✓ Branch 3 taken 754 times.
836 if( dot_equal(input, "."))
439 {
440 82 input = {};
441 }
442
2/2
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 751 times.
754 else if( dot_equal(input, "..") )
443 {
444 // Errata 4547
445 3 append(dest, end, "..");
446 3 input = {};
447 }
448
449 // 2. While the input buffer is not empty,
450 // loop as follows:
451
2/2
✓ Branch 1 taken 1653 times.
✓ Branch 2 taken 797 times.
2450 while (!input.empty())
452 {
453 // Rule B
454 1653 bool const is_dot_seg = dot_starts_with(input, "/./", n);
455
2/2
✓ Branch 0 taken 32 times.
✓ Branch 1 taken 1621 times.
1653 if (is_dot_seg)
456 {
457 32 input.remove_prefix(n - 1);
458 32 continue;
459 }
460
461 1621 bool const is_final_dot_seg = dot_equal(input, "/.");
462
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 1613 times.
1621 if (is_final_dot_seg)
463 {
464 // We can't remove "." from a core::string_view
465 // So what we do here is equivalent to
466 // replacing s with '/' as required
467 // in Rule B and executing the next
468 // iteration, which would append this
469 // '/' to the output, as required by
470 // Rule E
471 8 append(dest, end, input.substr(0, 1));
472 8 input = {};
473 8 break;
474 }
475
476 // Rule C
477 1613 bool const is_dotdot_seg = dot_starts_with(input, "/../", n);
478
2/2
✓ Branch 0 taken 193 times.
✓ Branch 1 taken 1420 times.
1613 if (is_dotdot_seg)
479 {
480 193 core::string_view cur_out(dest0, dest - dest0);
481 193 std::size_t p = cur_out.find_last_of('/');
482 193 bool const has_multiple_segs = p != core::string_view::npos;
483
2/2
✓ Branch 0 taken 132 times.
✓ Branch 1 taken 61 times.
193 if (has_multiple_segs)
484 {
485 // output has multiple segments
486 // "erase" [p, end] if not "/.."
487 132 core::string_view last_seg(dest0 + p, dest - (dest0 + p));
488 132 bool const prev_is_dotdot_seg = dot_equal(last_seg, "/..");
489
2/2
✓ Branch 0 taken 121 times.
✓ Branch 1 taken 11 times.
132 if (!prev_is_dotdot_seg)
490 {
491 121 dest = dest0 + p;
492 }
493 else
494 {
495 11 append(dest, end, "/..");
496 }
497 }
498
2/2
✓ Branch 0 taken 11 times.
✓ Branch 1 taken 50 times.
61 else if (dest0 != dest)
499 {
500 // Only one segment in the output: remove it
501 11 core::string_view last_seg(dest0, dest - dest0);
502 11 bool const prev_is_dotdot_seg = dot_equal(last_seg, "..");
503
2/2
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 2 times.
11 if (!prev_is_dotdot_seg)
504 {
505 9 dest = dest0;
506
1/2
✓ Branch 0 taken 9 times.
✗ Branch 1 not taken.
9 if (!is_absolute)
507 {
508 9 input.remove_prefix(1);
509 }
510 }
511 else
512 {
513 2 append(dest, end, "/..");
514 }
515 }
516 else
517 {
518 // Output is empty
519
1/2
✓ Branch 0 taken 50 times.
✗ Branch 1 not taken.
50 if (is_absolute)
520 {
521 50 append(dest, end, "/..");
522 }
523 else
524 {
525 // AFREITAS: Although we have no formal proof
526 // for that, the output can't be relative
527 // and empty at this point because relative
528 // paths will fall in the `dest0 != dest`
529 // case above of this rule C and then the
530 // general case of rule E for "..".
531 append(dest, end, "..");
532 }
533 }
534 193 input.remove_prefix(n - 1);
535 193 continue;
536 193 }
537
538 1420 bool const is_final_dotdot_seg = dot_equal(input, "/..");
539
2/2
✓ Branch 0 taken 31 times.
✓ Branch 1 taken 1389 times.
1420 if (is_final_dotdot_seg)
540 {
541 31 core::string_view cur_out(dest0, dest - dest0);
542 31 std::size_t p = cur_out.find_last_of('/');
543 31 bool const has_multiple_segs = p != core::string_view::npos;
544
2/2
✓ Branch 0 taken 18 times.
✓ Branch 1 taken 13 times.
31 if (has_multiple_segs)
545 {
546 // output has multiple segments
547 // "erase" [p, end] if not "/.."
548 18 core::string_view last_seg(dest0 + p, dest - (dest0 + p));
549 18 bool const prev_is_dotdot_seg = dot_equal(last_seg, "/..");
550
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 4 times.
18 if (!prev_is_dotdot_seg)
551 {
552 14 dest = dest0 + p;
553 14 append(dest, end, "/");
554 }
555 else
556 {
557 4 append(dest, end, "/..");
558 }
559 }
560
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 10 times.
13 else if (dest0 != dest)
561 {
562 // Only one segment in the output: remove it
563 3 core::string_view last_seg(dest0, dest - dest0);
564 3 bool const prev_is_dotdot_seg = dot_equal(last_seg, "..");
565
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 2 times.
3 if (!prev_is_dotdot_seg) {
566 1 dest = dest0;
567 }
568 else
569 {
570 2 append(dest, end, "/..");
571 }
572 }
573 else
574 {
575 // Output is empty: append dotdot
576
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
10 if (is_absolute)
577 {
578 10 append(dest, end, "/..");
579 }
580 else
581 {
582 // AFREITAS: Although we have no formal proof
583 // for that, the output can't be relative
584 // and empty at this point because relative
585 // paths will fall in the `dest0 != dest`
586 // case above of this rule C and then the
587 // general case of rule E for "..".
588 append(dest, end, "..");
589 }
590 }
591 31 input = {};
592 31 break;
593 }
594
595 // Rule E
596 1389 std::size_t p = input.find_first_of('/', 1);
597
2/2
✓ Branch 0 taken 677 times.
✓ Branch 1 taken 712 times.
1389 if (p != core::string_view::npos)
598 {
599 677 append(dest, end, input.substr(0, p));
600 677 input.remove_prefix(p);
601 }
602 else
603 {
604 712 append(dest, end, input);
605 712 input = {};
606 }
607 }
608
609 // 3. Finally, the output buffer is set
610 // as the result of remove_dot_segments,
611 // and we return its size
612 836 return dest - dest0;
613 }
614
615 char
616 1154 path_pop_back( core::string_view& s )
617 {
618
4/4
✓ Branch 1 taken 522 times.
✓ Branch 2 taken 632 times.
✓ Branch 3 taken 1102 times.
✓ Branch 4 taken 52 times.
1676 if (s.size() < 3 ||
619
2/2
✓ Branch 1 taken 470 times.
✓ Branch 2 taken 52 times.
1044 *std::prev(s.end(), 3) != '%')
620 {
621 1102 char c = s.back();
622 1102 s.remove_suffix(1);
623 1102 return c;
624 }
625 52 char c = 0;
626
1/2
✓ Branch 2 taken 52 times.
✗ Branch 3 not taken.
104 detail::decode_unsafe(
627 104 &c, &c + 1, s.substr(s.size() - 3));
628
2/2
✓ Branch 0 taken 44 times.
✓ Branch 1 taken 8 times.
52 if (c != '/')
629 {
630 44 s.remove_suffix(3);
631 44 return c;
632 }
633 8 c = s.back();
634 8 s.remove_suffix(1);
635 8 return c;
636 };
637
638 void
639 538 pop_last_segment(
640 core::string_view& str,
641 core::string_view& seg,
642 std::size_t& level,
643 bool remove_unmatched) noexcept
644 {
645 538 seg = {};
646 538 std::size_t n = 0;
647
2/2
✓ Branch 1 taken 558 times.
✓ Branch 2 taken 142 times.
700 while (!str.empty())
648 {
649 // B. if the input buffer begins with a
650 // prefix of "/./" or "/.", where "." is
651 // a complete path segment, then replace
652 // that prefix with "/" in the input
653 // buffer; otherwise,
654 558 n = detail::path_ends_with(str, "/./");
655
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 548 times.
558 if (n)
656 {
657 10 seg = str.substr(str.size() - n);
658 10 str.remove_suffix(n);
659 10 continue;
660 }
661 548 n = detail::path_ends_with(str, "/.");
662
2/2
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 536 times.
548 if (n)
663 {
664 12 seg = str.substr(str.size() - n, 1);
665 12 str.remove_suffix(n);
666 12 continue;
667 }
668
669 // C. if the input buffer begins with a
670 // prefix of "/../" or "/..", where ".."
671 // is a complete path segment, then
672 // replace that prefix with "/" in the
673 // input buffer and remove the last
674 // segment and its preceding "/"
675 // (if any) from the output buffer
676 // otherwise,
677 536 n = detail::path_ends_with(str, "/../");
678
2/2
✓ Branch 0 taken 42 times.
✓ Branch 1 taken 494 times.
536 if (n)
679 {
680 42 seg = str.substr(str.size() - n);
681 42 str.remove_suffix(n);
682 42 ++level;
683 42 continue;
684 }
685 494 n = detail::path_ends_with(str, "/..");
686
2/2
✓ Branch 0 taken 46 times.
✓ Branch 1 taken 448 times.
494 if (n)
687 {
688 46 seg = str.substr(str.size() - n);
689 46 str.remove_suffix(n);
690 46 ++level;
691 46 continue;
692 }
693
694 // E. move the first path segment in the
695 // input buffer to the end of the output
696 // buffer, including the initial "/"
697 // character (if any) and any subsequent
698 // characters up to, but not including,
699 // the next "/" character or the end of
700 // the input buffer.
701 448 std::size_t p = str.size() > 1
702
2/2
✓ Branch 0 taken 374 times.
✓ Branch 1 taken 74 times.
448 ? str.find_last_of('/', str.size() - 2)
703 448 : core::string_view::npos;
704
2/2
✓ Branch 0 taken 276 times.
✓ Branch 1 taken 172 times.
448 if (p != core::string_view::npos)
705 {
706 276 seg = str.substr(p + 1);
707 276 str.remove_suffix(seg.size());
708 }
709 else
710 {
711 172 seg = str;
712 172 str = {};
713 }
714
715
2/2
✓ Branch 0 taken 396 times.
✓ Branch 1 taken 52 times.
448 if (level == 0)
716 396 return;
717
2/2
✓ Branch 1 taken 42 times.
✓ Branch 2 taken 10 times.
52 if (!str.empty())
718 42 --level;
719 }
720 // we still need to skip n_skip + 1
721 // but the string is empty
722
4/4
✓ Branch 0 taken 42 times.
✓ Branch 1 taken 100 times.
✓ Branch 2 taken 34 times.
✓ Branch 3 taken 8 times.
142 if (remove_unmatched && level)
723 {
724 34 seg = "/";
725 34 level = 0;
726 34 return;
727 }
728
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 104 times.
108 else if (level)
729 {
730
1/2
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
4 if (!seg.empty())
731 {
732 4 seg = "/../";
733 }
734 else
735 {
736 // AFREITAS: this condition
737 // is correct, but it might
738 // unreachable.
739 seg = "/..";
740 }
741 4 --level;
742 4 return;
743 }
744 104 seg = {};
745 }
746
747 void
748 304 normalized_path_digest(
749 core::string_view str,
750 bool remove_unmatched,
751 fnv_1a& hasher) noexcept
752 {
753 304 core::string_view seg;
754 304 std::size_t level = 0;
755 do
756 {
757 538 pop_last_segment(
758 str, seg, level, remove_unmatched);
759
2/2
✓ Branch 1 taken 1154 times.
✓ Branch 2 taken 538 times.
1692 while (!seg.empty())
760 {
761 1154 char c = path_pop_back(seg);
762 1154 hasher.put(c);
763 }
764 }
765
2/2
✓ Branch 1 taken 234 times.
✓ Branch 2 taken 304 times.
538 while (!str.empty());
766 304 }
767
768 // compare segments as if there were a normalized
769 int
770 181 segments_compare(
771 segments_encoded_view seg0,
772 segments_encoded_view seg1) noexcept
773 {
774 // calculate path size as if it were normalized
775 auto normalized_size =
776 362 [](segments_encoded_view seg) -> std::size_t
777 {
778
2/2
✓ Branch 1 taken 110 times.
✓ Branch 2 taken 252 times.
362 if (seg.empty())
779 110 return seg.is_absolute();
780
781 252 std::size_t n = 0;
782 252 std::size_t skip = 0;
783 252 auto begin = seg.begin();
784 252 auto it = seg.end();
785
2/2
✓ Branch 1 taken 676 times.
✓ Branch 2 taken 252 times.
928 while (it != begin)
786 {
787 676 --it;
788 676 decode_view dseg = **it;
789
2/2
✓ Branch 1 taken 167 times.
✓ Branch 2 taken 509 times.
676 if (dseg == "..")
790 167 ++skip;
791
2/2
✓ Branch 1 taken 471 times.
✓ Branch 2 taken 38 times.
509 else if (dseg != ".")
792 {
793
2/2
✓ Branch 0 taken 85 times.
✓ Branch 1 taken 386 times.
471 if (skip)
794 85 --skip;
795 else
796 386 n += dseg.size() + 1;
797 }
798 }
799 252 n += skip * 3;
800 252 n -= !seg.is_absolute();
801 252 return n;
802 };
803
804 // find the normalized size for the comparison
805 181 std::size_t n0 = normalized_size(seg0);
806 181 std::size_t n1 = normalized_size(seg1);
807 181 std::size_t n00 = n0;
808 181 std::size_t n10 = n1;
809
810 // consume the last char from a segment range
811 auto consume_last =
812 1670 [](
813 std::size_t& n,
814 decode_view& dseg,
815 segments_encoded_view::iterator& begin,
816 segments_encoded_view::iterator& it,
817 decode_view::iterator& cit,
818 std::size_t& skip,
819 bool& at_slash) -> char
820 {
821
2/2
✓ Branch 2 taken 1023 times.
✓ Branch 3 taken 647 times.
1670 if (cit != dseg.begin())
822 {
823 // return last char from current segment
824 1023 at_slash = false;
825 1023 --cit;
826 1023 --n;
827 1023 return *cit;
828 }
829
830
2/2
✓ Branch 0 taken 385 times.
✓ Branch 1 taken 262 times.
647 if (!at_slash)
831 {
832 // current segment dseg is over and
833 // previous char was not a slash
834 // so we output one
835 385 at_slash = true;
836 385 --n;
837 385 return '/';
838 }
839
840 // current segment dseg is over and
841 // last char was already the slash
842 // between segments, so take the
843 // next final segment to consume
844 262 at_slash = false;
845
1/2
✓ Branch 2 taken 500 times.
✗ Branch 3 not taken.
500 while (cit == dseg.begin())
846 {
847 // take next segment
848
2/2
✓ Branch 1 taken 376 times.
✓ Branch 2 taken 124 times.
500 if (it != begin)
849 376 --it;
850 else
851 124 break;
852
2/2
✓ Branch 3 taken 140 times.
✓ Branch 4 taken 236 times.
376 if (**it == "..")
853 {
854 // skip next if this is ".."
855 140 ++skip;
856 }
857
2/2
✓ Branch 3 taken 208 times.
✓ Branch 4 taken 28 times.
236 else if (**it != ".")
858 {
859
2/2
✓ Branch 0 taken 70 times.
✓ Branch 1 taken 138 times.
208 if (skip)
860 {
861 // discount skips
862 70 --skip;
863 }
864 else
865 {
866 // or update current seg
867 138 dseg = **it;
868 138 cit = dseg.end();
869 138 break;
870 }
871 }
872 }
873 // consume from the new current
874 // segment
875 262 --n;
876
2/2
✓ Branch 2 taken 123 times.
✓ Branch 3 taken 139 times.
262 if (cit != dseg.begin())
877 {
878 // in the general case, we consume
879 // one more character from the end
880 123 --cit;
881 123 return *cit;
882 }
883
884 // nothing left to consume in the
885 // current and new segment
886
2/2
✓ Branch 1 taken 130 times.
✓ Branch 2 taken 9 times.
139 if (it == begin)
887 {
888 // if this is the first
889 // segment, the segments are
890 // over and there can only
891 // be repetitions of "../" to
892 // output
893 130 return "/.."[n % 3];
894 }
895 // at other segments, we need
896 // a slash to transition to the
897 // next segment
898 9 at_slash = true;
899 9 return '/';
900 };
901
902 // consume final segments from seg0 that
903 // should not influence the comparison
904 181 auto begin0 = seg0.begin();
905 181 auto it0 = seg0.end();
906 181 decode_view dseg0;
907
2/2
✓ Branch 2 taken 126 times.
✓ Branch 3 taken 55 times.
181 if (it0 != seg0.begin())
908 {
909 126 --it0;
910 126 dseg0 = **it0;
911 }
912 181 decode_view::iterator cit0 = dseg0.end();
913 181 std::size_t skip0 = 0;
914 181 bool at_slash0 = true;
915
2/2
✓ Branch 0 taken 134 times.
✓ Branch 1 taken 181 times.
315 while (n0 > n1)
916 {
917 134 consume_last(n0, dseg0, begin0, it0, cit0, skip0, at_slash0);
918 }
919
920 // consume final segments from seg1 that
921 // should not influence the comparison
922 181 auto begin1 = seg1.begin();
923 181 auto it1 = seg1.end();
924 181 decode_view dseg1;
925
2/2
✓ Branch 2 taken 126 times.
✓ Branch 3 taken 55 times.
181 if (it1 != seg1.begin())
926 {
927 126 --it1;
928 126 dseg1 = **it1;
929 }
930 181 decode_view::iterator cit1 = dseg1.end();
931 181 std::size_t skip1 = 0;
932 181 bool at_slash1 = true;
933
2/2
✓ Branch 0 taken 34 times.
✓ Branch 1 taken 181 times.
215 while (n1 > n0)
934 {
935 34 consume_last(n1, dseg1, begin1, it1, cit1, skip1, at_slash1);
936 }
937
938 181 int cmp = 0;
939
2/2
✓ Branch 0 taken 751 times.
✓ Branch 1 taken 181 times.
932 while (n0)
940 {
941 751 char c0 = consume_last(
942 n0, dseg0, begin0, it0, cit0, skip0, at_slash0);
943 751 char c1 = consume_last(
944 n1, dseg1, begin1, it1, cit1, skip1, at_slash1);
945
2/2
✓ Branch 0 taken 36 times.
✓ Branch 1 taken 715 times.
751 if (c0 < c1)
946 36 cmp = -1;
947
2/2
✓ Branch 0 taken 41 times.
✓ Branch 1 taken 674 times.
715 else if (c1 < c0)
948 41 cmp = +1;
949 }
950
951
2/2
✓ Branch 0 taken 41 times.
✓ Branch 1 taken 140 times.
181 if (cmp != 0)
952 41 return cmp;
953
2/2
✓ Branch 0 taken 138 times.
✓ Branch 1 taken 2 times.
140 if ( n00 == n10 )
954 138 return 0;
955
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 if ( n00 < n10 )
956 1 return -1;
957 1 return 1;
958 }
959
960 } // detail
961 } // urls
962 } // boost
963
964
965