@@ -66,7 +66,7 @@ use core::{fmt, mem, ops, slice, str};
66
66
/// /// https://url.spec.whatwg.org/#fragment-percent-encode-set
67
67
/// const FRAGMENT: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`');
68
68
/// ```
69
- #[ derive( Debug , PartialEq , Eq ) ]
69
+ #[ derive( Clone , Copy , Debug , PartialEq , Eq ) ]
70
70
pub struct AsciiSet {
71
71
mask : [ Chunk ; ASCII_RANGE_LEN / BITS_PER_CHUNK ] ,
72
72
}
@@ -79,7 +79,7 @@ const BITS_PER_CHUNK: usize = 8 * mem::size_of::<Chunk>();
79
79
80
80
impl AsciiSet {
81
81
/// An empty set.
82
- pub const EMPTY : AsciiSet = AsciiSet {
82
+ pub const EMPTY : & ' static AsciiSet = & AsciiSet {
83
83
mask : [ 0 ; ASCII_RANGE_LEN / BITS_PER_CHUNK ] ,
84
84
} ;
85
85
@@ -108,7 +108,7 @@ impl AsciiSet {
108
108
}
109
109
110
110
/// Return the union of two sets.
111
- pub const fn union ( & self , other : Self ) -> Self {
111
+ pub const fn union ( & self , other : & Self ) -> Self {
112
112
let mask = [
113
113
self . mask [ 0 ] | other. mask [ 0 ] ,
114
114
self . mask [ 1 ] | other. mask [ 1 ] ,
@@ -128,15 +128,31 @@ impl AsciiSet {
128
128
impl ops:: Add for AsciiSet {
129
129
type Output = Self ;
130
130
131
- fn add ( self , other : Self ) -> Self {
131
+ fn add ( self , other : Self ) -> Self :: Output {
132
+ self . union ( & other)
133
+ }
134
+ }
135
+
136
+ impl ops:: Add for & AsciiSet {
137
+ type Output = AsciiSet ;
138
+
139
+ fn add ( self , other : Self ) -> Self :: Output {
132
140
self . union ( other)
133
141
}
134
142
}
135
143
136
144
impl ops:: Not for AsciiSet {
137
145
type Output = Self ;
138
146
139
- fn not ( self ) -> Self {
147
+ fn not ( self ) -> Self :: Output {
148
+ self . complement ( )
149
+ }
150
+ }
151
+
152
+ impl ops:: Not for & AsciiSet {
153
+ type Output = AsciiSet ;
154
+
155
+ fn not ( self ) -> Self :: Output {
140
156
self . complement ( )
141
157
}
142
158
}
@@ -268,7 +284,7 @@ pub fn percent_encode_byte(byte: u8) -> &'static str {
268
284
/// assert_eq!(percent_encode(b"foo bar?", NON_ALPHANUMERIC).to_string(), "foo%20bar%3F");
269
285
/// ```
270
286
#[ inline]
271
- pub fn percent_encode < ' a > ( input : & ' a [ u8 ] , ascii_set : & ' static AsciiSet ) -> PercentEncode < ' a > {
287
+ pub fn percent_encode < ' a > ( input : & ' a [ u8 ] , ascii_set : & ' a AsciiSet ) -> PercentEncode < ' a > {
272
288
PercentEncode {
273
289
bytes : input,
274
290
ascii_set,
@@ -287,15 +303,15 @@ pub fn percent_encode<'a>(input: &'a [u8], ascii_set: &'static AsciiSet) -> Perc
287
303
/// assert_eq!(utf8_percent_encode("foo bar?", NON_ALPHANUMERIC).to_string(), "foo%20bar%3F");
288
304
/// ```
289
305
#[ inline]
290
- pub fn utf8_percent_encode < ' a > ( input : & ' a str , ascii_set : & ' static AsciiSet ) -> PercentEncode < ' a > {
306
+ pub fn utf8_percent_encode < ' a > ( input : & ' a str , ascii_set : & ' a AsciiSet ) -> PercentEncode < ' a > {
291
307
percent_encode ( input. as_bytes ( ) , ascii_set)
292
308
}
293
309
294
310
/// The return type of [`percent_encode`] and [`utf8_percent_encode`].
295
311
#[ derive( Clone ) ]
296
312
pub struct PercentEncode < ' a > {
297
313
bytes : & ' a [ u8 ] ,
298
- ascii_set : & ' static AsciiSet ,
314
+ ascii_set : & ' a AsciiSet ,
299
315
}
300
316
301
317
impl < ' a > Iterator for PercentEncode < ' a > {
@@ -372,6 +388,19 @@ pub fn percent_decode_str(input: &str) -> PercentDecode<'_> {
372
388
percent_decode ( input. as_bytes ( ) )
373
389
}
374
390
391
+ /// Percent-decode the given string, leaving any sequence whose decoded byte is in `ascii_set` percent-encoded.
392
+ ///
393
+ /// <https://url.spec.whatwg.org/#string-percent-decode>
394
+ ///
395
+ /// See [`percent_decode`] regarding the return type.
396
+ #[ inline]
397
+ pub fn percent_decode_str_with_set < ' a > (
398
+ input : & ' a str ,
399
+ ascii_set : & ' a AsciiSet ,
400
+ ) -> PercentDecode < ' a > {
401
+ percent_decode_with_set ( input. as_bytes ( ) , ascii_set)
402
+ }
403
+
375
404
/// Percent-decode the given bytes.
376
405
///
377
406
/// <https://url.spec.whatwg.org/#percent-decode>
@@ -394,13 +423,44 @@ pub fn percent_decode_str(input: &str) -> PercentDecode<'_> {
394
423
pub fn percent_decode ( input : & [ u8 ] ) -> PercentDecode < ' _ > {
395
424
PercentDecode {
396
425
bytes : input. iter ( ) ,
426
+ ascii_set : None ,
427
+ }
428
+ }
429
+
430
+ /// Percent-decode the given bytes, leaving any sequence whose decoded byte is in `ascii_set` percent-encoded.
431
+ ///
432
+ /// <https://url.spec.whatwg.org/#percent-decode>
433
+ ///
434
+ /// Any sequence of `%` followed by two hexadecimal digits is decoded, except when the decoded byte is in the given [`AsciiSet`].
435
+ /// The return type:
436
+ ///
437
+ /// * Implements `Into<Cow<u8>>` borrowing `input` when it contains no percent-encoded sequence,
438
+ /// * Implements `Iterator<Item = u8>` and therefore has a `.collect::<Vec<u8>>()` method,
439
+ /// * Has `decode_utf8()` and `decode_utf8_lossy()` methods.
440
+ ///
441
+ /// # Examples
442
+ ///
443
+ /// ```
444
+ /// use percent_encoding::{percent_decode_with_set, NON_ALPHANUMERIC};
445
+ ///
446
+ /// assert_eq!(percent_decode_with_set(b"%66oo%20bar%3f", &!NON_ALPHANUMERIC).decode_utf8().unwrap(), "%66oo bar?");
447
+ /// ```
448
+ #[ inline]
449
+ pub fn percent_decode_with_set < ' a > (
450
+ input : & ' a [ u8 ] ,
451
+ ascii_set : & ' a AsciiSet ,
452
+ ) -> PercentDecode < ' a > {
453
+ PercentDecode {
454
+ bytes : input. iter ( ) ,
455
+ ascii_set : Some ( ascii_set) ,
397
456
}
398
457
}
399
458
400
459
/// The return type of [`percent_decode`].
401
460
#[ derive( Clone , Debug ) ]
402
461
pub struct PercentDecode < ' a > {
403
462
bytes : slice:: Iter < ' a , u8 > ,
463
+ ascii_set : Option < & ' a AsciiSet > ,
404
464
}
405
465
406
466
fn after_percent_sign ( iter : & mut slice:: Iter < ' _ , u8 > ) -> Option < u8 > {
@@ -411,13 +471,35 @@ fn after_percent_sign(iter: &mut slice::Iter<'_, u8>) -> Option<u8> {
411
471
Some ( h as u8 * 0x10 + l as u8 )
412
472
}
413
473
474
+ fn after_percent_sign_lookahead < ' a > (
475
+ iter : & mut slice:: Iter < ' a , u8 > ,
476
+ ) -> Option < ( u8 , slice:: Iter < ' a , u8 > ) > {
477
+ let mut cloned_iter = iter. clone ( ) ;
478
+ let h = char:: from ( * cloned_iter. next ( ) ?) . to_digit ( 16 ) ?;
479
+ let l = char:: from ( * cloned_iter. next ( ) ?) . to_digit ( 16 ) ?;
480
+ Some ( ( h as u8 * 0x10 + l as u8 , cloned_iter) )
481
+ }
482
+
414
483
impl < ' a > Iterator for PercentDecode < ' a > {
415
484
type Item = u8 ;
416
485
417
486
fn next ( & mut self ) -> Option < u8 > {
418
487
self . bytes . next ( ) . map ( |& byte| {
419
- if byte == b'%' {
420
- after_percent_sign ( & mut self . bytes ) . unwrap_or ( byte)
488
+ if byte != b'%' {
489
+ return byte;
490
+ }
491
+
492
+ let Some ( ( decoded_byte, iter) ) = after_percent_sign_lookahead ( & mut self . bytes ) else {
493
+ return byte;
494
+ } ;
495
+
496
+ let should_decode = self
497
+ . ascii_set
498
+ . map_or ( true , |ascii_set| !ascii_set. contains ( decoded_byte) ) ;
499
+
500
+ if should_decode {
501
+ self . bytes = iter;
502
+ decoded_byte
421
503
} else {
422
504
byte
423
505
}
@@ -447,11 +529,20 @@ impl<'a> PercentDecode<'a> {
447
529
let mut bytes_iter = self . bytes . clone ( ) ;
448
530
while bytes_iter. any ( |& b| b == b'%' ) {
449
531
if let Some ( decoded_byte) = after_percent_sign ( & mut bytes_iter) {
532
+ if let Some ( ascii_set) = self . ascii_set {
533
+ if ascii_set. contains ( decoded_byte) {
534
+ continue ;
535
+ }
536
+ }
537
+
450
538
let initial_bytes = self . bytes . as_slice ( ) ;
451
539
let unchanged_bytes_len = initial_bytes. len ( ) - bytes_iter. len ( ) - 3 ;
452
540
let mut decoded = initial_bytes[ ..unchanged_bytes_len] . to_owned ( ) ;
453
541
decoded. push ( decoded_byte) ;
454
- decoded. extend ( PercentDecode { bytes : bytes_iter } ) ;
542
+ decoded. extend ( PercentDecode {
543
+ bytes : bytes_iter,
544
+ ascii_set : self . ascii_set ,
545
+ } ) ;
455
546
return Some ( decoded) ;
456
547
}
457
548
}
@@ -542,8 +633,8 @@ mod tests {
542
633
/// useful for defining sets in a modular way.
543
634
#[ test]
544
635
fn union ( ) {
545
- const A : AsciiSet = AsciiSet :: EMPTY . add ( b'A' ) ;
546
- const B : AsciiSet = AsciiSet :: EMPTY . add ( b'B' ) ;
636
+ const A : & AsciiSet = & AsciiSet :: EMPTY . add ( b'A' ) ;
637
+ const B : & AsciiSet = & AsciiSet :: EMPTY . add ( b'B' ) ;
547
638
const UNION : AsciiSet = A . union ( B ) ;
548
639
const EXPECTED : AsciiSet = AsciiSet :: EMPTY . add ( b'A' ) . add ( b'B' ) ;
549
640
assert_eq ! ( UNION , EXPECTED ) ;
0 commit comments