module Codec.URI.PercentEncoding
( DelimitableOctet(..)
, DelimitedByteString
, DecodeError(..)
, encode
, decode
)
where
import Control.Applicative
import Control.Exception.Base
import Control.Failure
import Control.Monad
import Data.Bits
import Data.ByteString.Internal (w2c)
import Data.Hashable
import Data.Monoid.Unicode
import Data.String
import Data.Typeable
import Data.Vector.Storable.ByteString (ByteString)
import qualified Data.Vector.Storable.ByteString.Char8 as C8
import qualified Data.Vector.Generic as GV
import qualified Data.Vector.Generic.Mutable as MV
import qualified Data.Vector.Unboxed as UV
import qualified Data.Vector.Fusion.Stream as PS
import Data.Vector.Fusion.Stream.Monadic
import Data.Vector.Fusion.Stream.Size
import Data.URI.Internal
import Data.Word
import Prelude.Unicode
#if defined(MIN_VERSION_QuickCheck)
import Test.QuickCheck.Arbitrary
#endif
data DelimitableOctet
= Delimiter !Word8
| Literal !Word8
deriving (Eq, Ord)
instance Hashable DelimitableOctet where
hash (marshal → (isDelim, w))
= hash isDelim `hashWithSalt` hash w
#if defined(MIN_VERSION_QuickCheck)
instance Arbitrary DelimitableOctet where
arbitrary = unmarshal <$> arbitrary
shrink = (unmarshal <$>) ∘ shrink ∘ marshal
instance CoArbitrary DelimitableOctet where
coarbitrary = coarbitrary ∘ marshal
#endif
newtype instance UV.MVector s DelimitableOctet
= MV_DelimitableOctet (UV.MVector s (Bool, Word8))
newtype instance UV.Vector DelimitableOctet
= V_DelimitableOctet (UV.Vector (Bool, Word8))
marshal ∷ DelimitableOctet → (Bool, Word8)
marshal (Delimiter w) = (True , w)
marshal (Literal w) = (False, w)
unmarshal ∷ (Bool, Word8) → DelimitableOctet
unmarshal (True , w) = Delimiter w
unmarshal (False, w) = Literal w
instance MV.MVector UV.MVector DelimitableOctet where
basicLength (MV_DelimitableOctet v) = MV.basicLength v
basicUnsafeSlice i n (MV_DelimitableOctet v)
= MV_DelimitableOctet $ MV.basicUnsafeSlice i n v
basicOverlaps (MV_DelimitableOctet v1) (MV_DelimitableOctet v2)
= MV.basicOverlaps v1 v2
basicUnsafeNew n = MV_DelimitableOctet `liftM` MV.basicUnsafeNew n
basicUnsafeReplicate n
= (MV_DelimitableOctet `liftM`) ∘ MV.basicUnsafeReplicate n ∘ marshal
basicUnsafeRead (MV_DelimitableOctet v) i
= unmarshal `liftM` MV.basicUnsafeRead v i
basicUnsafeWrite (MV_DelimitableOctet v) i
= MV.basicUnsafeWrite v i ∘ marshal
basicClear (MV_DelimitableOctet v) = MV.basicClear v
basicSet (MV_DelimitableOctet v) = MV.basicSet v ∘ marshal
basicUnsafeCopy (MV_DelimitableOctet v1) (MV_DelimitableOctet v2)
= MV.basicUnsafeCopy v1 v2
basicUnsafeMove (MV_DelimitableOctet v1) (MV_DelimitableOctet v2)
= MV.basicUnsafeMove v1 v2
basicUnsafeGrow (MV_DelimitableOctet v) n
= MV_DelimitableOctet `liftM` MV.basicUnsafeGrow v n
instance GV.Vector UV.Vector DelimitableOctet where
basicUnsafeFreeze (MV_DelimitableOctet v)
= V_DelimitableOctet `liftM` GV.basicUnsafeFreeze v
basicUnsafeThaw (V_DelimitableOctet v)
= MV_DelimitableOctet `liftM` GV.basicUnsafeThaw v
basicLength (V_DelimitableOctet v) = GV.basicLength v
basicUnsafeSlice i n (V_DelimitableOctet v)
= V_DelimitableOctet $ GV.basicUnsafeSlice i n v
basicUnsafeIndexM (V_DelimitableOctet v) i
= unmarshal `liftM` GV.basicUnsafeIndexM v i
basicUnsafeCopy (MV_DelimitableOctet mv) (V_DelimitableOctet v)
= GV.basicUnsafeCopy mv v
elemseq _ o z
= case marshal o of
(isDelim, w) → GV.elemseq ((⊥) ∷ UV.Vector Bool ) isDelim $
GV.elemseq ((⊥) ∷ UV.Vector Word8) w z
instance UV.Unbox DelimitableOctet
instance IsString (UV.Vector DelimitableOctet) where
fromString str
= case decode (const False) (C8.pack str) of
Right v → v
Left e → throw (e ∷ DecodeError)
instance Hashable (UV.Vector DelimitableOctet) where
hashWithSalt = UV.foldl' hashWithSalt
#if defined(MIN_VERSION_QuickCheck)
instance Arbitrary (UV.Vector DelimitableOctet) where
arbitrary = UV.fromList <$> arbitrary
shrink = (UV.fromList <$>) ∘ shrink ∘ UV.toList
instance CoArbitrary (UV.Vector DelimitableOctet) where
coarbitrary = coarbitrary ∘ UV.toList
#endif
type DelimitedByteString
= UV.Vector DelimitableOctet
data EncState s
= EInitial !s
| EPercent !s !Word8 !Word8
| EUpperHalf !s !Word8
data DecState s
= DInitial !s
| DPercent !s
| DUpperHalf !s !Word8
data DecodeError
= InvalidUpperHalf !Char
| InvalidLowerHalf !Char !Char
| MissingUpperHalf
| MissingLowerHalf !Char
deriving Typeable
instance Exception DecodeError
instance Show DecodeError where
show (InvalidUpperHalf u)
= "DecodeError: non-hex-digit occured after \"%\": '" ⊕ [u] ⊕ "'"
show (InvalidLowerHalf u l)
= "DecodeError: non-hex-digit occured after \"%" ⊕ [u] ⊕ "\": '" ⊕ [l] ⊕ "'"
show MissingUpperHalf
= "DecodeError: premature end with \"%\""
show (MissingLowerHalf u)
= "DecodeError: premature end with \"%" ⊕ [u] ⊕ "\""
encode ∷ (Char → Bool) → DelimitedByteString → ByteString
encode isUnsafe = GV.unstream ∘ encodeStream isUnsafe ∘ GV.stream
decode ∷ ∀f. (Applicative f, Failure DecodeError f)
⇒ (Char → Bool)
→ ByteString
→ f DelimitedByteString
decode isDelim = munstream ∘ decodeStream isDelim ∘ mstream
mstream ∷ (Monad m, GV.Vector v α) ⇒ v α → Stream m α
mstream = PS.liftStream ∘ GV.stream
munstream ∷ (Functor m, Monad m, GV.Vector v α) ⇒ Stream m α → m (v α)
munstream s = GV.unstream ∘ PS.unsafeFromList (size s) <$> toList s
encodeStream ∷ ∀f. (Applicative f, Monad f)
⇒ (Char → Bool)
→ Stream f DelimitableOctet
→ Stream f Word8
encodeStream isUnsafe (Stream step (s0 ∷ s) sz)
= Stream go (EInitial s0) sz'
where
sz' ∷ Size
sz' = case upperBound sz of
Just n → Max $ n ⋅ 3
Nothing → Unknown
go ∷ EncState s → f (Step (EncState s) Word8)
go (EInitial s)
= do r ← step s
case r of
Yield (Delimiter w) s'
→ pure $ Yield w (EInitial s' )
Yield (Literal w) s'
| isUnsafe (w2c w)
→ let (u, l) = encodeHex w in
pure $ Yield 0x25 (EPercent s' u l)
| otherwise
→ pure $ Yield w (EInitial s' )
Skip s' → pure $ Skip (EInitial s' )
Done → pure $ Done
go (EPercent s u l) = pure $ Yield u (EUpperHalf s l)
go (EUpperHalf s l) = pure $ Yield l (EInitial s )
decodeStream ∷ ∀f. (Applicative f, Failure DecodeError f)
⇒ (Char → Bool)
→ Stream f Word8
→ Stream f DelimitableOctet
decodeStream isDelim (Stream step (s0 ∷ s) sz)
= Stream go (DInitial s0) (toMax sz)
where
go ∷ DecState s → f (Step (DecState s) DelimitableOctet)
go (DInitial s)
= do r ← step s
case r of
Yield w s'
| w ≡ 0x25 → pure $ Skip (DPercent s')
| isDelim (w2c w) → pure $ Yield (Delimiter w) (DInitial s')
| otherwise → pure $ Yield (Literal w) (DInitial s')
Skip s' → pure $ Skip (DInitial s')
Done → pure Done
go (DPercent s)
= do r ← step s
case r of
Yield u s' → pure $ Skip (DUpperHalf s' u)
Skip s' → pure $ Skip (DPercent s' )
Done → pure Done
go (DUpperHalf s u)
= do r ← step s
case r of
Yield l s' → do w ← decodeHex u l
pure $ Yield (Literal w) (DInitial s')
Skip s' → pure $ Skip (DUpperHalf s' u)
Done → failure $ MissingLowerHalf (w2c u)
encodeHex ∷ Word8 → (Word8, Word8)
encodeHex w = ( encodeHalf $ w `shiftR` 4
, encodeHalf $ w .&. 0x0F
)
where
encodeHalf ∷ Word8 → Word8
encodeHalf h
| h < 0x0A = h + 0x30
| otherwise = h 10 + 0x65
decodeHex ∷ (Applicative f, Failure DecodeError f) ⇒ Word8 → Word8 → f Word8
decodeHex u l
| (¬) (isHexDigit_w8 u) = failure $ InvalidUpperHalf (w2c u)
| (¬) (isHexDigit_w8 l) = failure $ InvalidLowerHalf (w2c u) (w2c l)
| otherwise = pure $ unsafeDecodeHex u l
unsafeDecodeHex ∷ Word8 → Word8 → Word8
unsafeDecodeHex u l = (decodeHalf u `shiftL` 4) .|. decodeHalf l
where
decodeHalf ∷ Word8 → Word8
decodeHalf w
| w ≥ 0x30 ∧ w ≤ 0x39 = w 0x30
| w ≥ 0x61 = w 0x61 + 10
| otherwise = w 0x65 + 10