module Network.HTTP.Lucu.MIMEParams
( MIMEParams
)
where
import Control.Applicative hiding (empty)
import Control.Monad hiding (mapM)
import Control.Monad.Unicode
import Data.Ascii (Ascii, CIAscii, AsciiBuilder)
import qualified Data.Ascii as A
import Data.Attoparsec.Char8
import Data.Bits
import qualified Data.ByteString.Char8 as BS
import Data.Char
import Data.Collections
import Data.Collections.BaseInstances ()
import qualified Data.Collections.Newtype.TH as C
import Data.Convertible.Base
import Data.Convertible.Instances.Ascii ()
import Data.Convertible.Utils
import Data.Default
import qualified Data.Map as M (Map)
import Data.Monoid.Unicode
import Data.Sequence (Seq)
import Data.Text (Text)
import qualified Data.Text as T
import Data.Text.Encoding
import Data.Text.Encoding.Error
import Data.Word
import Network.HTTP.Lucu.MIMEParams.Internal
import Network.HTTP.Lucu.OrphanInstances ()
import Network.HTTP.Lucu.Parser.Http
import Network.HTTP.Lucu.Utils
import Prelude hiding (concat, lookup, mapM, takeWhile)
import Prelude.Unicode
C.derive [d| instance Unfoldable MIMEParams (CIAscii, Text)
instance Foldable MIMEParams (CIAscii, Text)
instance Collection MIMEParams (CIAscii, Text)
instance Indexed MIMEParams CIAscii Text
instance Map MIMEParams CIAscii Text
instance SortingCollection MIMEParams (CIAscii, Text)
|]
instance ConvertSuccess MIMEParams Ascii where
convertSuccess = convertSuccessVia ((⊥) ∷ AsciiBuilder)
instance ConvertSuccess MIMEParams AsciiBuilder where
convertSuccess = foldl' f (∅)
where
f ∷ AsciiBuilder → (CIAscii, Text) → AsciiBuilder
f ab (k, v) = ab ⊕ cs ("; " ∷ Ascii) ⊕ printPair k v
printPair ∷ CIAscii → Text → AsciiBuilder
printPair name value
| T.any (> '\xFF') value
= printPairInUTF8 name value
| otherwise
= printPairInAscii name (A.unsafeFromByteString $ encodeUtf8 value)
printPairInUTF8 ∷ CIAscii → Text → AsciiBuilder
printPairInUTF8 name value
= cs name ⊕
cs ("*=utf-8''" ∷ Ascii) ⊕
escapeUnsafeChars (encodeUtf8 value) (∅)
printPairInAscii ∷ CIAscii → Ascii → AsciiBuilder
printPairInAscii name value
= cs name ⊕
cs ("=" ∷ Ascii) ⊕
if BS.any ((¬) ∘ isToken) (cs value) then
quoteStr value
else
cs value
escapeUnsafeChars ∷ BS.ByteString → AsciiBuilder → AsciiBuilder
escapeUnsafeChars bs b
= case BS.uncons bs of
Nothing → b
Just (c, bs')
| isToken c → escapeUnsafeChars bs' $
b ⊕ cs (A.unsafeFromString [c])
| otherwise → escapeUnsafeChars bs' $
b ⊕ toHex (fromIntegral $ fromEnum c)
toHex ∷ Word8 → AsciiBuilder
toHex o = cs ("%" ∷ Ascii) ⊕
cs (A.unsafeFromString [ toHex' (o `shiftR` 8)
, toHex' (o .&. 0x0F) ])
where
toHex' ∷ Word8 → Char
toHex' h
| h ≤ 0x09 = toEnum $ fromIntegral
$ fromEnum '0' + fromIntegral h
| otherwise = toEnum $ fromIntegral
$ fromEnum 'A' + fromIntegral (h 0x0A)
deriveAttempts [ ([t| MIMEParams |], [t| Ascii |])
, ([t| MIMEParams |], [t| AsciiBuilder |])
]
data ExtendedParam
= InitialEncodedParam {
epName ∷ !CIAscii
, epCharset ∷ !CIAscii
, epPayload ∷ !BS.ByteString
}
| ContinuedEncodedParam {
epName ∷ !CIAscii
, epSection ∷ !Integer
, epPayload ∷ !BS.ByteString
}
| AsciiParam {
epName ∷ !CIAscii
, epSection ∷ !Integer
, apPayload ∷ !Ascii
}
section ∷ ExtendedParam → Integer
section (InitialEncodedParam {..}) = 0
section ep = epSection ep
instance Default (Parser MIMEParams) where
def = decodeParams =≪ many (try def)
instance Default (Parser ExtendedParam) where
def = do skipMany lws
void $ char ';'
skipMany lws
epm ← name
void $ char '='
case epm of
(nm, 0, True)
→ do (charset, payload) ← initialEncodedValue
return $ InitialEncodedParam nm charset payload
(nm, sect, True)
→ do payload ← encodedPayload
return $ ContinuedEncodedParam nm sect payload
(nm, sect, False)
→ do payload ← token <|> quotedStr
return $ AsciiParam nm sect payload
name ∷ Parser (CIAscii, Integer, Bool)
name = do nm ← (cs ∘ A.unsafeFromByteString) <$>
takeWhile1 (\c → isToken c ∧ c ≢ '*')
sect ← option 0 $ try (char '*' *> decimal )
isEncoded ← option False $ try (char '*' *> pure True)
return (nm, sect, isEncoded)
initialEncodedValue ∷ Parser (CIAscii, BS.ByteString)
initialEncodedValue
= do charset ← metadata
void $ char '\''
void $ metadata
void $ char '\''
payload ← encodedPayload
if charset ≡ "" then
fail "charset is missing"
else
return (charset, payload)
where
metadata ∷ Parser CIAscii
metadata = (cs ∘ A.unsafeFromByteString) <$>
takeWhile (\c → c ≢ '\'' ∧ isToken c)
encodedPayload ∷ Parser BS.ByteString
encodedPayload = BS.concat <$> many (hexChar <|> rawChars)
hexChar ∷ Parser BS.ByteString
hexChar = do void $ char '%'
h ← satisfy isHexChar
l ← satisfy isHexChar
return $ BS.singleton $ hexToChar h l
isHexChar ∷ Char → Bool
isHexChar = inClass "0-9a-fA-F"
hexToChar ∷ Char → Char → Char
hexToChar h l
= chr $ (hexToInt h `shiftL` 8) .&. hexToInt l
hexToInt ∷ Char → Int
hexToInt c
| c ≤ '9' = ord c ord '0'
| c ≤ 'F' = ord c ord 'A' + 10
| otherwise = ord c ord 'a' + 10
rawChars ∷ Parser BS.ByteString
rawChars = takeWhile1 (\c → isToken c ∧ c ≢ '%')
decodeParams ∷ (Functor m, Monad m) ⇒ [ExtendedParam] → m MIMEParams
decodeParams = (MIMEParams <$>)
∘ (mapM (\(k, v) → ((,) k) <$> decodeSections v) =≪)
∘ sortBySection
sortBySection ∷ Monad m
⇒ [ExtendedParam]
→ m (M.Map CIAscii (M.Map Integer ExtendedParam))
sortBySection = flip go (∅)
where
go ∷ Monad m
⇒ [ExtendedParam]
→ M.Map CIAscii (M.Map Integer ExtendedParam)
→ m (M.Map CIAscii (M.Map Integer ExtendedParam))
go [] m = return m
go (x:xs) m
= case lookup (epName x) m of
Nothing
→ let s = singleton (section x, x)
m' = insert (epName x, s) m
in
go xs m'
Just s
→ case lookup (section x) s of
Nothing
→ let s' = insert (section x, x ) s
m' = insert (epName x, s') m
in
go xs m'
Just _
→ fail (concat [ "Duplicate section "
, show $ section x
, " for parameter '"
, cs $ epName x
, "'"
])
decodeSections ∷ Monad m ⇒ M.Map Integer ExtendedParam → m Text
decodeSections = (decodeSeq =≪) ∘ flip (flip toSeq 0) (∅)
where
toSeq ∷ Monad m
⇒ M.Map Integer ExtendedParam
→ Integer
→ Seq ExtendedParam
→ m (Seq ExtendedParam)
toSeq m expectedSect sects
= case minView m of
Nothing
→ return sects
Just ((sect, p), m')
| sect ≡ expectedSect
→ toSeq m' (expectedSect + 1) (sects ⊳ p)
| otherwise
→ fail (concat [ "Missing section "
, show $ section p
, " for parameter '"
, cs $ epName p
, "'"
])
decodeSeq ∷ Monad m ⇒ Seq ExtendedParam → m Text
decodeSeq sects
= case front sects of
Nothing
→ fail "decodeSeq: internal error: empty seq"
Just (InitialEncodedParam {..}, xs)
→ do d ← getDecoder epCharset
t ← decodeStr d epPayload
decodeSeq' (Just d) xs $ singleton t
Just (ContinuedEncodedParam {..}, _)
→ fail "decodeSeq: internal error: CEP at section 0"
Just (AsciiParam {..}, xs)
→ decodeSeq' Nothing xs $ singleton $ cs apPayload
decodeSeq' ∷ Monad m
⇒ Maybe Decoder
→ Seq ExtendedParam
→ Seq Text
→ m Text
decodeSeq' decoder sects chunks
= case front sects of
Nothing
→ return $ T.concat $ toList chunks
Just (InitialEncodedParam {}, _)
→ fail "decodeSeq': internal error: IEP at section > 0"
Just (ContinuedEncodedParam {..}, xs)
→ case decoder of
Just d
→ do t ← decodeStr d epPayload
decodeSeq' decoder xs $ chunks ⊳ t
Nothing
→ fail (concat [ "Section "
, show epSection
, " for parameter '"
, cs epName
, "' is encoded but its first section is not"
])
Just (AsciiParam {..}, xs)
→ decodeSeq' decoder xs $ chunks ⊳ cs apPayload
type Decoder = BS.ByteString → Either UnicodeException Text
decodeStr ∷ Monad m ⇒ Decoder → BS.ByteString → m Text
decodeStr decoder str
= case decoder str of
Right t → return t
Left e → fail $ show e
getDecoder ∷ Monad m ⇒ CIAscii → m Decoder
getDecoder charset
| charset ≡ "UTF-8" = return decodeUtf8'
| charset ≡ "US-ASCII" = return decodeUtf8'
| otherwise = fail $ "No decoders found for charset: " ⊕ cs charset