{-# LANGUAGE
    DeriveDataTypeable
  , DoAndIfThenElse
  , FlexibleInstances
  , GeneralizedNewtypeDeriving
  , MultiParamTypeClasses
  , OverloadedStrings
  , RecordWildCards
  , TemplateHaskell
  , TypeSynonymInstances
  , UnicodeSyntax
  #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# OPTIONS_GHC -fno-warn-missing-methods #-}
-- |Parsing and printing MIME parameter values
-- (<http://tools.ietf.org/html/rfc2231>).
module Network.HTTP.Lucu.MIMEParams
    ( MIMEParams
    )
    where
import Control.Applicative hiding (empty)
import Control.Monad hiding (mapM)
import Control.Monad.Unicode
import Data.Ascii (Ascii, CIAscii, AsciiBuilder)
import qualified Data.Ascii as A
import Data.Attoparsec.Char8
import Data.Bits
import qualified Data.ByteString.Char8 as BS
import Data.Char
import Data.Collections
import Data.Collections.BaseInstances ()
import qualified Data.Collections.Newtype.TH as C
import Data.Convertible.Base
import Data.Convertible.Instances.Ascii ()
import Data.Convertible.Utils
import Data.Default
import qualified Data.Map as M (Map)
import Data.Monoid.Unicode
import Data.Sequence (Seq)
import Data.Text (Text)
import qualified Data.Text as T
import Data.Text.Encoding
import Data.Text.Encoding.Error
import Data.Word
import Network.HTTP.Lucu.MIMEParams.Internal
import Network.HTTP.Lucu.OrphanInstances ()
import Network.HTTP.Lucu.Parser.Http
import Network.HTTP.Lucu.Utils
import Prelude hiding (concat, lookup, mapM, takeWhile)
import Prelude.Unicode

C.derive [d| instance Unfoldable MIMEParams (CIAscii, Text)
             instance Foldable   MIMEParams (CIAscii, Text)
             instance Collection MIMEParams (CIAscii, Text)
             instance Indexed    MIMEParams  CIAscii  Text
             instance Map        MIMEParams  CIAscii  Text
             instance SortingCollection MIMEParams (CIAscii, Text)
           |]

instance ConvertSuccess MIMEParams Ascii where
    {-# INLINE convertSuccess #-}
    convertSuccess = convertSuccessVia (()  AsciiBuilder)

instance ConvertSuccess MIMEParams AsciiBuilder where
    {-# INLINEABLE convertSuccess #-}
    convertSuccess = foldl' f ()
        where
          f  AsciiBuilder  (CIAscii, Text)  AsciiBuilder
          {-# INLINE f #-}
          f ab (k, v) = ab  cs ("; "  Ascii)  printPair k v

printPair  CIAscii  Text  AsciiBuilder
{-# INLINEABLE printPair #-}
printPair name value
    | T.any (> '\xFF') value
        = printPairInUTF8 name value
    | otherwise
        = printPairInAscii name (A.unsafeFromByteString $ encodeUtf8 value)

printPairInUTF8  CIAscii  Text  AsciiBuilder
{-# INLINEABLE printPairInUTF8 #-}
printPairInUTF8 name value
    = cs name 
      cs ("*=utf-8''"  Ascii) 
      escapeUnsafeChars (encodeUtf8 value) ()

printPairInAscii  CIAscii  Ascii  AsciiBuilder
{-# INLINEABLE printPairInAscii #-}
printPairInAscii name value
    = cs name 
      cs ("="  Ascii) 
      if BS.any ((¬)  isToken) (cs value) then
          quoteStr value
      else
          cs value

escapeUnsafeChars  BS.ByteString  AsciiBuilder  AsciiBuilder
{-# INLINEABLE escapeUnsafeChars #-}
escapeUnsafeChars bs b
    = case BS.uncons bs of
        Nothing          b
        Just (c, bs')
            | isToken c  escapeUnsafeChars bs' $
                          b  cs (A.unsafeFromString [c])
            | otherwise  escapeUnsafeChars bs' $
                          b  toHex (fromIntegral $ fromEnum c)

toHex  Word8  AsciiBuilder
{-# INLINEABLE toHex #-}
toHex o = cs ("%"  Ascii) 
          cs (A.unsafeFromString [ toHex' (o `shiftR` 8)
                                 , toHex' (o .&.   0x0F) ])
    where
      toHex'  Word8  Char
      {-# INLINEABLE toHex' #-}
      toHex' h
          | h  0x09  = toEnum $ fromIntegral
                               $ fromEnum '0' + fromIntegral h
          | otherwise = toEnum $ fromIntegral
                               $ fromEnum 'A' + fromIntegral (h - 0x0A)

deriveAttempts [ ([t| MIMEParams |], [t| Ascii        |])
               , ([t| MIMEParams |], [t| AsciiBuilder |])
               ]

data ExtendedParam
    = InitialEncodedParam {
        epName     !CIAscii
      , epCharset  !CIAscii
      , epPayload  !BS.ByteString
      }
    | ContinuedEncodedParam {
        epName     !CIAscii
      , epSection  !Integer
      , epPayload  !BS.ByteString
      }
    | AsciiParam {
        epName     !CIAscii
      , epSection  !Integer
      , apPayload  !Ascii
      }

section  ExtendedParam  Integer
{-# INLINE section #-}
section (InitialEncodedParam {..}) = 0
section ep                         = epSection ep

instance Default (Parser MIMEParams) where
    {-# INLINE def #-}
    def = decodeParams = many (try def)

instance Default (Parser ExtendedParam) where
    def = do skipMany lws
             void $ char ';'
             skipMany lws
             epm  name
             void $ char '='
             case epm of
               (nm, 0, True)
                    do (charset, payload)  initialEncodedValue
                        return $ InitialEncodedParam nm charset payload
               (nm, sect, True)
                    do payload  encodedPayload
                        return $ ContinuedEncodedParam nm sect payload
               (nm, sect, False)
                    do payload  token <|> quotedStr
                        return $ AsciiParam nm sect payload

name  Parser (CIAscii, Integer, Bool)
name = do nm         (cs  A.unsafeFromByteString) <$>
                      takeWhile1 (\c  isToken c  c  '*')
          sect       option 0     $ try (char '*' *> decimal  )
          isEncoded  option False $ try (char '*' *> pure True)
          return (nm, sect, isEncoded)

initialEncodedValue  Parser (CIAscii, BS.ByteString)
initialEncodedValue
    = do charset  metadata
         void $ char '\''
         void $ metadata -- Ignore the language tag
         void $ char '\''
         payload  encodedPayload
         if charset  "" then
             -- NOTE: I'm not sure this is the right thing, but RFC
             -- 2231 doesn't tell us what we should do when the
             -- charset is omitted.
             fail "charset is missing"
         else
             return (charset, payload)
    where
      metadata  Parser CIAscii
      metadata = (cs  A.unsafeFromByteString) <$>
                 takeWhile (\c  c  '\''  isToken c)

encodedPayload  Parser BS.ByteString
{-# INLINE encodedPayload #-}
encodedPayload = BS.concat <$> many (hexChar <|> rawChars)

hexChar  Parser BS.ByteString
{-# INLINEABLE hexChar #-}
hexChar = do void $ char '%'
             h  satisfy isHexChar
             l  satisfy isHexChar
             return $ BS.singleton $ hexToChar h l

isHexChar  Char  Bool
isHexChar = inClass "0-9a-fA-F"

hexToChar  Char  Char  Char
{-# INLINE hexToChar #-}
hexToChar h l
    = chr $ (hexToInt h `shiftL` 8) .&. hexToInt l

hexToInt  Char  Int
{-# INLINEABLE hexToInt #-}
hexToInt c
    | c  '9'   = ord c - ord '0'
    | c  'F'   = ord c - ord 'A' + 10
    | otherwise = ord c - ord 'a' + 10

rawChars  Parser BS.ByteString
{-# INLINE rawChars #-}
rawChars = takeWhile1 (\c  isToken c  c  '%')

decodeParams  (Functor m, Monad m)  [ExtendedParam]  m MIMEParams
{-# INLINE decodeParams #-}
decodeParams = (MIMEParams <$>)
                (mapM (\(k, v)  ((,) k) <$> decodeSections v) =)
                sortBySection

sortBySection  Monad m
               [ExtendedParam]
               m (M.Map CIAscii (M.Map Integer ExtendedParam))
sortBySection = flip go ()
    where
      go  Monad m
          [ExtendedParam]
          M.Map CIAscii (M.Map Integer ExtendedParam)
          m (M.Map CIAscii (M.Map Integer ExtendedParam))
      go []     m = return m
      go (x:xs) m
          = case lookup (epName x) m of
              Nothing
                   let s  = singleton (section x, x)
                        m' = insert (epName x, s) m
                    in
                      go xs m'
              Just s
                   case lookup (section x) s of
                       Nothing
                            let s' = insert (section x, x ) s
                                 m' = insert (epName  x, s') m
                             in
                               go xs m'
                       Just _
                            fail (concat [ "Duplicate section "
                                          , show $ section x
                                          , " for parameter '"
                                          , cs $ epName x
                                          , "'"
                                          ])

decodeSections  Monad m  M.Map Integer ExtendedParam  m Text
decodeSections = (decodeSeq =)  flip (flip toSeq 0) ()
    where
      toSeq  Monad m
             M.Map Integer ExtendedParam
             Integer
             Seq ExtendedParam
             m (Seq ExtendedParam)
      toSeq m expectedSect sects
          = case minView m of
              Nothing
                   return sects
              Just ((sect, p), m')
                  | sect  expectedSect
                         toSeq m' (expectedSect + 1) (sects  p)
                  | otherwise
                         fail (concat [ "Missing section "
                                       , show $ section p
                                       , " for parameter '"
                                       , cs $ epName p
                                       , "'"
                                       ])

      decodeSeq  Monad m  Seq ExtendedParam  m Text
      decodeSeq sects
          = case front sects of
              Nothing
                   fail "decodeSeq: internal error: empty seq"
              Just (InitialEncodedParam {..}, xs)
                   do d  getDecoder epCharset
                       t  decodeStr d epPayload
                       decodeSeq' (Just d) xs $ singleton t
              Just (ContinuedEncodedParam {..}, _)
                   fail "decodeSeq: internal error: CEP at section 0"
              Just (AsciiParam {..}, xs)
                   decodeSeq' Nothing xs $ singleton $ cs apPayload

      decodeSeq'  Monad m
                  Maybe Decoder
                  Seq ExtendedParam
                  Seq Text
                  m Text
      decodeSeq' decoder sects chunks
          = case front sects of
              Nothing
                   return $ T.concat $ toList chunks
              Just (InitialEncodedParam {}, _)
                   fail "decodeSeq': internal error: IEP at section > 0"
              Just (ContinuedEncodedParam {..}, xs)
                   case decoder of
                       Just d
                            do t  decodeStr d epPayload
                                decodeSeq' decoder xs $ chunks  t
                       Nothing
                            fail (concat [ "Section "
                                          , show epSection
                                          , " for parameter '"
                                          , cs epName
                                          , "' is encoded but its first section is not"
                                          ])
              Just (AsciiParam {..}, xs)
                   decodeSeq' decoder xs $ chunks  cs apPayload

type Decoder = BS.ByteString  Either UnicodeException Text

decodeStr  Monad m  Decoder  BS.ByteString  m Text
decodeStr decoder str
    = case decoder str of
        Right t  return t
        Left  e  fail $ show e

getDecoder  Monad m  CIAscii  m Decoder
getDecoder charset
    | charset  "UTF-8"    = return decodeUtf8'
    | charset  "US-ASCII" = return decodeUtf8'
    | otherwise            = fail $ "No decoders found for charset: "  cs charset