-- WARNING: This file is security sensitive as it uses unsafeWrite which does
-- not check bounds. Any changes should be made with care and we would love to
-- get informed about them, just cc us in any PR targetting this file: @eskimor @jprider63
-- We would be happy to review the changes!

-- The security check at the end (pos > length) only works if pos grows
-- monotonously, if this condition does not hold, the check is flawed.
module Data.Aeson.Parser.UnescapePure
    (
      unescapeText
    ) where

import Control.Exception (evaluate, throw, try)
import Control.Monad (when)
import Data.ByteString as B
import Data.Bits (Bits, shiftL, shiftR, (.&.), (.|.))
import Data.Text (Text)
import qualified Data.Text.Array as A
import Data.Text.Encoding.Error (UnicodeException (..))
import Data.Text.Internal.Private (runText)
import Data.Text.Unsafe (unsafeDupablePerformIO)
import Data.Word (Word8, Word16, Word32)
import GHC.ST (ST)

-- Different UTF states.
data Utf =
      UtfGround
    | UtfTail1
    | UtfU32e0
    | UtfTail2
    | UtfU32ed
    | Utf843f0
    | UtfTail3
    | Utf843f4
    deriving (Utf -> Utf -> Bool
(Utf -> Utf -> Bool) -> (Utf -> Utf -> Bool) -> Eq Utf
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Utf -> Utf -> Bool
$c/= :: Utf -> Utf -> Bool
== :: Utf -> Utf -> Bool
$c== :: Utf -> Utf -> Bool
Eq)

data State =
      StateNone
    | StateUtf !Utf !Word32
    | StateBackslash
    | StateU0
    | StateU1 !Word16
    | StateU2 !Word16
    | StateU3 !Word16
    | StateS0
    | StateS1
    | StateSU0
    | StateSU1 !Word16
    | StateSU2 !Word16
    | StateSU3 !Word16
    deriving (State -> State -> Bool
(State -> State -> Bool) -> (State -> State -> Bool) -> Eq State
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: State -> State -> Bool
$c/= :: State -> State -> Bool
== :: State -> State -> Bool
$c== :: State -> State -> Bool
Eq)

-- References:
-- http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
-- https://github.com/jwilm/vte/blob/master/utf8parse/src/table.rs.in

setByte1 :: (Num a, Bits b, Bits a, Integral b) => a -> b -> a
setByte1 :: a -> b -> a
setByte1 point :: a
point word :: b
word = a
point a -> a -> a
forall a. Bits a => a -> a -> a
.|. b -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral (b
word b -> b -> b
forall a. Bits a => a -> a -> a
.&. 0x3f)
{-# INLINE setByte1 #-}

setByte2 :: (Num a, Bits b, Bits a, Integral b) => a -> b -> a
setByte2 :: a -> b -> a
setByte2 point :: a
point word :: b
word = a
point a -> a -> a
forall a. Bits a => a -> a -> a
.|. (b -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral (b
word b -> b -> b
forall a. Bits a => a -> a -> a
.&. 0x3f) a -> Int -> a
forall a. Bits a => a -> Int -> a
`shiftL` 6)
{-# INLINE setByte2 #-}

setByte2Top :: (Num a, Bits b, Bits a, Integral b) => a -> b -> a
setByte2Top :: a -> b -> a
setByte2Top point :: a
point word :: b
word = a
point a -> a -> a
forall a. Bits a => a -> a -> a
.|. (b -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral (b
word b -> b -> b
forall a. Bits a => a -> a -> a
.&. 0x1f) a -> Int -> a
forall a. Bits a => a -> Int -> a
`shiftL` 6)
{-# INLINE setByte2Top #-}

setByte3 :: (Num a, Bits b, Bits a, Integral b) => a -> b -> a
setByte3 :: a -> b -> a
setByte3 point :: a
point word :: b
word = a
point a -> a -> a
forall a. Bits a => a -> a -> a
.|. (b -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral (b
word b -> b -> b
forall a. Bits a => a -> a -> a
.&. 0x3f) a -> Int -> a
forall a. Bits a => a -> Int -> a
`shiftL` 12)
{-# INLINE setByte3 #-}

setByte3Top :: (Num a, Bits b, Bits a, Integral b) => a -> b -> a
setByte3Top :: a -> b -> a
setByte3Top point :: a
point word :: b
word = a
point a -> a -> a
forall a. Bits a => a -> a -> a
.|. (b -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral (b
word b -> b -> b
forall a. Bits a => a -> a -> a
.&. 0xf) a -> Int -> a
forall a. Bits a => a -> Int -> a
`shiftL` 12)
{-# INLINE setByte3Top #-}

setByte4 :: (Num a, Bits b, Bits a, Integral b) => a -> b -> a
setByte4 :: a -> b -> a
setByte4 point :: a
point word :: b
word = a
point a -> a -> a
forall a. Bits a => a -> a -> a
.|. (b -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral (b
word b -> b -> b
forall a. Bits a => a -> a -> a
.&. 0x7) a -> Int -> a
forall a. Bits a => a -> Int -> a
`shiftL` 18)
{-# INLINE setByte4 #-}

decode :: Utf -> Word32 -> Word8 -> (Utf, Word32)
decode :: Utf -> Word32 -> Word8 -> (Utf, Word32)
decode UtfGround point :: Word32
point word :: Word8
word = case Word8
word of
    w :: Word8
w | 0x00 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
w Bool -> Bool -> Bool
&& Word8
w Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= 0x7f -> (Utf
UtfGround, Word8 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
word)
    w :: Word8
w | 0xc2 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
w Bool -> Bool -> Bool
&& Word8
w Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= 0xdf -> (Utf
UtfTail1, Word32 -> Word8 -> Word32
forall a b. (Num a, Bits b, Bits a, Integral b) => a -> b -> a
setByte2Top Word32
point Word8
word)
    0xe0                       -> (Utf
UtfU32e0, Word32 -> Word8 -> Word32
forall a b. (Num a, Bits b, Bits a, Integral b) => a -> b -> a
setByte3Top Word32
point Word8
word)
    w :: Word8
w | 0xe1 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
w Bool -> Bool -> Bool
&& Word8
w Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= 0xec -> (Utf
UtfTail2, Word32 -> Word8 -> Word32
forall a b. (Num a, Bits b, Bits a, Integral b) => a -> b -> a
setByte3Top Word32
point Word8
word)
    0xed                       -> (Utf
UtfU32ed, Word32 -> Word8 -> Word32
forall a b. (Num a, Bits b, Bits a, Integral b) => a -> b -> a
setByte3Top Word32
point Word8
word)
    w :: Word8
w | 0xee Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
w Bool -> Bool -> Bool
&& Word8
w Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= 0xef -> (Utf
UtfTail2, Word32 -> Word8 -> Word32
forall a b. (Num a, Bits b, Bits a, Integral b) => a -> b -> a
setByte3Top Word32
point Word8
word)
    0xf0                       -> (Utf
Utf843f0, Word32 -> Word8 -> Word32
forall a b. (Num a, Bits b, Bits a, Integral b) => a -> b -> a
setByte4 Word32
point Word8
word)
    w :: Word8
w | 0xf1 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
w Bool -> Bool -> Bool
&& Word8
w Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= 0xf3 -> (Utf
UtfTail3, Word32 -> Word8 -> Word32
forall a b. (Num a, Bits b, Bits a, Integral b) => a -> b -> a
setByte4 Word32
point Word8
word)
    0xf4                       -> (Utf
Utf843f4, Word32 -> Word8 -> Word32
forall a b. (Num a, Bits b, Bits a, Integral b) => a -> b -> a
setByte4 Word32
point Word8
word)
    _                          -> (Utf, Word32)
forall a. a
throwDecodeError

decode UtfU32e0 point :: Word32
point word :: Word8
word = case Word8
word of
    w :: Word8
w | 0xa0 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
w Bool -> Bool -> Bool
&& Word8
w Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= 0xbf -> (Utf
UtfTail1, Word32 -> Word8 -> Word32
forall a b. (Num a, Bits b, Bits a, Integral b) => a -> b -> a
setByte2 Word32
point Word8
word)
    _                          -> (Utf, Word32)
forall a. a
throwDecodeError

decode UtfU32ed point :: Word32
point word :: Word8
word = case Word8
word of
    w :: Word8
w | 0x80 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
w Bool -> Bool -> Bool
&& Word8
w Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= 0x9f -> (Utf
UtfTail1, Word32 -> Word8 -> Word32
forall a b. (Num a, Bits b, Bits a, Integral b) => a -> b -> a
setByte2 Word32
point Word8
word)
    _                          -> (Utf, Word32)
forall a. a
throwDecodeError

decode Utf843f0 point :: Word32
point word :: Word8
word = case Word8
word of
    w :: Word8
w | 0x90 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
w Bool -> Bool -> Bool
&& Word8
w Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= 0xbf -> (Utf
UtfTail2, Word32 -> Word8 -> Word32
forall a b. (Num a, Bits b, Bits a, Integral b) => a -> b -> a
setByte3 Word32
point Word8
word)
    _                          -> (Utf, Word32)
forall a. a
throwDecodeError

decode Utf843f4 point :: Word32
point word :: Word8
word = case Word8
word of
    w :: Word8
w | 0x80 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
w Bool -> Bool -> Bool
&& Word8
w Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= 0x8f -> (Utf
UtfTail2, Word32 -> Word8 -> Word32
forall a b. (Num a, Bits b, Bits a, Integral b) => a -> b -> a
setByte3 Word32
point Word8
word)
    _                          -> (Utf, Word32)
forall a. a
throwDecodeError

decode UtfTail3 point :: Word32
point word :: Word8
word = case Word8
word of
    w :: Word8
w | 0x80 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
w Bool -> Bool -> Bool
&& Word8
w Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= 0xbf -> (Utf
UtfTail2, Word32 -> Word8 -> Word32
forall a b. (Num a, Bits b, Bits a, Integral b) => a -> b -> a
setByte3 Word32
point Word8
word)
    _                          -> (Utf, Word32)
forall a. a
throwDecodeError

decode UtfTail2 point :: Word32
point word :: Word8
word = case Word8
word of
    w :: Word8
w | 0x80 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
w Bool -> Bool -> Bool
&& Word8
w Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= 0xbf -> (Utf
UtfTail1, Word32 -> Word8 -> Word32
forall a b. (Num a, Bits b, Bits a, Integral b) => a -> b -> a
setByte2 Word32
point Word8
word)
    _                          -> (Utf, Word32)
forall a. a
throwDecodeError

decode UtfTail1 point :: Word32
point word :: Word8
word = case Word8
word of
    w :: Word8
w | 0x80 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
w Bool -> Bool -> Bool
&& Word8
w Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= 0xbf -> (Utf
UtfGround, Word32 -> Word8 -> Word32
forall a b. (Num a, Bits b, Bits a, Integral b) => a -> b -> a
setByte1 Word32
point Word8
word)
    _                          -> (Utf, Word32)
forall a. a
throwDecodeError

decodeHex :: Word8 -> Word16
decodeHex :: Word8 -> Word16
decodeHex x :: Word8
x
  | 48 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
x Bool -> Bool -> Bool
&& Word8
x Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<=  57 = Word8 -> Word16
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
x Word16 -> Word16 -> Word16
forall a. Num a => a -> a -> a
- 48  -- 0-9
  | 65 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
x Bool -> Bool -> Bool
&& Word8
x Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<=  70 = Word8 -> Word16
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
x Word16 -> Word16 -> Word16
forall a. Num a => a -> a -> a
- 55  -- A-F
  | 97 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
x Bool -> Bool -> Bool
&& Word8
x Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= 102 = Word8 -> Word16
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
x Word16 -> Word16 -> Word16
forall a. Num a => a -> a -> a
- 87  -- a-f
  | Bool
otherwise = Word16
forall a. a
throwDecodeError

unescapeText' :: ByteString -> Text
unescapeText' :: ByteString -> Text
unescapeText' bs :: ByteString
bs = (forall s. (MArray s -> Int -> ST s Text) -> ST s Text) -> Text
runText ((forall s. (MArray s -> Int -> ST s Text) -> ST s Text) -> Text)
-> (forall s. (MArray s -> Int -> ST s Text) -> ST s Text) -> Text
forall a b. (a -> b) -> a -> b
$ \done :: MArray s -> Int -> ST s Text
done -> do
    MArray s
dest <- Int -> ST s (MArray s)
forall s. Int -> ST s (MArray s)
A.new Int
len

    (pos :: Int
pos, finalState :: State
finalState) <- MArray s -> (Int, State) -> Int -> ST s (Int, State)
forall s. MArray s -> (Int, State) -> Int -> ST s (Int, State)
loop MArray s
dest (0, State
StateNone) 0

    -- Check final state. Currently pos gets only increased over time, so this check should catch overflows.
    Bool -> ST s () -> ST s ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when ( State
finalState State -> State -> Bool
forall a. Eq a => a -> a -> Bool
/= State
StateNone Bool -> Bool -> Bool
|| Int
pos Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
len)
      ST s ()
forall a. a
throwDecodeError

    MArray s -> Int -> ST s Text
done MArray s
dest Int
pos -- TODO: pos, pos-1??? XXX

    where
      len :: Int
len = ByteString -> Int
B.length ByteString
bs

      runUtf :: MArray s -> Int -> Utf -> Word32 -> Word8 -> ST s (Int, State)
runUtf dest :: MArray s
dest pos :: Int
pos st :: Utf
st point :: Word32
point c :: Word8
c = case Utf -> Word32 -> Word8 -> (Utf, Word32)
decode Utf
st Word32
point Word8
c of
        (UtfGround, 92) -> -- Backslash
            (Int, State) -> ST s (Int, State)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
pos, State
StateBackslash)
        (UtfGround, w :: Word32
w) | Word32
w Word32 -> Word32 -> Bool
forall a. Ord a => a -> a -> Bool
<= 0xffff ->
            MArray s -> Int -> Word16 -> State -> ST s (Int, State)
forall s t. MArray s -> Int -> Word16 -> t -> ST s (Int, t)
writeAndReturn MArray s
dest Int
pos (Word32 -> Word16
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word32
w) State
StateNone
        (UtfGround, w :: Word32
w) -> do
            MArray s -> Int -> Word16 -> ST s ()
forall s. MArray s -> Int -> Word16 -> ST s ()
write MArray s
dest Int
pos (0xd7c0 Word16 -> Word16 -> Word16
forall a. Num a => a -> a -> a
+ Word32 -> Word16
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word32
w Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
`shiftR` 10))
            MArray s -> Int -> Word16 -> State -> ST s (Int, State)
forall s t. MArray s -> Int -> Word16 -> t -> ST s (Int, t)
writeAndReturn MArray s
dest (Int
pos Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1) (0xdc00 Word16 -> Word16 -> Word16
forall a. Num a => a -> a -> a
+ Word32 -> Word16
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word32
w Word32 -> Word32 -> Word32
forall a. Bits a => a -> a -> a
.&. 0x3ff)) State
StateNone
        (st' :: Utf
st', p :: Word32
p) ->
            (Int, State) -> ST s (Int, State)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
pos, Utf -> Word32 -> State
StateUtf Utf
st' Word32
p)

      loop :: A.MArray s -> (Int, State) -> Int -> ST s (Int, State)
      loop :: MArray s -> (Int, State) -> Int -> ST s (Int, State)
loop _ ps :: (Int, State)
ps i :: Int
i | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
len = (Int, State) -> ST s (Int, State)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int, State)
ps
      loop dest :: MArray s
dest ps :: (Int, State)
ps i :: Int
i = do
        let c :: Word8
c = ByteString -> Int -> Word8
B.index ByteString
bs Int
i -- JP: We can use unsafe index once we prove bounds with Liquid Haskell.
        (Int, State)
ps' <- MArray s -> (Int, State) -> Word8 -> ST s (Int, State)
forall s. MArray s -> (Int, State) -> Word8 -> ST s (Int, State)
f MArray s
dest (Int, State)
ps Word8
c
        MArray s -> (Int, State) -> Int -> ST s (Int, State)
forall s. MArray s -> (Int, State) -> Int -> ST s (Int, State)
loop MArray s
dest (Int, State)
ps' (Int -> ST s (Int, State)) -> Int -> ST s (Int, State)
forall a b. (a -> b) -> a -> b
$ Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+1

      -- No pending state.
      f :: MArray s -> (Int, State) -> Word8 -> ST s (Int, State)
f dest :: MArray s
dest (pos :: Int
pos, StateNone) c :: Word8
c = MArray s -> Int -> Utf -> Word32 -> Word8 -> ST s (Int, State)
forall s.
MArray s -> Int -> Utf -> Word32 -> Word8 -> ST s (Int, State)
runUtf MArray s
dest Int
pos Utf
UtfGround 0 Word8
c

      -- In the middle of parsing a UTF string.
      f dest :: MArray s
dest (pos :: Int
pos, StateUtf st :: Utf
st point :: Word32
point) c :: Word8
c = MArray s -> Int -> Utf -> Word32 -> Word8 -> ST s (Int, State)
forall s.
MArray s -> Int -> Utf -> Word32 -> Word8 -> ST s (Int, State)
runUtf MArray s
dest Int
pos Utf
st Word32
point Word8
c

      -- In the middle of escaping a backslash.
      f dest :: MArray s
dest (pos :: Int
pos, StateBackslash)  34 = MArray s -> Int -> Word16 -> State -> ST s (Int, State)
forall s t. MArray s -> Int -> Word16 -> t -> ST s (Int, t)
writeAndReturn MArray s
dest Int
pos 34 State
StateNone -- "
      f dest :: MArray s
dest (pos :: Int
pos, StateBackslash)  92 = MArray s -> Int -> Word16 -> State -> ST s (Int, State)
forall s t. MArray s -> Int -> Word16 -> t -> ST s (Int, t)
writeAndReturn MArray s
dest Int
pos 92 State
StateNone -- Backslash
      f dest :: MArray s
dest (pos :: Int
pos, StateBackslash)  47 = MArray s -> Int -> Word16 -> State -> ST s (Int, State)
forall s t. MArray s -> Int -> Word16 -> t -> ST s (Int, t)
writeAndReturn MArray s
dest Int
pos 47 State
StateNone -- /
      f dest :: MArray s
dest (pos :: Int
pos, StateBackslash)  98 = MArray s -> Int -> Word16 -> State -> ST s (Int, State)
forall s t. MArray s -> Int -> Word16 -> t -> ST s (Int, t)
writeAndReturn MArray s
dest Int
pos  8 State
StateNone -- b
      f dest :: MArray s
dest (pos :: Int
pos, StateBackslash) 102 = MArray s -> Int -> Word16 -> State -> ST s (Int, State)
forall s t. MArray s -> Int -> Word16 -> t -> ST s (Int, t)
writeAndReturn MArray s
dest Int
pos 12 State
StateNone -- f
      f dest :: MArray s
dest (pos :: Int
pos, StateBackslash) 110 = MArray s -> Int -> Word16 -> State -> ST s (Int, State)
forall s t. MArray s -> Int -> Word16 -> t -> ST s (Int, t)
writeAndReturn MArray s
dest Int
pos 10 State
StateNone -- n
      f dest :: MArray s
dest (pos :: Int
pos, StateBackslash) 114 = MArray s -> Int -> Word16 -> State -> ST s (Int, State)
forall s t. MArray s -> Int -> Word16 -> t -> ST s (Int, t)
writeAndReturn MArray s
dest Int
pos 13 State
StateNone -- r
      f dest :: MArray s
dest (pos :: Int
pos, StateBackslash) 116 = MArray s -> Int -> Word16 -> State -> ST s (Int, State)
forall s t. MArray s -> Int -> Word16 -> t -> ST s (Int, t)
writeAndReturn MArray s
dest Int
pos  9 State
StateNone -- t
      f    _ (pos :: Int
pos, StateBackslash) 117 = (Int, State) -> ST s (Int, State)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
pos, State
StateU0)                -- u
      f    _ (  _, StateBackslash) _   = ST s (Int, State)
forall a. a
throwDecodeError

      -- Processing '\u'.
      f _ (pos :: Int
pos, StateU0) c :: Word8
c =
        let w :: Word16
w = Word8 -> Word16
decodeHex Word8
c in
        (Int, State) -> ST s (Int, State)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
pos, Word16 -> State
StateU1 (Word16
w Word16 -> Int -> Word16
forall a. Bits a => a -> Int -> a
`shiftL` 12))

      f _ (pos :: Int
pos, StateU1 w' :: Word16
w') c :: Word8
c =
        let w :: Word16
w = Word8 -> Word16
decodeHex Word8
c in
        (Int, State) -> ST s (Int, State)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
pos, Word16 -> State
StateU2 (Word16
w' Word16 -> Word16 -> Word16
forall a. Bits a => a -> a -> a
.|. (Word16
w Word16 -> Int -> Word16
forall a. Bits a => a -> Int -> a
`shiftL` 8)))

      f _ (pos :: Int
pos, StateU2 w' :: Word16
w') c :: Word8
c =
        let w :: Word16
w = Word8 -> Word16
decodeHex Word8
c in
        (Int, State) -> ST s (Int, State)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
pos, Word16 -> State
StateU3 (Word16
w' Word16 -> Word16 -> Word16
forall a. Bits a => a -> a -> a
.|. (Word16
w Word16 -> Int -> Word16
forall a. Bits a => a -> Int -> a
`shiftL` 4)))

      f dest :: MArray s
dest (pos :: Int
pos, StateU3 w' :: Word16
w') c :: Word8
c =
        let w :: Word16
w = Word8 -> Word16
decodeHex Word8
c in
        let u :: Word16
u = Word16
w' Word16 -> Word16 -> Word16
forall a. Bits a => a -> a -> a
.|. Word16
w in

        -- Get next state based on surrogates.
        let st :: State
st
              | Word16
u Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
>= 0xd800 Bool -> Bool -> Bool
&& Word16
u Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
<= 0xdbff = -- High surrogate.
                State
StateS0
              | Word16
u Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
>= 0xdc00 Bool -> Bool -> Bool
&& Word16
u Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
<= 0xdfff = -- Low surrogate.
                State
forall a. a
throwDecodeError
              | Bool
otherwise =
                State
StateNone
        in
        MArray s -> Int -> Word16 -> State -> ST s (Int, State)
forall s t. MArray s -> Int -> Word16 -> t -> ST s (Int, t)
writeAndReturn MArray s
dest Int
pos Word16
u State
st

      -- Handle surrogates.
      f _ (pos :: Int
pos, StateS0) 92 = (Int, State) -> ST s (Int, State)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
pos, State
StateS1) -- Backslash
      f _ (  _, StateS0)  _ = ST s (Int, State)
forall a. a
throwDecodeError

      f _ (pos :: Int
pos, StateS1) 117 = (Int, State) -> ST s (Int, State)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
pos, State
StateSU0) -- u
      f _ (  _, StateS1)   _ = ST s (Int, State)
forall a. a
throwDecodeError

      f _ (pos :: Int
pos, StateSU0) c :: Word8
c =
        let w :: Word16
w = Word8 -> Word16
decodeHex Word8
c in
        (Int, State) -> ST s (Int, State)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
pos, Word16 -> State
StateSU1 (Word16
w Word16 -> Int -> Word16
forall a. Bits a => a -> Int -> a
`shiftL` 12))

      f _ (pos :: Int
pos, StateSU1 w' :: Word16
w') c :: Word8
c =
        let w :: Word16
w = Word8 -> Word16
decodeHex Word8
c in
        (Int, State) -> ST s (Int, State)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
pos, Word16 -> State
StateSU2 (Word16
w' Word16 -> Word16 -> Word16
forall a. Bits a => a -> a -> a
.|. (Word16
w Word16 -> Int -> Word16
forall a. Bits a => a -> Int -> a
`shiftL` 8)))

      f _ (pos :: Int
pos, StateSU2 w' :: Word16
w') c :: Word8
c =
        let w :: Word16
w = Word8 -> Word16
decodeHex Word8
c in
        (Int, State) -> ST s (Int, State)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
pos, Word16 -> State
StateSU3 (Word16
w' Word16 -> Word16 -> Word16
forall a. Bits a => a -> a -> a
.|. (Word16
w Word16 -> Int -> Word16
forall a. Bits a => a -> Int -> a
`shiftL` 4)))

      f dest :: MArray s
dest (pos :: Int
pos, StateSU3 w' :: Word16
w') c :: Word8
c =
        let w :: Word16
w = Word8 -> Word16
decodeHex Word8
c in
        let u :: Word16
u = Word16
w' Word16 -> Word16 -> Word16
forall a. Bits a => a -> a -> a
.|. Word16
w in

        -- Check if not low surrogate.
        if Word16
u Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
< 0xdc00 Bool -> Bool -> Bool
|| Word16
u Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
> 0xdfff then
          ST s (Int, State)
forall a. a
throwDecodeError
        else
          MArray s -> Int -> Word16 -> State -> ST s (Int, State)
forall s t. MArray s -> Int -> Word16 -> t -> ST s (Int, t)
writeAndReturn MArray s
dest Int
pos Word16
u State
StateNone

write :: A.MArray s -> Int -> Word16 -> ST s ()
write :: MArray s -> Int -> Word16 -> ST s ()
write dest :: MArray s
dest pos :: Int
pos char :: Word16
char =
    MArray s -> Int -> Word16 -> ST s ()
forall s. MArray s -> Int -> Word16 -> ST s ()
A.unsafeWrite MArray s
dest Int
pos Word16
char
{-# INLINE write #-}

writeAndReturn :: A.MArray s -> Int -> Word16 -> t -> ST s (Int, t)
writeAndReturn :: MArray s -> Int -> Word16 -> t -> ST s (Int, t)
writeAndReturn dest :: MArray s
dest pos :: Int
pos char :: Word16
char res :: t
res = do
    MArray s -> Int -> Word16 -> ST s ()
forall s. MArray s -> Int -> Word16 -> ST s ()
write MArray s
dest Int
pos Word16
char
    (Int, t) -> ST s (Int, t)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
pos Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1, t
res)
{-# INLINE writeAndReturn #-}

throwDecodeError :: a
throwDecodeError :: a
throwDecodeError =
    let desc :: [Char]
desc = "Data.Text.Internal.Encoding.decodeUtf8: Invalid UTF-8 stream" in
    UnicodeException -> a
forall a e. Exception e => e -> a
throw ([Char] -> Maybe Word8 -> UnicodeException
DecodeError [Char]
desc Maybe Word8
forall a. Maybe a
Nothing)

unescapeText :: ByteString -> Either UnicodeException Text
unescapeText :: ByteString -> Either UnicodeException Text
unescapeText = IO (Either UnicodeException Text) -> Either UnicodeException Text
forall a. IO a -> a
unsafeDupablePerformIO (IO (Either UnicodeException Text) -> Either UnicodeException Text)
-> (ByteString -> IO (Either UnicodeException Text))
-> ByteString
-> Either UnicodeException Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. IO Text -> IO (Either UnicodeException Text)
forall e a. Exception e => IO a -> IO (Either e a)
try (IO Text -> IO (Either UnicodeException Text))
-> (ByteString -> IO Text)
-> ByteString
-> IO (Either UnicodeException Text)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> IO Text
forall a. a -> IO a
evaluate (Text -> IO Text) -> (ByteString -> Text) -> ByteString -> IO Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Text
unescapeText'