{-# LANGUAGE BangPatterns #-}
module Data.Attoparsec.Text.FastSet
(
FastSet
, fromList
, set
, member
, charClass
) where
import Data.Bits ((.|.), (.&.), shiftR)
import Data.Function (on)
import Data.List (sort, sortBy)
import qualified Data.Array.Base as AB
import qualified Data.Array.Unboxed as A
import qualified Data.Text as T
data FastSet = FastSet {
FastSet -> UArray Int Int
table :: {-# UNPACK #-} !(A.UArray Int Int)
, FastSet -> Int
mask :: {-# UNPACK #-} !Int
}
data Entry = Entry {
Entry -> Char
key :: {-# UNPACK #-} !Char
, Entry -> Int
initialIndex :: {-# UNPACK #-} !Int
, Entry -> Int
index :: {-# UNPACK #-} !Int
}
offset :: Entry -> Int
offset :: Entry -> Int
offset e :: Entry
e = Entry -> Int
index Entry
e Int -> Int -> Int
forall a. Num a => a -> a -> a
- Entry -> Int
initialIndex Entry
e
resolveCollisions :: [Entry] -> [Entry]
resolveCollisions :: [Entry] -> [Entry]
resolveCollisions [] = []
resolveCollisions [e :: Entry
e] = [Entry
e]
resolveCollisions (a :: Entry
a:b :: Entry
b:entries :: [Entry]
entries) = Entry
a' Entry -> [Entry] -> [Entry]
forall a. a -> [a] -> [a]
: [Entry] -> [Entry]
resolveCollisions (Entry
b' Entry -> [Entry] -> [Entry]
forall a. a -> [a] -> [a]
: [Entry]
entries)
where (a' :: Entry
a', b' :: Entry
b')
| Entry -> Int
index Entry
a Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Entry -> Int
index Entry
b = (Entry
a, Entry
b)
| Entry -> Int
offset Entry
a Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Entry -> Int
offset Entry
b = (Entry
b { index :: Int
index=Entry -> Int
index Entry
a }, Entry
a { index :: Int
index=Entry -> Int
index Entry
a Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1 })
| Bool
otherwise = (Entry
a, Entry
b { index :: Int
index=Entry -> Int
index Entry
a Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1 })
pad :: Int -> [Entry] -> [Entry]
pad :: Int -> [Entry] -> [Entry]
pad = Int -> Int -> [Entry] -> [Entry]
go 0
where
go :: Int -> Int -> [Entry] -> [Entry]
go !Int
_ !Int
m [] = Int -> Entry -> [Entry]
forall a. Int -> a -> [a]
replicate (Int -> Int -> Int
forall a. Ord a => a -> a -> a
max 1 Int
m Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1) Entry
empty
go k :: Int
k m :: Int
m (e :: Entry
e:entries :: [Entry]
entries) = (Int -> Entry) -> [Int] -> [Entry]
forall a b. (a -> b) -> [a] -> [b]
map (Entry -> Int -> Entry
forall a b. a -> b -> a
const Entry
empty) [Int
k..Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
- 1] [Entry] -> [Entry] -> [Entry]
forall a. [a] -> [a] -> [a]
++ Entry
e Entry -> [Entry] -> [Entry]
forall a. a -> [a] -> [a]
:
Int -> Int -> [Entry] -> [Entry]
go (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1) (Int
m Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
k Int -> Int -> Int
forall a. Num a => a -> a -> a
- 1) [Entry]
entries
where i :: Int
i = Entry -> Int
index Entry
e
empty :: Entry
empty = Char -> Int -> Int -> Entry
Entry '\0' Int
forall a. Bounded a => a
maxBound 0
nextPowerOf2 :: Int -> Int
nextPowerOf2 :: Int -> Int
nextPowerOf2 0 = 1
nextPowerOf2 x :: Int
x = Int -> Int -> Int
forall t. (Num t, Bits t) => t -> Int -> t
go (Int
x Int -> Int -> Int
forall a. Num a => a -> a -> a
- 1) 1
where go :: t -> Int -> t
go y :: t
y 32 = t
y t -> t -> t
forall a. Num a => a -> a -> a
+ 1
go y :: t
y k :: Int
k = t -> Int -> t
go (t
y t -> t -> t
forall a. Bits a => a -> a -> a
.|. (t
y t -> Int -> t
forall a. Bits a => a -> Int -> a
`shiftR` Int
k)) (Int -> t) -> Int -> t
forall a b. (a -> b) -> a -> b
$ Int
k Int -> Int -> Int
forall a. Num a => a -> a -> a
* 2
fastHash :: Char -> Int
fastHash :: Char -> Int
fastHash = Char -> Int
forall a. Enum a => a -> Int
fromEnum
fromList :: String -> FastSet
fromList :: String -> FastSet
fromList s :: String
s = UArray Int Int -> Int -> FastSet
FastSet ((Int, Int) -> [Int] -> UArray Int Int
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
(i, i) -> [e] -> a i e
AB.listArray (0, [Int] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Int]
interleaved Int -> Int -> Int
forall a. Num a => a -> a -> a
- 1) [Int]
interleaved)
Int
mask'
where s' :: String
s' = String -> String
forall a. Eq a => [a] -> [a]
ordNub (String -> String
forall a. Ord a => [a] -> [a]
sort String
s)
l :: Int
l = String -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length String
s'
mask' :: Int
mask' = Int -> Int
nextPowerOf2 ((5 Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
l) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` 4) Int -> Int -> Int
forall a. Num a => a -> a -> a
- 1
entries :: [Entry]
entries = Int -> [Entry] -> [Entry]
pad Int
mask' ([Entry] -> [Entry]) -> (String -> [Entry]) -> String -> [Entry]
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
[Entry] -> [Entry]
resolveCollisions ([Entry] -> [Entry]) -> (String -> [Entry]) -> String -> [Entry]
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
(Entry -> Entry -> Ordering) -> [Entry] -> [Entry]
forall a. (a -> a -> Ordering) -> [a] -> [a]
sortBy (Int -> Int -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (Int -> Int -> Ordering)
-> (Entry -> Int) -> Entry -> Entry -> Ordering
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` Entry -> Int
initialIndex) ([Entry] -> [Entry]) -> (String -> [Entry]) -> String -> [Entry]
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
(Char -> Int -> Entry) -> String -> [Int] -> [Entry]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (\c :: Char
c i :: Int
i -> Char -> Int -> Int -> Entry
Entry Char
c Int
i Int
i) String
s' ([Int] -> [Entry]) -> (String -> [Int]) -> String -> [Entry]
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
(Char -> Int) -> String -> [Int]
forall a b. (a -> b) -> [a] -> [b]
map ((Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
mask') (Int -> Int) -> (Char -> Int) -> Char -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Int
fastHash) (String -> [Entry]) -> String -> [Entry]
forall a b. (a -> b) -> a -> b
$ String
s'
interleaved :: [Int]
interleaved = (Entry -> [Int]) -> [Entry] -> [Int]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap (\e :: Entry
e -> [Char -> Int
forall a. Enum a => a -> Int
fromEnum (Char -> Int) -> Char -> Int
forall a b. (a -> b) -> a -> b
$ Entry -> Char
key Entry
e, Entry -> Int
initialIndex Entry
e])
[Entry]
entries
ordNub :: Eq a => [a] -> [a]
ordNub :: [a] -> [a]
ordNub [] = []
ordNub (y :: a
y:ys :: [a]
ys) = a -> [a] -> [a]
forall a. Eq a => a -> [a] -> [a]
go a
y [a]
ys
where go :: a -> [a] -> [a]
go x :: a
x (z :: a
z:zs :: [a]
zs)
| a
x a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
z = a -> [a] -> [a]
go a
x [a]
zs
| Bool
otherwise = a
x a -> [a] -> [a]
forall a. a -> [a] -> [a]
: a -> [a] -> [a]
go a
z [a]
zs
go x :: a
x [] = [a
x]
set :: T.Text -> FastSet
set :: Text -> FastSet
set = String -> FastSet
fromList (String -> FastSet) -> (Text -> String) -> Text -> FastSet
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
T.unpack
member :: Char -> FastSet -> Bool
member :: Char -> FastSet -> Bool
member c :: Char
c a :: FastSet
a = Int -> Bool
go (2 Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
i)
where i :: Int
i = Char -> Int
fastHash Char
c Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. FastSet -> Int
mask FastSet
a
lookupAt :: Int -> Bool -> Bool
lookupAt j :: Int
j b :: Bool
b = (Int
i' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
i) Bool -> Bool -> Bool
&& (Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
c' Bool -> Bool -> Bool
|| Bool
b)
where c' :: Char
c' = Int -> Char
forall a. Enum a => Int -> a
toEnum (Int -> Char) -> Int -> Char
forall a b. (a -> b) -> a -> b
$ UArray Int Int -> Int -> Int
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> Int -> e
AB.unsafeAt (FastSet -> UArray Int Int
table FastSet
a) Int
j
i' :: Int
i' = UArray Int Int -> Int -> Int
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> Int -> e
AB.unsafeAt (FastSet -> UArray Int Int
table FastSet
a) (Int -> Int) -> Int -> Int
forall a b. (a -> b) -> a -> b
$ Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1
go :: Int -> Bool
go j :: Int
j = Int -> Bool -> Bool
lookupAt Int
j (Bool -> Bool) -> (Int -> Bool) -> Int -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Bool -> Bool
lookupAt (Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 2) (Bool -> Bool) -> (Int -> Bool) -> Int -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Bool -> Bool
lookupAt (Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 4) (Bool -> Bool) -> (Int -> Bool) -> Int -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
Int -> Bool -> Bool
lookupAt (Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 6) (Bool -> Bool) -> (Int -> Bool) -> Int -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Bool
go (Int -> Bool) -> Int -> Bool
forall a b. (a -> b) -> a -> b
$ Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 8
charClass :: String -> FastSet
charClass :: String -> FastSet
charClass = String -> FastSet
fromList (String -> FastSet) -> (String -> String) -> String -> FastSet
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
go
where go :: String -> String
go (a :: Char
a:'-':b :: Char
b:xs :: String
xs) = [Char
a..Char
b] String -> String -> String
forall a. [a] -> [a] -> [a]
++ String -> String
go String
xs
go (x :: Char
x:xs :: String
xs) = Char
x Char -> String -> String
forall a. a -> [a] -> [a]
: String -> String
go String
xs
go _ = ""