{-# LANGUAGE BangPatterns #-}

------------------------------------------------------------------------------
-- |
-- Module      :  Data.Attoparsec.FastSet
-- Copyright   :  Felipe Lessa 2010, Bryan O'Sullivan 2007-2015
-- License     :  BSD3
--
-- Maintainer  :  felipe.lessa@gmail.com
-- Stability   :  experimental
-- Portability :  unknown
--
-- Fast set membership tests for 'Char' values. We test for
-- membership using a hashtable implemented with Robin Hood
-- collision resolution. The set representation is unboxed,
-- and the characters and hashes interleaved, for efficiency.
--
--
-----------------------------------------------------------------------------
module Data.Attoparsec.Text.FastSet
    (
    -- * Data type
      FastSet
    -- * Construction
    , fromList
    , set
    -- * Lookup
    , member
    -- * Handy interface
    , charClass
    ) where

import Data.Bits ((.|.), (.&.), shiftR)
import Data.Function (on)
import Data.List (sort, sortBy)
import qualified Data.Array.Base as AB
import qualified Data.Array.Unboxed as A
import qualified Data.Text as T

data FastSet = FastSet {
    FastSet -> UArray Int Int
table :: {-# UNPACK #-} !(A.UArray Int Int)
  , FastSet -> Int
mask  :: {-# UNPACK #-} !Int
  }

data Entry = Entry {
    Entry -> Char
key          :: {-# UNPACK #-} !Char
  , Entry -> Int
initialIndex :: {-# UNPACK #-} !Int
  , Entry -> Int
index        :: {-# UNPACK #-} !Int
  }

offset :: Entry -> Int
offset :: Entry -> Int
offset e :: Entry
e = Entry -> Int
index Entry
e Int -> Int -> Int
forall a. Num a => a -> a -> a
- Entry -> Int
initialIndex Entry
e

resolveCollisions :: [Entry] -> [Entry]
resolveCollisions :: [Entry] -> [Entry]
resolveCollisions [] = []
resolveCollisions [e :: Entry
e] = [Entry
e]
resolveCollisions (a :: Entry
a:b :: Entry
b:entries :: [Entry]
entries) = Entry
a' Entry -> [Entry] -> [Entry]
forall a. a -> [a] -> [a]
: [Entry] -> [Entry]
resolveCollisions (Entry
b' Entry -> [Entry] -> [Entry]
forall a. a -> [a] -> [a]
: [Entry]
entries)
  where (a' :: Entry
a', b' :: Entry
b')
          | Entry -> Int
index Entry
a Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Entry -> Int
index Entry
b   = (Entry
a, Entry
b)
          | Entry -> Int
offset Entry
a Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Entry -> Int
offset Entry
b = (Entry
b { index :: Int
index=Entry -> Int
index Entry
a }, Entry
a { index :: Int
index=Entry -> Int
index Entry
a Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1 })
          | Bool
otherwise           = (Entry
a, Entry
b { index :: Int
index=Entry -> Int
index Entry
a Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1 })

pad :: Int -> [Entry] -> [Entry]
pad :: Int -> [Entry] -> [Entry]
pad = Int -> Int -> [Entry] -> [Entry]
go 0
  where -- ensure that we pad enough so that lookups beyond the
        -- last hash in the table fall within the array
        go :: Int -> Int -> [Entry] -> [Entry]
go !Int
_ !Int
m []          = Int -> Entry -> [Entry]
forall a. Int -> a -> [a]
replicate (Int -> Int -> Int
forall a. Ord a => a -> a -> a
max 1 Int
m Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1) Entry
empty
        go  k :: Int
k  m :: Int
m (e :: Entry
e:entries :: [Entry]
entries) = (Int -> Entry) -> [Int] -> [Entry]
forall a b. (a -> b) -> [a] -> [b]
map (Entry -> Int -> Entry
forall a b. a -> b -> a
const Entry
empty) [Int
k..Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
- 1] [Entry] -> [Entry] -> [Entry]
forall a. [a] -> [a] -> [a]
++ Entry
e Entry -> [Entry] -> [Entry]
forall a. a -> [a] -> [a]
:
                               Int -> Int -> [Entry] -> [Entry]
go (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1) (Int
m Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
k Int -> Int -> Int
forall a. Num a => a -> a -> a
- 1) [Entry]
entries
          where i :: Int
i            = Entry -> Int
index Entry
e
        empty :: Entry
empty                = Char -> Int -> Int -> Entry
Entry '\0' Int
forall a. Bounded a => a
maxBound 0

nextPowerOf2 :: Int -> Int
nextPowerOf2 :: Int -> Int
nextPowerOf2 0  = 1
nextPowerOf2 x :: Int
x  = Int -> Int -> Int
forall t. (Num t, Bits t) => t -> Int -> t
go (Int
x Int -> Int -> Int
forall a. Num a => a -> a -> a
- 1) 1
  where go :: t -> Int -> t
go y :: t
y 32 = t
y t -> t -> t
forall a. Num a => a -> a -> a
+ 1
        go y :: t
y k :: Int
k  = t -> Int -> t
go (t
y t -> t -> t
forall a. Bits a => a -> a -> a
.|. (t
y t -> Int -> t
forall a. Bits a => a -> Int -> a
`shiftR` Int
k)) (Int -> t) -> Int -> t
forall a b. (a -> b) -> a -> b
$ Int
k Int -> Int -> Int
forall a. Num a => a -> a -> a
* 2

fastHash :: Char -> Int
fastHash :: Char -> Int
fastHash = Char -> Int
forall a. Enum a => a -> Int
fromEnum

fromList :: String -> FastSet
fromList :: String -> FastSet
fromList s :: String
s = UArray Int Int -> Int -> FastSet
FastSet ((Int, Int) -> [Int] -> UArray Int Int
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
(i, i) -> [e] -> a i e
AB.listArray (0, [Int] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Int]
interleaved Int -> Int -> Int
forall a. Num a => a -> a -> a
- 1) [Int]
interleaved)
             Int
mask'
  where s' :: String
s'      = String -> String
forall a. Eq a => [a] -> [a]
ordNub (String -> String
forall a. Ord a => [a] -> [a]
sort String
s)
        l :: Int
l       = String -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length String
s'
        mask' :: Int
mask'   = Int -> Int
nextPowerOf2 ((5 Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
l) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` 4) Int -> Int -> Int
forall a. Num a => a -> a -> a
- 1
        entries :: [Entry]
entries = Int -> [Entry] -> [Entry]
pad Int
mask' ([Entry] -> [Entry]) -> (String -> [Entry]) -> String -> [Entry]
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
                  [Entry] -> [Entry]
resolveCollisions ([Entry] -> [Entry]) -> (String -> [Entry]) -> String -> [Entry]
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
                  (Entry -> Entry -> Ordering) -> [Entry] -> [Entry]
forall a. (a -> a -> Ordering) -> [a] -> [a]
sortBy (Int -> Int -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (Int -> Int -> Ordering)
-> (Entry -> Int) -> Entry -> Entry -> Ordering
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` Entry -> Int
initialIndex) ([Entry] -> [Entry]) -> (String -> [Entry]) -> String -> [Entry]
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
                  (Char -> Int -> Entry) -> String -> [Int] -> [Entry]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (\c :: Char
c i :: Int
i -> Char -> Int -> Int -> Entry
Entry Char
c Int
i Int
i) String
s' ([Int] -> [Entry]) -> (String -> [Int]) -> String -> [Entry]
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
                  (Char -> Int) -> String -> [Int]
forall a b. (a -> b) -> [a] -> [b]
map ((Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
mask') (Int -> Int) -> (Char -> Int) -> Char -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Int
fastHash) (String -> [Entry]) -> String -> [Entry]
forall a b. (a -> b) -> a -> b
$ String
s'
        interleaved :: [Int]
interleaved = (Entry -> [Int]) -> [Entry] -> [Int]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap (\e :: Entry
e -> [Char -> Int
forall a. Enum a => a -> Int
fromEnum (Char -> Int) -> Char -> Int
forall a b. (a -> b) -> a -> b
$ Entry -> Char
key Entry
e, Entry -> Int
initialIndex Entry
e])
                      [Entry]
entries

ordNub :: Eq a => [a] -> [a]
ordNub :: [a] -> [a]
ordNub []     = []
ordNub (y :: a
y:ys :: [a]
ys) = a -> [a] -> [a]
forall a. Eq a => a -> [a] -> [a]
go a
y [a]
ys
  where go :: a -> [a] -> [a]
go x :: a
x (z :: a
z:zs :: [a]
zs)
          | a
x a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
z    = a -> [a] -> [a]
go a
x [a]
zs
          | Bool
otherwise = a
x a -> [a] -> [a]
forall a. a -> [a] -> [a]
: a -> [a] -> [a]
go a
z [a]
zs
        go x :: a
x []       = [a
x]

set :: T.Text -> FastSet
set :: Text -> FastSet
set = String -> FastSet
fromList (String -> FastSet) -> (Text -> String) -> Text -> FastSet
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
T.unpack

-- | Check the set for membership.
member :: Char -> FastSet -> Bool
member :: Char -> FastSet -> Bool
member c :: Char
c a :: FastSet
a           = Int -> Bool
go (2 Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
i)
  where i :: Int
i            = Char -> Int
fastHash Char
c Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. FastSet -> Int
mask FastSet
a
        lookupAt :: Int -> Bool -> Bool
lookupAt j :: Int
j b :: Bool
b = (Int
i' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
i) Bool -> Bool -> Bool
&& (Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
c' Bool -> Bool -> Bool
|| Bool
b)
            where c' :: Char
c' = Int -> Char
forall a. Enum a => Int -> a
toEnum (Int -> Char) -> Int -> Char
forall a b. (a -> b) -> a -> b
$ UArray Int Int -> Int -> Int
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> Int -> e
AB.unsafeAt (FastSet -> UArray Int Int
table FastSet
a) Int
j
                  i' :: Int
i' = UArray Int Int -> Int -> Int
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> Int -> e
AB.unsafeAt (FastSet -> UArray Int Int
table FastSet
a) (Int -> Int) -> Int -> Int
forall a b. (a -> b) -> a -> b
$ Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1
        go :: Int -> Bool
go j :: Int
j         = Int -> Bool -> Bool
lookupAt Int
j (Bool -> Bool) -> (Int -> Bool) -> Int -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Bool -> Bool
lookupAt (Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 2) (Bool -> Bool) -> (Int -> Bool) -> Int -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Bool -> Bool
lookupAt (Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 4) (Bool -> Bool) -> (Int -> Bool) -> Int -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
                       Int -> Bool -> Bool
lookupAt (Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 6) (Bool -> Bool) -> (Int -> Bool) -> Int -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Bool
go (Int -> Bool) -> Int -> Bool
forall a b. (a -> b) -> a -> b
$ Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 8

charClass :: String -> FastSet
charClass :: String -> FastSet
charClass = String -> FastSet
fromList (String -> FastSet) -> (String -> String) -> String -> FastSet
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
go
  where go :: String -> String
go (a :: Char
a:'-':b :: Char
b:xs :: String
xs) = [Char
a..Char
b] String -> String -> String
forall a. [a] -> [a] -> [a]
++ String -> String
go String
xs
        go (x :: Char
x:xs :: String
xs)       = Char
x Char -> String -> String
forall a. a -> [a] -> [a]
: String -> String
go String
xs
        go _            = ""