module Codec.Archive.Tar.Index.IntTrie (
IntTrie(..),
construct,
lookup,
TrieLookup(..),
#ifdef TESTS
test1, test2, test3,
ValidPaths(..),
prop_lookup,
prop_completions,
prop_lookup_mono,
prop_completions_mono,
#endif
) where
import Prelude hiding (lookup)
import Data.Typeable (Typeable)
import qualified Data.Array.Unboxed as A
import Data.Array.IArray ((!))
import qualified Data.Bits as Bits
import Data.Word (Word32)
import Data.List hiding (lookup)
import Data.Function (on)
#ifdef TESTS
import Test.QuickCheck
import Control.Applicative ((<$>), (<*>))
#endif
newtype IntTrie k v = IntTrie (A.UArray Word32 Word32)
deriving (Eq, Show, Typeable)
#ifdef TESTS
example0 :: [(FilePath, Int)]
example0 =
[("foo-1.0/foo-1.0.cabal", 512)
,("foo-1.0/LICENSE", 2048)
,("foo-1.0/Data/Foo.hs", 4096)]
example1 :: Paths Word32 Word32
example1 =
[([1,2], 512)
,([1,3], 2048)
,([1,4,5], 4096)]
example2 :: Trie Word32 Word32
example2 = Trie [ Node 1 t1 ]
where
t1 = Trie [ Leaf 2 512, Leaf 3 2048, Node 4 t2 ]
t2 = Trie [ Leaf 5 4096 ]
example2' :: Trie Word32 Word32
example2' = Trie [ Node 0 t1 ]
where
t1 = Trie [ Node 3 t2 ]
t2 = Trie [ Node 1 t3, Node 2 t4 ]
t3 = Trie [ Leaf 4 10608 ]
t4 = Trie [ Leaf 4 10612 ]
example2'' :: Trie Word32 Word32
example2'' = Trie [ Node 1 t1, Node 2 t2 ]
where
t1 = Trie [ Leaf 4 10608 ]
t2 = Trie [ Leaf 4 10612 ]
example2''' :: Trie Word32 Word32
example2''' = Trie [ Node 0 t3 ]
where
t3 = Trie [ Node 4 t8, Node 6 t11 ]
t8 = Trie [ Node 1 t14 ]
t11 = Trie [ Leaf 5 10605 ]
t14 = Trie [ Node 2 t19, Node 3 t22 ]
t19 = Trie [ Leaf 7 10608 ]
t22 = Trie [ Leaf 7 10612 ]
test1 = example2 == mkTrie example1
#endif
tagLeaf, tagNode, untag :: Word32 -> Word32
tagLeaf = id
tagNode = flip Bits.setBit 31
untag = flip Bits.clearBit 31
isNode :: Word32 -> Bool
isNode = flip Bits.testBit 31
#ifdef TESTS
example3 :: [Word32]
example3 =
[1, tagNode 1,
3,
3, tagLeaf 2, tagLeaf 3, tagNode 4,
512, 2048, 10,
1, tagLeaf 5,
4096
]
test2 = example3 == flattenTrie example2
example4 :: IntTrie Int Int
example4 = IntTrie (mkArray example3)
test3 = case lookup example4 [1] of
Just (Completions [(2,_),(3,_),(4,_)]) -> True
_ -> False
test1, test2, test3 :: Bool
#endif
completionsFrom :: (Enum k, Enum v) => IntTrie k v -> Word32 -> Completions k v
completionsFrom trie@(IntTrie arr) nodeOff =
[ (word32ToEnum (untag key), next)
| keyOff <- [keysStart..keysEnd]
, let key = arr ! keyOff
entry = arr ! (keyOff + nodeSize)
next | isNode key = Completions (completionsFrom trie entry)
| otherwise = Entry (word32ToEnum entry)
]
where
nodeSize = arr ! nodeOff
keysStart = nodeOff + 1
keysEnd = nodeOff + nodeSize
construct :: (Ord k, Enum k, Enum v) => [([k], v)] -> IntTrie k v
construct = IntTrie . mkArray . flattenTrie . mkTrie
mkArray :: [Word32] -> A.UArray Word32 Word32
mkArray xs = A.listArray (0, fromIntegral (length xs) 1) xs
data TrieLookup k v = Entry !v | Completions (Completions k v) deriving Show
type Completions k v = [(k, TrieLookup k v)]
lookup :: forall k v. (Enum k, Enum v) => IntTrie k v -> [k] -> Maybe (TrieLookup k v)
lookup trie@(IntTrie arr) = go 0
where
go :: Word32 -> [k] -> Maybe (TrieLookup k v)
go nodeOff [] = Just (completions nodeOff)
go nodeOff (k:ks) = case search nodeOff (tagLeaf k') of
Just entryOff
| null ks -> Just (entry entryOff)
| otherwise -> Nothing
Nothing -> case search nodeOff (tagNode k') of
Nothing -> Nothing
Just entryOff -> go (arr ! entryOff) ks
where
k' = enumToWord32 k
entry entryOff = Entry (word32ToEnum (arr ! entryOff))
completions nodeOff = Completions (completionsFrom trie nodeOff)
search :: Word32 -> Word32 -> Maybe Word32
search nodeOff key = fmap (+nodeSize) (bsearch keysStart keysEnd key)
where
nodeSize = arr ! nodeOff
keysStart = nodeOff + 1
keysEnd = nodeOff + nodeSize
bsearch :: Word32 -> Word32 -> Word32 -> Maybe Word32
bsearch a b key
| a > b = Nothing
| otherwise = case compare key (arr ! mid) of
LT -> bsearch a (mid1) key
EQ -> Just mid
GT -> bsearch (mid+1) b key
where mid = (a + b) `div` 2
enumToWord32 :: Enum n => n -> Word32
enumToWord32 = fromIntegral . fromEnum
word32ToEnum :: Enum n => Word32 -> n
word32ToEnum = toEnum . fromIntegral
data TrieNodeF k v x = Leaf k v | Node k x deriving (Eq, Show)
instance Functor (TrieNodeF k v) where
fmap _ (Leaf k v) = Leaf k v
fmap f (Node k x) = Node k (f x)
type TrieF k v x = [TrieNodeF k v x]
newtype Trie k v = Trie (TrieF k v (Trie k v)) deriving (Eq, Show)
unfoldTrieNode :: (s -> TrieNodeF k v [s]) -> s -> TrieNodeF k v (Trie k v)
unfoldTrieNode f = fmap (unfoldTrie f) . f
unfoldTrie :: (s -> TrieNodeF k v [s]) -> [s] -> Trie k v
unfoldTrie f = Trie . map (unfoldTrieNode f)
type Paths k v = [([k], v)]
mkTrie :: Ord k => Paths k v -> Trie k v
mkTrie = unfoldTrie (fmap split) . split
. sortBy (compare `on` fst)
. filter (not . null . fst)
where
split :: Eq k => Paths k v -> TrieF k v (Paths k v)
split = map mkGroup . groupBy ((==) `on` (head . fst))
where
mkGroup = \ksvs@((k0:_,v0):_) ->
case [ (ks, v) | (_:ks, v) <- ksvs, not (null ks) ] of
[] -> Leaf k0 v0
ksvs' -> Node k0 ksvs'
type Offset = Int
flattenTrie :: (Enum k, Enum v) => Trie k v -> [Word32]
flattenTrie trie = go (queue [trie]) (size trie)
where
size (Trie tns) = 1 + 2 * length tns
go :: (Enum k, Enum v) => Q (Trie k v) -> Offset -> [Word32]
go todo !offset =
case dequeue todo of
Nothing -> []
Just (Trie tnodes, tries) ->
flat ++ go (tries `enqueue` tries') offset'
where
!count = length tnodes
flat = fromIntegral count : keys ++ values
(keys, values) = unzip (sortBy (compare `on` fst) keysValues)
(!keysValues, !tries', !offset') = doNodes offset [] [] tnodes
doNodes off kvs ts' [] = (kvs, reverse ts', off)
doNodes off kvs ts' (tn:tns) = case tn of
Leaf k v -> doNodes off (leafKV k v :kvs) ts' tns
Node k t -> doNodes (off + size t) (nodeKV k off:kvs) (t:ts') tns
leafKV k v = (tagLeaf (enum2Word32 k), enum2Word32 v)
nodeKV k o = (tagNode (enum2Word32 k), int2Word32 o)
data Q a = Q [a] [[a]]
queue :: [a] -> Q a
queue xs = Q xs []
enqueue :: Q a -> [a] -> Q a
enqueue (Q front back) [] = Q front back
enqueue (Q front back) xs = Q front (xs : back)
dequeue :: Q a -> Maybe (a, Q a)
dequeue (Q (x:xs) back) = Just (x, Q xs back)
dequeue (Q [] back) = case concat (reverse back) of
x:xs -> Just (x, Q xs [])
[] -> Nothing
int2Word32 :: Int -> Word32
int2Word32 = fromIntegral
enum2Word32 :: Enum n => n -> Word32
enum2Word32 = int2Word32 . fromEnum
#ifdef TESTS
prop_lookup :: (Ord k, Enum k, Eq v, Enum v, Show k, Show v)
=> [([k], v)] -> Bool
prop_lookup paths =
flip all paths $ \(key, value) ->
case lookup trie key of
Just (Entry value') | value' == value -> True
Just (Entry value') -> error $ "IntTrie: " ++ show (key, value, value')
Nothing -> error $ "IntTrie: didn't find " ++ show key
Just (Completions xs) -> error $ "IntTrie: " ++ show xs
where
trie = construct paths
prop_completions :: forall k v. (Ord k, Enum k, Eq v, Enum v) => [([k], v)] -> Bool
prop_completions paths =
mkTrie paths == convertCompletions (completionsFrom (construct paths) 0)
where
convertCompletions :: Ord k => Completions k v -> Trie k v
convertCompletions kls =
Trie [ case l of
Entry v -> Leaf k v
Completions kls' -> Node k (convertCompletions kls')
| (k, l) <- sortBy (compare `on` fst) kls ]
prop_lookup_mono :: ValidPaths -> Bool
prop_lookup_mono (ValidPaths paths) = prop_lookup paths
prop_completions_mono :: ValidPaths -> Bool
prop_completions_mono (ValidPaths paths) = prop_completions paths
newtype ValidPaths = ValidPaths (Paths Char Char) deriving Show
instance Arbitrary ValidPaths where
arbitrary =
ValidPaths . makeNoPrefix <$> listOf ((,) <$> listOf1 arbitrary <*> arbitrary)
where
makeNoPrefix [] = []
makeNoPrefix ((k,v):kvs)
| all (\(k', _) -> not (isPrefixOfOther k k')) kvs
= (k,v) : makeNoPrefix kvs
| otherwise = makeNoPrefix kvs
shrink (ValidPaths kvs) =
map ValidPaths . filter noPrefix . filter nonEmpty . shrink $ kvs
where
noPrefix [] = True
noPrefix ((k,_):kvs') = all (\(k', _) -> not (isPrefixOfOther k k')) kvs'
&& noPrefix kvs'
nonEmpty = all (not . null . fst)
isPrefixOfOther a b = a `isPrefixOf` b || b `isPrefixOf` a
#endif