We introduce a binary tree like data structure with next structure.

{-# LANGUAGE OverloadedStrings, ScopedTypeVariables #-}

import Prelude hiding (head, length, drop, take, lookup, null)
import Data.Function
import Data.ByteString.Char8 hiding (empty)
import Test.QuickCheck

--        [Node (current value) l v r eq]
--                              | | |  +------------------------------------+
--        +---------------------+ | +------------------+                    |
--        |                       |                    |                    |
--        +                       |                    +                    |
--  element less            value or nothing        elements that        elements that
--  than current            if it intermideate      are more then        have <current value>
--                              node                current              as prefix
-- 

Top level item represent empty value and can have a value.

type PrefixMap a = (Maybe a, PMap a)

Inner tree is either an empty value or node, that has left/right children and maybe can have a value and next element

data PMap  a = E
             | N ByteString (PMap a) (Maybe a) (PMap a) (PMap a)
             {-   current    less      value        more    eq   -}
             deriving (Show)

Having PrefixMap as a additional layer we can assume, that we have a non-null prefix on each level.

Introduce simple builders

empty :: PrefixMap a
empty = (Nothing, E)

node :: ByteString -> a -> PrefixMap a
node b a | null b    = (Just a, E) 
         | otherwise = (Nothing, N b E  (Just a)  E E)

Now inserting elements it’s a bit tricky and may be simplified in the way of removing not needed insances

insert :: ByteString -> a -> PrefixMap a -> PrefixMap a
insert b a (v,n) | null b    = (Just a, n)
                 | otherwise = (v, inner b a n)
inner :: ByteString -> a -> PMap a -> PMap a
inner b a E = N b E (Just a) E E
inner b a n@(N b' l v r e) | null b     = n
                           | otherwise  = 
  case comparing head b b' of
    LT -> N b' (inner b a l) v r e   -- value less then current
    GT -> N b' l v (inner b a r) e   -- value more then current
    EQ -> let x = commonPart b b' -- value has common part
              c = take x b
              c'= take x b'
              n' = N (drop x b') E v E e
          in if on (==) length c b'       -- b' isPrefix of b
                 then 
                  if on (==) length c b    -- b' == b 
                      then N c l (Just $! a `fq` v) r e
                      else N c l v r (inner (drop x b) a e) -- [b < b']
                 else -- [ c < b ]
                  if on (==) length c b
                      then N c' l (Just a) r n'
                      else N c  l Nothing  r (inner (drop x b) a n')
  where 
    fq a _ = a

lookup function

lookup :: ByteString -> PrefixMap a -> Maybe a
lookup b (v, n) | null b = v 
                | otherwise = lookinner b n
lookinner :: ByteString -> PMap a -> Maybe a
lookinner b E = Nothing
lookinner b (N b' l v r e) =
  case comparing head b b' of
    LT -> lookinner b l
    GT -> lookinner b r
    EQ -> let x = commonPart b b'
          in if x == length b'
                then if x == length b then v else lookinner (drop x b) e
                else Nothing
commonPart :: ByteString -> ByteString -> Int
commonPart a b = go 0
  where 
    go :: Int -> Int
    go x | x == y                = x
         | on (==) (findex x) a b = go (x+1)
         | otherwise             = x
    y = on min length a b
    findex = flip index
    {-# INLINE findex #-}

comparing = on compare

Check if we are right

prop_InsertList (ls::[String]) = 
  let x = Prelude.foldl (\o x -> insert (pack x) (pack x) o) empty ls
  in Prelude.all (\l -> (l=="") || pack l `lookup` x == Just (pack l)) ls

main = quickCheck prop_InsertList

What interesting is what properties to we have, ideally we can rewrite code thinking of a N c l v r e as a Tree (M v e)

Caveats:




comments powered by Disqus