Skip to content

Commit 1182f22

Browse files
authored
Improve performance of toMap when keys are mostly sorted (#82)
1 parent 9bf881b commit 1182f22

File tree

3 files changed

+206
-1
lines changed

3 files changed

+206
-1
lines changed

bench/Main.hs

+175
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
{-# LANGUAGE ApplicativeDo #-}
2+
{-# LANGUAGE BangPatterns #-}
3+
{-# LANGUAGE DeriveFunctor #-}
4+
{-# LANGUAGE ScopedTypeVariables #-}
5+
{-# LANGUAGE TypeApplications #-}
6+
7+
8+
module Main where
9+
10+
import qualified Data.ByteString as B
11+
import qualified Data.ByteString.Lazy as BL
12+
import qualified Proto3.Wire.Decode as De
13+
import qualified Proto3.Wire.Encode as En
14+
import Proto3.Wire
15+
16+
import Control.Applicative (liftA2, liftA3)
17+
import Control.Monad (forM)
18+
import Data.Maybe
19+
import Data.Word
20+
import Data.IORef
21+
22+
import Criterion (bench)
23+
import qualified Criterion as C
24+
import Criterion.Main (defaultMain)
25+
26+
data Tree a = Leaf | Branch a (Tree a) (Tree a)
27+
deriving (Eq, Functor)
28+
29+
data Rose a = Bud | Rose a [Rose a]
30+
31+
instance Foldable Tree where
32+
foldr _ z Leaf = z
33+
foldr f z (Branch a t1 t2) = foldr f (f a (foldr f z t2)) t1
34+
35+
sum Leaf = 0
36+
sum (Branch a t1 t2) =
37+
let !a1 = sum t1
38+
!a2 = sum t2
39+
in a + a1 + a2
40+
41+
instance Foldable Rose where
42+
foldMap f Bud = mempty
43+
foldMap f (Rose x rs) = f x <> ((foldMap.foldMap) f rs)
44+
45+
intTreeParser :: De.Parser De.RawMessage (Tree Word64)
46+
intTreeParser = liftA3 combine
47+
(De.at (De.repeated De.fixed64) (FieldNumber 0))
48+
(De.at (De.one (De.embedded' intTreeParser) Leaf) (FieldNumber 1))
49+
(De.at (De.one (De.embedded' intTreeParser) Leaf) (FieldNumber 2))
50+
where
51+
combine xs y z = Branch (sum xs) y z
52+
53+
intRoseParser :: De.Parser De.RawMessage (Rose Word64)
54+
intRoseParser = liftA2 (Rose @Word64)
55+
(De.at (De.one De.fixed64 0) (FieldNumber 0))
56+
(De.at (De.repeated (De.embedded' intRoseParser)) (FieldNumber 1))
57+
58+
detRandom :: [Word64]
59+
detRandom = concat . replicate 10 $
60+
[ 227, 133, 16, 164, 43,
61+
159, 207, 87, 180, 236,
62+
245, 128, 249, 170, 216,
63+
181, 164, 162, 239, 249,
64+
76, 237, 197, 246, 209,
65+
231, 124, 154, 55, 64,
66+
4, 114, 79, 199, 252,
67+
163, 116, 237, 209, 138,
68+
240, 148, 212, 224, 88,
69+
131, 122, 114, 158, 97,
70+
186, 3, 223, 230, 223,
71+
207, 93, 168, 48, 130,
72+
77, 122, 30, 222, 221,
73+
224, 243, 19, 175, 61,
74+
112, 246, 201, 57, 185,
75+
19, 128, 129, 138, 209,
76+
4, 153, 196, 238, 72,
77+
254, 157, 233, 81, 30,
78+
106, 249, 57, 214, 104,
79+
171, 146, 175, 185, 192,
80+
159, 207, 87, 180, 236,
81+
227, 133, 16, 164, 43,
82+
245, 128, 249, 170, 216,
83+
181, 164, 162, 239, 249,
84+
76, 237, 197, 246, 209,
85+
231, 124, 154, 55, 64,
86+
4, 114, 79, 199, 252,
87+
163, 116, 237, 209, 138,
88+
240, 148, 212, 224, 88,
89+
131, 122, 114, 158, 97,
90+
186, 3, 223, 230, 223,
91+
207, 93, 168, 48, 130,
92+
77, 122, 30, 222, 221,
93+
224, 243, 19, 175, 61,
94+
112, 246, 201, 57, 185,
95+
19, 128, 129, 138, 209,
96+
4, 153, 196, 238, 72,
97+
254, 157, 233, 81, 30,
98+
106, 249, 57, 214, 104,
99+
171, 146, 175, 185, 192,
100+
159, 207, 87, 180, 236,
101+
227, 133, 16, 164, 43,
102+
245, 128, 249, 170, 216,
103+
181, 164, 162, 239, 249,
104+
76, 237, 197, 246, 209,
105+
231, 124, 154, 55, 64,
106+
4, 114, 79, 199, 252,
107+
163, 116, 237, 209, 138,
108+
240, 148, 212, 224, 88,
109+
131, 122, 114, 158, 97,
110+
186, 3, 223, 230, 223,
111+
207, 93, 168, 48, 130,
112+
77, 122, 30, 222, 221,
113+
224, 243, 19, 175, 61,
114+
112, 246, 201, 57, 185,
115+
19, 128, 129, 138, 209,
116+
4, 153, 196, 238, 72,
117+
254, 157, 233, 81, 30,
118+
106, 249, 57, 214, 104,
119+
171, 146, 175, 185, 192
120+
]
121+
122+
pullInt :: IORef [Word64] -> IO Word64
123+
pullInt xs = do
124+
xs' <- readIORef xs
125+
case xs' of
126+
[] -> pure (-1)
127+
x : xs' -> do
128+
writeIORef xs xs'
129+
pure x
130+
131+
mkTree0 :: IO Word64 -> IO En.MessageBuilder
132+
mkTree0 ints = do
133+
shouldFork <- (\(i :: Word64) -> (i `mod` 8) < 6) <$> ints
134+
if shouldFork
135+
then do
136+
i <- En.fixed64 (FieldNumber 0) <$> ints
137+
left <- En.embedded (FieldNumber 1) <$> mkTree0 ints
138+
right <- En.embedded (FieldNumber 2) <$> mkTree0 ints
139+
pure (i <> left <> right)
140+
else pure mempty
141+
142+
mkRose0 :: IO Word64 -> IO En.MessageBuilder
143+
mkRose0 ints = do
144+
next <- fromIntegral <$> ints
145+
if next == -1 then pure mempty else do
146+
let nBranches = next `mod` 9
147+
if nBranches == 0 then pure mempty else do
148+
loc <- (\i -> (i `mod` nBranches)) . fromIntegral <$> ints
149+
i <- En.fixed64 (FieldNumber 0) <$> ints
150+
rs1 <- forM (replicate loc ()) $ \() ->
151+
En.embedded (FieldNumber 1) <$> mkTree0 ints
152+
rs2 <- forM (replicate (nBranches - loc) ()) $ \() ->
153+
En.embedded (FieldNumber 1) <$> mkTree0 ints
154+
pure (mconcat rs1 <> i <> mconcat rs2)
155+
156+
mkTree :: IO B.ByteString
157+
mkTree = BL.toStrict . En.toLazyByteString <$> (mkTree0 . pullInt =<< newIORef detRandom)
158+
159+
mkRose :: IO B.ByteString
160+
mkRose = BL.toStrict . En.toLazyByteString <$> (mkRose0 . pullInt =<< newIORef detRandom)
161+
162+
decode :: Foldable f => De.Parser De.RawMessage (f Word64) -> B.ByteString -> IO (Maybe Word64)
163+
decode p = pure . fmap sum . toMaybe . De.parse p
164+
where
165+
toMaybe (Left _) = Nothing
166+
toMaybe (Right x) = Just x
167+
168+
unwrap :: (Functor m, Foldable f) => m (f a) -> m a
169+
unwrap = fmap (foldr1 const)
170+
171+
main :: IO ()
172+
main =
173+
defaultMain
174+
[ bench "Parse int tree" $ C.perRunEnv mkTree (unwrap . decode intTreeParser)
175+
, bench "Parse int rose tree" $ C.perRunEnv mkRose (unwrap . decode intRoseParser)]

proto3-wire.cabal

+8
Original file line numberDiff line numberDiff line change
@@ -65,3 +65,11 @@ test-suite tests
6565
text >= 0.2 && <1.3,
6666
transformers >=0.5.6.2 && <0.6,
6767
vector >=0.12.0.2 && <0.13
68+
69+
benchmark bench
70+
type: exitcode-stdio-1.0
71+
main-is: Main.hs
72+
build-depends: base >= 4 && < 5, bytestring, random, criterion, proto3-wire
73+
hs-source-dirs: bench
74+
ghc-options: -O2 -Wall -fobject-code -ddump-simpl -ddump-to-file
75+

src/Proto3/Wire/Decode.hs

+23-1
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,29 @@ data ParsedField = VarintField Word64
127127
-- fromList [(1,[6,3]),(2,[4])]
128128
--
129129
toMap :: [(FieldNumber, v)] -> M.IntMap [v]
130-
toMap kvs0 = M.fromListWith (<>) . map (fmap (:[])) . map (first (fromIntegral . getFieldNumber)) $ kvs0
130+
toMap kvs0 = makeMap . map (first (fromIntegral . getFieldNumber)) $ kvs0
131+
where
132+
makeMap :: [(Int, v)] -> M.IntMap [v]
133+
makeMap = close . foldl' combineSeen Nothing
134+
135+
close Nothing = M.empty
136+
close (Just (m, k, v)) = M.insertWith (++) k v m
137+
138+
-- If keys are in order, then we don't have to make any lookups,
139+
-- we just maintain the active element.
140+
-- Out of order keys will lookup in the map
141+
combineSeen :: Maybe (M.IntMap [v], Int, [v]) -> (Int, v) -> Maybe (M.IntMap [v], Int, [v])
142+
combineSeen Nothing (k1, a1) = Just (M.empty, k1, [a1])
143+
combineSeen (Just (m, k2, as)) (k1, a1) =
144+
if k1 == k2
145+
then Just (m, k1, a1 : as)
146+
-- It might seem that we want to use DList but we don't because:
147+
-- - alter has worse performance than insertWith, and there's no upsert
148+
-- - We're building up a list of elements in a recursive way
149+
-- that will be opaque to GHC
150+
-- - DList would add another dependency
151+
else let !m' = M.insertWith (++) k2 as m
152+
in Just (m', k1, [a1])
131153

132154
-- | Parses data in the raw wire format into an untyped 'Map' representation.
133155
decodeWire :: B.ByteString -> Either String [(FieldNumber, ParsedField)]

0 commit comments

Comments
 (0)