{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE ImportQualifiedPost #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TupleSections #-}
{-# OPTIONS_GHC -Wno-unused-top-binds #-}
module Try.IO.RandomNumbers (main) where
import Control.Monad.Fix (fix)
import Data.Binary (decode, encode)
import Data.ByteString qualified as BS
import Data.ByteString.Builder (intDec, toLazyByteString)
import Data.ByteString.Char8 qualified as BSC
import Data.ByteString.Lazy qualified as LBS
import Data.ByteString.Lazy.Char8 qualified as LBSC
import Data.Char (isSpace)
import Data.Foldable (Foldable (..))
import Data.Function ((&))
import Data.Functor ((<&>))
import Data.HashMap.Strict qualified as M
import GHC.IO.Handle.FD (withFile)
import GHC.IO.IOMode (IOMode (..))
import System.Random (newStdGen)
import System.Random.Stateful (Random (..))
-- | How many random numbers 'writeNumbers' puts into the file.
nNumbers :: Int
nNumbers = 70 * 1024 ^ (2 :: Int)
-- | Path of the file holding the generated numbers: 'writeNumbers' writes it
-- and the counting functions read it.
fname :: FilePath
fname = "tmp/file-1"
-- | Lower and upper limit handed to @randomRs@ when generating the numbers.
bounds :: (Int, Int)
bounds = (lowest, highest)
 where
  lowest = 1
  highest = 10000
-- | Write 'nNumbers' random integers taken from 'bounds' into 'fname',
-- rendered in decimal and separated by single spaces.
writeNumbers :: IO ()
writeNumbers = do
  -- Annotated as 'String' so the literal is not ambiguous under
  -- @OverloadedStrings@.
  print ("generating numbers" :: String)
  g <- newStdGen
  let randomStream :: [Int]
      randomStream = randomRs bounds g
      rendered = toLazyByteString . intDec <$> take nNumbers randomStream
  -- The list is consumed lazily while the file is being written.
  LBS.writeFile fname (LBSC.unwords rendered)
-- | The factor 1024, used to express sizes in units of 1024 bytes.
kb :: Int
kb = 2 ^ (10 :: Int)
-- | Number of bytes requested per read in 'countNumbersChunks' (16 * 'kb').
chunkSize :: Int
chunkSize = kb * 16
-- | Accumulator threaded through 'countNumbersChunks': occurrence counts keyed
-- by number, paired with bytes carried over to be prepended to the next chunk.
type MyState = (M.HashMap Int Int, LBS.ByteString)
-- | Count the numbers stored in 'fname' while reading the file in chunks of
-- 'chunkSize' bytes, then print how many numbers were seen in total.
--
-- Bytes left over from one chunk are prepended to the next one. A token that
-- touches the end of a chunk may continue in the following chunk, so it is
-- carried over instead of being counted right away; counting it immediately
-- would split one number into two. Tokens that do not start with an integer
-- are ignored.
countNumbersChunks :: IO ()
countNumbersChunks = do
  -- Annotated as 'String' so the literal is not ambiguous under
  -- @OverloadedStrings@.
  print ("counting numbers (chunks)" :: String)
  (counts, _) <-
    withFile fname ReadMode $ \h ->
      loop (LBS.hGet h chunkSize) (M.empty, LBS.empty)
  print (sum counts)
 where
  -- Keep reading chunks until an empty one signals the end of the file.
  loop :: IO LBS.ByteString -> MyState -> IO MyState
  loop readChunk (!quantities, carried) = do
    chunk_ <- readChunk
    let atEof = LBS.null chunk_
        (tokens, leftover) = splitTokens atEof (carried <> chunk_)
        stateNew = (foldl' countToken quantities tokens, leftover)
    if atEof then pure stateNew else loop readChunk stateNew

  -- Split the buffered bytes into the tokens known to be complete and the
  -- trailing bytes that may be the beginning of a longer token. The last
  -- token is complete only at end of file or when whitespace follows it.
  splitTokens :: Bool -> LBS.ByteString -> ([LBS.ByteString], LBS.ByteString)
  splitTokens atEof buffer
    | atEof || LBS.null buffer || isSpace (LBSC.last buffer) = (tokens, LBS.empty)
    | otherwise = (complete, mconcat partial)
   where
    tokens = LBSC.words buffer
    -- @partial@ holds at most one element: the last token.
    (complete, partial) = splitAt (length tokens - 1) tokens

  -- Bump the counter of the integer a token starts with, if there is one.
  countToken :: M.HashMap Int Int -> LBS.ByteString -> M.HashMap Int Int
  countToken qs token =
    maybe qs (\(n, _) -> M.insertWith (+) n 1 qs) (LBSC.readInt token)
-- | Count the numbers stored in 'fname' using lazy bytestring's @readFile@
-- and print how many numbers were seen in total.
--
-- The contents are split on whitespace. A token that does not start with an
-- integer aborts the program with an error that names the token.
countNumbersReadFile :: IO ()
countNumbersReadFile = do
  -- Annotated as 'String' so the literal is not ambiguous under
  -- @OverloadedStrings@.
  print ("counting numbers (readFile)" :: String)
  contents <- LBS.readFile fname
  print . sum . M.fromListWith (+) $ occurrence <$> LBSC.words contents
 where
  -- Pair the parsed number with a count of one.
  occurrence :: LBSC.ByteString -> (Int, Int)
  occurrence token = case LBSC.readInt token of
    Just (n, _) -> (n, 1)
    Nothing -> error ("countNumbersReadFile: not an integer: " <> show token)
-- | Entry point: runs the chunked counter over the file at 'fname'.
main :: IO ()
main = countNumbersChunks
-- Trying serialization
-- | Sample numbers that 'tryConvertLBS'' renders to text and parses back.
nums :: [Int]
nums = [1, 3, 4, 4, 5, 2, 5]
-- binary <= 0.8.9.1
-- binary isn't human-readable

-- | A list of 'Int's encoded with @binary@ and decoded again.
s :: [Int]
s = decode serialized
 where
  serialized = encode ([1, 2, 3] :: [Int])
-- >>> s
-- [1,2,3]
-- bytestring

-- | Render 'nums' as space-separated decimal text, split the text back into
-- words and parse every word with @readInt@.
tryConvertLBS' :: [Maybe (Int, LBSC.ByteString)]
tryConvertLBS' = map LBSC.readInt tokens
 where
  rendered = LBSC.unwords (map (toLazyByteString . intDec) nums)
  tokens = LBSC.words rendered
-- >>> tryConvertLBS'
-- [Just (1,""),Just (3,""),Just (4,""),Just (4,""),Just (5,""),Just (2,""),Just (5,"")]