Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions src/DataFrame/Internal/Row.hs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ import Data.Typeable (Typeable, type (:~:) (..))
import Data.Word ( Word8, Word16, Word32, Word64 )
import Type.Reflection (TypeRep, typeOf, typeRep)
import Data.Type.Equality (TestEquality(..))
import Text.ParserCombinators.ReadPrec(ReadPrec)
import Text.Read (Lexeme (Ident), lexP, parens, readListPrec, readListPrecDefault, readPrec)


data RowValue where
Value :: (Columnable' a) => a -> RowValue
Expand All @@ -46,6 +49,16 @@ instance Show RowValue where
show :: RowValue -> String
show (Value a) = show a

instance Read RowValue where
readListPrec :: ReadPrec [RowValue]
readListPrec = readListPrecDefault

readPrec :: ReadPrec RowValue
readPrec = parens $ do
Ident "Value" <- lexP
readPrec


toRowValue :: forall a . (Columnable' a) => a -> RowValue
toRowValue = Value

Expand Down
34 changes: 25 additions & 9 deletions src/DataFrame/Operations/Statistics.hs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
{-# LANGUAGE BangPatterns #-}
module DataFrame.Operations.Statistics where

import Data.Bifunctor (second)
import qualified Data.List as L
import qualified Data.Map as M
import qualified Data.Text as T
Expand All @@ -29,13 +30,18 @@ import Data.Maybe (isJust, fromMaybe)
import Data.Function ((&))
import Data.Type.Equality (type (:~:)(Refl), TestEquality (testEquality))
import Type.Reflection (typeRep)

import qualified Data.Bifunctor as Data
import DataFrame.Internal.Row (toRowValue)
import GHC.Float (int2Double)
import Text.Printf (printf)

frequencies :: T.Text -> DataFrame -> DataFrame
frequencies name df = case getColumn name df of
Nothing -> throw $ ColumnNotFoundException name "frequencies" (map fst $ M.toList $ columnIndices df)
Just ((BoxedColumn (column :: V.Vector a))) -> let
counts = valueCounts @a name df
counts :: [(a, Int)]
counts = valueCounts @a name df
total :: Int
total = P.sum $ map snd counts
vText :: forall a . (Columnable a) => a -> T.Text
vText c' = case testEquality (typeRep @a) (typeRep @T.Text) of
Expand All @@ -44,9 +50,13 @@ frequencies name df = case getColumn name df of
Just Refl -> T.pack c'
Nothing -> (T.pack . show) c'
initDf = empty & insertVector "Statistic" (V.fromList ["Count" :: T.Text, "Percentage (%)"])
in L.foldl' (\df (col, k) -> insertVector (vText col) (V.fromList [k, k * 100 `div` total]) df) initDf counts

calculatePercentage k = toRowValue $ roundTo 2 (fromIntegral k * 100 / fromIntegral total)
in L.foldl' (\df (col, k) -> insertVector (vText col)(V.fromList [toRowValue k, calculatePercentage k]) df) initDf counts
Just ((OptionalColumn (column :: V.Vector a))) -> let
counts = valueCounts @a name df
counts :: [(a, Int)]
counts = valueCounts @a name df
total :: Int
total = P.sum $ map snd counts
vText :: forall a . (Columnable a) => a -> T.Text
vText c' = case testEquality (typeRep @a) (typeRep @T.Text) of
Expand All @@ -55,9 +65,12 @@ frequencies name df = case getColumn name df of
Just Refl -> T.pack c'
Nothing -> (T.pack . show) c'
initDf = empty & insertVector "Statistic" (V.fromList ["Count" :: T.Text, "Percentage (%)"])
in L.foldl' (\df (col, k) -> insertVector (vText col) (V.fromList [k, k * 100 `div` total]) df) initDf counts
calculatePercentage k = toRowValue $ roundTo 2 (fromIntegral k * 100 / fromIntegral total)
in L.foldl' (\df (col, k) -> insertVector (vText col)(V.fromList [toRowValue k, calculatePercentage k]) df) initDf counts
Just ((UnboxedColumn (column :: VU.Vector a))) -> let
counts = valueCounts @a name df
counts :: [(a, Int)]
counts = valueCounts @a name df
total :: Int
total = P.sum $ map snd counts
vText :: forall a . (Columnable a) => a -> T.Text
vText c' = case testEquality (typeRep @a) (typeRep @T.Text) of
Expand All @@ -66,7 +79,8 @@ frequencies name df = case getColumn name df of
Just Refl -> T.pack c'
Nothing -> (T.pack . show) c'
initDf = empty & insertVector "Statistic" (V.fromList ["Count" :: T.Text, "Percentage (%)"])
in L.foldl' (\df (col, k) -> insertVector (vText col) (V.fromList [k, k * 100 `div` total]) df) initDf counts
calculatePercentage k = toRowValue $ roundTo 2 (fromIntegral k * 100 / fromIntegral total)
in L.foldl' (\df (col, k) -> insertVector (vText col)(V.fromList [toRowValue k, calculatePercentage k]) df) initDf counts

mean :: T.Text -> DataFrame -> Maybe Double
mean = applyStatistic mean'
Expand Down Expand Up @@ -152,8 +166,10 @@ summarize df = fold columnStats (columnNames df) (fromNamedColumns [("Statistic"
standardDeviation name df,
iqr,
skewness name df]
roundTo :: Int -> Double -> Double
roundTo n x = fromInteger (round $ x * (10^n)) / (10.0^^n)

-- | Round a @Double@ to Specified Precision
roundTo :: Int -> Double -> Double
roundTo n x = fromInteger (round $ x * (10^n)) / (10.0^^n)

mean' :: VU.Vector Double -> Double
mean' samp = let
Expand Down