diff --git a/Data/SGF.hs b/lib/Data/SGF.hs similarity index 96% rename from Data/SGF.hs rename to lib/Data/SGF.hs index a213d38..fdce25d 100644 --- a/Data/SGF.hs +++ b/lib/Data/SGF.hs @@ -7,10 +7,7 @@ There are plans to support other games and pretty-printing in future releases. -} module Data.SGF ( module Data.SGF.Types, - module Data.SGF.Parse, - module Data.Word, - module Data.Tree, - module Text.ParserCombinators.Parsec + collection, -- * Overview of SGF -- $sgf @@ -19,14 +16,23 @@ module Data.SGF ( -- * Example usage -- $example + + -- * Convenience reexports + Word8, + Tree(..), levels, + runParser, + B.ByteString, B.getContents, B.unpack, B.readFile ) where import Data.SGF.Types import Data.SGF.Parse (collection) -import Data.Word -import Data.Tree + +import Data.Word (Word8) +import Data.Tree (Tree(..), levels) import Text.ParserCombinators.Parsec (runParser) +import qualified Data.ByteString as B + -- TODO: -- * support parsing from ByteString, then take the "unpack" out of "parse" below -- * support the rest of the SGF format ;-) @@ -99,8 +105,6 @@ obscure the idea.) First, some boring stuff. > import Data.SGF -> import Data.ByteString (ByteString, getContents, unpack) -> import Data.Tree > import Data.List hiding ((!!)) > import Prelude hiding ((!!), getContents) > diff --git a/lib/Data/SGF/Parse.hs b/lib/Data/SGF/Parse.hs new file mode 100644 index 0000000..b2998c1 --- /dev/null +++ b/lib/Data/SGF/Parse.hs @@ -0,0 +1,5 @@ +module Data.SGF.Parse ( + module Data.SGF.Parse.Internal + ) where + +import Data.SGF.Parse.Internal diff --git a/lib/Data/SGF/Types.hs b/lib/Data/SGF/Types.hs new file mode 100644 index 0000000..86e7227 --- /dev/null +++ b/lib/Data/SGF/Types.hs @@ -0,0 +1,5 @@ +module Data.SGF.Types ( + module Data.SGF.Types.Internal, + ) where + +import Data.SGF.Types.Internal diff --git a/sgf.cabal b/sgf.cabal index d661e98..8fe4154 100644 --- a/sgf.cabal +++ b/sgf.cabal @@ -1,7 +1,8 @@ +cabal-version: 3.0 name: sgf version: 0.1.3.3 author: Daniel Wagner daniel@wagner-home.com -maintainer: Toni Cebrián ancechu@gmail.com +maintainer: Toni Cebrián ancechu@gmail.com homepage: https://github.com/tonicebrian/sgf bug-reports: https://github.com/tonicebrian/sgf/issues synopsis: SGF (Smart Game Format) parser @@ -12,28 +13,55 @@ description: editors. There are plans to support other games and pretty-printing in future releases. category: Data -license: BSD3 +license: BSD-3-Clause license-file: LICENSE -cabal-version: >= 1.10 build-type: Simple -library +common shared + build-depends: base == 4.*, + ghc-options: -Wall default-language: Haskell2010 - exposed-modules: Data.SGF - Data.SGF.Parse + +library + import: shared + exposed-modules: Data.SGF, + Data.SGF.Parse, Data.SGF.Types - other-modules: Data.SGF.Parse.Encodings - Data.SGF.Parse.Raw + build-depends: sgf-internal, + bytestring >= 0.11.5.3 && < 0.13, + containers >= 0.6.7 && < 0.9, + parsec >= 3.1.16 && < 3.2 + hs-source-dirs: lib/ + +library sgf-internal + import: shared + visibility: private + exposed-modules: Data.SGF.Parse.Internal, + Data.SGF.Types.Internal, + Data.SGF.Parse.Encodings, + Data.SGF.Parse.Raw, Data.SGF.Parse.Util - build-depends: base >=3 && < 5, - containers >= 0.6.7 && < 0.7, - encoding >= 0.8.9 && < 0.9, - extensible-exceptions >= 0.1.1 && < 0.2, - mtl >= 2.2.2, - transformers >= 0.5.6, - time >= 1.12.2 && < 1.13, - split >= 0.2.5, - parsec >= 3.1.16 && < 3.2 + build-depends: bytestring >= 0.11.5.3 && < 0.13, + containers >= 0.6.7 && < 0.9, + encoding >= 0.8.9 && < 0.11, + extensible-exceptions >= 0.1.1 && < 0.2, + mtl >= 2.2.2 && < 2.4, + transformers >= 0.5.6 && < 0.7, + time >= 1.12.2 && < 1.16, + split >= 0.2.5 && < 0.3, + parsec >= 3.1.16 && < 3.2 + hs-source-dirs: src/ + +test-suite test + import: shared + main-is: Test.hs + other-modules: Unit.Data.SGF.Parse.EncodingsSpec + build-depends: sgf-internal, + hspec >= 2.10.10 && < 2.12, + build-tool-depends: hspec-discover:hspec-discover + hs-source-dirs: test + type: exitcode-stdio-1.0 + source-repository head type: git diff --git a/Data/SGF/Parse/Encodings.hs b/src/Data/SGF/Parse/Encodings.hs similarity index 84% rename from Data/SGF/Parse/Encodings.hs rename to src/Data/SGF/Parse/Encodings.hs index df4f7d1..3ca2f97 100644 --- a/Data/SGF/Parse/Encodings.hs +++ b/src/Data/SGF/Parse/Encodings.hs @@ -1,15 +1,12 @@ -{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE InstanceSigs #-} -module Data.SGF.Parse.Encodings - ( guessEncoding - , decodeWordStringExplicit +module Data.SGF.Parse.Encodings ( + module Data.SGF.Parse.Encodings, + encodingFromString ) where -import Control.Applicative (Applicative(..)) -import Control.Exception.Extensible import Control.Monad (ap, liftM) import Control.Monad.State import Control.Throws @@ -27,13 +24,13 @@ instance Functor (MyEither a) where instance Applicative (MyEither a) where pure :: a2 -> MyEither a1 a2 - pure x = return x -- note that an eta reduced version of this trips the type checker for non-canonical "pure = return" + pure x = MyEither (Right x) (<*>) :: MyEither a1 (a2 -> b) -> MyEither a1 a2 -> MyEither a1 b (<*>) = ap instance Monad (MyEither a) where (MyEither (Right x)) >>= f = f x - (MyEither (Left x)) >>= f = MyEither (Left x) + (MyEither (Left x)) >>= _ = MyEither (Left x) instance ByteSource (StateT [Word8] (MyEither DecodingException)) where sourceEmpty = gets null @@ -49,8 +46,10 @@ instance ByteSource (StateT [Word8] (MyEither DecodingException)) where return v -- some ones that we know satisfy our invariant (see SGF.Parse.Raw) +encodings :: [DynEncoding] encodings = map encodingFromString ["latin1", "utf-8", "ascii"] +guess :: [Word8] -> DynEncoding -> Bool guess ws encoding = case runStateT (decode encoding) ws :: MyIHateGHC of (MyEither (Right (s, []))) -> diff --git a/Data/SGF/Parse.hs b/src/Data/SGF/Parse/Internal.hs similarity index 99% rename from Data/SGF/Parse.hs rename to src/Data/SGF/Parse/Internal.hs index 4bd85d3..c13caea 100644 --- a/Data/SGF/Parse.hs +++ b/src/Data/SGF/Parse/Internal.hs @@ -8,7 +8,7 @@ -- boilerplate {{{ -- TODO: check that every occurrence of "die" should really be a death, and not just a "fix-it-up-and-warn" -- boilerplate {{{ -module Data.SGF.Parse +module Data.SGF.Parse.Internal ( collection , clipDate , PropertyType(..) @@ -40,15 +40,15 @@ import Data.SGF.Parse.Encodings import Data.SGF.Parse.Raw hiding (collection) import qualified Data.SGF.Parse.Raw as Raw import Data.SGF.Parse.Util -import Data.SGF.Types (Game(Game), GameNode(GameNode)) -import Data.SGF.Types hiding +import Data.SGF.Types.Internal (Game(Game), GameNode(GameNode)) +import Data.SGF.Types.Internal hiding ( Game(..) , GameInfo(..) , GameNode(..) , Move(..) , Setup(..) ) -import qualified Data.SGF.Types as T +import qualified Data.SGF.Types.Internal as T import qualified Data.Set as Set import Data.Time.Calendar import Data.Tree @@ -77,7 +77,7 @@ test = runParser collection () "" . map enum -- two kinds of errors in SGF files: recoverable ones (which will be -- accumulated in the ['Warning'] return) and unrecoverable ones (which will -- result in parse errors). -collection :: Stream s m Word8 => ParsecT s u m (Collection, [Warning]) +collection :: SGFParser (Collection, [Warning]) collection = second concat . unzip <$> (mapM (translate gameTree) =<< Raw.collection) diff --git a/Data/SGF/Parse/Raw.hs b/src/Data/SGF/Parse/Raw.hs similarity index 76% rename from Data/SGF/Parse/Raw.hs rename to src/Data/SGF/Parse/Raw.hs index 2e06406..c46266d 100644 --- a/Data/SGF/Parse/Raw.hs +++ b/src/Data/SGF/Parse/Raw.hs @@ -3,6 +3,7 @@ module Data.SGF.Parse.Raw ( collection, Property(..), + SGFParser, enum ) where @@ -12,9 +13,12 @@ import Data.Char import Data.Tree import Data.Word import Prelude hiding (lex) -import Text.Parsec (SourcePos(..), incSourceColumn) +import Text.Parsec (SourcePos, incSourceColumn) import Text.Parsec.Prim import Text.Parsec.Combinator + +type SGFParser a = Parsec [Word8] () a + -- }}} data Property = Property { position :: SourcePos, -- ^ @@ -38,19 +42,25 @@ data Property = Property { -- things. enum :: (Enum a, Enum b) => a -> b enum = toEnum . fromEnum +ensure :: (Monad m, Alternative m) => (b -> Bool) -> b -> m b ensure p x = guard (p x) >> return x +satisfy :: (Word8 -> Bool) -> SGFParser Word8 satisfy p = tokenPrim ((\x -> ['\'', x, '\'']) . enum) (\pos _ _ -> incSourceColumn pos 1) (ensure p) +satisfyChar :: Enum b => (b -> Bool) -> SGFParser Word8 satisfyChar = satisfy . (. enum) +anyWord :: SGFParser Word8 anyWord = satisfy (const True) +exactWord :: Char -> SGFParser Word8 exactWord = satisfy . (==) . enum -someWord = satisfy . flip elem . map enum +noWord :: [Char] -> SGFParser Word8 noWord = satisfy . flip notElem . map enum +whitespace :: SGFParser [Word8] whitespace = many (satisfyChar isSpace) -- assumed: the current byte is literally ASCII '\\' iff the current byte is @@ -58,30 +68,36 @@ whitespace = many (satisfyChar isSpace) -- the bytes that are literally ASCII ']' and ASCII ':' occur after the first -- byte of any multi-byte encoded character -- (in particular, UTF-8, ASCII, and ISO 8859-1 satisfy this property) +escapedChar :: SGFParser [Word8] escapedChar = liftM2 (\x y -> [x, y]) (exactWord '\\') anyWord +unescapedExcept :: [Char] -> SGFParser [Word8] unescapedExcept ws = fmap return (noWord ws) +literalTextExcept :: [Char] -> SGFParser [Word8] literalTextExcept ws = fmap concat $ many (escapedChar <|> unescapedExcept ws) +property :: SGFParser Property property = liftM3 ((. map enum) . Property) (getPosition) (many1 (satisfyChar (liftM2 (&&) isUpper (< '\128')))) (sepEndBy1 (exactWord '[' >> literalTextExcept "]" <* exactWord ']') whitespace) +node :: SGFParser [Property] node = do - exactWord ';' - whitespace + _ <- exactWord ';' + _ <- whitespace sepEndBy property whitespace +gameTree :: SGFParser (Tree [Property]) gameTree = do - exactWord '(' - whitespace - (node:nodes) <- sepEndBy1 node whitespace + _ <- exactWord '(' + _ <- whitespace + (node':nodes) <- sepEndBy1 node whitespace trees <- sepEndBy gameTree whitespace - exactWord ')' - return (Node node (foldr ((return .) . Node) trees nodes)) + _ <- exactWord ')' + return (Node node' (foldr ((return .) . Node) trees nodes)) -- | -- Parse the tree-structure of an SGF file, but without any knowledge of the -- semantics of the properties, etc. -collection :: Stream s m Word8 => ParsecT s u m [Tree [Property]] +collection :: SGFParser [Tree [Property]] collection = whitespace >> sepEndBy1 gameTree whitespace <* whitespace <* eof diff --git a/Data/SGF/Parse/Util.hs b/src/Data/SGF/Parse/Util.hs similarity index 90% rename from Data/SGF/Parse/Util.hs rename to src/Data/SGF/Parse/Util.hs index 95e0864..57726fd 100644 --- a/Data/SGF/Parse/Util.hs +++ b/src/Data/SGF/Parse/Util.hs @@ -4,19 +4,18 @@ module Data.SGF.Parse.Util where import Control.Arrow (Arrow (first, second, (&&&))) import Control.Monad (liftM, liftM2, when, (>=>)) import Control.Monad.State (MonadState (get), MonadTrans (lift), StateT (StateT), gets, modify) -import qualified Control.Monad.Trans.Except as Either -import Control.Monad.Writer (MonadTrans (lift), MonadWriter (tell), WriterT) +import Control.Monad.Writer (MonadWriter (tell), WriterT) import Data.Char (isDigit, isSpace, toLower) import Data.Encoding (DynEncoding) import Data.Function (on) import Data.Ix (Ix (range)) import Data.List (groupBy, isPrefixOf, nub, partition, sortBy) -import Data.Map (Map (..), fromList, keys) +import Data.Map (Map, fromList, keys) import Data.Maybe (fromJust, listToMaybe) import Data.Ord (comparing) import Data.SGF.Parse.Encodings (decodeWordStringExplicit) import Data.SGF.Parse.Raw (Property (..), enum) -import Data.SGF.Types (Color (..), Emphasis (..), Judgment, Mark, PartialDate, Point) +import Data.SGF.Types.Internal (Color (..), Emphasis (..), Judgment, Mark, PartialDate, Point) import Data.Set (Set) import Data.Tree (Tree (rootLabel)) import Data.Word (Word8) @@ -136,11 +135,11 @@ readNumber s pos newline :: a -> (String -> a) -> (Char -> String -> a) -> String -> a newline empty with without xs = case xs of - '\r' : '\n' : xs -> with xs - '\n' : '\r' : xs -> with xs - '\r' : xs -> with xs - '\n' : xs -> with xs - x : xs -> without x xs + '\r' : '\n' : xs' -> with xs' + '\n' : '\r' : xs' -> with xs' + '\r' : xs' -> with xs' + '\n' : xs' -> with xs' + x : xs' -> without x xs' [] -> empty trim :: Char -> Char @@ -156,8 +155,10 @@ descape hard pos s = case s of decodeAndDescape :: Char -> Header -> PTranslator String decodeAndDescape hard (Header {encoding = e}) (Property {values = v : _, position = pos}) = case decodeWordStringExplicit e v of - Left exception -> dieWithPos BadlyEncodedValue pos + Left _exception -> dieWithPos BadlyEncodedValue pos Right decoded -> descape hard pos decoded +decodeAndDescape _ (Header _ _) (Property _ _ []) = + error "decodeAndDescape error" splitColon :: [Word8] -> Maybe ([Word8], [Word8]) splitColons :: [[Word8]] -> Maybe ([[Word8]], [[Word8]]) @@ -191,7 +192,7 @@ hasAny = fmap or . mapM has consume :: String -> Translator (Maybe Property) consume s = do (v, rest) <- gets (partition ((== s) . name) . rootLabel) - modify (\s -> s {rootLabel = rest}) + modify (\s' -> s' {rootLabel = rest}) return (listToMaybe v) consumeSingle :: String -> Translator (Maybe Property) @@ -206,17 +207,19 @@ consumeSingle s = do unknownProperties :: Translator (Map String [[Word8]]) unknownProperties = do m <- gets (fromList . map (name &&& values) . rootLabel) - tell [UnknownPropertyPreserved name | name <- keys m] + tell [UnknownPropertyPreserved pnam | pnam <- keys m] return m -- }}} -- PTranslators and combinators {{{ number :: PTranslator Integer +number (Property _ _ []) = error "number: empty property list" number p@(Property {values = v : _}) | enum '.' `elem` v = dieWith BadlyFormattedValue p | otherwise = fmap floor (real p) real :: PTranslator Rational +real (Property _ _ []) = error "real: empty property list" real (Property {values = v : _, position = pos}) | [enum '+'] `isPrefixOf` v = result 1 | [enum '-'] `isPrefixOf` v = fmap negate (result 1) @@ -241,7 +244,7 @@ none (Property {values = [[]]}) = return () none p = tell [PropValueForNonePropertyOmitted p] choice :: [([Word8], a)] -> PTranslator a -choice vs p@(Property {values = []}) = dieWith BadlyFormattedValue p -- can't happen +choice _vs p@(Property {values = []}) = dieWith BadlyFormattedValue p -- can't happen choice vs p@(Property {values = v : _}) = maybe (dieWith BadlyFormattedValue p) return (lookup v vs) choice' :: [(String, a)] -> PTranslator a @@ -266,6 +269,8 @@ elistOf _ (Property {values = [[]]}) = return [] elistOf a p = listOf a p mayBeCompoundPoint, listOfPoint, elistOfPoint :: PTranslator Point -> PTranslator [Point] +mayBeCompoundPoint _ (Property _ _ []) = + error "mayBeCompoundPoint: empty property list" mayBeCompoundPoint a p@(Property {values = v : _}) = case splitColon v of Nothing -> return <$> a p Just {} -> do diff --git a/Data/SGF/Types.hs b/src/Data/SGF/Types/Internal.hs similarity index 99% rename from Data/SGF/Types.hs rename to src/Data/SGF/Types/Internal.hs index 73c0ffa..3de04e3 100644 --- a/Data/SGF/Types.hs +++ b/src/Data/SGF/Types/Internal.hs @@ -3,7 +3,7 @@ -- | Types used to represent an SGF tree. Whenever a data type is used by -- exactly one other data type, there will be a \"see also\" link to its -- containing type. -module Data.SGF.Types ( +module Data.SGF.Types.Internal ( -- * Game type Game(..), GameTree(..), GameNode(..), Move(..), Setup(..), @@ -85,6 +85,7 @@ data GameType = Sahara | Byte | Focus | Dvonn | Tamsk | Gipf | Kropki deriving (Eq, Ord, Bounded, Show, Read) +allGameTypesInSGFOrder :: [GameType] allGameTypesInSGFOrder = [Go, Othello, Chess, Gomoku, NineMen'sMorris, Backgammon, ChineseChess, Shogi, LinesOfAction, Ataxx, Hex, Jungle, @@ -204,6 +205,7 @@ data Numbering -- number that the first labeled move is below 100. deriving (Eq, Ord, Show, Read, Enum, Bounded) +allGameInfoTypes :: [GameInfoType] allGameInfoTypes = [TeamName Black, TeamName White, PlayerName Black, PlayerName White, Annotator, Source, User, Copyright, Context, Location, Event, GameName, Opening, Overtime] instance Enum GameInfoType where toEnum = (allGameInfoTypes !!) diff --git a/test/Test.hs b/test/Test.hs new file mode 100644 index 0000000..a824f8c --- /dev/null +++ b/test/Test.hs @@ -0,0 +1 @@ +{-# OPTIONS_GHC -F -pgmF hspec-discover #-} diff --git a/test/Unit/Data/SGF/Parse/EncodingsSpec.hs b/test/Unit/Data/SGF/Parse/EncodingsSpec.hs new file mode 100644 index 0000000..93ca87e --- /dev/null +++ b/test/Unit/Data/SGF/Parse/EncodingsSpec.hs @@ -0,0 +1,16 @@ +module Unit.Data.SGF.Parse.EncodingsSpec where + +import Data.SGF.Parse.Encodings + +import Test.Hspec + + +main :: IO () +main = hspec spec + +spec :: Spec +spec = do + + describe "guessEncoding" $ do + it "does not hang" $ + guessEncoding [85,84,70,45,56] `shouldBe` [encodingFromString "utf-8"]