--- a/tools/PascalParser.hs Sat Nov 12 15:48:55 2011 +0100
+++ b/tools/PascalParser.hs Sat Nov 12 17:06:49 2011 +0100
@@ -1,98 +1,545 @@
module PascalParser where
-import Text.ParserCombinators.Parsec
+import Text.Parsec.Expr
+import Text.Parsec.Char
+import Text.Parsec.Token
+import Text.Parsec.Language
+import Text.Parsec.Prim
+import Text.Parsec.Combinator
+import Text.Parsec.String
import Control.Monad
+import Data.Char
data PascalUnit =
- Program Identificator Implementation FunctionBody
- | Unit Identificator Interface Implementation (Maybe Initialize) (Maybe Finalize)
+ Program Identifier Implementation
+ | Unit Identifier Interface Implementation (Maybe Initialize) (Maybe Finalize)
deriving Show
-
data Interface = Interface Uses TypesAndVars
deriving Show
-data Implementation = Implementation Uses TypesAndVars Functions
+data Implementation = Implementation Uses TypesAndVars
+ deriving Show
+data Identifier = Identifier String
+ deriving Show
+data TypesAndVars = TypesAndVars [TypeVarDeclaration]
+ deriving Show
+data TypeVarDeclaration = TypeDeclaration Identifier TypeDecl
+ | VarDeclaration Bool ([Identifier], TypeDecl) (Maybe Expression)
+ | FunctionDeclaration Identifier TypeDecl (Maybe Phrase)
deriving Show
-data Functions = Functions [Function]
+data TypeDecl = SimpleType Identifier
+ | RangeType Range
+ | Sequence [Identifier]
+ | ArrayDecl Range TypeDecl
+ | RecordType [TypeVarDeclaration]
+ | PointerTo TypeDecl
+ | String
+ | UnknownType
+ deriving Show
+data Range = Range Identifier
+ | RangeFromTo Expression Expression
+ deriving Show
+data Initialize = Initialize String
deriving Show
-data Function = Function String
+data Finalize = Finalize String
+ deriving Show
+data Uses = Uses [Identifier]
deriving Show
-data Identificator = Identificator String
+data Phrase = ProcCall Identifier [Expression]
+ | IfThenElse Expression Phrase (Maybe Phrase)
+ | WhileCycle Expression Phrase
+ | RepeatCycle Expression [Phrase]
+ | ForCycle Identifier Expression Expression Phrase
+ | WithBlock Expression Phrase
+ | Phrases [Phrase]
+ | SwitchCase Expression [(Expression, Phrase)] (Maybe Phrase)
+ | Assignment Reference Expression
+ deriving Show
+data Expression = Expression String
+ | PrefixOp String Expression
+ | PostfixOp String Expression
+ | BinOp String Expression Expression
+ | StringLiteral String
+ | CharCode String
+ | NumberLiteral String
+ | HexNumber String
+ | Reference Reference
+ | Null
deriving Show
-data FunctionBody = FunctionBody String
- deriving Show
-data TypesAndVars = TypesAndVars String
+data Reference = ArrayElement [Expression] Reference
+ | FunCall [Expression] Reference
+ | SimpleReference Identifier
+ | Dereference Reference
+ | RecordField Reference Reference
+ | Address Reference
deriving Show
-data Initialize = Initialize Functions
- deriving Show
-data Finalize = Finalize Functions
- deriving Show
-data Uses = Uses [Identificator]
- deriving Show
+
+pascalLanguageDef
+ = emptyDef
+ { commentStart = "(*"
+ , commentEnd = "*)"
+ , commentLine = "//"
+ , nestedComments = False
+ , identStart = letter <|> oneOf "_"
+ , identLetter = alphaNum <|> oneOf "_."
+ , reservedNames = [
+ "begin", "end", "program", "unit", "interface"
+ , "implementation", "and", "or", "xor", "shl"
+ , "shr", "while", "do", "repeat", "until", "case", "of"
+ , "type", "var", "const", "out", "array", "packed"
+ , "procedure", "function", "with", "for", "to"
+ , "downto", "div", "mod", "record", "set", "nil"
+ , "string", "shortstring"
+ ]
+ , reservedOpNames= []
+ , caseSensitive = False
+ }
+
+pas = patch $ makeTokenParser pascalLanguageDef
+ where
+ patch tp = tp {stringLiteral = sl}
+ sl = do
+ (char '\'')
+ s <- (many $ noneOf "'")
+ (char '\'')
+ ss <- many $ do
+ (char '\'')
+ s' <- (many $ noneOf "'")
+ (char '\'')
+ return $ '\'' : s'
+ comments
+ return $ concat (s:ss)
+
+comments = do
+ spaces
+ skipMany $ do
+ comment
+ spaces
-parsePascalUnit :: String -> Either ParseError PascalUnit
-parsePascalUnit = parse pascalUnit "unit"
- where
- comments = skipMany (comment >> spaces)
- identificator = do
- spaces
- l <- letter <|> oneOf "_"
- ls <- many (alphaNum <|> oneOf "_")
- spaces
- return $ Identificator (l:ls)
+pascalUnit = do
+ comments
+ u <- choice [program, unit]
+ comments
+ return u
+
+comment = choice [
+ char '{' >> manyTill anyChar (try $ char '}')
+ , (try $ string "(*") >> manyTill anyChar (try $ string "*)")
+ , (try $ string "//") >> manyTill anyChar (try newline)
+ ]
+
+iD = do
+ i <- liftM Identifier (identifier pas)
+ comments
+ return i
+
+unit = do
+ string "unit" >> comments
+ name <- iD
+ semi pas
+ comments
+ int <- interface
+ impl <- implementation
+ comments
+ return $ Unit name int impl Nothing Nothing
- pascalUnit = do
- spaces
- comments
- u <- choice [program, unit]
- comments
- spaces
- return u
+
+reference = buildExpressionParser table term <?> "reference"
+ where
+ term = comments >> choice [
+ parens pas reference
+ , char '@' >> reference >>= return . Address
+ , iD >>= return . SimpleReference
+ ] <?> "simple reference"
- comment = choice [
- char '{' >> manyTill anyChar (try $ char '}')
- , string "(*" >> manyTill anyChar (try $ string "*)")
- , string "//" >> manyTill anyChar (try newline)
+ table = [
+ [Postfix $ (parens pas) (option [] parameters) >>= return . FunCall]
+ , [Postfix (char '^' >> return Dereference)]
+ , [Postfix $ (brackets pas) (commaSep1 pas $ expression) >>= return . ArrayElement]
+ , [Infix (try (char '.' >> notFollowedBy (char '.')) >> return RecordField) AssocLeft]
+ ]
+
+
+varsDecl1 = varsParser sepEndBy1
+varsDecl = varsParser sepEndBy
+varsParser m endsWithSemi = do
+ vs <- m (aVarDecl endsWithSemi) (semi pas)
+ return vs
+
+aVarDecl endsWithSemi = do
+ when (not endsWithSemi) $
+ optional $ choice [
+ try $ string "var"
+ , try $ string "const"
+ , try $ string "out"
]
+ comments
+ ids <- do
+ i <- (commaSep1 pas) $ (try iD <?> "variable declaration")
+ char ':'
+ return i
+ comments
+ t <- typeDecl <?> "variable type declaration"
+ comments
+ init <- option Nothing $ do
+ char '='
+ comments
+ e <- expression
+ comments
+ return (Just e)
+ return $ VarDeclaration False (ids, t) init
- unit = do
- name <- unitName
- spaces
+
+constsDecl = do
+ vs <- many1 (try (aConstDecl >>= \i -> semi pas >> return i) >>= \i -> comments >> return i)
+ comments
+ return vs
+ where
+ aConstDecl = do
+ comments
+ i <- iD <?> "const declaration"
+ optional $ do
+ char ':'
+ comments
+ t <- typeDecl
+ return ()
+ char '='
+ comments
+ e <- expression
comments
- int <- string "interface" >> interface
- manyTill anyChar (try $ string "implementation")
- spaces
+ return $ VarDeclaration False ([i], UnknownType) (Just e)
+
+typeDecl = choice [
+ char '^' >> typeDecl >>= return . PointerTo
+ , try (string "shortstring") >> return String
+ , arrayDecl
+ , recordDecl
+ , rangeDecl >>= return . RangeType
+ , sequenceDecl >>= return . Sequence
+ , identifier pas >>= return . SimpleType . Identifier
+ ] <?> "type declaration"
+ where
+ arrayDecl = do
+ try $ string "array"
+ comments
+ char '['
+ r <- rangeDecl
+ char ']'
+ comments
+ string "of"
+ comments
+ t <- typeDecl
+ return $ ArrayDecl r t
+ recordDecl = do
+ optional $ (try $ string "packed") >> comments
+ try $ string "record"
+ comments
+ vs <- varsDecl True
+ string "end"
+ return $ RecordType vs
+ sequenceDecl = (parens pas) $ (commaSep pas) iD
+
+typesDecl = many (aTypeDecl >>= \t -> comments >> return t)
+ where
+ aTypeDecl = do
+ i <- try $ do
+ i <- iD <?> "type declaration"
+ comments
+ char '='
+ return i
+ comments
+ t <- typeDecl
+ comments
+ semi pas
comments
- impl <- implementation
- return $ Unit name int impl Nothing Nothing
- where
- unitName = between (string "unit") (char ';') identificator
+ return $ TypeDeclaration i t
+
+rangeDecl = choice [
+ try $ rangeft
+ , iD >>= return . Range
+ ] <?> "range declaration"
+ where
+ rangeft = do
+ e1 <- expression
+ string ".."
+ e2 <- expression
+ return $ RangeFromTo e1 e2
+
+typeVarDeclaration isImpl = (liftM concat . many . choice) [
+ varSection,
+ constSection,
+ typeSection,
+ funcDecl,
+ procDecl
+ ]
+ where
+ varSection = do
+ try $ string "var"
+ comments
+ v <- varsDecl1 True
+ comments
+ return v
+
+ constSection = do
+ try $ string "const"
+ comments
+ c <- constsDecl
+ comments
+ return c
- interface = do
- spaces
+ typeSection = do
+ try $ string "type"
+ comments
+ t <- typesDecl
+ comments
+ return t
+
+ procDecl = do
+ try $ string "procedure"
+ comments
+ i <- iD
+ optional $ do
+ char '('
+ varsDecl False
+ char ')'
+ comments
+ char ';'
+ b <- if isImpl then
+ do
+ comments
+ optional $ typeVarDeclaration True
+ comments
+ liftM Just functionBody
+ else
+ return Nothing
+ comments
+ return $ [FunctionDeclaration i UnknownType b]
+
+ funcDecl = do
+ try $ string "function"
comments
- u <- uses
- return $ Interface u (TypesAndVars "")
+ i <- iD
+ optional $ do
+ char '('
+ varsDecl False
+ char ')'
+ comments
+ char ':'
+ comments
+ ret <- typeDecl
+ comments
+ char ';'
+ comments
+ b <- if isImpl then
+ do
+ optional $ typeVarDeclaration True
+ comments
+ liftM Just functionBody
+ else
+ return Nothing
+ return $ [FunctionDeclaration i ret Nothing]
+
+program = do
+ string "program"
+ comments
+ name <- iD
+ (char ';')
+ comments
+ impl <- implementation
+ comments
+ return $ Program name impl
+
+interface = do
+ string "interface"
+ comments
+ u <- uses
+ comments
+ tv <- typeVarDeclaration False
+ comments
+ return $ Interface u (TypesAndVars tv)
- program = do
- name <- programName
- spaces
+implementation = do
+ string "implementation"
+ comments
+ u <- uses
+ comments
+ tv <- typeVarDeclaration True
+ string "end."
+ comments
+ return $ Implementation u (TypesAndVars tv)
+
+expression = buildExpressionParser table term <?> "expression"
+ where
+ term = comments >> choice [
+ parens pas $ expression
+ , try $ integer pas >>= return . NumberLiteral . show
+ , stringLiteral pas >>= return . StringLiteral
+ , char '#' >> many digit >>= return . CharCode
+ , char '$' >> many hexDigit >>= return . HexNumber
+ , try $ string "nil" >> return Null
+ , reference >>= return . Reference
+ ] <?> "simple expression"
+
+ table = [
+ [ Infix (char '*' >> return (BinOp "*")) AssocLeft
+ , Infix (char '/' >> return (BinOp "/")) AssocLeft
+ , Infix (try (string "div") >> return (BinOp "div")) AssocLeft
+ , Infix (try (string "mod") >> return (BinOp "mod")) AssocLeft
+ ]
+ , [ Infix (char '+' >> return (BinOp "+")) AssocLeft
+ , Infix (char '-' >> return (BinOp "-")) AssocLeft
+ , Prefix (char '-' >> return (PrefixOp "-"))
+ ]
+ , [ Infix (try (string "<>") >> return (BinOp "<>")) AssocNone
+ , Infix (try (string "<=") >> return (BinOp "<=")) AssocNone
+ , Infix (try (string ">=") >> return (BinOp ">=")) AssocNone
+ , Infix (char '<' >> return (BinOp "<")) AssocNone
+ , Infix (char '>' >> return (BinOp ">")) AssocNone
+ , Infix (char '=' >> return (BinOp "=")) AssocNone
+ ]
+ , [ Infix (try $ string "and" >> return (BinOp "and")) AssocLeft
+ , Infix (try $ string "or" >> return (BinOp "or")) AssocLeft
+ , Infix (try $ string "xor" >> return (BinOp "xor")) AssocLeft
+ ]
+ , [ Infix (try $ string "shl" >> return (BinOp "and")) AssocNone
+ , Infix (try $ string "shr" >> return (BinOp "or")) AssocNone
+ ]
+ , [Prefix (try (string "not") >> return (PrefixOp "not"))]
+ ]
+
+phrasesBlock = do
+ try $ string "begin"
+ comments
+ p <- manyTill phrase (try $ string "end")
+ comments
+ return $ Phrases p
+
+phrase = do
+ o <- choice [
+ phrasesBlock
+ , ifBlock
+ , whileCycle
+ , repeatCycle
+ , switchCase
+ , withBlock
+ , forCycle
+ , (try $ reference >>= \r -> string ":=" >> return r) >>= \r -> expression >>= return . Assignment r
+ , procCall
+ ]
+ optional $ char ';'
+ comments
+ return o
+
+ifBlock = do
+ try $ string "if"
+ comments
+ e <- expression
+ comments
+ string "then"
+ comments
+ o1 <- phrase
+ comments
+ o2 <- optionMaybe $ do
+ try $ string "else"
comments
- impl <- implementation
- return $ Program name impl (FunctionBody "")
- where
- programName = between (string "program") (char ';') identificator
+ o <- phrase
+ comments
+ return o
+ return $ IfThenElse e o1 o2
- implementation = do
- u <- uses
- manyTill anyChar (try $ string "end.")
- return $ Implementation u (TypesAndVars "") (Functions [])
+whileCycle = do
+ try $ string "while"
+ comments
+ e <- expression
+ comments
+ string "do"
+ comments
+ o <- phrase
+ return $ WhileCycle e o
+
+withBlock = do
+ try $ string "with"
+ comments
+ e <- expression
+ comments
+ string "do"
+ comments
+ o <- phrase
+ return $ WithBlock e o
+
+repeatCycle = do
+ try $ string "repeat"
+ comments
+ o <- many phrase
+ string "until"
+ comments
+ e <- expression
+ comments
+ return $ RepeatCycle e o
- uses = liftM Uses (option [] u)
- where
- u = do
- string "uses"
- spaces
- u <- (identificator >>= \i -> spaces >> return i) `sepBy1` (char ',' >> spaces)
- char ';'
- spaces
- return u
+forCycle = do
+ try $ string "for"
+ comments
+ i <- iD
+ comments
+ string ":="
+ comments
+ e1 <- expression
+ comments
+ choice [string "to", string "downto"]
+ comments
+ e2 <- expression
+ comments
+ string "do"
+ comments
+ p <- phrase
+ comments
+ return $ ForCycle i e1 e2 p
+
+switchCase = do
+ try $ string "case"
+ comments
+ e <- expression
+ comments
+ string "of"
+ comments
+ cs <- many1 aCase
+ o2 <- optionMaybe $ do
+ try $ string "else"
+ comments
+ o <- phrase
+ comments
+ return o
+ string "end"
+ return $ SwitchCase e cs o2
+ where
+ aCase = do
+ e <- expression
+ comments
+ char ':'
+ comments
+ p <- phrase
+ comments
+ return (e, p)
+
+procCall = do
+ i <- iD
+ p <- option [] $ (parens pas) parameters
+ return $ ProcCall i p
+
+parameters = (commaSep pas) expression <?> "parameters"
+
+functionBody = do
+ p <- phrasesBlock
+ char ';'
+ comments
+ return p
+
+uses = liftM Uses (option [] u)
+ where
+ u = do
+ string "uses"
+ comments
+ u <- (iD >>= \i -> comments >> return i) `sepBy1` (char ',' >> comments)
+ char ';'
+ comments
+ return u