Improve pascal parser, now it is able to successfully parse uGame.pas (though it eats all comments). Many things are still missing. Well, it's just a matter of time to implement the rest. All basic work is already done anyway.
authorunc0rr
Thu, 03 Nov 2011 17:15:54 +0400
changeset 6270 0a99f73dd8dd
parent 6269 57523ab57218
child 6271 9310cfe6bc37
Improve pascal parser, now it is able to successfully parse uGame.pas (though it eats all comments). Many things are still missing. Well, it's just a matter of time to implement the rest. All basic work is already done anyway.
tools/PascalParser.hs
--- a/tools/PascalParser.hs	Thu Nov 03 05:15:39 2011 +0100
+++ b/tools/PascalParser.hs	Thu Nov 03 17:15:54 2011 +0400
@@ -1,98 +1,357 @@
 module PascalParser where
 
 import Text.ParserCombinators.Parsec
+import Text.ParserCombinators.Parsec.Expr
+import Text.ParserCombinators.Parsec.Token
+import Text.ParserCombinators.Parsec.Language
 import Control.Monad
+import Data.Char
 
 data PascalUnit =
-    Program Identificator Implementation FunctionBody
-    | Unit Identificator Interface Implementation (Maybe Initialize) (Maybe Finalize)
+    Program Identifier Implementation
+    | Unit Identifier Interface Implementation (Maybe Initialize) (Maybe Finalize)
     deriving Show
 
 data Interface = Interface Uses TypesAndVars
     deriving Show
-data Implementation = Implementation Uses TypesAndVars Functions
+data Implementation = Implementation Uses TypesAndVars
     deriving Show
-data Functions = Functions [Function]
+data Identifier = Identifier String
+    deriving Show
+data TypesAndVars = TypesAndVars [TypeVarDeclaration]
     deriving Show
-data Function = Function String
+data TypeVarDeclaration = TypeDeclaration TypeDecl
+    | ConstDeclaration String
+    | VarDeclaration String
+    | FunctionDeclaration Identifier Identifier (Maybe Phrase)
     deriving Show
-data Identificator = Identificator String
+data TypeDecl = SimpleType Identifier
+    | RangeType Range
+    | ArrayDecl Range TypeDecl
+    deriving Show
+data Range = Range Identifier    
+    deriving Show
+data Initialize = Initialize String
     deriving Show
-data FunctionBody = FunctionBody String
+data Finalize = Finalize String
     deriving Show
-data TypesAndVars = TypesAndVars String
+data Uses = Uses [Identifier]
     deriving Show
-data Initialize = Initialize Functions
+data Phrase = ProcCall Identifier [Expression]
+        | IfThenElse Expression Phrase (Maybe Phrase)
+        | WhileCycle Expression Phrase
+        | RepeatCycle Expression Phrase
+        | ForCycle
+        | Phrases [Phrase]
+        | SwitchCase Expression [(Expression, Phrase)] (Maybe Phrase)
+        | Assignment Identifier Expression
     deriving Show
-data Finalize = Finalize Functions
+data Expression = Expression String
+    | FunCall Identifier [Expression]
+    | PrefixOp String Expression
+    | BinOp String Expression Expression
     deriving Show
-data Uses = Uses [Identificator]
-    deriving Show
+    
 
-parsePascalUnit :: String -> Either ParseError PascalUnit
-parsePascalUnit = parse pascalUnit "unit"
-    where
-    comments = skipMany (comment >> spaces)
-    identificator = do
-        spaces
-        l <- letter <|> oneOf "_"
-        ls <- many (alphaNum <|> oneOf "_")
-        spaces
-        return $ Identificator (l:ls)
-
-    pascalUnit = do
-        spaces
-        comments
-        u <- choice [program, unit]
-        comments
-        spaces
-        return u
-
-    comment = choice [
-            char '{' >> manyTill anyChar (try $ char '}')
-            , string "(*" >> manyTill anyChar (try $ string "*)")
-            , string "//" >> manyTill anyChar (try newline)
+pascalLanguageDef
+    = emptyDef
+    { commentStart   = "(*"
+    , commentEnd     = "*)"
+    , commentLine    = "//"
+    , nestedComments = False
+    , identStart     = letter <|> oneOf "_"
+    , identLetter    = alphaNum <|> oneOf "_."
+    , reservedNames  = [
+            "begin", "end", "program", "unit", "interface"
+            , "implementation", "and", "or", "xor", "shl"
+            , "shr", "while", "do", "repeat", "until", "case", "of"
+            , "type", "var", "const", "out", "array"
+            , "procedure", "function"
             ]
-
-    unit = do
-        name <- unitName
+    , reservedOpNames= [] 
+    , caseSensitive  = False   
+    }
+    
+pas = makeTokenParser pascalLanguageDef
+    
+comments = do
+    spaces
+    skipMany $ do
+        comment
         spaces
+
+validIdChar = alphaNum <|> oneOf "_"    
+
+pascalUnit = do
+    comments
+    u <- choice [program, unit]
+    comments
+    return u
+
+comment = choice [
+        char '{' >> manyTill anyChar (try $ char '}')
+        , (try $ string "(*") >> manyTill anyChar (try $ string "*)")
+        , (try $ string "//") >> manyTill anyChar (try newline)
+        ]
+
+unit = do
+    name <- liftM Identifier unitName
+    comments
+    int <- interface
+    impl <- implementation
+    comments
+    return $ Unit name int impl Nothing Nothing
+    where
+        unitName = between (string "unit" >> comments) (char ';') (identifier pas)
+
+varsDecl = do
+    v <- aVarDecl `sepBy1` (char ';' >> comments)
+    char ';'
+    comments
+    return $ VarDeclaration $ show v
+    where
+    aVarDecl = do
+        ids <- (try (identifier pas) >>= \i -> comments >> return (Identifier i)) `sepBy1` (char ',' >> comments)
+        char ':'
         comments
-        int <- string "interface" >> interface
-        manyTill anyChar (try $ string "implementation")
-        spaces
+        t <- typeDecl
+        comments
+        return (ids, t)
+        
+typeDecl = choice [
+    arrayDecl
+    , rangeDecl >>= return . RangeType
+    , identifier pas >>= return . SimpleType . Identifier
+    ] <?> "type declaration"
+    where
+    arrayDecl = do
+        try $ string "array"
+        comments
+        char '['
+        r <- rangeDecl
+        char ']'
+        comments
+        string "of"
         comments
-        impl <- implementation
-        return $ Unit name int impl Nothing Nothing
-        where
-            unitName = between (string "unit") (char ';') identificator
+        t <- typeDecl
+        return $ ArrayDecl r t
+
+rangeDecl = choice [
+    identifier pas >>= return . Range . Identifier
+    ] <?> "range declaration"
 
-    interface = do
-        spaces
+typeVarDeclaration isImpl = choice [
+    varSection,
+    funcDecl,
+    procDecl
+    ]
+    where
+    varSection = do
+        try $ string "var"
+        comments
+        v <- varsDecl
+        return v
+            
+    procDecl = do
+        string "procedure"
+        comments
+        i <- liftM Identifier $ identifier pas
+        optional $ do
+            char '('
+            varsDecl
+            char ')'
+        comments
+        char ';'
+        b <- if isImpl then
+                do
+                comments
+                typeVarDeclaration isImpl
+                comments
+                liftM Just functionBody
+                else
+                return Nothing
+        comments
+        return $ FunctionDeclaration i (Identifier "") b
+        
+    funcDecl = do
+        string "function"
         comments
-        u <- uses
-        return $ Interface u (TypesAndVars "")
+        char '('
+        b <- manyTill anyChar (try $ char ')')
+        char ')'
+        comments
+        char ':'
+        ret <- identifier pas
+        comments
+        char ';'
+        b <- if isImpl then
+                do
+                comments
+                typeVarDeclaration isImpl
+                comments
+                liftM Just functionBody
+                else
+                return Nothing
+        return $ FunctionDeclaration (Identifier "function") (Identifier ret) Nothing
+
+program = do
+    name <- liftM Identifier programName
+    comments
+    impl <- implementation
+    comments
+    return $ Program name impl
+    where
+        programName = between (string "program") (char ';') (identifier pas)
+
+interface = do
+    string "interface"
+    comments
+    u <- uses
+    comments
+    tv <- many (typeVarDeclaration False)
+    comments
+    return $ Interface u (TypesAndVars tv)
 
-    program = do
-        name <- programName
-        spaces
-        comments
-        impl <- implementation
-        return $ Program name impl (FunctionBody "")
-        where
-            programName = between (string "program") (char ';') identificator
+implementation = do
+    string "implementation"
+    comments
+    u <- uses
+    comments
+    tv <- many (typeVarDeclaration True)
+    string "end."
+    comments
+    return $ Implementation u (TypesAndVars tv)
+
+expression = buildExpressionParser table term <?> "expression"
+    where
+    term = comments >> choice [
+        parens pas $ expression 
+        , natural pas >>= return . Expression . show
+        , funCall
+        ] <?> "simple expression"
+
+    table = [ 
+          [Infix (string "^." >> return (BinOp "^.")) AssocLeft]
+        , [Prefix (string "not" >> return (PrefixOp "not"))]
+        , [  Infix (char '*' >> return (BinOp "*")) AssocLeft
+           , Infix (char '/' >> return (BinOp "/")) AssocLeft
+           ]
+        , [  Infix (char '+' >> return (BinOp "+")) AssocLeft
+           , Infix (char '-' >> return (BinOp "-")) AssocLeft
+           ]
+        , [  Infix (try (string "<>" )>> return (BinOp "<>")) AssocNone
+           , Infix (try (string "<=") >> return (BinOp "<=")) AssocNone
+           , Infix (try (string ">=") >> return (BinOp ">=")) AssocNone
+           , Infix (char '<' >> return (BinOp "<")) AssocNone
+           , Infix (char '>' >> return (BinOp ">")) AssocNone
+           , Infix (char '=' >> return (BinOp "=")) AssocNone
+           ]
+        , [  Infix (try $ string "and" >> return (BinOp "and")) AssocNone
+           , Infix (try $ string "or" >> return (BinOp "or")) AssocNone
+           , Infix (try $ string "xor" >> return (BinOp "xor")) AssocNone
+           ]
+        ]
+    
+phrasesBlock = do
+    try $ string "begin"
+    comments
+    p <- manyTill phrase (try $ string "end")
+    comments
+    return $ Phrases p
+    
+phrase = do
+    o <- choice [
+        phrasesBlock
+        , ifBlock
+        , whileCycle
+        , switchCase
+        , try $ identifier pas >>= \i -> string ":=" >> expression >>= return . Assignment (Identifier i)
+        , procCall
+        ]
+    optional $ char ';'
+    comments
+    return o
 
-    implementation = do
-        u <- uses
-        manyTill anyChar (try $ string "end.")
-        return $ Implementation u (TypesAndVars "") (Functions [])
+ifBlock = do
+    try $ string "if"
+    comments
+    e <- expression
+    comments
+    string "then"
+    comments
+    o1 <- phrase
+    comments
+    o2 <- optionMaybe $ do
+        try $ string "else"
+        comments
+        o <- phrase
+        comments
+        return o
+    optional $ char ';'
+    return $ IfThenElse e o1 o2
+
+whileCycle = do
+    try $ string "while"
+    comments
+    e <- expression
+    comments
+    string "do"
+    comments
+    o <- phrase
+    optional $ char ';'
+    return $ WhileCycle e o
 
-    uses = liftM Uses (option [] u)
-        where
-            u = do
-                string "uses"
-                spaces
-                u <- (identificator >>= \i -> spaces >> return i) `sepBy1` (char ',' >> spaces)
-                char ';'
-                spaces
-                return u
+switchCase = do
+    try $ string "case"
+    comments
+    e <- expression
+    comments
+    string "of"
+    comments
+    cs <- many1 aCase
+    o2 <- optionMaybe $ do
+        try $ string "else"
+        comments
+        o <- phrase
+        comments
+        return o
+    string "end"
+    optional $ char ';'
+    return $ SwitchCase e cs o2
+    where
+    aCase = do
+        e <- expression
+        comments
+        char ':'
+        comments
+        p <- phrase
+        comments
+        return (e, p)
+    
+procCall = do
+    i <- liftM Identifier $ identifier pas
+    p <- option [] $ (parens pas) parameters
+    return $ ProcCall i p
+
+funCall = do
+    i <- liftM Identifier $ identifier pas
+    p <- option [] $ (parens pas) parameters
+    return $ FunCall i p
+
+parameters = expression `sepBy` (char ',' >> comments)
+        
+functionBody = do
+    p <- phrasesBlock
+    char ';'
+    comments
+    return p
+
+uses = liftM Uses (option [] u)
+    where
+        u = do
+            string "uses"
+            comments
+            u <- (identifier pas >>= \i -> comments >> return (Identifier i)) `sepBy1` (char ',' >> comments)
+            char ';'
+            comments
+            return u