tools/PascalParser.hs
branchhedgeroid
changeset 6328 d14adf1c7721
parent 6317 83b93a2d2741
child 6355 734fed7aefd3
--- a/tools/PascalParser.hs	Sat Nov 12 15:48:55 2011 +0100
+++ b/tools/PascalParser.hs	Sat Nov 12 17:06:49 2011 +0100
@@ -1,98 +1,545 @@
 module PascalParser where
 
-import Text.ParserCombinators.Parsec
+import Text.Parsec.Expr
+import Text.Parsec.Char
+import Text.Parsec.Token
+import Text.Parsec.Language
+import Text.Parsec.Prim
+import Text.Parsec.Combinator
+import Text.Parsec.String
 import Control.Monad
+import Data.Char
 
 data PascalUnit =
-    Program Identificator Implementation FunctionBody
-    | Unit Identificator Interface Implementation (Maybe Initialize) (Maybe Finalize)
+    Program Identifier Implementation
+    | Unit Identifier Interface Implementation (Maybe Initialize) (Maybe Finalize)
     deriving Show
-
 data Interface = Interface Uses TypesAndVars
     deriving Show
-data Implementation = Implementation Uses TypesAndVars Functions
+data Implementation = Implementation Uses TypesAndVars
+    deriving Show
+data Identifier = Identifier String
+    deriving Show
+data TypesAndVars = TypesAndVars [TypeVarDeclaration]
+    deriving Show
+data TypeVarDeclaration = TypeDeclaration Identifier TypeDecl
+    | VarDeclaration Bool ([Identifier], TypeDecl) (Maybe Expression)
+    | FunctionDeclaration Identifier TypeDecl (Maybe Phrase)
     deriving Show
-data Functions = Functions [Function]
+data TypeDecl = SimpleType Identifier
+    | RangeType Range
+    | Sequence [Identifier]
+    | ArrayDecl Range TypeDecl
+    | RecordType [TypeVarDeclaration]
+    | PointerTo TypeDecl
+    | String
+    | UnknownType
+    deriving Show
+data Range = Range Identifier
+           | RangeFromTo Expression Expression
+    deriving Show
+data Initialize = Initialize String
     deriving Show
-data Function = Function String
+data Finalize = Finalize String
+    deriving Show
+data Uses = Uses [Identifier]
     deriving Show
-data Identificator = Identificator String
+data Phrase = ProcCall Identifier [Expression]
+        | IfThenElse Expression Phrase (Maybe Phrase)
+        | WhileCycle Expression Phrase
+        | RepeatCycle Expression [Phrase]
+        | ForCycle Identifier Expression Expression Phrase
+        | WithBlock Expression Phrase
+        | Phrases [Phrase]
+        | SwitchCase Expression [(Expression, Phrase)] (Maybe Phrase)
+        | Assignment Reference Expression
+    deriving Show
+data Expression = Expression String
+    | PrefixOp String Expression
+    | PostfixOp String Expression
+    | BinOp String Expression Expression
+    | StringLiteral String
+    | CharCode String
+    | NumberLiteral String
+    | HexNumber String
+    | Reference Reference
+    | Null
     deriving Show
-data FunctionBody = FunctionBody String
-    deriving Show
-data TypesAndVars = TypesAndVars String
+data Reference = ArrayElement [Expression] Reference
+    | FunCall [Expression] Reference
+    | SimpleReference Identifier
+    | Dereference Reference
+    | RecordField Reference Reference
+    | Address Reference
     deriving Show
-data Initialize = Initialize Functions
-    deriving Show
-data Finalize = Finalize Functions
-    deriving Show
-data Uses = Uses [Identificator]
-    deriving Show
+    
+pascalLanguageDef
+    = emptyDef
+    { commentStart   = "(*"
+    , commentEnd     = "*)"
+    , commentLine    = "//"
+    , nestedComments = False
+    , identStart     = letter <|> oneOf "_"
+    , identLetter    = alphaNum <|> oneOf "_."
+    , reservedNames  = [
+            "begin", "end", "program", "unit", "interface"
+            , "implementation", "and", "or", "xor", "shl"
+            , "shr", "while", "do", "repeat", "until", "case", "of"
+            , "type", "var", "const", "out", "array", "packed"
+            , "procedure", "function", "with", "for", "to"
+            , "downto", "div", "mod", "record", "set", "nil"
+            , "string", "shortstring"
+            ]
+    , reservedOpNames= [] 
+    , caseSensitive  = False   
+    }
+    
+pas = patch $ makeTokenParser pascalLanguageDef
+    where
+    patch tp = tp {stringLiteral = sl}
+    sl = do
+        (char '\'')
+        s <- (many $ noneOf "'")
+        (char '\'')
+        ss <- many $ do
+            (char '\'')
+            s' <- (many $ noneOf "'")
+            (char '\'')
+            return $ '\'' : s'
+        comments    
+        return $ concat (s:ss)
+    
+comments = do
+    spaces
+    skipMany $ do
+        comment
+        spaces
 
-parsePascalUnit :: String -> Either ParseError PascalUnit
-parsePascalUnit = parse pascalUnit "unit"
-    where
-    comments = skipMany (comment >> spaces)
-    identificator = do
-        spaces
-        l <- letter <|> oneOf "_"
-        ls <- many (alphaNum <|> oneOf "_")
-        spaces
-        return $ Identificator (l:ls)
+pascalUnit = do
+    comments
+    u <- choice [program, unit]
+    comments
+    return u
+
+comment = choice [
+        char '{' >> manyTill anyChar (try $ char '}')
+        , (try $ string "(*") >> manyTill anyChar (try $ string "*)")
+        , (try $ string "//") >> manyTill anyChar (try newline)
+        ]
+
+iD = do
+    i <- liftM Identifier (identifier pas)
+    comments
+    return i
+        
+unit = do
+    string "unit" >> comments
+    name <- iD
+    semi pas
+    comments
+    int <- interface
+    impl <- implementation
+    comments
+    return $ Unit name int impl Nothing Nothing
 
-    pascalUnit = do
-        spaces
-        comments
-        u <- choice [program, unit]
-        comments
-        spaces
-        return u
+    
+reference = buildExpressionParser table term <?> "reference"
+    where
+    term = comments >> choice [
+        parens pas reference 
+        , char '@' >> reference >>= return . Address
+        , iD >>= return . SimpleReference
+        ] <?> "simple reference"
 
-    comment = choice [
-            char '{' >> manyTill anyChar (try $ char '}')
-            , string "(*" >> manyTill anyChar (try $ string "*)")
-            , string "//" >> manyTill anyChar (try newline)
+    table = [ 
+            [Postfix $ (parens pas) (option [] parameters) >>= return . FunCall]
+          , [Postfix (char '^' >> return Dereference)]
+          , [Postfix $ (brackets pas) (commaSep1 pas $ expression) >>= return . ArrayElement]
+          , [Infix (try (char '.' >> notFollowedBy (char '.')) >> return RecordField) AssocLeft]
+        ]
+
+    
+varsDecl1 = varsParser sepEndBy1    
+varsDecl = varsParser sepEndBy
+varsParser m endsWithSemi = do
+    vs <- m (aVarDecl endsWithSemi) (semi pas)
+    return vs
+
+aVarDecl endsWithSemi = do
+    when (not endsWithSemi) $
+        optional $ choice [
+            try $ string "var"
+            , try $ string "const"
+            , try $ string "out"
             ]
+    comments
+    ids <- do
+        i <- (commaSep1 pas) $ (try iD <?> "variable declaration")
+        char ':'
+        return i
+    comments
+    t <- typeDecl <?> "variable type declaration"
+    comments
+    init <- option Nothing $ do
+        char '='
+        comments
+        e <- expression
+        comments
+        return (Just e)
+    return $ VarDeclaration False (ids, t) init
 
-    unit = do
-        name <- unitName
-        spaces
+
+constsDecl = do
+    vs <- many1 (try (aConstDecl >>= \i -> semi pas >> return i) >>= \i -> comments >> return i)
+    comments
+    return vs
+    where
+    aConstDecl = do
+        comments
+        i <- iD <?> "const declaration"
+        optional $ do
+            char ':'
+            comments
+            t <- typeDecl
+            return ()
+        char '='
+        comments
+        e <- expression
         comments
-        int <- string "interface" >> interface
-        manyTill anyChar (try $ string "implementation")
-        spaces
+        return $ VarDeclaration False ([i], UnknownType) (Just e)
+        
+typeDecl = choice [
+    char '^' >> typeDecl >>= return . PointerTo
+    , try (string "shortstring") >> return String
+    , arrayDecl
+    , recordDecl
+    , rangeDecl >>= return . RangeType
+    , sequenceDecl >>= return . Sequence
+    , identifier pas >>= return . SimpleType . Identifier
+    ] <?> "type declaration"
+    where
+    arrayDecl = do
+        try $ string "array"
+        comments
+        char '['
+        r <- rangeDecl
+        char ']'
+        comments
+        string "of"
+        comments
+        t <- typeDecl
+        return $ ArrayDecl r t
+    recordDecl = do
+        optional $ (try $ string "packed") >> comments
+        try $ string "record"
+        comments
+        vs <- varsDecl True
+        string "end"
+        return $ RecordType vs
+    sequenceDecl = (parens pas) $ (commaSep pas) iD
+
+typesDecl = many (aTypeDecl >>= \t -> comments >> return t)
+    where
+    aTypeDecl = do
+        i <- try $ do
+            i <- iD <?> "type declaration"
+            comments
+            char '='
+            return i
+        comments
+        t <- typeDecl
+        comments
+        semi pas
         comments
-        impl <- implementation
-        return $ Unit name int impl Nothing Nothing
-        where
-            unitName = between (string "unit") (char ';') identificator
+        return $ TypeDeclaration i t
+        
+rangeDecl = choice [
+    try $ rangeft
+    , iD >>= return . Range
+    ] <?> "range declaration"
+    where
+    rangeft = do
+    e1 <- expression
+    string ".."
+    e2 <- expression
+    return $ RangeFromTo e1 e2
+    
+typeVarDeclaration isImpl = (liftM concat . many . choice) [
+    varSection,
+    constSection,
+    typeSection,
+    funcDecl,
+    procDecl
+    ]
+    where
+    varSection = do
+        try $ string "var"
+        comments
+        v <- varsDecl1 True
+        comments
+        return v
+
+    constSection = do
+        try $ string "const"
+        comments
+        c <- constsDecl
+        comments
+        return c
 
-    interface = do
-        spaces
+    typeSection = do
+        try $ string "type"
+        comments
+        t <- typesDecl
+        comments
+        return t
+        
+    procDecl = do
+        try $ string "procedure"
+        comments
+        i <- iD
+        optional $ do
+            char '('
+            varsDecl False
+            char ')'
+        comments
+        char ';'
+        b <- if isImpl then
+                do
+                comments
+                optional $ typeVarDeclaration True
+                comments
+                liftM Just functionBody
+                else
+                return Nothing
+        comments
+        return $ [FunctionDeclaration i UnknownType b]
+        
+    funcDecl = do
+        try $ string "function"
         comments
-        u <- uses
-        return $ Interface u (TypesAndVars "")
+        i <- iD
+        optional $ do
+            char '('
+            varsDecl False
+            char ')'
+        comments
+        char ':'
+        comments
+        ret <- typeDecl
+        comments
+        char ';'
+        comments
+        b <- if isImpl then
+                do
+                optional $ typeVarDeclaration True
+                comments
+                liftM Just functionBody
+                else
+                return Nothing
+        return $ [FunctionDeclaration i ret Nothing]
+
+program = do
+    string "program"
+    comments
+    name <- iD
+    (char ';')
+    comments
+    impl <- implementation
+    comments
+    return $ Program name impl
+
+interface = do
+    string "interface"
+    comments
+    u <- uses
+    comments
+    tv <- typeVarDeclaration False
+    comments
+    return $ Interface u (TypesAndVars tv)
 
-    program = do
-        name <- programName
-        spaces
+implementation = do
+    string "implementation"
+    comments
+    u <- uses
+    comments
+    tv <- typeVarDeclaration True
+    string "end."
+    comments
+    return $ Implementation u (TypesAndVars tv)
+
+expression = buildExpressionParser table term <?> "expression"
+    where
+    term = comments >> choice [
+        parens pas $ expression 
+        , try $ integer pas >>= return . NumberLiteral . show
+        , stringLiteral pas >>= return . StringLiteral
+        , char '#' >> many digit >>= return . CharCode
+        , char '$' >> many hexDigit >>= return . HexNumber
+        , try $ string "nil" >> return Null
+        , reference >>= return . Reference
+        ] <?> "simple expression"
+
+    table = [ 
+          [  Infix (char '*' >> return (BinOp "*")) AssocLeft
+           , Infix (char '/' >> return (BinOp "/")) AssocLeft
+           , Infix (try (string "div") >> return (BinOp "div")) AssocLeft
+           , Infix (try (string "mod") >> return (BinOp "mod")) AssocLeft
+          ]
+        , [  Infix (char '+' >> return (BinOp "+")) AssocLeft
+           , Infix (char '-' >> return (BinOp "-")) AssocLeft
+           , Prefix (char '-' >> return (PrefixOp "-"))
+          ]
+        , [  Infix (try (string "<>") >> return (BinOp "<>")) AssocNone
+           , Infix (try (string "<=") >> return (BinOp "<=")) AssocNone
+           , Infix (try (string ">=") >> return (BinOp ">=")) AssocNone
+           , Infix (char '<' >> return (BinOp "<")) AssocNone
+           , Infix (char '>' >> return (BinOp ">")) AssocNone
+           , Infix (char '=' >> return (BinOp "=")) AssocNone
+          ]
+        , [  Infix (try $ string "and" >> return (BinOp "and")) AssocLeft
+           , Infix (try $ string "or" >> return (BinOp "or")) AssocLeft
+           , Infix (try $ string "xor" >> return (BinOp "xor")) AssocLeft
+          ]
+        , [  Infix (try $ string "shl" >> return (BinOp "and")) AssocNone
+           , Infix (try $ string "shr" >> return (BinOp "or")) AssocNone
+          ]
+        , [Prefix (try (string "not") >> return (PrefixOp "not"))]
+        ]
+    
+phrasesBlock = do
+    try $ string "begin"
+    comments
+    p <- manyTill phrase (try $ string "end")
+    comments
+    return $ Phrases p
+    
+phrase = do
+    o <- choice [
+        phrasesBlock
+        , ifBlock
+        , whileCycle
+        , repeatCycle
+        , switchCase
+        , withBlock
+        , forCycle
+        , (try $ reference >>= \r -> string ":=" >> return r) >>= \r -> expression >>= return . Assignment r
+        , procCall
+        ]
+    optional $ char ';'
+    comments
+    return o
+
+ifBlock = do
+    try $ string "if"
+    comments
+    e <- expression
+    comments
+    string "then"
+    comments
+    o1 <- phrase
+    comments
+    o2 <- optionMaybe $ do
+        try $ string "else"
         comments
-        impl <- implementation
-        return $ Program name impl (FunctionBody "")
-        where
-            programName = between (string "program") (char ';') identificator
+        o <- phrase
+        comments
+        return o
+    return $ IfThenElse e o1 o2
 
-    implementation = do
-        u <- uses
-        manyTill anyChar (try $ string "end.")
-        return $ Implementation u (TypesAndVars "") (Functions [])
+whileCycle = do
+    try $ string "while"
+    comments
+    e <- expression
+    comments
+    string "do"
+    comments
+    o <- phrase
+    return $ WhileCycle e o
+
+withBlock = do
+    try $ string "with"
+    comments
+    e <- expression
+    comments
+    string "do"
+    comments
+    o <- phrase
+    return $ WithBlock e o
+    
+repeatCycle = do
+    try $ string "repeat"
+    comments
+    o <- many phrase
+    string "until"
+    comments
+    e <- expression
+    comments
+    return $ RepeatCycle e o
 
-    uses = liftM Uses (option [] u)
-        where
-            u = do
-                string "uses"
-                spaces
-                u <- (identificator >>= \i -> spaces >> return i) `sepBy1` (char ',' >> spaces)
-                char ';'
-                spaces
-                return u
+forCycle = do
+    try $ string "for"
+    comments
+    i <- iD
+    comments
+    string ":="
+    comments
+    e1 <- expression
+    comments
+    choice [string "to", string "downto"]
+    comments
+    e2 <- expression
+    comments
+    string "do"
+    comments
+    p <- phrase
+    comments
+    return $ ForCycle i e1 e2 p
+    
+switchCase = do
+    try $ string "case"
+    comments
+    e <- expression
+    comments
+    string "of"
+    comments
+    cs <- many1 aCase
+    o2 <- optionMaybe $ do
+        try $ string "else"
+        comments
+        o <- phrase
+        comments
+        return o
+    string "end"
+    return $ SwitchCase e cs o2
+    where
+    aCase = do
+        e <- expression
+        comments
+        char ':'
+        comments
+        p <- phrase
+        comments
+        return (e, p)
+    
+procCall = do
+    i <- iD
+    p <- option [] $ (parens pas) parameters
+    return $ ProcCall i p
+
+parameters = (commaSep pas) expression <?> "parameters"
+        
+functionBody = do
+    p <- phrasesBlock
+    char ';'
+    comments
+    return p
+
+uses = liftM Uses (option [] u)
+    where
+        u = do
+            string "uses"
+            comments
+            u <- (iD >>= \i -> comments >> return i) `sepBy1` (char ',' >> comments)
+            char ';'
+            comments
+            return u