tools/PascalParser.hs
branchhedgeroid
changeset 6328 d14adf1c7721
parent 6317 83b93a2d2741
child 6355 734fed7aefd3
equal deleted inserted replaced
6236:1998ff75321a 6328:d14adf1c7721
     1 module PascalParser where
     1 module PascalParser where
     2 
     2 
     3 import Text.ParserCombinators.Parsec
     3 import Text.Parsec.Expr
       
     4 import Text.Parsec.Char
       
     5 import Text.Parsec.Token
       
     6 import Text.Parsec.Language
       
     7 import Text.Parsec.Prim
       
     8 import Text.Parsec.Combinator
       
     9 import Text.Parsec.String
     4 import Control.Monad
    10 import Control.Monad
       
    11 import Data.Char
     5 
    12 
     6 data PascalUnit =
    13 data PascalUnit =
     7     Program Identificator Implementation FunctionBody
    14     Program Identifier Implementation
     8     | Unit Identificator Interface Implementation (Maybe Initialize) (Maybe Finalize)
    15     | Unit Identifier Interface Implementation (Maybe Initialize) (Maybe Finalize)
     9     deriving Show
    16     deriving Show
    10 
       
    11 data Interface = Interface Uses TypesAndVars
    17 data Interface = Interface Uses TypesAndVars
    12     deriving Show
    18     deriving Show
    13 data Implementation = Implementation Uses TypesAndVars Functions
    19 data Implementation = Implementation Uses TypesAndVars
    14     deriving Show
    20     deriving Show
    15 data Functions = Functions [Function]
    21 data Identifier = Identifier String
    16     deriving Show
    22     deriving Show
    17 data Function = Function String
    23 data TypesAndVars = TypesAndVars [TypeVarDeclaration]
    18     deriving Show
    24     deriving Show
    19 data Identificator = Identificator String
    25 data TypeVarDeclaration = TypeDeclaration Identifier TypeDecl
    20     deriving Show
    26     | VarDeclaration Bool ([Identifier], TypeDecl) (Maybe Expression)
    21 data FunctionBody = FunctionBody String
    27     | FunctionDeclaration Identifier TypeDecl (Maybe Phrase)
    22     deriving Show
    28     deriving Show
    23 data TypesAndVars = TypesAndVars String
    29 data TypeDecl = SimpleType Identifier
    24     deriving Show
    30     | RangeType Range
    25 data Initialize = Initialize Functions
    31     | Sequence [Identifier]
    26     deriving Show
    32     | ArrayDecl Range TypeDecl
    27 data Finalize = Finalize Functions
    33     | RecordType [TypeVarDeclaration]
    28     deriving Show
    34     | PointerTo TypeDecl
    29 data Uses = Uses [Identificator]
    35     | String
    30     deriving Show
    36     | UnknownType
    31 
    37     deriving Show
    32 parsePascalUnit :: String -> Either ParseError PascalUnit
    38 data Range = Range Identifier
    33 parsePascalUnit = parse pascalUnit "unit"
    39            | RangeFromTo Expression Expression
    34     where
    40     deriving Show
    35     comments = skipMany (comment >> spaces)
    41 data Initialize = Initialize String
    36     identificator = do
    42     deriving Show
       
    43 data Finalize = Finalize String
       
    44     deriving Show
       
    45 data Uses = Uses [Identifier]
       
    46     deriving Show
       
    47 data Phrase = ProcCall Identifier [Expression]
       
    48         | IfThenElse Expression Phrase (Maybe Phrase)
       
    49         | WhileCycle Expression Phrase
       
    50         | RepeatCycle Expression [Phrase]
       
    51         | ForCycle Identifier Expression Expression Phrase
       
    52         | WithBlock Expression Phrase
       
    53         | Phrases [Phrase]
       
    54         | SwitchCase Expression [(Expression, Phrase)] (Maybe Phrase)
       
    55         | Assignment Reference Expression
       
    56     deriving Show
       
    57 data Expression = Expression String
       
    58     | PrefixOp String Expression
       
    59     | PostfixOp String Expression
       
    60     | BinOp String Expression Expression
       
    61     | StringLiteral String
       
    62     | CharCode String
       
    63     | NumberLiteral String
       
    64     | HexNumber String
       
    65     | Reference Reference
       
    66     | Null
       
    67     deriving Show
       
    68 data Reference = ArrayElement [Expression] Reference
       
    69     | FunCall [Expression] Reference
       
    70     | SimpleReference Identifier
       
    71     | Dereference Reference
       
    72     | RecordField Reference Reference
       
    73     | Address Reference
       
    74     deriving Show
       
    75     
       
    76 pascalLanguageDef
       
    77     = emptyDef
       
    78     { commentStart   = "(*"
       
    79     , commentEnd     = "*)"
       
    80     , commentLine    = "//"
       
    81     , nestedComments = False
       
    82     , identStart     = letter <|> oneOf "_"
       
    83     , identLetter    = alphaNum <|> oneOf "_."
       
    84     , reservedNames  = [
       
    85             "begin", "end", "program", "unit", "interface"
       
    86             , "implementation", "and", "or", "xor", "shl"
       
    87             , "shr", "while", "do", "repeat", "until", "case", "of"
       
    88             , "type", "var", "const", "out", "array", "packed"
       
    89             , "procedure", "function", "with", "for", "to"
       
    90             , "downto", "div", "mod", "record", "set", "nil"
       
    91             , "string", "shortstring"
       
    92             ]
       
    93     , reservedOpNames= [] 
       
    94     , caseSensitive  = False   
       
    95     }
       
    96     
       
    97 pas = patch $ makeTokenParser pascalLanguageDef
       
    98     where
       
    99     patch tp = tp {stringLiteral = sl}
       
   100     sl = do
       
   101         (char '\'')
       
   102         s <- (many $ noneOf "'")
       
   103         (char '\'')
       
   104         ss <- many $ do
       
   105             (char '\'')
       
   106             s' <- (many $ noneOf "'")
       
   107             (char '\'')
       
   108             return $ '\'' : s'
       
   109         comments    
       
   110         return $ concat (s:ss)
       
   111     
       
   112 comments = do
       
   113     spaces
       
   114     skipMany $ do
       
   115         comment
    37         spaces
   116         spaces
    38         l <- letter <|> oneOf "_"
   117 
    39         ls <- many (alphaNum <|> oneOf "_")
   118 pascalUnit = do
    40         spaces
   119     comments
    41         return $ Identificator (l:ls)
   120     u <- choice [program, unit]
    42 
   121     comments
    43     pascalUnit = do
   122     return u
    44         spaces
   123 
    45         comments
   124 comment = choice [
    46         u <- choice [program, unit]
   125         char '{' >> manyTill anyChar (try $ char '}')
    47         comments
   126         , (try $ string "(*") >> manyTill anyChar (try $ string "*)")
    48         spaces
   127         , (try $ string "//") >> manyTill anyChar (try newline)
    49         return u
   128         ]
    50 
   129 
    51     comment = choice [
   130 iD = do
    52             char '{' >> manyTill anyChar (try $ char '}')
   131     i <- liftM Identifier (identifier pas)
    53             , string "(*" >> manyTill anyChar (try $ string "*)")
   132     comments
    54             , string "//" >> manyTill anyChar (try newline)
   133     return i
       
   134         
       
   135 unit = do
       
   136     string "unit" >> comments
       
   137     name <- iD
       
   138     semi pas
       
   139     comments
       
   140     int <- interface
       
   141     impl <- implementation
       
   142     comments
       
   143     return $ Unit name int impl Nothing Nothing
       
   144 
       
   145     
       
   146 reference = buildExpressionParser table term <?> "reference"
       
   147     where
       
   148     term = comments >> choice [
       
   149         parens pas reference 
       
   150         , char '@' >> reference >>= return . Address
       
   151         , iD >>= return . SimpleReference
       
   152         ] <?> "simple reference"
       
   153 
       
   154     table = [ 
       
   155             [Postfix $ (parens pas) (option [] parameters) >>= return . FunCall]
       
   156           , [Postfix (char '^' >> return Dereference)]
       
   157           , [Postfix $ (brackets pas) (commaSep1 pas $ expression) >>= return . ArrayElement]
       
   158           , [Infix (try (char '.' >> notFollowedBy (char '.')) >> return RecordField) AssocLeft]
       
   159         ]
       
   160 
       
   161     
       
   162 varsDecl1 = varsParser sepEndBy1    
       
   163 varsDecl = varsParser sepEndBy
       
   164 varsParser m endsWithSemi = do
       
   165     vs <- m (aVarDecl endsWithSemi) (semi pas)
       
   166     return vs
       
   167 
       
   168 aVarDecl endsWithSemi = do
       
   169     when (not endsWithSemi) $
       
   170         optional $ choice [
       
   171             try $ string "var"
       
   172             , try $ string "const"
       
   173             , try $ string "out"
    55             ]
   174             ]
    56 
   175     comments
    57     unit = do
   176     ids <- do
    58         name <- unitName
   177         i <- (commaSep1 pas) $ (try iD <?> "variable declaration")
    59         spaces
   178         char ':'
    60         comments
   179         return i
    61         int <- string "interface" >> interface
   180     comments
    62         manyTill anyChar (try $ string "implementation")
   181     t <- typeDecl <?> "variable type declaration"
    63         spaces
   182     comments
    64         comments
   183     init <- option Nothing $ do
    65         impl <- implementation
   184         char '='
    66         return $ Unit name int impl Nothing Nothing
   185         comments
    67         where
   186         e <- expression
    68             unitName = between (string "unit") (char ';') identificator
   187         comments
    69 
   188         return (Just e)
    70     interface = do
   189     return $ VarDeclaration False (ids, t) init
    71         spaces
   190 
    72         comments
   191 
    73         u <- uses
   192 constsDecl = do
    74         return $ Interface u (TypesAndVars "")
   193     vs <- many1 (try (aConstDecl >>= \i -> semi pas >> return i) >>= \i -> comments >> return i)
    75 
   194     comments
    76     program = do
   195     return vs
    77         name <- programName
   196     where
    78         spaces
   197     aConstDecl = do
    79         comments
   198         comments
    80         impl <- implementation
   199         i <- iD <?> "const declaration"
    81         return $ Program name impl (FunctionBody "")
   200         optional $ do
    82         where
   201             char ':'
    83             programName = between (string "program") (char ';') identificator
   202             comments
    84 
   203             t <- typeDecl
    85     implementation = do
   204             return ()
    86         u <- uses
   205         char '='
    87         manyTill anyChar (try $ string "end.")
   206         comments
    88         return $ Implementation u (TypesAndVars "") (Functions [])
   207         e <- expression
    89 
   208         comments
    90     uses = liftM Uses (option [] u)
   209         return $ VarDeclaration False ([i], UnknownType) (Just e)
    91         where
   210         
    92             u = do
   211 typeDecl = choice [
    93                 string "uses"
   212     char '^' >> typeDecl >>= return . PointerTo
    94                 spaces
   213     , try (string "shortstring") >> return String
    95                 u <- (identificator >>= \i -> spaces >> return i) `sepBy1` (char ',' >> spaces)
   214     , arrayDecl
    96                 char ';'
   215     , recordDecl
    97                 spaces
   216     , rangeDecl >>= return . RangeType
    98                 return u
   217     , sequenceDecl >>= return . Sequence
       
   218     , identifier pas >>= return . SimpleType . Identifier
       
   219     ] <?> "type declaration"
       
   220     where
       
   221     arrayDecl = do
       
   222         try $ string "array"
       
   223         comments
       
   224         char '['
       
   225         r <- rangeDecl
       
   226         char ']'
       
   227         comments
       
   228         string "of"
       
   229         comments
       
   230         t <- typeDecl
       
   231         return $ ArrayDecl r t
       
   232     recordDecl = do
       
   233         optional $ (try $ string "packed") >> comments
       
   234         try $ string "record"
       
   235         comments
       
   236         vs <- varsDecl True
       
   237         string "end"
       
   238         return $ RecordType vs
       
   239     sequenceDecl = (parens pas) $ (commaSep pas) iD
       
   240 
       
   241 typesDecl = many (aTypeDecl >>= \t -> comments >> return t)
       
   242     where
       
   243     aTypeDecl = do
       
   244         i <- try $ do
       
   245             i <- iD <?> "type declaration"
       
   246             comments
       
   247             char '='
       
   248             return i
       
   249         comments
       
   250         t <- typeDecl
       
   251         comments
       
   252         semi pas
       
   253         comments
       
   254         return $ TypeDeclaration i t
       
   255         
       
   256 rangeDecl = choice [
       
   257     try $ rangeft
       
   258     , iD >>= return . Range
       
   259     ] <?> "range declaration"
       
   260     where
       
   261     rangeft = do
       
   262     e1 <- expression
       
   263     string ".."
       
   264     e2 <- expression
       
   265     return $ RangeFromTo e1 e2
       
   266     
       
   267 typeVarDeclaration isImpl = (liftM concat . many . choice) [
       
   268     varSection,
       
   269     constSection,
       
   270     typeSection,
       
   271     funcDecl,
       
   272     procDecl
       
   273     ]
       
   274     where
       
   275     varSection = do
       
   276         try $ string "var"
       
   277         comments
       
   278         v <- varsDecl1 True
       
   279         comments
       
   280         return v
       
   281 
       
   282     constSection = do
       
   283         try $ string "const"
       
   284         comments
       
   285         c <- constsDecl
       
   286         comments
       
   287         return c
       
   288 
       
   289     typeSection = do
       
   290         try $ string "type"
       
   291         comments
       
   292         t <- typesDecl
       
   293         comments
       
   294         return t
       
   295         
       
   296     procDecl = do
       
   297         try $ string "procedure"
       
   298         comments
       
   299         i <- iD
       
   300         optional $ do
       
   301             char '('
       
   302             varsDecl False
       
   303             char ')'
       
   304         comments
       
   305         char ';'
       
   306         b <- if isImpl then
       
   307                 do
       
   308                 comments
       
   309                 optional $ typeVarDeclaration True
       
   310                 comments
       
   311                 liftM Just functionBody
       
   312                 else
       
   313                 return Nothing
       
   314         comments
       
   315         return $ [FunctionDeclaration i UnknownType b]
       
   316         
       
   317     funcDecl = do
       
   318         try $ string "function"
       
   319         comments
       
   320         i <- iD
       
   321         optional $ do
       
   322             char '('
       
   323             varsDecl False
       
   324             char ')'
       
   325         comments
       
   326         char ':'
       
   327         comments
       
   328         ret <- typeDecl
       
   329         comments
       
   330         char ';'
       
   331         comments
       
   332         b <- if isImpl then
       
   333                 do
       
   334                 optional $ typeVarDeclaration True
       
   335                 comments
       
   336                 liftM Just functionBody
       
   337                 else
       
   338                 return Nothing
       
   339         return $ [FunctionDeclaration i ret Nothing]
       
   340 
       
   341 program = do
       
   342     string "program"
       
   343     comments
       
   344     name <- iD
       
   345     (char ';')
       
   346     comments
       
   347     impl <- implementation
       
   348     comments
       
   349     return $ Program name impl
       
   350 
       
   351 interface = do
       
   352     string "interface"
       
   353     comments
       
   354     u <- uses
       
   355     comments
       
   356     tv <- typeVarDeclaration False
       
   357     comments
       
   358     return $ Interface u (TypesAndVars tv)
       
   359 
       
   360 implementation = do
       
   361     string "implementation"
       
   362     comments
       
   363     u <- uses
       
   364     comments
       
   365     tv <- typeVarDeclaration True
       
   366     string "end."
       
   367     comments
       
   368     return $ Implementation u (TypesAndVars tv)
       
   369 
       
   370 expression = buildExpressionParser table term <?> "expression"
       
   371     where
       
   372     term = comments >> choice [
       
   373         parens pas $ expression 
       
   374         , try $ integer pas >>= return . NumberLiteral . show
       
   375         , stringLiteral pas >>= return . StringLiteral
       
   376         , char '#' >> many digit >>= return . CharCode
       
   377         , char '$' >> many hexDigit >>= return . HexNumber
       
   378         , try $ string "nil" >> return Null
       
   379         , reference >>= return . Reference
       
   380         ] <?> "simple expression"
       
   381 
       
   382     table = [ 
       
   383           [  Infix (char '*' >> return (BinOp "*")) AssocLeft
       
   384            , Infix (char '/' >> return (BinOp "/")) AssocLeft
       
   385            , Infix (try (string "div") >> return (BinOp "div")) AssocLeft
       
   386            , Infix (try (string "mod") >> return (BinOp "mod")) AssocLeft
       
   387           ]
       
   388         , [  Infix (char '+' >> return (BinOp "+")) AssocLeft
       
   389            , Infix (char '-' >> return (BinOp "-")) AssocLeft
       
   390            , Prefix (char '-' >> return (PrefixOp "-"))
       
   391           ]
       
   392         , [  Infix (try (string "<>") >> return (BinOp "<>")) AssocNone
       
   393            , Infix (try (string "<=") >> return (BinOp "<=")) AssocNone
       
   394            , Infix (try (string ">=") >> return (BinOp ">=")) AssocNone
       
   395            , Infix (char '<' >> return (BinOp "<")) AssocNone
       
   396            , Infix (char '>' >> return (BinOp ">")) AssocNone
       
   397            , Infix (char '=' >> return (BinOp "=")) AssocNone
       
   398           ]
       
   399         , [  Infix (try $ string "and" >> return (BinOp "and")) AssocLeft
       
   400            , Infix (try $ string "or" >> return (BinOp "or")) AssocLeft
       
   401            , Infix (try $ string "xor" >> return (BinOp "xor")) AssocLeft
       
   402           ]
       
   403         , [  Infix (try $ string "shl" >> return (BinOp "and")) AssocNone
       
   404            , Infix (try $ string "shr" >> return (BinOp "or")) AssocNone
       
   405           ]
       
   406         , [Prefix (try (string "not") >> return (PrefixOp "not"))]
       
   407         ]
       
   408     
       
   409 phrasesBlock = do
       
   410     try $ string "begin"
       
   411     comments
       
   412     p <- manyTill phrase (try $ string "end")
       
   413     comments
       
   414     return $ Phrases p
       
   415     
       
   416 phrase = do
       
   417     o <- choice [
       
   418         phrasesBlock
       
   419         , ifBlock
       
   420         , whileCycle
       
   421         , repeatCycle
       
   422         , switchCase
       
   423         , withBlock
       
   424         , forCycle
       
   425         , (try $ reference >>= \r -> string ":=" >> return r) >>= \r -> expression >>= return . Assignment r
       
   426         , procCall
       
   427         ]
       
   428     optional $ char ';'
       
   429     comments
       
   430     return o
       
   431 
       
   432 ifBlock = do
       
   433     try $ string "if"
       
   434     comments
       
   435     e <- expression
       
   436     comments
       
   437     string "then"
       
   438     comments
       
   439     o1 <- phrase
       
   440     comments
       
   441     o2 <- optionMaybe $ do
       
   442         try $ string "else"
       
   443         comments
       
   444         o <- phrase
       
   445         comments
       
   446         return o
       
   447     return $ IfThenElse e o1 o2
       
   448 
       
   449 whileCycle = do
       
   450     try $ string "while"
       
   451     comments
       
   452     e <- expression
       
   453     comments
       
   454     string "do"
       
   455     comments
       
   456     o <- phrase
       
   457     return $ WhileCycle e o
       
   458 
       
   459 withBlock = do
       
   460     try $ string "with"
       
   461     comments
       
   462     e <- expression
       
   463     comments
       
   464     string "do"
       
   465     comments
       
   466     o <- phrase
       
   467     return $ WithBlock e o
       
   468     
       
   469 repeatCycle = do
       
   470     try $ string "repeat"
       
   471     comments
       
   472     o <- many phrase
       
   473     string "until"
       
   474     comments
       
   475     e <- expression
       
   476     comments
       
   477     return $ RepeatCycle e o
       
   478 
       
   479 forCycle = do
       
   480     try $ string "for"
       
   481     comments
       
   482     i <- iD
       
   483     comments
       
   484     string ":="
       
   485     comments
       
   486     e1 <- expression
       
   487     comments
       
   488     choice [string "to", string "downto"]
       
   489     comments
       
   490     e2 <- expression
       
   491     comments
       
   492     string "do"
       
   493     comments
       
   494     p <- phrase
       
   495     comments
       
   496     return $ ForCycle i e1 e2 p
       
   497     
       
   498 switchCase = do
       
   499     try $ string "case"
       
   500     comments
       
   501     e <- expression
       
   502     comments
       
   503     string "of"
       
   504     comments
       
   505     cs <- many1 aCase
       
   506     o2 <- optionMaybe $ do
       
   507         try $ string "else"
       
   508         comments
       
   509         o <- phrase
       
   510         comments
       
   511         return o
       
   512     string "end"
       
   513     return $ SwitchCase e cs o2
       
   514     where
       
   515     aCase = do
       
   516         e <- expression
       
   517         comments
       
   518         char ':'
       
   519         comments
       
   520         p <- phrase
       
   521         comments
       
   522         return (e, p)
       
   523     
       
   524 procCall = do
       
   525     i <- iD
       
   526     p <- option [] $ (parens pas) parameters
       
   527     return $ ProcCall i p
       
   528 
       
   529 parameters = (commaSep pas) expression <?> "parameters"
       
   530         
       
   531 functionBody = do
       
   532     p <- phrasesBlock
       
   533     char ';'
       
   534     comments
       
   535     return p
       
   536 
       
   537 uses = liftM Uses (option [] u)
       
   538     where
       
   539         u = do
       
   540             string "uses"
       
   541             comments
       
   542             u <- (iD >>= \i -> comments >> return i) `sepBy1` (char ',' >> comments)
       
   543             char ';'
       
   544             comments
       
   545             return u