1 module PascalParser where |
1 module PascalParser where |
2 |
2 |
3 import Text.ParserCombinators.Parsec |
3 import Text.Parsec.Expr |
|
4 import Text.Parsec.Char |
|
5 import Text.Parsec.Token |
|
6 import Text.Parsec.Language |
|
7 import Text.Parsec.Prim |
|
8 import Text.Parsec.Combinator |
|
9 import Text.Parsec.String |
4 import Control.Monad |
10 import Control.Monad |
|
11 import Data.Char |
5 |
12 |
6 data PascalUnit = |
13 data PascalUnit = |
7 Program Identificator Implementation FunctionBody |
14 Program Identifier Implementation |
8 | Unit Identificator Interface Implementation (Maybe Initialize) (Maybe Finalize) |
15 | Unit Identifier Interface Implementation (Maybe Initialize) (Maybe Finalize) |
9 deriving Show |
16 deriving Show |
10 |
|
11 data Interface = Interface Uses TypesAndVars |
17 data Interface = Interface Uses TypesAndVars |
12 deriving Show |
18 deriving Show |
13 data Implementation = Implementation Uses TypesAndVars Functions |
19 data Implementation = Implementation Uses TypesAndVars |
14 deriving Show |
20 deriving Show |
15 data Functions = Functions [Function] |
21 data Identifier = Identifier String |
16 deriving Show |
22 deriving Show |
17 data Function = Function String |
23 data TypesAndVars = TypesAndVars [TypeVarDeclaration] |
18 deriving Show |
24 deriving Show |
19 data Identificator = Identificator String |
25 data TypeVarDeclaration = TypeDeclaration Identifier TypeDecl |
20 deriving Show |
26 | VarDeclaration Bool ([Identifier], TypeDecl) (Maybe Expression) |
21 data FunctionBody = FunctionBody String |
27 | FunctionDeclaration Identifier TypeDecl (Maybe Phrase) |
22 deriving Show |
28 deriving Show |
23 data TypesAndVars = TypesAndVars String |
29 data TypeDecl = SimpleType Identifier |
24 deriving Show |
30 | RangeType Range |
25 data Initialize = Initialize Functions |
31 | Sequence [Identifier] |
26 deriving Show |
32 | ArrayDecl Range TypeDecl |
27 data Finalize = Finalize Functions |
33 | RecordType [TypeVarDeclaration] |
28 deriving Show |
34 | PointerTo TypeDecl |
29 data Uses = Uses [Identificator] |
35 | String |
30 deriving Show |
36 | UnknownType |
31 |
37 deriving Show |
32 parsePascalUnit :: String -> Either ParseError PascalUnit |
38 data Range = Range Identifier |
33 parsePascalUnit = parse pascalUnit "unit" |
39 | RangeFromTo Expression Expression |
34 where |
40 deriving Show |
35 comments = skipMany (comment >> spaces) |
41 data Initialize = Initialize String |
36 identificator = do |
42 deriving Show |
|
43 data Finalize = Finalize String |
|
44 deriving Show |
|
45 data Uses = Uses [Identifier] |
|
46 deriving Show |
|
47 data Phrase = ProcCall Identifier [Expression] |
|
48 | IfThenElse Expression Phrase (Maybe Phrase) |
|
49 | WhileCycle Expression Phrase |
|
50 | RepeatCycle Expression [Phrase] |
|
51 | ForCycle Identifier Expression Expression Phrase |
|
52 | WithBlock Expression Phrase |
|
53 | Phrases [Phrase] |
|
54 | SwitchCase Expression [(Expression, Phrase)] (Maybe Phrase) |
|
55 | Assignment Reference Expression |
|
56 deriving Show |
|
57 data Expression = Expression String |
|
58 | PrefixOp String Expression |
|
59 | PostfixOp String Expression |
|
60 | BinOp String Expression Expression |
|
61 | StringLiteral String |
|
62 | CharCode String |
|
63 | NumberLiteral String |
|
64 | HexNumber String |
|
65 | Reference Reference |
|
66 | Null |
|
67 deriving Show |
|
68 data Reference = ArrayElement [Expression] Reference |
|
69 | FunCall [Expression] Reference |
|
70 | SimpleReference Identifier |
|
71 | Dereference Reference |
|
72 | RecordField Reference Reference |
|
73 | Address Reference |
|
74 deriving Show |
|
75 |
|
76 pascalLanguageDef |
|
77 = emptyDef |
|
78 { commentStart = "(*" |
|
79 , commentEnd = "*)" |
|
80 , commentLine = "//" |
|
81 , nestedComments = False |
|
82 , identStart = letter <|> oneOf "_" |
|
83 , identLetter = alphaNum <|> oneOf "_." |
|
84 , reservedNames = [ |
|
85 "begin", "end", "program", "unit", "interface" |
|
86 , "implementation", "and", "or", "xor", "shl" |
|
87 , "shr", "while", "do", "repeat", "until", "case", "of" |
|
88 , "type", "var", "const", "out", "array", "packed" |
|
89 , "procedure", "function", "with", "for", "to" |
|
90 , "downto", "div", "mod", "record", "set", "nil" |
|
91 , "string", "shortstring" |
|
92 ] |
|
93 , reservedOpNames= [] |
|
94 , caseSensitive = False |
|
95 } |
|
96 |
|
97 pas = patch $ makeTokenParser pascalLanguageDef |
|
98 where |
|
99 patch tp = tp {stringLiteral = sl} |
|
100 sl = do |
|
101 (char '\'') |
|
102 s <- (many $ noneOf "'") |
|
103 (char '\'') |
|
104 ss <- many $ do |
|
105 (char '\'') |
|
106 s' <- (many $ noneOf "'") |
|
107 (char '\'') |
|
108 return $ '\'' : s' |
|
109 comments |
|
110 return $ concat (s:ss) |
|
111 |
|
112 comments = do |
|
113 spaces |
|
114 skipMany $ do |
|
115 comment |
37 spaces |
116 spaces |
38 l <- letter <|> oneOf "_" |
117 |
39 ls <- many (alphaNum <|> oneOf "_") |
118 pascalUnit = do |
40 spaces |
119 comments |
41 return $ Identificator (l:ls) |
120 u <- choice [program, unit] |
42 |
121 comments |
43 pascalUnit = do |
122 return u |
44 spaces |
123 |
45 comments |
124 comment = choice [ |
46 u <- choice [program, unit] |
125 char '{' >> manyTill anyChar (try $ char '}') |
47 comments |
126 , (try $ string "(*") >> manyTill anyChar (try $ string "*)") |
48 spaces |
127 , (try $ string "//") >> manyTill anyChar (try newline) |
49 return u |
128 ] |
50 |
129 |
51 comment = choice [ |
130 iD = do |
52 char '{' >> manyTill anyChar (try $ char '}') |
131 i <- liftM Identifier (identifier pas) |
53 , string "(*" >> manyTill anyChar (try $ string "*)") |
132 comments |
54 , string "//" >> manyTill anyChar (try newline) |
133 return i |
|
134 |
|
135 unit = do |
|
136 string "unit" >> comments |
|
137 name <- iD |
|
138 semi pas |
|
139 comments |
|
140 int <- interface |
|
141 impl <- implementation |
|
142 comments |
|
143 return $ Unit name int impl Nothing Nothing |
|
144 |
|
145 |
|
146 reference = buildExpressionParser table term <?> "reference" |
|
147 where |
|
148 term = comments >> choice [ |
|
149 parens pas reference |
|
150 , char '@' >> reference >>= return . Address |
|
151 , iD >>= return . SimpleReference |
|
152 ] <?> "simple reference" |
|
153 |
|
154 table = [ |
|
155 [Postfix $ (parens pas) (option [] parameters) >>= return . FunCall] |
|
156 , [Postfix (char '^' >> return Dereference)] |
|
157 , [Postfix $ (brackets pas) (commaSep1 pas $ expression) >>= return . ArrayElement] |
|
158 , [Infix (try (char '.' >> notFollowedBy (char '.')) >> return RecordField) AssocLeft] |
|
159 ] |
|
160 |
|
161 |
|
162 varsDecl1 = varsParser sepEndBy1 |
|
163 varsDecl = varsParser sepEndBy |
|
164 varsParser m endsWithSemi = do |
|
165 vs <- m (aVarDecl endsWithSemi) (semi pas) |
|
166 return vs |
|
167 |
|
168 aVarDecl endsWithSemi = do |
|
169 when (not endsWithSemi) $ |
|
170 optional $ choice [ |
|
171 try $ string "var" |
|
172 , try $ string "const" |
|
173 , try $ string "out" |
55 ] |
174 ] |
56 |
175 comments |
57 unit = do |
176 ids <- do |
58 name <- unitName |
177 i <- (commaSep1 pas) $ (try iD <?> "variable declaration") |
59 spaces |
178 char ':' |
60 comments |
179 return i |
61 int <- string "interface" >> interface |
180 comments |
62 manyTill anyChar (try $ string "implementation") |
181 t <- typeDecl <?> "variable type declaration" |
63 spaces |
182 comments |
64 comments |
183 init <- option Nothing $ do |
65 impl <- implementation |
184 char '=' |
66 return $ Unit name int impl Nothing Nothing |
185 comments |
67 where |
186 e <- expression |
68 unitName = between (string "unit") (char ';') identificator |
187 comments |
69 |
188 return (Just e) |
70 interface = do |
189 return $ VarDeclaration False (ids, t) init |
71 spaces |
190 |
72 comments |
191 |
73 u <- uses |
192 constsDecl = do |
74 return $ Interface u (TypesAndVars "") |
193 vs <- many1 (try (aConstDecl >>= \i -> semi pas >> return i) >>= \i -> comments >> return i) |
75 |
194 comments |
76 program = do |
195 return vs |
77 name <- programName |
196 where |
78 spaces |
197 aConstDecl = do |
79 comments |
198 comments |
80 impl <- implementation |
199 i <- iD <?> "const declaration" |
81 return $ Program name impl (FunctionBody "") |
200 optional $ do |
82 where |
201 char ':' |
83 programName = between (string "program") (char ';') identificator |
202 comments |
84 |
203 t <- typeDecl |
85 implementation = do |
204 return () |
86 u <- uses |
205 char '=' |
87 manyTill anyChar (try $ string "end.") |
206 comments |
88 return $ Implementation u (TypesAndVars "") (Functions []) |
207 e <- expression |
89 |
208 comments |
90 uses = liftM Uses (option [] u) |
209 return $ VarDeclaration False ([i], UnknownType) (Just e) |
91 where |
210 |
92 u = do |
211 typeDecl = choice [ |
93 string "uses" |
212 char '^' >> typeDecl >>= return . PointerTo |
94 spaces |
213 , try (string "shortstring") >> return String |
95 u <- (identificator >>= \i -> spaces >> return i) `sepBy1` (char ',' >> spaces) |
214 , arrayDecl |
96 char ';' |
215 , recordDecl |
97 spaces |
216 , rangeDecl >>= return . RangeType |
98 return u |
217 , sequenceDecl >>= return . Sequence |
|
218 , identifier pas >>= return . SimpleType . Identifier |
|
219 ] <?> "type declaration" |
|
220 where |
|
221 arrayDecl = do |
|
222 try $ string "array" |
|
223 comments |
|
224 char '[' |
|
225 r <- rangeDecl |
|
226 char ']' |
|
227 comments |
|
228 string "of" |
|
229 comments |
|
230 t <- typeDecl |
|
231 return $ ArrayDecl r t |
|
232 recordDecl = do |
|
233 optional $ (try $ string "packed") >> comments |
|
234 try $ string "record" |
|
235 comments |
|
236 vs <- varsDecl True |
|
237 string "end" |
|
238 return $ RecordType vs |
|
239 sequenceDecl = (parens pas) $ (commaSep pas) iD |
|
240 |
|
241 typesDecl = many (aTypeDecl >>= \t -> comments >> return t) |
|
242 where |
|
243 aTypeDecl = do |
|
244 i <- try $ do |
|
245 i <- iD <?> "type declaration" |
|
246 comments |
|
247 char '=' |
|
248 return i |
|
249 comments |
|
250 t <- typeDecl |
|
251 comments |
|
252 semi pas |
|
253 comments |
|
254 return $ TypeDeclaration i t |
|
255 |
|
256 rangeDecl = choice [ |
|
257 try $ rangeft |
|
258 , iD >>= return . Range |
|
259 ] <?> "range declaration" |
|
260 where |
|
261 rangeft = do |
|
262 e1 <- expression |
|
263 string ".." |
|
264 e2 <- expression |
|
265 return $ RangeFromTo e1 e2 |
|
266 |
|
267 typeVarDeclaration isImpl = (liftM concat . many . choice) [ |
|
268 varSection, |
|
269 constSection, |
|
270 typeSection, |
|
271 funcDecl, |
|
272 procDecl |
|
273 ] |
|
274 where |
|
275 varSection = do |
|
276 try $ string "var" |
|
277 comments |
|
278 v <- varsDecl1 True |
|
279 comments |
|
280 return v |
|
281 |
|
282 constSection = do |
|
283 try $ string "const" |
|
284 comments |
|
285 c <- constsDecl |
|
286 comments |
|
287 return c |
|
288 |
|
289 typeSection = do |
|
290 try $ string "type" |
|
291 comments |
|
292 t <- typesDecl |
|
293 comments |
|
294 return t |
|
295 |
|
296 procDecl = do |
|
297 try $ string "procedure" |
|
298 comments |
|
299 i <- iD |
|
300 optional $ do |
|
301 char '(' |
|
302 varsDecl False |
|
303 char ')' |
|
304 comments |
|
305 char ';' |
|
306 b <- if isImpl then |
|
307 do |
|
308 comments |
|
309 optional $ typeVarDeclaration True |
|
310 comments |
|
311 liftM Just functionBody |
|
312 else |
|
313 return Nothing |
|
314 comments |
|
315 return $ [FunctionDeclaration i UnknownType b] |
|
316 |
|
317 funcDecl = do |
|
318 try $ string "function" |
|
319 comments |
|
320 i <- iD |
|
321 optional $ do |
|
322 char '(' |
|
323 varsDecl False |
|
324 char ')' |
|
325 comments |
|
326 char ':' |
|
327 comments |
|
328 ret <- typeDecl |
|
329 comments |
|
330 char ';' |
|
331 comments |
|
332 b <- if isImpl then |
|
333 do |
|
334 optional $ typeVarDeclaration True |
|
335 comments |
|
336 liftM Just functionBody |
|
337 else |
|
338 return Nothing |
|
339 return $ [FunctionDeclaration i ret Nothing] |
|
340 |
|
341 program = do |
|
342 string "program" |
|
343 comments |
|
344 name <- iD |
|
345 (char ';') |
|
346 comments |
|
347 impl <- implementation |
|
348 comments |
|
349 return $ Program name impl |
|
350 |
|
351 interface = do |
|
352 string "interface" |
|
353 comments |
|
354 u <- uses |
|
355 comments |
|
356 tv <- typeVarDeclaration False |
|
357 comments |
|
358 return $ Interface u (TypesAndVars tv) |
|
359 |
|
360 implementation = do |
|
361 string "implementation" |
|
362 comments |
|
363 u <- uses |
|
364 comments |
|
365 tv <- typeVarDeclaration True |
|
366 string "end." |
|
367 comments |
|
368 return $ Implementation u (TypesAndVars tv) |
|
369 |
|
370 expression = buildExpressionParser table term <?> "expression" |
|
371 where |
|
372 term = comments >> choice [ |
|
373 parens pas $ expression |
|
374 , try $ integer pas >>= return . NumberLiteral . show |
|
375 , stringLiteral pas >>= return . StringLiteral |
|
376 , char '#' >> many digit >>= return . CharCode |
|
377 , char '$' >> many hexDigit >>= return . HexNumber |
|
378 , try $ string "nil" >> return Null |
|
379 , reference >>= return . Reference |
|
380 ] <?> "simple expression" |
|
381 |
|
382 table = [ |
|
383 [ Infix (char '*' >> return (BinOp "*")) AssocLeft |
|
384 , Infix (char '/' >> return (BinOp "/")) AssocLeft |
|
385 , Infix (try (string "div") >> return (BinOp "div")) AssocLeft |
|
386 , Infix (try (string "mod") >> return (BinOp "mod")) AssocLeft |
|
387 ] |
|
388 , [ Infix (char '+' >> return (BinOp "+")) AssocLeft |
|
389 , Infix (char '-' >> return (BinOp "-")) AssocLeft |
|
390 , Prefix (char '-' >> return (PrefixOp "-")) |
|
391 ] |
|
392 , [ Infix (try (string "<>") >> return (BinOp "<>")) AssocNone |
|
393 , Infix (try (string "<=") >> return (BinOp "<=")) AssocNone |
|
394 , Infix (try (string ">=") >> return (BinOp ">=")) AssocNone |
|
395 , Infix (char '<' >> return (BinOp "<")) AssocNone |
|
396 , Infix (char '>' >> return (BinOp ">")) AssocNone |
|
397 , Infix (char '=' >> return (BinOp "=")) AssocNone |
|
398 ] |
|
399 , [ Infix (try $ string "and" >> return (BinOp "and")) AssocLeft |
|
400 , Infix (try $ string "or" >> return (BinOp "or")) AssocLeft |
|
401 , Infix (try $ string "xor" >> return (BinOp "xor")) AssocLeft |
|
402 ] |
|
403 , [ Infix (try $ string "shl" >> return (BinOp "and")) AssocNone |
|
404 , Infix (try $ string "shr" >> return (BinOp "or")) AssocNone |
|
405 ] |
|
406 , [Prefix (try (string "not") >> return (PrefixOp "not"))] |
|
407 ] |
|
408 |
|
409 phrasesBlock = do |
|
410 try $ string "begin" |
|
411 comments |
|
412 p <- manyTill phrase (try $ string "end") |
|
413 comments |
|
414 return $ Phrases p |
|
415 |
|
416 phrase = do |
|
417 o <- choice [ |
|
418 phrasesBlock |
|
419 , ifBlock |
|
420 , whileCycle |
|
421 , repeatCycle |
|
422 , switchCase |
|
423 , withBlock |
|
424 , forCycle |
|
425 , (try $ reference >>= \r -> string ":=" >> return r) >>= \r -> expression >>= return . Assignment r |
|
426 , procCall |
|
427 ] |
|
428 optional $ char ';' |
|
429 comments |
|
430 return o |
|
431 |
|
432 ifBlock = do |
|
433 try $ string "if" |
|
434 comments |
|
435 e <- expression |
|
436 comments |
|
437 string "then" |
|
438 comments |
|
439 o1 <- phrase |
|
440 comments |
|
441 o2 <- optionMaybe $ do |
|
442 try $ string "else" |
|
443 comments |
|
444 o <- phrase |
|
445 comments |
|
446 return o |
|
447 return $ IfThenElse e o1 o2 |
|
448 |
|
449 whileCycle = do |
|
450 try $ string "while" |
|
451 comments |
|
452 e <- expression |
|
453 comments |
|
454 string "do" |
|
455 comments |
|
456 o <- phrase |
|
457 return $ WhileCycle e o |
|
458 |
|
459 withBlock = do |
|
460 try $ string "with" |
|
461 comments |
|
462 e <- expression |
|
463 comments |
|
464 string "do" |
|
465 comments |
|
466 o <- phrase |
|
467 return $ WithBlock e o |
|
468 |
|
469 repeatCycle = do |
|
470 try $ string "repeat" |
|
471 comments |
|
472 o <- many phrase |
|
473 string "until" |
|
474 comments |
|
475 e <- expression |
|
476 comments |
|
477 return $ RepeatCycle e o |
|
478 |
|
479 forCycle = do |
|
480 try $ string "for" |
|
481 comments |
|
482 i <- iD |
|
483 comments |
|
484 string ":=" |
|
485 comments |
|
486 e1 <- expression |
|
487 comments |
|
488 choice [string "to", string "downto"] |
|
489 comments |
|
490 e2 <- expression |
|
491 comments |
|
492 string "do" |
|
493 comments |
|
494 p <- phrase |
|
495 comments |
|
496 return $ ForCycle i e1 e2 p |
|
497 |
|
498 switchCase = do |
|
499 try $ string "case" |
|
500 comments |
|
501 e <- expression |
|
502 comments |
|
503 string "of" |
|
504 comments |
|
505 cs <- many1 aCase |
|
506 o2 <- optionMaybe $ do |
|
507 try $ string "else" |
|
508 comments |
|
509 o <- phrase |
|
510 comments |
|
511 return o |
|
512 string "end" |
|
513 return $ SwitchCase e cs o2 |
|
514 where |
|
515 aCase = do |
|
516 e <- expression |
|
517 comments |
|
518 char ':' |
|
519 comments |
|
520 p <- phrase |
|
521 comments |
|
522 return (e, p) |
|
523 |
|
524 procCall = do |
|
525 i <- iD |
|
526 p <- option [] $ (parens pas) parameters |
|
527 return $ ProcCall i p |
|
528 |
|
529 parameters = (commaSep pas) expression <?> "parameters" |
|
530 |
|
531 functionBody = do |
|
532 p <- phrasesBlock |
|
533 char ';' |
|
534 comments |
|
535 return p |
|
536 |
|
537 uses = liftM Uses (option [] u) |
|
538 where |
|
539 u = do |
|
540 string "uses" |
|
541 comments |
|
542 u <- (iD >>= \i -> comments >> return i) `sepBy1` (char ',' >> comments) |
|
543 char ';' |
|
544 comments |
|
545 return u |