1 # Content (c) 2002, 2004, 2006, 2007, 2008, 2009 |
|
2 # David Turner <david@freetype.org> |
|
3 # |
|
4 # This file contains routines used to parse the content of documentation |
|
5 # comment blocks and build more structured objects out of them. |
|
6 # |
|
7 |
|
8 from sources import * |
|
9 from utils import * |
|
10 import string, re |
|
11 |
|
12 |
|
# Code sequences are code fragments embedded between a '{' line and a
# '}' line, as in:
#
#   {
#     x = y + z;
#     if ( zookoo == 2 )
#     {
#       foobar();
#     }
#   }
#
# Both patterns accept a line made only of optional whitespace, the
# brace, and optional trailing whitespace; group 1 captures the leading
# whitespace so that the indentation of the two braces can be compared.
# A closing brace ends the sequence when its indentation does not exceed
# that of the opening brace; braces at a greater indentation belong to
# the code itself.
#
re_code_start = re.compile(r"(\s*){\s*$")
re_code_end = re.compile(r"(\s*)}\s*$")


# Isolates the identifier (a run of word characters) found at the very
# start of a piece of text; group 1 is the identifier.
#
re_identifier = re.compile(r'(\w*)')


# We collect macros ending in `_H'; while outputting the object data, we
# use this info together with the object's file location to emit the
# appropriate header file macro and name before the object itself.
# Group 1 is the macro name, group 2 the text between '<' and '>'.
#
re_header_macro = re.compile(r'^#define\s{1,}(\w{1,}_H)\s{1,}<(.*)>')
|
43 |
|
44 |
|
45 ############################################################################# |
|
46 # |
|
47 # The DocCode class is used to store source code lines. |
|
48 # |
|
49 # 'self.lines' contains a set of source code lines that will be dumped as |
|
50 # HTML in a <PRE> tag. |
|
51 # |
|
52 # The object is filled line by line by the parser; it strips the leading |
|
53 # "margin" space from each input line before storing it in 'self.lines'. |
|
54 # |
|
class DocCode:
    """Store the source code lines of one code sequence.

    Attributes:
      lines -- list of code lines, with the leading margin removed.
      words -- always None; set so that code and paragraph objects
               expose the same two attributes.
    """

    def __init__(self, margin, lines):
        """Build the object from `lines`, removing `margin` leading columns.

        The first `margin` characters of a line are dropped only when
        they consist entirely of whitespace; any other line is stored
        unchanged.
        """
        self.lines = []
        self.words = None

        for line in lines:
            # str methods replace the deprecated `string' module
            # functions, which no longer exist in Python 3
            if not line[:margin].strip():
                line = line[margin:]
            self.lines.append(line)

    def dump(self, prefix="", width=60):
        """Print every stored line on standard output, preceded by `prefix`."""
        for line in self.dump_lines(0, width):
            # a single parenthesized argument prints identically under
            # Python 2 and Python 3
            print(prefix + line)

    def dump_lines(self, margin=0, width=60):
        """Return the stored lines, each indented by `margin` spaces.

        `width` is accepted for interface compatibility with the other
        dump_lines() methods and is not used: code is never re-wrapped.
        """
        indent = " " * margin
        return [indent + line for line in self.lines]
|
77 |
|
78 |
|
79 |
|
80 ############################################################################# |
|
81 # |
|
82 # The DocPara class is used to store "normal" text paragraph. |
|
83 # |
|
84 # 'self.words' contains the list of words that make up the paragraph |
|
85 # |
|
class DocPara:
    """Store a "normal" text paragraph as a flat list of words.

    Attributes:
      lines -- always None; set so that paragraph and code objects
               expose the same two attributes.
      words -- list of the whitespace-separated words of the paragraph.
    """

    def __init__(self, lines):
        """Split every line of `lines` into words and collect them."""
        self.lines = None
        self.words = []
        for line in lines:
            # str.split() without argument already ignores leading and
            # trailing whitespace, so no explicit strip is required
            self.words.extend(line.split())

    def dump(self, prefix="", width=60):
        """Print the paragraph, wrapped to `width`, preceded by `prefix`."""
        for line in self.dump_lines(0, width):
            print(prefix + line)

    def dump_lines(self, margin=0, width=60):
        """Return the paragraph as a list of lines wrapped to `width`.

        Words are joined with single spaces and a new line is started
        when adding the next word would exceed `width` columns.  Each
        returned line is indented by `margin` spaces (the margin is not
        counted in `width`).  A word longer than `width` is placed alone
        on its own line.
        """
        indent = " " * margin
        result = []
        cur = ""   # text of the line being built
        col = 0    # its current width

        for word in self.words:
            needed = len(word)
            if col > 0:
                needed = needed + 1   # separating space

            # only break when there is something to flush; otherwise an
            # over-long first word would emit a spurious empty line
            if col > 0 and col + needed > width:
                result.append(indent + cur)
                cur = word
                col = len(word)
            else:
                if col > 0:
                    cur = cur + " "
                cur = cur + word
                col = col + needed

        if col > 0:
            result.append(indent + cur)

        return result
|
124 |
|
125 |
|
126 |
|
127 ############################################################################# |
|
128 # |
|
129 # The DocField class is used to store a list containing either DocPara or |
|
130 # DocCode objects. Each DocField also has an optional "name" which is used |
|
131 # when the object corresponds to a field or value definition |
|
132 # |
|
class DocField:
    """Store a list of DocPara and DocCode objects, optionally named.

    Attributes:
      name  -- field name, or None for normal paragraphs/sources.
      items -- list of DocPara / DocCode objects, in input order.
    """

    def __init__(self, name, lines):
        """Split `lines` into text paragraphs and code sequences.

        A line matching `re_code_start' opens a code sequence; it is
        closed by a line matching `re_code_end' whose indentation does
        not exceed that of the opening line.  Outside of code sequences,
        an empty line ends the current paragraph, if any.
        """
        self.name = name
        self.items = []

        in_code = False   # are we inside a code sequence?
        margin = -1       # indentation of the current code sequence
        cur_lines = []    # lines of the paragraph/code being collected

        for line in lines:
            if in_code:
                m = re_code_end.match(line)
                if m and len(m.group(1)) <= margin:
                    # that's it, we finished the code sequence; the
                    # margin was already removed while collecting
                    self.items.append(DocCode(0, cur_lines))
                    margin = -1
                    cur_lines = []
                    in_code = False
                else:
                    # nope, continue the code sequence
                    cur_lines.append(line[margin:])
                continue

            m = re_code_start.match(line)
            if m:
                # flush the pending paragraph, then switch to code
                # extraction mode
                if cur_lines:
                    self.items.append(DocPara(cur_lines))
                    cur_lines = []
                margin = len(m.group(1))
                in_code = True
            elif cur_lines and not line.split():
                # an empty line ends the current paragraph
                self.items.append(DocPara(cur_lines))
                cur_lines = []
            else:
                # otherwise, simply add the line to the current paragraph
                cur_lines.append(line)

        if in_code:
            # unexpected end of code sequence; the collected lines have
            # already lost their margin, so it must not be removed a
            # second time
            self.items.append(DocCode(0, cur_lines))
        elif cur_lines:
            self.items.append(DocPara(cur_lines))

    def dump(self, prefix=""):
        """Print the field on standard output.

        A named field is introduced by a "<name> ::" line and its items
        are then printed with "----" appended to `prefix`.  Items are
        separated by an empty line.
        """
        if self.name:
            print(prefix + self.name + " ::")
            prefix = prefix + "----"

        first = True
        for item in self.items:
            if not first:
                print("")
            item.dump(prefix)
            first = False

    def dump_lines(self, margin=0, width=60):
        """Return the lines of all items, separated by empty strings."""
        result = []
        for item in self.items:
            if result or item is not self.items[0]:
                # an empty line separates consecutive items, even when
                # the previous item produced no line at all
                result.append("")
            result.extend(item.dump_lines(margin, width))
        return result
|
223 |
|
224 |
|
225 |
|
# Detects the start of a field definition: optional leading whitespace,
# a field name, optional whitespace, then `::'.  Group 1 is the field
# name, which is either a (possibly empty) run of word characters or a
# dotted name that begins and ends with a word character.
#
re_field = re.compile(r"\s*(\w*|\w(\w|\.)*\w)\s*::")
|
229 |
|
230 |
|
231 |
|
class DocMarkup:
    """Store the content of one markup section as a list of DocField.

    Attributes:
      tag    -- the markup tag, converted to lower case.
      fields -- list of DocField objects, in input order.
    """

    def __init__(self, tag, lines):
        """Split `lines` into fields.

        A line matching `re_field' starts a new field; the field
        definition itself is replaced by spaces so that the remaining
        text of the line keeps its column.  Lines found before the first
        field definition are stored in an unnamed field.
        """
        self.tag = tag.lower()
        self.fields = []

        cur_lines = []
        field = None

        for line in lines:
            m = re_field.match(line)
            if m:
                # we detected the start of a new field definition;
                # first, save the current one
                if cur_lines:
                    self.fields.append(DocField(field, cur_lines))

                field = m.group(1)   # record field name
                width = len(m.group(0))
                cur_lines = [" " * width + line[width:]]
            else:
                cur_lines.append(line)

        if field or cur_lines:
            self.fields.append(DocField(field, cur_lines))

    def get_name(self):
        """Return the first word of the first item of the first field.

        Returns None when there is no such word.
        """
        try:
            return self.fields[0].items[0].words[0]
        except (IndexError, AttributeError, TypeError):
            # missing field/item/word, or an item without a word list
            return None

    def get_start(self):
        """Return the words of the first item of the first field.

        The words are joined with single spaces.  Returns "ERROR" when
        the first field or its first item is missing, or when that item
        has no word list.
        """
        try:
            return " ".join(self.fields[0].items[0].words)
        except (IndexError, AttributeError, TypeError):
            return "ERROR"

    def dump(self, margin):
        """Print the markup between <tag> and </tag> lines.

        The two tag lines are indented by `margin` spaces.
        """
        indent = " " * margin
        print(indent + "<" + self.tag + ">")
        for field in self.fields:
            field.dump(" ")
        print(indent + "</" + self.tag + ">")
|
285 |
|
286 |
|
287 |
|
class DocChapter:
    """Describe one chapter of the documentation.

    Attributes:
      block    -- the block the chapter was built from, or None.
      sections -- list of sections of the chapter; empty on creation.
      name     -- chapter name.
      title    -- chapter title, as a list of words.
      order    -- list of words of the block's "sections" markup.
    """

    def __init__(self, block):
        """Build a chapter from `block`.

        When `block` is false (typically None), a default chapter named
        "Other", titled "Miscellaneous" and with an empty order list is
        created instead.
        """
        self.block = block
        self.sections = []
        if block:
            self.name = block.name
            self.title = block.get_markup_words("title")
            self.order = block.get_markup_words("sections")
        else:
            self.name = "Other"
            # str.split() replaces the deprecated string.split(), which
            # no longer exists in Python 3
            self.title = "Miscellaneous".split()
            self.order = []
|
301 |
|
302 |
|
303 |
|
class DocSection:
    """Describe one documentation section and the blocks it contains.

    Attributes:
      name        -- section name.
      blocks      -- dictionary mapping a block name to its block.
      block_names -- ordered block names in section.
      defs        -- blocks registered through add_def().
      abstract, description, order, title
                  -- filled by process(); until then they hold "", "",
                     [] and "ERROR" respectively.
      chapter     -- None on creation.
    """

    def __init__(self, name="Other"):
        self.name = name
        self.title = "ERROR"
        self.abstract = ""
        self.description = ""
        self.order = []
        self.chapter = None
        self.defs = []
        self.blocks = {}
        self.block_names = []

    def add_def(self, block):
        """Register `block` as a candidate section description."""
        self.defs.append(block)

    def add_block(self, block):
        """Add `block` to the section, remembering the insertion order."""
        block_name = block.name
        self.block_names.append(block_name)
        self.blocks[block_name] = block

    def process(self):
        """Fill title, abstract, description and order.

        The values come from the first registered definition block that
        has a non-empty "title" markup.  Nothing changes when no such
        block exists.
        """
        for candidate in self.defs:
            title = candidate.get_markup_text("title")
            if not title:
                continue

            self.title = title
            self.abstract = candidate.get_markup_words("abstract")
            self.description = candidate.get_markup_items("description")
            self.order = candidate.get_markup_words("order")
            break

    def reorder(self):
        """Reorder the block names with sort_order_list() and `order`."""
        reordered = sort_order_list(self.block_names, self.order)
        self.block_names = reordered
|
337 |
|
338 |
|
339 |
|
class ContentProcessor:
    """Turn documentation comment blocks into sections and chapters.

    Attributes:
      sections -- dictionary of documentation sections, by name.
      section  -- current documentation section (None on creation).
      chapters -- list of chapters.
      headers  -- dictionary of header macros.
      markups, markup, markup_lines
               -- per-block parsing state, cleared by reset().
    """

    def __init__(self):
        """initialize a block content processor"""
        self.reset()

        self.sections = {}
        self.section = None

        self.chapters = []

        self.headers = {}

    def set_section(self, section_name):
        """set current section during parsing

        The section is created and registered when `section_name` is
        not yet known.
        """
        # the `in' operator replaces dict.has_key(), which no longer
        # exists in Python 3
        if section_name not in self.sections:
            self.sections[section_name] = DocSection(section_name)
        self.section = self.sections[section_name]

    def add_chapter(self, block):
        """create a chapter from `block` and append it to the chapters"""
        self.chapters.append(DocChapter(block))

    def reset(self):
        """reset the content processor for a new block"""
        self.markups = []
        self.markup = None
        self.markup_lines = []

    def add_markup(self):
        """add a new markup section

        The pending tag and lines are turned into a DocMarkup object,
        provided both are non-empty; the pending state is cleared in
        every case.
        """
        if self.markup and self.markup_lines:
            # get rid of last line of markup if it's empty
            if not self.markup_lines[-1].strip():
                self.markup_lines = self.markup_lines[:-1]

            self.markups.append(DocMarkup(self.markup, self.markup_lines))

        self.markup = None
        self.markup_lines = []

    def process_content(self, content):
        """process a block content and return a list of DocMarkup objects
           corresponding to it

        Lines found before the first markup tag are ignored.
        """
        seen_markup = False

        for line in content:
            found = None
            for tag_re in re_markup_tags:
                m = tag_re.match(line)
                if m:
                    found = m.group(1).lower()
                    prefix = len(m.group(0))
                    # remove markup from line, keeping the columns
                    line = " " * prefix + line[prefix:]
                    break

            if found:
                # start of a new markup section: flush the current one
                seen_markup = True
                self.add_markup()
                self.markup = found
                if line.strip():
                    self.markup_lines.append(line)
            elif seen_markup:
                self.markup_lines.append(line)

        self.add_markup()

        return self.markups

    def parse_sources(self, source_processor):
        """build a DocBlock for each documentation block of the processor

        Each block with content is passed to DocBlock along with the
        run of content-less blocks that immediately follows it.
        """
        blocks = source_processor.blocks
        count = len(blocks)

        for n, source in enumerate(blocks):
            if not source.content:
                continue

            # this is a documentation comment, we need to catch all
            # following normal blocks in the "follow" list
            follow = []
            m = n + 1
            while m < count and not blocks[m].content:
                follow.append(blocks[m])
                m = m + 1

            # the constructor registers the new block with this
            # processor, hence the result needs not be kept
            DocBlock(source, follow, self)

    def finish(self):
        """process the sections and attach them to their chapters

        A warning is written to standard error for every section listed
        by a chapter but unknown to the processor.  Sections that belong
        to no chapter are gathered in an additional default chapter.
        """
        # imported here since the module only gets `sys' indirectly,
        # through its star imports
        import sys

        # process all sections to extract their abstract, description
        # and ordered list of items
        for sec in self.sections.values():
            sec.process()

        # process chapters to check that all sections are correctly
        # listed there
        for chap in self.chapters:
            for sec_name in chap.order:
                if sec_name in self.sections:
                    section = self.sections[sec_name]
                    section.chapter = chap
                    section.reorder()
                    chap.sections.append(section)
                else:
                    sys.stderr.write("WARNING: chapter '" +
                                     chap.name + "' in " +
                                     chap.block.location() +
                                     " lists unknown section '" +
                                     sec_name + "'\n")

        # check that all sections are in a chapter
        others = [sec for sec in self.sections.values() if not sec.chapter]

        # create a new special chapter for all remaining sections when
        # necessary
        if others:
            chap = DocChapter(None)
            chap.sections = others
            self.chapters.append(chap)
|
473 |
|
474 |
|
475 |
|
class DocBlock:
    """Describe one documentation block and the source code following it.

    Attributes:
      source  -- the source block holding the documentation comment.
      code    -- source lines documented by the block, without leading
                 and trailing empty lines.
      type    -- tag of the first markup, or "ERRTYPE".
      name    -- identifier starting the first markup, or "ERRNAME".
      section -- the processor's current section at creation time.
      markups -- list of DocMarkup objects parsed from the comment.
    """

    def __init__(self, source, follow, processor):
        """Parse `source` and register the new block with `processor`.

        `follow` is the list of blocks found after `source`; their lines
        are collected as the documented code until a formatted block is
        met.  A "section" block selects the processor's current section,
        a "chapter" block adds a chapter, and any other block is added
        to the current section.
        """
        processor.reset()

        self.source = source
        self.code = []
        self.type = "ERRTYPE"
        self.name = "ERRNAME"
        self.section = processor.section
        self.markups = processor.process_content(source.content)

        # compute block type from first markup tag
        try:
            self.type = self.markups[0].tag
        except (IndexError, AttributeError, TypeError):
            pass

        # compute block name from first markup paragraph
        try:
            para = self.markups[0].fields[0].items[0]
            name = para.words[0]
            m = re_identifier.match(name)
            if m:
                name = m.group(1)
            self.name = name
        except (IndexError, AttributeError, TypeError):
            # no markup/field/item/word, or the first item has no words
            pass

        if self.type == "section":
            # detect new section starts
            processor.set_section(self.name)
            processor.section.add_def(self)
        elif self.type == "chapter":
            # detect new chapter
            processor.add_chapter(self)
        else:
            processor.section.add_block(self)

        # now, compute the source lines relevant to this documentation
        # block; normal comments are kept
        code_lines = []
        for block in follow:
            if block.format:
                break
            for line in block.lines:
                # collect header macro definitions
                m = re_header_macro.match(line)
                if m:
                    processor.headers[m.group(2)] = m.group(1)

                # a separator line ends the current block only; the
                # following blocks are still examined
                if re_source_sep.match(line):
                    break
                code_lines.append(line)

        # now strip the leading and trailing empty lines from the sources
        first = 0
        last = len(code_lines)

        while first < last and not code_lines[first].strip():
            first = first + 1

        while last > first and not code_lines[last - 1].strip():
            last = last - 1

        self.code = code_lines[first:last]

    def location(self):
        """return the location of the block's source"""
        return self.source.location()

    def get_markup(self, tag_name):
        """return the DocMarkup corresponding to a given tag in a block

        The comparison ignores the case of `tag_name`; None is returned
        when no markup has that tag.
        """
        wanted = tag_name.lower()
        for markup in self.markups:
            if markup.tag == wanted:
                return markup
        return None

    def get_markup_name(self, tag_name):
        """return the name of a given primary markup in a block

        Returns None when the block has no such markup.
        """
        try:
            return self.get_markup(tag_name).get_name()
        except (IndexError, AttributeError, TypeError):
            return None

    def get_markup_words(self, tag_name):
        """return the words of the first item of a given markup

        Returns an empty list when the markup, its first field or its
        first item is missing.
        """
        try:
            return self.get_markup(tag_name).fields[0].items[0].words
        except (IndexError, AttributeError, TypeError):
            return []

    def get_markup_text(self, tag_name):
        """return the words of a given markup joined with single spaces"""
        return " ".join(self.get_markup_words(tag_name))

    def get_markup_items(self, tag_name):
        """return the items of the first field of a given markup

        Returns None when the markup or its first field is missing.
        """
        try:
            return self.get_markup(tag_name).fields[0].items
        except (IndexError, AttributeError, TypeError):
            return None
|
583 |
|
584 # eof |
|