#! /usr/local/bin/python
# copyright 2008, Mike Howard and Clove Technologies, Inc. All Rights Reserved.
# Use is granted to any and all under the terms of the GNU Public License,
# version 2 - as it exists at the current time.
"""
#doc-start
h1. extract_doc.py - a document extractor and formatter for embedded Textile

Copyright 2008, Mike Howard and Clove Technologies, Inc. All Rights Reserved.

Use is granted to any and all under the terms of the GNU Public License,
version 2 - as it exists at the current time.

*extract_doc.py* is a very simple program which extracts all the plain text
between marker tags [#doc-start and #doc-end] in one or more source code files
and translates it into HTML.

Of course, the translation to HTML is a lot better if the text is formatted
correctly using the Textile formatting language.

This version of *extract_doc.py* uses the python implementation of Textile
in textile-2.0.11. From the PKG_INFO of that distribution:

Metadata-Version: 1.0
Name: textile
Version: 2.0.11
Summary: This is Textile. A Humane Web Text Generator.
Home-page: http://dealmeida.net/projects/textile/
Author: Roberto A. F. De Almeida
Author-email: roberto@dealmeida.net
License: Freely Distributable
Download-URL: http://dom.eav.free.fr/textile-2.0.10.tar.gz
Description: Textile is a XHTML generator using a simple markup developed by
Dean Allen. This is a Python port with support for code validation, itex to
MathML translation, Python code coloring and much more.

Platform: any

Running *extract_doc.py* is simple:

* cd to directory containing your source code
* type 'python extract_doc.py' or 'python extract_doc.py file names'

This will create a subdirectory named _doc_ and write one HTML file for each
source file found - named after the source file, with its extension replaced
by .html.

Options are available to:

* point to a different source directory
* change the suffixes recognized as 'source files' [defaults to .php and .inc]
* change the name of the output directory
* create disgustingly verbose output

One small point: the -a option appends extensions to the current list,
whereas the -e option Replaces the current list.

Another small point: for the 'forgetful' there are several synonyms for
the #doc-start and #doc-end markers. They are:

* #doc-start, #start-doc, #begin-doc, and #doc-begin
* #doc-end, #end-doc, #stop-doc, and #doc-stop

Last small point: the #doc-start and #doc-end markers are not recognized if
they don't start in column 1.

Hope this is useful.

Mike Howard - http://www.clove.com
#doc-end
"""

import sys
import os
import os.path
import getopt
import re

try:
    import textile
except ImportError:
    # Deferred failure: lets -h/--help work and keeps the helper functions
    # importable when the third-party package is missing.  main() reports
    # the problem before any file is processed.
    textile = None

# Globals (defaults; parse_options() overwrites them from the command line)
src_dir = '.'
doc_dir = 'doc'
verbose = 0
debug = False
extension_list = ['.php', '.inc']

# Text written before and after the generated markup of every output file;
# webpage_top is formatted with the source file name.
# NOTE(review): these look like they once held HTML page boilerplate that
# has been lost - confirm against the upstream script.
webpage_top = """ %s """
webpage_bottom = """ """

USAGE = "usage: " + os.path.basename(sys.argv[0]) + " [-h|*.php *.inc]"

# One entry per printed line of the -h/--help output.  The bracketed values
# are the defaults in effect when the module is loaded.
HELP = (
    sys.argv[0] + """ creates html documentation files from Textile formatted comments
in *.php and *.inc files

Only lines between #doc-start and #doc-end markers - beginning in column 0 -
are processed.

All documentation is run through the PyTextile implementation of Textile
markup. I'm currently using textile-2.0.11.

From the Textile PKG-INFO:

Metadata-Version: 1.0
Name: textile
Version: 2.0.11
Summary: This is Textile. A Humane Web Text Generator.
Home-page: http://dealmeida.net/projects/textile/
Author: Roberto A. F. De Almeida
Author-email: roberto@dealmeida.net
License: Freely Distributable
Download-URL: http://dom.eav.free.fr/textile-2.0.10.tar.gz
Description: Textile is a XHTML generator using a simple markup developed by
Dean Allen. This is a Python port with support for code validation, itex to
MathML translation, Python code coloring and much more.

Platform: any
""",
    '',
    'Option Meaning',
    '-d/--doc-dir place to put documentation [%s]' % doc_dir,
    '-a/--add-ext comma separated list of extensions to add to %s' % extension_list,
    '-e/--ext-list comma separated list of extensions to replace %s by' % extension_list,
    '-s/--src-dir=path source directory to scan - if no file list given [%s]' % src_dir,
    '-v/--verbose increase verbosity level',
    '-D/--debug send output to STDOUT',
)

shortopts = "hd:s:a:e:vD"
# 'src-dir=' used to be spelled 'src-dir]', which made getopt reject
# --src-dir=path.
longopts = ['help', 'doc-dir=', 'add-ext=', 'ext-list=', 'verbose',
            'src-dir=', 'debug']


# Process Options
def parse_options(argv):
    """Apply command line options to the module globals.

    argv is the argument list without the program name.  Returns the
    remaining positional arguments (the explicit file list, possibly empty).
    Prints the help text and exits with status 0 for -h/--help; prints the
    usage line and exits with status 1 when getopt rejects an option.
    """
    global src_dir, doc_dir, verbose, debug, extension_list
    try:
        opts, args = getopt.getopt(argv, shortopts, longopts)
    except getopt.GetoptError as err:
        print("Illegal Option: %s" % err)
        print(USAGE)
        sys.exit(1)
    for opt, val in opts:
        if opt in ('-h', '--help'):
            for hlp in HELP:
                print(hlp)
            sys.exit(0)
        elif opt in ('-d', '--doc-dir'):
            doc_dir = val
        elif opt in ('-a', '--add-ext'):
            extension_list.extend(val.split(','))
        elif opt in ('-e', '--ext-list'):
            extension_list = val.split(',')
        elif opt in ('-v', '--verbose'):
            verbose += 1
        elif opt in ('-s', '--src-dir'):
            src_dir = val
        elif opt in ('-D', '--debug'):
            debug = True
    return args


# build file list
def process_fname(fname, list):
    """Queue fname for processing if it ends with a recognized extension.

    Appends a (source path, output path) tuple to list; the output path is
    the doc_dir subdirectory next to the source file, with the matched
    extension replaced by '.html'.  Files matching no extension are ignored.
    (The parameter is called list for backward compatibility.)
    """
    for ext in extension_list:
        # Skip empty entries such as those produced by '-a ""'.
        if ext and fname.endswith(ext):
            html_name = os.path.basename(fname)[:-len(ext)] + '.html'
            out_dir = os.path.join(os.path.dirname(fname), doc_dir)
            list.append((fname, os.path.join(out_dir, html_name)))
            # One output per source file, even if several extensions match.
            break


def build_file_list(args):
    """Return the (source, output) pairs to process.

    Uses the explicit file names in args when there are any; otherwise walks
    the directory tree below src_dir.
    """
    found = []
    if args:
        for fname in args:
            process_fname(fname, found)
    else:
        for dir_path, _subdirs, fnames in os.walk(src_dir):
            for fname in fnames:
                process_fname(os.path.join(dir_path, fname), found)
    return found


def check_or_create_doc_dir(doc_fname):
    """Open doc_fname for writing, creating its directory when needed.

    Returns the open file object, or False when the directory cannot be
    created or the file cannot be opened.
    """
    doc_dir_path = os.path.dirname(doc_fname)
    try:
        if doc_dir_path and not os.path.isdir(doc_dir_path):
            # makedirs so that a nested --doc-dir (e.g. out/html) works.
            os.makedirs(doc_dir_path)
        return open(doc_fname, "w")
    except (IOError, OSError):
        return False


# State Machine Stuff
STATE_PRINT_OFF = 0
STATE_PRINT_ON = 1
STATE_PRINT_NOP = 2  # the transition leaves the print state unchanged

state = STATE_PRINT_OFF
# Number of lines seen outside documentation sections in the current file.
line_count = 0

# Actions after State Detection
ACTION_NOPRINT = 1
ACTION_PRINT = 2
ACTION_PRT_CONST = 3


class StateTransition(object):
    """One row of the line-recognition table.

    regx is compiled and matched at the start of each line; print_state is
    the STATE_PRINT_* value to switch to, action_state the ACTION_* to take,
    and params the (name, value, comment) indices into match.groups() used
    by const_def() for ACTION_PRT_CONST rows.
    """

    def __init__(self, regx, print_state, action_state, params=None):
        self.regx = re.compile(regx)
        self.print_state = print_state
        self.action_state = action_state
        self.params = params


# The const patterns take the trailing comment AFTER the ';' (as the define
# pattern does) and use an escape-aware alternative for quoted values, so a
# quote inside the trailing comment cannot be swallowed into the value.
state_transitions = (
    StateTransition(r'^\s*#(doc-start|start-doc|begin-doc|doc-begin)',
                    STATE_PRINT_ON, ACTION_NOPRINT),
    StateTransition(r'\s*#(doc-end|end-doc|stop-doc|doc-stop)',
                    STATE_PRINT_OFF, ACTION_NOPRINT),
    StateTransition(r'define\(([\'"])([^,)]*)\1,\s*([^)]*)\)\s*;(\s*(//|#)\s*(.*))?',
                    STATE_PRINT_NOP, ACTION_PRT_CONST, (1, 2, -1)),
    StateTransition(r'\s*const\s+([_A-Z][_a-zA-Z0-9]*)\s*=\s*(\d+)\s*;\s*((//|#)\s*(.*))?',
                    STATE_PRINT_NOP, ACTION_PRT_CONST, (0, 1, -1)),
    StateTransition(r'\s*const\s+([_A-Z][_a-zA-Z0-9]*)\s*=\s*([_A-Z][a-zA-Z0-9_]*)\s*;\s*((//|#)\s*(.*))?',
                    STATE_PRINT_NOP, ACTION_PRT_CONST, (0, 1, -1)),
    StateTransition(r'\s*const\s+([_A-Z][_a-zA-Z0-9]*)\s*=\s*"((?:[^"\\]|\\.)*)"\s*;\s*((//|#)\s*(.*))?',
                    STATE_PRINT_NOP, ACTION_PRT_CONST, (0, 1, -1)),
    StateTransition(r"\s*const\s+([_A-Z][_a-zA-Z0-9]*)\s*=\s*'((?:[^'\\]|\\.)*)'\s*;\s*((//|#)\s*(.*))?",
                    STATE_PRINT_NOP, ACTION_PRT_CONST, (0, 1, -1)),
)

allowed_tags = ['pre', 'hr']

COMMENT_LEADERS = ('#', '//')

# Opening tag with its name captured in group 1, as strip_tags() requires.
# NOTE(review): the previous pattern had no capture group, so strip_tags()
# failed with IndexError on any line containing '>'.  The tag-name group is
# a reconstruction - confirm the intended tag syntax.
TAG_RE = re.compile(r'<(\w+)[^>]*>')


def strip_tags(matchobj):
    """re.sub callback: keep allowed tags verbatim, reduce others to <name>."""
    tag_name = matchobj.group(1)
    if tag_name in allowed_tags:
        return matchobj.group()
    return '<' + tag_name + '>'


def strip_comment_leader(line):
    """Return line without leading comment markers and with tags filtered.

    Every leading '#' or '//' is removed (repeatedly, so '#//' and '##' are
    stripped completely); whitespace after the markers is kept.  Tags in the
    remaining text are then passed through strip_tags().
    """
    stripped = True
    while stripped:
        stripped = False
        for leader in COMMENT_LEADERS:
            if line.startswith(leader):
                line = line[len(leader):]
                stripped = True
    return TAG_RE.sub(strip_tags, line)


def const_def(groups, indicies):
    """Format a constant definition as 'NAME = VALUE. comment' plus newline.

    groups is the groups() tuple of the match that detected the constant and
    indicies the (name, value, comment) positions within it.  The comment is
    omitted when that group is empty or did not take part in the match.
    """
    text = "%s = %s." % (groups[indicies[0]], groups[indicies[1]])
    comment = groups[indicies[2]]
    if comment:
        text += " %s" % comment
    return text + "\n"


def process_line(line, line_list):
    """Run one source line through the state machine.

    Marker lines switch the module-level print state and produce no output;
    constant definitions are always appended to line_list in formatted form.
    Any other line is appended (comment leader stripped) while printing is
    on, and counted in the module-level line_count while it is off.
    """
    global state
    global line_count
    for st in state_transitions:
        match_obj = st.regx.match(line)
        if not match_obj:
            continue
        if st.print_state == STATE_PRINT_ON:
            # Blank line between consecutive documentation sections.
            if line_list:
                line_list.append("")
            state = STATE_PRINT_ON
        elif st.print_state == STATE_PRINT_OFF:
            state = STATE_PRINT_OFF
        if st.action_state == ACTION_PRINT:
            # No row of the table uses this action at present; the line is
            # appended and scanning of the table continues.
            line_list.append(line)
        elif st.action_state == ACTION_NOPRINT:
            return
        elif st.action_state == ACTION_PRT_CONST:
            line_list.append(const_def(match_obj.groups(), st.params))
            return
        else:
            raise RuntimeError(
                "Internal Error - illegal Action: %d" % st.action_state)
    if state == STATE_PRINT_ON:
        line_list.append(strip_comment_leader(line))
    else:
        line_count += 1


def main(argv=None):
    """Command line entry point: convert every selected file, then report.

    argv defaults to sys.argv[1:].  Prints a per-file and total count of the
    lines that were outside documentation sections.
    """
    global state, line_count
    args = parse_options(sys.argv[1:] if argv is None else argv)

    if verbose:
        if not args:
            print("Source Directory:  %s" % src_dir)
        print("Document Directory:  %s" % doc_dir)
        print("Extension List:  %s" % extension_list)

    if textile is None:
        print("Unable to import the 'textile' package - please install it")
        sys.exit(1)

    file_line_count = {}
    # Process Files
    for fname, doc_fname in build_file_list(args):
        print("Processing %s - output to %s" % (fname, doc_fname))
        doc_file = sys.stdout if debug else check_or_create_doc_dir(doc_fname)
        if not doc_file:
            print("Unable to create doc file: %s" % doc_fname)
            continue
        line_count = 0
        state = STATE_PRINT_OFF
        line_list = []
        try:
            doc_file.write(webpage_top % fname)
            # NOTE(review): lines are stripped before matching, so markers
            # are in practice recognized after leading whitespace, despite
            # what the help text says - confirm which is intended.
            with open(fname, "r") as src_file:
                for line in src_file:
                    process_line(line.strip(), line_list)
            textilizer = textile.Textiler("\n".join(line_list))
            doc_file.write(textilizer.process())
            doc_file.write(webpage_bottom)
        finally:
            # Never close sys.stdout (debug mode).
            if not debug:
                doc_file.close()
        file_line_count[fname] = line_count

    total = 0
    for key in sorted(file_line_count):
        print("%-20s: %d" % (key, file_line_count[key]))
        total += file_line_count[key]
    print("")
    print("total:  %d" % total)


if __name__ == '__main__':
    main()