#! /usr/local/bin/python
# copyright 2008, Mike Howard and Clove Technologies, Inc. All Rights Reserved.
# Use is granted to any and all under the terms of the GNU Public License,
# version 2 - as it exists at the current time.
"""
#doc-start
h1. extract_doc.py - a document extractor and formatter for embedded Textile
Copyright 2008, Mike Howard and Clove Technologies, Inc. All Rights Reserved.
Use is granted to any and all under the terms of the GNU Public License,
version 2 - as it exists at the current time.
*extract_doc.py* is a very simple program which extracts all the plain
text between marker tags [#doc-start and #doc-end] in one or more
source code files and translates it into HTML.
Of course, the translation to HTML is a lot better if the text is formatted
correctly using the Textile formatting language.
This version of *extract_doc.py* uses the python implementation
of Textile in textile-2.0.11. From the PKG_INFO of that distribution:
Metadata-Version: 1.0
Name: textile
Version: 2.0.11
Summary: This is Textile. A Humane Web Text Generator.
Home-page: http://dealmeida.net/projects/textile/
Author: Roberto A. F. De Almeida
Author-email: roberto@dealmeida.net
License: Freely Distributable
Download-URL: http://dom.eav.free.fr/textile-2.0.10.tar.gz
Description: Textile is a XHTML generator using a simple markup developed by Dea
n Allen. This is a Python port with support for code validation, itex to MathML
translation, Python code coloring and much more.
Platform: any
Running *extract_doc.py* is simple:
* cd to directory containing your source code
* type 'python extract_doc.py' or 'python extract_doc.py file names'
This will create a subdirectory named _doc_ and write one HTML
file for each source file found - named after the source file, with its extension replaced by .html.
Options are available to:
* point to a different source directory
* change the suffixes recognized as 'source files' [defaults to .php and .inc]
* change the name of the output directory
* create disgustingly verbose output
One small point: the -a option appends extensions to the current list,
whereas the -e option replaces the current list.
Another small point: for the 'forgetful' there are several synonyms
for #doc-start and #doc-end. They are:
* #doc-start, #start-doc, #begin-doc, and #doc-begin
* #doc-end, #end-doc, #stop-doc, and #doc-stop
Last small point: #doc-start and #doc-end are not recognized if they
don't start in column 1.
Hope this is useful.
Mike Howard - http://www.clove.com
#doc-end
"""
import sys
import os
import os.path
import getopt
import re
import textile
# Globals
#
# Defaults for the command-line options.  The option-processing code that
# follows these definitions may rebind any of them.
src_dir = '.'                      # directory walked when no files are named
doc_dir = 'doc'                    # directory name joined onto each source file's directory
verbose = 0                        # bumped once per -v/--verbose
debug = False                      # True: pages are written to stdout instead of files
extension_list = ['.php', '.inc']  # file-name suffixes treated as source files

# Text written before and after the generated page body.  webpage_top is a
# %-format string that receives the source file name.
webpage_top = """
%s
"""
webpage_bottom = """
"""

# One-line synopsis.  The leading space in the last piece keeps the program
# name from running straight into the bracketed argument summary.
USAGE = "usage: " + os.path.basename(sys.argv[0]) + " [-h|*.php *.inc]"

# Help text: a tuple of strings that the -h/--help handler prints one per
# line.  The bracketed defaults are interpolated right here, before any
# options are parsed, so they always show the built-in defaults.
HELP = (sys.argv[0] + """ creates html documentation
files from Textile formatted comments in *.php and *.inc files
Only lines between #doc-start and #doc-end markers - beginning in column 0
- are processed. All documentation is run through the PyTextile implementation
of Textile markup. I'm currently using textile-2.0.11.
From the Textile PKG-INFO:
Metadata-Version: 1.0
Name: textile
Version: 2.0.11
Summary: This is Textile. A Humane Web Text Generator.
Home-page: http://dealmeida.net/projects/textile/
Author: Roberto A. F. De Almeida
Author-email: roberto@dealmeida.net
License: Freely Distributable
Download-URL: http://dom.eav.free.fr/textile-2.0.10.tar.gz
Description: Textile is a XHTML generator using a simple markup developed by Dea
n Allen. This is a Python port with support for code validation, itex to MathML
translation, Python code coloring and much more.
Platform: any
""",
    '',
    'Option Meaning',
    '-d/--doc-dir place to put documentation [%s]' % doc_dir,
    '-a/--add-ext comma separated list of extensions to add to %s' % extension_list,
    '-e/--ext-list comma separated list of extensions to replace %s by' % extension_list,
    '-s/--src-dir=path source directory to scan - if no file list given [%s]' % src_dir,
    '-v/--verbose increase verbosity level',
    '-D/--debug send output to STDOUT'
)
shortopts = "hd:s:a:e:vD"
# Long options that take a value must end in '='; without it getopt refuses
# "--src-dir=path" even though the handler below expects a value.
longopts = ['help', 'doc-dir=', 'add-ext=', 'ext-list=', 'verbose', 'src-dir=', 'debug']

# Process Options
try:
    opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts)
except getopt.GetoptError as err:
    # getopt itself rejects unknown options and missing values, so this is
    # where a bad command line is actually reported.
    print("Illegal Option: %s" % err)
    print(USAGE)
    sys.exit(1)

for opt, val in opts:
    if opt in ('-h', '--help'):
        for hlp in HELP:
            print(hlp)
        sys.exit(0)
    elif opt in ('-d', '--doc-dir'):
        doc_dir = val
    elif opt in ('-a', '--add-ext'):
        # -a extends the current extension list ...
        extension_list.extend(val.split(','))
    elif opt in ('-e', '--ext-list'):
        # ... whereas -e replaces it outright.
        extension_list = val.split(',')
    elif opt in ('-v', '--verbose'):
        verbose += 1
    elif opt in ('-s', '--src-dir'):
        src_dir = val
    elif opt in ('-D', '--debug'):
        debug = True
    else:
        # Safety net for an option that is declared in shortopts/longopts
        # but has no handler above.
        print("Illegal Option: %s" % opt)
        print(USAGE)
        sys.exit(1)

if verbose:
    # The source directory only matters when no files were named.
    if not args:
        print('Source Directory:  %s' % src_dir)
    print('Document Directory:  %s' % doc_dir)
    print("Extension List:  %s" % (extension_list,))
# build file list
def process_fname(fname, list):
    """Queue fname for processing if it ends in a recognized extension.

    fname -- path of a candidate source file
    list  -- list to extend in place; on a match one tuple
             (fname, html_path) is appended, where html_path is
             <directory of fname>/<doc_dir>/<base name minus extension>.html

    Reads the module globals extension_list and doc_dir.  Returns None.
    """
    for ext in extension_list:
        # An empty extension would "match" every file name, so ignore it.
        if ext and fname.endswith(ext):
            html_name = os.path.basename(fname)[:-len(ext)] + '.html'
            list.append((fname,
                         os.path.join(os.path.dirname(fname), doc_dir, html_name)))
            # Stop at the first hit: extension_list can hold duplicates or
            # overlapping suffixes (e.g. after -a), and the same source
            # file must not be queued more than once.
            break
# (source path, html path) pairs, filled in by process_fname
list = []
if args:
    # Files named on the command line are the only candidates.
    for fname in args:
        process_fname(fname, list)
else:
    # No files named: consider every file found beneath src_dir.
    for root, subdir_names, file_names in os.walk(src_dir):
        for name in file_names:
            process_fname(os.path.join(root, name), list)
def check_or_create_doc_dir(doc_fname):
    """Open doc_fname for writing, creating its directory if necessary.

    doc_fname -- path of the HTML file to create

    Returns the open file object, or False when the directory cannot be
    created or the file cannot be opened.
    """
    doc_dir_path = os.path.dirname(doc_fname)
    if doc_dir_path:
        try:
            # makedirs also creates missing parents, so a nested doc
            # directory works too.
            os.makedirs(doc_dir_path)
        except OSError:
            # An already existing directory is fine; anything else
            # (permissions, a plain file in the way, ...) is a failure.
            if not os.path.isdir(doc_dir_path):
                return False
    try:
        return open(doc_fname, "w")
    except IOError:
        # Report an unwritable target the same way as an uncreatable
        # directory rather than raising.
        return False
# State Machine Stuff
#
# `state` says whether ordinary lines are currently being collected as
# documentation.  A transition's print_state is the state to switch to;
# STATE_PRINT_NOP means "leave the state alone".
STATE_PRINT_OFF = 0
STATE_PRINT_ON = 1
STATE_PRINT_NOP = 2
state = STATE_PRINT_OFF

# Actions after State Detection
ACTION_NOPRINT = 1      # drop the matched line
ACTION_PRINT = 2        # emit the matched line itself
ACTION_PRT_CONST = 3    # emit a formatted constant definition


class StateTransition():
    """One rule of the line classifier.

    regx         -- pattern source; compiled here and applied with match(),
                    i.e. anchored at the start of the line
    print_state  -- one of the STATE_PRINT_* values
    action_state -- one of the ACTION_* values
    params       -- for ACTION_PRT_CONST: indexes into match.groups() of
                    (constant name, value, trailing comment text)
    """

    def __init__(self, regx, print_state, action_state, params = None):
        self.regx = re.compile(regx)
        self.print_state = print_state
        self.action_state = action_state
        self.params = params


# Rules are tried in order.
#
# In the constant rules the optional trailing comment comes AFTER the
# terminating ';' (as in "const X = 1; // note"); with the comment group
# placed before the ';' the comment could never be captured.  The quoted
# string rules accept backslash escapes, so an escaped quote does not end
# the value and an unescaped one always does.
state_transitions = (
    StateTransition(r'^\s*#(doc-start|start-doc|begin-doc|doc-begin)',
                    STATE_PRINT_ON, ACTION_NOPRINT),
    StateTransition(r'\s*#(doc-end|end-doc|stop-doc|doc-stop)',
                    STATE_PRINT_OFF, ACTION_NOPRINT),
    # define('NAME', value); // comment   -- groups()[0] is the quote char
    StateTransition(r'define\(([\'"])([^,)]*)\1,\s*([^)]*)\)\s*;(\s*(//|#)\s*(.*))?',
                    STATE_PRINT_NOP, ACTION_PRT_CONST, (1, 2, -1)),
    # const NAME = 123; // comment
    StateTransition(r'\s*const\s+([_A-Z][_a-zA-Z0-9]*)\s*=\s*(\d+)\s*;\s*((//|#)\s*(.*))?',
                    STATE_PRINT_NOP, ACTION_PRT_CONST, (0, 1, -1)),
    # const NAME = OTHER_NAME; // comment
    StateTransition(r'\s*const\s+([_A-Z][_a-zA-Z0-9]*)\s*=\s*([_A-Z][a-zA-Z0-9_]*)\s*;\s*((//|#)\s*(.*))?',
                    STATE_PRINT_NOP, ACTION_PRT_CONST, (0, 1, -1)),
    # const NAME = "text"; // comment
    StateTransition(r'\s*const\s+([_A-Z][_a-zA-Z0-9]*)\s*=\s*"((?:[^"\\]|\\.)*)"\s*;\s*((//|#)\s*(.*))?',
                    STATE_PRINT_NOP, ACTION_PRT_CONST, (0, 1, -1)),
    # const NAME = 'text'; // comment
    StateTransition(r"\s*const\s+([_A-Z][_a-zA-Z0-9]*)\s*=\s*'((?:[^'\\]|\\.)*)'\s*;\s*((//|#)\s*(.*))?",
                    STATE_PRINT_NOP, ACTION_PRT_CONST, (0, 1, -1)),
)
# Tags that are passed through to the output untouched.
allowed_tags = ['pre', 'hr']

# An opening or closing tag; group 1 is the tag name.  (Without the leading
# "</" the pattern starts with a bare "?" and cannot be compiled at all.)
_TAG_RE = re.compile(r'</?([a-zA-Z][a-zA-Z1-6]*)[^>]*>')

# Comment markers removed from the front of a documentation line.
_COMMENT_LEADERS = ('#', '//')


def strip_tags(matchobj):
    """Replacement callback for the tag pattern.

    matchobj -- match object whose first group is the tag name

    Returns the matched text unchanged when the tag name is in allowed_tags,
    otherwise '<' + name + '>'.
    """
    tag_name = matchobj.groups(1)[0]
    if tag_name in allowed_tags:
        return matchobj.group()
    # NOTE(review): only the captured name is reused, so attributes are
    # dropped and a closing tag loses its '/' - confirm that is intended.
    return '<' + tag_name + '>'


def strip_comment_leader(line):
    """Remove leading comment markers from line and filter its tags.

    line -- one source line, as passed in by the caller

    Every leading '#' or '//' is removed, repeatedly, until the line no
    longer starts with one; whitespace after the markers is kept.  Tags in
    the remainder are then rewritten by strip_tags.  Returns the new string.
    """
    found_leader = True
    while found_leader:
        found_leader = False
        for leader in _COMMENT_LEADERS:
            if line.startswith(leader):
                line = line[len(leader):]
                found_leader = True
    return _TAG_RE.sub(strip_tags, line)
def const_def(groups, indicies):
    """Format one constant definition as a line of documentation text.

    groups   -- the groups() tuple of the match that detected the constant
    indicies -- positions within groups of the constant's name, its value
                and its optional trailing comment, in that order

    Returns "NAME = VALUE." followed by " COMMENT" when the comment entry is
    non-empty, terminated by a newline.
    """
    text = "%s = %s." % (groups[indicies[0]], groups[indicies[1]])
    comment = groups[indicies[2]]
    if not comment:
        return text + "\n"
    return text + " %s" % comment + "\n"
def process_line(line, line_list):
    """Classify one source line and update the collected documentation.

    line      -- the source line to examine
    line_list -- list of output lines, extended in place

    The line is tried against each entry of state_transitions in order:
      * a start marker inserts an empty separator line (when output already
        exists) and switches the global `state` on; an end marker switches
        it off; the marker line itself is dropped
      * a constant definition is appended in formatted form, whatever the
        current state
    A line that matches no rule is appended, with comment leaders stripped,
    while state is STATE_PRINT_ON; otherwise the global line_count is
    incremented.

    Raises ValueError if a transition carries an unknown action code.
    """
    global state
    global line_count
    for st in state_transitions:
        match_obj = st.regx.match(line)
        if not match_obj:
            continue
        if st.print_state == STATE_PRINT_ON:
            if line_list:
                line_list.append("")
            state = STATE_PRINT_ON
        elif st.print_state == STATE_PRINT_OFF:
            state = STATE_PRINT_OFF
        if st.action_state == ACTION_PRINT:
            # NOTE(review): unlike the other actions this one does not
            # return, so the remaining rules and the fall-through below
            # still see the line.  No current rule uses ACTION_PRINT.
            line_list.append(line)
        elif st.action_state == ACTION_NOPRINT:
            return
        elif st.action_state == ACTION_PRT_CONST:
            line_list.append(const_def(match_obj.groups(), st.params))
            return
        else:
            # A real exception reporting the offending code; a bare string
            # cannot be raised, and no name `action` exists in this scope.
            raise ValueError("Internal Error - illegal Action: %d" % st.action_state)
    if state == STATE_PRINT_ON:
        line_list.append(strip_comment_leader(line))
    else:
        line_count += 1
if __name__ == '__main__':
    # Per source file: number of lines process_line counted, i.e. lines
    # that matched no rule while outside a documentation section.
    file_line_count = {}
    # Process Files
    for fname, doc_fname in list:
        print("Processing %s - output to %s" % (fname, doc_fname))
        # In debug mode the page goes to stdout and no file is created.
        doc_file = sys.stdout if debug else check_or_create_doc_dir(doc_fname)
        if not doc_file:
            print("Unable to create doc file: %s" % doc_fname)
            continue
        try:
            # process_line reads and updates these module globals, so they
            # are reset for every file.
            line_count = 0
            state = STATE_PRINT_OFF
            line_list = []
            doc_file.write(webpage_top % fname)
            # The with-block closes the source file even if a line fails.
            with open(fname, "r") as src_file:
                for line in src_file:
                    process_line(line.strip(), line_list)
            textilizer = textile.Textiler("\n".join(line_list))
            doc_file.write(textilizer.process())
            doc_file.write(webpage_bottom)
            file_line_count[fname] = line_count
        finally:
            # Never close stdout; always close a file this loop opened.
            if not debug:
                doc_file.close()
    # Summary: one row per processed file in name order, then the total.
    total = 0
    for key in sorted(file_line_count):
        print("%-20s: %d" % (key, file_line_count[key]))
        total += file_line_count[key]
    print("")
    print("total:  %s" % total)