#!/usr/bin/env python
"""Doxygen XML to SWIG docstring converter.

Converts Doxygen generated XML files into a file containing docstrings
that can be used by SWIG-1.3.x. Note that you need to get SWIG
version > 1.3.23 or use Robin Dunn's docstring patch to be able to use
the resulting output.

Usage:

  doxy2swig.py input.xml output.i

input.xml is your doxygen generated XML file and output.i is where the
output will be written (the file will be clobbered).

"""

# This code is implemented using Mark Pilgrim's code as a guideline:
#   http://www.faqs.org/docs/diveintopython/kgp_divein.html
#
# Author: Prabhu Ramachandran
# License: BSD style

from xml.dom import minidom
import re
import textwrap
import sys
import types
import os.path


def my_open_read(source, mode="r"):
    """Return a readable file object for `source`.

    Parameters:

     - source: An object with a `read` attribute (returned unchanged)
       or a filename, which is opened.

     - mode: Mode used when `source` has to be opened.  It is ignored
       for objects that are already readable.  Defaults to "r".
    """
    if hasattr(source, "read"):
        return source
    return open(source, mode)


def my_open_write(dest, mode="w"):
    """Return a writable file object for `dest`.

    Parameters:

     - dest: An object with a `write` attribute (returned unchanged)
       or a filename, which is opened.

     - mode: Mode used when `dest` has to be opened.  It is ignored
       for objects that are already writable.  Defaults to "w".
    """
    if hasattr(dest, "write"):
        return dest
    return open(dest, mode)


def _source_name(f):
    """Return the filename behind file object `f` as a `str`.

    An empty string is returned when `f` has no usable `name`
    attribute (for example an in-memory stream, or a file opened from
    an integer descriptor).
    """
    name = getattr(f, "name", "")
    if isinstance(name, bytes):
        return os.fsdecode(name)
    if isinstance(name, str):
        return name
    return ""


class Doxy2SWIG:
    """Converts Doxygen generated XML files into a file containing
    docstrings that can be used by SWIG-1.3.x that have support for
    feature("docstring").  Once the data is parsed it is stored in
    self.pieces.

    """

    def __init__(self, src):
        """Initialize the instance given a source object (file or
        filename).

        The source is parsed immediately and then closed, including
        when `src` is a file object supplied by the caller.
        """
        # Binary mode lets the XML parser honour the encoding declared
        # in the document instead of the platform's default encoding.
        f = my_open_read(src, "rb")
        name = _source_name(f)
        try:
            self.xmldoc = minidom.parse(f).documentElement
        finally:
            # Release the handle even when the XML is malformed.
            f.close()

        # Directory used by `do_doxygenindex` to locate sibling files.
        self.my_dir = os.path.dirname(name)

        self.pieces = []
        self.pieces.append(f"\n// File: {os.path.basename(name)}\n")

        self.space_re = re.compile(r"\s+")
        self.lead_spc = re.compile(r'^(%feature\S+\s+\S+\s*?)"\s+(\S)')
        # Set to 1 by `do_doxygenindex`; `write` then emits the pieces
        # without cleaning them a second time.
        self.multi = 0
        self.ignores = (
            "inheritancegraph",
            "param",
            "listofallmembers",
            "innerclass",
            "name",
            "declname",
            "incdepgraph",
            "invincdepgraph",
            "programlisting",
            "type",
            "references",
            "referencedby",
            "location",
            "collaborationgraph",
            "reimplements",
            "reimplementedby",
            "derivedcompoundref",
            "basecompoundref",
        )
        # self.generics = []

    def generate(self):
        """Parses the file set in the initialization.  The resulting
        data is stored in `self.pieces`.

        """
        self.parse(self.xmldoc)

    def parse(self, node):
        """Parse a given node.  This function in turn calls the
        `parse_<nodeType>` functions which handle the respective
        nodes.

        """
        pm = getattr(self, f"parse_{node.__class__.__name__}")
        pm(node)

    def parse_Document(self, node):
        """Parse a document node by parsing its root element."""
        self.parse(node.documentElement)

    def _skip_node(self, node):
        """Ignore `node`; it contributes nothing to the docstrings."""

    # XML comments and processing instructions are legal anywhere in
    # the input, so they are skipped instead of making `parse` fail
    # with an AttributeError for a missing handler.
    parse_Comment = _skip_node
    parse_ProcessingInstruction = _skip_node

    def parse_Text(self, node):
        """Parse a text node.

        The `\\sphinx` and `\\endsphinx` markers are replaced by a
        space, backslashes and double quotes are escaped, and the
        result is wrapped and added to `self.pieces`.  Text made only
        of whitespace is dropped.
        """
        txt = node.data
        txt = txt.replace("\\sphinx", r" ")
        txt = txt.replace("\\endsphinx", r" ")
        txt = txt.replace("\\", r"\\\\")
        txt = txt.replace('"', r"\"")
        # ignore pure whitespace
        if self.space_re.fullmatch(txt):
            return
        self.add_text(textwrap.fill(txt))

    def parse_Element(self, node):
        """Parse an `ELEMENT_NODE`.  This calls specific
        `do_<tagName>` handers for different elements.  If no handler
        is available the `generic_parse` method is called.  All
        tagNames specified in `self.ignores` are simply ignored.

        """
        name = node.tagName
        if name in self.ignores:
            return
        handler = getattr(self, f"do_{name}", None)
        if handler is not None:
            handler(node)
        else:
            self.generic_parse(node)
            # if name not in self.generics: self.generics.append(name)

    def add_text(self, value):
        """Adds text corresponding to `value` into `self.pieces`.

        A list or tuple (or an instance of a subclass) is added item
        by item; any other value is appended as a single piece.
        """
        if isinstance(value, (list, tuple)):
            self.pieces.extend(value)
        else:
            self.pieces.append(value)

    def get_specific_nodes(self, node, names):
        """Given a node and a sequence of strings in `names`, return a
        dictionary containing the names as keys and child
        `ELEMENT_NODEs`, that have a `tagName` equal to the name.

        When several children share a tag name the last one wins.
        """
        return {
            x.tagName: x
            for x in node.childNodes
            if x.nodeType == x.ELEMENT_NODE and x.tagName in names
        }

    def generic_parse(self, node, pad=0):
        """A Generic parser for arbitrary tags in a node.

        Parameters:

         - node:  A node in the DOM.
         - pad: `int` (default: 0)

           If 0 the node data is not padded with newlines.  If 1 it
           appends a newline after parsing the childNodes.  If 2 it
           pads before and after the nodes are processed.  Defaults to
           0.

        """
        npiece = 0
        if pad:
            npiece = len(self.pieces)
            if pad == 2:
                self.add_text("\n")
        for n in node.childNodes:
            self.parse(n)
        # The trailing newline is only added when something was
        # actually emitted since `npiece` was recorded.
        if pad and len(self.pieces) > npiece:
            self.add_text("\n")

    def space_parse(self, node):
        """Emit a single space and then parse the children of `node`."""
        self.add_text(" ")
        self.generic_parse(node)

    # Inline markup elements all share the same handling.
    do_ref = space_parse
    do_emphasis = space_parse
    do_bold = space_parse
    do_computeroutput = space_parse
    do_formula = space_parse

    def do_compoundname(self, node):
        """Open a `%feature("docstring")` entry for the named compound."""
        self.add_text("\n\n")
        data = node.firstChild.data
        self.add_text(f'%feature("docstring") {data} "\n')

    def do_compounddef(self, node):
        """Handle a `compounddef` element.

        Public classes and structs get a docstring built from their
        name and descriptions, followed by their remaining children.
        For files and namespaces only the `sectiondef` descendants are
        parsed.  Other kinds are ignored.
        """
        kind = node.attributes["kind"].value
        if kind in ("class", "struct"):
            prot = node.attributes["prot"].value
            if prot != "public":
                return
            names = (
                "compoundname",
                "briefdescription",
                "detaileddescription",
                "includes",
            )
            first = self.get_specific_nodes(node, names)
            # Emit the header pieces in the fixed order given by `names`.
            for n in names:
                if n in first:
                    self.parse(first[n])
            self.add_text(['";', "\n"])
            # Everything not consumed above is parsed afterwards.
            for n in node.childNodes:
                if n not in first.values():
                    self.parse(n)
        elif kind in ("file", "namespace"):
            for n in node.getElementsByTagName("sectiondef"):
                self.parse(n)

    def do_includes(self, node):
        """Ignore `includes` elements."""
        # self.add_text('C++ includes: ')
        # self.generic_parse(node, pad=1)
        pass

    def do_parameterlist(self, node):
        """Emit a "Parameters:" heading followed by the parsed list."""
        self.add_text(["\n", "\n", "Parameters:", "\n"])
        self.generic_parse(node, pad=1)

    def do_para(self, node):
        """Emit a paragraph surrounded by newlines."""
        self.add_text("\n")
        self.generic_parse(node, pad=1)

    def do_parametername(self, node):
        """Emit the parameter name followed by ": " on a new line."""
        self.add_text("\n")
        self.add_text(f"{node.firstChild.data}: ")

    def do_parameterdefinition(self, node):
        """Parse the children, appending a newline if text was added."""
        self.generic_parse(node, pad=1)

    def do_detaileddescription(self, node):
        """Parse the children, appending a newline if text was added."""
        self.generic_parse(node, pad=1)

    def do_briefdescription(self, node):
        """Parse the children, appending a newline if text was added."""
        self.generic_parse(node, pad=1)

    def do_memberdef(self, node):
        """Emit a docstring entry for a public, non-operator member.

        The entry name is qualified with the namespace or class name
        found in the enclosing compound, and the member's definition
        is used as the first line of the docstring.
        """
        prot = node.attributes["prot"].value
        tmp = node.parentNode.parentNode.parentNode
        compdef = tmp.getElementsByTagName("compounddef")[0]
        cdef_kind = compdef.attributes["kind"].value

        if prot != "public":
            return

        first = self.get_specific_nodes(node, ("definition", "name"))
        name = first["name"].firstChild.data
        if name.startswith("operator"):
            # Don't handle operators yet.
            return

        defn = first["definition"].firstChild.data
        self.add_text("\n")
        self.add_text('%feature("docstring") ')

        anc = node.parentNode.parentNode
        if cdef_kind in ("file", "namespace"):
            ns_node = anc.getElementsByTagName("innernamespace")
            if not ns_node and cdef_kind == "namespace":
                ns_node = anc.getElementsByTagName("compoundname")
            if ns_node:
                ns = ns_node[0].firstChild.data
                self.add_text(f' {ns}::{name} "\n{defn}')
            else:
                self.add_text(f' {name} "\n{defn}')
        elif cdef_kind in ("class", "struct"):
            # Get the full function name.
            anc_node = anc.getElementsByTagName("compoundname")
            cname = anc_node[0].firstChild.data
            self.add_text(f' {cname}::{name} "\n{defn}')

        for n in node.childNodes:
            if n not in first.values():
                self.parse(n)
        self.add_text(['";', "\n"])

    def do_definition(self, node):
        """Emit the definition text as entry name and first line."""
        data = node.firstChild.data
        self.add_text(f'{data} "\n{data}')

    def do_sectiondef(self, node):
        """Parse only the sections that hold functions."""
        kind = node.attributes["kind"].value
        if kind in ("public-func", "func"):
            self.generic_parse(node)

    def do_simplesect(self, node):
        """Handle a `simplesect` element according to its kind.

        "date", "rcs" and "version" sections are dropped, "warning"
        and "see" sections get a prefix, everything else is parsed
        as is.
        """
        kind = node.attributes["kind"].value
        if kind in ("date", "rcs", "version"):
            return
        if kind == "warning":
            self.add_text(["\n", "WARNING: "])
        elif kind == "see":
            self.add_text("\n")
            self.add_text("See: ")
        self.generic_parse(node)

    def do_argsstring(self, node):
        """Parse the children, appending a newline if text was added."""
        self.generic_parse(node, pad=1)

    def do_member(self, node):
        """Parse only function members whose refid starts with
        "namespace"."""
        kind = node.attributes["kind"].value
        refid = node.attributes["refid"].value
        if kind == "function" and refid.startswith("namespace"):
            self.generic_parse(node)

    def do_doxygenindex(self, node):
        """Process every compound listed in a Doxygen index file.

        Each referenced `<refid>.xml` file is looked up relative to
        the current directory first and then next to the index file.
        Its cleaned pieces are appended to `self.pieces`.
        """
        self.multi = 1
        for c in node.getElementsByTagName("compound"):
            refid = c.attributes["refid"].value
            fname = refid + ".xml"
            if not os.path.exists(fname):
                fname = os.path.join(self.my_dir, fname)
            print(f"parsing file: {fname}")
            p = Doxy2SWIG(fname)
            p.generate()
            self.pieces.extend(self.clean_pieces(p.pieces))

    def write(self, fname, mode="w"):
        """Write the collected pieces to `fname` (file or filename).

        The destination is closed afterwards, including when `fname`
        is a file object supplied by the caller.
        """
        o = my_open_write(fname, mode)
        try:
            if self.multi:
                # Pieces were already cleaned by `do_doxygenindex`.
                o.write("".join(self.pieces))
            else:
                o.write("".join(self.clean_pieces(self.pieces)))
        finally:
            # Close the handle even if cleaning or writing fails.
            o.close()

    def clean_pieces(self, pieces):
        """Cleans the list of strings given as `pieces`.  It replaces
        multiple newlines by a maximum of 2 and returns a new list.
        It also wraps the paragraphs nicely.

        """
        ret = []
        count = 0  # number of consecutive "\n" pieces seen so far
        for i in pieces:
            if i == "\n":
                count += 1
                continue
            if i == '";':
                # At most one newline before the closing quote.
                if count:
                    ret.append("\n")
            elif count > 2:
                ret.append("\n\n")
            elif count:
                ret.append("\n" * count)
            count = 0
            ret.append(i)

        _data = "".join(ret)
        ret = []
        for i in _data.split("\n\n"):
            if i == "Parameters:":
                ret.extend(["Parameters:\n-----------", "\n\n"])
            elif "// File:" in i:
                # leave comments alone.
                ret.extend([i, "\n"])
            else:
                _tmp = textwrap.fill(i.strip())
                # Remove whitespace between the opening quote and the
                # first word of the docstring.
                _tmp = self.lead_spc.sub(r'\1"\2', _tmp)
                ret.extend([_tmp, "\n\n"])
        return ret


def main(input, output):
    """Convert the Doxygen XML in `input` and write it to `output`."""
    p = Doxy2SWIG(input)
    p.generate()
    p.write(output)


if __name__ == "__main__":
    if len(sys.argv) != 3:
        print(__doc__)
        sys.exit(1)
    main(sys.argv[1], sys.argv[2])