Jonathan Corbet | d74b0d3 | 2019-04-25 07:55:07 -0600 | [diff] [blame] | 1 | # SPDX-License-Identifier: GPL-2.0 |
| 2 | # Copyright 2019 Jonathan Corbet <corbet@lwn.net> |
| 3 | # |
| 4 | # Apply kernel-specific tweaks after the initial document processing |
| 5 | # has been done. |
| 6 | # |
| 7 | from docutils import nodes |
Jonathan Corbet | bcac386 | 2020-01-22 16:06:28 -0700 | [diff] [blame] | 8 | import sphinx |
Jonathan Corbet | d74b0d3 | 2019-04-25 07:55:07 -0600 | [diff] [blame] | 9 | from sphinx import addnodes |
#
# NoUri is imported from sphinx.environment for Sphinx releases older
# than 2.1, and from sphinx.errors for 2.1 and later.
#
if sphinx.version_info[:2] < (2, 1):
    from sphinx.environment import NoUri
else:
    from sphinx.errors import NoUri
Jonathan Corbet | d74b0d3 | 2019-04-25 07:55:07 -0600 | [diff] [blame] | 15 | import re |
Nícolas F. R. A. Prado | d82b1e8 | 2020-09-03 00:58:19 +0000 | [diff] [blame] | 16 | from itertools import chain |
Jonathan Corbet | d74b0d3 | 2019-04-25 07:55:07 -0600 | [diff] [blame] | 17 | |
| 18 | # |
Jonathan Corbet | 4f3e690 | 2020-10-30 09:35:39 -0600 | [diff] [blame] | 19 | # Python 2 lacks re.ASCII... |
| 20 | # |
# Use re.ASCII when the re module provides it; otherwise pass no flag.
ascii_p3 = getattr(re, 'ASCII', 0)

#
# Regex nastiness.  Of course.
# Spot "function()" in running text: a word boundary, an identifier of
# at least two characters, then a literal "()".  Group 1 is the whole
# "name()" text and group 2 is the bare name.
#
RE_function = re.compile(r'\b(([a-zA-Z_]\w+)\(\))', ascii_p3)

#
# Sphinx 2 uses the same :c:type role for struct, union, enum and typedef,
# so a single regex covers all four keywords.  Group 1 is the keyword and
# group 2 is the type name.
#
RE_generic_type = re.compile(
    r'\b(struct|union|enum|typedef)\s+([a-zA-Z_]\w+)', ascii_p3)

#
# Sphinx 3 uses a different C role for each one of struct, union, enum and
# typedef, hence one regex per keyword.  The groups are laid out as in
# RE_generic_type.
#
RE_struct = re.compile(r'\b(struct)\s+([a-zA-Z_]\w+)', ascii_p3)
RE_union = re.compile(r'\b(union)\s+([a-zA-Z_]\w+)', ascii_p3)
RE_enum = re.compile(r'\b(enum)\s+([a-zA-Z_]\w+)', ascii_p3)
RE_typedef = re.compile(r'\b(typedef)\s+([a-zA-Z_]\w+)', ascii_p3)
Nícolas F. R. A. Prado | 06dc65b | 2020-10-13 23:13:11 +0000 | [diff] [blame] | 49 | |
Nícolas F. R. A. Prado | d18b017 | 2020-09-11 13:34:39 +0000 | [diff] [blame] | 50 | # |
| 51 | # Detects a reference to a documentation page of the form Documentation/... with |
| 52 | # an optional extension |
| 53 | # |
RE_doc = re.compile(r'\bDocumentation(/[\w\-_/]+)(\.\w+)*')

#
# Detects a ".. c:namespace:: <name>" directive that occupies a whole line
# and captures the namespace name in group 1.  The two leading dots are
# escaped so that only a literal ".." is accepted; unescaped, they would
# match any two characters.
#
RE_namespace = re.compile(r'^\s*\.\.\s*c:namespace::\s*(\S+)\s*$')
| 57 | |
Jonathan Corbet | d74b0d3 | 2019-04-25 07:55:07 -0600 | [diff] [blame] | 58 | # |
Nícolas F. R. A. Prado | 3050edf | 2020-10-13 23:13:23 +0000 | [diff] [blame] | 59 | # Reserved C words that we should skip when cross-referencing |
| 60 | # |
Skipnames = [
    'for',
    'if',
    'register',
    'sizeof',
    'struct',
    'unsigned',
]


#
# Names of common system calls that show up all over the docs.
# Cross-referencing them is pointless, and a function that happens to be
# defined under one of these names would produce incorrect and confusing
# links, so references to these names are never resolved.
#
Skipfuncs = [
    'open', 'close', 'read', 'write', 'fcntl', 'mmap',
    'select', 'poll', 'fork', 'execve', 'clone', 'ioctl',
    'socket',
]

# C namespace of the document being processed; empty when there is none.
c_namespace = ''
| 76 | |
def markup_refs(docname, app, node):
    """Split a text node into plain text and cross-reference nodes.

    The text of ``node`` is scanned with every regex that applies to the
    running Sphinx major version.  Each match is handed to the markup
    function associated with its regex, and the text between matches is
    kept as plain ``nodes.Text``.

    ``docname`` and ``app`` are passed through unchanged to the markup
    functions.  Returns the list of replacement nodes, in text order.
    """
    t = node.astext()
    done = 0
    repl = []
    #
    # Associate each regex with the function that will markup its matches.
    # The tables are built here rather than at module level because the
    # markup functions are defined further down in the file.
    #
    if sphinx.version_info[0] >= 3:
        markup_func = {RE_doc: markup_doc_ref,
                       RE_function: markup_func_ref_sphinx3,
                       RE_struct: markup_c_ref,
                       RE_union: markup_c_ref,
                       RE_enum: markup_c_ref,
                       RE_typedef: markup_c_ref}
    else:
        markup_func = {RE_doc: markup_doc_ref,
                       RE_function: markup_c_ref,
                       RE_generic_type: markup_c_ref}

    match_iterators = [regex.finditer(t) for regex in markup_func]
    #
    # Sort all references by the starting position in text
    #
    sorted_matches = sorted(chain(*match_iterators), key=lambda m: m.start())
    for m in sorted_matches:
        #
        # Matches coming from different regexes may overlap; for example
        # "struct foo()" is matched as a struct reference and, starting
        # at "foo", as a function reference.  Emitting both would repeat
        # the shared text in the output, so a match that begins inside
        # text that was already consumed is dropped.
        #
        if m.start() < done:
            continue
        #
        # Include any text prior to match as a normal text node.
        #
        if m.start() > done:
            repl.append(nodes.Text(t[done:m.start()]))

        #
        # Call the function associated with the regex that matched this
        # text and append its return to the text
        #
        repl.append(markup_func[m.re](docname, app, m))

        done = m.end()
    #
    # Whatever follows the last match is kept as plain text.
    #
    if done < len(t):
        repl.append(nodes.Text(t[done:]))
    return repl
| 122 | |
Nícolas F. R. A. Prado | 1ac4cfb | 2020-09-11 13:34:33 +0000 | [diff] [blame] | 123 | # |
Nícolas F. R. A. Prado | c51d9b0 | 2020-10-13 23:13:34 +0000 | [diff] [blame] | 124 | # In sphinx3 we can cross-reference to C macro and function, each one with its |
| 125 | # own C role, but both match the same regex, so we try both. |
Nícolas F. R. A. Prado | 1ac4cfb | 2020-09-11 13:34:33 +0000 | [diff] [blame] | 126 | # |
def markup_func_ref_sphinx3(docname, app, match):
    """Resolve a "name()" match as a C function or macro cross-reference.

    ``match`` comes from RE_function: group 2 is the bare name and group 0
    the text as written.  The name is looked up first inside the current
    document's C namespace (when one is set) and then globally, trying the
    'function' role before the 'macro' role each time.

    Returns the first resolved cross-reference, or the matched text as a
    plain ``nodes.Text`` when the name is skipped or nothing resolves.
    """
    cdom = app.env.domains['c']
    #
    # Go through the dance of getting an xref out of the C domain
    #
    name = match.group(2)
    plain = nodes.Text(match.group(0))

    # Reserved C words are never cross-referenced.
    if name in Skipnames:
        return plain

    #
    # Candidate targets, namespaced one first when the document declares
    # a namespace.
    #
    candidates = [name]
    if c_namespace:
        candidates = [c_namespace + "." + name] + candidates

    for candidate in candidates:
        if candidate in Skipfuncs:
            continue
        for css_class, role in (('c-func', 'function'),
                                ('c-macro', 'macro')):
            literal = nodes.literal(classes=['xref', 'c', css_class])
            literal += plain
            pending = addnodes.pending_xref('', refdomain='c',
                                            reftype=role,
                                            reftarget=candidate,
                                            modname=None,
                                            classname=None)
            #
            # XXX The Latex builder will throw NoUri exceptions here,
            # work around that by ignoring them.
            #
            try:
                resolved = cdom.resolve_xref(app.env, docname, app.builder,
                                             role, candidate, pending,
                                             literal)
            except NoUri:
                resolved = None

            if resolved:
                return resolved

    return plain
| 169 | |
def markup_c_ref(docname, app, match):
    """Resolve a C function or type match as a C-domain cross-reference.

    The regex that produced ``match`` selects the CSS class and the
    reference type used for the lookup.  Group 2 of the match is the name
    to look up and group 0 the text as written.  The name is tried inside
    the current document's C namespace first (when one is set), then
    globally.

    Returns the resolved cross-reference, or the matched text as a plain
    ``nodes.Text`` when the name is skipped or nothing resolves.
    """
    # regex -> (CSS class, reference type)
    roles = {
        # Sphinx 2 only
        RE_function: ('c-func', 'function'),
        RE_generic_type: ('c-type', 'type'),
        # Sphinx 3+ only
        RE_struct: ('c-struct', 'struct'),
        RE_union: ('c-union', 'union'),
        RE_enum: ('c-enum', 'enum'),
        RE_typedef: ('c-type', 'type'),
    }

    cdom = app.env.domains['c']
    #
    # Go through the dance of getting an xref out of the C domain
    #
    name = match.group(2)
    plain = nodes.Text(match.group(0))

    # Reserved C words are never cross-referenced.
    if name in Skipnames:
        return plain

    css_class, role = roles[match.re]

    #
    # Candidate targets, namespaced one first when the document declares
    # a namespace.
    #
    candidates = [name]
    if c_namespace:
        candidates = [c_namespace + "." + name] + candidates

    for candidate in candidates:
        # The system-call skip list only applies to function references.
        if match.re == RE_function and candidate in Skipfuncs:
            continue
        literal = nodes.literal(classes=['xref', 'c', css_class])
        literal += plain
        pending = addnodes.pending_xref('', refdomain='c',
                                        reftype=role,
                                        reftarget=candidate,
                                        modname=None,
                                        classname=None)
        #
        # XXX The Latex builder will throw NoUri exceptions here,
        # work around that by ignoring them.
        #
        try:
            resolved = cdom.resolve_xref(app.env, docname, app.builder,
                                         role, candidate, pending, literal)
        except NoUri:
            resolved = None

        if resolved:
            return resolved

    return plain
Nícolas F. R. A. Prado | 1ac4cfb | 2020-09-11 13:34:33 +0000 | [diff] [blame] | 227 | |
Nícolas F. R. A. Prado | d18b017 | 2020-09-11 13:34:39 +0000 | [diff] [blame] | 228 | # |
| 229 | # Try to replace a documentation reference of the form Documentation/... with a |
| 230 | # cross reference to that page |
| 231 | # |
def markup_doc_ref(docname, app, match):
    """Resolve a "Documentation/..." match as a 'doc' cross-reference.

    ``match`` comes from RE_doc; group 1 (the path after "Documentation",
    without any extension) is used as the reference target.

    Returns the resolved cross-reference, or the matched text as a plain
    ``nodes.Text`` when the std domain cannot resolve it.
    """
    std_domain = app.env.domains['std']
    #
    # Go through the dance of getting an xref out of the std domain
    #
    doc_target = match.group(1)
    pending = addnodes.pending_xref('', refdomain='std', reftype='doc',
                                    reftarget=doc_target, modname=None,
                                    classname=None, refexplicit=False)
    #
    # XXX The Latex builder will throw NoUri exceptions here,
    # work around that by ignoring them.
    #
    try:
        resolved = std_domain.resolve_xref(app.env, docname, app.builder,
                                           'doc', doc_target, pending, None)
    except NoUri:
        resolved = None
    #
    # Without a usable xref, fall back to the text exactly as written.
    #
    if not resolved:
        return nodes.Text(match.group(0))
    return resolved
| 258 | |
def get_c_namespace(app, docname):
    """Return the C namespace declared in a document's source file.

    The source path is obtained from ``app.env.doc2path(docname)`` and the
    file is scanned line by line for the first match of RE_namespace.

    Returns the captured namespace name, or '' when no line matches.
    """
    # io.open accepts an encoding on both Python 2 and Python 3.
    import io

    source = app.env.doc2path(docname)
    #
    # Decode explicitly instead of relying on the locale's default
    # encoding, which can fail on non-ASCII documents under a C/ASCII
    # locale.  Undecodable bytes are replaced rather than raising, since
    # only the namespace directive line is of interest here.
    # NOTE(review): assumes the sources are UTF-8 -- confirm against the
    # project's configured source encoding.
    #
    with io.open(source, encoding='utf-8', errors='replace') as f:
        for line in f:
            match = RE_namespace.search(line)
            if match:
                return match.group(1)
    return ''
| 267 | |
def auto_markup(app, doctree, name):
    """Add automatic cross-references to every paragraph of a document.

    Records the document's C namespace in the module-level ``c_namespace``
    and then replaces each eligible text node with the nodes returned by
    markup_refs().
    """
    global c_namespace
    c_namespace = get_c_namespace(app, name)
    #
    # This loop could eventually be improved on.  Someday maybe we
    # want a proper tree traversal with a lot of awareness of which
    # kinds of nodes to prune.  But this works well for now.
    #
    # Text whose parent is a nodes.literal is left alone: that covers
    # ``literal text`` and avoids adding cross-references to functions
    # that have been explicitly marked with :c:func:.
    #
    for paragraph in doctree.traverse(nodes.paragraph):
        for text_node in paragraph.traverse(nodes.Text):
            container = text_node.parent
            if isinstance(container, nodes.literal):
                continue
            container.replace(text_node, markup_refs(name, app, text_node))
Jonathan Corbet | d74b0d3 | 2019-04-25 07:55:07 -0600 | [diff] [blame] | 284 | |
def setup(app):
    """Register the extension with Sphinx.

    Connects auto_markup() to the 'doctree-resolved' event and returns the
    extension metadata, which declares it safe for parallel reading and
    parallel writing.
    """
    app.connect('doctree-resolved', auto_markup)
    return dict(parallel_read_safe=True, parallel_write_safe=True)