forked from zephyrproject-rtos/zephyr
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathextract_content.py
executable file
·206 lines (172 loc) · 7.68 KB
/
extract_content.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
#!/usr/bin/env python3
#
# Copyright (c) 2018, Foundries.io Ltd
# Copyright (c) 2018, Nordic Semiconductor ASA
# Copyright (c) 2017, Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
# Internal script used by the documentation's build system to create
# the "final" docs tree which is then compiled by Sphinx.
#
# This works around the fact that Sphinx needs a single documentation
# root directory, while Zephyr's documentation files are spread around
# the tree.
import argparse
import collections
import fnmatch
import os
from os import path
import re
import shutil
import sys
# Directives to parse for included files: each takes a (relative) file
# path argument naming a file that must also be copied to the build tree.
DIRECTIVES = ["figure", "include", "image", "literalinclude"]

# A simple namedtuple for a generated output file.
#
# - src: source file, what file should be copied (in source directory)
# - dst: destination file, path it should be copied to (in build directory)
Output = collections.namedtuple('Output', 'src dst')

# Represents the content which must be extracted from the Zephyr tree,
# as well as the output directories needed to contain it.
#
# - outputs: list of Output objects for extracted content.
# - output_dirs: set of directories which must exist to contain
#   output destination files.
Content = collections.namedtuple('Content', 'outputs output_dirs')
def src_deps(zephyr_base, src_file, dest):
    """Return Output objects for the extra files src_file references.

    - zephyr_base: the ZEPHYR_BASE directory containing src_file
    - src_file: path to a source file in the documentation
    - dest: path to the top-level output/destination directory

    Destination paths inside dest mirror each dependency's relative
    location under zephyr_base. Only .rst files are inspected, and only
    the directives configured in DIRECTIVES are considered.
    """
    # Non-reST files can't contain the directives we care about.
    if not src_file.endswith(".rst"):
        return []

    # Load the file's contents, bailing (with a report) on decode errors.
    try:
        with open(src_file, encoding="utf-8") as f:
            lines = [ln.strip() for ln in f]
    except UnicodeDecodeError as e:
        sys.stderr.write(
            "Malformed {} in {}\n"
            " Context: {}\n"
            " Problematic data: {}\n"
            " Reason: {}\n".format(
                e.encoding, src_file,
                e.object[max(e.start - 40, 0):e.end + 40],
                e.object[e.start:e.end],
                e.reason))
        return []

    # Source file's directory, and the destination directory any of its
    # dependencies should land in.
    src_dir = path.dirname(src_file)
    dst_dir = path.join(dest, path.relpath(src_dir, start=zephyr_base))

    # Each matching directive is assumed to take a single argument: a
    # path to the dependency file, relative to src_dir.
    pattern = re.compile(
        r"\.\.\s+(?P<directive>{})::\s+(?P<dep_rel>.*)".format(
            "|".join(DIRECTIVES)))

    deps = []
    for line in lines:
        match = pattern.match(line)
        if match is None:
            continue
        dep_rel = match.group('dep_rel')  # relative to src_dir
        dep_src = path.abspath(path.join(src_dir, dep_rel))
        if not path.isfile(dep_src):
            print("File not found:", dep_src, "\n referenced by:",
                  src_file, file=sys.stderr)
            continue
        dep_dst = path.abspath(path.join(dst_dir, dep_rel))
        deps.append(Output(dep_src, dep_dst))
    return deps
def find_content(zephyr_base, src, dest, fnfilter, ignore):
    """Build a Content describing what to copy from zephyr_base/src.

    Collects the Outputs to copy over and the new directories that may
    be needed to contain them. Nothing is copied and dest is not
    otherwise modified here.
    """
    outputs = []
    output_dirs = set()
    for dirpath, dirnames, filenames in os.walk(path.join(zephyr_base, src)):
        # Prune ignored subdirectories from the remainder of the walk.
        dirnames[:] = [
            d for d in dirnames
            if not path.normpath(path.join(dirpath, d)).startswith(ignore)
        ]

        # Skip directories with no matching files.
        matches = fnmatch.filter(filenames, fnfilter)
        if not matches:
            continue

        # There are sources here, so this directory's mirror under dest
        # must exist.
        dst_dir = path.join(dest, path.relpath(dirpath, start=zephyr_base))
        output_dirs.add(path.abspath(dst_dir))

        # Record an Output per source file, plus any files it references
        # via directives; track the dependencies' directories too.
        for src_rel in matches:
            src_abs = path.join(dirpath, src_rel)
            deps = src_deps(zephyr_base, src_abs, dest)
            output_dirs.update(path.dirname(d.dst) for d in deps)
            outputs.extend(deps)
            outputs.append(
                Output(src_abs, path.abspath(path.join(dst_dir, src_rel))))
    return Content(outputs, output_dirs)
def extract_content(content):
    """Create the output tree described by a Content object.

    Every directory in content.output_dirs is created (if missing), then
    each Output's src is copied to its dst. copy2() is used so the
    copies keep their source modification times; refreshing them would
    trigger needless documentation rebuilds.
    """
    for out_dir in content.output_dirs:
        os.makedirs(out_dir, exist_ok=True)
    for src, dst in content.outputs:
        shutil.copy2(src, dst)
def main():
    # Command-line entry point: parse arguments, extract the configured
    # documentation content from ZEPHYR_BASE, and optionally record the
    # input/output file list for the build system.
    parser = argparse.ArgumentParser(
        description='''Recursively copy documentation files from ZEPHYR_BASE to
        a destination folder, along with files referenced in those .rst files
        by a configurable list of directives: {}. The ZEPHYR_BASE environment
        variable is used to determine source directories to copy files
        from.'''.format(DIRECTIVES))
    parser.add_argument('--outputs',
                        help='If given, save input/output files to this path')
    parser.add_argument('--just-outputs', action='store_true',
                        help='''Skip extraction and just list outputs.
                        Cannot be given without --outputs.''')
    parser.add_argument('--ignore', action='append',
                        help='''Source directories to ignore when copying
                        files. This may be given multiple times.''')
    parser.add_argument('content_config', nargs='+',
                        help='''A glob:source:destination specification
                        for content to extract. The "glob" is a documentation
                        file name pattern to include, "source" is a source
                        directory to search for such files in, and
                        "destination" is the directory to copy it into.''')
    args = parser.parse_args()

    if "ZEPHYR_BASE" not in os.environ:
        sys.exit("ZEPHYR_BASE environment variable undefined.")
    zephyr_base = os.environ["ZEPHYR_BASE"]

    # Normalize ignored directories into a tuple, the form accepted by
    # str.startswith() in find_content().
    if not args.ignore:
        ignore = ()
    else:
        ignore = tuple(path.normpath(ign) for ign in args.ignore)

    if args.just_outputs and not args.outputs:
        sys.exit('--just-outputs cannot be given without --outputs')

    # Validate and split each glob:source:destination spec up front, so a
    # malformed spec produces a clear error message instead of a cryptic
    # ValueError traceback when unpacked in the loop below.
    content_config = []
    for cfg in args.content_config:
        spec = cfg.split(':', 2)
        if len(spec) != 3:
            sys.exit('invalid content_config "{}": expected '
                     'glob:source:destination'.format(cfg))
        content_config.append(spec)

    # Use a set so a file appearing in several specs is listed once.
    outputs = set()
    for fnfilter, source, dest in content_config:
        content = find_content(zephyr_base, source, dest, fnfilter, ignore)
        if not args.just_outputs:
            extract_content(content)
        outputs |= set(content.outputs)

    # Record each output's source and destination, one path per line.
    if args.outputs:
        with open(args.outputs, 'w') as f:
            for o in outputs:
                print(o.src, file=f)
                print(o.dst, file=f)
# Run only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()