forked from github/codeql
-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy pathpopulator.py
148 lines (135 loc) · 6.2 KB
/
populator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
import sys
import os
import subprocess
from ast import literal_eval
from semmle import logging
from semmle import traverser
from semmle import cmdline
from semmle import worker
from semmle.util import VERSION, update_analysis_version, get_analysis_major_version
from buildtools.version import executable
'''The populator generates trap files from a Python project.
The populator consists of two parts: a traverser front end which traverses the file
system and multiple worker back ends which extract information from the modules.
'''
#NOTE: The front end is simply an iterable of "extractables", so it should be easy to
#plug in new front ends if needed.
def cleanup_sys_path(path):
    '''Clean up sys.path removing duplicates and
    current working directory, making it safe for analysis.

    Each entry is kept at most once, at the position of its first occurrence
    (duplicates are removed whether or not they are adjacent), and every entry
    equal to the current working directory is dropped.

    The input is not modified; a new list is returned.
    '''
    cwd = os.getcwd()
    # dict.fromkeys() de-duplicates while preserving first-seen order, which
    # matches import semantics: the earliest occurrence of an entry wins.
    return [entry for entry in dict.fromkeys(path) if entry != cwd]
def get_py2_sys_path(logger, py3_sys_path):
    '''Get the sys.path for Python 2, if it is available. If no Python 2 is available,
    simply return the Python 3 sys.path. Returns a tuple of the sys.path and a boolean indicating
    whether Python 2 is available.

    `logger` receives the reason whenever the fallback is taken. `py3_sys_path` itself is
    not modified; only its first entry is copied into the Python 2 path.
    '''
    try:
        command = " ".join(executable(2) + ['-c "import sys; print(sys.path)"'])
        # We need `shell=True` here in order for the test framework to function correctly. For
        # whatever reason, the `PATH` variable is ignored if `shell=False`.
        # Also, this in turn forces us to give the whole command as a string, rather than a list.
        # Otherwise, the effect is that the Python interpreter is invoked _as a REPL_, rather than
        # with the given piece of code.
        output = subprocess.check_output(command, shell=True).decode(sys.getfilesystemencoding())
        py2_sys_path = literal_eval(output)
        # `literal_eval` accepts any Python literal, so make sure we really got a usable
        # path list. Raising ValueError here routes unexpected output to the same fallback
        # as an unparsable one, instead of failing below with TypeError/IndexError.
        if not isinstance(py2_sys_path, list) or not py2_sys_path:
            raise ValueError("Unexpected sys.path output from Python 2: %r" % (output,))
        # Ensure that the first element of the sys.path is the same as the Python 3 sys.path --
        # specifically a reference to our local `tools` directory. This ensures that the `six` stubs
        # are picked up from there. The item we're overwriting here is '', which would be cleaned up
        # later anyway.
        py2_sys_path[0] = py3_sys_path[0]
        return py2_sys_path, True
    except (subprocess.CalledProcessError, ValueError, SyntaxError) as e:
        logger.error("Error while getting Python 2 sys.path:")
        # Pass the exception as a format argument, like the other error reports in this file.
        logger.error("%s", e)
        logger.info("No Python 2 found. Using Python 3 sys.path.")
        return py3_sys_path, False
def main(sys_path = sys.path[:]):
    '''Command-line entry point of the populator.

    Parses the command line, selects the analysis version and the matching
    `sys.path`, builds the traverser front end and hands over to `run`.

    `sys_path` is the module search path to use for the analysis. Its default is
    a copy of `sys.path` taken when this function is defined. The first entry is
    dropped before the path is stored on `options.sys_path`.

    Exits the process with status 1 if running under Python 2 or if the
    traverser cannot be constructed.
    '''
    options, args = cmdline.parse(sys.argv[1:])
    logger = logging.Logger(options.verbosity, options.colorize)
    # This is not the prettiest way to do it, but when running tests we want to ensure that the
    # `--lang` flag influences the analysis version (e.g. so that we include the correct stdlib TRAP
    # file). So, we change the values of the appropriate variables (which would otherwise be based
    # on `CODEQL_EXTRACTOR_PYTHON_ANALYSIS_VERSION`), overwriting the previous values.
    if options.language_version:
        last_version = options.language_version[-1]
        update_analysis_version(last_version)

    found_py2 = False
    if get_analysis_major_version() == 2 and options.extract_stdlib:
        # Setup `sys_path` to use the Python 2 standard library
        sys_path, found_py2 = get_py2_sys_path(logger, sys_path)

    # use utf-8 as the character encoding for stdout/stderr to be able to properly
    # log/print things on systems that use bad default encodings (windows).
    # The streams may be None or may have been replaced by objects without
    # `reconfigure`; in that case there is nothing to reconfigure, so skip them
    # rather than failing with AttributeError.
    for stream in (sys.stdout, sys.stderr):
        reconfigure = getattr(stream, "reconfigure", None)
        if reconfigure is not None:
            reconfigure(encoding='utf-8')

    sys.setrecursionlimit(2000)

    sys_path = cleanup_sys_path(sys_path)
    # Everything except the first entry is handed to the extractor.
    options.sys_path = sys_path[1:]

    if sys.version_info.major == 2:
        logger.error("Extraction using Python 2 is not supported.")
        logger.warning("To use the Python extractor, please ensure that Python 3 is available on your system.")
        logger.warning("For more information, see https://codeql.github.com/docs/codeql-overview/system-requirements/#additional-software-requirements")
        logger.warning("and https://codeql.github.com/docs/codeql-overview/supported-languages-and-frameworks/#languages-and-compilers")
        logger.close()
        logging.stop()
        sys.exit(1)
    elif found_py2:
        logger.info("Extraction will use the Python 2 standard library.")
    else:
        logger.info("Extraction will use the Python 3 standard library.")
    logger.info("sys_path is: %s", sys_path)

    try:
        the_traverser = traverser.Traverser(options, args, logger)
    except Exception as ex:
        # Report the problem and shut logging down before exiting.
        logger.error("%s", ex)
        logger.close()
        logging.stop()
        sys.exit(1)
    run(options, args, the_traverser, logger)
def run(options, args, the_traverser, logger: logging.Logger):
    '''Run the extraction over everything produced by `the_traverser`.

    Creates an extractor pool from `options`, feeds it the traverser and shuts
    the pool and the logging machinery down afterwards. `args` is accepted but
    not used by this function.

    Exits the process with a non-zero status on failure:
      1 -- the pool could not be created, or extraction raised `worker.ExtractorFailure`
      2 -- interrupted from the keyboard
      3 -- any other exception
    Returns normally on success.
    '''
    logger.info("Python version %s", sys.version.split()[0])
    logger.info("Python extractor version %s", VERSION)

    # The CodeQL-specific variable takes precedence over the generic one.
    if 'CODEQL_EXTRACTOR_PYTHON_SOURCE_ARCHIVE_DIR' in os.environ:
        archive = os.environ['CODEQL_EXTRACTOR_PYTHON_SOURCE_ARCHIVE_DIR']
    elif 'SOURCE_ARCHIVE' in os.environ:
        archive = os.environ['SOURCE_ARCHIVE']
    else:
        archive = None
    trap_dir = cmdline.output_dir_from_options_and_env(options)

    try:
        pool = worker.ExtractorPool.from_options(options, trap_dir, archive, logger)
    except ValueError as ve:
        logger.error("%s", ve)
        logger.close()
        # Stop logging as well, as every other exit path in this file does.
        logging.stop()
        sys.exit(1)

    exitcode = 0
    try:
        pool.extract(the_traverser)
    except worker.ExtractorFailure:
        exitcode = 1
    except KeyboardInterrupt:
        exitcode = 2
        logger.info("Keyboard interrupt")
    except BaseException as ex:
        exitcode = 3
        logger.error("Unexpected exception: %s ", ex)
        logger.traceback(logging.WARN)
    finally:
        if exitcode:
            logger.debug("Stopping...")
            pool.stop()
        else:
            logger.debug("Writing interpreter trap")
            pool.close()
        logger.close()
        logging.stop()
        # NOTE(review): this message is written after the logger was closed;
        # presumably `write_message` does not depend on it -- confirm.
        logger.write_message(logging.DEBUG, "Stopped." if exitcode else "Done.")
        if exitcode:
            sys.exit(exitcode)
# Script entry point: start the populator only when this file is executed
# directly, so that importing the module does not trigger an extraction.
if __name__ == "__main__":
    main()