Skip to content

Commit

Permalink
Controller-side module caching.
Browse files Browse the repository at this point in the history
This makes our recursive, ast.parse performance measures as fast as
pre-ziploader baseline.

Since this unittest isn't testing that the returned module data is
correct we don't need to worry about os.rename not having any module
data.  Should devise a separate test for the module and caching code
  • Loading branch information
abadger committed Apr 12, 2016
1 parent b5717ef commit dcc5dfd
Show file tree
Hide file tree
Showing 8 changed files with 133 additions and 48 deletions.
23 changes: 15 additions & 8 deletions bin/ansible
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,15 @@ except Exception:
pass

import os
import shutil
import sys
import traceback

# for debug
from multiprocessing import Lock
debug_lock = Lock()

import ansible.constants as C
from ansible.errors import AnsibleError, AnsibleOptionsError, AnsibleParserError
from ansible.utils.display import Display
from ansible.utils.unicode import to_unicode
Expand Down Expand Up @@ -87,33 +89,38 @@ if __name__ == '__main__':

cli = mycli(sys.argv)
cli.parse()
sys.exit(cli.run())
exit_code = cli.run()

except AnsibleOptionsError as e:
cli.parser.print_help()
display.error(to_unicode(e), wrap_text=False)
sys.exit(5)
exit_code = 5
except AnsibleParserError as e:
display.error(to_unicode(e), wrap_text=False)
sys.exit(4)
exit_code = 4
# TQM takes care of these, but leaving comment to reserve the exit codes
# except AnsibleHostUnreachable as e:
# display.error(str(e))
# sys.exit(3)
# exit_code = 3
# except AnsibleHostFailed as e:
# display.error(str(e))
# sys.exit(2)
# exit_code = 2
except AnsibleError as e:
display.error(to_unicode(e), wrap_text=False)
sys.exit(1)
exit_code = 1
except KeyboardInterrupt:
display.error("User interrupted execution")
sys.exit(99)
exit_code = 99
except Exception as e:
have_cli_options = cli is not None and cli.options is not None
display.error("Unexpected Exception: %s" % to_unicode(e), wrap_text=False)
if not have_cli_options or have_cli_options and cli.options.verbosity > 2:
display.display(u"the full traceback was:\n\n%s" % to_unicode(traceback.format_exc()))
else:
display.display("to see the full traceback, use -vvv")
sys.exit(250)
exit_code = 250
finally:
# Remove ansible tempdir
shutil.rmtree(C.DEFAULT_LOCAL_TMP, True)

sys.exit(exit_code)
16 changes: 16 additions & 0 deletions docsite/rst/intro_configuration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,22 @@ This is the default location Ansible looks to find modules::
Ansible knows how to look in multiple locations if you feed it a colon separated path, and it also will look for modules in the
"./library" directory alongside a playbook.

.. _local_tmp:

local_tmp
=========

When Ansible gets ready to send a module to a remote machine it usually has to
add a few things to the module: Some boilerplate code, the module's
parameters, and a few constants from the config file. This combination of
things gets stored in a temporary file until ansible exits and cleans up after
itself. The default location is a subdirectory of the user's home directory.
If you'd like to change that, you can do so by altering this setting::

local_tmp = $HOME/.ansible/tmp

Ansible will then choose a random directory name inside this location.

.. _log_path:

log_path
Expand Down
1 change: 1 addition & 0 deletions examples/ansible.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#inventory = /etc/ansible/hosts
#library = /usr/share/my_modules/
#remote_tmp = $HOME/.ansible/tmp
#local_tmp = $HOME/.ansible/tmp
#forks = 5
#poll_interval = 15
#sudo_user = root
Expand Down
8 changes: 6 additions & 2 deletions hacking/test-module
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,14 @@
# test-module -m ../library/file/lineinfile -a "dest=/etc/exports line='/srv/home hostname1(rw,sync)'" --check
# test-module -m ../library/commands/command -a "echo hello" -n -o "test_hello"

import sys
import base64
from multiprocessing import Lock
import optparse
import os
import subprocess
import sys
import traceback
import optparse

import ansible.utils.vars as utils_vars
from ansible.parsing.dataloader import DataLoader
from ansible.parsing.utils.jsonify import jsonify
Expand Down Expand Up @@ -133,10 +135,12 @@ def boilerplate_module(modfile, args, interpreter, check, destfile):

modname = os.path.basename(modfile)
modname = os.path.splitext(modname)[0]
action_write_lock = Lock()
(module_data, module_style, shebang) = module_common.modify_module(
modname,
modfile,
complex_args,
action_write_lock,
task_vars=task_vars
)

Expand Down
9 changes: 8 additions & 1 deletion lib/ansible/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
__metaclass__ = type

import os
import tempfile
from string import ascii_letters, digits

from ansible.compat.six import string_types
Expand Down Expand Up @@ -47,7 +48,7 @@ def shell_expand(path):
path = os.path.expanduser(os.path.expandvars(path))
return path

def get_config(p, section, key, env_var, default, boolean=False, integer=False, floating=False, islist=False, isnone=False, ispath=False, ispathlist=False):
def get_config(p, section, key, env_var, default, boolean=False, integer=False, floating=False, islist=False, isnone=False, ispath=False, ispathlist=False, istmppath=False):
''' return a configuration variable with casting '''
value = _get_config(p, section, key, env_var, default)
if boolean:
Expand All @@ -65,6 +66,11 @@ def get_config(p, section, key, env_var, default, boolean=False, integer=False,
value = None
elif ispath:
value = shell_expand(value)
elif istmppath:
value = shell_expand(value)
if not os.path.exists(value):
os.makedirs(value, 0o700)
value = tempfile.mkdtemp(prefix='ansible-local-tmp', dir=value)
elif ispathlist:
if isinstance(value, string_types):
value = [shell_expand(x) for x in value.split(os.pathsep)]
Expand Down Expand Up @@ -136,6 +142,7 @@ def load_config_file():
DEFAULT_MODULE_PATH = get_config(p, DEFAULTS, 'library', 'ANSIBLE_LIBRARY', None, ispathlist=True)
DEFAULT_ROLES_PATH = get_config(p, DEFAULTS, 'roles_path', 'ANSIBLE_ROLES_PATH', '/etc/ansible/roles', ispathlist=True)
DEFAULT_REMOTE_TMP = get_config(p, DEFAULTS, 'remote_tmp', 'ANSIBLE_REMOTE_TEMP', '$HOME/.ansible/tmp')
DEFAULT_LOCAL_TMP = get_config(p, DEFAULTS, 'local_tmp', 'ANSIBLE_LOCAL_TEMP', '$HOME/.ansible/tmp', istmppath=True)
DEFAULT_MODULE_NAME = get_config(p, DEFAULTS, 'module_name', None, 'command')
DEFAULT_FORKS = get_config(p, DEFAULTS, 'forks', 'ANSIBLE_FORKS', 5, integer=True)
DEFAULT_MODULE_ARGS = get_config(p, DEFAULTS, 'module_args', 'ANSIBLE_MODULE_ARGS', '')
Expand Down
84 changes: 58 additions & 26 deletions lib/ansible/executor/module_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from ansible import constants as C
from ansible.errors import AnsibleError
from ansible.utils.unicode import to_bytes, to_unicode
from ansible.plugins.strategy import action_write_locks

try:
from __main__ import display
Expand Down Expand Up @@ -275,7 +276,11 @@ def _get_facility(task_vars):
facility = task_vars['ansible_syslog_facility']
return facility

def meta_finder(data, snippet_names, snippet_data, zf):
def recursive_finder(data, snippet_names, snippet_data, zf):
"""
Using ModuleDepFinder, make sure we have all of the module_utils files that
the module its module_utils files needs.
"""
tree = ast.parse(data)
finder = ModuleDepFinder()
finder.visit(tree)
Expand All @@ -290,7 +295,7 @@ def meta_finder(data, snippet_names, snippet_data, zf):
snippet_names.update(new_snippets)

for snippet_name in tuple(new_snippets):
meta_finder(snippet_data[snippet_name], snippet_names, snippet_data, zf)
recursive_finder(snippet_data[snippet_name], snippet_names, snippet_data, zf)
del snippet_data[snippet_name]

def _find_snippet_imports(module_name, module_data, module_path, module_args, task_vars, module_compression):
Expand Down Expand Up @@ -350,23 +355,61 @@ def _find_snippet_imports(module_name, module_data, module_path, module_args, ta
except AttributeError:
display.warning(u'Bad module compression string specified: %s. Using ZIP_STORED (no compression)' % module_compression)
compression_method = zipfile.ZIP_STORED
zipoutput = BytesIO()
zf = zipfile.ZipFile(zipoutput, mode='w', compression=compression_method)
zf.writestr('ansible/__init__.py', b''.join((b"__version__ = '", to_bytes(__version__), b"'\n")))
zf.writestr('ansible/module_utils/__init__.py', b'')
zf.writestr('ansible/module_exec/__init__.py', b'')

zf.writestr('ansible/module_exec/%s/__init__.py' % module_name, b"")
zf.writestr('ansible/module_exec/%s/__main__.py' % module_name, module_data)

snippet_data = dict()
meta_finder(module_data, snippet_names, snippet_data, zf)
zf.close()

lookup_path = os.path.join(C.DEFAULT_LOCAL_TMP, 'ziploader_cache')
if not os.path.exists(lookup_path):
os.mkdir(lookup_path)
cached_module_filename = os.path.join(lookup_path, "%s-%s" % (module_name, module_compression))

zipdata = None
# Optimization -- don't lock if the module has already been cached
if os.path.exists(cached_module_filename):
zipdata = open(cached_module_filename, 'rb').read()
# Fool the check later... I think we should just remove the check
snippet_names.add('basic')
else:
with action_write_locks[module_name]:
# Check that no other process has created this while we were
# waiting for the lock
if not os.path.exists(cached_module_filename):
# Create the module zip data
zipoutput = BytesIO()
zf = zipfile.ZipFile(zipoutput, mode='w', compression=compression_method)
zf.writestr('ansible/__init__.py', b''.join((b"__version__ = '", to_bytes(__version__), b"'\n")))
zf.writestr('ansible/module_utils/__init__.py', b'')
zf.writestr('ansible/module_exec/__init__.py', b'')

zf.writestr('ansible/module_exec/%s/__init__.py' % module_name, b"")
zf.writestr('ansible/module_exec/%s/__main__.py' % module_name, module_data)

snippet_data = dict()
recursive_finder(module_data, snippet_names, snippet_data, zf)
zf.close()
zipdata = base64.b64encode(zipoutput.getvalue())

# Write the assembled module to a temp file (write to temp
# so that no one looking for the file reads a partially
# written file)
with open(cached_module_filename + '-part', 'w') as f:
f.write(zipdata)

# Rename the file into its final position in the cache so
# future users of this module can read it off the
# filesystem instead of constructing from scratch.
os.rename(cached_module_filename + '-part', cached_module_filename)

if zipdata is None:
# Another process wrote the file while we were waiting for
# the write lock. Go ahead and read the data from disk
# instead of re-creating it.
zipdata = open(cached_module_filename, 'rb').read()
# Fool the check later... I think we should just remove the check
snippet_names.add('basic')
shebang, interpreter = _get_shebang(u'/usr/bin/python', task_vars)
if shebang is None:
shebang = u'#!/usr/bin/python'
output.write(to_bytes(STRIPPED_ZIPLOADER_TEMPLATE % dict(
zipdata=base64.b64encode(zipoutput.getvalue()),
zipdata=zipdata,
ansible_module=module_name,
args=python_repred_args,
constants=python_repred_constants,
Expand Down Expand Up @@ -450,17 +493,6 @@ def modify_module(module_name, module_path, module_args, task_vars=dict(), modul
which results in the inclusion of the common code from powershell.ps1
"""
### TODO: Optimization ideas if this code is actually a source of slowness:
# * Fix comment stripping: Currently doesn't preserve shebangs and encoding info (but we unconditionally add encoding info)
# * Use pyminifier if installed
# * comment stripping/pyminifier needs to have config setting to turn it
# off for debugging purposes (goes along with keep remote but should be
# separate otherwise users wouldn't be able to get info on what the
# minifier output)
# * Only split into lines and recombine into strings once
# * Cache the modified module? If only the args are different and we do
# that as the last step we could cache all the work up to that point.

with open(module_path, 'rb') as f:

# read in the module source
Expand Down
23 changes: 20 additions & 3 deletions lib/ansible/plugins/strategy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,16 @@
from __future__ import (absolute_import, division, print_function)
__metaclass__ = type

from ansible.compat.six.moves import queue as Queue
from ansible.compat.six import iteritems, text_type, string_types

import json
import time
import zlib
from collections import defaultdict
from multiprocessing import Lock

from jinja2.exceptions import UndefinedError

from ansible.compat.six.moves import queue as Queue
from ansible.compat.six import iteritems, text_type, string_types
from ansible import constants as C
from ansible.errors import AnsibleError, AnsibleParserError, AnsibleUndefinedVariable
from ansible.executor.play_iterator import PlayIterator
Expand All @@ -51,6 +52,8 @@

__all__ = ['StrategyBase']

action_write_locks = defaultdict(Lock)


# TODO: this should probably be in the plugins/__init__.py, with
# a smarter mechanism to set all of the attributes based on
Expand Down Expand Up @@ -141,6 +144,20 @@ def _queue_task(self, host, task, task_vars, play_context):

display.debug("entering _queue_task() for %s/%s" % (host, task))

# Add a write lock for tasks.
# Maybe this should be added somewhere further up the call stack but
# this is the earliest in the code where we have task (1) extracted
# into its own variable and (2) there's only a single code path
# leading to the module being run. This is called by three
# functions: __init__.py::_do_handler_run(), linear.py::run(), and
# free.py::run() so we'd have to add to all three to do it there.
# The next common higher level is __init__.py::run() and that has
# tasks inside of play_iterator so we'd have to extract them to do it
# there.
if not action_write_locks[task.action]:
display.warning('Python defaultdict did not create the Lock for us. Creating manually')
action_write_locks[task.action] = Lock()

# and then queue the new task
display.debug("%s - putting task (%s) in queue" % (host, task))
try:
Expand Down
17 changes: 9 additions & 8 deletions test/units/plugins/action/test_action.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,14 +212,15 @@ def mock_find_plugin(name, options):

# test python module formatting
with patch.object(builtins, 'open', mock_open(read_data=to_bytes(python_module_replacers.strip(), encoding='utf-8'))) as m:
mock_task.args = dict(a=1, foo='fö〩')
mock_connection.module_implementation_preferences = ('',)
(style, shebang, data) = action_base._configure_module(mock_task.action, mock_task.args)
self.assertEqual(style, "new")
self.assertEqual(shebang, b"#!/usr/bin/python")

# test module not found
self.assertRaises(AnsibleError, action_base._configure_module, 'badmodule', mock_task.args)
with patch.object(os, 'rename') as m:
mock_task.args = dict(a=1, foo='fö〩')
mock_connection.module_implementation_preferences = ('',)
(style, shebang, data) = action_base._configure_module(mock_task.action, mock_task.args)
self.assertEqual(style, "new")
self.assertEqual(shebang, b"#!/usr/bin/python")

# test module not found
self.assertRaises(AnsibleError, action_base._configure_module, 'badmodule', mock_task.args)

# test powershell module formatting
with patch.object(builtins, 'open', mock_open(read_data=to_bytes(powershell_module_replacers.strip(), encoding='utf-8'))) as m:
Expand Down

0 comments on commit dcc5dfd

Please sign in to comment.