Skip to content

Commit

Permalink
optimize host_group_vars and vars plugin loading (ansible#79945)
Browse files Browse the repository at this point in the history
* Improve host_group_vars efficiency:

* normalize the basedir with `os.path.realpath()` once and cache it
* cache missing paths/files
* reduce the calls to `isinstance`

Add a couple more general improvements in vars/plugins.py get_vars_from_path():

* call `PluginLoader.all()` once for vars plugins and reload specific
  plugins subsequently
* don't reload legacy/builtin vars plugins that are not enabled

Add a test for host_group_vars and legacy plugin loading

Co-authored-by: Matt Davis <[email protected]>

* changelog

* Add a new is_stateless attribute to the vars plugin baseclass

update integration tests to be quieter and use the same test pattern

Fix deprecation and adjust test that didn't catch the issue (deprecation only occured when the value was False)

move realpath cache to host_group_vars (do not smuggle call state as instance data)

refactor under a single 'if cache:' statement

Call os.path.isdir instead of always calling os.path.exists first. Just call os.path.exists to differentiate between missing and non-directory.

remove call to super(VarsModule, self).get_vars()

use the entity name as the cache key instead of variable location

Remove isinstance checks and use a class attribute just in case any plugins are subclassing Host/Group

Replace startswith by checking index 0 of the name instead, since host/group names are required

* rename is_stateless to cache_instance to make it more clear what it does

* add plugin instance cache using the path to plugin loader

reduce loading stage option if a new instance isn't created

don't require a known subdir on PluginLoader instantiation for backwards
compatibility

rename attribute again

contain reading from/initializing cached instances to a plugin loader method

* Deprecate v2 vars plugins

* Refactor to use the cache in existing plugin loader methods

Rename the attribute again

Refactor host_group_vars with requested changes

Make changelog a bugfixes fragment

Add a deprecation fragment for v2 vars plugins.

Add type hints

* unbreak group_vars

* Apply suggestions from code review

* misc tweaks

* always cache instance by both requested and resolved FQ name
* add lru_cache to stage calculation to avoid repeated config consultation

* handle KeyError from missing stage option

---------

Co-authored-by: Matt Davis <[email protected]>
  • Loading branch information
s-hertel and nitzmahone authored Oct 3, 2023
1 parent 9b3ed5e commit debf2be
Show file tree
Hide file tree
Showing 12 changed files with 252 additions and 100 deletions.
5 changes: 5 additions & 0 deletions changelogs/fragments/79945-host_group_vars-improvements.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
bugfixes:
- Cache host_group_vars after instantiating it once and limit the amount of repetitive work it needs to do every time it runs.
- Call PluginLoader.all() once for vars plugins, and load vars plugins that run automatically or are enabled specifically by name subsequently.
deprecated_features:
- Old style vars plugins which use the entrypoints `get_host_vars` or `get_group_vars` are deprecated. The plugin should be updated to inherit from `BaseVarsPlugin` and define a `get_vars` method as the entrypoint.
7 changes: 7 additions & 0 deletions lib/ansible/inventory/group.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
__metaclass__ = type

from collections.abc import Mapping, MutableMapping
from enum import Enum
from itertools import chain

from ansible import constants as C
Expand Down Expand Up @@ -53,8 +54,14 @@ def to_safe_group_name(name, replacer="_", force=False, silent=False):
return name


class InventoryObjectType(Enum):
HOST = 0
GROUP = 1


class Group:
''' a group of ansible hosts '''
base_type = InventoryObjectType.GROUP

# __slots__ = [ 'name', 'hosts', 'vars', 'child_groups', 'parent_groups', 'depth', '_hosts_cache' ]

Expand Down
3 changes: 2 additions & 1 deletion lib/ansible/inventory/host.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

from collections.abc import Mapping, MutableMapping

from ansible.inventory.group import Group
from ansible.inventory.group import Group, InventoryObjectType
from ansible.parsing.utils.addresses import patterns
from ansible.utils.vars import combine_vars, get_unique_id

Expand All @@ -31,6 +31,7 @@

class Host:
''' a single ansible host '''
base_type = InventoryObjectType.HOST

# __slots__ = [ 'name', 'vars', 'groups' ]

Expand Down
55 changes: 43 additions & 12 deletions lib/ansible/plugins/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,10 @@ def __init__(self, class_name, package, config, subdir, aliases=None, required_b
self._module_cache = MODULE_CACHE[class_name]
self._paths = PATH_CACHE[class_name]
self._plugin_path_cache = PLUGIN_PATH_CACHE[class_name]
try:
self._plugin_instance_cache = {} if self.type == 'vars' else None
except ValueError:
self._plugin_instance_cache = None

self._searched_paths = set()

Expand All @@ -262,6 +266,7 @@ def _clear_caches(self):
self._module_cache = MODULE_CACHE[self.class_name]
self._paths = PATH_CACHE[self.class_name]
self._plugin_path_cache = PLUGIN_PATH_CACHE[self.class_name]
self._plugin_instance_cache = {} if self.type == 'vars' else None
self._searched_paths = set()

def __setstate__(self, data):
Expand Down Expand Up @@ -866,23 +871,35 @@ def get_with_context(self, name, *args, **kwargs):
collection_list = kwargs.pop('collection_list', None)
if name in self.aliases:
name = self.aliases[name]

if self._plugin_instance_cache and (cached_load_result := self._plugin_instance_cache.get(name)):
# Resolving the FQCN is slow, even if we've passed in the resolved FQCN.
# Short-circuit here if we've previously resolved this name.
# This will need to be restricted if non-vars plugins start using the cache, since
# some non-fqcn plugin need to be resolved again with the collections list.
return get_with_context_result(*cached_load_result)

plugin_load_context = self.find_plugin_with_context(name, collection_list=collection_list)
if not plugin_load_context.resolved or not plugin_load_context.plugin_resolved_path:
# FIXME: this is probably an error (eg removed plugin)
return get_with_context_result(None, plugin_load_context)

fq_name = plugin_load_context.resolved_fqcn
if '.' not in fq_name:
if '.' not in fq_name and plugin_load_context.plugin_resolved_collection:
fq_name = '.'.join((plugin_load_context.plugin_resolved_collection, fq_name))
name = plugin_load_context.plugin_resolved_name
resolved_type_name = plugin_load_context.plugin_resolved_name
path = plugin_load_context.plugin_resolved_path
if self._plugin_instance_cache and (cached_load_result := self._plugin_instance_cache.get(fq_name)):
# This is unused by vars plugins, but it's here in case the instance cache expands to other plugin types.
# We get here if we've seen this plugin before, but it wasn't called with the resolved FQCN.
return get_with_context_result(*cached_load_result)
redirected_names = plugin_load_context.redirect_list or []

if path not in self._module_cache:
self._module_cache[path] = self._load_module_source(name, path)
self._module_cache[path] = self._load_module_source(resolved_type_name, path)
found_in_cache = False

self._load_config_defs(name, self._module_cache[path], path)
self._load_config_defs(resolved_type_name, self._module_cache[path], path)

obj = getattr(self._module_cache[path], self.class_name)

Expand All @@ -899,24 +916,27 @@ def get_with_context(self, name, *args, **kwargs):
return get_with_context_result(None, plugin_load_context)

# FIXME: update this to use the load context
self._display_plugin_load(self.class_name, name, self._searched_paths, path, found_in_cache=found_in_cache, class_only=class_only)
self._display_plugin_load(self.class_name, resolved_type_name, self._searched_paths, path, found_in_cache=found_in_cache, class_only=class_only)

if not class_only:
try:
# A plugin may need to use its _load_name in __init__ (for example, to set
# or get options from config), so update the object before using the constructor
instance = object.__new__(obj)
self._update_object(instance, name, path, redirected_names, fq_name)
self._update_object(instance, resolved_type_name, path, redirected_names, fq_name)
obj.__init__(instance, *args, **kwargs) # pylint: disable=unnecessary-dunder-call
obj = instance
except TypeError as e:
if "abstract" in e.args[0]:
# Abstract Base Class or incomplete plugin, don't load
display.v('Returning not found on "%s" as it has unimplemented abstract methods; %s' % (name, to_native(e)))
display.v('Returning not found on "%s" as it has unimplemented abstract methods; %s' % (resolved_type_name, to_native(e)))
return get_with_context_result(None, plugin_load_context)
raise

self._update_object(obj, name, path, redirected_names, fq_name)
self._update_object(obj, resolved_type_name, path, redirected_names, fq_name)
if self._plugin_instance_cache is not None and getattr(obj, 'is_stateless', False):
# store under both the originally requested name and the resolved FQ name
self._plugin_instance_cache[name] = self._plugin_instance_cache[fq_name] = (obj, plugin_load_context)
return get_with_context_result(obj, plugin_load_context)

def _display_plugin_load(self, class_name, name, searched_paths, path, found_in_cache=None, class_only=None):
Expand Down Expand Up @@ -1008,6 +1028,16 @@ def all(self, *args, **kwargs):
yield path
continue

if path in legacy_excluding_builtin:
fqcn = basename
else:
fqcn = f"ansible.builtin.{basename}"

if self._plugin_instance_cache is not None and fqcn in self._plugin_instance_cache:
# Here just in case, but we don't call all() multiple times for vars plugins, so this should not be used.
yield self._plugin_instance_cache[basename][0]
continue

if path not in self._module_cache:
if self.type in ('filter', 'test'):
# filter and test plugin files can contain multiple plugins
Expand Down Expand Up @@ -1055,11 +1085,12 @@ def all(self, *args, **kwargs):
except TypeError as e:
display.warning("Skipping plugin (%s) as it seems to be incomplete: %s" % (path, to_text(e)))

if path in legacy_excluding_builtin:
fqcn = basename
else:
fqcn = f"ansible.builtin.{basename}"
self._update_object(obj, basename, path, resolved=fqcn)

if self._plugin_instance_cache is not None and fqcn not in self._plugin_instance_cache:
# Use get_with_context to cache the plugin the first time we see it.
self.get_with_context(fqcn)[0]

yield obj


Expand Down
1 change: 1 addition & 0 deletions lib/ansible/plugins/vars/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ class BaseVarsPlugin(AnsiblePlugin):
"""
Loads variables for groups and/or hosts
"""
is_stateless = False

def __init__(self):
""" constructor """
Expand Down
94 changes: 66 additions & 28 deletions lib/ansible/plugins/vars/host_group_vars.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,60 +55,98 @@

import os
from ansible.errors import AnsibleParserError
from ansible.module_utils.common.text.converters import to_bytes, to_native, to_text
from ansible.module_utils.common.text.converters import to_native
from ansible.plugins.vars import BaseVarsPlugin
from ansible.inventory.host import Host
from ansible.inventory.group import Group
from ansible.utils.path import basedir
from ansible.inventory.group import InventoryObjectType
from ansible.utils.vars import combine_vars

CANONICAL_PATHS = {} # type: dict[str, str]
FOUND = {} # type: dict[str, list[str]]
NAK = set() # type: set[str]
PATH_CACHE = {} # type: dict[tuple[str, str], str]


class VarsModule(BaseVarsPlugin):

REQUIRES_ENABLED = True
is_stateless = True

def load_found_files(self, loader, data, found_files):
for found in found_files:
new_data = loader.load_from_file(found, cache=True, unsafe=True)
if new_data: # ignore empty files
data = combine_vars(data, new_data)
return data

def get_vars(self, loader, path, entities, cache=True):
''' parses the inventory file '''

if not isinstance(entities, list):
entities = [entities]

super(VarsModule, self).get_vars(loader, path, entities)
# realpath is expensive
try:
realpath_basedir = CANONICAL_PATHS[path]
except KeyError:
CANONICAL_PATHS[path] = realpath_basedir = os.path.realpath(basedir(path))

data = {}
for entity in entities:
if isinstance(entity, Host):
subdir = 'host_vars'
elif isinstance(entity, Group):
subdir = 'group_vars'
else:
try:
entity_name = entity.name
except AttributeError:
raise AnsibleParserError("Supplied entity must be Host or Group, got %s instead" % (type(entity)))

try:
first_char = entity_name[0]
except (TypeError, IndexError, KeyError):
raise AnsibleParserError("Supplied entity must be Host or Group, got %s instead" % (type(entity)))

# avoid 'chroot' type inventory hostnames /path/to/chroot
if not entity.name.startswith(os.path.sep):
if first_char != os.path.sep:
try:
found_files = []
# load vars
b_opath = os.path.realpath(to_bytes(os.path.join(self._basedir, subdir)))
opath = to_text(b_opath)
key = '%s.%s' % (entity.name, opath)
if cache and key in FOUND:
found_files = FOUND[key]
try:
entity_type = entity.base_type
except AttributeError:
raise AnsibleParserError("Supplied entity must be Host or Group, got %s instead" % (type(entity)))

if entity_type is InventoryObjectType.HOST:
subdir = 'host_vars'
elif entity_type is InventoryObjectType.GROUP:
subdir = 'group_vars'
else:
# no need to do much if path does not exist for basedir
if os.path.exists(b_opath):
if os.path.isdir(b_opath):
self._display.debug("\tprocessing dir %s" % opath)
found_files = loader.find_vars_files(opath, entity.name)
FOUND[key] = found_files
else:
self._display.warning("Found %s that is not a directory, skipping: %s" % (subdir, opath))

for found in found_files:
new_data = loader.load_from_file(found, cache=True, unsafe=True)
if new_data: # ignore empty files
data = combine_vars(data, new_data)
raise AnsibleParserError("Supplied entity must be Host or Group, got %s instead" % (type(entity)))

if cache:
try:
opath = PATH_CACHE[(realpath_basedir, subdir)]
except KeyError:
opath = PATH_CACHE[(realpath_basedir, subdir)] = os.path.join(realpath_basedir, subdir)

if opath in NAK:
continue
key = '%s.%s' % (entity_name, opath)
if key in FOUND:
data = self.load_found_files(loader, data, FOUND[key])
continue
else:
opath = PATH_CACHE[(realpath_basedir, subdir)] = os.path.join(realpath_basedir, subdir)

if os.path.isdir(opath):
self._display.debug("\tprocessing dir %s" % opath)
FOUND[key] = found_files = loader.find_vars_files(opath, entity_name)
elif not os.path.exists(opath):
# cache missing dirs so we don't have to keep looking for things beneath the
NAK.add(opath)
else:
self._display.warning("Found %s that is not a directory, skipping: %s" % (subdir, opath))
# cache non-directory matches
NAK.add(opath)

data = self.load_found_files(loader, data, found_files)

except Exception as e:
raise AnsibleParserError(to_native(e))
Expand Down
2 changes: 2 additions & 0 deletions lib/ansible/utils/vars.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,8 @@ def merge_hash(x, y, recursive=True, list_merge='replace'):
# except performance)
if x == {} or x == y:
return y.copy()
if y == {}:
return x

# in the following we will copy elements from y to x, but
# we don't want to modify x, so we create a copy of it
Expand Down
3 changes: 3 additions & 0 deletions lib/ansible/vars/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,9 @@ def _combine_and_track(data, new_data, source):
See notes in the VarsWithSources docstring for caveats and limitations of the source tracking
'''
if new_data == {}:
return data

if C.DEFAULT_DEBUG:
# Populate var sources dict
for key in new_data:
Expand Down
Loading

0 comments on commit debf2be

Please sign in to comment.