Skip to content

Commit

Permalink
Properly unlock the socket file lock in ansible-connection (ansible#3…
Browse files Browse the repository at this point in the history
…9223)

Also use a lock file per host, rather than one global file lock.

Commit 9c0275a introduced a bug where the lock file was only being
unlocked by the child PID of the resulting fork done in ansible-connection.
This causes delays when a large inventory causes a lot of contention on
that global lock. This patch fixes the problem by ensuring the lock is
released regardless of the fork condition, and also to use a lock file
based on the remote address of the target host, removing the global lock
bottleneck.

Fixes ansible#38892
  • Loading branch information
jimi-c authored Apr 25, 2018
1 parent 12f2b95 commit 7ce9968
Showing 1 changed file with 57 additions and 46 deletions.
103 changes: 57 additions & 46 deletions bin/ansible-connection
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ import traceback
import errno
import json

from contextlib import contextmanager

from ansible import constants as C
from ansible.module_utils._text import to_bytes, to_native, to_text
from ansible.module_utils.six import PY3
Expand All @@ -33,6 +35,21 @@ from ansible.utils.display import Display
from ansible.utils.jsonrpc import JsonRpcServer


@contextmanager
def file_lock(lock_path):
"""
Uses contextmanager to create and release a file lock based on the
given path. This allows us to create locks using `with file_lock()`
to prevent deadlocks related to failure to unlock properly.
"""

lock_fd = os.open(lock_path, os.O_RDWR | os.O_CREAT, 0o600)
fcntl.lockf(lock_fd, fcntl.LOCK_EX)
yield
fcntl.lockf(lock_fd, fcntl.LOCK_UN)
os.close(lock_fd)


class ConnectionProcess(object):
'''
The connection process wraps around a Connection object that manages
Expand Down Expand Up @@ -209,60 +226,54 @@ def main():
tmp_path = unfrackpath(C.PERSISTENT_CONTROL_PATH_DIR)
makedirs_safe(tmp_path)

lock_path = unfrackpath("%s/.ansible_pc_lock" % tmp_path)
lock_path = unfrackpath("%s/.ansible_pc_lock_%s" % (tmp_path, play_context.remote_addr))
socket_path = unfrackpath(cp % dict(directory=tmp_path))

# if the socket file doesn't exist, spin up the daemon process
lock_fd = os.open(lock_path, os.O_RDWR | os.O_CREAT, 0o600)
fcntl.lockf(lock_fd, fcntl.LOCK_EX)

if not os.path.exists(socket_path):
messages.append('local domain socket does not exist, starting it')
original_path = os.getcwd()
r, w = os.pipe()
pid = fork_process()
with file_lock(lock_path):
if not os.path.exists(socket_path):
messages.append('local domain socket does not exist, starting it')
original_path = os.getcwd()
r, w = os.pipe()
pid = fork_process()

if pid == 0:
try:
os.close(r)
wfd = os.fdopen(w, 'w')
process = ConnectionProcess(wfd, play_context, socket_path, original_path, ansible_playbook_pid)
process.start()
except Exception:
messages.append(traceback.format_exc())
rc = 1
if pid == 0:
try:
os.close(r)
wfd = os.fdopen(w, 'w')
process = ConnectionProcess(wfd, play_context, socket_path, original_path, ansible_playbook_pid)
process.start()
except Exception:
messages.append(traceback.format_exc())
rc = 1

fcntl.lockf(lock_fd, fcntl.LOCK_UN)
os.close(lock_fd)
if rc == 0:
process.run()

if rc == 0:
process.run()
sys.exit(rc)

sys.exit(rc)
else:
os.close(w)
rfd = os.fdopen(r, 'r')
data = json.loads(rfd.read())
messages.extend(data.pop('messages'))
result.update(data)

else:
os.close(w)
rfd = os.fdopen(r, 'r')
data = json.loads(rfd.read())
messages.extend(data.pop('messages'))
result.update(data)

else:
messages.append('found existing local domain socket, using it!')
conn = Connection(socket_path)
pc_data = to_text(init_data)
try:
messages.extend(conn.update_play_context(pc_data))
except Exception as exc:
# Only network_cli has update_play context, so missing this is
# not fatal e.g. netconf
if isinstance(exc, ConnectionError) and getattr(exc, 'code', None) == -32601:
pass
else:
result.update({
'error': to_text(exc),
'exception': traceback.format_exc()
})
messages.append('found existing local domain socket, using it!')
conn = Connection(socket_path)
pc_data = to_text(init_data)
try:
messages.extend(conn.update_play_context(pc_data))
except Exception as exc:
# Only network_cli has update_play context, so missing this is
# not fatal e.g. netconf
if isinstance(exc, ConnectionError) and getattr(exc, 'code', None) == -32601:
pass
else:
result.update({
'error': to_text(exc),
'exception': traceback.format_exc()
})

messages.append(sys.stdout.getvalue())
result.update({
Expand Down

0 comments on commit 7ce9968

Please sign in to comment.