Skip to content

Commit

Permalink
Merge pull request ansible#12165 from amenonsen/address-parsing
Browse files Browse the repository at this point in the history
Hi @amenonsen - thanks for hunting down the unicode bug and expanding test_addresses. The code looks good, merging! -- Be systematic about parsing and validating hostnames and addresses
  • Loading branch information
chrrrles committed Sep 11, 2015
2 parents 4156bce + 88a20e7 commit ba7734b
Show file tree
Hide file tree
Showing 8 changed files with 309 additions and 145 deletions.
24 changes: 7 additions & 17 deletions lib/ansible/inventory/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from ansible.inventory.host import Host
from ansible.plugins import vars_loader
from ansible.utils.vars import combine_vars
from ansible.parsing.utils.addresses import parse_address

try:
from __main__ import display
Expand Down Expand Up @@ -83,27 +84,16 @@ def parse_inventory(self, host_list):
host_list = host_list.split(",")
host_list = [ h for h in host_list if h and h.strip() ]

self.parser = None

if host_list is None:
self.parser = None
pass
elif isinstance(host_list, list):
self.parser = None
all = Group('all')
self.groups = [ all ]
ipv6_re = re.compile('\[([a-f:A-F0-9]*[%[0-z]+]?)\](?::(\d+))?')
for x in host_list:
m = ipv6_re.match(x)
if m:
all.add_host(Host(m.groups()[0], m.groups()[1]))
else:
if ":" in x:
tokens = x.rsplit(":", 1)
# if there is ':' in the address, then this is an ipv6
if ':' in tokens[0]:
all.add_host(Host(x))
else:
all.add_host(Host(tokens[0], tokens[1]))
else:
all.add_host(Host(x))
for h in host_list:
(host, port) = parse_address(h, allow_ranges=False)
all.add_host(Host(host, port))
elif self._loader.path_exists(host_list):
#TODO: switch this to a plugin loader and a 'condition' per plugin on which it should be tried, restoring 'inventory plugins'
if self._loader.is_directory(host_list):
Expand Down
2 changes: 1 addition & 1 deletion lib/ansible/inventory/expand_hosts.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def detect_range(line = None):
Returns True if the given line contains a pattern, else False.
'''
if 0 <= line.find("[") < line.find(":") < line.find("]"):
if '[' in line:
return True
else:
return False
Expand Down
57 changes: 11 additions & 46 deletions lib/ansible/inventory/ini.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from ansible.inventory.group import Group
from ansible.inventory.expand_hosts import detect_range
from ansible.inventory.expand_hosts import expand_hostname_range
from ansible.parsing.utils.addresses import parse_address
from ansible.utils.unicode import to_unicode, to_bytes

class InventoryParser(object):
Expand Down Expand Up @@ -265,30 +266,20 @@ def _expand_hostpattern(self, hostpattern):
optional port number that applies to all of them.
'''

# Is a port number specified?
#
# This may be a mandatory :NN suffix on any square-bracketed expression
# (IPv6 address, IPv4 address, host name, host pattern), or an optional
# :NN suffix on an IPv4 address, host name, or pattern. IPv6 addresses
# must be in square brackets if a port is specified.
# Can the given hostpattern be parsed as a host with an optional port
# specification?

port = None
(pattern, port) = parse_address(hostpattern, allow_ranges=True)
if not pattern:
self._raise_error("Can't parse '%s' as host[:port]" % hostpattern)

for type in ['bracketed_hostport', 'hostport']:
m = self.patterns[type].match(hostpattern)
if m:
(hostpattern, port) = m.groups()
continue
# Once we have separated the pattern, we expand it into list of one or
# more hostnames, depending on whether it contains any [x:y] ranges.

# Now we're left with just the pattern, which results in a list of one
# or more hostnames, depending on whether it contains any [x:y] ranges.
#
# FIXME: We could be more strict here about validation.

if detect_range(hostpattern):
hostnames = expand_hostname_range(hostpattern)
if detect_range(pattern):
hostnames = expand_hostname_range(pattern)
else:
hostnames = [hostpattern]
hostnames = [pattern]

return (hostnames, port)

Expand Down Expand Up @@ -374,29 +365,3 @@ def _compile_patterns(self):
$ # end of the line
''', re.X
)

# The following patterns match the various ways in which a port number
# may be specified on an IPv6 address, IPv4 address, hostname, or host
# pattern. All of the above may be enclosed in square brackets with a
# mandatory :NN suffix; or all but the first may be given without any
# brackets but with an :NN suffix.

self.patterns['bracketed_hostport'] = re.compile(
r'''^
\[(.+)\] # [host identifier]
:([0-9]+) # :port number
$
''', re.X
)

self.patterns['hostport'] = re.compile(
r'''^
((?: # We want to match:
[^:\[\]] # (a non-range character
| # ...or...
\[[^\]]*\] # a complete bracketed expression)
)*) # repeated as many times as possible
:([0-9]+) # followed by a port number
$
''', re.X
)
208 changes: 208 additions & 0 deletions lib/ansible/parsing/utils/addresses.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
# Copyright 2015 Abhijit Menon-Sen <[email protected]>
#
# This file is part of Ansible
#
# Ansible is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Ansible is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Ansible. If not, see <http://www.gnu.org/licenses/>.

# Make coding more python3-ish
from __future__ import (absolute_import, division, print_function)
__metaclass__ = type

import re

# Components that match a numeric or alphanumeric begin:end or begin:end:step
# range expression inside square brackets.

# NOTE: every fragment below is written for re.VERBOSE and is only ever
# interpolated into patterns compiled with re.X. Each fragment must end with a
# newline so that its trailing "# comment" cannot swallow whatever follows it
# after interpolation.

numeric_range = r'''
    \[
        (?:[0-9]+:[0-9]+)               # numeric begin:end
        (?::[0-9]+)?                    # numeric :step (optional)
    \]
'''

alphanumeric_range = r'''
    \[
        (?:
            [a-z]:[a-z]|                # one-char alphabetic range
            [0-9]+:[0-9]+               # ...or a numeric one
        )
        (?::[0-9]+)?                    # numeric :step (optional)
    \]
'''

# Components that match a 16-bit portion of an IPv6 address in hexadecimal
# notation (0..ffff) or an 8-bit portion of an IPv4 address in decimal notation
# (0..255) or an [x:y(:z)] numeric range.

ipv6_component = r'''
    (?:
        [0-9a-f]{{1,4}}|                # 0..ffff
        {range}                         # or a numeric range
    )
'''.format(range=numeric_range)

ipv4_component = r'''
    (?:
        [01]?[0-9]{{1,2}}|              # 0..199
        2[0-4][0-9]|                    # 200..249
        25[0-5]|                        # 250..255
        {range}                         # or a numeric range
    )
'''.format(range=numeric_range)

# A hostname label, e.g. 'foo' in 'foo.example.com'. Consists of alphanumeric
# characters plus dashes (and underscores) or valid ranges. The label may not
# start with a hyphen, and may not end with a hyphen or an underscore. (Note
# that \w itself matches an underscore, so a leading underscore is accepted.)
# This is interpolated into the hostname pattern below. We don't try to enforce
# the 63-char length limit.

label = r'''
    (?:[\w]|{range})                    # Starts with an alphanumeric or a range
    (?:[\w_-]|{range})*                 # Then zero or more of the same or [_-]
    (?<![_-])                           # ...as long as it didn't end with [_-]
'''.format(range=alphanumeric_range)

patterns = {
    # This matches a square-bracketed expression with a port specification. What
    # is inside the square brackets is validated later.

    'bracketed_hostport': re.compile(
        r'''^
            \[(.+)\]                    # [host identifier]
            :([0-9]+)                   # :port number
            $
        ''', re.X
    ),

    # This matches a bare IPv4 address or hostname (or host pattern including
    # [x:y(:z)] ranges) with a port specification.

    'hostport': re.compile(
        r'''^
            ((?:                        # We want to match:
                [^:\[\]]                # (a non-range character
                |                       # ...or...
                \[[^\]]*\]              # a complete bracketed expression)
            )*)                         # repeated as many times as possible
            :([0-9]+)                   # followed by a port number
            $
        ''', re.X
    ),

    # This matches an IPv4 address, but also permits range expressions.

    'ipv4': re.compile(
        r'''^
            (?:{i4}\.){{3}}{i4}         # Three parts followed by dots plus one
            $
        '''.format(i4=ipv4_component), re.X|re.I
    ),

    # This matches an IPv6 address, but also permits range expressions.
    #
    # This expression looks complex, but it really only spells out the various
    # combinations in which the basic unit of an IPv6 address (0..ffff) can be
    # written, from :: to 1:2:3:4:5:6:7:8, plus the IPv4-in-IPv6 variants such
    # as ::ffff:192.0.2.3.
    #
    # Note that we can't just use ipaddress.ip_address() because we also have to
    # accept ranges in place of each component.
    #
    # The alternatives are wrapped in a single non-capturing group because "|"
    # binds more loosely than the anchors: without the group, "^" would apply
    # only to the first alternative and "$" only to the last, so any string
    # that merely *starts* with a valid address would be accepted.
    #
    # The dotted-quad part of the IPv4-in-IPv6 variants is validated with the
    # IPv4 component (decimal 0..255), not the hexadecimal IPv6 component.

    'ipv6': re.compile(
        r'''^
            (?:
                (?:{0}:){{7}}{0}|               # uncompressed: 1:2:3:4:5:6:7:8
                (?:{0}:){{1,6}}:|               # compressed variants, which are all
                (?:{0}:)(?::{0}){{1,6}}|        # a::b for various lengths of a,b
                (?:{0}:){{2}}(?::{0}){{1,5}}|
                (?:{0}:){{3}}(?::{0}){{1,4}}|
                (?:{0}:){{4}}(?::{0}){{1,3}}|
                (?:{0}:){{5}}(?::{0}){{1,2}}|
                (?:{0}:){{6}}(?::{0})|          # ...all with 2 <= a+b <= 7
                :(?::{0}){{1,6}}|               # ::ffff(:ffff...)
                {0}?::|                         # ffff::, ::
                                                # ipv4-in-ipv6 variants
                (?:0:){{6}}(?:{1}\.){{3}}{1}|
                ::(?:ffff:)?(?:{1}\.){{3}}{1}|
                (?:0:){{5}}ffff:(?:{1}\.){{3}}{1}
            )
            $
        '''.format(ipv6_component, ipv4_component), re.X|re.I
    ),

    # This matches a hostname or host pattern including [x:y(:z)] ranges.
    #
    # We roughly follow DNS rules here, but also allow ranges (and underscores).
    # In the past, no systematic rules were enforced about inventory hostnames,
    # but the parsing context (e.g. shlex.split(), fnmatch.fnmatch()) excluded
    # various metacharacters anyway.
    #
    # We don't enforce DNS length restrictions here (63 characters per label,
    # 253 characters total) or make any attempt to process IDNs.

    'hostname': re.compile(
        r'''^
            {label}                     # We must have at least one label
            (?:\.{label})*              # Followed by zero or more .labels
            $
        '''.format(label=label), re.X|re.I|re.UNICODE
    ),
}

def parse_address(address, allow_ranges=False):
    """
    Takes a string and returns a (host, port) tuple.

    If the host is None, then the string could not be parsed as a host
    identifier with an optional port specification (and the port is None as
    well). If the host is set but the port is None, then no port was
    specified. A port, when present, is returned as an int.

    The host identifier may be a hostname (qualified or not), an IPv4 address,
    or an IPv6 address. If allow_ranges is True, then any of those may contain
    [x:y] range specifications, e.g. foo[1:3] or foo[0:5]-bar[x:z]. If
    allow_ranges is False, an identifier containing a range is treated as a
    parse failure.

    The port number is an optional :NN suffix on an IPv4 address or host name,
    or a mandatory :NN suffix on any square-bracketed expression: IPv6 address,
    IPv4 address, or host name. (This means the only way to specify a port for
    an IPv6 address is to enclose it in square brackets.)
    """

    # First, we extract the port number if one is specified. We stop at the
    # first form that matches: once "[host]:port" has been unwrapped, the
    # inner text must not be searched for a second port, otherwise something
    # like "[foo:22]:33" would silently yield port 22 and discard 33.

    port = None
    for kind in ('bracketed_hostport', 'hostport'):
        m = patterns[kind].match(address)
        if m:
            (address, port) = m.groups()
            port = int(port)
            break

    # What we're left with now must be an IPv4 or IPv6 address, possibly with
    # numeric ranges, or a hostname with alphanumeric ranges. If it isn't any
    # of those, we don't understand it.

    if not any(patterns[kind].match(address)
               for kind in ('ipv4', 'ipv6', 'hostname')):
        return (None, None)

    # If we get to this point, we know that any included ranges are valid. If
    # the caller is prepared to handle them, all is well. Otherwise we treat
    # it as a parse failure.

    if not allow_ranges and '[' in address:
        return (None, None)

    return (address, port)
36 changes: 8 additions & 28 deletions lib/ansible/plugins/action/add_host.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
import re

from ansible.plugins.action import ActionBase
from ansible.parsing.utils.addresses import parse_address
from ansible.errors import AnsibleError, AnsibleParserError

class ActionModule(ActionBase):
''' Create inventory hosts and groups in the memory inventory'''
Expand All @@ -40,9 +42,11 @@ def run(self, tmp=None, task_vars=dict()):
new_name = self._task.args.get('name', self._task.args.get('hostname', None))
#vv("creating host via 'add_host': hostname=%s" % new_name)

new_name, new_port = _parse_ip_host_and_port(new_name)
if new_port:
self._task.args['ansible_ssh_port'] = new_port
name, port = parse_address(new_name, allow_ranges=False)
if not name:
raise AnsibleError("Invalid inventory hostname: %s" % new_name)
if port:
self._task.args['ansible_ssh_port'] = port

groups = self._task.args.get('groupname', self._task.args.get('groups', self._task.args.get('group', '')))
# add it to the group if that was specified
Expand All @@ -58,28 +62,4 @@ def run(self, tmp=None, task_vars=dict()):
if not k in [ 'name', 'hostname', 'groupname', 'groups' ]:
host_vars[k] = self._task.args[k]

return dict(changed=True, add_host=dict(host_name=new_name, groups=new_groups, host_vars=host_vars))

def _parse_ip_host_and_port(hostname):
"""
Attempt to parse the hostname and port from a hostname, e.g.,
some-host-name
some-host-name:80
8.8.8.8
8.8.8.8:80
2001:db8:0:1
[2001:db8:0:1]:80
"""
if hostname.count(':') > 1:
match = re.match(
'\[(?P<ip>[^\]]+)\](:(?P<port>[0-9]+))?',
hostname
)
if match:
return match.group('ip'), match.group('port')
else:
return hostname, None
elif ':' in hostname:
return hostname.rsplit(':', 1)
return hostname, None
return dict(changed=True, add_host=dict(host_name=name, groups=new_groups, host_vars=host_vars))
Loading

0 comments on commit ba7734b

Please sign in to comment.