forked from ansible/ansible
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request ansible#12165 from amenonsen/address-parsing
Hi @amenonsen - thanks for hunting down the unicode bug and expanding test_addresses. The code looks good, merging! -- Be systematic about parsing and validating hostnames and addresses
- Loading branch information
Showing
8 changed files
with
309 additions
and
145 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,208 @@ | ||
# Copyright 2015 Abhijit Menon-Sen <[email protected]> | ||
# | ||
# This file is part of Ansible | ||
# | ||
# Ansible is free software: you can redistribute it and/or modify | ||
# it under the terms of the GNU General Public License as published by | ||
# the Free Software Foundation, either version 3 of the License, or | ||
# (at your option) any later version. | ||
# | ||
# Ansible is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU General Public License for more details. | ||
# | ||
# You should have received a copy of the GNU General Public License | ||
# along with Ansible. If not, see <http://www.gnu.org/licenses/>. | ||
|
||
# Make coding more python3-ish | ||
from __future__ import (absolute_import, division, print_function) | ||
__metaclass__ = type | ||
|
||
import re | ||
|
||
# Components that match a numeric or alphanumeric begin:end or begin:end:step
# range expression inside square brackets.
#
# Every fragment below is meant to be compiled with re.X (verbose mode), so
# the whitespace and "#" comments inside the strings are ignored by the regex
# compiler. Fragments interpolated with str.format() must double any literal
# braces and must not contain braces in their embedded comments.

numeric_range = r'''
    \[
        (?:[0-9]+:[0-9]+)               # numeric begin:end
        (?::[0-9]+)?                    # numeric :step (optional)
    \]
'''

alphanumeric_range = r'''
    \[
        (?:
            [a-z]:[a-z]|                # one-char alphabetic range
            [0-9]+:[0-9]+               # ...or a numeric one
        )
        (?::[0-9]+)?                    # numeric :step (optional)
    \]
'''

# Components that match a 16-bit portion of an IPv6 address in hexadecimal
# notation (0..ffff) or an 8-bit portion of an IPv4 address in decimal notation
# (0..255) or an [x:y(:z)] numeric range. Each one is wrapped in its own
# non-capturing group so it can be repeated or followed by a separator safely.

ipv6_component = r'''
    (?:
        [0-9a-f]{{1,4}}|                # 0..ffff
        {range}                         # or a numeric range
    )
'''.format(range=numeric_range)

ipv4_component = r'''
    (?:
        [01]?[0-9]{{1,2}}|              # 0..199
        2[0-4][0-9]|                    # 200..249
        25[0-5]|                        # 250..255
        {range}                         # or a numeric range
    )
'''.format(range=numeric_range)

# A hostname label, e.g. 'foo' in 'foo.example.com'. Consists of alphanumeric
# characters plus dashes (and underscores) or valid ranges. The label may not
# start with a hyphen, and may not end with a hyphen or an underscore. (A
# leading underscore is accepted, because \w includes it.) This is interpolated
# into the hostname pattern below. We don't try to enforce the 63-char length
# limit.

label = r'''
    (?:[\w]|{range})                    # Starts with an alphanumeric or a range
    (?:[\w_-]|{range})*                 # Then zero or more of the same or [_-]
    (?<![_-])                           # ...as long as it didn't end with [_-]
'''.format(range=alphanumeric_range)

# Compiled patterns, keyed by the kind of thing they recognise. The two
# "*hostport" patterns capture (host, port) as groups 1 and 2; the remaining
# patterns only validate a complete host identifier and capture nothing.

patterns = {
    # This matches a square-bracketed expression with a port specification. What
    # is inside the square brackets is validated later.

    'bracketed_hostport': re.compile(
        r'''^
            \[(.+)\]                    # [host identifier]
            :([0-9]+)                   # :port number
            $
        ''', re.X
    ),

    # This matches a bare IPv4 address or hostname (or host pattern including
    # [x:y(:z)] ranges) with a port specification.

    'hostport': re.compile(
        r'''^
            ((?:                        # We want to match:
                [^:\[\]]                # (a non-range character
                |                       # ...or...
                \[[^\]]*\]              # a complete bracketed expression)
            )*)                         # repeated as many times as possible
            :([0-9]+)                   # followed by a port number
            $
        ''', re.X
    ),

    # This matches an IPv4 address, but also permits range expressions.

    'ipv4': re.compile(
        r'''^
            (?:{i4}\.){{3}}{i4}         # Three parts followed by dots plus one
            $
        '''.format(i4=ipv4_component), re.X | re.I
    ),

    # This matches an IPv6 address, but also permits range expressions.
    #
    # This expression looks complex, but it really only spells out the various
    # combinations in which the basic unit of an IPv6 address (0..ffff) can be
    # written, from :: to 1:2:3:4:5:6:7:8, plus the IPv4-in-IPv6 variants such
    # as ::ffff:192.0.2.3.
    #
    # The alternatives are wrapped in a single non-capturing group so that the
    # ^ and $ anchors apply to every one of them. Without the group, "|" binds
    # more loosely than the anchors and only the last alternative would be
    # required to reach the end of the string.
    #
    # Note that we can't just use ipaddress.ip_address() because we also have to
    # accept ranges in place of each component.

    'ipv6': re.compile(
        r'''^
            (?:
                (?:{0}:){{7}}{0}|               # uncompressed: 1:2:3:4:5:6:7:8
                (?:{0}:){{1,6}}:|               # compressed variants, which are all
                (?:{0}:)(?::{0}){{1,6}}|        # a::b for various lengths of a,b
                (?:{0}:){{2}}(?::{0}){{1,5}}|
                (?:{0}:){{3}}(?::{0}){{1,4}}|
                (?:{0}:){{4}}(?::{0}){{1,3}}|
                (?:{0}:){{5}}(?::{0}){{1,2}}|
                (?:{0}:){{6}}(?::{0})|          # ...all with 2 <= a+b <= 7
                :(?::{0}){{1,6}}|               # ::ffff(:ffff...)
                {0}?::|                         # ffff::, ::
                                                # ipv4-in-ipv6 variants
                (?:0:){{6}}(?:{0}\.){{3}}{0}|
                ::(?:ffff:)?(?:{0}\.){{3}}{0}|
                (?:0:){{5}}ffff:(?:{0}\.){{3}}{0}
            )
            $
        '''.format(ipv6_component), re.X | re.I
    ),

    # This matches a hostname or host pattern including [x:y(:z)] ranges.
    #
    # We roughly follow DNS rules here, but also allow ranges (and underscores).
    # In the past, no systematic rules were enforced about inventory hostnames,
    # but the parsing context (e.g. shlex.split(), fnmatch.fnmatch()) excluded
    # various metacharacters anyway.
    #
    # We don't enforce DNS length restrictions here (63 characters per label,
    # 253 characters total) or make any attempt to process IDNs.

    'hostname': re.compile(
        r'''^
            {label}                     # We must have at least one label
            (?:\.{label})*              # Followed by zero or more .labels
            $
        '''.format(label=label), re.X | re.I | re.UNICODE
    ),
}
|
||
def parse_address(address, allow_ranges=False):
    """
    Takes a string and returns a (host, port) tuple. If the host is None, then
    the string could not be parsed as a host identifier with an optional port
    specification. If the port is None, then no port was specified.

    The host identifier may be a hostname (qualified or not), an IPv4 address,
    or an IPv6 address. If allow_ranges is True, then any of those may contain
    [x:y] range specifications, e.g. foo[1:3] or foo[0:5]-bar[x:z]. If
    allow_ranges is False, an identifier containing a range is treated as a
    parse failure and (None, None) is returned.

    The port number is an optional :NN suffix on an IPv4 address or host name,
    or a mandatory :NN suffix on any square-bracketed expression: IPv6 address,
    IPv4 address, or host name. (This means the only way to specify a port for
    an IPv6 address is to enclose it in square brackets.) When present, the
    port is returned as an int.
    """

    # First, we extract the port number if one is specified. We stop at the
    # first form that matches: once the brackets of "[host]:port" have been
    # stripped, the remainder must be a host identifier in its own right, and
    # must not be re-parsed as "host:port" (which would silently replace the
    # port we just extracted).

    port = None
    for kind in ('bracketed_hostport', 'hostport'):
        m = patterns[kind].match(address)
        if m:
            (address, port) = m.groups()
            port = int(port)
            break

    # What we're left with now must be an IPv4 or IPv6 address, possibly with
    # numeric ranges, or a hostname with alphanumeric ranges. The first pattern
    # that accepts it settles the question, so there's no need to try the rest.

    host = None
    for kind in ('ipv4', 'ipv6', 'hostname'):
        if patterns[kind].match(address):
            host = address
            break

    # If it isn't any of the above, we don't understand it.

    if not host:
        return (None, None)

    # If we get to this point, we know that any included ranges are valid. If
    # the caller is prepared to handle them, all is well. Otherwise we treat
    # it as a parse failure.

    if not allow_ranges and '[' in host:
        return (None, None)

    return (host, port)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.