Skip to content

Commit

Permalink
Add type hints for parser submodules
Browse files: browse the repository at this point in the history
  • Loading branch information
p-l- committed Jun 10, 2021
1 parent d67ff2c commit 2705b0c
Show file tree
Hide file tree
Showing 12 changed files with 162 additions and 70 deletions.
2 changes: 1 addition & 1 deletion .travis/script.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ if [ "$DB" = "maxmind" ]; then
fi
echo "pylint OK"
mv ivre_bak ivre
if ! MYPYPATH=./pkg/stubs/ mypy --follow-imports=skip --disallow-untyped-calls --disallow-untyped-decorators --disallow-untyped-defs --disallow-incomplete-defs --no-implicit-optional --warn-redundant-casts --warn-unused-ignores --warn-return-any ./ivre/{active,analyzer,data,tools,types}/*.py ./ivre/{__init__,activecli,agent,config,flow,geoiputils,graphroute,keys,nmapopt,utils,zgrabout}.py; then
if ! MYPYPATH=./pkg/stubs/ mypy --follow-imports=skip --disallow-untyped-calls --disallow-untyped-decorators --disallow-untyped-defs --disallow-incomplete-defs --no-implicit-optional --warn-redundant-casts --warn-unused-ignores --warn-return-any ./ivre/{active,analyzer,data,parser,tools,types}/*.py ./ivre/{__init__,activecli,agent,config,flow,geoiputils,graphroute,keys,nmapopt,utils,zgrabout}.py; then
echo "mypy KO"
exit -1
fi
Expand Down
58 changes: 42 additions & 16 deletions ivre/parser/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,37 +20,63 @@


import subprocess
from types import TracebackType
from typing import Any, BinaryIO, Dict, Iterator, List, Optional, Type, Union, cast


from ivre.utils import FileOpener


class Parser(FileOpener):
class Parser:
"""Parent class for file parsers"""

def __next__(self):
return self.parse_line(super().__next__())
def __init__(self, fname: Union[str, BinaryIO]) -> None:
self.fopener = FileOpener(fname)
self.fdesc = self.fopener.fdesc

def __iter__(self) -> Iterator[Dict[str, Any]]:
return self

class CmdParser:
"""Parent class for file parsers with commands"""
def __next__(self) -> Dict[str, Any]:
return self.parse_line(next(self.fdesc))

def __init__(self, cmd, cmdkargs):
cmdkargs["stdout"] = subprocess.PIPE
# pylint: disable=consider-using-with
self.proc = subprocess.Popen(cmd, **cmdkargs)
self.fdesc = self.proc.stdout
def parse_line(self, line: bytes) -> Dict[str, Any]:
raise NotImplementedError

def __iter__(self):
return self
def fileno(self) -> int:
return self.fdesc.fileno()

def __next__(self):
return self.parse_line(next(self.fdesc))
def close(self) -> None:
self.fdesc.close()

def __enter__(self):
def __enter__(self) -> "Parser":
return self

def __exit__(self, exc_type, exc_val, exc_tb):
def __exit__(
self,
exc_type: Optional[Type[BaseException]],
exc_val: Optional[BaseException],
exc_tb: Optional[TracebackType],
) -> None:
self.fopener.__exit__(exc_type, exc_val, exc_tb)


class CmdParser(Parser):
"""Parent class for file parsers with commands"""

def __init__(self, cmd: List[str], cmdkargs: Dict[str, Any]) -> None:
cmdkargs["stdout"] = subprocess.PIPE
# pylint: disable=consider-using-with
self.proc = subprocess.Popen(cmd, **cmdkargs)
assert self.proc.stdout is not None
self.fdesc = cast(BinaryIO, self.proc.stdout)

def __exit__(
self,
exc_type: Optional[Type[BaseException]],
exc_val: Optional[BaseException],
exc_tb: Optional[TracebackType],
) -> None:
self.fdesc.close()
if self.proc is not None:
self.proc.wait()
21 changes: 11 additions & 10 deletions ivre/parser/airodump.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
"""Support for Airodump csv files"""

import datetime
from typing import Any, Callable, Dict, Optional


from ivre.parser import Parser
Expand All @@ -45,35 +46,35 @@ class Airodump(Parser):
"channel": TYPE_INT,
"# beacons": TYPE_INT,
}
converters = {
converters: Dict[Optional[int], Callable[[str], Any]] = {
TYPE_INT: int,
TYPE_DATE: lambda val: datetime.datetime.strptime(val, "%Y-%m-%d %H:%M:%S"),
TYPE_IP: lambda val: ".".join(elt.strip() for elt in val.split(".")),
TYPE_MAC: lambda val: val.strip().lower(),
None: lambda val: val.strip(),
}

def __init__(self, fname):
def __init__(self, fname: str) -> None:
super().__init__(fname)
self.nextline_headers = False

def parse_line(self, line):
line = line.decode().rstrip("\r\n")
if not line:
def parse_line(self, line: bytes) -> Dict[str, Any]:
line_s = line.decode().rstrip("\r\n")
if not line_s:
self.nextline_headers = True
return next(self)
line = [elt.strip() for elt in line.split(",")]
line_l = [elt.strip() for elt in line_s.split(",")]
if self.nextline_headers:
self.fields = line
self.cur_types = [self.types.get(field) for field in line]
self.fields = line_l
self.cur_types = [self.types.get(field) for field in line_l]
self.nextline_headers = False
return next(self)
return dict(
zip(
self.fields,
(
self.converters.get(self.cur_types[i])(val)
for (i, val) in enumerate(line)
self.converters[self.cur_types[i]](val)
for (i, val) in enumerate(line_l)
),
)
)
7 changes: 4 additions & 3 deletions ivre/parser/argus.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@


import datetime
from typing import Any, BinaryIO, Dict, Optional, Union


from ivre.parser import CmdParser
Expand All @@ -45,7 +46,7 @@ class Argus(CmdParser):
aggregation = ["saddr", "sport", "daddr", "dport", "proto"]
timefmt = "%s.%f"

def __init__(self, fdesc, pcap_filter=None):
def __init__(self, fdesc: Union[str, BinaryIO], pcap_filter: Optional[str] = None):
"""Creates the Argus object.
fdesc: a file-like object or a filename
Expand All @@ -65,8 +66,8 @@ def __init__(self, fdesc, pcap_filter=None):
self.fdesc.readline()

@classmethod
def parse_line(cls, line):
fields = dict(
def parse_line(cls, line: bytes) -> Dict[str, Any]:
fields: Dict[str, Any] = dict(
(name, val.strip().decode())
for name, val in zip(cls.fields, line.split(b","))
)
Expand Down
30 changes: 18 additions & 12 deletions ivre/parser/iptables.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,51 +35,57 @@
"""Support for Iptables log from syslog files."""

import datetime
from typing import Any, Dict, Optional


from ivre.parser import Parser
from ivre.utils import LOGGER


class Iptables(Parser):
"""Iptables log generator from a syslog file descriptor."""

def __init__(self, fname, pcap_filter=None):
def __init__(self, fname: str, pcap_filter: Optional[str] = None) -> None:
"""Init Iptables class."""
if pcap_filter is not None:
LOGGER.warning("PCAP filter not supported in Iptables")
super().__init__(fname)

def parse_line(self, line):
def parse_line(self, line: bytes) -> Dict[str, Any]:
"""Process current line in Parser.__next__."""
field_idx = line.find(b"IN=")
if field_idx < 0:
# It's not an iptables log
return next(self)

# Converts the syslog iptables log into hash
fields = dict(
(key.lower(), value)
fields: Dict[str, Any] = dict(
(key.decode().lower(), value.decode())
for key, value in (
val.split(b"=", 1) if b"=" in val else (val, b"")
for val in line[field_idx:].rstrip(b"\r\n").split()
)
)

try:
fields[b"start_time"] = datetime.datetime.strptime(
fields["start_time"] = datetime.datetime.strptime(
line[:15].decode(), "%b %d %H:%M:%S"
)
except ValueError:
# Bad Date format
return next(self)

# sanitized
fields[b"proto"] = fields[b"proto"].lower()
fields["proto"] = fields["proto"].lower()
# Rename fields according to flow2db specifications.
if fields[b"proto"] in (b"udp", b"tcp"):
fields[b"sport"] = int(fields[b"spt"])
fields[b"dport"] = int(fields[b"dpt"])
if fields["proto"] in ("udp", "tcp"):
fields["sport"] = int(fields.pop("spt"))
fields["dport"] = int(fields.pop("dpt"))

# This data is mandatory but undefined in iptables logs, so make
# a choice.
fields[b"cspkts"] = fields[b"scpkts"] = 0
fields[b"scbytes"] = fields[b"csbytes"] = 0
fields[b"end_time"] = fields[b"start_time"]
fields["cspkts"] = fields["scpkts"] = 0
fields["scbytes"] = fields["csbytes"] = 0
fields["end_time"] = fields["start_time"]

return fields
13 changes: 8 additions & 5 deletions ivre/parser/netflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
"""Support for NetFlow files"""

import datetime
from typing import Any, BinaryIO, Dict, Optional, Union, cast


from ivre import utils
Expand Down Expand Up @@ -52,7 +53,9 @@ class NetFlow(CmdParser):
}
timefmt = "%Y-%m-%d %H:%M:%S.%f"

def __init__(self, fdesc, pcap_filter=None):
def __init__(
self, fdesc: Union[str, BinaryIO], pcap_filter: Optional[str] = None
) -> None:
"""Creates the NetFlow object.
fdesc: a file-like object or a filename
Expand All @@ -66,23 +69,23 @@ def __init__(self, fdesc, pcap_filter=None):
if fde.read(2) not in utils.FileOpener.FILE_OPENERS_MAGIC:
cmd.extend(["-r", fdesc])
else:
cmdkargs["stdin"] = utils.open_file(fdesc)
cmdkargs["stdin"] = cast(BinaryIO, utils.open_file(fdesc))
else:
cmdkargs["stdin"] = fdesc
if pcap_filter is not None:
cmd.append(pcap_filter)
super().__init__(cmd, cmdkargs)

@classmethod
def str2int(cls, val):
def str2int(cls, val: str) -> int:
try:
return int(val)
except ValueError:
return int(float(val[:-1]) * cls.units[val[-1]])

@classmethod
def parse_line(cls, line):
fields = dict(
def parse_line(cls, line: bytes) -> Dict[str, Any]:
fields: Dict[str, Any] = dict(
(name[0], val.strip())
for name, val in zip(cls.fields, line.decode().split(","))
)
Expand Down
29 changes: 17 additions & 12 deletions ivre/parser/zeek.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

import datetime
import re
from typing import Any, BinaryIO, Dict, List, Optional, Tuple, Union


from ivre.parser import Parser
Expand All @@ -36,14 +37,14 @@ class ZeekFile(Parser):
float_types = set([b"interval"])
time_types = set([b"time"])

def __init__(self, fname):
def __init__(self, fname: Union[BinaryIO, str]) -> None:
self.sep = b" " # b"\t"
self.set_sep = b","
self.empty_field = b"(empty)"
self.unset_field = b"-"
self.fields = []
self.types = []
self.path = None
self.fields: List[bytes] = []
self.types: List[bytes] = []
self.path: Optional[str] = None
self.nextlines = []
super().__init__(fname)
for line in self.fdesc:
Expand All @@ -53,12 +54,15 @@ def __init__(self, fname):
break
self.parse_header_line(line)

def __next__(self):
def __enter__(self) -> "ZeekFile":
return self

def __next__(self) -> Dict[str, Any]:
return self.parse_line(
self.nextlines.pop(0) if self.nextlines else next(self.fdesc).strip()
)

def parse_header_line(self, line):
def parse_header_line(self, line: bytes) -> None:
if not line:
return
if line[:1] != b"#":
Expand Down Expand Up @@ -93,19 +97,20 @@ def parse_header_line(self, line):
elif directive == b"types":
self.types = arg.split(self.sep)

def parse_line(self, line):
def parse_line(self, line: bytes) -> Dict[str, Any]:
if line.startswith(b"#"):
self.parse_header_line(line)
return next(self)
res = {}
fields = line.split(self.sep)

for field, name, typ in zip(fields, self.fields, self.types):
name = name.replace(b".", b"_").decode()
res[name] = self.fix_value(field, typ)
res[name.replace(b".", b"_").decode()] = self.fix_value(field, typ)
return res

def fix_value(self, val, typ):
def fix_value(
self, val: bytes, typ: bytes
) -> Optional[Union[bool, str, int, float, datetime.datetime, list]]:
if val == self.unset_field:
return None
if typ == b"bool":
Expand All @@ -127,10 +132,10 @@ def fix_value(self, val, typ):
return val.decode()

@property
def field_types(self):
def field_types(self) -> List[Tuple[bytes, bytes]]:
return list(zip(self.fields, self.types))

def __str__(self):
def __str__(self) -> str:
return "\n".join(
[
"%s = %r" % (k, getattr(self, k))
Expand Down
4 changes: 2 additions & 2 deletions ivre/tools/flow2db.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,9 @@ def main() -> None:
try:
fileparser = PARSERS_CHOICE[args.type]
except KeyError:
with utils.open_file(fname) as fdesc:
with utils.open_file(fname) as fdesc_tmp:
try:
fileparser = PARSERS_MAGIC[fdesc.read(4)]
fileparser = PARSERS_MAGIC[fdesc_tmp.read(4)]
except KeyError:
utils.LOGGER.warning(
"Cannot find the appropriate parser for file %r",
Expand Down
4 changes: 3 additions & 1 deletion ivre/tools/zeek2db.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,9 @@ def main() -> None:
"Parsing %s\n\t%s",
fname,
"Fields:\n%s\n"
% "\n".join("%s: %s" % (f, t) for f, t in zeekf.field_types),
% "\n".join(
"%s: %s" % (f.decode(), t.decode()) for f, t in zeekf.field_types
),
)
if zeekf.path in FUNCTIONS:
func = FUNCTIONS[zeekf.path]
Expand Down
Loading

0 comments on commit 2705b0c

Please sign in to comment.