Add type hints for parser submodules

xavier-martin · Jun 10, 2021 · 2705b0c · 2705b0c
1 parent d67ff2c
commit 2705b0c
Show file tree

Hide file tree

Showing 12 changed files with 162 additions and 70 deletions.
diff --git a/.travis/script.sh b/.travis/script.sh
@@ -22,7 +22,7 @@ if [ "$DB" = "maxmind" ]; then
 	fi
 	echo "pylint OK"
 	mv ivre_bak ivre
-	if ! MYPYPATH=./pkg/stubs/ mypy --follow-imports=skip --disallow-untyped-calls --disallow-untyped-decorators --disallow-untyped-defs --disallow-incomplete-defs --no-implicit-optional --warn-redundant-casts --warn-unused-ignores --warn-return-any ./ivre/{active,analyzer,data,tools,types}/*.py ./ivre/{__init__,activecli,agent,config,flow,geoiputils,graphroute,keys,nmapopt,utils,zgrabout}.py; then
+	if ! MYPYPATH=./pkg/stubs/ mypy --follow-imports=skip --disallow-untyped-calls --disallow-untyped-decorators --disallow-untyped-defs --disallow-incomplete-defs --no-implicit-optional --warn-redundant-casts --warn-unused-ignores --warn-return-any ./ivre/{active,analyzer,data,parser,tools,types}/*.py ./ivre/{__init__,activecli,agent,config,flow,geoiputils,graphroute,keys,nmapopt,utils,zgrabout}.py; then
 	    echo "mypy KO"
 	    exit -1
 	fi

diff --git a/ivre/parser/__init__.py b/ivre/parser/__init__.py
@@ -20,37 +20,63 @@
 
 
 import subprocess
+from types import TracebackType
+from typing import Any, BinaryIO, Dict, Iterator, List, Optional, Type, Union, cast
 
 
 from ivre.utils import FileOpener
 
 
-class Parser(FileOpener):
+class Parser:
     """Parent class for file parsers"""
 
-    def __next__(self):
-        return self.parse_line(super().__next__())
+    def __init__(self, fname: Union[str, BinaryIO]) -> None:
+        self.fopener = FileOpener(fname)
+        self.fdesc = self.fopener.fdesc
 
+    def __iter__(self) -> Iterator[Dict[str, Any]]:
+        return self
 
-class CmdParser:
-    """Parent class for file parsers with commands"""
+    def __next__(self) -> Dict[str, Any]:
+        return self.parse_line(next(self.fdesc))
 
-    def __init__(self, cmd, cmdkargs):
-        cmdkargs["stdout"] = subprocess.PIPE
-        # pylint: disable=consider-using-with
-        self.proc = subprocess.Popen(cmd, **cmdkargs)
-        self.fdesc = self.proc.stdout
+    def parse_line(self, line: bytes) -> Dict[str, Any]:
+        raise NotImplementedError
 
-    def __iter__(self):
-        return self
+    def fileno(self) -> int:
+        return self.fdesc.fileno()
 
-    def __next__(self):
-        return self.parse_line(next(self.fdesc))
+    def close(self) -> None:
+        self.fdesc.close()
 
-    def __enter__(self):
+    def __enter__(self) -> "Parser":
         return self
 
-    def __exit__(self, exc_type, exc_val, exc_tb):
+    def __exit__(
+        self,
+        exc_type: Optional[Type[BaseException]],
+        exc_val: Optional[BaseException],
+        exc_tb: Optional[TracebackType],
+    ) -> None:
+        self.fopener.__exit__(exc_type, exc_val, exc_tb)
+
+
+class CmdParser(Parser):
+    """Parent class for file parsers with commands"""
+
+    def __init__(self, cmd: List[str], cmdkargs: Dict[str, Any]) -> None:
+        cmdkargs["stdout"] = subprocess.PIPE
+        # pylint: disable=consider-using-with
+        self.proc = subprocess.Popen(cmd, **cmdkargs)
+        assert self.proc.stdout is not None
+        self.fdesc = cast(BinaryIO, self.proc.stdout)
+
+    def __exit__(
+        self,
+        exc_type: Optional[Type[BaseException]],
+        exc_val: Optional[BaseException],
+        exc_tb: Optional[TracebackType],
+    ) -> None:
         self.fdesc.close()
         if self.proc is not None:
             self.proc.wait()
diff --git a/ivre/parser/airodump.py b/ivre/parser/airodump.py
@@ -19,6 +19,7 @@
 """Support for Airodump csv files"""
 
 import datetime
+from typing import Any, Callable, Dict, Optional
 
 
 from ivre.parser import Parser
@@ -45,35 +46,35 @@ class Airodump(Parser):
         "channel": TYPE_INT,
         "# beacons": TYPE_INT,
     }
-    converters = {
+    converters: Dict[Optional[int], Callable[[str], Any]] = {
         TYPE_INT: int,
         TYPE_DATE: lambda val: datetime.datetime.strptime(val, "%Y-%m-%d %H:%M:%S"),
         TYPE_IP: lambda val: ".".join(elt.strip() for elt in val.split(".")),
         TYPE_MAC: lambda val: val.strip().lower(),
         None: lambda val: val.strip(),
     }
 
-    def __init__(self, fname):
+    def __init__(self, fname: str) -> None:
         super().__init__(fname)
         self.nextline_headers = False
 
-    def parse_line(self, line):
-        line = line.decode().rstrip("\r\n")
-        if not line:
+    def parse_line(self, line: bytes) -> Dict[str, Any]:
+        line_s = line.decode().rstrip("\r\n")
+        if not line_s:
             self.nextline_headers = True
             return next(self)
-        line = [elt.strip() for elt in line.split(",")]
+        line_l = [elt.strip() for elt in line_s.split(",")]
         if self.nextline_headers:
-            self.fields = line
-            self.cur_types = [self.types.get(field) for field in line]
+            self.fields = line_l
+            self.cur_types = [self.types.get(field) for field in line_l]
             self.nextline_headers = False
             return next(self)
         return dict(
             zip(
                 self.fields,
                 (
-                    self.converters.get(self.cur_types[i])(val)
-                    for (i, val) in enumerate(line)
+                    self.converters[self.cur_types[i]](val)
+                    for (i, val) in enumerate(line_l)
                 ),
             )
         )
diff --git a/ivre/parser/argus.py b/ivre/parser/argus.py
@@ -20,6 +20,7 @@
 
 
 import datetime
+from typing import Any, BinaryIO, Dict, Optional, Union
 
 
 from ivre.parser import CmdParser
@@ -45,7 +46,7 @@ class Argus(CmdParser):
     aggregation = ["saddr", "sport", "daddr", "dport", "proto"]
     timefmt = "%s.%f"
 
-    def __init__(self, fdesc, pcap_filter=None):
+    def __init__(self, fdesc: Union[str, BinaryIO], pcap_filter: Optional[str] = None):
         """Creates the Argus object.
 
         fdesc: a file-like object or a filename
@@ -65,8 +66,8 @@ def __init__(self, fdesc, pcap_filter=None):
         self.fdesc.readline()
 
     @classmethod
-    def parse_line(cls, line):
-        fields = dict(
+    def parse_line(cls, line: bytes) -> Dict[str, Any]:
+        fields: Dict[str, Any] = dict(
             (name, val.strip().decode())
             for name, val in zip(cls.fields, line.split(b","))
         )

diff --git a/ivre/parser/iptables.py b/ivre/parser/iptables.py
@@ -35,51 +35,57 @@
 """Support for Iptables log from syslog files."""
 
 import datetime
+from typing import Any, Dict, Optional
+
+
 from ivre.parser import Parser
+from ivre.utils import LOGGER
 
 
 class Iptables(Parser):
     """Iptables log generator from a syslog file descriptor."""
 
-    def __init__(self, fname, pcap_filter=None):
+    def __init__(self, fname: str, pcap_filter: Optional[str] = None) -> None:
         """Init Iptables class."""
+        if pcap_filter is not None:
+            LOGGER.warning("PCAP filter not supported in Iptables")
         super().__init__(fname)
 
-    def parse_line(self, line):
+    def parse_line(self, line: bytes) -> Dict[str, Any]:
         """Process current line in Parser.__next__."""
         field_idx = line.find(b"IN=")
         if field_idx < 0:
             # It's not an iptables log
             return next(self)
 
         # Converts the syslog iptables log into hash
-        fields = dict(
-            (key.lower(), value)
+        fields: Dict[str, Any] = dict(
+            (key.decode().lower(), value.decode())
             for key, value in (
                 val.split(b"=", 1) if b"=" in val else (val, b"")
                 for val in line[field_idx:].rstrip(b"\r\n").split()
             )
         )
 
         try:
-            fields[b"start_time"] = datetime.datetime.strptime(
+            fields["start_time"] = datetime.datetime.strptime(
                 line[:15].decode(), "%b %d %H:%M:%S"
             )
         except ValueError:
             # Bad Date format
             return next(self)
 
         # sanitized
-        fields[b"proto"] = fields[b"proto"].lower()
+        fields["proto"] = fields["proto"].lower()
         # Rename fields according to flow2db specifications.
-        if fields[b"proto"] in (b"udp", b"tcp"):
-            fields[b"sport"] = int(fields[b"spt"])
-            fields[b"dport"] = int(fields[b"dpt"])
+        if fields["proto"] in ("udp", "tcp"):
+            fields["sport"] = int(fields.pop("spt"))
+            fields["dport"] = int(fields.pop("dpt"))
 
         # This data is mandatory but undefined in iptables logs, so make
         # a choice.
-        fields[b"cspkts"] = fields[b"scpkts"] = 0
-        fields[b"scbytes"] = fields[b"csbytes"] = 0
-        fields[b"end_time"] = fields[b"start_time"]
+        fields["cspkts"] = fields["scpkts"] = 0
+        fields["scbytes"] = fields["csbytes"] = 0
+        fields["end_time"] = fields["start_time"]
 
         return fields
diff --git a/ivre/parser/netflow.py b/ivre/parser/netflow.py
@@ -19,6 +19,7 @@
 """Support for NetFlow files"""
 
 import datetime
+from typing import Any, BinaryIO, Dict, Optional, Union, cast
 
 
 from ivre import utils
@@ -52,7 +53,9 @@ class NetFlow(CmdParser):
     }
     timefmt = "%Y-%m-%d %H:%M:%S.%f"
 
-    def __init__(self, fdesc, pcap_filter=None):
+    def __init__(
+        self, fdesc: Union[str, BinaryIO], pcap_filter: Optional[str] = None
+    ) -> None:
         """Creates the NetFlow object.
 
         fdesc: a file-like object or a filename
@@ -66,23 +69,23 @@ def __init__(self, fdesc, pcap_filter=None):
                 if fde.read(2) not in utils.FileOpener.FILE_OPENERS_MAGIC:
                     cmd.extend(["-r", fdesc])
                 else:
-                    cmdkargs["stdin"] = utils.open_file(fdesc)
+                    cmdkargs["stdin"] = cast(BinaryIO, utils.open_file(fdesc))
         else:
             cmdkargs["stdin"] = fdesc
         if pcap_filter is not None:
             cmd.append(pcap_filter)
         super().__init__(cmd, cmdkargs)
 
     @classmethod
-    def str2int(cls, val):
+    def str2int(cls, val: str) -> int:
         try:
             return int(val)
         except ValueError:
             return int(float(val[:-1]) * cls.units[val[-1]])
 
     @classmethod
-    def parse_line(cls, line):
-        fields = dict(
+    def parse_line(cls, line: bytes) -> Dict[str, Any]:
+        fields: Dict[str, Any] = dict(
             (name[0], val.strip())
             for name, val in zip(cls.fields, line.decode().split(","))
         )

diff --git a/ivre/parser/zeek.py b/ivre/parser/zeek.py
@@ -20,6 +20,7 @@
 
 import datetime
 import re
+from typing import Any, BinaryIO, Dict, List, Optional, Tuple, Union
 
 
 from ivre.parser import Parser
@@ -36,14 +37,14 @@ class ZeekFile(Parser):
     float_types = set([b"interval"])
     time_types = set([b"time"])
 
-    def __init__(self, fname):
+    def __init__(self, fname: Union[BinaryIO, str]) -> None:
         self.sep = b" "  # b"\t"
         self.set_sep = b","
         self.empty_field = b"(empty)"
         self.unset_field = b"-"
-        self.fields = []
-        self.types = []
-        self.path = None
+        self.fields: List[bytes] = []
+        self.types: List[bytes] = []
+        self.path: Optional[str] = None
         self.nextlines = []
         super().__init__(fname)
         for line in self.fdesc:
@@ -53,12 +54,15 @@ def __init__(self, fname):
                 break
             self.parse_header_line(line)
 
-    def __next__(self):
+    def __enter__(self) -> "ZeekFile":
+        return self
+
+    def __next__(self) -> Dict[str, Any]:
         return self.parse_line(
             self.nextlines.pop(0) if self.nextlines else next(self.fdesc).strip()
         )
 
-    def parse_header_line(self, line):
+    def parse_header_line(self, line: bytes) -> None:
         if not line:
             return
         if line[:1] != b"#":
@@ -93,19 +97,20 @@ def parse_header_line(self, line):
         elif directive == b"types":
             self.types = arg.split(self.sep)
 
-    def parse_line(self, line):
+    def parse_line(self, line: bytes) -> Dict[str, Any]:
         if line.startswith(b"#"):
             self.parse_header_line(line)
             return next(self)
         res = {}
         fields = line.split(self.sep)
 
         for field, name, typ in zip(fields, self.fields, self.types):
-            name = name.replace(b".", b"_").decode()
-            res[name] = self.fix_value(field, typ)
+            res[name.replace(b".", b"_").decode()] = self.fix_value(field, typ)
         return res
 
-    def fix_value(self, val, typ):
+    def fix_value(
+        self, val: bytes, typ: bytes
+    ) -> Optional[Union[bool, str, int, float, datetime.datetime, list]]:
         if val == self.unset_field:
             return None
         if typ == b"bool":
@@ -127,10 +132,10 @@ def fix_value(self, val, typ):
         return val.decode()
 
     @property
-    def field_types(self):
+    def field_types(self) -> List[Tuple[bytes, bytes]]:
         return list(zip(self.fields, self.types))
 
-    def __str__(self):
+    def __str__(self) -> str:
         return "\n".join(
             [
                 "%s = %r" % (k, getattr(self, k))

diff --git a/ivre/tools/flow2db.py b/ivre/tools/flow2db.py
@@ -70,9 +70,9 @@ def main() -> None:
         try:
             fileparser = PARSERS_CHOICE[args.type]
         except KeyError:
-            with utils.open_file(fname) as fdesc:
+            with utils.open_file(fname) as fdesc_tmp:
                 try:
-                    fileparser = PARSERS_MAGIC[fdesc.read(4)]
+                    fileparser = PARSERS_MAGIC[fdesc_tmp.read(4)]
                 except KeyError:
                     utils.LOGGER.warning(
                         "Cannot find the appropriate parser for file %r",

diff --git a/ivre/tools/zeek2db.py b/ivre/tools/zeek2db.py
@@ -153,7 +153,9 @@ def main() -> None:
                 "Parsing %s\n\t%s",
                 fname,
                 "Fields:\n%s\n"
-                % "\n".join("%s: %s" % (f, t) for f, t in zeekf.field_types),
+                % "\n".join(
+                    "%s: %s" % (f.decode(), t.decode()) for f, t in zeekf.field_types
+                ),
             )
             if zeekf.path in FUNCTIONS:
                 func = FUNCTIONS[zeekf.path]