forked from wpietri/sucks
-
Notifications
You must be signed in to change notification settings - Fork 5
/
log_clean.py
98 lines (86 loc) · 3.16 KB
/
log_clean.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import base64
import re
import sys
# A script that takes an xmpppeek log of an Ecovacs app session with a Deebot N79 and strips out some of the
# nonsense, including any private identifiers.
# Wrapper noise around every logged stanza, as (pattern, replacement) pairs
# that are applied to each line in this order.
_NOISE = (
    (re.compile(r"\[\d{4}-\d{2}-\d{2} "), ""),
    (re.compile(r"\.\d{6}-\d{2}:\d{2}\] \["), " "),
    (re.compile(r"]$"), " "),
    # Collapse "(C2S) ip:port -> ip:port]" to the first letter of the tag.
    (re.compile(r"\(([SC])2[SC]\) [.0-9]+:\d+ -> [.0-9]+:\d+\]"), r"\1"),
    (re.compile(r"\}\}\}"), ""),
    (re.compile(r"\{\{\{"), ""),
)

# Patterns that reveal the private identifiers.
_SOURCE_IP = re.compile(r"Client connect from ([.0-9]+)")
_USER = re.compile(r"(20\d{6}[0-9a-f]{13})@ecouser\.net/([0-9a-f]{8})")
_ROBOT = re.compile(r"(E\d{8,})@126\.ecorobot\.net/atom")
_AUTH = re.compile(
    r'<auth mechanism="PLAIN" xmlns="urn:ietf:params:xml:ns:xmpp-sasl">'
    r"([-A-Za-z0-9+/=]+)</auth>"
)

# Placeholder written in place of each identifier, in substitution order.
_PLACEHOLDERS = (
    ("source_ip", "SOURCEIP"),
    ("userid", "USERID"),
    ("resourceid", "RESOURCEID"),
    ("robotid", "ROBOTID"),
    ("auth_glob", "AUTHGLOB"),
)

# Identifiers the sample config cannot be written without.
_REQUIRED = ("userid", "resourceid", "robotid", "auth_glob")

# Addresses as they read once the identifiers above have been replaced by
# their placeholders; the domains are the ones _USER and _ROBOT match on.
_ROBOT_JID = r"ROBOTID@126\.ecorobot\.net/atom"
_APP_JID = r"USERID@ecouser\.net/RESOURCEID"
_CTL_QUERY = r'<query xmlns="com:ctl">'

# Stanza summaries, applied in this order: the "ret" response form must be
# tried before the catch-all response form, which would also match it.
_TRANSLATIONS = (
    # translate client commands
    (
        re.compile(
            r'<iq id="(\d+)" to="' + _ROBOT_JID + r'" from="' + _APP_JID
            + r'" type="set">' + _CTL_QUERY + r"(<ctl .*>)</query></iq>"
        ),
        r"id=\1 command=\2",
    ),
    # translate server responses
    (
        re.compile(
            r'<iq to="' + _APP_JID + r'" type="result" id="(\d+)" from="'
            + _ROBOT_JID + r'"/>'
        ),
        r"id=\1 result =empty",
    ),
    (
        re.compile(
            r'<iq to="' + _APP_JID + r'" type="set" id="(\d+)" from="'
            + _ROBOT_JID + r'">' + _CTL_QUERY
            + r'<ctl id="(\d+)" ret="([^"]+)"/></query></iq>'
        ),
        r"id=\1 id=\2 result=\3",
    ),
    (
        re.compile(
            r'<iq to="' + _APP_JID + r'" type="set" id="(\d+)" from="'
            + _ROBOT_JID + r'">' + _CTL_QUERY + r"(<ctl .*)</query></iq>"
        ),
        r"id=\1 response=\2",
    ),
)


def _learn_identifiers(line, found):
    """Record in *found* any identifier that *line* reveals for the first time."""
    if not found["source_ip"]:
        match = _SOURCE_IP.search(line)
        if match:
            found["source_ip"] = match.group(1)
    if not found["userid"]:
        match = _USER.search(line)
        if match:
            found["userid"], found["resourceid"] = match.groups()
    if not found["robotid"]:
        match = _ROBOT.search(line)
        if match:
            found["robotid"] = match.group(1)
    if not found["auth_glob"]:
        match = _AUTH.search(line)
        if match:
            found["auth_glob"] = match.group(1)


def _clean_line(line, found):
    """Return *line* with noise stripped, identifiers redacted, stanzas summarised."""
    line = line.rstrip()
    for pattern, replacement in _NOISE:
        line = pattern.sub(replacement, line)

    # find the private bits and remove them
    _learn_identifiers(line, found)
    for key, placeholder in _PLACEHOLDERS:
        value = found[key]
        if value:
            # Literal replacement on purpose: the values are data, not
            # patterns.  A base64 blob may contain "+", and an IP contains
            # ".", both of which mean something else inside a regex.
            line = line.replace(value, placeholder)

    for pattern, replacement in _TRANSLATIONS:
        line = pattern.sub(replacement, line)
    return line


def _secret_from_auth(auth_glob):
    """Return the secret carried in a base64 SASL PLAIN blob.

    Raises ValueError when the blob is not valid base64/UTF-8, or does not
    split into three NUL-separated fields whose last field has three
    "/"-separated parts.
    """
    # per SASL plain auth: https://tools.ietf.org/html/rfc4616
    _first, _second, password = base64.b64decode(auth_glob).decode().split("\0")
    # no idea what the leading field is, and the resource appears to be the same
    _mystery, _resource, secret = password.split("/")
    return secret


def main(lines=None, out=None):
    """Clean a log and append a sample config built from what it contained.

    lines: iterable of log lines; defaults to sys.stdin.
    out: text stream that receives the cleaned log and the sample config;
        defaults to sys.stdout.

    Returns 0 on success.  Returns 1, after explaining why on stderr, when
    the log did not contain everything the sample config needs; the cleaned
    lines are still written in that case.
    """
    if lines is None:
        lines = sys.stdin
    if out is None:
        out = sys.stdout

    found = dict.fromkeys(key for key, _ in _PLACEHOLDERS)
    for raw in lines:
        print(_clean_line(raw, found), file=out)

    missing = [key for key in _REQUIRED if not found[key]]
    if missing:
        print(
            "log_clean: no sample config written; not found in log: "
            + ", ".join(missing),
            file=sys.stderr,
        )
        return 1
    try:
        secret = _secret_from_auth(found["auth_glob"])
    except ValueError as err:
        print(
            "log_clean: no sample config written; cannot decode auth data: "
            + str(err),
            file=sys.stderr,
        )
        return 1

    print("------------------", file=out)
    print("sample config:", file=out)
    print("user=" + found["userid"], file=out)
    print("domain=ecouser.net", file=out)
    print("resource=" + found["resourceid"], file=out)
    print("secret=" + secret, file=out)
    print("vacuum=" + found["robotid"] + "@126.ecorobot.net", file=out)
    return 0


if __name__ == "__main__":
    sys.exit(main())