Skip to content

Commit

Permalink
Merge branch 'fix-readline-multibyte'
Browse files Browse the repository at this point in the history
  • Loading branch information
sebres committed Mar 25, 2021
2 parents 725354c + 4b17ddd commit d8e450c
Show file tree
Hide file tree
Showing 10 changed files with 273 additions and 112 deletions.
5 changes: 5 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,17 @@ ver. 1.0.1-dev-1 (20??/??/??) - development nightly edition
different from 0) in case of unsane environment.

### Fixes
* readline fixed to treat an interim new-line byte as part of a code point in multi-byte logs
(e.g. Unicode encodings like utf-16be, utf-16le);
* `filter.d/drupal-auth.conf` more strict regex, extended to match "Login attempt failed from" (gh-2742)

### New Features and Enhancements
* `actioncheck` behavior has changed (gh-488): the invariant check, as well as the restore or repair
of a sane environment (in case of a recognized unsane state), now only occurs on action errors (e.g.
if ban or unban operations exit with a code other than 0)
* better recognition of log rotation and better performance on reopen: avoids an unnecessary seek to the beginning of the file
(and hash calculation)
* file filter now reads only complete lines (terminated by a new-line), so it waits for the end of a line (i.e. for its completion)


ver. 0.11.2 (2020/11/23) - heal-the-world-with-security-tools
Expand Down
11 changes: 2 additions & 9 deletions fail2ban/client/fail2banregex.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,9 +289,6 @@ def __init__(self, opts):
# Print `line` through the module-level output() helper, unless the `out` option is set
# (in which case nothing is printed here).
def output(self, line):
if not self._opts.out: output(line)

def decode_line(self, line):
return FileContainer.decode_line('<LOG>', self._encoding, line)

# Encode a text line to bytes using the configured encoding;
# characters that cannot be encoded are silently dropped ('ignore' error handler).
def encode_line(self, line):
return line.encode(self._encoding, 'ignore')

Expand Down Expand Up @@ -723,10 +720,6 @@ def print_failregexes(title, failregexes):

return True

def file_lines_gen(self, hdlr):
for line in hdlr:
yield self.decode_line(line)

def start(self, args):

cmd_log, cmd_regex = args[:2]
Expand All @@ -745,10 +738,10 @@ def start(self, args):

if os.path.isfile(cmd_log):
try:
hdlr = open(cmd_log, 'rb')
test_lines = FileContainer(cmd_log, self._encoding, doOpen=True)

self.output( "Use log file : %s" % cmd_log )
self.output( "Use encoding : %s" % self._encoding )
test_lines = self.file_lines_gen(hdlr)
except IOError as e: # pragma: no cover
output( e )
return False
Expand Down
2 changes: 1 addition & 1 deletion fail2ban/server/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -502,7 +502,7 @@ def _addLog(self, cur, jail, name, pos=0, md5=None):
except TypeError:
firstLineMD5 = None

if not firstLineMD5 and (pos or md5):
if firstLineMD5 is None and (pos or md5 is not None):
cur.execute(
"INSERT OR REPLACE INTO logs(jail, path, firstlinemd5, lastfilepos) "
"VALUES(?, ?, ?, ?)", (jail.name, name, md5, pos))
Expand Down
198 changes: 144 additions & 54 deletions fail2ban/server/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -1131,14 +1131,14 @@ def getFailures(self, filename, inOperation=None):
while not self.idle:
line = log.readline()
if not self.active: break; # jail has been stopped
if not line:
if line is None:
# The jail reached the bottom, simply set in operation for this log
# (since we are first time at end of file, growing is only possible after modifications):
log.inOperation = True
break
# acquire in operation from log and process:
self.inOperation = inOperation if inOperation is not None else log.inOperation
self.processLineAndAdd(line.rstrip('\r\n'))
self.processLineAndAdd(line)
finally:
log.close()
db = self.jail.database
Expand All @@ -1155,6 +1155,8 @@ def seekToTime(self, container, date, accuracy=3):
if logSys.getEffectiveLevel() <= logging.DEBUG:
logSys.debug("Seek to find time %s (%s), file size %s", date,
MyTime.time2str(date), fs)
if not fs:
return
minp = container.getPos()
maxp = fs
tryPos = minp
Expand All @@ -1178,8 +1180,8 @@ def seekToTime(self, container, date, accuracy=3):
dateTimeMatch = None
nextp = None
while True:
line = container.readline()
if not line:
line = container.readline(False)
if line is None:
break
(timeMatch, template) = self.dateDetector.matchTime(line)
if timeMatch:
Expand Down Expand Up @@ -1276,25 +1278,34 @@ def stop(self):

class FileContainer:

def __init__(self, filename, encoding, tail=False):
def __init__(self, filename, encoding, tail=False, doOpen=False):
self.__filename = filename
self.waitForLineEnd = True
self.setEncoding(encoding)
self.__tail = tail
self.__handler = None
self.__pos = 0
self.__pos4hash = 0
self.__hash = ''
self.__hashNextTime = time.time() + 30
# Try to open the file. Raises an exception if an error occurred.
handler = open(filename, 'rb')
stats = os.fstat(handler.fileno())
self.__ino = stats.st_ino
if doOpen: # fail2ban-regex only (don't need to reopen it and check for rotation)
self.__handler = handler
return
try:
firstLine = handler.readline()
# Computes the MD5 of the first line.
self.__hash = md5sum(firstLine).hexdigest()
# Start at the beginning of file if tail mode is off.
if tail:
handler.seek(0, 2)
self.__pos = handler.tell()
else:
self.__pos = 0
stats = os.fstat(handler.fileno())
self.__ino = stats.st_ino
if stats.st_size:
firstLine = handler.readline()
# first line available and contains new-line:
if firstLine != firstLine.rstrip(b'\r\n'):
# Computes the MD5 of the first line.
self.__hash = md5sum(firstLine).hexdigest()
# if tail mode scroll to the end of file
if tail:
handler.seek(0, 2)
self.__pos = handler.tell()
finally:
handler.close()
## shows that log is in operation mode (expecting new messages only from here):
Expand All @@ -1304,6 +1315,10 @@ def getFileName(self):
return self.__filename

# Return the current size of the log file (st_size, in bytes).
def getFileSize(self):
# if a handle is currently open, stat it via its descriptor (fstat):
h = self.__handler
if h is not None:
stats = os.fstat(h.fileno())
return stats.st_size
# no open handle - fall back to the size looked up by file name:
return os.path.getsize(self.__filename);

def setEncoding(self, encoding):
Expand All @@ -1322,38 +1337,54 @@ def getPos(self):
# Overwrite the stored file position with `value`.
def setPos(self, value):
self.__pos = value

def open(self):
self.__handler = open(self.__filename, 'rb')
# Set the file descriptor to be FD_CLOEXEC
fd = self.__handler.fileno()
flags = fcntl.fcntl(fd, fcntl.F_GETFD)
fcntl.fcntl(fd, fcntl.F_SETFD, flags | fcntl.FD_CLOEXEC)
# Stat the file before even attempting to read it
stats = os.fstat(self.__handler.fileno())
if not stats.st_size:
# yoh: so it is still an empty file -- nothing should be
# read from it yet
# print "D: no content -- return"
return False
firstLine = self.__handler.readline()
# Computes the MD5 of the first line.
myHash = md5sum(firstLine).hexdigest()
## print "D: fn=%s hashes=%s/%s inos=%s/%s pos=%s rotate=%s" % (
## self.__filename, self.__hash, myHash, stats.st_ino, self.__ino, self.__pos,
## self.__hash != myHash or self.__ino != stats.st_ino)
## sys.stdout.flush()
# Compare hash and inode
if self.__hash != myHash or self.__ino != stats.st_ino:
logSys.log(logging.MSG, "Log rotation detected for %s", self.__filename)
self.__hash = myHash
self.__ino = stats.st_ino
self.__pos = 0
# Sets the file pointer to the last position.
self.__handler.seek(self.__pos)
def open(self, forcePos=None):
h = open(self.__filename, 'rb')
try:
# Set the file descriptor to be FD_CLOEXEC
fd = h.fileno()
flags = fcntl.fcntl(fd, fcntl.F_GETFD)
fcntl.fcntl(fd, fcntl.F_SETFD, flags | fcntl.FD_CLOEXEC)
myHash = self.__hash
# Stat the file before even attempting to read it
stats = os.fstat(h.fileno())
rotflg = stats.st_size < self.__pos or stats.st_ino != self.__ino
if rotflg or not len(myHash) or time.time() > self.__hashNextTime:
myHash = ''
firstLine = h.readline()
# Computes the MD5 of the first line (if it is complete)
if firstLine != firstLine.rstrip(b'\r\n'):
myHash = md5sum(firstLine).hexdigest()
self.__hashNextTime = time.time() + 30
elif stats.st_size == self.__pos:
myHash = self.__hash
# Compare size, hash and inode
if rotflg or myHash != self.__hash:
if self.__hash != '':
logSys.log(logging.MSG, "Log rotation detected for %s, reason: %r", self.__filename,
(stats.st_size, self.__pos, stats.st_ino, self.__ino, myHash, self.__hash))
self.__ino = stats.st_ino
self.__pos = 0
self.__hash = myHash
# if nothing to read from file yet (empty or no new data):
if forcePos is not None:
self.__pos = forcePos
elif stats.st_size <= self.__pos:
return False
# Sets the file pointer to the last position.
h.seek(self.__pos)
# leave file open (to read content):
self.__handler = h; h = None
finally:
# close (no content or error only)
if h:
h.close(); h = None
return True

def seek(self, offs, endLine=True):
h = self.__handler
if h is None:
self.open(offs)
h = self.__handler
# seek to given position
h.seek(offs, 0)
# goto end of next line
Expand All @@ -1371,6 +1402,9 @@ def decode_line(filename, enc, line):
try:
return line.decode(enc, 'strict')
except (UnicodeDecodeError, UnicodeEncodeError) as e:
# avoid warning if got incomplete end of line (e. g. '\n' in "...[0A" followed by "00]..." for utf-16le:
if (e.end == len(line) and line[e.start] in b'\r\n'):
return line[0:e.start].decode(enc, 'replace')
global _decode_line_warn
lev = 7
if not _decode_line_warn.get(filename, 0):
Expand All @@ -1379,29 +1413,85 @@ def decode_line(filename, enc, line):
logSys.log(lev,
"Error decoding line from '%s' with '%s'.", filename, enc)
if logSys.getEffectiveLevel() <= lev:
logSys.log(lev, "Consider setting logencoding=utf-8 (or another appropriate"
" encoding) for this jail. Continuing"
" to process line ignoring invalid characters: %r",
logSys.log(lev,
"Consider setting logencoding to appropriate encoding for this jail. "
"Continuing to process line ignoring invalid characters: %r",
line)
# decode with replacing error chars:
line = line.decode(enc, 'replace')
return line

def readline(self):
def readline(self, complete=True):
"""Read line from file
In opposite to pythons readline it doesn't return new-line,
so returns either the line if line is complete (and complete=True) or None
if line is not complete (and complete=True) or there is no content to read.
If line is complete (and complete is True), it also shift current known
position to begin of next line.
Also it is safe against interim new-line bytes (e. g. part of multi-byte char)
in given encoding.
"""
if self.__handler is None:
return ""
return FileContainer.decode_line(
self.getFileName(), self.getEncoding(), self.__handler.readline())
# read raw bytes up to \n char:
b = self.__handler.readline()
if not b:
return None
bl = len(b)
# convert to log-encoding (new-line char could disappear if it is part of multi-byte sequence):
r = FileContainer.decode_line(
self.getFileName(), self.getEncoding(), b)
# trim new-line at end and check the line was written complete (contains a new-line):
l = r.rstrip('\r\n')
if complete:
if l == r:
# try to fill buffer in order to find line-end in log encoding:
fnd = 0
while 1:
r = self.__handler.readline()
if not r:
break
b += r
bl += len(r)
# convert to log-encoding:
r = FileContainer.decode_line(
self.getFileName(), self.getEncoding(), b)
# ensure new-line is not in the middle (buffered 2 strings, e. g. in utf-16le it is "...[0A"+"00]..."):
e = r.find('\n')
if e >= 0 and e != len(r)-1:
l, r = r[0:e], r[0:e+1]
# back to bytes and get offset to seek after NL:
r = r.encode(self.getEncoding(), 'replace')
self.__handler.seek(-bl+len(r), 1)
return l
# trim new-line at end and check the line was written complete (contains a new-line):
l = r.rstrip('\r\n')
if l != r:
return l
if self.waitForLineEnd:
# not fulfilled - seek back and return:
self.__handler.seek(-bl, 1)
return None
return l

def close(self):
if not self.__handler is None:
# Saves the last position.
if self.__handler is not None:
# Saves the last real position.
self.__pos = self.__handler.tell()
# Closes the file.
self.__handler.close()
self.__handler = None
## print "D: Closed %s with pos %d" % (handler, self.__pos)
## sys.stdout.flush()

# The container acts as its own iterator; lines are produced by next().
def __iter__(self):
return self
# Iterator step: return the next line obtained from readline().
# When readline() returns None, the container is closed and iteration ends.
# NOTE(review): only the Python 2 style method name `next` is visible here;
# a `__next__` alias may be defined outside this view - confirm.
def next(self):
line = self.readline()
if line is None:
# nothing (more) to read: close the handler before stopping the iteration
self.close()
raise StopIteration
return line

_decode_line_warn = Utils.Cache(maxCount=1000, maxTime=24*60*60);

Expand Down
4 changes: 1 addition & 3 deletions fail2ban/server/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,11 +332,9 @@ def wait_for(cond, timeout, interval=None):
timeout_expr = lambda: time.time() > time0
else:
timeout_expr = timeout
if not interval:
interval = Utils.DEFAULT_SLEEP_INTERVAL
if timeout_expr():
break
stm = min(stm + interval, Utils.DEFAULT_SLEEP_TIME)
stm = min(stm + (interval or Utils.DEFAULT_SLEEP_INTERVAL), Utils.DEFAULT_SLEEP_TIME)
time.sleep(stm)
return ret

Expand Down
7 changes: 4 additions & 3 deletions fail2ban/tests/databasetestcase.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,19 +212,20 @@ def testAddJail(self):
self.jail.name in self.db.getJailNames(True),
"Jail not added to database")

def testAddLog(self):
def _testAddLog(self):
self.testAddJail() # Jail required

_, filename = tempfile.mkstemp(".log", "Fail2BanDb_")
self.fileContainer = FileContainer(filename, "utf-8")

self.db.addLog(self.jail, self.fileContainer)
pos = self.db.addLog(self.jail, self.fileContainer)
self.assertTrue(pos is None); # unknown previously

self.assertIn(filename, self.db.getLogPaths(self.jail))
os.remove(filename)

def testUpdateLog(self):
self.testAddLog() # Add log file
self._testAddLog() # Add log file

# Write some text
filename = self.fileContainer.getFileName()
Expand Down
Loading

0 comments on commit d8e450c

Please sign in to comment.