Skip to content

Commit

Permalink
Ignore BOM in batch files (Fixes #2450)
Browse files Browse the repository at this point in the history
  • Loading branch information
phihag committed Feb 25, 2014
1 parent f6acbde commit 62e609a
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 7 deletions.
11 changes: 11 additions & 0 deletions test/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@


# Various small unit tests
import io
import xml.etree.ElementTree

#from youtube_dl.utils import htmlentity_transform
Expand All @@ -21,6 +22,7 @@
orderedSet,
PagedList,
parse_duration,
read_batch_urls,
sanitize_filename,
shell_quote,
smuggle_url,
Expand Down Expand Up @@ -250,5 +252,14 @@ def get_page(pagenum):
def test_struct_unpack(self):
    # A single zero byte read as an unsigned char yields the 1-tuple (0,)
    unpacked = struct_unpack(u'!B', b'\x00')
    expected = (0,)
    self.assertEqual(unpacked, expected)

def test_read_batch_urls(self):
    # BOM-prefixed input mixing LF and CRLF line endings with
    # '#' and ';' comment lines that must be dropped
    batch_text = (
        u'\xef\xbb\xbf foo\n'
        u'bar\r\n'
        u'baz\n'
        u'# More after this line\r\n'
        u'; or after this\n'
        u'bam'
    )
    expected_urls = [u'foo', u'bar', u'baz', u'bam']
    batch_file = io.StringIO(batch_text)
    self.assertEqual(read_batch_urls(batch_file), expected_urls)

# Run the unit tests when this file is executed directly as a script
if __name__ == '__main__':
unittest.main()
13 changes: 6 additions & 7 deletions youtube_dl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@
get_cachedir,
MaxDownloadsReached,
preferredencoding,
read_batch_urls,
SameFileError,
setproctitle,
std_headers,
Expand Down Expand Up @@ -552,21 +553,19 @@ def _real_main(argv=None):
sys.exit(0)

# Batch file verification
batchurls = []
batch_urls = []
if opts.batchfile is not None:
try:
if opts.batchfile == '-':
batchfd = sys.stdin
else:
batchfd = open(opts.batchfile, 'r')
batchurls = batchfd.readlines()
batchurls = [x.strip() for x in batchurls]
batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
batchfd = io.open(opts.batchfile, 'r', encoding='utf-8', errors='ignore')
batch_urls = read_batch_urls(batchfd)
if opts.verbose:
write_string(u'[debug] Batch file urls: ' + repr(batchurls) + u'\n')
write_string(u'[debug] Batch file urls: ' + repr(batch_urls) + u'\n')
except IOError:
sys.exit(u'ERROR: batch file could not be read')
all_urls = batchurls + args
all_urls = batch_urls + args
all_urls = [url.strip() for url in all_urls]
_enc = preferredencoding()
all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]
Expand Down
17 changes: 17 additions & 0 deletions youtube_dl/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import contextlib
import ctypes
import datetime
import email.utils
Expand Down Expand Up @@ -1245,3 +1246,19 @@ def struct_unpack(spec, *args):
else:
struct_pack = struct.pack
struct_unpack = struct.unpack


def read_batch_urls(batch_fd):
    """Read the URLs listed in an open batch file, then close the file.

    Every line is decoded to text if it is a byte string, stripped of a
    leading byte order mark and of surrounding whitespace, and dropped when
    it is empty or starts with one of the markers '#', ';' or ']'.

    batch_fd -- iterable file-like object yielding lines (text or bytes);
                it is closed before this function returns.

    Returns the list of remaining URLs, in file order.
    """
    # A UTF-8 BOM that was decoded as UTF-8 shows up as U+FEFF, which
    # strip() does not remove. The three-character form only appears when
    # the BOM bytes were decoded one-to-one (e.g. as Latin-1).
    bom_prefixes = (u'\ufeff', u'\xef\xbb\xbf')
    skip_prefixes = ('#', ';', ']')

    def fixup(url):
        # Lines from a binary stream (e.g. Python 2 stdin) arrive as bytes
        if isinstance(url, bytes):
            url = url.decode('utf-8', 'replace')
        for bom in bom_prefixes:
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.strip()
        if url.startswith(skip_prefixes):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        # Falsy results (False for skipped lines, '' for blank ones) are dropped
        return [url for url in map(fixup, fd) if url]

0 comments on commit 62e609a

Please sign in to comment.