-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathskillshare.py
105 lines (95 loc) · 4.87 KB
/
skillshare.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import requests, json, sys, re, os
from slugify import slugify
class Skillshare(object):
    """Download every video session of a Skillshare class to local disk.

    Course metadata is read from ``api.skillshare.com`` using the caller's
    session cookie; each session's MP4 source is then resolved through the
    Brightcove playback API and streamed to
    ``<download_path>/<teacher>/<class title>/<NN> - <session title>.mp4``.

    Validation failures are reported as ``AssertionError`` (the exception
    type this class has always used), but they are raised explicitly so the
    checks are still performed when Python runs with ``-O``.
    """

    def __init__(
        self,
        cookie,
        download_path=os.environ.get('FILE_PATH', './Skillshare'),
        pk='BCpkADawqM2OOcM6njnM7hf9EaK6lIFlqiXB0iWjqGWUQjU7R8965xUvIQNqdQbnDTLz0IAO7E6Ir2rIbXJtFdzrGtitoee0n1XXRliD-RH9A-svuvNW9qgo3Bh34HEZjXjG4Nml4iyz3KqF',
        brightcove_account_id=3695997568001,
    ):
        """Store the credentials and settings used by the download methods.

        Args:
            cookie: Value sent as the ``cookie`` header to the Skillshare
                API. Surrounding whitespace and double quotes are removed.
            download_path: Root directory for downloaded courses. The
                default is read from the ``FILE_PATH`` environment variable
                once, when the class is defined.
            pk: Key sent to the Brightcove playback API inside the
                ``Accept`` header. Surrounding whitespace is removed.
            brightcove_account_id: Account ID placed in the Brightcove
                playback URL.
        """
        self.cookie = cookie.strip().strip('"')
        self.download_path = download_path
        self.pk = pk.strip()
        self.brightcove_account_id = brightcove_account_id
        self.pythonversion = 3 if sys.version_info >= (3, 0) else 2

    def is_unicode_string(self, string):
        """Return True when ``string`` is a text (unicode) string.

        That is ``str`` on Python 3 and ``unicode`` on Python 2.
        """
        if self.pythonversion == 3:
            return isinstance(string, str)
        # ``unicode`` only exists on Python 2; this line is never reached
        # on Python 3 because of the version check above.
        return self.pythonversion == 2 and isinstance(string, unicode)  # noqa: F821

    def _to_ascii(self, text):
        """Encode text strings to ASCII (unknown characters become ``?``)."""
        if self.is_unicode_string(text):
            return text.encode('ascii', 'replace')
        return text

    @staticmethod
    def _extract_video_id(session):
        """Return the part of ``video_hashed_id`` after the first colon.

        Raises:
            AssertionError: if the field is missing, empty, or has no
                non-empty second ``:``-separated component.
        """
        hashed_id = session.get('video_hashed_id')
        parts = hashed_id.split(':') if hashed_id else []
        if len(parts) < 2 or not parts[1]:
            raise AssertionError('Failed to read video ID from data')
        return parts[1]

    @staticmethod
    def _select_mp4_url(sources):
        """Return the ``src`` of the first source whose container is MP4.

        Raises:
            AssertionError: if no source qualifies. Without this check the
                caller would fail later with an unrelated ``NameError``.
        """
        for source in sources:
            if source.get('container') == 'MP4' and 'src' in source:
                return source['src']
        raise AssertionError('Failed to find an MP4 source for video')

    def download_course_by_url(self, url):
        """Download the class whose numeric ID is embedded in ``url``.

        Raises:
            AssertionError: if ``url`` does not look like
                ``https://www.skillshare.com/classes/<slug>/<digits>``.
        """
        m = re.match(r'https://www.skillshare.com/classes/.*?/(\d+)', url)
        if not m:
            raise AssertionError('Failed to parse class ID from URL')
        self.download_course_by_class_id(m.group(1))

    def download_course_by_class_id(self, class_id):
        """Fetch the class metadata and download each of its sessions.

        Files are placed under
        ``download_path/<slug(teacher)>/<slug(title)>/``; the directory is
        created when missing.

        Raises:
            AssertionError: if the teacher name or a session's video ID
                cannot be read from the metadata.
        """
        data = self.fetch_course_data_by_class_id(class_id=class_id)

        teacher = data['_embedded']['teacher']
        # Prefer the vanity username; fall back to the full name when it is
        # absent or empty.
        teacher_name = teacher.get('vanity_username') or teacher['full_name']
        if not teacher_name:
            raise AssertionError('Failed to read teacher name from data')
        teacher_name = self._to_ascii(teacher_name)
        title = self._to_ascii(data['title'])

        base_path = os.path.abspath(
            os.path.join(self.download_path, slugify(teacher_name), slugify(title))
        ).rstrip('/')
        if not os.path.exists(base_path):
            os.makedirs(base_path)

        for unit in data['_embedded']['units']['_embedded']['units']:
            for session in unit['_embedded']['sessions']['_embedded']['sessions']:
                video_id = self._extract_video_id(session)
                session_title = self._to_ascii(session['title'])
                # Session indexes are zero-based; file names start at 01.
                file_name = '{} - {}'.format(
                    str(session['index'] + 1).zfill(2), slugify(session_title)
                )
                self.download_video(
                    fpath='{base_path}/{session}.mp4'.format(
                        base_path=base_path, session=file_name
                    ),
                    video_id=video_id,
                )
                print('')

    def fetch_course_data_by_class_id(self, class_id):
        """Return the decoded JSON metadata for ``class_id``.

        Raises:
            AssertionError: if the API does not answer with HTTP 200.
        """
        res = requests.get(
            url='https://api.skillshare.com/classes/{}'.format(class_id),
            headers={
                'Accept': 'application/vnd.skillshare.class+json;,version=0.8',
                'User-Agent': 'Skillshare/4.1.1; Android 5.1.1',
                'Host': 'api.skillshare.com',
                'cookie': self.cookie,
            },
        )
        if res.status_code != 200:
            raise AssertionError('Fetch error, code == {}'.format(res.status_code))
        return res.json()

    def download_video(self, fpath, video_id):
        """Download the MP4 rendition of ``video_id`` to ``fpath``.

        Nothing is fetched when ``fpath`` already exists. The data is first
        written to ``fpath + '.part'`` and renamed only after the transfer
        completes, so an interrupted download is never mistaken for a
        finished one on the next run.

        Raises:
            AssertionError: if the Brightcove metadata request fails, no MP4
                source is listed, or the video request returns an HTTP
                error status.
        """
        print('Downloading {}...'.format(fpath))
        # Check before touching the network: a finished file needs no
        # metadata lookup.
        if os.path.exists(fpath):
            print('Video already downloaded, skipping...')
            return

        meta_url = 'https://edge.api.brightcove.com/playback/v1/accounts/{account_id}/videos/{video_id}'.format(
            account_id=self.brightcove_account_id,
            video_id=video_id,
        )
        meta_res = requests.get(
            meta_url,
            headers={
                'Accept': 'application/json;pk={}'.format(self.pk),
                'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0',
                'Origin': 'https://www.skillshare.com',
            },
        )
        if meta_res.status_code != 200:
            raise AssertionError('Failed to fetch video meta')
        dl_url = self._select_mp4_url(meta_res.json()['sources'])

        part_path = fpath + '.part'
        response = requests.get(dl_url, allow_redirects=True, stream=True)
        try:
            if not response.ok:
                # Do not save an HTTP error body as if it were the video.
                raise AssertionError(
                    'Failed to download video, code == {}'.format(response.status_code)
                )
            with open(part_path, 'wb') as f:
                total_length = response.headers.get('content-length')
                if not total_length:
                    # Size unknown: no progress bar, write the body at once.
                    f.write(response.content)
                else:
                    dl = 0
                    total_length = int(total_length)
                    for data in response.iter_content(chunk_size=4096):
                        dl += len(data)
                        f.write(data)
                        # 50-character progress bar redrawn in place.
                        done = int(50 * dl / total_length)
                        sys.stdout.write('\r[%s%s]' % ('=' * done, ' ' * (50 - done)))
                        sys.stdout.flush()
        except BaseException:
            # Includes KeyboardInterrupt: drop the incomplete file, then
            # let the original exception propagate.
            if os.path.exists(part_path):
                os.remove(part_path)
            raise
        finally:
            # Release the streamed connection back to the pool.
            response.close()

        os.rename(part_path, fpath)
        print('')