repaste.py
from util import hook, http

import htmlentitydefs
import re
import urllib
import urllib2

# Matches the named HTML entities ("&amp;", "&lt;", ...) known to the stdlib.
re_htmlent = re.compile("&(" + "|".join(htmlentitydefs.name2codepoint.keys()) + ");")
# Matches numeric character references, decimal ("&#65;") or hex ("&#x41;").
re_numeric = re.compile(r'&#(x?)([a-fA-F0-9]+);')
def db_init(db):
    # One row per channel; a truthy `manual` disables automatic repasting there.
    db.execute("create table if not exists repaste(chan, manual, primary key(chan))")
    db.commit()
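# Nothing in this plugin ever sets `manual`; presumably an operator flips it
# by hand. A hypothetical sketch (the channel name is made up):
#
#     db.execute("insert or replace into repaste(chan, manual) values(?, ?)",
#                ("#example", 1))
#     db.commit()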
def decode_html(text):
    # Replace named entities ("&lt;" -> "<") ...
    text = re.sub(re_htmlent,
                  lambda m: unichr(htmlentitydefs.name2codepoint[m.group(1)]),
                  text)
    # ... then numeric references, hex when the "x" group matched, else decimal.
    text = re.sub(re_numeric,
                  lambda m: unichr(int(m.group(2), 16 if m.group(1) else 10)),
                  text)
    return text
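# A minimal sketch of the expected behaviour (doctest-style, Python 2):
#
#     >>> decode_html("&lt;b&gt;hi&lt;/b&gt; &#65; &#x41;")
#     u'<b>hi</b> A A'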
def scrape_mibpaste(url):
    if not url.startswith("http"):
        url = "http://" + url
    pagesource = http.get(url)
    # mibpaste.com has no raw view, so grab everything between <body> and the
    # trailing <hr>, drop the <br /> line breaks, and unescape HTML entities.
    rawpaste = re.search(r'(?s)(?<=<body>\n).+(?=<hr>)', pagesource).group(0)
    filterbr = rawpaste.replace("<br />", "")
    unescaped = decode_html(filterbr)
    return unescaped.strip()
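# Hypothetical usage (the paste id is made up):
#
#     >>> scrape_mibpaste("mibpaste.com/abc123")  # fetches http://mibpaste.com/abc123
#     u'...paste contents...'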
def scrape_pastebin(url):
    # pastebin.com exposes a raw view, so just rewrite the URL and fetch that.
    paste_id = re.search(r'(?:www\.)?pastebin\.com/([a-zA-Z0-9]+)$', url).group(1)
    rawurl = "http://pastebin.com/raw.php?i=" + paste_id
    return http.get(rawurl)
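# e.g. (hypothetical id) "http://pastebin.com/abc123XY" is fetched as
# "http://pastebin.com/raw.php?i=abc123XY".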
# In-memory cache of already-repasted URLs, so a link pasted repeatedly is
# only uploaded once per bot process.
autorepastes = {}


#@hook.regex(r'(pastebin\.com)(/[^ ]+)')
@hook.regex(r'(mibpaste\.com)(/[^ ]+)')
def autorepaste(inp, input=None, notice=None, db=None, chan=None, nick=None):
    db_init(db)
    manual = db.execute("select manual from repaste where chan=?",
                        (chan,)).fetchone()
    if manual and len(manual) and manual[0]:
        # Automatic repasting is disabled for this channel.
        return
    url = inp.group(1) + inp.group(2)
    url = urllib.unquote(url)
    if url in autorepastes:
        out = autorepastes[url]
        notice("In the future, please use a less awful pastebin (e.g. pastebin.com)")
    else:
        out = repaste("http://" + url, input, db, False)
        autorepastes[url] = out
        notice("In the future, please use a less awful pastebin (e.g. pastebin.com) instead of %s." % inp.group(1))
    input.say("%s (repasted for %s)" % (out, nick))
# URL pattern -> scraper function; patterns are tried in arbitrary dict order.
scrapers = {
    r'mibpaste\.com': scrape_mibpaste,
    r'pastebin\.com': scrape_pastebin,
}
def scrape(url):
    for pat, scraper in scrapers.iteritems():
        if re.search(pat, url):
            break
    else:
        # No pattern matched.
        return None
    return scraper(url)
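# The function above relies on Python's loop-else: the `else` branch runs only
# when the loop finishes without hitting `break`. A minimal sketch of the shape:
#
#     for x in (1, 2, 3):
#         if x == 2:
#             break
#     else:
#         print "no match"  # skipped here, since x == 2 breaks out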
def paste_sprunge(text, syntax=None, user=None):
    data = urllib.urlencode({"sprunge": text})
    # sprunge.us answers a POST with the URL of the new paste.
    url = urllib2.urlopen("http://sprunge.us/", data).read().strip()
    if syntax:
        url += "?" + syntax
    return url
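# sprunge.us used a "?<lexer>" query string to pick syntax highlighting, so
# e.g. paste_sprunge("print 1", syntax="py") would return something like
# "http://sprunge.us/XXXX?py" (the paste id here is made up).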
def paste_ubuntu(text, user=None, syntax='text'):
    data = urllib.urlencode({"poster": user,
                             "syntax": syntax,
                             "content": text})
    return urllib2.urlopen("http://paste.ubuntu.com/", data).url
def paste_gist(text, user=None, syntax=None, description=None):
    # Posts through gist.github.com's old HTML form endpoint rather than an API.
    data = {
        'file_contents[gistfile1]': text,
        'action_button': "private",
    }
    if description:
        data['description'] = description
    if syntax:
        data['file_ext[gistfile1]'] = "." + syntax
    req = urllib2.urlopen('https://gist.github.com/gists',
                          urllib.urlencode(data).encode('utf8'))
    return req.url
def paste_strictfp(text, user=None, syntax="plain"):
    data = urllib.urlencode(dict(
        language=syntax,
        paste=text,
        private="private",
        submit="Paste"))
    req = urllib2.urlopen("http://paste.strictfp.com/", data)
    return req.url
pasters = dict(
    ubuntu=paste_ubuntu,
    sprunge=paste_sprunge,
    gist=paste_gist,
    strictfp=paste_strictfp,
)
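# Adding a provider is just another entry in `pasters`. A hypothetical sketch
# (the dpaste.com form fields here are assumptions, not checked):
#
#     def paste_dpaste(text, user=None, syntax=None):
#         data = urllib.urlencode({"content": text})
#         return urllib2.urlopen("http://dpaste.com/", data).url
#
#     pasters["dpaste"] = paste_dpaste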
@hook.command
def repaste(inp, input=None, db=None, isManual=True):
    ".repaste list|[provider] [syntax] <pasteurl> -- Reuploads a paste (mibpaste/pastebin) to [provider]."
    parts = inp.split()
    db_init(db)
    if not parts:
        return repaste.__doc__
    if parts[0] == 'list':
        return " ".join(pasters.keys())
    paster = paste_gist
    args = {}
    # Consume an optional provider name, then an optional syntax hint; whatever
    # remains must be the paste URL itself.
    if not parts[0].startswith("http"):
        p = parts[0].lower()
        if p in pasters:
            paster = pasters[p]
            parts = parts[1:]
    if not parts[0].startswith("http"):
        p = parts[0].lower()
        parts = parts[1:]
        args["syntax"] = p
    if len(parts) > 1:
        return "PEBKAC"
    args["user"] = input.user
    url = parts[0]
    scraped = scrape(url)
    if not scraped:
        return "No scraper for given url"
    args["text"] = scraped
    pasted = paster(**args)
    return pasted
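# Hypothetical IRC usage (paste ids made up):
#
#     <user> .repaste list
#     <bot>  ubuntu sprunge gist strictfp
#     <user> .repaste sprunge py http://mibpaste.com/abc123
#     <bot>  http://sprunge.us/XXXX?py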