forked from mutantmonkey/phenny
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcodepoints.py
134 lines (113 loc) · 3.91 KB
/
codepoints.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#!/usr/bin/env python
"""
codepoints.py - Phenny Codepoints Module
Copyright 2008, Sean B. Palmer, inamidst.com
Licensed under the Eiffel Forum License 2.
http://inamidst.com/phenny/
"""
import re, unicodedata
from itertools import islice
def about(u, cp=None, name=None):
    """Describe a single character as ``U+XXXX NAME (char)``.

    Args:
        u: The character to describe (a one-character string).
        cp: Its code point; computed with ``ord(u)`` when omitted.
        name: Its Unicode name; looked up via ``unicodedata.name`` when
            omitted.

    Returns:
        The formatted description, or ``'U+XXXX (No name found)'`` when no
        name was supplied and ``unicodedata`` has none for ``u``.
    """
    if cp is None:
        cp = ord(u)
    if name is None:
        try:
            name = unicodedata.name(u)
        except ValueError:
            return 'U+%04X (No name found)' % cp
    if unicodedata.combining(u):
        # Show combining marks on top of U+25CC DOTTED CIRCLE.  The literal
        # used to be '\xe2\x97\x8c' -- the UTF-8 bytes of that character,
        # which in a Python 3 str are three unrelated Latin-1 characters.
        template = 'U+%04X %s (\u25cc%s)'
    else:
        template = 'U+%04X %s (%s)'
    return template % (cp, name, u)
def _named_bmp_matches(r_label):
    """Yield ``(len(name), char, cp, name)`` for named BMP characters whose name matches *r_label*."""
    # U+0000..U+FFFF; unnamed code points (surrogates, private use,
    # unassigned) are skipped via the default value rather than by raising
    # and catching ValueError for each of them.
    for cp in range(0x10000):
        char = chr(cp)
        name = unicodedata.name(char, None)
        if name is None:
            continue
        if r_label.search(name):
            yield (len(name), char, cp, name)


def codepoint_simple(arg):
    """Find the Basic Multilingual Plane character whose name best matches *arg*.

    *arg* is upper-cased and each space becomes ``.*\\b``, so the words must
    appear in order, each starting at a word boundary.  A strict pass also
    requires the last word to end at a word boundary; if that finds nothing,
    a loose pass lets the last word be a prefix.  Among the matches, the
    smallest ``(len(name), char, cp, name)`` tuple wins, i.e. the shortest
    name.

    Args:
        arg: Search words.  The text is inserted into a regular expression
            unescaped, so regex metacharacters in it are interpreted.

    Returns:
        The ``about()`` description of the best match, or ``None`` when
        neither pass matches anything.
    """
    stem = r'\b' + arg.upper().replace(' ', r'.*\b')
    # Strict pattern first, loose pattern only as a fallback.
    for pattern in (stem + r'\b', stem):
        best = min(_named_bmp_matches(re.compile(pattern)), default=None)
        if best is not None:
            _, char, cp, name = best
            return about(char, cp, name)
    return None
def codepoint_extended(arg):
    """Yield descriptions of every character whose name matches a regexp.

    Code points U+0001 through U+10FFFF are scanned in ascending order.
    Characters without a Unicode name are searched under the placeholder
    name ``'-'``.

    Args:
        arg: A regular expression, matched case-insensitively against the
            character names.

    Yields:
        The ``about()`` description of each matching character.

    Raises:
        ValueError: If *arg* cannot be compiled.  Because this is a
            generator, the error surfaces on the first iteration, not at
            call time.
    """
    try:
        # Character names are upper case.  Match case-insensitively instead
        # of upper-casing the pattern: upper-casing also rewrites escapes
        # (``\b`` -> ``\B``, ``\d`` -> ``\D``) and so changes their meaning.
        r_search = re.compile(arg, re.IGNORECASE)
    except Exception as err:
        # Any compilation failure (re.error, or e.g. OverflowError for a
        # huge repetition count) is reported as ValueError, as before, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        raise ValueError('Broken regexp: %r' % arg) from err
    # 0x110000 is one past the last code point, so U+10FFFF is included.
    for cp in range(1, 0x110000):
        char = chr(cp)
        name = unicodedata.name(char, '-')
        if r_search.search(name):
            yield about(char, cp, name)
def u(phenny, input):
    """Look up unicode information."""
    # NOTE(review): the docstring is deliberately left as the original single
    # line in case the bot framework surfaces it as help text -- confirm.
    #
    # Dispatch, in order:
    #   * empty or all-space input  -> a fixed reply;
    #   * input of 2+ characters drawn only from letters, digits, '-', ' '
    #     and regex punctuation ("printable")
    #         - 4 characters that parse as hex -> describe that code point,
    #         - contains regex punctuation     -> regexp search of names,
    #         - otherwise                      -> word search of names;
    #   * anything else is treated as literal characters to describe.

    # Drop the first three characters (presumably the ".u " command prefix).
    arg = input.bytes[3:]
    if not arg:
        return phenny.reply('You gave me zero length input.')
    if not arg.strip(' '):
        if len(arg) > 1:
            return phenny.reply('%s SPACEs (U+0020)' % len(arg))
        return phenny.reply('1 SPACE (U+0020)')

    regex_chars = '.?+*{}[]\\/^$'
    allowed = set('ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789- ' + regex_chars)
    printable = len(arg) > 1 and not (set(arg.upper()) - allowed)

    if not printable:
        text = arg
        if len(text) <= 3:
            # Three or fewer codepoints: describe each one in full.
            for char in text:
                phenny.say(about(char))
        elif len(text) <= 10:
            # Four to ten codepoints: list the U+XXXX values only.
            phenny.reply(' '.join('U+%04X' % ord(c) for c in text))
        else:
            phenny.reply('Sorry, your input is too long!')
        return

    if len(arg) == 4:
        # Four characters that parse as hexadecimal name a code point.
        try:
            char = chr(int(arg, 16))
        except ValueError:
            pass
        else:
            return phenny.say(about(char))

    if any(c in arg for c in regex_chars):
        # Look up codepoints with a regexp.  Fetch at most four results:
        # the fourth is never shown, it only tells us whether to mark the
        # third with ' [...]'.
        try:
            results = list(islice(codepoint_extended(arg), 4))
        except ValueError as err:
            # A malformed pattern used to propagate out of the handler;
            # tell the user instead.
            return phenny.reply(str(err))
        if not results:
            phenny.reply('Sorry, no results')
        for result in results[:2]:
            phenny.say(result)
        if len(results) > 3:
            phenny.say(results[2] + ' [...]')
        elif len(results) == 3:
            phenny.say(results[2])
    else:
        # Look up a codepoint freely, by the words in its name.
        result = codepoint_simple(arg)
        if result is not None:
            phenny.say(result)
        else:
            phenny.reply("Sorry, no results for %r." % arg)
u.commands = ['u']
u.example = '.u 203D'
def bytes(phenny, input):
    """Show the input as pretty printed bytes."""
    # NOTE: this name shadows the builtin ``bytes`` for the rest of the
    # module; it is kept because it is the handler's public name.
    raw = input.bytes
    # Reply with the repr of everything after the first space.  find()
    # returns -1 when there is no space, so the slice then starts at 0 and
    # the whole input is shown.
    phenny.reply('%r' % raw[raw.find(' ') + 1:])
bytes.commands = ['bytes']
# U+32E1; the literal used to be '\xe3\x8b\xa1', the UTF-8 bytes of that
# character, which in a Python 3 str are three unrelated characters.
bytes.example = '.bytes \u32e1'
if __name__ == '__main__':
    # Run directly as a script: print the module docstring without its
    # surrounding blank lines.
    banner = __doc__.strip()
    print(banner)