# SOUNDEX.py (forked from geekcomputers/Python)
# -*- coding: utf-8 -*-
def SOUNDEX(TERM: str) -> str:
    """Return the four-character American Soundex code for *TERM*.

    The code is the first letter of TERM (upper-cased) followed by three
    digits that encode the consonants after it. Non-alphabetic characters
    (spaces, digits, punctuation) are ignored.

    Rules applied:
      * B F P V -> 1, C G J K Q S X Z -> 2, D T -> 3, L -> 4, M N -> 5, R -> 6.
      * Vowels (A E I O U Y) are not encoded, but they separate consonants:
        two letters with the same digit on either side of a vowel are both
        encoded.
      * H and W are skipped entirely; letters with the same digit separated
        only by H or W are encoded once.
      * Adjacent letters with the same digit are encoded once. This also
        applies to the first letter: a following letter with the same digit
        as the first letter is dropped.
      * The result is padded with zeros or truncated to exactly 4 characters.

    Args:
        TERM: The word or name to encode.

    Returns:
        The Soundex code, e.g. ``"R163"`` for ``"Robert"``. An empty string
        is returned when TERM contains no alphabetic characters.

    >>> SOUNDEX("Robert")
    'R163'
    >>> SOUNDEX("Tymczak")
    'T522'
    """
    codes = {
        letter: digit
        for group, digit in (
            ("BFPV", "1"),
            ("CGJKQSXZ", "2"),
            ("DT", "3"),
            ("L", "4"),
            ("MN", "5"),
            ("R", "6"),
        )
        for letter in group
    }

    letters = [char for char in TERM.upper() if char.isalpha()]
    if not letters:
        # Nothing to encode; avoid indexing into an empty list.
        return ""

    first_letter = letters[0]
    result = [first_letter]
    # Seed with the first letter's own digit so an immediately following
    # letter of the same group is suppressed (e.g. "Pfister" -> P236).
    previous = codes.get(first_letter)

    for char in letters[1:]:
        if char in ("H", "W"):
            # H and W are transparent: they do not reset `previous`.
            continue
        digit = codes.get(char)
        if digit is not None and digit != previous:
            result.append(digit)
            if len(result) == 4:
                break
        # Vowels (and any other uncoded letter) set `previous` to None, so
        # the same digit may be emitted again after them.
        previous = digit

    return "".join(result).ljust(4, "0")