Update breaking_bad.py

kiidbrian · Dec 18, 2017 · 8f78a40 · 8f78a40
1 parent 48bddd8
commit 8f78a40
Showing 1 changed file with 40 additions and 51 deletions.
diff --git a/string/breaking_bad.py b/string/breaking_bad.py
@@ -1,87 +1,76 @@
 """
-Given an api which returns an array of chemical names and an array of chemical
-symbols, display the chemical names with their symbol surrounded by square
-brackets:
+Given an API which returns an array of words and an array of symbols, display
+each word with its matched symbol surrounded by square brackets.
 
-Ex:
-Chemicals array: ['Amazon', 'Microsoft', 'Google']
-Symbols: ['I', 'Am', 'cro', 'Na', 'le', 'abc']
+If a word matches more than one symbol, choose the longest one
+(e.g. 'Microsoft' matches both 'i' and 'cro', so 'cro' is used).
+
+Example:
+Words array: ['Amazon', 'Microsoft', 'Google']
+Symbols: ['i', 'Am', 'cro', 'Na', 'le', 'abc']
 
 Output:
 [Am]azon, Mi[cro]soft, Goog[le]
 
-If the chemical string matches more than one symbol, then choose the one with
-longest length. (ex. 'Microsoft' matches 'i' and 'cro')
-
-My solution:
+My solution (wrong):
 (I sorted the symbols array in descending order of length and ran loop over
-chemicals array to find a symbol match(using indexOf in javascript) which
+words array to find a symbol match (using indexOf in JavaScript) which
 worked. But I didn't make it through the interview, I am guessing my solution
 was O(n^2) and they expected an efficient algorithm.)
 
-
-note:
-This approach didn't pass interview because it did wrong in conditions like:
-
-input:
-chemicals = ['Amazon', 'Microsoft', 'Google']
-symbols = ['I', 'Am', 'cro', 'Na', 'le', 'abc', 'o']  # add 'o' here
-
-expected:
-['[am]azon', 'mi[cro]soft', 'goog[le]']
-
-exact output:
+output:
 ['[Am]azon', 'Mi[cro]soft', 'Goog[le]', 'Amaz[o]n', 'Micr[o]s[o]ft', 'G[o][o]gle']
 """
 
-chemicals = ['Amazon', 'Microsoft', 'Google']
-symbols = ['I', 'Am', 'cro', 'le', 'abc']
+words = ['Amazon', 'Microsoft', 'Google']
+symbols = ['i', 'Am', 'cro', 'le', 'abc']
 
-def match_symbol(chemicals, symbols):
+def match_symbol(words, symbols):
+    """Naive attempt, kept as an example of the wrong approach.
+
+    For each symbol, in list order, every word it matches is appended to
+    the result with the matches wrapped in square brackets. A word that
+    matches several symbols is therefore emitted once per symbol, and a
+    word that matches no symbol is left out of the result.
+
+    :param words: list of strings to search.
+    :param symbols: list of strings, each used as a regular-expression
+        pattern.
+    :return: list of bracketed strings, grouped by symbol.
+    """
     import re
     combined = []
 
     for s in symbols:
-        for c in chemicals:
+        for c in words:
+            # `s` is treated as a regex pattern, not a literal string, in
+            # both the search and the substitution below.
             r = re.search(s, c)
             if r:
                 combined.append(re.sub(s, "[{}]".format(s), c))
 
     return combined
 
-print(match_symbol(chemicals, symbols))
+print(match_symbol(words, symbols))
 
 
 
 """
-An improved version of above, get right output.
-
-symbols = ['I', 'Am', 'cro', 'Na', 'le', 'abc', 'o']
-words = ['Amazon', 'Microsoft', 'Google']
-print(match_symbol_1(symbols, words))
->>> ['[Am]azon', 'Mi[cro]soft', 'Goog[le]']
-
-O(mn) time
-m = len(symbols), n = len(words)
-Assuming re.findall(s, w) is O(1) time.
+O(m * log(m) + n * m * l) time complexity, counting a substring search as O(l)
+n = len(words), m = len(symbols), l = length of a word
 """
 
-def match_symbol_1(symbols, words):
-    import re
-    bracketed_list = []
-    for w in words:
-        longest_match=''
-        for s in symbols:
-            matchs = re.findall(s, w)
-            for m in matchs:
-                longest_match = m if len(longest_match) < len(m) else longest_match
-        bracketed.append(re.sub(longest_match, '[{}]'.format(longest_match), w))
-    return bracketed_list
-
+def match_symbol_1(words, symbols):
+    """Bracket the longest matching symbol in each word.
+
+    For every word, the longest symbol that occurs in it as a
+    case-sensitive substring is wrapped in square brackets; every
+    occurrence of that symbol in the word is wrapped. Ties between symbols
+    of equal length go to the one listed first, because ``sorted`` is
+    stable. Words that contain no symbol are returned unchanged.
+
+    :param words: iterable of strings to annotate.
+    :param symbols: iterable of candidate substrings; empty strings are
+        ignored because they would "match" every word.
+    :return: list with one output string per input word, in input order.
+    """
+    # Longest first, so the first hit for a word is its longest match.
+    candidates = sorted((s for s in symbols if s), key=len, reverse=True)
+    res = []
+    for word in words:
+        for symbol in candidates:
+            if symbol in word:
+                res.append(word.replace(symbol, '[' + symbol + ']'))
+                break
+        else:
+            # No symbol matched. This also covers an empty symbol list,
+            # which previously raised UnboundLocalError.
+            res.append(word)
+    return res
+
+words = ['Amazon', 'Microsoft', 'Google', 'Facebook']
+symbols = ['i', 'Am', 'cro', 'Na', 'le', 'abc']
+print(match_symbol_1(words, symbols))
+# ['[Am]azon', 'Mi[cro]soft', 'Goog[le]', 'Facebook']
 
 
 """
-One approach is to use a Trie for the dictionary (the symbols), and then match
+Another approach is to use a Trie for the dictionary (the symbols), and then match
 by brute force. The complexity will depend on the dictionary;
 if the symbols are all suffixes of one another, it will be n*m
 (where m is the size of the dictionary). For example, in Python: