Commit 6ee6f12
Merge branch 'master' into master
Authored Aug 28, 2017
2 parents 1bc3a17 + 75ccf5b
19 files changed: +977 -43 lines
 

.travis.yml (+1 -2)

@@ -6,8 +6,7 @@ python:
 - "3.5"
 - "3.6"
 - "3.6-dev"
-- "3.7-dev"
-- "nightly"
+
 install:
 - if [ "$TRAVIS_PYTHON_VERSION" == "3.2" ]; then travis_retry pip install coverage==3.7.1; fi
 - if [ "$TRAVIS_PYTHON_VERSION" != "3.2" ]; then travis_retry pip install coverage; fi

Graphs/Breadth_First_Search.py (+33 -8)

@@ -1,12 +1,10 @@
-class Graph:
+class GRAPH:
+    """docstring for GRAPH"""
+    def __init__(self, nodes):
+        self.nodes = nodes
+        self.graph = [[0] * nodes for i in range(nodes)]
+        self.visited = [0] * nodes
 
-    def __init__(self, vertex):
-        self.vertex = vertex
-        self.graph = [[0] * vertex for i in range(vertex)]
-
-    def add_edge(self, u, v):
-        self.graph[u - 1][v - 1] = 1
-        self.graph[v - 1][u - 1] = 1
 
     def show(self):
 
@@ -43,3 +41,30 @@ def bfs(self,v):
 g.add_edge(5,9)
 g.add_edge(6,10)
 g.bfs(4)
+        print(self.graph)
+
+    def add_edge(self, i, j):
+        self.graph[i][j] = 1
+        self.graph[j][i] = 1
+
+    def bfs(self, s):
+        queue = [s]
+        self.visited[s] = 1
+        while len(queue) != 0:
+            x = queue.pop(0)
+            print(x)
+            for i in range(0, self.nodes):
+                if self.graph[x][i] == 1 and self.visited[i] == 0:
+                    queue.append(i)
+                    self.visited[i] = 1
+
+n = int(input("Enter the number of nodes: "))
+g = GRAPH(n)
+e = int(input("Enter the number of edges: "))
+print("Enter the edges (u v)")
+for i in range(0, e):
+    u, v = map(int, input().split())
+    g.add_edge(u, v)
+s = int(input("Enter the source node: "))
+g.bfs(s)
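
An aside on the queue above: list.pop(0) shifts every remaining element, so this BFS pays O(V) per dequeue. A minimal sketch of the same traversal with collections.deque (illustrative, not part of this commit):

from collections import deque

def bfs(graph, source):
    # graph: adjacency matrix like GRAPH.graph above; source: start index
    visited = [False] * len(graph)
    queue = deque([source])
    visited[source] = True
    while queue:
        x = queue.popleft()  # O(1), unlike list.pop(0)
        print(x)
        for i, connected in enumerate(graph[x]):
            if connected and not visited[i]:
                visited[i] = True
                queue.append(i)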

Graphs/Deep_First_Search.py (+25 -26)

@@ -1,33 +1,32 @@
-class Graph:
+class GRAPH:
+    """docstring for GRAPH"""
+    def __init__(self, nodes):
+        self.nodes = nodes
+        self.graph = [[0] * nodes for i in range(nodes)]
+        self.visited = [0] * nodes
 
-    def __init__(self, vertex):
-        self.vertex = vertex
-        self.graph = [[0] * vertex for i in range(vertex)]
-        self.visited = [False] * vertex
 
-    def add_edge(self, u, v):
-        self.graph[u - 1][v - 1] = 1
-        self.graph[v - 1][u - 1] = 1
     def show(self):
+        print(self.graph)
 
-        for i in self.graph:
-            for j in i:
-                print(j, end=' ')
-            print(' ')
+    def add_edge(self, i, j):
+        self.graph[i][j] = 1
+        self.graph[j][i] = 1
 
-
-    def dfs(self, u):
-        self.visited[u - 1] = True
-        print('%d visited' % u)
-        for i in range(1, self.vertex + 1):
-            if self.graph[u - 1][i - 1] == 1 and self.visited[i - 1] == False:
+    def dfs(self, s):
+        self.visited[s] = 1
+        print(s)
+        for i in range(0, self.nodes):
+            if self.visited[i] == 0 and self.graph[s][i] == 1:
                 self.dfs(i)
+
 
-
-g = Graph(5)
-g.add_edge(1,4)
-g.add_edge(4,2)
-g.add_edge(4,5)
-g.add_edge(2,5)
-g.add_edge(5,3)
-g.dfs(1)
+n = int(input("Enter the number of nodes: "))
+g = GRAPH(n)
+e = int(input("Enter the number of edges: "))
+print("Enter the edges (u v)")
+for i in range(0, e):
+    u, v = map(int, input().split())
+    g.add_edge(u, v)
+s = int(input("Enter the source node: "))
+g.dfs(s)
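
The recursive dfs above can hit Python's default recursion limit (around 1000 frames) on long paths; an iterative sketch over the same adjacency matrix (illustrative, not part of this commit):

def dfs_iterative(graph, source):
    # an explicit stack replaces the call stack
    visited = [False] * len(graph)
    stack = [source]
    while stack:
        x = stack.pop()
        if visited[x]:
            continue
        visited[x] = True
        print(x)
        # push higher-numbered neighbours first so lower ones are visited first
        for i in range(len(graph) - 1, -1, -1):
            if graph[x][i] == 1 and not visited[i]:
                stack.append(i)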

README.md (+2 -2)

@@ -74,7 +74,7 @@ __Properties__
 ### Shell
 ![alt text][shell-image]
 
-From [Wikipedia][shell-wiki]: Shellsort is a generalization of insertion sort that allows the exchange of items that are far apart. The idea is to arrange the list of elements so that, starting anywherem considereing every nth element gives a sorted list. Such a list is said to be h-sorted. Equivanelty, it can be thought of as h intterleaved lists, each individually sorted.
+From [Wikipedia][shell-wiki]: Shellsort is a generalization of insertion sort that allows the exchange of items that are far apart. The idea is to arrange the list of elements so that, starting anywhere, considering every nth element gives a sorted list. Such a list is said to be h-sorted. Equivalently, it can be thought of as h interleaved lists, each individually sorted.
 
 __Properties__
 * Worst case performance O(n log² n)

@@ -83,7 +83,7 @@ __Properties__
 
 ###### View the algorithm in [action][shell-toptal]
 
-###Time-Compexity Graphs
+### Time-Complexity Graphs
 
 Comparing the complexity of sorting algorithms (Bubble Sort, Insertion Sort, Selection Sort)
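
The corrected paragraph maps directly to code; a minimal shellsort sketch in Python (an illustration, not taken from the repository):

def shell_sort(collection):
    # h-sort the list with a shrinking gap sequence (n/2, n/4, ..., 1)
    gap = len(collection) // 2
    while gap > 0:
        # insertion sort on each of the `gap` interleaved sublists
        for i in range(gap, len(collection)):
            temp = collection[i]
            j = i
            while j >= gap and collection[j - gap] > temp:
                collection[j] = collection[j - gap]
                j -= gap
            collection[j] = temp
        gap //= 2
    return collection

print(shell_sort([5, 3, 8, 1, 9, 2]))  # [1, 2, 3, 5, 8, 9]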

data_structures/AVL/AVL.py (+131)

@@ -0,0 +1,131 @@
+'''
+An AVL tree
+'''
+
+
+class Node:
+
+    def __init__(self, label):
+        self.label = label
+        self.left = None
+        self.right = None
+        self.parent = None
+        self.height = 0
+
+    def getLabel(self):
+        return self.label
+
+    def setLabel(self, label):
+        self.label = label
+
+    def getLeft(self):
+        return self.left
+
+    def setLeft(self, left):
+        self.left = left
+
+    def getRight(self):
+        return self.right
+
+    def setRight(self, right):
+        self.right = right
+
+    def getParent(self):
+        return self.parent
+
+    def setParent(self, parent):
+        self.parent = parent
+
+    def setHeight(self, height):
+        self.height = height
+
+    def getHeight(self):
+        return self.height
+
+
+class AVL:
+
+    def __init__(self):
+        self.root = None
+        self.size = 0
+
+    def insert(self, value):
+        node = Node(value)
+        if self.root is None:
+            self.root = node
+            self.size = 1
+        else:
+            # Same as a binary search tree insert
+            dad_node = None
+            curr_node = self.root
+
+            while True:
+                if curr_node is not None:
+                    dad_node = curr_node
+                    if node.getLabel() < curr_node.getLabel():
+                        curr_node = curr_node.getLeft()
+                    else:
+                        curr_node = curr_node.getRight()
+                else:
+                    if node.getLabel() < dad_node.getLabel():
+                        dad_node.setLeft(node)
+                    else:
+                        dad_node.setRight(node)
+                    dad_node.setHeight(dad_node.getHeight() + 1)
+                    # NOTE: assumes both children exist; height bookkeeping
+                    # and rebalancing are still incomplete in this commit
+                    if (dad_node.getRight().getHeight() -
+                            dad_node.getLeft().getHeight() > 1):
+                        self.rebalance(dad_node)
+                    break
+
+    def rebalance(self, node):
+        # the four rotation cases are left as stubs (pass) in this commit
+        if (node.getRight().getHeight() -
+                node.getLeft().getHeight() > 1):    # right-heavy
+            if (node.getRight().getHeight() >
+                    node.getLeft().getHeight()):
+                pass
+            else:
+                pass
+        elif (node.getLeft().getHeight() -
+                node.getRight().getHeight() > 1):   # left-heavy
+            if (node.getLeft().getHeight() >
+                    node.getRight().getHeight()):
+                pass
+            else:
+                pass
+
+    def rotate_left(self, node):
+        # TODO: is this Pythonic enough?
+        aux = node.getLabel()
+        node = node.getRight()
+        node.setHeight(node.getHeight() - 1)
+        node.setLeft(Node(aux))
+        node.getLeft().setHeight(node.getHeight() + 1)
+        node.getRight().setHeight(node.getRight().getHeight() - 1)
+
+    def rotate_right(self, node):
+        aux = node.getLabel()
+        node = node.getLeft()
+        node.setHeight(node.getHeight() - 1)
+        node.setRight(Node(aux))
+        node.getRight().setHeight(node.getHeight() + 1)
+        node.getLeft().setHeight(node.getLeft().getHeight() - 1)
+
+    def double_rotate_left(self, node):
+        self.rotate_right(node.getRight().getRight())
+        self.rotate_left(node)
+
+    def double_rotate_right(self, node):
+        self.rotate_left(node.getLeft().getLeft())
+        self.rotate_right(node)
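
Since rebalance() above is only a stub, it may help to see the quantity an AVL tree actually tracks; a minimal sketch of the balance-factor computation, assuming the Node accessors defined in this file:

def height(node):
    # height of an empty subtree is -1 by convention
    return node.getHeight() if node is not None else -1

def balance_factor(node):
    # an AVL tree keeps this in {-1, 0, 1} for every node:
    # > 1 means left-heavy (rotate right), < -1 means right-heavy (rotate left)
    return height(node.getLeft()) - height(node.getRight())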

data_structures/Binary Tree/binary_seach_tree.py (+7 -5)

@@ -1,6 +1,8 @@
 '''
 A binary search Tree
 '''
+
+
 class Node:
 
     def __init__(self, label):

@@ -12,7 +14,7 @@ def getLabel(self):
         return self.label
 
     def setLabel(self, label):
-            self.label = label
+        self.label = label
 
     def getLeft(self):
         return self.left

@@ -34,7 +36,7 @@ def __init__(self):
 
     def insert(self, label):
 
-        #Create a new Node
+        # Create a new Node
 
         node = Node(label)
 
@@ -45,7 +47,7 @@ def insert(self, label):
         curr_node = self.root
 
         while True:
-            if curr_node != None:
+            if curr_node is not None:
 
                 dad_node = curr_node
 
@@ -61,12 +63,12 @@ def insert(self, label):
             break
 
     def empty(self):
-        if self.root == None:
+        if self.root is None:
             return True
         return False
 
     def preShow(self, curr_node):
-        if curr_node != None:
+        if curr_node is not None:
             print(curr_node.getLabel(), end=" ")
 
             self.preShow(curr_node.getLeft())

data_structures/Graph/Graph.py (+40)

@@ -0,0 +1,40 @@
+# Author: OMKAR PATHAK
+
+# We can use Python's dictionary for constructing the graph.
+
+class AdjacencyList(object):
+    def __init__(self):
+        self.List = {}
+
+    def addEdge(self, fromVertex, toVertex):
+        # check if the vertex is already present
+        if fromVertex in self.List:
+            self.List[fromVertex].append(toVertex)
+        else:
+            self.List[fromVertex] = [toVertex]
+
+    def printList(self):
+        for i in self.List:
+            print(i, '->', ' -> '.join([str(j) for j in self.List[i]]))
+
+if __name__ == '__main__':
+    al = AdjacencyList()
+    al.addEdge(0, 1)
+    al.addEdge(0, 4)
+    al.addEdge(4, 1)
+    al.addEdge(4, 3)
+    al.addEdge(1, 0)
+    al.addEdge(1, 4)
+    al.addEdge(1, 3)
+    al.addEdge(1, 2)
+    al.addEdge(2, 3)
+    al.addEdge(3, 4)
+
+    al.printList()
+
+    # OUTPUT:
+    # 0 -> 1 -> 4
+    # 1 -> 0 -> 4 -> 3 -> 2
+    # 2 -> 3
+    # 3 -> 4
+    # 4 -> 1 -> 3
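
The if/else in addEdge is the manual form of what collections.defaultdict provides; an equivalent sketch (illustrative, not part of this commit):

from collections import defaultdict

class AdjacencyListDD(object):
    def __init__(self):
        # missing keys start out as empty lists automatically
        self.List = defaultdict(list)

    def addEdge(self, fromVertex, toVertex):
        self.List[fromVertex].append(toVertex)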
New file (+61)

@@ -0,0 +1,61 @@
+# Author: OMKAR PATHAK
+
+class Graph():
+    def __init__(self):
+        self.vertex = {}
+
+    # for printing the graph's vertices
+    def printGraph(self):
+        for i in self.vertex.keys():
+            print(i, ' -> ', ' -> '.join([str(j) for j in self.vertex[i]]))
+
+    # for adding an edge between two vertices
+    def addEdge(self, fromVertex, toVertex):
+        # check if the vertex is already present
+        if fromVertex in self.vertex.keys():
+            self.vertex[fromVertex].append(toVertex)
+        else:
+            # else make a new vertex
+            self.vertex[fromVertex] = [toVertex]
+
+    def BFS(self, startVertex):
+        # list for storing already visited vertices;
+        # assumes the vertices are numbered 0 .. n-1
+        visited = [False] * len(self.vertex)
+
+        # create a list to store all the vertices for BFS
+        queue = []
+
+        # mark the source node as visited and enqueue it
+        visited[startVertex] = True
+        queue.append(startVertex)
+
+        while queue:
+            startVertex = queue.pop(0)
+            print(startVertex, end=' ')
+
+            # mark all adjacent nodes as visited and enqueue them
+            for i in self.vertex[startVertex]:
+                if not visited[i]:
+                    queue.append(i)
+                    visited[i] = True
+
+if __name__ == '__main__':
+    g = Graph()
+    g.addEdge(0, 1)
+    g.addEdge(0, 2)
+    g.addEdge(1, 2)
+    g.addEdge(2, 0)
+    g.addEdge(2, 3)
+    g.addEdge(3, 3)
+
+    g.printGraph()
+    print('BFS:')
+    g.BFS(2)
+
+# OUTPUT:
+# 0  ->  1 -> 2
+# 1  ->  2
+# 2  ->  0 -> 3
+# 3  ->  3
+# BFS:
+# 2 0 3 1
New file (+61)

@@ -0,0 +1,61 @@
+# Author: OMKAR PATHAK
+
+class Graph():
+    def __init__(self):
+        self.vertex = {}
+
+    # for printing the graph's vertices
+    def printGraph(self):
+        print(self.vertex)
+        for i in self.vertex.keys():
+            print(i, ' -> ', ' -> '.join([str(j) for j in self.vertex[i]]))
+
+    # for adding an edge between two vertices
+    def addEdge(self, fromVertex, toVertex):
+        # check if the vertex is already present
+        if fromVertex in self.vertex.keys():
+            self.vertex[fromVertex].append(toVertex)
+        else:
+            # else make a new vertex
+            self.vertex[fromVertex] = [toVertex]
+
+    def DFS(self):
+        # visited array for storing already visited nodes;
+        # assumes the vertices are numbered 0 .. n-1
+        visited = [False] * len(self.vertex)
+
+        # call the recursive helper function
+        for i in range(len(self.vertex)):
+            if not visited[i]:
+                self.DFSRec(i, visited)
+
+    def DFSRec(self, startVertex, visited):
+        # mark the start vertex as visited
+        visited[startVertex] = True
+
+        print(startVertex, end=' ')
+
+        # recur for all the vertices adjacent to this node
+        for i in self.vertex[startVertex]:
+            if not visited[i]:
+                self.DFSRec(i, visited)
+
+if __name__ == '__main__':
+    g = Graph()
+    g.addEdge(0, 1)
+    g.addEdge(0, 2)
+    g.addEdge(1, 2)
+    g.addEdge(2, 0)
+    g.addEdge(2, 3)
+    g.addEdge(3, 3)
+
+    g.printGraph()
+    print('DFS:')
+    g.DFS()
+
+# OUTPUT:
+# 0  ->  1 -> 2
+# 1  ->  2
+# 2  ->  0 -> 3
+# 3  ->  3
+# DFS:
+# 0 1 2 3
New file (+27)

@@ -0,0 +1,27 @@
+# Author: OMKAR PATHAK
+
+import Stack  # the Stack class added elsewhere in this commit
+
+def parseParenthesis(string):
+    balanced = 1
+    index = 0
+    myStack = Stack.Stack(len(string))
+    while (index < len(string)) and (balanced == 1):
+        check = string[index]
+        if check == '(':
+            myStack.push(check)
+        else:
+            if myStack.isEmpty():
+                balanced = 0
+            else:
+                myStack.pop()
+        index += 1
+
+    if balanced == 1 and myStack.isEmpty():
+        return True
+    else:
+        return False
+
+if __name__ == '__main__':
+    print(parseParenthesis('((()))'))  # True
+    print(parseParenthesis('((())'))   # False
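
The same stack discipline extends to mixed bracket types by recording which opener was pushed; a sketch using a plain list as the stack (illustrative, not part of this commit):

def balanced(string):
    pairs = {')': '(', ']': '[', '}': '{'}
    stack = []
    for ch in string:
        if ch in '([{':
            stack.append(ch)
        elif ch in pairs:
            # a closing bracket must match the most recent opener
            if not stack or stack.pop() != pairs[ch]:
                return False
    return not stack

print(balanced('{[()]}'))  # True
print(balanced('{[(])}'))  # False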
New file (+48)

@@ -0,0 +1,48 @@
+# Author: OMKAR PATHAK
+
+import Stack  # the Stack class added elsewhere in this commit
+
+def isOperand(char):
+    # True for ASCII letters; equivalent to char.isalpha() for this input
+    return (ord(char) >= ord('a') and ord(char) <= ord('z')) or (ord(char) >= ord('A') and ord(char) <= ord('Z'))
+
+def precedence(char):
+    if char == '+' or char == '-':
+        return 1
+    elif char == '*' or char == '/':
+        return 2
+    elif char == '^':
+        return 3
+    else:
+        return -1
+
+def infixToPostfix(myExp, myStack):
+    postFix = []
+    for i in range(len(myExp)):
+        if isOperand(myExp[i]):
+            postFix.append(myExp[i])
+        elif myExp[i] == '(':
+            myStack.push(myExp[i])
+        elif myExp[i] == ')':
+            topOperator = myStack.pop()
+            while not myStack.isEmpty() and topOperator != '(':
+                postFix.append(topOperator)
+                topOperator = myStack.pop()
+        else:
+            while (not myStack.isEmpty()) and (precedence(myExp[i]) <= precedence(myStack.peek())):
+                postFix.append(myStack.pop())
+            myStack.push(myExp[i])
+
+    while not myStack.isEmpty():
+        postFix.append(myStack.pop())
+    return ' '.join(postFix)
+
+if __name__ == '__main__':
+    myExp = 'a+b*(c^d-e)^(f+g*h)-i'
+    myExp = [i for i in myExp]
+    print('Infix:', ' '.join(myExp))
+    myStack = Stack.Stack(len(myExp))
+    print('Postfix:', infixToPostfix(myExp, myStack))
+
+# OUTPUT:
+# Infix: a + b * ( c ^ d - e ) ^ ( f + g * h ) - i
+# Postfix: a b c d ^ e - f g h * + ^ * + i -
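
One subtlety in the final else branch above: '^' is conventionally right-associative, but precedence(myExp[i]) <= precedence(myStack.peek()) pops an equal-precedence '^' as if it grouped left-to-right. A hedged tweak for the pop condition, assuming the precedence function defined in this file:

def should_pop(top, incoming):
    # pop while the stacked operator binds at least as tightly,
    # except equal-precedence '^', which groups right-to-left
    if incoming == '^' and top == '^':
        return False
    return precedence(incoming) <= precedence(top)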

data_structures/Stacks/Stack.py (+50)

@@ -0,0 +1,50 @@
+# Author: OMKAR PATHAK
+
+class Stack(object):
+    def __init__(self, limit=10):
+        self.stack = []
+        self.limit = limit
+
+    # for printing the stack contents
+    def __str__(self):
+        return ' '.join([str(i) for i in self.stack])
+
+    # for pushing an element onto the stack
+    def push(self, data):
+        if len(self.stack) >= self.limit:
+            print('Stack Overflow')
+        else:
+            self.stack.append(data)
+
+    # for popping the uppermost element
+    def pop(self):
+        if len(self.stack) <= 0:
+            return -1
+        else:
+            return self.stack.pop()
+
+    # for peeking at the top-most element of the stack
+    def peek(self):
+        if len(self.stack) <= 0:
+            return -1
+        else:
+            return self.stack[len(self.stack) - 1]
+
+    # to check if the stack is empty
+    def isEmpty(self):
+        return self.stack == []
+
+    # for checking the size of the stack
+    def size(self):
+        return len(self.stack)
+
+if __name__ == '__main__':
+    myStack = Stack()
+    for i in range(10):
+        myStack.push(i)
+    print(myStack)
+    myStack.pop()   # popping the top element
+    print(myStack)
+    myStack.peek()  # peeking at the top element
+    myStack.isEmpty()
+    myStack.size()
New file (+141)

@@ -0,0 +1,141 @@
+import tensorflow as tf
+from random import choice, shuffle
+from numpy import array
+
+
+def TFKMeansCluster(vectors, noofclusters):
+    """
+    K-Means Clustering using TensorFlow.
+    'vectors' should be a n*k 2-D NumPy array, where n is the number
+    of vectors of dimensionality k.
+    'noofclusters' should be an integer.
+    """
+
+    noofclusters = int(noofclusters)
+    assert noofclusters < len(vectors)
+
+    # Find out the dimensionality
+    dim = len(vectors[0])
+
+    # Will help select random centroids from among the available vectors
+    vector_indices = list(range(len(vectors)))
+    shuffle(vector_indices)
+
+    # GRAPH OF COMPUTATION
+    # We initialize a new graph and set it as the default during each run
+    # of this algorithm. This ensures that as this function is called
+    # multiple times, the default graph doesn't keep getting crowded with
+    # unused ops and Variables from previous function calls.
+    graph = tf.Graph()
+
+    with graph.as_default():
+
+        # SESSION OF COMPUTATION
+        sess = tf.Session()
+
+        ## CONSTRUCTING THE ELEMENTS OF COMPUTATION
+
+        ## First lets ensure we have a Variable vector for each centroid,
+        ## initialized to one of the vectors from the available data points
+        centroids = [tf.Variable((vectors[vector_indices[i]]))
+                     for i in range(noofclusters)]
+        ## These nodes will assign the centroid Variables the appropriate values
+        centroid_value = tf.placeholder("float64", [dim])
+        cent_assigns = []
+        for centroid in centroids:
+            cent_assigns.append(tf.assign(centroid, centroid_value))
+
+        ## Variables for cluster assignments of individual vectors
+        ## (initialized to 0 at first)
+        assignments = [tf.Variable(0) for i in range(len(vectors))]
+        ## These nodes will assign an assignment Variable the appropriate value
+        assignment_value = tf.placeholder("int32")
+        cluster_assigns = []
+        for assignment in assignments:
+            cluster_assigns.append(tf.assign(assignment,
+                                             assignment_value))
+
+        ## Now lets construct the node that will compute the mean
+        # The placeholder for the input
+        mean_input = tf.placeholder("float", [None, dim])
+        # The Node/op takes the input and computes a mean along the 0th
+        # dimension, i.e. the list of input vectors
+        mean_op = tf.reduce_mean(mean_input, 0)
+
+        ## Node for computing Euclidean distances
+        # Placeholders for input
+        v1 = tf.placeholder("float", [dim])
+        v2 = tf.placeholder("float", [dim])
+        euclid_dist = tf.sqrt(tf.reduce_sum(tf.pow(tf.sub(v1, v2), 2)))
+
+        ## This node will figure out which cluster to assign a vector to,
+        ## based on Euclidean distances of the vector from the centroids.
+        # Placeholder for input
+        centroid_distances = tf.placeholder("float", [noofclusters])
+        cluster_assignment = tf.argmin(centroid_distances, 0)
+
+        ## INITIALIZING STATE VARIABLES
+
+        ## This will help initialization of all Variables defined with respect
+        ## to the graph. The Variable-initializer should be defined after
+        ## all the Variables have been constructed, so that each of them
+        ## will be included in the initialization.
+        init_op = tf.initialize_all_variables()
+
+        # Initialize all variables
+        sess.run(init_op)
+
+        ## CLUSTERING ITERATIONS
+
+        # Now perform the Expectation-Maximization steps of K-Means clustering
+        # iterations. To keep things simple, we will only do a set number of
+        # iterations, instead of using a Stopping Criterion.
+        noofiterations = 100
+        for iteration_n in range(noofiterations):
+
+            ## EXPECTATION STEP
+            ## Based on the centroid locations from the last iteration, compute
+            ## the _expected_ centroid assignments.
+            # Iterate over each vector
+            for vector_n in range(len(vectors)):
+                vect = vectors[vector_n]
+                # Compute the Euclidean distance between this vector and each
+                # centroid. Note that this list cannot be named
+                # 'centroid_distances', since that is the input to the
+                # cluster assignment node.
+                distances = [sess.run(euclid_dist, feed_dict={
+                    v1: vect, v2: sess.run(centroid)})
+                    for centroid in centroids]
+                # Now use the cluster assignment node, with the distances
+                # as the input
+                assignment = sess.run(cluster_assignment, feed_dict={
+                    centroid_distances: distances})
+                # Now assign the value to the appropriate state variable
+                sess.run(cluster_assigns[vector_n], feed_dict={
+                    assignment_value: assignment})
+
+            ## MAXIMIZATION STEP
+            # Based on the expected state computed from the Expectation Step,
+            # compute the locations of the centroids so as to maximize the
+            # overall objective of minimizing within-cluster Sum-of-Squares
+            for cluster_n in range(noofclusters):
+                # Collect all the vectors assigned to this cluster
+                assigned_vects = [vectors[i] for i in range(len(vectors))
+                                  if sess.run(assignments[i]) == cluster_n]
+                # Compute the new centroid location
+                new_location = sess.run(mean_op, feed_dict={
+                    mean_input: array(assigned_vects)})
+                # Assign the value to the appropriate variable
+                sess.run(cent_assigns[cluster_n], feed_dict={
+                    centroid_value: new_location})
+
+    # Return centroids and assignments
+    centroids = sess.run(centroids)
+    assignments = sess.run(assignments)
+    return centroids, assignments
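
A note on the API: tf.sub and tf.initialize_all_variables are pre-1.0 TensorFlow names; TensorFlow 1.0 renamed them to tf.subtract and tf.global_variables_initializer. A version-tolerant sketch for the two calls (illustrative, not part of this commit):

# fall back to the old names only when the new ones are absent
sub = getattr(tf, 'subtract', None) or tf.sub
init_op_fn = getattr(tf, 'global_variables_initializer', None) or tf.initialize_all_variables
euclid_dist = tf.sqrt(tf.reduce_sum(tf.pow(sub(v1, v2), 2)))
init_op = init_op_fn()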

machine_learning/gradient_descent.py (+121)

@@ -0,0 +1,121 @@
+"""
+Implementation of gradient descent algorithm for minimizing cost of a linear hypothesis function.
+"""
+import numpy
+
+# List of input, output pairs
+train_data = (((5, 2, 3), 15), ((6, 5, 9), 25),
+              ((11, 12, 13), 41), ((1, 1, 1), 8), ((11, 12, 13), 41))
+test_data = (((515, 22, 13), 555), ((61, 35, 49), 150))
+parameter_vector = [2, 4, 1, 5]
+m = len(train_data)
+LEARNING_RATE = 0.009
+
+
+def _error(example_no, data_set='train'):
+    """
+    :param data_set: train data or test data
+    :param example_no: example number whose error has to be checked
+    :return: error in the example pointed to by example number.
+    """
+    return calculate_hypothesis_value(example_no, data_set) - output(example_no, data_set)
+
+
+def _hypothesis_value(data_input_tuple):
+    """
+    Calculates hypothesis function value for a given input
+    :param data_input_tuple: Input tuple of a particular example
+    :return: Value of hypothesis function at that point.
+    Note that there is a 'bias input' whose value is fixed as 1.
+    It is not explicitly mentioned in the input data, but ML hypothesis
+    functions use it, so it is handled separately: the final addition of
+    parameter_vector[0] below takes care of it.
+    """
+    hyp_val = 0
+    for i in range(len(parameter_vector) - 1):
+        hyp_val += data_input_tuple[i] * parameter_vector[i + 1]
+    hyp_val += parameter_vector[0]
+    return hyp_val
+
+
+def output(example_no, data_set):
+    """
+    :param data_set: test data or train data
+    :param example_no: example whose output is to be fetched
+    :return: output for that example
+    """
+    if data_set == 'train':
+        return train_data[example_no][1]
+    elif data_set == 'test':
+        return test_data[example_no][1]
+
+
+def calculate_hypothesis_value(example_no, data_set):
+    """
+    Calculates hypothesis value for a given example
+    :param data_set: test data or train data
+    :param example_no: example whose hypothesis value is to be calculated
+    :return: hypothesis value for that example
+    """
+    if data_set == "train":
+        return _hypothesis_value(train_data[example_no][0])
+    elif data_set == "test":
+        return _hypothesis_value(test_data[example_no][0])
+
+
+def summation_of_cost_derivative(index, end=m):
+    """
+    Calculates the sum of the cost function derivative
+    :param index: index with respect to which the derivative is calculated
+    :param end: value where the summation ends, default is m, the number of examples
+    :return: the summation of the cost derivative
+    Note: if index is -1, the summation is with respect to the bias parameter.
+    """
+    summation_value = 0
+    for i in range(end):
+        if index == -1:
+            summation_value += _error(i)
+        else:
+            summation_value += _error(i) * train_data[i][0][index]
+    return summation_value
+
+
+def get_cost_derivative(index):
+    """
+    :param index: index of the parameter vector with respect to which the derivative is calculated
+    :return: derivative with respect to that index
+    Note: if index is -1, the derivative is with respect to the bias parameter.
+    """
+    cost_derivative_value = summation_of_cost_derivative(index, m) / m
+    return cost_derivative_value
+
+
+def run_gradient_descent():
+    global parameter_vector
+    # Tune these values to set a tolerance for the predicted output
+    absolute_error_limit = 0.000002
+    relative_error_limit = 0
+    j = 0
+    while True:
+        j += 1
+        temp_parameter_vector = [0, 0, 0, 0]
+        for i in range(0, len(parameter_vector)):
+            cost_derivative = get_cost_derivative(i - 1)
+            temp_parameter_vector[i] = parameter_vector[i] - \
+                LEARNING_RATE * cost_derivative
+        if numpy.allclose(parameter_vector, temp_parameter_vector,
+                          atol=absolute_error_limit, rtol=relative_error_limit):
+            break
+        parameter_vector = temp_parameter_vector
+    print("Number of iterations:", j)
+
+
+def test_gradient_descent():
+    for i in range(len(test_data)):
+        print("Actual output value:", output(i, 'test'))
+        print("Hypothesis output:", calculate_hypothesis_value(i, 'test'))
+
+
+if __name__ == '__main__':
+    run_gradient_descent()
+    print("\nTesting gradient descent for a linear hypothesis function.\n")
+    test_gradient_descent()
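
For intuition, the loop in run_gradient_descent implements the batch update theta_j <- theta_j - alpha * (1/m) * sum((h(x_i) - y_i) * x_ij). A quick sanity check of the hypothesis on the first training example ((5, 2, 3), 15) with the initial parameter_vector [2, 4, 1, 5]:

# h(x) = 2 + 4*5 + 1*2 + 5*3 = 39, so the initial error on example 0 is 39 - 15 = 24
assert _hypothesis_value((5, 2, 3)) == 39
assert _error(0) == 24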

machine_learning/linear_regression.py (+108)

@@ -0,0 +1,108 @@
+"""
+Linear regression is the most basic type of regression commonly used for
+predictive analysis. The idea is pretty simple: we have a dataset and we have
+features associated with it. The features should be chosen very cautiously,
+as they determine how well our model will be able to make future predictions.
+We try to set these feature weights, over many iterations, so that they best
+fit our dataset. In this particular code, I used a CSGO dataset (ADR vs
+Rating). We try to best fit a line through the dataset and estimate the parameters.
+"""
+
+import requests
+import numpy as np
+
+
+def collect_dataset():
+    """ Collect the CSGO dataset
+    The dataset contains ADR vs Rating of a player
+    :return : dataset obtained from the link, as a matrix
+    """
+    response = requests.get('https://raw.githubusercontent.com/yashLadha/' +
+                            'The_Math_of_Intelligence/master/Week1/ADRvs' +
+                            'Rating.csv')
+    lines = response.text.splitlines()
+    data = []
+    for item in lines:
+        item = item.split(',')
+        data.append(item)
+    data.pop(0)  # This removes the label row from the list
+    dataset = np.matrix(data)
+    return dataset
+
+
+def run_steep_gradient_descent(data_x, data_y,
+                               len_data, alpha, theta):
+    """ Run one step of gradient descent and update the feature vector accordingly
+    :param data_x : contains the dataset
+    :param data_y : contains the output associated with each data entry
+    :param len_data : length of the data
+    :param alpha : learning rate of the model
+    :param theta : feature vector (weights for our model)
+    :return : updated features, using
+              curr_features - alpha * gradient (w.r.t. feature)
+    """
+    n = len_data
+
+    prod = np.dot(theta, data_x.transpose())
+    prod -= data_y.transpose()
+    sum_grad = np.dot(prod, data_x)
+    theta = theta - (alpha / n) * sum_grad
+    return theta
+
+
+def sum_of_square_error(data_x, data_y, len_data, theta):
+    """ Return sum of square error for error calculation
+    :param data_x : contains our dataset
+    :param data_y : contains the output (result vector)
+    :param len_data : len of the dataset
+    :param theta : contains the feature vector
+    :return : sum of square error computed from the given features
+    """
+    prod = np.dot(theta, data_x.transpose())
+    prod -= data_y.transpose()
+    sum_elem = np.sum(np.square(prod))
+    error = sum_elem / (2 * len_data)
+    return error
+
+
+def run_linear_regression(data_x, data_y):
+    """ Implement linear regression over the dataset
+    :param data_x : contains our dataset
+    :param data_y : contains the output (result vector)
+    :return : feature vector for the line of best fit
+    """
+    iterations = 100000
+    alpha = 0.0001550
+
+    no_features = data_x.shape[1]
+    len_data = data_x.shape[0] - 1
+
+    theta = np.zeros((1, no_features))
+
+    for i in range(0, iterations):
+        theta = run_steep_gradient_descent(data_x, data_y,
+                                           len_data, alpha, theta)
+        error = sum_of_square_error(data_x, data_y, len_data, theta)
+        print('At Iteration %d - Error is %.5f ' % (i + 1, error))
+
+    return theta
+
+
+def main():
+    """ Driver function """
+    data = collect_dataset()
+
+    len_data = data.shape[0]
+    data_x = np.c_[np.ones(len_data), data[:, :-1]].astype(float)
+    data_y = data[:, -1].astype(float)
+
+    theta = run_linear_regression(data_x, data_y)
+    len_result = theta.shape[1]
+    print('Resultant Feature vector : ')
+    for i in range(0, len_result):
+        print('%.5f' % (theta[0, i]))
+
+
+if __name__ == '__main__':
+    main()
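
As a cross-check on the gradient-descent result, the same least-squares problem has the closed-form normal-equation solution theta = (X^T X)^(-1) X^T y; a hedged sketch (illustrative, not part of this commit):

import numpy as np

def normal_equation(data_x, data_y):
    # solve (X^T X) theta = X^T y; fine while the feature count stays small
    xtx = data_x.T.dot(data_x)
    return np.linalg.solve(xtx, data_x.T.dot(data_y))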

other/anagrams.txt

Whitespace-only changes.

searches/binary_search.py (+33)

@@ -80,6 +80,39 @@ def binary_search_std_lib(sorted_collection, item):
         return index
     return None
 
+def binary_search_by_recursion(sorted_collection, item, left, right):
+    """Pure implementation of binary search algorithm in Python by recursion
+
+    Be careful: the collection must be sorted, otherwise the result will be
+    unpredictable.
+    The first call should be made with left=0 and right=(len(sorted_collection)-1).
+
+    :param sorted_collection: some sorted collection with comparable items
+    :param item: item value to search
+    :return: index of found item or None if item is not found
+
+    Examples:
+    >>> binary_search_by_recursion([0, 5, 7, 10, 15], 0, 0, 4)
+    0
+
+    >>> binary_search_by_recursion([0, 5, 7, 10, 15], 15, 0, 4)
+    4
+
+    >>> binary_search_by_recursion([0, 5, 7, 10, 15], 5, 0, 4)
+    1
+
+    >>> binary_search_by_recursion([0, 5, 7, 10, 15], 6, 0, 4)
+
+    """
+    if right < left:
+        # base case: the search interval is empty, so the item is absent
+        return None
+    midpoint = left + (right - left) // 2
+
+    if sorted_collection[midpoint] == item:
+        return midpoint
+    elif sorted_collection[midpoint] > item:
+        return binary_search_by_recursion(sorted_collection, item, left, midpoint - 1)
+    else:
+        return binary_search_by_recursion(sorted_collection, item, midpoint + 1, right)
 
 def __assert_sorted(collection):
     """Check if collection is sorted, if not - raises :py:class:`ValueError`

sorts/bucket_sort.py (+56)

@@ -0,0 +1,56 @@
+#!/usr/bin/env python
+# Author: OMKAR PATHAK
+# This program will illustrate how to implement the bucket sort algorithm
+
+# Wikipedia says: Bucket sort, or bin sort, is a sorting algorithm that works by distributing the
+# elements of an array into a number of buckets. Each bucket is then sorted individually, either using
+# a different sorting algorithm, or by recursively applying the bucket sorting algorithm. It is a
+# distribution sort, and is a cousin of radix sort in the most-to-least significant digit flavour.
+# Bucket sort is a generalization of pigeonhole sort. Bucket sort can be implemented with comparisons
+# and therefore can also be considered a comparison sort algorithm. The computational complexity estimates
+# involve the number of buckets.
+
+# Time Complexity of Solution:
+# Best and Average Case O(n + k), where k is the number of buckets;
+# Worst Case O(n^2), when all elements land in a single bucket.
+
+import math
+
+DEFAULT_BUCKET_SIZE = 5
+
+def insertionSort(myList):
+    # insertion sort, used here to sort the individual buckets
+    for i in range(1, len(myList)):
+        key = myList[i]
+        j = i - 1
+        while j >= 0 and myList[j] > key:
+            myList[j + 1] = myList[j]
+            j -= 1
+        myList[j + 1] = key
+    return myList
+
+def bucketSort(myList, bucketSize=DEFAULT_BUCKET_SIZE):
+    if len(myList) == 0:
+        print('You don\'t have any elements in array!')
+        return myList
+
+    minValue = myList[0]
+    maxValue = myList[0]
+
+    # For finding minimum and maximum values
+    for i in range(0, len(myList)):
+        if myList[i] < minValue:
+            minValue = myList[i]
+        elif myList[i] > maxValue:
+            maxValue = myList[i]
+
+    # Initialize buckets
+    bucketCount = math.floor((maxValue - minValue) / bucketSize) + 1
+    buckets = []
+    for i in range(0, bucketCount):
+        buckets.append([])
+
+    # Put the values into the buckets
+    for i in range(0, len(myList)):
+        buckets[math.floor((myList[i] - minValue) / bucketSize)].append(myList[i])
+
+    # Sort each bucket and place its contents back into one array
+    sortedArray = []
+    for i in range(0, len(buckets)):
+        insertionSort(buckets[i])
+        for j in range(0, len(buckets[i])):
+            sortedArray.append(buckets[i][j])
+
+    return sortedArray
+
+if __name__ == '__main__':
+    sortedArray = bucketSort([12, 23, 4, 5, 3, 2, 12, 81, 56, 95])
+    print(sortedArray)

sorts/topological_sort.py (+32)

@@ -0,0 +1,32 @@
+#      a
+#     / \
+#    b   c
+#   / \
+#  d   e
+edges = {'a': ['c', 'b'], 'b': ['d', 'e'], 'c': [], 'd': [], 'e': []}
+vertices = ['a', 'b', 'c', 'd', 'e']
+
+
+def topological_sort(start, visited, sort):
+    """Perform topological sort on a directed acyclic graph."""
+    current = start
+    # add current to visited
+    visited.append(current)
+    neighbors = edges[current]
+    for neighbor in neighbors:
+        # if neighbor not in visited, visit
+        if neighbor not in visited:
+            sort = topological_sort(neighbor, visited, sort)
+    # once all neighbors are visited, add current to sort
+    sort.append(current)
+    # if all vertices haven't been visited, select a new one to visit
+    if len(visited) != len(vertices):
+        for vertice in vertices:
+            if vertice not in visited:
+                sort = topological_sort(vertice, visited, sort)
+    # return sort
+    return sort
+
+
+# each vertex is appended only after its children, so `sort` holds the
+# vertices in reverse topological order; reverse it before printing
+sort = topological_sort('a', [], [])
+print(sort[::-1])
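
An alternative worth knowing is Kahn's algorithm, which is iterative and detects cycles; a sketch over the same edges dict (illustrative, not part of this commit):

from collections import deque

def kahn_topological_sort(edges):
    # count incoming edges for every vertex
    indegree = {v: 0 for v in edges}
    for targets in edges.values():
        for t in targets:
            indegree[t] += 1
    queue = deque(v for v, d in indegree.items() if d == 0)
    order = []
    while queue:
        v = queue.popleft()
        order.append(v)
        for t in edges[v]:
            indegree[t] -= 1
            if indegree[t] == 0:
                queue.append(t)
    if len(order) != len(edges):
        raise ValueError('graph has a cycle')
    return order

print(kahn_topological_sort(edges))  # for the edges above: ['a', 'c', 'b', 'd', 'e']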
