Skip to content

Commit

Permalink
Additional modules and updates
Browse files Browse the repository at this point in the history
Adding additional 3rd and 4th module while updating various files in others.
  • Loading branch information
hessmjr committed Oct 18, 2014
1 parent 5f4531c commit 335a99a
Show file tree
Hide file tree
Showing 22 changed files with 1,316 additions and 39 deletions.
4 changes: 2 additions & 2 deletions Module 1/App_1.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
"""
Algorithmic Thinking - Module 1
Mark Hess
09-07-2014
Degree Distributions for graphs
Graph Basics and Random Digraphs
Analysis of citation graphs
Application file
"""

Expand Down
6 changes: 2 additions & 4 deletions Module 1/DPA.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
"""
Algorithmic Thinking - Module 1
Mark Hess
09-07-2014
Degree Distributions for graphs
Provided Helper class for implementing efficient version
of DPA algorithm
Graph Basics and Random Digraphs
Provided Helper class for implementing efficient version of DPA algorithm
"""

# general imports
Expand Down
2 changes: 1 addition & 1 deletion Module 1/Project_1.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
"""
Algorithmic Thinking - Module 1
Mark Hess
09-07-2014
Graph Basics and Random Digraphs
Degree Distributions for graphs
Project File
"""
Expand Down
7 changes: 5 additions & 2 deletions Module 2/App_2.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
"""
Application 2
Algorithmic Thinking - Module 2
09-21-2014
Provided code for Application portion of Module 2
Breadth-First Search and Connected Components
Analysis of a Computer Network
Application File
"""

# general imports
Expand Down
67 changes: 43 additions & 24 deletions Module 2/Project_2.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
"""
Project 2 for Algorithmic Thinking
No collaboration
4 functions:
Breadth-first search (visited)
Connected component (visited)
Largest Connected Component Size
Graph Resilience
"""

"""
Algorithmic Thinking - Module 2
09-21-2014
Breadth-First Search and Connected Components
Connected Components and graph resilience
Project File
"""

from collections import deque
import random

def bfs_visited(ugraph, start_node):
    """
    Breadth-first search over an undirected graph, given as a dict
    mapping each node to the set of its neighbors.
    return: set of nodes reachable from start_node (including itself)
    """
    # variable initialization
    queue = deque()
    visited = set([start_node])
    queue.append(start_node)

    # main loop for computing which nodes are visited by start node
    while len(queue) > 0:
        node_j = queue.popleft()
        for neighbor in ugraph[node_j]:
            if neighbor not in visited:
                # mark before enqueueing so each node is queued at most once
                visited.add(neighbor)
                queue.append(neighbor)

    # return set of nodes visited by start node
    return visited



def cc_visited(ugraph):
    """
    Compute the connected components of an undirected graph.
    return: list of sets of nodes, one set per connected component
    """
    # variable initialization
    remaining = set(ugraph)
    connected = []

    # main loop for calculating all connected components in undirected graph
    while remaining:
        # any remaining node serves as a start node; next(iter(...)) avoids
        # random.sample, which no longer accepts sets on Python >= 3.11
        node_i = next(iter(remaining))
        current = bfs_visited(ugraph, node_i)
        connected.append(current)
        # every node of the component is now accounted for
        remaining -= current

    # return list of sets of components
    return connected

def largest_cc_size(ugraph):
    """
    Compute the size of the largest connected component of an
    undirected graph.
    return: int, 0 for an empty graph
    """
    # variable initialization
    size = 0
    connected = cc_visited(ugraph)

    # loop through components to find the largest one
    for component in connected:
        if len(component) > size:
            size = len(component)

    # return int of largest component
    return size

def compute_resilience(ugraph, attack_order):
    """
    Takes in an undirected graph and a list of nodes. Iterates through the
    attack order, removing each given node and its edges from the graph,
    then computes the largest connected component of the remaining graph.
    return: list of largest-connected-component sizes; the first entry is
    the size before any removal, then one entry per removed node
    """
    # initialize variables; copy the adjacency sets as well, because
    # ugraph.copy() alone is shallow and discard() below would mutate
    # the caller's graph
    sizes = []
    newgraph = dict((node, set(ugraph[node])) for node in ugraph)
    sizes.append(largest_cc_size(newgraph))

    # loop for removing nodes and recording resilience after each attack
    for node in attack_order:
        newgraph.pop(node)
        for other in newgraph:
            newgraph[other].discard(node)
        sizes.append(largest_cc_size(newgraph))

    # return list of sizes for components
    return sizes

Binary file added Module 3/App_3_Q1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Module 3/App_3_Q10a.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Module 3/App_3_Q10b.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Module 3/App_3_Q10c.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Module 3/App_3_Q2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Module 3/App_3_Q3.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Module 3/App_3_Q5.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Module 3/App_3_Q6.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
184 changes: 184 additions & 0 deletions Module 3/Project_3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
"""
Algorithmic Thinking - Module 3
10-5-2014
Divide and Conquer Method and Clustering
Closest Pairs and Clustering Algorithms
Project File
Student will implement four functions:
slow_closest_pairs(cluster_list)
fast_closest_pair(cluster_list) - implement fast_helper()
hierarchical_clustering(cluster_list, num_clusters)
kmeans_clustering(cluster_list, num_clusters, num_iterations)
where cluster_list is a list of clusters in the plane
"""

import math
import alg_cluster

def pair_distance(cluster_list, idx1, idx2):
    """
    Helper that measures the Euclidean distance between the clusters at
    indices idx1 and idx2 of cluster_list.
    Returns a tuple (dist, index_a, index_b) with index_a < index_b, where
    dist is the distance between cluster_list[idx1] and cluster_list[idx2].
    """
    dist = cluster_list[idx1].distance(cluster_list[idx2])
    low_idx = min(idx1, idx2)
    high_idx = max(idx1, idx2)
    return (dist, low_idx, high_idx)



def slow_closest_pairs(cluster_list):
    """
    Brute-force O(n^2) search over all pairs of clusters in cluster_list.
    Returns the set of all tuples (dist, idx1, idx2) for which
    cluster_list[idx1] and cluster_list[idx2] achieve the minimum distance.
    With fewer than two clusters, returns {(inf, -1, -1)}.
    """
    best_dist = float("inf")       # smallest distance seen so far
    best_pairs = []                # all pairs achieving best_dist
    tolerance = .000000001         # float tolerance for distance ties
    num_clusters = len(cluster_list)

    for idx_a in range(num_clusters):
        for idx_b in range(num_clusters):
            if idx_a == idx_b:
                continue           # never compare a cluster to itself
            candidate = pair_distance(cluster_list, idx_a, idx_b)
            if abs(candidate[0] - best_dist) < tolerance:
                # tie with the current best: keep it alongside the others
                best_pairs.append(candidate)
            elif candidate[0] < best_dist:
                # strictly better: restart the tie list
                best_pairs = [candidate]
                best_dist = candidate[0]

    if not best_pairs:
        best_pairs = [(best_dist, -1, -1)]   # default sentinel result
    return set(best_pairs)



def fast_closest_pair(cluster_list):
    """
    Compute a closest pair of clusters in cluster_list using the
    O(n log(n)) divide and conquer algorithm.
    Returns a tuple (distance, idx1, idx2) with idx1 < idx2 where
    cluster_list[idx1] and cluster_list[idx2] have the smallest
    distance of any pair of clusters.
    """

    def fast_helper(cluster_list, horiz_order, vert_order):
        """
        Divide and conquer method for computing the distance between the
        closest pair of points. horiz_order and vert_order are lists of
        cluster indices ordered horizontally and vertically.
        Returns a tuple (distance, idx1, idx2) with idx1 < idx2 where
        cluster_list[idx1] and cluster_list[idx2] have the smallest
        distance of any pair of clusters.
        """
        ## base case: fewer than 4 clusters, use the brute-force algorithm
        if len(horiz_order) < 4:
            list_q = [cluster_list[idx] for idx in horiz_order]
            closest_pair = list(slow_closest_pairs(list_q))
            # translate indices local to list_q back into cluster_list indices
            return (closest_pair[0][0],
                    horiz_order[closest_pair[0][1]],
                    horiz_order[closest_pair[0][2]])

        ## divide: split cluster indices in half and recurse on each side
        idx_m = len(horiz_order) // 2          # midpoint of horizontal order
        horiz_left = horiz_order[:idx_m]
        horiz_right = horiz_order[idx_m:]
        # build the membership set ONCE; rebuilding it inside the
        # comprehension (as before) made each split O(n^2)
        left_set = set(horiz_left)
        # vert_order holds exactly the indices of horiz_order, so the right
        # half is the complement of the left half
        vert_left = [idx for idx in vert_order if idx in left_set]
        vert_right = [idx for idx in vert_order if idx not in left_set]
        left_pair = fast_helper(cluster_list, horiz_left, vert_left)
        right_pair = fast_helper(cluster_list, horiz_right, vert_right)
        if left_pair[0] < right_pair[0]:
            closest_pair = left_pair
        else:
            closest_pair = right_pair

        ## conquer: check pairs straddling the dividing vertical strip
        hcoord = (1 / 2.0) * (cluster_list[horiz_order[idx_m - 1]].horiz_center() +
                              cluster_list[horiz_order[idx_m]].horiz_center())
        list_split = [idx for idx in vert_order
                      if abs(cluster_list[idx].horiz_center() - hcoord) < closest_pair[0]]
        # each strip point only needs comparing to the next three in
        # vertical order
        for idx_u in range(len(list_split) - 1):
            for idx_v in range(idx_u + 1, min(idx_u + 3, len(list_split) - 1) + 1):
                contender = pair_distance(cluster_list, list_split[idx_u], list_split[idx_v])
                if contender[0] < closest_pair[0]:
                    closest_pair = contender

        return closest_pair

    # compute list of indices for the clusters ordered in the horizontal direction
    hcoord_and_index = [(cluster_list[idx].horiz_center(), idx)
                        for idx in range(len(cluster_list))]
    hcoord_and_index.sort()
    horiz_order = [hcoord_and_index[idx][1] for idx in range(len(hcoord_and_index))]

    # compute list of indices for the clusters ordered in vertical direction
    vcoord_and_index = [(cluster_list[idx].vert_center(), idx)
                        for idx in range(len(cluster_list))]
    vcoord_and_index.sort()
    vert_order = [vcoord_and_index[idx][1] for idx in range(len(vcoord_and_index))]

    # compute answer recursively and normalize index order
    answer = fast_helper(cluster_list, horiz_order, vert_order)
    return (answer[0], min(answer[1:]), max(answer[1:]))



def hierarchical_clustering(cluster_list, num_clusters):
    """
    Compute a hierarchical clustering of a set of clusters.
    Note: the function mutates cluster_list.
    Input: list of clusters, target number of clusters
    Output: list of clusters whose length is num_clusters
    """
    # repeatedly merge the closest pair until few enough clusters remain
    while len(cluster_list) > num_clusters:
        _, idx_keep, idx_drop = fast_closest_pair(cluster_list)
        cluster_list[idx_keep].merge_clusters(cluster_list[idx_drop])
        cluster_list.pop(idx_drop)
    return cluster_list



def kmeans_clustering(cluster_list, num_clusters, num_iterations):
    """
    Compute the k-means clustering of a set of clusters.
    Input: list of clusters, number of clusters, number of iterations
    Output: list of clusters whose length is num_clusters
    """
    # seed the centers with the num_clusters most populous input clusters
    by_population = sorted(cluster_list,
                           key=lambda cluster: cluster.total_population(),
                           reverse=True)
    k_clusters = by_population[:num_clusters]

    for _ in range(num_iterations):
        # NOTE(review): each fresh center is an empty cluster with
        # population 1 at (0, 0) — confirm merge_clusters tolerates this
        # seed without skewing the weighted center
        new_clusters = [alg_cluster.Cluster(set([]), 0, 0, 1, 0)
                        for _ in range(num_clusters)]
        # assign every input cluster to its nearest current center
        for cluster in cluster_list:
            distances = [cluster.distance(center) for center in k_clusters]
            nearest = distances.index(min(distances))
            new_clusters[nearest].merge_clusters(cluster)
        k_clusters = new_clusters[:]

    return k_clusters















46 changes: 46 additions & 0 deletions Module 3/Test_3.py

Large diffs are not rendered by default.

Loading

0 comments on commit 335a99a

Please sign in to comment.