Skip to content

Commit

Permalink
Additional modules and updates
Browse files Browse the repository at this point in the history
Adding additional 3rd and 4th module while updating various files in others.
  • Loading branch information
hessmjr committed Oct 18, 2014
1 parent 5f4531c commit 335a99a
Show file tree
Hide file tree
Showing 22 changed files with 1,316 additions and 39 deletions.
4 changes: 2 additions & 2 deletions Module 1/App_1.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
"""
Algorithmic Thinking - Module 1
Mark Hess
09-07-2014
Degree Distributions for graphs
Graph Basics and Random Digraphs
Analysis of citation graphs
Application file
"""

Expand Down
6 changes: 2 additions & 4 deletions Module 1/DPA.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
"""
Algorithmic Thinking - Module 1
Mark Hess
09-07-2014
Degree Distributions for graphs
Provided Helper class for implementing efficient version
of DPA algorithm
Graph Basics and Random Digraphs
Provided Helper class for implementing efficient version of DPA algorithm
"""

# general imports
Expand Down
2 changes: 1 addition & 1 deletion Module 1/Project_1.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
"""
Algorithmic Thinking - Module 1
Mark Hess
09-07-2014
Graph Basics and Random Digraphs
Degree Distributions for graphs
Project File
"""
Expand Down
7 changes: 5 additions & 2 deletions Module 2/App_2.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
"""
Application 2
Algorithmic Thinking - Module 2
09-21-2014
Provided code for Application portion of Module 2
Breadth-First Search and Connected Components
Analysis of a Computer Network
Application File
"""

# general imports
Expand Down
67 changes: 43 additions & 24 deletions Module 2/Project_2.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
"""
Project 2 for Algorithmic Thinking
No collaboration
4 functions:
Breadth-first search (visited)
Connected component (visited)
Largest Connected Component Size
Graph Resilience
"""

"""
Algorithmic Thinking - Module 2
09-21-2014
Breadth-First Search and Connected Components
Connected Components and graph resilience
Project File
"""

from collections import deque
import random

def bfs_visited(ugraph, start_node):
    """
    Breadth-first search over an undirected graph, given as a dict
    mapping each node to the set of its neighbors.
    return: set of nodes reachable from start_node (including itself)
    """
    # variable initialization
    queue = deque()
    visited = set([start_node])
    queue.append(start_node)

    # main loop for computing which nodes are visited by start node
    while len(queue) > 0:
        node_j = queue.popleft()
        for neighbor in ugraph[node_j]:
            if neighbor not in visited:
                # mark before enqueueing so each node is queued at most once
                visited.add(neighbor)
                queue.append(neighbor)

    # return set of nodes visited by start node
    return visited



def cc_visited(ugraph):
    """
    Compute the connected components of an undirected graph.
    return: list of sets of nodes, one set per connected component
    """
    # variable initialization
    remaining = set(ugraph)
    connected = []

    # main loop for calculating all connected components in undirected graph
    while remaining:
        # any remaining node serves as a start node; next(iter(...)) avoids
        # random.sample, which no longer accepts sets on Python >= 3.11
        node_i = next(iter(remaining))
        current = bfs_visited(ugraph, node_i)
        connected.append(current)
        # every node of the component is now accounted for
        remaining -= current

    # return list of sets of components
    return connected

def largest_cc_size(ugraph):
    """
    Compute the size of the largest connected component of an
    undirected graph.
    return: int, 0 for an empty graph
    """
    # variable initialization
    size = 0
    connected = cc_visited(ugraph)

    # loop through components to find the largest one
    for component in connected:
        if len(component) > size:
            size = len(component)

    # return int of largest component
    return size

def compute_resilience(ugraph, attack_order):
    """
    Takes in an undirected graph and a list of nodes. Iterates through the
    attack order, removing each given node and its edges from the graph,
    then computes the largest connected component of the remaining graph.
    return: list of largest-connected-component sizes; the first entry is
    the size before any removal, then one entry per removed node
    """
    # initialize variables; copy the adjacency sets as well, because
    # ugraph.copy() alone is shallow and discard() below would mutate
    # the caller's graph
    sizes = []
    newgraph = dict((node, set(ugraph[node])) for node in ugraph)
    sizes.append(largest_cc_size(newgraph))

    # loop for removing nodes and recording resilience after each attack
    for node in attack_order:
        newgraph.pop(node)
        for other in newgraph:
            newgraph[other].discard(node)
        sizes.append(largest_cc_size(newgraph))

    # return list of sizes for components
    return sizes

Binary file added Module 3/App_3_Q1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Module 3/App_3_Q10a.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Module 3/App_3_Q10b.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Module 3/App_3_Q10c.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Module 3/App_3_Q2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Module 3/App_3_Q3.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Module 3/App_3_Q5.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Module 3/App_3_Q6.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
184 changes: 184 additions & 0 deletions Module 3/Project_3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
"""
Algorithmic Thinking - Module 3
10-5-2014
Divide and Conquer Method and Clustering
Closest Pairs and Clustering Algorithms
Project File
Student will implement four functions:
slow_closest_pairs(cluster_list)
fast_closest_pair(cluster_list) - implement fast_helper()
hierarchical_clustering(cluster_list, num_clusters)
kmeans_clustering(cluster_list, num_clusters, num_iterations)
where cluster_list is a list of clusters in the plane
"""

import math
import alg_cluster

def pair_distance(cluster_list, idx1, idx2):
    """
    Helper that measures the Euclidean distance between the clusters at
    indices idx1 and idx2 of cluster_list.
    Returns a tuple (dist, index_a, index_b) with index_a < index_b, where
    dist is the distance between cluster_list[idx1] and cluster_list[idx2].
    """
    dist = cluster_list[idx1].distance(cluster_list[idx2])
    low_idx = min(idx1, idx2)
    high_idx = max(idx1, idx2)
    return (dist, low_idx, high_idx)



def slow_closest_pairs(cluster_list):
    """
    Brute-force O(n^2) search over all pairs of clusters in cluster_list.
    Returns the set of all tuples (dist, idx1, idx2) for which
    cluster_list[idx1] and cluster_list[idx2] achieve the minimum distance.
    With fewer than two clusters, returns {(inf, -1, -1)}.
    """
    best_dist = float("inf")       # smallest distance seen so far
    best_pairs = []                # all pairs achieving best_dist
    tolerance = .000000001         # float tolerance for distance ties
    num_clusters = len(cluster_list)

    for idx_a in range(num_clusters):
        for idx_b in range(num_clusters):
            if idx_a == idx_b:
                continue           # never compare a cluster to itself
            candidate = pair_distance(cluster_list, idx_a, idx_b)
            if abs(candidate[0] - best_dist) < tolerance:
                # tie with the current best: keep it alongside the others
                best_pairs.append(candidate)
            elif candidate[0] < best_dist:
                # strictly better: restart the tie list
                best_pairs = [candidate]
                best_dist = candidate[0]

    if not best_pairs:
        best_pairs = [(best_dist, -1, -1)]   # default sentinel result
    return set(best_pairs)



def fast_closest_pair(cluster_list):
    """
    Compute a closest pair of clusters in cluster_list using the
    O(n log(n)) divide and conquer algorithm.
    Returns a tuple (distance, idx1, idx2) with idx1 < idx2 where
    cluster_list[idx1] and cluster_list[idx2] have the smallest
    distance of any pair of clusters.
    """

    def fast_helper(cluster_list, horiz_order, vert_order):
        """
        Divide and conquer method for computing the distance between the
        closest pair of points. horiz_order and vert_order are lists of
        cluster indices ordered horizontally and vertically.
        Returns a tuple (distance, idx1, idx2) with idx1 < idx2 where
        cluster_list[idx1] and cluster_list[idx2] have the smallest
        distance of any pair of clusters.
        """
        ## base case: fewer than 4 clusters, use the brute-force algorithm
        if len(horiz_order) < 4:
            list_q = [cluster_list[idx] for idx in horiz_order]
            closest_pair = list(slow_closest_pairs(list_q))
            # translate indices local to list_q back into cluster_list indices
            return (closest_pair[0][0],
                    horiz_order[closest_pair[0][1]],
                    horiz_order[closest_pair[0][2]])

        ## divide: split cluster indices in half and recurse on each side
        idx_m = len(horiz_order) // 2          # midpoint of horizontal order
        horiz_left = horiz_order[:idx_m]
        horiz_right = horiz_order[idx_m:]
        # build the membership set ONCE; rebuilding it inside the
        # comprehension (as before) made each split O(n^2)
        left_set = set(horiz_left)
        # vert_order holds exactly the indices of horiz_order, so the right
        # half is the complement of the left half
        vert_left = [idx for idx in vert_order if idx in left_set]
        vert_right = [idx for idx in vert_order if idx not in left_set]
        left_pair = fast_helper(cluster_list, horiz_left, vert_left)
        right_pair = fast_helper(cluster_list, horiz_right, vert_right)
        if left_pair[0] < right_pair[0]:
            closest_pair = left_pair
        else:
            closest_pair = right_pair

        ## conquer: check pairs straddling the dividing vertical strip
        hcoord = (1 / 2.0) * (cluster_list[horiz_order[idx_m - 1]].horiz_center() +
                              cluster_list[horiz_order[idx_m]].horiz_center())
        list_split = [idx for idx in vert_order
                      if abs(cluster_list[idx].horiz_center() - hcoord) < closest_pair[0]]
        # each strip point only needs comparing to the next three in
        # vertical order
        for idx_u in range(len(list_split) - 1):
            for idx_v in range(idx_u + 1, min(idx_u + 3, len(list_split) - 1) + 1):
                contender = pair_distance(cluster_list, list_split[idx_u], list_split[idx_v])
                if contender[0] < closest_pair[0]:
                    closest_pair = contender

        return closest_pair

    # compute list of indices for the clusters ordered in the horizontal direction
    hcoord_and_index = [(cluster_list[idx].horiz_center(), idx)
                        for idx in range(len(cluster_list))]
    hcoord_and_index.sort()
    horiz_order = [hcoord_and_index[idx][1] for idx in range(len(hcoord_and_index))]

    # compute list of indices for the clusters ordered in vertical direction
    vcoord_and_index = [(cluster_list[idx].vert_center(), idx)
                        for idx in range(len(cluster_list))]
    vcoord_and_index.sort()
    vert_order = [vcoord_and_index[idx][1] for idx in range(len(vcoord_and_index))]

    # compute answer recursively and normalize index order
    answer = fast_helper(cluster_list, horiz_order, vert_order)
    return (answer[0], min(answer[1:]), max(answer[1:]))



def hierarchical_clustering(cluster_list, num_clusters):
    """
    Compute a hierarchical clustering of a set of clusters.
    Note: the function mutates cluster_list.
    Input: list of clusters, target number of clusters
    Output: list of clusters whose length is num_clusters
    """
    # repeatedly merge the closest pair until few enough clusters remain
    while len(cluster_list) > num_clusters:
        _, idx_keep, idx_drop = fast_closest_pair(cluster_list)
        cluster_list[idx_keep].merge_clusters(cluster_list[idx_drop])
        cluster_list.pop(idx_drop)
    return cluster_list



def kmeans_clustering(cluster_list, num_clusters, num_iterations):
    """
    Compute the k-means clustering of a set of clusters.
    Input: list of clusters, number of clusters, number of iterations
    Output: list of clusters whose length is num_clusters
    """
    # seed the centers with the num_clusters most populous input clusters
    by_population = sorted(cluster_list,
                           key=lambda cluster: cluster.total_population(),
                           reverse=True)
    k_clusters = by_population[:num_clusters]

    for _ in range(num_iterations):
        # NOTE(review): each fresh center is an empty cluster with
        # population 1 at (0, 0) — confirm merge_clusters tolerates this
        # seed without skewing the weighted center
        new_clusters = [alg_cluster.Cluster(set([]), 0, 0, 1, 0)
                        for _ in range(num_clusters)]
        # assign every input cluster to its nearest current center
        for cluster in cluster_list:
            distances = [cluster.distance(center) for center in k_clusters]
            nearest = distances.index(min(distances))
            new_clusters[nearest].merge_clusters(cluster)
        k_clusters = new_clusters[:]

    return k_clusters















46 changes: 46 additions & 0 deletions Module 3/Test_3.py

Large diffs are not rendered by default.

Loading

0 comments on commit 335a99a

Please sign in to comment.