Skip to content

Commit

Permalink
silhouette find cluster points
Browse files Browse the repository at this point in the history
  • Loading branch information
elro77 committed Dec 21, 2021
1 parent af422ae commit 5c4be4b
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 15 deletions.
4 changes: 2 additions & 2 deletions DBSCAN_Main.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@
to think for another way
== Version 1.07, using matrixes and np.sum() np.where and running dbscan for each grid tile the results were improved for
260 seconds for 100000
260 seconds for 100,000
Expand Down Expand Up @@ -129,8 +129,8 @@
elapsed = time.time() - t
print("my clustering time: ",elapsed)

silhouette = Silhouette()

silhouette = Silhouette()
t = time.time()
silhouetteValue = silhouette.calculateSilhouetteValue(testArray, np.array(myClusteringResult))
elapsed = time.time() - t
Expand Down
57 changes: 44 additions & 13 deletions silhouette.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ class Silhouette:
def __init__(self):
self.clustersDictionaryIndexes = dict()
self.clustersDictionaryVectors = dict()
self.clusterGravityPointDictionary = dict()
self.clusterPairsDictionary = dict()
self.listAvgSilhouette = []
self.listVectorsForDistanceMatrix = []
self.distances = np.zeros(1)
Expand All @@ -32,19 +34,21 @@ def __init__(self):
def calculateSilhouetteValue(self, dataset, clusters):
    """Return the unweighted mean of the per-cluster average silhouette values.

    The working state is rebuilt on every call so that a reused instance does
    not mix in results from a previous dataset: the per-cluster averages were
    previously appended to a list that was never cleared, and the gravity
    point / pair dictionaries never overwrite existing keys.

    Args:
        dataset: forwarded unchanged to createclustersDictionaryIndexes.
        clusters: forwarded unchanged to createclustersDictionaryIndexes
            (presumably one cluster label per dataset point -- confirm there).

    Returns:
        np.average over one average silhouette value per key of
        self.clustersDictionaryIndexes.
    """
    # Same initial values as __init__ assigns, so this is a no-op on a fresh
    # instance and only matters when the object is reused.
    self.clustersDictionaryIndexes = dict()
    self.clustersDictionaryVectors = dict()
    self.clusterGravityPointDictionary = dict()
    self.clusterPairsDictionary = dict()
    self.listAvgSilhouette = []
    self.listVectorsForDistanceMatrix = []

    # Order matters: the gravity points are built from the cluster vectors,
    # and the cluster pairs are built from the gravity points.
    self.createclustersDictionaryIndexes(dataset, clusters)
    self.createClusterGravityPoint()
    self.findClusterPairs()

    # Distance matrix over the collected vectors (listVectorsForDistanceMatrix
    # is presumably filled by createclustersDictionaryIndexes -- confirm).
    self.distances = dist(np.array(self.listVectorsForDistanceMatrix))

    for cluster in self.clustersDictionaryIndexes:
        self.listAvgSilhouette.append(self.calculateAvgSilhoueteOfCluster(cluster))

    # NOTE(review): this averages the cluster means with equal weight,
    # regardless of how many members each cluster has.
    return np.average(np.array(self.listAvgSilhouette))






def createclustersDictionaryIndexes(self, dataset, clusters):
#pIndex is the true index of the point in data set
pIndex = -1
Expand All @@ -66,14 +70,48 @@ def createclustersDictionaryIndexes(self, dataset, clusters):
self.clustersDictionaryIndexes[cluster].append(index)
index += 1


def createClusterGravityPoint(self):
    """Store the gravity point (column-wise mean) of each cluster's vectors.

    Reads self.clustersDictionaryVectors and, for every key that is not yet
    in self.clusterGravityPointDictionary, stores the mean over axis 0 of
    that key's vectors. Entries that already exist are left untouched.
    """
    for key, vectors in self.clustersDictionaryVectors.items():
        # An existing gravity point is never overwritten, so check before
        # paying for the array conversion and the mean.
        if key in self.clusterGravityPointDictionary:
            continue
        # mean(0) averages every column, i.e. one value per coordinate
        self.clusterGravityPointDictionary[key] = np.array(vectors).mean(0)


def findClusterPairs(self):
    """Record, for every gravity point, the position of its nearest neighbour.

    The gravity points are taken in the iteration order of
    self.clusterGravityPointDictionary and passed to dist() to get a square
    distance matrix. For each row, the column holding the smallest distance
    to a different gravity point is stored in self.clusterPairsDictionary
    as {row: column}. Rows already present in the dictionary are left
    untouched. A row with no other gravity point is stored with -1.

    NOTE(review): keys and values are positions in the distance matrix, not
    the cluster keys of clusterGravityPointDictionary -- confirm that the
    consumers of clusterPairsDictionary expect positions.
    """
    clusterPoints = list(self.clusterGravityPointDictionary.values())
    if not clusterPoints:
        # Nothing to pair; avoids handing an empty array to dist()
        return

    clustersDistanceMatrix = dist(np.array(clusterPoints))
    for row, rowDistances in enumerate(clustersDistanceMatrix):
        if row in self.clusterPairsDictionary:
            continue
        # Infinity instead of a magic upper bound, so arbitrarily large
        # distances still yield a neighbour.
        minNumber = float("inf")
        minColmn = -1
        for colomn, distance in enumerate(rowDistances):
            # Skip the gravity point itself by position rather than by a
            # zero distance: another cluster may sit on the same point, and
            # the diagonal is not guaranteed to be exactly 0.
            if colomn == row:
                continue
            if distance < minNumber:
                minNumber = distance
                minColmn = colomn
        self.clusterPairsDictionary[row] = minColmn







#Calculate avg S values of the cluster
def calculateAvgSilhoueteOfCluster(self, clusterNumber):
#because the calculation is the same for all the cluster members they will all have the same A value
a = self.calculateClusterAValue(clusterNumber)
arrayAValues = self.calculateClusterAValue(clusterNumber)
arrayBValues = self.calculateBValues(clusterNumber)
arraySValues = np.zeros(len(self.clustersDictionaryIndexes[clusterNumber]))
for i in range(len(arraySValues)):
a = arrayAValues[i]
b = arrayBValues[i]
if a < b:
arraySValues[i] = 1 - (a / b)
Expand All @@ -89,14 +127,7 @@ def calculateAvgSilhoueteOfCluster(self, clusterNumber):

return np.average(arraySValues)










def calculateClusterAValue(self, clusterNumber):
#because all the members will have the same distance sum we can calculate it only once
numberOfMembers = len(self.clustersDictionaryIndexes[clusterNumber])
Expand Down

0 comments on commit 5c4be4b

Please sign in to comment.