Skip to content

Commit

Permalink
Revert "add a little fix to silhouette"
Browse files Browse the repository at this point in the history
This reverts commit a6231bd.
  • Loading branch information
elro77 committed Dec 18, 2021
1 parent a6231bd commit a90aee4
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 41 deletions.
29 changes: 5 additions & 24 deletions DBSCAN_Main.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,25 +96,24 @@



testArray = vectorsArray[0:50000]
testArray = vectorsArray[0:100000]
#====== Sklearn =================
#the sklearn clustering takes about 120 seconds to complete
#it returns an array where each index is a vector (point) and each value is its cluster label,
#where -1 represents noise

t = time.time()
clustering = DBSCAN(eps=4, min_samples=2).fit(testArray)
clustering = DBSCAN(eps=3, min_samples=2).fit(testArray)
labels = clustering.labels_
elapsed = time.time() - t
print("optimal clustering time: ",elapsed)


#=================================


#============ my implementation =============
t = time.time()
dbscan = CMyDBSCAN(len(testArray), 4, 2)
dbscan = CMyDBSCAN(len(testArray), 3, 2)
myClusteringResult = dbscan.startClustering(testArray)
elapsed = time.time() - t
print("my clustering time: ",elapsed)
Expand All @@ -129,31 +128,13 @@

#=================================

"""
for eps in range(1,6):
for minPts in range (2,6):
#t = time.time()
dbscan = CMyDBSCAN(len(testArray), eps, minPts)
myClusteringResult = dbscan.startClustering(testArray)
#elapsed = time.time() - t
#print("my clustering time: ",elapsed)
silhouette = Silhouette()
#t = time.time()
silhouetteValue = silhouette.calculateSilhouetteValue(testArray, np.array(myClusteringResult))
#elapsed = time.time() - t
#print("calculateSilhouetteValue time: ",elapsed)
print("value at (eps = ",eps,", minPts = ",minPts,") is: ",silhouetteValue)
"""
"""

#check correctness
for i in range(len(labels)):
if labels[i] != myClusteringResult[i]:
print("different at: ",i)
print("finish testing")
"""


#testing area

24 changes: 9 additions & 15 deletions MyDBSCAN.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,36 +145,30 @@ def connectNodes(self, key, pIndex, qIndex):

def initGraph(self, data):
cnt = 0
cRow = 0
for key in self.actualKeys:
cnt+=1
print("#",cnt)
t = time.time()
#cnt+=1
#print("#",cnt)
#t = time.time()
result = self.dist(np.array(self.gridDictionaryVectors[key]))
elapsed = time.time() - t
print("dist calc : ",elapsed)
#elapsed = time.time() - t
#print("dist calc : ",elapsed)

t = time.time()
mat = result < self.eps
pIndex = -1
for row in mat:
cRow += 1
t__ = time.time()
pIndex += 1
trueAmounts = np.sum(row)
if trueAmounts >= self.minPoints:
#try to modify here
for colom in range(len(row)):
if row[colom] == True:
self.connectNodes(key, pIndex, colom)
elapsed = time.time() - t__
print("time passed for row #" ,cRow, ": ",elapsed)

elapsed = time.time() - t
print("time passed for key" ,key, ": ",elapsed)

#elapsed = time.time() - t
#print("time passed for key" ,key, ": ",elapsed)


print("iterations = ",cnt)
#print("iterations = ",cnt)



Expand Down
21 changes: 19 additions & 2 deletions silhouette.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,6 @@ def calculateSilhouetteValue(self, dataset, clusters):
self.createclustersDictionaryIndexes(dataset, clusters)
#elapsed = time.time() - t
#print("createclustersDictionaryIndexes time: ",elapsed)
if(len(self.listVectorsForDistanceMatrix) == 0):
return -1
self.distances = dist(np.array(self.listVectorsForDistanceMatrix))

for cluster in self.clustersDictionaryIndexes:
Expand Down Expand Up @@ -131,6 +129,25 @@ def calculateBValues(self, clusterNumber):
arrayBValues[arrayIndex] = bValue
return arrayBValues
















def calcSumOfDistance(self, clusterNumber, pIndex):
x=0




def dist(A):
Expand Down

0 comments on commit a90aee4

Please sign in to comment.