Skip to content

Commit

Permalink
improving number of iterations
Browse files Browse the repository at this point in the history
  • Loading branch information
elro77 committed Dec 15, 2021
1 parent 6be53af commit e5123fe
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 20 deletions.
2 changes: 1 addition & 1 deletion DBSCAN_Main.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@



testArray = vectorsArray[0:50000]
testArray = vectorsArray[5000:10000]
#====== Sklearn =================
#the sklearn clustering takes 120 seconds to complete
#returns an array where each index is a vector (point) and the value is its cluster label
Expand Down
59 changes: 40 additions & 19 deletions MyDBSCAN.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ def __init__(self, _size, _eps ,_minPoints):
self.clusters = [-1] * _size
self.noisePoints = [False] * _size
self.undefinedPoints = [True] * _size
self.actualKeys = set()
#integers
self.minPoints = _minPoints
self.eps = _eps
Expand Down Expand Up @@ -102,26 +103,48 @@ def createGraph(self,data):
elapsed = time.time() - t
print("initGraph time: ",elapsed)


def zipGrid(self):
    """Merge every populated grid bucket with its populated neighbours.

    Reads ``self.actualKeys`` (the set of bucket keys that received at
    least one point), ``self.eps`` and the two parallel dictionaries
    ``self.gridDictionaryVectors`` / ``self.gridDictionaryIndexes``
    (key -> list).  After the call, each populated bucket additionally
    holds the points of every other populated, non-negative bucket whose
    key differs from its own by at most ``self.eps``.

    The dictionaries are modified in place; nothing is returned.
    """
    # Snapshot the buckets before merging.  Extending from the live lists
    # would re-append points that were just merged in (duplicates) and
    # would leak points from buckets further than eps away into later
    # merges.
    ownVectors = {
        key: list(self.gridDictionaryVectors[key]) for key in self.actualKeys
    }
    ownIndexes = {
        key: list(self.gridDictionaryIndexes[key]) for key in self.actualKeys
    }

    for currentKey in self.actualKeys:
        # Only look at the lower neighbours: each pair of buckets is then
        # visited exactly once and merged in both directions below.
        # (The original range had identical start and stop bounds, so it
        # was always empty and no merge ever happened.)
        for key in range(currentKey - self.eps, currentKey):
            if key < 0:
                # Negative keys are skipped, as in the original code.
                continue
            if key not in self.actualKeys:
                continue
            self.gridDictionaryVectors[currentKey].extend(ownVectors[key])
            self.gridDictionaryIndexes[currentKey].extend(ownIndexes[key])

            self.gridDictionaryVectors[key].extend(ownVectors[currentKey])
            self.gridDictionaryIndexes[key].extend(ownIndexes[currentKey])


def initgridDictionaryVectors(self, data):
#here we must run through all points and connect them via map with O(n) only!
dimentions = len(data[0])
for pIndex in range(len(data)):
avg = int(np.sum(data[pIndex])/dimentions)
for key in range(avg - self.eps, avg + self.eps):
if key < 0:
continue
if (key in self.gridDictionaryVectors) == False:
self.gridDictionaryVectors.update({key : []})
self.gridDictionaryIndexes.update({key : []})
self.gridDictionaryVectors[key].append(data[pIndex])
self.gridDictionaryIndexes[key].append(pIndex)
currentKey = int(np.sum(data[pIndex])/dimentions)
self.actualKeys.add(currentKey)
if (currentKey in self.gridDictionaryVectors) == False:
self.gridDictionaryVectors.update({currentKey : []})
self.gridDictionaryIndexes.update({currentKey : []})
self.gridDictionaryVectors[currentKey].append(data[pIndex])
self.gridDictionaryIndexes[currentKey].append(pIndex)
t = time.time()
self.zipGrid()
elapsed = time.time() - t
print("zip grid time: ",elapsed)




def initGraph(self, data):
for key in self.gridDictionaryVectors:
#print("start key: ",key)
#t = time.time()
cnt = 0
for key in self.actualKeys:
cnt+=1
t = time.time()
result = self.dist(np.array(self.gridDictionaryVectors[key]))
elapsed = time.time() - t
print("dist calc : ",elapsed)
for pIndex in range(len(self.gridDictionaryIndexes[key])):
for qIndex in range(pIndex, len(self.gridDictionaryIndexes[key])):
if result[pIndex, qIndex] <= self.eps:
Expand All @@ -132,13 +155,11 @@ def initGraph(self, data):
self.connectionsDictionary[realPIndex].add(realQIndex)
if (realQIndex in self.connectionsDictionary) == False:
self.connectionsDictionary.update({realQIndex : set()})
self.connectionsDictionary[realQIndex].add(realPIndex)



#elapsed = time.time() - t
# print("time passed for key" ,key, ": ",elapsed)

self.connectionsDictionary[realQIndex].add(realPIndex)
elapsed = time.time() - t
print("time passed for key" ,key, ": ",elapsed)
print("iterations = ",cnt)




Expand Down

0 comments on commit e5123fe

Please sign in to comment.