Skip to content

Commit

Permalink
adapt node degree, edge size and domain construction
Browse files Browse the repository at this point in the history
  • Loading branch information
0cmenog committed Jun 14, 2024
1 parent 63b098c commit 6aac1dd
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 4 deletions.
7 changes: 5 additions & 2 deletions sidemap.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def main():
alreadyAddedPages = [url.page]

if(depth < maxDepth and url.isUrl()):
if not(url.page in graph): graph[url.page] = {"links": [], "outOfScopeURLs": [], "internal": {"nodeSize": 2}}
if not(url.page in graph): graph[url.page] = {"links": [], "outOfScopeURLs": [], "internal": {"nodeSize": 1}}
utils.printVerb(verbosity, 'W', "On page " + url.url)
# get page code
try:
Expand All @@ -61,7 +61,10 @@ def main():
# foundUrl has already been visited from this url
if foundUrl.page in alreadyAddedPages:
utils.printVerb(verbosity, 'Y', "[-] Found once again " + foundUrl.url)
if utils.isInScope(url.domain, foundUrl.domain) and not(foundUrl.getExtension() in banExts): graph[url.page]["links"].append(foundUrl.page)
if utils.isInScope(url.domain, foundUrl.domain) and not(foundUrl.getExtension() in banExts):
graph[url.page]["links"].append(foundUrl.page)
# increase degree of the target node
graph = utils.increaseNodeDegree(foundUrl.page, graph)
# foundUrl is a new one
else:
# foundUrl is from a website to map
Expand Down
3 changes: 2 additions & 1 deletion url.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ def _isFile(self, url: str) -> bool:
urlParts = url.split('/')
if len(urlParts) >= 2:
return '.' in urlParts[-1]
return False

## construct from url

Expand All @@ -68,7 +69,7 @@ def _constructHostname(self, url: str) -> str:
# cannot have '/' at the end
def _constructDomain(self, url: str) -> str:
parts = self._constructHostname(url).split('.')
return '.'.join(parts[1:]) if len(parts) >= 2 else '.'.join(parts)
return '.'.join(parts[1:]) if len(parts) > 2 else '.'.join(parts)

## operations on url

Expand Down
3 changes: 2 additions & 1 deletion utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ def increaseNodeDegree(page: str, graph: {}) -> {}:
if page in graph:
graph[page]["internal"]["nodeSize"] += 1
else:
# page not in graph yet: create it with the minimal nodeSize (1) + 1 for this link
graph[page] = {"links": [], "outOfScopeURLs": [], "internal": {"nodeSize": 2}}
return graph

Expand Down Expand Up @@ -95,7 +96,7 @@ def makeNXGraph(graph: {}) -> nx.classes.digraph.DiGraph:
for key, values in linksAndProps.items():
if(key == "links"):
for value in set(values):
g.add_edge(url, value, size=values.count(value))
g.add_edge(url, value, size=log(values.count(value)+1))
# add attributes
nx.set_node_attributes(g, {url: {"click": '\n'.join(linksAndProps["outOfScopeURLs"])}})
return g
Expand Down

0 comments on commit 6aac1dd

Please sign in to comment.