Merge pull request #56 from ambroggi/dev
Here is item save file
bayegaspard authored Oct 16, 2023
2 parents ef393d2 + 3ef3700 commit 8a0d76e
Showing 7 changed files with 173 additions and 131 deletions.
3 changes: 2 additions & 1 deletion Tests/test_fullRun.py
@@ -70,7 +70,8 @@ def testLoadDataset():
assert torch.all(x[0] == y[0])

def testLoadDatasetfromSave():
main.Config.parameters["attemptLoad"][0] = 1
main.Config.parameters["attemptLoadModel"][0] = 1
main.Config.parameters["attemptLoadData"][0] = 1
main.torch.manual_seed(1)
train1, test1, val1 = FileHandling.checkAttempLoad("")
main.torch.manual_seed(1)
2 changes: 1 addition & 1 deletion build_number.txt
@@ -1 +1 @@
504
506
132 changes: 70 additions & 62 deletions src/main/Config.py
@@ -13,7 +13,7 @@
# pass
# raise doubleImport
print(f"A POSSIBLE PROBLEM HAS OCCURED, Config was loaded improperly, from {__name__} instead of directly\
this might break some global variables by having two copies",file=sys.stderr)
this might break some global variables by having two copies", file=sys.stderr)

#TODO: Rework config so that it is less janky and uses less bad practices of global variables.
# Possibly by moving HelperFunctions Loop functions to outside of the program
@@ -41,22 +41,23 @@ def loopOverUnknowns(unknownlist=False):
if len(knownVals)<2:
print("Too few knowns, things might break")
parameters["Unknowns"] = f"{len(unknownlist)} Unknowns"
parameters["Unknowns_clss"] = [unknownlist,"Values used for testing"]
parameters["Knowns_clss"] = [knownVals,"Values used for training"]
parameters["Unknowns_clss"] = [unknownlist, "Values used for testing"]
parameters["Knowns_clss"] = [knownVals, "Values used for training"]
return knownVals

#This is the different optimization functions
opt_func = {"Adam":torch.optim.Adam,"SGD":torch.optim.SGD, "RMSprop":torch.optim.RMSprop}
opt_func = {"Adam":torch.optim.Adam, "SGD":torch.optim.SGD, "RMSprop":torch.optim.RMSprop}


#Here are all of the parameters for the model.
parameters = {
#These parameters are organized like this:
#"ParamName":[Value,"Description",[possible values]]
#"ParamName":[Value, "Description", [possible values]]
#for a parameter called "ParamName" with a value of Value
"batch_size":[100000, "Number of items per batch"],
"num_workers":[14, "Number of threads working on building batches"],
"attemptLoad":[0, "0: do not use saves\n1:use saves"],
"attemptLoadModel":[0, "0: do not use saves for the model\n1:use saves for the model"],
"attemptLoadData":[0, "0: do not use saves for the dataset\n1:use saves for the dataset"],
"testlength":[1/4, "[0,1) percentage of training to test with"],
"Mix unknowns and validation": [1,"0 or 1, 0 means that the test set is purely unknowns and 1 means that the testset is the validation set plus unknowns (for testing)"],
"MaxPerClass": [1000, "Maximum number of samples per class\n if Dataloader_Variation is Cluster and this value is a float it interprets it as the maximum percentage of the class instead."],
@@ -83,46 +84,47 @@ def loopOverUnknowns(unknownlist=False):
"4: Loop through predefined hyperparameters found in datasets/hyperparamList.csv"],
"Dataset": ["Payload_data_CICIDS2017", "This is what dataset we are using,", ["Payload_data_CICIDS2017","Payload_data_UNSW"]],
"SchedulerStepSize": [10, "This is how often the scheduler takes a step, 3 means every third epoch"],
"SchedulerStep": [0.9,"This is how big a step the scheduler takes, leave 0 for no step"]
"SchedulerStep": [0.9,"This is how big a step the scheduler takes, leave 0 for no step"],
"ApplyPrelimSoft": [1, "This says to use a preliminary softmax and only use unknown detection on things that fail the softmax unknown detection"]
}


#Argparse tutorial: https://docs.python.org/3/howto/argparse.html
parser = argparse.ArgumentParser()
for x in parameters.keys():
if x in ["batch_size","num_workers","MaxPerClass","num_epochs","Degree of Overcompleteness","Number of Layers","Nodes","SchedulerStepSize"]:
parser.add_argument(f"--{x}",type=int,default=parameters[x][0],help=parameters[x][1],required=False)
if x in ["testlength","learningRate","threshold","Dropout","Temperature","SchedulerStep"]:
parser.add_argument(f"--{x}",type=float,default=parameters[x][0],help=parameters[x][1],required=False)
if x in ["attemptLoad","Mix unknowns and validation"]:
parser.add_argument(f"--{x}",type=int,choices=[1,0],default=parameters[x][0],help=parameters[x][1],required=False)
if x in ["batch_size", "num_workers", "MaxPerClass", "num_epochs", "Degree of Overcompleteness", "Number of Layers", "Nodes", "SchedulerStepSize"]:
parser.add_argument(f"--{x}", type=int, default=parameters[x][0], help=parameters[x][1], required=False)
if x in ["testlength", "learningRate", "threshold", "Dropout", "Temperature", "SchedulerStep"]:
parser.add_argument(f"--{x}", type=float, default=parameters[x][0], help=parameters[x][1], required=False)
if x in ["attemptLoadModel", "attemptLoadData","Mix unknowns and validation","ApplyPrelimSoft"]:
parser.add_argument(f"--{x}", type=int, choices=[1, 0], default=parameters[x][0], help=parameters[x][1], required=False)
if x in ["LOOP"]:
parser.add_argument(f"--{x}",type=int,choices=[0,1,2,3,4],default=parameters[x][0],help=parameters[x][1],required=False)
if x in ["model","OOD Type","Dataloader_Variation","Activation","Dataset"]:
parser.add_argument(f"--{x}",choices=parameters[x].pop(),default=parameters[x][0],help=parameters[x][1],required=False)
parser.add_argument(f"--{x}", type=int, choices=[0, 1, 2, 3, 4], default=parameters[x][0], help=parameters[x][1], required=False)
if x in ["model", "OOD Type", "Dataloader_Variation", "Activation", "Dataset"]:
parser.add_argument(f"--{x}", choices=parameters[x].pop(), default=parameters[x][0], help=parameters[x][1], required=False)
if x in ["Unknowns_clss"]:
parser.add_argument(f"--{x}",default=f"{parameters[x][0]}",help=parameters[x][1],required=False)
if "pytest" not in sys.modules: #The argument parser appears to have issues with the pytest tests. I have no idea why.
parser.add_argument(f"--{x}", default=f"{parameters[x][0]}", help=parameters[x][1], required=False)
if "pytest" not in sys.modules: # The argument parser appears to have issues with the pytest tests. I have no idea why.
args = parser.parse_args()
for x in args._get_kwargs():
parameters[x[0]][0] = x[1]

if isinstance(parameters["Unknowns_clss"][0],str):
if isinstance(parameters["Unknowns_clss"][0], str):
if len(parameters["Unknowns_clss"][0])>0 and len(parameters["Unknowns_clss"][0])!=2: #Not sure why I need this specifier but it breaks if the default is []
# print(len(parameters["Unknowns_clss"][0]))
parameters["Unknowns_clss"][0] = [int(y) for y in parameters["Unknowns_clss"][0].removesuffix("]").removeprefix("[").split(sep=",")]
parameters["Unknowns_clss"][0] = [int(y) for y in parameters["Unknowns_clss"][0].removesuffix("]").removeprefix("[").split(sep=", ")]
else:
parameters["Unknowns_clss"][0] = []


DOC_kernels = [3,4,5]
DOC_kernels = [3, 4, 5]

#Set Number of classes:
if parameters["Dataset"][0] == "Payload_data_UNSW":
parameters["CLASSES"][0] = 10
UnusedClasses = []
else:
UnusedClasses = [8,9,10]
UnusedClasses = [8, 9, 10]
UnusedClasses = []

#Dendrogram chunk uses a slightly different output on the model structure.
@@ -140,34 +142,34 @@ def loopOverUnknowns(unknownlist=False):


#This is to test all of the algorithms one after the other. (Loop 1 values)
alg = ["Soft","Open","Energy","COOL","DOC","iiMod"]
batch = [100,1000,10000,100000]
datapoints_per_class = [10,100,1000]
thresholds = [0.1,1,10]
thresholds = [30,20,15,5]
alg = ["Soft", "Open", "Energy", "COOL", "DOC", "iiMod"]
batch = [100, 1000, 10000, 100000]
datapoints_per_class = [10, 100, 1000]
thresholds = [0.1, 1, 10]
thresholds = [30, 20, 15, 5]
thresholds = [parameters["threshold"][0]]
learning_rates = [0.1,0.01,0.001,0.0001]
activation = ["ReLU", "Tanh", "Sigmoid","Leaky"]
groups = [[],[2],[2,3],[2,3,4],[2,3,4,5],[2,3,4,5,6],[2,3,4,5,6,7],[1,2,3,4,5,6,7],[1,2,3,4,5,6,7,8]]
#groups = [[7,8,9]]
learning_rates = [0.1, 0.01, 0.001, 0.0001]
activation = ["ReLU", "Tanh", "Sigmoid", "Leaky"]
groups = [[], [2], [2, 3], [2, 3, 4], [2, 3, 4, 5], [2, 3, 4, 5, 6], [2, 3, 4, 5, 6, 7], [1, 2, 3, 4, 5, 6, 7], [1, 2, 3, 4, 5, 6, 7, 8]]
#groups = [[7, 8, 9]]
if parameters["Dataset"][0] == "Payload_data_CICIDS2017":
incGroups = [[2,3,4,5,6,7,8,9,10,11,12,13,14],[3,4,5,6,7,8,9,10,11,12,13,14],[4,5,6,7,8,9,10,11,12,13,14],[5,6,7,8,9,10,11,12,13,14],[6,7,8,9,10,11,12,13,14],[7,8,9,10,11,12,13,14],[8,9,10,11,12,13,14],[9,10,11,12,13,14],[10,11,12,13,14],[11,12,13,14],[12,13,14],[13,14],[14]]
incGroups = [[2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], [5, 6, 7, 8, 9, 10, 11, 12, 13, 14], [6, 7, 8, 9, 10, 11, 12, 13, 14], [7, 8, 9, 10, 11, 12, 13, 14], [8, 9, 10, 11, 12, 13, 14], [9, 10, 11, 12, 13, 14], [10, 11, 12, 13, 14], [11, 12, 13, 14], [12, 13, 14], [13, 14], [14]]
#This one list is for loop 2. Note: array size should be decreasing.
else:
incGroups = [[2,3,4,5,6,7,8,9],[3,4,5,6,7,8,9],[4,5,6,7,8,9],[5,6,7,8,9],[6,7,8,9],[7,8,9],[8,9],[9]]
incGroups = [[2, 3, 4, 5, 6, 7, 8, 9], [3, 4, 5, 6, 7, 8, 9], [4, 5, 6, 7, 8, 9], [5, 6, 7, 8, 9], [6, 7, 8, 9], [7, 8, 9], [8, 9], [9]]
epochs= []
epochs = [1,10,100,150]
epochs = [1, 10, 100, 150]


# groups = [list(range(2,parameters["CLASSES"][0]))]
# groups = [list(range(2, parameters["CLASSES"][0]))]
# #Little bit of code that generates incremental numbers of unknowns.
# while len(groups[0])>2:
# new = groups[0].copy()
# new.pop(0)
# new.pop(0)
# groups.insert(0,new)
# groups.insert(0, new)
# #Little bit of code that generates decrementing numbers of unknowns.
# incGroups = [list(range(2,parameters["CLASSES"][0]))]
# incGroups = [list(range(2, parameters["CLASSES"][0]))]
# while len(incGroups[-1])>1:
# new = incGroups[-1].copy()
# new.pop(0)
Expand All @@ -194,29 +196,29 @@ def loopOverUnknowns(unknownlist=False):
#Adds in everything in config:

# #learning_rates.remove(Config.parameters["learningRate"][0])
# learning_rates.insert(0,parameters["learningRate"][0])
# learning_rates.insert(0, parameters["learningRate"][0])
# #epochs.remove(Config.parameters["num_epochs"][0])
# epochs.insert(0,parameters["num_epochs"][0])
# groups.insert(0,helper_variables["unknowns_clss"])
# epochs.insert(0, parameters["num_epochs"][0])
# groups.insert(0, helper_variables["unknowns_clss"])

# #Always starts with the configured activation type
# alg.remove(parameters["OOD Type"][0])
# alg.insert(0,parameters["OOD Type"][0])
# alg.insert(0, parameters["OOD Type"][0])

#This is an array to loop through everything more easily.
loops = [batch,learning_rates,activation,["Standard","Cluster"],groups]
loops = [batch, learning_rates, activation, ["Standard", "Cluster"], groups]
# loops = [groups]
loops2 = ["batch_size","learningRate","Activation","Dataloader_Variation","Unknowns"]
loops2 = ["batch_size", "learningRate", "Activation", "Dataloader_Variation", "Unknowns"]
# loops2 = ["Unknowns"]
for i in range(len(loops)):
if loops2[i] == "Unknowns":
loops[i].insert(0,parameters["Unknowns_clss"][0])
loops[i].insert(0, parameters["Unknowns_clss"][0])
elif loops2[i] == "optimizer":
loops[i].insert(0,parameters[loops2[i]])
loops[i].insert(0, parameters[loops2[i]])
elif loops2[i] == "None":
pass
else:
loops[i].insert(0,parameters[loops2[i]][0])
loops[i].insert(0, parameters[loops2[i]][0])

#Override the unknowns because model is kept
if parameters["LOOP"][0] == 2:
Expand All @@ -237,30 +239,36 @@ def algorithmSpecificSettings(alg="None"):


# match alg:
if alg == "Soft":
pass
if alg == "Open":
parameters["threshold"][0] = 0.8
if alg == "Energy":
parameters["threshold"][0] = 0.474
if alg == "COOL":
parameters["threshold"][0] = 0.516034961
if alg == "DOC":
parameters["threshold"][0] = 0.06449493
if alg == "iiMod":
parameters["threshold"][0] = 102064.4453
# if alg == "Soft":
# pass
# if alg == "Open":
# parameters["threshold"][0] = 0.8
# if alg == "Energy":
# parameters["threshold"][0] = 0.474
# if alg == "COOL":
# parameters["threshold"][0] = 0.516034961
# if alg == "DOC":
# parameters["threshold"][0] = 0.06449493
# if alg == "iiMod":
# parameters["threshold"][0] = 102064.4453

if parameters["LOOP"][0] == 3:
print("Warning: Unknowns may have been changed due to LOOP 3 percentages file")
import pandas as pd
# parameters["num_epochs"][0] = 0
parameters["loopLevel"] = [0,"What percentages the model is on"]
parameters["loopLevel"] = [0, "What percentages the model is on"]
parameters["MaxSamples"] = [parameters["MaxPerClass"][0], "Max number of samples total"]
file = pd.read_csv("datasets/percentages.csv", index_col=None).to_numpy()
zeros = file[0][:parameters["CLASSES"][0]] == 0
unknownClasses = zeros.nonzero()[0]
parameters["Unknowns_clss"][0] = unknownClasses.tolist()


#Getting version number
#https://gist.github.com/sg-s/2ddd0fe91f6037ffb1bce28be0e74d4e
f = open("build_number.txt","r")
parameters["Version"] = [f.read(),"The version number"]
f = open("build_number.txt", "r")
parameters["Version"] = [f.read(), "The version number"]

save_as_tensorboard = True
datasetRandomOffset =True
datasetRandomOffset = True
dataparallel = True
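For context, this commit splits the old attemptLoad switch into two independent flags, attemptLoadModel and attemptLoadData, so saved models and saved dataset splits can be reused separately. A minimal sketch of setting the new flags, assuming src/main/main.py is the entry point (the flag names come from the diff above; everything else is illustrative):

    # from the command line, via the argparse options registered in Config.py:
    #   python src/main/main.py --attemptLoadData 1 --attemptLoadModel 0
    # or directly in code, as the updated test in Tests/test_fullRun.py does:
    import Config
    Config.parameters["attemptLoadData"][0] = 1   # reuse the saved train/test/val split from Saves/
    Config.parameters["attemptLoadModel"][0] = 0  # but train a new model instead of loading one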
53 changes: 45 additions & 8 deletions src/main/FileHandling.py
@@ -59,7 +59,7 @@ def getDatagroup():
def checkAttempLoad(root_path=""):
"""
Creates the training, testing, and validation datasets and saves them in Saves/Data.pt, Saves/DataTest.pt, and Saves/DataVal.pt.
if Config's "attemptLoad" is true it instead loads the datasets from the files and does not create them.
if Config's "attemptLoadData" is true it instead loads the datasets from the files and does not create them.
This is so that the validation and testing data does not get mixed up which would invalidate the validation data.
"""
# get the data and create a test set and train set
@@ -80,7 +80,7 @@ def checkAttempLoad(root_path=""):
if Config.unit_test_mode:
return train, test, val

if Config.parameters["attemptLoad"][0] and os.path.exists(os.path.join(root_path,"Saves","Data.pt")):
if Config.parameters["attemptLoadData"][0] and os.path.exists(os.path.join(root_path,"Saves","Data.pt")):
print("Found prior dataset to load")
try:
train = torch.load(os.path.join(root_path,"Saves","Data.pt"))
@@ -101,7 +101,7 @@ def checkAttempLoad(root_path=""):
torch.save(train,os.path.join(root_path,"Saves","Data.pt"))
torch.save(test,os.path.join(root_path,"Saves","DataTest.pt"))
torch.save(val,os.path.join(root_path,"Saves","DataVal.pt"))
if Config.parameters["attemptLoad"][0]:
if Config.parameters["attemptLoadData"][0]:
print("No model train and test checkpoint was found, saving datacheckpoints ...")
return train, test, val

@@ -302,7 +302,7 @@ def create_params_Fscore(path, score, threshold = None):

class Score_saver():

def __init__(self,path="Scoresall.csv"):
def __init__(self,path="Scoresall.csv",Record_Hyperparams=True):
"""
Score_saver() is a class to consolidate the saving of data to the csv files.
When a Score_saver() object is initialized then it creates a new row onto the file,
@@ -312,13 +312,16 @@ def __init__(self,path="Scoresall.csv"):
self.writer = None
self.path = path #unused at the moment
self.name_all = {path:0}
self.create_params_All()
if Record_Hyperparams:
self.create_params_All()
else:
pd.DataFrame().to_csv(self.path)
if Config.save_as_tensorboard:
self.tensorboard_start()



def __call__(self,name:str,val,path="",fileName=None):
def __call__(self,name:str,val,path="",fileName=None, recursiveList=False):
"""
Adds the measurement to the file. The __call__ version allows the Score_saver to be called like this:
scoresaver = Score_saver()
@@ -331,7 +334,7 @@ def __call__(self,name:str,val,path="",fileName=None):
"""
if fileName is None:
fileName = self.path
self.addMeasurement(name,val,path,fileName)
self.addMeasurement(name,val,path,fileName,recursiveList=recursiveList)

def create_params_All(self,name=None):
"""
@@ -389,7 +392,7 @@ def create_loop_history(self,name:str):
#hist = hist.transpose()
hist.to_csv(os.path.join("Saves",name))

def addMeasurement(self,name:str,val,path="",fileName=None,step=0):
def addMeasurement(self,name:str,val,path="",fileName=None,step=0, recursiveList=0):
"""
Adds a measurement to the LATEST line in the Scoresall.csv file. This may cause problems if you are running two versions at once.
we recommend only running one version at a time.
@@ -405,6 +408,10 @@ def addMeasurement(self,name:str,val,path="",fileName=None,step=0):
"""
if Config.unit_test_mode:
return
if recursiveList>0 and (hasattr(val, '__iter__')):
for num,v in enumerate(val):
self.addMeasurement(name+f"_{num}", v, path, fileName, step, recursiveList-1)
return
if fileName is None:
fileName = self.path
if self.writer is not None:
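For context on the recursiveList branch added above: when it is greater than zero and the value is iterable, each element is written as its own measurement with an index suffix. A minimal usage sketch, with an illustrative metric name and values (Score_saver is the class defined in this file):

    saver = Score_saver()                         # starts a new row in Scoresall.csv
    saver("F1_per_class", [0.91, 0.88, 0.73], recursiveList=1)
    # equivalent to three separate measurements on the current row:
    #   F1_per_class_0 = 0.91, F1_per_class_1 = 0.88, F1_per_class_2 = 0.73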
@@ -490,3 +497,33 @@ def start(self):
self.writer = None
self.tensorboard_start()
self.create_params_All()

class items_with_classes_record():
def __init__(self, labels:torch.Tensor):
self.labels = labels.unsqueeze(dim=-1)
self.items = None
self.predict = None

def __call__(self, items:torch.Tensor, file = "Saves/items.csv"):
self.storeItems(items)
self.useItems(file)

def storeItems(self, items:torch.Tensor):
self.items = items

def useItems(self, file = "Saves/items.csv"):
index_names = [f"Logit{x}" for x in range(len(self.items[0]))]
if self.predict is None:
items_with_labels = torch.concat([self.items,self.labels],dim=1)
else:
items_with_labels = torch.concat([self.items,self.predict,self.labels],dim=1)
index_names.append("Prediction")
index_names.append("Label")
df = pd.DataFrame(items_with_labels.T,index=index_names).T
df.to_csv(file,mode="a",header=(not os.path.exists(file)))
self.items = None
self.predict = None

def storePredictions(self, predictions:torch.Tensor):
assert predictions.dim() == 1
self.predict = predictions.unsqueeze(dim=-1)
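For context, the new items_with_classes_record class collects per-sample logits (and optionally predictions) and appends them, together with the labels, to a CSV. A minimal usage sketch, assuming the Saves/ directory exists and that the import path is adjusted to wherever the caller lives; the tensors are illustrative:

    import torch
    from FileHandling import items_with_classes_record

    labels = torch.tensor([0., 1., 1.])                     # one class label per sample
    logits = torch.rand(3, 15)                              # one row of model outputs per sample
    record = items_with_classes_record(labels)
    record.storePredictions(logits.argmax(dim=1).float())   # optional; adds a Prediction column
    record(logits)                                          # appends Logit0..Logit14, Prediction, Label to Saves/items.csv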