Add files via upload

Komal01 · Nov 23, 2018 · a555fc0 · a555fc0
1 parent 292cead
commit a555fc0
Show file tree

Hide file tree

Showing 4 changed files with 194 additions and 0 deletions.
diff --git a/feature_extractor (1).py b/feature_extractor (1).py
@@ -0,0 +1,90 @@
+import pandas as pd 
+import numpy as np
+
+import rf_model
+
+
+class feature_extractor:
+    """Build lexical features for one URL and pass them to ``rf_model.predictor``.
+
+    Each feature method returns a small integer code (0, 1 or 2); the meaning
+    of every code is documented on the method itself.
+    """
+
+    def __init__(self, url: str):
+        """Remember the URL to analyse.
+
+        Args:
+            url: The URL to analyse, e.g. ``"http://example.com/path"``.
+        """
+        self.input_url = url
+
+    def long_url(self, l):
+        """Bucket a URL by its length.
+
+        Returns:
+            0 when shorter than 54 characters, 2 when 54 to 75 characters
+            (inclusive), 1 when longer than 75 characters.
+        """
+        length = len(str(l))
+        if length < 54:
+            return 0
+        if length <= 75:
+            return 2
+        return 1
+
+    def have_at_symbol(self, l):
+        """Return 1 if the value contains an ``@`` character, otherwise 0."""
+        return 1 if "@" in str(l) else 0
+
+    def redirection(self, l):
+        """Return 1 if the value contains ``//``, otherwise 0.
+
+        ``extract`` calls this on the URL with its leading ``protocol://``
+        removed, so a hit means ``//`` occurs again after the protocol.
+        """
+        return 1 if "//" in str(l) else 0
+
+    def prefix_suffix_seperation(self, l):
+        """Return 1 if the value contains a hyphen (``-``), otherwise 0."""
+        return 1 if "-" in str(l) else 0
+
+    def sub_domains(self, l):
+        """Bucket a value by the number of dots it contains.
+
+        Returns:
+            0 for fewer than 3 dots, 2 for exactly 3 dots, 1 for more than 3.
+        """
+        dots = str(l).count(".")
+        if dots < 3:
+            return 0
+        if dots == 3:
+            return 2
+        return 1
+
+    def _split_url(self):
+        """Split the stored URL into (protocol, remainder, domain_name, address).
+
+        ``remainder`` is everything after the first ``://``. Only the first
+        ``://`` and the first ``/`` are used as separators, so URLs embedded
+        in the path or query string stay intact. ``protocol`` is None when
+        the URL has no ``://``; ``address`` is None when nothing follows the
+        domain (no ``/``), instead of raising as a column-based split would.
+        """
+        url = str(self.input_url)
+        protocol, scheme_sep, remainder = url.partition("://")
+        if not scheme_sep:
+            protocol, remainder = None, url
+        domain_name, path_sep, address = remainder.partition("/")
+        if not path_sep:
+            address = None
+        return protocol, remainder, domain_name, address
+
+    def _build_features(self):
+        """Return a one-row DataFrame with the URL parts and feature columns.
+
+        Column order matters: ``rf_model.predictor`` selects its inputs by
+        position, skipping the first three (non-feature) columns.
+        """
+        url = str(self.input_url)
+        protocol, remainder, domain_name, address = self._split_url()
+        row = {
+            "protocol": protocol,
+            "domain_name": domain_name,
+            "address": address,
+            # Length and '@' checks look at the complete URL.
+            "long_url": self.long_url(url),
+            "having_@_symbol": self.have_at_symbol(url),
+            # Redirection is checked on the part after the protocol.
+            "redirection_//_symbol": self.redirection(remainder),
+            # Hyphen and dot counts look at the domain name only.
+            "prefix_suffix_seperation": self.prefix_suffix_seperation(domain_name),
+            "sub_domains": self.sub_domains(domain_name),
+        }
+        return pd.DataFrame([row], columns=list(row))
+
+    def extract(self):
+        """Extract the features of the stored URL and classify them.
+
+        Returns:
+            Whatever ``rf_model.predictor`` returns for the feature row.
+        """
+        return rf_model.predictor(self._build_features())
diff --git a/finalized_model (1).sav b/finalized_model (1).sav
diff --git a/gui (1).py b/gui (1).py
@@ -0,0 +1,69 @@
+from PyQt5 import QtCore, QtGui, QtWidgets
+import feature_extractor
+
+class Ui_Spam_detector(object):
+    """Main-window UI: a URL input box, a check button and an output pane."""
+
+    def setupUi(self, Spam_detector):
+        """Create the widgets on ``Spam_detector`` and wire up the check button.
+
+        Args:
+            Spam_detector: The main window that will host the widgets.
+        """
+        Spam_detector.setObjectName("Spam_detector")
+        Spam_detector.resize(521, 389)
+        self.centralwidget = QtWidgets.QWidget(Spam_detector)
+        self.centralwidget.setObjectName("centralwidget")
+
+        # Check button; clicking it runs button_click.
+        self.check_button = QtWidgets.QPushButton(self.centralwidget)
+        self.check_button.setGeometry(QtCore.QRect(210, 170, 93, 28))
+        self.check_button.setObjectName("check_button")
+        self.check_button.clicked.connect(self.button_click)
+
+        # URL input box and its label.
+        self.url_input = QtWidgets.QLineEdit(self.centralwidget)
+        self.url_input.setGeometry(QtCore.QRect(70, 111, 431, 31))
+        self.url_input.setObjectName("url_input")
+
+        self.label = QtWidgets.QLabel(self.centralwidget)
+        self.label.setGeometry(QtCore.QRect(20, 110, 81, 31))
+        self.label.setObjectName("label")
+
+        # Output pane that collects the results of every check.
+        self.output_text = QtWidgets.QTextEdit(self.centralwidget)
+        self.output_text.setGeometry(QtCore.QRect(30, 241, 461, 121))
+        self.output_text.setObjectName("output_text")
+
+        # Title label at the top of the window.
+        self.label_2 = QtWidgets.QLabel(self.centralwidget)
+        self.label_2.setGeometry(QtCore.QRect(110, 10, 311, 41))
+        self.label_2.setObjectName("label_2")
+
+        Spam_detector.setCentralWidget(self.centralwidget)
+        self.statusbar = QtWidgets.QStatusBar(Spam_detector)
+        self.statusbar.setObjectName("statusbar")
+        Spam_detector.setStatusBar(self.statusbar)
+
+        self.retranslateUi(Spam_detector)
+        QtCore.QMetaObject.connectSlotsByName(Spam_detector)
+
+    def retranslateUi(self, Spam_detector):
+        """Set the window title and the visible text of the button and labels."""
+        _translate = QtCore.QCoreApplication.translate
+        Spam_detector.setWindowTitle(_translate("Spam_detector", "MainWindow"))
+        self.check_button.setText(_translate("Spam_detector", "Check "))
+        self.label.setText(_translate("Spam_detector", "<html><head/><body><p><span style=\" font-size:10pt;\">URL :</span></p></body></html>"))
+        self.label_2.setText(_translate("Spam_detector", "<html><head/><body><p align=\"center\"><span style=\" font-size:16pt;\">Spam URL Detector</span></p></body></html>"))
+
+    def button_click(self):
+        """Classify the URL in the input box and append the result to the output pane.
+
+        Empty input and any failure while extracting or classifying are
+        reported in the output pane rather than raised.
+        """
+        url = self.url_input.text().strip()
+        if not url:
+            self.output_text.append("Please enter a URL to check.\n\n")
+            return
+
+        try:
+            str1, str2 = feature_extractor.feature_extractor(url).extract()
+        except Exception as exc:
+            # UI boundary: an exception escaping a Qt slot would take the
+            # application down, so show the problem to the user instead.
+            self.output_text.append("Could not check URL: {}\n\n".format(exc))
+            return
+
+        self.output_text.append("{} \n{}\n\n".format(str1, str2))
+
+
+    #def show_output():
+
+if __name__ == "__main__":
+    # Script entry point: create the Qt application, build the main window
+    # from Ui_Spam_detector, show it and run the event loop.
+    import sys
+    # The application object is created before any widget.
+    app = QtWidgets.QApplication(sys.argv)
+    Spam_detector = QtWidgets.QMainWindow()
+    ui = Ui_Spam_detector()
+    ui.setupUi(Spam_detector)
+    Spam_detector.show()
+    # The value returned by the event loop is used as the process exit status.
+    sys.exit(app.exec_())
+
diff --git a/rf_model (1).py b/rf_model (1).py
@@ -0,0 +1,35 @@
+import pickle
+import numpy,sklearn,pandas
+
+"""# save the model to disk
+filename = 'finalized_model.sav'
+pickle.dump(clf, open(filename, 'wb'))
+"""
+
+def predictor(splitted_data, model_path='finalized_model.sav'):
+    """Classify a URL feature row with the pickled model.
+
+    Args:
+        splitted_data: DataFrame whose columns at positions 3 to 8 are the
+            model inputs; the first three columns are skipped. Only the
+            first row is reported.
+        model_path: Path of the pickled model file. A relative path is
+            resolved against the current working directory.
+
+    Returns:
+        A ``(str1, str2)`` tuple of display strings: the spoofed yes/no
+        verdict and the confidence score line.
+    """
+    # NOTE(security): unpickling runs code stored in the file, so
+    # model_path must only ever point at a trusted model file.
+    with open(model_path, 'rb') as model_file:
+        loaded_model = pickle.load(model_file)
+
+    features = splitted_data[splitted_data.columns[3:9]]
+
+    preds = loaded_model.predict(features)
+    # Index the first prediction explicitly: comparing the whole array in an
+    # ``if`` raises for inputs with more than one row.
+    if preds[0] == 0:
+        str1 = "Spoofed webpage: Yes"
+    else:
+        str1 = "Spoofed webpage: NO"
+
+    score = loaded_model.predict_proba(features)
+    # NOTE(review): this reports the probability in column 1 of predict_proba
+    # regardless of which class was predicted -- confirm that is intended.
+    str2 = "Confidence score: " + str(score[0][1])
+
+    return str1, str2
+
+
+
+
+
+
+