Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
Komal01 authored Nov 23, 2018
1 parent 292cead commit a555fc0
Show file tree
Hide file tree
Showing 4 changed files with 194 additions and 0 deletions.
90 changes: 90 additions & 0 deletions feature_extractor (1).py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import pandas as pd
import numpy as np

import rf_model


class feature_extractor:
    """Derive lexical phishing-detection features from a single URL.

    The URL is split into protocol, domain name and address (path), five
    integer features are computed from those parts, and the resulting
    one-row DataFrame is handed to ``rf_model.predictor``.
    """

    def __init__(self, url: str):
        """Remember the URL whose features will be extracted."""
        self.input_url = url

    def long_url(self, l):
        """Bucket a URL by its length.

        Returns 0 for fewer than 54 characters, 2 for 54 to 75 characters
        (inclusive) and 1 for anything longer.
        """
        length = len(str(l))
        if length < 54:
            return 0
        if length <= 75:
            return 2
        return 1

    def have_at_symbol(self, l):
        """Return 1 when the text contains an ``@`` symbol, otherwise 0."""
        return 1 if "@" in str(l) else 0

    def redirection(self, l):
        """Return 1 when the text contains ``//``, otherwise 0.

        ``extract`` applies this to the part of the URL after the protocol,
        so a hit means a second ``//`` appears later in the URL.
        """
        return 1 if "//" in str(l) else 0

    def prefix_suffix_seperation(self, l):
        """Return 1 when the text contains a ``-``, otherwise 0."""
        return 1 if "-" in str(l) else 0

    def sub_domains(self, l):
        """Bucket a domain name by the number of dots it contains.

        Returns 0 for fewer than three dots, 2 for exactly three dots and
        1 for more than three.
        """
        dots = str(l).count(".")
        if dots < 3:
            return 0
        if dots == 3:
            return 2
        return 1

    @staticmethod
    def _split_protocol(url):
        """Split ``url`` at the first ``://`` into (protocol, remainder).

        When no ``://`` is present the protocol is None and the whole URL
        is treated as the remainder.
        """
        text = str(url)
        protocol, separator, remainder = text.partition("://")
        if not separator:
            return None, text
        return protocol, remainder

    @staticmethod
    def _split_domain(remainder):
        """Split ``remainder`` at the first ``/`` into (domain_name, address).

        The address is None when there is no ``/`` after the domain name.
        """
        domain_name, separator, address = str(remainder).partition("/")
        return domain_name, (address if separator else None)

    def _build_features(self):
        """Build the one-row feature DataFrame for ``self.input_url``.

        Columns, in order: ``protocol``, ``domain_name``, ``address``,
        ``long_url``, ``having_@_symbol``, ``redirection_//_symbol``,
        ``prefix_suffix_seperation``, ``sub_domains``.

        The progress messages printed here are kept from the original
        implementation so the console trace stays the same.
        """
        print("in script 2")
        input_data = [{"URL": self.input_url}]
        print('input taken')
        temp_df = pd.DataFrame(input_data)
        print("dataframe created")

        # Split only at the FIRST "://" so an embedded second URL stays in
        # the remainder (where the redirection feature can see it), and so
        # a URL without a protocol no longer raises a KeyError.
        seperation_of_protocol = pd.DataFrame(
            [self._split_protocol(url) for url in temp_df['URL']],
            index=temp_df.index,
            columns=[0, 1],
        )
        print("step 1 done")

        # Split only at the first "/" after the protocol. Always yields two
        # fields, so a URL without a path no longer breaks the column
        # renaming below; this also avoids passing ``n`` positionally to
        # ``Series.str.split``, which newer pandas releases reject.
        seperation_domain_name = pd.DataFrame(
            [self._split_domain(rest) for rest in seperation_of_protocol[1]],
            index=temp_df.index,
        )
        print("step 2 done")

        # Renaming columns of the data frame.
        seperation_domain_name.columns = ["domain_name", "address"]
        print("step 3 done")

        # Concatenation of data frames.
        splitted_data = pd.concat(
            [seperation_of_protocol[0], seperation_domain_name], axis=1
        )
        print("step 4 done")

        splitted_data.columns = ['protocol', 'domain_name', 'address']
        print("step 5 done")

        # Feature extraction starts here.
        splitted_data['long_url'] = temp_df['URL'].apply(self.long_url)
        print("feature extra 1")
        splitted_data['having_@_symbol'] = temp_df['URL'].apply(self.have_at_symbol)
        print("feature extra 2")
        splitted_data['redirection_//_symbol'] = seperation_of_protocol[1].apply(self.redirection)
        print("feature extra 3")
        splitted_data['prefix_suffix_seperation'] = seperation_domain_name['domain_name'].apply(self.prefix_suffix_seperation)
        print("feature extra 4")
        splitted_data['sub_domains'] = splitted_data['domain_name'].apply(self.sub_domains)
        print("feature extra 5")

        return splitted_data

    def extract(self):
        """Extract the features of ``self.input_url`` and classify them.

        Returns whatever ``rf_model.predictor`` returns for the one-row
        feature DataFrame built by ``_build_features``.
        """
        return rf_model.predictor(self._build_features())
Binary file added finalized_model (1).sav
Binary file not shown.
69 changes: 69 additions & 0 deletions gui (1).py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
from PyQt5 import QtCore, QtGui, QtWidgets
import feature_extractor

class Ui_Spam_detector(object):
    """Main-window UI for the spam URL detector.

    Builds a window with a URL input field, a "Check" button and a text
    area that shows the classification result for each checked URL.
    """

    def setupUi(self, Spam_detector):
        """Create and lay out all widgets on the ``Spam_detector`` window."""
        Spam_detector.setObjectName("Spam_detector")
        Spam_detector.resize(521, 389)
        self.centralwidget = QtWidgets.QWidget(Spam_detector)
        self.centralwidget.setObjectName("centralwidget")

        # Check button, wired to the button_click handler.
        self.check_button = QtWidgets.QPushButton(self.centralwidget)
        self.check_button.setGeometry(QtCore.QRect(210, 170, 93, 28))
        self.check_button.setObjectName("check_button")
        self.check_button.clicked.connect(self.button_click)

        # URL input section.
        self.url_input = QtWidgets.QLineEdit(self.centralwidget)
        self.url_input.setGeometry(QtCore.QRect(70, 111, 431, 31))
        self.url_input.setObjectName("url_input")

        self.label = QtWidgets.QLabel(self.centralwidget)
        self.label.setGeometry(QtCore.QRect(20, 110, 81, 31))
        self.label.setObjectName("label")

        # Output message area.
        self.output_text = QtWidgets.QTextEdit(self.centralwidget)
        self.output_text.setGeometry(QtCore.QRect(30, 241, 461, 121))
        self.output_text.setObjectName("output_text")

        self.label_2 = QtWidgets.QLabel(self.centralwidget)
        self.label_2.setGeometry(QtCore.QRect(110, 10, 311, 41))
        self.label_2.setObjectName("label_2")

        Spam_detector.setCentralWidget(self.centralwidget)
        self.statusbar = QtWidgets.QStatusBar(Spam_detector)
        self.statusbar.setObjectName("statusbar")
        Spam_detector.setStatusBar(self.statusbar)

        self.retranslateUi(Spam_detector)
        QtCore.QMetaObject.connectSlotsByName(Spam_detector)

    def retranslateUi(self, Spam_detector):
        """Set the window title and the visible text of the button and labels."""
        _translate = QtCore.QCoreApplication.translate
        Spam_detector.setWindowTitle(_translate("Spam_detector", "MainWindow"))
        self.check_button.setText(_translate("Spam_detector", "Check "))
        self.label.setText(_translate("Spam_detector", "<html><head/><body><p><span style=\" font-size:10pt;\">URL :</span></p></body></html>"))
        self.label_2.setText(_translate("Spam_detector", "<html><head/><body><p align=\"center\"><span style=\" font-size:16pt;\">Spam URL Detector</span></p></body></html>"))

    def button_click(self):
        """Classify the URL typed into the input field and show the result.

        The two strings returned by ``feature_extractor.extract`` are
        appended to the output area. Empty input and any failure during
        extraction or prediction are reported in the output area instead of
        propagating out of the slot, because an unhandled exception in a
        PyQt5 slot terminates the whole application.
        """
        # Surrounding whitespace (e.g. from a paste) is not part of the URL.
        text = self.url_input.text().strip()
        if not text:
            self.output_text.append("Please enter a URL to check.\n\n")
            return

        try:
            obj = feature_extractor.feature_extractor(text)
            str1, str2 = obj.extract()
        except Exception as exc:  # slot boundary: report, do not crash the GUI
            self.output_text.append(
                "Could not analyse the URL: {}\n\n".format(exc)
            )
            return

        self.output_text.append("{} \n{}\n\n".format(str1, str2))


#def show_output():

if __name__ == "__main__":
    import sys

    # Start the Qt application, build the detector UI on a fresh main
    # window, show it, and exit with the event loop's return code.
    application = QtWidgets.QApplication(sys.argv)
    main_window = QtWidgets.QMainWindow()
    window_ui = Ui_Spam_detector()
    window_ui.setupUi(main_window)
    main_window.show()
    exit_code = application.exec_()
    sys.exit(exit_code)

35 changes: 35 additions & 0 deletions rf_model (1).py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import pickle
import numpy,sklearn,pandas

"""# save the model to disk
filename = 'finalized_model.sav'
pickle.dump(clf, open(filename, 'wb'))
"""

def predictor(splitted_data):
    """Classify one URL's features with the saved model.

    Args:
        splitted_data: DataFrame whose columns from position 3 onward
            (up to position 8) hold the model's input features; only the
            first row's prediction is reported.

    Returns:
        A ``(str1, str2)`` tuple: ``str1`` says whether the page is
        considered spoofed (prediction 0 means "Yes"), ``str2`` carries the
        confidence score taken from ``predict_proba``.

    Raises:
        FileNotFoundError: if ``finalized_model.sav`` is not in the current
            working directory.
    """
    print("\n script rf_model")

    # Load the model from disk; the context manager closes the file handle.
    # NOTE: unpickling executes code from the file - only load model files
    # that come from a trusted source.
    filename = 'finalized_model.sav'
    with open(filename, 'rb') as model_file:
        loaded_model = pickle.load(model_file)
    print("model loaded")

    print(splitted_data.shape)
    print(list(splitted_data))

    # The first three columns are the raw URL parts; the features follow.
    x = splitted_data.columns[3:9]
    features = splitted_data[x]

    preds = loaded_model.predict(features)
    print("pridction complete")
    print(preds)

    # Look at the first prediction explicitly rather than relying on the
    # truth value of a whole array.
    if preds[0] == 0:
        str1 = "Spoofed webpage: Yes"
    else:
        str1 = "Spoofed webpage: NO"

    score = loaded_model.predict_proba(features)
    # NOTE(review): this is always the probability of the model's second
    # class, regardless of which class was predicted - confirm that this is
    # the intended "confidence".
    str2 = "Confidence score: " + str(score[0][1])

    return str1, str2







0 comments on commit a555fc0

Please sign in to comment.