Skip to content

Commit

Permalink
graph_classification via supervised learning
Browse files Browse the repository at this point in the history
  • Loading branch information
lizitong67 committed Oct 21, 2020
1 parent 5775237 commit c4086cf
Show file tree
Hide file tree
Showing 2 changed files with 359 additions and 0 deletions.
210 changes: 210 additions & 0 deletions graph_classification.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
#! /usr/bin/env python
"""
Graph Classification via GCN
Author: Alston
Date: 2020.9.14
"""

import os
import dgl
import dgl.function as fn
import numpy as np
import networkx as nx
import torch as th
import torch.nn as nn
import torch.nn.functional as F
from dgl.nn.pytorch import GraphConv
from dgl.data import DGLDataset
from torch.utils.data import DataLoader



class Classifier(nn.Module):
def __init__(self, in_dim, hidden_dim, n_classes):
super(Classifier, self).__init__()
self.conv1 = GraphConv(in_dim, hidden_dim)
self.conv2 = GraphConv(hidden_dim, hidden_dim)
self.linear1 = nn.Linear(hidden_dim, hidden_dim)
self.linear2 = nn.Linear(hidden_dim, 10)
self.classify = nn.Linear(10, n_classes)
self.sigmoid = nn.Sigmoid()

def forward(self, g, h):
# Apply graph convolution and activation.
h = F.relu(self.conv1(g, h))
h = F.relu(self.conv2(g, h))
with g.local_scope():
g.ndata['h'] = h
# Calculate graph representation by average readout.
hg = dgl.mean_nodes(g, 'h')
x = self.linear1(hg)
x = self.linear2(x)
output = self.sigmoid(self.classify(x))
return output

# Customized Dataset
class MyDataset(DGLDataset):
def __init__(self):
super(MyDataset, self).__init__(name="MyDataset")

def process(self):
self.graph_list = []
self.label_list = []

homograph = "dataset/homograph"
scenarios = os.listdir(homograph)
for scenario in scenarios:
filepath = "dataset/homograph/" + scenario
graphs = os.listdir(filepath)
for graph in graphs:
glist, label_dict = dgl.load_graphs(filepath + '/' + graph)
self.graph_list.append(glist[0])
for key, value in label_dict.items():
if key != 'Drive-by-download':
self.label_list.append(0)
else:
self.label_list.append(1)

def __getitem__(self, idx):
"""
Get graph and label by index
"""
return self.graph_list[idx], th.tensor(self.label_list[idx])

def __len__(self):
"""Number of graphs in the dataset"""
return len(self.graph_list)

def collate(batch):
graphs, labels = map(list, zip(*batch))
batched_graph = dgl.batch(graphs)
batched_labels = th.tensor(labels)
return batched_graph, batched_labels

if __name__ == "__main__":
dataset = MyDataset()
dataloader = DataLoader(
dataset,
batch_size=10,
collate_fn=collate,
drop_last=False,
shuffle=True)
print(dataloader)

model = Classifier(8, 20, 2)
opt = th.optim.Adam(model.parameters())
for epoch in range(100):
for batched_graph, labels in dataloader:
feats = batched_graph.ndata['feat'].float()
logits = model(batched_graph, feats)
loss = F.cross_entropy(logits, labels)
opt.zero_grad()
loss.backward()
opt.step()
print('Epoch %d | Loss: %.4f' % (epoch, loss.item()))



"""
Epoch 0 | Loss: 0.6993
Epoch 1 | Loss: 0.7039
Epoch 2 | Loss: 0.6674
Epoch 3 | Loss: 0.6835
Epoch 4 | Loss: 0.6827
Epoch 5 | Loss: 0.6789
Epoch 6 | Loss: 0.6753
Epoch 7 | Loss: 0.6539
Epoch 8 | Loss: 0.6573
Epoch 9 | Loss: 0.6500
Epoch 10 | Loss: 0.6246
Epoch 11 | Loss: 0.6567
Epoch 12 | Loss: 0.6270
Epoch 13 | Loss: 0.6040
Epoch 14 | Loss: 0.6375
Epoch 15 | Loss: 0.6208
Epoch 16 | Loss: 0.6930
Epoch 17 | Loss: 0.6281
Epoch 18 | Loss: 0.5654
Epoch 19 | Loss: 0.6036
Epoch 20 | Loss: 0.5478
Epoch 21 | Loss: 0.5834
Epoch 22 | Loss: 0.6159
Epoch 23 | Loss: 0.5316
Epoch 24 | Loss: 0.5525
Epoch 25 | Loss: 0.4411
Epoch 26 | Loss: 0.4759
Epoch 27 | Loss: 0.4574
Epoch 28 | Loss: 0.3825
Epoch 29 | Loss: 0.5281
Epoch 30 | Loss: 0.3677
Epoch 31 | Loss: 0.4427
Epoch 32 | Loss: 0.3487
Epoch 33 | Loss: 0.3621
Epoch 34 | Loss: 0.3184
Epoch 35 | Loss: 0.3133
Epoch 36 | Loss: 0.3803
Epoch 37 | Loss: 0.2646
Epoch 38 | Loss: 0.2648
Epoch 39 | Loss: 0.2382
Epoch 40 | Loss: 0.2440
Epoch 41 | Loss: 0.2007
Epoch 42 | Loss: 0.1962
Epoch 43 | Loss: 0.2106
Epoch 44 | Loss: 0.2070
Epoch 45 | Loss: 0.4496
Epoch 46 | Loss: 0.1555
Epoch 47 | Loss: 0.1578
Epoch 48 | Loss: 0.1500
Epoch 49 | Loss: 0.3055
Epoch 50 | Loss: 0.1369
Epoch 51 | Loss: 0.1380
Epoch 52 | Loss: 0.1008
Epoch 53 | Loss: 0.1185
Epoch 54 | Loss: 0.1280
Epoch 55 | Loss: 0.1091
Epoch 56 | Loss: 0.0954
Epoch 57 | Loss: 0.1003
Epoch 58 | Loss: 0.0959
Epoch 59 | Loss: 0.4562
Epoch 60 | Loss: 0.2788
Epoch 61 | Loss: 0.0891
Epoch 62 | Loss: 0.0763
Epoch 63 | Loss: 0.1028
Epoch 64 | Loss: 0.2882
Epoch 65 | Loss: 0.0797
Epoch 66 | Loss: 0.2800
Epoch 67 | Loss: 0.0789
Epoch 68 | Loss: 0.0787
Epoch 69 | Loss: 0.0543
Epoch 70 | Loss: 0.2681
Epoch 71 | Loss: 0.0973
Epoch 72 | Loss: 0.0719
Epoch 73 | Loss: 0.0666
Epoch 74 | Loss: 0.0648
Epoch 75 | Loss: 0.0619
Epoch 76 | Loss: 0.0707
Epoch 77 | Loss: 0.0367
Epoch 78 | Loss: 0.0562
Epoch 79 | Loss: 0.0614
Epoch 80 | Loss: 0.0453
Epoch 81 | Loss: 0.0586
Epoch 82 | Loss: 0.0455
Epoch 83 | Loss: 0.0476
Epoch 84 | Loss: 0.0528
Epoch 85 | Loss: 0.0419
Epoch 86 | Loss: 0.2351
Epoch 87 | Loss: 0.0435
Epoch 88 | Loss: 0.0597
Epoch 89 | Loss: 0.0410
Epoch 90 | Loss: 0.2581
Epoch 91 | Loss: 0.2576
Epoch 92 | Loss: 0.0585
Epoch 93 | Loss: 0.0549
Epoch 94 | Loss: 0.0569
Epoch 95 | Loss: 0.2630
Epoch 96 | Loss: 0.2523
Epoch 97 | Loss: 0.0300
Epoch 98 | Loss: 0.0297
Epoch 99 | Loss: 0.0286
Epoch 100 | Loss: 0.0342
"""
149 changes: 149 additions & 0 deletions test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@

import dgl
import torch as th
import torch.nn.functional as F

# graph_data = {('drug', 'interacts', 'drug'): (th.tensor([0, 1]), th.tensor([1, 2])),
# ('drug', 'interacts', 'gene'): (th.tensor([0, 1]), th.tensor([2, 3])),
# ('drug', 'treats', 'disease'): (th.tensor([1]), th.tensor([2]))
# }
# g = dgl.heterograph(graph_data)
# feats = th.tensor([1,2])
# g.edges[('drug', 'interacts', 'drug')].data["feats"] = feats
# print(g.edges[('drug', 'interacts', 'drug')].data["feats"])
# print(g.etypes)

# node_type_list = ['process', 'thread', 'file', 'MAP_ANONYMOUS', 'NA', 'stdin', 'stdout', 'stderr',
# 'accept', 'access', 'bind', 'chmod', 'clone', 'close', 'connect', 'execve', 'fstat',
# 'ftruncate', 'listen', 'mmap2', 'open', 'read', 'recv', 'recvfrom', 'recvmsg', 'send',
# 'sendmsg', 'sendto', 'stat', 'truncate', 'unlink', 'waitpid', 'write', 'writev']


# import csv
#
# node_types = []
# edge_types = []
# data_path = 'dataset/output_ADM.csv'
# with open(data_path, 'r') as file:
# reader = csv.reader(file)
# for line in reader:
# # data_entry: source-id, source-type, destination-id, destination-type, edge-type, timestamp, graph-id
# src_type = line[1]
# des_type = line[3]
# edge_type = line[4]
# if src_type not in node_types:
# node_types.append(src_type)
# if des_type not in node_types:
# node_types.append(des_type)
# if edge_type not in edge_types:
# edge_types.append(edge_type)
#
# with open('node_type.txt', 'a+') as f:
# f.write(str(node_types))
# with open('edge_type.txt', 'a+') as f:
# f.write(str(edge_types))


# l = [[1,2,3], [1,2,2], [4,5,6]]
# ts = th.tensor(l)
# print(ts)

# u, v = th.tensor([0, 0, 0, 1]), th.tensor([1, 1, 3, 3])
# g = dgl.graph((u,v), idtype=th.int32)
# g = dgl.add_reverse_edges(g)
# print(bg.edges())
# bg.edata['p'] = th.FloatTensor([1, 1, 1, 1])

# glist, label_dict = dgl.load_graphs("0.bin")
# g = dgl.add_reverse_edges(glist[0])
# result = dgl.sampling.random_walk(g, g.nodes(), length=5, restart_prob=0)
# node_feats = result[0]+1
# node_feats = node_feats.type(th.FloatTensor)
# node_feats = F.normalize(node_feats, p=2, dim=1)
# g.ndata['feat'] = node_feats[:, 1:]

# th.set_printoptions(sci_mode=False)
# print(g.ndata['feat'][0:10])

edge_types = ['execve', 'access', 'mmap2', 'open', 'fstat', 'close', 'read', 'stat', 'write',
'unlink', 'clone', 'waitpid', 'bind', 'listen', 'chmod', 'connect', 'writev',
'recv', 'ftruncate', 'sendmsg', 'send', 'recvmsg', 'accept', 'sendto', 'recvfrom',
'truncate']

import dgl.function as fn
import torch.nn as nn


# u, v = th.tensor([0, 1, 2, 3]), th.tensor([1, 2, 3, 4])
# g = dgl.graph((u,v), idtype=th.int32)
# g.ndata['feat'] = th.ones(5,2)
# g.edata['feat'] = th.ones(4,3)
# print(g.ndata)
# print(g.edata)

# print(g.ndata['feat'])
# node_feat_dim = 2
# linear_src = nn.Parameter(th.FloatTensor(size=(1, node_feat_dim)))
# print(linear_src)
# out_src = g.ndata['feat'] * linear_src
# print(out_src)

# linear_src = nn.Parameter(th.FloatTensor(size=(1, node_feat_dim)))
# linear_dst = nn.Parameter(th.FloatTensor(size=(1, node_feat_dim)))
# out_src = g.ndata['feat'] * linear_src
# out_dst = g.ndata['feat'] * linear_dst
# g.srcdata.update({'out_src': out_src})
# g.dstdata.update({'out_dst': out_dst})
# g.apply_edges(fn.u_add_v('out_src', 'out_dst', 'out'))
# print(g.edata['out'])


# glist, label_dict = dgl.load_graphs("dataset/homograph/YouTube/0.bin")
# g = dgl.add_reverse_edges(glist[0])
# print(g.ndata)
# print(g.edate)


# g1 = dgl.graph(([0, 1], [1, 0]))
# g1.ndata['h'] = th.tensor([1., 2.])
# g2 = dgl.graph(([0, 1], [1, 2]))
# g2.ndata['h'] = th.tensor([1., 2., 3.])
#
#
# print (dgl.readout_nodes(g1, 'h'))
# # tensor([3.]) # 1 + 2
#
# bg = dgl.batch([g1, g2])
# print (dgl.readout_nodes(bg, 'h'))
# # tensor([3., 6.]) # [1 + 2, 1 + 2 + 3]

"""
import os
graph_list = []
label_list = []
homograph = "dataset/homograph"
scenarios = os.listdir(homograph)
for scenario in scenarios:
filepath = "dataset/homograph/"+scenario
graphs = os.listdir(filepath)
for graph in graphs:
glist, label_dict = dgl.load_graphs(filepath+'/'+graph)
graph_list.append(glist[0])
for key, value in label_dict.items():
if key != 'Attack':
label_list.append(0)
else:
label_list.append(1)
print(len(graph_list))
print(label_list)
"""

# graph_list, label_list = dgl.load_graphs("dataset/homograph/YouTube/0.bin")
u, v = th.tensor([0, 1, 2, 3]), th.tensor([1, 2, 3, 4])
g = dgl.graph((u,v), idtype=th.int32)
g.ndata['feat'] = th.ones(5,2)
g.edata['feat'] = th.ones(4,3)
bg = dgl.add_reverse_edges(g, copy_ndata=True, copy_edata=True)
print(bg)

0 comments on commit c4086cf

Please sign in to comment.