
Commit 20bac2b: "Initial commit"
1 parent 47664e6 commit 20bac2b

18 files changed: +1435 -0 lines changed

core/EmbeddingDataSet.py

Lines changed: 154 additions & 0 deletions
@@ -0,0 +1,154 @@
import os
import pickle
import time

import numpy as np
import scipy.sparse as sp

from core.GraphDataBlock import GraphDataBlock
from util.graph_utils import neighbor_sampling


class EmbeddingDataSet:
    """
    Attributes:
        name (str): name of dataset
        data_dir (str): path to dataset folder
        train_dir (str): path to training data file
        test_dir (str): path to test data file
        input_dim (int): number of data features per node
        is_labelled (bool): whether underlying class labels are present
        all_data (list[GraphDataBlock]): data inputs packaged into blocks
        all_indices (np.array): input sequence when packaging data into blocks

        inputs (scipy csr matrix): data feature matrix of size n x f
        labels (np.array): data class label matrix of size n x 1
        adj_matrix (scipy csr matrix): adjacency matrix of size n x n
    """

    # Data file per dataset; the test file defaults to the training file
    train_dir = {'cora': 'cora_full.pkl'}
    test_dir = train_dir

    def __init__(self, name, data_dir, train=True):
        self.name = name
        self.data_dir = data_dir
        self.train_dir = EmbeddingDataSet.train_dir[name]
        self.test_dir = EmbeddingDataSet.test_dir[name]
        self.is_labelled = False

        self.all_data = []

        # Extract data from file contents
        data_root = os.path.join(self.data_dir, self.name)
        if train:
            fname = os.path.join(data_root, self.train_dir)
        else:
            assert self.test_dir is not None
            fname = os.path.join(data_root, self.test_dir)
        with open(fname, 'rb') as f:
            file_contents = pickle.load(f)

        self.inputs = file_contents[0]
        self.labels = file_contents[1]
        self.adj_matrix = file_contents[2]

        self.is_labelled = len(self.labels) != 0
        self.input_dim = self.inputs.shape[1]

        self.all_indices = np.arange(0, self.inputs.shape[0])

        # Convert inputs and adjacency to csr matrices
        self.inputs = sp.csr_matrix(self.inputs)
        self.adj_matrix = sp.csr_matrix(self.adj_matrix)

    def create_all_data(self, n_batches=1, shuffle=False, sampling=False, full_path_matrix=None):
        """
        Initialises all_data as a list of GraphDataBlock
        Args:
            n_batches (int): number of blocks to return
            shuffle (bool): whether to shuffle the input sequence
            sampling (bool): whether to expand data blocks with neighbor sampling
            full_path_matrix (scipy matrix, optional): precomputed shortest path matrix, sliced per block
        """
        i = 0
        labels_subset = []
        self.all_data = []

        if shuffle:
            np.random.shuffle(self.all_indices)
        else:
            self.all_indices = np.arange(0, self.inputs.shape[0])

        # Split equally
        # TODO: Another option is to split randomly
        chunk_sizes = self.get_k_equal_chunks(self.inputs.shape[0], k=n_batches)

        t_start = time.time()

        for num_samples in chunk_sizes:
            mask = sorted(self.all_indices[i: i + num_samples])

            # Perform sampling to obtain the local neighborhood of the mini-batch
            if sampling:
                D_layers = [9, 14]  # max samples per layer
                mask = neighbor_sampling(self.adj_matrix, mask, D_layers)

            inputs_subset = self.inputs[mask]
            adj_subset = self.adj_matrix[mask, :][:, mask]

            if self.is_labelled:
                labels_subset = self.labels[mask]

            # Package data into a graph block
            G = GraphDataBlock(inputs_subset, labels=labels_subset, W=adj_subset)

            # Keep the original indices from the complete dataset
            G.original_indices = mask

            # Attach the shortest path matrix from precomputed data if needed
            if full_path_matrix is not None:
                G.precomputed_path_matrix = full_path_matrix[mask, :][:, mask]

            self.all_data.append(G)
            i += num_samples

        t_elapsed = time.time() - t_start
        print('Data blocks of length: ', [len(G.labels) for G in self.all_data])
        print("Time to create all data (s) = {:.4f}".format(t_elapsed))

    def summarise(self):
        print("Name of dataset = {}".format(self.name))
        print("Input dimension = {}".format(self.input_dim))
        print("Number of training samples = {}".format(self.inputs.shape[0]))
        print("Training labels = {}".format(self.is_labelled))

    def get_k_equal_chunks(self, n, k):
        # Returns n % k chunk sizes of n // k + 1 and the rest of size n // k,
        # e.g. n=10, k=3 -> [4, 3, 3]
        p, r = divmod(n, k)
        return [p + 1 for _ in range(r)] + [p for _ in range(k - r)]

    def get_current_inputs(self):
        # Note: assumes the dataset is labelled
        inputs = self.inputs[self.all_indices]
        labels = self.labels[self.all_indices]
        adj = self.adj_matrix[self.all_indices, :][:, self.all_indices]
        return inputs, labels, adj

    def get_sample_block(self, n_initial, sample_neighbors, verbose=0):
        """
        Returns a subset of data as a GraphDataBlock
        Args:
            n_initial (int): number of samples at the start
            sample_neighbors (bool): whether to expand the sample block with neighbor sampling
            verbose (int): whether to print the size of the expanded block
        Returns:
            G (GraphDataBlock): data subset
        """
        mask = sorted(np.random.choice(self.all_indices, size=n_initial, replace=False))
        if sample_neighbors:
            mask = neighbor_sampling(self.adj_matrix, mask, D_layers=[9, 14])
        inputs = self.inputs[mask]
        labels = self.labels[mask]
        W = self.adj_matrix[mask, :][:, mask]
        G = GraphDataBlock(inputs, labels, W)
        G.original_indices = mask
        if verbose:
            print("Initial set of {} points was expanded to {} points".format(n_initial, len(mask)))
        return G
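
For orientation, a minimal usage sketch of this class; the data_dir value is an assumption, and the pickle at data/cora/cora_full.pkl must hold the (inputs, labels, adj_matrix) triple documented in the docstring:

from core.EmbeddingDataSet import EmbeddingDataSet

dataset = EmbeddingDataSet('cora', data_dir='data', train=True)
dataset.summarise()

# Package the graph into 4 shuffled mini-batch blocks, each expanded
# around its seed vertices by neighbor sampling
dataset.create_all_data(n_batches=4, shuffle=True, sampling=True)
for G in dataset.all_data:
    print(len(G.original_indices))  # indices map back into the full dataset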

core/GraphConvNet.py

Lines changed: 123 additions & 0 deletions
@@ -0,0 +1,123 @@
import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable

from core.GraphConvNetCell import GraphConvNetCell
from util.training_utils import get_torch_dtype


dtypeFloat, dtypeLong = get_torch_dtype()


class GraphConvNet(nn.Module):
    """
    PyTorch implementation of Residual Gated Graph ConvNets
    Adapted from "An Experimental Study of Neural Networks for Variable Graphs" (ICLR'18),
    Xavier Bresson and Thomas Laurent
    See: https://github.com/xbresson/spatial_graph_convnets
    """

    def __init__(self, net_parameters):
        super(GraphConvNet, self).__init__()

        self.name = 'graph_net'

        # parameters
        D = net_parameters['D']
        n_components = net_parameters['n_components']
        H = net_parameters['H']
        L = net_parameters['L']

        # vector of hidden dimensions
        net_layers = [H] * L

        # CL cells
        # NOTE: each graph convnet cell uses *TWO* convolutional operations,
        # so L hidden layers are realised by L // 2 cells
        net_layers_extended = [D] + net_layers  # include embedding dim
        L = len(net_layers)
        list_of_gnn_cells = []  # list of NN cells
        for layer in range(L // 2):
            Hin, Hout = net_layers_extended[2 * layer], net_layers_extended[2 * layer + 2]
            list_of_gnn_cells.append(GraphConvNetCell(Hin, Hout))

        # register the cells for pytorch
        self.gnn_cells = nn.ModuleList(list_of_gnn_cells)

        # fc: final projection to the embedding space
        Hfinal = net_layers_extended[-1]
        self.fc = nn.Linear(Hfinal, n_components)

        # init
        self.init_weights_Graph_OurConvNet(Hfinal, n_components, 1)

        # print('\nnb of hidden layers=', L)
        # print('dim of layers (w/ embed dim)=', net_layers_extended)
        # print('\n')

        # class variables
        self.L = L
        self.net_layers_extended = net_layers_extended

    def init_weights_Graph_OurConvNet(self, Fin_fc, Fout_fc, gain):
        scale = gain * np.sqrt(2.0 / Fin_fc)
        self.fc.weight.data.uniform_(-scale, scale)
        self.fc.bias.data.fill_(0)

    def forward(self, G):
        # Data matrix
        x = G.inputs

        # Unroll each sample into a single vector
        x = x.view(x.shape[0], -1)

        # Pass the raw data matrix X directly as input
        x = Variable(torch.FloatTensor(x).type(dtypeFloat), requires_grad=False)

        # graph operators
        # Edge = start vertex to end vertex
        # E_start = E x V mapping matrix from edge index to corresponding start vertex
        # E_end = E x V mapping matrix from edge index to corresponding end vertex
        E_start = G.edge_to_starting_vertex
        E_end = G.edge_to_ending_vertex
        E_start = torch.from_numpy(E_start.toarray()).type(dtypeFloat)
        E_end = torch.from_numpy(E_end.toarray()).type(dtypeFloat)
        E_start = Variable(E_start, requires_grad=False)
        E_end = Variable(E_end, requires_grad=False)

        for layer in range(self.L // 2):
            gnn_layer = self.gnn_cells[layer]
            x = gnn_layer(x, E_start, E_end)  # V x Hfinal

        # FC
        x = self.fc(x)  # V x n_components

        return x

    def loss(self, y, y_target):
        loss = nn.MSELoss()(y, y_target)  # L2 loss
        return loss

    def pairwise_loss(self, y, y_target, W):
        # W is expected in scipy COO format: W.row / W.col hold the endpoints of each edge.
        # Penalise the difference between pairwise distances along edges
        # in the target space and in the embedding space.
        distances_1 = y_target[W.row, :] - y_target[W.col, :]
        distances_2 = y[W.row, :] - y[W.col, :]
        loss = torch.mean(torch.pow(distances_1.norm(dim=1) - distances_2.norm(dim=1), 2))
        return loss

    def update(self, lr):
        update = torch.optim.Adam(self.parameters(), lr=lr)
        return update

    def update_learning_rate(self, optimizer, lr):
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
        return optimizer

    def nb_param(self):
        # Total number of trainable parameters
        return sum(p.numel() for p in self.parameters())
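
To make the cell wiring above concrete: with the hypothetical values D=1433, H=128, L=4, net_layers_extended is [1433, 128, 128, 128, 128] and L // 2 = 2 cells are built, mapping 1433 -> 128 -> 128 (each cell consumes two layer widths, so an odd L is effectively rounded down). A short sketch, also showing that pairwise_loss expects W as a scipy COO matrix since it reads W.row and W.col:

import numpy as np
import scipy.sparse as sp
import torch
from core.GraphConvNet import GraphConvNet

net_parameters = {'D': 1433, 'n_components': 2, 'H': 128, 'L': 4}  # hypothetical values
net = GraphConvNet(net_parameters)
print(net.gnn_cells)             # ModuleList of 2 GraphConvNetCell modules
print(net.nb_param())            # total trainable parameters
optimizer = net.update(lr=5e-4)  # Adam over all parameters

# pairwise_loss on a toy 5-node path graph: it compares edge-wise
# distances of the embedding y against those of the target y_target
y = torch.randn(5, 2)
y_target = torch.randn(5, 2)
W = sp.coo_matrix(np.eye(5, k=1))  # 4 directed edges i -> i+1
print(net.pairwise_loss(y, y_target, W))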

core/GraphConvNetCell.py

Lines changed: 108 additions & 0 deletions
@@ -0,0 +1,108 @@
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F


class GraphConvNetCell(nn.Module):
    """
    PyTorch implementation of Residual Gated Graph ConvNets
    Adapted from "An Experimental Study of Neural Networks for Variable Graphs" (ICLR'18),
    Xavier Bresson and Thomas Laurent
    See: https://github.com/xbresson/spatial_graph_convnets
    """

    def __init__(self, dim_in, dim_out):
        super(GraphConvNetCell, self).__init__()

        # conv1
        self.Ui1 = nn.Linear(dim_in, dim_out, bias=False)
        self.Uj1 = nn.Linear(dim_in, dim_out, bias=False)
        self.Vi1 = nn.Linear(dim_in, dim_out, bias=False)
        self.Vj1 = nn.Linear(dim_in, dim_out, bias=False)
        self.bu1 = torch.nn.Parameter(torch.FloatTensor(dim_out), requires_grad=True)
        self.bv1 = torch.nn.Parameter(torch.FloatTensor(dim_out), requires_grad=True)

        # conv2
        self.Ui2 = nn.Linear(dim_out, dim_out, bias=False)
        self.Uj2 = nn.Linear(dim_out, dim_out, bias=False)
        self.Vi2 = nn.Linear(dim_out, dim_out, bias=False)
        self.Vj2 = nn.Linear(dim_out, dim_out, bias=False)
        self.bu2 = torch.nn.Parameter(torch.FloatTensor(dim_out), requires_grad=True)
        self.bv2 = torch.nn.Parameter(torch.FloatTensor(dim_out), requires_grad=True)

        # bn1, bn2
        self.bn1 = torch.nn.BatchNorm1d(dim_out)
        self.bn2 = torch.nn.BatchNorm1d(dim_out)

        # resnet shortcut
        self.R = nn.Linear(dim_in, dim_out, bias=False)

        # init
        self.init_weights_OurConvNetCell(dim_in, dim_out, 1)

    def init_weights_OurConvNetCell(self, dim_in, dim_out, gain):
        # conv1
        scale = gain * np.sqrt(2.0 / dim_in)
        self.Ui1.weight.data.uniform_(-scale, scale)
        self.Uj1.weight.data.uniform_(-scale, scale)
        self.Vi1.weight.data.uniform_(-scale, scale)
        self.Vj1.weight.data.uniform_(-scale, scale)
        self.bu1.data.fill_(0)
        self.bv1.data.fill_(0)

        # conv2
        scale = gain * np.sqrt(2.0 / dim_out)
        self.Ui2.weight.data.uniform_(-scale, scale)
        self.Uj2.weight.data.uniform_(-scale, scale)
        self.Vi2.weight.data.uniform_(-scale, scale)
        self.Vj2.weight.data.uniform_(-scale, scale)
        self.bu2.data.fill_(0)
        self.bv2.data.fill_(0)

        # RN
        scale = gain * np.sqrt(2.0 / dim_in)
        self.R.weight.data.uniform_(-scale, scale)

    def forward(self, x, E_start, E_end):
        # E_start, E_end : E x V incidence matrices (edge -> start / end vertex)

        xin = x

        # conv1
        Vix = self.Vi1(x)  # V x H_out
        Vjx = self.Vj1(x)  # V x H_out
        x1 = torch.mm(E_end, Vix) + torch.mm(E_start, Vjx) + self.bv1  # E x H_out, edge gates
        x1 = torch.sigmoid(x1)
        Ujx = self.Uj1(x)  # V x H_out
        x2 = torch.mm(E_start, Ujx)  # E x H_out, message from each edge's start vertex
        Uix = self.Ui1(x)  # V x H_out
        # unnormalised aggregation: x = Uix + torch.mm(E_end.t(), x1 * x2) + self.bu1  # V x H_out
        indegree = torch.sum(E_end, dim=0)  # V
        indegree[indegree == 0] = 1  # avoid division by zero at isolated vertices
        sum_xj = torch.div(torch.mm(E_end.t(), x1 * x2).t(), indegree).t()  # mean gated message per vertex
        x = Uix + sum_xj + self.bu1

        # bn1
        x = self.bn1(x)
        # relu1
        x = F.relu(x)

        # conv2
        Vix = self.Vi2(x)  # V x H_out
        Vjx = self.Vj2(x)  # V x H_out
        x1 = torch.mm(E_end, Vix) + torch.mm(E_start, Vjx) + self.bv2  # E x H_out, edge gates
        x1 = torch.sigmoid(x1)
        Ujx = self.Uj2(x)  # V x H_out
        x2 = torch.mm(E_start, Ujx)  # E x H_out
        Uix = self.Ui2(x)  # V x H_out
        sum_xj = torch.div(torch.mm(E_end.t(), x1 * x2).t(), indegree).t()  # V x H_out
        x = Uix + sum_xj + self.bu2  # V x H_out

        # bn2
        x = self.bn2(x)
        # residual addition
        x = x + self.R(xin)
        # relu2
        x = F.relu(x)

        return x
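
A smoke test of the cell in isolation on a toy directed 3-cycle; the hand-built E_start / E_end matrices here are stand-ins for the incidence matrices that GraphDataBlock normally provides, with each row one-hot encoding the start / end vertex of one edge:

import torch
from core.GraphConvNetCell import GraphConvNetCell

edges = [(0, 1), (1, 2), (2, 0)]  # directed 3-cycle
E, V = len(edges), 3
E_start = torch.zeros(E, V)  # edge -> start vertex
E_end = torch.zeros(E, V)    # edge -> end vertex
for e, (i, j) in enumerate(edges):
    E_start[e, i] = 1.0
    E_end[e, j] = 1.0

cell = GraphConvNetCell(dim_in=8, dim_out=16)
cell.eval()  # use the BatchNorm running statistics
x = torch.randn(V, 8)
with torch.no_grad():
    out = cell(x, E_start, E_end)
print(out.shape)  # torch.Size([3, 16])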
