|
| 1 | +import os |
| 2 | +import pickle |
| 3 | +import numpy as np |
| 4 | +import scipy.sparse as sp |
| 5 | +import time |
| 6 | +from core.GraphDataBlock import GraphDataBlock |
| 7 | +from util.graph_utils import neighbor_sampling |
| 8 | + |
| 9 | + |
class EmbeddingDataSet:
    """Loads a pickled graph dataset and packages it into GraphDataBlock batches.

    Attributes:
        name (str): name of dataset
        data_dir (str): path to dataset folder
        train_dir (str): path to training data file
        test_dir (str): path to test data file
        input_dim (int): number of data features per node
        is_labelled (bool): whether underlying class labels are present
        all_data (list[GraphDataBlock]): data inputs packaged into blocks
        all_indices (np.array): input sequence when packaging data into blocks

        inputs (scipy csr matrix): data feature matrix of size n x f
        labels (np.array): data class label matrix of size n x 1
        adj_matrix (scipy csr matrix): adjacency matrix of size n x n
    """

    # Mapping from dataset name to the pickle file that stores it.
    train_dir = {'cora': 'cora_full.pkl'}

    # The same files currently serve as the test split.
    test_dir = train_dir

    # Max neighbor samples per layer when expanding a mini-batch with
    # neighbor_sampling (shared by create_all_data and get_sample_block).
    _D_LAYERS = [9, 14]

    def __init__(self, name, data_dir, train=True):
        """Load the dataset pickle and set up feature/label/adjacency matrices.

        Args:
            name (str): dataset key; must exist in EmbeddingDataSet.train_dir
            data_dir (str): root folder containing one subfolder per dataset
            train (bool): load the training split if True, else the test split

        Raises:
            KeyError: if `name` is not a known dataset
            FileNotFoundError: if the pickle file is missing
        """
        self.name = name
        self.data_dir = data_dir
        # Instance attributes shadow the class-level dicts on purpose: after
        # __init__ they hold the concrete filename for this dataset only.
        self.train_dir = EmbeddingDataSet.train_dir[name]
        self.test_dir = EmbeddingDataSet.test_dir[name]
        self.is_labelled = False

        self.all_data = []

        # Extract data from file contents
        data_root = os.path.join(self.data_dir, self.name)
        if train:
            fname = os.path.join(data_root, self.train_dir)
        else:
            assert self.test_dir is not None
            fname = os.path.join(data_root, self.test_dir)
        # NOTE(review): pickle.load can execute arbitrary code when fed an
        # untrusted file -- only load dataset pickles from trusted sources.
        with open(fname, 'rb') as f:
            file_contents = pickle.load(f)

        # Expected pickle layout: (inputs, labels, adjacency, ...).
        self.inputs = file_contents[0]
        self.labels = file_contents[1]
        self.adj_matrix = file_contents[2]

        # An empty labels container marks an unlabelled dataset.
        self.is_labelled = len(self.labels) != 0
        self.input_dim = self.inputs.shape[1]

        self.all_indices = np.arange(0, self.inputs.shape[0])

        # Normalize both matrices to CSR for efficient row slicing below.
        self.inputs = sp.csr_matrix(self.inputs)
        self.adj_matrix = sp.csr_matrix(self.adj_matrix)

    def create_all_data(self, n_batches=1, shuffle=False, sampling=False, full_path_matrix=None):
        """
        Initialises all_data as a list of GraphDataBlock
        Args:
            n_batches (int): number of blocks to return
            shuffle (bool): whether to shuffle input sequence
            sampling (bool): whether to expand data blocks with neighbor sampling
            full_path_matrix: optional precomputed all-pairs shortest-path
                matrix; each block receives its own sub-matrix
        """
        i = 0
        labels_subset = []
        self.all_data = []

        if shuffle:
            # In-place shuffle of the index order; calling again with
            # shuffle=False restores the natural 0..n-1 order.
            np.random.shuffle(self.all_indices)
        else:
            self.all_indices = np.arange(0, self.inputs.shape[0])

        # Split equally
        # TODO: Another option to split randomly
        chunk_sizes = self.get_k_equal_chunks(self.inputs.shape[0], k=n_batches)

        t_start = time.time()

        for num_samples in chunk_sizes:
            # Sorted indices keep CSR row slicing efficient and deterministic.
            mask = sorted(self.all_indices[i: i + num_samples])

            # Perform sampling to obtain local neighborhood of mini-batch
            if sampling:
                mask = neighbor_sampling(self.adj_matrix, mask, EmbeddingDataSet._D_LAYERS)

            inputs_subset = self.inputs[mask]
            adj_subset = self.adj_matrix[mask, :][:, mask]

            # Unlabelled datasets keep labels_subset = [] (cannot fancy-index
            # an empty labels container).
            if self.is_labelled:
                labels_subset = self.labels[mask]

            # Package data into graph block
            G = GraphDataBlock(inputs_subset, labels=labels_subset, W=adj_subset)

            # Add original indices from the complete dataset
            G.original_indices = mask

            # Add shortest path matrix from precomputed data if needed
            if full_path_matrix is not None:
                G.precomputed_path_matrix = full_path_matrix[mask, :][:, mask]

            self.all_data.append(G)
            i += num_samples

        t_elapsed = time.time() - t_start
        print('Data blocks of length: ', [len(G.labels) for G in self.all_data])
        print("Time to create all data (s) = {:.4f}".format(t_elapsed))

    def summarise(self):
        """Print a short human-readable summary of the loaded dataset."""
        print("Name of dataset = {}".format(self.name))
        print("Input dimension = {}".format(self.input_dim))
        print("Number of training samples = {}".format(self.inputs.shape[0]))
        print("Training labels = {}".format(self.is_labelled))

    def get_k_equal_chunks(self, n, k):
        """Return k chunk sizes that sum to n, as equal as possible.

        Returns n % k sizes of n//k + 1 followed by the rest of size n//k.
        Note: if k > n, the trailing chunks have size 0.
        """
        p, r = divmod(n, k)
        return [p + 1 for _ in range(r)] + [p for _ in range(k - r)]

    def get_current_inputs(self):
        """Return (inputs, labels, adj) restricted/reordered to all_indices."""
        inputs = self.inputs[self.all_indices]
        # Guard the unlabelled case: an empty labels container cannot be
        # fancy-indexed, so return it untouched.
        labels = self.labels[self.all_indices] if self.is_labelled else self.labels
        adj = self.adj_matrix[self.all_indices, :][:, self.all_indices]
        return inputs, labels, adj

    def get_sample_block(self, n_initial, sample_neighbors, verbose=0):
        """
        Returns a subset of data as a GraphDataBlock
        Args:
            n_initial (int): number of samples at the start
            sample_neighbors (bool): whether to expand the sample block with neighbor sampling
            verbose (int): if nonzero, print the expansion size
        Returns:
            G (GraphDataBlock): data subset
        """

        mask = sorted(np.random.choice(self.all_indices, size=n_initial, replace=False))
        if sample_neighbors:
            mask = neighbor_sampling(self.adj_matrix, mask, D_layers=EmbeddingDataSet._D_LAYERS)
        inputs = self.inputs[mask]
        # Guard the unlabelled case (consistent with create_all_data):
        # previously this indexed self.labels unconditionally and raised on
        # unlabelled datasets.
        labels = self.labels[mask] if self.is_labelled else []
        W = self.adj_matrix[mask, :][:, mask]
        G = GraphDataBlock(inputs, labels=labels, W=W)
        G.original_indices = mask
        if verbose:
            print("Initial set of {} points was expanded to {} points".format(n_initial, len(mask)))
        return G
0 commit comments