We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
class LINE(object):
    """Graph-embedding trainer that learns node vectors with negative sampling.

    Positive samples are the edges in ``adj_list``; for every positive edge,
    ``negative_ratio`` node pairs that are not edges are drawn as negatives.
    A Keras model scores each pair by the dot product of the two node
    embeddings and is trained with a log-sigmoid loss.
    """

    def __init__(self, graph_edge_num, graph_nodes_num, dimension):
        """Store the graph size and the embedding dimension.

        Args:
            graph_edge_num: number of edges (rows of the adjacency list).
            graph_nodes_num: number of nodes; node ids are used as row/column
                indices, so they are presumably in ``[0, graph_nodes_num)``.
            dimension: size of each node embedding vector.
        """
        self.e = graph_edge_num
        self.n = graph_nodes_num
        self.steps_per_epoch = None
        self.epoch_train_size = None
        self.dimension = dimension
        # Populated by fit().
        self.model = None
        self.embed_generator = None

    @staticmethod
    def _build_sampling_table(adj_list, graph_nodes_num, graph_edge_num,
                              table_size, power):
        """Return a lookup table where node j fills a share ~ degree(j)**power.

        Drawing a uniform index into the table samples nodes from the
        smoothed degree distribution.
        """
        endpoints = np.asarray(adj_list[:graph_edge_num, :2]).astype(np.int64).ravel()
        node_degree = np.bincount(endpoints, minlength=graph_nodes_num)
        running = np.cumsum(np.power(node_degree.astype(np.float64), power))
        if running.size == 0 or running[-1] <= 0:
            raise ValueError("cannot build a sampling table for a graph without edges")
        # Normalising by the last running total makes the final boundary
        # exactly 1.0, so the table is always completely filled.
        cumulative = running / running[-1]
        # Slot i belongs to the first node j with i / table_size < cumulative[j].
        ends = np.minimum(np.ceil(cumulative * table_size), table_size).astype(np.int64)
        counts = np.diff(np.concatenate(([0], ends)))
        return np.repeat(np.arange(ends.size, dtype=np.uint32), counts)

    def _generate_batch_train(self, adj_list, graph_nodes_num, graph_edge_num,
                              batch_size, negativeRatio, negative_sampling,
                              table_size=int(1e8), power=0.75):
        """Yield training batches forever, cycling over the edge list.

        Args:
            adj_list: 2-D integer array; column 0 is the left node and
                column 1 the right node of each edge.
            graph_nodes_num: number of nodes in the graph.
            graph_edge_num: number of edges used for the degree statistics.
            batch_size: number of positive edges per batch.
            negativeRatio: negatives drawn per positive edge.
            negative_sampling: ``"NON-UNIFORM"`` samples negatives from the
                degree**power table; any other value samples uniformly.
            table_size: number of slots in the non-uniform sampling table.
            power: exponent applied to node degrees for the table.

        Yields:
            ``([left_nodes, right_nodes], [relation_y])`` where the positives
            come first (label 1) followed by the negatives (label 0).

        Raises:
            ValueError: if ``adj_list`` has no rows (the original looped
                forever without yielding in that case).
        """
        # random.randint rejects non-integer bounds on modern Python, so the
        # table size must be a real int (the original used the float 1e8).
        table_size = int(table_size)

        nb_train_sample = adj_list.shape[0]
        if nb_train_sample == 0:
            raise ValueError("adj_list contains no edges to train on")

        data = np.ones((nb_train_sample), dtype=np.int8)
        mat = csr_matrix((data, (adj_list[:, 0], adj_list[:, 1])),
                         shape=(graph_nodes_num, graph_nodes_num), dtype=np.int8)

        nb_batch = int(np.ceil(nb_train_sample / float(batch_size)))
        batches = [(k * batch_size, min(nb_train_sample, (k + 1) * batch_size))
                   for k in range(nb_batch)]

        if negative_sampling == "NON-UNIFORM":
            print("Pre-procesing for non-uniform negative sampling!")
            sampling_table = self._build_sampling_table(
                adj_list, graph_nodes_num, graph_edge_num, table_size, power)

            def draw_node():
                return sampling_table[random.randint(0, table_size - 1)]
        else:
            def draw_node():
                return random.randint(0, graph_nodes_num - 1)

        while True:
            for batch_start, batch_end in batches:
                pos_left_nodes = adj_list[batch_start:batch_end, 0]
                pos_right_nodes = adj_list[batch_start:batch_end, 1]
                nb_pos = len(pos_left_nodes)
                pos_relation_y = np.ones(nb_pos, dtype=np.int8)

                nb_neg = nb_pos * negativeRatio
                neg_left_nodes = np.zeros(nb_neg, dtype=np.int32)
                neg_right_nodes = np.zeros(nb_neg, dtype=np.int32)
                neg_relation_y = np.zeros(nb_neg, dtype=np.int8)

                h = 0
                for i in pos_left_nodes:
                    for _ in range(negativeRatio):
                        rn = draw_node()
                        # Duplicate edges are summed by the CSR constructor,
                        # so test for "non-zero" rather than "== 1".
                        # NOTE(review): this loops forever if node i is linked
                        # to every other node.
                        while mat[i, rn] != 0 or i == rn:
                            rn = draw_node()
                        neg_left_nodes[h] = i
                        neg_right_nodes[h] = rn
                        h += 1

                left_nodes = np.concatenate((pos_left_nodes, neg_left_nodes), axis=0)
                right_nodes = np.concatenate((pos_right_nodes, neg_right_nodes), axis=0)
                relation_y = np.concatenate((pos_relation_y, neg_relation_y), axis=0)
                yield ([left_nodes, right_nodes], [relation_y])

    def _model(self, graph_nodes_num, dimension):
        """Build the scoring model and the embedding-extraction model.

        Returns:
            ``(model, embed_generator)``: ``model`` outputs the dot product of
            the two node embeddings (layer name ``left_right_dot``);
            ``embed_generator`` outputs both embeddings.
        """
        left_input = Input(shape=(1,))
        right_input = Input(shape=(1,))

        # NOTE(review): the original also built a second "right" embedding
        # stack that was never connected to the graph. Both endpoints have
        # always gone through this single shared embedding; that behaviour is
        # kept and only the dead stack was removed.
        node_embedding = Sequential()
        node_embedding.add(Embedding(input_dim=graph_nodes_num + 1, output_dim=dimension,
                                     input_length=1, mask_zero=False))
        node_embedding.add(Reshape((dimension,)))

        left_embed = node_embedding(left_input)
        right_embed = node_embedding(right_input)

        left_right_dot = dot(inputs=[left_embed, right_embed], axes=1, name="left_right_dot")

        model = Model(inputs=[left_input, right_input], outputs=[left_right_dot])
        embed_generator = Model(inputs=[left_input, right_input],
                                outputs=[left_embed, right_embed])
        return model, embed_generator

    def _line_loss(self, y_true, y_pred):
        """Negative mean log-sigmoid of the signed score.

        Labels in {0, 1} are mapped to {-1, +1} so positives are pushed
        towards high scores and negatives towards low scores.
        """
        coeff = y_true * 2 - 1
        return -K.mean(K.log(K.sigmoid(coeff * y_pred)))

    def fit(self, adj_list, batch_size, negative_ratio, negative_sampling, epoch_num):
        """Train the embedding model on the given edge list.

        Args:
            adj_list: 2-D integer array of edges (left node, right node).
            batch_size: number of positive edges per generated batch.
            negative_ratio: negatives drawn per positive edge.
            negative_sampling: ``"NON-UNIFORM"`` or anything else for uniform.
            epoch_num: number of training epochs.
        """
        # One epoch is one pass over the edge list. The generator emits one
        # batch per `batch_size` positive edges (negatives ride along in the
        # same batch), so this must be an integer count of generator batches,
        # not samples / batch_size.
        self.steps_per_epoch = max(1, int(np.ceil(self.e / float(batch_size))))
        self.epoch_train_size = (1 + negative_ratio) * self.e

        # Build the training sample generator.
        data = self._generate_batch_train(adj_list, self.n, self.e, batch_size,
                                          negative_ratio, negative_sampling)
        self.model, self.embed_generator = self._model(self.n, self.dimension)

        self.model.compile(optimizer='rmsprop', loss={'left_right_dot': self._line_loss})
        self.model.fit_generator(data, steps_per_epoch=self.steps_per_epoch,
                                 epochs=epoch_num, verbose=1)

    def predict(self, data):
        """Return the embeddings of the node pairs in ``data`` (for link prediction).

        Must be called after ``fit``; ``data`` is passed unchanged to the
        embedding model's ``predict_on_batch``.
        """
        return self.embed_generator.predict_on_batch(data)
# Build the trainer for a graph of the given size, then train it on the edge
# list. All names on the right-hand side must be defined by the caller.
line = LINE(
    graph_edge_num=graph_edge_num,
    graph_nodes_num=graph_nodes_num,
    dimension=dimension,
)
line.fit(
    adj_list=adj_list,
    batch_size=batch_size,
    negative_ratio=negative_ratio,
    negative_sampling=negative_sampling,
    epoch_num=epoch_num,
)
The text was updated successfully, but these errors were encountered:
What are the prerequisites for running this code?
Sorry, something went wrong.
No branches or pull requests
usage
The text was updated successfully, but these errors were encountered: