Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

simple wrapper for this implemention for Keras 2.1.1 #3

Open
Kiris-tingna opened this issue Mar 24, 2018 · 1 comment
Open

simple wrapper for this implemention for Keras 2.1.1 #3

Kiris-tingna opened this issue Mar 24, 2018 · 1 comment

Comments

@Kiris-tingna
Copy link

class LINE(object):
    def __init__(self, graph_edge_num, graph_nodes_num,
                 dimension):
        self.e = graph_edge_num
        self.n = graph_nodes_num
        self.steps_per_epoch = None
        self.epoch_train_size = None
        self.dimension = dimension

    def _generate_batch_train(self, adj_list,
                         graph_nodes_num, graph_edge_num,
                         batch_size, negativeRatio,
                         negative_sampling):
        #  使用 negative sampling 优化
        table_size = 1e8
        power = 0.75
        sampling_table = None

        data = np.ones((adj_list.shape[0]), dtype=np.int8)
        mat = csr_matrix((data, (adj_list[:, 0], adj_list[:, 1])), shape=(graph_nodes_num, graph_nodes_num),
                         dtype=np.int8)
        batch_size_ones = np.ones((batch_size), dtype=np.int8)

        nb_train_sample = adj_list.shape[0]
        index_array = np.arange(nb_train_sample)

        nb_batch = int(np.ceil(nb_train_sample / float(batch_size)))
        batches = [(i * batch_size, min(nb_train_sample, (i + 1) * batch_size)) for i in range(0, nb_batch)]

        if negative_sampling == "NON-UNIFORM":
            print("Pre-procesing for non-uniform negative sampling!")
            node_degree = np.zeros(graph_nodes_num)

            for i in range(graph_edge_num):
                node_degree[adj_list[i, 0]] += 1
                node_degree[adj_list[i, 1]] += 1

            norm = sum([math.pow(node_degree[i], power) for i in range(graph_nodes_num)])

            sampling_table = np.zeros(int(table_size), dtype=np.uint32)

            p = 0
            i = 0
            for j in range(graph_nodes_num):
                p += float(math.pow(node_degree[j], power)) / norm
                while i < table_size and float(i) / table_size < p:
                    sampling_table[i] = j
                    i += 1

        while 1:

            for batch_index, (batch_start, batch_end) in enumerate(batches):
                pos_edge_list = index_array[batch_start:batch_end]
                pos_left_nodes = adj_list[pos_edge_list, 0]
                pos_right_nodes = adj_list[pos_edge_list, 1]

                pos_relation_y = batch_size_ones[0:len(pos_edge_list)]

                neg_left_nodes = np.zeros(len(pos_edge_list) * negativeRatio, dtype=np.int32)
                neg_right_nodes = np.zeros(len(pos_edge_list) * negativeRatio, dtype=np.int32)

                neg_relation_y = np.zeros(len(pos_edge_list) * negativeRatio, dtype=np.int8)

                h = 0
                for i in pos_left_nodes:
                    for k in range(negativeRatio):
                        rn = sampling_table[random.randint(0,
                                                           table_size - 1)] if negative_sampling == "NON-UNIFORM" else random.randint(
                            0, graph_nodes_num - 1)
                        while mat[i, rn] == 1 or i == rn:
                            rn = sampling_table[random.randint(0,
                                                               table_size - 1)] if negative_sampling == "NON-UNIFORM" else random.randint(
                                0, graph_nodes_num - 1)
                        neg_left_nodes[h] = i
                        neg_right_nodes[h] = rn
                        h += 1

                left_nodes = np.concatenate((pos_left_nodes, neg_left_nodes), axis=0)
                right_nodes = np.concatenate((pos_right_nodes, neg_right_nodes), axis=0)
                relation_y = np.concatenate((pos_relation_y, neg_relation_y), axis=0)

                yield ([left_nodes, right_nodes], [relation_y])

    def _model(self, graph_nodes_num, dimension):
        left_input = Input(shape=(1,))
        right_input = Input(shape=(1,))

        left_model = Sequential()
        left_model.add(Embedding(input_dim=graph_nodes_num + 1, output_dim=dimension, input_length=1, mask_zero=False))
        left_model.add(Reshape((dimension,)))

        right_model = Sequential()
        right_model.add(Embedding(input_dim=graph_nodes_num + 1, output_dim=dimension, input_length=1, mask_zero=False))
        right_model.add(Reshape((dimension,)))

        left_embed = left_model(left_input)
        right_embed = left_model(right_input)

        left_right_dot = dot(inputs=[left_embed, right_embed], axes=1, name="left_right_dot")
        model = Model(inputs=[left_input, right_input], outputs=[left_right_dot])
        embed_generator = Model(inputs=[left_input, right_input], outputs=[left_embed, right_embed])

        return model, embed_generator

    def _line_loss(self, y_true, y_pred):
        coeff = y_true*2 - 1
        return -K.mean(K.log(K.sigmoid(coeff*y_pred)))

    def fit(self, adj_list, batch_size, negative_ratio, negative_sampling, epoch_num):
        self.steps_per_epoch = int(self.e / batch_size)
        self.epoch_train_size = (1 + negative_ratio) * self.e

        # 产生训练样本
        data = self._generate_batch_train(adj_list, self.n, self.e, batch_size, negative_ratio,
                                          negative_sampling)

        self.model, self.embed_generator = self._model(self.n, self.dimension)
        # model.summary()
        self.model.compile(optimizer='rmsprop', loss={'left_right_dot': self._line_loss})
        self.model.fit_generator(data, steps_per_epoch=self.epoch_train_size / batch_size, epochs=epoch_num, verbose=1)

    def predict(self, data):
        # link prediction
        return self.embed_generator.predict_on_batch(data)

usage

line = LINE(graph_edge_num, graph_nodes_num, dimension)
line.fit(adj_list, batch_size, negative_ratio, negative_sampling, epoch_num)
@Zafar-southeast
Copy link

What are the pre-requisites for running this code?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants