Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Benchmarks #22

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
17 changes: 9 additions & 8 deletions Makefile
Original file line number Diff line number Diff line change
@@ -6,6 +6,7 @@ PYTHONPATH := `pwd`
#* Variables
IMAGE := dynnode2vec
VERSION := latest
SRC_FOLDERS := ./dynnode2vec ./tests ./benchmarks

#* Poetry
.PHONY: poetry-download
@@ -30,8 +31,8 @@ pre-commit-install:
#* Formatters
.PHONY: codestyle
codestyle:
poetry run isort --settings-path pyproject.toml ./dynnode2vec ./tests
poetry run black --config pyproject.toml ./dynnode2vec ./tests
poetry run isort --settings-path pyproject.toml $(SRC_FOLDERS)
poetry run black --config pyproject.toml $(SRC_FOLDERS)

#* Linting
.PHONY: test
@@ -41,18 +42,18 @@ test:

.PHONY: check-codestyle
check-codestyle:
poetry run isort --diff --check-only --settings-path pyproject.toml ./dynnode2vec ./tests
poetry run black --diff --check --config pyproject.toml ./dynnode2vec ./tests
poetry run darglint --verbosity 2 ./dynnode2vec ./tests
poetry run pylint ./dynnode2vec/ ./tests
poetry run isort --diff --check-only --settings-path pyproject.toml $(SRC_FOLDERS)
poetry run black --diff --check --config pyproject.toml $(SRC_FOLDERS)
poetry run darglint --verbosity 2 $(SRC_FOLDERS)
poetry run pylint $(SRC_FOLDERS)

.PHONY: pylint
pylint:
poetry run pylint ./dynnode2vec/ ./tests
poetry run pylint $(SRC_FOLDERS)

.PHONY: mypy
mypy:
poetry run mypy --config-file pyproject.toml ./dynnode2vec ./tests
poetry run mypy --config-file pyproject.toml $(SRC_FOLDERS)

.PHONY: lint
lint: check-codestyle mypy
30 changes: 30 additions & 0 deletions benchmarks/build_graphs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""
Build graphs from datasets.
"""
from __future__ import annotations

import gzip

import networkx as nx


def build_as_733_graphs(
    archive_path: str = "benchmarks/data/as-733.tar.gz",
) -> list[nx.Graph]:
    """
    Build the Autonomous systems AS-733 graphs.
    link: https://snap.stanford.edu/data/as-733.html

    The archive is read as a gzip-decompressed text stream. A line containing
    "Autonomous systems" is treated as the header of a new snapshot, and every
    line starting with a digit is parsed as one tab-separated integer edge.
    All other lines are ignored.

    Args:
        archive_path: Path of the gzip-compressed dataset to read. Defaults to
            the copy shipped in ``benchmarks/data``.

    Returns:
        One graph per snapshot, in the reverse of the order in which the
        snapshots appear in the archive.
    """
    graphs: list[nx.Graph] = []
    graph = nx.Graph()
    with gzip.open(archive_path, "rt") as stream:
        for line in stream:
            if "Autonomous systems" in line:
                # A header closes the snapshot accumulated so far (if any).
                if graph.nodes:
                    graphs.append(graph)
                    # Start a brand-new graph instead of clearing in place:
                    # clearing would also empty the object just stored in
                    # `graphs`, leaving every entry aliased to one graph.
                    graph = nx.Graph()
                continue
            if line[0].isdigit():
                edge = map(int, line.strip().split("\t"))
                graph.add_edge(*edge)
    # The last snapshot has no header after it, so store it explicitly.
    graphs.append(graph)
    graphs.reverse()  # Input is in reverse chronological order
    return graphs
22 changes: 22 additions & 0 deletions benchmarks/build_sets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
"""
Build training and test sets from dynamic graphs.
"""
from __future__ import annotations

import networkx as nx

from dynnode2vec.dynnode2vec import DynNode2Vec, Embedding


def get_node2vec_embeddings(
    graphs: list[nx.Graph], parameters: dict
) -> list[Embedding]:
    """
    Build plain node2vec embeddings at each time step.

    A single DynNode2Vec object is created from ``parameters`` and its
    ``get_node2vec_embeddings`` method is called once per graph, in order.
    The embeddings returned for each graph are concatenated into one flat list.

    Review note (pedugnat, 2022-08-10): to do so, you can also call the
    classic DynNode2Vec.compute_embeddings with plain_node2vec=True
    (https://github.com/pedugnat/dynnode2vec/blob/master/dynnode2vec/dynnode2vec.py#L42).
    It should yield equivalent embeddings.
    """
    encoder = DynNode2Vec(**parameters)
    # Index 1 of the returned pair is the list of embeddings; index 0 (the
    # fitted Word2Vec model) is not needed here.
    return [
        step_embedding
        for snapshot in graphs
        for step_embedding in encoder.get_node2vec_embeddings(snapshot)[1]
    ]
Binary file added benchmarks/data/as-733.tar.gz
Binary file not shown.
23 changes: 13 additions & 10 deletions dynnode2vec/dynnode2vec.py
Original file line number Diff line number Diff line change
@@ -91,24 +91,22 @@ def __init__(
# see https://stackoverflow.com/questions/53417258/what-is-workers-parameter-in-word2vec-in-nlp # pylint: disable=line-too-long
self.gensim_workers = max(self.parallel_processes - 1, 12)

def _initialize_embeddings(
self, graphs: list[nx.Graph]
def get_node2vec_embeddings(
self, graph: nx.Graph
) -> tuple[Word2Vec, list[Embedding]]:
"""
Compute normal node2vec embedding at timestep 0.
Compute normal node2vec embedding.
"""
first_graph = graphs[0]

first_walks = BiasedRandomWalk(first_graph).run(
nodes=first_graph.nodes(),
walks = BiasedRandomWalk(graph).run(
nodes=graph.nodes,
walk_length=self.walk_length,
n_walks=self.n_walks_per_node,
p=self.p,
q=self.q,
)

model = Word2Vec(
sentences=first_walks,
sentences=walks,
vector_size=self.embedding_size,
window=self.window,
min_count=0,
@@ -159,7 +157,7 @@ def generate_updated_walks(
if self.plain_node2vec:
# if we stick to node2vec implementation, we sample walks
# for all nodes at each time step
delta_nodes = current_graph.nodes()
delta_nodes = current_graph.nodes
else:
# if we use dynnode2vec, we sample walks only for nodes
# that changed compared to the previous time step
@@ -233,8 +231,13 @@ def compute_embeddings(self, graphs: list[nx.Graph]) -> list[Embedding]:
Compute dynamic embeddings on a list of graphs.
"""
# TO DO : check graph weights valid
model, embeddings = self._initialize_embeddings(graphs)
# Compute normal node2vec embedding at timestep 0.
model, embeddings = self.get_node2vec_embeddings(graphs[0])

# Simulate walks for all time steps.
time_walks = self._simulate_walks(graphs)

# Compute embeddings for all time steps.
self._update_embeddings(embeddings, time_walks, model)

return embeddings
2 changes: 1 addition & 1 deletion dynnode2vec/utils.py
Original file line number Diff line number Diff line change
@@ -26,7 +26,7 @@ def create_dynamic_graph(
graph = nx.fast_gnp_random_graph(n=n_base_nodes, p=base_density)

# convert node ids to strings to avoid the perfect case where true_ids match int_ids
graph = nx.relabel_nodes(graph, mapping={n: str(n) for n in graph.nodes()})
graph = nx.relabel_nodes(graph, mapping={n: str(n) for n in graph.nodes})

# initialize graphs list with first graph
graphs = [graph.copy()]
8 changes: 4 additions & 4 deletions tests/test_biased_random_walk.py
Original file line number Diff line number Diff line change
@@ -26,7 +26,7 @@ def test_init(graphs):
brw = dynnode2vec.biased_random_walk.BiasedRandomWalk(graphs[0])

# make sure nodes ids were converted to integers
assert list(brw.graph.nodes()) == list(range(brw.graph.number_of_nodes()))
assert list(brw.graph.nodes) == list(range(brw.graph.number_of_nodes()))


def test_weighted_choice(graphs):
@@ -66,7 +66,7 @@ def test_generate_walk(graphs, ip, iq, weighted):
)

assert isinstance(walk, list)
assert all(n in brw.graph.nodes() for n in walk)
assert all(n in brw.graph.nodes for n in walk)


@pytest.mark.parametrize("p", [0.5, 1.0])
@@ -84,7 +84,7 @@ def test_run(graphs, p, q, weighted, n_processes):
brw = dynnode2vec.biased_random_walk.BiasedRandomWalk(graph)

random_walks = brw.run(
graph.nodes(), p=p, q=q, weighted=weighted, n_processes=n_processes
graph.nodes, p=p, q=q, weighted=weighted, n_processes=n_processes
)
assert all(isinstance(walk, list) for walk in random_walks)
assert all(n in graph.nodes() for walk in random_walks for n in walk)
assert all(n in graph.nodes for walk in random_walks for n in walk)
9 changes: 4 additions & 5 deletions tests/test_dynnode2vec.py
Original file line number Diff line number Diff line change
@@ -38,9 +38,8 @@ def plain_node2vec_parallel_fixture():
)


def test_initialize_embeddings(graphs, dynnode2vec_object):
# pylint: disable=protected-access
init_model, init_embeddings = dynnode2vec_object._initialize_embeddings(graphs)
def test_get_node2vec_embeddings(graphs, dynnode2vec_object):
init_model, init_embeddings = dynnode2vec_object.get_node2vec_embeddings(graphs[0])

assert isinstance(init_model, gensim.models.Word2Vec)
assert isinstance(init_embeddings[0], dynnode2vec.Embedding)
@@ -74,7 +73,7 @@ def test_generate_updated_walks(graphs, dynnode2vec_object):
updated_walks = dynnode2vec_object.generate_updated_walks(current, previous)

assert isinstance(updated_walks, list)
assert all(node in current.nodes() for walk in updated_walks for node in walk)
assert all(node in current.nodes for walk in updated_walks for node in walk)


def test_node2vec_generate_updated_walks(graphs, node2vec_object):
@@ -83,7 +82,7 @@ def test_node2vec_generate_updated_walks(graphs, node2vec_object):
updated_walks = node2vec_object.generate_updated_walks(current, previous)

assert isinstance(updated_walks, list)
assert all(node in current.nodes() for walk in updated_walks for node in walk)
assert all(node in current.nodes for walk in updated_walks for node in walk)


def test_compute_embeddings(graphs, dynnode2vec_object):