Skip to content

Commit 4a0f103

Browse files
committed
Add solutions for exercise 11
1 parent ec987a5 commit 4a0f103

5 files changed

+414
-171
lines changed

11-metis-from-sparql.py

+90
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
#!/usr/bin/env python3
2+
3+
import argparse
4+
import requests
5+
import itertools
6+
from collections import defaultdict
7+
from graphs import Graph, io
8+
9+
10+
# getting all the results will time out, so we do paging
11+
PAGE_SIZE = 10000
12+
SPARQL_ENDPOINT = "https://query.wikidata.org/sparql"
13+
USER_AGENT = "tud-kbs-example-subgraph-extractor/0.0.1 (https://github.com/knowsys/Course-Knowledge-Graphs/)"
14+
# first, in a subquery, get the v/w pairs in a stable order and
15+
# retrieve a page of those. then find the labels for these results.
16+
QUERY = """#TOOL:tud-kbs-example-subgraph-extractor
17+
SELECT DISTINCT ?v ?vLabel ?w ?wLabel WHERE {{
18+
{{ SELECT DISTINCT ?v ?w WHERE {{
19+
?v wdt:P1344 ?tournament .
20+
?tournament ^wdt:P2522 ?w .
21+
}} ORDER BY ASC(?v) ASC(?w) LIMIT {limit} OFFSET {offset} }}
22+
SERVICE wikibase:label {{
23+
bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
24+
?v rdfs:label ?vLabel .
25+
?w rdfs:label ?wLabel .
26+
}}
27+
}}
28+
"""
29+
30+
31+
def counter():
    """Return a zero-argument callable producing 1, 2, 3, ... on successive calls.

    Every call to ``counter`` starts a new, independent sequence.
    """
    numbers = itertools.count(1)
    return lambda: next(numbers)
38+
39+
40+
def paged(page):
    """Return the QUERY text for result page number ``page`` (0 is the first).

    Each request asks for ``PAGE_SIZE + 1`` rows, and consecutive pages are
    offset from each other by that same amount.
    """
    rows_per_request = PAGE_SIZE + 1
    return QUERY.format(limit=rows_per_request, offset=page * rows_per_request)
42+
43+
44+
if __name__ == "__main__":
    # Download the tournament winner/participant pairs page by page, number
    # the entities, and write the graph plus an id -> label dictionary.
    parser = argparse.ArgumentParser(
        description="Extract tournament winner/participant subgraph from Wikidata"
    )
    parser.add_argument("output", help="path to output graph")
    parser.add_argument("dict", help="path to output dictionary")
    args = parser.parse_args()

    # Looking up an unseen entity value assigns it the next integer id
    # (1, 2, 3, ...); later lookups return the same id.
    ids = defaultdict(counter())
    labels = {}
    edges = set()
    done = False
    page = 0

    while not done:
        print("-!- getting page {}".format(page))
        response = requests.get(
            SPARQL_ENDPOINT,
            params={"query": paged(page), "format": "json"},
            headers={"user-agent": USER_AGENT},
        )
        response.raise_for_status()
        results = response.json()["results"]["bindings"]
        # paged() asks for PAGE_SIZE + 1 rows, so a response with at most
        # PAGE_SIZE rows means there is nothing left to fetch.
        done = len(results) <= PAGE_SIZE
        page += 1

        for binding in results:
            v = ids[binding["v"]["value"]]
            labels[v] = binding["vLabel"]["value"]
            w = ids[binding["w"]["value"]]
            labels[w] = binding["wLabel"]["value"]

            # Edges point from ?w to ?v; the set removes duplicate pairs.
            edges.add((w, v))

    graph = Graph(vertices=ids.values())

    for w, v in edges:
        graph.add_edge(w, v)

    with open(args.output, "w") as metis:
        io.write_metis_graph(metis, graph)

    # One line per vertex: <id>,"<label>"
    with open(args.dict, "w") as dictionary:
        for vertex_id, label in labels.items():
            print('{},"{}"'.format(vertex_id, label), file=dictionary)

11-page-rank.py

+61
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
#!/usr/bin/env python3
2+
3+
import argparse
4+
import networkx
5+
from operator import itemgetter
6+
from collections import defaultdict
7+
8+
9+
def ranks(ranking):
    """Yield ``(position, label)`` pairs from the best score to the worst.

    ``ranking`` maps labels to scores.  Positions start at 1 and labels
    with equal scores share a position; the next lower score gets the
    following position, without gaps.
    """
    by_score = sorted(ranking.items(), key=itemgetter(1), reverse=True)
    position = 0
    previous = float("inf")

    for name, score in by_score:
        if score < previous:
            position, previous = position + 1, score

        yield position, name
19+
20+
21+
def read_nx_digraph_from_metis_file(graphfile, dictfile=None):
    """Build a ``networkx.DiGraph`` from an adjacency-list graph file.

    The first line of ``graphfile`` is a header with the vertex and edge
    counts.  Every following non-empty line lists integer vertex ids
    separated by whitespace: the first id is the source, and each remaining
    id is the target of one edge leaving that source.

    Args:
        graphfile: open text file containing the graph.
        dictfile: optional open text file with one ``<id>,"<label>"`` line
            per vertex.  When given, nodes are named by label; otherwise
            the integer vertex ids themselves are the node names.

    Returns:
        A directed graph containing every edge listed in ``graphfile``.

    Raises:
        KeyError: if ``dictfile`` is given but has no entry for a vertex id
            that occurs in the graph.
        ValueError: if a header field or a vertex id is not an integer.
    """
    vertices_edges = graphfile.readline().split(" ")
    # The counts are not needed to build the edge list; converting them only
    # makes a malformed header fail early.
    _num_vertices = int(vertices_edges[0])
    _num_edges = int(vertices_edges[1])

    if dictfile is not None:
        labels = {}

        for line in dictfile:
            if not line.strip():
                continue

            vertex, label = line.split(",", 1)
            # Cut off the first and last character, i.e. the quotes that
            # enclose the label.
            labels[int(vertex)] = label.strip()[1:-1]

        label_of = labels.__getitem__
    else:
        # Without a dictionary every vertex is named by its own id.  (A
        # defaultdict cannot express this: its factory receives no key.)
        def label_of(vertex):
            return vertex

    def edgelist():
        for line in graphfile:
            # split() with no argument ignores the trailing newline and
            # repeated blanks, and returns [] for an empty line.
            vertices = [label_of(int(vertex)) for vertex in line.split()]

            if not vertices:
                continue

            source, *targets = vertices

            for target in targets:
                yield (source, target)

    return networkx.from_edgelist(edgelist(), create_using=networkx.DiGraph)
44+
45+
46+
if __name__ == "__main__":
    # Command line: <input graph> <input dict>; prints one ranked label per line.
    cli = argparse.ArgumentParser(description="Compute PageRank for a given graph")
    cli.add_argument("input", help="path to input graph")
    cli.add_argument("dict", help="path to input dict")
    args = cli.parse_args()

    # Both files only need to stay open while the graph is being built.
    with open(args.input, "r") as infile, open(args.dict, "r") as dictfile:
        graph = read_nx_digraph_from_metis_file(infile, dictfile)

    scores = networkx.pagerank(graph)

    for position, name in ranks(scores):
        print("{:5d}. {}".format(position, name))

flake.lock

+6-6
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)