Skip to content

Commit ad47f9b

Browse files
committed Jan 25, 2022
Add solutions for sheet 11
1 parent 61fa46e commit ad47f9b

File tree

2 files changed

+160
-0
lines changed

2 files changed

+160
-0
lines changed
 

‎solutions/11-metis-from-sparql.py

+94
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
#!/usr/bin/env python3
2+
3+
import argparse
4+
import requests
5+
import itertools
6+
from collections import defaultdict
7+
from graphs import Graph, io
8+
9+
10+
# Fetching all results in a single query would time out, so we page through them.
11+
PAGE_SIZE = 10000
12+
SPARQL_ENDPOINT = 'https://query.wikidata.org/sparql'
13+
USER_AGENT = 'tud-kbs-example-subgraph-extractor/0.0.1 (https://github.com/knowsys/Course-Knowledge-Graphs/)'
14+
# first, in a subquery, get the v/w pairs in a stable order and
15+
# retrieve a page of those. then find the labels for these results.
16+
QUERY = """#TOOL:tud-kbs-example-subgraph-extractor
17+
SELECT DISTINCT ?v ?vLabel ?w ?wLabel WHERE {{
18+
{{ SELECT DISTINCT ?v ?w WHERE {{
19+
?v wdt:P1344/^wdt:P2522 ?w .
20+
# ?v wdt:P1344 ?event .
21+
# ?v wdt:P31/wdt:P279* wd:Q6979593 .
22+
# ?event wdt:P31?/wdt:P279* wd:Q170645 .
23+
# ?event ^wdt:P2522 ?w .
24+
}} ORDER BY ASC(?v) ASC(?w) LIMIT {limit} OFFSET {offset} }}
25+
SERVICE wikibase:label {{
26+
bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
27+
?v rdfs:label ?vLabel .
28+
?w rdfs:label ?wLabel .
29+
}}
30+
}}
31+
"""
32+
33+
34+
def counter():
    """Create an independent generator of consecutive integer IDs.

    Returns:
        A zero-argument callable. Its first call returns 1 and every
        further call returns the next integer. Each ``counter()``
        invocation starts its own sequence.
    """
    sequence = itertools.count(1)
    return lambda: next(sequence)
41+
42+
43+
def paged(page):
    """Return the SPARQL query text for the given zero-based page.

    Each page requests ``PAGE_SIZE + 1`` rows, and the offset advances by
    the same amount per page.
    """
    window = PAGE_SIZE + 1
    return QUERY.format(limit=window, offset=window * page)
46+
47+
48+
if __name__ == '__main__':
    # Adjacent string literals are joined verbatim, so the space after
    # "from" must be part of the literal.
    parser = argparse.ArgumentParser(
        description=("Extract tournament winner/"
                     "participant subgraph from "
                     "Wikidata"))
    parser.add_argument('output',
                        help='path to output graph')
    parser.add_argument('dict',
                        help='path to output dictionary')
    args = parser.parse_args()

    # requests waits indefinitely unless a timeout is given; fail instead
    # of hanging when the endpoint stops responding.
    REQUEST_TIMEOUT = 120  # seconds

    # Maps each entity URI to a numeric vertex id, assigned on first use.
    ids = defaultdict(counter())
    labels = {}
    edges = set()
    done = False
    page = 0

    while not done:
        print('-!- getting page {}'.format(page))
        response = requests.get(SPARQL_ENDPOINT,
                                params={'query': paged(page),
                                        'format': 'json'},
                                headers={'user-agent': USER_AGENT},
                                timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        results = response.json()['results']['bindings']
        # paged() asks for PAGE_SIZE + 1 rows, so a response with at most
        # PAGE_SIZE rows means there is nothing left to fetch.
        done = len(results) <= PAGE_SIZE
        page += 1

        for binding in results:
            v = ids[binding['v']['value']]
            labels[v] = binding['vLabel']['value']
            w = ids[binding['w']['value']]
            labels[w] = binding['wLabel']['value']

            # Edges point from ?w to ?v; the set removes duplicates.
            edges.add((w, v))

    graph = Graph(vertices=ids.values())

    for (w, v) in edges:
        graph.add_edge(w, v)

    with open(args.output, 'w') as metis:
        io.write_metis_graph(metis, graph)

    # One line per vertex: <id>,"<label>"
    with open(args.dict, 'w') as dictionary:
        for vertex_id, label in labels.items():
            print('{},"{}"'.format(vertex_id, label),
                  file=dictionary)

‎solutions/11-page-rank.py

+66
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
#!/usr/bin/env python3
2+
3+
import argparse
4+
import networkx
5+
from operator import itemgetter
6+
from collections import defaultdict
7+
8+
9+
def ranks(ranking):
    """Yield ``(position, label)`` pairs ordered from best to worst score.

    ``ranking`` maps labels to scores. Entries are visited by descending
    score. Labels with equal scores share a position, and the position
    grows by one each time the score strictly decreases.
    """
    ordered = sorted(ranking.items(), key=itemgetter(1), reverse=True)
    position, threshold = 0, float('inf')

    for name, score in ordered:
        dropped = score < threshold
        if dropped:
            threshold = score
            position += 1

        yield position, name
21+
22+
23+
def read_nx_digraph_from_metis_file(graphfile, dictfile=None):
    """Build a ``networkx.DiGraph`` from an adjacency-list graph file.

    Args:
        graphfile: Open text file. The first line is a header and is
            skipped. Every further non-blank line holds whitespace-separated
            integer vertex ids: the first is the edge source and the
            remaining ones are its targets.
        dictfile: Optional open text file with one ``<id>,"<label>"`` entry
            per line. When given, graph nodes are the labels; otherwise the
            integer vertex ids are used as nodes.

    Returns:
        The directed graph built from the edges found in ``graphfile``.

    Raises:
        KeyError: If ``dictfile`` is given but lacks an id used in the graph.
        ValueError: If an id is not an integer or a dictionary line has no
            comma.
    """
    # The header (vertex and edge counts) is not needed to build the graph.
    graphfile.readline()

    if dictfile is not None:
        labels = {}

        for line in dictfile:
            if not line.strip():
                continue
            vertex, label = line.split(',', 1)
            # Drop the surrounding double quotes written by the exporter.
            labels[int(vertex)] = label.strip()[1:-1]

        def label_of(vertex):
            return labels[vertex]
    else:
        # A defaultdict cannot do this: its factory is called without the
        # missing key, so an identity mapping needs an explicit function.
        def label_of(vertex):
            return vertex

    def edgelist():
        for line in graphfile:
            # split() without arguments tolerates the trailing newline,
            # repeated blanks and empty lines.
            vertices = [label_of(int(vertex)) for vertex in line.split()]
            if not vertices:
                continue

            source = vertices[0]

            for target in vertices[1:]:
                yield (source, target)

    return networkx.from_edgelist(edgelist(),
                                  create_using=networkx.DiGraph)
47+
48+
49+
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description="Compute PageRank for a given graph")
    parser.add_argument('input',
                        help='path to input graph')
    parser.add_argument('dict',
                        help='path to input dict')
    args = parser.parse_args()

    with open(args.input, 'r') as infile, open(args.dict, 'r') as dictfile:
        graph = read_nx_digraph_from_metis_file(infile, dictfile)

    # networkx.pagerank_scipy was deprecated in NetworkX 2.6 and removed in
    # 3.0; networkx.pagerank is the supported entry point.
    ranking = ranks(networkx.pagerank(graph))

    for rank, label in ranking:
        print('{:5d}. {}'.format(rank, label))

0 commit comments

Comments
 (0)
Please sign in to comment.