
Commit e3c3b46 (1 parent: 4210079), committed Jan 16, 2024

Add solutions for exercise 11

4 files changed, +193 -0 lines
11-metis-from-sparql.py  (new file, +91 lines)

#!/usr/bin/env python3

import argparse
import requests
import itertools
from collections import defaultdict
from graphs import Graph, io


# getting all the results will time out, so we do paging
PAGE_SIZE = 10000
SPARQL_ENDPOINT = "https://query.wikidata.org/sparql"
USER_AGENT = "tud-kbs-example-subgraph-extractor/0.0.1 (https://github.com/knowsys/Course-Knowledge-Graphs/)"
# first, in a subquery, get the v/w pairs in a stable order and
# retrieve a page of those. then find the labels for these results.
QUERY = """#TOOL:tud-kbs-example-subgraph-extractor
SELECT DISTINCT ?v ?vLabel ?w ?wLabel WHERE {{
  {{ SELECT DISTINCT ?v ?w WHERE {{
      ?v wdt:P1344 ?tournament .
      ?tournament ^wdt:P2522 ?w .
      ?tournament wdt:P361* wd:Q170645 .
  }} ORDER BY ASC(?v) ASC(?w) LIMIT {limit} OFFSET {offset} }}
  SERVICE wikibase:label {{
    bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
    ?v rdfs:label ?vLabel .
    ?w rdfs:label ?wLabel .
  }}
}}
"""


def counter():
    c = itertools.count(1)

    def step():
        return next(c)

    return step


def paged(page):
    return QUERY.format(limit=PAGE_SIZE + 1, offset=(PAGE_SIZE + 1) * page)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description=(
            "Extract tournament winner/" "participant subgraph from " "Wikidata"
        )
    )
    parser.add_argument("output", help="path to output graph")
    parser.add_argument("dict", help="path to output dictionary")
    args = parser.parse_args()

    ids = defaultdict(counter())
    labels = {}
    edges = set([])
    done = False
    page = 0

    while not done:
        print("-!- getting page {}".format(page))
        request = requests.get(
            SPARQL_ENDPOINT,
            params={"query": paged(page), "format": "json"},
            headers={"user-agent": USER_AGENT},
        )
        request.raise_for_status()
        results = request.json()["results"]["bindings"]
        done = len(results) <= PAGE_SIZE
        page += 1

        for binding in results:
            v = ids[binding["v"]["value"]]
            labels[v] = binding["vLabel"]["value"]
            w = ids[binding["w"]["value"]]
            labels[w] = binding["wLabel"]["value"]

            edges |= {(w, v)}

    graph = Graph(vertices=ids.values())

    for w, v in edges:
        graph.add_edge(w, v)

    with open(args.output, "w") as metis:
        io.write_metis_graph(metis, graph)

    with open(args.dict, "w") as dictionary:
        for vertex_id, label in labels.items():
            print('{},"{}"'.format(vertex_id, label), file=dictionary)
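
Note: the ids = defaultdict(counter()) idiom above hands out consecutive integer ids to IRIs on first access. A minimal standalone sketch of that behaviour (the example IRIs below are only illustrations, not taken from the query results):

import itertools
from collections import defaultdict

def counter():
    c = itertools.count(1)

    def step():
        return next(c)

    return step

ids = defaultdict(counter())
print(ids["http://www.wikidata.org/entity/Q1"])  # 1: first access allocates a fresh id
print(ids["http://www.wikidata.org/entity/Q2"])  # 2
print(ids["http://www.wikidata.org/entity/Q1"])  # 1 again: the existing id is reused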

11-page-rank.py  (new file, +61 lines)

#!/usr/bin/env python3

import argparse
import networkx
from operator import itemgetter
from collections import defaultdict


def ranks(ranking):
    idx = 0
    worst = float("inf")

    for label, rank in sorted(ranking.items(), key=itemgetter(1), reverse=True):
        if rank < worst:
            idx += 1
            worst = rank

        yield (idx, label)


def read_nx_digraph_from_metis_file(graphfile, dictfile=None):
    vertices_edges = graphfile.readline().split(" ")
    num_vertices = int(vertices_edges[0])
    num_edges = int(vertices_edges[1])

    labels = {}

    if dictfile is not None:
        for line in dictfile:
            vertex, label = line.split(",", 1)
            labels[int(vertex)] = label.strip()[1:-1]
    else:
        # no dictionary given: use each vertex number as its own label
        # (a plain defaultdict cannot do this, since its factory never sees the key)
        class IdentityLabels(dict):
            def __missing__(self, key):
                return key

        labels = IdentityLabels()

    def edgelist():
        for line in graphfile:
            vertices = [labels[int(vertex)] for vertex in line.split(" ")]
            source = vertices[0]

            for target in vertices[1:]:
                yield (source, target)

    return networkx.from_edgelist(edgelist(), create_using=networkx.DiGraph)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description=("Compute PageRank for a given graph"))
    parser.add_argument("input", help="path to input graph")
    parser.add_argument("dict", help="path to input dict")
    args = parser.parse_args()

    graph = None

    with open(args.input, "r") as infile:
        with open(args.dict, "r") as dictfile:
            graph = read_nx_digraph_from_metis_file(infile, dictfile)

    ranking = ranks(networkx.pagerank(graph))

    for rank, label in ranking:
        print("{:5d}. {}".format(rank, label))
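
Note: ranks() above yields a dense ranking, i.e. labels with equal PageRank share a position and the next distinct value gets the following position. A small illustration with invented scores (not actual PageRank output); ranks() is repeated so the snippet runs on its own:

from operator import itemgetter

def ranks(ranking):
    idx = 0
    worst = float("inf")

    for label, rank in sorted(ranking.items(), key=itemgetter(1), reverse=True):
        if rank < worst:
            idx += 1
            worst = rank

        yield (idx, label)

scores = {"a": 0.4, "b": 0.4, "c": 0.2}  # invented scores
print(list(ranks(scores)))               # [(1, 'a'), (1, 'b'), (2, 'c')]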

schemas/11.1.shacl  (new file, +20 lines)

# namespace declarations (standard SHACL/XSD/Wikibase prefixes; eg: as in 11.2.shex)
@prefix eg: <http://example.org/> .
@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix wikibase: <http://wikiba.se/ontology#> .

eg:CoordinateShape
  a sh:NodeShape ;
  sh:targetClass wikibase:GlobecoordinateValue ;
  sh:property [
    sh:path wikibase:geoLatitude ;
    sh:datatype xsd:double
  ] ;
  sh:property [
    sh:path wikibase:geoLongitude ;
    sh:datatype xsd:double
  ] ;
  sh:property [
    sh:path wikibase:geoPrecision ;
    sh:datatype xsd:double
  ] ;
  sh:property [
    sh:path wikibase:geoGlobe ;
    sh:nodeKind sh:IRI
  ] ;
  sh:closed true .
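
Note: a sketch of how the shape could be checked with the pySHACL library; the library choice and the data file name are assumptions, not part of this commit (the prefix declarations shown above are assumed to be present so the file parses as Turtle):

from rdflib import Graph
from pyshacl import validate  # assumes the pyshacl package is installed

# hypothetical Turtle file containing wikibase:GlobecoordinateValue nodes
data = Graph().parse("coordinates.ttl", format="turtle")
shapes = Graph().parse("schemas/11.1.shacl", format="turtle")

conforms, _, report = validate(data, shacl_graph=shapes)
print(report if not conforms else "data conforms to eg:CoordinateShape")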

schemas/11.2.shex  (new file, +21 lines)

PREFIX eg: <http://example.org/>

eg:Coloured @eg:Red OR @eg:Green OR @eg:Blue

eg:Red CLOSED {
  a [eg:Node] ;
  (eg:edge @eg:Green | eg:edge @eg:Blue |
   ^eg:edge @eg:Green | ^eg:edge @eg:Blue) * ;
} AND NOT @eg:Green AND NOT @eg:Blue

eg:Green CLOSED {
  a [eg:Node] ;
  (eg:edge @eg:Red | eg:edge @eg:Blue |
   ^eg:edge @eg:Red | ^eg:edge @eg:Blue) * ;
} AND NOT @eg:Red AND NOT @eg:Blue

eg:Blue CLOSED {
  a [eg:Node] ;
  (eg:edge @eg:Green | eg:edge @eg:Red |
   ^eg:edge @eg:Green | ^eg:edge @eg:Red) * ;
} AND NOT @eg:Red AND NOT @eg:Green
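
Note: analogously, a validation sketch using the PyShEx package; the package choice, the tiny example graph, and the focus/start node choices are all assumptions, not part of this commit:

from pyshex import ShExEvaluator  # assumes the PyShEx package is installed

# made-up graph: two nodes joined by one edge, so a proper colouring exists
rdf = """
PREFIX eg: <http://example.org/>
eg:a a eg:Node ; eg:edge eg:b .
eg:b a eg:Node .
"""

with open("schemas/11.2.shex") as shexfile:
    schema = shexfile.read()

results = ShExEvaluator(rdf=rdf, schema=schema,
                        focus="http://example.org/a",
                        start="http://example.org/Coloured").evaluate()
for result in results:
    print("PASS" if result.result else "FAIL: {}".format(result.reason))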
