knowsys
diff --git a/‎11-metis-from-sparql.py
+90 b/‎11-metis-from-sparql.py
+90
diff --git a/‎11-page-rank.py
+61 b/‎11-page-rank.py
+61
diff --git a/‎flake.lock
+6-6 b/‎flake.lock
+6-6
@@ -0,0 +1,90 @@
+#!/usr/bin/env python3
+
+import argparse
+import requests
+import itertools
+from collections import defaultdict
+from graphs import Graph, io
+
+
+# Fetching all results in a single request times out, so the query is paged.
+PAGE_SIZE = 10000
+SPARQL_ENDPOINT = "https://query.wikidata.org/sparql"
+# Sent as the `user-agent` header with every request to the endpoint.
+USER_AGENT = "tud-kbs-example-subgraph-extractor/0.0.1 (https://github.com/knowsys/Course-Knowledge-Graphs/)"
+# Query template; `{limit}` and `{offset}` are filled in with str.format,
+# which is why every literal SPARQL brace is doubled.
+# The inner subquery selects the distinct ?v/?w pairs in a stable order and
+# keeps one page of them; the outer query then looks up the labels for just
+# that page.
+# NOTE(review): presumably P1344 is "participant in" and P2522 is "victory",
+# i.e. ?v took part in a tournament that ?w won -- verify against Wikidata.
+QUERY = """#TOOL:tud-kbs-example-subgraph-extractor
+SELECT DISTINCT ?v ?vLabel ?w ?wLabel WHERE {{
+  {{ SELECT DISTINCT ?v ?w WHERE {{
+    ?v wdt:P1344 ?tournament .
+    ?tournament ^wdt:P2522 ?w .
+  }} ORDER BY ASC(?v) ASC(?w) LIMIT {limit} OFFSET {offset} }}
+  SERVICE wikibase:label {{
+    bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
+    ?v rdfs:label ?vLabel .
+    ?w rdfs:label ?wLabel .
+  }}
+}}
+"""
+
+
+def counter():
+    """Return a zero-argument callable that yields 1, 2, 3, ... on successive calls.
+
+    Each call to ``counter()`` starts an independent sequence, which makes the
+    result usable as a ``defaultdict`` factory that hands out fresh ids.
+    """
+    # The bound ``__next__`` of a private count iterator is exactly the
+    # "give me the next number" callable we need.
+    return itertools.count(1).__next__
+
+
+def paged(page):
+    """Return the SPARQL query text for the zero-based result page ``page``."""
+    # Every request asks for one row more than PAGE_SIZE, and consecutive
+    # pages are spaced by that same amount so they do not overlap.
+    rows_per_request = PAGE_SIZE + 1
+    return QUERY.format(limit=rows_per_request, offset=page * rows_per_request)
+
+
+if __name__ == "__main__":
+    # Script entry point: page through the SPARQL endpoint, collect the
+    # (w, v) pairs as edges of a graph over integer vertex ids, then write the
+    # graph and an id -> label dictionary to the two paths given on the
+    # command line.
+    parser = argparse.ArgumentParser(
+        # A single literal: the former implicit concatenation of three
+        # fragments dropped the space before "Wikidata".
+        description="Extract tournament winner/participant subgraph from Wikidata"
+    )
+    parser.add_argument("output", help="path to output graph")
+    parser.add_argument("dict", help="path to output dictionary")
+    args = parser.parse_args()
+
+    # Every previously unseen entity IRI is assigned the next integer id,
+    # counting from 1.
+    ids = defaultdict(counter())
+    labels = {}
+    # A set, so a pair that shows up more than once is stored only once.
+    edges = set()
+    done = False
+    page = 0
+
+    while not done:
+        print("-!- getting page {}".format(page))
+        response = requests.get(
+            SPARQL_ENDPOINT,
+            params={"query": paged(page), "format": "json"},
+            headers={"user-agent": USER_AGENT},
+        )
+        # Abort on HTTP errors instead of parsing an error body as results.
+        response.raise_for_status()
+        results = response.json()["results"]["bindings"]
+        # paged() requests PAGE_SIZE + 1 rows, so a shorter answer means
+        # this was the last page.
+        done = len(results) <= PAGE_SIZE
+        page += 1
+
+        for binding in results:
+            v = ids[binding["v"]["value"]]
+            labels[v] = binding["vLabel"]["value"]
+            w = ids[binding["w"]["value"]]
+            labels[w] = binding["wLabel"]["value"]
+
+            edges.add((w, v))
+
+    graph = Graph(vertices=ids.values())
+
+    for w, v in edges:
+        graph.add_edge(w, v)
+
+    with open(args.output, "w") as metis:
+        io.write_metis_graph(metis, graph)
+
+    # One `id,"label"` line per vertex; labels are written verbatim, without
+    # any escaping of embedded quotes.
+    with open(args.dict, "w") as dictionary:
+        for vertex_id, label in labels.items():
+            print('{},"{}"'.format(vertex_id, label), file=dictionary)
@@ -0,0 +1,61 @@
+#!/usr/bin/env python3
+
+import argparse
+import networkx
+from operator import itemgetter
+from collections import defaultdict
+
+
+def ranks(ranking):
+    """Yield ``(position, label)`` pairs ordered from highest to lowest score.
+
+    ``ranking`` maps labels to scores. Positions start at 1 and are dense:
+    labels with equal scores share a position, and the next lower score gets
+    the following one.
+    """
+    by_score_desc = sorted(ranking.items(), key=itemgetter(1), reverse=True)
+    position = 0
+    previous = float("inf")
+
+    for name, score in by_score_desc:
+        # Only a strictly lower score opens a new position.
+        if score < previous:
+            position, previous = position + 1, score
+
+        yield (position, name)
+
+
+class _IdentityLabels(dict):
+    """Mapping that answers a lookup of an unknown key with the key itself."""
+
+    def __missing__(self, key):
+        return key
+
+
+def read_nx_digraph_from_metis_file(graphfile, dictfile=None):
+    """Build a ``networkx.DiGraph`` from an open graph file.
+
+    The first line of ``graphfile`` is a header and is skipped. Every
+    following non-blank line is read as whitespace-separated integer vertex
+    ids: the first id is the edge source, each remaining id is a target.
+
+    ``dictfile``, when given, is an open file with one ``id,"label"`` line per
+    vertex; graph nodes are then named by label. Without it, nodes are named
+    by their integer ids.
+
+    Raises ``KeyError`` when ``dictfile`` is given but has no entry for a
+    vertex id used in ``graphfile``, and ``ValueError`` when an id is not an
+    integer.
+    """
+    # The header's vertex/edge counts are not needed to build the graph.
+    graphfile.readline()
+
+    if dictfile is not None:
+        labels = {}
+        for line in dictfile:
+            if not line.strip():
+                continue
+            vertex, label = line.split(",", 1)
+            # Drop the surrounding double quotes from the label.
+            labels[int(vertex)] = label.strip()[1:-1]
+    else:
+        # defaultdict calls its factory with no arguments, so a one-argument
+        # lambda cannot act as an identity fallback; __missing__ receives
+        # the key.
+        labels = _IdentityLabels()
+
+    def edgelist():
+        for line in graphfile:
+            # split() without an argument tolerates trailing whitespace and
+            # repeated separators; blank lines yield no edges.
+            fields = line.split()
+            if not fields:
+                continue
+
+            vertices = [labels[int(vertex)] for vertex in fields]
+            source = vertices[0]
+
+            for target in vertices[1:]:
+                yield (source, target)
+
+    # The generator is fully consumed here, while graphfile is still open.
+    return networkx.from_edgelist(edgelist(), create_using=networkx.DiGraph)
+
+
+if __name__ == "__main__":
+    # Script entry point: load the graph and its label dictionary, compute
+    # PageRank, and print the labels with their rank positions, best first.
+    cli = argparse.ArgumentParser(description="Compute PageRank for a given graph")
+    cli.add_argument("input", help="path to input graph")
+    cli.add_argument("dict", help="path to input dict")
+    options = cli.parse_args()
+
+    with open(options.input, "r") as metis_file, open(options.dict, "r") as label_file:
+        digraph = read_nx_digraph_from_metis_file(metis_file, label_file)
+
+    scores = networkx.pagerank(digraph)
+
+    for position, name in ranks(scores):
+        print("{:5d}. {}".format(position, name))