Skip to content

Commit b4f4135

Browse files
committed
Added a simple example class showing the generation of a NIF corpus.
1 parent 64e6e96 commit b4f4135

File tree

1 file changed

+77
-0
lines changed

1 file changed

+77
-0
lines changed
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,77 @@
1+
package org.aksw.gerbil;
2+
3+
import java.util.ArrayList;
4+
import java.util.HashSet;
5+
import java.util.List;
6+
import java.util.Set;
7+
8+
import org.aksw.gerbil.io.nif.DocumentListParser;
9+
import org.aksw.gerbil.io.nif.DocumentListWriter;
10+
import org.aksw.gerbil.io.nif.NIFParser;
11+
import org.aksw.gerbil.io.nif.NIFWriter;
12+
import org.aksw.gerbil.io.nif.impl.TurtleNIFParser;
13+
import org.aksw.gerbil.io.nif.impl.TurtleNIFWriter;
14+
import org.aksw.gerbil.transfer.nif.Document;
15+
import org.aksw.gerbil.transfer.nif.NIFTransferPrefixMapping;
16+
import org.aksw.gerbil.transfer.nif.data.Annotation;
17+
import org.aksw.gerbil.transfer.nif.data.DocumentImpl;
18+
import org.aksw.gerbil.transfer.nif.data.TypedNamedEntity;
19+
import org.junit.Ignore;
20+
21+
import com.hp.hpl.jena.rdf.model.Model;
22+
import com.hp.hpl.jena.rdf.model.ModelFactory;
23+
24+
/**
25+
* This class contains a simple example, showing how a developer could create a
26+
* simple NIF corpus.
27+
*
28+
* @author Michael Röder ([email protected])
29+
*
30+
*/
31+
@Ignore
32+
class CorpusGenerationExample {
33+
34+
public static void main(String[] args) {
35+
36+
String text = "Japan (Japanese: 日本 Nippon or Nihon) is a stratovolcanic archipelago of 6,852 islands.";
37+
Document document = new DocumentImpl(text, "http://example.org/document0");
38+
39+
// Add the marking for "Japan"
40+
Set<String> uris = new HashSet<String>();
41+
uris.add("http://example.org/Japan");
42+
Set<String> types = new HashSet<String>();
43+
types.add("http://example.org/Country");
44+
types.add("http://example.org/StratovolcanicArchipelago");
45+
document.addMarking(new TypedNamedEntity(0, 5, uris, types));
46+
47+
// Add the marking for "stratovolcanic archipelago"
48+
uris = new HashSet<String>();
49+
uris.add("http://example.org/StratovolcanicArchipelago");
50+
types = new HashSet<String>();
51+
types.add("http://example.org/Archipelago");
52+
types.add("http://www.w3.org/2000/01/rdf-schema#Class");
53+
document.addMarking(new TypedNamedEntity(42, 26, uris, types));
54+
55+
// Add a marking showing that this document has geographical content
56+
uris = new HashSet<String>();
57+
uris.add("http://example.org/Geography");
58+
document.addMarking(new Annotation(uris));
59+
60+
List<Document> documents = new ArrayList<Document>();
61+
documents.add(document);
62+
63+
// Writing our new list of documents to a String
64+
NIFWriter writer = new TurtleNIFWriter();
65+
String nifString = writer.writeNIF(documents);
66+
System.out.println(nifString);
67+
68+
// After generating a NIF corpus, it can be helpful to parse the NIF using a `NIFParser` instance.
69+
NIFParser parser = new TurtleNIFParser();
70+
parser.parseNIF(nifString);
71+
72+
// Instead of text containing the NIF information, a jena RDF `Model` can be created.
73+
DocumentListWriter listWriter = new DocumentListWriter();
74+
Model nifModel = ModelFactory.createDefaultModel();
75+
listWriter.writeDocumentsToModel(nifModel, documents);
76+
}
77+
}

0 commit comments

Comments
 (0)