From c5d931a9e2ca0eced8bc4108108fe8a38cb6b697 Mon Sep 17 00:00:00 2001 From: Martin Wiesner Date: Mon, 28 Oct 2024 19:01:24 +0100 Subject: [PATCH] OPENNLP-1626 Update Website with new models (1.1) - updates models.ad to cover all 18 new languages - restructures models.ad for better separation and readability of long tables - reduces information duplication / overhead - adds better CSS styling for TOC formatting - adds TOC directives to several pages where helpful - fixes several occurrences of incorrectly chosen section/subsection depths in download.ad, maven-dependency.ad and gradle-dependency.ad - adds news entry for the models release 1.1 (2024-10-28) --- src/main/jbake/assets/css/custom-style.css | 7 + src/main/jbake/assets/css/scheme-dark.css | 8 + src/main/jbake/content/download.ad | 20 +- src/main/jbake/content/gradle-dependency.ad | 4 +- src/main/jbake/content/maven-dependency.ad | 5 +- src/main/jbake/content/models.ad | 665 ++++++++++++++---- .../jbake/content/news/news-2021-05-30.ad | 2 - .../jbake/content/news/news-2024-10-28.ad | 40 ++ src/main/jbake/jbake.properties | 1 + 9 files changed, 592 insertions(+), 160 deletions(-) create mode 100755 src/main/jbake/content/news/news-2024-10-28.ad diff --git a/src/main/jbake/assets/css/custom-style.css b/src/main/jbake/assets/css/custom-style.css index de817cce1..b661b1526 100644 --- a/src/main/jbake/assets/css/custom-style.css +++ b/src/main/jbake/assets/css/custom-style.css @@ -27,4 +27,11 @@ body { .jumbotron h1 { line-height: 1; font-weight: bold; +} + +#toc > ul { + padding-inline-start: 0; +} +#toc > ul > li > ul { + padding-inline-start: 1em; } \ No newline at end of file diff --git a/src/main/jbake/assets/css/scheme-dark.css b/src/main/jbake/assets/css/scheme-dark.css index c658b9c6c..d2febf68b 100644 --- a/src/main/jbake/assets/css/scheme-dark.css +++ b/src/main/jbake/assets/css/scheme-dark.css @@ -71,6 +71,14 @@ .tableblock > a { color: #f59523; } + #toctitle { + color: #eee; + background: #222; + } + 
#toc > ul > li > a, #toc > ul > li > ul > li > a { + color: #f59523; + background: #222; + } .content pre, .content pre > code, .com, .tag, .pln, .str, .pun, .kwd, diff --git a/src/main/jbake/content/download.ad b/src/main/jbake/content/download.ad index 6c4da0d70..0f479cb7b 100755 --- a/src/main/jbake/content/download.ad +++ b/src/main/jbake/content/download.ad @@ -21,8 +21,10 @@ :jbake-tags: download :jbake-status: published :idprefix: +:toc: +:toc-title: Content -## Last Official Release +== Latest Official Release Apache OpenNLP {opennlp_version} is now available for download. @@ -34,24 +36,24 @@ It must be untarred with a GNU compatible version of tar. |=== |.tar.gz | .zip -|https://www.apache.org/dyn/closer.cgi/opennlp/opennlp-{opennlp_version}/apache-opennlp-{opennlp_version}-bin.tar.gz[apache-opennlp-{opennlp_version}-bin.tar.gz] +| https://www.apache.org/dyn/closer.cgi/opennlp/opennlp-{opennlp_version}/apache-opennlp-{opennlp_version}-bin.tar.gz[apache-opennlp-{opennlp_version}-bin.tar.gz] https://www.apache.org/dist/opennlp/opennlp-{opennlp_version}/apache-opennlp-{opennlp_version}-bin.tar.gz.sha512[sha512] https://www.apache.org/dist/opennlp/opennlp-{opennlp_version}/apache-opennlp-{opennlp_version}-bin.tar.gz.asc[asc] -|https://www.apache.org/dyn/closer.cgi/opennlp/opennlp-{opennlp_version}/apache-opennlp-{opennlp_version}-bin.zip[apache-opennlp-{opennlp_version}-bin.zip] +| https://www.apache.org/dyn/closer.cgi/opennlp/opennlp-{opennlp_version}/apache-opennlp-{opennlp_version}-bin.zip[apache-opennlp-{opennlp_version}-bin.zip] https://www.apache.org/dist/opennlp/opennlp-{opennlp_version}/apache-opennlp-{opennlp_version}-bin.zip.sha512[sha512] https://www.apache.org/dist/opennlp/opennlp-{opennlp_version}/apache-opennlp-{opennlp_version}-bin.zip.asc[asc] -|https://www.apache.org/dyn/closer.cgi/opennlp/opennlp-{opennlp_version}/apache-opennlp-{opennlp_version}-src.tar.gz[apache-opennlp-{opennlp_version}-src.tar.gz] +| 
https://www.apache.org/dyn/closer.cgi/opennlp/opennlp-{opennlp_version}/apache-opennlp-{opennlp_version}-src.tar.gz[apache-opennlp-{opennlp_version}-src.tar.gz] https://www.apache.org/dist/opennlp/opennlp-{opennlp_version}/apache-opennlp-{opennlp_version}-src.tar.gz.sha512[sha512] https://www.apache.org/dist/opennlp/opennlp-{opennlp_version}/apache-opennlp-{opennlp_version}-src.tar.gz.asc[asc] -|https://www.apache.org/dyn/closer.cgi/opennlp/opennlp-{opennlp_version}/apache-opennlp-{opennlp_version}-src.zip[apache-opennlp-{opennlp_version}-src.zip] +| https://www.apache.org/dyn/closer.cgi/opennlp/opennlp-{opennlp_version}/apache-opennlp-{opennlp_version}-src.zip[apache-opennlp-{opennlp_version}-src.zip] https://www.apache.org/dist/opennlp/opennlp-{opennlp_version}/apache-opennlp-{opennlp_version}-src.zip.sha512[sha512] https://www.apache.org/dist/opennlp/opennlp-{opennlp_version}/apache-opennlp-{opennlp_version}-src.zip.asc[asc] |=== -### Verifying Signatures +== Verifying Signatures The sha512 and asc files are signature files and can be used to verify the integrity of the downloaded distribution package. @@ -70,13 +72,13 @@ That can easily be done with: More information about release signing and verifying signatures can be found https://www.apache.org/dev/release-signing.html[here]. -## Models +== Models -The models for Apache OpenNLP are found link:/models.html[here]. +All models are available via our link:/models.html[models page]. NOTE: The models can be used for testing or getting started, please train your own models for all other use cases. -## Archive +== Archive Historical releases can be downloaded from the archived https://sourceforge.net/projects/opennlp/[SourceForge project] or the https://archive.apache.org/dist/opennlp/[Apache archive]. 
diff --git a/src/main/jbake/content/gradle-dependency.ad b/src/main/jbake/content/gradle-dependency.ad index d3cc6273e..0261541b5 100755 --- a/src/main/jbake/content/gradle-dependency.ad +++ b/src/main/jbake/content/gradle-dependency.ad @@ -21,6 +21,8 @@ :jbake-tags: maven :jbake-status: published :idprefix: +:toc: preamble +:toc-title: Content Apache OpenNLP is also distributed via the Maven Central Repository. The maven artifacts are located https://repo1.maven.org/maven2/org/apache/opennlp/[here]. @@ -95,7 +97,7 @@ dependencies { } ---- -NOTE: The `SNAPSHOT` dependency requires the following Maven repository in your Gradle project: +NOTE: All `SNAPSHOT` dependencies require the following Maven repository: [source,indent=0,subs=attributes+] ---- diff --git a/src/main/jbake/content/maven-dependency.ad b/src/main/jbake/content/maven-dependency.ad index 56f837047..8b6874138 100755 --- a/src/main/jbake/content/maven-dependency.ad +++ b/src/main/jbake/content/maven-dependency.ad @@ -21,6 +21,8 @@ :jbake-tags: maven :jbake-status: published :idprefix: +:toc: preamble +:toc-title: Content Apache OpenNLP is also distributed via the Maven Central Repository. The maven artifacts are located https://repo1.maven.org/maven2/org/apache/opennlp/[here]. 
@@ -89,6 +91,7 @@ To use the OpenNLP UIMA Annotators define the following dependency: ---- +== SNAPSHOTS === OpenNLP Tools SNAPSHOT Dependency To use the current version of the _main_ branch define the following dependency: @@ -101,7 +104,7 @@ To use the current version of the _main_ branch define the following dependency: ---- -NOTE: The `SNAPSHOT` dependency requires the following Maven repository: +NOTE: All `SNAPSHOT` dependencies require the following Maven repository: [source,xml,indent=0,subs=attributes+] ---- diff --git a/src/main/jbake/content/models.ad b/src/main/jbake/content/models.ad index 9d2e0213b..2f29d78cf 100755 --- a/src/main/jbake/content/models.ad +++ b/src/main/jbake/content/models.ad @@ -21,187 +21,558 @@ :jbake-tags: models :jbake-status: published :idprefix: +:toc: preamble +:toc-title: Content -Use the links in the table below to download the pre-trained models for the Apache OpenNLP. +The Apache OpenNLP project provides several pre-trained model files: +* 1 generic model to conduct _language detection_ on a specified text input +* 23 language-specific models to accomplish _sentence detection_, _part of speech tagging_, and _tokenization_. + +== Models IMPORTANT: All models are zip compressed (like a jar file), they *must not* be uncompressed. +Use the URLs in the tables below to download the pre-trained models for use with the Apache OpenNLP toolkit. + +=== Language detection + [options="header"] |====== -| Component | Language | Compatibility | Description | README and Reports | File | Signatures - -| Language Detector +| Language | Description | Compatibility | README and Reports | File | Signatures | Detects 103 languages -| >= 1.8.3 | Detects 103 languages in ISO 693-3 standard. Works well with longer texts that have at least 2 sentences or more from the same language. 
+| >= 1.8.3 | https://www.apache.org/dist/opennlp/models/langdetect/1.8.3/README.txt[README] https://www.apache.org/dist/opennlp/models/langdetect/1.8.3/langdetect-183.bin.report.txt[Effectiveness] https://www.apache.org/dyn/closer.cgi/opennlp/models/langdetect/1.8.3/langdetect-183.bin.misclassified.txt[Misclassified] -|https://www.apache.org/dyn/closer.cgi/opennlp/models/langdetect/1.8.3/langdetect-183.bin[langdetect-183.bin] -|https://www.apache.org/dist/opennlp/models/langdetect/1.8.3/langdetect-183.bin.md5[md5] +| https://www.apache.org/dyn/closer.cgi/opennlp/models/langdetect/1.8.3/langdetect-183.bin[langdetect-183.bin] +| https://www.apache.org/dist/opennlp/models/langdetect/1.8.3/langdetect-183.bin.md5[md5] https://www.apache.org/dist/opennlp/models/langdetect/1.8.3/langdetect-183.bin.sha1[sha1] https://www.apache.org/dist/opennlp/models/langdetect/1.8.3/langdetect-183.bin.asc[asc] +|====== + +=== Sentence detection + +NOTE: All models below are compatible with OpenNLP versions >= 1.0.0. The {opennlp_downloads}/README[README] and +{opennlp_downloads}/opennlp-training-eval-logs-1.1-2.4.0.zip[evaluation logs] +refer to every language listed below. 
+ +[options="header"] +|====== +| ISO code | Language | Trained with OpenNLP | UD version | File | Signatures +| bg +| Bulgarian +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-bg-ud-btb-sentence-1.1-2.4.0.bin[opennlp-bg-ud-btb-sentence-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-bg-ud-btb-sentence-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-bg-ud-btb-sentence-1.1-2.4.0.bin.asc[asc] + +| cs +| Czech +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-cs-ud-pdt-sentence-1.1-2.4.0.bin[opennlp-cs-ud-pdt-sentence-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-cs-ud-pdt-sentence-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-cs-ud-pdt-sentence-1.1-2.4.0.bin.asc[asc] + +| da +| Danish +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-da-ud-ddt-sentence-1.1-2.4.0.bin[opennlp-da-ud-ddt-sentence-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-da-ud-ddt-sentence-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-da-ud-ddt-sentence-1.1-2.4.0.bin.asc[asc] -| Sentence -| fr -| >= 1.0.0 -| Sentence detection model for French -| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/README[README] -https://dist.apache.org/repos/dist/release/opennlp/models/ud-models-1.0/opennlp-training-eval-logs-1.0-1.9.3.zip[Evaluation Logs] -| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/opennlp-1.0-1.9.3fr-ud-ftb-sentence-1.0-1.9.3.bin[opennlp-1.0-1.9.3fr-ud-ftb-sentence-1.0-1.9.3.bin] -|https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/opennlp-1.0-1.9.3fr-ud-ftb-sentence-1.0-1.9.3.bin.sha512[sha512] -https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/opennlp-1.0-1.9.3fr-ud-ftb-sentence-1.0-1.9.3.bin.asc[asc] - -| Sentence | de -| >= 1.0.0 -| Sentence detection model for German -| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/README[README] 
-https://dist.apache.org/repos/dist/release/opennlp/models/ud-models-1.0/opennlp-training-eval-logs-1.0-1.9.3.zip[Evaluation Logs] -| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/opennlp-de-ud-gsd-sentence-1.0-1.9.3.bin[opennlp-de-ud-gsd-sentence-1.0-1.9.3.bin] -|https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/opennlp-de-ud-gsd-sentence-1.0-1.9.3.bin.sha512[sha512] -https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/opennlp-de-ud-gsd-sentence-1.0-1.9.3.bin.asc[asc] - -| Sentence +| German +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-de-ud-gsd-sentence-1.1-2.4.0.bin[opennlp-de-ud-gsd-sentence-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-de-ud-gsd-sentence-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-de-ud-gsd-sentence-1.1-2.4.0.bin.asc[asc] + | en -| >= 1.0.0 -| Sentence detection model for English -| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/README[README] -https://dist.apache.org/repos/dist/release/opennlp/models/ud-models-1.0/opennlp-training-eval-logs-1.0-1.9.3.zip[Evaluation Logs] -| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin[opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin] -|https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin.sha512[sha512] -https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin.asc[asc] - -| Sentence +| English +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-en-ud-ewt-sentence-1.1-2.4.0.bin[opennlp-en-ud-ewt-sentence-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-en-ud-ewt-sentence-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-en-ud-ewt-sentence-1.1-2.4.0.bin.asc[asc] + +| es +| Spanish +| 2.4.0 +| 2.14 +| 
https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-es-ud-gsd-sentence-1.1-2.4.0.bin[opennlp-es-ud-gsd-sentence-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-es-ud-gsd-sentence-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-es-ud-gsd-sentence-1.1-2.4.0.bin.asc[asc] + +| et +| Estonian +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-et-ud-edt-sentence-1.1-2.4.0.bin[opennlp-et-ud-edt-sentence-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-et-ud-edt-sentence-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-et-ud-edt-sentence-1.1-2.4.0.bin.asc[asc] + +| fi +| Finnish +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-fi-ud-tdt-sentence-1.1-2.4.0.bin[opennlp-fi-ud-tdt-sentence-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-fi-ud-tdt-sentence-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-fi-ud-tdt-sentence-1.1-2.4.0.bin.asc[asc] + +| fr +| French +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-fr-ud-gsd-sentence-1.1-2.4.0.bin[opennlp-fr-ud-gsd-sentence-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-fr-ud-gsd-sentence-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-fr-ud-gsd-sentence-1.1-2.4.0.bin.asc[asc] + +| hr +| Croatian +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-hr-ud-set-sentence-1.1-2.4.0.bin[opennlp-hr-ud-set-sentence-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-hr-ud-set-sentence-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-hr-ud-set-sentence-1.1-2.4.0.bin.asc[asc] + | it -| >= 1.0.0 -| Sentence detection model for Italian -| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/README[README] -https://dist.apache.org/repos/dist/release/opennlp/models/ud-models-1.0/opennlp-training-eval-logs-1.0-1.9.3.zip[Evaluation Logs] -| 
https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/opennlp-it-ud-vit-sentence-1.0-1.9.3.bin[opennlp-it-ud-vit-sentence-1.0-1.9.3.bin] -|https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/opennlp-it-ud-vit-sentence-1.0-1.9.3.bin.sha512[sha512] -https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/opennlp-it-ud-vit-sentence-1.0-1.9.3.bin.asc[asc] - -| Sentence +| Italian +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-it-ud-vit-sentence-1.1-2.4.0.bin[opennlp-it-ud-vit-sentence-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-it-ud-vit-sentence-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-it-ud-vit-sentence-1.1-2.4.0.bin.asc[asc] + +| lv +| Latvian +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-lv-ud-lvtb-sentence-1.1-2.4.0.bin[opennlp-lv-ud-lvtb-sentence-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-lv-ud-lvtb-sentence-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-lv-ud-lvtb-sentence-1.1-2.4.0.bin.asc[asc] + | nl -| >= 1.0.0 -| Sentence detection model for Dutch -| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/README[README] -https://dist.apache.org/repos/dist/release/opennlp/models/ud-models-1.0/opennlp-training-eval-logs-1.0-1.9.3.zip[Evaluation Logs] -| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/opennlp-nl-ud-alpino-sentence-1.0-1.9.3.bin[opennlp-nl-ud-alpino-sentence-1.0-1.9.3.bin] -|https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/opennlp-nl-ud-alpino-sentence-1.0-1.9.3.bin.sha512[sha512] -https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/opennlp-nl-ud-alpino-sentence-1.0-1.9.3.bin.asc[asc] - -| Parts of Speech +| Dutch +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-nl-ud-alpino-sentence-1.1-2.4.0.bin[opennlp-nl-ud-alpino-sentence-1.1-2.4.0.bin] +| 
{opennlp_downloads}/opennlp-nl-ud-alpino-sentence-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-nl-ud-alpino-sentence-1.1-2.4.0.bin.asc[asc] + +| no +| Norwegian +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-no-ud-bokmaal-sentence-1.1-2.4.0.bin[opennlp-no-ud-bokmaal-sentence-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-no-ud-bokmaal-sentence-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-no-ud-bokmaal-sentence-1.1-2.4.0.bin.asc[asc] + +| pl +| Polish +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-pl-ud-pdb-sentence-1.1-2.4.0.bin[opennlp-pl-ud-pdb-sentence-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-pl-ud-pdb-sentence-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-pl-ud-pdb-sentence-1.1-2.4.0.bin.asc[asc] + +| pt +| Portuguese +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-pt-ud-gsd-sentence-1.1-2.4.0.bin[opennlp-pt-ud-gsd-sentence-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-pt-ud-gsd-sentence-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-pt-ud-gsd-sentence-1.1-2.4.0.bin.asc[asc] + +| ro +| Romanian +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-ro-ud-rrt-sentence-1.1-2.4.0.bin[opennlp-ro-ud-rrt-sentence-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-ro-ud-rrt-sentence-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-ro-ud-rrt-sentence-1.1-2.4.0.bin.asc[asc] + +| ru +| Russian +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-ru-ud-gsd-sentence-1.1-2.4.0.bin[opennlp-ru-ud-gsd-sentence-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-ru-ud-gsd-sentence-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-ru-ud-gsd-sentence-1.1-2.4.0.bin.asc[asc] + +| sk +| Slovak +| 2.4.0 +| 2.14 +| 
https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-sk-ud-snk-sentence-1.1-2.4.0.bin[opennlp-sk-ud-snk-sentence-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-sk-ud-snk-sentence-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-sk-ud-snk-sentence-1.1-2.4.0.bin.asc[asc] + +| sl +| Slovenian +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-sl-ud-ssj-sentence-1.1-2.4.0.bin[opennlp-sl-ud-ssj-sentence-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-sl-ud-ssj-sentence-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-sl-ud-ssj-sentence-1.1-2.4.0.bin.asc[asc] + +| sr +| Serbian +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-sr-ud-set-sentence-1.1-2.4.0.bin[opennlp-sr-ud-set-sentence-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-sr-ud-set-sentence-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-sr-ud-set-sentence-1.1-2.4.0.bin.asc[asc] + +| sv +| Swedish +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-sv-ud-talbanken-sentence-1.1-2.4.0.bin[opennlp-sv-ud-talbanken-sentence-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-sv-ud-talbanken-sentence-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-sv-ud-talbanken-sentence-1.1-2.4.0.bin.asc[asc] + +| uk +| Ukrainian +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-uk-ud-iu-sentence-1.1-2.4.0.bin[opennlp-uk-ud-iu-sentence-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-uk-ud-iu-sentence-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-uk-ud-iu-sentence-1.1-2.4.0.bin.asc[asc] +|====== + +=== Part of Speech Tagging + +NOTE: All models below are compatible with OpenNLP versions >= 1.0.0. The {opennlp_downloads}/README[README] and +{opennlp_downloads}/opennlp-training-eval-logs-1.1-2.4.0.zip[evaluation logs] +refer to every language listed below. 
+ +[options="header"] +|====== +| ISO code | Language | Trained with OpenNLP | UD version | File | Signatures +| bg +| Bulgarian +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-bg-ud-btb-pos-1.1-2.4.0.bin[opennlp-bg-ud-btb-pos-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-bg-ud-btb-pos-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-bg-ud-btb-pos-1.1-2.4.0.bin.asc[asc] + +| cs +| Czech +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-cs-ud-pdt-pos-1.1-2.4.0.bin[opennlp-cs-ud-pdt-pos-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-cs-ud-pdt-pos-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-cs-ud-pdt-pos-1.1-2.4.0.bin.asc[asc] + +| da +| Danish +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-da-ud-ddt-pos-1.1-2.4.0.bin[opennlp-da-ud-ddt-pos-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-da-ud-ddt-pos-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-da-ud-ddt-pos-1.1-2.4.0.bin.asc[asc] + | de -| >= 1.0.0 -| Parts of speech model for German -| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/README[README] -https://dist.apache.org/repos/dist/release/opennlp/models/ud-models-1.0/opennlp-training-eval-logs-1.0-1.9.3.zip[Evaluation Logs] -| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/opennlp-de-ud-gsd-pos-1.0-1.9.3.bin[opennlp-de-ud-gsd-pos-1.0-1.9.3.bin] -|https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/opennlp-de-ud-gsd-pos-1.0-1.9.3.bin.sha512[sha512] -https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/opennlp-de-ud-gsd-pos-1.0-1.9.3.bin.asc[asc] - -| Parts of Speech +| German +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-de-ud-gsd-pos-1.1-2.4.0.bin[opennlp-de-ud-gsd-pos-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-de-ud-gsd-pos-1.1-2.4.0.bin.sha512[sha512] 
{opennlp_downloads}/opennlp-de-ud-gsd-pos-1.1-2.4.0.bin.asc[asc] + | en -| >= 1.0.0 -| Parts of speech model for English -| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/README[README] -https://dist.apache.org/repos/dist/release/opennlp/models/ud-models-1.0/opennlp-training-eval-logs-1.0-1.9.3.zip[Evaluation Logs] -| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/opennlp-en-ud-ewt-pos-1.0-1.9.3.bin[opennlp-en-ud-ewt-pos-1.0-1.9.3.bin] -|https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/opennlp-en-ud-ewt-pos-1.0-1.9.3.bin.sha512[sha512] -https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/opennlp-en-ud-ewt-pos-1.0-1.9.3.bin.asc[asc] - -| Parts of Speech +| English +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-en-ud-ewt-pos-1.1-2.4.0.bin[opennlp-en-ud-ewt-pos-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-en-ud-ewt-pos-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-en-ud-ewt-pos-1.1-2.4.0.bin.asc[asc] + +| es +| Spanish +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-es-ud-gsd-pos-1.1-2.4.0.bin[opennlp-es-ud-gsd-pos-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-es-ud-gsd-pos-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-es-ud-gsd-pos-1.1-2.4.0.bin.asc[asc] + +| et +| Estonian +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-et-ud-edt-pos-1.1-2.4.0.bin[opennlp-et-ud-edt-pos-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-et-ud-edt-pos-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-et-ud-edt-pos-1.1-2.4.0.bin.asc[asc] + +| fi +| Finnish +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-fi-ud-tdt-pos-1.1-2.4.0.bin[opennlp-fi-ud-tdt-pos-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-fi-ud-tdt-pos-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-fi-ud-tdt-pos-1.1-2.4.0.bin.asc[asc] 
+ | fr -| >= 1.0.0 -| Parts of speech model for French -| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/README[README] -https://dist.apache.org/repos/dist/release/opennlp/models/ud-models-1.0/opennlp-training-eval-logs-1.0-1.9.3.zip[Evaluation Logs] -| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/opennlp-fr-ud-ftb-pos-1.0-1.9.3.bin[opennlp-fr-ud-ftb-pos-1.0-1.9.3.bin] -|https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/opennlp-fr-ud-ftb-pos-1.0-1.9.3.bin.md5[md5] -https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/opennlp-fr-ud-ftb-pos-1.0-1.9.3.bin.sha512[sha512] -https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/opennlp-fr-ud-ftb-pos-1.0-1.9.3.bin.asc[asc] - -| Parts of Speech +| French +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-fr-ud-gsd-pos-1.1-2.4.0.bin[opennlp-fr-ud-gsd-pos-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-fr-ud-gsd-pos-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-fr-ud-gsd-pos-1.1-2.4.0.bin.asc[asc] + +| hr +| Croatian +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-hr-ud-set-pos-1.1-2.4.0.bin[opennlp-hr-ud-set-pos-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-hr-ud-set-pos-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-hr-ud-set-pos-1.1-2.4.0.bin.asc[asc] + | it -| >= 1.0.0 -| Parts of speech model for Italian -| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/README[README] -https://dist.apache.org/repos/dist/release/opennlp/models/ud-models-1.0/opennlp-training-eval-logs-1.0-1.9.3.zip[Evaluation Logs] -| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/opennlp-it-ud-vit-pos-1.0-1.9.3.bin[opennlp-it-ud-vit-pos-1.0-1.9.3.bin] -|https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/opennlp-it-ud-vit-pos-1.0-1.9.3.bin.sha512[sha512] 
-https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/opennlp-it-ud-vit-pos-1.0-1.9.3.bin.asc[asc] - -| Parts of Speech +| Italian +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-it-ud-vit-pos-1.1-2.4.0.bin[opennlp-it-ud-vit-pos-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-it-ud-vit-pos-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-it-ud-vit-pos-1.1-2.4.0.bin.asc[asc] + +| lv +| Latvian +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-lv-ud-lvtb-pos-1.1-2.4.0.bin[opennlp-lv-ud-lvtb-pos-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-lv-ud-lvtb-pos-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-lv-ud-lvtb-pos-1.1-2.4.0.bin.asc[asc] + | nl -| >= 1.0.0 -| Parts of speech model for Dutch -| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/README[README] -https://dist.apache.org/repos/dist/release/opennlp/models/ud-models-1.0/opennlp-training-eval-logs-1.0-1.9.3.zip[Evaluation Logs] -| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/opennlp-nl-ud-alpino-pos-1.0-1.9.3.bin[opennlp-nl-ud-alpino-pos-1.0-1.9.3.bin] -|https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/opennlp-nl-ud-alpino-pos-1.0-1.9.3.bin.sha512[sha512] -https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/opennlp-nl-ud-alpino-pos-1.0-1.9.3.bin.asc[asc] - -| Tokens +| Dutch +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-nl-ud-alpino-pos-1.1-2.4.0.bin[opennlp-nl-ud-alpino-pos-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-nl-ud-alpino-pos-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-nl-ud-alpino-pos-1.1-2.4.0.bin.asc[asc] + +| no +| Norwegian +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-no-ud-bokmaal-pos-1.1-2.4.0.bin[opennlp-no-ud-bokmaal-pos-1.1-2.4.0.bin] +| 
{opennlp_downloads}/opennlp-no-ud-bokmaal-pos-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-no-ud-bokmaal-pos-1.1-2.4.0.bin.asc[asc] + +| pl +| Polish +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-pl-ud-pdb-pos-1.1-2.4.0.bin[opennlp-pl-ud-pdb-pos-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-pl-ud-pdb-pos-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-pl-ud-pdb-pos-1.1-2.4.0.bin.asc[asc] + +| pt +| Portuguese +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-pt-ud-gsd-pos-1.1-2.4.0.bin[opennlp-pt-ud-gsd-pos-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-pt-ud-gsd-pos-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-pt-ud-gsd-pos-1.1-2.4.0.bin.asc[asc] + +| ro +| Romanian +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-ro-ud-rrt-pos-1.1-2.4.0.bin[opennlp-ro-ud-rrt-pos-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-ro-ud-rrt-pos-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-ro-ud-rrt-pos-1.1-2.4.0.bin.asc[asc] + +| ru +| Russian +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-ru-ud-gsd-pos-1.1-2.4.0.bin[opennlp-ru-ud-gsd-pos-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-ru-ud-gsd-pos-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-ru-ud-gsd-pos-1.1-2.4.0.bin.asc[asc] + +| sk +| Slovak +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-sk-ud-snk-pos-1.1-2.4.0.bin[opennlp-sk-ud-snk-pos-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-sk-ud-snk-pos-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-sk-ud-snk-pos-1.1-2.4.0.bin.asc[asc] + +| sl +| Slovenian +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-sl-ud-ssj-pos-1.1-2.4.0.bin[opennlp-sl-ud-ssj-pos-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-sl-ud-ssj-pos-1.1-2.4.0.bin.sha512[sha512] 
{opennlp_downloads}/opennlp-sl-ud-ssj-pos-1.1-2.4.0.bin.asc[asc] + +| sr +| Serbian +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-sr-ud-set-pos-1.1-2.4.0.bin[opennlp-sr-ud-set-pos-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-sr-ud-set-pos-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-sr-ud-set-pos-1.1-2.4.0.bin.asc[asc] + +| sv +| Swedish +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-sv-ud-talbanken-pos-1.1-2.4.0.bin[opennlp-sv-ud-talbanken-pos-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-sv-ud-talbanken-pos-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-sv-ud-talbanken-pos-1.1-2.4.0.bin.asc[asc] + +| uk +| Ukrainian +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-uk-ud-iu-pos-1.1-2.4.0.bin[opennlp-uk-ud-iu-pos-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-uk-ud-iu-pos-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-uk-ud-iu-pos-1.1-2.4.0.bin.asc[asc] +|====== + +=== Tokenization + +NOTE: All models below are compatible with OpenNLP versions >= 1.0.0. The {opennlp_downloads}/README[README] and +{opennlp_downloads}/opennlp-training-eval-logs-1.1-2.4.0.zip[evaluation logs] +refer to every language listed below. 
+ +[options="header"] +|====== +| ISO code | Language | Trained with OpenNLP | UD version | File | Signatures +| bg +| Bulgarian +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-bg-ud-btb-tokens-1.1-2.4.0.bin[opennlp-bg-ud-btb-tokens-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-bg-ud-btb-tokens-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-bg-ud-btb-tokens-1.1-2.4.0.bin.asc[asc] + +| cs +| Czech +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-cs-ud-pdt-tokens-1.1-2.4.0.bin[opennlp-cs-ud-pdt-tokens-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-cs-ud-pdt-tokens-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-cs-ud-pdt-tokens-1.1-2.4.0.bin.asc[asc] + +| da +| Danish +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-da-ud-ddt-tokens-1.1-2.4.0.bin[opennlp-da-ud-ddt-tokens-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-da-ud-ddt-tokens-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-da-ud-ddt-tokens-1.1-2.4.0.bin.asc[asc] + | de -| >= 1.0.0 -| Tokenizer model for German -| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/README[README] -https://dist.apache.org/repos/dist/release/opennlp/models/ud-models-1.0/opennlp-training-eval-logs-1.0-1.9.3.zip[Evaluation Logs] -| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/opennlp-de-ud-gsd-tokens-1.0-1.9.3.bin[opennlp-de-ud-gsd-tokens-1.0-1.9.3.bin] -|https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/opennlp-de-ud-gsd-tokens-1.0-1.9.3.bin.sha512[sha512] -https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/opennlp-de-ud-gsd-tokens-1.0-1.9.3.bin.asc[asc] - -| Tokens +| German +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-de-ud-gsd-tokens-1.1-2.4.0.bin[opennlp-de-ud-gsd-tokens-1.1-2.4.0.bin] +| 
{opennlp_downloads}/opennlp-de-ud-gsd-tokens-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-de-ud-gsd-tokens-1.1-2.4.0.bin.asc[asc] + | en -| >= 1.0.0 -| Tokenizer model for English -| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/README[README] -https://dist.apache.org/repos/dist/release/opennlp/models/ud-models-1.0/opennlp-training-eval-logs-1.0-1.9.3.zip[Evaluation Logs] -| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/opennlp-en-ud-ewt-tokens-1.0-1.9.3.bin[opennlp-en-ud-ewt-tokens-1.0-1.9.3.bin] -|https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/opennlp-en-ud-ewt-tokens-1.0-1.9.3.bin.sha512[sha512] -https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/opennlp-en-ud-ewt-tokens-1.0-1.9.3.bin.asc[asc] - -| Tokens +| English +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-en-ud-ewt-tokens-1.1-2.4.0.bin[opennlp-en-ud-ewt-tokens-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-en-ud-ewt-tokens-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-en-ud-ewt-tokens-1.1-2.4.0.bin.asc[asc] + +| es +| Spanish +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-es-ud-gsd-tokens-1.1-2.4.0.bin[opennlp-es-ud-gsd-tokens-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-es-ud-gsd-tokens-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-es-ud-gsd-tokens-1.1-2.4.0.bin.asc[asc] + +| et +| Estonian +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-et-ud-edt-tokens-1.1-2.4.0.bin[opennlp-et-ud-edt-tokens-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-et-ud-edt-tokens-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-et-ud-edt-tokens-1.1-2.4.0.bin.asc[asc] + +| fi +| Finnish +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-fi-ud-tdt-tokens-1.1-2.4.0.bin[opennlp-fi-ud-tdt-tokens-1.1-2.4.0.bin] +| 
{opennlp_downloads}/opennlp-fi-ud-tdt-tokens-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-fi-ud-tdt-tokens-1.1-2.4.0.bin.asc[asc] + | fr -| >= 1.0.0 -| Tokenizer model for French -| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/README[README] -https://dist.apache.org/repos/dist/release/opennlp/models/ud-models-1.0/opennlp-training-eval-logs-1.0-1.9.3.zip[Evaluation Logs] -| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/opennlp-fr-ud-ftb-tokens-1.0-1.9.3.bin[opennlp-fr-ud-ftb-tokens-1.0-1.9.3.bin] -|https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/opennlp-fr-ud-ftb-tokens-1.0-1.9.3.bin.sha512[sha512] -https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/opennlp-fr-ud-ftb-tokens-1.0-1.9.3.bin.asc[asc] - -| Tokens +| French +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-fr-ud-gsd-tokens-1.1-2.4.0.bin[opennlp-fr-ud-gsd-tokens-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-fr-ud-gsd-tokens-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-fr-ud-gsd-tokens-1.1-2.4.0.bin.asc[asc] + +| hr +| Croatian +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-hr-ud-set-tokens-1.1-2.4.0.bin[opennlp-hr-ud-set-tokens-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-hr-ud-set-tokens-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-hr-ud-set-tokens-1.1-2.4.0.bin.asc[asc] + | it -| >= 1.0.0 -| Tokenizer model for Italian -| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/README[README] -https://dist.apache.org/repos/dist/release/opennlp/models/ud-models-1.0/opennlp-training-eval-logs-1.0-1.9.3.zip[Evaluation Logs] -| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/opennlp-it-ud-vit-tokens-1.0-1.9.3.bin[opennlp-it-ud-vit-tokens-1.0-1.9.3.bin] -|https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/opennlp-it-ud-vit-tokens-1.0-1.9.3.bin.sha512[sha512] 
-https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/opennlp-it-ud-vit-tokens-1.0-1.9.3.bin.asc[asc] - -| Tokens +| Italian +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-it-ud-vit-tokens-1.1-2.4.0.bin[opennlp-it-ud-vit-tokens-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-it-ud-vit-tokens-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-it-ud-vit-tokens-1.1-2.4.0.bin.asc[asc] + +| lv +| Latvian +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-lv-ud-lvtb-tokens-1.1-2.4.0.bin[opennlp-lv-ud-lvtb-tokens-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-lv-ud-lvtb-tokens-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-lv-ud-lvtb-tokens-1.1-2.4.0.bin.asc[asc] + | nl -| >= 1.0.0 -| Tokenizer model for Dutch -| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/README[README] -https://dist.apache.org/repos/dist/release/opennlp/models/ud-models-1.0/opennlp-training-eval-logs-1.0-1.9.3.zip[Evaluation Logs] -| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/opennlp-nl-ud-alpino-tokens-1.0-1.9.3.bin[opennlp-nl-ud-alpino-tokens-1.0-1.9.3.bin] -|https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/opennlp-nl-ud-alpino-tokens-1.0-1.9.3.bin.sha512[sha512] -https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.0/opennlp-nl-ud-alpino-tokens-1.0-1.9.3.bin.asc[asc] +| Dutch +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-nl-ud-alpino-tokens-1.1-2.4.0.bin[opennlp-nl-ud-alpino-tokens-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-nl-ud-alpino-tokens-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-nl-ud-alpino-tokens-1.1-2.4.0.bin.asc[asc] + +| no +| Norwegian +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-no-ud-bokmaal-tokens-1.1-2.4.0.bin[opennlp-no-ud-bokmaal-tokens-1.1-2.4.0.bin] +| 
{opennlp_downloads}/opennlp-no-ud-bokmaal-tokens-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-no-ud-bokmaal-tokens-1.1-2.4.0.bin.asc[asc] + +| pl +| Polish +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-pl-ud-pdb-tokens-1.1-2.4.0.bin[opennlp-pl-ud-pdb-tokens-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-pl-ud-pdb-tokens-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-pl-ud-pdb-tokens-1.1-2.4.0.bin.asc[asc] + +| pt +| Portuguese +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-pt-ud-gsd-tokens-1.1-2.4.0.bin[opennlp-pt-ud-gsd-tokens-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-pt-ud-gsd-tokens-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-pt-ud-gsd-tokens-1.1-2.4.0.bin.asc[asc] + +| ro +| Romanian +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-ro-ud-rrt-tokens-1.1-2.4.0.bin[opennlp-ro-ud-rrt-tokens-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-ro-ud-rrt-tokens-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-ro-ud-rrt-tokens-1.1-2.4.0.bin.asc[asc] + +| ru +| Russian +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-ru-ud-gsd-tokens-1.1-2.4.0.bin[opennlp-ru-ud-gsd-tokens-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-ru-ud-gsd-tokens-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-ru-ud-gsd-tokens-1.1-2.4.0.bin.asc[asc] + +| sk +| Slovak +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-sk-ud-snk-tokens-1.1-2.4.0.bin[opennlp-sk-ud-snk-tokens-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-sk-ud-snk-tokens-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-sk-ud-snk-tokens-1.1-2.4.0.bin.asc[asc] + +| sl +| Slovenian +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-sl-ud-ssj-tokens-1.1-2.4.0.bin[opennlp-sl-ud-ssj-tokens-1.1-2.4.0.bin] +| 
{opennlp_downloads}/opennlp-sl-ud-ssj-tokens-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-sl-ud-ssj-tokens-1.1-2.4.0.bin.asc[asc] + +| sr +| Serbian +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-sr-ud-set-tokens-1.1-2.4.0.bin[opennlp-sr-ud-set-tokens-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-sr-ud-set-tokens-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-sr-ud-set-tokens-1.1-2.4.0.bin.asc[asc] + +| sv +| Swedish +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-sv-ud-talbanken-tokens-1.1-2.4.0.bin[opennlp-sv-ud-talbanken-tokens-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-sv-ud-talbanken-tokens-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-sv-ud-talbanken-tokens-1.1-2.4.0.bin.asc[asc] + +| uk +| Ukrainian +| 2.4.0 +| 2.14 +| https://www.apache.org/dyn/closer.cgi/opennlp/models/ud-models-1.1/opennlp-uk-ud-iu-tokens-1.1-2.4.0.bin[opennlp-uk-ud-iu-tokens-1.1-2.4.0.bin] +| {opennlp_downloads}/opennlp-uk-ud-iu-tokens-1.1-2.4.0.bin.sha512[sha512] {opennlp_downloads}/opennlp-uk-ud-iu-tokens-1.1-2.4.0.bin.asc[asc] |====== -## Verifying Signatures -The md5, sha1, sha512, and asc files are signature files and can be used to verify the integrity of the +== Verifying Signatures +The sha512, sha1, md5, and asc files are signature files and can be used to verify the integrity of the downloaded distribution package. 
Use the following commands to verify the integrity: -* `gpg --print-md MD5 fileName.zip` +* `gpg --print-md SHA512 fileName.tar.gz` * `gpg --print-md SHA1 fileName.tar.gz` +* `gpg --print-md MD5 fileName.zip` * `gpg --verify fileName.tar.gz.asc` It might be necessary to import the https://www.apache.org/dist/opennlp/KEYS[KEYS file,window=_blank] to verify the integrity @@ -214,7 +585,7 @@ That can easily be done with: More information about release signing and verifying signatures can be found https://www.apache.org/dev/release-signing.html[here,window=_blank]. -## SourceForge Models +== SourceForge Resources The models on Sourceforge for 1.5.0 are found https://opennlp.sourceforge.net/models-1.5/[here]. and are fully compatible with Apache OpenNLP {opennlp_version}. diff --git a/src/main/jbake/content/news/news-2021-05-30.ad b/src/main/jbake/content/news/news-2021-05-30.ad index 95cd77a56..1584a399e 100755 --- a/src/main/jbake/content/news/news-2021-05-30.ad +++ b/src/main/jbake/content/news/news-2021-05-30.ad @@ -25,8 +25,6 @@ Apache OpenNLP :category: news :idprefix: -== OpenNLP Pre-trained Models Available - Pre-trained sentence, parts of speech, and token models are now available for English, French, Italian, German, and Dutch. These models were trained on Universal Dependencies and are intended to provide usable models under the Apache 2.0 license. See the models' README for more information on the models including how each was created and evaluated. diff --git a/src/main/jbake/content/news/news-2024-10-28.ad b/src/main/jbake/content/news/news-2024-10-28.ad new file mode 100755 index 000000000..aa3339fb6 --- /dev/null +++ b/src/main/jbake/content/news/news-2024-10-28.ad @@ -0,0 +1,40 @@ +//// + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. 
The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +//// += New OpenNLP Pre-trained Models released +Apache OpenNLP +2024-10-28 +:jbake-type: post +:jbake-tags: news +:jbake-status: published +:category: news +:idprefix: + +New pre-trained sentence, parts of speech, and token models are now available for 18 European languages: + +* Bulgarian, Czech, Croatian, Danish, Estonian, Finnish, Latvian, Norwegian, Polish, Portuguese, Romanian, Russian, Serbian, Slovak, Slovenian, Spanish, Swedish, and Ukrainian. + +The existing sentence, parts of speech, and token models for these 5 languages: + +* Dutch, English, French, German, and Italian + +were re-trained. The French models are now based on a GSD treebank, as the previously used FTB treebank https://universaldependencies.org/fr/index.html[is not maintained] and has therefore been discontinued by the https://universaldependencies.org[Universal Dependencies] (UD) project. + +All models were trained with OpenNLP 2.4.0 based on the UD release https://hdl.handle.net/11234/1-5502[2.14] and are intended to provide usable models under the Apache 2.0 license. +These models are available as JAR artifacts via Maven Central, or directly as plain, binary files via our link:/models.html[models page]. +See the models' README for more information on the models including how each was created and evaluated. 
diff --git a/src/main/jbake/jbake.properties b/src/main/jbake/jbake.properties index bc30f7c94..5538ac38a 100755 --- a/src/main/jbake/jbake.properties +++ b/src/main/jbake/jbake.properties @@ -34,3 +34,4 @@ asciidoctor.attributes.export=true asciidoctor.attributes.export.prefix= opennlp.version=2.4.0 opennlp.next.version=2.4.1-SNAPSHOT +opennlp.downloads=https://downloads.apache.org/opennlp/models/ud-models-1.1 \ No newline at end of file