deepjavalibrary
diff --git a/.github/workflows/pr_notebook.yml
+3-1 b/.github/workflows/pr_notebook.yml
+3-1
diff --git a/chapter_attention-mechanisms/attention-cues.ipynb
+48-72 b/chapter_attention-mechanisms/attention-cues.ipynb
+48-72
diff --git a/chapter_attention-mechanisms/attention-scoring-functions.ipynb
+9-48 b/chapter_attention-mechanisms/attention-scoring-functions.ipynb
+9-48
diff --git a/chapter_attention-mechanisms/multihead-attention.ipynb
+7-35 b/chapter_attention-mechanisms/multihead-attention.ipynb
+7-35
diff --git a/chapter_attention-mechanisms/nadaraya-watson.ipynb
+9-46 b/chapter_attention-mechanisms/nadaraya-watson.ipynb
+9-46
@@ -55,10 +55,12 @@ jobs:
           ./gradlew installKernel
       - name: test Notebook
         run: |
+          export DATASET_LIMIT=512
+          export MAX_EPOCH=2
           bash test_notebook.sh $${{ matrix.group }}
       - name: generated Notebook in html
         uses: actions/upload-artifact@v1
         if: always()
         with:
           name: notebook
-          path: test_output/
+          path: test_output/
@@ -172,42 +172,11 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "%mavenRepo snapshots https://oss.sonatype.org/content/repositories/snapshots/\n",
-    "\n",
-    "%maven ai.djl:api:0.10.0\n",
-    "%maven org.slf4j:slf4j-api:1.7.26\n",
-    "%maven org.slf4j:slf4j-simple:1.7.26\n",
-    "\n",
-    "%maven ai.djl.mxnet:mxnet-engine:0.10.0\n",
-    "%maven ai.djl.mxnet:mxnet-native-auto:1.7.0-backport"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
+    "%load ../utils/djl-imports\n",
     "%load ../utils/plot-utils\n",
     "%load ../utils/Functions.java"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import ai.djl.ndarray.NDArray;\n",
-    "import ai.djl.ndarray.NDManager;\n",
-    "import ai.djl.ndarray.types.Shape;\n",
-    "import ai.djl.translate.TranslateException;\n",
-    "import tech.tablesaw.plotly.Plot;\n",
-    "import tech.tablesaw.plotly.components.*;\n",
-    "import tech.tablesaw.plotly.traces.HeatmapTrace;\n",
-    "import tech.tablesaw.plotly.traces.Trace;"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -241,48 +210,48 @@
     "            String[] titles,\n",
     "            int width,\n",
     "            int height) {\n",
-    "        int numRows = (int) matrices.getShape().get(0);\n",
-    "        int numCols = (int) matrices.getShape().get(1);\n",
+    "    int numRows = (int) matrices.getShape().get(0);\n",
+    "    int numCols = (int) matrices.getShape().get(1);\n",
     "\n",
-    "        Trace[] traces = new Trace[numRows * numCols];\n",
-    "        int count = 0;\n",
-    "        for (int i = 0; i < numRows; i++) {\n",
-    "            for (int j = 0; j < numCols; j++) {\n",
-    "                NDArray NDMatrix = matrices.get(i).get(j);\n",
-    "                double[][] matrix =\n",
-    "                        new double[(int) NDMatrix.getShape().get(0)]\n",
-    "                                [(int) NDMatrix.getShape().get(1)];\n",
-    "                Object[] x = new Object[matrix.length];\n",
-    "                Object[] y = new Object[matrix.length];\n",
-    "                for (int k = 0; k < NDMatrix.getShape().get(0); k++) {\n",
-    "                    matrix[k] = Functions.floatToDoubleArray(NDMatrix.get(k).toFloatArray());\n",
-    "                    x[k] = k;\n",
-    "                    y[k] = k;\n",
-    "                }\n",
-    "                HeatmapTrace.HeatmapBuilder builder = HeatmapTrace.builder(x, y, matrix);\n",
-    "                if (titles != null) {\n",
-    "                    builder = (HeatmapTrace.HeatmapBuilder) builder.name(titles[j]);\n",
-    "                }\n",
-    "                traces[count++] = builder.build();\n",
+    "    Trace[] traces = new Trace[numRows * numCols];\n",
+    "    int count = 0;\n",
+    "    for (int i = 0; i < numRows; i++) {\n",
+    "        for (int j = 0; j < numCols; j++) {\n",
+    "            NDArray NDMatrix = matrices.get(i).get(j);\n",
+    "            double[][] matrix =\n",
+    "                    new double[(int) NDMatrix.getShape().get(0)]\n",
+    "                            [(int) NDMatrix.getShape().get(1)];\n",
+    "            Object[] x = new Object[matrix.length];\n",
+    "            Object[] y = new Object[matrix.length];\n",
+    "            for (int k = 0; k < NDMatrix.getShape().get(0); k++) {\n",
+    "                matrix[k] = Functions.floatToDoubleArray(NDMatrix.get(k).toFloatArray());\n",
+    "                x[k] = k;\n",
+    "                y[k] = k;\n",
+    "            }\n",
+    "            HeatmapTrace.HeatmapBuilder builder = HeatmapTrace.builder(x, y, matrix);\n",
+    "            if (titles != null) {\n",
+    "                builder = (HeatmapTrace.HeatmapBuilder) builder.name(titles[j]);\n",
     "            }\n",
+    "            traces[count++] = builder.build();\n",
     "        }\n",
-    "        Grid grid =\n",
-    "                Grid.builder()\n",
-    "                        .columns(numCols)\n",
-    "                        .rows(numRows)\n",
-    "                        .pattern(Grid.Pattern.INDEPENDENT)\n",
-    "                        .build();\n",
-    "        Layout layout =\n",
-    "                Layout.builder()\n",
-    "                        .title(\"\")\n",
-    "                        .xAxis(Axis.builder().title(xLabel).build())\n",
-    "                        .yAxis(Axis.builder().title(yLabel).build())\n",
-    "                        .width(width)\n",
-    "                        .height(height)\n",
-    "                        .grid(grid)\n",
-    "                        .build();\n",
-    "        return new Figure(layout, traces);\n",
-    "    }"
+    "    }\n",
+    "    Grid grid =\n",
+    "            Grid.builder()\n",
+    "                    .columns(numCols)\n",
+    "                    .rows(numRows)\n",
+    "                    .pattern(Grid.Pattern.INDEPENDENT)\n",
+    "                    .build();\n",
+    "    Layout layout =\n",
+    "            Layout.builder()\n",
+    "                    .title(\"\")\n",
+    "                    .xAxis(Axis.builder().title(xLabel).build())\n",
+    "                    .yAxis(Axis.builder().title(yLabel).build())\n",
+    "                    .width(width)\n",
+    "                    .height(height)\n",
+    "                    .grid(grid)\n",
+    "                    .build();\n",
+    "    return new Figure(layout, traces);\n",
+    "}"
    ]
   },
   {
@@ -328,6 +297,13 @@
     "1. What can be the volitional cue when decoding a sequence token by token in machine translation? What are the nonvolitional cues and the sensory inputs?\n",
     "1. Randomly generate a $10 \\times 10$ matrix and use the softmax operation to ensure each row is a valid probability distribution. Visualize the output attention weights.\n"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
@@ -342,7 +318,7 @@
    "mimetype": "text/x-java-source",
    "name": "Java",
    "pygments_lexer": "java",
-   "version": "11.0.10+9"
+   "version": "14.0.2+12"
   }
  },
  "nbformat": 4,
 
@@ -80,58 +80,12 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "%mavenRepo snapshots https://oss.sonatype.org/content/repositories/snapshots/\n",
-    "\n",
-    "%maven ai.djl:api:0.11.0\n",
-    "%maven org.slf4j:slf4j-api:1.7.26\n",
-    "%maven org.slf4j:slf4j-simple:1.7.26\n",
-    "\n",
-    "%maven ai.djl.mxnet:mxnet-engine:0.11.0\n",
-    "%maven ai.djl.mxnet:mxnet-native-auto:1.7.0-backport"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
+    "%load ../utils/djl-imports\n",
     "%load ../utils/plot-utils\n",
     "%load ../utils/Functions.java\n",
     "%load ../utils/PlotUtils.java"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import ai.djl.Model;\n",
-    "import ai.djl.ndarray.*;\n",
-    "import ai.djl.ndarray.types.DataType;\n",
-    "import ai.djl.ndarray.types.Shape;\n",
-    "import ai.djl.nn.AbstractBlock;\n",
-    "import ai.djl.nn.Parameter;\n",
-    "import ai.djl.training.*;\n",
-    "import ai.djl.nn.core.Linear;\n",
-    "import ai.djl.nn.norm.Dropout;\n",
-    "import ai.djl.training.listener.TrainingListener;\n",
-    "import ai.djl.training.loss.Loss;\n",
-    "import ai.djl.training.optimizer.Optimizer;\n",
-    "import ai.djl.training.tracker.Tracker;\n",
-    "import ai.djl.training.ParameterStore;\n",
-    "import ai.djl.training.initializer.UniformInitializer;\n",
-    "import ai.djl.util.PairList;\n",
-    "import ai.djl.translate.TranslateException;\n",
-    "import tech.tablesaw.plotly.Plot;\n",
-    "import tech.tablesaw.plotly.components.*;\n",
-    "import tech.tablesaw.plotly.traces.ScatterTrace;\n",
-    "\n",
-    "import java.io.IOException;\n",
-    "import java.util.function.Function;"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -608,6 +562,13 @@
     "1. Using matrix multiplications only, can you design a new scoring function for queries and keys with different vector lengths?\n",
     "1. When queries and keys have the same vector length, is vector summation a better design than dot product for the scoring function? Why or why not?\n"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
@@ -622,7 +583,7 @@
    "mimetype": "text/x-java-source",
    "name": "Java",
    "pygments_lexer": "java",
-   "version": "11.0.10+9"
+   "version": "14.0.2+12"
   }
  },
  "nbformat": 4,
 
@@ -93,43 +93,15 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "%mavenRepo snapshots https://oss.sonatype.org/content/repositories/snapshots/\n",
-    "\n",
-    "%maven ai.djl:api:0.11.0\n",
-    "%maven org.slf4j:slf4j-api:1.7.26\n",
-    "%maven org.slf4j:slf4j-simple:1.7.26\n",
-    "\n",
-    "%maven ai.djl.mxnet:mxnet-engine:0.11.0\n",
-    "%maven ai.djl.mxnet:mxnet-native-auto:1.7.0-backport"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
+    "%load ../utils/djl-imports\n",
     "%load ../utils/plot-utils\n",
     "%load ../utils/Functions.java\n",
     "%load ../utils/PlotUtils.java\n",
-    "%load ../utils/AttentionUtils.java"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import ai.djl.ndarray.*;\n",
-    "import ai.djl.ndarray.types.DataType;\n",
-    "import ai.djl.ndarray.types.Shape;\n",
-    "import ai.djl.nn.AbstractBlock;\n",
-    "import ai.djl.nn.Parameter;\n",
-    "import ai.djl.nn.core.Linear;\n",
-    "import ai.djl.nn.norm.Dropout;\n",
-    "import ai.djl.training.ParameterStore;\n",
-    "import ai.djl.util.PairList;"
+    "\n",
+    "%load ../utils/attention/Chap10Utils.java\n",
+    "%load ../utils/attention/DotProductAttention.java\n",
+    "%load ../utils/attention/MultiHeadAttention.java\n",
+    "%load ../utils/attention/PositionalEncoding.java"
    ]
   },
   {
@@ -394,7 +366,7 @@
    "mimetype": "text/x-java-source",
    "name": "Java",
    "pygments_lexer": "java",
-   "version": "11.0.10+9"
+   "version": "14.0.2+12"
   }
  },
  "nbformat": 4,
 
@@ -32,57 +32,13 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "%mavenRepo snapshots https://oss.sonatype.org/content/repositories/snapshots/\n",
-    "\n",
-    "%maven ai.djl:api:0.11.0\n",
-    "%maven org.slf4j:slf4j-api:1.7.26\n",
-    "%maven org.slf4j:slf4j-simple:1.7.26\n",
-    "\n",
-    "%maven ai.djl.mxnet:mxnet-engine:0.11.0\n",
-    "%maven ai.djl.mxnet:mxnet-native-auto:1.7.0-backport"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
+    "%load ../utils/djl-imports\n",
     "%load ../utils/plot-utils\n",
     "%load ../utils/Functions.java\n",
     "%load ../utils/Animator.java\n",
     "%load ../utils/PlotUtils.java"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import ai.djl.Model;\n",
-    "import ai.djl.ndarray.*;\n",
-    "import ai.djl.ndarray.types.DataType;\n",
-    "import ai.djl.ndarray.types.Shape;\n",
-    "import ai.djl.nn.AbstractBlock;\n",
-    "import ai.djl.nn.Parameter;\n",
-    "import ai.djl.training.*;\n",
-    "import ai.djl.training.listener.TrainingListener;\n",
-    "import ai.djl.training.loss.Loss;\n",
-    "import ai.djl.training.optimizer.Optimizer;\n",
-    "import ai.djl.training.tracker.Tracker;\n",
-    "import ai.djl.training.ParameterStore;\n",
-    "import ai.djl.training.initializer.UniformInitializer;\n",
-    "import ai.djl.util.PairList;\n",
-    "import ai.djl.translate.TranslateException;\n",
-    "import tech.tablesaw.plotly.Plot;\n",
-    "import tech.tablesaw.plotly.components.*;\n",
-    "import tech.tablesaw.plotly.traces.ScatterTrace;\n",
-    "\n",
-    "import java.io.IOException;\n",
-    "import java.util.function.Function;"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -651,6 +607,13 @@
     "1. How can we add hyperparameters to nonparametric Nadaraya-Watson kernel regression to predict better?\n",
     "1. Design another parametric attention pooling for the kernel regression of this section. Train this new model and visualize its attention weights.\n"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
@@ -665,7 +628,7 @@
    "mimetype": "text/x-java-source",
    "name": "Java",
    "pygments_lexer": "java",
-   "version": "11.0.10+9"
+   "version": "14.0.2+12"
   }
  },
  "nbformat": 4,