diff --git a/notebooks/en/structured_generation_vision_languag_models.ipynb b/notebooks/en/structured_generation_vision_languag_models.ipynb
index bd4a0589..3e3b331d 100644
--- a/notebooks/en/structured_generation_vision_languag_models.ipynb
+++ b/notebooks/en/structured_generation_vision_languag_models.ipynb
@@ -6,7 +6,8 @@
    "source": [
     "# Structured Generation from Documents Using Vision Language Models\n",
     "\n",
-    "We will be using the SmolVLM-500M-Instruct model from HuggingFaceTB to extract structured information from documents. We will do so using the HuggingFace Transformers library and the Outlines library, which facilitates structured generation based on limiting token sampling probabilities. We will also use the Gradio library to create a simple UI for uploading and extracting structured information from documents.\n",
+    "We will be using the SmolVLM-Instruct model from HuggingFaceTB to extract structured information from documents. We will run the VLM using the HuggingFace Transformers library and the Outlines library, which facilitates structured generation by limiting token sampling probabilities.\n",
+    "This approach is based on an [outlines tutorial](https://dottxt-ai.github.io/outlines/latest/cookbook/atomic_caption/).\n",
     "\n",
     "## Dependencies and imports\n",
     "\n",
@@ -19,7 +20,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "%pip install accelerate outlines transformers torch flash-attn outlines datasets sentencepiece gradio"
+    "%pip install accelerate outlines transformers torch flash-attn datasets sentencepiece"
    ]
   },
   {
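
For context on the approach the revised intro describes, a minimal sketch (not part of the diff) of constraining a VLM's token sampling with Outlines is shown below. It assumes the Outlines 0.x API used in the linked tutorial (`outlines.models.transformers_vision`, `outlines.generate.json`); the `DocumentInfo` schema and the prompt are illustrative placeholders, not the notebook's actual code.

```python
# Sketch only: Outlines restricts the VLM's token sampling so the output
# must conform to a schema. Assumes the Outlines 0.x API from the linked
# tutorial; DocumentInfo and the prompt below are hypothetical placeholders.
import torch
import outlines
from pydantic import BaseModel
from transformers import AutoModelForImageTextToText


class DocumentInfo(BaseModel):
    title: str
    summary: str


# Load SmolVLM-Instruct through Outlines' transformers_vision wrapper.
model = outlines.models.transformers_vision(
    "HuggingFaceTB/SmolVLM-Instruct",
    model_class=AutoModelForImageTextToText,
    model_kwargs={"torch_dtype": torch.bfloat16},
)

# Build a generator that only samples tokens consistent with the
# DocumentInfo JSON schema.
generator = outlines.generate.json(model, DocumentInfo)

# Usage (illustrative): pass a prompt containing the image token plus a PIL image.
# result = generator(prompt_with_image_token, [pil_image])
```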