feat: Ollama integration via docker
Signed-off-by: John McBride <[email protected]>
jpmcb committed Dec 13, 2023
1 parent b956ff8 commit e143be6
Showing 11 changed files with 190 additions and 246 deletions.
49 changes: 0 additions & 49 deletions LICENSE.llama.txt

This file was deleted.

64 changes: 26 additions & 38 deletions README.md
@@ -1,20 +1,17 @@
# 🦙 nvim-llama

_[Llama 2](https://ai.meta.com/llama/) and [llama.cpp](https://github.com/ggerganov/llama.cpp/) interfaces for Neovim_
_[Ollama](https://github.com/jmorganca/ollama) interfaces for Neovim_

# Project status

🏗️ 👷 Warning! Under active development!! 👷 🚧

This plugin was created as a proof of concept for running large language models within Neovim on consumer hardware.
Using llama.cpp and a downloaded model showed that this goal is indeed possible.
# Requirements

But distributing, building, maintaining, and running a large C/C++ project (llama.cpp) is a bit out of scope for a lightweight nvim plugin.
Docker is required to use `nvim-llama`.

Therefore, this project is transitioning to Ollama, a llama.cpp runner in Docker.
Ideally, this means that all you'll need to run nvim-llama is Docker!

You can expect these changes in Q1 2024.
And that's it! All models and clients run from within Docker to provide chat interfaces and functionality.
This platform-agnostic approach works on macOS, Linux, and Windows.

# Installation

@@ -55,44 +52,35 @@ local defaults = {
-- See plugin debugging logs
debug = false,

-- Build llama.cpp for GPU acceleration on Apple M chip devices.
-- If you are using an Apple M1/M2 laptop, it is highly recommended to
-- enable this since, depending on the model, it may drastically increase performance.
build_metal = false,
-- The model for Ollama to use. This model will be downloaded automatically.
model = "llama2",
}
```
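
For example, overriding the defaults from your plugin configuration looks like this (the `codellama` value is only an illustration; any entry from the model table below works):

```lua
require('nvim-llama').setup({
    -- Print plugin debugging logs
    debug = false,
    -- Any model from the table below; it is downloaded automatically
    model = "codellama",
})
```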

# Models

Llama.cpp supports an incredible number of models.

To start using one, you'll need to download an appropriately sized model that
is supported by llama.cpp.
### Model library

The 13B GGUF CodeLlama model is a really good place to start:
https://huggingface.co/TheBloke/CodeLlama-13B-GGUF
Ollama supports an incredible number of open-source models available on [ollama.ai/library](https://ollama.ai/library 'ollama model library')

To use a model, it must be placed in the `llama.cpp/models/` directory, which
is expected to be found at `~/.local/share/nvim/llama.cpp/models`.
Check out their docs to learn more: https://github.com/jmorganca/ollama

The following script can be useful for downloading a model to that directory:
---

```sh
LLAMA_CPP="$HOME/.local/share/nvim/llama.cpp"
MODEL="codellama-13b.Q4_K_M.gguf"

pushd "${LLAMA_CPP}"
if [ ! -f "models/${MODEL}" ]; then
    curl -L "https://huggingface.co/TheBloke/CodeLlama-13B-GGUF/resolve/main/${MODEL}" -o "models/${MODEL}"
fi
popd
```
When the `model` setting is configured, the specified model will be downloaded automatically:

In the future, this project may provide the capability to download models automatically.
| Model              | Parameters | Size  | Model setting                 |
| ------------------ | ---------- | ----- | ----------------------------- |
| Neural Chat        | 7B         | 4.1GB | `model = "neural-chat"`       |
| Starling           | 7B         | 4.1GB | `model = "starling-lm"`       |
| Mistral            | 7B         | 4.1GB | `model = "mistral"`           |
| Llama 2            | 7B         | 3.8GB | `model = "llama2"`            |
| Code Llama         | 7B         | 3.8GB | `model = "codellama"`         |
| Llama 2 Uncensored | 7B         | 3.8GB | `model = "llama2-uncensored"` |
| Llama 2 13B        | 13B        | 7.3GB | `model = "llama2:13b"`        |
| Llama 2 70B        | 70B        | 39GB  | `model = "llama2:70b"`        |
| Orca Mini          | 3B         | 1.9GB | `model = "orca-mini"`         |
| Vicuna             | 7B         | 3.8GB | `model = "vicuna"`            |

# License
> Note: You should have at least 8 GB of RAM to run the 3B models, 16 GB to run the 7B models, and 32 GB to run the 13B models.
> 70B-parameter models require upwards of 64 GB of RAM (if not more).
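
Tagged variants follow Ollama's `name:tag` convention, so switching to a larger model is just a settings change. For example (purely illustrative; note the memory guidance above):

```lua
-- 13B models call for roughly 32 GB of RAM (see the note above)
require('nvim-llama').setup({
    model = "llama2:13b",
})
```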

This project is dual-licensed under [MIT](./LICENSE.txt) (first-party plugin code)
and the [Llama 2 license](./LICENSE.llama.txt).
By using this plugin, you agree to both terms and assert that you already have
[your own non-transferable license for Llama 2 from Meta AI](https://ai.meta.com/resources/models-and-libraries/llama-downloads/).
16 changes: 0 additions & 16 deletions get-model.sh

This file was deleted.

Empty file removed lua/nvim-llama/download-model.lua
Empty file.
Empty file removed lua/nvim-llama/info.lua
Empty file.
107 changes: 88 additions & 19 deletions lua/nvim-llama/init.lua
@@ -1,44 +1,113 @@
local window = require("nvim-llama.window")
local settings = require("nvim-llama.settings")
local llama_cpp = require('nvim-llama.install')
local ollama = require('nvim-llama.ollama')

local M = {}

M.interactive_llama = function()
local buf, win = window.create_floating_window()
local binary = '~/.local/share/nvim/llama.cpp/main'
local llama_flags = '-t 10 -ngl 32 --color -c 4096 --temp 0.7 --repeat_penalty 1.1 -n -1 -i -ins'
local model_flag = '-m ~/.local/share/nvim/llama.cpp/models/codellama-13b.Q4_K_M.gguf'

-- Start terminal in the buffer with your binary
vim.api.nvim_buf_call(buf, function()
vim.cmd(string.format('term %s %s %s', binary, llama_flags, model_flag))
end)
local command = ollama.run(settings.current.model)

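-- Open the chat window and run the Ollama command in an attached terminal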
window.create_chat_window()
vim.fn.termopen(command)
end

local function trim(s)
return s:match('^%s*(.-)%s*$')
end

local function set_commands()
vim.api.nvim_create_user_command("Llama", function()
M.interactive_llama()
end, {})
end

vim.api.nvim_create_user_command("LlamaInstall", function()
llama_cpp.install()
end, {})
local function is_docker_installed()
local handle = io.popen("docker --version 2>&1")
local result = handle:read("*a")
handle:close()

vim.api.nvim_create_user_command("LlamaRebuild", function()
llama_cpp.rebuild()
end, {})
return result:match("Docker version")
end

vim.api.nvim_create_user_command("LlamaUpdate", function()
llama_cpp.update()
end, {})
local function is_docker_running()
local handle = io.popen("docker info > /dev/null 2>&1; echo $?")
local result = handle:read("*a")
handle:close()

return result:match("0\n")
end

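-- Confirm that Docker is installed and the daemon is running before doing anything else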
local function check_docker()
if not is_docker_installed() then
error("Docker is not installed. Docker is required for nvim-llama")
return false
end

if not is_docker_running() then
error("Docker is not running. ")
return false
end

return true
end

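-- Spawn a command asynchronously via libuv (vim.loop) and report success or failure to the callback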
local function async(command, args, callback)
vim.loop.spawn(command, {args = args}, function(code)
if code == 0 then
callback(true)
else
callback(false)
end
end)
end

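-- Check whether a container named nvim-llama is currently running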
local function is_container_running()
local command = string.format("docker ps --filter 'name=^/nvim-llama$' --format '{{.Names}}'")
local handle = io.popen(command)
local result = trim(handle:read("*a"))
handle:close()

return result == "nvim-llama"
end

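-- Ensure the nvim-llama container exists and is running, starting or recreating it as needed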
local function check_ollama_container()
local container_name = "nvim-llama"

local exists_command = string.format("docker ps -a --filter 'name=^/nvim-llama$' --format '{{.Names}}'")
local exists_handle = io.popen(exists_command)
local exists_result = trim(exists_handle:read("*a"))
exists_handle:close()

if exists_result == "nvim-llama" then
if not is_container_running() then
-- Remove the stopped container and re-run a new one
local handle = io.popen("docker rm " .. container_name)
handle:close()
ollama.start()
end
else
-- start a new container since none by that name exists
ollama.start()
end

return true
end

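-- Plugin entry point: apply user settings, verify Docker, and make sure the Ollama container is up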
function M.setup(config)
if config then
settings.set(config)
end

local status, err = pcall(check_docker)
if not status then
print("Error checking docker status: " .. err)
end

status, err = pcall(check_ollama_container)
if not status then
print("Error checking docker status: " .. err)
end

set_commands()
end
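
The `nvim-llama.ollama` module that provides `ollama.run()` and `ollama.start()` is not shown in this excerpt. As a rough sketch of the shape it might take — assuming the official `ollama/ollama` Docker image and its documented flags, which are not confirmed by the diff above (the `nvim-llama` container name does appear in `check_ollama_container`) — it could look something like:

```lua
-- Hypothetical sketch only; the real lua/nvim-llama/ollama.lua is not shown here,
-- and the docker flags are assumptions based on Ollama's documented Docker usage.
local M = {}

-- Start a detached Ollama container, persisting models in a named volume.
function M.start()
    os.execute("docker run -d -v ollama:/root/.ollama -p 11434:11434 --name nvim-llama ollama/ollama")
end

-- Return the interactive command that chats with the given model inside the container.
function M.run(model)
    return string.format("docker exec -it nvim-llama ollama run %s", model)
end

return M
```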

104 changes: 0 additions & 104 deletions lua/nvim-llama/install.lua

This file was deleted.

