Better API support #68

Merged · 11 commits · May 10, 2024 · Changes from 9 commits
4 changes: 3 additions & 1 deletion .github/workflows/release.yml
@@ -23,6 +23,8 @@ jobs:
contents: write
issues: write
pull-requests: write
environment:
  name: npm

steps:
- uses: actions/checkout@v3
@@ -39,7 +41,7 @@
npm run generate-docs

- name: Release
if: github.ref == 'refs/heads/main'
if: github.ref == 'refs/heads/main' || github.ref == 'refs/heads/beta'
id: release-package
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
1 change: 0 additions & 1 deletion .gitignore
@@ -112,7 +112,6 @@ web_modules/
.yarn-integrity

# dotenv environment variable files
.env
.env.development.local
.env.test.local
.env.production.local
43 changes: 34 additions & 9 deletions README.md
@@ -27,7 +27,7 @@ Make sure you have [Node.js](https://nodejs.org/en/) (**download current**) installed
```bash
npm install -g catai

catai install vicuna-7b-16k-q4_k_s
catai install llama3-8b-openhermes-dpo-q3_k_s
catai up
```

@@ -57,6 +57,7 @@ Commands:
active Show active model
remove|rm [options] [models...] Remove a model
uninstall Uninstall server and delete all models
node-llama-cpp|cpp [options] Node llama.cpp CLI - recompile node-llama-cpp binaries
help [command] display help for command
```
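
For example, the new subcommand recompiles the bundled llama.cpp binaries (a quick sketch; the troubleshooting guide added in this PR uses the same command):

```bash
# Recompile the node-llama-cpp binaries (useful when the prebuilt ones fail to load)
catai cpp
```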

@@ -92,14 +93,6 @@ This package uses [node-llama-cpp](https://github.com/withcatai/node-llama-cpp)
- linux-ppc64le
- win32-x64-msvc

### Memory usage
Runs on most modern computers. Unless your computer is very very old, it should work.

According to [a llama.cpp discussion thread](https://github.com/ggerganov/llama.cpp/issues/13), here are the memory requirements:

- 7B => ~4 GB
- 13B => ~8 GB
- 30B => ~16 GB

### Good to know
- All download data will be downloaded to the `~/catai` folder by default.
@@ -125,6 +118,38 @@ const data = await response.text();

For more information, please read the [API guide](https://github.com/withcatai/catai/blob/main/docs/api.md)

## Development API + Node-llama-cpp@beta integration

You can use the model with [node-llama-cpp@beta](https://github.com/withcatai/node-llama-cpp/pull/105).

CatAI makes it easy to manage models and chat with them.

```ts
import {downloadModel, getModelPath} from 'catai';
import {getLlama, LlamaChatSession} from "node-llama-cpp";

// Download the model (skip this step if you already have it)
await downloadModel(
    "https://huggingface.co/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct.Q2_K.gguf?download=true",
    "llama3"
);

// Resolve the local path of the model managed by CatAI
const modelPath = getModelPath("llama3");

const llama = await getLlama();
const model = await llama.loadModel({
    modelPath
});

const context = await model.createContext();
const session = new LlamaChatSession({
    contextSequence: context.getSequence()
});

const a1 = await session.prompt("Hi there, how are you?");
console.log("AI: " + a1);
```

## Configuration

You can edit the configuration via the web UI.
4 changes: 0 additions & 4 deletions clients/catai/src/lib/Chat/Markdown.svelte
@@ -68,10 +68,6 @@
list-style: auto !important;
}

li {
line-height: .5rem;
}

.copy-clipboard {
position: absolute;
right: 0;
9 changes: 8 additions & 1 deletion docs/api.md
@@ -11,7 +11,7 @@ Enables you to chat with the model locally on your computer.
```ts
import {createChat} from 'catai';

const chat = await createChat(); // uses the default installed model

const response = await chat.prompt('Write me a 100-word story', token => {
    process.stdout.write(token);
});

console.log(`Total text length: ${response.length}`);
```

You can also specify the model you want to use:

```ts
import {createChat} from 'catai';
const chat = await createChat({model: "llama3"});
```
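
Here `"llama3"` is assumed to be the name the model was installed under (for example, the name passed to `downloadModel` in the README example).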

If you want to install the model on the fly, please read the [install-api guide](./install-api.md)

## Remote API
35 changes: 17 additions & 18 deletions docs/install-api.md
@@ -6,10 +6,9 @@ You can install models on the fly using the `FetchModels` class.
```ts
import {FetchModels} from 'catai';

const allModels = await FetchModels.fetchModels();

const installModel = new FetchModels({
    download: "https://huggingface.co/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct.Q2_K.gguf?download=true",
    latest: true,
    model: {
        settings: {
            // ... (settings collapsed in this diff view) ...
        }
    }
});

await installModel.startDownload();
```

After the download is finished, this model will be the active model.
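
As a minimal sketch (assuming the download above completed), the newly installed model is then picked up by default:

```ts
import {createChat} from 'catai';

// No model specified: CatAI uses the active model, i.e. the one just installed
const chat = await createChat();

const response = await chat.prompt('Say hello in five words');
console.log(response);
```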

## Using with node-llama-cpp@beta

You can download the model and use it directly with node-llama-cpp@beta:

```ts
import {getLlama, LlamaChatSession} from "node-llama-cpp";
import {getModelPath} from 'catai';

const modelPath = getModelPath("llama3");

const llama = await getLlama();
const model = await llama.loadModel({
    modelPath
});

const context = await model.createContext();
const session = new LlamaChatSession({
    contextSequence: context.getSequence()
});

const a1 = await session.prompt("Hi there, how are you?");
console.log("AI: " + a1);
```

For more information on how to use the model, please refer to the [node-llama-cpp beta pull request](https://github.com/withcatai/node-llama-cpp/pull/105).
41 changes: 41 additions & 0 deletions docs/troubleshooting.md
@@ -0,0 +1,41 @@
# Troubleshooting

Some common problems and solutions.


## I can't connect to the server

If the server disconnects without any error, it's probably a problem with the llama.cpp binaries.

The solution is to recompile the binaries:
```bash
catai cpp
```

## How to change the download location?

You can configure the download location by changing the `CATAI_DIR` environment variable.

More environment variable configuration options can be found in the [configuration reference](https://withcatai.github.io/catai/interfaces/_internal_.Config.html#CATAI_DIR).
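
For example, a minimal sketch assuming a POSIX shell (the directory path is only illustrative):

```bash
# Keep all CatAI data under a custom directory instead of the default ~/catai
export CATAI_DIR="$HOME/my-catai"
catai up
```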

## CUDA support

If you have a GPU that supports CUDA but the server doesn't recognize it, try installing the CUDA toolkit and rebuilding the binaries.

Rebuild the binaries with CUDA support:

```bash
catai cpp --cuda
```

If you get an error, check the CUDA troubleshooting [here](https://withcatai.github.io/node-llama-cpp/guide/CUDA#fix-the-failed-to-detect-a-default-cuda-architecture-build-error).

## Unsupported processor / Exit without error

If you have an unsupported processor, you can try rebuilding the binaries:

```bash
catai cpp
```