
Commit 196650c

Update model paths to make it clearer that they should point to a file

1 parent a79d3eb · commit 196650c

8 files changed: +12 −12 lines

README.md (+3 −3)

````diff
@@ -27,14 +27,14 @@ pip install llama-cpp-python
 
 ```python
 >>> from llama_cpp import Llama
->>> llm = Llama(model_path="models/7B/...")
+>>> llm = Llama(model_path="./models/7B/ggml-model.bin")
 >>> output = llm("Q: Name the planets in the solar system? A: ", max_tokens=32, stop=["Q:", "\n"], echo=True)
 >>> print(output)
 {
   "id": "cmpl-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
   "object": "text_completion",
   "created": 1679561337,
-  "model": "models/7B/...",
+  "model": "./models/7B/ggml-model.bin",
   "choices": [
     {
       "text": "Q: Name the planets in the solar system? A: Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, Neptune and Pluto.",
````
````diff
@@ -60,7 +60,7 @@ To install the server package and get started:
 
 ```bash
 pip install llama-cpp-python[server]
-export MODEL=./models/7B
+export MODEL=./models/7B/ggml-model.bin
 python3 -m llama_cpp.server
 ```
 
````
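With `MODEL` pointing at the file, `python3 -m llama_cpp.server` serves an OpenAI-style HTTP API. A hedged client sketch using only the standard library, assuming the default uvicorn address `http://localhost:8000` and a `/v1/completions` route:

```python
import json
import urllib.request

# Assumes the server from the diff above is already running locally.
payload = {
    "prompt": "Q: Name the planets in the solar system? A: ",
    "max_tokens": 32,
    "stop": ["Q:", "\n"],
}
req = urllib.request.Request(
    "http://localhost:8000/v1/completions",
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    body = json.load(resp)

print(body["choices"][0]["text"])
```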
docs/index.md (+3 −3)

````diff
@@ -29,14 +29,14 @@ pip install llama-cpp-python
 
 ```python
 >>> from llama_cpp import Llama
->>> llm = Llama(model_path="models/7B/...")
+>>> llm = Llama(model_path="./models/7B/ggml-model.bin")
 >>> output = llm("Q: Name the planets in the solar system? A: ", max_tokens=32, stop=["Q:", "\n"], echo=True)
 >>> print(output)
 {
   "id": "cmpl-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
   "object": "text_completion",
   "created": 1679561337,
-  "model": "models/7B/...",
+  "model": "./models/7B/ggml-model.bin",
   "choices": [
     {
       "text": "Q: Name the planets in the solar system? A: Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, Neptune and Pluto.",
@@ -62,7 +62,7 @@ To install the server package and get started:
 
 ```bash
 pip install llama-cpp-python[server]
-export MODEL=./models/7B
+export MODEL=./models/7B/ggml-model.bin
 python3 -m llama_cpp.server
 ```
 
````

examples/high_level_api/fastapi_server.py (+1 −1)

````diff
@@ -4,7 +4,7 @@
 
 ```bash
 pip install fastapi uvicorn sse-starlette
-export MODEL=../models/7B/...
+export MODEL=../models/7B/ggml-model.bin
 uvicorn fastapi_server_chat:app --reload
 ```
 
````

examples/high_level_api/high_level_api_embedding.py (+1 −1)

````diff
@@ -3,7 +3,7 @@
 from llama_cpp import Llama
 
 parser = argparse.ArgumentParser()
-parser.add_argument("-m", "--model", type=str, default=".//models/...")
+parser.add_argument("-m", "--model", type=str, default="../models/7B/ggml-model.bin")
 args = parser.parse_args()
 
 llm = Llama(model_path=args.model, embedding=True)
````
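With `embedding=True`, the instance from this script can be asked for vectors. A hedged sketch, assuming the high-level `create_embedding` method and its OpenAI-style response shape:

```python
# Continuing from the script above: llm was created with embedding=True.
result = llm.create_embedding("The quick brown fox jumps over the lazy dog.")

# OpenAI-style layout: the vector lives under data[0].embedding (assumed shape).
vector = result["data"][0]["embedding"]
print(f"embedding dimensionality: {len(vector)}")
```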

examples/high_level_api/high_level_api_inference.py (+1 −1)

````diff
@@ -4,7 +4,7 @@
 from llama_cpp import Llama
 
 parser = argparse.ArgumentParser()
-parser.add_argument("-m", "--model", type=str, default="./models/...")
+parser.add_argument("-m", "--model", type=str, default="../models/7B/ggml-models.bin")
 args = parser.parse_args()
 
 llm = Llama(model_path=args.model)
````

examples/high_level_api/high_level_api_streaming.py (+1 −1)

````diff
@@ -4,7 +4,7 @@
 from llama_cpp import Llama
 
 parser = argparse.ArgumentParser()
-parser.add_argument("-m", "--model", type=str, default="./models/...")
+parser.add_argument("-m", "--model", type=str, default="../models/7B/ggml-models.bin")
 args = parser.parse_args()
 
 llm = Llama(model_path=args.model)
````
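What the diff does not show is how the stream is consumed. A hedged sketch, assuming the high-level API's `stream=True` flag, which yields incremental OpenAI-style chunks:

```python
# llm is the Llama instance created in the script above.
for chunk in llm(
    "Q: Name the planets in the solar system? A: ",
    max_tokens=32,
    stop=["Q:", "\n"],
    stream=True,
):
    # Each chunk carries the next piece of generated text.
    print(chunk["choices"][0]["text"], end="", flush=True)
print()
```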

examples/high_level_api/langchain_custom_llm.py (+1 −1)

````diff
@@ -29,7 +29,7 @@ def _identifying_params(self) -> Mapping[str, Any]:
 
 
 parser = argparse.ArgumentParser()
-parser.add_argument("-m", "--model", type=str, default="./models/...")
+parser.add_argument("-m", "--model", type=str, default="../models/7B/ggml-models.bin")
 args = parser.parse_args()
 
 # Load the model
````

examples/low_level_api/low_level_api_llama_cpp.py (+1 −1)

````diff
@@ -9,7 +9,7 @@
 prompt = b"\n\n### Instruction:\nWhat is the capital of France?\n\n### Response:\n"
 
 lparams = llama_cpp.llama_context_default_params()
-ctx = llama_cpp.llama_init_from_file(b"models/ggml-alpaca-7b-q4.bin", lparams)
+ctx = llama_cpp.llama_init_from_file(b"../models/7B/ggml-model.bin", lparams)
 
 # determine the required inference memory per token:
 tmp = [0, 1, 2, 3]
````
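The low-level binding takes the model path as `bytes`, mirroring the C API, and the returned context must be freed explicitly. A minimal init/teardown sketch under those assumptions (`llama_free` as the C-side destructor):

```python
import llama_cpp

lparams = llama_cpp.llama_context_default_params()
# The ctypes binding expects a bytes path, exactly as in the diff above.
ctx = llama_cpp.llama_init_from_file(b"../models/7B/ggml-model.bin", lparams)
try:
    # tokenize / eval / sample would go here
    pass
finally:
    llama_cpp.llama_free(ctx)  # release the C-side context
```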
