
Commit 196650c

Update model paths to make it clearer that they should point to a file

1 parent a79d3eb · commit 196650c

8 files changed: +12 −12 lines

README.md (+3 −3)

````diff
@@ -27,14 +27,14 @@ pip install llama-cpp-python
 
 ```python
 >>> from llama_cpp import Llama
->>> llm = Llama(model_path="models/7B/...")
+>>> llm = Llama(model_path="./models/7B/ggml-model.bin")
 >>> output = llm("Q: Name the planets in the solar system? A: ", max_tokens=32, stop=["Q:", "\n"], echo=True)
 >>> print(output)
 {
   "id": "cmpl-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
   "object": "text_completion",
   "created": 1679561337,
-  "model": "models/7B/...",
+  "model": "./models/7B/ggml-model.bin",
   "choices": [
     {
       "text": "Q: Name the planets in the solar system? A: Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, Neptune and Pluto.",
````
````diff
@@ -60,7 +60,7 @@ To install the server package and get started:
 
 ```bash
 pip install llama-cpp-python[server]
-export MODEL=./models/7B
+export MODEL=./models/7B/ggml-model.bin
 python3 -m llama_cpp.server
 ```
 
````
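With `MODEL` pointing at the file, `python3 -m llama_cpp.server` serves an OpenAI-style HTTP API. A hedged client sketch using only the standard library, assuming the default uvicorn address `http://localhost:8000` and a `/v1/completions` route:

```python
import json
import urllib.request

# Assumes the server from the diff above is already running locally.
payload = {
    "prompt": "Q: Name the planets in the solar system? A: ",
    "max_tokens": 32,
    "stop": ["Q:", "\n"],
}
req = urllib.request.Request(
    "http://localhost:8000/v1/completions",
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    body = json.load(resp)

print(body["choices"][0]["text"])
```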
docs/index.md (+3 −3)

````diff
@@ -29,14 +29,14 @@ pip install llama-cpp-python
 
 ```python
 >>> from llama_cpp import Llama
->>> llm = Llama(model_path="models/7B/...")
+>>> llm = Llama(model_path="./models/7B/ggml-model.bin")
 >>> output = llm("Q: Name the planets in the solar system? A: ", max_tokens=32, stop=["Q:", "\n"], echo=True)
 >>> print(output)
 {
   "id": "cmpl-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
   "object": "text_completion",
   "created": 1679561337,
-  "model": "models/7B/...",
+  "model": "./models/7B/ggml-model.bin",
   "choices": [
     {
       "text": "Q: Name the planets in the solar system? A: Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, Neptune and Pluto.",
@@ -62,7 +62,7 @@ To install the server package and get started:
 
 ```bash
 pip install llama-cpp-python[server]
-export MODEL=./models/7B
+export MODEL=./models/7B/ggml-model.bin
 python3 -m llama_cpp.server
 ```
 
````

examples/high_level_api/fastapi_server.py (+1 −1)

````diff
@@ -4,7 +4,7 @@
 
 ```bash
 pip install fastapi uvicorn sse-starlette
-export MODEL=../models/7B/...
+export MODEL=../models/7B/ggml-model.bin
 uvicorn fastapi_server_chat:app --reload
 ```
 
````

examples/high_level_api/high_level_api_embedding.py (+1 −1)

````diff
@@ -3,7 +3,7 @@
 from llama_cpp import Llama
 
 parser = argparse.ArgumentParser()
-parser.add_argument("-m", "--model", type=str, default=".//models/...")
+parser.add_argument("-m", "--model", type=str, default="../models/7B/ggml-model.bin")
 args = parser.parse_args()
 
 llm = Llama(model_path=args.model, embedding=True)
````
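With `embedding=True`, the instance from this script can be asked for vectors. A hedged sketch, assuming the high-level `create_embedding` method and its OpenAI-style response shape:

```python
# Continuing from the script above: llm was created with embedding=True.
result = llm.create_embedding("The quick brown fox jumps over the lazy dog.")

# OpenAI-style layout: the vector lives under data[0].embedding (assumed shape).
vector = result["data"][0]["embedding"]
print(f"embedding dimensionality: {len(vector)}")
```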

examples/high_level_api/high_level_api_inference.py (+1 −1)

````diff
@@ -4,7 +4,7 @@
 from llama_cpp import Llama
 
 parser = argparse.ArgumentParser()
-parser.add_argument("-m", "--model", type=str, default="./models/...")
+parser.add_argument("-m", "--model", type=str, default="../models/7B/ggml-models.bin")
 args = parser.parse_args()
 
 llm = Llama(model_path=args.model)
````

examples/high_level_api/high_level_api_streaming.py (+1 −1)

````diff
@@ -4,7 +4,7 @@
 from llama_cpp import Llama
 
 parser = argparse.ArgumentParser()
-parser.add_argument("-m", "--model", type=str, default="./models/...")
+parser.add_argument("-m", "--model", type=str, default="../models/7B/ggml-models.bin")
 args = parser.parse_args()
 
 llm = Llama(model_path=args.model)
````
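What the diff does not show is how the stream is consumed. A hedged sketch, assuming the high-level API's `stream=True` flag, which yields incremental OpenAI-style chunks:

```python
# llm is the Llama instance created in the script above.
for chunk in llm(
    "Q: Name the planets in the solar system? A: ",
    max_tokens=32,
    stop=["Q:", "\n"],
    stream=True,
):
    # Each chunk carries the next piece of generated text.
    print(chunk["choices"][0]["text"], end="", flush=True)
print()
```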

examples/high_level_api/langchain_custom_llm.py (+1 −1)

````diff
@@ -29,7 +29,7 @@ def _identifying_params(self) -> Mapping[str, Any]:
 
 
 parser = argparse.ArgumentParser()
-parser.add_argument("-m", "--model", type=str, default="./models/...")
+parser.add_argument("-m", "--model", type=str, default="../models/7B/ggml-models.bin")
 args = parser.parse_args()
 
 # Load the model
````

examples/low_level_api/low_level_api_llama_cpp.py (+1 −1)

````diff
@@ -9,7 +9,7 @@
 prompt = b"\n\n### Instruction:\nWhat is the capital of France?\n\n### Response:\n"
 
 lparams = llama_cpp.llama_context_default_params()
-ctx = llama_cpp.llama_init_from_file(b"models/ggml-alpaca-7b-q4.bin", lparams)
+ctx = llama_cpp.llama_init_from_file(b"../models/7B/ggml-model.bin", lparams)
 
 # determine the required inference memory per token:
 tmp = [0, 1, 2, 3]
````
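The low-level binding takes the model path as `bytes`, mirroring the C API, and the returned context must be freed explicitly. A minimal init/teardown sketch under those assumptions (`llama_free` as the C-side destructor):

```python
import llama_cpp

lparams = llama_cpp.llama_context_default_params()
# The ctypes binding expects a bytes path, exactly as in the diff above.
ctx = llama_cpp.llama_init_from_file(b"../models/7B/ggml-model.bin", lparams)
try:
    # tokenize / eval / sample would go here
    pass
finally:
    llama_cpp.llama_free(ctx)  # release the C-side context
```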
