-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex_repo.py
39 lines (28 loc) · 1.33 KB
/
index_repo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import os
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
from llama_index.llms.groq import Groq
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
from transformers import AutoTokenizer
from typing import List
from git import Repo # pip install gitpython
from src.create_index import create_index
from src.query_index import query_index
from src.utils import list_md_files, list_sh_files, get_relevant_files
# Remote repository that gets cloned locally, indexed, and then queried.
git_url = "https://github.com/coltonstearns/dynamic-gaussian-marbles.git"

# Derive the repo name from the last path segment of the URL.
# Only a *trailing* ".git" is removed: str.replace(".git", "") would also
# delete ".git" occurring inside the name (e.g. "my.github.git" -> "myhub").
# A trailing "/" is dropped first so the last segment is never empty.
repo_name = git_url.rstrip("/").split("/")[-1]
if repo_name.endswith(".git"):
    repo_name = repo_name[: -len(".git")]

# Clone into data/<repo_name> unless that path already exists.
repo_dir = os.path.join("data", repo_name)
if not os.path.exists(repo_dir):
    Repo.clone_from(git_url, repo_dir)

# Collect the files to index; both flags are forwarded to get_relevant_files.
include_md = True
include_sh = True
# Materialize the result as a list before handing it to create_index.
relevant_files = list(
    get_relevant_files(repo_dir, include_md=include_md, include_sh=include_sh)
)

# Build the index from the selected files; index/<repo_name> is passed as
# save_dir.
index_dir = os.path.join("index", repo_name)
index = create_index(input_files=relevant_files, save_dir=index_dir, model_provider='groq')

# Query the index and print the response.
query_text = "Give me steps to run the training pipeline. Where should I run the preprocessing script from (which subdir)?"
response = query_index(query_text, index=index)
print(response)