Skip to content

Commit

Permalink
Add Docker files and scripts for DockerAI integration
Browse files Browse the repository at this point in the history
  • Loading branch information
Davidnet committed Jan 10, 2024
0 parents commit 62eb665
Show file tree
Hide file tree
Showing 21 changed files with 5,235 additions and 0 deletions.
176 changes: 176 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
# Created by https://www.toptal.com/developers/gitignore/api/python
# Edit at https://www.toptal.com/developers/gitignore?templates=python

### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

### Python Patch ###
# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
poetry.toml

# ruff
.ruff_cache/

# LSP config files
pyrightconfig.json

# End of https://www.toptal.com/developers/gitignore/api/python
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# DockerAI Gen Example

This is an example of how to use DockerAI for genAI.

Author: David Cardozo
15 changes: 15 additions & 0 deletions docker-bot/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Image for the Streamlit chat bot defined in docker_bot/bot.py.
FROM python:3.11-bullseye

WORKDIR /usr/src/app

# Copy the package sources and the project files before installing in editable mode.
COPY docker_bot /usr/src/app/docker_bot
COPY pyproject.toml /usr/src/app/pyproject.toml
COPY README.md /usr/src/app/README.md

RUN pip install -e .

# Must match the --server.port value passed to Streamlit below.
EXPOSE 8504

HEALTHCHECK CMD curl --fail http://localhost:8504/_stcore/health

# Run the script that was actually copied into the image (docker_bot/bot.py);
# the previous path, yt_whisper/app.py, is not part of this image.
ENTRYPOINT ["streamlit", "run", "docker_bot/bot.py", "--server.port=8504", "--server.address=0.0.0.0"]
Empty file added docker-bot/README.md
Empty file.
Empty file.
150 changes: 150 additions & 0 deletions docker-bot/docker_bot/bot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
import os

import pinecone
import streamlit as st
from dotenv import load_dotenv
from openai import OpenAI
from streamlit.logger import get_logger

# Load environment variables (the API tokens read via os.getenv below) from a
# local .env file. One call is enough; the duplicate call was redundant.
load_dotenv(".env")

logger = get_logger(__name__)

# OpenAI client shared by the embedding and chat-completion requests.
client = OpenAI(api_key=os.getenv("OPENAI_TOKEN"))


@st.cache_resource
def load_pinecone(index_name="docker-genai"):
    """Return a handle to the Pinecone index, creating the index if it is missing.

    Pinecone is initialised from the PINECONE_TOKEN and PINECONE_ENVIRONMENT
    environment variables. The function is decorated with
    ``st.cache_resource``.

    Args:
        index_name: Name of the Pinecone index to open.

    Returns:
        The ``pinecone.Index`` object for ``index_name``.
    """
    pinecone.init(
        environment=os.getenv("PINECONE_ENVIRONMENT"),
        api_key=os.getenv("PINECONE_TOKEN"),
    )

    existing_indexes = pinecone.list_indexes()
    if index_name not in existing_indexes:
        # NOTE(review): dimension 1536 presumably matches the embedding model
        # used in generate_response -- confirm before changing either one.
        pinecone.create_index(name=index_name, dimension=1536, metric="cosine")

    return pinecone.Index(index_name)


def generate_response(input_text: str) -> str:
    """Answer a question using video transcriptions retrieved from Pinecone.

    The question is embedded with OpenAI, the top five matches are queried
    from the Pinecone index, and their titles and transcriptions are sent to
    the chat model as context ahead of the question itself.

    Args:
        input_text: The user's question.

    Returns:
        The model's answer followed by a bulleted list of the matched
        videos' URLs.
    """
    embedding_result = client.embeddings.create(
        input=[input_text], model="text-embedding-ada-002"
    )
    query_vector = list(embedding_result.data[0].embedding)
    query_result = load_pinecone().query(
        vector=query_vector,
        top_k=5,
        include_metadata=True,
    )
    matches = query_result["matches"]

    # Collect the pieces in lists and join once instead of growing strings
    # with += inside the loop.
    context_parts = [
        "The following are the top 5 videos transcription that match your query: \n"
    ]
    reference_parts = []
    for match in matches:
        metadata = match.metadata
        context_parts.append("Title: " + metadata["title"] + "\n")
        context_parts.append("Transcription: " + metadata["text"] + "\n")
        reference_parts.append("\n - " + metadata["video_url"] + "\n")
    context = "".join(context_parts)
    references = "".join(reference_parts)

    primer = """You are Q&A bot. A highly intelligent system that answers
user questions based on the information provided by videos transcriptions. You can use your inner knowledge,
but consider more with emphasis the information provided. Put emphasis on the transcriptions provided. If you see titles repeated, you can assume it is the same video.
Provide samples of the transcriptions that are important to your query.
"""

    chat_completion = client.chat.completions.create(
        messages=[
            {"role": "system", "content": primer},
            {
                "role": "user",
                "content": context,
            },
            {"role": "user", "content": input_text},
        ],
        model="gpt-4-1106-preview",
    )
    # message.content may be None; fall back to an empty answer so that
    # appending the references cannot raise TypeError.
    answer = chat_completion.choices[0].message.content or ""

    # Add video references
    return answer + "\n" + references


# Streamlit UI
# NOTE: this is a plain string (not an f-string and never passed to
# str.format), so CSS braces must be single; doubled braces would be emitted
# literally and make every rule below invalid.
styl = """
<style>
/* not great support for :has yet (hello FireFox), but using it for now */
.element-container:has([aria-label="Select RAG mode"]) {
position: fixed;
bottom: 33px;
background: white;
z-index: 101;
}
.stChatFloatingInputContainer {
bottom: 20px;
}
/* Generate ticket text area */
textarea[aria-label="Description"] {
height: 200px;
}
</style>
"""
# unsafe_allow_html is required for the raw <style> tag to be injected.
st.markdown(styl, unsafe_allow_html=True)


def chat_input():
    """Read a question from the chat box, answer it, and record the exchange.

    When the user has submitted text, the question is echoed, an answer is
    obtained from ``generate_response``, both are appended to the
    ``user_input`` and ``generated`` lists in ``st.session_state``, and the
    script is rerun. Those lists must already exist in the session state.
    """
    question = st.chat_input("What you want to know about your videos?")
    if not question:
        return

    with st.chat_message("user"):
        st.write(question)

    with st.chat_message("assistant"):
        st.caption("Dockerbot")
        answer = generate_response(question)
        st.session_state["user_input"].append(question)
        st.session_state["generated"].append(answer)
        # The answer itself is not written here; the script is rerun
        # immediately after the exchange has been stored.
        st.rerun()


def display_chat():
    """Render the most recent exchanges stored in the Streamlit session state.

    Creates the ``generated`` and ``user_input`` lists in ``st.session_state``
    when they are absent, then shows at most the last three question/answer
    pairs followed by a spacer container. Nothing is drawn when no answers
    have been stored yet.
    """
    for key in ("generated", "user_input"):
        if key not in st.session_state:
            st.session_state[key] = []

    answers = st.session_state["generated"]
    if not answers:
        return
    questions = st.session_state["user_input"]

    total = len(answers)
    first_shown = max(total - 3, 0)  # only the last three exchanges
    for idx in range(first_shown, total):
        with st.chat_message("user"):
            st.write(questions[idx])

        with st.chat_message("assistant"):
            st.caption("Dockerbot")
            st.write(answers[idx])

    with st.container():
        st.write("&nbsp;")


# Order matters: display_chat() creates the "generated" and "user_input"
# session-state lists that chat_input() appends to.
display_chat()
chat_input()
Loading

0 comments on commit 62eb665

Please sign in to comment.