diff --git a/Makefile b/Makefile index 885b362..46e37de 100644 --- a/Makefile +++ b/Makefile @@ -19,7 +19,7 @@ test: .PHONY: bot bot: bot.py - poetry run ./bot.py + pipenv run ./bot.py bot-runner: pipenv run env RUN_AS_PROCESS=true python ./bot_runner.py diff --git a/Pipfile b/Pipfile index a9e3994..1ea8cdf 100644 --- a/Pipfile +++ b/Pipfile @@ -6,7 +6,7 @@ verify_ssl = true [packages] boto3 = "*" fastapi = "*" -pipecat-ai = {extras = ["silero,openai,daily"] } +pipecat-ai = {extras = ["silero,openai,daily,elevenlabs"], version = "==0.0.43"} python-dotenv = "*" python-multipart = "*" requests = "*" diff --git a/Pipfile.lock b/Pipfile.lock index 1ba229c..b9be97d 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "5422a344b1ad343930d753f240eaca6b3b885ca4c3831936c5a67aee81866e8a" + "sha256": "f9508039e3d20014a6de50c41de0236281c776c1ee32ca96feffef57ae071026" }, "pipfile-spec": 6, "requires": { @@ -891,7 +891,7 @@ }, "pipecat-ai": { "extras": [ - "silero,openai,daily" + "silero,openai,daily,elevenlabs" ], "hashes": [ "sha256:35439a4f5bef7b31efa71aff52d4dc9fecf23241ef4f14d187773cc1346a4c75", @@ -1335,6 +1335,83 @@ "markers": "python_version >= '3.8'", "version": "==0.31.1" }, + "websockets": { + "hashes": [ + "sha256:00700340c6c7ab788f176d118775202aadea7602c5cc6be6ae127761c16d6b0b", + "sha256:0bee75f400895aef54157b36ed6d3b308fcab62e5260703add87f44cee9c82a6", + "sha256:0e6e2711d5a8e6e482cacb927a49a3d432345dfe7dea8ace7b5790df5932e4df", + "sha256:12743ab88ab2af1d17dd4acb4645677cb7063ef4db93abffbf164218a5d54c6b", + "sha256:1a9d160fd080c6285e202327aba140fc9a0d910b09e423afff4ae5cbbf1c7205", + "sha256:1bf386089178ea69d720f8db6199a0504a406209a0fc23e603b27b300fdd6892", + "sha256:1df2fbd2c8a98d38a66f5238484405b8d1d16f929bb7a33ed73e4801222a6f53", + "sha256:1e4b3f8ea6a9cfa8be8484c9221ec0257508e3a1ec43c36acdefb2a9c3b00aa2", + "sha256:1f38a7b376117ef7aff996e737583172bdf535932c9ca021746573bce40165ed", + "sha256:23509452b3bc38e3a057382c2e941d5ac2e01e251acce7adc74011d7d8de434c", + "sha256:248d8e2446e13c1d4326e0a6a4e9629cb13a11195051a73acf414812700badbd", + "sha256:25eb766c8ad27da0f79420b2af4b85d29914ba0edf69f547cc4f06ca6f1d403b", + "sha256:27a5e9964ef509016759f2ef3f2c1e13f403725a5e6a1775555994966a66e931", + "sha256:2c71bd45a777433dd9113847af751aae36e448bc6b8c361a566cb043eda6ec30", + "sha256:2cb388a5bfb56df4d9a406783b7f9dbefb888c09b71629351cc6b036e9259370", + "sha256:2d225bb6886591b1746b17c0573e29804619c8f755b5598d875bb4235ea639be", + "sha256:2e5fc14ec6ea568200ea4ef46545073da81900a2b67b3e666f04adf53ad452ec", + "sha256:363f57ca8bc8576195d0540c648aa58ac18cf85b76ad5202b9f976918f4219cf", + "sha256:3c6cc1360c10c17463aadd29dd3af332d4a1adaa8796f6b0e9f9df1fdb0bad62", + "sha256:3d829f975fc2e527a3ef2f9c8f25e553eb7bc779c6665e8e1d52aa22800bb38b", + "sha256:3e3aa8c468af01d70332a382350ee95f6986db479ce7af14d5e81ec52aa2b402", + "sha256:3f61726cae9f65b872502ff3c1496abc93ffbe31b278455c418492016e2afc8f", + "sha256:423fc1ed29f7512fceb727e2d2aecb952c46aa34895e9ed96071821309951123", + "sha256:46e71dbbd12850224243f5d2aeec90f0aaa0f2dde5aeeb8fc8df21e04d99eff9", + "sha256:4d87be612cbef86f994178d5186add3d94e9f31cc3cb499a0482b866ec477603", + "sha256:5693ef74233122f8ebab026817b1b37fe25c411ecfca084b29bc7d6efc548f45", + "sha256:5aa9348186d79a5f232115ed3fa9020eab66d6c3437d72f9d2c8ac0c6858c558", + "sha256:5d873c7de42dea355d73f170be0f23788cf3fa9f7bed718fd2830eefedce01b4", + "sha256:5f6ffe2c6598f7f7207eef9a1228b6f5c818f9f4d53ee920aacd35cec8110438", + "sha256:604428d1b87edbf02b233e2c207d7d528460fa978f9e391bd8aaf9c8311de137", + "sha256:6350b14a40c95ddd53e775dbdbbbc59b124a5c8ecd6fbb09c2e52029f7a9f480", + "sha256:6e2df67b8014767d0f785baa98393725739287684b9f8d8a1001eb2839031447", + "sha256:6e96f5ed1b83a8ddb07909b45bd94833b0710f738115751cdaa9da1fb0cb66e8", + "sha256:6e9e7db18b4539a29cc5ad8c8b252738a30e2b13f033c2d6e9d0549b45841c04", + "sha256:70ec754cc2a769bcd218ed8d7209055667b30860ffecb8633a834dde27d6307c", + "sha256:7b645f491f3c48d3f8a00d1fce07445fab7347fec54a3e65f0725d730d5b99cb", + "sha256:7fa3d25e81bfe6a89718e9791128398a50dec6d57faf23770787ff441d851967", + "sha256:81df9cbcbb6c260de1e007e58c011bfebe2dafc8435107b0537f393dd38c8b1b", + "sha256:8572132c7be52632201a35f5e08348137f658e5ffd21f51f94572ca6c05ea81d", + "sha256:87b4aafed34653e465eb77b7c93ef058516cb5acf3eb21e42f33928616172def", + "sha256:8e332c210b14b57904869ca9f9bf4ca32f5427a03eeb625da9b616c85a3a506c", + "sha256:9893d1aa45a7f8b3bc4510f6ccf8db8c3b62120917af15e3de247f0780294b92", + "sha256:9edf3fc590cc2ec20dc9d7a45108b5bbaf21c0d89f9fd3fd1685e223771dc0b2", + "sha256:9fdf06fd06c32205a07e47328ab49c40fc1407cdec801d698a7c41167ea45113", + "sha256:a02413bc474feda2849c59ed2dfb2cddb4cd3d2f03a2fedec51d6e959d9b608b", + "sha256:a1d9697f3337a89691e3bd8dc56dea45a6f6d975f92e7d5f773bc715c15dde28", + "sha256:a571f035a47212288e3b3519944f6bf4ac7bc7553243e41eac50dd48552b6df7", + "sha256:ab3d732ad50a4fbd04a4490ef08acd0517b6ae6b77eb967251f4c263011a990d", + "sha256:ae0a5da8f35a5be197f328d4727dbcfafa53d1824fac3d96cdd3a642fe09394f", + "sha256:b067cb952ce8bf40115f6c19f478dc71c5e719b7fbaa511359795dfd9d1a6468", + "sha256:b2ee7288b85959797970114deae81ab41b731f19ebcd3bd499ae9ca0e3f1d2c8", + "sha256:b81f90dcc6c85a9b7f29873beb56c94c85d6f0dac2ea8b60d995bd18bf3e2aae", + "sha256:ba0cab91b3956dfa9f512147860783a1829a8d905ee218a9837c18f683239611", + "sha256:baa386875b70cbd81798fa9f71be689c1bf484f65fd6fb08d051a0ee4e79924d", + "sha256:bbe6013f9f791944ed31ca08b077e26249309639313fff132bfbf3ba105673b9", + "sha256:bea88d71630c5900690fcb03161ab18f8f244805c59e2e0dc4ffadae0a7ee0ca", + "sha256:befe90632d66caaf72e8b2ed4d7f02b348913813c8b0a32fae1cc5fe3730902f", + "sha256:c3181df4583c4d3994d31fb235dc681d2aaad744fbdbf94c4802485ececdecf2", + "sha256:c4e37d36f0d19f0a4413d3e18c0d03d0c268ada2061868c1e6f5ab1a6d575077", + "sha256:c588f6abc13f78a67044c6b1273a99e1cf31038ad51815b3b016ce699f0d75c2", + "sha256:cbe83a6bbdf207ff0541de01e11904827540aa069293696dd528a6640bd6a5f6", + "sha256:d554236b2a2006e0ce16315c16eaa0d628dab009c33b63ea03f41c6107958374", + "sha256:dbcf72a37f0b3316e993e13ecf32f10c0e1259c28ffd0a85cee26e8549595fbc", + "sha256:dc284bbc8d7c78a6c69e0c7325ab46ee5e40bb4d50e494d8131a07ef47500e9e", + "sha256:dff6cdf35e31d1315790149fee351f9e52978130cef6c87c4b6c9b3baf78bc53", + "sha256:e469d01137942849cff40517c97a30a93ae79917752b34029f0ec72df6b46399", + "sha256:eb809e816916a3b210bed3c82fb88eaf16e8afcf9c115ebb2bacede1797d2547", + "sha256:ed2fcf7a07334c77fc8a230755c2209223a7cc44fc27597729b8ef5425aa61a3", + "sha256:f44069528d45a933997a6fef143030d8ca8042f0dfaad753e2906398290e2870", + "sha256:f764ba54e33daf20e167915edc443b6f88956f37fb606449b4a5b10ba42235a5", + "sha256:fc4e7fa5414512b481a2483775a8e8be7803a35b30ca805afa4998a84f9fd9e8", + "sha256:ffefa1374cd508d633646d51a8e9277763a9b78ae71324183693959cf94635a7" + ], + "version": "==12.0" + }, "yarl": { "hashes": [ "sha256:047b258e00b99091b6f90355521f026238c63bd76dcf996d93527bb13320eefd", diff --git a/bot.py b/bot.py old mode 100644 new mode 100755 index 1f31a56..45c8943 --- a/bot.py +++ b/bot.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python + import asyncio import datetime import os @@ -8,7 +10,8 @@ import aiohttp from dotenv import load_dotenv from loguru import logger -from pipecat.frames.frames import EndFrame, TextFrame + +from pipecat.frames.frames import EndFrame, LLMFullResponseEndFrame, TextFrame from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask @@ -21,7 +24,6 @@ DailyTranscriptionSettings, DailyTransport) from pipecat.vad.silero import SileroVADAnalyzer - from processors import BucketLogger, ConversationLogger, ConversationProcessor from prompts import get_llm_base_prompt from runner import configure @@ -148,7 +150,7 @@ async def on_participant_joined(transport, participant): logger.info(f"Participant {participant_name} joined. Total participants: {participant_count}") conversation_processor.add_user_mapping(participant["id"], participant_name) await task.queue_frames( - [TextFrame(f"Hallo {participant_name}! Ich bin {bot_name}. Willkommen in unserem Gespräch!")] + [TextFrame(f"Hallo {participant_name}! Ich bin {bot_name}."), LLMFullResponseEndFrame()] ) @transport.event_handler("on_participant_left") @@ -161,7 +163,8 @@ async def on_participant_left(transport, participant, reason): [ TextFrame( f"Auf Wiedersehen {participant_name}! Ich, {bot_name}, wünsche dir alles Gute und hoffe, wir sehen uns bald wieder." - ) + ), + LLMFullResponseEndFrame(), ] ) if participant_count == 0: diff --git a/participant.py b/participant.py index dab6706..234b9db 100644 --- a/participant.py +++ b/participant.py @@ -1,9 +1,3 @@ -# -# Copyright (c) 2024, Daily -# -# SPDX-License-Identifier: BSD 2-Clause License -# - import asyncio import os import random @@ -12,15 +6,16 @@ import aiohttp from dotenv import load_dotenv from loguru import logger -from pipecat.frames.frames import EndFrame, TextFrame + +from pipecat.frames.frames import LLMFullResponseEndFrame, TextFrame from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineTask +from pipecat.processors.logger import FrameLogger from pipecat.services.elevenlabs import ElevenLabsTTSService from pipecat.transports.services.daily import (DailyParams, DailyTranscriptionSettings, DailyTransport) - from runner import configure load_dotenv(override=True) @@ -74,7 +69,10 @@ async def main(room_url): runner = PipelineRunner() - task = PipelineTask(Pipeline([tts, transport.output()])) + frame_logger_1 = FrameLogger("FL1", "green") + frame_logger_2 = FrameLogger("FL2", "red") + + task = PipelineTask(Pipeline([tts, frame_logger_1, transport.output(), frame_logger_2])) # Register an event handler so we can play the audio when the # participant joins. @@ -84,6 +82,8 @@ async def on_participant_joined(transport, participant): transport.capture_participant_transcription(participant["id"]) await asyncio.sleep(2) await task.queue_frames([TextFrame(f"Hallo, wie geht es {participant_name}?")]) + await task.queue_frame(LLMFullResponseEndFrame()) + # sleep for a bit to give the participant time to hear the audio # await task.queue_frames([TextFrame(f"Wer bist du?")]) # await task.queue_frames([EndFrame()]) @@ -92,6 +92,7 @@ async def on_participant_joined(transport, participant): async def on_participant_left(transport, participant, reason): participant_name = participant["info"]["userName"] or "" await task.queue_frames([TextFrame(f"Auf wiedersehen {participant_name}")]) + await task.queue_frame(LLMFullResponseEndFrame()) logger.info(f"Participant {participant_name} left") await runner.run(task) diff --git a/processors.py b/processors.py index 2f13eb7..db11b8f 100644 --- a/processors.py +++ b/processors.py @@ -5,6 +5,7 @@ import boto3 from botocore.exceptions import ClientError from loguru import logger + from pipecat.frames.frames import (Frame, InterimTranscriptionFrame, LLMFullResponseEndFrame, TranscriptionFrame, UserStartedSpeakingFrame, @@ -113,11 +114,13 @@ async def process_frame(self, frame: Frame, direction: FrameDirection): "timestamp": timestamp, } self._aggregation_detailed.append(entry) + logger.debug(f"Added conversation entry: {entry}") async def _push_aggregation(self): self._aggregation = self.format_aggregation() self._aggregation_detailed = [] await super()._push_aggregation() + logger.debug("Pushed conversation aggregation") def format_aggregation(self): """ diff --git a/talking_animation.py b/talking_animation.py index 178b63d..1fe635d 100644 --- a/talking_animation.py +++ b/talking_animation.py @@ -2,9 +2,9 @@ from loguru import logger from PIL import Image -from pipecat.frames.frames import (AudioRawFrame, EndFrame, Frame, - ImageRawFrame, LLMMessagesFrame, - SpriteFrame, TextFrame, TTSStoppedFrame) + +from pipecat.frames.frames import (Frame, OutputImageRawFrame, SpriteFrame, + TTSAudioRawFrame, TTSStoppedFrame) from pipecat.processors.frame_processor import FrameDirection, FrameProcessor sprites = [] @@ -24,7 +24,7 @@ full_path = os.path.join(sprite_dir, png_file) # logger.info(f"Loading sprite: {full_path}") with Image.open(full_path) as img: - sprites.append(ImageRawFrame(image=img.tobytes(), size=img.size, format=img.format)) + sprites.append(OutputImageRawFrame(image=img.tobytes(), size=img.size, format=img.format)) # Add reversed sprites to create a loop sprites.extend(sprites[::-1]) @@ -52,7 +52,7 @@ def quiet_frame(self): async def process_frame(self, frame: Frame, direction: FrameDirection): await super().process_frame(frame, direction) - if isinstance(frame, AudioRawFrame): + if isinstance(frame, TTSAudioRawFrame): if not self._is_talking: await self.push_frame(talking_frame) self._is_talking = True