Commit
bump pipecat
durandom committed Oct 11, 2024
1 parent 775bd15 commit 6f4b374
Showing 7 changed files with 106 additions and 22 deletions.
2 changes: 1 addition & 1 deletion Makefile
@@ -19,7 +19,7 @@ test:
 
 .PHONY: bot
 bot: bot.py
-	poetry run ./bot.py
+	pipenv run ./bot.py
 
 bot-runner:
 	pipenv run env RUN_AS_PROCESS=true python ./bot_runner.py
2 changes: 1 addition & 1 deletion Pipfile
@@ -6,7 +6,7 @@ verify_ssl = true
 [packages]
 boto3 = "*"
 fastapi = "*"
-pipecat-ai = {extras = ["silero,openai,daily"] }
+pipecat-ai = {extras = ["silero,openai,daily,elevenlabs"], version = "==0.0.43"}
 python-dotenv = "*"
 python-multipart = "*"
 requests = "*"
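The Pipfile change pins pipecat-ai to 0.0.43 and adds the elevenlabs extra, which the ElevenLabsTTSService import in participant.py presumably relies on; together with the Makefile switch from poetry run to pipenv run, the environment has to be re-resolved with pipenv (for example pipenv install or pipenv sync) before make bot works again. A quick sanity check, as a sketch rather than part of the commit:

    # Sketch only (not part of this commit): confirm the pipenv environment
    # picked up the pinned version and the new extra.
    from importlib.metadata import version

    from pipecat.services.elevenlabs import ElevenLabsTTSService  # noqa: F401  (relies on the "elevenlabs" extra added above)

    print(version("pipecat-ai"))  # expected to print 0.0.43, per the pin above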
81 changes: 79 additions & 2 deletions Pipfile.lock

(Generated lock file; diff not rendered.)

11 changes: 7 additions & 4 deletions bot.py
file mode changed from 100644 to 100755 (now executable)
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+
 import asyncio
 import datetime
 import os
@@ -8,7 +10,8 @@
 import aiohttp
 from dotenv import load_dotenv
 from loguru import logger
-from pipecat.frames.frames import EndFrame, TextFrame
+
+from pipecat.frames.frames import EndFrame, LLMFullResponseEndFrame, TextFrame
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -21,7 +24,6 @@
                                                DailyTranscriptionSettings,
                                                DailyTransport)
 from pipecat.vad.silero import SileroVADAnalyzer
-
 from processors import BucketLogger, ConversationLogger, ConversationProcessor
 from prompts import get_llm_base_prompt
 from runner import configure
@@ -148,7 +150,7 @@ async def on_participant_joined(transport, participant):
         logger.info(f"Participant {participant_name} joined. Total participants: {participant_count}")
         conversation_processor.add_user_mapping(participant["id"], participant_name)
         await task.queue_frames(
-            [TextFrame(f"Hallo {participant_name}! Ich bin {bot_name}. Willkommen in unserem Gespräch!")]
+            [TextFrame(f"Hallo {participant_name}! Ich bin {bot_name}."), LLMFullResponseEndFrame()]
         )
 
     @transport.event_handler("on_participant_left")
@@ -161,7 +163,8 @@ async def on_participant_left(transport, participant, reason):
             [
                 TextFrame(
                     f"Auf Wiedersehen {participant_name}! Ich, {bot_name}, wünsche dir alles Gute und hoffe, wir sehen uns bald wieder."
-                )
+                ),
+                LLMFullResponseEndFrame(),
             ]
         )
         if participant_count == 0:
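Across bot.py, every manually queued TextFrame is now followed by an LLMFullResponseEndFrame, presumably so the TTS stage and any response aggregators treat the greeting or farewell as a complete response and flush it immediately, just like a finished LLM answer. A minimal sketch of that pattern; the say() helper is hypothetical and not part of the commit:

    # Sketch under the assumptions above; `say` is a hypothetical helper.
    from pipecat.frames.frames import LLMFullResponseEndFrame, TextFrame
    from pipecat.pipeline.task import PipelineTask


    async def say(task: PipelineTask, text: str) -> None:
        # Queue the text, then mark the "response" as finished so it is
        # spoken right away instead of waiting for further frames.
        await task.queue_frames([TextFrame(text), LLMFullResponseEndFrame()])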
19 changes: 10 additions & 9 deletions participant.py
@@ -1,9 +1,3 @@
-#
-# Copyright (c) 2024, Daily
-#
-# SPDX-License-Identifier: BSD 2-Clause License
-#
-
 import asyncio
 import os
 import random
@@ -12,15 +6,16 @@
 import aiohttp
 from dotenv import load_dotenv
 from loguru import logger
-from pipecat.frames.frames import EndFrame, TextFrame
+
+from pipecat.frames.frames import LLMFullResponseEndFrame, TextFrame
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineTask
+from pipecat.processors.logger import FrameLogger
 from pipecat.services.elevenlabs import ElevenLabsTTSService
 from pipecat.transports.services.daily import (DailyParams,
                                                DailyTranscriptionSettings,
                                                DailyTransport)
-
 from runner import configure
 
 load_dotenv(override=True)
@@ -74,7 +69,10 @@ async def main(room_url):
 
     runner = PipelineRunner()
 
-    task = PipelineTask(Pipeline([tts, transport.output()]))
+    frame_logger_1 = FrameLogger("FL1", "green")
+    frame_logger_2 = FrameLogger("FL2", "red")
+
+    task = PipelineTask(Pipeline([tts, frame_logger_1, transport.output(), frame_logger_2]))
 
     # Register an event handler so we can play the audio when the
     # participant joins.
@@ -84,6 +82,8 @@ async def on_participant_joined(transport, participant):
         transport.capture_participant_transcription(participant["id"])
         await asyncio.sleep(2)
         await task.queue_frames([TextFrame(f"Hallo, wie geht es {participant_name}?")])
+        await task.queue_frame(LLMFullResponseEndFrame())
+
         # sleep for a bit to give the participant time to hear the audio
         # await task.queue_frames([TextFrame(f"Wer bist du?")])
         # await task.queue_frames([EndFrame()])
@@ -92,6 +92,7 @@ async def on_participant_joined(transport, participant):
     async def on_participant_left(transport, participant, reason):
         participant_name = participant["info"]["userName"] or ""
         await task.queue_frames([TextFrame(f"Auf wiedersehen {participant_name}")])
+        await task.queue_frame(LLMFullResponseEndFrame())
         logger.info(f"Participant {participant_name} left")
 
     await runner.run(task)
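participant.py drops the Daily copyright header, adopts the same LLMFullResponseEndFrame pattern, and wraps transport.output() with two FrameLogger processors ("FL1" in green before the output, "FL2" in red after it), apparently to trace which frames reach and pass the output transport. A condensed sketch of that wiring; the build_task helper is illustrative, and tts and transport are assumed to be set up as in the file:

    # Sketch only: `tts` and `transport` are assumed to be the
    # ElevenLabsTTSService and DailyTransport constructed in participant.py.
    from pipecat.pipeline.pipeline import Pipeline
    from pipecat.pipeline.task import PipelineTask
    from pipecat.processors.logger import FrameLogger


    def build_task(tts, transport) -> PipelineTask:
        fl1 = FrameLogger("FL1", "green")  # frames arriving from the TTS stage
        fl2 = FrameLogger("FL2", "red")    # frames that made it past the output transport
        return PipelineTask(Pipeline([tts, fl1, transport.output(), fl2]))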
3 changes: 3 additions & 0 deletions processors.py
@@ -5,6 +5,7 @@
 import boto3
 from botocore.exceptions import ClientError
 from loguru import logger
+
 from pipecat.frames.frames import (Frame, InterimTranscriptionFrame,
                                    LLMFullResponseEndFrame, TranscriptionFrame,
                                    UserStartedSpeakingFrame,
@@ -113,11 +114,13 @@ async def process_frame(self, frame: Frame, direction: FrameDirection):
                 "timestamp": timestamp,
             }
             self._aggregation_detailed.append(entry)
+            logger.debug(f"Added conversation entry: {entry}")
 
     async def _push_aggregation(self):
         self._aggregation = self.format_aggregation()
         self._aggregation_detailed = []
         await super()._push_aggregation()
+        logger.debug("Pushed conversation aggregation")
 
     def format_aggregation(self):
         """
10 changes: 5 additions & 5 deletions talking_animation.py
@@ -2,9 +2,9 @@
 
 from loguru import logger
 from PIL import Image
-from pipecat.frames.frames import (AudioRawFrame, EndFrame, Frame,
-                                   ImageRawFrame, LLMMessagesFrame,
-                                   SpriteFrame, TextFrame, TTSStoppedFrame)
+
+from pipecat.frames.frames import (Frame, OutputImageRawFrame, SpriteFrame,
+                                   TTSAudioRawFrame, TTSStoppedFrame)
 from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
 
 sprites = []
@@ -24,7 +24,7 @@
     full_path = os.path.join(sprite_dir, png_file)
     # logger.info(f"Loading sprite: {full_path}")
     with Image.open(full_path) as img:
-        sprites.append(ImageRawFrame(image=img.tobytes(), size=img.size, format=img.format))
+        sprites.append(OutputImageRawFrame(image=img.tobytes(), size=img.size, format=img.format))
 
 # Add reversed sprites to create a loop
 sprites.extend(sprites[::-1])
@@ -52,7 +52,7 @@ def quiet_frame(self):
     async def process_frame(self, frame: Frame, direction: FrameDirection):
         await super().process_frame(frame, direction)
 
-        if isinstance(frame, AudioRawFrame):
+        if isinstance(frame, TTSAudioRawFrame):
             if not self._is_talking:
                 await self.push_frame(talking_frame)
                 self._is_talking = True
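talking_animation.py is updated for the frame classes renamed in this pipecat release: sprite frames become OutputImageRawFrame and the talking check keys off TTSAudioRawFrame instead of the old ImageRawFrame/AudioRawFrame names, while the unused EndFrame, LLMMessagesFrame and TextFrame imports are dropped. A stripped-down sketch of the talking/quiet logic with the new names; the real processor also pushes sprite animation frames:

    # Illustrative only; class names are taken from the diff above and
    # pipecat-ai 0.0.43 is assumed.
    from pipecat.frames.frames import Frame, TTSAudioRawFrame, TTSStoppedFrame
    from pipecat.processors.frame_processor import FrameDirection, FrameProcessor


    class TalkingFlag(FrameProcessor):
        """Tracks whether bot TTS audio is currently flowing."""

        def __init__(self):
            super().__init__()
            self._is_talking = False

        async def process_frame(self, frame: Frame, direction: FrameDirection):
            await super().process_frame(frame, direction)
            if isinstance(frame, TTSAudioRawFrame):
                self._is_talking = True   # bot audio is playing
            elif isinstance(frame, TTSStoppedFrame):
                self._is_talking = False  # bot finished speaking
            await self.push_frame(frame, direction)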
