diff --git a/meminto/chunking.py b/meminto/chunking.py index 704c448..1249d4c 100644 --- a/meminto/chunking.py +++ b/meminto/chunking.py @@ -38,7 +38,7 @@ def _number_of_tokens_per_chunk( number_of_chunks = token_count_transcript // token_count_per_chunk + 1 number_of_tokens_per_chunk = token_count_transcript // number_of_chunks + 1 - print(f"Spliting transcript in chunks:") + print("Spliting transcript in chunks:") print(f"LLM max. token count: {max_tokens}") print(f"Token count of system prompt: {token_count_system_prompt}") print(f"Token count reserved for response: {token_count_reserved_for_response}") diff --git a/meminto/diarizer.py b/meminto/diarizer.py index 5bf7840..877d3ee 100644 --- a/meminto/diarizer.py +++ b/meminto/diarizer.py @@ -1,6 +1,6 @@ from pathlib import Path -from pyannote.audio import Pipeline # type: ignore -from pyannote.core import Annotation # type: ignore +from pyannote.audio import Pipeline +from pyannote.core import Annotation from meminto.decorators import log_time diff --git a/meminto/helpers.py b/meminto/helpers.py index e337936..7987074 100644 --- a/meminto/helpers.py +++ b/meminto/helpers.py @@ -32,7 +32,7 @@ def parse_input_file_path(input_file: str) -> Path: if not file_path.is_file(): raise Exception(f"Input file path '{file_path}' does not reference a file.") - if not file_path.suffix in ALLOWED_INPUT_FILE_TYPE: + if file_path.suffix not in ALLOWED_INPUT_FILE_TYPE: raise Exception( f"Invalid input file type. Only one of the following file type are allowed: {', '.join(str(file_type) for file_type in ALLOWED_INPUT_FILE_TYPE)}" ) diff --git a/meminto/llm/llm.py b/meminto/llm/llm.py index 9aa67a0..65efb80 100644 --- a/meminto/llm/llm.py +++ b/meminto/llm/llm.py @@ -1,4 +1,4 @@ -import requests # type: ignore +import requests class LLM: diff --git a/meminto/llm/tokenizers.py b/meminto/llm/tokenizers.py index 3d19f9b..29f4783 100644 --- a/meminto/llm/tokenizers.py +++ b/meminto/llm/tokenizers.py @@ -1,6 +1,6 @@ import tiktoken from transformers import AutoTokenizer, OpenAIGPTTokenizer -from huggingface_hub import login +from huggingface_hub import login class Tokenizer: @@ -13,7 +13,7 @@ def _select_tokenizer(self): login(token=self.hugging_face_acces_token) try: tokenizer = AutoTokenizer.from_pretrained(self.model) - except: + except(Exception): if self.model in tiktoken.model.MODEL_TO_ENCODING.keys(): tokenizer = OpenAIGPTTokenizer.from_pretrained("openai-gpt") else: diff --git a/meminto/meeting_minutes_generator.py b/meminto/meeting_minutes_generator.py index f02584f..14096fa 100644 --- a/meminto/meeting_minutes_generator.py +++ b/meminto/meeting_minutes_generator.py @@ -1,5 +1,3 @@ -import os -from typing import Tuple from meminto.decorators import log_time from meminto.helpers import Language from meminto.llm.llm import LLM @@ -16,7 +14,7 @@ ) from meminto.llm.tokenizers import Tokenizer from meminto.transcriber import TranscriptSection -from huggingface_hub import login + class MeetingMinutesGenerator: diff --git a/poetry.lock b/poetry.lock index 0c63af4..3b50e3c 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2626,6 +2626,32 @@ files = [ {file = "ruamel.yaml.clib-0.2.8.tar.gz", hash = "sha256:beb2e0404003de9a4cab9753a8805a8fe9320ee6673136ed7f04255fe60bb512"}, ] +[[package]] +name = "ruff" +version = "0.4.2" +description = "An extremely fast Python linter and code formatter, written in Rust." +optional = false +python-versions = ">=3.7" +files = [ + {file = "ruff-0.4.2-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:8d14dc8953f8af7e003a485ef560bbefa5f8cc1ad994eebb5b12136049bbccc5"}, + {file = "ruff-0.4.2-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:24016ed18db3dc9786af103ff49c03bdf408ea253f3cb9e3638f39ac9cf2d483"}, + {file = "ruff-0.4.2-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0e2e06459042ac841ed510196c350ba35a9b24a643e23db60d79b2db92af0c2b"}, + {file = "ruff-0.4.2-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3afabaf7ba8e9c485a14ad8f4122feff6b2b93cc53cd4dad2fd24ae35112d5c5"}, + {file = "ruff-0.4.2-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:799eb468ea6bc54b95527143a4ceaf970d5aa3613050c6cff54c85fda3fde480"}, + {file = "ruff-0.4.2-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:ec4ba9436a51527fb6931a8839af4c36a5481f8c19e8f5e42c2f7ad3a49f5069"}, + {file = "ruff-0.4.2-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6a2243f8f434e487c2a010c7252150b1fdf019035130f41b77626f5655c9ca22"}, + {file = "ruff-0.4.2-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8772130a063f3eebdf7095da00c0b9898bd1774c43b336272c3e98667d4fb8fa"}, + {file = "ruff-0.4.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6ab165ef5d72392b4ebb85a8b0fbd321f69832a632e07a74794c0e598e7a8376"}, + {file = "ruff-0.4.2-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:1f32cadf44c2020e75e0c56c3408ed1d32c024766bd41aedef92aa3ca28eef68"}, + {file = "ruff-0.4.2-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:22e306bf15e09af45ca812bc42fa59b628646fa7c26072555f278994890bc7ac"}, + {file = "ruff-0.4.2-py3-none-musllinux_1_2_i686.whl", hash = "sha256:82986bb77ad83a1719c90b9528a9dd663c9206f7c0ab69282af8223566a0c34e"}, + {file = "ruff-0.4.2-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:652e4ba553e421a6dc2a6d4868bc3b3881311702633eb3672f9f244ded8908cd"}, + {file = "ruff-0.4.2-py3-none-win32.whl", hash = "sha256:7891ee376770ac094da3ad40c116258a381b86c7352552788377c6eb16d784fe"}, + {file = "ruff-0.4.2-py3-none-win_amd64.whl", hash = "sha256:5ec481661fb2fd88a5d6cf1f83403d388ec90f9daaa36e40e2c003de66751798"}, + {file = "ruff-0.4.2-py3-none-win_arm64.whl", hash = "sha256:cbd1e87c71bca14792948c4ccb51ee61c3296e164019d2d484f3eaa2d360dfaf"}, + {file = "ruff-0.4.2.tar.gz", hash = "sha256:33bcc160aee2520664bc0859cfeaebc84bb7323becff3f303b8f1f2d81cb4edc"}, +] + [[package]] name = "safetensors" version = "0.4.3" @@ -3656,6 +3682,20 @@ rich = ">=10.11.0" shellingham = ">=1.3.0" typing-extensions = ">=3.7.4.3" +[[package]] +name = "types-requests" +version = "2.31.0.20240406" +description = "Typing stubs for requests" +optional = false +python-versions = ">=3.8" +files = [ + {file = "types-requests-2.31.0.20240406.tar.gz", hash = "sha256:4428df33c5503945c74b3f42e82b181e86ec7b724620419a2966e2de604ce1a1"}, + {file = "types_requests-2.31.0.20240406-py3-none-any.whl", hash = "sha256:6216cdac377c6b9a040ac1c0404f7284bd13199c0e1bb235f4324627e8898cf5"}, +] + +[package.dependencies] +urllib3 = ">=2" + [[package]] name = "typing-extensions" version = "4.11.0" @@ -3801,4 +3841,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "867c5ec0bf7330002d9f89102bae6dae73205d920109dac3f011af1548b4bf92" +content-hash = "511bac126b4852c673d9d913018375d475c30f7e393706924c25bda460913db5" diff --git a/pyproject.toml b/pyproject.toml index 239f582..75a6113 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,19 @@ transformers = "^4.39.3" pyannote-pipeline = "^3.0.1" speechbrain = "0.5.15" python-dotenv = "^1.0.1" + +[tool.poetry.group.dev.dependencies] +ruff = "^0.4.2" mypy = "^1.9.0" +types-requests = "^2.31.0.20240406" +[[tool.mypy.overrides]] +module = [ + "transformers", + "huggingface_hub", + "torchaudio", + "pyannote.*", +] +ignore_missing_imports = true