From 4a288949fc6ddddaee1ef39e48e8b8c32ac4c21a Mon Sep 17 00:00:00 2001
From: keetrap
Date: Wed, 25 Sep 2024 15:05:57 +0530
Subject: [PATCH 1/2] Added TokenCountEstimatorMetric

---
Review notes (this section is ignored by `git am`):
* calculate() is annotated Optional[int] because the unknown-encoding path
  returns None; the `encoding` parameter is typed, documented and no longer
  rebound to the Encoding object.
* The error is reported through the module logger instead of print().
* Text containing special-token markers is counted as ordinary text rather
  than raising ValueError.
* Tests assert directly (no nested helper functions), cover the None path,
  and the test file now ends with a newline.
* `index` lines are omitted for both new files because their contents were
  changed during review.

 .../concrete/TokenCountEstimatorMetric.py     | 40 ++++++++++++++++++
 .../TokenCountEstimatorMetric_unit_test.py    | 41 +++++++++++++++++++
 2 files changed, 81 insertions(+)
 create mode 100644 pkgs/swarmauri/swarmauri/metrics/concrete/TokenCountEstimatorMetric.py
 create mode 100644 pkgs/swarmauri/tests/unit/metrics/TokenCountEstimatorMetric_unit_test.py

diff --git a/pkgs/swarmauri/swarmauri/metrics/concrete/TokenCountEstimatorMetric.py b/pkgs/swarmauri/swarmauri/metrics/concrete/TokenCountEstimatorMetric.py
new file mode 100644
--- /dev/null
+++ b/pkgs/swarmauri/swarmauri/metrics/concrete/TokenCountEstimatorMetric.py
@@ -0,0 +1,40 @@
+import logging
+from typing import Any, Literal, Optional
+
+import tiktoken
+from swarmauri.metrics.base.MetricBase import MetricBase
+from swarmauri.metrics.base.MetricCalculateMixin import MetricCalculateMixin
+
+logger = logging.getLogger(__name__)
+
+
+class TokenCountEstimatorMetric(MetricBase, MetricCalculateMixin):
+    """
+    A metric class to estimate the number of tokens in a given text.
+    """
+
+    unit: str = "tokens"
+    type: Literal['TokenCountEstimatorMetric'] = 'TokenCountEstimatorMetric'
+
+    def calculate(self, text: str, encoding: str = 'cl100k_base') -> Optional[int]:
+        """
+        Calculate the number of tokens in the given text.
+
+        Args:
+            text (str): The input text to calculate token count for.
+            encoding (str): Name of the tiktoken encoding to tokenize with.
+
+        Returns:
+            Optional[int]: The number of tokens in the text, or None if the
+                encoding name is not known to tiktoken.
+        """
+        try:
+            tokenizer = tiktoken.get_encoding(encoding)
+        except ValueError as e:
+            # Keep the best-effort contract: unknown encoding -> None.
+            logger.error("Unknown tiktoken encoding %r: %s", encoding, e)
+            return None
+
+        # disallowed_special=() counts special-token markers such as
+        # "<|endoftext|>" as ordinary text instead of raising ValueError.
+        return len(tokenizer.encode(text, disallowed_special=()))
diff --git a/pkgs/swarmauri/tests/unit/metrics/TokenCountEstimatorMetric_unit_test.py b/pkgs/swarmauri/tests/unit/metrics/TokenCountEstimatorMetric_unit_test.py
new file mode 100644
--- /dev/null
+++ b/pkgs/swarmauri/tests/unit/metrics/TokenCountEstimatorMetric_unit_test.py
@@ -0,0 +1,41 @@
+import pytest
+from swarmauri.metrics.concrete.TokenCountEstimatorMetric import TokenCountEstimatorMetric as Metric
+
+
+@pytest.mark.unit
+def test_ubc_resource():
+    assert Metric().resource == 'Metric'
+
+
+@pytest.mark.unit
+def test_ubc_type():
+    metric = Metric()
+    assert metric.type == 'TokenCountEstimatorMetric'
+
+
+@pytest.mark.unit
+def test_serialization():
+    metric = Metric()
+    assert metric.id == Metric.model_validate_json(metric.model_dump_json()).id
+
+
+@pytest.mark.unit
+def test_metric_value():
+    text = "Lorem ipsum odor amet, consectetuer adipiscing elit."
+    assert Metric().calculate(text) == 11
+
+
+@pytest.mark.unit
+def test_metric_value_with_special_token_text():
+    # Special-token markers in the input are counted as plain text.
+    assert Metric().calculate("<|endoftext|>") > 0
+
+
+@pytest.mark.unit
+def test_metric_unknown_encoding():
+    assert Metric().calculate("Lorem ipsum", encoding="no-such-encoding") is None
+
+
+@pytest.mark.unit
+def test_metric_unit():
+    assert Metric().unit == "tokens"

From b12da1b3cdb5e4c7d70d5a9a6a9084da95a5057a Mon Sep 17 00:00:00 2001
From: keetrap
Date: Wed, 25 Sep 2024 16:49:42 +0530
Subject: [PATCH 2/2] Moved TokenCountEstimatorMetric to community

---
Review notes (this section is ignored by `git am`):
* The metric now lives in the swarmauri_community package, so the moved test
  must import it from there; a pure 100% rename left the test importing a
  module path that no longer exists.
* The similarity figure for the test rename is an estimate and its `index`
  line is omitted, because the blob contents changed during review.
* Context indentation in the setup.py hunk was reconstructed; if it does not
  match the tree, apply with `git apply --ignore-whitespace`.

 pkgs/community/setup.py                                       | 3 ++-
 .../metrics/concrete/TokenCountEstimatorMetric.py             | 0
 .../tests/unit/metrics/TokenCountEstimatorMetric_unit_test.py | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)
 rename pkgs/{swarmauri/swarmauri => community/swarmauri_community}/metrics/concrete/TokenCountEstimatorMetric.py (100%)
 rename pkgs/{swarmauri => community}/tests/unit/metrics/TokenCountEstimatorMetric_unit_test.py (90%)

diff --git a/pkgs/community/setup.py b/pkgs/community/setup.py
index 364debd1b..1193db205 100644
--- a/pkgs/community/setup.py
+++ b/pkgs/community/setup.py
@@ -62,7 +62,8 @@
             "pacmap",
             "tf-keras",
             "pinecone",
-            "neo4j"
+            "neo4j",
+            "tiktoken"
         ]
     },
     classifiers=[
diff --git a/pkgs/swarmauri/swarmauri/metrics/concrete/TokenCountEstimatorMetric.py b/pkgs/community/swarmauri_community/metrics/concrete/TokenCountEstimatorMetric.py
similarity index 100%
rename from pkgs/swarmauri/swarmauri/metrics/concrete/TokenCountEstimatorMetric.py
rename to pkgs/community/swarmauri_community/metrics/concrete/TokenCountEstimatorMetric.py
diff --git a/pkgs/swarmauri/tests/unit/metrics/TokenCountEstimatorMetric_unit_test.py b/pkgs/community/tests/unit/metrics/TokenCountEstimatorMetric_unit_test.py
similarity index 90%
rename from pkgs/swarmauri/tests/unit/metrics/TokenCountEstimatorMetric_unit_test.py
rename to pkgs/community/tests/unit/metrics/TokenCountEstimatorMetric_unit_test.py
--- a/pkgs/swarmauri/tests/unit/metrics/TokenCountEstimatorMetric_unit_test.py
+++ b/pkgs/community/tests/unit/metrics/TokenCountEstimatorMetric_unit_test.py
@@ -1,5 +1,5 @@
 import pytest
-from swarmauri.metrics.concrete.TokenCountEstimatorMetric import TokenCountEstimatorMetric as Metric
+from swarmauri_community.metrics.concrete.TokenCountEstimatorMetric import TokenCountEstimatorMetric as Metric
 
 
 @pytest.mark.unit