swarmauri · cobycloud · Sep 23, 2024 · Sep 23, 2024 · Sep 23, 2024 · Sep 23, 2024
diff --git a/pkgs/community/swarmauri_community/metrics/__init__.py b/pkgs/community/swarmauri_community/metrics/__init__.py
diff --git a/pkgs/community/swarmauri_community/metrics/base/__init__.py b/pkgs/community/swarmauri_community/metrics/base/__init__.py
diff --git a/...munity/metrics/MutualInformationMetric.py → ...trics/concrete/MutualInformationMetric.py b/...munity/metrics/MutualInformationMetric.py → ...trics/concrete/MutualInformationMetric.py
diff --git a/pkgs/community/swarmauri_community/metrics/concrete/__init__.py b/pkgs/community/swarmauri_community/metrics/concrete/__init__.py
diff --git a/pkgs/community/swarmauri_community/parsers/__init__.py b/pkgs/community/swarmauri_community/parsers/__init__.py
diff --git a/pkgs/community/swarmauri_community/parsers/base/__init__.py b/pkgs/community/swarmauri_community/parsers/base/__init__.py
diff --git a/...rmauri_community/parsers/FitzPdfParser.py → ...mmunity/parsers/concrete/FitzPdfParser.py b/...rmauri_community/parsers/FitzPdfParser.py → ...mmunity/parsers/concrete/FitzPdfParser.py
diff --git a/pkgs/community/swarmauri_community/parsers/concrete/__init__.py b/pkgs/community/swarmauri_community/parsers/concrete/__init__.py
diff --git a/...mmunity/tests/llms/ShuttleAIModel_test.py → ...ty/tests/unit/llms/ShuttleAIModel_test.py b/...mmunity/tests/llms/ShuttleAIModel_test.py → ...ty/tests/unit/llms/ShuttleAIModel_test.py
diff --git a/pkgs/community/tests/unit/metrics/MutualInformationMetric_test.py b/pkgs/community/tests/unit/metrics/MutualInformationMetric_test.py
@@ -1,31 +1,31 @@
 import pytest
-from swarmauri_community.metrics.MutualInformationMetric import (
+from swarmauri_community.metrics.concrete.MutualInformationMetric import (
     MutualInformationMetric as Metric,
 )
 
 
 @pytest.mark.unit
 def test_ubc_resource():
-    assert Metric(unit="points", value=10).resource == "Metric"
+    assert Metric(value=10).resource == "Metric"
 
 
 @pytest.mark.unit
 def test_ubc_type():
-    metric = Metric(unit="points", value=10)
+    metric = Metric(value=10)
     assert metric.type == "MutualInformationMetric"
 
 
 @pytest.mark.unit
 def test_serialization():
-    metric = Metric(unit="points", value=10)
+    metric = Metric(value=10)
     assert metric.id == Metric.model_validate_json(metric.model_dump_json()).id
 
 
 @pytest.mark.unit
 def test_metric_value():
-    assert Metric(unit="points", value=10)() == 10
+    assert Metric(value=10)() == 10
 
 
 @pytest.mark.unit
 def test_metric_unit():
-    assert Metric(unit="points", value=10).unit == "points"
+    assert Metric(value=10).unit == "percentage"
diff --git a/pkgs/community/tests/unit/parsers/FitzPdfParser_test.py b/pkgs/community/tests/unit/parsers/FitzPdfParser_test.py
@@ -1,5 +1,8 @@
+import os
+from unittest import mock
+
 import pytest
-from swarmauri_community.parsers.FitzPdfParser import PDFtoTextParser as Parser
+from swarmauri_community.parsers.concrete.FitzPdfParser import PDFtoTextParser as Parser
 
 
 @pytest.mark.unit
@@ -21,8 +24,45 @@ def test_serialization():
 
 
 @pytest.mark.unit
-def test_parse():
-    documents = Parser().parse(r"resources/demo.pdf")
-    assert documents[0].resource == "Document"
-    assert documents[0].content == "This is a demo pdf \n"
-    assert documents[0].metadata["source"] == r"resources/demo.pdf"
+def test_parser():
+    parser = Parser()
+
+    file_path = "resources/demo.pdf"
+
+    # Mock the pymupdf open method and the returned document
+    with mock.patch("pymupdf.open") as mock_open:
+        # Create a mock document with multiple pages
+        mock_pdf_document = mock.Mock()
+
+        # Make len() of the mocked document report two pages
+        mock_pdf_document.__len__ = mock.Mock(return_value=2)
+
+        # Mocking the first page's get_text method
+        mock_page_1 = mock.Mock()
+        mock_page_1.get_text.return_value = "This is the text from page 1.\n"
+
+        # Mocking the second page's get_text method
+        mock_page_2 = mock.Mock()
+        mock_page_2.get_text.return_value = "This is the text from page 2.\n"
+
+        # Set load_page to return the mocked pages
+        mock_pdf_document.load_page.side_effect = [mock_page_1, mock_page_2]
+
+        # Set the return value of pymupdf.open to our mock PDF document
+        mock_open.return_value = mock_pdf_document
+
+        # Call the parser's parse method
+        documents = parser.parse(file_path)
+
+        # Check that pymupdf.open was called with the correct file path
+        mock_open.assert_called_once_with(file_path)
+
+        # Assertions
+        assert len(documents) == 1, "The parser should return a list with one document."
+        assert (
+            documents[0].content
+            == "This is the text from page 1.\nThis is the text from page 2.\n"
+        ), "The extracted text content is incorrect."
+        assert (
+            documents[0].metadata["source"] == file_path
+        ), "The metadata 'source' should match the file path."
diff --git a/pkgs/swarmauri/swarmauri/metrics/concrete/PatternMatchingMetric.py b/pkgs/swarmauri/swarmauri/metrics/concrete/PatternMatchingMetric.py
@@ -1,14 +1,31 @@
-from typing import Any
+from typing import Any, Literal
 import pandas as pd
 
 from swarmauri.metrics.base.MetricBase import MetricBase
 from swarmauri.metrics.base.MetricCalculateMixin import MetricCalculateMixin
 
 
 class PatternMatchingMetric(MetricBase, MetricCalculateMixin):
+    """
+    A metric class that calculates the percentage of values in a DataFrame column that match a regular-expression pattern.
+    """
+    unit: str = "percentage"
+    type: Literal["PatternMatchingMetric"] = "PatternMatchingMetric"
+
     def calculate(self, data: pd.DataFrame, column: str, pattern: str) -> float:
+        """
+        Calculate the percentage of values in a column that match a regular-expression pattern.
+
+        Parameters:
+            data (pd.DataFrame): The input DataFrame.
+            column (str): The name of the column whose values are tested against the pattern.
+            pattern (str): The regular expression to search for; it is passed to `str.contains` with `regex=True`.
+
+        Returns:
+            float: The percentage of values that match the pattern, computed as the mean of the match mask times 100.
+        """
         # Perform pattern matching
         matches = data[column].str.contains(pattern, regex=True)
-        
+
         # Calculate the proportion of True values (i.e., the percentage of matches)
-        return matches.mean() * 100  # Returning percentage as a float 
+        return matches.mean() * 100  # Returning percentage as a float
diff --git a/pkgs/swarmauri/swarmauri/metrics/concrete/RatioOfSumsMetric.py b/pkgs/swarmauri/swarmauri/metrics/concrete/RatioOfSumsMetric.py
@@ -1,15 +1,35 @@
-from typing import Any
+from typing import Any, Literal
 import pandas as pd
 
 from swarmauri.metrics.base.MetricBase import MetricBase
 from swarmauri.metrics.base.MetricCalculateMixin import MetricCalculateMixin
 
+
 class RatioOfSumsMetric(MetricBase, MetricCalculateMixin):
+    """
+    A metric class to calculate the ratio of the sums of two columns in a DataFrame.
+    """
+    unit: str = "percentage"
+    type: Literal["RatioOfSumsMetric"] = "RatioOfSumsMetric"
+
     def calculate(self, data: pd.DataFrame, column_a: str, column_b: str) -> float:
+        """
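+        Calculate the ratio of the sum of `column_a` to the sum of `column_b`.
+
+        Parameters:
+            data (pd.DataFrame): The input DataFrame.
+            column_a (str): The name of the column whose sum is the numerator.
+            column_b (str): The name of the column whose sum is the denominator; a ValueError is raised if that sum is zero.
+
+        Returns:
+            float: The sum of `column_a` divided by the sum of `column_b` (an unscaled ratio, not multiplied by 100).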
+        """
         sum_a = data[column_a].sum()
         sum_b = data[column_b].sum()
-        
+
         if sum_b == 0:
-            raise ValueError(f"The sum of column '{column_b}' is zero, cannot divide by zero.")
-
-        return sum_a / sum_b 
+            raise ValueError(
+                f"The sum of column '{column_b}' is zero, cannot divide by zero."
+            )
+
+        return sum_a / sum_b
diff --git a/pkgs/swarmauri/tests/unit/metrics/PatternMatchingMetric_unit_test.py b/pkgs/swarmauri/tests/unit/metrics/PatternMatchingMetric_unit_test.py
@@ -1,25 +1,31 @@
 import pytest
-from swarmauri.metrics.concrete.PatternMatchingMetric import PatternMatchingMetric as Metric
+from swarmauri.metrics.concrete.PatternMatchingMetric import (
+    PatternMatchingMetric as Metric,
+)
+
 
 @pytest.mark.unit
 def test_ubc_resource():
-	assert Metric(unit='points', value=10).resource == 'Metric'
+    assert Metric(value=10).resource == "Metric"
 
 
 @pytest.mark.unit
 def test_ubc_type():
-    metric = Metric(unit='points', value=10)
-    assert metric.type == 'PatternMatchingMetric'
+    metric = Metric(value=10)
+    assert metric.type == "PatternMatchingMetric"
+
 
 @pytest.mark.unit
 def test_serialization():
-    metric = Metric(unit='points', value=10)
+    metric = Metric(value=10)
     assert metric.id == Metric.model_validate_json(metric.model_dump_json()).id
 
+
 @pytest.mark.unit
 def test_metric_value():
-	assert Metric(unit='points', value=10)() == 10
+    assert Metric(value=10)() == 10
+
 
 @pytest.mark.unit
 def test_metric_unit():
-	assert Metric(unit='points', value=10).unit == 'bad assertion value'
+    assert Metric(value=10).unit == "percentage"
diff --git a/pkgs/swarmauri/tests/unit/metrics/RatioOfSumsMetric_unit_test.py b/pkgs/swarmauri/tests/unit/metrics/RatioOfSumsMetric_unit_test.py
@@ -1,25 +1,28 @@
 import pytest
 from swarmauri.metrics.concrete.RatioOfSumsMetric import RatioOfSumsMetric as Metric
 
+
 @pytest.mark.unit
 def test_ubc_resource():
-	assert Metric(unit='points', value=10).resource == 'Metric'
+    assert Metric(value=10).resource == "Metric"
 
 
 @pytest.mark.unit
 def test_ubc_type():
-    metric = Metric(unit='points', value=10)
-    assert metric.type == 'RatioOfSumsMetric'
+    metric = Metric(value=10)
+    assert metric.type == "RatioOfSumsMetric"
 
 @pytest.mark.unit
 def test_serialization():
-    metric = Metric(unit='points', value=10)
+    metric = Metric(value=10)
     assert metric.id == Metric.model_validate_json(metric.model_dump_json()).id
 
+
 @pytest.mark.unit
 def test_metric_value():
-	assert Metric(unit='points', value=10)() == 10
+    assert Metric(value=10)() == 10
+
 
 @pytest.mark.unit
 def test_metric_unit():
-    assert Metric(unit='points', value=10).unit == 'bad assertion value'
+    assert Metric(value=10).unit == "percentage"