swarmauri · cobycloud · Sep 23, 2024 · Sep 23, 2024 · Sep 23, 2024 · Sep 23, 2024
diff --git a/pkgs/community/setup.py b/pkgs/community/setup.py
@@ -18,6 +18,7 @@
         "numpy",  # Common dependencies for all distributions
         "requests",
         "pydantic",
+        "pymupdf",
         "swarmauri-core==0.5.0.dev7",
         "swarmauri==0.5.0.dev7"
     ],

diff --git a/pkgs/community/swarmauri_community/metrics/MutualInformationMetric.py b/pkgs/community/swarmauri_community/metrics/MutualInformationMetric.py
@@ -1,11 +1,32 @@
-from typing import Any
+from typing import Any, Literal
 import pandas as pd
 from sklearn.feature_selection import mutual_info_classif
 from swarmauri.metrics.base.MetricBase import MetricBase
 from swarmauri.metrics.base.MetricCalculateMixin import MetricCalculateMixin
 
+
 class MutualInformationMetric(MetricBase, MetricCalculateMixin):
-    def calculate(self, data: pd.DataFrame, target_column: str) -> float:  # Now returns a float
+    """
+    A metric class to calculate mutual information between features and a target column in a given dataset.
+
+    This class computes the mutual information between each feature in a DataFrame (excluding the target column)
+    and the target column itself, and returns the average mutual information score.
+    """
+
+    type: Literal["MutualInformationMetric"] = "MutualInformationMetric"
+
+    def calculate(self, data: pd.DataFrame, target_column: str) -> float:
+        """
+        Calculate the average mutual information between the features and the target column.
+
+        Parameters:
+        - data (pd.DataFrame): A DataFrame containing both the features and the target column.
+
+        - target_column (str): The name of the target column in the DataFrame.
+
+        Returns:
+        - float: The average mutual information across all feature columns.
+        """
         # Separate features from the target column
         features_data = data.drop(columns=[target_column])
         target_data = data[target_column]
@@ -14,4 +35,4 @@ def calculate(self, data: pd.DataFrame, target_column: str) -> float:  # Now ret
         mi = mutual_info_classif(features_data, target_data)
 
         # Return the average mutual information across all features
-        return float(mi.mean())  # Output as a float
+        return float(mi.mean())  # Output as a float
diff --git a/pkgs/community/swarmauri_community/parsers/FitzPdfParser.py b/pkgs/community/swarmauri_community/parsers/FitzPdfParser.py
@@ -1,15 +1,17 @@
-import fitz  # PyMuPDF
+import pymupdf  # PyMuPDF
 from typing import List, Union, Any, Literal
-from swarmauri.standard.parsers.base.ParserBase import ParserBase
-from swarmauri.core.documents.IDocument import IDocument
-from swarmauri.standard.documents.concrete.Document import Document
+from swarmauri.parsers.base.ParserBase import ParserBase
+from swarmauri_core.documents.IDocument import IDocument
+from swarmauri.documents.concrete.Document import Document
+
 
 class PDFtoTextParser(ParserBase):
     """
     A parser to extract text from PDF files.
     """
-    type: Literal['FitzPdfParser'] = 'FitzPdfParser'
-
+
+    type: Literal["FitzPdfParser"] = "FitzPdfParser"
+
     def parse(self, data: Union[str, Any]) -> List[IDocument]:
         """
         Parses a PDF file and extracts its text content as Document instances.
@@ -26,7 +28,7 @@ def parse(self, data: Union[str, Any]) -> List[IDocument]:
 
         try:
             # Open the PDF file
-            doc = fitz.open(data)
+            doc = pymupdf.open(data)
             text = ""
 
             # Extract text from each page
@@ -37,7 +39,7 @@ def parse(self, data: Union[str, Any]) -> List[IDocument]:
             # Create a document with the extracted text
             document = Document(content=text, metadata={"source": data})
             return [document]
-        
+
         except Exception as e:
             print(f"An error occurred while parsing the PDF: {e}")
             return []
diff --git a/pkgs/community/tests/unit/metrics/MutualInformationMetric_test.py b/pkgs/community/tests/unit/metrics/MutualInformationMetric_test.py
@@ -1,24 +1,31 @@
 import pytest
-from swarmauri.metrics.concrete.MutualInformationMetric import MutualInformationMetric as Metric
+from swarmauri_community.metrics.MutualInformationMetric import (
+    MutualInformationMetric as Metric,
+)
+
 
 @pytest.mark.unit
 def test_ubc_resource():
-	assert Metric(unit='points', value=10).resource == 'Metric'
+    assert Metric(unit="points", value=10).resource == "Metric"
+
 
 @pytest.mark.unit
 def test_ubc_type():
-    metric = Metric(unit='points', value=10)
-    assert metric.type == 'MutualInformationMetric'
+    metric = Metric(unit="points", value=10)
+    assert metric.type == "MutualInformationMetric"
+
 
 @pytest.mark.unit
 def test_serialization():
-    metric = Metric(unit='points', value=10)
+    metric = Metric(unit="points", value=10)
     assert metric.id == Metric.model_validate_json(metric.model_dump_json()).id
 
+
 @pytest.mark.unit
 def test_metric_value():
-	assert Metric(unit='points', value=10)() == 10
+    assert Metric(unit="points", value=10)() == 10
+
 
 @pytest.mark.unit
 def test_metric_unit():
-    assert Metric(unit='points', value=10).unit == 'bad assertion value'
+    assert Metric(unit="points", value=10).unit == "points"
diff --git a/pkgs/community/tests/unit/parsers/FitzPdfParser_test.py b/pkgs/community/tests/unit/parsers/FitzPdfParser_test.py
@@ -1,25 +1,28 @@
-
 import pytest
-from swarmauri.community.parsers.FitzPdfParser import PDFtoTextParser as Parser
+from swarmauri_community.parsers.FitzPdfParser import PDFtoTextParser as Parser
+
 
 @pytest.mark.unit
 def test_ubc_resource():
     parser = Parser()
-    assert parser.resource == 'Parser'
+    assert parser.resource == "Parser"
+
 
 @pytest.mark.unit
 def test_ubc_type():
     parser = Parser()
-    assert parser.type == 'FitzPdfParser'
+    assert parser.type == "FitzPdfParser"
+
 
 @pytest.mark.unit
 def test_serialization():
     parser = Parser()
     assert parser.id == Parser.model_validate_json(parser.model_dump_json()).id
 
+
 @pytest.mark.unit
 def test_parse():
-    documents = Parser().parse(r'resources/demo.pdf')
-    assert documents[0].resource == 'Document'
-    assert documents[0].content == 'This is a demo pdf \n'
-    assert documents[0].metadata['source'] == r'resources/demo.pdf'
+    documents = Parser().parse(r"resources/demo.pdf")
+    assert documents[0].resource == "Document"
+    assert documents[0].content == "This is a demo pdf \n"
+    assert documents[0].metadata["source"] == r"resources/demo.pdf"
diff --git a/pkgs/community/tests/unit/tools/PsutilTool_test.py b/pkgs/community/tests/unit/tools/PsutilTool_test.py
@@ -1,4 +1,3 @@
-from unittest.mock import patch, MagicMock
 import pytest
 import psutil
 from swarmauri_community.tools.concrete.PsutilTool import PsutilTool as Tool

diff --git a/pkgs/community/tests/unit/tools/QrCodeGeneratorTool_test.py b/pkgs/community/tests/unit/tools/QrCodeGeneratorTool_test.py
@@ -1,5 +1,4 @@
 import base64
-from PIL import Image
 import pytest
 from swarmauri_community.tools.concrete.QrCodeGeneratorTool import (
     QrCodeGeneratorTool as Tool,

diff --git a/pkgs/community/tests/unit/vector_stores/PineconeVectorStore_test.py b/pkgs/community/tests/unit/vector_stores/PineconeVectorStore_test.py
@@ -5,6 +5,11 @@
 
 API_KEY = os.getenv("PINECONE_API_KEY")
 
+
+@pytest.mark.skipif(
+    not os.getenv("PINECONE_API_KEY"),
+    reason="Skipping due to environment variable not set",
+)
 @pytest.mark.unit
 def test_ubc_resource():
     vs = PineconeVectorStore(
@@ -16,6 +21,10 @@ def test_ubc_resource():
     assert vs.embedder.resource == "Embedding"
 
 
+@pytest.mark.skipif(
+    not os.getenv("PINECONE_API_KEY"),
+    reason="Skipping due to environment variable not set",
+)
 @pytest.mark.unit
 def test_ubc_type():
     vs = PineconeVectorStore(
@@ -25,6 +34,11 @@ def test_ubc_type():
     )
     assert vs.type == "PineconeVectorStore"
 
+
+@pytest.mark.skipif(
+    not os.getenv("PINECONE_API_KEY"),
+    reason="Skipping due to environment variable not set",
+)
 @pytest.mark.unit
 def test_serialization():
     vs = PineconeVectorStore(
@@ -35,6 +49,10 @@ def test_serialization():
     assert vs.id == PineconeVectorStore.model_validate_json(vs.model_dump_json()).id
 
 
+@pytest.mark.skipif(
+    not os.getenv("PINECONE_API_KEY"),
+    reason="Skipping due to environment variable not set",
+)
 @pytest.mark.unit
 def test_top_k():
     vs = PineconeVectorStore(
@@ -52,6 +70,3 @@ def test_top_k():
 
     vs.add_documents(documents)
     assert len(vs.retrieve(query="test", top_k=2)) == 2
-
-
-
diff --git a/...vector_stores/SpatialDocEmbedding_test.py → ...or_stores/SpatialDocEmbedding_i9n_test.py b/...vector_stores/SpatialDocEmbedding_test.py → ...or_stores/SpatialDocEmbedding_i9n_test.py
@@ -4,11 +4,7 @@
 )
 
 
-@pytest.mark.unit
-def test_ubc_resource():
-    assert SpatialDocEmbedding().resource == "Embedding"
-
-@pytest.mark.xfail('Expected to fail until we fix the bug.')
+@pytest.mark.xfail(reason="Expected to fail until we fix the bug.")
 def test_fit_transform():
     embedder = SpatialDocEmbedding()
     embedder.fit_transform(["test", "test1", "test2"])

diff --git a/...vector_stores/SpatialDocEmbedding_test.py → ...r_stores/SpatialDocEmbedding_unit_test.py b/...vector_stores/SpatialDocEmbedding_test.py → ...r_stores/SpatialDocEmbedding_unit_test.py
@@ -4,6 +4,11 @@
 )
 
 
+@pytest.mark.unit
+def test_ubc_resource():
+    assert SpatialDocEmbedding().resource == "Embedding"
+
+
 @pytest.mark.unit
 def test_ubc_type():
     assert SpatialDocEmbedding().type == "SpatialDocEmbedding"