Skip to content

Commit

Permalink
Rename evaluate file to evaluation to not conflict with huggingface library
Browse files Browse the repository at this point in the history
  • Loading branch information
rogeriochaves committed Apr 22, 2024
1 parent 533c4fc commit 7ed6e81
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 25 deletions.
2 changes: 1 addition & 1 deletion langevals/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from langevals.evaluate import evaluate
from langevals.evaluation import evaluate

__all__ = ["evaluate"]
File renamed without changes.
53 changes: 29 additions & 24 deletions tests/test_evaluate.py → tests/test_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,15 @@
ExampleWordCountEvaluator,
ExampleWordCountResult,
)
from langevals_langevals.blocklist import (
BlocklistEvaluator,
BlocklistResult,
BlocklistSettings,
from langevals_langevals.competitor_blocklist import (
CompetitorBlocklistEvaluator,
CompetitorBlocklistResult,
CompetitorBlocklistSettings,
)
import langevals
import pandas as pd

from langevals.evaluate import EvaluationResultSet, _pandas_to_generic_entries
from langevals.evaluation import EvaluationResultSet, _pandas_to_generic_entries


def test_run_simple_evaluation():
Expand All @@ -26,7 +26,9 @@ def test_run_simple_evaluation():
entries,
[
ExampleWordCountEvaluator(),
BlocklistEvaluator(settings=BlocklistSettings(competitors=["Bob"])),
CompetitorBlocklistEvaluator(
settings=CompetitorBlocklistSettings(competitors=["Bob"])
),
],
)

Expand All @@ -39,9 +41,9 @@ def test_run_simple_evaluation():
ExampleWordCountResult(score=4, details="Words found: My, name, is, Bob"),
],
[
BlocklistResult(score=0, passed=True),
BlocklistResult(score=0, passed=True),
BlocklistResult(
CompetitorBlocklistResult(score=0, passed=True),
CompetitorBlocklistResult(score=0, passed=True),
CompetitorBlocklistResult(
score=1, passed=False, details="Competitors mentioned: Bob"
),
],
Expand Down Expand Up @@ -71,7 +73,7 @@ def test_run_simple_evaluation():
"cost": None,
},
],
"blocklist": [
"competitor_blocklist": [
{
"status": "processed",
"score": 0.0,
Expand All @@ -96,20 +98,23 @@ def test_run_simple_evaluation():
],
}

assert results.to_pandas().to_dict() == pd.DataFrame(
{
"input": ["hello", "how are you?", "what is your name?"],
"output": ["hi", "I am a chatbot, no feelings", "My name is Bob"],
"word_count": [1.0, 6.0, 4.0],
"word_count_details": [
"Words found: hi",
"Words found: I, am, a, chatbot,, no, feelings",
"Words found: My, name, is, Bob",
],
"blocklist": [True, True, False],
"blocklist_details": [None, None, "Competitors mentioned: Bob"],
}
).to_dict()
assert (
results.to_pandas().to_dict()
== pd.DataFrame(
{
"input": ["hello", "how are you?", "what is your name?"],
"output": ["hi", "I am a chatbot, no feelings", "My name is Bob"],
"word_count": [1.0, 6.0, 4.0],
"word_count_details": [
"Words found: hi",
"Words found: I, am, a, chatbot,, no, feelings",
"Words found: My, name, is, Bob",
],
"competitor_blocklist": [True, True, False],
"competitor_blocklist_details": [None, None, "Competitors mentioned: Bob"],
}
).to_dict()
)


# TODO: also accept Hugging Face datasets as input (and consider adding an example to the README, e.g. load_dataset("explodinggradients/amnesty_qa", "english_v2"))

0 comments on commit 7ed6e81

Please sign in to comment.