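# Tests for the ContextRelevance evaluation metric. The OpenAI chat completion
# call is monkeypatched throughout, so no real API key or network access is
# required.
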
import pytest

from embedchain.config.evaluation.base import ContextRelevanceConfig
from embedchain.evaluation.metrics import ContextRelevance
from embedchain.utils.evaluation import EvalData, EvalMetric


@pytest.fixture
def mock_data():
    # Two evaluation samples: one with a single context and one with two
    # contexts, so the per-sample scores computed below can differ.
    return [
        EvalData(
            contexts=[
                "This is a test context 1.",
            ],
            question="This is a test question 1.",
            answer="This is a test answer 1.",
        ),
        EvalData(
            contexts=[
                "This is a test context 2-1.",
                "This is a test context 2-2.",
            ],
            question="This is a test question 2.",
            answer="This is a test answer 2.",
        ),
    ]


@pytest.fixture
def mock_context_relevance_metric(monkeypatch):
    # A fake API key is enough to construct the metric; the OpenAI client is
    # stubbed out in the tests that actually trigger a completion call.
    monkeypatch.setenv("OPENAI_API_KEY", "test_api_key")
    metric = ContextRelevance()
    return metric


def test_context_relevance_init(monkeypatch):
    monkeypatch.setenv("OPENAI_API_KEY", "test_api_key")
    metric = ContextRelevance()
    # Defaults: GPT-4 model, English, and no explicit API key in the config
    # (the key is picked up from the environment instead).
    assert metric.name == EvalMetric.CONTEXT_RELEVANCY.value
    assert metric.config.model == "gpt-4"
    assert metric.config.api_key is None
    assert metric.config.language == "en"
    monkeypatch.delenv("OPENAI_API_KEY")


def test_context_relevance_init_with_config():
    # An API key passed via config works without the environment variable.
    metric = ContextRelevance(config=ContextRelevanceConfig(api_key="test_api_key"))
    assert metric.name == EvalMetric.CONTEXT_RELEVANCY.value
    assert metric.config.model == "gpt-4"
    assert metric.config.api_key == "test_api_key"
    assert metric.config.language == "en"


def test_context_relevance_init_without_api_key(monkeypatch):
    # With neither an environment variable nor a config value, construction fails.
    monkeypatch.delenv("OPENAI_API_KEY", raising=False)
    with pytest.raises(ValueError):
        ContextRelevance()


def test_sentence_segmenter(mock_context_relevance_metric):
    text = "This is a test sentence. This is another sentence."
    # The segmenter preserves the original spacing, hence the trailing space
    # on the first sentence.
    assert mock_context_relevance_metric._sentence_segmenter(text) == [
        "This is a test sentence. ",
        "This is another sentence.",
    ]
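

# The two tests below replace client.chat.completions.create with a stub. The
# anonymous type(...) instance mimics the shape of an OpenAI chat completion
# response (response.choices[0].message.content) without importing the real
# client types.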
def test_compute_score(mock_context_relevance_metric, mock_data, monkeypatch):
    monkeypatch.setattr(
        mock_context_relevance_metric.client.chat.completions,
        "create",
        lambda model, messages: type(
            "obj",
            (object,),
            {
                "choices": [
                    type("obj", (object,), {"message": type("obj", (object,), {"content": "This is a test response."})})
                ]
            },
        )(),
    )
    # The stubbed reply is a single sentence, so the asserted scores correspond
    # to one relevant sentence out of one and out of two context sentences.
    assert mock_context_relevance_metric._compute_score(mock_data[0]) == 1.0
    assert mock_context_relevance_metric._compute_score(mock_data[1]) == 0.5


def test_evaluate(mock_context_relevance_metric, mock_data, monkeypatch):
    monkeypatch.setattr(
        mock_context_relevance_metric.client.chat.completions,
        "create",
        lambda model, messages: type(
            "obj",
            (object,),
            {
                "choices": [
                    type("obj", (object,), {"message": type("obj", (object,), {"content": "This is a test response."})})
                ]
            },
        )(),
    )
    # Consistent with the mean of the per-sample scores above: (1.0 + 0.5) / 2.
    assert mock_context_relevance_metric.evaluate(mock_data) == 0.75