from langchain.evaluation import EvaluatorType, load_evaluator

# Load a criteria evaluator by its string name, judging "conciseness".
evaluator = load_evaluator("criteria", criteria="conciseness")

# This is equivalent to loading using the enum
evaluator = load_evaluator(EvaluatorType.CRITERIA, criteria="conciseness")

# Grade one prediction against the question that produced it.
eval_result = evaluator.evaluate_strings(
    input="What's 2+2?",
    prediction="What's 2+2? That's an elementary question. The answer you're looking for is that two and two is four.",
)
print(eval_result)
The output looks like this:
{'reasoning': 'The criterion is conciseness, which means the submission should be brief and to the point. \n\nLooking at the submission, the answer to the question "What\'s 2+2?" is indeed "four". However, the respondent has added extra information, stating "That\'s an elementary question" before providing the answer. This additional statement does not contribute to answering the question and therefore makes the response less concise.\n\nSo, based on the criterion of conciseness, the submission does not meet the criterion.\n\nN', 'value': 'N', 'score': 0}
An evaluator that checks multiple criteria looks like this:
def multiple_custom_criteria() -> None:
    """Evaluate one prediction against several custom criteria and print the result.

    Builds a criteria evaluator from a dict that maps each criterion name to
    the question describing it, runs it on a fixed query/prediction pair, and
    prints the evaluation result.
    """
    query = "Tell me a joke"
    prediction = "I ate some square pie but I don't know the square of pi."

    # If you wanted to specify multiple criteria. Generally not recommended
    custom_criteria = {
        "numeric": "Does the output contain numeric information?",
        "mathematical": "Does the output contain mathematical information?",
        "grammatical": "Is the output grammatically correct?",
        "logical": "Is the output logical?",
    }

    eval_chain = load_evaluator(
        EvaluatorType.CRITERIA,
        criteria=custom_criteria,
    )
    eval_result = eval_chain.evaluate_strings(prediction=prediction, input=query)
    print("Multi-criteria evaluation")
    print(eval_result)
References:
https://python.langchain.com/v0.1/docs/guides/productionization/evaluation/string/criteria_eval_chain/
No comments:
Post a Comment