gitextract_fpungzut/

├── .github/
│   └── workflows/
│       ├── mypy_linter.yml
│       ├── pyflakes_and_flake8_and_compileall_linter.py.yml
│       └── python-publish.yml
├── .gitignore
├── .pre-commit-config.yaml
├── README.md
├── athina/
│   ├── __init__.py
│   ├── cli/
│   │   ├── __init__.py
│   │   └── cli.py
│   ├── constants/
│   │   ├── __init__.py
│   │   └── messages.py
│   ├── datasets/
│   │   ├── __init__.py
│   │   ├── conversations.json
│   │   ├── dataset.py
│   │   ├── summarization_sample.py
│   │   └── yc_query_mini.py
│   ├── errors/
│   │   ├── __init__.py
│   │   └── exceptions.py
│   ├── evals/
│   │   ├── __init__.py
│   │   ├── base_evaluator.py
│   │   ├── conversation/
│   │   │   ├── conversation_coherence/
│   │   │   │   ├── evaluator.py
│   │   │   │   └── prompt.py
│   │   │   └── conversation_resolution/
│   │   │       ├── evaluator.py
│   │   │       └── prompt.py
│   │   ├── eval_type.py
│   │   ├── function/
│   │   │   ├── __init__.py
│   │   │   ├── function_evaluator.py
│   │   │   ├── functions.py
│   │   │   └── wrapper.py
│   │   ├── grounded/
│   │   │   ├── __init__.py
│   │   │   ├── grounded_evaluator.py
│   │   │   ├── similarity.py
│   │   │   └── wrapper.py
│   │   ├── guardrails/
│   │   │   ├── correct_language/
│   │   │   │   └── evaluator.py
│   │   │   ├── detect_pii/
│   │   │   │   └── evaluator.py
│   │   │   ├── gibberish_text/
│   │   │   │   └── evaluator.py
│   │   │   ├── no_secrets_present/
│   │   │   │   └── evaluator.py
│   │   │   ├── politeness_check/
│   │   │   │   └── evaluator.py
│   │   │   ├── profanity_free/
│   │   │   │   └── evaluator.py
│   │   │   ├── reading_time/
│   │   │   │   └── evaluator.py
│   │   │   ├── restrict_to_topic/
│   │   │   │   └── evaluator.py
│   │   │   ├── sensitive_topics/
│   │   │   │   └── evaluator.py
│   │   │   ├── sfw/
│   │   │   │   └── evaluator.py
│   │   │   ├── toxic_language/
│   │   │   │   └── evaluator.py
│   │   │   └── unusual_prompt/
│   │   │       └── evaluator.py
│   │   ├── llm/
│   │   │   ├── __init__.py
│   │   │   ├── context_contains_enough_information/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── evaluator.py
│   │   │   │   └── examples.py
│   │   │   ├── custom_prompt/
│   │   │   │   ├── __init__.py
│   │   │   │   └── evaluator.py
│   │   │   ├── does_response_answer_query/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── evaluator.py
│   │   │   │   └── examples.py
│   │   │   ├── example.py
│   │   │   ├── faithfulness/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── evaluator.py
│   │   │   │   └── examples.py
│   │   │   ├── grading_criteria/
│   │   │   │   ├── __init__.py
│   │   │   │   └── evaluator.py
│   │   │   ├── groundedness/
│   │   │   │   ├── evaluator.py
│   │   │   │   └── prompt.py
│   │   │   ├── llm_evaluator.py
│   │   │   └── summary_accuracy/
│   │   │       └── evaluator.py
│   │   ├── ragas/
│   │   │   ├── __init__.py
│   │   │   ├── answer_correctness/
│   │   │   │   ├── __init__.py
│   │   │   │   └── evaluator.py
│   │   │   ├── answer_relevancy/
│   │   │   │   ├── __init__.py
│   │   │   │   └── evaluator.py
│   │   │   ├── answer_semantic_similarity/
│   │   │   │   ├── __init__.py
│   │   │   │   └── evaluator.py
│   │   │   ├── coherence/
│   │   │   │   ├── __init__.py
│   │   │   │   └── evaluator.py
│   │   │   ├── conciseness/
│   │   │   │   ├── __init__.py
│   │   │   │   └── evaluator.py
│   │   │   ├── context_precision/
│   │   │   │   ├── __init__.py
│   │   │   │   └── evaluator.py
│   │   │   ├── context_recall/
│   │   │   │   ├── __init__.py
│   │   │   │   └── evaluator.py
│   │   │   ├── faithfulness/
│   │   │   │   ├── __init__.py
│   │   │   │   └── evaluator.py
│   │   │   ├── harmfulness/
│   │   │   │   ├── __init__.py
│   │   │   │   └── evaluator.py
│   │   │   ├── maliciousness/
│   │   │   │   ├── __init__.py
│   │   │   │   └── evaluator.py
│   │   │   └── ragas_evaluator.py
│   │   └── safety/
│   │       ├── content_moderation/
│   │       │   └── evaluator.py
│   │       ├── pii_detection/
│   │       │   └── evaluator.py
│   │       └── prompt_injection/
│   │           └── evaluator.py
│   ├── guard/
│   │   ├── exception.py
│   │   └── guard.py
│   ├── helpers/
│   │   ├── __init__.py
│   │   ├── athina_logging_helper.py
│   │   ├── config.py
│   │   ├── constants.py
│   │   ├── dataset_helper.py
│   │   ├── eval_helper.py
│   │   ├── function_eval_util.py
│   │   ├── get_evaluator.py
│   │   ├── jinja_helper.py
│   │   ├── json.py
│   │   ├── kwparser.py
│   │   ├── loader_helper.py
│   │   ├── logger.py
│   │   ├── package_helper.py
│   │   ├── run_helper.py
│   │   └── step_helper.py
│   ├── interfaces/
│   │   ├── __init__.py
│   │   ├── athina.py
│   │   ├── custom_model_config.py
│   │   ├── data.py
│   │   ├── model.py
│   │   ├── openai.py
│   │   └── result.py
│   ├── keys/
│   │   ├── __init__.py
│   │   ├── athina_api_key.py
│   │   └── openai_api_key.py
│   ├── llms/
│   │   ├── __init__.py
│   │   ├── abstract_llm_service.py
│   │   ├── litellm_service.py
│   │   ├── openai_service.py
│   │   ├── question_answerer.py
│   │   ├── question_answerer_bulk.py
│   │   ├── question_answerer_cot.py
│   │   ├── question_answerer_with_retrieval.py
│   │   └── question_generator.py
│   ├── loaders/
│   │   ├── __init__.py
│   │   ├── base_loader.py
│   │   ├── conversation_loader.py
│   │   ├── json_loader.py
│   │   ├── loader.py
│   │   ├── response_loader.py
│   │   ├── summary_loader.py
│   │   └── text_loader.py
│   ├── metrics/
│   │   ├── agreement_score.py
│   │   ├── contradiction_score.py
│   │   ├── groundedness.py
│   │   ├── hallucination_score.py
│   │   ├── metric.py
│   │   ├── metric_type.py
│   │   ├── passed.py
│   │   ├── ragas_metric.py
│   │   └── similarity_score.py
│   ├── runner/
│   │   ├── __init__.py
│   │   ├── run.py
│   │   └── run_wrapper.py
│   ├── scripts/
│   │   └── guardrails.py
│   ├── services/
│   │   └── athina_api_service.py
│   └── steps/
│       ├── __init__.py
│       ├── api.py
│       ├── base.py
│       ├── browser_use_step.py
│       ├── chain.py
│       ├── chroma_retrieval.py
│       ├── classify_text.py
│       ├── code_execution.py
│       ├── code_execution_v2.py
│       ├── conditional.py
│       ├── debug.py
│       ├── extract_entities.py
│       ├── extract_json_path.py
│       ├── iterator.py
│       ├── llm.py
│       ├── loop.py
│       ├── open_ai_assistant.py
│       ├── parse_document.py
│       ├── pinecone_retrieval.py
│       ├── qdrant_retrieval.py
│       ├── research_agent_step.py
│       ├── search.py
│       ├── spider_crawl.py
│       ├── tool_call_agent.py
│       ├── transcribe_speech_to_text.py
│       ├── transform.py
│       ├── utils/
│       │   └── metadata.py
│       └── weaviate_retrieval.py
├── examples/
│   ├── chain.ipynb
│   ├── conditional_flow.ipynb
│   ├── conversation_coherence.ipynb
│   ├── conversation_eval.ipynb
│   ├── conversation_resolution.ipynb
│   ├── custom_grading_criteria.ipynb
│   ├── dataset_creation.ipynb
│   ├── execute_node.ipynb
│   ├── groundedness.ipynb
│   ├── guard.ipynb
│   ├── guardrails.ipynb
│   ├── load_athina_data.ipynb
│   ├── question_answerer.ipynb
│   ├── ragas.ipynb
│   ├── run_custom_eval.ipynb
│   ├── run_eval.ipynb
│   ├── run_eval_llama_index.ipynb
│   ├── run_eval_suite.ipynb
│   ├── run_experiment.ipynb
│   ├── run_function_eval.ipynb
│   ├── run_single_datapoint.ipynb
│   └── text_summarization.ipynb
└── pyproject.toml