Showing preview only (4,315K chars total). Download the full file or copy to clipboard to get everything.
Repository: 567-labs/instructor
Branch: main
Commit: 41f050c7c1fa
Files: 706
Total size: 4.0 MB
Directory structure:
gitextract_z1bftxv1/
├── .coveragerc
├── .cursor/
│ └── rules/
│ ├── documentation-sync.mdc
│ ├── followups.mdc
│ ├── new-features-planning.mdc
│ ├── readme.md
│ └── simple-language.mdc
├── .cursorignore
├── .github/
│ ├── FUNDING.yml
│ ├── ISSUE_TEMPLATE/
│ │ ├── bug_report.md
│ │ └── feature_request.md
│ ├── PULL_REQUEST_TEMPLATE/
│ │ └── pull_request_template.md
│ ├── dependabot.yml
│ └── workflows/
│ ├── ai-label.yml
│ ├── evals.yml
│ ├── python-publish.yml
│ ├── ruff.yml
│ ├── scheduled-release.yml
│ ├── test.yml
│ ├── test_docs.yml
│ └── ty.yml
├── .gitignore
├── .grit/
│ ├── .gitignore
│ └── grit.yaml
├── .pre-commit-config.yaml
├── .ruff.toml
├── AGENT.md
├── CHANGELOG.md
├── CLAUDE.md
├── CONTRIBUTING.md
├── LICENSE
├── NEW_PROVIDER_AGENT_INSTRUCTIONS.md
├── README.md
├── build_mkdocs.sh
├── cross_link_mapping.yaml
├── docs/
│ ├── AGENT.md
│ ├── api-docstring-assessment.md
│ ├── api.md
│ ├── architecture.md
│ ├── blog/
│ │ ├── .authors.yml
│ │ ├── index.md
│ │ └── posts/
│ │ ├── aisummit-2023.md
│ │ ├── announcing-gemini-tool-calling-support.md
│ │ ├── announcing-instructor-responses-support.md
│ │ ├── announcing-unified-provider-interface.md
│ │ ├── anthropic-prompt-caching.md
│ │ ├── anthropic-web-search-structured.md
│ │ ├── anthropic.md
│ │ ├── bad-schemas-could-break-llms.md
│ │ ├── best_framework.md
│ │ ├── caching.md
│ │ ├── chain-of-density.md
│ │ ├── chat-with-your-pdf-with-gemini.md
│ │ ├── citations.md
│ │ ├── consistent-stories.md
│ │ ├── course.md
│ │ ├── cursor-rules.md
│ │ ├── distilation-part1.md
│ │ ├── extract-model-looks.md
│ │ ├── extracting-model-metadata.md
│ │ ├── fake-data.md
│ │ ├── full-fastapi-visibility.md
│ │ ├── generating-pdf-citations.md
│ │ ├── generator.md
│ │ ├── google-openai-client.md
│ │ ├── introducing-structured-outputs-with-cerebras-inference.md
│ │ ├── introducing-structured-outputs.md
│ │ ├── introduction.md
│ │ ├── jinja-proposal.md
│ │ ├── langsmith.md
│ │ ├── learn-async.md
│ │ ├── llm-as-reranker.md
│ │ ├── llms-txt-adoption.md
│ │ ├── llms-txt-support.md
│ │ ├── logfire.md
│ │ ├── lseg-market-surveillance.md
│ │ ├── matching-language.md
│ │ ├── migrating-to-uv.md
│ │ ├── mkdocs-llmstxt-plugin-integration.md
│ │ ├── multimodal-gemini.md
│ │ ├── native_caching.md
│ │ ├── open_source.md
│ │ ├── openai-distilation-store.md
│ │ ├── openai-multimodal.md
│ │ ├── pairwise-llm-judge.md
│ │ ├── parea.md
│ │ ├── pydantic-is-still-all-you-need.md
│ │ ├── rag-and-beyond.md
│ │ ├── rag-timelines.md
│ │ ├── semantic-validation-structured-outputs.md
│ │ ├── situate-context.md
│ │ ├── string-based-init.md
│ │ ├── structured-output-anthropic.md
│ │ ├── tidy-data-from-messy-tables.md
│ │ ├── timestamp.md
│ │ ├── using_json.md
│ │ ├── validation-part1.md
│ │ ├── version-1.md
│ │ ├── why-care-about-mcps.md
│ │ ├── writer-support.md
│ │ ├── youtube-flashcards.md
│ │ └── youtube-transcripts.md
│ ├── cli/
│ │ ├── batch.md
│ │ ├── finetune.md
│ │ ├── index.md
│ │ └── usage.md
│ ├── concepts/
│ │ ├── alias.md
│ │ ├── batch.md
│ │ ├── caching.md
│ │ ├── citation.md
│ │ ├── dictionary_operations.md
│ │ ├── distillation.md
│ │ ├── enums.md
│ │ ├── error_handling.md
│ │ ├── fastapi.md
│ │ ├── fields.md
│ │ ├── from_provider.md
│ │ ├── hooks.md
│ │ ├── index.md
│ │ ├── iterable.md
│ │ ├── lists.md
│ │ ├── logging.md
│ │ ├── maybe.md
│ │ ├── migration.md
│ │ ├── mode-migration.md
│ │ ├── models.md
│ │ ├── multimodal.md
│ │ ├── parallel.md
│ │ ├── partial.md
│ │ ├── patching.md
│ │ ├── philosophy.md
│ │ ├── prompt_caching.md
│ │ ├── prompting.md
│ │ ├── raw_response.md
│ │ ├── reask_validation.md
│ │ ├── retrying.md
│ │ ├── semantic_validation.md
│ │ ├── templating.md
│ │ ├── typeadapter.md
│ │ ├── typeddicts.md
│ │ ├── types.md
│ │ ├── union.md
│ │ ├── unions.md
│ │ ├── usage.md
│ │ └── validation.md
│ ├── contributing.md
│ ├── debugging.md
│ ├── examples/
│ │ ├── action_items.md
│ │ ├── audio_extraction.md
│ │ ├── batch_classification_langsmith.md
│ │ ├── batch_in_memory.md
│ │ ├── batch_job_oai.md
│ │ ├── building_knowledge_graphs.md
│ │ ├── bulk_classification.md
│ │ ├── classification.md
│ │ ├── document_segmentation.md
│ │ ├── entity_resolution.md
│ │ ├── exact_citations.md
│ │ ├── examples.md
│ │ ├── extract_contact_info.md
│ │ ├── extract_slides.md
│ │ ├── extracting_receipts.md
│ │ ├── extracting_tables.md
│ │ ├── groq.md
│ │ ├── image_to_ad_copy.md
│ │ ├── index.md
│ │ ├── knowledge_graph.md
│ │ ├── local_classification.md
│ │ ├── mistral.md
│ │ ├── moderation.md
│ │ ├── multi_modal_gemini.md
│ │ ├── multiple_classification.md
│ │ ├── ollama.md
│ │ ├── open_source.md
│ │ ├── pandas_df.md
│ │ ├── partial_streaming.md
│ │ ├── pii.md
│ │ ├── planning-tasks.md
│ │ ├── recursive.md
│ │ ├── search.md
│ │ ├── self_critique.md
│ │ ├── single_classification.md
│ │ ├── sqlmodel.md
│ │ ├── tables_from_vision.md
│ │ ├── tracing_with_langfuse.md
│ │ ├── using_decimals.md
│ │ ├── watsonx.md
│ │ └── youtube_clips.md
│ ├── faq.md
│ ├── getting-started.md
│ ├── help.md
│ ├── hooks/
│ │ └── hide_lines.py
│ ├── index.md
│ ├── installation.md
│ ├── integrations/
│ │ ├── anthropic.md
│ │ ├── anyscale.md
│ │ ├── azure.md
│ │ ├── bedrock.md
│ │ ├── cerebras.md
│ │ ├── cohere.md
│ │ ├── cortex.md
│ │ ├── databricks.md
│ │ ├── deepseek.md
│ │ ├── fireworks.md
│ │ ├── genai.md
│ │ ├── google.md
│ │ ├── groq.md
│ │ ├── index.md
│ │ ├── litellm.md
│ │ ├── llama-cpp-python.md
│ │ ├── mistral.md
│ │ ├── ollama.md
│ │ ├── openai-responses.md
│ │ ├── openai.md
│ │ ├── openrouter.md
│ │ ├── perplexity.md
│ │ ├── sambanova.md
│ │ ├── together.md
│ │ ├── truefoundry.md
│ │ ├── vertex.md
│ │ ├── writer.md
│ │ └── xai.md
│ ├── javascripts/
│ │ └── katex.js
│ ├── jobs.md
│ ├── learning/
│ │ ├── getting_started/
│ │ │ ├── first_extraction.md
│ │ │ ├── installation.md
│ │ │ ├── response_models.md
│ │ │ └── structured_outputs.md
│ │ ├── index.md
│ │ ├── patterns/
│ │ │ ├── field_validation.md
│ │ │ ├── list_extraction.md
│ │ │ ├── nested_structure.md
│ │ │ ├── optional_fields.md
│ │ │ ├── prompt_templates.md
│ │ │ └── simple_object.md
│ │ ├── streaming/
│ │ │ ├── basics.md
│ │ │ └── lists.md
│ │ └── validation/
│ │ ├── basics.md
│ │ ├── custom_validators.md
│ │ ├── field_level_validation.md
│ │ └── retry_mechanisms.md
│ ├── llms.txt
│ ├── modes-comparison.md
│ ├── newsletter.md
│ ├── overrides/
│ │ └── main.html
│ ├── prompting/
│ │ ├── decomposition/
│ │ │ ├── decomp.md
│ │ │ ├── faithful_cot.md
│ │ │ ├── least_to_most.md
│ │ │ ├── plan_and_solve.md
│ │ │ ├── program_of_thought.md
│ │ │ ├── recurs_of_thought.md
│ │ │ ├── skeleton_of_thought.md
│ │ │ └── tree-of-thought.md
│ │ ├── ensembling/
│ │ │ ├── cosp.md
│ │ │ ├── dense.md
│ │ │ ├── diverse.md
│ │ │ ├── max_mutual_information.md
│ │ │ ├── meta_cot.md
│ │ │ ├── more.md
│ │ │ ├── prompt_paraphrasing.md
│ │ │ ├── self_consistency.md
│ │ │ ├── universal_self_consistency.md
│ │ │ └── usp.md
│ │ ├── few_shot/
│ │ │ ├── cosp.md
│ │ │ ├── example_generation/
│ │ │ │ └── sg_icl.md
│ │ │ ├── example_ordering.md
│ │ │ └── exemplar_selection/
│ │ │ ├── knn.md
│ │ │ └── vote_k.md
│ │ ├── index.md
│ │ ├── self_criticism/
│ │ │ ├── chain_of_verification.md
│ │ │ ├── cumulative_reason.md
│ │ │ ├── reversecot.md
│ │ │ ├── self_calibration.md
│ │ │ ├── self_refine.md
│ │ │ └── self_verification.md
│ │ ├── thought_generation/
│ │ │ ├── chain_of_thought_few_shot/
│ │ │ │ ├── active_prompt.md
│ │ │ │ ├── auto_cot.md
│ │ │ │ ├── complexity_based.md
│ │ │ │ ├── contrastive.md
│ │ │ │ ├── memory_of_thought.md
│ │ │ │ ├── prompt_mining.md
│ │ │ │ └── uncertainty_routed_cot.md
│ │ │ └── chain_of_thought_zero_shot/
│ │ │ ├── analogical_prompting.md
│ │ │ ├── step_back_prompting.md
│ │ │ ├── tab_cot.md
│ │ │ └── thread_of_thought.md
│ │ └── zero_shot/
│ │ ├── emotion_prompting.md
│ │ ├── rar.md
│ │ ├── re2.md
│ │ ├── role_prompting.md
│ │ ├── s2a.md
│ │ ├── self_ask.md
│ │ ├── simtom.md
│ │ └── style_prompting.md
│ ├── repository-overview.md
│ ├── start-here.md
│ ├── templates/
│ │ └── provider_template.md
│ ├── tutorials/
│ │ ├── 1-introduction.ipynb
│ │ ├── 2-tips.ipynb
│ │ ├── 3-0-applications-rag.ipynb
│ │ ├── 3-1-validation-rag.ipynb
│ │ ├── 4-validation.ipynb
│ │ ├── 5-knowledge-graphs.ipynb
│ │ ├── 6-chain-of-density.ipynb
│ │ ├── 7-synthetic-data-generation.ipynb
│ │ └── index.md
│ └── why.md
├── ellipsis.yaml
├── examples/
│ ├── __init__.py
│ ├── anthropic/
│ │ └── run.py
│ ├── anthropic-web-tool/
│ │ └── run.py
│ ├── asyncio-benchmarks/
│ │ └── run.py
│ ├── auto-ticketer/
│ │ └── run.py
│ ├── automodel/
│ │ └── run.py
│ ├── avail/
│ │ ├── run.py
│ │ └── run_mixtral.py
│ ├── batch-classification/
│ │ ├── run-cache.py
│ │ ├── run.py
│ │ └── run_langsmith.py
│ ├── batch_api/
│ │ ├── README.md
│ │ ├── in_memory_batch_example.py
│ │ └── run_batch_test.py
│ ├── caching/
│ │ ├── example_diskcache.py
│ │ ├── example_redis.py
│ │ ├── lru.py
│ │ └── run.py
│ ├── caching_prototype/
│ │ ├── README.md
│ │ └── run_real.py
│ ├── chain-of-density/
│ │ ├── Readme.md
│ │ ├── chain_of_density.py
│ │ ├── finetune.py
│ │ └── requirements.txt
│ ├── citation_with_extraction/
│ │ ├── Dockerfile
│ │ ├── README.md
│ │ ├── citation_fuzzy_match.py
│ │ ├── diagram.py
│ │ ├── main.py
│ │ ├── modal_main.py
│ │ └── requirements.txt
│ ├── citations/
│ │ └── run.py
│ ├── classification/
│ │ ├── classifiy_with_validation.py
│ │ ├── multi_prediction.py
│ │ └── simple_prediction.py
│ ├── codegen-from-schema/
│ │ ├── create_fastapi_app.py
│ │ ├── input.json
│ │ ├── models.py
│ │ ├── readme.md
│ │ └── run.py
│ ├── cohere/
│ │ └── cohere.py
│ ├── crm/
│ │ └── run.py
│ ├── decimals/
│ │ └── run.py
│ ├── distilations/
│ │ ├── math_finetunes_val.jsonl
│ │ ├── readme.md
│ │ ├── three_digit_mul.py
│ │ └── three_digit_mul_dispatch.py
│ ├── evals/
│ │ ├── eval.py
│ │ ├── models.py
│ │ ├── stats_dict.py
│ │ ├── streamlit.py
│ │ └── test.jsonl
│ ├── extract-table/
│ │ ├── run_vision.py
│ │ ├── run_vision_langsmith.py
│ │ ├── run_vision_org.py
│ │ ├── run_vision_org_table.py
│ │ ├── run_vision_receipt.py
│ │ └── test.py
│ ├── extracting-pii/
│ │ └── run.py
│ ├── fastapi_app/
│ │ ├── __init__.py
│ │ ├── main.py
│ │ └── script.py
│ ├── fizzbuzz/
│ │ └── run.py
│ ├── gpt-engineer/
│ │ ├── changes.diff
│ │ ├── generate.py
│ │ ├── program.json
│ │ └── refactor.py
│ ├── groq/
│ │ ├── groq_example.py
│ │ └── groq_example2.py
│ ├── hooks/
│ │ ├── README.md
│ │ └── run.py
│ ├── iterables/
│ │ └── run.py
│ ├── knowledge-graph/
│ │ ├── run.py
│ │ └── run_stream.py
│ ├── learn-async/
│ │ └── run.py
│ ├── llm-judge-relevance/
│ │ └── run.py
│ ├── logfire/
│ │ ├── classify.py
│ │ ├── image.py
│ │ ├── requirements.txt
│ │ └── validate.py
│ ├── logfire-fastapi/
│ │ ├── Readme.md
│ │ ├── requirements.txt
│ │ ├── server.py
│ │ └── test.py
│ ├── logging/
│ │ └── run.py
│ ├── match_language/
│ │ ├── run_v1.py
│ │ └── run_v2.py
│ ├── mistral/
│ │ └── mistral.py
│ ├── multi-actions/
│ │ └── run.py
│ ├── multiple_search_queries/
│ │ ├── diagram.py
│ │ └── segment_search_queries.py
│ ├── open_source_examples/
│ │ ├── README.md
│ │ ├── openrouter.py
│ │ ├── perplexity.py
│ │ └── runpod.py
│ ├── openai/
│ │ ├── __init__.py
│ │ └── run.py
│ ├── openai-audio/
│ │ └── run.py
│ ├── parallel/
│ │ └── run.py
│ ├── partial_streaming/
│ │ ├── benchmark.py
│ │ └── run.py
│ ├── patching/
│ │ ├── anyscale.py
│ │ ├── oai.py
│ │ ├── pcalls.py
│ │ └── together.py
│ ├── proscons/
│ │ └── run.py
│ ├── query_planner_execution/
│ │ ├── diagram.py
│ │ └── query_planner_execution.py
│ ├── recursive_filepaths/
│ │ ├── diagram.py
│ │ └── parse_recursive_paths.py
│ ├── reranker/
│ │ └── run.py
│ ├── resolving-complex-entities/
│ │ └── run.py
│ ├── retry/
│ │ └── run.py
│ ├── safer_sql_example/
│ │ ├── diagram.py
│ │ └── safe_sql.py
│ ├── simple-extraction/
│ │ ├── maybe_user.py
│ │ └── user.py
│ ├── situate_context/
│ │ └── run.py
│ ├── sqlmodel/
│ │ ├── run.py
│ │ └── test_basic.py
│ ├── stream_action_items/
│ │ └── run.py
│ ├── synethic-data/
│ │ └── run.py
│ ├── task_planner/
│ │ ├── diagram.py
│ │ └── task_planner_topological_sort.py
│ ├── tenacity-benchmarks/
│ │ └── run.py
│ ├── timestamps/
│ │ └── run.py
│ ├── union/
│ │ └── run.py
│ ├── validated-multiclass/
│ │ ├── output.json
│ │ └── run.py
│ ├── validators/
│ │ ├── allm_validator.py
│ │ ├── annotator.py
│ │ ├── chain_of_thought_validator.py
│ │ ├── citations.py
│ │ ├── competitors.py
│ │ ├── field_validator.py
│ │ ├── just_a_guy.py
│ │ ├── llm_validator.py
│ │ ├── moderation.py
│ │ └── readme.md
│ ├── vision/
│ │ ├── image_to_ad_copy.py
│ │ ├── run.py
│ │ ├── run_raw.py
│ │ ├── run_table.py
│ │ └── slides.py
│ ├── watsonx/
│ │ └── watsonx.py
│ ├── youtube/
│ │ └── run.py
│ ├── youtube-clips/
│ │ └── run.py
│ └── youtube-flashcards/
│ └── run.py
├── github_issue.md
├── instructor/
│ ├── __init__.py
│ ├── _types/
│ │ ├── __init__.py
│ │ └── _alias.py
│ ├── auto_client.py
│ ├── batch/
│ │ ├── __init__.py
│ │ ├── models.py
│ │ ├── processor.py
│ │ ├── providers/
│ │ │ ├── __init__.py
│ │ │ ├── anthropic.py
│ │ │ ├── base.py
│ │ │ └── openai.py
│ │ ├── request.py
│ │ └── utils.py
│ ├── cache/
│ │ └── __init__.py
│ ├── cli/
│ │ ├── __init__.py
│ │ ├── batch.py
│ │ ├── cli.py
│ │ ├── deprecated_hub.py
│ │ ├── files.py
│ │ ├── jobs.py
│ │ └── usage.py
│ ├── client.py
│ ├── core/
│ │ ├── __init__.py
│ │ ├── client.py
│ │ ├── exceptions.py
│ │ ├── hooks.py
│ │ ├── patch.py
│ │ └── retry.py
│ ├── distil.py
│ ├── dsl/
│ │ ├── __init__.py
│ │ ├── citation.py
│ │ ├── iterable.py
│ │ ├── json_tracker.py
│ │ ├── maybe.py
│ │ ├── parallel.py
│ │ ├── partial.py
│ │ ├── response_list.py
│ │ ├── simple_type.py
│ │ └── validators.py
│ ├── exceptions.py
│ ├── function_calls.py
│ ├── hooks.py
│ ├── mode.py
│ ├── models.py
│ ├── multimodal.py
│ ├── patch.py
│ ├── process_response.py
│ ├── processing/
│ │ ├── __init__.py
│ │ ├── function_calls.py
│ │ ├── multimodal.py
│ │ ├── response.py
│ │ ├── schema.py
│ │ └── validators.py
│ ├── providers/
│ │ ├── README.md
│ │ ├── __init__.py
│ │ ├── anthropic/
│ │ │ ├── __init__.py
│ │ │ ├── client.py
│ │ │ └── utils.py
│ │ ├── bedrock/
│ │ │ ├── __init__.py
│ │ │ ├── client.py
│ │ │ └── utils.py
│ │ ├── cerebras/
│ │ │ ├── __init__.py
│ │ │ ├── client.py
│ │ │ └── utils.py
│ │ ├── cohere/
│ │ │ ├── __init__.py
│ │ │ ├── client.py
│ │ │ └── utils.py
│ │ ├── fireworks/
│ │ │ ├── __init__.py
│ │ │ ├── client.py
│ │ │ └── utils.py
│ │ ├── gemini/
│ │ │ ├── __init__.py
│ │ │ ├── client.py
│ │ │ └── utils.py
│ │ ├── genai/
│ │ │ ├── __init__.py
│ │ │ └── client.py
│ │ ├── groq/
│ │ │ ├── __init__.py
│ │ │ └── client.py
│ │ ├── mistral/
│ │ │ ├── __init__.py
│ │ │ ├── client.py
│ │ │ └── utils.py
│ │ ├── openai/
│ │ │ ├── __init__.py
│ │ │ └── utils.py
│ │ ├── perplexity/
│ │ │ ├── __init__.py
│ │ │ ├── client.py
│ │ │ └── utils.py
│ │ ├── vertexai/
│ │ │ ├── __init__.py
│ │ │ └── client.py
│ │ ├── writer/
│ │ │ ├── __init__.py
│ │ │ ├── client.py
│ │ │ └── utils.py
│ │ └── xai/
│ │ ├── __init__.py
│ │ ├── client.py
│ │ └── utils.py
│ ├── py.typed
│ ├── templating.py
│ ├── utils/
│ │ ├── __init__.py
│ │ ├── core.py
│ │ └── providers.py
│ ├── validation/
│ │ ├── __init__.py
│ │ ├── async_validators.py
│ │ └── llm_validators.py
│ └── validators.py
├── mkdocs.yml
├── pyproject.toml
├── requirements-doc.txt
├── requirements-examples.txt
├── requirements.txt
├── scripts/
│ ├── README.md
│ ├── audit_patterns.py
│ ├── check_blog_excerpts.py
│ ├── check_links.py
│ ├── fix_api_calls.py
│ ├── fix_doc_tests.py
│ ├── fix_old_patterns.py
│ ├── make_clean.py
│ ├── make_desc.py
│ ├── make_sitemap.py
│ ├── validate_headings.py
│ └── validate_meta_tags.py
├── sitemap.yaml
├── tests/
│ ├── __init__.py
│ ├── conftest.py
│ ├── docs/
│ │ ├── _concept_groups.py
│ │ ├── _example_groups.py
│ │ ├── conftest.py
│ │ ├── test_concepts.py
│ │ ├── test_concepts_advanced.py
│ │ ├── test_concepts_operations.py
│ │ ├── test_concepts_providers.py
│ │ ├── test_docs.py
│ │ ├── test_examples.py
│ │ ├── test_examples_batch.py
│ │ ├── test_examples_integrations.py
│ │ ├── test_examples_multimodal.py
│ │ ├── test_examples_providers.py
│ │ ├── test_hub.py
│ │ ├── test_mkdocs.py
│ │ ├── test_posts.py
│ │ └── test_prompt_tips.py
│ ├── dsl/
│ │ ├── test_gemini_tools_async_streaming.py
│ │ ├── test_partial.py
│ │ ├── test_simple_type.py
│ │ └── test_simple_type_fix.py
│ ├── genai/
│ │ └── test_safety_settings.py
│ ├── llm/
│ │ ├── __init__.py
│ │ ├── shared_config.py
│ │ ├── test_anthropic/
│ │ │ ├── __init__.py
│ │ │ ├── conftest.py
│ │ │ ├── test_multimodal.py
│ │ │ ├── test_reasoning.py
│ │ │ ├── test_system.py
│ │ │ └── util.py
│ │ ├── test_bedrock/
│ │ │ ├── conftest.py
│ │ │ ├── test_bedrock_native_passthrough.py
│ │ │ ├── test_normalize.py
│ │ │ ├── test_openai_image_conversion.py
│ │ │ └── test_prepare_kwargs.py
│ │ ├── test_core_providers/
│ │ │ ├── README.md
│ │ │ ├── __init__.py
│ │ │ ├── capabilities.py
│ │ │ ├── conftest.py
│ │ │ ├── test_basic_extraction.py
│ │ │ ├── test_response_modes.py
│ │ │ ├── test_retries.py
│ │ │ ├── test_simple_types.py
│ │ │ ├── test_streaming.py
│ │ │ └── test_validation.py
│ │ ├── test_gemini/
│ │ │ ├── __init__.py
│ │ │ ├── conftest.py
│ │ │ ├── evals/
│ │ │ │ ├── __init__.py
│ │ │ │ └── test_extract_users.py
│ │ │ ├── test_list_content.py
│ │ │ ├── test_multimodal_content.py
│ │ │ └── util.py
│ │ ├── test_genai/
│ │ │ ├── __init__.py
│ │ │ ├── conftest.py
│ │ │ ├── test_decimal.py
│ │ │ ├── test_format.py
│ │ │ ├── test_invalid_schema.py
│ │ │ ├── test_reask.py
│ │ │ ├── test_schema_conversion.py
│ │ │ ├── test_utils.py
│ │ │ └── util.py
│ │ ├── test_litellm.py
│ │ ├── test_new_client.py
│ │ ├── test_openai/
│ │ │ ├── __init__.py
│ │ │ ├── conftest.py
│ │ │ ├── slow/
│ │ │ │ └── test_response.py
│ │ │ ├── test_attr.py
│ │ │ ├── test_hooks.py
│ │ │ ├── test_multimodal.py
│ │ │ ├── test_multitask.py
│ │ │ ├── test_patch.py
│ │ │ ├── test_validation_context.py
│ │ │ ├── test_validators.py
│ │ │ └── util.py
│ │ ├── test_vertexai/
│ │ │ ├── __init__.py
│ │ │ ├── conftest.py
│ │ │ ├── test_deprecated_async.py
│ │ │ ├── test_format.py
│ │ │ ├── test_message_parser.py
│ │ │ ├── test_modes.py
│ │ │ └── util.py
│ │ └── test_writer/
│ │ ├── __init__.py
│ │ ├── conftest.py
│ │ ├── evals/
│ │ │ ├── __init__.py
│ │ │ ├── test_classification_enums.py
│ │ │ ├── test_classification_literals.py
│ │ │ ├── test_entities.py
│ │ │ ├── test_extract_users.py
│ │ │ └── test_sentiment_analysis.py
│ │ ├── test_format_common_models.py
│ │ ├── test_format_difficult_models.py
│ │ └── util.py
│ ├── processing/
│ │ └── test_anthropic_json.py
│ ├── test_auto_client.py
│ ├── test_batch_in_memory.py
│ ├── test_cache_integration.py
│ ├── test_cache_key.py
│ ├── test_dict_operations.py
│ ├── test_dict_operations_validation.py
│ ├── test_dynamic_model_creation.py
│ ├── test_exception_backwards_compat.py
│ ├── test_exceptions.py
│ ├── test_fizzbuzz_fix.py
│ ├── test_formatting.py
│ ├── test_function_calls.py
│ ├── test_genai_config_merging.py
│ ├── test_genai_reask.py
│ ├── test_json_extraction.py
│ ├── test_json_extraction_edge_cases.py
│ ├── test_list_response.py
│ ├── test_list_response_wrapper.py
│ ├── test_logging.py
│ ├── test_message_processing.py
│ ├── test_multimodal.py
│ ├── test_multitask.py
│ ├── test_patch.py
│ ├── test_process_response.py
│ ├── test_response_model_conversion.py
│ ├── test_retry_json_mode.py
│ ├── test_schema.py
│ ├── test_schema_utils.py
│ ├── test_simple_types.py
│ ├── test_streaming_reask_bug.py
│ ├── test_utils.py
│ ├── test_xai_optional_dependency.py
│ └── v2/
│ └── test_provider_modes.py
├── ty-tests.toml
└── ty.toml
================================================
FILE CONTENTS
================================================
================================================
FILE: .coveragerc
================================================
[run]
source =
instructor/
omit =
instructor/cli/*
================================================
FILE: .cursor/rules/documentation-sync.mdc
================================================
---
description: when making code changes or adding documentation
globs: ["*.py", "*.md"]
alwaysApply: true
---
- When making code changes:
- Update related documentation files to reflect the changes
- Check docstrings and type hints are up to date
- Update any example code in markdown files
- Review README.md if the changes affect installation or usage
- When creating new markdown files:
- Add the file to mkdocs.yml under the appropriate section
- Follow the existing hierarchy and indentation
- Use descriptive nav titles
- Example:
```yaml
nav:
- Home: index.md
- Guides:
- Getting Started: guides/getting-started.md
- Your New File: guides/your-new-file.md
```
- For API documentation:
- Ensure new functions/classes are documented
- Include type hints and docstrings
- Add usage examples
- Update API reference docs if auto-generated
- Documentation Quality:
- Write at grade 10 reading level (see simple-language.mdc)
- Include working code examples
- Add links to related documentation
- Use consistent formatting and style
================================================
FILE: .cursor/rules/followups.mdc
================================================
---
description: when AI agents are collaborating on code
globs: "*"
alwaysApply: true
---
Make sure to come up with follow-up hotkeys. They should be thoughtful and actionable, and each should result in a small additional code change based on the context you have available.
Label them [J], [K], and [L].
================================================
FILE: .cursor/rules/new-features-planning.mdc
================================================
---
description: when asked to implement new features or clients
globs: *.py
alwaysApply: true
---
- When asked to implement a new feature, check out a new branch from main and make incremental commits
- Use conventional commit format: `<type>(<scope>): <description>`
- Types: feat, fix, docs, style, refactor, perf, test, chore
- Example: `feat(validation): add email validation function`
- Keep commits focused on a single change
- Write descriptive commit messages in imperative mood
- Use `git commit -m "type(scope): subject" -m "body" -m "footer"` for multiline commits
- If the feature is very large, create a temporary `todo.md`
- And start a pull request using `gh`
- Create PRs with multiline bodies using:
```bash
gh pr create --title "feat(component): add new feature" --body "$(cat <<EOF
## Description
Detailed explanation of the changes
## Changes
- List important changes
- Another change
## Testing
How this was tested
This PR was written by [Cursor](cursor.com)
EOF
)" -r jxnl,ivanleomk
```
- Or use the `-F` flag with a file: `gh pr create -F pr_body.md`
- Make sure to include `This PR was written by [Cursor](https://cursor.com)`
- Add default reviewers:
- Use `gh pr edit <id> --add-reviewer jxnl,ivanleomk`
- Or include `-r jxnl,ivanleomk` when creating the PR
- Use `gh pr view <id> --comments | cat` to view all the comments
- For PR updates:
- Do not directly commit to an existing PR branch
- Instead, create a new PR that builds on top of the original PR's branch
- This creates a "stacked PR" pattern where:
1. The original PR (base) contains the initial changes
2. The new PR (stack) contains only the review-related updates
3. Once the base PR is merged, the stack can be rebased onto main
================================================
FILE: .cursor/rules/readme.md
================================================
# Cursor Rules
Cursor rules are configuration files that help guide AI-assisted development in the Cursor IDE. They provide structured instructions for how the AI should behave in specific contexts or when working with certain types of files.
## What is Cursor?
[Cursor](https://cursor.sh) is an AI-powered IDE that helps developers write, understand, and maintain code more efficiently. It integrates AI capabilities directly into the development workflow, providing features like:
- AI-assisted code completion
- Natural language code generation
- Intelligent code explanations
- Automated refactoring suggestions
## Understanding Cursor Rules
Cursor rules are defined in `.mdc` files within the `.cursor/rules` directory. Each rule file follows a specific naming convention: lowercase names with the `.mdc` extension (e.g., `simple-language.mdc`).
Each rule file contains:
1. **Metadata Header**: YAML frontmatter that defines:
```yaml
---
description: when to apply this rule
globs: file patterns to match (e.g., "*.py", "*.md", or "*" for all files)
alwaysApply: true/false # whether to apply automatically
---
```
2. **Rule Content**: Markdown-formatted instructions that guide the AI's behavior
## Available Rules
Currently, the following rules are defined:
### `simple-language.mdc`
- **Purpose**: Ensures documentation is written at a grade 10 reading level
- **Applies to**: Markdown files (*.md)
- **Auto Apply**: No
- **Key Requirements**:
- Write at grade 10 reading level
- Ensure code blocks are self-contained with complete imports
### `new-features-planning.mdc`
- **Purpose**: Guides feature implementation workflow
- **Applies to**: Python files (*.py)
- **Auto Apply**: Yes
- **Key Requirements**:
- Create new branch from main
- Make incremental commits
- Create todo.md for large features
- Start pull requests using GitHub CLI (`gh`)
- Include "This PR was written by [Cursor](https://cursor.sh)" in PRs
### `followups.mdc`
- **Purpose**: Ensures thoughtful follow-up suggestions
- **Applies to**: All files
- **Auto Apply**: Yes
- **Key Requirements**:
- Generate actionable hotkey suggestions using:
- [J]: First follow-up action
- [K]: Second follow-up action
- [L]: Third follow-up action
- Focus on small, contextual code changes
- Suggestions should be thoughtful and actionable
### `documentation-sync.mdc`
- **Purpose**: Maintains documentation consistency with code changes
- **Applies to**: Python and Markdown files (*.py, *.md)
- **Auto Apply**: Yes
- **Key Requirements**:
- Update docs when code changes
- Add new markdown files to mkdocs.yml
- Keep API documentation current
- Maintain documentation quality standards
## Creating New Rules
To create a new rule:
1. Create a `.mdc` file in `.cursor/rules/` using lowercase naming
2. Add YAML frontmatter with required metadata:
```yaml
---
description: when to apply this rule
globs: file patterns to match
alwaysApply: true/false
---
```
3. Write clear, specific instructions in Markdown
4. Test the rule with relevant file types
## Best Practices
- Keep rules focused and specific
- Use clear, actionable language
- Test rules thoroughly before committing
- Document any special requirements or dependencies
- Update rules as project needs evolve
- Use consistent file naming (lowercase with .mdc extension)
- Ensure glob patterns are explicit and documented
================================================
FILE: .cursor/rules/simple-language.mdc
================================================
---
description: when writing documentation
globs: *.md
alwaysApply: false
---
- When writing documents and concepts, make sure that you write at a grade 10 reading level
- Make sure every code block has complete imports and makes no references to previous code blocks; each one needs to be self-contained
================================================
FILE: .cursorignore
================================================
# Add directories or file patterns to ignore during indexing (e.g. foo/ or *.csv)
================================================
FILE: .github/FUNDING.yml
================================================
github: jxnl
================================================
FILE: .github/ISSUE_TEMPLATE/bug_report.md
================================================
---
name: Bug report
about: Create a report to help us improve
---
- [ ] This is actually a bug report.
- [ ] I am not getting good LLM Results
- [ ] I have tried asking for help in the community on discord or discussions and have not received a response.
- [ ] I have tried searching the documentation and have not found an answer.
**What Model are you using?**
- [ ] gpt-3.5-turbo
- [ ] gpt-4-turbo
- [ ] gpt-4
- [ ] Other (please specify)
**Describe the bug**
A clear and concise description of what the bug is.
**To Reproduce**
Steps to reproduce the behavior, including code snippets of the model, the input data, and the OpenAI response.
**Expected behavior**
A clear and concise description of what you expected to happen.
**Screenshots**
If applicable, add screenshots to help explain your problem.
================================================
FILE: .github/ISSUE_TEMPLATE/feature_request.md
================================================
---
name: Feature request
about: Suggest an idea for this project
---
**Is your feature request related to a problem? Please describe.**
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
**Describe the solution you'd like**
A clear and concise description of what you want to happen.
**Describe alternatives you've considered**
A clear and concise description of any alternative solutions or features you've considered.
**Additional context**
Add any other context or screenshots about the feature request here.
================================================
FILE: .github/PULL_REQUEST_TEMPLATE/pull_request_template.md
================================================
> Please use conventional commits to describe your changes. For example, `feat: add new feature` or `fix: fix a bug`. If you are unsure, leave the title as `...` and AI will handle it.
## Describe your changes
...
## Issue ticket number and link
## Checklist before requesting a review
- [ ] I have performed a self-review of my code
- [ ] If it is a core feature, I have added thorough tests.
- [ ] If it is a core feature, I have added documentation.
================================================
FILE: .github/dependabot.yml
================================================
# To get started with Dependabot version updates, you'll need to specify which
# package ecosystems to update and where the package manifests are located.
# Please see the documentation for all configuration options:
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
version: 2
updates:
- package-ecosystem: "pip" # See documentation for possible values
directory: "/" # Location of package manifests
schedule:
interval: "daily"
groups:
poetry:
patterns: ["*"]
================================================
FILE: .github/workflows/ai-label.yml
================================================
name: AI Labeler
on:
issues:
types: [opened, reopened]
pull_request:
types: [opened, reopened]
jobs:
ai-labeler:
runs-on: ubuntu-latest
permissions:
contents: read
issues: write
pull-requests: write
steps:
- uses: actions/checkout@v4
- uses: jlowin/ai-labeler@v0.4.0
with:
include-repo-labels: true
openai-api-key: ${{ secrets.OPENAI_API_KEY }}
================================================
FILE: .github/workflows/evals.yml
================================================
name: Weekly Tests
on:
workflow_dispatch:
schedule:
- cron: "0 0 * * 0" # Runs at 00:00 UTC every Sunday
push:
branches: [main]
paths-ignore:
- "**" # Ignore all paths to ensure it only triggers on schedule
jobs:
weekly-tests:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Install uv
uses: astral-sh/setup-uv@v4
with:
enable-cache: true
- name: Set up Python
run: uv python install 3.11
- name: Install dependencies
run: uv sync --all-extras --dev
- name: Run all tests
run: uv run pytest tests/ --asyncio-mode=auto
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
================================================
FILE: .github/workflows/python-publish.yml
================================================
# This workflow builds and uploads the Python package with uv (`uv build` / `uv publish`) when a release is published
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries
# This workflow uses actions that are not certified by GitHub.
# They are provided by a third-party and are governed by
# separate terms of service, privacy policy, and support
# documentation.
# Builds the distribution and publishes it whenever a GitHub release is published.
name: Upload Python Package
on:
  release:
    types: [published]
permissions:
  contents: read
jobs:
  release:
    runs-on: ubuntu-latest
    steps:
      # checkout@v4 keeps this workflow on the same major version as the
      # other workflows in this repository that already use it.
      - uses: actions/checkout@v4
      - name: Install uv
        uses: astral-sh/setup-uv@v4
        with:
          enable-cache: true
      - name: Set up Python
        run: uv python install 3.10
      - name: Install the project
        run: uv sync --all-extras
      - name: Build the project
        run: uv build
      # The artifacts are produced by the previous step; this step only uploads them.
      - name: Build and publish Python package
        run: uv publish
        env:
          UV_PUBLISH_TOKEN: ${{ secrets.PYPI_TOKEN }}
================================================
FILE: .github/workflows/ruff.yml
================================================
# Lints and format-checks the packages listed in CUSTOM_PACKAGES with Ruff.
name: Ruff
on:
  push:
  pull_request:
    branches: [main]
env:
  WORKING_DIRECTORY: "."
  CUSTOM_PACKAGES: "instructor examples tests"
jobs:
  Ruff:
    runs-on: ubuntu-latest
    steps:
      # checkout@v4 keeps this workflow on the same major version as the
      # other workflows in this repository that already use it.
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Install uv
        uses: astral-sh/setup-uv@v4
        with:
          enable-cache: true
      - name: Set up Python
        run: uv python install 3.9
      - name: Install the project
        run: uv sync --all-extras
      - name: Ruff lint
        run: uv run ruff check ${{ env.CUSTOM_PACKAGES }}
      - name: Ruff format
        run: uv run ruff format --check ${{ env.CUSTOM_PACKAGES }}
================================================
FILE: .github/workflows/scheduled-release.yml
================================================
name: Scheduled Release
on:
schedule:
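# Intended cadence: every 2 weeks on Monday at 9 AM UTC.
# NOTE: cron has no "every other week" syntax; the day-of-week step `1/2` below
# steps through weekdays (Mon/Wed/Fri under GitHub's step semantics), not weeks.
# Gate on week parity inside the job if a true biweekly release is required.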
- cron: '0 9 * * 1/2'
workflow_dispatch: # Allow manual trigger
inputs:
skip_tests:
description: 'Skip LLM tests (use for testing workflow)'
required: false
default: false
type: boolean
dry_run:
description: 'Dry run - dont push changes or create release'
required: false
default: false
type: boolean
jobs:
test-and-release:
runs-on: ubuntu-latest
if: github.ref == 'refs/heads/main'
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
token: ${{ secrets.GITHUB_TOKEN }}
- name: Setup UV
uses: astral-sh/setup-uv@v3
- name: Install dependencies
run: |
uv sync --all-extras --dev
- name: Run linting
run: |
uv run ruff check instructor examples tests
- name: Run type checking
run: |
uv run pyright
- name: Run core tests (no LLM)
run: |
uv run pytest tests/ -k "not openai and not llm and not anthropic and not gemini and not cohere and not mistral and not groq and not vertexai and not xai and not cerebras and not fireworks and not writer and not bedrock and not perplexity and not genai" --tb=short -v --maxfail=10
# Optional: Run LLM tests if you have API keys in secrets
- name: Run LLM tests
if: github.event.inputs.skip_tests != 'true'
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
run: |
echo "Running basic LLM tests if API keys are available..."
# Run a subset of LLM tests to verify basic functionality
if [ ! -z "$OPENAI_API_KEY" ]; then
echo "Testing OpenAI integration..."
uv run pytest tests/llm/test_openai/test_basics.py --tb=short -v --maxfail=1 || echo "OpenAI tests failed"
fi
if [ ! -z "$ANTHROPIC_API_KEY" ]; then
echo "Testing Anthropic integration..."
uv run pytest tests/llm/test_anthropic/test_basics.py --tb=short -v --maxfail=1 || echo "Anthropic tests failed"
fi
echo "LLM tests completed (non-blocking)"
- name: Check for changes since last release
id: changes
run: |
LAST_TAG=$(git describe --tags --abbrev=0 2>/dev/null || echo "")
if [ -z "$LAST_TAG" ]; then
echo "has_changes=true" >> $GITHUB_OUTPUT
echo "last_tag=none" >> $GITHUB_OUTPUT
echo "change_count=initial" >> $GITHUB_OUTPUT
else
CHANGES=$(git rev-list $LAST_TAG..HEAD --count)
echo "has_changes=$([[ $CHANGES -gt 0 ]] && echo true || echo false)" >> $GITHUB_OUTPUT
echo "change_count=$CHANGES" >> $GITHUB_OUTPUT
echo "last_tag=$LAST_TAG" >> $GITHUB_OUTPUT
fi
echo "Last tag: $LAST_TAG"
echo "Changes since last tag: $(git rev-list $LAST_TAG..HEAD --count 2>/dev/null || echo 'N/A')"
# Only proceed with release if tests passed AND there are changes
- name: Get current version
if: steps.changes.outputs.has_changes == 'true'
id: current_version
run: |
VERSION=$(uv run python -c "import tomllib; print(tomllib.load(open('pyproject.toml', 'rb'))['project']['version'])")
echo "version=$VERSION" >> $GITHUB_OUTPUT
echo "Current version: $VERSION"
- name: Determine version bump type
  if: steps.changes.outputs.has_changes == 'true'
  id: version_type
  run: |
    # Pick the bump type from the commit subjects since the last tag.
    # Writes `bump_type` (minor | patch) to the step outputs.
    LAST_TAG="${{ steps.changes.outputs.last_tag }}"
    if [ "$LAST_TAG" = "none" ]; then
      COMMITS=$(git log --oneline HEAD~20..HEAD)
    else
      COMMITS=$(git log --oneline $LAST_TAG..HEAD)
    fi
    echo "Recent commits:"
    echo "$COMMITS"
    # Conventional Commits allow an optional scope between the type and the
    # `!`/`:` marker (e.g. `feat(retry): ...`, `feat(api)!: ...`), so the
    # patterns accept `type`, `type(scope)` and their `!` variants.
    # Breaking changes deliberately keep the existing policy of a minor bump;
    # this step never emits `major`.
    if echo "$COMMITS" | grep -qE '(BREAKING|(feat|fix)(\([^)]*\))?!)'; then
      echo "bump_type=minor" >> $GITHUB_OUTPUT
      echo "Detected breaking changes - using minor bump"
    elif echo "$COMMITS" | grep -qE 'feat(\([^)]*\))?:'; then
      echo "bump_type=minor" >> $GITHUB_OUTPUT
      echo "Detected new features - using minor bump"
    else
      echo "bump_type=patch" >> $GITHUB_OUTPUT
      echo "Using patch bump for bug fixes and chores"
    fi
- name: Bump version
if: steps.changes.outputs.has_changes == 'true'
id: bump_version
run: |
CURRENT="${{ steps.current_version.outputs.version }}"
BUMP_TYPE="${{ steps.version_type.outputs.bump_type }}"
IFS='.' read -r major minor patch <<< "$CURRENT"
case $BUMP_TYPE in
major)
major=$((major + 1))
minor=0
patch=0
;;
minor)
minor=$((minor + 1))
patch=0
;;
patch)
patch=$((patch + 1))
;;
esac
NEW_VERSION="$major.$minor.$patch"
echo "new_version=$NEW_VERSION" >> $GITHUB_OUTPUT
echo "Bumping from $CURRENT to $NEW_VERSION ($BUMP_TYPE)"
# Update pyproject.toml
sed -i "s/version = \"$CURRENT\"/version = \"$NEW_VERSION\"/" pyproject.toml
- name: Update lockfile
if: steps.changes.outputs.has_changes == 'true'
run: |
uv lock
# Run tests again after version bump to make sure nothing broke
- name: Final test run
if: steps.changes.outputs.has_changes == 'true'
run: |
uv sync
uv run pytest tests/ -k "not openai and not llm and not anthropic and not gemini and not cohere and not mistral and not groq and not vertexai and not xai and not cerebras and not fireworks and not writer and not bedrock and not perplexity and not genai" --tb=short --maxfail=5
- name: Generate changelog
if: steps.changes.outputs.has_changes == 'true'
id: changelog
run: |
LAST_TAG="${{ steps.changes.outputs.last_tag }}"
NEW_VERSION="${{ steps.bump_version.outputs.new_version }}"
if [ "$LAST_TAG" = "none" ]; then
CHANGELOG=$(git log --oneline HEAD~30..HEAD --pretty=format:"- %s" | head -20)
else
CHANGELOG=$(git log --oneline $LAST_TAG..HEAD --pretty=format:"- %s")
fi
# Save changelog to file for GitHub release
cat > CHANGELOG.md << EOF
## 🚀 What's Changed
$CHANGELOG
## 🔗 Links
**Full Changelog**: https://github.com/${{ github.repository }}/compare/$LAST_TAG...v$NEW_VERSION
---
🤖 *This release was automatically generated every 2 weeks*
EOF
echo "changelog_file=CHANGELOG.md" >> $GITHUB_OUTPUT
- name: Create release commit
if: steps.changes.outputs.has_changes == 'true'
run: |
git config --local user.email "action@github.com"
git config --local user.name "GitHub Action"
git add pyproject.toml uv.lock
git commit -m "chore: automated release v${{ steps.bump_version.outputs.new_version }}
🤖 Generated with [Claude Code](https://claude.ai/code)
Co-Authored-By: GitHub Action <action@github.com>"
git tag "v${{ steps.bump_version.outputs.new_version }}"
- name: Push changes
if: steps.changes.outputs.has_changes == 'true' && github.event.inputs.dry_run != 'true'
run: |
git push origin main
git push origin "v${{ steps.bump_version.outputs.new_version }}"
- name: Create GitHub Release
if: steps.changes.outputs.has_changes == 'true' && github.event.inputs.dry_run != 'true'
uses: ncipollo/release-action@v1
with:
tag: "v${{ steps.bump_version.outputs.new_version }}"
name: "🚀 Release v${{ steps.bump_version.outputs.new_version }}"
bodyFile: "CHANGELOG.md"
draft: false
prerelease: false
- name: Dry run summary
if: steps.changes.outputs.has_changes == 'true' && github.event.inputs.dry_run == 'true'
run: |
echo "🧪 DRY RUN MODE - No changes pushed"
echo "Would have released: v${{ steps.bump_version.outputs.new_version }}"
cat CHANGELOG.md
# Optional: Publish to PyPI (uncomment if you want automatic PyPI releases)
# - name: Build and publish to PyPI
# if: steps.changes.outputs.has_changes == 'true' && secrets.PYPI_TOKEN != ''
# env:
# PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
# run: |
# uv build
# uv publish --token $PYPI_TOKEN
# Summary outputs
- name: Summary
  if: always()
  run: |
    # Write a run summary. This step runs even after a failure, so the
    # status line is derived from the job status and the dry-run input
    # instead of assuming that "has changes" means "released".
    echo "## 📊 Scheduled Release Summary" >> $GITHUB_STEP_SUMMARY
    echo "- **Branch**: ${{ github.ref }}" >> $GITHUB_STEP_SUMMARY
    echo "- **Has Changes**: ${{ steps.changes.outputs.has_changes }}" >> $GITHUB_STEP_SUMMARY
    echo "- **Change Count**: ${{ steps.changes.outputs.change_count }}" >> $GITHUB_STEP_SUMMARY
    if [ "${{ steps.changes.outputs.has_changes }}" = "true" ]; then
      echo "- **Version**: ${{ steps.current_version.outputs.version }} → ${{ steps.bump_version.outputs.new_version }}" >> $GITHUB_STEP_SUMMARY
      echo "- **Bump Type**: ${{ steps.version_type.outputs.bump_type }}" >> $GITHUB_STEP_SUMMARY
    fi
    if [ "${{ job.status }}" != "success" ]; then
      # An earlier step failed or the run was cancelled: nothing was released.
      echo "- **Status**: ❌ Failed (${{ job.status }})" >> $GITHUB_STEP_SUMMARY
    elif [ "${{ steps.changes.outputs.has_changes }}" != "true" ]; then
      echo "- **Status**: ⏭️ Skipped (no changes)" >> $GITHUB_STEP_SUMMARY
    elif [ "${{ github.event.inputs.dry_run }}" = "true" ]; then
      # Push and release steps are skipped in dry-run mode.
      echo "- **Status**: 🧪 Dry run (nothing pushed)" >> $GITHUB_STEP_SUMMARY
    else
      echo "- **Status**: ✅ Released" >> $GITHUB_STEP_SUMMARY
    fi
- name: Notify on failure
if: failure()
run: |
echo "❌ Scheduled release failed - check the logs above"
echo "Common issues:"
echo "- Tests failed"
echo "- Linting issues"
echo "- Type checking errors"
echo "- Git push permissions"
================================================
FILE: .github/workflows/test.yml
================================================
name: Test
on:
pull_request:
push:
branches:
- main
jobs:
# Core tests without LLM providers
core-tests:
name: Core Tests
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Install uv
uses: astral-sh/setup-uv@v4
with:
enable-cache: true
- name: Set up Python
run: uv python install 3.11
- name: Install the project
run: uv sync --all-extras
- name: Run core tests
run: >-
uv run pytest tests/ --asyncio-mode=auto -n auto
-k 'not test_core_providers and not test_openai and not test_anthropic
and not test_gemini and not test_genai and not test_writer and not
test_vertexai and not docs'
env:
INSTRUCTOR_ENV: CI
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
XAI_API_KEY: ${{ secrets.XAI_API_KEY }}
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
# Core provider tests for OpenAI
core-openai:
name: Core Provider Tests (OpenAI)
runs-on: ubuntu-latest
needs: core-tests
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
steps:
- uses: actions/checkout@v2
- name: Install uv
uses: astral-sh/setup-uv@v4
with:
enable-cache: true
- name: Set up Python
run: uv python install 3.11
- name: Install the project
run: uv sync --all-extras
- name: Skip core provider tests (OpenAI)
if: ${{ env.OPENAI_API_KEY == '' }}
run: echo "Skipping OpenAI core provider tests (missing OPENAI_API_KEY)."
- name: Run core provider tests (OpenAI)
if: ${{ env.OPENAI_API_KEY != '' }}
run: |
set +e
uv run pytest tests/llm/test_core_providers -v --asyncio-mode=auto -n auto -k "openai"
status=$?
set -e
if [ $status -eq 5 ]; then
echo "No tests collected; treating as success."
exit 0
fi
exit $status
env:
INSTRUCTOR_ENV: CI
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
# Core provider tests for Anthropic
core-anthropic:
name: Core Provider Tests (Anthropic)
runs-on: ubuntu-latest
needs: core-tests
env:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
steps:
- uses: actions/checkout@v2
- name: Install uv
uses: astral-sh/setup-uv@v4
with:
enable-cache: true
- name: Set up Python
run: uv python install 3.11
- name: Install the project
run: uv sync --all-extras
- name: Skip core provider tests (Anthropic)
if: ${{ env.ANTHROPIC_API_KEY == '' }}
run: echo "Skipping Anthropic core provider tests (missing ANTHROPIC_API_KEY)."
- name: Run core provider tests (Anthropic)
if: ${{ env.ANTHROPIC_API_KEY != '' }}
run: |
set +e
uv run pytest tests/llm/test_core_providers -v --asyncio-mode=auto -n auto -k "anthropic"
status=$?
set -e
if [ $status -eq 5 ]; then
echo "No tests collected; treating as success."
exit 0
fi
exit $status
env:
INSTRUCTOR_ENV: CI
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
# Core provider tests for Google
core-google:
name: Core Provider Tests (Google)
runs-on: ubuntu-latest
needs: core-tests
env:
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
GOOGLE_GENAI_MODEL: ${{ secrets.GOOGLE_GENAI_MODEL }}
steps:
- uses: actions/checkout@v2
- name: Install uv
uses: astral-sh/setup-uv@v4
with:
enable-cache: true
- name: Set up Python
run: uv python install 3.11
- name: Install the project
run: uv sync --all-extras
- name: Skip core provider tests (Google)
if: ${{ env.GOOGLE_API_KEY == '' || env.GOOGLE_GENAI_MODEL == '' }}
run: echo "Skipping Google core provider tests (missing GOOGLE_API_KEY or GOOGLE_GENAI_MODEL)."
- name: Run core provider tests (Google)
if: ${{ env.GOOGLE_API_KEY != '' && env.GOOGLE_GENAI_MODEL != '' }}
run: |
set +e
uv run pytest tests/llm/test_core_providers -v --asyncio-mode=auto -n auto -k "google"
status=$?
set -e
if [ $status -eq 5 ]; then
echo "No tests collected; treating as success."
exit 0
fi
exit $status
env:
INSTRUCTOR_ENV: CI
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
# Core provider tests for other providers
core-other:
name: Core Provider Tests (Other)
runs-on: ubuntu-latest
needs: core-tests
env:
COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
XAI_API_KEY: ${{ secrets.XAI_API_KEY }}
MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
CEREBRAS_API_KEY: ${{ secrets.CEREBRAS_API_KEY }}
FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
WRITER_API_KEY: ${{ secrets.WRITER_API_KEY }}
PERPLEXITY_API_KEY: ${{ secrets.PERPLEXITY_API_KEY }}
steps:
- uses: actions/checkout@v2
- name: Install uv
uses: astral-sh/setup-uv@v4
with:
enable-cache: true
- name: Set up Python
run: uv python install 3.11
- name: Install the project
run: uv sync --all-extras
- name: Skip core provider tests (Other)
if: >-
${{ env.COHERE_API_KEY == '' && env.XAI_API_KEY == ''
&& env.MISTRAL_API_KEY == '' && env.CEREBRAS_API_KEY == ''
&& env.FIREWORKS_API_KEY == '' && env.WRITER_API_KEY == ''
&& env.PERPLEXITY_API_KEY == '' }}
run: echo "Skipping core provider tests (Other) (missing provider secrets)."
- name: Run core provider tests (Cohere, xAI, Mistral, etc)
if: >-
${{ env.COHERE_API_KEY != '' || env.XAI_API_KEY != ''
|| env.MISTRAL_API_KEY != '' || env.CEREBRAS_API_KEY != ''
|| env.FIREWORKS_API_KEY != '' || env.WRITER_API_KEY != ''
|| env.PERPLEXITY_API_KEY != '' }}
run: |
set +e
uv run pytest tests/llm/test_core_providers -v --asyncio-mode=auto -n auto -k "cohere or xai or mistral or cerebras or fireworks or writer or perplexity"
status=$?
set -e
if [ $status -eq 5 ]; then
echo "No tests collected; treating as success."
exit 0
fi
exit $status
env:
INSTRUCTOR_ENV: CI
COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
XAI_API_KEY: ${{ secrets.XAI_API_KEY }}
MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
CEREBRAS_API_KEY: ${{ secrets.CEREBRAS_API_KEY }}
FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
WRITER_API_KEY: ${{ secrets.WRITER_API_KEY }}
PERPLEXITY_API_KEY: ${{ secrets.PERPLEXITY_API_KEY }}
# Provider tests run in parallel
provider-tests:
name: ${{ matrix.provider.name }} Tests
runs-on: ubuntu-latest
needs: [core-openai, core-anthropic, core-google, core-other]
env:
PROVIDER_API_KEY: ${{ secrets[matrix.provider.env_key] }}
GOOGLE_GENAI_MODEL: ${{ secrets.GOOGLE_GENAI_MODEL }}
strategy:
fail-fast: false
matrix:
provider:
- name: OpenAI
env_key: OPENAI_API_KEY
test_path: tests/llm/test_openai
- name: Anthropic
env_key: ANTHROPIC_API_KEY
test_path: tests/llm/test_anthropic
- name: Gemini
env_key: GOOGLE_API_KEY
test_path: tests/llm/test_gemini
- name: Google GenAI
env_key: GOOGLE_API_KEY
test_path: tests/llm/test_genai
- name: Vertex AI
env_key: GOOGLE_API_KEY
test_path: tests/llm/test_vertexai
- name: Writer
env_key: WRITER_API_KEY
test_path: tests/llm/test_writer
steps:
- uses: actions/checkout@v2
- name: Install uv
uses: astral-sh/setup-uv@v4
with:
enable-cache: true
- name: Set up Python
run: uv python install 3.11
- name: Install the project
run: uv sync --all-extras
- name: Skip ${{ matrix.provider.name }} tests
if: >-
${{ env.PROVIDER_API_KEY == '' ||
((matrix.provider.name == 'Gemini' || matrix.provider.name == 'Google GenAI'
|| matrix.provider.name == 'Vertex AI') && env.GOOGLE_GENAI_MODEL == '') }}
run: >-
echo "Skipping ${{ matrix.provider.name }} tests
(missing ${{ matrix.provider.env_key }} or GOOGLE_GENAI_MODEL)."
- name: Run ${{ matrix.provider.name }} tests
if: >-
${{ env.PROVIDER_API_KEY != '' &&
((matrix.provider.name != 'Gemini' && matrix.provider.name != 'Google GenAI'
&& matrix.provider.name != 'Vertex AI') || env.GOOGLE_GENAI_MODEL != '') }}
run: |
set +e
uv run pytest ${{ matrix.provider.test_path }} --asyncio-mode=auto -n auto
status=$?
set -e
if [ $status -eq 5 ]; then
echo "No tests collected; treating as success."
exit 0
fi
exit $status
env:
INSTRUCTOR_ENV: CI
${{ matrix.provider.env_key }}: ${{ secrets[matrix.provider.env_key] }}
# Auto client needs multiple providers
auto-client-test:
name: Auto Client Tests
runs-on: ubuntu-latest
needs: [core-openai, core-anthropic, core-google, core-other]
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
XAI_API_KEY: ${{ secrets.XAI_API_KEY }}
steps:
- uses: actions/checkout@v2
- name: Install uv
uses: astral-sh/setup-uv@v4
with:
enable-cache: true
- name: Set up Python
run: uv python install 3.11
- name: Install the project
run: uv sync --all-extras
- name: Skip Auto Client tests
if: >-
${{ env.OPENAI_API_KEY == '' || env.GOOGLE_API_KEY == ''
|| env.COHERE_API_KEY == '' || env.ANTHROPIC_API_KEY == ''
|| env.XAI_API_KEY == '' }}
run: echo "Skipping Auto Client tests (missing one or more provider secrets)."
- name: Run Auto Client tests
if: >-
${{ env.OPENAI_API_KEY != '' && env.GOOGLE_API_KEY != ''
&& env.COHERE_API_KEY != '' && env.ANTHROPIC_API_KEY != ''
&& env.XAI_API_KEY != '' }}
run: |
set +e
uv run pytest tests/test_auto_client.py --asyncio-mode=auto -n auto
status=$?
set -e
if [ $status -eq 5 ]; then
echo "No tests collected; treating as success."
exit 0
fi
exit $status
env:
INSTRUCTOR_ENV: CI
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
XAI_API_KEY: ${{ secrets.XAI_API_KEY }}
================================================
FILE: .github/workflows/test_docs.yml
================================================
# Monthly run of the documentation tests under tests/docs.
name: Test Docs
on:
  schedule:
    - cron: '0 0 1 * *' # Runs at 00:00 on the 1st of every month
jobs:
  release:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.11"]
    steps:
      - uses: actions/checkout@v4
      - name: Install system dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y graphviz libcairo2-dev xdg-utils
      # Dependencies are installed and tests are run through uv, so the
      # interpreter is provisioned with uv as well (same pattern as the other
      # workflows). The previous Poetry install and `cache: "poetry"` setup
      # were unused by the later steps and depend on a poetry.lock file.
      # NOTE(review): confirm no poetry.lock is expected in this repository.
      - name: Install uv
        uses: astral-sh/setup-uv@v4
        with:
          enable-cache: true
      - name: Set up Python ${{ matrix.python-version }}
        run: uv python install ${{ matrix.python-version }}
      - name: Install the project
        run: uv sync --all-extras
      - name: Run tests
        run: uv run pytest tests/docs --asyncio-mode=auto
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
================================================
FILE: .github/workflows/ty.yml
================================================
# Type-checks the library and the test suite with ty.
name: ty
on:
  pull_request:
    branches: [main]
  push:
    branches: [main]
env:
  WORKING_DIRECTORY: "."
jobs:
  type-check:
    runs-on: ubuntu-latest
    steps:
      # checkout@v4 keeps this workflow on the same major version as the
      # other workflows in this repository that already use it.
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Install uv
        uses: astral-sh/setup-uv@v4
        with:
          enable-cache: true
      - name: Set up Python
        run: uv python install 3.11
      - name: Install the project
        run: uv sync --all-extras
      - name: Run type check with ty
        run: uv run ty check instructor/
      # The tests are checked with their own configuration file.
      - name: Run type check with ty (tests)
        run: uv run ty check --config-file ty-tests.toml tests
================================================
FILE: .gitignore
================================================
.DS_Store
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
.envrc
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/
.vscode/
examples/citation_with_extraction/fly.toml
my_cache_directory/
tutorials/wandb/*
tutorials/results.csv
tutorials/results.jsonl
tutorials/results.jsonlines
tutorials/schema.json
wandb/settings
math_finetunes.jsonl
pr_body.md
check_zero_width_chars.py
# Suggestion files from architectural analysis
*_SUGGESTIONS.md
ORGANIZED_SUGGESTIONS.md
================================================
FILE: .grit/.gitignore
================================================
.gritmodules
*.log
================================================
FILE: .grit/grit.yaml
================================================
version: 0.0.1
patterns:
- name: github.com/getgrit/python#openai
level: info
================================================
FILE: .pre-commit-config.yaml
================================================
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.9.9 # Ruff version
hooks:
- id: ruff # Run the linter.
name: Run Linter Check (Ruff)
args: [ --fix, --unsafe-fixes ]
files: ^(instructor|tests|examples)/
- id: ruff-format # Run the formatter.
name: Run Formatter (Ruff)
- repo: local
hooks:
- id: uv-lock-check
name: Check uv.lock is up-to-date
entry: uv
args: [lock, --check]
language: system
files: ^(pyproject\.toml|uv\.lock)$
pass_filenames: false
- id: uv-sync-check
name: Verify dependencies can be installed
entry: uv
args: [sync, --check]
language: system
files: ^(pyproject\.toml|uv\.lock)$
pass_filenames: false
- id: uv-export-requirements
name: Export requirements.txt from pyproject.toml
entry: bash -c 'uv pip compile pyproject.toml -o requirements.txt && git add requirements.txt'
language: system
files: ^pyproject\.toml$
pass_filenames: false
- id: ty-check
name: Run Type Check (ty)
entry: uv
args: [run, ty, check, --ignore, unresolved-import]
language: system
files: ^instructor/
pass_filenames: false
================================================
FILE: .ruff.toml
================================================
# Exclude a variety of commonly ignored directories.
exclude = [
".bzr",
".direnv",
".eggs",
".git",
".git-rewrite",
".hg",
".mypy_cache",
".nox",
".pants.d",
".pytype",
".ruff_cache",
".svn",
".tox",
".venv",
"__pypackages__",
"_build",
"buck-out",
"build",
"dist",
"node_modules",
"venv",
]
# Same as Black.
line-length = 88
output-format = "grouped"
target-version = "py39"
[lint]
select = [
# bugbear rules
"B",
# remove unused imports
"F401",
# bare except statements
"E722",
# unused arguments
"ARG",
# pyupgrade
"UP",
]
ignore = [
# mutable defaults
"B006",
"B018",
]
unfixable = [
# disable auto fix for print statements
"T201",
"T203",
]
[lint.extend-per-file-ignores]
"instructor/distil.py" = ["ARG002"]
"tests/test_distil.py" = ["ARG001"]
"tests/test_patch.py" = ["ARG001"]
"examples/task_planner/task_planner_topological_sort.py" = ["ARG002"]
"examples/citation_with_extraction/main.py" = ["ARG001"]
================================================
FILE: AGENT.md
================================================
# AGENT.md
## Commands
- Install: `uv pip install -e ".[dev]"` or `poetry install --with dev`
- Run tests: `uv run pytest tests/`
- Run single test: `uv run pytest tests/path_to_test.py::test_name`
- Skip LLM tests: `uv run pytest tests/ -k 'not llm and not openai'`
- Temp deps for a run: `uv run --with <pkg>[==version] <command>` (example: `uv run --with pytest-asyncio --with anthropic pytest tests/...`)
- Type check: `uv run ty check`
- Lint: `uv run ruff check instructor examples tests`
- Format: `uv run ruff format instructor examples tests`
- Build docs: `uv run mkdocs serve` (local) or `./build_mkdocs.sh` (production)
- Waiting: use `sleep <seconds>` for explicit pauses (e.g., CI waits) or to let external processes finish
## Architecture
- **Core**: `instructor/` - Pydantic-based structured outputs for LLMs
- **Base classes**: `Instructor` and `AsyncInstructor` in `client.py`
- **Providers**: Client files (`client_*.py`) for OpenAI, Anthropic, Gemini, Cohere, etc.
- **Factory pattern**: `from_provider()` for automatic provider detection
- **DSL**: `dsl/` directory with Partial, Iterable, Maybe, Citation extensions
- **Key modules**: `patch.py` (patching), `process_response.py` (parsing), `function_calls.py` (schemas)
## Code Style
- **Typing**: Strict type annotations, use `BaseModel` for structured outputs
- **Imports**: Standard lib → third-party → local
- **Formatting**: Ruff with Black conventions
- **Error handling**: Custom exceptions from `exceptions.py`, Pydantic validation
- **Naming**: `snake_case` functions/variables, `PascalCase` classes
- **No mocking**: Tests use real API calls
- **Client creation**: Always use `instructor.from_provider("provider_name/model_name")` instead of provider-specific methods like `from_openai()`, `from_anthropic()`, etc.
## Pull Request (PR) Formatting
Use **Conventional Commits** formatting for PR titles. Treat the PR title as the message we would use for a squash merge commit.
### PR Title Format
Use:
`<type>(<scope>): <short summary>`
Rules:
- Keep it under ~70 characters when you can.
- Use the imperative mood (for example, “add”, “fix”, “update”).
- Do not end with a period.
- If it includes a breaking change, add `!` after the type or scope (for example, `feat(api)!:`).
Good examples:
- `fix(openai): handle empty tool_calls in streaming`
- `feat(retry): add backoff for JSON parse failures`
- `docs(agents): add conventional commit PR title guidelines`
- `test(schema): cover nested union edge cases`
- `ci(ruff): enforce formatting in pre-commit`
Common types:
- `feat`: new feature
- `fix`: bug fix
- `docs`: documentation-only changes
- `refactor`: code change that is not a fix or feature
- `perf`: performance improvement
- `test`: add or update tests
- `build`: build system or dependency changes
- `ci`: CI pipeline changes
- `chore`: maintenance work
Suggested scopes (pick the closest match):
- Providers: `openai`, `anthropic`, `gemini`, `vertexai`, `bedrock`, `mistral`, `groq`, `writer`
- Core: `core`, `patch`, `process_response`, `function_calls`, `retry`, `dsl`
- Repo: `docs`, `examples`, `tests`, `ci`, `build`
### PR Description Guidelines
Keep PR descriptions short and easy to review:
- **What**: What changed, in 1–3 sentences.
- **Why**: Why this change is needed (link issues when possible).
- **Changes**: 3–7 bullet points with the main edits.
- **Testing**: What you ran (or why you did not run anything).
If the PR was authored by Cursor, include:
- `This PR was written by [Cursor](https://cursor.com)`
================================================
FILE: CHANGELOG.md
================================================
# Changelog
All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [Unreleased]
<!-- Add upcoming changes here -->
## [1.14.4] - 2026-01-16
### Changed
- Simplified `JsonCompleteness` by using `jiter` parsing and a sibling-based completeness heuristic (#2000)
### Fixed
- Fixed Google GenAI `safety_settings` causing `400 INVALID_ARGUMENT` when requests include image content by using image-specific harm categories when needed (#1773)
- Fixed `create_with_completion()` crashing for `list[T]` response models (where `T` is a Pydantic model) by preserving `_raw_response` on list outputs (#1303)
- Fixed Responses API retries crashing on reasoning items by skipping non-tool-call items in `reask_responses_tools` (#2002)
- Fixed Google GenAI dict-style `config` handling to preserve `labels` and other settings like `cached_content` and `thinking_config` (#2005)
## [1.14.3] - 2026-01-13
### Added
- Completeness-based validation for Partial streaming - only validates JSON structures that are structurally complete (#1999)
- New `JsonCompleteness` class in `instructor/dsl/json_tracker.py` for tracking JSON completeness during streaming (#1999)
### Fixed
- Fixed Stream objects crashing reask handlers when using streaming with `max_retries > 1` (#1992)
- Field constraints (`min_length`, `max_length`, `ge`, `le`, etc.) now work correctly during streaming (#1999)
### Deprecated
- `PartialLiteralMixin` is now deprecated - completeness-based validation handles Literal/Enum types automatically (#1999)
## [1.14.2] - 2026-01-13
### Fixed
- Fixed model validators crashing during partial streaming by skipping them until streaming completes (#1994)
- Fixed infinite recursion with self-referential models in Partial (e.g., `TreeNode` with `children: List["TreeNode"]`) (#1997)
### Added
- Added `PartialLiteralMixin` documentation for handling Literal/Enum types during streaming (#1994)
- Added final validation against original model after streaming completes to enforce required fields (#1994)
- Added tests for recursive Partial models (#1997)
## [1.14.1] - 2026-01-08
### Fixed
- Added support for `cached_content` in Google Gemini context caching (#1987)
## [1.14.0] - 2026-01-08
### Added
- Pre-commit hook to auto-export requirements.txt for build consistency
### Changed
- Standardized provider factory methods across codebase for improved consistency
- Standardized provider imports throughout documentation
- Audited and standardized exception handling throughout the instructor library
### Fixed
- Fixed build issues with requirements.txt regeneration from pyproject.toml
- Fixed provider functionality issue (#1914)
### Documentation
- Comprehensive documentation audit and SEO optimization improvements (#1944)
- Updated documentation for responses API mode (#1946)
- Enhanced README with PydanticAI promotion and clear feature distinctions
- Removed incorrect model reference in client.create extraction example (#1951)
- Fixed image base URLs in Jupyter notebook tutorials (#1922)
## [1.13.0] - Previous Release
For changes in earlier versions, see the [GitHub releases](https://github.com/instructor-ai/instructor/releases).
================================================
FILE: CLAUDE.md
================================================
# CLAUDE.md
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
# Instructor Development Guide
## Commands
- Install deps: `uv pip install -e ".[dev,anthropic]"` or `poetry install --with dev,anthropic`
- Run tests: `uv run pytest tests/ -n auto`
- Run specific test: `uv run pytest tests/path_to_test.py::test_name`
- Skip LLM tests: `uv run pytest tests/ -k 'not llm and not openai'`
- Type check: `uv run ty check`
- Lint: `uv run ruff check instructor examples tests`
- Format: `uv run ruff format instructor examples tests`
- Generate coverage: `uv run coverage run -m pytest tests/ -k "not docs"` then `uv run coverage report`
- Build documentation: `uv run mkdocs serve` (for local preview) or `./build_mkdocs.sh` (for production)
- Waiting: use `sleep <seconds>` for explicit pauses (e.g., CI waits) or to let external processes finish
## Installation & Setup
- Fork the repository and clone your fork
- Install UV: `pip install uv`
- Create virtual environment: `uv venv`
- Install dependencies: `uv pip install -e ".[dev]"`
- Install pre-commit: `uv run pre-commit install`
- Run tests to verify: `uv run pytest tests/ -k "not openai"`
## Code Style Guidelines
- **Typing**: Use strict typing with annotations for all functions and variables
- **Imports**: Standard lib → third-party → local imports
- **Formatting**: Follow Black's formatting conventions (enforced by Ruff)
- **Models**: Define structured outputs as Pydantic BaseModel subclasses
- **Naming**: snake_case for functions/variables, PascalCase for classes
- **Error Handling**: Use custom exceptions from exceptions.py, validate with Pydantic
- **Comments**: Docstrings for public functions, inline comments for complex logic
## Conventional Commits
- **Format**: `type(scope): description`
- **Types**: feat, fix, docs, style, refactor, perf, test, build, ci, chore, revert
- **Examples**:
- `feat(anthropic): add support for Claude 3.5`
- `fix(openai): correct response parsing for streaming`
- `docs(README): update installation instructions`
- `test(gemini): add validation tests for JSON mode`
## Core Architecture
- **Base Classes**: `Instructor` and `AsyncInstructor` in client.py are the foundation
- **Factory Pattern**: Provider-specific factory functions (`from_openai`, `from_anthropic`, etc.)
- **Unified Access**: `from_provider()` function in auto_client.py for automatic provider detection
- **Mode System**: `Mode` enum categorizes different provider capabilities (tools vs JSON output)
- **Patching Mechanism**: Uses Python's dynamic nature to patch provider clients for structured outputs
- **Response Processing**: Transforms raw API responses into validated Pydantic models
- **DSL Components**: Special types like Partial, Iterable, Maybe extend the core functionality
## Provider Architecture
- **Supported Providers**: OpenAI, Anthropic, Gemini, Cohere, Mistral, Groq, VertexAI, Fireworks, Cerebras, Writer, Databricks, Anyscale, Together, LiteLLM, Bedrock, Perplexity
- **Provider Implementation**: Each provider has a dedicated client file (e.g., `client_anthropic.py`) with factory functions
- **Modes**: Different providers support specific modes (`Mode` enum): `ANTHROPIC_TOOLS`, `GEMINI_JSON`, etc.
- **Common Pattern**: Factory functions (e.g., `from_anthropic`) take a native client and return patched `Instructor` instances
- **Provider Testing**: Tests in `tests/llm/` directory, define Pydantic models, make API calls, verify structured outputs
- **Provider Detection**: `get_provider` function analyzes base URL to detect which provider is being used
## Key Components
- **process_response.py**: Handles parsing and converting LLM outputs to Pydantic models
- **patch.py**: Contains the core patching logic for modifying provider clients
- **function_calls.py**: Handles generating function/tool schemas from Pydantic models
- **hooks.py**: Provides event hooks for intercepting various stages of the LLM request/response cycle
- **dsl/**: Domain-specific language extensions for specialized model types
- **retry.py**: Implements retry logic for handling validation failures
- **validators.py**: Custom validation mechanisms for structured outputs
## Testing Guidelines
- Tests are organized by provider under `tests/llm/`
- Each provider has its own conftest.py with fixtures
- Standard tests cover: basic extraction, streaming, validation, retries
- Evaluation tests in `tests/llm/test_<provider>/evals/` assess model capabilities
- Use parametrized tests when testing similar functionality across variants
- **IMPORTANT**: No mocking in tests - tests make real API calls
## Documentation Guidelines
- Every provider needs documentation in `docs/integrations/` following standard format
- Provider docs should include: installation, basic example, modes supported, special features
- When adding a new provider, update `mkdocs.yml` navigation and redirects
- Example code should include complete imports and environment setup
- Tutorials should progress from simple to complex concepts
- New features should include conceptual explanation in `docs/concepts/`
- **Writing Style**: Grade 10 reading level, all examples must be working code
## Branch and Development Workflow
1. Fork and clone the repository
2. Create feature branch: `git checkout -b feat/your-feature`
3. Make changes and add tests
4. Run tests and linting
5. Commit with conventional commit message
6. Push to your fork and create PR
7. Use stacked PRs for complex features
## Adding New Providers
### Step-by-Step Guide
1. **Update Provider Enum** in `instructor/utils.py`:
```python
class Provider(Enum):
YOUR_PROVIDER = "your_provider"
```
2. **Add Provider Modes** in `instructor/mode.py`:
```python
class Mode(enum.Enum):
YOUR_PROVIDER_TOOLS = "your_provider_tools"
YOUR_PROVIDER_JSON = "your_provider_json"
```
3. **Create Client Implementation** `instructor/client_your_provider.py`:
- Use overloads for sync/async variants
- Validate mode compatibility
- Return appropriate Instructor/AsyncInstructor instance
- Handle provider-specific edge cases
4. **Add Conditional Import** in `instructor/__init__.py`:
```python
if importlib.util.find_spec("your_provider_sdk") is not None:
from .client_your_provider import from_your_provider
__all__ += ["from_your_provider"]
```
5. **Update Auto Client** in `instructor/auto_client.py`:
- Add to `supported_providers` list
- Implement provider handling in `from_provider()`
- Update `get_provider()` function if URL-detectable
6. **Create Tests** in `tests/llm/test_your_provider/`:
- `conftest.py` with client fixtures
- Basic extraction tests
- Streaming tests
- Validation/retry tests
- No mocking - use real API calls
7. **Add Documentation** in `docs/integrations/your_provider.md`:
- Installation instructions
- Basic usage examples
- Supported modes
- Provider-specific features
8. **Update Navigation** in `mkdocs.yml`:
- Add to integrations section
- Include redirects if needed
## Contributing to Evals
- Standard evals for each provider test model capabilities
- Create new evals following existing patterns
- Run evals as part of integration test suite
- Performance tracking and comparison
## Pull Request Guidelines
- Keep PRs small and focused
- Include tests for all changes
- Update documentation as needed
- Follow PR template
- Link to relevant issues
## Type System and Best Practices
### Type Checking with ty
- **Type Checker**: Using `ty` for fast, incremental type checking
- **Python Version**: 3.9+ for compatibility
- **Configuration**: Uses `pyproject.toml` settings for type checking
- Run `uv run ty check` before committing - aim for zero errors
### Code Quality Checks Before Committing
Always run these checks before committing code:
1. **Ruff linting**: `uv run ruff check .` - Fix all errors
2. **Ruff formatting**: `uv run ruff format .` - Apply consistent formatting
3. **Type checking**: `uv run ty check` - Aim for zero type errors
4. **Tests**: Run relevant tests to ensure changes don't break functionality
### Type Patterns
- **Bounded TypeVars**: Use `T = TypeVar("T", bound=Union[BaseModel, ...])` for constraints
- **Version Compatibility**: Handle Python 3.9 vs 3.10+ typing differences explicitly
- **Union Type Syntax**: Use `from __future__ import annotations` to enable Python 3.10+ union syntax (`|`) in Python 3.9
- **Simple Type Detection**: Special handling for `list[Union[int, str]]` patterns
- **Runtime Type Handling**: Graceful fallbacks for compatibility
### Pydantic Integration
- Heavy use of `BaseModel` for structured outputs
- `TypeAdapter` used internally for JSON schema generation
- Field validators and custom types
- Models serve dual purpose: validation and documentation
## Building Documentation
### Setup
```bash
# Install documentation dependencies
pip install -r requirements-doc.txt
```
### Local Development
```bash
# Serve documentation locally with hot reload
uv run mkdocs serve
# Build documentation for production
./build_mkdocs.sh
```
### Documentation Features
- **Material Theme**: Modern UI with extensive customization
- **Plugins**:
- `mkdocstrings` - API documentation from docstrings
- `mkdocs-jupyter` - Notebook integration
- `mkdocs-redirects` - URL management
- Custom hooks for code processing
- **Custom Processing**: `hide_lines.py` removes code marked with `# <%hide%>`
- **Redirect Management**: Comprehensive redirect maps for moved content
### Writing Documentation
- Follow templates in `docs/templates/` for consistency
- Grade 10 reading level for accessibility
- All code examples must be runnable
- Include complete imports and environment setup
- Progressive complexity: simple → advanced
## Project Structure
- `instructor/` - Core library code
- Base classes (`client.py`): `Instructor` and `AsyncInstructor`
- Provider clients (`client_*.py`): Factory functions for each provider
- DSL components (`dsl/`): Partial, Iterable, Maybe, Citation extensions
- Core logic: `patch.py`, `process_response.py`, `function_calls.py`
- CLI tools (`cli/`): Batch processing, file management, usage tracking
- `tests/` - Test suite organized by provider
- Provider-specific tests in `tests/llm/test_<provider>/`
- Evaluation tests for model capabilities
- No mocking - all tests use real API calls
- `docs/` - MkDocs documentation
- `concepts/` - Core concepts and features
- `integrations/` - Provider-specific guides
- `examples/` - Practical examples and cookbooks
- `learning/` - Progressive tutorial path
- `blog/posts/` - Technical articles and announcements
- `templates/` - Templates for new docs (provider, concept, cookbook)
- `examples/` - Runnable code examples
- Feature demos: caching, streaming, validation, parallel processing
- Use cases: classification, extraction, knowledge graphs
- Provider examples: anthropic, openai, groq, mistral
- Each example has `run.py` as the main entry point
- `typings/` - Type stubs for untyped dependencies
## Documentation Structure
- **Getting Started Path**: Installation → First Extraction → Response Models → Structured Outputs
- **Learning Patterns**: Simple Objects → Lists → Nested Structures → Validation → Streaming
- **Example Organization**: Self-contained directories with runnable code demonstrating specific features
- **Blog Posts**: Technical deep-dives with code examples in `docs/blog/posts/`
## Example Patterns
When creating examples:
- Use `run.py` as the main file name
- Include clear imports: stdlib → third-party → instructor
- Define Pydantic models with descriptive fields
- Show expected output in comments
- Handle errors appropriately
- Make examples self-contained and runnable
## Dependency Management
### Core Dependencies
- **Minimal core**: `openai`, `pydantic`, `docstring-parser`, `typer`, `rich`
- **Python requirement**: `<4.0,>=3.9`
- **Pydantic version**: `<3.0.0,>=2.8.0` (constrained for stability)
### Optional Dependencies
Provider-specific packages as extras:
```bash
# Install with specific provider
pip install "instructor[anthropic]"
pip install "instructor[google-generativeai]"
pip install "instructor[groq]"
```
### Development Dependencies
```bash
# Install all development dependencies
uv pip install -e ".[dev]"
```
Includes:
- `ty` - Type checking
- `pytest` and `pytest-asyncio` - Testing
- `ruff` - Linting and formatting
- `coverage` - Test coverage
- `mkdocs` and plugins - Documentation
### Version Constraints
- **Upper bounds on all dependencies** for stability
- **Provider SDK versions** pinned to tested versions
- **Test dependencies** include evaluation frameworks
### Managing Dependencies
- Update `pyproject.toml` for new dependencies
- Test with multiple Python versions (3.9-3.12)
- Run full test suite after dependency updates
- Document any provider-specific version requirements
The library enables structured LLM outputs using Pydantic models across multiple providers with type safety.
================================================
FILE: CONTRIBUTING.md
================================================
# Contributing to Instructor
Thank you for considering contributing to Instructor! This document provides guidelines and instructions to help you contribute effectively.
## Table of Contents
- [Contributing to Instructor](#contributing-to-instructor)
- [Table of Contents](#table-of-contents)
- [Code of Conduct](#code-of-conduct)
- [Getting Started](#getting-started)
- [Environment Setup](#environment-setup)
- [Development Workflow](#development-workflow)
- [Dependency Management](#dependency-management)
- [Using UV](#using-uv)
- [Using Poetry](#using-poetry)
- [Working with Optional Dependencies](#working-with-optional-dependencies)
- [How to Contribute](#how-to-contribute)
- [Reporting Bugs](#reporting-bugs)
- [Feature Requests](#feature-requests)
- [Pull Requests](#pull-requests)
- [Writing Documentation](#writing-documentation)
- [Contributing to Evals](#contributing-to-evals)
- [Code Style Guidelines](#code-style-guidelines)
- [Conventional Comments](#conventional-comments)
- [Conventional Commits](#conventional-commits)
- [Types](#types)
- [Examples](#examples)
- [Testing](#testing)
- [Branch and Release Process](#branch-and-release-process)
- [Using Cursor for PR Creation](#using-cursor-for-pr-creation)
- [License](#license)
## Code of Conduct
By participating in this project, you agree to abide by our code of conduct: treat everyone with respect, be constructive in your communication, and focus on the technical aspects of the contributions.
## Getting Started
### Environment Setup
1. **Fork the Repository**: Click the "Fork" button at the top right of the [repository page](https://github.com/instructor-ai/instructor).
2. **Clone Your Fork**:
```bash
git clone https://github.com/YOUR-USERNAME/instructor.git
cd instructor
```
3. **Set up Remote**:
```bash
git remote add upstream https://github.com/instructor-ai/instructor.git
```
4. **Install UV** (recommended):
```bash
# macOS/Linux
curl -LsSf https://astral.sh/uv/install.sh | sh
# Windows PowerShell
powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | iex"
```
5. **Install Dependencies**:
```bash
# Using uv (recommended)
uv pip install -e ".[dev,docs,test-docs]"
# Using poetry
poetry install --with dev,docs,test-docs
# For specific providers, add the provider name as an extra
# Example: uv pip install -e ".[dev,docs,test-docs,anthropic]"
```
6. **Set up Pre-commit**:
```bash
pip install pre-commit
pre-commit install
```
### Development Workflow
1. **Create a Branch**:
```bash
git checkout -b feature/your-feature-name
```
2. **Make Your Changes and Commit**:
```bash
git add .
git commit -m "Your descriptive commit message"
```
3. **Keep Your Branch Updated**:
```bash
git fetch upstream
git rebase upstream/main
```
4. **Push Changes**:
```bash
git push origin feature/your-feature-name
```
### Dependency Management
We support both UV and Poetry for dependency management. Choose the tool that works best for you:
#### Using UV
UV is a fast Python package installer and resolver. It's recommended for day-to-day development in Instructor.
```bash
# Install uv
curl -LsSf https://astral.sh/uv/install.sh | sh
# Install project and development dependencies
uv pip install -e ".[dev,docs]"
# Adding a new dependency (example)
uv pip install new-package
```
Key UV commands:
- `uv pip install -e .` - Install the project in editable mode
- `uv pip install -e ".[dev]"` - Install with development extras
- `uv pip freeze > requirements.txt` - Generate requirements file
- `uv self update` - Update UV to the latest version
#### Using Poetry
Poetry provides more comprehensive dependency management and packaging.
```bash
# Install Poetry
curl -sSL https://install.python-poetry.org | python3 -
# Install dependencies including development deps
poetry install --with dev,docs
# Add a new dependency
poetry add package-name
# Add a new development dependency
poetry add --group dev package-name
```
Key Poetry commands:
- `poetry shell` - Activate the virtual environment
- `poetry run python -m pytest` - Run commands within the virtual environment
- `poetry update` - Update dependencies to their latest versions
### Working with Optional Dependencies
Instructor uses optional dependencies to support different LLM providers. Provider-specific utilities live under `instructor/utils`. When adding integration for a new provider:
1. **Update pyproject.toml**: Add your provider's dependencies to both `[project.optional-dependencies]` and `[dependency-groups]`:
```toml
[project.optional-dependencies]
# Add your provider here
my-provider = ["my-provider-sdk>=1.0.0,<2.0.0"]
[dependency-groups]
# Also add to dependency groups
my-provider = ["my-provider-sdk>=1.0.0,<2.0.0"]
```
2. **Create Provider Client**: Implement your provider client in `instructor/clients/client_myprovider.py`
3. **Add Tests**: Create tests in `tests/llm/test_myprovider/`
4. **Document Installation**: Update the documentation to include installation instructions:
```bash
# Install with your provider support
uv pip install "instructor[my-provider]"
# or
poetry install --with my-provider
```
5. **Create Provider Utilities and Handlers**:
- Add a new module at `instructor/utils/myprovider.py`
- Implement `reask` functions for validation errors and `handle_*` functions
for formatting requests
- Define `MYPROVIDER_HANDLERS` mapping `Mode` values to these functions
6. **Register the Provider**:
- Add a value in `instructor/utils/providers.py` to the `Provider` enum
- Extend `get_provider` with detection logic for your base URL
7. **Update `process_response.py`**:
- Import your handler functions and include them in the `mode_handlers`
dictionary so the library can route requests to your provider
- `process_response.py` relies on these handlers to format arguments and
parse results for each `Mode`
## How to Contribute
### Reporting Bugs
If you find a bug, please create an issue on [our issue tracker](https://github.com/instructor-ai/instructor/issues) with:
1. A clear, descriptive title
2. A detailed description including:
- The `response_model` you are using
- The `messages` you are using
- The `model` you are using
- Steps to reproduce the bug
- The expected behavior and what went wrong
- Your environment (Python version, OS, package versions)
### Feature Requests
For feature requests, please create an issue describing:
1. The problem your feature would solve
2. How your solution would work
3. Alternatives you've considered
4. Examples of how the feature would be used
### Pull Requests
1. **Create a Pull Request** from your fork to the main repository.
2. **Fill out the PR template** with details about your changes.
3. **Address review feedback** and make requested changes.
4. **Wait for CI checks** to pass.
5. Once approved, a maintainer will merge your PR.
### Writing Documentation
Documentation improvements are always welcome! Follow these guidelines:
1. Documentation is written in Markdown format in the `docs/` directory
2. When creating new markdown files, add them to `mkdocs.yml` under the appropriate section
3. Follow the existing hierarchy and structure
4. Use a grade 10 reading level (simple, clear language)
5. Include working code examples
6. Add links to related documentation
### Contributing to Evals
We encourage contributions to our evaluation tests:
1. Explore existing evals in the [evals directory](https://github.com/instructor-ai/instructor/tree/main/tests/llm)
2. Contribute new evals as pytest tests
3. Evals should test specific capabilities or edge cases of the library or models
4. Follow the existing patterns for structuring eval tests
## Code Style Guidelines
We use automated tools to maintain consistent code style:
- **Ruff**: For linting and formatting
- **ty**: For type checking
- **Black style**: The formatting conventions we follow (enforced by Ruff)
General guidelines:
- **Typing**: Use strict typing with annotations for all functions and variables
- **Imports**: Standard lib → third-party → local imports
- **Models**: Define structured outputs as Pydantic BaseModel subclasses
- **Naming**: snake_case for functions/variables, PascalCase for classes
- **Error Handling**: Use custom exceptions from exceptions.py, validate with Pydantic
- **Comments**: Docstrings for public functions, inline comments for complex logic
### Conventional Comments
We use conventional comments in code reviews and commit messages. This helps make feedback clearer and more actionable:
```
<label>: <subject>
<description>
```
Labels include:
- **praise:** highlights something positive
- **suggestion:** proposes a change or improvement
- **question:** asks for clarification
- **nitpick:** minor, trivial feedback that can be ignored
- **issue:** points out a specific problem that needs to be fixed
- **todo:** notes something to be addressed later
- **fix:** resolves an issue
- **refactor:** suggests reorganizing code without changing behavior
- **test:** suggests adding or improving tests
Examples:
```
suggestion: consider using Pydantic's validator for this check
This would ensure validation happens automatically when the model is created.
question: why is this approach used instead of async processing?
I'm wondering if there would be performance benefits.
fix: correct the type hint for the client parameter
The client should accept OpenAI instances, not strings.
```
For more details, see the [Conventional Comments specification](https://conventionalcomments.org/).
### Conventional Commits
We follow the [Conventional Commits](https://www.conventionalcommits.org/) specification for commit messages. This helps us generate changelogs and understand the changes at a glance.
The commit message should be structured as follows:
```
<type>[optional scope]: <description>
[optional body]
[optional footer(s)]
```
#### Types
- **feat**: A new feature
- **fix**: A bug fix
- **docs**: Documentation only changes
- **style**: Changes that do not affect the meaning of the code (white-space, formatting, etc)
- **refactor**: A code change that neither fixes a bug nor adds a feature
- **perf**: A code change that improves performance
- **test**: Adding missing tests or correcting existing tests
- **build**: Changes that affect the build system or external dependencies
- **ci**: Changes to our CI configuration files and scripts
#### Examples
```
feat(openai): add support for response_format parameter
fix(anthropic): correct tool calling format in Claude client
docs: improve installation instructions for various providers
test(evals): add evaluation for recursive schema handling
```
Breaking changes should be indicated by adding `!` after the type/scope:
```
feat(api)!: change parameter order in from_openai factory function
```
Including a scope is recommended when changes affect a specific part of the codebase (e.g., a specific provider, feature, or component).
## Testing
Run tests using pytest:
```bash
# Run all tests
pytest tests/
# Run specific test
pytest tests/path_to_test.py::test_name
# Skip LLM tests (faster for local development)
pytest tests/ -k 'not llm and not openai'
# Generate coverage report
coverage run -m pytest tests/ -k "not docs"
coverage report
```
## Branch and Release Process
- `main` branch is the development branch
- Releases are tagged with version numbers
- We follow [Semantic Versioning](https://semver.org/)
## Using Cursor for PR Creation
Cursor (https://cursor.sh) is a code editor powered by AI that can help you create PRs efficiently. We encourage using Cursor for Instructor development:
1. **Install Cursor**: Download from [cursor.sh](https://cursor.sh/)
2. **Create a Branch**: Start a new branch for your feature using Cursor's Git integration
3. **Use Cursor Rules**: We have Cursor rules that help with standards:
- `new-features-planning`: Use when implementing new features
- `simple-language`: Follow when writing documentation
- `documentation-sync`: Reference when making code changes to keep docs in sync
4. **Generate Code with AI**: Use Cursor's AI assistance to generate code that follows our style
5. **Auto-Create PRs**: Use Cursor's PR creation feature with our template:
```
# Create PR using gh CLI
gh pr create -t "Your PR Title" -b "Description of changes" -r jxnl,ivanleomk
```
6. **Include Attribution**: Add `This PR was written by [Cursor](https://cursor.sh)` to your PR description
For more details, see our Cursor rules in `.cursor/rules/`.
## License
By contributing to Instructor, you agree that your contributions will be licensed under the project's MIT License.
================================================
FILE: LICENSE
================================================
MIT License
Copyright (c) 2023 Jason Liu
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: NEW_PROVIDER_AGENT_INSTRUCTIONS.md
================================================
# AI Agent Instructions: Creating a New Instructor Provider
**Instructions for AI coding agents to create a new provider for the instructor library.**
Copy these instructions to your AI coding agent when you want to add a new LLM provider to instructor. The agent will have everything needed to implement a complete, working provider.
**For human contributors:** See the quick reference template in [`instructor/providers/README.md`](instructor/providers/README.md#adding-a-new-provider)
---
## Mission
Create a complete, production-ready provider package for the instructor library that:
- Follows the BaseProvider protocol exactly
- Includes comprehensive tests using transcript fixtures
- Has proper error handling and validation
- Provides excellent documentation
- Integrates seamlessly with the instructor plugin system
## Prerequisites
Before starting, ensure you have:
- Provider name (e.g., "groq", "perplexity", "fireworks")
- Provider's Python SDK package name and version
- API documentation URL
- Sample API key format (for documentation)
- Knowledge of provider's chat completion API structure
## Step-by-Step Implementation
### Step 1: Project Structure Setup
**Note: This creates a new provider integration that follows instructor's existing patterns, not a separate package.**
Create the following structure in the instructor repository:
```
instructor/providers/{provider}/
├── __init__.py # Empty or basic exports
├── client.py # from_{provider} function implementation
└── utils.py # Provider-specific utilities
tests/llm/test_{provider}/
├── __init__.py # Empty
├── conftest.py # Test configuration & API key handling
├── util.py # Models and modes configuration
├── test_simple.py # Basic functionality tests
├── test_stream.py # Streaming tests (if supported)
├── test_format.py # Format/structure tests
└── test_retries.py # Error handling tests
docs/integrations/
└── {provider}.md # Provider documentation following existing pattern
```
**Important: You're adding to the existing instructor codebase, not creating a separate package.**
### Step 2: Provider Client Implementation
#### File: `instructor/providers/{provider}/client.py`
Follow the exact pattern used by other providers in instructor. This creates a `from_{provider}` function:
```python
from __future__ import annotations
from typing import Any, overload
import instructor
from ...core.client import AsyncInstructor, Instructor
# Import the provider's SDK
from {provider_sdk} import {SyncClient}, {AsyncClient} # Replace with actual imports
@overload
def from_{provider}(
client: {SyncClient},
mode: instructor.Mode = instructor.Mode.{PROVIDER}_TOOLS, # Default mode
**kwargs: Any,
) -> Instructor: ...
@overload
def from_{provider}(
client: {AsyncClient},
mode: instructor.Mode = instructor.Mode.{PROVIDER}_TOOLS, # Default mode
**kwargs: Any,
) -> AsyncInstructor: ...
def from_{provider}(
client: {SyncClient} | {AsyncClient},
mode: instructor.Mode = instructor.Mode.{PROVIDER}_TOOLS, # Default mode
**kwargs: Any,
) -> Instructor | AsyncInstructor:
"""
Create an instructor client from a {Provider} client
Args:
client: {Provider} sync or async client instance
mode: Mode to use for structured outputs
**kwargs: Additional arguments passed to instructor client
Returns:
Instructor or AsyncInstructor instance
Raises:
ModeError: If ``mode`` is not one of the modes listed in ``valid_modes``
ClientError: If ``client`` is neither a {SyncClient} nor an {AsyncClient}
"""
# Define valid modes for this provider
valid_modes = {
instructor.Mode.{PROVIDER}_TOOLS,
instructor.Mode.{PROVIDER}_JSON,
# Add other modes your provider supports
}
# Validate mode
if mode not in valid_modes:
# Imported lazily, only on the error path
from ...core.exceptions import ModeError
raise ModeError(
mode=str(mode),
provider="{Provider}",
valid_modes=[str(m) for m in valid_modes],
)
# Validate client type
if not isinstance(client, ({AsyncClient}, {SyncClient})):
# Imported lazily, only on the error path
from ...core.exceptions import ClientError
raise ClientError(
f"Client must be an instance of {SyncClient} or {AsyncClient}. "
f"Got: {type(client).__name__}"
)
# Handle async client
if isinstance(client, {AsyncClient}):
async def async_wrapper(*args: Any, **kwargs: Any):
"""Wrapper for async client calls"""
# NOTE(review): the async method name is SDK-specific. Some SDKs expose
# `acreate`, others an awaitable `create` on the async client; confirm
# against your provider's SDK before copying this verbatim.
if "stream" in kwargs and kwargs["stream"] is True:
# Handle streaming if supported
# NOTE(review): the streaming call is returned without `await`;
# presumably the SDK hands back an async iterator here - confirm.
return client.chat.completions.acreate(*args, **kwargs)
return await client.chat.completions.acreate(*args, **kwargs)
return AsyncInstructor(
client=client,
create=instructor.patch(create=async_wrapper, mode=mode),
provider=instructor.Provider.{PROVIDER}, # Must be defined in Provider enum
mode=mode,
**kwargs,
)
# Handle sync client
if isinstance(client, {SyncClient}):
# The sync SDK method is patched directly; no wrapper is needed
return Instructor(
client=client,
create=instructor.patch(create=client.chat.completions.create, mode=mode),
provider=instructor.Provider.{PROVIDER}, # Must be defined in Provider enum
mode=mode,
**kwargs,
)
```
### Step 3: Mode Handlers Implementation
#### File: `instructor/providers/{provider}/handlers.py`
```python
"""
Mode handlers for {Provider} provider
Each handler knows how to:
1. Format requests for the specific mode (TOOLS, JSON, etc.)
2. Parse responses back into Pydantic models
3. Handle provider-specific response formats
"""
from typing import Dict, Any, Type, Union
from pydantic import BaseModel
from instructor.mode import Mode
from instructor.function_calls import openai_schema
import json
class BaseModeHandler:
    """Common interface shared by all mode handlers.

    Both hooks are abstract here: the base implementations only raise
    ``NotImplementedError`` and are meant to be overridden by subclasses.
    """

    def __init__(self, provider):
        # Keep a reference to the provider this handler was created for.
        self.provider = provider

    def prepare_request(
        self,
        response_model: Type[BaseModel],
        messages: list,
        model: str,
        **kwargs
    ) -> Dict[str, Any]:
        """Build the request payload for this mode.

        Args:
            response_model: Pydantic model class describing the desired output.
            messages: Conversation messages to send.
            model: Name of the model to call.
            **kwargs: Extra request options.

        Raises:
            NotImplementedError: Always, in the base class.
        """
        raise NotImplementedError()

    def parse_response(
        self, response: Any, response_model: Type[BaseModel]
    ) -> BaseModel:
        """Turn a provider response into a ``response_model`` instance.

        Raises:
            NotImplementedError: Always, in the base class.
        """
        raise NotImplementedError()
class ToolsHandler(BaseModeHandler):
    """Handler for function/tool calling mode."""

    def prepare_request(self, response_model, messages, model, **kwargs):
        """Build a tool-calling request for ``response_model``.

        Args:
            response_model: Pydantic model class describing the desired output.
            messages: Conversation messages to send.
            model: Name of the model to call.
            **kwargs: Extra request options; these are spread last, so they
                override the keys set here.

        Returns:
            A dict with ``model``, ``messages``, ``tools`` and ``tool_choice``.
        """
        # openai_schema() returns a wrapped model *class*; the JSON-serializable
        # function definition (name / description / parameters) is exposed on
        # its `openai_schema` attribute, and that dict is what the API expects.
        schema = openai_schema(response_model).openai_schema
        return {
            "model": model,
            "messages": messages,
            "tools": [{
                "type": "function",
                "function": schema,
            }],
            "tool_choice": "auto",  # or provider-specific equivalent
            **kwargs,
        }

    def parse_response(self, response, response_model):
        """Build ``response_model`` from the first tool call in ``response``.

        This is provider-specific - adapt to your provider's response format.

        Raises:
            ValueError: If the response has no choices or no tool calls.
        """
        choices = getattr(response, "choices", None)
        if choices:
            tool_calls = getattr(choices[0].message, "tool_calls", None)
            if tool_calls:
                # Only the first tool call is used.
                function_args = json.loads(tool_calls[0].function.arguments)
                return response_model(**function_args)
        raise ValueError("No valid tool call found in response")
class JSONHandler(BaseModeHandler):
    """Handler for JSON mode responses."""

    def prepare_request(self, response_model, messages, model, **kwargs):
        """Build a JSON-mode request with the schema in a system message.

        Args:
            response_model: Pydantic model class describing the desired output.
            messages: Conversation messages; the schema prompt is prepended.
            model: Name of the model to call.
            **kwargs: Extra request options; these are spread last, so they
                override the keys set here.

        Returns:
            A dict with ``model``, ``messages`` and ``response_format``.
        """
        # Serialize with json.dumps so the model sees real JSON (double quotes,
        # true/false/null) rather than the Python repr of the schema dict.
        schema_json = json.dumps(response_model.model_json_schema(), indent=2)
        schema_prompt = (
            "\n"
            "You must respond with valid JSON that matches this schema:\n"
            f"{schema_json}\n"
            "Respond with only the JSON, no additional text.\n"
        )
        # Add schema to messages
        enhanced_messages = [
            {"role": "system", "content": schema_prompt}
        ] + messages
        return {
            "model": model,
            "messages": enhanced_messages,
            "response_format": {"type": "json_object"},  # if provider supports
            **kwargs,
        }

    def parse_response(self, response, response_model):
        """Parse the first choice's message content as JSON into ``response_model``.

        Raises:
            ValueError: If there are no choices, the content is missing, or the
                content is not valid JSON.
        """
        choices = getattr(response, "choices", None)
        if choices:
            content = choices[0].message.content
            if content is None:
                # json.loads(None) would raise TypeError; report it as the same
                # ValueError used for every other "nothing to parse" case.
                raise ValueError("No valid response content found")
            try:
                data = json.loads(content)
            except json.JSONDecodeError as e:
                # Chain the original error so the decode position is kept.
                raise ValueError(f"Invalid JSON in response: {e}") from e
            return response_model(**data)
        raise ValueError("No valid response content found")
# Handler registry
_HANDLERS = {
Mode.TOOLS: ToolsHandler,
Mode.JSON: JSONHandler,
# Add other modes as supported by provider
}
def get_handler(mode: Mode, provider) -> BaseModeHandler:
    """Look up the handler class registered for ``mode`` and instantiate it.

    Args:
        mode: Mode whose handler is wanted.
        provider: Passed to the handler's constructor.

    Raises:
        ValueError: If ``mode`` has no entry in ``_HANDLERS``.
    """
    handler_cls = _HANDLERS.get(mode)
    if handler_cls is None:
        # List the registered mode names so the caller can see the options.
        mode_names = [registered.name for registered in _HANDLERS]
        supported = ", ".join(mode_names)
        raise ValueError(f"Mode {mode} not supported. Supported modes: {supported}")
    return handler_cls(provider)
```
### Step 4: Package Configuration
#### File: `pyproject.toml`
```toml
[project]
name = "instructor-{provider}"
version = "0.1.0"
description = "Instructor provider for {Provider Name}"
authors = [
{name = "Your Name", email = "your.email@example.com"}
]
license = {text = "MIT"}
requires-python = ">=3.9"
dependencies = [
"instructor-core>=2.0.0,<3.0.0",
"{provider_sdk}>=X.X.X,<Y.0.0", # Replace with actual version constraints
"pydantic>=2.8.0,<3.0.0",
]
readme = "README.md"
keywords = ["instructor", "llm", "structured-output", "{provider}"]
[project.urls]
Homepage = "https://github.com/instructor-ai/instructor"
Documentation = "https://python.useinstructor.com"
Repository = "https://github.com/instructor-ai/instructor"
[project.optional-dependencies]
dev = [
"pytest>=8.3.3,<9.0.0",
"pytest-asyncio>=0.24.0,<1.0.0",
"pytest-mock>=3.12.0",
"responses>=0.24.0", # For HTTP mocking
"python-dotenv>=1.0.1",
]
# Register the provider with instructor's plugin system
[project.entry-points."instructor.providers"]
{provider} = "instructor_{provider}:{Provider}Provider"
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[tool.pytest.ini_options]
testpaths = ["tests"]
markers = [
"unit: Unit tests (fast, no external dependencies)",
"integration: Integration tests (may require API keys)",
"live: Live API tests (requires valid API key)"
]
[tool.ruff]
target-version = "py39"
line-length = 88
[tool.ruff.lint]
select = ["E", "F", "W", "I", "N", "B", "A", "C4", "T20"]
ignore = ["E501"] # Line too long (handled by formatter)
```
### Step 3: Testing Implementation
#### File: `tests/llm/test_{provider}/conftest.py`
Follow the exact pattern used by all other providers:
```python
import os
import pytest
# Skip entire test suite if API key is missing
if not os.getenv("{PROVIDER}_API_KEY"):
pytest.skip(
"{PROVIDER}_API_KEY environment variable not set",
allow_module_level=True,
)
# Skip if provider package is not installed
try:
from {provider_sdk} import {SyncClient}, {AsyncClient} # Replace with actual imports
except ImportError:
pytest.skip("{provider_sdk} package is not installed", allow_module_level=True)
@pytest.fixture(scope="function")
def client():
"""Sync client fixture"""
yield {SyncClient}()
@pytest.fixture(scope="function")
def aclient():
"""Async client fixture"""
yield {AsyncClient}()
```
#### File: `tests/llm/test_{provider}/util.py`
Define supported models and modes:
```python
import instructor
# Replace with actual model names your provider supports
models = ["provider-model-name-1", "provider-model-name-2"]
# Replace with actual modes your provider supports
modes = [
instructor.Mode.{PROVIDER}_TOOLS,
instructor.Mode.{PROVIDER}_JSON,
]
```
#### File: `tests/llm/test_{provider}/test_simple.py`
Follow the standard pattern for basic functionality tests:
```python
import instructor
from {provider_sdk} import {SyncClient}, {AsyncClient} # Replace with actual imports
from pydantic import BaseModel, field_validator
import pytest
from itertools import product
from .util import models, modes
class User(BaseModel):
"""Standard test model"""
name: str
age: int
@pytest.mark.parametrize("model, mode", product(models, modes))
def test_{provider}_sync(model: str, mode: instructor.Mode, client):
"""Test basic sync functionality"""
client = instructor.from_{provider}(client, mode=mode)
resp = client.chat.completions.create(
model=model,
messages=[
{
"role": "user",
"content": "Extract a user from this sentence: Ivan is 27 and lives in Singapore",
},
],
response_model=User,
)
assert resp.name.lower() == "ivan"
assert resp.age == 27
@pytest.mark.parametrize("model, mode", product(models, modes))
def test_{provider}_sync_validated(model: str, mode: instructor.Mode, client):
"""Test sync with validation retries"""
class ValidatedUser(BaseModel):
name: str
age: int
@field_validator("name")
def name_validator(cls, v: str) -> str:
if not v.isupper():
raise ValueError(
f"All letters in the name must be uppercase (Eg. JOHN, SMITH) - {v} is not a valid example."
)
return v
client = instructor.from_{provider}(client, mode=mode)
resp = client.chat.completions.create(
model=model,
messages=[
{
"role": "user",
"content": "Extract a user from this sentence: Ivan is 27 and lives in Singapore",
},
],
max_retries=5,
response_model=ValidatedUser,
)
assert resp.name == "IVAN"
assert resp.age == 27
@pytest.mark.parametrize("model, mode", product(models, modes))
@pytest.mark.asyncio(scope="session")
async def test_{provider}_async(model: str, mode: instructor.Mode, aclient):
"""Test async functionality"""
client = instructor.from_{provider}(aclient, mode=mode)
resp = await client.chat.completions.create(
model=model,
messages=[
{
"role": "user",
"content": "Extract a user from this sentence: Ivan is 27 and lives in Singapore",
},
],
response_model=User,
)
assert resp.name.lower() == "ivan"
assert resp.age == 27
@pytest.mark.parametrize("model, mode", product(models, modes))
@pytest.mark.asyncio(scope="session")
async def test_{provider}_async_validated(model: str, mode: instructor.Mode, aclient):
"""Test async with validation retries"""
class ValidatedUser(BaseModel):
name: str
age: int
@field_validator("name")
def name_validator(cls, v: str) -> str:
if not v.isupper():
raise ValueError(
f"Make sure to uppercase all letters in the name field. Examples include: JOHN, SMITH, etc. {v} is not a valid example."
)
return v
client = instructor.from_{provider}(aclient, mode=mode)
resp = await client.chat.completions.create(
model=model,
messages=[
{
"role": "user",
"content": "Extract a user from this sentence: Ivan is 27 and lives in Singapore",
},
],
response_model=ValidatedUser,
max_retries=5,
)
assert resp.name == "IVAN"
assert resp.age == 27
```
### Step 4: Required Infrastructure Updates
#### A. Add Mode Constants
Add your provider's modes to `instructor/mode.py`:
```python
# Add to the Mode enum class
{PROVIDER}_TOOLS = "{provider}_tools"
{PROVIDER}_JSON = "{provider}_json"
# Add other modes as needed
```
#### B. Add Provider to Enum
Add your provider to `instructor/utils/providers.py`:
```python
# Add to the Provider enum
{PROVIDER} = "{provider}"
```
#### C. Update Main __init__.py
Add conditional import to `instructor/__init__.py`:
```python
# Add this block with the other provider imports
if importlib.util.find_spec("{provider_sdk}") is not None:
from .providers.{provider}.client import from_{provider}
__all__ += ["from_{provider}"]
```
#### D. Add to pyproject.toml
Add your provider to the optional dependencies:
```toml
# In [project.optional-dependencies]
{provider} = ["{provider_sdk}>=X.X.X,<Y.0.0"] # Replace with actual version
# In [dependency-groups]
{provider} = ["{provider_sdk}>=X.X.X,<Y.0.0"]
```
### Step 5: Documentation
#### File: `docs/integrations/{provider}.md`
Follow the exact pattern of existing provider docs:
```markdown
---
title: "Structured outputs with {Provider}, a complete guide w/ instructor"
description: "Complete guide to using Instructor with {Provider} models. Learn how to generate structured, type-safe outputs with {provider description}."
---
# Structured outputs with {Provider}, a complete guide w/ instructor
{Provider description and benefits}. This guide shows you how to use Instructor with {Provider}'s models for type-safe, validated responses.
## Quick Start
Install Instructor with {Provider} support:
```bash
pip install "instructor[{provider}]"
```
## Simple User Example (Sync)
```python
from {provider_sdk} import {SyncClient}
import instructor
from pydantic import BaseModel
# Initialize the client
client = {SyncClient}()
# Enable instructor patches
client = instructor.from_{provider}(client)
class User(BaseModel):
name: str
age: int
# Extract structured data
user = client.chat.completions.create(
model="your-model-name",
messages=[{"role": "user", "content": "Extract: Jason is 25 years old"}],
response_model=User
)
print(user.name) # Jason
print(user.age) # 25
```
## Simple User Example (Async)
```python
from {provider_sdk} import {AsyncClient}
import instructor
from pydantic import BaseModel
import asyncio
# Initialize async client
client = {AsyncClient}()
# Enable instructor patches
client = instructor.from_{provider}(client)
class User(BaseModel):
name: str
age: int
async def extract_user():
user = await client.chat.completions.create(
model="your-model-name",
messages=[{"role": "user", "content": "Extract: Jason is 25 years old"}],
response_model=User
)
return user
# Run async function
user = asyncio.run(extract_user())
print(user.name) # Jason
print(user.age) # 25
```
## Supported Models
- `model-1` - Description and capabilities
- `model-2` - Description and capabilities
Check [{Provider} documentation](provider-docs-url) for the complete list of available models.
## Modes
The {Provider} provider supports these modes:
- `instructor.Mode.{PROVIDER}_TOOLS` - Uses {provider} function calling (recommended)
- `instructor.Mode.{PROVIDER}_JSON` - Uses JSON mode responses
```python
client = instructor.from_{provider}(client, mode=instructor.Mode.{PROVIDER}_TOOLS)
```
## Advanced Usage
### Validation and Retries
```python
from pydantic import BaseModel, field_validator
class User(BaseModel):
name: str
age: int
@field_validator('age')
def validate_age(cls, v):
if v < 0:
raise ValueError('Age must be positive')
return v
# Automatic retries on validation errors
user = client.chat.completions.create(
model="your-model-name",
messages=[{"role": "user", "content": "Extract: Jason is -5 years old"}],
response_model=User,
max_retries=3
)
```
### Complex Nested Models
```python
from typing import List
class Address(BaseModel):
street: str
city: str
country: str
class User(BaseModel):
name: str
age: int
addresses: List[Address]
users = client.chat.completions.create(
model="your-model-name",
messages=[{"role": "user", "content": "Extract user info with multiple addresses..."}],
response_model=User
)
```
## Migration from Other Providers
If you're migrating from another provider:
```python
# Old way (other provider)
# client = instructor.from_openai(openai_client)
# New way ({Provider})
client = instructor.from_{provider}({provider_sdk}.{SyncClient}())
```
## API Reference
For detailed API documentation, see the [Instructor API reference](../api/index.md).
```
## Example Provider: Groq
Here's a concrete example implementing a Groq provider:
#### File: `instructor/providers/groq/client.py`
```python
from __future__ import annotations
from typing import Any, overload
import instructor
from ...core.client import AsyncInstructor, Instructor
from groq import Groq, AsyncGroq
@overload
def from_groq(
client: Groq,
mode: instructor.Mode = instructor.Mode.GROQ_TOOLS,
**kwargs: Any,
) -> Instructor: ...
@overload
def from_groq(
client: AsyncGroq,
mode: instructor.Mode = instructor.Mode.GROQ_TOOLS,
**kwargs: Any,
) -> AsyncInstructor: ...
def from_groq(
    client: Groq | AsyncGroq,
    mode: instructor.Mode = instructor.Mode.GROQ_TOOLS,
    **kwargs: Any,
) -> Instructor | AsyncInstructor:
    """Create an instructor client from a Groq client.

    Args:
        client: ``Groq`` (sync) or ``AsyncGroq`` (async) client instance.
        mode: Mode to use for structured outputs.
        **kwargs: Additional arguments passed to the instructor client.

    Returns:
        ``AsyncInstructor`` for an ``AsyncGroq`` client, ``Instructor`` otherwise.

    Raises:
        ModeError: If ``mode`` is not ``GROQ_TOOLS`` or ``GROQ_JSON``.
        ClientError: If ``client`` is neither ``Groq`` nor ``AsyncGroq``.
    """
    valid_modes = {
        instructor.Mode.GROQ_TOOLS,
        instructor.Mode.GROQ_JSON,
    }
    if mode not in valid_modes:
        from ...core.exceptions import ModeError

        raise ModeError(
            mode=str(mode),
            provider="Groq",
            # Sorted so the error message does not depend on set ordering.
            valid_modes=sorted(str(m) for m in valid_modes),
        )
    if not isinstance(client, (AsyncGroq, Groq)):
        from ...core.exceptions import ClientError

        raise ClientError(
            "Client must be an instance of Groq or AsyncGroq. "
            f"Got: {type(client).__name__}"
        )
    if isinstance(client, AsyncGroq):

        async def async_wrapper(*args: Any, **kwargs: Any):
            # AsyncGroq exposes the awaitable under the same name as the sync
            # client (`create`); it has no `acreate` method.
            return await client.chat.completions.create(*args, **kwargs)

        return AsyncInstructor(
            client=client,
            create=instructor.patch(create=async_wrapper, mode=mode),
            provider=instructor.Provider.GROQ,
            mode=mode,
            **kwargs,
        )
    return Instructor(
        client=client,
        create=instructor.patch(create=client.chat.completions.create, mode=mode),
        provider=instructor.Provider.GROQ,
        mode=mode,
        **kwargs,
    )
```
## Quality Checklist
Before submitting your provider implementation, verify:
### Core Implementation
- [ ] `from_{provider}` function implemented following the exact pattern
- [ ] Both sync and async clients supported with proper overloads
- [ ] Valid modes defined and enforced with proper error messages
- [ ] Client type validation with helpful error messages
- [ ] Proper use of `instructor.patch()` for both sync and async
### Testing
- [ ] `conftest.py` skips tests if API key missing or package not installed
- [ ] `util.py` defines supported models and modes
- [ ] `test_simple.py` covers basic sync/async functionality with validation
- [ ] Tests use parametrized approach with `product(models, modes)`
- [ ] All tests pass with real API key: `pytest tests/llm/test_{provider}/`
### Infrastructure Updates
- [ ] Modes added to `instructor/mode.py`
- [ ] Provider added to `instructor/utils/providers.py` Provider enum
- [ ] Conditional import added to `instructor/__init__.py`
- [ ] Dependencies added to `pyproject.toml` optional-dependencies
- [ ] Dependencies added to `pyproject.toml` dependency-groups
### Documentation
- [ ] Provider documentation created in `docs/integrations/{provider}.md`
- [ ] Follows exact pattern with frontmatter, examples, and sections
- [ ] All code examples are tested and work
- [ ] Covers sync/async usage, validation, nested models
- [ ] Links to provider documentation and API reference
### Integration
- [ ] Works with existing instructor patterns and conventions
- [ ] Error messages are helpful and actionable
- [ ] Follows the same API as other providers
- [ ] No performance regressions
## Submission Process
1. **Test Locally**: Ensure all tests pass and examples work
2. **Create PR**: Submit to instructor repository
3. **Package Registry**: Publish to PyPI as `instructor-{provider}`
4. **Documentation**: Add to instructor docs site
5. **Announcement**: Share with community
## Common Issues & Solutions
### "Provider not found" error
- Check entry point configuration in pyproject.toml
- Verify provider name matches exactly
- Ensure package is installed in same environment
### Validation errors not retrying
- Verify error handling in chat() method catches ValidationError
- Check that validation messages are added to conversation
- Ensure max_retries parameter is respected
### Mode not supported
- Implement handler in handlers.py for the mode
- Add to _HANDLERS registry
- Test with provider's actual API capabilities
### Streaming issues
- Check if provider supports streaming at all
- Implement incremental parsing for partial responses
- Handle stream interruption and reconnection
### Type checking failures
- Ensure all method signatures match BaseProvider protocol exactly
- Add proper type hints for all parameters and returns
- Use Union/Optional types where appropriate
---
**This completes the full provider implementation guide. Follow these instructions systematically and you'll have a production-ready instructor provider that integrates seamlessly with the existing ecosystem.**
================================================
FILE: README.md
================================================
# Instructor: Structured Outputs for LLMs
Get reliable JSON from any LLM. Built on Pydantic for validation, type safety, and IDE support.
```python
import instructor
from pydantic import BaseModel
# Define what you want
class User(BaseModel):
name: str
age: int
# Extract it from natural language
client = instructor.from_provider("openai/gpt-4o-mini")
user = client.chat.completions.create(
response_model=User,
messages=[{"role": "user", "content": "John is 25 years old"}],
)
print(user) # User(name='John', age=25)
```
**That's it.** No manual JSON parsing, no hand-written error handling, no retry loops. Just define a model and get structured data.
[PyPI](https://pypi.org/project/instructor/)
[Downloads](https://pypi.org/project/instructor/)
[GitHub Stars](https://github.com/instructor-ai/instructor)
[Discord](https://discord.gg/bD9YE9JArw)
[Twitter](https://twitter.com/jxnlco)
> **Use Instructor for fast extraction, reach for PydanticAI when you need agents.** Instructor keeps schema-first flows simple and cheap. If your app needs richer agent runs, built-in observability, or shareable traces, try [PydanticAI](https://ai.pydantic.dev/). PydanticAI is the official agent runtime from the Pydantic team, adding typed tools, replayable datasets, evals, and production dashboards while using the same Pydantic models. Dive into the [PydanticAI docs](https://ai.pydantic.dev/) to see how it extends Instructor-style workflows.
## Why Instructor?
Getting structured data from LLMs is hard. You need to:
1. Write complex JSON schemas
2. Handle validation errors
3. Retry failed extractions
4. Parse unstructured responses
5. Deal with different provider APIs
**Instructor handles all of this with one simple interface:**
<table>
<tr>
<td><b>Without Instructor</b></td>
<td><b>With Instructor</b></td>
</tr>
<tr>
<td>
```python
response = openai.chat.completions.create(
model="gpt-4",
messages=[{"role": "user", "content": "..."}],
tools=[
{
"type": "function",
"function": {
"name": "extract_user",
"parameters": {
"type": "object",
"properties": {
"name": {"type": "string"},
"age": {"type": "integer"},
},
},
},
}
],
)
# Parse response
tool_call = response.choices[0].message.tool_calls[0]
user_data = json.loads(tool_call.function.arguments)
# Validate manually
if "name" not in user_data:
# Handle error...
pass
```
</td>
<td>
```python
client = instructor.from_provider("openai/gpt-4")
user = client.chat.completions.create(
response_model=User,
messages=[{"role": "user", "content": "..."}],
)
# That's it! user is validated and typed
```
</td>
</tr>
</table>
## Install in seconds
```bash
pip install instructor
```
Or with your package manager:
```bash
uv add instructor
poetry add instructor
```
## Works with every major provider
Use the same code with any LLM provider:
```python
# OpenAI
client = instructor.from_provider("openai/gpt-4o")
# Anthropic
client = instructor.from_provider("anthropic/claude-3-5-sonnet")
# Google
client = instructor.from_provider("google/gemini-pro")
# Ollama (local)
client = instructor.from_provider("ollama/llama3.2")
# With API keys directly (no environment variables needed)
client = instructor.from_provider("openai/gpt-4o", api_key="sk-...")
client = instructor.from_provider("anthropic/claude-3-5-sonnet", api_key="sk-ant-...")
client = instructor.from_provider("groq/llama-3.1-8b-instant", api_key="gsk_...")
# All use the same API!
user = client.chat.completions.create(
response_model=User,
messages=[{"role": "user", "content": "..."}],
)
```
## Production-ready features
### Automatic retries
Failed validations are automatically retried with the error message:
```python
from pydantic import BaseModel, field_validator
class User(BaseModel):
name: str
age: int
@field_validator('age')
def validate_age(cls, v):
if v < 0:
raise ValueError('Age must be positive')
return v
# Instructor automatically retries when validation fails
user = client.chat.completions.create(
response_model=User,
messages=[{"role": "user", "content": "..."}],
max_retries=3,
)
```
### Streaming support
Stream partial objects as they're generated:
```python
from instructor import Partial
for partial_user in client.chat.completions.create(
response_model=Partial[User],
messages=[{"role": "user", "content": "..."}],
stream=True,
):
print(partial_user)
# User(name=None, age=None)
# User(name="John", age=None)
# User(name="John", age=25)
```
### Nested objects
Extract complex, nested data structures:
```python
from typing import List
class Address(BaseModel):
street: str
city: str
country: str
class User(BaseModel):
name: str
age: int
addresses: List[Address]
# Instructor handles nested objects automatically
user = client.chat.completions.create(
response_model=User,
messages=[{"role": "user", "content": "..."}],
)
```
## Used in production by
Trusted by over 100,000 developers and companies building AI applications:
- **3M+ monthly downloads**
- **10K+ GitHub stars**
- **1000+ community contributors**
Companies using Instructor include teams at OpenAI, Google, Microsoft, AWS, and many YC startups.
## Get started
### Basic extraction
Extract structured data from any text:
```python
from pydantic import BaseModel
import instructor
client = instructor.from_provider("openai/gpt-4o-mini")
class Product(BaseModel):
name: str
price: float
in_stock: bool
product = client.chat.completions.create(
response_model=Product,
messages=[{"role": "user", "content": "iPhone 15 Pro, $999, available now"}],
)
print(product)
# Product(name='iPhone 15 Pro', price=999.0, in_stock=True)
```
### Multiple languages
Instructor's simple API is available in many languages:
- [Python](https://python.useinstructor.com) - The original
- [TypeScript](https://js.useinstructor.com) - Full TypeScript support
- [Ruby](https://ruby.useinstructor.com) - Ruby implementation
- [Go](https://go.useinstructor.com) - Go implementation
- [Elixir](https://hex.pm/packages/instructor) - Elixir implementation
- [Rust](https://rust.useinstructor.com) - Rust implementation
### Learn more
- [Documentation](https://python.useinstructor.com) - Comprehensive guides
- [Examples](https://python.useinstructor.com/examples/) - Copy-paste recipes
- [Blog](https://python.useinstructor.com/blog/) - Tutorials and best practices
- [Discord](https://discord.gg/bD9YE9JArw) - Get help from the community
## Why use Instructor over alternatives?
**vs Raw JSON mode**: Instructor provides automatic validation, retries, streaming, and nested object support. No manual schema writing.
**vs LangChain/LlamaIndex**: Instructor is focused on one thing - structured extraction. It's lighter, faster, and easier to debug.
**vs Custom solutions**: Battle-tested by thousands of developers. Handles edge cases you haven't thought of yet.
## Contributing
We welcome contributions! Check out our [good first issues](https://github.com/instructor-ai/instructor/labels/good%20first%20issue) to get started.
## License
MIT License - see [LICENSE](https://github.com/instructor-ai/instructor/blob/main/LICENSE) for details.
---
<p align="center">
Built by the Instructor community. Special thanks to <a href="https://twitter.com/jxnlco">Jason Liu</a> and all <a href="https://github.com/instructor-ai/instructor/graphs/contributors">contributors</a>.
</p>
================================================
FILE: build_mkdocs.sh
================================================
#!/usr/bin/env bash
# Install the project and documentation dependencies, then build the MkDocs site.
# Abort on the first failing command so a broken install never reaches the build.
set -e

pip install -r requirements.txt
pip install -r requirements-doc.txt
mkdocs build
================================================
FILE: cross_link_mapping.yaml
================================================
# Cross-Link Mapping for Instructor Documentation
# This file maps blog posts and documentation pages to their related content
# Format:
# source_file:
# related_concepts: [list of concept docs to link]
# related_blog_posts: [list of related blog posts]
# related_examples: [list of example files]
# related_integrations: [list of integration docs]
# see_also_text: "Custom text for See Also section"
# VALIDATION CLUSTER
blog/posts/validation-part1.md:
related_concepts:
- concepts/validation.md
- concepts/reask_validation.md
related_blog_posts:
- blog/posts/semantic-validation-structured-outputs.md
- blog/posts/bad-schemas-could-break-llms.md
- blog/posts/pydantic-is-still-all-you-need.md
related_examples:
- examples/validators.md
see_also_text: |
## Related Documentation
- [Core Validation Concepts](/concepts/validation) - Learn about validation fundamentals
- [Reask Validation](/concepts/reask_validation) - Handle validation failures gracefully
## See Also
- [Semantic Validation with Structured Outputs](semantic-validation-structured-outputs) - Next evolution in validation
- [Why Bad Schemas Break LLMs](bad-schemas-could-break-llms) - Schema design best practices
- [Pydantic Is Still All You Need](pydantic-is-still-all-you-need) - Why Pydantic validation matters
blog/posts/semantic-validation-structured-outputs.md:
related_concepts:
- concepts/validation.md
- concepts/llm_validation.md
related_blog_posts:
- blog/posts/validation-part1.md
- blog/posts/anthropic-prompt-caching.md
- blog/posts/logfire.md
related_examples:
- examples/moderation.md
see_also_text: |
## Related Documentation
- [Validation Fundamentals](/concepts/validation) - Core validation concepts
- [LLM Validation](/concepts/llm_validation) - Using LLMs for validation
## See Also
- [Validation Deep Dive](validation-part1) - Foundation validation concepts
- [Anthropic Prompt Caching](anthropic-prompt-caching) - Optimize validation costs
- [Monitoring with Logfire](logfire) - Track validation performance
blog/posts/pydantic-is-still-all-you-need.md:
related_concepts:
- concepts/philosophy.md
- concepts/validation.md
related_blog_posts:
- blog/posts/validation-part1.md
- blog/posts/best_framework.md
- blog/posts/introduction.md
related_integrations:
- integrations/index.md
see_also_text: |
## Related Documentation
- [Instructor Philosophy](/concepts/philosophy) - Why we chose Pydantic
- [Validation Guide](/concepts/validation) - Practical validation techniques
## See Also
- [Validation Deep Dive](validation-part1) - Advanced validation patterns
- [Best Framework Comparison](best_framework) - Why Instructor stands out
- [Introduction to Instructor](introduction) - Getting started guide
# MULTIMODAL CLUSTER
blog/posts/multimodal-gemini.md:
related_concepts:
- concepts/multimodal.md
- concepts/images.md
related_blog_posts:
- blog/posts/openai-multimodal.md
- blog/posts/structured-output-anthropic.md
- blog/posts/chat-with-your-pdf-with-gemini.md
related_integrations:
- integrations/google.md
- integrations/vertex.md
related_examples:
- examples/image_to_ad_copy.md
see_also_text: |
## Related Documentation
- [Multimodal Concepts](/concepts/multimodal) - Working with images, video, and audio
- [Image Processing](/concepts/images) - Image-specific techniques
- [Google Integration](/integrations/google) - Complete Gemini setup guide
## See Also
- [OpenAI Multimodal](openai-multimodal) - Compare multimodal approaches
- [Anthropic Structured Output](structured-output-anthropic) - Alternative provider
- [Chat with PDFs using Gemini](chat-with-your-pdf-with-gemini) - Practical PDF processing
blog/posts/openai-multimodal.md:
related_concepts:
- concepts/multimodal.md
- concepts/images.md
related_blog_posts:
- blog/posts/multimodal-gemini.md
- blog/posts/anthropic-prompt-caching.md
- blog/posts/logfire.md
related_integrations:
- integrations/openai.md
related_examples:
- examples/audio.md
see_also_text: |
## Related Documentation
- [Multimodal Guide](/concepts/multimodal) - Comprehensive multimodal reference
- [OpenAI Integration](/integrations/openai) - Full OpenAI setup
## See Also
- [Gemini Multimodal](multimodal-gemini) - Alternative multimodal approach
- [Prompt Caching](anthropic-prompt-caching) - Cache large audio files
- [Monitoring with Logfire](logfire) - Track multimodal processing
blog/posts/chat-with-your-pdf-with-gemini.md:
related_concepts:
- concepts/multimodal.md
related_blog_posts:
- blog/posts/multimodal-gemini.md
- blog/posts/generating-pdf-citations.md
- blog/posts/rag-and-beyond.md
related_examples:
- examples/pdf_to_markdown.md
see_also_text: |
## Related Documentation
- [Multimodal Processing](/concepts/multimodal) - Core multimodal concepts
## See Also
- [Gemini Multimodal Features](multimodal-gemini) - Full Gemini capabilities
- [PDF Citation Generation](generating-pdf-citations) - Extract citations from PDFs
- [RAG and Beyond](rag-and-beyond) - Advanced document processing
# PROVIDER INTEGRATION CLUSTER
blog/posts/structured-output-anthropic.md:
related_concepts:
- concepts/patching.md
related_blog_posts:
- blog/posts/anthropic-prompt-caching.md
- blog/posts/announcing-unified-provider-interface.md
- blog/posts/best_framework.md
related_integrations:
- integrations/anthropic.md
related_examples:
- examples/classification.md
see_also_text: |
## Related Documentation
- [How Patching Works](/concepts/patching) - Understand provider integration
- [Anthropic Integration](/integrations/anthropic) - Complete setup guide
## See Also
- [Anthropic Prompt Caching](anthropic-prompt-caching) - Optimize Anthropic costs
- [Unified Provider Interface](announcing-unified-provider-interface) - Switch providers easily
- [Framework Comparison](best_framework) - Why Instructor excels
blog/posts/anthropic-prompt-caching.md:
related_concepts:
- concepts/caching.md
related_blog_posts:
- blog/posts/structured-output-anthropic.md
- blog/posts/caching.md
- blog/posts/logfire.md
related_integrations:
- integrations/anthropic.md
see_also_text: |
## Related Documentation
- [Caching Strategies](/concepts/caching) - General caching concepts
- [Anthropic Integration](/integrations/anthropic) - Full Anthropic guide
## See Also
- [Anthropic Structured Outputs](structured-output-anthropic) - Use with caching
- [Response Caching](caching) - General caching strategies
- [Performance Monitoring](logfire) - Track cache performance
blog/posts/announcing-unified-provider-interface.md:
related_concepts:
- concepts/patching.md
- concepts/philosophy.md
related_blog_posts:
- blog/posts/string-based-init.md
- blog/posts/best_framework.md
- blog/posts/introduction.md
related_integrations:
- integrations/index.md
related_examples:
- examples/groq.md
- examples/mistral.md
see_also_text: |
## Related Documentation
- [Provider Patching](/concepts/patching) - How provider integration works
- [All Integrations](/integrations/) - Supported provider list
## See Also
- [String-Based Initialization](string-based-init) - Alternative init method
- [Framework Comparison](best_framework) - Multi-provider advantages
- [Getting Started](introduction) - Quick start guide
# RAG AND SEARCH CLUSTER
blog/posts/rag-and-beyond.md:
related_concepts:
- concepts/validation.md
related_blog_posts:
- blog/posts/llm-as-reranker.md
- blog/posts/citations.md
- blog/posts/chat-with-your-pdf-with-gemini.md
related_examples:
- examples/search.md
see_also_text: |
## Related Documentation
- [Validation Concepts](/concepts/validation) - Validate RAG outputs
## See Also
- [LLM as Reranker](llm-as-reranker) - Improve search relevance
- [Citation Extraction](citations) - Verify sources
- [PDF Processing](chat-with-your-pdf-with-gemini) - Document handling
blog/posts/llm-as-reranker.md:
related_blog_posts:
- blog/posts/rag-and-beyond.md
- blog/posts/validation-part1.md
- blog/posts/logfire.md
related_examples:
- examples/reranking.md
see_also_text: |
## See Also
- [RAG and Beyond](rag-and-beyond) - Comprehensive RAG guide
- [Validation Fundamentals](validation-part1) - Validate ranking scores
- [Performance Monitoring](logfire) - Track reranking performance
blog/posts/citations.md:
related_concepts:
- concepts/validation.md
related_blog_posts:
- blog/posts/rag-and-beyond.md
- blog/posts/generating-pdf-citations.md
- blog/posts/validation-part1.md
see_also_text: |
## Related Documentation
- [Validation Guide](/concepts/validation) - Validate citations
## See Also
- [RAG Techniques](rag-and-beyond) - Use citations in RAG
- [PDF Citations](generating-pdf-citations) - Extract from PDFs
- [Validation Basics](validation-part1) - Ensure citation quality
# PERFORMANCE AND MONITORING
blog/posts/logfire.md:
related_concepts:
- concepts/retrying.md
related_blog_posts:
- blog/posts/full-fastapi-visibility.md
- blog/posts/anthropic-prompt-caching.md
- blog/posts/validation-part1.md
related_integrations:
- integrations/pydantic_logfire.md
see_also_text: |
## Related Documentation
- [Retry Mechanisms](/concepts/retrying) - Handle failures gracefully
- [Logfire Integration](/integrations/pydantic_logfire) - Setup guide
## See Also
- [FastAPI Visibility](full-fastapi-visibility) - Web app monitoring
- [Prompt Caching](anthropic-prompt-caching) - Monitor cache hits
- [Validation Monitoring](validation-part1) - Track validation metrics
blog/posts/caching.md:
related_concepts:
- concepts/caching.md
related_blog_posts:
- blog/posts/anthropic-prompt-caching.md
- blog/posts/logfire.md
see_also_text: |
## Related Documentation
- [Caching Concepts](/concepts/caching) - Core caching strategies
## See Also
- [Anthropic Prompt Caching](anthropic-prompt-caching) - Provider-specific caching
- [Performance Monitoring](logfire) - Track cache effectiveness
# GETTING STARTED AND PHILOSOPHY
blog/posts/introduction.md:
related_concepts:
- concepts/philosophy.md
- concepts/quickstart.md
related_blog_posts:
- blog/posts/best_framework.md
- blog/posts/pydantic-is-still-all-you-need.md
- blog/posts/announcing-unified-provider-interface.md
see_also_text: |
## Related Documentation
- [Quick Start Guide](/concepts/quickstart) - Get running in minutes
- [Philosophy](/concepts/philosophy) - Why we built Instructor
## See Also
- [Framework Comparison](best_framework) - See how we compare
- [Why Pydantic](pydantic-is-still-all-you-need) - Our foundation
- [Easy Provider Setup](announcing-unified-provider-interface) - Start with any LLM
blog/posts/best_framework.md:
related_concepts:
- concepts/philosophy.md
related_blog_posts:
- blog/posts/introduction.md
- blog/posts/pydantic-is-still-all-you-need.md
- blog/posts/announcing-unified-provider-interface.md
see_also_text: |
## Related Documentation
- [Our Philosophy](/concepts/philosophy) - Design principles
## See Also
- [Getting Started](introduction) - Quick introduction
- [Pydantic Foundation](pydantic-is-still-all-you-need) - Why Pydantic
- [Multi-Provider Support](announcing-unified-provider-interface) - Key differentiator
================================================
FILE: docs/AGENT.md
================================================
---
title: Documentation Agent Guide
description: Internal guide for maintaining and improving Instructor documentation
---
# AGENT.md - Documentation
## Commands
- Serve docs locally: `uv run mkdocs serve`
- Build docs: `./build_mkdocs.sh` or `uv run mkdocs build`
- Install doc deps: `uv pip install -e ".[docs]"`
- Test examples: `uv run pytest docs/ --examples`
## Structure
- **Core docs**: `concepts/`, `integrations/`, `examples/`
- **Learning path**: `getting-started.md` → `learning/` → `tutorials/`
- **API reference**: Auto-generated from docstrings via `mkdocstrings`
- **Blog**: `blog/posts/` for announcements and deep-dives
- **Templates**: `templates/` for new docs (provider, concept, cookbook)
## Writing Guidelines
- **Reading level**: Grade 10 (from .cursor/rules)
- **Code examples**: Must be runnable with complete imports
- **Progressive complexity**: Simple → advanced concepts
- **Provider docs**: Follow `templates/` patterns
- **Navigation**: Update `mkdocs.yml` for new pages
## Pull Request (PR) Formatting
Use **Conventional Commits** formatting for PR titles so they are consistent and easy to scan. Treat the PR title as the message we would use for a squash merge commit.
### PR Title Format
Use:
`<type>(<scope>): <short summary>`
Rules:
- Keep it under ~70 characters when you can.
- Use the imperative mood (for example, “add”, “fix”, “update”).
- Do not end with a period.
- If it includes a breaking change, add `!` after the type or scope (for example, `feat(docs)!:`).
Good examples:
- `docs(agents): add conventional commit PR title guidelines`
- `docs(mkdocs): fix broken link in validation tutorial`
- `docs(examples): update youtube clips snippet`
- `chore(docs): refresh docs build commands`
Common types:
- `docs`: documentation-only changes
- `fix`: bug fix
- `feat`: new feature
- `test`: add or update tests
- `chore`: maintenance work (build scripts, tooling, repo hygiene)
- `ci`: CI pipeline changes
Suggested docs scopes:
- `docs`, `mkdocs`, `blog`, `examples`, `integrations`, `tutorials`, `agents`
### PR Description Guidelines
Keep PR descriptions short and actionable:
- **What**: What changed, in 1–3 sentences.
- **Why**: Why this change is needed (link issues when possible).
- **Changes**: 3–7 bullet points with the main edits.
- **Testing**: What you ran (or why you did not run anything).
- **Docs impact**: Call out page moves, redirects, or nav updates.
If the PR was authored by Cursor, include:
- `This PR was written by [Cursor](https://cursor.com)`
## Key Files
- `mkdocs.yml` - Site configuration and navigation
- `hooks/` - Custom processing (hide_lines.py removes `# <%hide%>` markers)
- `overrides/` - Custom theme elements
- `javascripts/` - Client-side enhancements
================================================
FILE: docs/api-docstring-assessment.md
================================================
# API Docstring Quality Assessment
This document assesses the quality and completeness of docstrings for all API items referenced in the expanded API documentation.
## Summary
Overall, the docstring quality is **good to excellent** for most items. Many classes and functions have comprehensive docstrings with usage examples, while some core classes could benefit from class-level docstrings.
## Excellent Docstrings (Comprehensive with Examples)
These have detailed docstrings with usage examples and clear descriptions:
### Client Creation
- **`from_provider`** - Comprehensive docstring with Args, Returns, Raises, and Examples sections. Includes multiple usage examples showing basic usage, caching, and async clients.
### Validation
- **`llm_validator`** - Good docstring with usage examples, parameter descriptions, and error message examples showing how validation errors are formatted.
### DSL Components
- **`CitationMixin`** - Excellent docstring with complete usage examples showing how to use it with context, and result examples showing the output structure.
- **`IterableModel`** - Good docstring with usage examples showing before/after transformation, Parameters section, and Returns description.
- **`Maybe`** - Good docstring with usage examples and result structure showing the generated model fields.
### Batch Processing
- **`BatchProcessor`** - Good class-level docstring explaining the unified interface. Methods like `create_batch_from_messages` and `submit_batch` have clear Args and Returns sections.
### Distillation
- **`Instructions`** - Good docstring with parameter descriptions. The `distil` method has usage examples showing decorator usage patterns.
### Hooks
- **`Hooks`** - Excellent class-level docstring explaining the purpose. Methods like `on()`, `get_hook_name()`, `emit()`, etc. have comprehensive docstrings with Args, Returns, Raises, and Examples sections.
### Schema Generation
- **`generate_openai_schema`** - Good docstring with Args, Returns, and Notes sections explaining how docstrings are used.
- **`generate_anthropic_schema`** - Has docstring explaining the conversion process.
### Multimodal
- **`Audio`** - Good class-level docstring. Methods like `autodetect()` and `autodetect_safely()` have clear docstrings with Args and Returns.
### Exceptions
- **`InstructorError`** - Excellent docstring with Attributes section, Examples showing error handling, and See Also references.
- **`IncompleteOutputException`** - Good docstring with Attributes, Common Solutions, and Examples.
- **`InstructorRetryException`** - Comprehensive docstring with Attributes, Common Causes, Examples, and See Also.
- **`ValidationError`** - Good docstring with Examples and See Also.
- **`ProviderError`** - Good docstring with Attributes, Common Causes, and Examples.
- **`ConfigurationError`** - Good docstring with Common Scenarios and Examples.
- **`ModeError`** - Good docstring with Attributes, Examples, and See Also.
- **`ClientError`** - Good docstring with Common Scenarios and Examples.
- **`AsyncValidationError`** - Good docstring with Attributes and Examples.
- **`ResponseParsingError`** - Good docstring with Attributes, Examples, and backwards compatibility notes.
- **`MultimodalError`** - Good docstring with Attributes, Examples, and backwards compatibility notes.
## Good Docstrings (Clear but Could Be Enhanced)
These have adequate docstrings but could benefit from more examples or additional detail:
### Core Clients
- **`Instructor`** - No class-level docstring. Methods have type hints but lack comprehensive docstrings. The class is well-documented through usage in examples, but a class-level docstring would help.
- **`AsyncInstructor`** - Similar to `Instructor`, no class-level docstring.
- **`Response`** - No class-level docstring. Methods like `create()` and `create_with_completion()` lack docstrings.
### Client Creation
- **`from_openai`** - No docstring. Only has type overloads. The implementation exists but lacks documentation explaining usage, parameters, and return values.
### Function Calls & Schema
- **`OpenAISchema`** - Good method docstrings for `openai_schema`, `anthropic_schema`, `gemini_schema`, and `from_response()`. The class itself could use a class-level docstring explaining its purpose and usage.
- **`openai_schema`** - Decorator function, but the docstring is on the class method, not the decorator itself.
### DSL Components
- **`Partial`** - Minimal docstring. Has Notes and Example sections but could benefit from more comprehensive usage examples showing streaming scenarios.
### Multimodal
- **`Image`** - No class-level docstring. Methods have good docstrings (`autodetect()`, `autodetect_safely()`, `from_gs_url()`, etc.), but the class itself lacks documentation.
### Mode & Provider
- **`Mode`** - Good class-level docstring explaining what modes are and how they work. Individual mode values lack docstrings but the enum docstring is comprehensive.
- **`Provider`** - No class-level docstring. Just enum values without explanation.
### Patch Functions
- **`patch`** - Good docstring explaining what features it enables (response_model, max_retries, validation_context, strict, hooks). Could benefit from usage examples.
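- **`apatch`** - Has a docstring, but it is marked as deprecated ("No longer necessary, use `patch` instead"). The docstring is adequate; the deprecation could be more prominent.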
## Areas Needing Improvement
### Missing Class-Level Docstrings
1. **`Instructor`** - Should have a class-level docstring explaining:
- What the class does
- How to use it
- Key features (modes, hooks, retries)
- Basic usage example
2. **`AsyncInstructor`** - Should have a class-level docstring explaining:
- Async usage patterns
- How it differs from `Instructor`
- Async examples
3. **`Response`** - Should have a class-level docstring explaining:
- What the Response helper does
- When to use it vs direct client methods
- Usage examples
4. **`Image`** - Should have a class-level docstring explaining:
- What Image represents
- Supported formats
- Common usage patterns
5. **`Provider`** - Should have a class-level docstring explaining:
- What providers are supported
- How to use Provider enum
- Provider detection
### Missing Function Docstrings
1. **`from_openai`** - Needs comprehensive docstring with:
- Purpose and usage
- Parameters explanation
- Return value description
- Examples
2. **`from_litellm`** - No docstring. Only has type overloads. Similar to `from_openai`, needs comprehensive docstring.
### Could Be Enhanced
1. **`Partial`** - Could add more streaming examples
2. **`patch`** - Could add usage examples showing before/after
3. **`apatch`** - Has docstring but marked as deprecated ("No longer necessary, use `patch` instead"). Docstring is adequate but the deprecation should be more prominent.
4. **`openai_schema`** - Has minimal docstring. Could expand with usage examples showing how to use the decorator.
## Recommendations
### High Priority
1. Add class-level docstrings to `Instructor` and `AsyncInstructor` - These are the core classes users interact with
2. Add docstring to `from_openai` - Important client creation function
3. Add class-level docstring to `Response` - Helper class that needs explanation
### Medium Priority
1. Add class-level docstring to `Image` - Commonly used multimodal class
2. Add class-level docstring to `Provider` - Enum that could use explanation
3. Enhance `Partial` docstring with more streaming examples
### Low Priority
1. Add more examples to `patch` docstring
2. Expand `openai_schema` docstring with examples
3. Consider updating `apatch` deprecation message to be more prominent
## Overall Assessment
**Grade: B+**
The documentation is generally good with many excellent examples, but the core classes (`Instructor`, `AsyncInstructor`, `Response`) would benefit significantly from class-level docstrings. The DSL components and utility functions are well-documented, and the exception classes have comprehensive docstrings.
The `mkdocstrings` plugin generates the API reference pages from these docstrings, so improving them will directly improve the generated API documentation.
================================================
FILE: docs/api.md
================================================
---
title: API Reference Guide
description: Explore the comprehensive API reference with details on instructors, validation, iteration, and function calls.
---
# API Reference
Core modes are the recommended default. Legacy provider-specific modes still
work but are deprecated and will show warnings. See the
[Mode Migration Guide](concepts/mode-migration.md) for details.
## Core Clients
The main client classes for interacting with LLM providers.
::: instructor.Instructor
::: instructor.AsyncInstructor
::: instructor.core.client.Response
## Client Creation
Functions to create Instructor clients from various providers.
::: instructor.from_provider
::: instructor.from_openai
::: instructor.from_litellm
## DSL Components
Domain-specific language components for advanced patterns and data handling.
::: instructor.dsl.validators
::: instructor.dsl.iterable
::: instructor.dsl.partial
::: instructor.dsl.parallel
::: instructor.dsl.maybe
::: instructor.dsl.citation
## Function Calls & Schema
Classes and functions for defining and working with function call schemas.
::: instructor.function_calls
::: instructor.OpenAISchema
::: instructor.openai_schema
::: instructor.generate_openai_schema
::: instructor.generate_anthropic_schema
::: instructor.generate_gemini_schema
## Validation
Validation utilities for LLM outputs and async validation support.
::: instructor.validation
::: instructor.llm_validator
::: instructor.openai_moderation
## Batch Processing
Batch processing utilities for handling multiple requests efficiently.
::: instructor.batch
::: instructor.batch.BatchProcessor
::: instructor.batch.BatchRequest
::: instructor.batch.BatchJob
## Distillation
Tools for distillation and fine-tuning workflows.
::: instructor.distil
::: instructor.FinetuneFormat
::: instructor.Instructions
## Multimodal
Support for image and audio content in LLM requests.
::: instructor.processing.multimodal
::: instructor.Image
::: instructor.Audio
## Mode & Provider
Enumerations for modes and providers.
::: instructor.Mode
::: instructor.Provider
## Exceptions
Exception classes for error handling.
::: instructor.core.exceptions
## Hooks
Event hooks system for monitoring and intercepting LLM interactions.
::: instructor.core.hooks
::: instructor.core.hooks.Hooks
::: instructor.core.hooks.HookName
## Patch Functions
Decorators for patching LLM client methods.
::: instructor.core.patch
::: instructor.patch
::: instructor.apatch
================================================
FILE: docs/architecture.md
================================================
---
title: Instructor Architecture Overview
description: Learn about the internal architecture and design decisions of the Instructor library
---
# Architecture Overview
This page explains the core execution flow and where to plug in or debug. It highlights the minimal sync/async code paths and how streaming, partial, and parallel modes integrate.
## High-Level Flow
```mermaid
sequenceDiagram
autonumber
participant U as User Code
participant I as Instructor (patched)
participant R as Retry Layer (tenacity)
participant C as Provider Client
participant D as Dispatcher (process_response)
participant H as Provider Handler (response/reask)
participant M as Pydantic Model
U->>I: chat.completions.create(response_model=..., **kwargs)
Note right of I: patch() wraps create() with cache/templating and retry
I->>R: retry_sync/async(func=create, max_retries, strict, mode, hooks)
loop attempts
R->>C: create(**prepared_kwargs)
C-->>R: raw response (provider-specific)
R->>D: process_response(_async)(response, response_model, mode, stream)
alt Streaming/Partial
D->>M: Iterable/Partial.from_streaming_response(_async)
D-->>R: Iterable/Partial model (or list of items)
else Standard
D->>H: provider mode handler (format/parse selection)
H-->>D: adjusted response_model/new_kwargs if needed
D->>M: response_model.from_response(...)
M-->>D: parsed model (with _raw_response attached)
D-->>R: model (or adapted simple type)
end
R-->>I: parsed model
end
I-->>U: final model (plus _raw_response on instance)
rect rgb(255,240,240)
Note over R,H: On validation/JSON errors → reask path
R->>H: handle_reask_kwargs(..., exception, failed_attempts)
H-->>R: new kwargs/messages for next attempt
end
```
Key responsibilities:
- patch(): wraps the provider `create` with cache lookup/save, templating, strict mode, hooks, and retry.
- Retry: executes provider call, emits hooks, updates usage, handles validation/JSON errors with reask, and re-attempts.
- Dispatcher: selects the correct parsing path by `Mode`, handles multimodal message conversion, and attaches `_raw_response` to the returned model.
- Provider Handlers: provider/mode-specific request shaping and reask preparation.
## Minimal Code Paths
### Synchronous
```python
import instructor
from pydantic import BaseModel


class User(BaseModel):
    name: str
    age: int


# The provider string selects both the provider and the model, so `create()`
# does not need a separate `model=` argument.
client = instructor.from_provider("openai/gpt-5-nano")

model = client.create(
    messages=[{"role": "user", "content": "{'name': 'Ada', 'age': 37}"}],
    response_model=User,  # triggers schema/tool wiring + parsing
    max_retries=3,  # tenacity-backed validation retries
    strict=True,  # strict JSON parsing if supported
)

# Access raw provider response if needed
raw = model._raw_response
```
### Asynchronous
```python
import asyncio

import instructor
from pydantic import BaseModel


class User(BaseModel):
    name: str
    age: int


async def main():
    # The provider string selects both the provider and the model, so
    # `create()` does not need a separate `model=` argument.
    aclient = instructor.from_provider("openai/gpt-5-nano", async_client=True)
    model = await aclient.create(
        messages=[{"role": "user", "content": "{\"name\": \"Ada\", \"age\": 37}"}],
        response_model=User,
        max_retries=3,
        strict=True,
    )
    print(model)


asyncio.run(main())
```
## Streaming, Partial, Parallel
### Streaming Iterable
- Use `create_iterable(response_model=Model)` via `Instructor.create_iterable`; you do not pass `stream=True` yourself.
- Returns a generator (sync) or async generator (async) of parsed items.
- Internally sets `stream=True`, and `IterableBase.from_streaming_response(_async)` assembles items.
```python
for item in client.create_iterable(messages=..., response_model=MyModel):
print(item)
```
### Partial Objects
- Use `create_partial(response_model=Model)` to receive progressively filled partial models while streaming.
- Internally wraps the model as `Partial[Model]` and sets `stream=True`.
```python
for partial in client.create_partial(messages=..., response_model=MyModel):
# partial contains fields as they arrive
pass
```
### Parallel Tools
- Use `Mode.PARALLEL_TOOLS` and a parallel type hint (e.g., `Iterable[Union[ModelA, ModelB]]`) when you need multiple tool calls in one request.
- Streaming is not supported in parallel tools mode.
```python
from typing import Iterable, Union

from instructor.mode import Mode

# Parallel tools mode takes an Iterable[...] type hint over the candidate
# models rather than a list instance; the result yields one parsed model
# per tool call.
result = client.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Extract person and event info."}],
    response_model=Iterable[Union[PersonInfo, EventInfo]],
    mode=Mode.PARALLEL_TOOLS,
)
```
## Hooks and Retry
You can observe and instrument the flow with hooks. Typical events:
- `completion:kwargs`: just before provider call
- `completion:response`: after provider call
- `parse:error`: on validation/JSON errors
- `completion:last_attempt`: when a retry sequence is about to stop
- `completion:error`: non-validation completion errors
```python
from instructor.core.hooks import HookName
client.on(HookName.COMPLETION_KWARGS, lambda **kw: print("KWARGS", kw))
client.on(HookName.PARSE_ERROR, lambda e: print("PARSE", e))
```
## Where Multimodal Conversion Happens
- For modes that require it, messages are converted via `processing.multimodal.convert_messages`.
- Image/Audio/PDF autodetection can be enabled (by specific handlers/modes) and will convert strings/paths/URLs or data URIs into provider-ready payloads.
## Error Handling at a Glance
- Validation or JSON decode errors trigger the reask path.
- Reask handlers (`handle_reask_kwargs`) append/adjust messages with error feedback so the next attempt can correct itself.
- If all retries fail, `InstructorRetryException` is raised containing `failed_attempts`, the last completion, usage totals, and the create kwargs for reproduction.
## Extensibility Notes
- New providers add utils for response and reask handling and register modes used by the dispatcher.
- Most JSON/tool patterns are shared; prefer reusing existing handlers where possible.
- Keep provider-specific logic in provider utils; avoid expanding central dispatcher beyond routing and orchestration.
================================================
FILE: docs/blog/.authors.yml
================================================
authors:
jxnl:
name: Jason Liu
description: Creator
avatar: https://avatars.githubusercontent.com/u/4852235?v=4
url: https://twitter.com/intent/follow?screen_name=jxnlco
ivanleomk:
name: Ivan Leo
description: Contributor
avatar: https://pbs.twimg.com/profile_images/1838778744468836353/utYfioiO_400x400.jpg
url: https://twitter.com/intent/follow?screen_name=ivanleomk
anmol:
name: Anmol Jawandha
description: Contributor
avatar: https://pbs.twimg.com/profile_images/1248544843556466693/PgxUIeBs_400x400.jpg
joschkabraun:
name: Joschka Braun
description: Contributor
avatar: https://pbs.twimg.com/profile_images/1601251353531224065/PYpqKsjL_400x400.jpg
url: https://joschkabraun.com
sarahchieng:
name: Sarah Chieng
description: Contributor
avatar: https://pbs.twimg.com/profile_images/1755455116595834880/Hxh5ceRZ_400x400.jpg
url: https://twitter.com/sarahchieng
zilto:
name: Thierry Jean
description: Contributor
avatar: https://avatars.githubusercontent.com/u/68975210?v=4
url: https://www.linkedin.com/in/thierry-jean/
yanomaly:
name: Yan
description: Contributor
avatar: https://avatars.githubusercontent.com/u/87994542?v=4
================================================
FILE: docs/blog/index.md
================================================
# Subscribe to our Newsletter for Updates and Tips
If you want to get updates on new features and tips on how to use Instructor, you can subscribe to our newsletter below to get notified when we publish new content.
<iframe src="https://embeds.beehiiv.com/2faf420d-8480-4b6e-8d6f-9c5a105f917a?slim=true" data-test-id="beehiiv-embed" height="52" frameborder="0" scrolling="no" style="margin: 0; border-radius: 0px !important; background-color: transparent;"></iframe>
## Advanced Topics
1. [Unified Provider Interface in Instructor](posts/announcing-unified-provider-interface.md)
2. [Instructor Implements llms.txt](posts/llms-txt-adoption.md)
3. [Query Understanding: Beyond Embeddings](posts/rag-and-beyond.md)
4. [Achieving GPT-4 Level Summaries with GPT-3.5-turbo](posts/chain-of-density.md)
5. [Basics of Guardrails and Validation in AI Models](posts/validation-part1.md)
6. [Validating Citations in AI-Generated Content](posts/citations.md)
7. [Fine-tuning and Distillation in AI Models](posts/distilation-part1.md)
8. [Enhancing OpenAI Client Observability with LangSmith](posts/langsmith.md)
9. [Logfire Integration with Pydantic](posts/logfire.md)
## AI Development and Optimization
- [Effective Function Caching in Python](posts/caching.md)
- [Fundamentals of Batch Processing with Async in Python](posts/learn-async.md)
- [Streaming Models to Improve Latency](posts/generator.md)
- [Using OpenAI's Batch API for Large-Scale Synthetic Data Generation](../examples/batch_job_oai.md)
- [Implementing Bulk Classification with User-Provided Tags](../examples/bulk_classification.md)
- [Utilizing GPT-4 Vision API for Ad Copy from Product Images](../examples/image_to_ad_copy.md)
## Language Models and Prompting Techniques
- [Least-to-Most Prompting Technique for LLMs](../prompting/decomposition/least_to_most.md)
- [Chain of Verification (CoVe) Method for Improving LLM Accuracy](../prompting/self_criticism/chain_of_verification.md)
- [Cumulative Reasoning to Enhance Model Performance](../prompting/self_criticism/cumulative_reason.md)
- [Reverse Chain of Thought (RCoT) Method for Logical Consistency](../prompting/self_criticism/reversecot.md)
## Integrations and Tools
- [Ollama Integration](../integrations/ollama.md)
- [llama-cpp-python Integration](../integrations/llama-cpp-python.md)
- [Together Compute Integration](../integrations/together.md)
- [Pandas DataFrame Examples](./posts/tidy-data-from-messy-tables.md#defining-a-custom-type)
- [Streaming Response Examples](../concepts/partial.md)
## Media and Resources
- [Course: Structured Outputs with Instructor](https://www.wandb.courses/courses/steering-language-models?x=1)
- [Keynote: Pydantic is All You Need](posts/aisummit-2023.md)
================================================
FILE: docs/blog/posts/aisummit-2023.md
================================================
---
authors:
- jxnl
categories:
- Pydantic
comments: true
date: 2023-11-02
description: Explore insights on utilizing Pydantic for effective prompt engineering
in this AI Engineer Summit keynote.
draft: false
tags:
- Pydantic
- Prompt Engineering
- AI Summit
- Machine Learning
- Data Validation
---
# AI Engineer Keynote: Pydantic is all you need
[![Pydantic is all you need](https://img.youtube.com/vi/yj-wSRJwrrc/0.jpg)](https://www.youtube.com/watch?v=yj-wSRJwrrc)
[Click here to watch the full talk](https://www.youtube.com/watch?v=yj-wSRJwrrc)
<!-- more -->
Last month, I ventured back onto the speaking circuit at the inaugural [AI Engineer Summit](https://www.ai.engineer/summit), sharing insights on leveraging [Pydantic](https://docs.pydantic.dev/latest/) for effective prompt engineering. I dove deep into what is covered in our documentation and standard blog posts.
I'd genuinely appreciate any feedback on the talk - every bit helps in refining the art. So, take a moment to check out the [full talk here](https://youtu.be/yj-wSRJwrrc?si=vGMIqtTapbIN8SLz), and let's continue pushing the boundaries of what's possible.
================================================
FILE: docs/blog/posts/announcing-gemini-tool-calling-support.md
================================================
---
authors:
- ivanleomk
categories:
- LLM Techniques
comments: true
date: 2024-09-03
description: Introducing structured outputs for Gemini tool calling support in the
instructor library, enhancing interactions with Gemini and VertexAI SDKs.
draft: false
tags:
- Gemini
- VertexAI
- Tool Calling
- Instructor Library
- AI SDKs
---
# Structured Outputs for Gemini now supported
We're excited to announce that `instructor` now supports structured outputs using tool calling for both the Gemini SDK and the VertexAI SDK.
A special shoutout to [Sonal](https://x.com/sonalsaldanha) for his contributions to the Gemini Tool Calling support.
Let's walk through a simple example of how to use these new features.
## Installation
To get started, install the latest version of `instructor`. Depending on whether you're using Gemini or VertexAI, you should install the following:
=== "Gemini"
```bash
pip install "instructor[google-generativeai]"
```
=== "VertexAI"
```bash
pip install "instructor[vertexai]"
```
This ensures that you have the necessary dependencies to use the Gemini or VertexAI SDKs with instructor.
We recommend using the Gemini SDK over the VertexAI SDK for two main reasons.
1. Compared to the VertexAI SDK, the Gemini SDK comes with a free daily quota of 1.5 billion tokens to use for developers.
2. The Gemini SDK is significantly easier to set up: all you need is a `GOOGLE_API_KEY` that you can generate in your GCP console. The VertexAI SDK, on the other hand, requires a credentials.json file or an OAuth integration to use.
## Getting Started
With our provider agnostic API, you can use the same interface to interact with both SDKs, the only thing that changes here is how we initialise the client itself.
Before running the following code, make sure your Gemini API key is set in your shell as the `GOOGLE_API_KEY` environment variable.
```python
import instructor
import google.generativeai as genai
from pydantic import BaseModel
class User(BaseModel):
name: str
age: int
client = instructor.from_provider("google/gemini-2.5-flash")  # (1)!
resp = client.create(
messages=[
{
"role": "user",
"content": "Extract Jason is 25 years old.",
}
],
response_model=User,
)
print(resp)
#> name='Jason' age=25
```
1. Current Gemini models that support tool calling are `gemini-3-flash` and `gemini-1.5-pro-latest`.
We can achieve a similar thing with the VertexAI SDK. For this to work, you'll need to authenticate to VertexAI.
There are some instructions [here](https://cloud.google.com/vertex-ai/docs/authentication), but the easiest way I found was to simply install the gcloud CLI and run `gcloud auth application-default login`.
```python
import instructor
import vertexai # type: ignore
from vertexai.generative_models import GenerativeModel # type: ignore
from pydantic import BaseModel
vertexai.init()
class User(BaseModel):
name: str
age: int
client = instructor.from_provider("google/gemini-2.5-flash", vertexai=True)  # (1)!
resp = client.create(
messages=[
{
"role": "user",
"content": "Extract Jason is 25 years old.",
}
],
response_model=User,
)
print(resp)
#> name='Jason' age=25
```
1. Current Gemini models that support tool calling are `gemini-3-flash` and `gemini-1.5-pro-latest`.
================================================
FILE: docs/blog/posts/announcing-instructor-responses-support.md
================================================
---
authors:
- ivanleomk
categories:
- instructor
comments: true
date: 2025-05-11
description: Take advantage of OpenAI's latest offerings with the new responses API
draft: false
tags:
- LLMs
- OpenAI
- Instructor
---
# Announcing Responses API support
We're excited to announce Instructor's integration with OpenAI's new Responses API. This integration brings a more streamlined approach to working with structured outputs from OpenAI models. Let's see what makes this integration special and how it can improve your LLM applications.
<!-- more -->
## What's New?
The Responses API represents a significant shift in how we interact with OpenAI models. With Instructor's integration, you can leverage this new API with our familiar, type-safe interface.
For full documentation of the features we support, check out our [OpenAI integration guide](../../integrations/openai.md).
Getting started is now easier than ever. With our unified provider interface, you can initialize your client with a single line of code. This means less time dealing with configuration and more time building features that matter.
```python
import instructor
# Initialize the client with Responses mode
client = instructor.from_provider(
"openai/gpt-4.1-mini", mode=instructor.Mode.RESPONSES_TOOLS
)
```
The Responses API brings several improvements to structured data handling. You get access to built-in tools like web search and file search directly through the API. There's more efficient validation of structured outputs and improved error messages with better recovery mechanisms.
Here's a quick example showing how it works:
```python
class User(BaseModel):
name: str
age: int
# Create structured output
profile = client.responses.create(
input="Extract out Ivan is 28 years old",
response_model=User,
)
print(profile)
#> name='Ivan' age=28
```
## Key Benefits
The integration maintains Instructor's core strength of type safety while adding the power of the Responses API. You get full Pydantic model validation, automatic type checking, and clear error messages when validation fails. This gives you confidence that your outputs meet the constraints you've defined.
One of the most exciting features is the built-in tools support. You can now easily perform web searches with automatic citations, search through your knowledge base, and get real-time information with proper attribution. This significantly expands what you can build without having to integrate multiple APIs.
Here's an example using web search:
```python
class Citation(BaseModel):
id: int
url: str
class Summary(BaseModel):
citations: list[Citation]
summary: str
response = client.responses.create(
input="What are some of the best places to visit in New York for Latin American food?",
tools=[{"type": "web_search_preview"}],
response_model=Summary,
)
```
The integration supports multiple ways to get structured outputs. You can use basic creation for simple, straightforward structured outputs. If you need real-time updates, partial creation lets you stream them as they come in. For handling multiple instances of the same object, iterable creation works great. And when you need both structured output and raw completion, completion with raw response gives you exactly that.
For production applications, we've maintained full async support. This lets you build responsive applications that can handle multiple requests efficiently:
```python
async def get_user_profile():
async_client = instructor.from_provider(
"openai/gpt-4.1-mini", mode=instructor.Mode.RESPONSES_TOOLS, async_client=True
)
profile = await async_client.responses.create(
input="Extract: Maria lives in Spain.", response_model=UserProfile
)
```
## Why This Matters
The integration of Instructor with OpenAI's Responses API brings two major benefits that will transform how you work with LLMs.
First, it makes working with inline citations significantly easier. When your LLM needs to reference external information, you get structured citation data that's ready to integrate into downstream applications. No more parsing messy text or manually extracting references - they come as properly typed objects that you can immediately use in your code.
Second, it works seamlessly with your existing chat completions code. You can add powerful capabilities like file search and web search without modifying your codebase. Just add the tool definition, and you're ready to go. Here's how simple it is:
```python
from pydantic import BaseModel
import instructor
class Citation(BaseModel):
id: int
url: str
class Summary(BaseModel):
citations: list[Citation]
summary: str
client = instructor.from_provider(
"openai/gpt-4.1-mini",
mode=instructor.Mode.RESPONSES_TOOLS_WITH_INBUILT_TOOLS,
)
response = client.create(
messages=[
{
"role": "user",
"content": "What are some of the best places to visit in New York for Latin American food?",
}
],
tools=[{"type": "web_search_preview"}],
response_model=Summary,
)
print(response)
"""
citations=[Citation(id=1, url='https://www.nycgo.com/restaurants/best-latin-american-restaurants-in-nyc/'), Citation(id=2, url='https://www.timeout.com/newyork/restaurants/best-latin-american-restaurants-in-nyc'), Citation(id=3, url='https://www.thrillist.com/eat/nation/best-latin-american-restaurants-nyc')] summary="Some of the best places to visit in New York for Latin American food include neighborhoods and restaurants known for authentic and diverse offerings. In Manhattan, areas like the East Village and Lower East Side have excellent Latin American restaurants. Popular spots include Casa Enrique, known for Mexican cuisine; Tia Pol, offering Spanish and Latin flavors; and La Contenta, serving dishes from various Latin American countries. Brooklyn's Williamsburg and Bushwick have emerged as vibrant spots for Latin American eats, with restaurants such as La Esquina and Fonda not to miss. These places are celebrated for delicious food, lively atmospheres, and cultural authenticity, making them top choices for anyone looking to enjoy Latin American cuisine in New York City."
"""
```
This makes the path forward clear - you can enhance your existing applications with the latest OpenAI features while maintaining the type safety and validation Instructor is known for. No need to learn a new API or refactor your code. It just works.
## Getting Started
To start using the new Responses API integration, update to the latest version of Instructor, set up your OpenAI API key, initialize your client with the Responses mode, and start creating structured outputs.
This integration represents a significant step forward in making LLM development more accessible and powerful. We're excited to see what you'll build with these new capabilities.
For more detailed information about using the Responses API with Instructor, check out our [OpenAI integration guide](../../integrations/openai.md).
Happy coding!
================================================
FILE: docs/blog/posts/announcing-unified-provider-interface.md
================================================
---
authors:
- jxnl
- ivanleomk
categories:
- instructor
comments: true
date: 2025-05-08
description: Switch between different models and providers with a single string!
draft: false
tags:
- LLMs
- Instructor
---
We are pleased to introduce a significant enhancement to Instructor: the **`from_provider()`** function. While Instructor has always focused on providing robust structured outputs, we've observed that many users work with multiple LLM providers. This often involves repetitive setup for each client.
The `from_provider()` function aims to simplify this process, making it easier to initialize clients and experiment across different models.
This new feature offers a streamlined, string-based method to initialize an Instructor-enhanced client for a variety of popular LLM providers.
<!-- more -->
## What is `from_provider()`?
The `from_provider()` function serves as a smart factory for creating LLM clients. By providing a model string identifier, such as `"openai/gpt-4o"` or `"anthropic/claude-3-opus-20240229"`, the function handles the necessary setup:
- **Automatic SDK Detection**: It identifies the targeted provider (e.g., OpenAI, Anthropic, Google, Mistral, Cohere).
- **Client Initialization**: It dynamically imports the required provider-specific SDK and initializes the native client (like `openai.OpenAI()` or `anthropic.Anthropic()`).
- **Instructor Patching**: It automatically applies the Instructor patch to the client, enabling structured outputs, validation, and retry mechanisms.
- **Sensible Defaults**: It uses recommended `instructor.Mode` settings for each provider, optimized for performance and capabilities such as tool use or JSON mode, where applicable.
- **Sync and Async Support**: Users can obtain either a synchronous or an asynchronous client by setting the `async_client=True` flag.
## Key Benefits
The `from_provider()` function is designed to streamline several common workflows:
- **Model Comparison**: Facilitates quick switching between different models or providers to evaluate performance, cost, or output quality for specific tasks.
- **Multi-Provider Strategies**: Simplifies the implementation of fallback mechanisms or routing queries to different LLMs based on criteria like complexity or cost, reducing client management overhead.
- **Rapid Prototyping**: Allows for faster setup when starting with a new provider or model.
- **Simplified Configuration**: Reduces boilerplate code in projects that integrate with multiple LLM providers.
## How it Works: A Look Under the Hood
Internally, `from_provider()` (located in `instructor/auto_client.py`) parses the model string (e.g., `"openai/gpt-5-nano"`) to identify the provider and model name. It then uses conditional logic to import the correct libraries, instantiate the client, and apply the appropriate Instructor patch. For instance, the conceptual handling for an OpenAI client would involve importing the `openai` SDK and `instructor.from_openai`.
```python
# Conceptual illustration of internal logic for OpenAI:
# (Actual implementation is in instructor/auto_client.py)
# if provider == "openai":
# import openai
# from instructor import from_openai, Mode
#
# # 'async_client', 'model_name', 'kwargs' are determined by from_provider
# native_client = openai.AsyncOpenAI() if async_client else openai.OpenAI()
#
# return from_openai(
# native_client,
# model=model_name,
# mode=Mode.TOOLS, # Default mode for OpenAI
# **kwargs,
# )
```
The function also manages dependencies by alerting users to install missing packages (e.g., via `uv pip install openai`) if they are not found.
## Example Usage
> Note: Ensure your API keys (e.g., `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`) are configured as environment variables to run this code.
Here's a self-contained example demonstrating how `from_provider()` can be used to retrieve structured output from Google's Gemini 2.0 Flash model.
```python
import instructor
from pydantic import BaseModel
from typing import Iterable
# Define your data structure
class Person(BaseModel):
name: str
age: int
# Connect to any provider with a single line
client = instructor.from_provider("google/gemini-2.0-flash")
# Extract structured data
response = client.create(
messages=[
{
"role": "user",
"content": "Alice is 30 and Bob is 25.",
}
],
response_model=Iterable[Person],
)
for person in response:
print(f"Name: {person.name}, Age: {person.age}")
#> Name: Alice, Age: 30
#> Name: Bob, Age: 25
# Output:
# Name: Alice, Age: 30
# Name: Bob, Age: 25
```
Switching providers is as simple as changing the string:
```python
# OpenAI
client = instructor.from_provider("openai/gpt-4.1")
# Anthropic (with version date)
client = instructor.from_provider("anthropic/claude-3-5-haiku-20241022")
```
With the unified provider interface, you can now easily benchmark different models on the same task. This is crucial when you need to:
1. Compare response quality across different providers
2. Test which model gives the best structured extraction results
3. Optimize for speed vs. accuracy tradeoffs
4. Run A/B tests between providers without code refactoring
Instead of maintaining separate codebases for each provider or complex switching logic, you can focus on what matters: finding the optimal model for your specific use case.
### Async Support
When building production applications that need to remain responsive, asynchronous processing is essential.
Instructor's unified provider interface supports this workflow with a simple `async_client` keyword during initialization.
```python
client = instructor.from_provider("openai/gpt-4.1", async_client=True)
```
The async implementation works particularly well for web servers, batch processing jobs, or any scenario where you need to extract structured data without blocking your application's main thread.
Here's how you can implement it:
```python
import instructor
from pydantic import BaseModel
import asyncio
class UserProfile(BaseModel):
name: str
country: str
async def get_user_profile():
# Initialise an asynchronous client
async_client = instructor.from_provider("openai/gpt-4.1-mini", async_client=True)
# Extract data asynchronously
profile = await async_client.create(
messages=[{"role": "user", "content": "Extract: Maria lives in Spain."}],
response_model=UserProfile,
)
print(f"Name: {profile.name}, Country: {profile.country}")
#> Name: Maria, Country: Spain
if __name__ == "__main__":
asyncio.run(get_user_profile())
```
### Provider Specific Parameters
Some providers require additional parameters for optimal performance.
Rather than hiding these options, Instructor allows you to pass them directly through the `from_provider()` function:
```python
# Anthropic requires max tokens
client = instructor.from_provider("anthropic/claude-3-sonnet-20240229", max_tokens=1024)
```
If you'd like to change this parameter later, you can override it by passing it directly to the `client.chat.completions.create` call.
### Type Completion
To make it easy for you to find the right model string, we now ship with auto-complete for these new model-provider initialisation strings.
This is automatically provided for you out of the box when you use the new `from_provider` method as seen below.

Say goodbye to fiddling around with messy model versioning and get cracking on your business logic instead!
## Path Forward
The `from_provider()` function offers a convenient method for client initialization. Instructor remains a lightweight wrapper around your chosen LLM provider's client, and users always retain the flexibility to initialize and patch clients manually for more granular control or when using providers not yet covered by this utility.
This unified interface is intended to balance ease of use for common tasks with the underlying flexibility of Instructor, aiming to make multi-provider LLM development more accessible and efficient. However, there is still much to do to further streamline multi-provider workflows. Future efforts could focus on:
- **Unified Prompt Caching API**: While Instructor supports prompt caching for providers like [Anthropic](../../integrations/anthropic.md#caching) (see also our [blog post on Anthropic prompt caching](../posts/anthropic-prompt-caching.md) and the general [Prompt Caching concepts](../../concepts/prompt_caching.md)), a more standardized, cross-provider API for managing cache behavior could significantly simplify optimizing costs and latency.
- **Unified Multimodal Object Handling**: Instructor already provides a robust way to work with [multimodal inputs like Images, Audio, and PDFs](../../concepts/multimodal.md) across different providers. However, a higher-level unified API could further abstract provider-specific nuances for these types, making it even simpler to build applications that seamlessly switch between, for example, OpenAI's vision capabilities and Anthropic's, without changing how media objects are passed.
These are areas where `instructor` can continue to reduce friction for developers working in an increasingly diverse LLM ecosystem.
We encourage you to try `from_provider()` in your projects, particularly when experimenting with multiple LLMs. Feedback and suggestions for additional providers or features are always welcome.
## Related Documentation
- [Provider Patching](../../concepts/patching.md) - How provider integration works
- [All Integrations](../../integrations/index.md) - Supported provider list
## See Also
- [String-Based Initialization](string-based-init.md) - Alternative init method
- [Framework Comparison](best_framework.md) - Multi-provider advantages
- [Getting Started](introduction.md) - Quick start guide
================================================
FILE: docs/blog/posts/anthropic-prompt-caching.md
================================================
---
authors:
- ivanleomk
categories:
- Anthropic
comments: true
date: 2024-09-14
description: Discover how prompt caching with Anthropic can improve response times
and reduce costs for large context applications.
draft: false
tags:
- prompt caching
- Anthropic
- API optimization
- cost reduction
- latency improvement
---
# Why should I use prompt caching?
Developers often face two key challenges when working with large contexts: slow response times and high costs. This is especially true when we're making many such calls over time, which severely impacts the cost and latency of our applications. With Anthropic's new prompt caching feature, we can easily solve both of these issues.
Since the new feature is still in beta, we're going to wait for it to be generally available before we integrate it into instructor. In the meantime, we've put together a quickstart guide on how to use the feature in your own applications.
<!-- more -->
!!! warning "Caching Limitations"
There are a few important limitations to be aware of when using prompt caching:
- **Minimum cache size**: For Claude Haiku, your cached content needs to be a minimum of 2048 tokens. For Claude Sonnet, the minimum is 1024 tokens.
- **Tool definitions**: Currently, tool definitions cannot be cached. However, support for caching tool definitions is planned for a future update.
- **Upgrade Anthropic**: You must upgrade to Anthropic version `0.34.0` or later to use prompt caching. Make sure that you're using the latest version of the Anthropic SDK.
Keep these limitations in mind when implementing prompt caching in your applications.
??? note "Source Text"
In the following example, we'll be using a short excerpt from the novel "Pride and Prejudice" by Jane Austen. This text serves as an example of a substantial context that might typically lead to slow response times and high costs when working with language models. You can download it manually [here](https://www.gutenberg.org/cache/epub/1342/pg1342.txt)
```
_Walt Whitman has somewhere a fine and just distinction between “loving
by allowance” and “loving with personal love.” This distinction applies
to books as well as to men and women; and in the case of the not very
numerous authors who are the objects of the personal affection, it
brings a curious consequence with it. There is much more difference as
to their best work than in the case of those others who are loved “by
allowance” by convention, and because it is felt to be the right and
proper thing to love them. And in the sect--fairly large and yet
unusually choice--of Austenians or Janites, there would probably be
found partisans of the claim to primacy of almost every one of the
novels. To some the delightful freshness and humour of_ Northanger
Abbey, _its completeness, finish, and_ entrain, _obscure the undoubted
critical facts that its scale is small, and its scheme, after all, that
of burlesque or parody, a kind in which the first rank is reached with
difficulty._ Persuasion, _relatively faint in tone, and not enthralling
in interest, has devotees who exalt above all the others its exquisite
delicacy and keeping. The catastrophe of_ Mansfield Park _is admittedly
theatrical, the hero and heroine are insipid, and the author has almost
wickedly destroyed all romantic interest by expressly admitting that
Edmund only took Fanny because Mary shocked him, and that Fanny might
very likely have taken Crawford if he had been a little more assiduous;
yet the matchless rehearsal-scenes and the characters of Mrs. Norris and
others have secured, I believe, a considerable party for it._ Sense and
Sensibility _has perhaps the fewest out-and-out admirers; but it does
not want them._
_I suppose, however, that the majority of at least competent votes
would, all things considered, be divided between_ Emma _and the present
book; and perhaps the vulgar verdict (if indeed a fondness for Miss
Austen be not of itself a patent of exemption from any possible charge
of vulgarity) would go for_ Emma. _It is the larger, the more varied, the
more popular; the author had by the time of its composition seen rather
more of the world, and had improved her general, though not her most
peculiar and characteristic dialogue; such figures as Miss Bates, as the
Eltons, cannot but unite the suffrages of everybody. On the other hand,
I, for my part, declare for_ Pride and Prejudice _unhesitatingly. It
seems to me the most perfect, the most characteristic, the most
eminently quintessential of its author’s works; and for this contention
in such narrow space as is permitted to me, I propose here to show
cause._
_In the first place, the book (it may be barely necessary to remind the
reader) was in its first shape written very early, somewhere about 1796,
when Miss Austen was barely twenty-one; though it was revised and
finished at Chawton some fifteen years later, and was not published till
1813, only four years before her death. I do not know whether, in this
combination of the fresh and vigorous projection of youth, and the
critical revision of middle life, there may be traced the distinct
superiority in point of construction, which, as it seems to me, it
possesses over all the others. The plot, though not elaborate, is almost
regular enough for Fielding; hardly a character, hardly an incident
could be retrenched without loss to the story. The elopement of Lydia
and Wickham is not, like that of Crawford and Mrs. Rushworth, a_ coup de
théâtre; _it connects itself in the strictest way with the course of the
story earlier, and brings about the denouement with complete propriety.
All the minor passages--the loves of Jane and Bingley, the advent of Mr.
Collins, the visit to Hunsford, the Derbyshire tour--fit in after the
same unostentatious, but masterly fashion. There is no attempt at the
hide-and-seek, in-and-out business, which in the transactions between
Frank Churchill and Jane Fairfax contributes no doubt a good deal to the
intrigue of_ Emma, _but contributes it in a fashion which I do not think
the best feature of that otherwise admirable book. Although Miss Austen
always liked something of the misunderstanding kind, which afforded her
opportunities for the display of the peculiar and incomparable talent to
be noticed presently, she has been satisfied here with the perfectly
natural occasions provided by the false account of Darcy’s conduct given
by Wickham, and by the awkwardness (arising with equal naturalness) from
the gradual transformation of Elizabeth’s own feelings from positive
aversion to actual love. I do not know whether the all-grasping hand of
the playwright has ever been laid upon_ Pride and Prejudice; _and I dare
say that, if it were, the situations would prove not startling or
garish enough for the footlights, the character-scheme too subtle and
delicate for pit and gallery. But if the attempt were made, it would
certainly not be hampered by any of those loosenesses of construction,
which, sometimes disguised by the conveniences of which the novelist can
avail himself, appear at once on the stage._
_I think, however, though the thought will doubtless seem heretical to
more than one school of critics, that construction is not the highest
merit, the choicest gift, of the novelist. It sets off his other gifts
and graces most advantageously to the critical eye; and the want of it
will sometimes mar those graces--appreciably, though not quite
consciously--to eyes by no means ultra-critical. But a very badly-built
novel which excelled in pathetic or humorous character, or which
displayed consummate command of dialogue--perhaps the rarest of all
faculties--would be an infinitely better thing than a faultless plot
acted and told by puppets with pebbles in their mouths. And despite the
ability which Miss Austen has shown in working out the story, I for one
should put_ Pride and Prejudice _far lower if it did not contain what
seem to me the very masterpieces of Miss Austen’s humour and of her
faculty of character-creation--masterpieces who may indeed admit John
Thorpe, the Eltons, Mrs. Norris, and one or two others to their company,
but who, in one instance certainly, and perhaps in others, are still
superior to them._
_The characteristics of Miss Austen’s humour are so subtle and delicate
that they are, perhaps, at all times easier to apprehend than to
express, and at any particular time likely to be differently
apprehended by different persons. To me this humour seems to possess a
greater affinity, on the whole, to that of Addison than to any other of
the numerous species of this great British genus. The differences of
scheme, of time, of subject, of literary convention, are, of course,
obvious enough; the difference of sex does not, perhaps, count for much,
for there was a distinctly feminine element in “Mr. Spectator,” and in
Jane Austen’s genius there was, though nothing mannish, much that was
masculine. But the likeness of quality consists in a great number of
common subdivisions of quality--demureness, extreme minuteness of touch,
avoidance of loud tones and glaring effects. Also there is in both a
certain not inhuman or unamiable cruelty. It is the custom with those
who judge grossly to contrast the good nature of Addison with the
savagery of Swift, the mildness of Miss Austen with the boisterousness
of Fielding and Smollett, even with the ferocious practical jokes that
her immediate predecessor, Miss Burney, allowed without very much
protest. Yet, both in Mr. Addison and in Miss Austen there is, though a
restrained and well-mannered, an insatiable and ruthless delight in
roasting and cutting up a fool. A man in the early eighteenth century,
of course, could push this taste further than a lady in the early
nineteenth; and no doubt Miss Austen’s principles, as well as her heart,
would have shrunk from such things as the letter from the unfortunate
husband in the_ Spectator, _who describes, with all the gusto and all the
innocence in the world, how his wife and his friend induce him to play
at blind-man’s-buff. But another_ Spectator _letter--that of the damsel
of fourteen who wishes to marry Mr. Shapely, and assures her selected
Mentor that “he admires your_ Spectators _mightily”--might have been
written by a rather more ladylike and intelligent Lydia Bennet in the
days of Lydia’s great-grandmother; while, on the other hand, some (I
think unreasonably) have found “cynicism” in touches of Miss Austen’s
own, such as her satire of Mrs. Musgrove’s self-deceiving regrets over
her son. But this word “cynical” is one of the most misused in the
English language, especially when, by a glaring and gratuitous
falsification of its original sense, it is applied, not to rough and
snarling invective, but to gentle and oblique satire. If cynicism means
the perception of “the other side,” the sense of “the accepted hells
beneath,” the consciousness that motives are nearly always mixed, and
that to seem is not identical with to be--if this be cynicism, then
every man and woman who is not a fool, who does not care to live in a
fool’s paradise, who has knowledge of nature and the world and life, is
a cynic. And in that sense Miss Austen certainly was one. She may even
have been one in the further sense that, like her own Mr. Bennet, she
took an epicurean delight in dissecting, in displaying, in setting at
work her fools and her mean persons. I think she did take this delight,
and I do not think at all the worse of her for it as a woman, while she
was immensely the better for it as an artist.
```
Let's first initialize our Anthropic client. This will be the same as what we've done before, except we're now using the new `beta.prompt_caching.messages.create` method.
```python
from instructor import Instructor, Mode, patch
from anthropic import Anthropic
client = Instructor(
client=Anthropic(),
create=patch(
create=Anthropic().beta.prompt_caching.messages.create,
mode=Mode.TOOLS,
),
mode=Mode.TOOLS,
)
```
We'll then create a new `Character` class that will be used to extract a single character from the text, and read in our source text (roughly 2856 tokens using the Anthropic tokenizer).
```python
with open("./book.txt") as f:
book = f.read()
class Character(BaseModel):
name: str
description: str
```
Once we've done this, we can then make an API call to get the description of the character.
```python
for _ in range(2):
resp, completion = client.create_with_completion( # (1)!
model="claude-3-haiku-20240307",
messages=[
{
"role": "user",
"content": [
{
"type": "text",
"text": "<book>" + book + "</book>",
"cache_control": {"type": "ephemeral"}, # (2)!
},
{
"type": "text",
"text": "Extract a character from the text given above",
},
],
},
],
response_model=Character,
max_tokens=1000,
)
assert isinstance(resp, Character)
print(completion.usage) # (3)!
print(resp)
```
1. Using the `create_with_completion` method we can get back both the structured response and the completion object
2. We set the `cache_control` parameter to "ephemeral" to tell Anthropic to cache the book content temporarily
3. We print out the usage information to monitor token consumption
You'll notice that the usage information is different than what we've seen before. This is because we're now using the `create_with_completion` method which returns both the structured response and the completion object. The completion object contains usage information which we can use to monitor token consumption.
When we run this, you'll notice that we get the following output.
```bash
PromptCachingBetaUsage(
cache_creation_input_tokens=2856,
cache_read_input_tokens=0,
input_tokens=30,
output_tokens=119
)
Character(
name='Elizabeth Bennet',
description="The protagonist of Jane Austen's novel Pride and Prejudice, who
undergoes a transformation from initially disliking Mr. Darcy to eventually falling
in love with him. The passage describes Elizabeth as a complex, nuanced character,
noting how her feelings towards Darcy evolve naturally over the course of the story."
)
PromptCachingBetaUsage(
cache_creation_input_tokens=0,
cache_read_input_tokens=2856,
input_tokens=30,
output_tokens=93
)
Character(
name='Mrs. Norris',
description='A character from Jane Austen\'s novel Mansfield Park, described as
having "matchless" scenes and being one of the characters that has secured a
considerable party of admirers for the novel.'
)
```
You'll notice that the first request wrote `2856` tokens to the cache (`cache_creation_input_tokens`), and the second request read those same `2856` tokens back from it (`cache_read_input_tokens`).
In other words, the contents of `book` were cached after the first request and reused in the second request. When you have a larger context window, this can save you a significant amount of money and time because your requests will return a lot faster too.
This is the entire code for the example above.
```python
from instructor import Instructor, Mode, patch
from anthropic import Anthropic
from pydantic import BaseModel
client = Instructor(
client=Anthropic(),
create=patch(
create=Anthropic().beta.prompt_caching.messages.create,
mode=Mode.TOOLS,
),
mode=Mode.TOOLS,
)
class Character(BaseModel):
name: str
description: str
with open("./book.txt") as f:
book = f.read()
for _ in range(2):
resp, completion = client.create_with_completion(
model="claude-3-haiku-20240307",
messages=[
{
"role": "user",
"content": [
{
"type": "text",
"text": "<book>" + book + "</book>",
"cache_control": {"type": "ephemeral"},
},
{
"type": "text",
"text": "Extract a character from the text given above",
},
],
},
],
response_model=Character,
max_tokens=1000,
)
assert isinstance(resp, Character)
print(completion.usage)
print(resp)
```
## Related Documentation
- [Caching Strategies](../../concepts/caching.md) - General caching concepts
- [Anthropic Integration](../../integrations/anthropic.md) - Full Anthropic guide
## See Also
- [Anthropic Structured Outputs](structured-output-anthropic.md) - Use with caching
- [Response Caching](caching.md) - General caching strategies
- [Performance Monitoring](logfire.md) - Track cache performance
================================================
FILE: docs/blog/posts/anthropic-web-search-structured.md
================================================
---
date: 2025-05-07
authors:
- jxnl
categories:
- tutorials
- anthropic
- structured-data
---
# Using Anthropic's Web Search with Instructor for Real-Time Data
Anthropic's new web search tool, when combined with Instructor, provides a powerful way to get real-time, structured data from the web. This allows you to build applications that can answer questions and provide information that is up-to-date, going beyond the knowledge cut-off of large language models.
In this post, we'll explore how to use the `web_search` tool with Instructor to fetch the latest information and structure it into a Pydantic model. Even a simple structure can be very effective for clarity and further processing.
<!-- more -->
## How it Works
The web search tool enables Claude models to perform web searches during a generation. When you provide the `web_search` tool in your API request, Claude can decide to use it if the prompt requires information it doesn't have. The API then executes the search, provides the results back to Claude, and Claude can then use this information to generate a response. Importantly, Claude will cite its sources from the search results. You can find more details in the [official Anthropic documentation](https://docs.anthropic.com/en/docs/build-with-claude/tool-use/web-search-tool).
Instructor simplifies this process by allowing you to define a Pydantic model for the desired output structure. When Claude uses the web search tool and formulates an answer, Instructor ensures that the final output conforms to your defined schema.
## Example: Getting the Latest UFC Results
Let's look at a practical example. We want to get the latest UFC fight results.
First, ensure you have `instructor` and `anthropic` installed:
```bash
uv add instructor anthropic
```
Now, let's define our Pydantic model for the response:
```python
import instructor
from pydantic import BaseModel
# Notice that we use JSON mode, not TOOLS mode
client = instructor.from_provider(
"anthropic/claude-3-7-sonnet-latest",
mode=instructor.Mode.JSON,
async_client=False,
)
class Citation(BaseModel):
id: int
url: str
class Response(BaseModel):
citations: list[Citation]
response: str
```
This `Response` model is straightforward. It gets the model to first generate a list of citations for the articles it referenced before generating its answer.
This helps to ground its response in the sources it retrieved and provide a higher quality response.
Now, we can make the API call:
```python
response_data, completion_details = client.messages.create_with_completion(
messages=[
{
"role": "system",
"content": "You are a helpful assistant that summarizes news articles. Your final response should be only contain a single JSON object returned in your final message to the user. Make sure to provide the exact ids for the citations that support the information you provide in the form of inline citations as [1] [2] [3] which correspond to a unique id you generate for a url that you find in the web search tool which is relevant to your final response.",
},
{
"role": "user",
"content": "What are the latest results for the UFC and who won? Answer this in a concise response that's under 3 sentences.",
},
],
tools=[{"type": "web_search_20250305", "name": "web_search", "max_uses": 3}],
response_model=Response,
)
print("Response:")
print(response_data.response)
print("\nCitations:")
for citation in response_data.citations:
print(f"{citation.id}: {citation.url}")
```
This approach provides a clean way to get the LLM's answer into a defined Pydantic object. The `examples/anthropic-web-tool/run.py` script reflects this implementation.
Expected output (will vary based on real-time web search data):
```
Response:
The latest UFC event was UFC Fight Night: Sandhagen vs Figueiredo held on May 3, 2025, in Des Moines, Iowa. Cory Sandhagen defeated former champion Deiveson Figueiredo by TKO (knee injury) in the main event, while Reinier de Ridder upset previously undefeated prospect Bo Nickal by TKO in the co-main event [1][2]. The next major UFC event is UFC 315 on May 10, featuring a welterweight championship bout between Belal Muhammad and Jack Della Maddalena [3].
Citations:
1: https://www.ufc.com/news/main-card-results-highlights-winner-interviews-ufc-fight-night-sandhagen-vs-figueiredo-wells-fargo-arena-des-moines
2: https://www.mmamania.com/2025/5/4/24423285/ufc-des-moines-results-sooo-about-last-night-sandhagen-vs-figueiredo-espn-mma-bo-nickal
3: https://en.wikipedia.org/wiki/UFC_315
```
## Key Benefits
- **Real-Time Information**: Access the latest data directly from the web.
- **Structured Output**: Even with a simple model, Instructor ensures the output is a Pydantic object, making it easy to work with programmatically.
- **Source Citations**: Claude automatically cites sources, allowing for verification (details in the API response, not shown in this simplified example).
- **Reduced Hallucinations**: By relying on web search for factual, up-to-the-minute data, the likelihood of the LLM providing incorrect or outdated information is reduced.
## Configuring the Web Search Tool
Anthropic provides several options to configure the web search tool:
- `max_uses`: Limit the number of searches Claude can perform in a single request.
- `allowed_domains`: Restrict searches to a list of specific domains.
- `blocked_domains`: Prevent searches on certain domains.
- `user_location`: Localize search results by providing an approximate location (city, region, country, timezone).
For example, to limit searches to 3 and only allow results from `espn.com` and `ufc.com`:
```python
tools = (
[
{
"type": "web_search_20250305",
"name": "web_search",
"max_uses": 3,
"allowed_domains": ["espn.com", "ufc.com"],
}
],
)
```
You cannot use `allowed_domains` and `blocked_domains` in the same request.
## Conclusion
Combining Anthropic's web search tool with Instructor's structured data capabilities opens up exciting possibilities for building dynamic, information-rich applications. Whether you're tracking sports scores, news updates, or market trends, this powerful duo can help you access and organize real-time web data effectively, even with simple Pydantic models.
Check out the example code in `examples/anthropic-web-tool/run.py` to see this implementation, and refer to the [Anthropic web search documentation](https://docs.anthropic.com/en/docs/build-with-claude/tool-use/web-search-tool) for more in-depth information on the tool's capabilities.
================================================
FILE: docs/blog/posts/anthropic.md
================================================
---
authors:
- jxnl
categories:
- Anthropic
comments: true
date: 2024-03-20
description: Learn how to integrate Anthropic's powerful language models into your projects using Instructor, with step-by-step guidance on installation, client setup, and creating structured outputs with Pydantic models.
draft: false
tags:
- Anthropic
- API Development
- Pydantic
- Python
- LLM Techniques
---
# Structured Outputs with Anthropic
A special shoutout to [Shreya](https://twitter.com/shreyaw_) for her contributions to the anthropic support. As of now, all features are operational with the exception of streaming support.
For those eager to experiment, simply patch the client with `ANTHROPIC_JSON`, which will enable you to leverage the `anthropic` client for making requests.
```
pip install instructor[anthropic]
```
!!! warning "Missing Features"
Just want to acknowledge that we know that we are missing partial streaming and some better re-asking support for XML. We are working on it and will have it soon.
```python
from pydantic import BaseModel
from typing import List
import anthropic
import instructor
# Patching the Anthropics client with the instructor for enhanced capabilities
anthropic_client = instructor.from_openai(
create=anthropic.Anthropic().messages.create,
mode=instructor.Mode.JSON
)
class Properties(BaseModel):
name: str
value: str
class User(BaseModel):
name: str
age: int
properties: List[Properties]
user_response = anthropic_client(
model="claude-3-haiku-20240307",
max_tokens=1024,
max_retries=0,
messages=[
{
"role": "user",
"content": "Create a user for a model with a name, age, and properties.",
}
],
response_model=User,
) # type: ignore
print(user_response.model_dump_json(indent=2))
"""
{
"name": "John",
"age": 25,
"properties": [
{
"key": "favorite_color",
"value": "blue"
}
]
}
```
We're encountering challenges with deeply nested types and eagerly invite the community to test, provide feedback, and suggest necessary improvements as we enhance the anthropic client's support.
================================================
FILE: docs/blog/posts/bad-schemas-could-break-llms.md
================================================
---
authors:
- ivanleomk
categories:
- LLM Techniques
comments: true
date: 2024-09-26
description: Discover how response models impact LLM performance, focusing on structured
outputs for optimal results in GPT-4o and Claude models.
draft: false
tags:
- LLM Performance
- Response Models
- St
gitextract_z1bftxv1/ ├── .coveragerc ├── .cursor/ │ └── rules/ │ ├── documentation-sync.mdc │ ├── followups.mdc │ ├── new-features-planning.mdc │ ├── readme.md │ └── simple-language.mdc ├── .cursorignore ├── .github/ │ ├── FUNDING.yml │ ├── ISSUE_TEMPLATE/ │ │ ├── bug_report.md │ │ └── feature_request.md │ ├── PULL_REQUEST_TEMPLATE/ │ │ └── pull_request_template.md │ ├── dependabot.yml │ └── workflows/ │ ├── ai-label.yml │ ├── evals.yml │ ├── python-publish.yml │ ├── ruff.yml │ ├── scheduled-release.yml │ ├── test.yml │ ├── test_docs.yml │ └── ty.yml ├── .gitignore ├── .grit/ │ ├── .gitignore │ └── grit.yaml ├── .pre-commit-config.yaml ├── .ruff.toml ├── AGENT.md ├── CHANGELOG.md ├── CLAUDE.md ├── CONTRIBUTING.md ├── LICENSE ├── NEW_PROVIDER_AGENT_INSTRUCTIONS.md ├── README.md ├── build_mkdocs.sh ├── cross_link_mapping.yaml ├── docs/ │ ├── AGENT.md │ ├── api-docstring-assessment.md │ ├── api.md │ ├── architecture.md │ ├── blog/ │ │ ├── .authors.yml │ │ ├── index.md │ │ └── posts/ │ │ ├── aisummit-2023.md │ │ ├── announcing-gemini-tool-calling-support.md │ │ ├── announcing-instructor-responses-support.md │ │ ├── announcing-unified-provider-interface.md │ │ ├── anthropic-prompt-caching.md │ │ ├── anthropic-web-search-structured.md │ │ ├── anthropic.md │ │ ├── bad-schemas-could-break-llms.md │ │ ├── best_framework.md │ │ ├── caching.md │ │ ├── chain-of-density.md │ │ ├── chat-with-your-pdf-with-gemini.md │ │ ├── citations.md │ │ ├── consistent-stories.md │ │ ├── course.md │ │ ├── cursor-rules.md │ │ ├── distilation-part1.md │ │ ├── extract-model-looks.md │ │ ├── extracting-model-metadata.md │ │ ├── fake-data.md │ │ ├── full-fastapi-visibility.md │ │ ├── generating-pdf-citations.md │ │ ├── generator.md │ │ ├── google-openai-client.md │ │ ├── introducing-structured-outputs-with-cerebras-inference.md │ │ ├── introducing-structured-outputs.md │ │ ├── introduction.md │ │ ├── jinja-proposal.md │ │ ├── langsmith.md │ │ ├── learn-async.md │ │ ├── llm-as-reranker.md │ │ ├── 
llms-txt-adoption.md │ │ ├── llms-txt-support.md │ │ ├── logfire.md │ │ ├── lseg-market-surveillance.md │ │ ├── matching-language.md │ │ ├── migrating-to-uv.md │ │ ├── mkdocs-llmstxt-plugin-integration.md │ │ ├── multimodal-gemini.md │ │ ├── native_caching.md │ │ ├── open_source.md │ │ ├── openai-distilation-store.md │ │ ├── openai-multimodal.md │ │ ├── pairwise-llm-judge.md │ │ ├── parea.md │ │ ├── pydantic-is-still-all-you-need.md │ │ ├── rag-and-beyond.md │ │ ├── rag-timelines.md │ │ ├── semantic-validation-structured-outputs.md │ │ ├── situate-context.md │ │ ├── string-based-init.md │ │ ├── structured-output-anthropic.md │ │ ├── tidy-data-from-messy-tables.md │ │ ├── timestamp.md │ │ ├── using_json.md │ │ ├── validation-part1.md │ │ ├── version-1.md │ │ ├── why-care-about-mcps.md │ │ ├── writer-support.md │ │ ├── youtube-flashcards.md │ │ └── youtube-transcripts.md │ ├── cli/ │ │ ├── batch.md │ │ ├── finetune.md │ │ ├── index.md │ │ └── usage.md │ ├── concepts/ │ │ ├── alias.md │ │ ├── batch.md │ │ ├── caching.md │ │ ├── citation.md │ │ ├── dictionary_operations.md │ │ ├── distillation.md │ │ ├── enums.md │ │ ├── error_handling.md │ │ ├── fastapi.md │ │ ├── fields.md │ │ ├── from_provider.md │ │ ├── hooks.md │ │ ├── index.md │ │ ├── iterable.md │ │ ├── lists.md │ │ ├── logging.md │ │ ├── maybe.md │ │ ├── migration.md │ │ ├── mode-migration.md │ │ ├── models.md │ │ ├── multimodal.md │ │ ├── parallel.md │ │ ├── partial.md │ │ ├── patching.md │ │ ├── philosophy.md │ │ ├── prompt_caching.md │ │ ├── prompting.md │ │ ├── raw_response.md │ │ ├── reask_validation.md │ │ ├── retrying.md │ │ ├── semantic_validation.md │ │ ├── templating.md │ │ ├── typeadapter.md │ │ ├── typeddicts.md │ │ ├── types.md │ │ ├── union.md │ │ ├── unions.md │ │ ├── usage.md │ │ └── validation.md │ ├── contributing.md │ ├── debugging.md │ ├── examples/ │ │ ├── action_items.md │ │ ├── audio_extraction.md │ │ ├── batch_classification_langsmith.md │ │ ├── batch_in_memory.md │ │ ├── 
batch_job_oai.md │ │ ├── building_knowledge_graphs.md │ │ ├── bulk_classification.md │ │ ├── classification.md │ │ ├── document_segmentation.md │ │ ├── entity_resolution.md │ │ ├── exact_citations.md │ │ ├── examples.md │ │ ├── extract_contact_info.md │ │ ├── extract_slides.md │ │ ├── extracting_receipts.md │ │ ├── extracting_tables.md │ │ ├── groq.md │ │ ├── image_to_ad_copy.md │ │ ├── index.md │ │ ├── knowledge_graph.md │ │ ├── local_classification.md │ │ ├── mistral.md │ │ ├── moderation.md │ │ ├── multi_modal_gemini.md │ │ ├── multiple_classification.md │ │ ├── ollama.md │ │ ├── open_source.md │ │ ├── pandas_df.md │ │ ├── partial_streaming.md │ │ ├── pii.md │ │ ├── planning-tasks.md │ │ ├── recursive.md │ │ ├── search.md │ │ ├── self_critique.md │ │ ├── single_classification.md │ │ ├── sqlmodel.md │ │ ├── tables_from_vision.md │ │ ├── tracing_with_langfuse.md │ │ ├── using_decimals.md │ │ ├── watsonx.md │ │ └── youtube_clips.md │ ├── faq.md │ ├── getting-started.md │ ├── help.md │ ├── hooks/ │ │ └── hide_lines.py │ ├── index.md │ ├── installation.md │ ├── integrations/ │ │ ├── anthropic.md │ │ ├── anyscale.md │ │ ├── azure.md │ │ ├── bedrock.md │ │ ├── cerebras.md │ │ ├── cohere.md │ │ ├── cortex.md │ │ ├── databricks.md │ │ ├── deepseek.md │ │ ├── fireworks.md │ │ ├── genai.md │ │ ├── google.md │ │ ├── groq.md │ │ ├── index.md │ │ ├── litellm.md │ │ ├── llama-cpp-python.md │ │ ├── mistral.md │ │ ├── ollama.md │ │ ├── openai-responses.md │ │ ├── openai.md │ │ ├── openrouter.md │ │ ├── perplexity.md │ │ ├── sambanova.md │ │ ├── together.md │ │ ├── truefoundry.md │ │ ├── vertex.md │ │ ├── writer.md │ │ └── xai.md │ ├── javascripts/ │ │ └── katex.js │ ├── jobs.md │ ├── learning/ │ │ ├── getting_started/ │ │ │ ├── first_extraction.md │ │ │ ├── installation.md │ │ │ ├── response_models.md │ │ │ └── structured_outputs.md │ │ ├── index.md │ │ ├── patterns/ │ │ │ ├── field_validation.md │ │ │ ├── list_extraction.md │ │ │ ├── nested_structure.md │ │ │ ├── 
optional_fields.md │ │ │ ├── prompt_templates.md │ │ │ └── simple_object.md │ │ ├── streaming/ │ │ │ ├── basics.md │ │ │ └── lists.md │ │ └── validation/ │ │ ├── basics.md │ │ ├── custom_validators.md │ │ ├── field_level_validation.md │ │ └── retry_mechanisms.md │ ├── llms.txt │ ├── modes-comparison.md │ ├── newsletter.md │ ├── overrides/ │ │ └── main.html │ ├── prompting/ │ │ ├── decomposition/ │ │ │ ├── decomp.md │ │ │ ├── faithful_cot.md │ │ │ ├── least_to_most.md │ │ │ ├── plan_and_solve.md │ │ │ ├── program_of_thought.md │ │ │ ├── recurs_of_thought.md │ │ │ ├── skeleton_of_thought.md │ │ │ └── tree-of-thought.md │ │ ├── ensembling/ │ │ │ ├── cosp.md │ │ │ ├── dense.md │ │ │ ├── diverse.md │ │ │ ├── max_mutual_information.md │ │ │ ├── meta_cot.md │ │ │ ├── more.md │ │ │ ├── prompt_paraphrasing.md │ │ │ ├── self_consistency.md │ │ │ ├── universal_self_consistency.md │ │ │ └── usp.md │ │ ├── few_shot/ │ │ │ ├── cosp.md │ │ │ ├── example_generation/ │ │ │ │ └── sg_icl.md │ │ │ ├── example_ordering.md │ │ │ └── exemplar_selection/ │ │ │ ├── knn.md │ │ │ └── vote_k.md │ │ ├── index.md │ │ ├── self_criticism/ │ │ │ ├── chain_of_verification.md │ │ │ ├── cumulative_reason.md │ │ │ ├── reversecot.md │ │ │ ├── self_calibration.md │ │ │ ├── self_refine.md │ │ │ └── self_verification.md │ │ ├── thought_generation/ │ │ │ ├── chain_of_thought_few_shot/ │ │ │ │ ├── active_prompt.md │ │ │ │ ├── auto_cot.md │ │ │ │ ├── complexity_based.md │ │ │ │ ├── contrastive.md │ │ │ │ ├── memory_of_thought.md │ │ │ │ ├── prompt_mining.md │ │ │ │ └── uncertainty_routed_cot.md │ │ │ └── chain_of_thought_zero_shot/ │ │ │ ├── analogical_prompting.md │ │ │ ├── step_back_prompting.md │ │ │ ├── tab_cot.md │ │ │ └── thread_of_thought.md │ │ └── zero_shot/ │ │ ├── emotion_prompting.md │ │ ├── rar.md │ │ ├── re2.md │ │ ├── role_prompting.md │ │ ├── s2a.md │ │ ├── self_ask.md │ │ ├── simtom.md │ │ └── style_prompting.md │ ├── repository-overview.md │ ├── start-here.md │ ├── templates/ │ │ └── 
provider_template.md │ ├── tutorials/ │ │ ├── 1-introduction.ipynb │ │ ├── 2-tips.ipynb │ │ ├── 3-0-applications-rag.ipynb │ │ ├── 3-1-validation-rag.ipynb │ │ ├── 4-validation.ipynb │ │ ├── 5-knowledge-graphs.ipynb │ │ ├── 6-chain-of-density.ipynb │ │ ├── 7-synthetic-data-generation.ipynb │ │ └── index.md │ └── why.md ├── ellipsis.yaml ├── examples/ │ ├── __init__.py │ ├── anthropic/ │ │ └── run.py │ ├── anthropic-web-tool/ │ │ └── run.py │ ├── asyncio-benchmarks/ │ │ └── run.py │ ├── auto-ticketer/ │ │ └── run.py │ ├── automodel/ │ │ └── run.py │ ├── avail/ │ │ ├── run.py │ │ └── run_mixtral.py │ ├── batch-classification/ │ │ ├── run-cache.py │ │ ├── run.py │ │ └── run_langsmith.py │ ├── batch_api/ │ │ ├── README.md │ │ ├── in_memory_batch_example.py │ │ └── run_batch_test.py │ ├── caching/ │ │ ├── example_diskcache.py │ │ ├── example_redis.py │ │ ├── lru.py │ │ └── run.py │ ├── caching_prototype/ │ │ ├── README.md │ │ └── run_real.py │ ├── chain-of-density/ │ │ ├── Readme.md │ │ ├── chain_of_density.py │ │ ├── finetune.py │ │ └── requirements.txt │ ├── citation_with_extraction/ │ │ ├── Dockerfile │ │ ├── README.md │ │ ├── citation_fuzzy_match.py │ │ ├── diagram.py │ │ ├── main.py │ │ ├── modal_main.py │ │ └── requirements.txt │ ├── citations/ │ │ └── run.py │ ├── classification/ │ │ ├── classifiy_with_validation.py │ │ ├── multi_prediction.py │ │ └── simple_prediction.py │ ├── codegen-from-schema/ │ │ ├── create_fastapi_app.py │ │ ├── input.json │ │ ├── models.py │ │ ├── readme.md │ │ └── run.py │ ├── cohere/ │ │ └── cohere.py │ ├── crm/ │ │ └── run.py │ ├── decimals/ │ │ └── run.py │ ├── distilations/ │ │ ├── math_finetunes_val.jsonl │ │ ├── readme.md │ │ ├── three_digit_mul.py │ │ └── three_digit_mul_dispatch.py │ ├── evals/ │ │ ├── eval.py │ │ ├── models.py │ │ ├── stats_dict.py │ │ ├── streamlit.py │ │ └── test.jsonl │ ├── extract-table/ │ │ ├── run_vision.py │ │ ├── run_vision_langsmith.py │ │ ├── run_vision_org.py │ │ ├── run_vision_org_table.py │ │ ├── 
run_vision_receipt.py │ │ └── test.py │ ├── extracting-pii/ │ │ └── run.py │ ├── fastapi_app/ │ │ ├── __init__.py │ │ ├── main.py │ │ └── script.py │ ├── fizzbuzz/ │ │ └── run.py │ ├── gpt-engineer/ │ │ ├── changes.diff │ │ ├── generate.py │ │ ├── program.json │ │ └── refactor.py │ ├── groq/ │ │ ├── groq_example.py │ │ └── groq_example2.py │ ├── hooks/ │ │ ├── README.md │ │ └── run.py │ ├── iterables/ │ │ └── run.py │ ├── knowledge-graph/ │ │ ├── run.py │ │ └── run_stream.py │ ├── learn-async/ │ │ └── run.py │ ├── llm-judge-relevance/ │ │ └── run.py │ ├── logfire/ │ │ ├── classify.py │ │ ├── image.py │ │ ├── requirements.txt │ │ └── validate.py │ ├── logfire-fastapi/ │ │ ├── Readme.md │ │ ├── requirements.txt │ │ ├── server.py │ │ └── test.py │ ├── logging/ │ │ └── run.py │ ├── match_language/ │ │ ├── run_v1.py │ │ └── run_v2.py │ ├── mistral/ │ │ └── mistral.py │ ├── multi-actions/ │ │ └── run.py │ ├── multiple_search_queries/ │ │ ├── diagram.py │ │ └── segment_search_queries.py │ ├── open_source_examples/ │ │ ├── README.md │ │ ├── openrouter.py │ │ ├── perplexity.py │ │ └── runpod.py │ ├── openai/ │ │ ├── __init__.py │ │ └── run.py │ ├── openai-audio/ │ │ └── run.py │ ├── parallel/ │ │ └── run.py │ ├── partial_streaming/ │ │ ├── benchmark.py │ │ └── run.py │ ├── patching/ │ │ ├── anyscale.py │ │ ├── oai.py │ │ ├── pcalls.py │ │ └── together.py │ ├── proscons/ │ │ └── run.py │ ├── query_planner_execution/ │ │ ├── diagram.py │ │ └── query_planner_execution.py │ ├── recursive_filepaths/ │ │ ├── diagram.py │ │ └── parse_recursive_paths.py │ ├── reranker/ │ │ └── run.py │ ├── resolving-complex-entities/ │ │ └── run.py │ ├── retry/ │ │ └── run.py │ ├── safer_sql_example/ │ │ ├── diagram.py │ │ └── safe_sql.py │ ├── simple-extraction/ │ │ ├── maybe_user.py │ │ └── user.py │ ├── situate_context/ │ │ └── run.py │ ├── sqlmodel/ │ │ ├── run.py │ │ └── test_basic.py │ ├── stream_action_items/ │ │ └── run.py │ ├── synethic-data/ │ │ └── run.py │ ├── task_planner/ │ │ ├── 
diagram.py │ │ └── task_planner_topological_sort.py │ ├── tenacity-benchmarks/ │ │ └── run.py │ ├── timestamps/ │ │ └── run.py │ ├── union/ │ │ └── run.py │ ├── validated-multiclass/ │ │ ├── output.json │ │ └── run.py │ ├── validators/ │ │ ├── allm_validator.py │ │ ├── annotator.py │ │ ├── chain_of_thought_validator.py │ │ ├── citations.py │ │ ├── competitors.py │ │ ├── field_validator.py │ │ ├── just_a_guy.py │ │ ├── llm_validator.py │ │ ├── moderation.py │ │ └── readme.md │ ├── vision/ │ │ ├── image_to_ad_copy.py │ │ ├── run.py │ │ ├── run_raw.py │ │ ├── run_table.py │ │ └── slides.py │ ├── watsonx/ │ │ └── watsonx.py │ ├── youtube/ │ │ └── run.py │ ├── youtube-clips/ │ │ └── run.py │ └── youtube-flashcards/ │ └── run.py ├── github_issue.md ├── instructor/ │ ├── __init__.py │ ├── _types/ │ │ ├── __init__.py │ │ └── _alias.py │ ├── auto_client.py │ ├── batch/ │ │ ├── __init__.py │ │ ├── models.py │ │ ├── processor.py │ │ ├── providers/ │ │ │ ├── __init__.py │ │ │ ├── anthropic.py │ │ │ ├── base.py │ │ │ └── openai.py │ │ ├── request.py │ │ └── utils.py │ ├── cache/ │ │ └── __init__.py │ ├── cli/ │ │ ├── __init__.py │ │ ├── batch.py │ │ ├── cli.py │ │ ├── deprecated_hub.py │ │ ├── files.py │ │ ├── jobs.py │ │ └── usage.py │ ├── client.py │ ├── core/ │ │ ├── __init__.py │ │ ├── client.py │ │ ├── exceptions.py │ │ ├── hooks.py │ │ ├── patch.py │ │ └── retry.py │ ├── distil.py │ ├── dsl/ │ │ ├── __init__.py │ │ ├── citation.py │ │ ├── iterable.py │ │ ├── json_tracker.py │ │ ├── maybe.py │ │ ├── parallel.py │ │ ├── partial.py │ │ ├── response_list.py │ │ ├── simple_type.py │ │ └── validators.py │ ├── exceptions.py │ ├── function_calls.py │ ├── hooks.py │ ├── mode.py │ ├── models.py │ ├── multimodal.py │ ├── patch.py │ ├── process_response.py │ ├── processing/ │ │ ├── __init__.py │ │ ├── function_calls.py │ │ ├── multimodal.py │ │ ├── response.py │ │ ├── schema.py │ │ └── validators.py │ ├── providers/ │ │ ├── README.md │ │ ├── __init__.py │ │ ├── anthropic/ │ │ │ ├── 
__init__.py │ │ │ ├── client.py │ │ │ └── utils.py │ │ ├── bedrock/ │ │ │ ├── __init__.py │ │ │ ├── client.py │ │ │ └── utils.py │ │ ├── cerebras/ │ │ │ ├── __init__.py │ │ │ ├── client.py │ │ │ └── utils.py │ │ ├── cohere/ │ │ │ ├── __init__.py │ │ │ ├── client.py │ │ │ └── utils.py │ │ ├── fireworks/ │ │ │ ├── __init__.py │ │ │ ├── client.py │ │ │ └── utils.py │ │ ├── gemini/ │ │ │ ├── __init__.py │ │ │ ├── client.py │ │ │ └── utils.py │ │ ├── genai/ │ │ │ ├── __init__.py │ │ │ └── client.py │ │ ├── groq/ │ │ │ ├── __init__.py │ │ │ └── client.py │ │ ├── mistral/ │ │ │ ├── __init__.py │ │ │ ├── client.py │ │ │ └── utils.py │ │ ├── openai/ │ │ │ ├── __init__.py │ │ │ └── utils.py │ │ ├── perplexity/ │ │ │ ├── __init__.py │ │ │ ├── client.py │ │ │ └── utils.py │ │ ├── vertexai/ │ │ │ ├── __init__.py │ │ │ └── client.py │ │ ├── writer/ │ │ │ ├── __init__.py │ │ │ ├── client.py │ │ │ └── utils.py │ │ └── xai/ │ │ ├── __init__.py │ │ ├── client.py │ │ └── utils.py │ ├── py.typed │ ├── templating.py │ ├── utils/ │ │ ├── __init__.py │ │ ├── core.py │ │ └── providers.py │ ├── validation/ │ │ ├── __init__.py │ │ ├── async_validators.py │ │ └── llm_validators.py │ └── validators.py ├── mkdocs.yml ├── pyproject.toml ├── requirements-doc.txt ├── requirements-examples.txt ├── requirements.txt ├── scripts/ │ ├── README.md │ ├── audit_patterns.py │ ├── check_blog_excerpts.py │ ├── check_links.py │ ├── fix_api_calls.py │ ├── fix_doc_tests.py │ ├── fix_old_patterns.py │ ├── make_clean.py │ ├── make_desc.py │ ├── make_sitemap.py │ ├── validate_headings.py │ └── validate_meta_tags.py ├── sitemap.yaml ├── tests/ │ ├── __init__.py │ ├── conftest.py │ ├── docs/ │ │ ├── _concept_groups.py │ │ ├── _example_groups.py │ │ ├── conftest.py │ │ ├── test_concepts.py │ │ ├── test_concepts_advanced.py │ │ ├── test_concepts_operations.py │ │ ├── test_concepts_providers.py │ │ ├── test_docs.py │ │ ├── test_examples.py │ │ ├── test_examples_batch.py │ │ ├── test_examples_integrations.py │ │ ├── 
test_examples_multimodal.py │ │ ├── test_examples_providers.py │ │ ├── test_hub.py │ │ ├── test_mkdocs.py │ │ ├── test_posts.py │ │ └── test_prompt_tips.py │ ├── dsl/ │ │ ├── test_gemini_tools_async_streaming.py │ │ ├── test_partial.py │ │ ├── test_simple_type.py │ │ └── test_simple_type_fix.py │ ├── genai/ │ │ └── test_safety_settings.py │ ├── llm/ │ │ ├── __init__.py │ │ ├── shared_config.py │ │ ├── test_anthropic/ │ │ │ ├── __init__.py │ │ │ ├── conftest.py │ │ │ ├── test_multimodal.py │ │ │ ├── test_reasoning.py │ │ │ ├── test_system.py │ │ │ └── util.py │ │ ├── test_bedrock/ │ │ │ ├── conftest.py │ │ │ ├── test_bedrock_native_passthrough.py │ │ │ ├── test_normalize.py │ │ │ ├── test_openai_image_conversion.py │ │ │ └── test_prepare_kwargs.py │ │ ├── test_core_providers/ │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── capabilities.py │ │ │ ├── conftest.py │ │ │ ├── test_basic_extraction.py │ │ │ ├── test_response_modes.py │ │ │ ├── test_retries.py │ │ │ ├── test_simple_types.py │ │ │ ├── test_streaming.py │ │ │ └── test_validation.py │ │ ├── test_gemini/ │ │ │ ├── __init__.py │ │ │ ├── conftest.py │ │ │ ├── evals/ │ │ │ │ ├── __init__.py │ │ │ │ └── test_extract_users.py │ │ │ ├── test_list_content.py │ │ │ ├── test_multimodal_content.py │ │ │ └── util.py │ │ ├── test_genai/ │ │ │ ├── __init__.py │ │ │ ├── conftest.py │ │ │ ├── test_decimal.py │ │ │ ├── test_format.py │ │ │ ├── test_invalid_schema.py │ │ │ ├── test_reask.py │ │ │ ├── test_schema_conversion.py │ │ │ ├── test_utils.py │ │ │ └── util.py │ │ ├── test_litellm.py │ │ ├── test_new_client.py │ │ ├── test_openai/ │ │ │ ├── __init__.py │ │ │ ├── conftest.py │ │ │ ├── slow/ │ │ │ │ └── test_response.py │ │ │ ├── test_attr.py │ │ │ ├── test_hooks.py │ │ │ ├── test_multimodal.py │ │ │ ├── test_multitask.py │ │ │ ├── test_patch.py │ │ │ ├── test_validation_context.py │ │ │ ├── test_validators.py │ │ │ └── util.py │ │ ├── test_vertexai/ │ │ │ ├── __init__.py │ │ │ ├── conftest.py │ │ │ ├── 
test_deprecated_async.py │ │ │ ├── test_format.py │ │ │ ├── test_message_parser.py │ │ │ ├── test_modes.py │ │ │ └── util.py │ │ └── test_writer/ │ │ ├── __init__.py │ │ ├── conftest.py │ │ ├── evals/ │ │ │ ├── __init__.py │ │ │ ├── test_classification_enums.py │ │ │ ├── test_classification_literals.py │ │ │ ├── test_entities.py │ │ │ ├── test_extract_users.py │ │ │ └── test_sentiment_analysis.py │ │ ├── test_format_common_models.py │ │ ├── test_format_difficult_models.py │ │ └── util.py │ ├── processing/ │ │ └── test_anthropic_json.py │ ├── test_auto_client.py │ ├── test_batch_in_memory.py │ ├── test_cache_integration.py │ ├── test_cache_key.py │ ├── test_dict_operations.py │ ├── test_dict_operations_validation.py │ ├── test_dynamic_model_creation.py │ ├── test_exception_backwards_compat.py │ ├── test_exceptions.py │ ├── test_fizzbuzz_fix.py │ ├── test_formatting.py │ ├── test_function_calls.py │ ├── test_genai_config_merging.py │ ├── test_genai_reask.py │ ├── test_json_extraction.py │ ├── test_json_extraction_edge_cases.py │ ├── test_list_response.py │ ├── test_list_response_wrapper.py │ ├── test_logging.py │ ├── test_message_processing.py │ ├── test_multimodal.py │ ├── test_multitask.py │ ├── test_patch.py │ ├── test_process_response.py │ ├── test_response_model_conversion.py │ ├── test_retry_json_mode.py │ ├── test_schema.py │ ├── test_schema_utils.py │ ├── test_simple_types.py │ ├── test_streaming_reask_bug.py │ ├── test_utils.py │ ├── test_xai_optional_dependency.py │ └── v2/ │ └── test_provider_modes.py ├── ty-tests.toml └── ty.toml
SYMBOL INDEX (1910 symbols across 307 files)
FILE: docs/hooks/hide_lines.py
function on_startup (line 8) | def on_startup(command: str, dirty: bool) -> None: # noqa: ARG001
FILE: examples/anthropic-web-tool/run.py
class Citation (line 13) | class Citation(BaseModel):
class Response (line 18) | class Response(BaseModel):
FILE: examples/anthropic/run.py
class Properties (line 9) | class Properties(BaseModel):
class User (line 14) | class User(BaseModel):
FILE: examples/asyncio-benchmarks/run.py
class Person (line 33) | class Person(BaseModel):
method validate_age (line 40) | def validate_age(cls, v):
function extract_person (line 58) | async def extract_person(text: str) -> Person:
function sequential_processing (line 68) | async def sequential_processing() -> tuple[list[Person], float]:
function gather_processing (line 85) | async def gather_processing() -> tuple[list[Person], float]:
function as_completed_processing (line 107) | async def as_completed_processing() -> tuple[list[Person], float]:
function rate_limited_extract_person (line 128) | async def rate_limited_extract_person(
function rate_limited_gather (line 136) | async def rate_limited_gather(concurrency_limit: int = 3) -> tuple[list[...
function rate_limited_as_completed (line 157) | async def rate_limited_as_completed(
function robust_gather_processing (line 185) | async def robust_gather_processing() -> tuple[list[Person], float]:
function timeout_gather_processing (line 206) | async def timeout_gather_processing(
function progress_tracking_processing (line 230) | async def progress_tracking_processing() -> tuple[list[Person], float]:
function chunked_processing (line 253) | async def chunked_processing(chunk_size: int = 3) -> tuple[list[Person],...
function benchmark_all_methods (line 273) | async def benchmark_all_methods():
function sync_example (line 359) | def sync_example():
function main (line 382) | async def main():
FILE: examples/auto-ticketer/run.py
class PriorityEnum (line 11) | class PriorityEnum(str, Enum):
class Subtask (line 17) | class Subtask(BaseModel):
class Ticket (line 26) | class Ticket(BaseModel):
class ActionItems (line 44) | class ActionItems(BaseModel):
function generate (line 52) | def generate(data: str):
FILE: examples/automodel/run.py
class UserInfo (line 14) | class UserInfo(BaseModel):
function test_async_client (line 22) | async def test_async_client(
function test_sync_client (line 44) | def test_sync_client(client_name: str, client: instructor.Instructor) ->...
function main (line 61) | async def main():
FILE: examples/avail/run.py
class DateRange (line 12) | class DateRange(BaseModel):
class AvailabilityResponse (line 44) | class AvailabilityResponse(BaseModel):
function prepare_dates (line 48) | def prepare_dates(n=7) -> str:
function parse_availability (line 63) | def parse_availability(text: str) -> Iterable[AvailabilityResponse]:
FILE: examples/avail/run_mixtral.py
class DateRange (line 19) | class DateRange(BaseModel):
class AvailabilityResponse (line 51) | class AvailabilityResponse(BaseModel):
function prepare_dates (line 55) | def prepare_dates(n=7) -> str:
function parse_availability (line 70) | def parse_availability(text: str):
FILE: examples/batch-classification/run-cache.py
class QuestionType (line 12) | class QuestionType(Enum):
class QuestionClassification (line 24) | class QuestionClassification(BaseModel):
method validate_classification (line 47) | def validate_classification(cls, v):
function classify (line 55) | async def classify(data: str):
function main (line 70) | async def main(questions: list[str]):
FILE: examples/batch-classification/run.py
class QuestionType (line 14) | class QuestionType(Enum):
class QuestionClassification (line 26) | class QuestionClassification(BaseModel):
method validate_classification (line 49) | def validate_classification(cls, v):
function classify (line 56) | async def classify(data: str):
function main (line 71) | async def main(questions: list[str], *, path_to_jsonl: str = None):
FILE: examples/batch-classification/run_langsmith.py
class QuestionType (line 16) | class QuestionType(Enum):
class QuestionClassification (line 28) | class QuestionClassification(BaseModel):
method validate_classification (line 51) | def validate_classification(cls, v):
function classify (line 60) | async def classify(data: str):
function main (line 75) | async def main(questions: list[str]):
FILE: examples/batch_api/in_memory_batch_example.py
class User (line 12) | class User(BaseModel):
function main (line 20) | def main():
function compare_file_vs_memory (line 165) | def compare_file_vs_memory():
function demo_polling_logic (line 203) | def demo_polling_logic():
FILE: examples/batch_api/run_batch_test.py
class User (line 49) | class User(BaseModel):
function create_test_messages (line 54) | def create_test_messages() -> list[list[dict]]:
function get_expected_results (line 74) | def get_expected_results() -> list[User]:
function check_api_key (line 81) | def check_api_key(provider: str) -> bool:
function create_openai_batch (line 110) | def create_openai_batch(model: str, messages_list: list[list[dict]]) -> ...
function create_anthropic_batch (line 136) | def create_anthropic_batch(
function create_google_batch (line 161) | def create_google_batch(model: str, messages_list: list[list[dict]]) -> ...
function create (line 179) | def create(
function list_batches (line 254) | def list_batches():
function fetch (line 284) | def fetch(
function show_results (line 354) | def show_results(
function poll_for_results (line 469) | def poll_for_results(
function fetch_openai_results_with_status (line 527) | def fetch_openai_results_with_status(
function fetch_anthropic_results_with_status (line 567) | def fetch_anthropic_results_with_status(
function fetch_openai_results (line 615) | def fetch_openai_results(batch_id: str, validate: bool) -> list[User]:
function fetch_anthropic_results (line 659) | def fetch_anthropic_results(batch_id: str, validate: bool) -> list[User]:
function fetch_google_results (line 703) | def fetch_google_results(batch_job_name: str, validate: bool) -> list[Us...
function validate_results (line 749) | def validate_results(results: list[User], provider_name: str) -> bool:
function help (line 782) | def help():
function list_models (line 824) | def list_models():
FILE: examples/caching/example_diskcache.py
class UserDetail (line 13) | class UserDetail(BaseModel):
function instructor_cache (line 21) | def instructor_cache(func):
function extract (line 65) | def extract(data) -> UserDetail:
function aextract (line 76) | async def aextract(data) -> UserDetail:
function test_extract (line 86) | def test_extract():
function atest_extract (line 102) | async def atest_extract():
FILE: examples/caching/example_redis.py
function instructor_cache (line 13) | def instructor_cache(func):
class UserDetail (line 38) | class UserDetail(BaseModel):
function extract (line 44) | def extract(data) -> UserDetail:
function test_extract (line 55) | def test_extract():
FILE: examples/caching/lru.py
class UserDetail (line 9) | class UserDetail(BaseModel):
function extract (line 15) | def extract(data):
function test_extract (line 25) | def test_extract():
FILE: examples/caching/run.py
class UserDetail (line 49) | class UserDetail(BaseModel):
class CacheMetrics (line 57) | class CacheMetrics:
method __init__ (line 60) | def __init__(self):
method record_hit (line 69) | def record_hit(self, func_name: str, time_saved: float):
method record_miss (line 75) | def record_miss(self, func_name: str):
method record_error (line 80) | def record_error(self, func_name: str, error: str):
method hit_rate (line 85) | def hit_rate(self) -> float:
method get_stats (line 89) | def get_stats(self) -> dict[str, Any]:
method reset (line 99) | def reset(self):
function smart_cache_key (line 112) | def smart_cache_key(
function extract_functools (line 128) | def extract_functools(data: str) -> UserDetail:
function monitored_functools_cache (line 144) | def monitored_functools_cache(func: F) -> F:
function extract_functools_monitored (line 176) | def extract_functools_monitored(data: str) -> UserDetail:
function create_diskcache_decorator (line 188) | def create_diskcache_decorator(
function extract_diskcache (line 242) | def extract_diskcache(data: str) -> UserDetail:
function create_redis_decorator (line 254) | def create_redis_decorator(
function extract_redis (line 319) | def extract_redis(data: str) -> UserDetail:
function extract_no_cache (line 331) | def extract_no_cache(data: str) -> UserDetail:
function extract_l1 (line 345) | def extract_l1(data: str) -> UserDetail:
function extract_l2 (line 350) | def extract_l2(data: str) -> UserDetail:
function extract_l3 (line 355) | def extract_l3(data: str) -> UserDetail:
function benchmark_caching_strategy (line 365) | def benchmark_caching_strategy(
function calculate_cost_savings (line 419) | def calculate_cost_savings(baseline_stats: dict, cached_stats: dict) -> ...
function run_async_example (line 458) | async def run_async_example():
function demonstrate_schema_invalidation (line 486) | def demonstrate_schema_invalidation():
function main (line 514) | def main():
FILE: examples/caching_prototype/run_real.py
class User (line 10) | class User(BaseModel):
function test_autocache (line 15) | def test_autocache():
function test_create_with_completion (line 56) | def test_create_with_completion():
function test_diskcache (line 100) | def test_diskcache():
function test_cache_ttl (line 170) | def test_cache_ttl():
function test_different_inputs (line 221) | def test_different_inputs():
FILE: examples/chain-of-density/chain_of_density.py
class InitialSummary (line 11) | class InitialSummary(BaseModel):
class RewrittenSummary (line 22) | class RewrittenSummary(BaseModel):
method min_entity_density (line 51) | def min_entity_density(cls, v: str):
method min_length (line 69) | def min_length(cls, v: str):
method has_missing_entities (line 79) | def has_missing_entities(cls, missing_entities: list[str]):
method has_no_absent_entities (line 87) | def has_no_absent_entities(cls, absent_entities: list[str]):
function summarize_article (line 97) | def summarize_article(article: str, summary_steps: int = 3):
FILE: examples/chain-of-density/finetune.py
class GeneratedSummary (line 23) | class GeneratedSummary(BaseModel):
function distil_summarization (line 42) | def distil_summarization(text: str) -> GeneratedSummary:
FILE: examples/citation_with_extraction/citation_fuzzy_match.py
class Fact (line 10) | class Fact(BaseModel):
method validate_sources (line 20) | def validate_sources(self, info: FieldValidationInfo) -> "Fact":
method _get_span (line 41) | def _get_span(self, quote, context, errs=5):
method get_spans (line 56) | def get_spans(self, context):
class QuestionAnswer (line 61) | class QuestionAnswer(instructor.ResponseSchema):
method validate_sources (line 73) | def validate_sources(self) -> "QuestionAnswer":
function ask_ai (line 83) | def ask_ai(question: str, context: str) -> QuestionAnswer:
FILE: examples/citation_with_extraction/main.py
class Fact (line 25) | class Fact(BaseModel):
method _get_span (line 41) | def _get_span(self, quote, context):
method get_spans (line 56) | def get_spans(self, context):
class QuestionAnswer (line 62) | class QuestionAnswer(ResponseSchema, MultiTaskBase):
class Question (line 77) | class Question(BaseModel):
function stream_extract (line 83) | def stream_extract(question: Question) -> Iterable[Fact]:
function get_api_key (line 108) | def get_api_key(request: Request):
function extract (line 128) | async def extract(question: Question, openai_key: str = Depends(get_api_...
FILE: examples/citation_with_extraction/modal_main.py
function fastapi_app (line 11) | def fastapi_app():
FILE: examples/citations/run.py
class Statements (line 21) | class Statements(BaseModel):
method substring_quote_exists (line 27) | def substring_quote_exists(cls, v: str, info: ValidationInfo):
method substring_quote_exists (line 92) | def substring_quote_exists(self, info: ValidationInfo):
class AnswerWithCitaton (line 40) | class AnswerWithCitaton(BaseModel):
method validate_answer (line 175) | def validate_answer(self, info: ValidationInfo):
class Validation (line 75) | class Validation(BaseModel):
class Statements (line 87) | class Statements(BaseModel):
method substring_quote_exists (line 27) | def substring_quote_exists(cls, v: str, info: ValidationInfo):
method substring_quote_exists (line 92) | def substring_quote_exists(self, info: ValidationInfo):
class AnswerWithCitaton (line 112) | class AnswerWithCitaton(BaseModel):
method validate_answer (line 175) | def validate_answer(self, info: ValidationInfo):
class AnswerWithCitaton (line 170) | class AnswerWithCitaton(BaseModel):
method validate_answer (line 175) | def validate_answer(self, info: ValidationInfo):
FILE: examples/classification/classifiy_with_validation.py
class SOCCode (line 37) | class SOCCode(BaseModel):
method validate_code (line 45) | def validate_code(cls, v):
function classify_job (line 51) | def classify_job(description: str) -> SOCCode:
FILE: examples/classification/multi_prediction.py
class MultiLabels (line 11) | class MultiLabels(str, enum.Enum):
class MultiClassPrediction (line 18) | class MultiClassPrediction(BaseModel):
function multi_classify (line 23) | def multi_classify(data: str) -> MultiClassPrediction:
FILE: examples/classification/simple_prediction.py
class Labels (line 10) | class Labels(str, enum.Enum):
class SinglePrediction (line 15) | class SinglePrediction(BaseModel):
function classify (line 23) | def classify(data: str) -> SinglePrediction:
FILE: examples/codegen-from-schema/create_fastapi_app.py
class TemplateVariables (line 53) | class TemplateVariables(BaseModel):
function load_json_schema (line 57) | def load_json_schema(json_schema_path: str) -> dict:
function generate_pydantic_model (line 65) | def generate_pydantic_model(json_schema_path: str):
function extract_jinja_vars (line 73) | def extract_jinja_vars(prompt_template: str) -> list:
function render_app_template (line 77) | def render_app_template(template_str: str, **kwargs) -> str:
function create_app (line 82) | def create_app(
FILE: examples/codegen-from-schema/models.py
class Type (line 12) | class Type(Enum):
class PhoneNumber (line 18) | class PhoneNumber(BaseModel):
class ExtractPerson (line 23) | class ExtractPerson(BaseModel):
FILE: examples/codegen-from-schema/run.py
class TemplateVariables (line 20) | class TemplateVariables(BaseModel):
class RequestSchema (line 24) | class RequestSchema(BaseModel):
function extract_person (line 36) | async def extract_person(input: RequestSchema) -> ExtractPerson:
FILE: examples/cohere/cohere.py
class Person (line 14) | class Person(BaseModel):
class Group (line 19) | class Group(BaseModel):
FILE: examples/crm/run.py
class CRMSource (line 9) | class CRMSource(Enum):
class CRMSearch (line 16) | class CRMSearch(BaseModel):
class CRMSearchQuery (line 32) | class CRMSearchQuery(BaseModel):
function query_crm (line 41) | def query_crm(query: str) -> CRMSearchQuery:
FILE: examples/decimals/run.py
class Receipt (line 8) | class Receipt(BaseModel):
method parse_price (line 14) | def parse_price(cls, v):
FILE: examples/distilations/three_digit_mul.py
class Multiply (line 18) | class Multiply(BaseModel):
function fn (line 25) | def fn(a: int, b: int) -> Multiply:
FILE: examples/distilations/three_digit_mul_dispatch.py
class Multiply (line 24) | class Multiply(BaseModel):
function fn (line 31) | def fn(a: int, b: int) -> Multiply:
FILE: examples/evals/eval.py
class Status (line 11) | class Status(Enum):
class StreamingAccumulatorManager (line 17) | class StreamingAccumulatorManager:
method __init__ (line 18) | def __init__(self):
method validate_string (line 21) | def validate_string(self, json_string: str, index: int) -> None:
method process_validation_error (line 36) | def process_validation_error(self, error, index):
method update (line 48) | def update(self, index, data: Any, path: str = "$") -> None:
method summarize (line 65) | def summarize(self) -> dict[str, dict]:
class StreamingAccumulator (line 69) | class StreamingAccumulator:
method __init__ (line 70) | def __init__(self):
method update (line 86) | def update(self, index: Any, value: Any) -> None:
method summarize (line 108) | def summarize(self, key_name=None) -> dict[str, Union[int, float, dict]]:
FILE: examples/evals/models.py
class SourceType (line 6) | class SourceType(str, Enum):
class Search (line 14) | class Search(BaseModel):
class MultiSearch (line 22) | class MultiSearch(BaseModel):
FILE: examples/evals/streamlit.py
function get_lines (line 12) | def get_lines(stats_key, keys):
function render_dropdown_and_button (line 20) | def render_dropdown_and_button(stats_key):
FILE: examples/extract-table/run_vision.py
function md_to_df (line 22) | def md_to_df(data: Any) -> Any:
class Table (line 53) | class Table(BaseModel):
class MultipleTables (line 58) | class MultipleTables(BaseModel):
function extract (line 78) | def extract(url: str) -> MultipleTables:
FILE: examples/extract-table/run_vision_langsmith.py
function md_to_df (line 23) | def md_to_df(data: Any) -> Any:
class Table (line 54) | class Table(BaseModel):
class MultipleTables (line 59) | class MultipleTables(BaseModel):
function extract (line 80) | def extract(url: str) -> MultipleTables:
FILE: examples/extract-table/run_vision_org.py
class People (line 14) | class People(BaseModel):
class Organization (line 26) | class Organization(BaseModel):
function extract (line 30) | def extract(url: str):
FILE: examples/extract-table/run_vision_org_table.py
function md_to_df (line 22) | def md_to_df(data: Any) -> Any:
class Table (line 53) | class Table(BaseModel):
function extract (line 58) | def extract(url: str):
FILE: examples/extract-table/run_vision_receipt.py
class Item (line 12) | class Item(BaseModel):
class Receipt (line 18) | class Receipt(BaseModel):
method check_total (line 23) | def check_total(cls, values: "Receipt"):
function extract (line 34) | def extract(url: str) -> Receipt:
FILE: examples/extract-table/test.py
class User (line 11) | class User(BaseModel):
class MeetingInfo (line 16) | class MeetingInfo(BaseModel):
FILE: examples/extracting-pii/run.py
class Data (line 9) | class Data(BaseModel):
class PIIDataExtraction (line 15) | class PIIDataExtraction(BaseModel):
method scrub_data (line 22) | def scrub_data(self, content):
FILE: examples/fastapi_app/main.py
class SearchRequest (line 9) | class SearchRequest(BaseModel):
class SearchQuery (line 13) | class SearchQuery(ResponseSchema):
function search (line 28) | async def search(request: SearchRequest):
FILE: examples/fastapi_app/script.py
class SearchQuery (line 6) | class SearchQuery(ResponseSchema):
FILE: examples/fizzbuzz/run.py
function fizzbuzz_gpt (line 9) | def fizzbuzz_gpt(n) -> list[int | str]:
FILE: examples/gpt-engineer/generate.py
class File (line 10) | class File(ResponseSchema):
method save (line 20) | def save(self):
class Program (line 25) | class Program(ResponseSchema):
function develop (line 33) | def develop(data: str) -> Program:
FILE: examples/gpt-engineer/refactor.py
class Diff (line 11) | class Diff(ResponseSchema):
function refactor (line 62) | def refactor(new_requirements: str, program: Program) -> Diff:
FILE: examples/groq/groq_example.py
class Character (line 7) | class Character(BaseModel):
FILE: examples/groq/groq_example2.py
class UserExtract (line 13) | class UserExtract(BaseModel):
FILE: examples/hooks/run.py
class User (line 18) | class User(pydantic.BaseModel):
method validate_age (line 25) | def validate_age(cls, v: int) -> int:
class CompletionStats (line 31) | class CompletionStats:
method __init__ (line 34) | def __init__(self):
method report (line 40) | def report(self):
function main (line 49) | def main():
FILE: examples/iterables/run.py
class User (line 13) | class User(BaseModel):
function stream_extract (line 19) | def stream_extract(input: str) -> Iterable[User]:
FILE: examples/knowledge-graph/run.py
class Node (line 11) | class Node(BaseModel):
class Edge (line 17) | class Edge(BaseModel):
class KnowledgeGraph (line 24) | class KnowledgeGraph(BaseModel):
function generate_graph (line 29) | def generate_graph(input) -> KnowledgeGraph:
function visualize_knowledge_graph (line 42) | def visualize_knowledge_graph(kg: KnowledgeGraph):
FILE: examples/knowledge-graph/run_stream.py
class Node (line 12) | class Node(BaseModel):
method __hash__ (line 17) | def __hash__(self) -> int:
class Edge (line 21) | class Edge(BaseModel):
method __hash__ (line 27) | def __hash__(self) -> int:
class KnowledgeGraph (line 31) | class KnowledgeGraph(BaseModel):
method update (line 35) | def update(self, other: "KnowledgeGraph") -> "KnowledgeGraph":
method draw (line 42) | def draw(self, prefix: str = None):
function generate_graph (line 57) | def generate_graph(input: list[str]) -> KnowledgeGraph:
FILE: examples/learn-async/run.py
class Timer (line 12) | class Timer:
method __init__ (line 13) | def __init__(self, name):
method __aenter__ (line 18) | async def __aenter__(self):
method __aexit__ (line 21) | async def __aexit__(self, *args, **kwargs):
class Person (line 26) | class Person(BaseModel):
function extract_person (line 31) | async def extract_person(text: str) -> Person:
function main (line 41) | async def main():
FILE: examples/llm-judge-relevance/run.py
class Judgment (line 8) | class Judgment(BaseModel):
function judge_relevance (line 40) | def judge_relevance(question: str, text: str) -> Judgment:
FILE: examples/logfire-fastapi/server.py
class UserData (line 11) | class UserData(BaseModel):
class MultipleUserData (line 15) | class MultipleUserData(BaseModel):
class UserDetail (line 19) | class UserDetail(BaseModel):
function endpoint_function (line 33) | async def endpoint_function(data: UserData) -> UserDetail:
function extract_many_users (line 46) | async def extract_many_users(data: MultipleUserData):
function extract (line 63) | async def extract(data: UserData):
FILE: examples/logfire/classify.py
class Labels (line 8) | class Labels(str, enum.Enum):
class SinglePrediction (line 15) | class SinglePrediction(BaseModel):
function classify (line 30) | def classify(data: str) -> SinglePrediction:
FILE: examples/logfire/image.py
function md_to_df (line 21) | def md_to_df(data: Any) -> Any:
class Table (line 49) | class Table(BaseModel):
function extract_table_from_image (line 55) | def extract_table_from_image(url: str) -> Iterable[Table]:
FILE: examples/logfire/validate.py
class Statement (line 15) | class Statement(BaseModel):
FILE: examples/logging/run.py
class UserDetail (line 14) | class UserDetail(BaseModel):
FILE: examples/match_language/run_v1.py
class GeneratedSummary (line 53) | class GeneratedSummary(BaseModel):
function summarize_text (line 57) | async def summarize_text(text: str):
function main (line 78) | async def main():
FILE: examples/match_language/run_v2.py
class GeneratedSummary (line 53) | class GeneratedSummary(BaseModel):
function summarize_text (line 60) | async def summarize_text(text: str):
function main (line 81) | async def main():
FILE: examples/mistral/mistral.py
class UserDetails (line 8) | class UserDetails(BaseModel):
FILE: examples/multi-actions/run.py
class Action (line 11) | class Action(enum.Enum):
class Projects (line 17) | class Projects(enum.Enum):
class Buckets (line 24) | class Buckets(enum.Enum):
class TaskAction (line 33) | class TaskAction(BaseModel):
class Response (line 51) | class Response(BaseModel):
FILE: examples/multiple_search_queries/segment_search_queries.py
class SearchType (line 10) | class SearchType(str, enum.Enum):
class Search (line 17) | class Search(BaseModel):
method execute (line 26) | async def execute(self):
class MultiSearch (line 35) | class MultiSearch(BaseModel):
method execute (line 46) | def execute(self):
function segment (line 55) | def segment(data: str) -> MultiSearch:
FILE: examples/open_source_examples/openrouter.py
class UserDetail (line 35) | class UserDetail(BaseModel):
FILE: examples/open_source_examples/perplexity.py
class UserDetail (line 47) | class UserDetail(BaseModel):
FILE: examples/open_source_examples/runpod.py
class UserDetail (line 36) | class UserDetail(BaseModel):
FILE: examples/openai-audio/run.py
class Person (line 10) | class Person(BaseModel):
FILE: examples/openai/run.py
class UserInfo (line 16) | class UserInfo(BaseModel):
FILE: examples/parallel/run.py
class Weather (line 11) | class Weather(BaseModel):
class GoogleSearch (line 16) | class GoogleSearch(BaseModel):
FILE: examples/partial_streaming/benchmark.py
function num_tokens_from_string (line 13) | def num_tokens_from_string(string: str, model_name: str) -> int:
class User (line 23) | class User(BaseModel):
function benchmark_raw_stream (line 29) | def benchmark_raw_stream(model="gpt-4"):
function benchmark_partial_streaming (line 57) | def benchmark_partial_streaming(model="gpt-4"):
FILE: examples/partial_streaming/run.py
class User (line 11) | class User(BaseModel):
FILE: examples/patching/anyscale.py
class UserExtract (line 20) | class UserExtract(BaseModel):
FILE: examples/patching/oai.py
class UserExtract (line 16) | class UserExtract(BaseModel):
FILE: examples/patching/pcalls.py
class Weather (line 14) | class Weather(ResponseSchema):
class GoogleSearch (line 19) | class GoogleSearch(ResponseSchema):
class Query (line 25) | class Query(BaseModel):
FILE: examples/patching/together.py
class UserExtract (line 18) | class UserExtract(BaseModel):
FILE: examples/proscons/run.py
class Character (line 7) | class Character(BaseModel):
FILE: examples/query_planner_execution/query_planner_execution.py
class QueryType (line 11) | class QueryType(str, enum.Enum):
class ComputeQuery (line 21) | class ComputeQuery(BaseModel):
class MergedResponses (line 30) | class MergedResponses(BaseModel):
class Query (line 39) | class Query(BaseModel):
method execute (line 59) | async def execute(self, dependency_func):
class QueryPlan (line 85) | class QueryPlan(BaseModel):
method execute (line 95) | async def execute(self):
method dependencies (line 101) | def dependencies(self, idz: list[int]) -> list[Query]:
function query_planner (line 112) | def query_planner(question: str, plan=False) -> QueryPlan:
FILE: examples/recursive_filepaths/parse_recursive_paths.py
class NodeType (line 11) | class NodeType(str, enum.Enum):
class Node (line 18) | class Node(BaseModel):
method print_paths (line 42) | def print_paths(self, parent_path=""):
class DirectoryTree (line 57) | class DirectoryTree(BaseModel):
method print_paths (line 70) | def print_paths(self):
function parse_tree_to_filesystem (line 80) | def parse_tree_to_filesystem(data: str) -> DirectoryTree:
FILE: examples/reranker/run.py
class Label (line 9) | class Label(BaseModel):
method validate_chunk_id (line 22) | def validate_chunk_id(cls, v: str, info: ValidationInfo) -> str:
class RerankedResults (line 32) | class RerankedResults(BaseModel):
method model_validate (line 37) | def model_validate(cls, v: list[Label]) -> list[Label]:
function rerank_results (line 41) | def rerank_results(query: str, chunks: list[dict]) -> RerankedResults:
function main (line 80) | def main():
FILE: examples/resolving-complex-entities/run.py
class Property (line 14) | class Property(BaseModel):
class Entity (line 20) | class Entity(BaseModel):
class DocumentExtraction (line 39) | class DocumentExtraction(BaseModel):
function ask_ai (line 46) | def ask_ai(content) -> DocumentExtraction:
function generate_html_label (line 64) | def generate_html_label(entity: Entity) -> str:
function generate_graph (line 77) | def generate_graph(data: DocumentExtraction):
FILE: examples/retry/run.py
class User (line 10) | class User(BaseModel):
method name_is_uppercase (line 15) | def name_is_uppercase(cls, v: str):
FILE: examples/safer_sql_example/safe_sql.py
class SQLTemplateType (line 11) | class SQLTemplateType(str, enum.Enum):
class Parameters (line 16) | class Parameters(BaseModel):
class SQL (line 26) | class SQL(BaseModel):
method to_sql (line 51) | def to_sql(self):
function create_query (line 59) | def create_query(data: str) -> SQL:
FILE: examples/simple-extraction/maybe_user.py
class UserDetail (line 10) | class UserDetail(BaseModel):
function get_user_detail (line 19) | def get_user_detail(string) -> MaybeUser: # type: ignore
FILE: examples/simple-extraction/user.py
class UserDetail (line 10) | class UserDetail(BaseModel):
function get_user_detail (line 16) | def get_user_detail(string) -> UserDetail:
FILE: examples/situate_context/run.py
class SituatedContext (line 16) | class SituatedContext(BaseModel):
function situate_context (line 30) | async def situate_context(doc: str, chunk: str) -> SituatedContext:
function chunking_function (line 72) | def chunking_function(
function process_chunk (line 90) | async def process_chunk(doc: str, chunk: str) -> dict[str, str]:
function process (line 105) | async def process(
function main (line 172) | async def main():
FILE: examples/sqlmodel/run.py
function monitor_ai_calls (line 43) | def monitor_ai_calls(func):
class Team (line 64) | class Team(SQLModel, table=True):
class Hero (line 76) | class Hero(SQLModel, instructor.ResponseSchema, table=True):
method validate_name_format (line 102) | def validate_name_format(cls, v):
class Product (line 110) | class Product(SQLModel, instructor.ResponseSchema, table=True):
function create_hero (line 128) | def create_hero(prompt: str = "Create a unique superhero") -> Hero:
function create_hero_async (line 145) | async def create_hero_async(prompt: str = "Create a unique superhero") -...
function create_hero_team_async (line 162) | async def create_hero_team_async(team_size: int = 5) -> list[Hero]:
function create_heroes_batch (line 181) | async def create_heroes_batch(prompts: list[str]) -> list[Hero]:
function create_product (line 191) | def create_product(category: str) -> Product:
function setup_database (line 210) | def setup_database():
function create_sample_teams (line 216) | def create_sample_teams():
function assign_hero_to_team (line 239) | def assign_hero_to_team(hero: Hero, team_name: str):
function list_heroes_with_teams (line 254) | def list_heroes_with_teams():
function demonstrate_validation_errors (line 268) | def demonstrate_validation_errors():
function main (line 295) | async def main():
FILE: examples/sqlmodel/test_basic.py
class Team (line 22) | class Team(SQLModel, table=True):
class Hero (line 34) | class Hero(SQLModel, table=True):
method validate_name_format (line 60) | def validate_name_format(cls, v):
function test_basic_functionality (line 68) | def test_basic_functionality():
FILE: examples/stream_action_items/run.py
class ActionItem (line 13) | class ActionItem(BaseModel):
class ActionItemResponse (line 24) | class ActionItemResponse(BaseModel):
method patch (line 29) | def patch(self, action_item: ActionItem):
method __repr__ (line 36) | def __repr__(self):
method __str__ (line 45) | def __str__(self) -> str:
function yield_action_items (line 52) | def yield_action_items(transcript: str, state: ActionItemResponse):
function text_to_speech (line 104) | def text_to_speech(chunk):
function process_transcript (line 113) | def process_transcript(transcript: list[str]):
FILE: examples/synethic-data/run.py
class SyntheticQA (line 9) | class SyntheticQA(BaseModel):
function get_synthetic_data (line 34) | def get_synthetic_data() -> Iterable[SyntheticQA]:
FILE: examples/task_planner/task_planner_topological_sort.py
class TaskResult (line 25) | class TaskResult(BaseModel):
class TaskResults (line 30) | class TaskResults(BaseModel):
class Task (line 34) | class Task(BaseModel):
method aexecute (line 53) | async def aexecute(self, with_results: TaskResults) -> TaskResult:
class TaskPlan (line 63) | class TaskPlan(BaseModel):
method _get_execution_order (line 74) | def _get_execution_order(self) -> list[int]:
method execute (line 104) | async def execute(self) -> dict[int, TaskResult]:
function task_planner (line 147) | def task_planner(question: str) -> TaskPlan:
FILE: examples/tenacity-benchmarks/run.py
class UserInfo (line 45) | class UserInfo(BaseModel):
method validate_age (line 52) | def validate_age(cls, v):
method validate_email (line 59) | def validate_email(cls, v):
class MockError (line 76) | class MockError:
method __init__ (line 77) | def __init__(self):
method maybe_fail (line 81) | def maybe_fail(self):
function extract_user_info_with_mock_errors (line 117) | def extract_user_info_with_mock_errors(text: str) -> UserInfo:
function extract_user_info (line 137) | def extract_user_info(text: str) -> UserInfo:
function robust_extraction (line 157) | def robust_extraction(text: str) -> UserInfo:
function extract_with_validation (line 169) | def extract_with_validation(text: str) -> UserInfo:
function should_retry (line 176) | def should_retry(result: UserInfo) -> bool:
function extract_valid_user (line 187) | def extract_valid_user(text: str) -> UserInfo:
function rate_limit_safe_extraction (line 212) | def rate_limit_safe_extraction(text: str) -> UserInfo:
function network_resilient_extraction (line 224) | def network_resilient_extraction(text: str) -> UserInfo:
function logged_extraction (line 237) | def logged_extraction(text: str) -> UserInfo:
function get_client (line 245) | def get_client():
function circuit_breaker_extraction (line 251) | def circuit_breaker_extraction(text: str) -> UserInfo:
function monitored_extraction (line 260) | def monitored_extraction(text: str) -> UserInfo:
function benchmark_retry_methods (line 278) | def benchmark_retry_methods():
function test_batch_processing (line 392) | def test_batch_processing():
function demonstrate_error_types (line 433) | def demonstrate_error_types():
function main (line 493) | def main():
FILE: examples/timestamps/run.py
class Segment (line 8) | class Segment(BaseModel):
class SegmentWithTimestamp (line 16) | class SegmentWithTimestamp(BaseModel):
method parse_timestamp (line 26) | def parse_timestamp(self):
FILE: examples/union/run.py
class Search (line 7) | class Search(BaseModel):
method process (line 12) | def process(self):
class Lookup (line 17) | class Lookup(BaseModel):
method process (line 22) | def process(self):
class Finish (line 27) | class Finish(BaseModel):
method process (line 32) | def process(self):
class TakeAction (line 38) | class TakeAction(BaseModel):
method process (line 41) | def process(self):
FILE: examples/validated-multiclass/run.py
class Tag (line 11) | class Tag(BaseModel):
method validate_ids (line 16) | def validate_ids(self, info: ValidationInfo):
class TagWithInstructions (line 29) | class TagWithInstructions(Tag):
class TagRequest (line 33) | class TagRequest(BaseModel):
class TagResponse (line 38) | class TagResponse(BaseModel):
function tag_single_request (line 43) | async def tag_single_request(text: str, tags: list[Tag]) -> Tag:
function tag_request (line 65) | async def tag_request(request: TagRequest) -> TagResponse:
FILE: examples/validators/allm_validator.py
class QuestionAnswerNoEvil (line 12) | class QuestionAnswerNoEvil(BaseModel):
function main (line 22) | async def main():
FILE: examples/validators/annotator.py
function name_must_contain_space (line 6) | def name_must_contain_space(v: str) -> str:
class UserDetail (line 12) | class UserDetail(BaseModel):
FILE: examples/validators/chain_of_thought_validator.py
class Validation (line 11) | class Validation(BaseModel):
function validator (line 21) | def validator(values):
class Response (line 44) | class Response(BaseModel):
method chain_of_thought_makes_sense (line 50) | def chain_of_thought_makes_sense(cls, data):
FILE: examples/validators/citations.py
function citation_exists (line 9) | def citation_exists(v: str, info: ValidationInfo):
class AnswerWithCitation (line 21) | class AnswerWithCitation(BaseModel):
FILE: examples/validators/competitors.py
function no_competitors (line 10) | def no_competitors(v: str) -> str:
class Response (line 22) | class Response(BaseModel):
FILE: examples/validators/field_validator.py
class UserDetail (line 4) | class UserDetail(BaseModel):
method name_must_contain_space (line 9) | def name_must_contain_space(cls, v):
FILE: examples/validators/just_a_guy.py
class AnswerWithCitation (line 4) | class AnswerWithCitation(BaseModel):
method remove_stopwords (line 10) | def remove_stopwords(cls, v: str, info: ValidationInfo):
FILE: examples/validators/llm_validator.py
class QuestionAnswer (line 12) | class QuestionAnswer(BaseModel):
class QuestionAnswerNoEvil (line 46) | class QuestionAnswerNoEvil(BaseModel):
FILE: examples/validators/moderation.py
class Response (line 12) | class Response(BaseModel):
FILE: examples/vision/image_to_ad_copy.py
class Product (line 23) | class Product(BaseModel):
method generate_prompt (line 47) | def generate_prompt(self):
class IdentifiedProduct (line 56) | class IdentifiedProduct(BaseModel):
method __bool__ (line 76) | def __bool__(self):
class AdCopy (line 80) | class AdCopy(BaseModel):
function read_images (line 111) | def read_images(image_urls: list[str]) -> IdentifiedProduct:
function generate_ad_copy (line 141) | def generate_ad_copy(product: Product) -> AdCopy:
function run (line 162) | def run(images: list[str]) -> tuple[list[Product], list[AdCopy]]:
FILE: examples/vision/run.py
class Circle (line 9) | class Circle(BaseModel):
function encode_image (line 15) | def encode_image(image_path):
function draw_circle (line 20) | def draw_circle(image_size, num_circles, path):
FILE: examples/vision/run_raw.py
class SearchQuery (line 7) | class SearchQuery(BaseModel):
class MultiSearchQuery (line 15) | class MultiSearchQuery(BaseModel):
function extract_table (line 19) | def extract_table(url: str):
FILE: examples/vision/run_table.py
function to_markdown (line 18) | def to_markdown(df: pd.DataFrame) -> str:
function md_to_df (line 22) | def md_to_df(data: Any) -> Any:
class Table (line 53) | class Table(BaseModel):
function extract_table (line 58) | def extract_table(url: str):
FILE: examples/vision/slides.py
class Competitor (line 23) | class Competitor(BaseModel):
class Industry (line 29) | class Industry(BaseModel):
class Competition (line 40) | class Competition(BaseModel):
function read_images (line 58) | def read_images(image_urls: list[str]) -> Competition:
function process_and_identify_competitors (line 88) | def process_and_identify_competitors():
FILE: examples/watsonx/watsonx.py
class Company (line 18) | class Company(BaseModel):
FILE: examples/youtube-clips/run.py
function extract_video_id (line 10) | def extract_video_id(url: str) -> str | None:
class TranscriptSegment (line 18) | class TranscriptSegment(BaseModel):
function get_transcript_with_timing (line 24) | def get_transcript_with_timing(
class YoutubeClip (line 44) | class YoutubeClip(BaseModel):
class YoutubeClips (line 56) | class YoutubeClips(BaseModel):
function yield_clips (line 60) | def yield_clips(segments: Iterable[TranscriptSegment]) -> Iterable[Youtu...
FILE: examples/youtube-flashcards/run.py
class QuestionAnswer (line 11) | class QuestionAnswer(BaseModel):
function process_user_input (line 29) | def process_user_input(state: State, user_input: str) -> State:
function get_youtube_transcript (line 38) | def get_youtube_transcript(state: State) -> State:
function generate_question_and_answers (line 51) | def generate_question_and_answers(state: State) -> State:
function build_application (line 82) | def build_application():
FILE: examples/youtube/run.py
class Chapter (line 12) | class Chapter(BaseModel):
function get_youtube_transcript (line 30) | def get_youtube_transcript(video_id: str) -> str:
function extract_chapters (line 41) | def extract_chapters(transcript: str):
FILE: instructor/auto_client.py
function from_provider (line 42) | def from_provider(
function from_provider (line 51) | def from_provider(
function from_provider (line 60) | def from_provider(
function from_provider (line 69) | def from_provider(
function from_provider (line 77) | def from_provider(
FILE: instructor/batch/__init__.py
class BatchJob (line 70) | class BatchJob:
method parse_from_file (line 74) | def parse_from_file(
method parse_from_string (line 82) | def parse_from_string(
method _extract_structured_data (line 115) | def _extract_structured_data(cls, data: dict[str, Any]) -> Optional[di...
FILE: instructor/batch/models.py
class BatchSuccess (line 18) | class BatchSuccess(BaseModel, Generic[T]):
class BatchError (line 28) | class BatchError(BaseModel):
class BatchStatus (line 38) | class BatchStatus(str, Enum):
class BatchTimestamps (line 49) | class BatchTimestamps(BaseModel):
class BatchRequestCounts (line 61) | class BatchRequestCounts(BaseModel):
class BatchErrorInfo (line 78) | class BatchErrorInfo(BaseModel):
class BatchFiles (line 86) | class BatchFiles(BaseModel):
class BatchJobInfo (line 95) | class BatchJobInfo(BaseModel):
method from_openai (line 128) | def from_openai(cls, batch_data: dict[str, Any]) -> BatchJobInfo:
method from_anthropic (line 222) | def from_anthropic(cls, batch_data: dict[str, Any]) -> BatchJobInfo:
FILE: instructor/batch/processor.py
class BatchProcessor (line 18) | class BatchProcessor(Generic[T]):
method __init__ (line 21) | def __init__(self, model: str, response_model: type[T]):
method create_batch_from_messages (line 37) | def create_batch_from_messages(
method submit_batch (line 94) | def submit_batch(
method get_batch_status (line 114) | def get_batch_status(self, batch_id: str) -> dict[str, Any]:
method retrieve_results (line 118) | def retrieve_results(self, batch_id: str) -> list[BatchResult]:
method list_batches (line 123) | def list_batches(self, limit: int = 10) -> list[BatchJobInfo]:
method get_results (line 134) | def get_results(
method cancel_batch (line 157) | def cancel_batch(self, batch_id: str) -> dict[str, Any]:
method delete_batch (line 168) | def delete_batch(self, batch_id: str) -> dict[str, Any]:
method parse_results (line 179) | def parse_results(self, results_content: str) -> list[BatchResult]:
method _extract_from_response (line 249) | def _extract_from_response(self, data: dict[str, Any]) -> dict[str, An...
FILE: instructor/batch/providers/__init__.py
function get_provider (line 17) | def get_provider(provider_name: str) -> BatchProvider:
FILE: instructor/batch/providers/anthropic.py
class AnthropicProvider (line 17) | class AnthropicProvider(BatchProvider):
method submit_batch (line 20) | def submit_batch(
method get_status (line 69) | def get_status(self, batch_id: str) -> dict[str, Any]:
method retrieve_results (line 92) | def retrieve_results(self, batch_id: str) -> str:
method download_results (line 139) | def download_results(self, batch_id: str, file_path: str) -> None:
method cancel_batch (line 184) | def cancel_batch(self, batch_id: str) -> dict[str, Any]:
method delete_batch (line 202) | def delete_batch(self, batch_id: str) -> dict[str, Any]:
method list_batches (line 224) | def list_batches(self, limit: int = 10) -> list[BatchJobInfo]:
FILE: instructor/batch/providers/base.py
class BatchProvider (line 16) | class BatchProvider(ABC):
method submit_batch (line 20) | def submit_batch(
method get_status (line 30) | def get_status(self, batch_id: str) -> dict[str, Any]:
method retrieve_results (line 35) | def retrieve_results(self, batch_id: str) -> str:
method download_results (line 40) | def download_results(self, batch_id: str, file_path: str) -> None:
method cancel_batch (line 45) | def cancel_batch(self, batch_id: str) -> dict[str, Any]:
method delete_batch (line 50) | def delete_batch(self, batch_id: str) -> dict[str, Any]:
method list_batches (line 55) | def list_batches(self, limit: int = 10) -> list[BatchJobInfo]:
FILE: instructor/batch/providers/openai.py
class OpenAIProvider (line 16) | class OpenAIProvider(BatchProvider):
method submit_batch (line 19) | def submit_batch(
method get_status (line 67) | def get_status(self, batch_id: str) -> dict[str, Any]:
method retrieve_results (line 87) | def retrieve_results(self, batch_id: str) -> str:
method download_results (line 145) | def download_results(self, batch_id: str, file_path: str) -> None:
method cancel_batch (line 204) | def cancel_batch(self, batch_id: str) -> dict[str, Any]:
method delete_batch (line 215) | def delete_batch(self, batch_id: str) -> dict[str, Any]:
method list_batches (line 231) | def list_batches(self, limit: int = 10) -> list[BatchJobInfo]:
FILE: instructor/batch/request.py
class Function (line 16) | class Function(BaseModel):
class Tool (line 22) | class Tool(BaseModel):
class RequestBody (line 27) | class RequestBody(BaseModel):
class BatchModel (line 36) | class BatchModel(BaseModel):
class BatchRequest (line 43) | class BatchRequest(BaseModel, Generic[T]):
method get_json_schema (line 55) | def get_json_schema(self) -> dict[str, Any]:
method to_openai_format (line 59) | def to_openai_format(self) -> dict[str, Any]:
method to_anthropic_format (line 110) | def to_anthropic_format(self) -> dict[str, Any]:
method save_to_file (line 154) | def save_to_file(
FILE: instructor/batch/utils.py
function filter_successful (line 11) | def filter_successful(results: list[BatchResult]) -> list[BatchSuccess[T]]:
function filter_errors (line 16) | def filter_errors(results: list[BatchResult]) -> list[BatchError]:
function extract_results (line 21) | def extract_results(results: list[BatchResult]) -> list[T]:
function get_results_by_custom_id (line 26) | def get_results_by_custom_id(results: list[BatchResult]) -> dict[str, Ba...
FILE: instructor/cache/__init__.py
class BaseCache (line 45) | class BaseCache(ABC):
method get (line 52) | def get(self, key: str) -> Any | None: # noqa: ANN401 – value type ar...
method set (line 56) | def set(
class AutoCache (line 69) | class AutoCache(BaseCache):
method __init__ (line 72) | def __init__(self, maxsize: int = 128):
method get (line 82) | def get(self, key: str) -> Any | None: # noqa: ANN401
method set (line 92) | def set(
function _import_diskcache (line 113) | def _import_diskcache(): # pragma: no cover – only executed when requested
class DiskCache (line 125) | class DiskCache(BaseCache):
method __init__ (line 128) | def __init__(self, directory: str = ".instructor_cache", **kwargs: Any):
method get (line 132) | def get(self, key: str) -> Any | None: # noqa: ANN401
method set (line 135) | def set(self, key: str, value: Any, ttl: int | None = None) -> None: ...
function make_cache_key (line 147) | def make_cache_key(
function load_cached_response (line 191) | def load_cached_response(cache: BaseCache, key: str, response_model: typ...
function store_cached_response (line 241) | def store_cached_response(
FILE: instructor/cli/batch.py
function generate_table (line 18) | def generate_table(batch_jobs: list[BatchJobInfo], provider: str, full_i...
function get_jobs (line 125) | def get_jobs(limit: int = 10, provider: str = "openai") -> list[BatchJob...
function watch (line 155) | def watch(
function create_from_file (line 224) | def create_from_file(
function cancel (line 286) | def cancel(
function delete (line 348) | def delete(
function download_file (line 395) | def download_file(
function results (line 447) | def results(
function create (line 509) | def create(
FILE: instructor/cli/cli.py
function docs (line 22) | def docs(
FILE: instructor/cli/deprecated_hub.py
function hub (line 7) | def hub() -> None:
FILE: instructor/cli/files.py
function generate_file_table (line 19) | def generate_file_table(files: list[openai.types.FileObject]) -> Table:
function get_files (line 41) | def get_files() -> list[openai.types.FileObject]:
function get_file_status (line 48) | def get_file_status(file_id: str) -> str:
function upload (line 56) | def upload(
function download (line 79) | def download(
function delete (line 93) | def delete(file_id: str = typer.Argument(help="ID of the file to delete"...
function status (line 106) | def status(
function list (line 121) | def list() -> None:
FILE: instructor/cli/jobs.py
class FuneTuningParams (line 18) | class FuneTuningParams(TypedDict, total=False):
function generate_table (line 24) | def generate_table(jobs: list[FineTuningJob]) -> Table:
function status_color (line 68) | def status_color(status: str) -> str:
function get_jobs (line 74) | def get_jobs(limit: int = 5) -> list[FineTuningJob]:
function get_file_status (line 78) | def get_file_status(file_id: str) -> str:
function watch (line 87) | def watch(
function create_from_id (line 106) | def create_from_id(
function create_from_file (line 146) | def create_from_file(
function cancel (line 232) | def cancel(
FILE: instructor/cli/usage.py
function fetch_usage (line 23) | async def fetch_usage(date: str) -> dict[str, Any]:
function get_usage_for_past_n_days (line 31) | async def get_usage_for_past_n_days(n_days: int) -> list[dict[str, Any]]:
function get_model_cost (line 80) | def get_model_cost(
function calculate_cost (line 103) | def calculate_cost(
function group_and_sum_by_date_and_snapshot (line 119) | def group_and_sum_by_date_and_snapshot(usage_data: list[dict[str, Any]])...
function list (line 168) | def list(
FILE: instructor/client.py
function __getattr__ (line 9) | def __getattr__(name: str):
FILE: instructor/core/client.py
class Response (line 33) | class Response:
method __init__ (line 34) | def __init__(
method create (line 40) | def create(
method create_with_completion (line 68) | def create_with_completion(
method create_iterable (line 90) | def create_iterable(
method create_partial (line 112) | def create_partial(
class AsyncResponse (line 135) | class AsyncResponse(Response):
method __init__ (line 136) | def __init__(self, client: AsyncInstructor):
method create (line 139) | async def create(
method create_with_completion (line 167) | async def create_with_completion(
method create_iterable (line 189) | async def create_iterable(
class Instructor (line 212) | class Instructor:
method __init__ (line 220) | def __init__(
method on (line 246) | def on(
method off (line 262) | def off(
method clear (line 278) | def clear(
method chat (line 295) | def chat(self) -> Self:
method completions (line 299) | def completions(self) -> Self:
method messages (line 303) | def messages(self) -> Self:
method create (line 307) | def create(
method create (line 320) | def create(
method create (line 333) | def create(
method create (line 346) | def create(
method create (line 358) | def create(
method create_partial (line 388) | def create_partial(
method create_partial (line 401) | def create_partial(
method create_partial (line 413) | def create_partial(
method create_iterable (line 446) | def create_iterable(
method create_iterable (line 459) | def create_iterable(
method create_iterable (line 471) | def create_iterable(
method create_with_completion (line 503) | def create_with_completion(
method create_with_completion (line 516) | def create_with_completion(
method create_with_completion (line 528) | def create_with_completion(
method handle_kwargs (line 558) | def handle_kwargs(self, kwargs: dict[str, Any]) -> dict[str, Any]:
method __getattr__ (line 570) | def __getattr__(self, attr: str) -> Any:
class AsyncInstructor (line 577) | class AsyncInstructor(Instructor):
method __init__ (line 585) | def __init__(
method create (line 608) | async def create( # type: ignore[override]
method create_partial (line 660) | async def create_partial( # type: ignore[override]
method create_iterable (line 691) | async def create_iterable( # type: ignore[override]
method create_with_completion (line 722) | async def create_with_completion( # type: ignore[override]
function from_openai (line 754) | def from_openai(
function from_openai (line 763) | def from_openai(
function map_chat_completion_to_response (line 771) | def map_chat_completion_to_response(messages, client, *args, **kwargs) -...
function async_map_chat_completion_to_response (line 779) | async def async_map_chat_completion_to_response(
function from_openai (line 789) | def from_openai(
function from_litellm (line 877) | def from_litellm(
function from_litellm (line 885) | def from_litellm(
function from_litellm (line 892) | def from_litellm(
FILE: instructor/core/exceptions.py
class InstructorError (line 8) | class InstructorError(Exception):
method from_exception (line 47) | def from_exception(
method __init__ (line 62) | def __init__(
method __str__ (line 71) | def __str__(self) -> str:
class FailedAttempt (line 103) | class FailedAttempt(NamedTuple):
class IncompleteOutputException (line 136) | class IncompleteOutputException(InstructorError):
method __init__ (line 177) | def __init__(
class InstructorRetryException (line 188) | class InstructorRetryException(InstructorError):
method __init__ (line 239) | def __init__(
class ValidationError (line 258) | class ValidationError(InstructorError):
class ProviderError (line 301) | class ProviderError(InstructorError):
method __init__ (line 333) | def __init__(self, provider: str, message: str, *args: Any, **kwargs: ...
class ConfigurationError (line 338) | class ConfigurationError(InstructorError):
class ModeError (line 375) | class ModeError(InstructorError):
method __init__ (line 408) | def __init__(
class ClientError (line 423) | class ClientError(InstructorError):
class AsyncValidationError (line 447) | class AsyncValidationError(ValueError, InstructorError):
class ResponseParsingError (line 482) | class ResponseParsingError(ValueError, InstructorError):
method __init__ (line 523) | def __init__(
class MultimodalError (line 537) | class MultimodalError(ValueError, InstructorError):
method __init__ (line 585) | def __init__(
FILE: instructor/core/hooks.py
class HookName (line 12) | class HookName(Enum):
class CompletionKwargsHandler (line 21) | class CompletionKwargsHandler(Protocol):
method __call__ (line 24) | def __call__(self, *args: Any, **kwargs: Any) -> None: ...
class CompletionResponseHandler (line 27) | class CompletionResponseHandler(Protocol):
method __call__ (line 30) | def __call__(self, response: Any) -> None: ...
class CompletionErrorHandler (line 33) | class CompletionErrorHandler(Protocol):
method __call__ (line 36) | def __call__(self, error: Exception) -> None: ...
class ParseErrorHandler (line 39) | class ParseErrorHandler(Protocol):
method __call__ (line 42) | def __call__(self, error: Exception) -> None: ...
class Hooks (line 66) | class Hooks:
method __init__ (line 74) | def __init__(self) -> None:
method on (line 78) | def on(
method get_hook_name (line 108) | def get_hook_name(self, hook_name: HookNameType) -> HookName:
method emit (line 128) | def emit(self, hook_name: HookName, *args: Any, **kwargs: Any) -> None:
method emit_completion_arguments (line 147) | def emit_completion_arguments(self, *args: Any, **kwargs: Any) -> None:
method emit_completion_response (line 157) | def emit_completion_response(self, response: Any) -> None:
method emit_completion_error (line 166) | def emit_completion_error(self, error: Exception) -> None:
method emit_completion_last_attempt (line 175) | def emit_completion_last_attempt(self, error: Exception) -> None:
method emit_parse_error (line 184) | def emit_parse_error(self, error: Exception) -> None:
method off (line 193) | def off(
method clear (line 212) | def clear(
method __add__ (line 229) | def __add__(self, other: Hooks) -> Hooks:
method __iadd__ (line 266) | def __iadd__(self, other: Hooks) -> Hooks:
method combine (line 298) | def combine(cls, *hooks_instances: Hooks) -> Hooks:
method copy (line 331) | def copy(self) -> Hooks:
FILE: instructor/core/patch.py
class InstructorChatCompletionCreate (line 37) | class InstructorChatCompletionCreate(Protocol):
method __call__ (line 38) | def __call__(
class AsyncInstructorChatCompletionCreate (line 49) | class AsyncInstructorChatCompletionCreate(Protocol):
method __call__ (line 50) | async def __call__(
function handle_context (line 61) | def handle_context(
function patch (line 90) | def patch(
function patch (line 97) | def patch(
function patch (line 104) | def patch(
function patch (line 111) | def patch(
function patch (line 117) | def patch( # type: ignore
function apatch (line 293) | def apatch(client: AsyncOpenAI, mode: Mode = Mode.TOOLS) -> AsyncOpenAI:
FILE: instructor/core/retry.py
function initialize_retrying (line 48) | def initialize_retrying(
function initialize_usage (line 91) | def initialize_usage(mode: Mode) -> CompletionUsage | Any:
function extract_messages (line 122) | def extract_messages(kwargs: dict[str, Any]) -> Any:
function retry_sync (line 143) | def retry_sync(
function retry_async (line 299) | async def retry_async(
FILE: instructor/distil.py
class OpenAIChatKwargs (line 30) | class OpenAIChatKwargs(TypedDict):
class FinetuneFormat (line 35) | class FinetuneFormat(enum.Enum):
function get_signature_from_fn (line 40) | def get_signature_from_fn(fn: Callable[..., Any]) -> str:
function format_function (line 66) | def format_function(func: Callable[..., Any]) -> str:
function is_return_type_base_model_or_instance (line 85) | def is_return_type_base_model_or_instance(func: Callable[..., Any]) -> b...
class Instructions (line 99) | class Instructions:
method __init__ (line 100) | def __init__(
method distil (line 132) | def distil(
method track (line 211) | def track(
method openai_kwargs (line 259) | def openai_kwargs(
FILE: instructor/dsl/citation.py
class CitationMixin (line 5) | class CitationMixin(BaseModel):
method validate_sources (line 61) | def validate_sources(self, info: ValidationInfo) -> "CitationMixin":
method _get_span (line 78) | def _get_span(
method get_spans (line 95) | def get_spans(self, context: str) -> Generator[tuple[int, int], None, ...
FILE: instructor/dsl/iterable.py
class IterableBase (line 21) | class IterableBase:
method from_streaming_response (line 25) | def from_streaming_response(
method from_streaming_response_async (line 48) | async def from_streaming_response_async(
method tasks_from_mistral_chunks (line 64) | async def tasks_from_mistral_chunks(
method tasks_from_chunks (line 83) | def tasks_from_chunks(
method tasks_from_chunks_async (line 105) | async def tasks_from_chunks_async(
method extract_cls_task_type (line 127) | def extract_cls_task_type(
method extract_json (line 148) | def extract_json(
method extract_json_async (line 364) | async def extract_json_async(
method get_object (line 569) | def get_object(s: str, stack: int) -> tuple[Optional[str], str]:
function IterableModel (line 581) | def IterableModel(
FILE: instructor/dsl/json_tracker.py
function is_json_complete (line 16) | def is_json_complete(json_str: str) -> bool:
class JsonCompleteness (line 31) | class JsonCompleteness:
method __init__ (line 54) | def __init__(self) -> None:
method analyze (line 57) | def analyze(self, json_str: str) -> None:
method _mark_all (line 80) | def _mark_all(self, data: Any, path: str) -> None:
method _check_siblings (line 91) | def _check_siblings(self, data: Any, path: str) -> None:
method is_path_complete (line 119) | def is_path_complete(self, path: str) -> bool:
method get_complete_paths (line 132) | def get_complete_paths(self) -> set[str]:
method is_root_complete (line 136) | def is_root_complete(self) -> bool:
FILE: instructor/dsl/maybe.py
class MaybeBase (line 7) | class MaybeBase(BaseModel, Generic[T]):
method __bool__ (line 16) | def __bool__(self) -> bool:
function Maybe (line 20) | def Maybe(model: type[T]) -> type[MaybeBase[T]]:
FILE: instructor/dsl/parallel.py
class ParallelBase (line 27) | class ParallelBase:
method __init__ (line 28) | def __init__(self, *models: type[BaseModel]):
method from_response (line 37) | def from_response(
class VertexAIParallelBase (line 55) | class VertexAIParallelBase(ParallelBase):
method from_response (line 56) | def from_response(
function is_union_type (line 90) | def is_union_type(typehint: type[Iterable[T]]) -> bool:
function is_union_type (line 95) | def is_union_type(typehint: type[Iterable[T]]) -> bool:
function get_types_array (line 99) | def get_types_array(typehint: type[Iterable[T]]) -> tuple[type[T], ...]:
function handle_parallel_model (line 114) | def handle_parallel_model(typehint: type[Iterable[T]]) -> list[dict[str,...
function handle_anthropic_parallel_model (line 125) | def handle_anthropic_parallel_model(
function ParallelModel (line 135) | def ParallelModel(typehint: type[Iterable[T]]) -> ParallelBase:
function VertexAIParallelModel (line 140) | def VertexAIParallelModel(typehint: type[Iterable[T]]) -> VertexAIParall...
class AnthropicParallelBase (line 145) | class AnthropicParallelBase(ParallelBase):
method from_response (line 146) | def from_response(
function AnthropicParallelModel (line 171) | def AnthropicParallelModel(typehint: type[Iterable[T]]) -> AnthropicPara...
FILE: instructor/dsl/partial.py
class MakeFieldsOptional (line 52) | class MakeFieldsOptional:
class PartialLiteralMixin (line 56) | class PartialLiteralMixin:
method __init_subclass__ (line 67) | def __init_subclass__(cls, **kwargs: Any) -> None:
function remove_control_chars (line 78) | def remove_control_chars(s):
function process_potential_object (line 82) | def process_potential_object(potential_object, partial_mode, partial_mod...
function _build_partial_object (line 117) | def _build_partial_object(
function _build_partial_list (line 188) | def _build_partial_list(
function _process_generic_arg (line 223) | def _process_generic_arg(
function _make_field_optional (line 262) | def _make_field_optional(
class PartialBase (line 299) | class PartialBase(Generic[T_Model]):
method get_partial_model (line 302) | def get_partial_model(cls) -> type[T_Model]:
method from_streaming_response (line 338) | def from_streaming_response(
method from_streaming_response_async (line 352) | async def from_streaming_response_async(
method writer_model_from_chunks (line 368) | def writer_model_from_chunks(
method writer_model_from_chunks_async (line 404) | async def writer_model_from_chunks_async(
method model_from_chunks (line 440) | def model_from_chunks(
method model_from_chunks_async (line 475) | async def model_from_chunks_async(
method extract_json (line 510) | def extract_json(
method extract_json_async (line 739) | async def extract_json_async(
class Partial (line 963) | class Partial(Generic[T_Model]):
method __new__ (line 973) | def __new__(
method __init_subclass__ (line 985) | def __init_subclass__(
method __class_getitem__ (line 997) | def __class_getitem__(
FILE: instructor/dsl/response_list.py
class ListResponse (line 14) | class ListResponse(list[T], Generic[T]):
method __init__ (line 24) | def __init__(self, iterable=(), _raw_response: Any | None = None): # ...
method from_list (line 29) | def from_list(cls, items: list[T], *, raw_response: Any | None) -> Lis...
method get_raw_response (line 32) | def get_raw_response(self) -> Any | None:
method __getitem__ (line 35) | def __getitem__(self, key): # type: ignore[no-untyped-def]
FILE: instructor/dsl/simple_type.py
class AdapterBase (line 17) | class AdapterBase(BaseModel):
class ModelAdapter (line 21) | class ModelAdapter(typing.Generic[T]):
method __class_getitem__ (line 26) | def __class_getitem__(cls, response_model: type[BaseModel]) -> type[Ba...
function validateIsSubClass (line 39) | def validateIsSubClass(response_model: type):
function is_simple_type (line 61) | def is_simple_type(
FILE: instructor/dsl/validators.py
function __getattr__ (line 7) | def __getattr__(name: str):
FILE: instructor/hooks.py
function __getattr__ (line 9) | def __getattr__(name: str):
FILE: instructor/mode.py
class Mode (line 9) | class Mode(enum.Enum):
method tool_modes (line 76) | def tool_modes(cls) -> set["Mode"]:
method json_modes (line 104) | def json_modes(cls) -> set["Mode"]:
method warn_mode_functions_deprecation (line 126) | def warn_mode_functions_deprecation(cls):
FILE: instructor/multimodal.py
function __getattr__ (line 9) | def __getattr__(name: str):
FILE: instructor/patch.py
function __getattr__ (line 9) | def __getattr__(name: str):
FILE: instructor/process_response.py
function __getattr__ (line 9) | def __getattr__(name: str):
FILE: instructor/processing/function_calls.py
function _handle_incomplete_output (line 42) | def _handle_incomplete_output(completion: Any) -> None:
function _extract_text_content (line 55) | def _extract_text_content(completion: Any) -> str:
function _validate_model_from_json (line 81) | def _validate_model_from_json(
class OpenAISchema (line 113) | class OpenAISchema(BaseModel):
method openai_schema (line 118) | def openai_schema(cls) -> dict[str, Any]:
method anthropic_schema (line 131) | def anthropic_schema(cls) -> dict[str, Any]:
method gemini_schema (line 136) | def gemini_schema(cls) -> Any:
method from_response (line 141) | def from_response(
method parse_genai_structured_outputs (line 266) | def parse_genai_structured_outputs(
method parse_genai_tools (line 277) | def parse_genai_tools(
method parse_cohere_json_schema (line 308) | def parse_cohere_json_schema(
method parse_anthropic_tools (line 358) | def parse_anthropic_tools(
method parse_anthropic_json (line 384) | def parse_anthropic_json(
method parse_bedrock_json (line 427) | def parse_bedrock_json(
method parse_bedrock_tools (line 454) | def parse_bedrock_tools(
method parse_gemini_json (line 490) | def parse_gemini_json(
method parse_vertexai_tools (line 523) | def parse_vertexai_tools(
method parse_vertexai_json (line 536) | def parse_vertexai_json(
method parse_cohere_tools (line 547) | def parse_cohere_tools(
method parse_writer_tools (line 636) | def parse_writer_tools(
method parse_writer_json (line 658) | def parse_writer_json(
method parse_functions (line 678) | def parse_functions(
method parse_responses_tools (line 695) | def parse_responses_tools(
method parse_tools (line 723) | def parse_tools(
method parse_mistral_structured_outputs (line 751) | def parse_mistral_structured_outputs(
method parse_json (line 770) | def parse_json(
function openai_schema (line 793) | def openai_schema(cls: type[BaseModel]) -> OpenAISchema:
FILE: instructor/processing/multimodal.py
class ImageParamsBase (line 50) | class ImageParamsBase(TypedDict):
class ImageParams (line 55) | class ImageParams(ImageParamsBase, total=False):
class Image (line 59) | class Image(BaseModel):
method autodetect (line 69) | def autodetect(cls, source: str | Path) -> Image:
method autodetect_safely (line 93) | def autodetect_safely(cls, source: Union[str, Path]) -> Union[Image, s...
method is_base64 (line 108) | def is_base64(cls, s: str) -> bool:
method from_base64 (line 112) | def from_base64(cls, data_uri: str) -> Image:
method from_gs_url (line 127) | def from_gs_url(cls, data_uri: str, timeout: int = 30) -> Image:
method from_raw_base64 (line 156) | def from_raw_base64(cls, data: str) -> Image:
method from_url (line 186) | def from_url(cls, url: str) -> Image:
method from_path (line 208) | def from_path(cls, path: Union[str, Path]) -> Image: # noqa: UP007
method url_to_base64 (line 225) | def url_to_base64(url: str) -> str:
method to_anthropic (line 232) | def to_anthropic(self) -> dict[str, Any]:
method to_openai (line 249) | def to_openai(self, mode: Mode) -> dict[str, Any]:
method to_genai (line 279) | def to_genai(self):
class Audio (line 316) | class Audio(BaseModel):
method autodetect (line 326) | def autodetect(cls, source: str | Path) -> Audio:
method autodetect_safely (line 350) | def autodetect_safely(cls, source: Union[str, Path]) -> Union[Audio, s...
method is_base64 (line 365) | def is_base64(cls, s: str) -> bool:
method from_base64 (line 369) | def from_base64(cls, data_uri: str) -> Audio:
method from_url (line 381) | def from_url(cls, url: str) -> Audio:
method from_path (line 395) | def from_path(cls, path: Union[str, Path]) -> Audio: # noqa: UP007
method from_gs_url (line 418) | def from_gs_url(cls, data_uri: str, timeout: int = 30) -> Audio:
method to_openai (line 446) | def to_openai(self, mode: Mode) -> dict[str, Any]:
method to_anthropic (line 456) | def to_anthropic(self) -> dict[str, Any]:
method to_genai (line 459) | def to_genai(self):
class ImageWithCacheControl (line 476) | class ImageWithCacheControl(Image):
method from_image_params (line 484) | def from_image_params(cls, image_params: ImageParams) -> Image:
method to_anthropic (line 495) | def to_anthropic(self) -> dict[str, Any]:
class PDF (line 503) | class PDF(BaseModel):
method autodetect (line 511) | def autodetect(cls, source: str | Path) -> PDF:
method autodetect_safely (line 555) | def autodetect_safely(cls, source: Union[str, Path]) -> Union[PDF, str...
method is_base64 (line 570) | def is_base64(cls, s: str) -> bool:
method from_base64 (line 574) | def from_base64(cls, data_uri: str) -> PDF:
method from_path (line 587) | def from_path(cls, path: str | Path) -> PDF:
method from_raw_base64 (line 603) | def from_raw_base64(cls, data: str) -> PDF:
method from_gs_url (line 618) | def from_gs_url(cls, data_uri: str, timeout: int = 30) -> PDF:
method from_url (line 648) | def from_url(cls, url: str) -> PDF:
method to_mistral (line 665) | def to_mistral(self) -> dict[str, Any]:
method to_openai (line 677) | def to_openai(self, mode: Mode) -> dict[str, Any]:
method to_anthropic (line 734) | def to_anthropic(self) -> dict[str, Any]:
method to_genai (line 762) | def to_genai(self):
method to_bedrock (line 791) | def to_bedrock(self, name: str | None = None) -> dict[str, Any]:
class PDFWithCacheControl (line 864) | class PDFWithCacheControl(PDF):
method to_anthropic (line 867) | def to_anthropic(self) -> dict[str, Any]:
class PDFWithGenaiFile (line 874) | class PDFWithGenaiFile(PDF):
method from_new_genai_file (line 876) | def from_new_genai_file(
method from_existing_genai_file (line 899) | def from_existing_genai_file(cls, file_name: str) -> PDFWithGenaiFile:
method to_genai (line 916) | def to_genai(self):
function convert_contents (line 937) | def convert_contents(
function autodetect_media (line 985) | def autodetect_media(
function convert_messages (line 1025) | def convert_messages(
function extract_genai_multimodal_content (line 1093) | def extract_genai_multimodal_content(
FILE: instructor/processing/response.py
function process_response_async (line 173) | async def process_response_async(
function process_response (line 279) | def process_response(
function is_typed_dict (line 397) | def is_typed_dict(cls) -> bool:
function handle_response_model (line 405) | def handle_response_model(
function handle_reask_kwargs (line 528) | def handle_reask_kwargs(
FILE: instructor/processing/schema.py
function generate_openai_schema (line 27) | def generate_openai_schema(model: type[BaseModel]) -> dict[str, Any]:
function generate_anthropic_schema (line 74) | def generate_anthropic_schema(model: type[BaseModel]) -> dict[str, Any]:
function generate_gemini_schema (line 94) | def generate_gemini_schema(model: type[BaseModel]) -> Any:
FILE: instructor/processing/validators.py
class Validator (line 10) | class Validator(OpenAISchema):
FILE: instructor/providers/anthropic/client.py
function from_anthropic (line 10) | def from_anthropic(
function from_anthropic (line 21) | def from_anthropic(
function from_anthropic (line 33) | def from_anthropic(
FILE: instructor/providers/anthropic/utils.py
class SystemMessage (line 17) | class SystemMessage(TypedDict, total=False):
function combine_system_messages (line 23) | def combine_system_messages(
function extract_system_messages (line 80) | def extract_system_messages(messages: list[dict[str, Any]]) -> list[Syst...
function reask_anthropic_tools (line 137) | def reask_anthropic_tools(
function reask_anthropic_json (line 198) | def reask_anthropic_json(
function handle_anthropic_message_conversion (line 243) | def handle_anthropic_message_conversion(new_kwargs: dict[str, Any]) -> d...
function handle_anthropic_tools (line 264) | def handle_anthropic_tools(
function handle_anthropic_reasoning_tools (line 314) | def handle_anthropic_reasoning_tools(
function handle_anthropic_json (line 364) | def handle_anthropic_json(
function handle_anthropic_parallel_tools (line 425) | def handle_anthropic_parallel_tools(
FILE: instructor/providers/bedrock/client.py
function from_bedrock (line 13) | def from_bedrock(
function from_bedrock (line 22) | def from_bedrock(
function handle_bedrock_json (line 30) | def handle_bedrock_json(
function from_bedrock (line 41) | def from_bedrock(
FILE: instructor/providers/bedrock/utils.py
function generate_bedrock_schema (line 19) | def generate_bedrock_schema(response_model: type[Any]) -> dict[str, Any]:
function reask_bedrock_json (line 46) | def reask_bedrock_json(
function reask_bedrock_tools (line 73) | def reask_bedrock_tools(
function _normalize_bedrock_image_format (line 135) | def _normalize_bedrock_image_format(mime_or_ext: str) -> str:
function _openai_image_part_to_bedrock (line 156) | def _openai_image_part_to_bedrock(part: dict[str, Any]) -> dict[str, Any]:
function _to_bedrock_content_items (line 213) | def _to_bedrock_content_items(content: Any) -> list[dict[str, Any]]:
function _prepare_bedrock_converse_kwargs_internal (line 285) | def _prepare_bedrock_converse_kwargs_internal(
function handle_bedrock_json (line 417) | def handle_bedrock_json(
function handle_bedrock_tools (line 456) | def handle_bedrock_tools(
FILE: instructor/providers/cerebras/client.py
function from_cerebras (line 13) | def from_cerebras(
function from_cerebras (line 21) | def from_cerebras(
function from_cerebras (line 28) | def from_cerebras(
FILE: instructor/providers/cerebras/utils.py
function reask_cerebras_tools (line 16) | def reask_cerebras_tools(
function handle_cerebras_tools (line 44) | def handle_cerebras_tools(
function handle_cerebras_json (line 70) | def handle_cerebras_json(
FILE: instructor/providers/cohere/client.py
function from_cohere (line 18) | def from_cohere(
function from_cohere (line 26) | def from_cohere(
function from_cohere (line 34) | def from_cohere(
function from_cohere (line 42) | def from_cohere(
function from_cohere (line 49) | def from_cohere(
FILE: instructor/providers/cohere/utils.py
function reask_cohere_tools (line 14) | def reask_cohere_tools(
function handle_cohere_modes (line 92) | def handle_cohere_modes(new_kwargs: dict[str, Any]) -> tuple[None, dict[...
function handle_cohere_json_schema (line 145) | def handle_cohere_json_schema(
function handle_cohere_tools (line 180) | def handle_cohere_tools(
FILE: instructor/providers/fireworks/client.py
function from_fireworks (line 19) | def from_fireworks(
function from_fireworks (line 27) | def from_fireworks(
function from_fireworks (line 34) | def from_fireworks(
FILE: instructor/providers/fireworks/utils.py
function reask_fireworks_tools (line 16) | def reask_fireworks_tools(kwargs: dict[str, Any], response: Any, excepti...
function reask_fireworks_json (line 40) | def reask_fireworks_json(
function handle_fireworks_tools (line 63) | def handle_fireworks_tools(
function handle_fireworks_json (line 89) | def handle_fireworks_json(
FILE: instructor/providers/gemini/client.py
function from_gemini (line 11) | def from_gemini(
function from_gemini (line 20) | def from_gemini(
function from_gemini (line 28) | def from_gemini(
FILE: instructor/providers/gemini/utils.py
function _get_model_schema (line 27) | def _get_model_schema(response_model: Any) -> dict[str, Any]:
function _get_model_name (line 48) | def _get_model_name(response_model: Any) -> str:
function transform_to_gemini_prompt (line 64) | def transform_to_gemini_prompt(
function verify_no_unions (line 131) | def verify_no_unions(obj: dict[str, Any]) -> bool: # noqa: ARG001
function map_to_gemini_function_schema (line 153) | def map_to_gemini_function_schema(obj: dict[str, Any]) -> dict[str, Any]:
function map_to_genai_schema (line 243) | def map_to_genai_schema(obj: dict[str, Any]) -> genai_types.Schema:
function update_genai_kwargs (line 275) | def update_genai_kwargs(
function update_gemini_kwargs (line 470) | def update_gemini_kwargs(kwargs: dict[str, Any]) -> dict[str, Any]:
function extract_genai_system_message (line 546) | def extract_genai_system_message(
function convert_to_genai_messages (line 581) | def convert_to_genai_messages(
function reask_gemini_tools (line 651) | def reask_gemini_tools(
function reask_gemini_json (line 694) | def reask_gemini_json(
function reask_vertexai_tools (line 717) | def reask_vertexai_tools(
function reask_vertexai_json (line 739) | def reask_vertexai_json(
function reask_genai_tools (line 770) | def reask_genai_tools(
function reask_genai_structured_outputs (line 847) | def reask_genai_structured_outputs(
function handle_genai_message_conversion (line 881) | def handle_genai_message_conversion(
function handle_gemini_json (line 920) | def handle_gemini_json(
function handle_gemini_tools (line 974) | def handle_gemini_tools(
function handle_genai_structured_outputs (line 1009) | def handle_genai_structured_outputs(
function handle_genai_tools (line 1096) | def handle_genai_tools(
function handle_vertexai_parallel_tools (line 1196) | def handle_vertexai_parallel_tools(
function handle_vertexai_tools (line 1226) | def handle_vertexai_tools(
function handle_vertexai_json (line 1252) | def handle_vertexai_json(
FILE: instructor/providers/genai/client.py
function from_genai (line 12) | def from_genai(
function from_genai (line 21) | def from_genai(
function from_genai (line 29) | def from_genai(
FILE: instructor/providers/groq/client.py
function from_groq (line 10) | def from_groq(
function from_groq (line 18) | def from_groq(
function from_groq (line 25) | def from_groq(
FILE: instructor/providers/mistral/client.py
function from_mistral (line 11) | def from_mistral(
function from_mistral (line 20) | def from_mistral(
function from_mistral (line 28) | def from_mistral(
FILE: instructor/providers/mistral/utils.py
function reask_mistral_structured_outputs (line 16) | def reask_mistral_structured_outputs(
function reask_mistral_tools (line 46) | def reask_mistral_tools(
function handle_mistral_tools (line 74) | def handle_mistral_tools(
function handle_mistral_structured_outputs (line 94) | def handle_mistral_structured_outputs(
FILE: instructor/providers/openai/utils.py
function _is_stream_response (line 22) | def _is_stream_response(response: Any) -> bool:
function _filter_responses_tool_calls (line 31) | def _filter_responses_tool_calls(output_items: list[Any]) -> list[Any]:
function _format_responses_tool_call_details (line 44) | def _format_responses_tool_call_details(tool_call: Any) -> str:
function reask_tools (line 62) | def reask_tools(
function reask_responses_tools (line 106) | def reask_responses_tools(
function reask_md_json (line 151) | def reask_md_json(
function reask_default (line 187) | def reask_default(
function handle_parallel_tools (line 228) | def handle_parallel_tools(
function handle_functions (line 271) | def handle_functions(
function handle_tools_strict (line 295) | def handle_tools_strict(
function handle_tools (line 320) | def handle_tools(
function handle_responses_tools (line 348) | def handle_responses_tools(
function handle_responses_tools_with_inbuilt_tools (line 402) | def handle_responses_tools_with_inbuilt_tools(
function handle_json_o1 (line 452) | def handle_json_o1(
function handle_json_modes (line 491) | def handle_json_modes(
function handle_openrouter_structured_outputs (line 559) | def handle_openrouter_structured_outputs(
FILE: instructor/providers/perplexity/client.py
function from_perplexity (line 9) | def from_perplexity(
function from_perplexity (line 17) | def from_perplexity(
function from_perplexity (line 24) | def from_perplexity(
FILE: instructor/providers/perplexity/utils.py
function reask_perplexity_json (line 15) | def reask_perplexity_json(
function handle_perplexity_json (line 38) | def handle_perplexity_json(
FILE: instructor/providers/vertexai/client.py
function _create_gemini_json_schema (line 13) | def _create_gemini_json_schema(model: type[BaseModel]) -> dict[str, Any]:
function _create_vertexai_tool (line 30) | def _create_vertexai_tool(
function vertexai_message_parser (line 54) | def vertexai_message_parser(
function _vertexai_message_list_parser (line 79) | def _vertexai_message_list_parser(
function vertexai_function_response_parser (line 89) | def vertexai_function_response_parser(
function vertexai_process_response (line 104) | def vertexai_process_response(
function vertexai_process_json_response (line 121) | def vertexai_process_json_response(_kwargs: dict[str, Any], model: type[...
function from_vertexai (line 138) | def from_vertexai(
FILE: instructor/providers/writer/client.py
function from_writer (line 11) | def from_writer(
function from_writer (line 19) | def from_writer(
function from_writer (line 26) | def from_writer(
FILE: instructor/providers/writer/utils.py
function reask_writer_tools (line 16) | def reask_writer_tools(
function reask_writer_json (line 45) | def reask_writer_json(
function handle_writer_tools (line 69) | def handle_writer_tools(
function handle_writer_json (line 89) | def handle_writer_json(
FILE: instructor/providers/xai/client.py
function _raise_xai_sdk_missing (line 17) | def _raise_xai_sdk_missing() -> None:
function _get_model_schema (line 27) | def _get_model_schema(response_model: Any) -> dict[str, Any]:
function _get_model_name (line 48) | def _get_model_name(response_model: Any) -> str:
function _finalize_parsed_response (line 61) | def _finalize_parsed_response(parsed: Any, raw_response: Any) -> Any:
function from_xai (line 87) | def from_xai(
function from_xai (line 95) | def from_xai(
function from_xai (line 102) | def from_xai(
FILE: instructor/providers/xai/utils.py
function _convert_messages (line 22) | def _convert_messages(messages: list[dict[str, Any]]):
function reask_xai_json (line 54) | def reask_xai_json(
function reask_xai_tools (line 74) | def reask_xai_tools(
function handle_xai_json (line 103) | def handle_xai_json(
function handle_xai_tools (line 134) | def handle_xai_tools(
FILE: instructor/templating.py
function apply_template (line 9) | def apply_template(text: str, context: dict[str, Any]) -> str:
function process_message (line 14) | def process_message(
function handle_templating (line 84) | def handle_templating(
FILE: instructor/utils/__init__.py
function __getattr__ (line 60) | def __getattr__(name):
FILE: instructor/utils/core.py
function extract_json_from_codeblock (line 43) | def extract_json_from_codeblock(content: str) -> str:
function extract_json_from_stream (line 68) | def extract_json_from_stream(
function extract_json_from_stream_async (line 197) | async def extract_json_from_stream_async(
function update_total_usage (line 326) | def update_total_usage(
function dump_message (line 383) | def dump_message(message: ChatCompletionMessage) -> ChatCompletionMessag...
function is_async (line 416) | def is_async(func: Callable[..., Any]) -> bool:
function merge_consecutive_messages (line 425) | def merge_consecutive_messages(messages: list[dict[str, Any]]) -> list[d...
class classproperty (line 485) | class classproperty(Generic[R_co]):
method __init__ (line 499) | def __init__(self, method: Callable[[Any], R_co]) -> None:
method __get__ (line 502) | def __get__(self, instance: object, cls: type[Any]) -> R_co:
function get_message_content (line 506) | def get_message_content(message: ChatCompletionMessageParam) -> list[Any]:
function disable_pydantic_error_url (line 533) | def disable_pydantic_error_url():
function is_typed_dict (line 564) | def is_typed_dict(cls) -> bool:
function is_simple_type (line 572) | def is_simple_type(typehint: type[T]) -> bool:
function prepare_response_model (line 579) | def prepare_response_model(response_model: type[T] | None) -> type[T] | ...
FILE: instructor/utils/providers.py
class Provider (line 9) | class Provider(Enum):
function get_provider (line 32) | def get_provider(base_url: str) -> Provider:
FILE: instructor/validation/async_validators.py
class AsyncValidationContext (line 11) | class AsyncValidationContext:
method __init__ (line 14) | def __init__(self, context: dict[str, Any]):
function async_field_validator (line 18) | def async_field_validator(field: str, *fields: str) -> Callable[[T], T]:
function async_model_validator (line 43) | def async_model_validator() -> Callable[[T], T]:
FILE: instructor/validation/llm_validators.py
function llm_validator (line 9) | def llm_validator(
function openai_moderation (line 79) | def openai_moderation(client: OpenAI) -> Callable[[str], str]:
FILE: instructor/validators.py
function __getattr__ (line 9) | def __getattr__(name: str):
FILE: scripts/audit_patterns.py
function find_markdown_files (line 18) | def find_markdown_files(docs_dir: Path) -> List[Path]:
function audit_api_calls (line 23) | def audit_api_calls(content: str, file_path: Path) -> Dict[str, List[int]]:
function audit_old_init_patterns (line 42) | def audit_old_init_patterns(content: str, file_path: Path) -> Dict[str, ...
function audit_unused_imports (line 62) | def audit_unused_imports(content: str, file_path: Path) -> Dict[str, Lis...
function process_file (line 94) | def process_file(file_path: Path) -> Dict[str, Dict[str, List[int]]]:
function main (line 109) | def main():
FILE: scripts/check_blog_excerpts.py
function check_blog_excerpts (line 16) | def check_blog_excerpts(blog_posts_dir: str = "docs/blog/posts") -> bool:
function main (line 78) | def main():
FILE: scripts/check_links.py
function find_markdown_files (line 16) | def find_markdown_files(docs_dir: Path) -> list[Path]:
function extract_links (line 21) | def extract_links(content: str, file_path: Path) -> list[tuple[str, int]...
function resolve_link (line 45) | def resolve_link(link_url: str, source_file: Path, docs_dir: Path) -> tu...
function check_file (line 69) | def check_file(file_path: Path, docs_dir: Path) -> dict[str, list[tuple[...
function find_orphaned_pages (line 91) | def find_orphaned_pages(files: list[Path], docs_dir: Path) -> set[Path]:
function main (line 124) | def main():
FILE: scripts/fix_api_calls.py
function find_markdown_files (line 17) | def find_markdown_files(docs_dir: Path) -> list[Path]:
function replace_api_calls (line 22) | def replace_api_calls(content: str, dry_run: bool = False) -> tuple[str,...
function process_file (line 52) | def process_file(file_path: Path, dry_run: bool = False) -> int:
function main (line 71) | def main():
FILE: scripts/fix_doc_tests.py
function run_update (line 17) | def run_update(test_file: str) -> bool:
FILE: scripts/fix_old_patterns.py
function find_markdown_files (line 48) | def find_markdown_files(docs_dir: Path) -> List[Path]:
function extract_model_name (line 53) | def extract_model_name(content: str, match_start: int, match_end: int) -...
function replace_from_pattern (line 74) | def replace_from_pattern(
function replace_patch_pattern (line 110) | def replace_patch_pattern(content: str, dry_run: bool = False) -> Tuple[...
function replace_old_patterns (line 179) | def replace_old_patterns(content: str, dry_run: bool = False) -> Tuple[s...
function process_file (line 203) | def process_file(file_path: Path, dry_run: bool = False) -> int:
function main (line 222) | def main():
FILE: scripts/make_clean.py
function clean_markdown_content (line 17) | def clean_markdown_content(content: str) -> str:
function process_markdown_files (line 48) | def process_markdown_files(docs_dir: str = "docs", dry_run: bool = False...
function main (line 125) | def main():
FILE: scripts/make_desc.py
function generate_ai_frontmatter (line 17) | async def generate_ai_frontmatter(
function get_all_categories (line 75) | def get_all_categories(root_dir: str) -> set[str]:
function preview_categories (line 96) | def preview_categories(root_dir: str) -> None:
function process_file (line 115) | async def process_file(
function process_files (line 144) | async def process_files(
function main (line 186) | def main(
FILE: scripts/make_sitemap.py
function traverse_docs (line 18) | def traverse_docs(
function extract_markdown_links (line 43) | def extract_markdown_links(content: str) -> list[str]:
function normalize_path (line 72) | def normalize_path(path: str, current_path: str) -> str:
function analyze_content (line 107) | async def analyze_content(
function generate_sitemap (line 187) | async def generate_sitemap(
function main (line 301) | def main(
FILE: scripts/validate_headings.py
function find_markdown_files (line 17) | def find_markdown_files(docs_dir: Path) -> list[Path]:
function extract_headings (line 22) | def extract_headings(content: str) -> list[tuple[int, str, int]]:
function validate_headings (line 43) | def validate_headings(headings: list[tuple[int, str, int]]) -> dict[str,...
function process_file (line 76) | def process_file(file_path: Path) -> dict[str, list[str]]:
function main (line 86) | def main():
FILE: scripts/validate_meta_tags.py
function find_markdown_files (line 19) | def find_markdown_files(docs_dir: Path) -> List[Path]:
function extract_frontmatter (line 24) | def extract_frontmatter(content: str) -> Dict[str, str]:
function validate_file (line 53) | def validate_file(file_path: Path) -> Dict[str, List[str]]:
function main (line 101) | def main():
FILE: tests/docs/_concept_groups.py
function concept_paths (line 60) | def concept_paths(names: Iterable[str]) -> list[str]:
function all_concept_files (line 64) | def all_concept_files() -> list[str]:
function core_concept_files (line 68) | def core_concept_files() -> list[str]:
function collect_examples (line 75) | def collect_examples(files: Iterable[str]):
FILE: tests/docs/_example_groups.py
function example_paths (line 43) | def example_paths(names: Iterable[str]) -> list[str]:
function all_example_files (line 47) | def all_example_files() -> list[str]:
function core_example_files (line 51) | def core_example_files() -> list[str]:
function collect_examples (line 58) | def collect_examples(files: Iterable[str]):
FILE: tests/docs/conftest.py
function pytest_addoption (line 9) | def pytest_addoption(parser: pytest.Parser) -> None:
function eval_example (line 19) | def eval_example(
FILE: tests/docs/test_concepts.py
function test_format_concepts_core (line 10) | def test_format_concepts_core(example: CodeExample, eval_example: EvalEx...
FILE: tests/docs/test_concepts_advanced.py
function test_format_concepts_advanced (line 10) | def test_format_concepts_advanced(example: CodeExample, eval_example: Ev...
FILE: tests/docs/test_concepts_operations.py
function test_format_concepts_operations (line 10) | def test_format_concepts_operations(example: CodeExample, eval_example: ...
FILE: tests/docs/test_concepts_providers.py
function test_format_concepts_providers (line 10) | def test_format_concepts_providers(example: CodeExample, eval_example: E...
FILE: tests/docs/test_docs.py
function test_readme (line 6) | def test_readme(example: CodeExample, eval_example: EvalExample):
function test_index (line 14) | def test_index(example: CodeExample, eval_example: EvalExample):
FILE: tests/docs/test_examples.py
function test_index (line 9) | def test_index(example: CodeExample, eval_example: EvalExample):
FILE: tests/docs/test_examples_batch.py
function test_examples_batch (line 10) | def test_examples_batch(example: CodeExample, eval_example: EvalExample):
FILE: tests/docs/test_examples_integrations.py
function test_examples_integrations (line 10) | def test_examples_integrations(example: CodeExample, eval_example: EvalE...
FILE: tests/docs/test_examples_multimodal.py
function test_examples_multimodal (line 10) | def test_examples_multimodal(example: CodeExample, eval_example: EvalExa...
FILE: tests/docs/test_examples_providers.py
function test_examples_providers (line 10) | def test_examples_providers(example: CodeExample, eval_example: EvalExam...
FILE: tests/docs/test_hub.py
function test_format_blog (line 6) | def test_format_blog(example: CodeExample, eval_example: EvalExample) ->...
FILE: tests/docs/test_mkdocs.py
function test_files_good (line 12) | def test_files_good(fpath):
FILE: tests/docs/test_posts.py
function test_index (line 6) | def test_index(example: CodeExample, eval_example: EvalExample):
FILE: tests/docs/test_prompt_tips.py
function test_format_concepts (line 7) | def test_format_concepts(example: CodeExample, eval_example: EvalExample):
FILE: tests/dsl/test_gemini_tools_async_streaming.py
function test_sync_extract_json_from_stream_handles_codeblock (line 18) | def test_sync_extract_json_from_stream_handles_codeblock():
function test_async_extract_json_from_stream_handles_codeblock (line 25) | async def test_async_extract_json_from_stream_handles_codeblock():
function test_sync_gemini_tools_mode_triggers_json_extraction (line 36) | def test_sync_gemini_tools_mode_triggers_json_extraction():
function test_async_gemini_tools_mode_triggers_json_extraction (line 43) | def test_async_gemini_tools_mode_triggers_json_extraction():
FILE: tests/dsl/test_partial.py
class SampleNestedPartial (line 21) | class SampleNestedPartial(BaseModel):
class SamplePartial (line 25) | class SamplePartial(BaseModel):
class NestedA (line 30) | class NestedA(BaseModel):
class NestedB (line 35) | class NestedB(BaseModel):
class UnionWithNested (line 42) | class UnionWithNested(BaseModel):
function test_partial (line 48) | def test_partial():
function test_partial_with_whitespace (line 119) | def test_partial_with_whitespace():
function test_async_partial_with_whitespace (line 129) | async def test_async_partial_with_whitespace():
function test_summary_extraction (line 146) | def test_summary_extraction():
function test_summary_extraction_async (line 177) | async def test_summary_extraction_async():
function test_union_with_nested (line 206) | def test_union_with_nested():
function test_partial_with_default_factory (line 213) | def test_partial_with_default_factory():
class TestMakeFieldOptionalWorksWithPydanticV2 (line 244) | class TestMakeFieldOptionalWorksWithPydanticV2:
method test_deepcopy_approach_makes_field_optional (line 253) | def test_deepcopy_approach_makes_field_optional(self):
method test_make_field_optional_function_works (line 270) | def test_make_field_optional_function_works(self):
method test_partial_model_validates_empty_dict (line 286) | def test_partial_model_validates_empty_dict(self):
method test_partial_validates_incremental_streaming_data (line 304) | def test_partial_validates_incremental_streaming_data(self):
method test_partial_with_all_field_types (line 328) | def test_partial_with_all_field_types(self):
class TestLiteralTypeStreaming (line 353) | class TestLiteralTypeStreaming:
method test_literal_without_mixin_fails_on_incomplete_string (line 363) | def test_literal_without_mixin_fails_on_incomplete_string(self):
method test_literal_with_mixin_incomplete_string_becomes_none (line 380) | def test_literal_with_mixin_incomplete_string_becomes_none(self):
method test_literal_accepts_valid_complete_value (line 397) | def test_literal_accepts_valid_complete_value(self):
method test_literal_with_missing_field_is_none (line 412) | def test_literal_with_missing_field_is_none(self):
method test_literal_rejects_complete_invalid_value (line 426) | def test_literal_rejects_complete_invalid_value(self):
class TestPartialStreamingWithComplexTypes (line 440) | class TestPartialStreamingWithComplexTypes:
method test_enum_incomplete_string_becomes_none (line 446) | def test_enum_incomplete_string_becomes_none(self):
method test_enum_accepts_valid_complete_value (line 464) | def test_enum_accepts_valid_complete_value(self):
method test_optional_literal_incomplete_string_becomes_none (line 480) | def test_optional_literal_incomplete_string_becomes_none(self):
method test_optional_literal_accepts_valid_value (line 493) | def test_optional_literal_accepts_valid_value(self):
method test_union_literal_incomplete_string_becomes_none (line 505) | def test_union_literal_incomplete_string_becomes_none(self):
method test_union_literal_accepts_valid_values (line 519) | def test_union_literal_accepts_valid_values(self):
method test_union_of_literals_matches_all_branches (line 534) | def test_union_of_literals_matches_all_branches(self):
method test_list_literal_incomplete_item_dropped (line 549) | def test_list_literal_incomplete_item_dropped(self):
method test_list_literal_accepts_valid_items (line 563) | def test_list_literal_accepts_valid_items(self):
class TestDiscriminatedUnionPartial (line 576) | class TestDiscriminatedUnionPartial:
method test_discriminated_union_not_compatible_with_partial (line 586) | def test_discriminated_union_not_compatible_with_partial(self):
method test_union_without_discriminator_works (line 607) | def test_union_without_discriminator_works(self):
method test_single_value_literal_incomplete_string (line 629) | def test_single_value_literal_incomplete_string(self):
class TestModelValidatorsDuringStreaming (line 648) | class TestModelValidatorsDuringStreaming:
method test_model_validator_skipped_during_streaming (line 656) | def test_model_validator_skipped_during_streaming(self):
method test_model_validator_runs_when_complete (line 681) | def test_model_validator_runs_when_complete(self):
method test_multiple_model_validators (line 707) | def test_multiple_model_validators(self):
method test_validators_run_without_streaming_context (line 743) | def test_validators_run_without_streaming_context(self):
class TestFinalValidationAfterStreaming (line 771) | class TestFinalValidationAfterStreaming:
method test_final_validation_catches_missing_required_fields (line 778) | def test_final_validation_catches_missing_required_fields(self):
method test_final_validation_passes_with_all_required_fields (line 797) | def test_final_validation_passes_with_all_required_fields(self):
method test_final_validation_runs_model_validators (line 815) | def test_final_validation_runs_model_validators(self):
method test_streaming_yields_partial_objects_before_final_validation (line 839) | def test_streaming_yields_partial_objects_before_final_validation(self):
method test_original_model_reference_is_stored (line 865) | def test_original_model_reference_is_stored(self):
method test_async_final_validation_catches_missing_required_fields (line 877) | async def test_async_final_validation_catches_missing_required_fields(...
class TestRecursiveModels (line 896) | class TestRecursiveModels:
method test_basic_recursive_model (line 899) | def test_basic_recursive_model(self):
method test_nested_recursive_model (line 917) | def test_nested_recursive_model(self):
method test_mutually_recursive_models (line 943) | def test_mutually_recursive_models(self):
method test_direct_self_reference (line 969) | def test_direct_self_reference(self):
method test_complex_recursive_with_validators (line 989) | def test_complex_recursive_with_validators(self):
method test_recursive_with_union_types (line 1092) | def test_recursive_with_union_types(self):
FILE: tests/dsl/test_simple_type.py
class SimpleTypeTests (line 8) | class SimpleTypeTests(unittest.TestCase):
method test_is_simple_type_with_base_model (line 9) | def test_is_simple_type_with_base_model(self):
method test_is_simple_type_with_str (line 15) | def test_is_simple_type_with_str(self):
method test_is_simple_type_with_int (line 18) | def test_is_simple_type_with_int(self):
method test_is_simple_type_with_float (line 21) | def test_is_simple_type_with_float(self):
method test_is_simple_type_with_bool (line 24) | def test_is_simple_type_with_bool(self):
method test_is_simple_type_with_enum (line 27) | def test_is_simple_type_with_enum(self):
method test_is_simple_type_with_annotated (line 33) | def test_is_simple_type_with_annotated(self):
method test_is_simple_type_with_literal (line 37) | def test_is_simple_type_with_literal(self):
method test_is_simple_type_with_union (line 41) | def test_is_simple_type_with_union(self):
method test_is_simple_type_with_iterable (line 45) | def test_is_simple_type_with_iterable(self):
FILE: tests/dsl/test_simple_type_fix.py
class TestSimpleTypeFix (line 8) | class TestSimpleTypeFix(unittest.TestCase):
method test_list_with_union_type (line 9) | def test_list_with_union_type(self):
method test_list_with_union_type_alternative_syntax (line 20) | def test_list_with_union_type_alternative_syntax(self):
FILE: tests/genai/test_safety_settings.py
function test_update_genai_kwargs_safety_settings_with_image_content_uses_image_categories (line 4) | def test_update_genai_kwargs_safety_settings_with_image_content_uses_ima...
function test_update_genai_kwargs_maps_text_thresholds_to_image_categories (line 41) | def test_update_genai_kwargs_maps_text_thresholds_to_image_categories():
function test_handle_genai_tools_autodetect_images_uses_image_categories (line 81) | def test_handle_genai_tools_autodetect_images_uses_image_categories():
FILE: tests/llm/shared_config.py
function get_available_providers (line 82) | def get_available_providers() -> list[tuple[str, instructor.Mode]]:
function pytest_generate_tests (line 114) | def pytest_generate_tests(metafunc):
function pytest_configure (line 130) | def pytest_configure(config):
function skip_if_provider_unavailable (line 155) | def skip_if_provider_unavailable(provider_name: str):
FILE: tests/llm/test_anthropic/test_multimodal.py
class ImageDescription (line 11) | class ImageDescription(BaseModel):
function test_multimodal_image_description (line 29) | def test_multimodal_image_description(model, mode):
function test_multimodal_image_description_autodetect (line 58) | def test_multimodal_image_description_autodetect(model, mode):
function test_multimodal_image_description_autodetect_image_params (line 90) | def test_multimodal_image_description_autodetect_image_params(model, mode):
function test_multimodal_image_description_autodetect_image_params_cache (line 125) | def test_multimodal_image_description_autodetect_image_params_cache(mode...
class LineItem (line 164) | class LineItem(BaseModel):
class Receipt (line 170) | class Receipt(BaseModel):
function test_multimodal_pdf_file (line 177) | def test_multimodal_pdf_file(model, mode, pdf_source):
function test_multimodal_pdf_file_with_cache_control (line 213) | def test_multimodal_pdf_file_with_cache_control(model, mode, pdf_source):
FILE: tests/llm/test_anthropic/test_reasoning.py
class Answer (line 5) | class Answer(BaseModel):
function test_reasoning (line 9) | def test_reasoning():
FILE: tests/llm/test_anthropic/test_system.py
class User (line 9) | class User(BaseModel):
function test_creation (line 15) | def test_creation(model, mode):
function test_creation_with_system_cache (line 42) | def test_creation_with_system_cache(model, mode):
function test_creation_with_system_cache_anthropic_style (line 83) | def test_creation_with_system_cache_anthropic_style(model, mode):
function test_creation_no_response_model (line 121) | def test_creation_no_response_model(model, mode):
FILE: tests/llm/test_bedrock/conftest.py
function tiny_png_bytes (line 7) | def tiny_png_bytes() -> bytes:
function tiny_png_data_url (line 15) | def tiny_png_data_url(tiny_png_bytes: bytes) -> str:
function image_url (line 20) | def image_url() -> str:
function tiny_pdf_bytes (line 26) | def tiny_pdf_bytes() -> bytes:
FILE: tests/llm/test_bedrock/test_bedrock_native_passthrough.py
function test_bedrock_native_text_passthrough (line 5) | def test_bedrock_native_text_passthrough():
function test_bedrock_native_image_passthrough (line 11) | def test_bedrock_native_image_passthrough(tiny_png_bytes: bytes):
function test_bedrock_native_document_passthrough (line 17) | def test_bedrock_native_document_passthrough(tiny_pdf_bytes: bytes):
FILE: tests/llm/test_bedrock/test_normalize.py
function test_normalize_bedrock_image_format (line 25) | def test_normalize_bedrock_image_format(inp, expected):
FILE: tests/llm/test_bedrock/test_openai_image_conversion.py
function test_openai_image_part_to_bedrock_data_url (line 10) | def test_openai_image_part_to_bedrock_data_url(tiny_png_data_url: str):
function test_openai_image_part_to_bedrock_https (line 20) | def test_openai_image_part_to_bedrock_https(image_url: str):
function test_to_bedrock_content_items_openai_combo (line 38) | def test_to_bedrock_content_items_openai_combo(
FILE: tests/llm/test_bedrock/test_prepare_kwargs.py
function test_prepare_bedrock_kwargs_openai_text_plus_image (line 5) | def test_prepare_bedrock_kwargs_openai_text_plus_image(image_url: str):
FILE: tests/llm/test_core_providers/capabilities.py
function get_provider_name (line 127) | def get_provider_name(model_string: str) -> str:
function provider_supports (line 132) | def provider_supports(
function skip_if_unsupported (line 151) | def skip_if_unsupported(
FILE: tests/llm/test_core_providers/test_basic_extraction.py
class User (line 12) | class User(BaseModel):
class UserList (line 17) | class UserList(BaseModel):
class Address (line 21) | class Address(BaseModel):
class UserWithAddress (line 27) | class UserWithAddress(BaseModel):
function test_simple_extraction (line 34) | async def test_simple_extraction(provider_config):
function test_list_extraction (line 50) | async def test_list_extraction(provider_config):
function test_nested_model_extraction (line 72) | async def test_nested_model_extraction(provider_config):
function test_extraction_with_field_descriptions (line 97) | async def test_extraction_with_field_descriptions(provider_config):
FILE: tests/llm/test_core_providers/test_response_modes.py
class Task (line 14) | class Task(BaseModel):
function test_create_method (line 21) | async def test_create_method(provider_config):
function test_chat_completions_create_method (line 42) | async def test_chat_completions_create_method(provider_config):
function test_messages_create_method (line 62) | async def test_messages_create_method(provider_config):
function test_create_with_completion (line 82) | async def test_create_with_completion(provider_config):
function test_response_model_none (line 105) | async def test_response_model_none(provider_config):
FILE: tests/llm/test_core_providers/test_retries.py
class ValidatedUser (line 10) | class ValidatedUser(BaseModel):
method name_must_have_content (line 16) | def name_must_have_content(cls, v: str) -> str:
function test_max_retries_parameter (line 23) | async def test_max_retries_parameter(provider_config):
function test_validation_with_retries (line 40) | async def test_validation_with_retries(provider_config):
FILE: tests/llm/test_core_providers/test_simple_types.py
function test_int (line 18) | async def test_int(provider_config):
function test_bool (line 36) | async def test_bool(provider_config):
function test_str (line 54) | async def test_str(provider_config):
function test_literal (line 72) | async def test_literal(provider_config):
function test_union (line 90) | async def test_union(provider_config):
function test_enum (line 109) | async def test_enum(provider_config):
function test_annotated_int (line 134) | async def test_annotated_int(provider_config):
FILE: tests/llm/test_core_providers/test_streaming.py
class User (line 16) | class User(BaseModel):
class Weather (line 21) | class Weather(BaseModel):
class SearchQuery (line 27) | class SearchQuery(BaseModel):
function test_partial_streaming (line 33) | async def test_partial_streaming(provider_config):
function test_iterable_streaming (line 57) | async def test_iterable_streaming(provider_config):
function test_iterable_streaming_with_stream_flag (line 80) | async def test_iterable_streaming_with_stream_flag(provider_config):
function test_iterable_union_streaming (line 99) | async def test_iterable_union_streaming(provider_config):
function test_create_iterable_method (line 123) | async def test_create_iterable_method(provider_config):
FILE: tests/llm/test_core_providers/test_validation.py
class UserWithValidation (line 12) | class UserWithValidation(BaseModel):
method name_must_not_be_empty (line 18) | def name_must_not_be_empty(cls, v: str) -> str:
class Email (line 24) | class Email(BaseModel):
method email_must_be_valid (line 29) | def email_must_be_valid(cls, v: str) -> str:
class TemperatureReading (line 35) | class TemperatureReading(BaseModel):
function test_basic_validation (line 41) | async def test_basic_validation(provider_config):
function test_list_with_validation (line 58) | async def test_list_with_validation(provider_config):
function test_custom_validator (line 81) | async def test_custom_validator(provider_config):
function test_field_constraints (line 97) | async def test_field_constraints(provider_config):
function test_max_retries (line 118) | async def test_max_retries(provider_config):
FILE: tests/llm/test_gemini/evals/test_extract_users.py
class UserDetails (line 8) | class UserDetails(BaseModel):
function test_extract (line 22) | def test_extract(model, data, mode):
FILE: tests/llm/test_gemini/test_list_content.py
class User (line 7) | class User(BaseModel):
class UserList (line 12) | class UserList(BaseModel):
function test_list_of_strings (line 20) | async def test_list_of_strings():
FILE: tests/llm/test_gemini/test_multimodal_content.py
class Description (line 6) | class Description(BaseModel):
function test_audio_compatability_list (line 16) | def test_audio_compatability_list():
function test_audio_compatability_multiple_messages (line 37) | def test_audio_compatability_multiple_messages():
FILE: tests/llm/test_genai/conftest.py
function client (line 26) | def client():
function aclient (line 31) | def aclient():
function genai_client (line 36) | def genai_client():
FILE: tests/llm/test_genai/test_decimal.py
class Receipt (line 8) | class Receipt(BaseModel):
method parse_decimals (line 16) | def parse_decimals(cls, v):
class Invoice (line 22) | class Invoice(BaseModel):
method parse_grand_total (line 28) | def parse_grand_total(cls, v):
function test_decimal_extraction (line 36) | def test_decimal_extraction(client, model, mode):
function test_decimal_extraction_async (line 76) | async def test_decimal_extraction_async(aclient, model, mode):
class SimpleProduct (line 99) | class SimpleProduct(BaseModel):
method parse_price (line 105) | def parse_price(cls, v):
function test_simple_decimal_extraction (line 113) | def test_simple_decimal_extraction(client, model, mode):
FILE: tests/llm/test_genai/test_format.py
class User (line 10) | class User(BaseModel):
class Users (line 15) | class Users(BaseModel):
function test_simple_string_message (line 21) | def test_simple_string_message(client, model, mode):
function test_system_prompt (line 36) | def test_system_prompt(client, model, mode):
function test_system_kwarg (line 60) | def test_system_kwarg(client, model, mode):
function test_system_kwarg_genai (line 81) | def test_system_kwarg_genai(client, model, mode):
function test_system_prompt_list (line 106) | def test_system_prompt_list(client, model, mode):
function test_format_genai_typed (line 133) | def test_format_genai_typed(client, model, mode):
function test_format_string (line 154) | def test_format_string(client, model: str, mode: instructor.Mode, is_lis...
FILE: tests/llm/test_genai/test_invalid_schema.py
function test_nested (line 15) | def test_nested(mode, model):
function test_union (line 43) | def test_union(mode, model):
function test_optional_types_allowed (line 64) | def test_optional_types_allowed():
function test_union_types_allowed_schema (line 81) | def test_union_types_allowed_schema():
function test_genai_api_call_with_different_types (line 102) | def test_genai_api_call_with_different_types(mode):
function test_genai_api_call_with_nested_models (line 132) | def test_genai_api_call_with_nested_models(mode):
function test_genai_api_call_with_different_types_async (line 170) | async def test_genai_api_call_with_different_types_async(mode):
function test_genai_api_call_with_nested_models_async (line 201) | async def test_genai_api_call_with_nested_models_async(mode):
FILE: tests/llm/test_genai/test_reask.py
function test_genai_tools_validation_retry_preserves_model_content (line 8) | def test_genai_tools_validation_retry_preserves_model_content(mode):
FILE: tests/llm/test_genai/test_schema_conversion.py
class Priority (line 13) | class Priority(Enum):
class SimpleModel (line 19) | class SimpleModel(BaseModel):
class OptionalModel (line 25) | class OptionalModel(BaseModel):
class EnumModel (line 31) | class EnumModel(BaseModel):
class NestedModel (line 36) | class NestedModel(BaseModel):
function test_simple_schema_conversion (line 42) | def test_simple_schema_conversion():
function test_optional_schema_conversion (line 65) | def test_optional_schema_conversion():
function test_enum_schema_conversion (line 94) | def test_enum_schema_conversion():
function test_nested_schema_conversion (line 121) | def test_nested_schema_conversion():
function test_verify_no_unions_valid (line 147) | def test_verify_no_unions_valid():
function test_verify_no_unions_invalid (line 158) | def test_verify_no_unions_invalid():
function test_schema_without_refs (line 168) | def test_schema_without_refs():
function test_schema_with_description (line 187) | def test_schema_with_description():
function test_union_type_raises_error (line 206) | def test_union_type_raises_error():
function test_verify_no_unions_allows_optional (line 221) | def test_verify_no_unions_allows_optional():
function test_verify_no_unions_allows_decimal (line 235) | def test_verify_no_unions_allows_decimal():
function test_verify_no_unions_rejects_other_unions (line 251) | def test_verify_no_unions_rejects_other_unions():
function test_verify_no_unions_rejects_complex_unions (line 262) | def test_verify_no_unions_rejects_complex_unions():
function test_verify_no_unions_nested_schemas (line 277) | def test_verify_no_unions_nested_schemas():
function test_decimal_schema_conversion_succeeds (line 320) | def test_decimal_schema_conversion_succeeds():
FILE: tests/llm/test_genai/test_utils.py
function test_update_genai_kwargs_basic (line 4) | def test_update_genai_kwargs_basic():
function test_update_genai_kwargs_safety_settings (line 33) | def test_update_genai_kwargs_safety_settings():
function test_update_genai_kwargs_with_custom_safety_settings (line 69) | def test_update_genai_kwargs_with_custom_safety_settings():
function test_update_genai_kwargs_safety_settings_with_image_content_uses_image_categories (line 111) | def test_update_genai_kwargs_safety_settings_with_image_content_uses_ima...
function test_update_genai_kwargs_maps_text_thresholds_to_image_categories (line 148) | def test_update_genai_kwargs_maps_text_thresholds_to_image_categories():
function test_update_genai_kwargs_none_values (line 188) | def test_update_genai_kwargs_none_values():
function test_update_genai_kwargs_empty (line 207) | def test_update_genai_kwargs_empty():
function test_update_genai_kwargs_preserves_original (line 218) | def test_update_genai_kwargs_preserves_original():
function test_update_genai_kwargs_thinking_config (line 239) | def test_update_genai_kwargs_thinking_config():
function test_update_genai_kwargs_thinking_config_none (line 253) | def test_update_genai_kwargs_thinking_config_none():
function test_update_genai_kwargs_no_thinking_config (line 264) | def test_update_genai_kwargs_no_thinking_config():
function test_handle_genai_structured_outputs_thinking_config_in_config (line 283) | def test_handle_genai_structured_outputs_thinking_config_in_config():
function test_handle_genai_structured_outputs_thinking_config_kwarg_priority (line 316) | def test_handle_genai_structured_outputs_thinking_config_kwarg_priority():
function test_handle_genai_tools_thinking_config_in_config (line 347) | def test_handle_genai_tools_thinking_config_in_config():
FILE: tests/llm/test_litellm.py
function test_litellm_create (line 17) | def test_litellm_create():
function test_async_litellm_create (line 23) | def test_async_litellm_create():
FILE: tests/llm/test_new_client.py
class User (line 24) | class User(BaseModel):
function test_client_create (line 29) | def test_client_create():
function test_client_messages_create (line 41) | def test_client_messages_create():
function test_client_chat_completions_create_with_response (line 53) | def test_client_chat_completions_create_with_response():
function test_client_chat_completions_create (line 69) | def test_client_chat_completions_create():
function test_client_chat_completions_create_partial (line 81) | def test_client_chat_completions_create_partial():
function test_client_chat_completions_create_iterable (line 92) | def test_client_chat_completions_create_iterable():
function test_async_client_chat_completions_create (line 107) | async def test_async_client_chat_completions_create():
function test_async_client_chat_completions_create_partial (line 121) | async def test_async_client_chat_completions_create_partial():
function test_async_client_chat_completions_create_iterable (line 134) | async def test_async_client_chat_completions_create_iterable():
function test_async_client_chat_completions_create_with_response (line 147) | async def test_async_client_chat_completions_create_with_response():
function test_client_from_anthropic_with_response (line 163) | def test_client_from_anthropic_with_response():
function test_client_anthropic_response (line 180) | def test_client_anthropic_response():
function test_client_anthropic_bedrock_response (line 198) | def test_client_anthropic_bedrock_response():
function test_async_client_anthropic_response (line 222) | async def test_async_client_anthropic_response():
function test_async_client_anthropic_bedrock_response (line 241) | async def test_async_client_anthropic_bedrock_response():
function test_client_cohere_response (line 265) | def test_client_cohere_response():
function test_client_cohere_response_with_nested_classes (line 283) | def test_client_cohere_response_with_nested_classes():
function test_client_cohere_async (line 320) | async def test_client_cohere_async():
function test_client_from_mistral_with_response (line 356) | def test_client_from_mistral_with_response():
function test_client_mistral_response (line 375) | def test_client_mistral_response():
FILE: tests/llm/test_openai/conftest.py
function client (line 18) | def client():
function aclient (line 23) | def aclient():
FILE: tests/llm/test_openai/slow/test_response.py
class UserProfile (line 11) | class UserProfile(BaseModel):
function test_basic_response_methods (line 24) | def test_basic_response_methods(client: OpenAI, mode, model):
function test_create_iterable_from_create (line 39) | def test_create_iterable_from_create(client: OpenAI, mode, model):
function test_create_with_completion (line 58) | def test_create_with_completion(client: OpenAI, mode, model):
function test_create_iterable (line 76) | def test_create_iterable(client: OpenAI, mode, model):
function test_create_partial (line 95) | def test_create_partial(client: OpenAI, mode, model):
function test_basic_response_methods_async (line 121) | async def test_basic_response_methods_async(client: AsyncOpenAI, mode, m...
function test_create_iterable_from_create_async (line 137) | async def test_create_iterable_from_create_async(aclient: AsyncOpenAI, m...
function test_create_with_completion_async (line 159) | async def test_create_with_completion_async(aclient: AsyncOpenAI, mode, ...
function test_create_iterable_async (line 178) | async def test_create_iterable_async(aclient: AsyncOpenAI, mode, model):
function test_create_partial_async (line 198) | async def test_create_partial_async(aclient: AsyncOpenAI, mode, model):
FILE: tests/llm/test_openai/test_attr.py
function test_has_embedding (line 6) | def test_has_embedding():
function test_has_embedding_async (line 17) | async def test_has_embedding_async():
FILE: tests/llm/test_openai/test_hooks.py
function client (line 8) | def client():
function log_kwargs (line 12) | def log_kwargs(*args, **kwargs):
function log_kwargs_1 (line 16) | def log_kwargs_1(*args, **kwargs):
function log_kwargs_2 (line 20) | def log_kwargs_2(*args, **kwargs):
function test_on_method_str (line 32) | def test_on_method_str(
function test_on_method_enum (line 52) | def test_on_method_enum(
function test_off_method_str (line 72) | def test_off_method_str(
function test_off_method_enum (line 99) | def test_off_method_enum(
function test_clear_method_str (line 124) | def test_clear_method_str(
function test_clear_method (line 145) | def test_clear_method(
function test_clear_no_args (line 164) | def test_clear_no_args(
FILE: tests/llm/test_openai/test_multimodal.py
class LineItem (line 26) | class LineItem(BaseModel):
class Receipt (line 32) | class Receipt(BaseModel):
function gettysburg_audio (line 37) | def gettysburg_audio():
function test_multimodal_audio_description (line 51) | def test_multimodal_audio_description(audio_file, mode, client):
class ImageDescription (line 80) | class ImageDescription(BaseModel):
function test_multimodal_image_description (line 87) | def test_multimodal_image_description(model, mode, client):
function test_multimodal_image_description_autodetect (line 117) | def test_multimodal_image_description_autodetect(model, mode, client):
function test_multimodal_image_description_autodetect_no_response_model (line 148) | def test_multimodal_image_description_autodetect_no_response_model(model...
function test_multimodal_pdf_file (line 180) | def test_multimodal_pdf_file(model, mode, client, pdf_source):
FILE: tests/llm/test_openai/test_multitask.py
class User (line 10) | class User(BaseModel):
function test_multi_user (line 19) | def test_multi_user(model, mode, client):
function async_map_chat_completion_to_response (line 55) | async def async_map_chat_completion_to_response(
function test_multi_user_tools_mode_async (line 67) | async def test_multi_user_tools_mode_async(model, mode, aclient):
function test_multi_user_stream (line 109) | def test_multi_user_stream(model, mode, client):
function test_multi_user_tools_mode_async_stream (line 144) | async def test_multi_user_tools_mode_async_stream(model, mode, aclient):
FILE: tests/llm/test_openai/test_patch.py
class UserExtract (line 12) | class UserExtract(BaseModel):
class UserExtractTypedDict (line 17) | class UserExtractTypedDict(TypedDict):
function test_typed_dict (line 23) | def test_typed_dict(model, mode, client):
function test_runmodel (line 48) | def test_runmodel(model, mode, client):
function test_runmodel_async (line 76) | async def test_runmodel_async(model, mode, aclient):
class UserExtractValidated (line 102) | class UserExtractValidated(BaseModel):
method validate_name (line 108) | def validate_name(cls, v):
function test_runmodel_validator (line 117) | def test_runmodel_validator(model, mode, client):
function test_runmodel_async_validator (line 143) | async def test_runmodel_async_validator(model, mode, aclient):
FILE: tests/llm/test_openai/test_validation_context.py
class Message (line 9) | class Message(BaseModel):
method no_banned_words (line 14) | def no_banned_words(cls, v: str, info: ValidationInfo):
function test_banned_words_validation (line 29) | def test_banned_words_validation(model: str, mode: instructor.Mode, clie...
function test_banned_words_validation_old (line 49) | def test_banned_words_validation_old(model: str, mode: instructor.Mode, ...
function test_no_banned_words_validation (line 69) | def test_no_banned_words_validation(model: str, mode: instructor.Mode, c...
function test_forced_words_validation (line 90) | def test_forced_words_validation(model: str, mode: instructor.Mode, clie...
FILE: tests/llm/test_openai/test_validators.py
function test_patch_completes_successfully (line 13) | def test_patch_completes_successfully(client):
function test_runmodel_validator_error (line 24) | def test_runmodel_validator_error(model, mode, client):
function test_runmodel_validator_default_openai_client (line 50) | def test_runmodel_validator_default_openai_client(model, client):
FILE: tests/llm/test_vertexai/test_deprecated_async.py
class User (line 8) | class User(BaseModel):
function test_deprecated_async_warning (line 14) | def test_deprecated_async_warning(_):
function test_both_async_params_error (line 27) | def test_both_async_params_error(_):
FILE: tests/llm/test_vertexai/test_format.py
class User (line 9) | class User(BaseModel):
function test_format_string (line 15) | def test_format_string(model, mode, is_list):
FILE: tests/llm/test_vertexai/test_message_parser.py
function test_vertexai_message_parser_string_content (line 6) | def test_vertexai_message_parser_string_content():
function test_vertexai_message_parser_list_content (line 17) | def test_vertexai_message_parser_list_content():
function test_vertexai_message_parser_invalid_content (line 39) | def test_vertexai_message_parser_invalid_content():
function test_vertexai_message_parser_invalid_list_item (line 46) | def test_vertexai_message_parser_invalid_list_item():
FILE: tests/llm/test_vertexai/test_modes.py
class Item (line 15) | class Item(BaseModel):
class Order (line 20) | class Order(BaseModel):
function test_mixed_content_types (line 26) | def test_mixed_content_types(model, mode):
FILE: tests/llm/test_writer/conftest.py
function configure_writer (line 14) | def configure_writer():
FILE: tests/llm/test_writer/evals/test_classification_enums.py
class Labels (line 14) | class Labels(str, enum.Enum):
class SinglePrediction (line 19) | class SinglePrediction(BaseModel):
function test_writer_classification (line 40) | def test_writer_classification(
class MultiLabels (line 61) | class MultiLabels(str, enum.Enum):
class MultiClassPrediction (line 67) | class MultiClassPrediction(BaseModel):
function test_writer_multi_classify (line 88) | def test_writer_multi_classify(
FILE: tests/llm/test_writer/evals/test_classification_literals.py
class SinglePrediction (line 13) | class SinglePrediction(BaseModel):
function test_classification (line 29) | async def test_classification(
class MultiClassPrediction (line 50) | class MultiClassPrediction(BaseModel):
function test_writer_multi_classify (line 72) | async def test_writer_multi_classify(
FILE: tests/llm/test_writer/evals/test_entities.py
class Property (line 13) | class Property(BaseModel):
class Entity (line 19) | class Entity(BaseModel):
class DocumentExtraction (line 38) | class DocumentExtraction(BaseModel):
function ask_ai (line 45) | def ask_ai(content: str, model: str, client: Instructor) -> DocumentExtr...
function test_writer_extract (line 91) | def test_writer_extract(model: str, mode: instructor.Mode):
FILE: tests/llm/test_writer/evals/test_extract_users.py
class UserDetails (line 9) | class UserDetails(BaseModel):
function test_writer_extract (line 22) | def test_writer_extract(
FILE: tests/llm/test_writer/evals/test_sentiment_analysis.py
class Sentiment (line 11) | class Sentiment(str, enum.Enum):
class SentimentAnalysis (line 17) | class SentimentAnalysis(BaseModel):
function test_writer_sentiment_analysis (line 38) | def test_writer_sentiment_analysis(
FILE: tests/llm/test_writer/test_format_common_models.py
class User (line 7) | class User(BaseModel):
class UserList (line 12) | class UserList(BaseModel):
function test_writer_format_literal (line 26) | def test_writer_format_literal(model: str, mode: instructor.Mode):
function test_writer_format_enum (line 46) | def test_writer_format_enum(model: str, mode: instructor.Mode):
function test_writer_format_bool (line 71) | def test_writer_format_bool(model: str, mode: instructor.Mode):
function test_writer_format_sync (line 91) | def test_writer_format_sync(model: str, mode: instructor.Mode):
function test_writer_format_async (line 116) | async def test_writer_format_async(mode: instructor.Mode, model: str):
function test_writer_format_list_of_strings (line 142) | def test_writer_format_list_of_strings(mode: instructor.Mode, model: str):
FILE: tests/llm/test_writer/test_format_difficult_models.py
class Item (line 10) | class Item(BaseModel):
class Order (line 15) | class Order(BaseModel):
function test_writer_format_nested_model (line 21) | def test_writer_format_nested_model(mode: instructor.Mode, model: str):
class Book (line 54) | class Book(BaseModel):
class LibraryRecord (line 61) | class LibraryRecord(BaseModel):
function test_writer_format_complex_nested_model (line 68) | def test_writer_format_complex_nested_model(mode: instructor.Mode, model...
FILE: tests/processing/test_anthropic_json.py
class _AnthropicTestModel (line 18) | class _AnthropicTestModel(instructor.OpenAISchema): # type: ignore[misc]
function _build_message (line 22) | def _build_message(data_content: str) -> Message:
function test_parse_anthropic_json_strict_control_characters (line 35) | def test_parse_anthropic_json_strict_control_characters() -> None:
function test_parse_anthropic_json_non_strict_preserves_control_characters (line 42) | def test_parse_anthropic_json_non_strict_preserves_control_characters() ...
FILE: tests/test_auto_client.py
class User (line 9) | class User(BaseModel):
function should_skip_provider (line 37) | def should_skip_provider(provider_string: str) -> bool:
function test_user_extraction_sync (line 50) | def test_user_extraction_sync(provider_string):
function test_user_extraction_async (line 72) | async def test_user_extraction_async(provider_string):
function test_invalid_provider_format (line 92) | def test_invalid_provider_format():
function test_unsupported_provider (line 101) | def test_unsupported_provider():
function test_additional_kwargs_passed (line 110) | def test_additional_kwargs_passed():
function test_api_key_parameter_extraction (line 140) | def test_api_key_parameter_extraction():
function test_api_key_parameter_with_environment_fallback (line 163) | def test_api_key_parameter_with_environment_fallback():
function test_api_key_parameter_with_async_client (line 189) | def test_api_key_parameter_with_async_client():
function test_api_key_parameter_not_passed_when_none (line 212) | def test_api_key_parameter_not_passed_when_none():
function test_api_key_logging (line 235) | def test_api_key_logging():
function test_openai_provider_respects_base_url (line 273) | def test_openai_provider_respects_base_url():
function test_openai_provider_async_client_with_base_url (line 298) | def test_openai_provider_async_client_with_base_url():
function test_openai_provider_without_base_url (line 325) | def test_openai_provider_without_base_url():
function test_databricks_provider_uses_environment_configuration (line 346) | def test_databricks_provider_uses_environment_configuration():
function test_databricks_provider_respects_custom_base_url (line 380) | def test_databricks_provider_respects_custom_base_url():
function test_databricks_provider_async_client (line 414) | def test_databricks_provider_async_client():
function test_databricks_provider_requires_token (line 448) | def test_databricks_provider_requires_token():
function test_databricks_provider_requires_host (line 469) | def test_databricks_provider_requires_host():
function test_genai_mode_parameter_passed_to_provider (line 490) | def test_genai_mode_parameter_passed_to_provider():
function test_genai_mode_defaults_when_not_provided (line 514) | def test_genai_mode_defaults_when_not_provided():
function test_google_provider_runtime_import_error_propagates (line 535) | def test_google_provider_runtime_import_error_propagates():
function test_vertexai_provider_runtime_import_error_propagates (line 579) | def test_vertexai_provider_runtime_import_error_propagates():
function test_generative_ai_provider_runtime_import_error_propagates (line 621) | def test_generative_ai_provider_runtime_import_error_propagates():
FILE: tests/test_batch_in_memory.py
class User (line 15) | class User(BaseModel):
class TestBatchRequestInMemory (line 21) | class TestBatchRequestInMemory:
method test_save_to_bytesio_openai (line 24) | def test_save_to_bytesio_openai(self):
method test_save_to_bytesio_anthropic (line 52) | def test_save_to_bytesio_anthropic(self):
method test_save_to_file_still_works (line 78) | def test_save_to_file_still_works(self):
method test_multiple_requests_in_buffer (line 111) | def test_multiple_requests_in_buffer(self):
method test_invalid_buffer_type_raises_error (line 137) | def test_invalid_buffer_type_raises_error(self):
class TestProviderInMemorySupport (line 152) | class TestProviderInMemorySupport:
method test_openai_provider_accepts_bytesio (line 155) | def test_openai_provider_accepts_bytesio(self):
method test_anthropic_provider_accepts_bytesio (line 184) | def test_anthropic_provider_accepts_bytesio(self):
method test_provider_invalid_type_raises_error (line 211) | def test_provider_invalid_type_raises_error(self):
FILE: tests/test_cache_integration.py
function test_auto_cache_prevents_duplicate_provider_calls (line 8) | def test_auto_cache_prevents_duplicate_provider_calls(monkeypatch):
FILE: tests/test_cache_key.py
class UserV1 (line 11) | class UserV1(BaseModel):
class UserV1DiffDesc (line 15) | class UserV1DiffDesc(BaseModel):
class UserV1DiffField (line 19) | class UserV1DiffField(BaseModel):
class UserDoc1 (line 24) | class UserDoc1(BaseModel):
class UserDoc2 (line 30) | class UserDoc2(BaseModel):
function test_cache_key_changes_on_description_change (line 36) | def test_cache_key_changes_on_description_change():
function test_cache_key_changes_on_field_change (line 44) | def test_cache_key_changes_on_field_change():
function test_cache_key_same_for_identical_schema (line 52) | def test_cache_key_same_for_identical_schema():
function test_cache_key_changes_on_docstring_change (line 58) | def test_cache_key_changes_on_docstring_change():
FILE: tests/test_dict_operations.py
class TestDictionaryOperations (line 42) | class TestDictionaryOperations:
method test_extract_messages_benchmark (line 45) | def test_extract_messages_benchmark(self):
method test_combine_system_messages_benchmark (line 82) | def test_combine_system_messages_benchmark(self):
method test_extract_system_messages_benchmark (line 134) | def test_extract_system_messages_benchmark(self):
method test_update_gemini_kwargs_benchmark (line 166) | def test_update_gemini_kwargs_benchmark(self):
FILE: tests/test_dict_operations_validation.py
class TestDictOperationsValidation (line 12) | class TestDictOperationsValidation:
method test_extract_messages_validation (line 15) | def test_extract_messages_validation(self):
method test_combine_system_messages_validation (line 49) | def test_combine_system_messages_validation(self):
method test_extract_system_messages_validation (line 87) | def test_extract_system_messages_validation(self):
method test_update_gemini_kwargs_validation (line 130) | def test_update_gemini_kwargs_validation(self):
FILE: tests/test_dynamic_model_creation.py
function test_dynamic_model_creation_with_field_description (line 5) | def test_dynamic_model_creation_with_field_description():
FILE: tests/test_exception_backwards_compat.py
function test_response_parsing_error_is_value_error (line 12) | def test_response_parsing_error_is_value_error():
function test_multimodal_error_is_value_error (line 26) | def test_multimodal_error_is_value_error():
function test_async_validation_error_is_value_error (line 40) | def test_async_validation_error_is_value_error():
function test_exception_inheritance_chain (line 50) | def test_exception_inheritance_chain():
function test_mixed_exception_catching (line 68) | def test_mixed_exception_catching():
function test_exception_attributes_preserved (line 94) | def test_exception_attributes_preserved():
FILE: tests/test_exceptions.py
function test_all_exceptions_can_be_imported (line 18) | def test_all_exceptions_can_be_imported():
function test_exception_hierarchy (line 31) | def test_exception_hierarchy():
function test_base_instructor_error_can_be_caught (line 42) | def test_base_instructor_error_can_be_caught():
function test_incomplete_output_exception (line 66) | def test_incomplete_output_exception():
function test_instructor_retry_exception (line 77) | def test_instructor_retry_exception():
function test_validation_error (line 102) | def test_validation_error():
function test_provider_error (line 112) | def test_provider_error():
function test_configuration_error (line 125) | def test_configuration_error():
function test_mode_error (line 135) | def test_mode_error():
function test_client_error (line 152) | def test_client_error():
function test_specific_exception_catching (line 162) | def test_specific_exception_catching():
function test_multiple_exception_handling (line 183) | def test_multiple_exception_handling():
function test_exception_import_from_instructor (line 213) | def test_exception_import_from_instructor():
function test_instructor_error_from_exception (line 227) | def test_instructor_error_from_exception():
function test_instructor_error_str_with_no_failed_attempts (line 255) | def test_instructor_error_str_with_no_failed_attempts():
function test_instructor_error_str_with_failed_attempts (line 264) | def test_instructor_error_str_with_failed_attempts():
function test_instructor_error_str_xml_structure (line 300) | def test_instructor_error_str_xml_structure():
function test_failed_attempt_namedtuple (line 324) | def test_failed_attempt_namedtuple():
function test_instructor_error_failed_attempts_attribute (line 344) | def test_instructor_error_failed_attempts_attribute():
function test_instructor_retry_exception_with_failed_attempts (line 360) | def test_instructor_retry_exception_with_failed_attempts():
function test_multiple_exception_types_with_failed_attempts (line 383) | def test_multiple_exception_types_with_failed_attempts():
function test_failed_attempts_propagation_through_retry_cycles (line 402) | def test_failed_attempts_propagation_through_retry_cycles():
function test_failed_attempts_propagation_in_exception_hierarchy (line 445) | def test_failed_attempts_propagation_in_exception_hierarchy():
function test_failed_attempts_accumulation_simulation (line 478) | def test_failed_attempts_accumulation_simulation():
function test_failed_attempts_with_empty_and_none_completions (line 534) | def test_failed_attempts_with_empty_and_none_completions():
function test_failed_attempts_exception_chaining (line 570) | def test_failed_attempts_exception_chaining():
FILE: tests/test_fizzbuzz_fix.py
class TestFizzbuzzFix (line 7) | class TestFizzbuzzFix(unittest.TestCase):
method test_fizzbuzz_response_model (line 8) | def test_fizzbuzz_response_model(self):
FILE: tests/test_formatting.py
function test_handle_insecure_template (line 7) | def test_handle_insecure_template():
function test_handle_templating_with_context (line 21) | def test_handle_templating_with_context():
function test_handle_templating_without_context (line 30) | def test_handle_templating_without_context():
function test_handle_templating_with_anthropic_format (line 38) | def test_handle_templating_with_anthropic_format():
function test_handle_templating_with_mixed_content (line 55) | def test_handle_templating_with_mixed_content():
function test_handle_templating_with_secret_context (line 80) | def test_handle_templating_with_secret_context():
function test_handle_templating_with_cohere_format (line 116) | def test_handle_templating_with_cohere_format():
function test_handle_templating_with_gemini_format (line 131) | def test_handle_templating_with_gemini_format():
FILE: tests/test_function_calls.py
function test_model (line 22) | def test_model() -> type[OpenAISchema]:
function mock_completion (line 31) | def mock_completion(request: Any) -> ChatCompletion:
function mock_anthropic_message (line 66) | def mock_anthropic_message(request: Any) -> Message:
function test_openai_schema (line 86) | def test_openai_schema() -> None:
function test_openai_schema_raises_error (line 106) | def test_openai_schema_raises_error() -> None:
function test_no_docstring (line 114) | def test_no_docstring() -> None:
function test_incomplete_output_exception (line 129) | def test_incomplete_output_exception(
function test_complete_output_no_exception (line 136) | def test_complete_output_no_exception(
function test_incomplete_output_exception_raise (line 152) | def test_incomplete_output_exception_raise(
function test_anthropic_no_exception (line 159) | def test_anthropic_no_exception(
function test_control_characters_not_allowed_in_anthropic_json_strict_mode (line 177) | def test_control_characters_not_allowed_in_anthropic_json_strict_mode(
function test_control_characters_allowed_in_anthropic_json_non_strict_mode (line 199) | def test_control_characters_allowed_in_anthropic_json_non_strict_mode(
function test_pylance_url_config (line 213) | def test_pylance_url_config() -> None:
function test_refusal_attribute (line 234) | def test_refusal_attribute(test_model: type[OpenAISchema]):
function test_no_refusal_attribute (line 261) | def test_no_refusal_attribute(test_model: type[OpenAISchema]):
function test_missing_refusal_attribute (line 296) | def test_missing_refusal_attribute(test_model: type[OpenAISchema]):
FILE: tests/test_genai_config_merging.py
function test_update_genai_kwargs_thinking_config_from_config_object (line 24) | def test_update_genai_kwargs_thinking_config_from_config_object():
function test_update_genai_kwargs_thinking_config_kwarg_priority (line 57) | def test_update_genai_kwargs_thinking_config_kwarg_priority():
function test_update_genai_kwargs_config_object_automatic_function_calling (line 90) | def test_update_genai_kwargs_config_object_automatic_function_calling():
function test_update_genai_kwargs_config_object_does_not_override_base (line 119) | def test_update_genai_kwargs_config_object_does_not_override_base():
function test_update_genai_kwargs_no_config_object (line 139) | def test_update_genai_kwargs_no_config_object():
function test_update_genai_kwargs_config_object_with_no_thinking_config (line 156) | def test_update_genai_kwargs_config_object_with_no_thinking_config():
function test_verify_no_unions_always_returns_true (line 179) | def test_verify_no_unions_always_returns_true():
function test_map_to_gemini_function_schema_accepts_union_types (line 217) | def test_map_to_gemini_function_schema_accepts_union_types():
function test_update_genai_kwargs_config_object_cached_content (line 240) | def test_update_genai_kwargs_config_object_cached_content():
function test_update_genai_kwargs_cached_content_does_not_override_base (line 265) | def test_update_genai_kwargs_cached_content_does_not_override_base():
function test_handle_genai_structured_outputs_skips_system_instruction_with_cached_content (line 285) | def test_handle_genai_structured_outputs_skips_system_instruction_with_c...
function test_handle_genai_structured_outputs_sets_system_instruction_without_cached_content (line 318) | def test_handle_genai_structured_outputs_sets_system_instruction_without...
function test_handle_genai_tools_skips_tools_and_system_instruction_with_cached_content (line 341) | def test_handle_genai_tools_skips_tools_and_system_instruction_with_cach...
function test_handle_genai_tools_sets_tools_without_cached_content (line 377) | def test_handle_genai_tools_sets_tools_without_cached_content():
function test_update_genai_kwargs_config_dict_labels (line 402) | def test_update_genai_kwargs_config_dict_labels():
function test_update_genai_kwargs_config_dict_cached_content (line 412) | def test_update_genai_kwargs_config_dict_cached_content():
function test_update_genai_kwargs_config_dict_thinking_config (line 422) | def test_update_genai_kwargs_config_dict_thinking_config():
function test_handle_genai_structured_outputs_preserves_labels_from_config_dict (line 433) | def test_handle_genai_structured_outputs_preserves_labels_from_config_di...
function test_handle_genai_tools_preserves_labels_from_config_dict (line 453) | def test_handle_genai_tools_preserves_labels_from_config_dict():
function test_handle_genai_structured_outputs_skips_system_instruction_with_cached_content_dict (line 473) | def test_handle_genai_structured_outputs_skips_system_instruction_with_c...
function test_handle_genai_tools_skips_tools_and_system_instruction_with_cached_content_dict (line 497) | def test_handle_genai_tools_skips_tools_and_system_instruction_with_cach...
FILE: tests/test_genai_reask.py
function _response_with_content (line 12) | def _response_with_content(content: types.Content) -> types.GenerateCont...
function test_reask_genai_tools_preserves_thought_signature (line 16) | def test_reask_genai_tools_preserves_thought_signature():
function test_reask_genai_tools_finds_function_call_part_when_not_first (line 39) | def test_reask_genai_tools_finds_function_call_part_when_not_first():
function test_reask_genai_tools_handles_none_response (line 61) | def test_reask_genai_tools_handles_none_response():
function test_reask_genai_tools_falls_back_when_no_function_call (line 71) | def test_reask_genai_tools_falls_back_when_no_function_call():
FILE: tests/test_json_extraction.py
class Person (line 18) | class Person(BaseModel):
class TestJSONExtraction (line 24) | class TestJSONExtraction:
method test_extract_from_codeblock (line 27) | def test_extract_from_codeblock(self):
method test_extract_from_codeblock_no_language (line 50) | def test_extract_from_codeblock_no_language(self):
method test_extract_plain_json (line 73) | def test_extract_plain_json(self):
method test_nested_json (line 93) | def test_nested_json(self):
method test_json_with_arrays (line 118) | def test_json_with_arrays(self):
method test_invalid_json (line 140) | def test_invalid_json(self):
method test_extract_from_stream (line 156) | def test_extract_from_stream(self):
class TestTextExtraction (line 177) | class TestTextExtraction:
method test_extract_text_openai_format (line 180) | def test_extract_text_openai_format(self):
method test_extract_text_simple_format (line 197) | def test_extract_text_simple_format(self):
method test_extract_text_anthropic_format (line 208) | def test_extract_text_anthropic_format(self):
method test_extract_text_bedrock_format (line 227) | def test_extract_text_bedrock_format(self):
method test_extract_text_unknown_format (line 236) | def test_extract_text_unknown_format(self):
class TestModelValidation (line 249) | class TestModelValidation:
method test_validate_model_strict (line 252) | def test_validate_model_strict(self):
method test_validate_model_non_strict (line 261) | def test_validate_model_non_strict(self):
method test_validate_model_json_error (line 271) | def test_validate_model_json_error(self):
method test_validate_model_json_error_non_strict (line 282) | def test_validate_model_json_error_non_strict(self):
class PersonSchema (line 290) | class PersonSchema(OpenAISchema):
class TestBedrockJSONParsing (line 298) | class TestBedrockJSONParsing:
method test_parse_bedrock_json_simple (line 301) | def test_parse_bedrock_json_simple(self):
method test_parse_bedrock_json_with_reasoning_content (line 316) | def test_parse_bedrock_json_with_reasoning_content(self):
method test_parse_bedrock_json_with_codeblock (line 338) | def test_parse_bedrock_json_with_codeblock(self):
method test_parse_bedrock_json_no_text_content (line 357) | def test_parse_bedrock_json_no_text_content(self):
method test_parse_bedrock_json_multiple_text_contents (line 375) | def test_parse_bedrock_json_multiple_text_contents(self):
FILE: tests/test_json_extraction_edge_cases.py
class TestJSONExtractionEdgeCases (line 17) | class TestJSONExtractionEdgeCases:
method test_empty_input (line 20) | def test_empty_input(self):
method test_no_json_content (line 25) | def test_no_json_content(self):
method test_multiple_json_objects (line 32) | def test_multiple_json_objects(self):
method test_escaped_quotes (line 51) | def test_escaped_quotes(self):
method test_unicode_characters (line 64) | def test_unicode_characters(self):
method test_json_with_backslashes (line 77) | def test_json_with_backslashes(self):
method test_nested_codeblocks (line 90) | def test_nested_codeblocks(self):
method test_json_with_codeblock_in_a_value (line 107) | def test_json_with_codeblock_in_a_value(self):
method test_malformed_codeblock (line 118) | def test_malformed_codeblock(self):
method test_complex_nested_structure (line 132) | def test_complex_nested_structure(self):
method test_json_with_comments (line 158) | def test_json_with_comments(self):
method test_stream_with_nested_braces (line 178) | def test_stream_with_nested_braces(self):
method test_stream_with_string_containing_braces (line 195) | def test_stream_with_string_containing_braces(self):
method test_async_stream_extraction (line 213) | async def test_async_stream_extraction(self):
method test_async_stream_with_escaped_quotes (line 238) | async def test_async_stream_with_escaped_quotes(self):
FILE: tests/test_list_response.py
class User (line 15) | class User(BaseModel):
function test_listresponse_preserves_raw_response_on_slice (line 19) | def test_listresponse_preserves_raw_response_on_slice() -> None:
function test_process_response_wraps_iterablebase_tasks_with_raw_response (line 32) | def test_process_response_wraps_iterablebase_tasks_with_raw_response() -...
function test_prepare_response_model_supports_list_and_iterable (line 57) | def test_prepare_response_model_supports_list_and_iterable() -> None:
FILE: tests/test_list_response_wrapper.py
class DummyIterableModel (line 15) | class DummyIterableModel(BaseModel, IterableBase):
method from_response (line 19) | def from_response(cls, completion, **kwargs): # noqa: ANN001,ARG003
method from_streaming_response (line 23) | def from_streaming_response( # noqa: ANN001
method from_streaming_response_async (line 31) | def from_streaming_response_async( # noqa: ANN001
class DummyCompletion (line 43) | class DummyCompletion(BaseModel):
function test_process_response_returns_list_response_for_iterable_model (line 47) | def test_process_response_returns_list_response_for_iterable_model():
function test_process_response_streaming_returns_list_response_for_iterable_model (line 62) | def test_process_response_streaming_returns_list_response_for_iterable_m...
function test_process_response_async_streaming_returns_list_response_for_iterable_model (line 77) | async def test_process_response_async_streaming_returns_list_response_fo...
function test_prepare_response_model_treats_list_as_iterable_model (line 97) | def test_prepare_response_model_treats_list_as_iterable_model():
FILE: tests/test_logging.py
function test_from_provider_logging (line 5) | def test_from_provider_logging(caplog):
FILE: tests/test_message_processing.py
class TestMergeConsecutiveMessages (line 16) | class TestMergeConsecutiveMessages:
method test_empty_messages (line 19) | def test_empty_messages(self):
method test_single_message (line 24) | def test_single_message(self):
method test_consecutive_same_role (line 30) | def test_consecutive_same_role(self):
method test_alternating_roles (line 42) | def test_alternating_roles(self):
method test_mixed_content_types (line 55) | def test_mixed_content_types(self):
method test_multiple_consecutive (line 67) | def test_multiple_consecutive(self):
class TestGetMessageContent (line 88) | class TestGetMessageContent:
method test_string_content (line 91) | def test_string_content(self):
method test_list_content (line 97) | def test_list_content(self):
method test_empty_content (line 103) | def test_empty_content(self):
method test_none_content (line 109) | def test_none_content(self):
method test_missing_content (line 115) | def test_missing_content(self):
method test_empty_message (line 121) | def test_empty_message(self):
class TestTransformToGeminiPrompt (line 128) | class TestTransformToGeminiPrompt:
method test_empty_messages (line 131) | def test_empty_messages(self):
method test_user_message (line 136) | def test_user_message(self):
method test_assistant_message (line 144) | def test_assistant_message(self):
method test_system_message (line 152) | def test_system_message(self):
method test_full_conversation (line 160) | def test_full_conversation(self):
method test_multiple_system_messages (line 178) | def test_multiple_system_messages(self):
class TestUpdateGeminiKwargs (line 193) | class TestUpdateGeminiKwargs:
method test_transform_messages (line 196) | def test_transform_messages(self):
method test_generation_config (line 205) | def test_generation_config(self):
method test_safety_settings (line 226) | def test_safety_settings(self):
method test_existing_safety_settings (line 235) | def test_existing_safety_settings(self):
class TestSystemMessages (line 252) | class TestSystemMessages:
method test_combine_system_messages_strings (line 255) | def test_combine_system_messages_strings(self):
method test_combine_system_messages_lists (line 262) | def test_combine_system_messages_lists(self):
method test_combine_system_messages_mixed (line 271) | def test_combine_system_messages_mixed(self):
method test_combine_system_messages_none (line 280) | def test_combine_system_messages_none(self):
method test_extract_system_messages_empty (line 287) | def test_extract_system_messages_empty(self):
method test_extract_system_messages_no_system (line 293) | def test_extract_system_messages_no_system(self):
method test_extract_system_messages_string (line 302) | def test_extract_system_messages_string(self):
method test_extract_system_messages_list (line 313) | def test_extract_system_messages_list(self):
method test_extract_system_messages_multiple (line 327) | def test_extract_system_messages_multiple(self):
FILE: tests/test_multimodal.py
function base64_jpeg (line 17) | def base64_jpeg():
function base64_png (line 23) | def base64_png():
function test_image_from_url (line 28) | def test_image_from_url():
function test_image_from_path (line 36) | def test_image_from_path(tmp_path: Path):
function test_image_to_anthropic (line 47) | def test_image_to_anthropic():
function test_image_to_openai (line 57) | def test_image_to_openai():
function test_convert_contents (line 66) | def test_convert_contents():
function test_convert_messages (line 75) | def test_convert_messages():
function test_convert_messages_anthropic (line 93) | def test_convert_messages_anthropic():
function test_convert_messages_gemini (line 123) | def test_convert_messages_gemini():
function test_image_from_path_unsupported_format (line 137) | def test_image_from_path_unsupported_format(tmp_path: Path):
function test_image_from_path_empty_file (line 145) | def test_image_from_path_empty_file(tmp_path: Path):
function test_image_to_openai_base64 (line 153) | def test_image_to_openai_base64():
function test_convert_contents_single_string (line 162) | def test_convert_contents_single_string():
function test_convert_contents_single_image (line 168) | def test_convert_contents_single_image():
function test_convert_messages_mixed_content (line 177) | def test_convert_messages_mixed_content():
function test_convert_contents_invalid_type (line 190) | def test_convert_contents_invalid_type():
function test_convert_contents_anthropic_mode (line 195) | def test_convert_contents_anthropic_mode():
function test_convert_contents_custom_dict (line 206) | def test_convert_contents_custom_dict():
function test_image_from_base64_url (line 216) | def test_image_from_base64_url(base64_png):
function test_image_from_url_with_query_params (line 224) | def test_image_from_url_with_query_params():
function test_image_from_url_with_unusual_extension (line 232) | def test_image_from_url_with_unusual_extension():
function test_image_to_openai_with_base64_source (line 240) | def test_image_to_openai_with_base64_source(base64_png):
function test_image_to_anthropic_with_base64_source (line 252) | def test_image_to_anthropic_with_base64_source(base64_png):
function test_image_from_various_urls (line 276) | def test_image_from_various_urls(url, request):
function test_convert_contents_with_base64_image (line 287) | def test_convert_contents_with_base64_image(base64_png):
function test_image_autodetect (line 326) | def test_image_autodetect(input_data, expected_type, expected_media_type...
function test_image_autodetect_invalid_input (line 357) | def test_image_autodetect_invalid_input():
function test_image_autodetect_empty_file (line 365) | def test_image_autodetect_empty_file(tmp_path):
function test_raw_base64_autodetect_jpeg (line 372) | def test_raw_base64_autodetect_jpeg(base64_jpeg):
function test_raw_base64_autodetect_png (line 379) | def test_raw_base64_autodetect_png(base64_png):
function test_autodetect_media_data_uris (line 386) | def test_autodetect_media_data_uris():
function test_convert_messages_autodetect_media (line 408) | def test_convert_messages_autodetect_media():
function test_pdf_from_url (line 440) | def test_pdf_from_url():
function test_pdf_from_gs_url (line 456) | def test_pdf_from_gs_url():
function test_audio_from_url (line 477) | def test_audio_from_url():
function test_audio_from_gs_url (line 497) | def test_audio_from_gs_url():
function test_audio_from_base64 (line 517) | def test_audio_from_base64():
function test_pdf_to_bedrock_with_s3_uri (line 532) | def test_pdf_to_bedrock_with_s3_uri():
function test_pdf_to_bedrock_with_s3_uri_custom_name (line 550) | def test_pdf_to_bedrock_with_s3_uri_custom_name():
function test_pdf_to_bedrock_with_invalid_s3_uri (line 566) | def test_pdf_to_bedrock_with_invalid_s3_uri():
function test_pdf_to_bedrock_with_base64_data (line 577) | def test_pdf_to_bedrock_with_base64_data():
function test_pdf_to_bedrock_with_path_source (line 596) | def test_pdf_to_bedrock_with_path_source(tmp_path):
function test_pdf_to_bedrock_with_url_source (line 610) | def test_pdf_to_bedrock_with_url_source():
function test_pdf_to_bedrock_name_sanitization (line 632) | def test_pdf_to_bedrock_name_sanitization():
function test_pdf_to_bedrock_name_from_path_source (line 659) | def test_pdf_to_bedrock_name_from_path_source(tmp_path):
function test_pdf_to_bedrock_name_from_url (line 670) | def test_pdf_to_bedrock_name_from_url():
function test_pdf_to_bedrock_name_from_gs_url (line 690) | def test_pdf_to_bedrock_name_from_gs_url():
function test_pdf_to_bedrock_default_name (line 707) | def test_pdf_to_bedrock_default_name():
function test_pdf_to_bedrock_missing_data_no_source (line 724) | def test_pdf_to_bedrock_missing_data_no_source():
FILE: tests/test_multitask.py
function test_multi_task (line 6) | def test_multi_task():
FILE: tests/test_patch.py
function test_patch_completes_successfully (line 9) | def test_patch_completes_successfully():
function test_apatch_completes_successfully (line 13) | def test_apatch_completes_successfully():
function test_is_async_returns_true_if_function_is_async (line 17) | def test_is_async_returns_true_if_function_is_async():
function test_is_async_returns_false_if_function_is_not_async (line 24) | def test_is_async_returns_false_if_function_is_not_async():
function test_is_async_returns_true_if_wrapped_function_is_async (line 31) | def test_is_async_returns_true_if_wrapped_function_is_async():
function test_is_async_returns_true_if_double_wrapped_function_is_async (line 42) | def test_is_async_returns_true_if_double_wrapped_function_is_async():
function test_is_async_returns_true_if_triple_wrapped_function_is_async (line 57) | def test_is_async_returns_true_if_triple_wrapped_function_is_async():
FILE: tests/test_process_response.py
function test_typed_dict_conversion (line 7) | def test_typed_dict_conversion() -> None:
function test_openai_to_bedrock_conversion (line 22) | def test_openai_to_bedrock_conversion() -> None:
function test_bedrock_native_preserved (line 45) | def test_bedrock_native_preserved() -> None:
function test_mixed_openai_and_bedrock (line 64) | def test_mixed_openai_and_bedrock() -> None:
function test_bedrock_round_trip (line 93) | def test_bedrock_round_trip() -> None:
function test_empty_and_missing_content (line 109) | def test_empty_and_missing_content() -> None:
function test_bedrock_invalid_content_format (line 123) | def test_bedrock_invalid_content_format() -> None:
FILE: tests/test_response_model_conversion.py
function get_system_prompt (line 15) | def get_system_prompt(user_tool_definition, mode):
function test_json_preserves_description_of_non_english_characters_in_json_mode (line 30) | def test_json_preserves_description_of_non_english_characters_in_json_mode(
FILE: tests/test_retry_json_mode.py
class User (line 19) | class User(BaseModel):
function test_json_decode_error_caught_by_retry (line 24) | def test_json_decode_error_caught_by_retry():
function test_validation_error_caught_by_retry (line 68) | def test_validation_error_caught_by_retry():
FILE: tests/test_schema.py
function test_annotation_schema (line 17) | def test_annotation_schema():
class User (line 27) | class User(BaseModel):
class AdminUser (line 32) | class AdminUser(BaseModel):
function test_new_union_types (line 38) | def test_new_union_types():
function test_old_union_type (line 49) | def test_old_union_type():
function test_tuple_with_multiple_args (line 59) | def test_tuple_with_multiple_args():
function test_dict_with_multiple_value_types (line 69) | def test_dict_with_multiple_value_types():
function test_nested_complex_types (line 79) | def test_nested_complex_types():
function test_openai_schema_tuple_mapping (line 90) | def test_openai_schema_tuple_mapping():
function test_openai_schema_dict_mapping (line 97) | def test_openai_schema_dict_mapping():
function test_openai_schema_ordered_dict_mapping (line 104) | def test_openai_schema_ordered_dict_mapping():
function test_openai_schema_supports_optional_none_310 (line 112) | def test_openai_schema_supports_optional_none_310():
function test_openai_schema_supports_optional_none (line 127) | def test_openai_schema_supports_optional_none() -> None:
function test_default_values_and_validators (line 143) | def test_default_values_and_validators():
function test_inheritance (line 154) | def test_inheritance():
function test_alias_and_field_customization (line 167) | def test_alias_and_field_customization():
function test_standard_python_types (line 177) | def test_standard_python_types():
function test_any_type (line 191) | def test_any_type():
function test_literal_type (line 201) | def test_literal_type():
function test_str_any_dict (line 211) | def test_str_any_dict():
FILE: tests/test_schema_utils.py
class TestModel (line 15) | class TestModel(BaseModel):
class TestModelWithDocstring (line 23) | class TestModelWithDocstring(BaseModel):
class TestModelOldStyle (line 37) | class TestModelOldStyle(TestModel, OpenAISchema):
function test_generate_openai_schema_matches_class_method (line 43) | def test_generate_openai_schema_matches_class_method():
function test_generate_anthropic_schema_matches_class_method (line 59) | def test_generate_anthropic_schema_matches_class_method():
function test_generate_gemini_schema_matches_class_method (line 75) | def test_generate_gemini_schema_matches_class_method():
function test_docstring_parameter_enrichment (line 90) | def test_docstring_parameter_enrichment():
function test_schema_caching (line 103) | def test_schema_caching():
function test_required_fields_generation (line 113) | def test_required_fields_generation():
function test_field_descriptions (line 124) | def test_field_descriptions():
function test_schema_name_and_title (line 134) | def test_schema_name_and_title():
function test_no_inheritance_required (line 141) | def test_no_inheritance_required():
function test_anthropic_schema_uses_openai_base (line 155) | def test_anthropic_schema_uses_openai_base():
FILE: tests/test_simple_types.py
function test_enum_simple (line 5) | def test_enum_simple():
function test_standard_types (line 16) | def test_standard_types():
function test_partial_not_simple (line 21) | def test_partial_not_simple():
function test_annotated_simple (line 28) | def test_annotated_simple():
function test_literal_simple (line 37) | def test_literal_simple():
function test_union_simple (line 45) | def test_union_simple():
function test_iterable_not_simple (line 53) | def test_iterable_not_simple():
FILE: tests/test_streaming_reask_bug.py
class MockStream (line 19) | class MockStream:
method __iter__ (line 22) | def __iter__(self):
method __next__ (line 25) | def __next__(self):
class MockResponsesToolCall (line 29) | class MockResponsesToolCall:
method __init__ (line 32) | def __init__(
class MockResponsesReasoningItem (line 45) | class MockResponsesReasoningItem:
class MockResponsesResponse (line 51) | class MockResponsesResponse:
method __init__ (line 54) | def __init__(self, output: list[Any]) -> None:
function create_mock_validation_error (line 58) | def create_mock_validation_error():
class TestStreamingReaskBug (line 77) | class TestStreamingReaskBug:
method test_reask_tools_with_stream_object_does_not_crash (line 80) | def test_reask_tools_with_stream_object_does_not_crash(self):
method test_reask_anthropic_tools_with_stream_object (line 105) | def test_reask_anthropic_tools_with_stream_object(self):
method test_reask_with_none_response (line 122) | def test_reask_with_none_response(self):
method test_reask_responses_tools_skips_reasoning_items_and_includes_details (line 138) | def test_reask_responses_tools_skips_reasoning_items_and_includes_deta...
method test_reask_md_json_with_stream_object (line 168) | def test_reask_md_json_with_stream_object(self):
class TestStreamingReaskIntegration (line 190) | class TestStreamingReaskIntegration:
method client (line 194) | def client(self):
method test_streaming_with_retries_and_failing_validator (line 206) | def test_streaming_with_retries_and_failing_validator(self, client):
FILE: tests/test_utils.py
function test_extract_json_from_codeblock (line 14) | def test_extract_json_from_codeblock():
function test_extract_json_from_codeblock_no_end (line 28) | def test_extract_json_from_codeblock_no_end():
function test_extract_json_from_codeblock_no_start (line 45) | def test_extract_json_from_codeblock_no_start():
function test_stream_json (line 61) | def test_stream_json():
function test_stream_json_async (line 93) | async def test_stream_json_async():
function test_merge_consecutive_messages (line 134) | def test_merge_consecutive_messages():
function test_merge_consecutive_messages_empty (line 154) | def test_merge_consecutive_messages_empty():
function test_merge_consecutive_messages_single (line 160) | def test_merge_consecutive_messages_single():
function test_classproperty (line 172) | def test_classproperty():
function test_combine_system_messages_string_string (line 192) | def test_combine_system_messages_string_string():
function test_combine_system_messages_list_list (line 199) | def test_combine_system_messages_list_list():
function test_combine_system_messages_string_list (line 209) | def test_combine_system_messages_string_list():
function test_combine_system_messages_list_string (line 219) | def test_combine_system_messages_list_string():
function test_combine_system_messages_none_string (line 229) | def test_combine_system_messages_none_string():
function test_combine_system_messages_none_list (line 236) | def test_combine_system_messages_none_list():
function test_combine_system_messages_invalid_type (line 243) | def test_combine_system_messages_invalid_type():
function test_extract_system_messages (line 248) | def test_extract_system_messages():
function test_extract_system_messages_no_system (line 262) | def test_extract_system_messages_no_system():
function test_combine_system_messages_with_cache_control (line 271) | def test_combine_system_messages_with_cache_control():
function test_combine_system_messages_string_to_cache_control (line 300) | def test_combine_system_messages_string_to_cache_control():
function test_extract_system_messages_with_cache_control (line 323) | def test_extract_system_messages_with_cache_control():
function test_combine_system_messages_preserve_cache_control (line 352) | def test_combine_system_messages_preserve_cache_control():
FILE: tests/test_xai_optional_dependency.py
function test_from_provider_xai_requires_optional_extra (line 4) | def test_from_provider_xai_requires_optional_extra():
function test_direct_from_xai_has_clear_error_when_sdk_missing (line 16) | def test_direct_from_xai_has_clear_error_when_sdk_missing():
FILE: tests/v2/test_provider_modes.py
class Answer (line 36) | class Answer(BaseModel):
class Weather (line 42) | class Weather(BaseModel):
class GoogleSearch (line 49) | class GoogleSearch(BaseModel):
function test_mode_is_registered (line 88) | def test_mode_is_registered(provider: Provider, mode: Mode):
function test_mode_basic_extraction (line 108) | def test_mode_basic_extraction(provider: Provider, mode: Mode):
function test_mode_async_extraction (line 144) | async def test_mode_async_extraction(provider: Provider, mode: Mode):
function test_anthropic_parallel_tools_extraction (line 171) | def test_anthropic_parallel_tools_extraction():
function test_anthropic_tools_with_thinking (line 205) | def test_anthropic_tools_with_thinking(mode: Mode):
function test_anthropic_reasoning_tools_deprecation (line 230) | def test_anthropic_reasoning_tools_deprecation():
function test_all_modes_covered (line 280) | def test_all_modes_covered(provider: Provider):
Condensed preview — 706 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (4,437K chars).
[
{
"path": ".coveragerc",
"chars": 59,
"preview": "[run]\nsource =\n instructor/\nomit =\n instructor/cli/*\n"
},
{
"path": ".cursor/rules/documentation-sync.mdc",
"chars": 1175,
"preview": "---\ndescription: when making code changes or adding documentation\nglobs: [\"*.py\", \"*.md\"]\nalwaysApply: true\n---\n\n- When "
},
{
"path": ".cursor/rules/followups.mdc",
"chars": 289,
"preview": "---\ndescription: when AI agents are collaborating on code\nglobs: \"*\"\nalwaysApply: true\n---\nMake sure to come up with fol"
},
{
"path": ".cursor/rules/new-features-planning.mdc",
"chars": 1863,
"preview": "---\ndescription: when asked to implement new features or clients\nglobs: *.py\nalwaysApply: true\n---\n\n- When being asked t"
},
{
"path": ".cursor/rules/readme.md",
"chars": 3453,
"preview": "# Cursor Rules\n\nCursor rules are configuration files that help guide AI-assisted development in the Cursor IDE. They pro"
},
{
"path": ".cursor/rules/simple-language.mdc",
"chars": 307,
"preview": "---\ndescription: when writing documentation\nglobs: *.md\nalwaysApply: false\n---\n\n- When writing documents and concepts ma"
},
{
"path": ".cursorignore",
"chars": 82,
"preview": "# Add directories or file patterns to ignore during indexing (e.g. foo/ or *.csv)\n"
},
{
"path": ".github/FUNDING.yml",
"chars": 12,
"preview": "github: jxnl"
},
{
"path": ".github/ISSUE_TEMPLATE/bug_report.md",
"chars": 812,
"preview": "---\nname: Bug report\nabout: Create a report to help us improve\n---\n\n- [ ] This is actually a bug report.\n- [ ] I am not "
},
{
"path": ".github/ISSUE_TEMPLATE/feature_request.md",
"chars": 559,
"preview": "---\nname: Feature request\nabout: Suggest an idea for this project\n---\n\n**Is your feature request related to a problem? P"
},
{
"path": ".github/PULL_REQUEST_TEMPLATE/pull_request_template.md",
"chars": 458,
"preview": "> Please use conventional commits to describe your changes. For example, `feat: add new feature` or `fix: fix a bug`. If"
},
{
"path": ".github/dependabot.yml",
"chars": 551,
"preview": "# To get started with Dependabot version updates, you'll need to specify which\n# package ecosystems to update and where "
},
{
"path": ".github/workflows/ai-label.yml",
"chars": 431,
"preview": "name: AI Labeler\n\non:\n issues:\n types: [opened, reopened]\n pull_request:\n types: [opened, reopened]\n\njobs:\n ai-"
},
{
"path": ".github/workflows/evals.yml",
"chars": 726,
"preview": "name: Weekly Tests\n\non:\n workflow_dispatch:\n schedule:\n - cron: \"0 0 * * 0\" # Runs at 00:00 UTC every Sunday\n push"
},
{
"path": ".github/workflows/python-publish.yml",
"chars": 1048,
"preview": "# This workflow will upload a Python Package using Twine when a release is created\n# For more information see: https://d"
},
{
"path": ".github/workflows/ruff.yml",
"chars": 664,
"preview": "name: Ruff\n\non:\n push:\n pull_request:\n branches: [main]\n\nenv:\n WORKING_DIRECTORY: \".\"\n CUSTOM_PACKAGES: \"instruct"
},
{
"path": ".github/workflows/scheduled-release.yml",
"chars": 10212,
"preview": "name: Scheduled Release\n\non:\n schedule:\n # Every 2 weeks on Monday at 9 AM UTC\n - cron: '0 9 * * 1/2'\n workflow_"
},
{
"path": ".github/workflows/test.yml",
"chars": 11495,
"preview": "name: Test\non:\n pull_request:\n push:\n branches:\n - main\n\njobs:\n # Core tests without LLM providers\n core-tes"
},
{
"path": ".github/workflows/test_docs.yml",
"chars": 948,
"preview": "name: Test Docs\non:\n schedule:\n - cron: '0 0 1 * *' # Runs at 00:00 on the 1st of every month\njobs:\n release:\n "
},
{
"path": ".github/workflows/ty.yml",
"chars": 654,
"preview": "name: ty\n\non:\n pull_request:\n branches: [main]\n push:\n branches: [main]\n\nenv:\n WORKING_DIRECTORY: \".\"\n\njobs:\n "
},
{
"path": ".gitignore",
"chars": 3448,
"preview": ".DS_Store\n# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n\n# Distributio"
},
{
"path": ".grit/.gitignore",
"chars": 19,
"preview": ".gritmodules\n*.log\n"
},
{
"path": ".grit/grit.yaml",
"chars": 84,
"preview": "version: 0.0.1\npatterns:\n - name: github.com/getgrit/python#openai\n level: info\n"
},
{
"path": ".pre-commit-config.yaml",
"chars": 1326,
"preview": "repos:\n - repo: https://github.com/astral-sh/ruff-pre-commit\n rev: v0.9.9 # Ruff version\n hooks:\n - id: ruff"
},
{
"path": ".ruff.toml",
"chars": 1033,
"preview": "# Exclude a variety of commonly ignored directories.\nexclude = [\n \".bzr\",\n \".direnv\",\n \".eggs\",\n \".git\",\n "
},
{
"path": "AGENT.md",
"chars": 3539,
"preview": "# AGENT.md\n\n## Commands\n- Install: `uv pip install -e \".[dev]\"` or `poetry install --with dev`\n- Run tests: `uv run pyte"
},
{
"path": "CHANGELOG.md",
"chars": 3343,
"preview": "# Changelog\n\nAll notable changes to this project will be documented in this file. The format is based on [Keep a Changel"
},
{
"path": "CLAUDE.md",
"chars": 13087,
"preview": "# CLAUDE.md\n\nThis file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.\n\n# I"
},
{
"path": "CONTRIBUTING.md",
"chars": 12939,
"preview": "# Contributing to Instructor\n\nThank you for considering contributing to Instructor! This document provides guidelines an"
},
{
"path": "LICENSE",
"chars": 1066,
"preview": "MIT License\n\nCopyright (c) 2023 Jason Liu\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\n"
},
{
"path": "NEW_PROVIDER_AGENT_INSTRUCTIONS.md",
"chars": 25874,
"preview": "# AI Agent Instructions: Creating a New Instructor Provider\n\n**Instructions for AI coding agents to create a new provide"
},
{
"path": "README.md",
"chars": 8074,
"preview": "# Instructor: Structured Outputs for LLMs\n\nGet reliable JSON from any LLM. Built on Pydantic for validation, type safety"
},
{
"path": "build_mkdocs.sh",
"chars": 81,
"preview": "pip install -r requirements.txt\npip install -r requirements-doc.txt\nmkdocs build\n"
},
{
"path": "cross_link_mapping.yaml",
"chars": 11928,
"preview": "# Cross-Link Mapping for Instructor Documentation\n# This file maps blog posts and documentation pages to their related c"
},
{
"path": "docs/AGENT.md",
"chars": 2759,
"preview": "---\ntitle: Documentation Agent Guide\ndescription: Internal guide for maintaining and improving Instructor documentation\n"
},
{
"path": "docs/api-docstring-assessment.md",
"chars": 8151,
"preview": "# API Docstring Quality Assessment\n\nThis document assesses the quality and completeness of docstrings for all API items "
},
{
"path": "docs/api.md",
"chars": 2501,
"preview": "---\ntitle: API Reference Guide\ndescription: Explore the comprehensive API reference with details on instructors, validat"
},
{
"path": "docs/architecture.md",
"chars": 6336,
"preview": "---\ntitle: Instructor Architecture Overview\ndescription: Learn about the internal architecture and design decisions of t"
},
{
"path": "docs/blog/.authors.yml",
"chars": 1245,
"preview": "authors:\n jxnl:\n name: Jason Liu\n description: Creator\n avatar: https://avatars.githubusercontent.com/u/485223"
},
{
"path": "docs/blog/index.md",
"chars": 2721,
"preview": "# Subscribe to our Newsletter for Updates and Tips\n\nIf you want to get updates on new features and tips on how to use In"
},
{
"path": "docs/blog/posts/aisummit-2023.md",
"chars": 1140,
"preview": "---\nauthors:\n- jxnl\ncategories:\n- Pydantic\ncomments: true\ndate: 2023-11-02\ndescription: Explore insights on utilizing Py"
},
{
"path": "docs/blog/posts/announcing-gemini-tool-calling-support.md",
"chars": 3409,
"preview": "---\nauthors:\n- ivanleomk\ncategories:\n- LLM Techniques\ncomments: true\ndate: 2024-09-03\ndescription: Introducing structure"
},
{
"path": "docs/blog/posts/announcing-instructor-responses-support.md",
"chars": 7094,
"preview": "---\nauthors:\n - ivanleomk\ncategories:\n - instructor\ncomments: true\ndate: 2025-05-11\ndescription: Take advantage of Ope"
},
{
"path": "docs/blog/posts/announcing-unified-provider-interface.md",
"chars": 9943,
"preview": "---\nauthors:\n - jxnl\n - ivanleomk\ncategories:\n - instructor\ncomments: true\ndate: 2025-05-08\ndescription: Switch betwe"
},
{
"path": "docs/blog/posts/anthropic-prompt-caching.md",
"chars": 17423,
"preview": "---\nauthors:\n- ivanleomk\ncategories:\n- Anthropic\ncomments: true\ndate: 2024-09-14\ndescription: Discover how prompt cachin"
},
{
"path": "docs/blog/posts/anthropic-web-search-structured.md",
"chars": 6748,
"preview": "---\ndate: 2025-05-07\nauthors:\n - jxnl\ncategories:\n - tutorials\n - anthropic\n - structured-data\n---\n\n# Using Anthropi"
},
{
"path": "docs/blog/posts/anthropic.md",
"chars": 2178,
"preview": "---\nauthors:\n- jxnl\ncategories:\n- Anthropic\ncomments: true\ndate: 2024-03-20\ndescription: Learn how to integrate Anthropi"
},
{
"path": "docs/blog/posts/bad-schemas-could-break-llms.md",
"chars": 18355,
"preview": "---\nauthors:\n- ivanleomk\ncategories:\n- LLM Techniques\ncomments: true\ndate: 2024-09-26\ndescription: Discover how response"
},
{
"path": "docs/blog/posts/best_framework.md",
"chars": 5638,
"preview": "---\nauthors:\n- jxnl\ncategories:\n- LLM Techniques\ncomments: true\ndate: 2024-03-05\ndescription: Discover how the Instructo"
},
{
"path": "docs/blog/posts/caching.md",
"chars": 36676,
"preview": "---\nauthors:\n- jxnl\ncategories:\n- Performance Optimization\n- Cost Reduction\n- API Efficiency\n- Python Development\ncommen"
},
{
"path": "docs/blog/posts/chain-of-density.md",
"chars": 27541,
"preview": "---\nauthors:\n- ivanleomk\n- jxnl\ncategories:\n- LLM Techniques\ncomments: true\ndate: 2023-11-05\ndescription: Learn to imple"
},
{
"path": "docs/blog/posts/chat-with-your-pdf-with-gemini.md",
"chars": 5865,
"preview": "---\nauthors:\n - ivanleomk\ncategories:\n - Gemini\n - Document Processing\ncomments: true\ndate: 2024-11-11\ndescription: L"
},
{
"path": "docs/blog/posts/citations.md",
"chars": 8975,
"preview": "---\nauthors:\n- jxnl\ncategories:\n- Pydantic\ncomments: true\ndate: 2023-11-18\ndescription: Explore how Pydantic enhances LL"
},
{
"path": "docs/blog/posts/consistent-stories.md",
"chars": 10371,
"preview": "---\nauthors:\n - ivanleomk\ncategories:\n - OpenAI\ncomments: true\ndate: 2024-12-10\ndescription: Generating complex DAGS w"
},
{
"path": "docs/blog/posts/course.md",
"chars": 777,
"preview": "---\nauthors:\n- jxnl\ncategories:\n- OpenAI\ncomments: true\ndate: 2024-02-14\ndescription: Discover a free one-hour course on"
},
{
"path": "docs/blog/posts/cursor-rules.md",
"chars": 4845,
"preview": "---\nauthors:\n - jxnl\ncategories:\n - Contributing\ncomments: true\ndate: 2025-03-18\ndescription:\n Learn how Instructor's"
},
{
"path": "docs/blog/posts/distilation-part1.md",
"chars": 6338,
"preview": "---\nauthors:\n- jxnl\ncategories:\n- LLM Techniques\ncomments: true\ndate: 2023-10-17\ndescription: Explore Instructor for fin"
},
{
"path": "docs/blog/posts/extract-model-looks.md",
"chars": 10377,
"preview": "---\nauthors:\n - ivanleomk\ncategories:\n - OpenAI\ncomments: true\ndate: 2024-12-10\ndescription: Generating complex DAGS w"
},
{
"path": "docs/blog/posts/extracting-model-metadata.md",
"chars": 8074,
"preview": "---\ntitle: \"Extracting Metadata from Images using Structured Extraction\"\ndate: 2024-12-11\ndescription: Structured Extrac"
},
{
"path": "docs/blog/posts/fake-data.md",
"chars": 5394,
"preview": "---\nauthors:\n- jxnl\ncategories:\n- Pydantic\ncomments: true\ndate: 2024-03-08\ndescription: Learn to generate synthetic data"
},
{
"path": "docs/blog/posts/full-fastapi-visibility.md",
"chars": 13404,
"preview": "---\nauthors:\n- ivanleomk\n- jxnl\ncategories:\n- LLM Observability\ncomments: true\ndate: 2024-05-03\ndescription: Discover ho"
},
{
"path": "docs/blog/posts/generating-pdf-citations.md",
"chars": 7073,
"preview": "---\nauthors:\n - ivanleomk\ncategories:\n - Gemini\n - Document Processing\ncomments: true\ndate: 2024-11-15\ndescription: G"
},
{
"path": "docs/blog/posts/generator.md",
"chars": 11930,
"preview": "---\nauthors:\n- jxnl\n- anmol\ncategories:\n- LLM Techniques\ncomments: true\ndate: 2023-11-26\ndescription: Explore Python gen"
},
{
"path": "docs/blog/posts/google-openai-client.md",
"chars": 8433,
"preview": "---\nauthors:\n - ivanleomk\ncategories:\n - Google\n - OpenAI\ncomments: true\ndate: 2024-11-10\ndescription: Learn why Inst"
},
{
"path": "docs/blog/posts/introducing-structured-outputs-with-cerebras-inference.md",
"chars": 3417,
"preview": "---\nauthors:\n - ivanleomk\n - sarahchieng\ncategories:\n - API Development\n - Pydantic\n - Performance Optimization\ncom"
},
{
"path": "docs/blog/posts/introducing-structured-outputs.md",
"chars": 13562,
"preview": "---\nauthors:\n- ivanleomk\ncategories:\n- OpenAI\ncomments: true\ndate: 2024-08-20\ndescription: Explore the challenges of Ope"
},
{
"path": "docs/blog/posts/introduction.md",
"chars": 5284,
"preview": "---\nauthors:\n- jxnl\ncategories:\n- Pydantic\ncomments: true\ndate: 2023-09-11\ndescription: Learn how Pydantic simplifies wo"
},
{
"path": "docs/blog/posts/jinja-proposal.md",
"chars": 8934,
"preview": "---\nauthors:\n- jxnl\ncategories:\n- LLM Techniques\ncomments: true\ndate: 2024-09-19\ndescription: Explore the integration of"
},
{
"path": "docs/blog/posts/langsmith.md",
"chars": 6129,
"preview": "---\nauthors:\n- jxnl\ncategories:\n- LLM Techniques\ncomments: true\ndate: 2024-02-18\ndescription: Explore how LangSmith enha"
},
{
"path": "docs/blog/posts/learn-async.md",
"chars": 8263,
"preview": "---\nauthors:\n- jxnl\ncategories:\n- LLM Techniques\ncomments: true\ndate: 2023-11-13\ndescription: \"Master Python asyncio.gat"
},
{
"path": "docs/blog/posts/llm-as-reranker.md",
"chars": 7591,
"preview": "---\nauthors:\n - jxnl\ncategories:\n - LLM\n - Pydantic\ncomments: true\ndate: 2024-10-23\ndescription: Learn how to use Ins"
},
{
"path": "docs/blog/posts/llms-txt-adoption.md",
"chars": 4122,
"preview": "---\nauthors:\n - jxnl\ncategories:\n - Announcements\ncomments: true\ndate: 2025-03-19\ndescription:\n Instructor adopts llm"
},
{
"path": "docs/blog/posts/llms-txt-support.md",
"chars": 1374,
"preview": "---\nauthors:\n - jxnl\ncategories:\n - Announcements\ncomments: true\ndate: 2025-08-29\ndescription:\n Instructor now automa"
},
{
"path": "docs/blog/posts/logfire.md",
"chars": 9841,
"preview": "---\nauthors:\n- ivanleomk\n- jxnl\ncategories:\n- LLM Observability\ncomments: true\ndate: 2024-05-01\ndescription: Explore Log"
},
{
"path": "docs/blog/posts/lseg-market-surveillance.md",
"chars": 2709,
"preview": "---\nauthors:\n- jxnl\ncategories:\n- Production\n- Financial Services\ncomments: true\ndate: 2025-09-11\ndescription: London St"
},
{
"path": "docs/blog/posts/matching-language.md",
"chars": 17130,
"preview": "---\nauthors:\n- jxnl\ncategories:\n- Pydantic\ncomments: true\ndate: 2024-03-28\ndescription: Explore techniques to ensure lan"
},
{
"path": "docs/blog/posts/migrating-to-uv.md",
"chars": 10302,
"preview": "---\nauthors:\n - ivanleomk\ncategories:\n - UV\ncomments: true\ndate: 2024-12-26\ndescription: How we migrated from poetry t"
},
{
"path": "docs/blog/posts/mkdocs-llmstxt-plugin-integration.md",
"chars": 4689,
"preview": "---\nauthors:\n - jxnl\ncategories:\n - Technical\n - Documentation\ncomments: true\ndate: 2025-08-29\ndescription:\n Deep di"
},
{
"path": "docs/blog/posts/multimodal-gemini.md",
"chars": 10011,
"preview": "---\nauthors:\n - ivanleomk\ncategories:\n - Gemini\n - Multimodal\ncomments: true\ndate: 2024-10-23\ndescription: Learn how "
},
{
"path": "docs/blog/posts/native_caching.md",
"chars": 8548,
"preview": "---\nauthors:\n- jxnl\ncategories:\n- Performance Optimization\n- Cost Reduction\n- API Efficiency\n- Python Development\ncommen"
},
{
"path": "docs/blog/posts/open_source.md",
"chars": 7336,
"preview": "---\nauthors:\n- jxnl\ncategories:\n- API Development\ncomments: true\ndate: 2024-03-07\ndescription: Discover how Instructor i"
},
{
"path": "docs/blog/posts/openai-distilation-store.md",
"chars": 5176,
"preview": "---\nauthors:\n- jxnl\ncategories:\n- OpenAI\ncomments: true\ndate: 2024-10-02\ndescription: Learn how to use OpenAI's API Mode"
},
{
"path": "docs/blog/posts/openai-multimodal.md",
"chars": 3917,
"preview": "---\nauthors:\n - jxnl\ncategories:\n - OpenAI\n - Audio\ncomments: true\ndate: 2024-10-17\ndescription: Explore the new audi"
},
{
"path": "docs/blog/posts/pairwise-llm-judge.md",
"chars": 6280,
"preview": "---\nauthors:\n - jxnl\ncategories:\n - LLM\n - Pydantic\ncomments: true\ndate: 2024-10-17\ndescription: Explore how to use I"
},
{
"path": "docs/blog/posts/parea.md",
"chars": 6401,
"preview": "---\nauthors:\n - jxnl\n - joschkabraun\ncategories:\n - LLM Observability\ncomments: true\ndate: 2024-07-17\ndescription:\n "
},
{
"path": "docs/blog/posts/pydantic-is-still-all-you-need.md",
"chars": 5785,
"preview": "---\nauthors:\n- jxnl\ncategories:\n- Pydantic\ncomments: true\ndate: 2024-09-07\ndescription: Explore how Pydantic enhances st"
},
{
"path": "docs/blog/posts/rag-and-beyond.md",
"chars": 11348,
"preview": "---\nauthors:\n- jxnl\ncategories:\n- LLM Techniques\ncomments: true\ndate: 2023-09-17\ndescription: 'Explore how to enhance Re"
},
{
"path": "docs/blog/posts/rag-timelines.md",
"chars": 4339,
"preview": "---\nauthors:\n - jxnl\ncategories:\n - LLM Techniques\ncomments: true\ndate: 2024-06-06\ndescription:\n Explore enhancing RA"
},
{
"path": "docs/blog/posts/semantic-validation-structured-outputs.md",
"chars": 12926,
"preview": "---\nauthors:\n- jxnl\ncategories:\n- Validation\n- Pydantic\n- LLMs\ncomments: true\ndate: 2025-05-20\ndescription: Learn how se"
},
{
"path": "docs/blog/posts/situate-context.md",
"chars": 6946,
"preview": "---\nauthors:\n - jxnl\ncategories:\n - Anthropic\n - LLM Techniques\n - Python\ncomments: true\ndate: 2024-09-26\ndescriptio"
},
{
"path": "docs/blog/posts/string-based-init.md",
"chars": 9578,
"preview": "---\ndraft: false\ndate: 2024-04-20\nauthors:\n - jxnl\ncategories:\n - Tutorial\n---\n\n# Unified Provider Interface with Stri"
},
{
"path": "docs/blog/posts/structured-output-anthropic.md",
"chars": 5019,
"preview": "---\nauthors:\n - jxnl\ncategories:\n - Anthropic\ncomments: true\ndate: 2024-10-23\ndescription: Learn how to leverage Anthr"
},
{
"path": "docs/blog/posts/tidy-data-from-messy-tables.md",
"chars": 6536,
"preview": "---\ntitle: Using Structured Outputs to convert messy tables into tidy data\ndescription: With instructor, converting mess"
},
{
"path": "docs/blog/posts/timestamp.md",
"chars": 6496,
"preview": "---\nauthors:\n- jxnl\ncategories:\n- Pydantic\ncomments: true\ndate: 2024-09-26\ndescription: Learn how to ensure consistent t"
},
{
"path": "docs/blog/posts/using_json.md",
"chars": 4994,
"preview": "---\nauthors:\n - jxnl\ncategories:\n - LLM Techniques\ncomments: true\ndate: 2024-06-15\ndescription:\n Learn how to easily "
},
{
"path": "docs/blog/posts/validation-part1.md",
"chars": 20691,
"preview": "---\nauthors:\n- jxnl\n- ivanleomk\ncategories:\n- Pydantic\n- Data Validation\n- Python\ncomments: true\ndate: 2023-10-23\ndescri"
},
{
"path": "docs/blog/posts/version-1.md",
"chars": 8207,
"preview": "---\nauthors:\n- jxnl\ncategories:\n- OpenAI\ncomments: true\ndate: 2024-04-01\ndescription: 'Introducing instructor 1.0.0: Sim"
},
{
"path": "docs/blog/posts/why-care-about-mcps.md",
"chars": 9665,
"preview": "---\ntitle: Understanding Model Context Protocol (MCP)\ndate: 2025-03-27\ndescription: A comprehensive look at the Model Co"
},
{
"path": "docs/blog/posts/writer-support.md",
"chars": 5362,
"preview": "---\nauthors:\n - ivanleomk\n - yanomaly\ncategories:\n - Writer SDK\ncomments: true\ndate: 2024-11-19\ndescription: Announci"
},
{
"path": "docs/blog/posts/youtube-flashcards.md",
"chars": 13347,
"preview": "---\nauthors:\n- jxnl\n- zilto\ncategories:\n- Data Processing\ncomments: true\ndate: 2024-10-18\ndescription: Flashcard generat"
},
{
"path": "docs/blog/posts/youtube-transcripts.md",
"chars": 10636,
"preview": "---\nauthors:\n- jxnl\ncategories:\n- Data Processing\ncomments: true\ndate: 2024-07-11\ndescription: Learn how to extract and "
},
{
"path": "docs/cli/batch.md",
"chars": 16908,
"preview": "---\ntitle: Managing Batch Jobs with Multi-Provider CLI\ndescription: Learn how to create, list, cancel, and delete batch "
},
{
"path": "docs/cli/finetune.md",
"chars": 8436,
"preview": "---\ntitle: Managing Fine-Tuning Jobs with the Instructor CLI\ndescription: Learn how to create, view, and manage fine-tun"
},
{
"path": "docs/cli/index.md",
"chars": 2880,
"preview": "---\ntitle: Instructor CLI Tools\ndescription: Command-line utilities for monitoring API usage, fine-tuning models, and ac"
},
{
"path": "docs/cli/usage.md",
"chars": 2182,
"preview": "---\ntitle: OpenAI API Usage CLI Guide\ndescription: Learn how to monitor OpenAI API usage with the CLI tool, including co"
},
{
"path": "docs/concepts/alias.md",
"chars": 596,
"preview": "---\ntitle: Pydantic Aliases Overview\ndescription: Explore the concept of aliases in Pydantic. Discover the latest docume"
},
{
"path": "docs/concepts/batch.md",
"chars": 6062,
"preview": "---\ntitle: Batch Processing\ndescription: Process multiple LLM requests efficiently using batch processing for 50% cost s"
},
{
"path": "docs/concepts/caching.md",
"chars": 15303,
"preview": "## See Also\n\n- [Prompt Caching](./prompt_caching.md) - Cache prompts for cost optimization\n- [Performance Optimization]("
},
{
"path": "docs/concepts/citation.md",
"chars": 5964,
"preview": "---\ntitle: Citation Extraction with CitationMixin\ndescription: Learn how to extract and validate citations from source t"
},
{
"path": "docs/concepts/dictionary_operations.md",
"chars": 4388,
"preview": "## See Also\n\n- [Types](./types.md) - Working with different data types\n- [Response Models](./models.md) - Working with P"
},
{
"path": "docs/concepts/distillation.md",
"chars": 7180,
"preview": "---\ntitle: Seamless Fine-Tuning of Python Functions Using Instructor's Distillation\ndescription: Learn how to fine-tune "
},
{
"path": "docs/concepts/enums.md",
"chars": 1323,
"preview": "---\ntitle: Using Enums and Literals in Pydantic for Role Management\ndescription: Learn how to implement Enums and Litera"
},
{
"path": "docs/concepts/error_handling.md",
"chars": 7853,
"preview": "---\ntitle: Error Handling\ndescription: Learn how to handle errors and exceptions when using Instructor for structured ou"
},
{
"path": "docs/concepts/fastapi.md",
"chars": 3670,
"preview": "---\ntitle: FastAPI Integration with Instructor - API Development Guide\ndescription: Build production-ready APIs with Fas"
},
{
"path": "docs/concepts/fields.md",
"chars": 5922,
"preview": "---\ntitle: Customizing Pydantic Models with Field Metadata\ndescription: Learn how to enhance Pydantic models with metada"
},
{
"path": "docs/concepts/from_provider.md",
"chars": 9949,
"preview": "---\ntitle: Using from_provider for Unified Client Creation\ndescription: Learn how to use from_provider to create Instruc"
},
{
"path": "docs/concepts/hooks.md",
"chars": 6616,
"preview": "---\ntitle: Hooks\ndescription: Learn how to use hooks for event handling, logging, and error handling in Instructor.\n---\n"
},
{
"path": "docs/concepts/index.md",
"chars": 5364,
"preview": "---\ntitle: Instructor Concepts - Core Features and Patterns\ndescription: Explore core concepts and features of the Instr"
},
{
"path": "docs/concepts/iterable.md",
"chars": 7239,
"preview": "---\ntitle: Iterable Extraction with Instructor - Stream Multiple Objects\ndescription: Use Iterable types to extract and "
},
{
"path": "docs/concepts/lists.md",
"chars": 4485,
"preview": "---\ntitle: Streaming Lists with Instructor - Extract Multiple Objects\ndescription: Learn how to extract multiple structu"
},
{
"path": "docs/concepts/logging.md",
"chars": 2965,
"preview": "---\ntitle: Logging and Monitoring with Instructor - Debug Guide\ndescription: Implement comprehensive logging for Instruc"
},
{
"path": "docs/concepts/maybe.md",
"chars": 3066,
"preview": "---\ntitle: Maybe Types and Optional Handling in Instructor\ndescription: Handle optional and nullable data with Maybe typ"
},
{
"path": "docs/concepts/migration.md",
"chars": 4889,
"preview": "---\ntitle: Migration Guide\ndescription: Migrate from older Instructor patterns to the modern from_provider approach.\n---"
},
{
"path": "docs/concepts/mode-migration.md",
"chars": 2860,
"preview": "---\ntitle: Mode Migration Guide\ndescription: Migrate from provider-specific modes to the core modes in Instructor.\n---\n\n"
},
{
"path": "docs/concepts/models.md",
"chars": 6266,
"preview": "---\ntitle: Using Pydantic Models for Structured Outputs\ndescription: Learn how to define LLM output schemas with Pydanti"
},
{
"path": "docs/concepts/multimodal.md",
"chars": 17999,
"preview": "---\ntitle: Seamless Multimodal Interactions with Instructor\ndescription: Learn how the Image, PDF and Audio class in Ins"
},
{
"path": "docs/concepts/parallel.md",
"chars": 4507,
"preview": "---\ntitle: Parallel Tools\ndescription: Learn about parallel tools in OpenAI, Google, and Anthropic.\n---\n\n## See Also\n\n- "
},
{
"path": "docs/concepts/partial.md",
"chars": 6277,
"preview": "---\ntitle: Streaming Partial Responses with Instructor and OpenAI\ndescription: Learn to utilize field-level streaming wi"
},
{
"path": "docs/concepts/patching.md",
"chars": 5310,
"preview": "---\ntitle: How Instructor Patches LLM Clients\ndescription: Learn how Instructor adds structured output capabilities to L"
},
{
"path": "docs/concepts/philosophy.md",
"chars": 7799,
"preview": "---\ntitle: Philosophy\ndescription: The principles behind Instructor - why simple beats complex every time.\n---\n\n# Philos"
},
{
"path": "docs/concepts/prompt_caching.md",
"chars": 16062,
"preview": "---\ntitle: Understanding Prompt Caching for API Efficiency\ndescription: Explore how prompt caching optimizes performance"
},
{
"path": "docs/concepts/prompting.md",
"chars": 8578,
"preview": "---\ntitle: Prompt Engineering Best Practices\ndescription: Learn prompt engineering tips for using Pydantic and Instructo"
},
{
"path": "docs/concepts/raw_response.md",
"chars": 4564,
"preview": "---\ntitle: Creating a Model with OpenAI Completions\ndescription: Learn how to create a custom model using OpenAI's API t"
},
{
"path": "docs/concepts/reask_validation.md",
"chars": 11254,
"preview": "---\ntitle: Enhancing AI Validations with Pydantic's Framework\ndescription: Learn how to improve AI outputs using Pydanti"
},
{
"path": "docs/concepts/retrying.md",
"chars": 8508,
"preview": "---\ntitle: \"Retry Logic with Tenacity\"\ndescription: \"Learn how to implement retry logic with Tenacity for LLM applicatio"
},
{
"path": "docs/concepts/semantic_validation.md",
"chars": 20529,
"preview": "---\ntitle: Semantic Validation with LLMs\ndescription: Using LLMs for complex validation that goes beyond rule-based appr"
},
{
"path": "docs/concepts/templating.md",
"chars": 7865,
"preview": "---\ntitle: Prompt Templating with Jinja - Dynamic Prompt Generation\ndescription: Create dynamic prompts using Jinja temp"
},
{
"path": "docs/concepts/typeadapter.md",
"chars": 415,
"preview": "---\ntitle: TypeAdapter in Instructor - Custom Type Handling\ndescription: Use Pydantic TypeAdapter for custom type valida"
},
{
"path": "docs/concepts/typeddicts.md",
"chars": 838,
"preview": "---\ntitle: Using TypedDicts with OpenAI API\ndescription: Learn how to utilize TypedDicts in Python with the OpenAI API f"
},
{
"path": "docs/concepts/types.md",
"chars": 7733,
"preview": "---\ntitle: Working with Types in Instructor\ndescription: Learn how to use different data types with Instructor, from sim"
},
{
"path": "docs/concepts/union.md",
"chars": 482,
"preview": "---\ntitle: Using Union Types in Pydantic Models\ndescription: Learn how to implement Union types in Pydantic models to ha"
},
{
"path": "docs/concepts/unions.md",
"chars": 10711,
"preview": "---\ntitle: Union Types in Instructor\ndescription: Learn how to use Union types to handle multiple possible response type"
},
{
"path": "docs/concepts/usage.md",
"chars": 2068,
"preview": "---\ntitle: Handling Non-Streaming Requests in OpenAI with Usage Tracking\ndescription: Learn how to manage non-streaming "
},
{
"path": "docs/concepts/validation.md",
"chars": 5026,
"preview": "---\ntitle: Validation\ndescription: Learn how to validate LLM outputs with Pydantic for type safety and data consistency."
},
{
"path": "docs/contributing.md",
"chars": 14034,
"preview": "---\ntitle: Contribute to Instructor: Evals, Issues, and Pull Requests\ndescription: Join us in enhancing the Instructor l"
},
{
"path": "docs/debugging.md",
"chars": 6561,
"preview": "---\ntitle: Debugging Instructor Applications\ndescription: Learn how to debug Instructor applications with hooks, logging"
},
{
"path": "docs/examples/action_items.md",
"chars": 5770,
"preview": "---\ntitle: Automating Action Item Extraction from Meeting Transcripts\ndescription: Learn to extract actionable items fro"
},
{
"path": "docs/examples/audio_extraction.md",
"chars": 3263,
"preview": "---\ntitle: Audio Information Extraction with OpenAI\ndescription: Learn how to extract structured information from audio "
},
{
"path": "docs/examples/batch_classification_langsmith.md",
"chars": 5532,
"preview": "---\ntitle: Enhancing OpenAI Client with LangSmith and Instructor\ndescription: Discover how to integrate LangSmith with t"
},
{
"path": "docs/examples/batch_in_memory.md",
"chars": 9846,
"preview": "---\ntitle: In-Memory Batch Processing for Serverless Applications\ndescription: Learn how to use Instructor's in-memory b"
},
{
"path": "docs/examples/batch_job_oai.md",
"chars": 9071,
"preview": "---\ntitle: Generating Synthetic Data with OpenAI's Batch API\ndescription: Learn to use OpenAI's Batch API for large-scal"
},
{
"path": "docs/examples/building_knowledge_graphs.md",
"chars": 2616,
"preview": "---\ntitle: Building Knowledge Graphs from Text\ndescription: Learn to construct knowledge graphs from textual data using "
},
{
"path": "docs/examples/bulk_classification.md",
"chars": 15580,
"preview": "---\ntitle: User-Provided Tag Classification Tutorial\ndescription: Learn to classify user-provided tags effectively using"
},
{
"path": "docs/examples/classification.md",
"chars": 10285,
"preview": "---\ntitle: Text Classification with OpenAI and Pydantic\ndescription: Learn to implement single-label and multi-label tex"
},
{
"path": "docs/examples/document_segmentation.md",
"chars": 9806,
"preview": "---\ntitle: \"Document Segmentation with LLMs: A Comprehensive Guide\"\ndescription: Learn effective document segmentation t"
},
{
"path": "docs/examples/entity_resolution.md",
"chars": 10436,
"preview": "---\ntitle: Entity Resolution and Visualization for Legal Documents\ndescription: Learn how to extract, resolve, and visua"
},
{
"path": "docs/examples/exact_citations.md",
"chars": 7245,
"preview": "---\ntitle: Citation Validation with Instructor - Prevent Hallucinations\ndescription: Validate AI-generated answers with "
},
{
"path": "docs/examples/examples.md",
"chars": 2606,
"preview": "---\ntitle: Few-Shot Learning with Examples - Pydantic Models\ndescription: Enhance Pydantic models with practical example"
},
{
"path": "docs/examples/extract_contact_info.md",
"chars": 4174,
"preview": "---\ntitle: Contact Information Extraction - Lead Generation Automation\ndescription: Automate customer lead extraction fr"
},
{
"path": "docs/examples/extract_slides.md",
"chars": 7391,
"preview": "---\ntitle: Extracting Competitor Data from Slides Using AI\ndescription: Learn how to extract competitor data from presen"
},
{
"path": "docs/examples/extracting_receipts.md",
"chars": 5632,
"preview": "---\ntitle: Receipt Data Extraction with GPT-4 Vision - Expense Tracking\ndescription: Extract and validate receipt data f"
},
{
"path": "docs/examples/extracting_tables.md",
"chars": 10454,
"preview": "---\ntitle: Extracting Tables from Images using GPT-Vision\ndescription: Learn how to use Python and GPT-Vision to extract"
},
{
"path": "docs/examples/groq.md",
"chars": 1703,
"preview": "---\ntitle: Groq AI Integration - Fast Structured Outputs\ndescription: Use Groq AI with Instructor for fast structured ou"
},
{
"path": "docs/examples/image_to_ad_copy.md",
"chars": 16296,
"preview": "---\ntitle: Automatically Generate Advertising Copy from Product Images Using GPT-4 Vision\ndescription: Learn how to use "
},
{
"path": "docs/examples/index.md",
"chars": 6577,
"preview": "---\ntitle: Instructor Cookbook Collection\ndescription: Practical examples and recipes for solving real-world problems wi"
},
{
"path": "docs/examples/knowledge_graph.md",
"chars": 13652,
"preview": "---\ntitle: 'Visualizing Knowledge Graphs: A Guide to Complex Topics'\ndescription: Learn how to create and update knowled"
},
{
"path": "docs/examples/local_classification.md",
"chars": 5606,
"preview": "---\ntitle: Classifying Confidential Data with Local AI Models\ndescription: Learn to classify private documents securely "
},
{
"path": "docs/examples/mistral.md",
"chars": 1223,
"preview": "---\ntitle: Using MistralAI for Structured Outputs\ndescription: Learn how to use MistralAI models for inference, includin"
},
{
"path": "docs/examples/moderation.md",
"chars": 2727,
"preview": "---\ntitle: OpenAI Moderation Example for Content Compliance\ndescription: Learn how to use OpenAI's moderation endpoint t"
},
{
"path": "docs/examples/multi_modal_gemini.md",
"chars": 7532,
"preview": "---\ntitle: Utilizing Gemini for Multi-Modal Data Processing with Audio Files\ndescription: Learn how to use Gemini with G"
},
{
"path": "docs/examples/multiple_classification.md",
"chars": 2281,
"preview": "---\ntitle: Multi-Label Classification - Support Ticket Categorization\ndescription: Implement multi-label classification "
},
{
"path": "docs/examples/ollama.md",
"chars": 5685,
"preview": "---\ntitle: Harnessing Structured Outputs with Ollama and Instructor\ndescription: Discover how to utilize Ollama's Instru"
},
{
"path": "docs/examples/open_source.md",
"chars": 791,
"preview": "---\ntitle: Open Source Model Providers for Chat API\ndescription: Explore tested open source models compatible with the O"
},
{
"path": "docs/examples/pandas_df.md",
"chars": 4075,
"preview": "---\ntitle: Extracting DataFrames from Markdown using Pandas\ndescription: Learn how to extract and convert Markdown table"
},
{
"path": "docs/examples/partial_streaming.md",
"chars": 2446,
"preview": "---\ntitle: Partial Response Streaming - Field-Level Updates\ndescription: Stream partial responses with Instructor for re"
},
{
"path": "docs/examples/pii.md",
"chars": 6135,
"preview": "---\ntitle: Extracting and Scrubbing PII Data with OpenAI\ndescription: Learn to extract and sanitize Personally Identifia"
},
{
"path": "docs/examples/planning-tasks.md",
"chars": 7971,
"preview": "---\ntitle: Query Planning with Instructor - Complex Task Decomposition\ndescription: Plan and execute complex query plans"
},
{
"path": "docs/examples/recursive.md",
"chars": 4453,
"preview": "---\ntitle: Working with Recursive Schemas in Instructor\ndescription: Learn how to effectively implement and use recursiv"
},
{
"path": "docs/examples/search.md",
"chars": 2558,
"preview": "---\ntitle: Search Query Segmentation with Instructor - Multi-Task Extraction\ndescription: Segment complex search queries"
},
{
"path": "docs/examples/self_critique.md",
"chars": 4860,
"preview": "---\ntitle: Implementing Self-Correction with LLM Validator\ndescription: Learn how to use llm_validator for self-healing "
},
{
"path": "docs/examples/single_classification.md",
"chars": 2033,
"preview": "---\ntitle: Single-Label Text Classification - SPAM Detection Example\ndescription: Implement single-label text classifica"
},
{
"path": "docs/examples/sqlmodel.md",
"chars": 17863,
"preview": "---\ntitle: SQLModel with Instructor - Complete Guide to AI-Powered Database Operations\ndescription: Master SQLModel inte"
},
{
"path": "docs/examples/tables_from_vision.md",
"chars": 4168,
"preview": "---\ntitle: Extracting Tables from Images Using OpenAI GPT-4\ndescription: Learn how to convert images into markdown table"
},
{
"path": "docs/examples/tracing_with_langfuse.md",
"chars": 8683,
"preview": "---\ntitle: Observability & Tracing with Langfuse\ndescription: Learn how to trace and monitor Instructor API calls using "
},
{
"path": "docs/examples/using_decimals.md",
"chars": 1422,
"preview": "---\ntitle: Working with Decimal Types in Instructor\ndescription: Learn how to use Python Decimal types for precise finan"
},
{
"path": "docs/examples/watsonx.md",
"chars": 1893,
"preview": "---\ntitle: IBM watsonx.ai Integration - Enterprise LLM Inference\ndescription: Use IBM watsonx.ai with Instructor through"
},
{
"path": "docs/examples/youtube_clips.md",
"chars": 4533,
"preview": "---\ntitle: Generating YouTube Clips from Transcripts Using Instructor\ndescription: Learn to create concise YouTube clips"
},
{
"path": "docs/faq.md",
"chars": 5280,
"preview": "---\ntitle: Frequently Asked Questions\ndescription: Common questions and answers about using Instructor\n---\n\n# Frequently"
},
{
"path": "docs/getting-started.md",
"chars": 6850,
"preview": "---\ntitle: Getting Started\ndescription: A step-by-step guide to getting started with Instructor for structured outputs f"
},
{
"path": "docs/help.md",
"chars": 1455,
"preview": "---\ntitle: Getting Started with Instructor: Help and Resources\ndescription: Explore key resources for getting help with "
},
{
"path": "docs/hooks/hide_lines.py",
"chars": 849,
"preview": "from typing import Any\nimport mkdocs.plugins\nfrom pymdownx import highlight # type: ignore\n\n\n@mkdocs.plugins.event_prio"
},
{
"path": "docs/index.md",
"chars": 17878,
"preview": "---\ntitle: \"Instructor - Multi-Language Library for Structured LLM Outputs | Python, TypeScript, Go, Ruby\"\ndescription: "
},
{
"path": "docs/installation.md",
"chars": 808,
"preview": "---\ntitle: Installing Instructor with Pip\ndescription: Learn how to install Instructor and its dependencies using pip fo"
},
{
"path": "docs/integrations/anthropic.md",
"chars": 23410,
"preview": "---\ntitle: \"Anthropic Claude Tutorial: Structured Outputs with Instructor\"\ndescription: \"Complete guide to using Anthrop"
},
{
"path": "docs/integrations/anyscale.md",
"chars": 2080,
"preview": "---\ntitle: Anyscale\ndescription: Guide to using instructor with Anyscale\n---\n\n# Structured outputs with Anyscale, a comp"
},
{
"path": "docs/integrations/azure.md",
"chars": 9349,
"preview": "---\ntitle: Structured outputs with Azure OpenAI, a complete guide w/ instructor\ndescription: Learn how to use Azure Open"
},
{
"path": "docs/integrations/bedrock.md",
"chars": 9826,
"preview": "---\ntitle: Structured Outputs with AWS Bedrock and Pydantic\ndescription: Learn how to use AWS Bedrock with Instructor fo"
},
{
"path": "docs/integrations/cerebras.md",
"chars": 5951,
"preview": "---\ntitle: \"Structured outputs with Cerebras, a complete guide w/ instructor\"\ndescription: \"Complete guide to using Inst"
},
{
"path": "docs/integrations/cohere.md",
"chars": 5101,
"preview": "---\ntitle: Structured outputs with Cohere, a complete guide w/ instructor\ndescription: Learn how to leverage Cohere's co"
},
{
"path": "docs/integrations/cortex.md",
"chars": 4280,
"preview": "---\ntitle: \"Structured outputs with Cortex, a complete guide w/ instructor\"\ndescription: \"Learn how to use Cortex with I"
}
]
// ... and 506 more files (download for full content)
About this extraction
This page contains the full source code of the 567-labs/instructor GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 706 files (4.0 MB), approximately 1.1M tokens, and a symbol index with 1910 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.