SYMBOL INDEX (834 symbols across 71 files) FILE: .github/scripts/zenodo_publish.py function new_version_from_record (line 52) | def new_version_from_record(record_id: str): function upload_file (line 65) | def upload_file(bucket_url: str, path: str, dest_name: str = None): function main (line 78) | def main(): FILE: benchmarks/benchmark.py class BenchmarkRunner (line 66) | class BenchmarkRunner: method __init__ (line 69) | def __init__(self): method set_tokenizer (line 75) | def set_tokenizer(self, tokenizer_type: str): method print_header (line 84) | def print_header(self): method benchmark_tokenization (line 94) | def benchmark_tokenization(self) -> list[dict[str, Any]]: method test_single_extraction (line 140) | def test_single_extraction( method test_diverse_text_types (line 276) | def test_diverse_text_types( method save_results (line 311) | def save_results(self, results: dict[str, Any]): method run_diverse_benchmark (line 358) | def run_diverse_benchmark(self, models: list[str] | None = None): function main (line 373) | def main(): FILE: benchmarks/config.py class TokenizationConfig (line 33) | class TokenizationConfig: class ModelConfig (line 41) | class ModelConfig: class TextTypes (line 52) | class TextTypes(str, enum.Enum): class DisplayConfig (line 79) | class DisplayConfig: class PathConfig (line 90) | class PathConfig: method get_result_path (line 95) | def get_result_path(self, timestamp: str, suffix: str = "") -> Path: FILE: benchmarks/plotting.py function create_diverse_plots (line 36) | def create_diverse_plots(results: dict[str, Any], filepath: Path) -> bool: function _plot_tokenization_throughput (line 84) | def _plot_tokenization_throughput(ax, results): function _plot_tokenization_rate (line 125) | def _plot_tokenization_rate(ax, results): function _plot_extraction_density (line 170) | def _plot_extraction_density(ax, results): function _plot_processing_speed (line 220) | def _plot_processing_speed(ax, results): function _plot_summary_table (line 270) | def _plot_summary_table(ax, results): function create_comparison_plots (line 320) | def create_comparison_plots(json_files: list[Path], output_path: Path) -... function _plot_entity_comparison (line 376) | def _plot_entity_comparison(ax, all_results): function _plot_time_comparison (line 435) | def _plot_time_comparison(ax, all_results): function _plot_tokenization_comparison (line 492) | def _plot_tokenization_comparison(ax, all_results): function _plot_success_rate_comparison (line 550) | def _plot_success_rate_comparison(ax, all_results): function _plot_token_rate_by_language (line 606) | def _plot_token_rate_by_language(ax, all_results): function _plot_timeline (line 645) | def _plot_timeline(ax, all_results): FILE: benchmarks/utils.py function download_text (line 26) | def download_text(url: str) -> str: function extract_text_content (line 42) | def extract_text_content(full_text: str) -> str: function get_text_from_gutenberg (line 77) | def get_text_from_gutenberg(text_type: config.TextTypes) -> str: function get_optimal_text_size (line 95) | def get_optimal_text_size(text: str, model_id: str) -> str: function get_extraction_example (line 117) | def get_extraction_example(text_type: config.TextTypes) -> dict[str, str... function get_git_info (line 131) | def get_git_info() -> dict[str, str]: function analyze_tokenization (line 167) | def analyze_tokenization( function format_tokenization_summary (line 194) | def format_tokenization_summary(analysis: dict[str, Any]) -> str: FILE: examples/custom_provider_plugin/langextract_provider_example/provider.py class CustomGeminiProvider (line 31) | class CustomGeminiProvider(lx.inference.BaseLanguageModel): method __init__ (line 56) | def __init__( method get_schema_class (line 103) | def get_schema_class(cls) -> type[lx.schema.BaseSchema] | None: method apply_schema (line 114) | def apply_schema(self, schema_instance: lx.schema.BaseSchema | None) -... method infer (line 138) | def infer( FILE: examples/custom_provider_plugin/langextract_provider_example/schema.py class CustomProviderSchema (line 24) | class CustomProviderSchema(lx.schema.BaseSchema): method __init__ (line 39) | def __init__(self, schema_dict: dict[str, Any], strict_mode: bool = Tr... method from_examples (line 50) | def from_examples( method to_provider_config (line 122) | def to_provider_config(self) -> dict[str, Any]: method supports_strict_mode (line 145) | def supports_strict_mode(self) -> bool: method schema_dict (line 155) | def schema_dict(self) -> dict[str, Any]: FILE: examples/custom_provider_plugin/test_example_provider.py function main (line 29) | def main(): FILE: examples/ollama/demo_ollama.py function check_ollama_available (line 67) | def check_ollama_available(url: str = DEFAULT_OLLAMA_URL) -> bool: function ensure_output_directory (line 76) | def ensure_output_directory() -> Path: function print_header (line 83) | def print_header(title: str, width: int = 80) -> None: function print_section (line 90) | def print_section(title: str, width: int = 60) -> None: function print_results_summary (line 96) | def print_results_summary(extractions: list[lx.data.Extraction]) -> None: function example_romeo_juliet (line 114) | def example_romeo_juliet( function example_medication_ner (line 175) | def example_medication_ner( function example_medication_relationships (line 231) | def example_medication_relationships( function example_shakespeare_dialogue (line 328) | def example_shakespeare_dialogue( function save_results (line 418) | def save_results( function main (line 458) | def main(): FILE: langextract/__init__.py function extract (line 53) | def extract(*args: Any, **kwargs: Any): function visualize (line 58) | def visualize(*args: Any, **kwargs: Any): function __getattr__ (line 87) | def __getattr__(name: str) -> Any: function __dir__ (line 101) | def __dir__(): FILE: langextract/_compat/exceptions.py function __getattr__ (line 26) | def __getattr__(name: str): FILE: langextract/_compat/inference.py class InferenceType (line 23) | class InferenceType(enum.Enum): function __getattr__ (line 30) | def __getattr__(name: str): FILE: langextract/_compat/registry.py function __getattr__ (line 25) | def __getattr__(name: str): FILE: langextract/_compat/schema.py function __getattr__ (line 23) | def __getattr__(name: str): FILE: langextract/annotation.py function _merge_non_overlapping_extractions (line 46) | def _merge_non_overlapping_extractions( function _extractions_overlap (line 87) | def _extractions_overlap( function _document_chunk_iterator (line 118) | def _document_chunk_iterator( class Annotator (line 163) | class Annotator: method __init__ (line 166) | def __init__( method annotate_documents (line 209) | def annotate_documents( method _annotate_documents_single_pass (line 284) | def _annotate_documents_single_pass( method _annotate_documents_sequential_passes (line 442) | def _annotate_documents_sequential_passes( method annotate_text (line 527) | def annotate_text( FILE: langextract/chunking.py class TokenUtilError (line 35) | class TokenUtilError(exceptions.LangExtractError): class TextChunk (line 40) | class TextChunk: method __str__ (line 60) | def __str__(self): method document_id (line 86) | def document_id(self) -> str | None: method document_text (line 93) | def document_text(self) -> tokenizer_lib.TokenizedText | None: method chunk_text (line 100) | def chunk_text(self) -> str: method sanitized_chunk_text (line 111) | def sanitized_chunk_text(self) -> str: method additional_context (line 118) | def additional_context(self) -> str | None: method char_interval (line 125) | def char_interval(self) -> data.CharInterval: function create_token_interval (line 143) | def create_token_interval( function get_token_interval_text (line 169) | def get_token_interval_text( function get_char_interval (line 216) | def get_char_interval( function _sanitize (line 246) | def _sanitize(text: str) -> str: function make_batches_of_textchunk (line 265) | def make_batches_of_textchunk( class SentenceIterator (line 282) | class SentenceIterator: method __init__ (line 285) | def __init__( method __iter__ (line 312) | def __iter__(self) -> Iterator[tokenizer_lib.TokenInterval]: method __next__ (line 315) | def __next__(self) -> tokenizer_lib.TokenInterval: class ChunkIterator (line 343) | class ChunkIterator: method __init__ (line 385) | def __init__( method __iter__ (line 422) | def __iter__(self) -> Iterator[TextChunk]: method _tokens_exceed_buffer (line 425) | def _tokens_exceed_buffer( method __next__ (line 441) | def __next__(self) -> TextChunk: FILE: langextract/core/base_model.py class BaseLanguageModel (line 31) | class BaseLanguageModel(abc.ABC): method __init__ (line 38) | def __init__(self, constraint: types.Constraint | None = None, **kwarg... method get_schema_class (line 52) | def get_schema_class(cls) -> type[Any] | None: method apply_schema (line 56) | def apply_schema(self, schema_instance: schema.BaseSchema | None) -> N... method schema (line 68) | def schema(self) -> schema.BaseSchema | None: method set_fence_output (line 76) | def set_fence_output(self, fence_output: bool | None) -> None: method requires_fence_output (line 87) | def requires_fence_output(self) -> bool: method merge_kwargs (line 104) | def merge_kwargs( method infer (line 122) | def infer( method infer_batch (line 137) | def infer_batch( method parse_output (line 156) | def parse_output(self, output: str) -> Any: FILE: langextract/core/data.py class AlignmentStatus (line 43) | class AlignmentStatus(enum.Enum): class CharInterval (line 51) | class CharInterval: class Extraction (line 64) | class Extraction: method __init__ (line 96) | def __init__( method token_interval (line 120) | def token_interval(self) -> tokenizer.TokenInterval | None: method token_interval (line 124) | def token_interval(self, value: tokenizer.TokenInterval | None) -> None: class Document (line 129) | class Document: method __init__ (line 149) | def __init__( method document_id (line 161) | def document_id(self) -> str: method document_id (line 168) | def document_id(self, value: str | None) -> None: method tokenized_text (line 173) | def tokenized_text(self) -> tokenizer.TokenizedText: method tokenized_text (line 179) | def tokenized_text(self, value: tokenizer.TokenizedText) -> None: class AnnotatedDocument (line 184) | class AnnotatedDocument: method __init__ (line 204) | def __init__( method document_id (line 216) | def document_id(self) -> str: method document_id (line 223) | def document_id(self, value: str | None) -> None: method tokenized_text (line 228) | def tokenized_text(self) -> tokenizer.TokenizedText | None: method tokenized_text (line 234) | def tokenized_text(self, value: tokenizer.TokenizedText) -> None: class ExampleData (line 239) | class ExampleData: FILE: langextract/core/debug_utils.py function _safe_repr (line 49) | def _safe_repr(obj: Any) -> str: function _redact_value (line 57) | def _redact_value(name: str, value: Any) -> str: function _redact_mapping (line 73) | def _redact_mapping(mapping: Mapping[str, Any]) -> dict[str, str]: function _format_bound_args (line 81) | def _format_bound_args( function debug_log_calls (line 106) | def debug_log_calls(fn: Callable) -> Callable: function configure_debug_logging (line 151) | def configure_debug_logging() -> None: FILE: langextract/core/exceptions.py class LangExtractError (line 38) | class LangExtractError(Exception): class InferenceError (line 47) | class InferenceError(LangExtractError): class InferenceConfigError (line 51) | class InferenceConfigError(InferenceError): class InferenceRuntimeError (line 59) | class InferenceRuntimeError(InferenceError): method __init__ (line 66) | def __init__( class InferenceOutputError (line 85) | class InferenceOutputError(LangExtractError): method __init__ (line 88) | def __init__(self, message: str): class InvalidDocumentError (line 93) | class InvalidDocumentError(LangExtractError): class InternalError (line 100) | class InternalError(LangExtractError): class ProviderError (line 107) | class ProviderError(LangExtractError): class SchemaError (line 111) | class SchemaError(LangExtractError): class FormatError (line 115) | class FormatError(LangExtractError): class FormatParseError (line 119) | class FormatParseError(FormatError): FILE: langextract/core/format_handler.py class FormatHandler (line 49) | class FormatHandler: method __init__ (line 66) | def __init__( method __repr__ (line 106) | def __repr__(self) -> str: method format_extraction_example (line 116) | def format_extraction_example( method parse_output (line 151) | def parse_output( method _add_fences (line 247) | def _add_fences(self, content: str) -> str: method _is_valid_language_tag (line 252) | def _is_valid_language_tag( method _parse_with_fallback (line 261) | def _parse_with_fallback(self, content: str, strict: bool): method _extract_content (line 278) | def _extract_content(self, text: str) -> str: method from_resolver_params (line 348) | def from_resolver_params( method from_kwargs (line 425) | def from_kwargs(cls, **kwargs) -> FormatHandler: FILE: langextract/core/schema.py class BaseSchema (line 38) | class BaseSchema(abc.ABC): method from_examples (line 43) | def from_examples( method to_provider_config (line 51) | def to_provider_config(self) -> dict[str, Any]: method requires_raw_output (line 61) | def requires_raw_output(self) -> bool: method validate_format (line 68) | def validate_format(self, format_handler: fh.FormatHandler) -> None: method sync_with_provider_kwargs (line 78) | def sync_with_provider_kwargs(self, kwargs: dict[str, Any]) -> None: class FormatModeSchema (line 93) | class FormatModeSchema(BaseSchema): method __init__ (line 101) | def __init__(self, format_type: types.FormatType = types.FormatType.JS... method from_examples (line 108) | def from_examples( method to_provider_config (line 117) | def to_provider_config(self) -> dict[str, Any]: method requires_raw_output (line 122) | def requires_raw_output(self) -> bool: method sync_with_provider_kwargs (line 126) | def sync_with_provider_kwargs(self, kwargs: dict[str, Any]) -> None: FILE: langextract/core/tokenizer.py class BaseTokenizerError (line 53) | class BaseTokenizerError(exceptions.LangExtractError): class InvalidTokenIntervalError (line 57) | class InvalidTokenIntervalError(BaseTokenizerError): class SentenceRangeError (line 61) | class SentenceRangeError(BaseTokenizerError): class CharInterval (line 66) | class CharInterval: class TokenInterval (line 79) | class TokenInterval: class TokenType (line 94) | class TokenType(enum.IntEnum): class Token (line 109) | class Token: class TokenizedText (line 135) | class TokenizedText: class Tokenizer (line 165) | class Tokenizer(abc.ABC): method tokenize (line 169) | def tokenize(self, text: str) -> TokenizedText: class RegexTokenizer (line 180) | class RegexTokenizer(Tokenizer): method tokenize (line 188) | def tokenize(self, text: str) -> TokenizedText: function tokenize (line 234) | def tokenize( class Sentinel (line 257) | class Sentinel: method __init__ (line 260) | def __init__(self, name: str): method __repr__ (line 263) | def __repr__(self) -> str: function _get_script_fast (line 273) | def _get_script_fast(char: str) -> str | Sentinel: function _classify_grapheme (line 282) | def _classify_grapheme(g: str) -> TokenType: function _get_common_script_cached (line 313) | def _get_common_script_cached(c: str) -> str | Sentinel: class UnicodeTokenizer (line 321) | class UnicodeTokenizer(Tokenizer): method tokenize (line 336) | def tokenize(self, text: str) -> TokenizedText: method _emit_token (line 444) | def _emit_token( function tokens_text (line 470) | def tokens_text( function _is_end_of_sentence_token (line 510) | def _is_end_of_sentence_token( function _is_sentence_break_after_newline (line 549) | def _is_sentence_break_after_newline( function find_sentence_range (line 580) | def find_sentence_range( FILE: langextract/core/types.py class FormatType (line 30) | class FormatType(enum.Enum): class ConstraintType (line 37) | class ConstraintType(enum.Enum): class Constraint (line 44) | class Constraint: class ScoredOutput (line 55) | class ScoredOutput: method __str__ (line 61) | def __str__(self) -> str: FILE: langextract/data_lib.py function enum_asdict_factory (line 27) | def enum_asdict_factory(items: Iterable[tuple[str, Any]]) -> dict[str, A... function annotated_document_to_dict (line 57) | def annotated_document_to_dict( function dict_to_annotated_document (line 85) | def dict_to_annotated_document( FILE: langextract/extraction.py function extract (line 36) | def extract( FILE: langextract/factory.py class ModelConfig (line 36) | class ModelConfig: function _kwargs_with_environment_defaults (line 53) | def _kwargs_with_environment_defaults( function create_model (line 103) | def create_model( function create_model_from_id (line 179) | def create_model_from_id( function _create_model_with_schema (line 200) | def _create_model_with_schema( FILE: langextract/inference.py function __getattr__ (line 26) | def __getattr__(name: str): FILE: langextract/io.py class InvalidDatasetError (line 38) | class InvalidDatasetError(exceptions.LangExtractError): class Dataset (line 43) | class Dataset(abc.ABC): method load (line 50) | def load(self, delimiter: str = ',') -> Iterator[data.Document]: function save_annotated_documents (line 85) | def save_annotated_documents( function load_annotated_documents_jsonl (line 140) | def load_annotated_documents_jsonl( function _read_csv (line 191) | def _read_csv( function is_url (line 222) | def is_url(text: str) -> bool: function download_text_from_url (line 261) | def download_text_from_url( FILE: langextract/plugins.py function _safe_entry_points (line 44) | def _safe_entry_points(group: str) -> list: function _discovered (line 63) | def _discovered() -> dict[str, str]: function available_providers (line 88) | def available_providers( function _load_class (line 124) | def _load_class(spec: str) -> type[base_model.BaseLanguageModel]: function get_provider_class (line 183) | def get_provider_class( FILE: langextract/progress.py function create_download_progress_bar (line 34) | def create_download_progress_bar( function create_extraction_progress_bar (line 81) | def create_extraction_progress_bar( function print_download_complete (line 105) | def print_download_complete( function print_extraction_complete (line 122) | def print_extraction_complete() -> None: function print_extraction_summary (line 127) | def print_extraction_summary( function create_save_progress_bar (line 167) | def create_save_progress_bar( function create_load_progress_bar (line 189) | def create_load_progress_bar( function print_save_complete (line 223) | def print_save_complete(num_docs: int, file_path: str) -> None: function print_load_complete (line 238) | def print_load_complete(num_docs: int, file_path: str) -> None: function get_model_info (line 253) | def get_model_info(language_model: Any) -> str | None: function format_extraction_stats (line 271) | def format_extraction_stats(current_chars: int, processed_chars: int) ->... function create_extraction_postfix (line 286) | def create_extraction_postfix(current_chars: int, processed_chars: int) ... function format_extraction_progress (line 301) | def format_extraction_progress( function create_pass_progress_bar (line 331) | def create_pass_progress_bar( FILE: langextract/prompt_validation.py class PromptValidationLevel (line 44) | class PromptValidationLevel(enum.Enum): class _IssueKind (line 52) | class _IssueKind(enum.Enum): class ValidationIssue (line 60) | class ValidationIssue: method short_msg (line 72) | def short_msg(self) -> str: class ValidationReport (line 87) | class ValidationReport: method has_failed (line 93) | def has_failed(self) -> bool: method has_non_exact (line 98) | def has_non_exact(self) -> bool: class PromptAlignmentError (line 103) | class PromptAlignmentError(RuntimeError): class AlignmentPolicy (line 108) | class AlignmentPolicy: function _preview (line 116) | def _preview(s: str, n: int = 120) -> str: function validate_prompt_alignment (line 122) | def validate_prompt_alignment( function handle_alignment_report (line 212) | def handle_alignment_report( FILE: langextract/prompting.py class PromptBuilderError (line 31) | class PromptBuilderError(exceptions.LangExtractError): class ParseError (line 35) | class ParseError(PromptBuilderError): class PromptTemplateStructured (line 40) | class PromptTemplateStructured: function read_prompt_template_structured_from_file (line 52) | def read_prompt_template_structured_from_file( class QAPromptGenerator (line 85) | class QAPromptGenerator: method __str__ (line 94) | def __str__(self) -> str: method format_example_as_text (line 98) | def format_example_as_text(self, example: data.ExampleData) -> str: method render (line 115) | def render(self, question: str, additional_context: str | None = None)... class PromptBuilder (line 141) | class PromptBuilder: method __init__ (line 148) | def __init__(self, generator: QAPromptGenerator): method build_prompt (line 156) | def build_prompt( class ContextAwarePromptBuilder (line 179) | class ContextAwarePromptBuilder(PromptBuilder): method __init__ (line 193) | def __init__( method context_window_chars (line 210) | def context_window_chars(self) -> int | None: method build_prompt (line 215) | def build_prompt( method _build_effective_context (line 242) | def _build_effective_context( method _update_state (line 268) | def _update_state(self, document_id: str, chunk_text: str) -> None: FILE: langextract/providers/__init__.py function load_builtins_once (line 49) | def load_builtins_once() -> None: function load_plugins_once (line 74) | def load_plugins_once() -> None: function _reset_for_testing (line 145) | def _reset_for_testing() -> None: function __getattr__ (line 152) | def __getattr__(name: str): FILE: langextract/providers/builtin_registry.py class ProviderConfig (line 26) | class ProviderConfig(TypedDict): FILE: langextract/providers/gemini.py class GeminiLanguageModel (line 56) | class GeminiLanguageModel(base_model.BaseLanguageModel): # pylint: disa... method get_schema_class (line 76) | def get_schema_class(cls) -> type[schema.BaseSchema] | None: method apply_schema (line 84) | def apply_schema(self, schema_instance: schema.BaseSchema | None) -> N... method __init__ (line 94) | def __init__( method _validate_schema_config (line 190) | def _validate_schema_config(self) -> None: method _process_single_prompt (line 202) | def _process_single_prompt( method infer (line 228) | def infer( FILE: langextract/providers/gemini_batch.py class BatchConfig (line 58) | class BatchConfig: method __post_init__ (line 82) | def __post_init__(self): method from_dict (line 113) | def from_dict(cls, d: dict | None) -> BatchConfig: function _default_job_create_callback (line 142) | def _default_job_create_callback(job: Any) -> None: function _snake_to_camel (line 165) | def _snake_to_camel(key: str) -> str: function _is_vertexai_client (line 171) | def _is_vertexai_client(client) -> bool: function _get_project_location (line 183) | def _get_project_location( function _get_bucket_name (line 203) | def _get_bucket_name(project: str | None, location: str) -> str: function _ensure_bucket_lifecycle (line 209) | def _ensure_bucket_lifecycle( function _build_request (line 248) | def _build_request( function _submit_file (line 297) | def _submit_file( class GCSBatchCache (line 378) | class GCSBatchCache: method __init__ (line 381) | def __init__(self, bucket_name: str, project: str | None = None): method _compute_hash (line 387) | def _compute_hash(self, key_data: dict) -> str: method _get_single (line 392) | def _get_single(self, key_hash: str) -> str | None: method get_multi (line 404) | def get_multi(self, key_data_list: Sequence[dict]) -> dict[int, str]: method set_multi (line 426) | def set_multi(self, items: Sequence[tuple[dict, str]]) -> None: method iter_items (line 465) | def iter_items(self) -> Iterator[tuple[str, str]]: class _TextResponse (line 485) | class _TextResponse(Protocol): function _safe_get_nested (line 491) | def _safe_get_nested(data: dict, *keys) -> Any: function _extract_text (line 516) | def _extract_text(resp: _TextResponse | dict[str, Any] | None) -> str | ... function _poll_completion (line 540) | def _poll_completion( function _parse_batch_line (line 587) | def _parse_batch_line( function _extract_from_file (line 614) | def _extract_from_file( function infer_batch (line 688) | def infer_batch( FILE: langextract/providers/ollama.py class OllamaLanguageModel (line 128) | class OllamaLanguageModel(base_model.BaseLanguageModel): method get_schema_class (line 153) | def get_schema_class(cls) -> type[schema.BaseSchema] | None: method __repr__ (line 161) | def __repr__(self) -> str: method __init__ (line 172) | def __init__( method infer (line 247) | def infer( method _ollama_query (line 278) | def _ollama_query( FILE: langextract/providers/openai.py class OpenAILanguageModel (line 38) | class OpenAILanguageModel(base_model.BaseLanguageModel): method requires_fence_output (line 54) | def requires_fence_output(self) -> bool: method __init__ (line 60) | def __init__( method _normalize_reasoning_params (line 117) | def _normalize_reasoning_params(self, config: dict) -> dict: method _process_single_prompt (line 133) | def _process_single_prompt( method infer (line 196) | def infer( FILE: langextract/providers/router.py class _Entry (line 41) | class _Entry: function _add_entry (line 55) | def _add_entry( function register_lazy (line 83) | def register_lazy( function register (line 108) | def register( function resolve (line 139) | def resolve(model_id: str) -> type[base_model.BaseLanguageModel]: function resolve_provider (line 170) | def resolve_provider(provider_name: str) -> type[base_model.BaseLanguage... function clear (line 217) | def clear() -> None: function list_providers (line 226) | def list_providers() -> list[tuple[tuple[str, ...], int]]: function list_entries (line 238) | def list_entries() -> list[tuple[list[str], int]]: FILE: langextract/providers/schemas/gemini.py class GeminiSchema (line 31) | class GeminiSchema(schema.BaseSchema): method schema_dict (line 41) | def schema_dict(self) -> dict[str, Any]: method schema_dict (line 46) | def schema_dict(self, schema_dict: dict[str, Any]) -> None: method to_provider_config (line 50) | def to_provider_config(self) -> dict[str, Any]: method requires_raw_output (line 62) | def requires_raw_output(self) -> bool: method validate_format (line 66) | def validate_format(self, format_handler: fh.FormatHandler) -> None: method from_examples (line 98) | def from_examples( FILE: langextract/registry.py function __getattr__ (line 28) | def __getattr__(name: str): FILE: langextract/resolver.py class AbstractResolver (line 53) | class AbstractResolver(abc.ABC): method __init__ (line 57) | def __init__( method fence_output (line 83) | def fence_output(self) -> bool: method fence_output (line 88) | def fence_output(self, fence_output: bool) -> None: method format_type (line 97) | def format_type(self) -> data.FormatType: method format_type (line 102) | def format_type(self, new_format_type: data.FormatType) -> None: method resolve (line 107) | def resolve( method align (line 123) | def align( class ResolverParsingError (line 166) | class ResolverParsingError(exceptions.LangExtractError): class Resolver (line 170) | class Resolver(AbstractResolver): method __init__ (line 181) | def __init__( method resolve (line 234) | def resolve( method align (line 279) | def align( method string_to_extraction_data (line 348) | def string_to_extraction_data( method extract_ordered_extractions (line 383) | def extract_ordered_extractions( class WordAligner (line 485) | class WordAligner: method __init__ (line 488) | def __init__(self): method _set_seqs (line 494) | def _set_seqs( method _get_matching_blocks (line 520) | def _get_matching_blocks(self) -> Sequence[tuple[int, int, int]]: method _fuzzy_align_extraction (line 537) | def _fuzzy_align_extraction( method align_extractions (line 663) | def align_extractions( function _tokenize_with_lowercase (line 874) | def _tokenize_with_lowercase( function _normalize_token (line 904) | def _normalize_token(token: str) -> str: FILE: langextract/schema.py function __getattr__ (line 29) | def __getattr__(name: str): FILE: langextract/visualization.py function get_ipython (line 43) | def get_ipython(): # type: ignore[no-redef] function _is_jupyter (line 49) | def _is_jupyter() -> bool: function _assign_colors (line 179) | def _assign_colors(extractions: list[data.Extraction]) -> dict[str, str]: function _filter_valid_extractions (line 196) | def _filter_valid_extractions( class TagType (line 211) | class TagType(enum.Enum): class SpanPoint (line 219) | class SpanPoint: function _build_highlighted_text (line 235) | def _build_highlighted_text( function _build_legend_html (line 314) | def _build_legend_html(color_map: dict[str, str]) -> str: function _format_attributes (line 331) | def _format_attributes(attributes: dict | None) -> str: function _prepare_extraction_data (line 359) | def _prepare_extraction_data( function _build_visualization_html (line 417) | def _build_visualization_html( function visualize (line 554) | def visualize( FILE: scripts/create_provider_plugin.py function create_directory_structure (line 44) | def create_directory_structure(package_name: str, force: bool = False) -... function create_pyproject_toml (line 68) | def create_pyproject_toml( function create_provider (line 106) | def create_provider( function create_schema (line 230) | def create_schema( function create_test_script (line 318) | def create_test_script( function create_readme (line 456) | def create_readme( function create_gitignore (line 522) | def create_gitignore(base_dir: Path) -> None: function create_license (line 581) | def create_license(base_dir: Path) -> None: function install_and_test (line 604) | def install_and_test(base_dir: Path) -> bool: function parse_arguments (line 638) | def parse_arguments(): function validate_patterns (line 693) | def validate_patterns(patterns: list[str]) -> None: function print_summary (line 710) | def print_summary( function create_plugin (line 737) | def create_plugin( function print_completion_summary (line 769) | def print_completion_summary(with_schema: bool) -> None: function main (line 787) | def main(): FILE: scripts/validate_community_providers.py function normalize_pypi (line 48) | def normalize_pypi(name: str) -> str: function find_table_bounds (line 53) | def find_table_bounds(lines: List[str]) -> Tuple[int, int]: function parse_row (line 64) | def parse_row(line: str) -> List[str]: function validate (line 70) | def validate(filepath: Path) -> bool: function print_report (line 188) | def print_report(errors: List[str], warnings: List[str]) -> None: FILE: tests/annotation_test.py class AnnotatorTest (line 35) | class AnnotatorTest(absltest.TestCase): method setUp (line 37) | def setUp(self): method assert_char_interval_match_source (line 47) | def assert_char_interval_match_source( method test_annotate_text_single_chunk (line 80) | def test_annotate_text_single_chunk(self): method test_annotate_text_without_index_suffix (line 206) | def test_annotate_text_without_index_suffix(self): method test_annotate_text_with_attributes_suffix (line 325) | def test_annotate_text_with_attributes_suffix(self): method test_annotate_text_multiple_chunks (line 469) | def test_annotate_text_multiple_chunks(self): method test_annotate_text_no_extractions (line 569) | def test_annotate_text_no_extractions(self): class AnnotatorMultipleDocumentTest (line 595) | class AnnotatorMultipleDocumentTest(parameterized.TestCase): method test_annotate_documents (line 691) | def test_annotate_documents( method test_annotate_documents_exceptions (line 766) | def test_annotate_documents_exceptions( class AnnotatorMultiPassTest (line 804) | class AnnotatorMultiPassTest(absltest.TestCase): method setUp (line 807) | def setUp(self): method test_multipass_extraction_non_overlapping (line 817) | def test_multipass_extraction_non_overlapping(self): method test_multipass_extraction_overlapping (line 867) | def test_multipass_extraction_overlapping(self): method test_multipass_extraction_single_pass (line 918) | def test_multipass_extraction_single_pass(self): method test_multipass_extraction_empty_passes (line 948) | def test_multipass_extraction_empty_passes(self): class MultiPassHelperFunctionsTest (line 988) | class MultiPassHelperFunctionsTest(parameterized.TestCase): method test_merge_non_overlapping_extractions (line 1052) | def test_merge_non_overlapping_extractions( method test_extractions_overlap (line 1117) | def test_extractions_overlap(self, ext1, ext2, expected): class AnnotateDocumentsGeneratorTest (line 1123) | class AnnotateDocumentsGeneratorTest(absltest.TestCase): method setUp (line 1126) | def setUp(self): method test_yields_documents_not_generators (line 1160) | def test_yields_documents_not_generators(self): class CrossChunkContextTest (line 1207) | class CrossChunkContextTest(absltest.TestCase): method setUp (line 1210) | def setUp(self): method test_context_window_includes_previous_chunk_text (line 1220) | def test_context_window_includes_previous_chunk_text(self): method test_no_context_included_when_disabled (line 1271) | def test_no_context_included_when_disabled(self): method test_context_window_per_document_isolation (line 1309) | def test_context_window_per_document_isolation(self): FILE: tests/chunking_test.py class SentenceIterTest (line 26) | class SentenceIterTest(absltest.TestCase): method test_basic (line 28) | def test_basic(self): method test_empty (line 59) | def test_empty(self): class ChunkIteratorTest (line 67) | class ChunkIteratorTest(absltest.TestCase): method test_multi_sentence_chunk (line 69) | def test_multi_sentence_chunk(self): method test_sentence_with_multiple_newlines_and_right_interval (line 96) | def test_sentence_with_multiple_newlines_and_right_interval(self): method test_break_sentence (line 111) | def test_break_sentence(self): method test_long_token_gets_own_chunk (line 164) | def test_long_token_gets_own_chunk(self): method test_newline_at_chunk_boundary_does_not_create_empty_interval (line 207) | def test_newline_at_chunk_boundary_does_not_create_empty_interval(self): method test_chunk_unicode_text (line 238) | def test_chunk_unicode_text(self): method test_newlines_is_secondary_sentence_break (line 261) | def test_newlines_is_secondary_sentence_break(self): method test_tokenizer_propagation (line 312) | def test_tokenizer_propagation(self): class BatchingTest (line 345) | class BatchingTest(parameterized.TestCase): method test_make_batches_of_textchunk (line 425) | def test_make_batches_of_textchunk( class TextChunkTest (line 447) | class TextChunkTest(absltest.TestCase): method test_string_output (line 449) | def test_string_output(self): class TextAdditionalContextTest (line 469) | class TextAdditionalContextTest(absltest.TestCase): method test_text_chunk_additional_context (line 473) | def test_text_chunk_additional_context(self): method test_chunk_iterator_without_additional_context (line 486) | def test_chunk_iterator_without_additional_context(self): method test_multiple_chunks_with_additional_context (line 497) | def test_multiple_chunks_with_additional_context(self): class TextChunkPropertyTest (line 517) | class TextChunkPropertyTest(parameterized.TestCase): method test_text_chunk_properties (line 549) | def test_text_chunk_properties( FILE: tests/data_lib_test.py class DataLibToDictParameterizedTest (line 27) | class DataLibToDictParameterizedTest(parameterized.TestCase): method test_annotated_document_to_dict (line 180) | def test_annotated_document_to_dict(self, annotated_doc, expected_dict): method test_annotated_document_to_dict_with_int64 (line 188) | def test_annotated_document_to_dict_with_int64(self): class IsUrlTest (line 207) | class IsUrlTest(absltest.TestCase): method test_valid_urls (line 210) | def test_valid_urls(self): method test_invalid_urls_with_text (line 219) | def test_invalid_urls_with_text(self): method test_invalid_urls_no_scheme (line 225) | def test_invalid_urls_no_scheme(self): FILE: tests/extract_precedence_test.py class ExtractParameterPrecedenceTest (line 27) | class ExtractParameterPrecedenceTest(absltest.TestCase): method setUp (line 30) | def setUp(self): method test_model_overrides_all_other_parameters (line 47) | def test_model_overrides_all_other_parameters( method test_config_overrides_model_id_and_language_model_type (line 76) | def test_config_overrides_model_id_and_language_model_type( method test_model_id_and_base_kwargs_override_language_model_type (line 115) | def test_model_id_and_base_kwargs_override_language_model_type( method test_language_model_type_only_emits_warning_and_works (line 154) | def test_language_model_type_only_emits_warning_and_works( method test_use_schema_constraints_warns_with_config (line 184) | def test_use_schema_constraints_warns_with_config( method test_use_schema_constraints_warns_with_model (line 216) | def test_use_schema_constraints_warns_with_model( FILE: tests/extract_schema_integration_test.py class ExtractSchemaIntegrationTest (line 26) | class ExtractSchemaIntegrationTest(absltest.TestCase): method setUp (line 29) | def setUp(self): method test_extract_with_gemini_uses_schema (line 47) | def test_extract_with_gemini_uses_schema(self): method test_extract_with_ollama_uses_json_mode (line 80) | def test_extract_with_ollama_uses_json_mode(self): method test_extract_explicit_fence_respected (line 113) | def test_extract_explicit_fence_respected(self): method test_extract_gemini_schema_deprecation_warning (line 147) | def test_extract_gemini_schema_deprecation_warning(self): method test_extract_no_schema_when_disabled (line 186) | def test_extract_no_schema_when_disabled(self): method test_validation_triggers_warning_for_gemini (line 229) | def test_validation_triggers_warning_for_gemini(self, mock_create_model): method test_no_validation_without_schema (line 280) | def test_no_validation_without_schema(self, mock_create_model): FILE: tests/factory_schema_test.py class FactorySchemaIntegrationTest (line 27) | class FactorySchemaIntegrationTest(absltest.TestCase): method setUp (line 30) | def setUp(self): method test_gemini_with_schema_returns_false_fence (line 46) | def test_gemini_with_schema_returns_false_fence(self): method test_ollama_with_schema_returns_false_fence (line 70) | def test_ollama_with_schema_returns_false_fence(self): method test_explicit_fence_output_respected (line 92) | def test_explicit_fence_output_respected(self): method test_no_schema_defaults_to_true_fence (line 111) | def test_no_schema_defaults_to_true_fence(self): method test_schema_disabled_returns_true_fence (line 134) | def test_schema_disabled_returns_true_fence(self): method test_caller_overrides_schema_config (line 156) | def test_caller_overrides_schema_config(self): method test_no_examples_no_schema (line 179) | def test_no_examples_no_schema(self): class SchemaApplicationTest (line 202) | class SchemaApplicationTest(absltest.TestCase): method test_apply_schema_called_when_supported (line 205) | def test_apply_schema_called_when_supported(self): FILE: tests/factory_test.py class FakeGeminiProvider (line 34) | class FakeGeminiProvider(base_model.BaseLanguageModel): method __init__ (line 37) | def __init__(self, model_id, api_key=None, **kwargs): method infer (line 43) | def infer(self, batch_prompts, **kwargs): method infer_batch (line 46) | def infer_batch(self, prompts, batch_size=32): class FakeOpenAIProvider (line 50) | class FakeOpenAIProvider(base_model.BaseLanguageModel): method __init__ (line 53) | def __init__(self, model_id, api_key=None, **kwargs): method infer (line 61) | def infer(self, batch_prompts, **kwargs): method infer_batch (line 64) | def infer_batch(self, prompts, batch_size=32): class FactoryTest (line 68) | class FactoryTest(absltest.TestCase): # pylint: disable=too-many-public... method setUp (line 70) | def setUp(self): method tearDown (line 80) | def tearDown(self): method test_create_model_basic (line 87) | def test_create_model_basic(self): method test_create_model_from_id (line 98) | def test_create_model_from_id(self): method test_uses_gemini_api_key_from_environment (line 107) | def test_uses_gemini_api_key_from_environment(self): method test_uses_openai_api_key_from_environment (line 115) | def test_uses_openai_api_key_from_environment(self): method test_falls_back_to_langextract_api_key_when_provider_key_missing (line 125) | def test_falls_back_to_langextract_api_key_when_provider_key_missing(s... method test_provider_specific_key_takes_priority_over_langextract_key (line 139) | def test_provider_specific_key_takes_priority_over_langextract_key(self): method test_explicit_kwargs_override_env (line 146) | def test_explicit_kwargs_override_env(self): method test_wraps_provider_initialization_error_in_inference_config_error (line 157) | def test_wraps_provider_initialization_error_in_inference_config_error... method test_raises_error_when_no_provider_matches_model_id (line 167) | def test_raises_error_when_no_provider_matches_model_id(self): method test_additional_kwargs_passed_through (line 176) | def test_additional_kwargs_passed_through(self): method test_ollama_uses_base_url_from_environment (line 194) | def test_ollama_uses_base_url_from_environment(self): method test_ollama_models_select_without_api_keys (line 216) | def test_ollama_models_select_without_api_keys(self): method test_model_config_fields_are_immutable (line 242) | def test_model_config_fields_are_immutable(self): method test_model_config_allows_dict_contents_modification (line 251) | def test_model_config_allows_dict_contents_modification(self): method test_uses_highest_priority_provider_when_multiple_match (line 261) | def test_uses_highest_priority_provider_when_multiple_match(self): method test_explicit_provider_overrides_pattern_matching (line 283) | def test_explicit_provider_overrides_pattern_matching(self): method test_provider_without_model_id_uses_provider_default (line 308) | def test_provider_without_model_id_uses_provider_default(self): method test_raises_error_when_neither_model_id_nor_provider_specified (line 331) | def test_raises_error_when_neither_model_id_nor_provider_specified(self): method test_gemini_vertexai_parameters_accepted (line 342) | def test_gemini_vertexai_parameters_accepted(self): method test_gemini_vertexai_with_credentials (line 391) | def test_gemini_vertexai_with_credentials(self): FILE: tests/format_handler_test.py class FormatHandlerTest (line 28) | class FormatHandlerTest(parameterized.TestCase): method test_format_and_parse (line 96) | def test_format_and_parse( # pylint: disable=too-many-arguments method test_end_to_end_integration_with_prompt_and_resolver (line 147) | def test_end_to_end_integration_with_prompt_and_resolver(self): method test_format_parse_roundtrip (line 232) | def test_format_parse_roundtrip( class NonGeminiModelParsingTest (line 257) | class NonGeminiModelParsingTest(parameterized.TestCase): method test_think_tags_stripped_before_parsing (line 260) | def test_think_tags_stripped_before_parsing(self): method test_top_level_list_accepted_as_fallback (line 276) | def test_top_level_list_accepted_as_fallback(self): method test_deepseek_r1_real_output (line 290) | def test_deepseek_r1_real_output(self): FILE: tests/inference_test.py class TestBaseLanguageModel (line 37) | class TestBaseLanguageModel(absltest.TestCase): method test_merge_kwargs_with_none (line 39) | def test_merge_kwargs_with_none(self): method test_merge_kwargs_without_extra_kwargs (line 71) | def test_merge_kwargs_without_extra_kwargs(self): class TestOllamaLanguageModel (line 90) | class TestOllamaLanguageModel(absltest.TestCase): method test_ollama_infer (line 93) | def test_ollama_infer(self, mock_ollama_query): method test_ollama_extra_kwargs_passed_to_api (line 164) | def test_ollama_extra_kwargs_passed_to_api(self, mock_post): method test_ollama_stop_and_top_p_passthrough (line 194) | def test_ollama_stop_and_top_p_passthrough(self, mock_post): method test_ollama_defaults_when_unspecified (line 222) | def test_ollama_defaults_when_unspecified(self, mock_post): method test_ollama_runtime_kwargs_override_stored (line 247) | def test_ollama_runtime_kwargs_override_stored(self, mock_post): method test_ollama_temperature_zero (line 274) | def test_ollama_temperature_zero(self, mock_post): method test_ollama_default_timeout (line 297) | def test_ollama_default_timeout(self): method test_ollama_timeout_through_infer (line 321) | def test_ollama_timeout_through_infer(self): class TestGeminiLanguageModel (line 347) | class TestGeminiLanguageModel(absltest.TestCase): method test_gemini_allowlist_filtering (line 350) | def test_gemini_allowlist_filtering(self, mock_client_class): method test_gemini_runtime_kwargs_filtered (line 398) | def test_gemini_runtime_kwargs_filtered(self, mock_client_class): method test_gemini_requires_auth_config (line 439) | def test_gemini_requires_auth_config(self): method test_gemini_vertexai_requires_project_and_location (line 448) | def test_gemini_vertexai_requires_project_and_location(self): method test_gemini_vertexai_initialization (line 456) | def test_gemini_vertexai_initialization(self, mock_client_class): method test_gemini_warns_when_both_auth_provided (line 480) | def test_gemini_warns_when_both_auth_provided( method test_gemini_vertexai_with_http_options (line 500) | def test_gemini_vertexai_with_http_options(self, mock_client_class): class TestOpenAILanguageModelInference (line 524) | class TestOpenAILanguageModelInference(parameterized.TestCase): method test_openai_infer_with_parameters (line 531) | def test_openai_infer_with_parameters( class TestOpenAILanguageModel (line 569) | class TestOpenAILanguageModel(absltest.TestCase): method test_openai_parse_output_json (line 571) | def test_openai_parse_output_json(self): method test_openai_parse_output_yaml (line 584) | def test_openai_parse_output_yaml(self): method test_openai_no_api_key_raises_error (line 597) | def test_openai_no_api_key_raises_error(self): method test_openai_extra_kwargs_passed (line 603) | def test_openai_extra_kwargs_passed(self, mock_openai_class): method test_openai_runtime_kwargs_override (line 629) | def test_openai_runtime_kwargs_override(self, mock_openai_class): method test_openai_json_response_format (line 652) | def test_openai_json_response_format(self, mock_openai_class): method test_openai_temperature_zero (line 676) | def test_openai_temperature_zero(self, mock_openai_class): method test_openai_temperature_none_not_sent (line 698) | def test_openai_temperature_none_not_sent(self, mock_openai_class): method test_openai_none_values_filtered (line 721) | def test_openai_none_values_filtered(self, mock_openai_class): method test_openai_no_system_message_when_not_json_yaml (line 744) | def test_openai_no_system_message_when_not_json_yaml(self, mock_openai... method test_gemini_none_values_filtered (line 770) | def test_gemini_none_values_filtered(self, mock_client_class): FILE: tests/init_test.py class InitTest (line 34) | class InitTest(parameterized.TestCase): method test_lang_extract_as_lx_extract (line 41) | def test_lang_extract_as_lx_extract( method test_extract_resolver_params_alignment_passthrough (line 160) | def test_extract_resolver_params_alignment_passthrough( method test_extract_resolver_params_suppress_parse_errors (line 207) | def test_extract_resolver_params_suppress_parse_errors( method test_extract_resolver_params_none_handling (line 252) | def test_extract_resolver_params_none_handling( method test_extract_resolver_params_typo_error (line 305) | def test_extract_resolver_params_typo_error(self, mock_create_model): method test_extract_resolver_params_docs_path_passthrough (line 337) | def test_extract_resolver_params_docs_path_passthrough( method test_extract_resolver_params_none_threshold (line 381) | def test_extract_resolver_params_none_threshold( method test_extract_custom_params_reach_inference (line 423) | def test_extract_custom_params_reach_inference( method test_extract_with_custom_tokenizer (line 468) | def test_extract_with_custom_tokenizer(self, mock_create_model): method test_data_module_exports_via_compatibility_shim (line 530) | def test_data_module_exports_via_compatibility_shim(self): method test_tokenizer_module_exports_via_compatibility_shim (line 549) | def test_tokenizer_module_exports_via_compatibility_shim(self): method test_show_progress_controls_progress_bar (line 600) | def test_show_progress_controls_progress_bar( method test_schema_validation_warning_issued (line 651) | def test_schema_validation_warning_issued(self, mock_create_model): method test_gemini_schema_deprecation_warning (line 702) | def test_gemini_schema_deprecation_warning(self): FILE: tests/progress_test.py class ProgressTest (line 25) | class ProgressTest(unittest.TestCase): method test_download_progress_bar (line 27) | def test_download_progress_bar(self): method test_extraction_progress_bar (line 37) | def test_extraction_progress_bar(self): method test_save_load_progress_bars (line 47) | def test_save_load_progress_bars(self): method test_model_info_extraction (line 57) | def test_model_info_extraction(self): method test_formatting_functions (line 68) | def test_formatting_functions(self): FILE: tests/prompt_validation_test.py class PromptAlignmentValidationTest (line 25) | class PromptAlignmentValidationTest(parameterized.TestCase): method test_alignment_detection (line 59) | def test_alignment_detection( method test_multiple_extractions_per_example (line 124) | def test_multiple_extractions_per_example( method test_validation_levels_that_dont_raise (line 173) | def test_validation_levels_that_dont_raise( method test_error_mode_raises_appropriately (line 213) | def test_error_mode_raises_appropriately( method test_empty_examples_produces_empty_report (line 244) | def test_empty_examples_produces_empty_report(self): method test_multiple_examples_preserve_indices (line 251) | def test_multiple_examples_preserve_indices(self): method test_validation_does_not_mutate_input (line 305) | def test_validation_does_not_mutate_input(self): method test_alignment_policies (line 353) | def test_alignment_policies( class ExtractIntegrationTest (line 394) | class ExtractIntegrationTest(absltest.TestCase): method test_extract_validates_in_error_mode (line 397) | def test_extract_validates_in_error_mode(self): FILE: tests/prompting_test.py class QAPromptGeneratorTest (line 25) | class QAPromptGeneratorTest(parameterized.TestCase): method test_generate_prompt (line 27) | def test_generate_prompt(self): method test_format_example (line 361) | def test_format_example( class PromptBuilderTest (line 402) | class PromptBuilderTest(absltest.TestCase): method _create_generator (line 405) | def _create_generator(self): method test_build_prompt_renders_chunk_text (line 432) | def test_build_prompt_renders_chunk_text(self): method test_build_prompt_includes_additional_context (line 445) | def test_build_prompt_includes_additional_context(self): class ContextAwarePromptBuilderTest (line 459) | class ContextAwarePromptBuilderTest(absltest.TestCase): method _create_generator (line 462) | def _create_generator(self): method test_context_window_chars_property (line 489) | def test_context_window_chars_property(self): method test_first_chunk_has_no_previous_context (line 501) | def test_first_chunk_has_no_previous_context(self): method test_second_chunk_includes_previous_context (line 517) | def test_second_chunk_includes_previous_context(self): method test_context_disabled_when_none (line 534) | def test_context_disabled_when_none(self): method test_context_isolated_per_document (line 550) | def test_context_isolated_per_document(self): method test_combines_previous_context_with_additional_context (line 574) | def test_combines_previous_context_with_additional_context(self): FILE: tests/provider_plugin_test.py function _create_mock_entry_points (line 40) | def _create_mock_entry_points(entry_points_list): class PluginSmokeTest (line 61) | class PluginSmokeTest(absltest.TestCase): method setUp (line 64) | def setUp(self): method test_plugin_discovery_and_usage (line 73) | def test_plugin_discovery_and_usage(self): method test_plugin_disabled_by_env_var (line 118) | def test_plugin_disabled_by_env_var(self): method test_handles_import_errors_gracefully (line 126) | def test_handles_import_errors_gracefully(self): method test_load_plugins_once_is_idempotent (line 159) | def test_load_plugins_once_is_idempotent(self): method test_non_subclass_entry_point_does_not_crash (line 185) | def test_non_subclass_entry_point_does_not_crash(self): method test_plugin_priority_override_core_provider (line 217) | def test_plugin_priority_override_core_provider(self): method test_resolve_provider_for_plugin (line 249) | def test_resolve_provider_for_plugin(self): method test_plugin_with_custom_schema (line 282) | def test_plugin_with_custom_schema(self): class PluginE2ETest (line 378) | class PluginE2ETest(absltest.TestCase): method test_plugin_with_schema_e2e (line 385) | def test_plugin_with_schema_e2e(self): method test_pip_install_discovery_and_cleanup (line 485) | def test_pip_install_discovery_and_cleanup(self): FILE: tests/provider_schema_test.py class ProviderSchemaDiscoveryTest (line 32) | class ProviderSchemaDiscoveryTest(absltest.TestCase): method test_gemini_returns_gemini_schema (line 35) | def test_gemini_returns_gemini_schema(self): method test_ollama_returns_format_mode_schema (line 44) | def test_ollama_returns_format_mode_schema(self): method test_openai_returns_none (line 53) | def test_openai_returns_none(self): class FormatModeSchemaTest (line 63) | class FormatModeSchemaTest(absltest.TestCase): method test_from_examples_ignores_examples (line 66) | def test_from_examples_ignores_examples(self): method test_to_provider_config_returns_format (line 88) | def test_to_provider_config_returns_format(self): method test_requires_raw_output_returns_true (line 101) | def test_requires_raw_output_returns_true(self): method test_different_examples_same_output (line 111) | def test_different_examples_same_output(self): class OllamaFormatParameterTest (line 148) | class OllamaFormatParameterTest(absltest.TestCase): method test_ollama_json_format_in_request_payload (line 151) | def test_ollama_json_format_in_request_payload(self): method test_ollama_default_format_is_json (line 179) | def test_ollama_default_format_is_json(self): method test_extract_with_ollama_passes_json_format (line 199) | def test_extract_with_ollama_passes_json_format(self): class OllamaYAMLOverrideTest (line 256) | class OllamaYAMLOverrideTest(absltest.TestCase): method test_ollama_yaml_format_in_request_payload (line 259) | def test_ollama_yaml_format_in_request_payload(self): method test_yaml_override_sets_fence_output_true (line 280) | def test_yaml_override_sets_fence_output_true(self): method test_json_format_keeps_fence_output_false (line 320) | def test_json_format_keeps_fence_output_false(self): class GeminiSchemaProviderIntegrationTest (line 362) | class GeminiSchemaProviderIntegrationTest(absltest.TestCase): method test_gemini_schema_to_provider_config (line 365) | def test_gemini_schema_to_provider_config(self): method test_gemini_requires_raw_output (line 410) | def test_gemini_requires_raw_output(self): method test_gemini_rejects_yaml_with_schema (line 419) | def test_gemini_rejects_yaml_with_schema(self): method test_gemini_forwards_schema_to_genai_client (line 454) | def test_gemini_forwards_schema_to_genai_client(self): method test_gemini_doesnt_forward_non_api_kwargs (line 509) | def test_gemini_doesnt_forward_non_api_kwargs(self): class SchemaShimTest (line 544) | class SchemaShimTest(absltest.TestCase): method test_constraint_types_import (line 547) | def test_constraint_types_import(self): method test_provider_schema_imports (line 564) | def test_provider_schema_imports(self): FILE: tests/registry_test.py class FakeProvider (line 33) | class FakeProvider(base_model.BaseLanguageModel): method infer (line 36) | def infer(self, batch_prompts, **kwargs): method infer_batch (line 39) | def infer_batch(self, prompts, batch_size=32): class AnotherFakeProvider (line 43) | class AnotherFakeProvider(base_model.BaseLanguageModel): method infer (line 46) | def infer(self, batch_prompts, **kwargs): method infer_batch (line 49) | def infer_batch(self, prompts, batch_size=32): class RegistryTest (line 53) | class RegistryTest(absltest.TestCase): method setUp (line 55) | def setUp(self): method tearDown (line 59) | def tearDown(self): method test_register_decorator (line 63) | def test_register_decorator(self): method test_register_lazy (line 73) | def test_register_lazy(self): method test_multiple_patterns (line 81) | def test_multiple_patterns(self): method test_priority_resolution (line 89) | def test_priority_resolution(self): method test_no_provider_registered (line 98) | def test_no_provider_registered(self): method test_caching (line 106) | def test_caching(self): method test_clear_registry (line 118) | def test_clear_registry(self): method test_list_entries (line 134) | def test_list_entries(self): method test_lazy_loading_defers_import (line 152) | def test_lazy_loading_defers_import(self): method test_regex_pattern_objects (line 165) | def test_regex_pattern_objects(self): method test_resolve_provider_by_name (line 179) | def test_resolve_provider_by_name(self): method test_resolve_provider_not_found (line 194) | def test_resolve_provider_not_found(self): method test_hf_style_model_id_patterns (line 200) | def test_hf_style_model_id_patterns(self): FILE: tests/resolver_test.py function assert_char_interval_match_source (line 27) | def assert_char_interval_match_source( class ParserTest (line 59) | class ParserTest(parameterized.TestCase): method test_parser_error_cases (line 141) | def test_parser_error_cases( class ExtractOrderedEntitiesTest (line 148) | class ExtractOrderedEntitiesTest(parameterized.TestCase): method test_extract_ordered_extractions_success (line 521) | def test_extract_ordered_extractions_success( method test_extract_ordered_extractions_exceptions (line 561) | def test_extract_ordered_extractions_exceptions( class AlignEntitiesTest (line 568) | class AlignEntitiesTest(parameterized.TestCase): method setUp (line 581) | def setUp(self): method test_extraction_alignment (line 1648) | def test_extraction_alignment( class ResolverTest (line 1677) | class ResolverTest(parameterized.TestCase): method setUp (line 1746) | def setUp(self): method test_resolve_valid_inputs (line 1829) | def test_resolve_valid_inputs(self, resolver, input_text, expected_out... method test_handle_integer_extraction (line 1834) | def test_handle_integer_extraction(self): method test_resolve_empty_yaml (line 1858) | def test_resolve_empty_yaml(self): method test_resolve_empty_yaml_without_suppress_parse_errors (line 1865) | def test_resolve_empty_yaml_without_suppress_parse_errors(self): method test_align_with_valid_chunk (line 1870) | def test_align_with_valid_chunk(self): method test_align_with_chunk_starting_in_middle (line 1917) | def test_align_with_chunk_starting_in_middle(self): method test_align_with_no_extractions_in_chunk (line 1969) | def test_align_with_no_extractions_in_chunk(self): method test_align_successful (line 1992) | def test_align_successful(self): method test_align_with_discontinuous_tokenized_text (line 2038) | def test_align_with_discontinuous_tokenized_text(self): method test_align_with_discontinuous_tokenized_text_but_right_chunk (line 2079) | def test_align_with_discontinuous_tokenized_text_but_right_chunk(self): method test_align_with_empty_annotated_extractions (line 2124) | def test_align_with_empty_annotated_extractions(self): class FenceFallbackTest (line 2149) | class FenceFallbackTest(parameterized.TestCase): method test_parsing_scenarios (line 2195) | def test_parsing_scenarios( method test_fallback_preserves_content_integrity (line 2213) | def test_fallback_preserves_content_integrity(self): method test_malformed_json_still_raises_error (line 2262) | def test_malformed_json_still_raises_error(self): method test_strict_fences_raises_on_missing_markers (line 2276) | def test_strict_fences_raises_on_missing_markers(self): method test_default_allows_fallback (line 2290) | def test_default_allows_fallback(self): method test_rejects_multiple_fenced_blocks (line 2302) | def test_rejects_multiple_fenced_blocks(self): class FlexibleSchemaTest (line 2323) | class FlexibleSchemaTest(parameterized.TestCase): method test_direct_list_format (line 2326) | def test_direct_list_format(self): method test_single_dict_as_extraction (line 2342) | def test_single_dict_as_extraction(self): method test_traditional_format_still_works (line 2354) | def test_traditional_format_still_works(self): method test_lenient_mode_accepts_list (line 2370) | def test_lenient_mode_accepts_list(self): method test_flexible_with_attributes (line 2382) | def test_flexible_with_attributes(self): FILE: tests/schema_test.py class BaseSchemaTest (line 34) | class BaseSchemaTest(absltest.TestCase): method test_abstract_methods_required (line 37) | def test_abstract_methods_required(self): method test_subclass_must_implement_all_methods (line 42) | def test_subclass_must_implement_all_methods(self): class BaseLanguageModelSchemaTest (line 55) | class BaseLanguageModelSchemaTest(absltest.TestCase): method test_get_schema_class_returns_none_by_default (line 58) | def test_get_schema_class_returns_none_by_default(self): method test_apply_schema_stores_instance (line 68) | def test_apply_schema_stores_instance(self): class GeminiSchemaTest (line 88) | class GeminiSchemaTest(parameterized.TestCase): method test_from_examples_constructs_expected_schema (line 237) | def test_from_examples_constructs_expected_schema( method test_to_provider_config_returns_response_schema (line 244) | def test_to_provider_config_returns_response_schema(self): method test_requires_raw_output_returns_true (line 266) | def test_requires_raw_output_returns_true(self): class SchemaValidationTest (line 284) | class SchemaValidationTest(parameterized.TestCase): method _create_test_schema (line 287) | def _create_test_schema(self): method test_gemini_validation (line 325) | def test_gemini_validation( method test_base_schema_no_validation (line 355) | def test_base_schema_no_validation(self): FILE: tests/test_gemini_batch_api.py function create_mock_batch_job (line 31) | def create_mock_batch_job( function _create_batch_response (line 46) | def _create_batch_response(idx, text_content): function _create_batch_error (line 58) | def _create_batch_error(idx, code, message): class TestGeminiBatchAPI (line 66) | class TestGeminiBatchAPI(absltest.TestCase): method setUp (line 69) | def setUp(self): method test_batch_routing_vertex (line 79) | def test_batch_routing_vertex(self, mock_client_cls): method test_realtime_when_disabled (line 125) | def test_realtime_when_disabled(self, mock_client_cls): method test_realtime_when_below_threshold (line 150) | def test_realtime_when_below_threshold(self, mock_client_cls): method test_batch_with_schema (line 180) | def test_batch_with_schema(self, mock_client_cls): method test_batch_error_handling (line 249) | def test_batch_error_handling(self, mock_client_cls): method test_file_based_ordering (line 272) | def test_file_based_ordering(self, mock_client_cls): method test_max_prompts_per_job (line 317) | def test_max_prompts_per_job(self, mock_client_cls): method test_batch_item_error (line 399) | def test_batch_item_error(self, mock_client_cls): class BatchConfigValidationTest (line 432) | class BatchConfigValidationTest(parameterized.TestCase): method test_validation_errors (line 441) | def test_validation_errors(self, **overrides): class EmptyAndPaddingTest (line 447) | class EmptyAndPaddingTest(absltest.TestCase): method test_empty_prompts_fast_path (line 451) | def test_empty_prompts_fast_path(self, mock_client_cls): method test_file_pad_to_expected_count (line 469) | def test_file_pad_to_expected_count(self, mock_client_cls): class GCSBatchCachingTest (line 505) | class GCSBatchCachingTest(absltest.TestCase): method setUp (line 508) | def setUp(self): method test_cache_hit_skips_inference (line 518) | def test_cache_hit_skips_inference(self, mock_client_cls): method test_partial_cache_hit (line 550) | def test_partial_cache_hit(self, mock_client_cls): method test_project_passed_to_storage_client (line 620) | def test_project_passed_to_storage_client(self, mock_client_cls): method test_cache_hashing_stability (line 675) | def test_cache_hashing_stability(self): FILE: tests/test_kwargs_passthrough.py class TestOpenAIKwargsPassthrough (line 27) | class TestOpenAIKwargsPassthrough(unittest.TestCase): method test_reasoning_effort_alias_normalization (line 31) | def test_reasoning_effort_alias_normalization(self, mock_openai_class): method test_reasoning_parameter_normalized (line 54) | def test_reasoning_parameter_normalized(self, mock_openai_class): method test_runtime_kwargs_override_stored (line 76) | def test_runtime_kwargs_override_stored(self, mock_openai_class): method test_falsy_values_preserved (line 102) | def test_falsy_values_preserved(self, mock_openai_class): method test_both_reasoning_forms_merge (line 127) | def test_both_reasoning_forms_merge(self, mock_openai_class): method test_custom_response_format (line 154) | def test_custom_response_format(self, mock_openai_class): method test_direct_reasoning_parameter (line 185) | def test_direct_reasoning_parameter(self, mock_openai_class): class TestOllamaAuthSupport (line 207) | class TestOllamaAuthSupport(parameterized.TestCase): method test_api_key_in_authorization_header (line 211) | def test_api_key_in_authorization_header(self, mock_post): method test_custom_auth_header_name (line 233) | def test_custom_auth_header_name(self, mock_post): method test_pass_through_kwargs (line 255) | def test_pass_through_kwargs(self, mock_post): method test_api_key_redacted_in_repr (line 281) | def test_api_key_redacted_in_repr(self): method test_localhost_auth_warning_but_still_works (line 295) | def test_localhost_auth_warning_but_still_works(self, mock_post): method test_runtime_kwargs_override (line 321) | def test_runtime_kwargs_override(self, mock_post): method test_localhost_detection (line 350) | def test_localhost_detection(self, url, should_warn, mock_post): method test_format_none_not_in_payload (line 377) | def test_format_none_not_in_payload(self, mock_post): method test_reserved_kwargs_not_in_options (line 402) | def test_reserved_kwargs_not_in_options(self, mock_post): method test_api_key_without_localhost_warning (line 430) | def test_api_key_without_localhost_warning(self, mock_post): FILE: tests/test_live_api.py function has_vertex_ai_credentials (line 59) | def has_vertex_ai_credentials(): function retry_on_transient_errors (line 113) | def retry_on_transient_errors(max_retries=3, backoff_factor=2.0): function add_delay_between_tests (line 158) | def add_delay_between_tests(): function get_basic_medication_examples (line 164) | def get_basic_medication_examples(): function get_relationship_examples (line 193) | def get_relationship_examples(): function extract_by_class (line 244) | def extract_by_class(result, extraction_class): function assert_extractions_contain (line 256) | def assert_extractions_contain(test_case, result, expected_classes): function assert_valid_char_intervals (line 270) | def assert_valid_char_intervals(test_case, result): class TestLiveAPIGemini (line 296) | class TestLiveAPIGemini(unittest.TestCase): method _check_cached_result (line 299) | def _check_cached_result(self, result_json: dict[str, Any]) -> bool: method _verify_gcs_cache_content (line 347) | def _verify_gcs_cache_content(self, bucket_name): method test_medication_extraction (line 373) | def test_medication_extraction(self): method test_multilingual_medication_extraction (line 436) | def test_multilingual_medication_extraction(self): method test_explicit_provider_gemini (line 487) | def test_explicit_provider_gemini(self): method test_medication_relationship_extraction (line 516) | def test_medication_relationship_extraction(self): method test_batch_extraction_vertex_gcs (line 579) | def test_batch_extraction_vertex_gcs(self, mock_infer_batch): method test_batch_caching_live (line 698) | def test_batch_caching_live(self): class TestCrossChunkContext (line 773) | class TestCrossChunkContext(unittest.TestCase): method test_context_window_extracts_from_both_chunks (line 779) | def test_context_window_extracts_from_both_chunks(self): class TestLiveAPIOpenAI (line 848) | class TestLiveAPIOpenAI(unittest.TestCase): method test_medication_extraction (line 854) | def test_medication_extraction(self): method test_explicit_provider_selection (line 918) | def test_explicit_provider_selection(self): method test_medication_relationship_extraction (line 951) | def test_medication_relationship_extraction(self): FILE: tests/test_ollama_integration.py function _ollama_available (line 24) | def _ollama_available(): function test_ollama_extraction (line 32) | def test_ollama_extraction(): function test_ollama_extraction_with_fence_fallback (line 76) | def test_ollama_extraction_with_fence_fallback(): function _model_available (line 115) | def _model_available(model_name): function test_deepseek_r1_extraction (line 131) | def test_deepseek_r1_extraction(): FILE: tests/tokenizer_test.py class TokenizerTest (line 23) | class TokenizerTest(parameterized.TestCase): method assertTokenListEqual (line 26) | def assertTokenListEqual(self, actual_tokens, expected_tokens, msg=None): method test_tokenize_various_inputs (line 150) | def test_tokenize_various_inputs(self, input_text, expected_tokens): method test_first_token_after_newline_flag (line 158) | def test_first_token_after_newline_flag(self): method test_performance_optimization_no_crash (line 197) | def test_performance_optimization_no_crash(self): method test_underscore_handling (line 213) | def test_underscore_handling(self): class UnicodeTokenizerTest (line 228) | class UnicodeTokenizerTest(parameterized.TestCase): method assertTokenListEqual (line 231) | def assertTokenListEqual(self, actual_tokens, expected_tokens, msg=None): method test_tokenize_various_inputs (line 295) | def test_tokenize_various_inputs(self, input_text, expected_tokens): method test_special_unicode_and_punctuation_handling (line 338) | def test_special_unicode_and_punctuation_handling( method test_first_token_after_newline_parity (line 377) | def test_first_token_after_newline_parity(self): method test_expanded_cjk_detection (line 391) | def test_expanded_cjk_detection(self): method test_mixed_script_and_emoji (line 401) | def test_mixed_script_and_emoji(self): method test_script_boundary_grouping (line 427) | def test_script_boundary_grouping(self): method test_non_spaced_scripts_no_grouping (line 451) | def test_non_spaced_scripts_no_grouping(self): method test_cjk_detection_regex (line 462) | def test_cjk_detection_regex(self): method test_newline_simplification (line 472) | def test_newline_simplification(self): method test_newline_simplification_start (line 482) | def test_newline_simplification_start(self): method test_mixed_line_endings (line 491) | def test_mixed_line_endings(self): method test_mixed_uncommon_scripts_no_grouping (line 501) | def test_mixed_uncommon_scripts_no_grouping(self): method test_unknown_script_merging_edge_case (line 518) | def test_unknown_script_merging_edge_case(self): method test_find_sentence_range_empty_input (line 528) | def test_find_sentence_range_empty_input(self): method test_normalization_indices_match_input (line 533) | def test_normalization_indices_match_input(self): method test_acronym_inconsistency (line 546) | def test_acronym_inconsistency(self): method test_consecutive_punctuation_grouping (line 559) | def test_consecutive_punctuation_grouping(self): method test_punctuation_merging_identical_only (line 572) | def test_punctuation_merging_identical_only(self): method test_distinct_unknown_scripts_do_not_merge (line 600) | def test_distinct_unknown_scripts_do_not_merge(self): method test_identical_unknown_scripts_merge (line 614) | def test_identical_unknown_scripts_merge(self): class ExceptionTest (line 629) | class ExceptionTest(absltest.TestCase): method test_invalid_token_interval_errors (line 632) | def test_invalid_token_interval_errors(self): method test_sentence_range_errors (line 662) | def test_sentence_range_errors(self): class NegativeTestCases (line 684) | class NegativeTestCases(parameterized.TestCase): method test_invalid_and_edge_case_unicode (line 745) | def test_invalid_and_edge_case_unicode(self, input_text, expected_toke... method test_empty_string_edge_case (line 777) | def test_empty_string_edge_case(self): method test_whitespace_only_string (line 785) | def test_whitespace_only_string(self): class TokensTextTest (line 802) | class TokensTextTest(parameterized.TestCase): method test_valid_intervals (line 829) | def test_valid_intervals( method test_invalid_intervals (line 863) | def test_invalid_intervals(self, input_text, start_index, end_index): class SentenceRangeTest (line 872) | class SentenceRangeTest(parameterized.TestCase): method test_partial_sentence_range (line 898) | def test_partial_sentence_range( method test_full_sentence_range (line 916) | def test_full_sentence_range(self, input_text, start_pos): method test_invalid_start_pos (line 936) | def test_invalid_start_pos(self, input_text, start_pos): method test_sentence_boundary_with_quote (line 942) | def test_sentence_boundary_with_quote(self): method test_sentence_splitting_permissive (line 949) | def test_sentence_splitting_permissive(self): method test_unicode_sentence_boundaries (line 969) | def test_unicode_sentence_boundaries(self): method test_configurable_sentence_splitting (line 985) | def test_configurable_sentence_splitting(self): FILE: tests/visualization_test.py class VisualizationTest (line 28) | class VisualizationTest(absltest.TestCase): method test_assign_colors_basic_assignment (line 30) | def test_assign_colors_basic_assignment(self): method test_build_highlighted_text_single_span_correct_html (line 54) | def test_build_highlighted_text_single_span_correct_html(self): method test_build_highlighted_text_escapes_html_in_text_and_tooltip (line 75) | def test_build_highlighted_text_escapes_html_in_text_and_tooltip(self): method test_visualize_basic_document_renders_correctly (line 103) | def test_visualize_basic_document_renders_correctly(self): method test_visualize_no_extractions_renders_text_and_empty_legend (line 144) | def test_visualize_no_extractions_renders_text_and_empty_legend(self):