SYMBOL INDEX (3742 symbols across 178 files) FILE: benchmarks/benchmark_array_xd.py function write (line 25) | def write(my_features, dummy_data, tmp_dir): function read_unformated (line 34) | def read_unformated(feats, tmp_dir): function read_formatted_as_numpy (line 43) | def read_formatted_as_numpy(feats, tmp_dir): function read_batch_unformated (line 53) | def read_batch_unformated(feats, tmp_dir): function read_batch_formatted_as_numpy (line 63) | def read_batch_formatted_as_numpy(feats, tmp_dir): function read_col_unformated (line 74) | def read_col_unformated(feats, tmp_dir): function read_col_formatted_as_numpy (line 83) | def read_col_formatted_as_numpy(feats, tmp_dir): function benchmark_array_xd (line 92) | def benchmark_array_xd(): FILE: benchmarks/benchmark_getitem_100B.py function generate_100B_dataset (line 19) | def generate_100B_dataset(num_examples: int, chunk_size: int) -> dataset... class RandIter (line 26) | class RandIter: method __post_init__ (line 32) | def __post_init__(self): method __iter__ (line 36) | def __iter__(self): method __len__ (line 39) | def __len__(self): function get_first_row (line 44) | def get_first_row(dataset: datasets.Dataset): function get_last_row (line 49) | def get_last_row(dataset: datasets.Dataset): function get_batch_of_1024_rows (line 54) | def get_batch_of_1024_rows(dataset: datasets.Dataset): function get_batch_of_1024_random_rows (line 59) | def get_batch_of_1024_random_rows(dataset: datasets.Dataset): function benchmark_table_100B (line 63) | def benchmark_table_100B(): FILE: benchmarks/benchmark_indices_mapping.py function select (line 16) | def select(dataset: datasets.Dataset): function sort (line 21) | def sort(dataset: datasets.Dataset): function shuffle (line 26) | def shuffle(dataset: datasets.Dataset): function train_test_split (line 31) | def train_test_split(dataset: datasets.Dataset): function shard (line 36) | def shard(dataset: datasets.Dataset, num_shards=10): function benchmark_indices_mapping (line 41) | def benchmark_indices_mapping(): FILE: benchmarks/benchmark_iterating.py function read (line 17) | def read(dataset: datasets.Dataset, length): function read_batch (line 23) | def read_batch(dataset: datasets.Dataset, length, batch_size): function read_formatted (line 29) | def read_formatted(dataset: datasets.Dataset, length, type): function read_formatted_batch (line 36) | def read_formatted_batch(dataset: datasets.Dataset, length, batch_size, ... function benchmark_iterating (line 42) | def benchmark_iterating(): FILE: benchmarks/benchmark_map_filter.py function map (line 18) | def map(dataset: datasets.Dataset, **kwargs): function filter (line 23) | def filter(dataset: datasets.Dataset, **kwargs): function benchmark_map_filter (line 27) | def benchmark_map_filter(): FILE: benchmarks/format.py function format_json_to_md (line 5) | def format_json_to_md(input_json_file, output_md_file): FILE: benchmarks/utils.py function get_duration (line 10) | def get_duration(func): function generate_examples (line 22) | def generate_examples(features: dict, num_examples=100, seq_shapes=None): function generate_example_dataset (line 47) | def generate_example_dataset(dataset_path, features, num_examples=100, s... FILE: src/datasets/arrow_dataset.py class DatasetInfoMixin (line 170) | class DatasetInfoMixin: method __init__ (line 175) | def __init__(self, info: DatasetInfo, split: Optional[NamedSplit]): method info (line 180) | def info(self): method split (line 185) | def split(self): method builder_name (line 190) | def builder_name(self) -> str: method citation (line 194) | def citation(self) -> str: method config_name (line 198) | def config_name(self) -> str: method dataset_size (line 202) | def dataset_size(self) -> Optional[int]: method description (line 206) | def description(self) -> str: method download_checksums (line 210) | def download_checksums(self) -> Optional[dict]: method download_size (line 214) | def download_size(self) -> Optional[int]: method features (line 218) | def features(self) -> Optional[Features]: method homepage (line 222) | def homepage(self) -> Optional[str]: method license (line 226) | def license(self) -> Optional[str]: method size_in_bytes (line 230) | def size_in_bytes(self) -> Optional[int]: method supervised_keys (line 234) | def supervised_keys(self): method version (line 238) | def version(self): class TensorflowDatasetMixin (line 242) | class TensorflowDatasetMixin: method _get_output_signature (line 246) | def _get_output_signature( method to_tf_dataset (line 343) | def to_tf_dataset( class DatasetTransformationNotAllowedError (line 551) | class DatasetTransformationNotAllowedError(Exception): function transmit_format (line 555) | def transmit_format(func): function update_metadata_with_features (line 598) | def update_metadata_with_features(table: Table, features: Features): function _check_table (line 614) | def _check_table(table) -> Table: function _check_column_names (line 626) | def _check_column_names(column_names: list[str]): function _check_valid_indices_value (line 634) | def _check_valid_indices_value(index, size): class NonExistentDatasetError (line 639) | class NonExistentDatasetError(Exception): class Column (line 645) | class Column(Sequence_): method __init__ (line 666) | def __init__(self, source: Union["Dataset", "Column"], column_name: str): method __iter__ (line 673) | def __iter__(self) -> Iterator[Any]: method __getitem__ (line 685) | def __getitem__(self, key: Union[int, str, list[int]]) -> Any: method __len__ (line 700) | def __len__(self) -> int: method __repr__ (line 703) | def __repr__(self): method __str__ (line 706) | def __str__(self): method __eq__ (line 709) | def __eq__(self, value): class Dataset (line 716) | class Dataset(DatasetInfoMixin, IndexableMixin, TensorflowDatasetMixin): method __init__ (line 719) | def __init__( method features (line 793) | def features(self) -> Features: method from_file (line 800) | def from_file( method from_buffer (line 840) | def from_buffer( method from_pandas (line 872) | def from_pandas( method from_polars (line 942) | def from_polars( method from_dict (line 990) | def from_dict( method from_list (line 1156) | def from_list( method from_csv (line 1290) | def from_csv( method from_generator (line 1345) | def from_generator( method from_json (line 1430) | def from_json( method from_parquet (line 1489) | def from_parquet( method from_text (line 1586) | def from_text( method from_spark (line 1650) | def from_spark( method from_sql (line 1714) | def from_sql( method __setstate__ (line 1770) | def __setstate__(self, state): method __del__ (line 1775) | def __del__(self): method __enter__ (line 1781) | def __enter__(self): method __exit__ (line 1784) | def __exit__(self, exc_type, exc_val, exc_tb): method save_to_disk (line 1788) | def save_to_disk( method _save_to_disk_single (line 1941) | def _save_to_disk_single(job_id: int, shard: "Dataset", fpath: str, st... method _build_local_temp_path (line 1968) | def _build_local_temp_path(uri_or_path: str) -> Path: method load_from_disk (line 1985) | def load_from_disk( method data (line 2106) | def data(self) -> Table: method cache_files (line 2126) | def cache_files(self) -> list[dict]: method num_columns (line 2144) | def num_columns(self) -> int: method num_rows (line 2159) | def num_rows(self) -> int: method column_names (line 2176) | def column_names(self) -> list[str]: method shape (line 2191) | def shape(self) -> tuple[int, int]: method unique (line 2207) | def unique(self, column: str) -> list: method class_encode_column (line 2238) | def class_encode_column(self, column: str, include_nulls: bool = False... method flatten (line 2314) | def flatten(self, new_fingerprint: Optional[str] = None, max_depth=16)... method cast (line 2360) | def cast( method cast_column (line 2445) | def cast_column(self, column: str, feature: FeatureType, new_fingerpri... method remove_columns (line 2489) | def remove_columns(self, column_names: Union[str, list[str]], new_fing... method rename_column (line 2543) | def rename_column( method rename_columns (line 2609) | def rename_columns(self, column_mapping: dict[str, str], new_fingerpri... method select_columns (line 2677) | def select_columns(self, column_names: Union[str, list[str]], new_fing... method _fast_select_column (line 2725) | def _fast_select_column(self, column_name: str) -> "Dataset": method __len__ (line 2731) | def __len__(self): method __iter__ (line 2748) | def __iter__(self): method iter (line 2777) | def iter(self, batch_size: int, drop_last_batch: bool = False): method __repr__ (line 2809) | def __repr__(self): method format (line 2813) | def format(self): method formatted_as (line 2822) | def formatted_as( method set_format (line 2854) | def set_format( method reset_format (line 2932) | def reset_format(self): method set_transform (line 2961) | def set_transform( method with_format (line 3004) | def with_format( method with_transform (line 3075) | def with_transform( method _getitem (line 3123) | def _getitem(self, key: Union[int, slice, str, ListLike[int]], **kwarg... method __getitem__ (line 3144) | def __getitem__(self, key: Union[int, slice, Iterable[int]]) -> dict: ... method __getitem__ (line 3148) | def __getitem__(self, key: str) -> list: # noqa: F811 method __getitem__ (line 3151) | def __getitem__(self, key): # noqa: F811 method __getitems__ (line 3158) | def __getitems__(self, keys: list) -> list: method cleanup_cache_files (line 3164) | def cleanup_cache_files(self) -> int: method _get_cache_file_path (line 3201) | def _get_cache_file_path(self, fingerprint): method map (line 3212) | def map( method _map_single (line 3668) | def _map_single( method batch (line 4063) | def batch( method filter (line 4116) | def filter( method flatten_indices (line 4262) | def flatten_indices( method _new_dataset_with_indices (line 4308) | def _new_dataset_with_indices( method select (line 4341) | def select( method _select_contiguous (line 4431) | def _select_contiguous( method _select_with_indices_mapping (line 4487) | def _select_with_indices_mapping( method skip (line 4592) | def skip(self, n: int) -> "Dataset": method repeat (line 4622) | def repeat(self, num_times: int) -> "Dataset": method take (line 4654) | def take(self, n: int) -> "Dataset": method sort (line 4679) | def sort( method shuffle (line 4809) | def shuffle( method train_test_split (line 4944) | def train_test_split( method shard (line 5220) | def shard( method to_csv (line 5297) | def to_csv( method to_dict (line 5356) | def to_dict(self, batch_size: Optional[int] = None, batched: bool = Fa... method to_list (line 5381) | def to_list(self) -> list: method to_json (line 5399) | def to_json( method to_pandas (line 5461) | def to_pandas( method to_polars (line 5500) | def to_polars( method to_parquet (line 5560) | def to_parquet( method to_sql (line 5600) | def to_sql( method _estimate_nbytes (line 5649) | def _estimate_nbytes(self) -> int: method _generate_tables_from_shards (line 5682) | def _generate_tables_from_shards(shards: list["Dataset"], batch_size: ... method _generate_tables_from_cache_file (line 5688) | def _generate_tables_from_cache_file(filename: str): method to_iterable_dataset (line 5692) | def to_iterable_dataset(self, num_shards: Optional[int] = 1) -> "Itera... method _push_parquet_shards_to_hub_single (line 5817) | def _push_parquet_shards_to_hub_single( method _push_parquet_shards_to_hub (line 5893) | def _push_parquet_shards_to_hub( method push_to_hub (line 6001) | def push_to_hub( method add_column (line 6197) | def add_column( method add_faiss_index (line 6250) | def add_faiss_index( method add_faiss_index_from_external_arrays (line 6330) | def add_faiss_index_from_external_arrays( method add_elasticsearch_index (line 6389) | def add_elasticsearch_index( method add_item (line 6459) | def add_item(self, item: dict, new_fingerprint: Optional[str] = None): method align_labels_with_mapping (line 6511) | def align_labels_with_mapping(self, label2id: dict, label_column: str)... function _push_to_repo (line 6589) | def _push_to_repo( function _push_to_bucket (line 6754) | def _push_to_bucket( function _get_updated_dataset_card (line 6826) | def _get_updated_dataset_card( function _concatenate_map_style_datasets (line 6959) | def _concatenate_map_style_datasets( function _interleave_map_style_datasets (line 7073) | def _interleave_map_style_datasets( function _split_by_node_map_style_dataset (line 7208) | def _split_by_node_map_style_dataset(dataset: Dataset, rank: int, world_... function get_indices_from_mask_function (line 7231) | def get_indices_from_mask_function( function async_get_indices_from_mask_function (line 7289) | async def async_get_indices_from_mask_function( FILE: src/datasets/arrow_reader.py class DatasetNotOnHfGcsError (line 65) | class DatasetNotOnHfGcsError(ConnectionError): class MissingFilesOnHfGcsError (line 71) | class MissingFilesOnHfGcsError(ConnectionError): class FileInstructions (line 78) | class FileInstructions: function make_file_instructions (line 92) | def make_file_instructions( class BaseReader (line 167) | class BaseReader: method __init__ (line 172) | def __init__(self, path: str, info: Optional["DatasetInfo"]): method _get_table_from_filename (line 183) | def _get_table_from_filename(self, filename_skip_take, in_memory=False... method _read_files (line 187) | def _read_files(self, files, in_memory=False) -> Table: method get_file_instructions (line 219) | def get_file_instructions(self, name, instruction, split_infos): method read (line 227) | def read( method read_files (line 254) | def read_files( class ArrowReader (line 285) | class ArrowReader(BaseReader): method __init__ (line 291) | def __init__(self, path: str, info: Optional["DatasetInfo"]): method _get_table_from_filename (line 301) | def _get_table_from_filename(self, filename_skip_take, in_memory=False... method read_table (line 317) | def read_table(filename, in_memory=False) -> Table: class ParquetReader (line 332) | class ParquetReader(BaseReader): method __init__ (line 338) | def __init__(self, path: str, info: Optional["DatasetInfo"]): method _get_table_from_filename (line 348) | def _get_table_from_filename(self, filename_skip_take, **kwargs): class _AbsoluteInstruction (line 364) | class _AbsoluteInstruction: class _RelativeInstruction (line 373) | class _RelativeInstruction: method __post_init__ (line 382) | def __post_init__(self): function _str_to_read_instruction (line 397) | def _str_to_read_instruction(spec): function _pct_to_abs_pct1 (line 412) | def _pct_to_abs_pct1(boundary, num_examples): function _pct_to_abs_closest (line 423) | def _pct_to_abs_closest(boundary, num_examples): function _rel_to_abs_instr (line 427) | def _rel_to_abs_instr(rel_instr, name2len): class ReadInstruction (line 456) | class ReadInstruction: method _init (line 495) | def _init(self, relative_instructions): method _read_instruction_from_relative_instructions (line 500) | def _read_instruction_from_relative_instructions(cls, relative_instruc... method __init__ (line 507) | def __init__(self, split_name, rounding=None, from_=None, to=None, uni... method from_spec (line 537) | def from_spec(cls, spec): method to_spec (line 567) | def to_spec(self): method __add__ (line 587) | def __add__(self, other): method __str__ (line 602) | def __str__(self): method __repr__ (line 605) | def __repr__(self): method to_absolute (line 608) | def to_absolute(self, name2len): FILE: src/datasets/arrow_writer.py function get_arrow_writer_batch_size_from_features (line 66) | def get_arrow_writer_batch_size_from_features(features: Optional[Feature... function get_writer_batch_size_from_features (line 105) | def get_writer_batch_size_from_features(features: Optional[Features]) ->... function get_writer_batch_size_from_data_size (line 145) | def get_writer_batch_size_from_data_size(num_rows: int, num_bytes: int) ... class SchemaInferenceError (line 169) | class SchemaInferenceError(ValueError): class TypedSequence (line 173) | class TypedSequence: method __init__ (line 213) | def __init__( method get_inferred_type (line 236) | def get_inferred_type(self) -> FeatureType: method _infer_custom_type_and_encode (line 252) | def _infer_custom_type_and_encode(data: Iterable) -> tuple[Iterable, O... method __arrow_array__ (line 290) | def __arrow_array__(self, type: Optional[pa.DataType] = None): method _arrow_array (line 296) | def _arrow_array(self, type: Optional[pa.DataType] = None): class OptimizedTypedSequence (line 462) | class OptimizedTypedSequence(TypedSequence): method __init__ (line 463) | def __init__( class ArrowWriter (line 487) | class ArrowWriter: method __init__ (line 490) | def __init__( method __len__ (line 552) | def __len__(self): method __enter__ (line 556) | def __enter__(self): method __exit__ (line 559) | def __exit__(self, exc_type, exc_val, exc_tb): method close (line 562) | def close(self): method _build_schema (line 572) | def _build_schema(self, inferred_schema: pa.Schema): method _build_writer (line 599) | def _build_writer(self, inferred_schema: pa.Schema): method schema (line 604) | def schema(self): method _build_metadata (line 615) | def _build_metadata(info: DatasetInfo, fingerprint: Optional[str] = No... method write_examples_on_file (line 624) | def write_examples_on_file(self): method write_rows_on_file (line 658) | def write_rows_on_file(self): method write (line 666) | def write( method write_row (line 684) | def write_row(self, row: pa.Table, writer_batch_size: Optional[int] = ... method write_batch (line 698) | def write_batch( method write_table (line 749) | def write_table(self, pa_table: pa.Table, writer_batch_size: Optional[... method finalize (line 767) | def finalize(self, close_stream=True): class ParquetWriter (line 789) | class ParquetWriter(ArrowWriter): method __init__ (line 790) | def __init__(self, *args, use_content_defined_chunking=True, write_pag... method _build_writer (line 797) | def _build_writer(self, inferred_schema: pa.Schema): FILE: src/datasets/builder.py class InvalidConfigName (line 91) | class InvalidConfigName(ValueError): class BuilderConfig (line 96) | class BuilderConfig: method __post_init__ (line 121) | def __post_init__(self): method __eq__ (line 132) | def __eq__(self, o): method create_config_id (line 139) | def create_config_id( method _resolve_data_files (line 203) | def _resolve_data_files(self, base_path: str, download_config: Downloa... class DatasetBuilder (line 209) | class DatasetBuilder: method __init__ (line 300) | def __init__( method __getstate__ (line 417) | def __getstate__(self): method __setstate__ (line 420) | def __setstate__(self, d): method _check_legacy_cache (line 425) | def _check_legacy_cache(self) -> Optional[str]: method _check_legacy_cache2 (line 448) | def _check_legacy_cache2(self, dataset_module: "DatasetModule") -> Opt... method _create_builder_config (line 495) | def _create_builder_config( method builder_configs (line 589) | def builder_configs(cls) -> dict[str, BuilderConfig]: method cache_dir (line 598) | def cache_dir(self): method _use_legacy_cache_dir_if_possible (line 601) | def _use_legacy_cache_dir_if_possible(self, dataset_module: "DatasetMo... method _relative_data_dir (line 609) | def _relative_data_dir(self, with_version=True, with_hash=True) -> str: method _build_cache_dir (line 629) | def _build_cache_dir(self): method _info (line 664) | def _info(self) -> DatasetInfo: method get_imported_module_dir (line 676) | def get_imported_module_dir(cls): method _rename (line 680) | def _rename(self, src: str, dst: str): method download_and_prepare (line 683) | def download_and_prepare( method _download_and_prepare (line 906) | def _download_and_prepare(self, dl_manager, verification_mode, **prepa... method download_post_processing_resources (line 959) | def download_post_processing_resources(self, dl_manager): method _load_info (line 975) | def _load_info(self) -> DatasetInfo: method _save_info (line 978) | def _save_info(self): method _make_split_generators_kwargs (line 987) | def _make_split_generators_kwargs(self, prepare_split_kwargs): method as_dataset (line 992) | def as_dataset( method _build_single_dataset (line 1067) | def _build_single_dataset( method _as_dataset (line 1136) | def _as_dataset(self, split: Union[ReadInstruction, Split] = Split.TRA... method _get_dataset_fingerprint (line 1165) | def _get_dataset_fingerprint(self, split: Union[ReadInstruction, Split... method as_streaming_dataset (line 1173) | def as_streaming_dataset( method _as_streaming_dataset_single (line 1208) | def _as_streaming_dataset_single( method _post_process (line 1219) | def _post_process(self, dataset: Dataset, resources_paths: Mapping[str... method _post_processing_resources (line 1223) | def _post_processing_resources(self, split: str) -> dict[str, str]: method _download_post_processing_resources (line 1227) | def _download_post_processing_resources( method _split_generators (line 1234) | def _split_generators(self, dl_manager: Union[DownloadManager, Streami... method _prepare_split (line 1281) | def _prepare_split( method _get_examples_iterable_for_split (line 1310) | def _get_examples_iterable_for_split(self, split_generator: SplitGener... class Key (line 1321) | class Key: method __str__ (line 1325) | def __str__(self): class GeneratorBasedBuilder (line 1329) | class GeneratorBasedBuilder(DatasetBuilder): method _generate_shards (line 1338) | def _generate_shards(self, **kwargs) -> Iterator[Union[str, dict[str, ... method _generate_examples (line 1359) | def _generate_examples(self, **kwargs) -> Iterator[tuple[Key, dict[str... method _prepare_split (line 1389) | def _prepare_split( method _prepare_split_single (line 1547) | def _prepare_split_single( method _download_and_prepare (line 1632) | def _download_and_prepare(self, dl_manager, verification_mode, **prepa... method _get_examples_iterable_for_split (line 1639) | def _get_examples_iterable_for_split(self, split_generator: SplitGener... class ArrowBasedBuilder (line 1647) | class ArrowBasedBuilder(DatasetBuilder): method _generate_shards (line 1650) | def _generate_shards(self, **kwargs) -> Iterator[Union[str, dict[str, ... method _generate_tables (line 1671) | def _generate_tables(self, **kwargs) -> Iterator[tuple[Key, pa.Table]]: method _prepare_split (line 1691) | def _prepare_split( method _prepare_split_single (line 1848) | def _prepare_split_single( method _get_examples_iterable_for_split (line 1938) | def _get_examples_iterable_for_split(self, split_generator: SplitGener... class _CountableBuilderMixin (line 1946) | class _CountableBuilderMixin(DatasetBuilder): method _generate_num_examples (line 1948) | def _generate_num_examples(self, **kwargs) -> Iterator[int]: method count_examples (line 1951) | def count_examples(self, dl_manager: DownloadManager) -> dict[str, int]: method _count_examples (line 1956) | def _count_examples(self, split_generator: SplitGenerator) -> int: method _count_examples_single (line 1967) | def _count_examples_single(self, gen_kwargs: dict[str, Any]) -> int: FILE: src/datasets/combine.py function interleave_datasets (line 18) | def interleave_datasets( function concatenate_datasets (line 168) | def concatenate_datasets( FILE: src/datasets/commands/__init__.py class BaseDatasetsCLICommand (line 5) | class BaseDatasetsCLICommand(ABC): method register_subcommand (line 8) | def register_subcommand(parser: ArgumentParser): method run (line 12) | def run(self): FILE: src/datasets/commands/datasets_cli.py function parse_unknown_args (line 10) | def parse_unknown_args(unknown_args): function main (line 14) | def main(): FILE: src/datasets/commands/delete_from_hub.py function _command_factory (line 8) | def _command_factory(args): class DeleteFromHubCommand (line 17) | class DeleteFromHubCommand(BaseDatasetsCLICommand): method register_subcommand (line 19) | def register_subcommand(parser): method __init__ (line 29) | def __init__( method run (line 41) | def run(self) -> None: FILE: src/datasets/commands/env.py function info_command_factory (line 13) | def info_command_factory(_): class EnvironmentCommand (line 17) | class EnvironmentCommand(BaseDatasetsCLICommand): method register_subcommand (line 19) | def register_subcommand(parser: ArgumentParser): method run (line 23) | def run(self): method format_dict (line 40) | def format_dict(d): FILE: src/datasets/commands/test.py function _test_command_factory (line 20) | def _test_command_factory(args): class TestCommand (line 35) | class TestCommand(BaseDatasetsCLICommand): method register_subcommand (line 39) | def register_subcommand(parser: ArgumentParser): method __init__ (line 75) | def __init__( method run (line 108) | def run(self): FILE: src/datasets/data_files.py class Url (line 33) | class Url(str): class EmptyDatasetError (line 37) | class EmptyDatasetError(FileNotFoundError): function contains_wildcards (line 117) | def contains_wildcards(pattern: str) -> bool: function sanitize_patterns (line 121) | def sanitize_patterns(patterns: Union[dict, list, str]) -> dict[str, Uni... function _is_inside_unrequested_special_dir (line 162) | def _is_inside_unrequested_special_dir(matched_rel_path: str, pattern: s... function _is_unrequested_hidden_file_or_is_inside_unrequested_hidden_dir (line 195) | def _is_unrequested_hidden_file_or_is_inside_unrequested_hidden_dir(matc... function _get_data_files_patterns (line 257) | def _get_data_files_patterns(pattern_resolver: Callable[[str], list[str]... function resolve_pattern (line 301) | def resolve_pattern( function get_data_patterns (line 407) | def get_data_patterns(base_path: str, download_config: Optional[Download... function _get_single_origin_metadata (line 498) | def _get_single_origin_metadata( function _get_origin_metadata (line 522) | def _get_origin_metadata( class DataFilesList (line 551) | class DataFilesList(list[str]): method __init__ (line 569) | def __init__(self, data_files: list[str], origin_metadata: list[Single... method __add__ (line 573) | def __add__(self, other: "DataFilesList") -> "DataFilesList": method from_hf_repo (line 577) | def from_hf_repo( method from_local_or_remote (line 591) | def from_local_or_remote( method from_patterns (line 604) | def from_patterns( method filter (line 629) | def filter( class DataFilesDict (line 648) | class DataFilesDict(dict[str, DataFilesList]): method from_local_or_remote (line 665) | def from_local_or_remote( method from_hf_repo (line 687) | def from_hf_repo( method from_patterns (line 711) | def from_patterns( method filter (line 732) | def filter( class DataFilesPatternsList (line 741) | class DataFilesPatternsList(list[str]): method __init__ (line 748) | def __init__( method __add__ (line 756) | def __add__(self, other): method from_patterns (line 760) | def from_patterns( method resolve (line 765) | def resolve( method filter_extensions (line 788) | def filter_extensions(self, extensions: list[str]) -> "DataFilesPatter... class DataFilesPatternsDict (line 794) | class DataFilesPatternsDict(dict[str, DataFilesPatternsList]): method from_patterns (line 800) | def from_patterns( method resolve (line 815) | def resolve( method filter_extensions (line 825) | def filter_extensions(self, extensions: list[str]) -> "DataFilesPatter... FILE: src/datasets/dataset_dict.py class bind (line 58) | class bind(partial): method __call__ (line 59) | def __call__(self, *fn_args, **fn_kwargs): class DatasetDict (line 63) | class DatasetDict(dict[Union[str, NamedSplit], "Dataset"]): method _check_values_type (line 66) | def _check_values_type(self): method _check_values_features (line 71) | def _check_values_features(self): method __enter__ (line 79) | def __enter__(self): method __exit__ (line 82) | def __exit__(self, exc_type, exc_val, exc_tb): method __getitem__ (line 90) | def __getitem__(self, k) -> Dataset: method data (line 105) | def data(self) -> dict[str, Table]: method cache_files (line 120) | def cache_files(self) -> dict[str, dict]: method num_columns (line 138) | def num_columns(self) -> dict[str, int]: method num_rows (line 154) | def num_rows(self) -> dict[str, int]: method column_names (line 170) | def column_names(self) -> dict[str, list[str]]: method shape (line 188) | def shape(self) -> dict[str, tuple[int]]: method flatten (line 203) | def flatten(self, max_depth=16) -> "DatasetDict": method unique (line 236) | def unique(self, column: str) -> dict[str, list]: method cleanup_cache_files (line 260) | def cleanup_cache_files(self) -> dict[str, int]: method __repr__ (line 279) | def __repr__(self): method cast (line 284) | def cast(self, features: Features) -> "DatasetDict": method cast_column (line 316) | def cast_column(self, column: str, feature) -> "DatasetDict": method remove_columns (line 345) | def remove_columns(self, column_names: Union[str, list[str]]) -> "Data... method rename_column (line 387) | def rename_column(self, original_column_name: str, new_column_name: st... method rename_columns (line 435) | def rename_columns(self, column_mapping: dict[str, str]) -> "DatasetDi... method select_columns (line 473) | def select_columns(self, column_names: Union[str, list[str]]) -> "Data... method class_encode_column (line 509) | def class_encode_column(self, column: str, include_nulls: bool = False... method formatted_as (line 542) | def formatted_as( method set_format (line 581) | def set_format( method reset_format (line 632) | def reset_format(self): method set_transform (line 664) | def set_transform( method with_format (line 693) | def with_format( method with_transform (line 770) | def with_transform( method map (line 824) | def map( method filter (line 996) | def filter( method flatten_indices (line 1109) | def flatten_indices( method sort (line 1161) | def sort( method shuffle (line 1228) | def shuffle( method save_to_disk (line 1311) | def save_to_disk( method load_from_disk (line 1386) | def load_from_disk( method from_csv (line 1446) | def from_csv( method from_json (line 1489) | def from_json( method from_parquet (line 1532) | def from_parquet( method from_text (line 1581) | def from_text( method align_labels_with_mapping (line 1624) | def align_labels_with_mapping(self, label2id: dict, label_column: str)... method push_to_hub (line 1633) | def push_to_hub( class IterableDatasetDict (line 1822) | class IterableDatasetDict(dict[Union[str, NamedSplit], IterableDataset]): method _check_values_type (line 1823) | def _check_values_type(self): method _check_values_features (line 1828) | def _check_values_features(self): method __repr__ (line 1836) | def __repr__(self): method num_columns (line 1842) | def num_columns(self) -> dict[str, Optional[int]]: method column_names (line 1859) | def column_names(self) -> dict[str, Optional[list[str]]]: method with_format (line 1877) | def with_format( method map (line 1923) | def map( method filter (line 2023) | def filter( method shuffle (line 2086) | def shuffle( method rename_column (line 2147) | def rename_column(self, original_column_name: str, new_column_name: st... method rename_columns (line 2183) | def rename_columns(self, column_mapping: dict[str, str]) -> "IterableD... method remove_columns (line 2211) | def remove_columns(self, column_names: Union[str, list[str]]) -> "Iter... method select_columns (line 2237) | def select_columns(self, column_names: Union[str, list[str]]) -> "Iter... method cast_column (line 2263) | def cast_column(self, column: str, feature: FeatureType) -> "IterableD... method cast (line 2294) | def cast( method push_to_hub (line 2331) | def push_to_hub( function _push_to_repo (line 2521) | def _push_to_repo( function _push_to_bucket (line 2696) | def _push_to_bucket( FILE: src/datasets/distributed.py function split_dataset_by_node (line 10) | def split_dataset_by_node(dataset: DatasetType, rank: int, world_size: i... FILE: src/datasets/download/download_config.py class DownloadConfig (line 10) | class DownloadConfig: method copy (line 72) | def copy(self) -> "DownloadConfig": method __setattr__ (line 75) | def __setattr__(self, name, value): FILE: src/datasets/download/download_manager.py class DownloadMode (line 50) | class DownloadMode(enum.Enum): class DownloadManager (line 71) | class DownloadManager: method __init__ (line 74) | def __init__( method manual_dir (line 110) | def manual_dir(self): method downloaded_size (line 114) | def downloaded_size(self): method _record_sizes_checksums (line 118) | def _record_sizes_checksums(self, url_or_urls: NestedDataStructure, do... method download (line 131) | def download(self, url_or_urls): method _download_batched (line 181) | def _download_batched( method _download_single (line 224) | def _download_single(self, url_or_filename: str, download_config: Down... method iter_archive (line 234) | def iter_archive(self, path_or_buf: Union[str, io.BufferedReader]): method iter_files (line 259) | def iter_files(self, paths: Union[str, list[str]]): method extract (line 278) | def extract(self, path_or_paths): method download_and_extract (line 310) | def download_and_extract(self, url_or_urls): method get_recorded_sizes_checksums (line 328) | def get_recorded_sizes_checksums(self): method delete_extracted_files (line 331) | def delete_extracted_files(self): method manage_extracted_files (line 338) | def manage_extracted_files(self): FILE: src/datasets/download/streaming_download_manager.py class StreamingDownloadManager (line 47) | class StreamingDownloadManager: method __init__ (line 57) | def __init__( method manual_dir (line 72) | def manual_dir(self): method download (line 75) | def download(self, url_or_urls): method _download_single (line 95) | def _download_single(self, urlpath: str) -> str: method extract (line 102) | def extract(self, url_or_urls): method _extract (line 124) | def _extract(self, urlpath: str) -> str: method download_and_extract (line 151) | def download_and_extract(self, url_or_urls): method iter_archive (line 171) | def iter_archive(self, urlpath_or_buf: Union[str, io.BufferedReader]) ... method iter_files (line 196) | def iter_files(self, urlpaths: Union[str, list[str]]) -> Iterable[str]: method manage_extracted_files (line 215) | def manage_extracted_files(self): method get_recorded_sizes_checksums (line 218) | def get_recorded_sizes_checksums(self): FILE: src/datasets/exceptions.py class DatasetsError (line 12) | class DatasetsError(Exception): class DefunctDatasetError (line 16) | class DefunctDatasetError(DatasetsError): class FileNotFoundDatasetsError (line 20) | class FileNotFoundDatasetsError(DatasetsError, FileNotFoundError): class DataFilesNotFoundError (line 24) | class DataFilesNotFoundError(FileNotFoundDatasetsError): class DatasetNotFoundError (line 28) | class DatasetNotFoundError(FileNotFoundDatasetsError): class DatasetBuildError (line 37) | class DatasetBuildError(DatasetsError): class ManualDownloadError (line 41) | class ManualDownloadError(DatasetBuildError): class FileFormatError (line 45) | class FileFormatError(DatasetBuildError): class DatasetGenerationError (line 49) | class DatasetGenerationError(DatasetBuildError): class DatasetGenerationCastError (line 53) | class DatasetGenerationCastError(DatasetGenerationError): method from_cast_error (line 55) | def from_cast_error( class ChecksumVerificationError (line 90) | class ChecksumVerificationError(DatasetsError): class UnexpectedDownloadedFileError (line 94) | class UnexpectedDownloadedFileError(ChecksumVerificationError): class ExpectedMoreDownloadedFilesError (line 98) | class ExpectedMoreDownloadedFilesError(ChecksumVerificationError): class NonMatchingChecksumError (line 102) | class NonMatchingChecksumError(ChecksumVerificationError): class SplitsVerificationError (line 106) | class SplitsVerificationError(DatasetsError): class UnexpectedSplitsError (line 110) | class UnexpectedSplitsError(SplitsVerificationError): class ExpectedMoreSplitsError (line 114) | class ExpectedMoreSplitsError(SplitsVerificationError): class NonMatchingSplitsSizesError (line 118) | class NonMatchingSplitsSizesError(SplitsVerificationError): FILE: src/datasets/features/_torchcodec.py class AudioDecoder (line 5) | class AudioDecoder(_AudioDecoder): method __getitem__ (line 6) | def __getitem__(self, key: str): FILE: src/datasets/features/audio.py class Audio (line 24) | class Audio: method __call__ (line 93) | def __call__(self): method encode_example (line 96) | def encode_example(self, value: Union[str, bytes, bytearray, dict, "Au... method decode_example (line 164) | def decode_example( method flatten (line 223) | def flatten(self) -> Union["FeatureType", dict[str, "FeatureType"]]: method cast_storage (line 234) | def cast_storage(self, storage: Union[pa.StringArray, pa.StructArray])... method embed_storage (line 280) | def embed_storage(self, storage: pa.StructArray, token_per_repo_id=Non... function encode_torchcodec_audio (line 321) | def encode_torchcodec_audio(audio: "AudioDecoder") -> dict: FILE: src/datasets/features/features.py function _arrow_to_datasets_dtype (line 55) | def _arrow_to_datasets_dtype(arrow_type: pa.DataType) -> str: function string_to_arrow (line 125) | def string_to_arrow(datasets_dtype: str) -> pa.DataType: function _cast_to_python_objects (line 276) | def _cast_to_python_objects(obj: Any, only_1d_for_numpy: bool, optimize_... function cast_to_python_objects (line 467) | def cast_to_python_objects(obj: Any, only_1d_for_numpy=False, optimize_l... class Value (line 493) | class Value: method __post_init__ (line 547) | def __post_init__(self): method __call__ (line 554) | def __call__(self): method encode_example (line 557) | def encode_example(self, value): method __repr__ (line 573) | def __repr__(self): class _ArrayXD (line 577) | class _ArrayXD: method __post_init__ (line 578) | def __post_init__(self): method __call__ (line 581) | def __call__(self): method encode_example (line 585) | def encode_example(self, value): class Array2D (line 590) | class Array2D(_ArrayXD): class Array3D (line 615) | class Array3D(_ArrayXD): class Array4D (line 640) | class Array4D(_ArrayXD): class Array5D (line 665) | class Array5D(_ArrayXD): class _ArrayXDExtensionType (line 689) | class _ArrayXDExtensionType(pa.ExtensionType): method __init__ (line 692) | def __init__(self, shape: tuple, dtype: str): method __arrow_ext_serialize__ (line 705) | def __arrow_ext_serialize__(self): method __arrow_ext_deserialize__ (line 709) | def __arrow_ext_deserialize__(cls, storage_type, serialized): method __reduce__ (line 714) | def __reduce__(self): method __hash__ (line 717) | def __hash__(self): method __arrow_ext_class__ (line 720) | def __arrow_ext_class__(self): method _generate_dtype (line 723) | def _generate_dtype(self, dtype): method to_pandas_dtype (line 731) | def to_pandas_dtype(self): class Array2DExtensionType (line 735) | class Array2DExtensionType(_ArrayXDExtensionType): class Array3DExtensionType (line 739) | class Array3DExtensionType(_ArrayXDExtensionType): class Array4DExtensionType (line 743) | class Array4DExtensionType(_ArrayXDExtensionType): class Array5DExtensionType (line 747) | class Array5DExtensionType(_ArrayXDExtensionType): function _is_zero_copy_only (line 758) | def _is_zero_copy_only(pa_type: pa.DataType, unnest: bool = False) -> bool: class ArrayExtensionArray (line 780) | class ArrayExtensionArray(pa.ExtensionArray): method __array__ (line 781) | def __array__(self): method __getitem__ (line 785) | def __getitem__(self, i): method to_numpy (line 788) | def to_numpy(self, zero_copy_only=True): method to_pylist (line 832) | def to_pylist(self, maps_as_pydicts: Optional[Literal["lossy", "strict... class PandasArrayExtensionDtype (line 841) | class PandasArrayExtensionDtype(PandasExtensionDtype): method __init__ (line 844) | def __init__(self, value_type: Union["PandasArrayExtensionDtype", np.d... method __from_arrow__ (line 847) | def __from_arrow__(self, array: Union[pa.Array, pa.ChunkedArray]): method construct_array_type (line 855) | def construct_array_type(cls): method type (line 859) | def type(self) -> type: method kind (line 863) | def kind(self) -> str: method name (line 867) | def name(self) -> str: method value_type (line 871) | def value_type(self) -> np.dtype: class PandasArrayExtensionArray (line 875) | class PandasArrayExtensionArray(PandasExtensionArray): method __init__ (line 876) | def __init__(self, data: np.ndarray, copy: bool = False): method __array__ (line 880) | def __array__(self, dtype=None): method copy (line 900) | def copy(self, deep: bool = False) -> "PandasArrayExtensionArray": method _from_sequence (line 904) | def _from_sequence( method _concat_same_type (line 917) | def _concat_same_type(cls, to_concat: Sequence_["PandasArrayExtensionA... method dtype (line 929) | def dtype(self) -> PandasArrayExtensionDtype: method nbytes (line 933) | def nbytes(self) -> int: method isna (line 936) | def isna(self) -> np.ndarray: method __setitem__ (line 939) | def __setitem__(self, key: Union[int, slice, np.ndarray], value: Any) ... method __getitem__ (line 942) | def __getitem__(self, item: Union[int, slice, np.ndarray]) -> Union[np... method take (line 947) | def take( method __len__ (line 970) | def __len__(self) -> int: method __eq__ (line 973) | def __eq__(self, other) -> np.ndarray: function pandas_types_mapper (line 979) | def pandas_types_mapper(dtype): class ClassLabel (line 985) | class ClassLabel: method __post_init__ (line 1027) | def __post_init__(self, num_classes, names_file): method __call__ (line 1056) | def __call__(self): method str2int (line 1059) | def str2int(self, values: Union[str, Iterable]) -> Union[int, Iterable]: method _strval2int (line 1083) | def _strval2int(self, value: str) -> int: method int2str (line 1104) | def int2str(self, values: Union[int, Iterable]) -> Union[str, Iterable]: method encode_example (line 1134) | def encode_example(self, example_data): method cast_storage (line 1150) | def cast_storage(self, storage: Union[pa.StringArray, pa.IntegerArray]... method _load_names_from_file (line 1177) | def _load_names_from_file(names_filepath): class Json (line 1183) | class Json: method __call__ (line 1224) | def __call__(self): method encode_example (line 1227) | def encode_example(self, example_data): method decode_example (line 1237) | def decode_example(self, example_data, token_per_repo_id: Optional[dic... method cast_storage (line 1242) | def cast_storage(self, storage: Union[pa.Array]) -> pa.JsonArray: class Sequence (line 1266) | class Sequence: method __new__ (line 1286) | def __new__(cls, feature=None, length=-1, **kwargs): class List (line 1300) | class List(Sequence): method __repr__ (line 1320) | def __repr__(self): class LargeList (line 1328) | class LargeList: method __repr__ (line 1344) | def __repr__(self): function _check_non_null_non_empty_recursive (line 1370) | def _check_non_null_non_empty_recursive(obj, schema: Optional[FeatureTyp... function get_nested_type (line 1392) | def get_nested_type(schema: FeatureType) -> pa.DataType: function encode_nested_example (line 1425) | def encode_nested_example(schema, obj, level=0): function decode_nested_example (line 1465) | def decode_nested_example(schema, obj, token_per_repo_id: Optional[dict[... function register_feature (line 1533) | def register_feature( function generate_from_dict (line 1548) | def generate_from_dict(obj: Any): function generate_from_arrow_type (line 1586) | def generate_from_arrow_type(pa_type: pa.DataType) -> FeatureType: function numpy_to_pyarrow_listarray (line 1615) | def numpy_to_pyarrow_listarray(arr: np.ndarray, type: pa.DataType = None... function list_of_pa_arrays_to_pyarrow_listarray (line 1627) | def list_of_pa_arrays_to_pyarrow_listarray(l_arr: list[Optional[pa.Array... function list_of_np_array_to_pyarrow_listarray (line 1640) | def list_of_np_array_to_pyarrow_listarray(l_arr: list[np.ndarray], type:... function contains_any_np_array (line 1650) | def contains_any_np_array(data: Any): function any_np_array_to_pyarrow_listarray (line 1667) | def any_np_array_to_pyarrow_listarray(data: Union[np.ndarray, list], typ... function to_pyarrow_listarray (line 1683) | def to_pyarrow_listarray(data: Any, pa_type: _ArrayXDExtensionType) -> p... function _visit (line 1699) | def _visit(feature: FeatureType, func: Callable[[FeatureType], Optional[... function _visit_with_path (line 1723) | def _visit_with_path( function require_decoding (line 1755) | def require_decoding(feature: FeatureType, ignore_decode_attribute: bool... function require_storage_cast (line 1779) | def require_storage_cast(feature: FeatureType) -> bool: function require_storage_embed (line 1797) | def require_storage_embed(feature: FeatureType) -> bool: function keep_features_dicts_synced (line 1815) | def keep_features_dicts_synced(func): class Features (line 1837) | class Features(dict): method __init__ (line 1871) | def __init__(*args, **kwargs): method __reduce__ (line 1899) | def __reduce__(self): method type (line 1903) | def type(self): method arrow_schema (line 1913) | def arrow_schema(self): method from_arrow_schema (line 1924) | def from_arrow_schema(cls, pa_schema: pa.Schema) -> "Features": method from_dict (line 1958) | def from_dict(cls, dic) -> "Features": method to_dict (line 1986) | def to_dict(self): method _to_yaml_list (line 1989) | def _to_yaml_list(self) -> list: method _from_yaml_list (line 2064) | def _from_yaml_list(cls, yaml_data: list) -> "Features": method encode_example (line 2140) | def encode_example(self, example): method encode_column (line 2154) | def encode_column(self, column, column_name: str): method encode_batch (line 2170) | def encode_batch(self, batch): method decode_example (line 2189) | def decode_example(self, example: dict, token_per_repo_id: Optional[di... method decode_column (line 2212) | def decode_column( method decode_batch (line 2237) | def decode_batch(self, batch: dict, token_per_repo_id: Optional[dict[s... method copy (line 2264) | def copy(self) -> "Features": method reorder_fields_as (line 2284) | def reorder_fields_as(self, other: "Features") -> "Features": method flatten (line 2337) | def flatten(self, max_depth=16) -> "Features": function _is_null_feature (line 2381) | def _is_null_feature(feature) -> bool: function _align_features (line 2396) | def _align_features(features_list: list[Features]) -> list[Features]: function _check_if_features_can_be_aligned (line 2410) | def _check_if_features_can_be_aligned(features_list: list[Features]): function _fix_for_backward_compatible_features (line 2433) | def _fix_for_backward_compatible_features(feature: Any) -> FeatureType: FILE: src/datasets/features/image.py class Image (line 47) | class Image: method __call__ (line 95) | def __call__(self): method encode_example (line 98) | def encode_example(self, value: Union[str, bytes, bytearray, dict, np.... method decode_example (line 139) | def decode_example(self, value: dict, token_per_repo_id=None) -> "PIL.... method flatten (line 200) | def flatten(self) -> Union["FeatureType", dict[str, "FeatureType"]]: method cast_storage (line 213) | def cast_storage(self, storage: Union[pa.StringArray, pa.StructArray, ... method embed_storage (line 275) | def embed_storage(self, storage: pa.StructArray, token_per_repo_id=Non... function list_image_compression_formats (line 316) | def list_image_compression_formats() -> list[str]: function image_to_bytes (line 329) | def image_to_bytes(image: "PIL.Image.Image") -> bytes: function encode_pil_image (line 340) | def encode_pil_image(image: "PIL.Image.Image") -> dict: function encode_np_array (line 347) | def encode_np_array(array: np.ndarray) -> dict: function objects_to_list_of_image_dicts (line 390) | def objects_to_list_of_image_dicts( FILE: src/datasets/features/nifti.py class Nifti1ImageWrapper (line 23) | class Nifti1ImageWrapper(nib.nifti1.Nifti1Image): method __init__ (line 28) | def __init__(self, nifti_image: nib.nifti1.Nifti1Image): method _repr_html_ (line 39) | def _repr_html_(self): class Nifti (line 64) | class Nifti: method __call__ (line 107) | def __call__(self): method encode_example (line 110) | def encode_example(self, value: Union[str, bytes, bytearray, dict, "ni... method decode_example (line 150) | def decode_example(self, value: dict, token_per_repo_id=None) -> "Nift... method embed_storage (line 213) | def embed_storage(self, storage: pa.StructArray, token_per_repo_id=Non... method flatten (line 253) | def flatten(self) -> Union["FeatureType", Dict[str, "FeatureType"]]: method cast_storage (line 266) | def cast_storage(self, storage: Union[pa.StringArray, pa.StructArray, ... function encode_nibabel_image (line 303) | def encode_nibabel_image(img: "nib.Nifti1Image", force_bytes: bool = Fal... FILE: src/datasets/features/pdf.py function pdf_to_bytes (line 22) | def pdf_to_bytes(pdf: "pdfplumber.pdf.PDF") -> bytes: class Pdf (line 31) | class Pdf: method __call__ (line 75) | def __call__(self): method encode_example (line 78) | def encode_example(self, value: Union[str, bytes, bytearray, dict, "pd... method decode_example (line 113) | def decode_example(self, value: dict, token_per_repo_id=None) -> "pdfp... method flatten (line 171) | def flatten(self) -> Union["FeatureType", Dict[str, "FeatureType"]]: method cast_storage (line 184) | def cast_storage(self, storage: Union[pa.StringArray, pa.StructArray, ... method embed_storage (line 221) | def embed_storage(self, storage: pa.StructArray, token_per_repo_id=Non... function encode_pdfplumber_pdf (line 262) | def encode_pdfplumber_pdf(pdf: "pdfplumber.pdf.PDF") -> dict: FILE: src/datasets/features/translation.py class Translation (line 12) | class Translation: method __call__ (line 41) | def __call__(self): method flatten (line 44) | def flatten(self) -> Union["FeatureType", dict[str, "FeatureType"]]: class TranslationVariableLanguages (line 52) | class TranslationVariableLanguages: method __post_init__ (line 92) | def __post_init__(self): method __call__ (line 96) | def __call__(self): method encode_example (line 99) | def encode_example(self, translation_dict): method flatten (line 122) | def flatten(self) -> Union["FeatureType", dict[str, "FeatureType"]]: FILE: src/datasets/features/video.py class Example (line 23) | class Example(TypedDict): class Video (line 29) | class Video: method __call__ (line 102) | def __call__(self): method encode_example (line 105) | def encode_example(self, value: Union[str, bytes, bytearray, Example, ... method decode_example (line 153) | def decode_example( method flatten (line 226) | def flatten(self) -> Union["FeatureType", dict[str, "FeatureType"]]: method cast_storage (line 239) | def cast_storage(self, storage: Union[pa.StringArray, pa.StructArray, ... method embed_storage (line 291) | def embed_storage(self, storage: pa.StructArray, token_per_repo_id=Non... function video_to_bytes (line 332) | def video_to_bytes(video: "VideoDecoder") -> bytes: function encode_torchcodec_video (line 337) | def encode_torchcodec_video(video: "VideoDecoder") -> Example: function encode_np_array (line 346) | def encode_np_array(array: np.ndarray) -> Example: function hf_video_reader (line 355) | def hf_video_reader( FILE: src/datasets/filesystems/__init__.py function is_remote_filesystem (line 28) | def is_remote_filesystem(fs: fsspec.AbstractFileSystem) -> bool: function rename (line 39) | def rename(fs: fsspec.AbstractFileSystem, src: str, dst: str): FILE: src/datasets/filesystems/compression.py class BaseCompressedFileFileSystem (line 9) | class BaseCompressedFileFileSystem(AbstractArchiveFileSystem): method __init__ (line 19) | def __init__( method _strip_protocol (line 60) | def _strip_protocol(cls, path): method _get_dirs (line 64) | def _get_dirs(self): method cat (line 69) | def cat(self, path: str): method _open (line 73) | def _open( class Bz2FileSystem (line 88) | class Bz2FileSystem(BaseCompressedFileFileSystem): class GzipFileSystem (line 96) | class GzipFileSystem(BaseCompressedFileFileSystem): class Lz4FileSystem (line 104) | class Lz4FileSystem(BaseCompressedFileFileSystem): class XzFileSystem (line 112) | class XzFileSystem(BaseCompressedFileFileSystem): class ZstdFileSystem (line 120) | class ZstdFileSystem(BaseCompressedFileFileSystem): FILE: src/datasets/fingerprint.py class _TempCacheDir (line 45) | class _TempCacheDir: method __init__ (line 51) | def __init__(self): method _cleanup (line 81) | def _cleanup(self): method cleanup (line 92) | def cleanup(self): function maybe_register_dataset_for_temp_dir_deletion (line 97) | def maybe_register_dataset_for_temp_dir_deletion(dataset): function get_datasets_with_cache_file_in_temp_dir (line 116) | def get_datasets_with_cache_file_in_temp_dir(): function enable_caching (line 120) | def enable_caching(): function disable_caching (line 141) | def disable_caching(): function is_caching_enabled (line 162) | def is_caching_enabled() -> bool: function get_temporary_cache_files_directory (line 183) | def get_temporary_cache_files_directory() -> str: class Hasher (line 196) | class Hasher: method __init__ (line 201) | def __init__(self): method hash_bytes (line 205) | def hash_bytes(cls, value: Union[bytes, list[bytes]]) -> str: method hash (line 213) | def hash(cls, value: Any) -> str: method update (line 216) | def update(self, value: Any) -> None: method hexdigest (line 222) | def hexdigest(self) -> str: function generate_fingerprint (line 235) | def generate_fingerprint(dataset: "Dataset") -> str: function generate_random_fingerprint (line 249) | def generate_random_fingerprint(nbits: int = 64) -> str: function update_fingerprint (line 253) | def update_fingerprint(fingerprint, transform, transform_args): function validate_fingerprint (line 303) | def validate_fingerprint(fingerprint: str, max_length=64): function format_transform_for_fingerprint (line 323) | def format_transform_for_fingerprint(func: Callable, version: Optional[s... function format_kwargs_for_fingerprint (line 333) | def format_kwargs_for_fingerprint( function fingerprint_transform (line 378) | def fingerprint_transform( FILE: src/datasets/formatting/__init__.py function _register_formatter (line 40) | def _register_formatter( function _register_unavailable_formatter (line 63) | def _register_unavailable_formatter( function get_format_type_from_alias (line 115) | def get_format_type_from_alias(format_type: Optional[str]) -> Optional[s... function get_formatter (line 123) | def get_formatter(format_type: Optional[str], **format_kwargs) -> Format... FILE: src/datasets/formatting/formatting.py function _is_range_contiguous (line 40) | def _is_range_contiguous(key: range) -> bool: function _raise_bad_key_type (line 44) | def _raise_bad_key_type(key: Any): function _query_table_with_indices_mapping (line 50) | def _query_table_with_indices_mapping( function _query_table (line 80) | def _query_table(table: Table, key: Union[int, slice, range, str, Iterab... function _is_array_with_nulls (line 105) | def _is_array_with_nulls(pa_array: pa.Array) -> bool: class BaseArrowExtractor (line 109) | class BaseArrowExtractor(Generic[RowFormat, ColumnFormat, BatchFormat]): method extract_row (line 116) | def extract_row(self, pa_table: pa.Table) -> RowFormat: method extract_column (line 119) | def extract_column(self, pa_table: pa.Table) -> ColumnFormat: method extract_batch (line 122) | def extract_batch(self, pa_table: pa.Table) -> BatchFormat: function _unnest (line 126) | def _unnest(py_dict: dict[str, list[T]]) -> dict[str, T]: class SimpleArrowExtractor (line 131) | class SimpleArrowExtractor(BaseArrowExtractor[pa.Table, pa.Array, pa.Tab... method extract_row (line 132) | def extract_row(self, pa_table: pa.Table) -> pa.Table: method extract_column (line 135) | def extract_column(self, pa_table: pa.Table) -> pa.Array: method extract_batch (line 138) | def extract_batch(self, pa_table: pa.Table) -> pa.Table: class PythonArrowExtractor (line 142) | class PythonArrowExtractor(BaseArrowExtractor[dict, list, dict]): method extract_row (line 143) | def extract_row(self, pa_table: pa.Table) -> dict: method extract_column (line 146) | def extract_column(self, pa_table: pa.Table) -> list: method extract_batch (line 149) | def extract_batch(self, pa_table: pa.Table) -> dict: class NumpyArrowExtractor (line 153) | class NumpyArrowExtractor(BaseArrowExtractor[dict, np.ndarray, dict]): method __init__ (line 154) | def __init__(self, **np_array_kwargs): method extract_row (line 157) | def extract_row(self, pa_table: pa.Table) -> dict: method extract_column (line 160) | def extract_column(self, pa_table: pa.Table) -> np.ndarray: method extract_batch (line 163) | def extract_batch(self, pa_table: pa.Table) -> dict: method _arrow_array_to_numpy (line 166) | def _arrow_array_to_numpy(self, pa_array: pa.Array) -> np.ndarray: class PandasArrowExtractor (line 205) | class PandasArrowExtractor(BaseArrowExtractor[pd.DataFrame, pd.Series, p... method extract_row (line 206) | def extract_row(self, pa_table: pa.Table) -> pd.DataFrame: method extract_column (line 209) | def extract_column(self, pa_table: pa.Table) -> pd.Series: method extract_batch (line 212) | def extract_batch(self, pa_table: pa.Table) -> pd.DataFrame: class PythonFeaturesDecoder (line 216) | class PythonFeaturesDecoder: method __init__ (line 217) | def __init__( method decode_row (line 223) | def decode_row(self, row: dict) -> dict: method decode_column (line 226) | def decode_column(self, column: list, column_name: str) -> list: method decode_batch (line 233) | def decode_batch(self, batch: dict) -> dict: class PandasFeaturesDecoder (line 237) | class PandasFeaturesDecoder: method __init__ (line 238) | def __init__(self, features: Optional[Features]): method decode_row (line 241) | def decode_row(self, row: pd.DataFrame) -> pd.DataFrame: method decode_column (line 255) | def decode_column(self, column: pd.Series, column_name: str) -> pd.Ser... method decode_batch (line 265) | def decode_batch(self, batch: pd.DataFrame) -> pd.DataFrame: class LazyDict (line 269) | class LazyDict(MutableMapping): method __init__ (line 272) | def __init__(self, pa_table: pa.Table, formatter: "Formatter"): method __len__ (line 279) | def __len__(self): method __getitem__ (line 282) | def __getitem__(self, key): method __setitem__ (line 290) | def __setitem__(self, key, value): method __delitem__ (line 295) | def __delitem__(self, key) -> None: method __iter__ (line 300) | def __iter__(self): method __contains__ (line 303) | def __contains__(self, key): method __repr__ (line 306) | def __repr__(self): method __or__ (line 310) | def __or__(self, other): method __ror__ (line 325) | def __ror__(self, other): method __ior__ (line 340) | def __ior__(self, other): method __copy__ (line 351) | def __copy__(self): method copy (line 360) | def copy(self): method fromkeys (line 366) | def fromkeys(cls, iterable, value=None): method format (line 369) | def format(self, key): method _format_all (line 372) | def _format_all(self): class LazyRow (line 378) | class LazyRow(LazyDict): method format (line 379) | def format(self, key): class LazyBatch (line 383) | class LazyBatch(LazyDict): method format (line 384) | def format(self, key): class Formatter (line 388) | class Formatter(Generic[RowFormat, ColumnFormat, BatchFormat]): method __init__ (line 399) | def __init__( method __call__ (line 409) | def __call__(self, pa_table: pa.Table, query_type: str) -> Union[RowFo... method format_row (line 417) | def format_row(self, pa_table: pa.Table) -> RowFormat: method format_column (line 420) | def format_column(self, pa_table: pa.Table) -> ColumnFormat: method format_batch (line 423) | def format_batch(self, pa_table: pa.Table) -> BatchFormat: class TensorFormatter (line 427) | class TensorFormatter(Formatter[RowFormat, ColumnFormat, BatchFormat]): method recursive_tensorize (line 428) | def recursive_tensorize(self, data_struct: dict): class TableFormatter (line 432) | class TableFormatter(Formatter[RowFormat, ColumnFormat, BatchFormat]): class ArrowFormatter (line 437) | class ArrowFormatter(TableFormatter[pa.Table, pa.Array, pa.Table]): method format_row (line 441) | def format_row(self, pa_table: pa.Table) -> pa.Table: method format_column (line 444) | def format_column(self, pa_table: pa.Table) -> pa.Array: method format_batch (line 447) | def format_batch(self, pa_table: pa.Table) -> pa.Table: class PythonFormatter (line 451) | class PythonFormatter(Formatter[Mapping, list, Mapping]): method __init__ (line 452) | def __init__(self, features=None, lazy=False, token_per_repo_id=None): method format_row (line 456) | def format_row(self, pa_table: pa.Table) -> Mapping: method format_column (line 463) | def format_column(self, pa_table: pa.Table) -> list: method format_batch (line 468) | def format_batch(self, pa_table: pa.Table) -> Mapping: class PandasFormatter (line 476) | class PandasFormatter(TableFormatter[pd.DataFrame, pd.Series, pd.DataFra... method format_row (line 480) | def format_row(self, pa_table: pa.Table) -> pd.DataFrame: method format_column (line 485) | def format_column(self, pa_table: pa.Table) -> pd.Series: method format_batch (line 490) | def format_batch(self, pa_table: pa.Table) -> pd.DataFrame: class CustomFormatter (line 496) | class CustomFormatter(Formatter[dict, ColumnFormat, dict]): method __init__ (line 506) | def __init__(self, transform: Callable[[dict], dict], features=None, t... method format_row (line 510) | def format_row(self, pa_table: pa.Table) -> dict: method format_column (line 519) | def format_column(self, pa_table: pa.Table) -> ColumnFormat: method format_batch (line 538) | def format_batch(self, pa_table: pa.Table) -> dict: function _check_valid_column_key (line 544) | def _check_valid_column_key(key: str, columns: list[str]) -> None: function _check_valid_index_key (line 549) | def _check_valid_index_key(key: Union[int, slice, range, Iterable], size... function key_to_query_type (line 568) | def key_to_query_type(key: Union[int, slice, range, str, Iterable]) -> str: function query_table (line 578) | def query_table( function format_table (line 621) | def format_table( FILE: src/datasets/formatting/jax_formatter.py class JaxFormatter (line 38) | class JaxFormatter(TensorFormatter[Mapping, "jax.Array", Mapping]): method __init__ (line 39) | def __init__(self, features=None, device=None, token_per_repo_id=None,... method _map_devices_to_str (line 67) | def _map_devices_to_str() -> dict[str, "jaxlib.xla_extension.Device"]: method _consolidate (line 72) | def _consolidate(self, column): method _tensorize (line 83) | def _tensorize(self, value): method _recursive_tensorize (line 131) | def _recursive_tensorize(self, data_struct): method recursive_tensorize (line 150) | def recursive_tensorize(self, data_struct: dict): method format_row (line 153) | def format_row(self, pa_table: pa.Table) -> Mapping: method format_column (line 158) | def format_column(self, pa_table: pa.Table) -> "jax.Array": method format_batch (line 165) | def format_batch(self, pa_table: pa.Table) -> Mapping: FILE: src/datasets/formatting/np_formatter.py class NumpyFormatter (line 26) | class NumpyFormatter(TensorFormatter[Mapping, np.ndarray, Mapping]): method __init__ (line 27) | def __init__(self, features=None, token_per_repo_id=None, **np_array_k... method _consolidate (line 31) | def _consolidate(self, column): method _tensorize (line 46) | def _tensorize(self, value): method _recursive_tensorize (line 79) | def _recursive_tensorize(self, data_struct): method recursive_tensorize (line 96) | def recursive_tensorize(self, data_struct: dict): method format_row (line 99) | def format_row(self, pa_table: pa.Table) -> Mapping: method format_column (line 104) | def format_column(self, pa_table: pa.Table) -> np.ndarray: method format_batch (line 111) | def format_batch(self, pa_table: pa.Table) -> Mapping: FILE: src/datasets/formatting/polars_formatter.py class PolarsArrowExtractor (line 32) | class PolarsArrowExtractor(BaseArrowExtractor["pl.DataFrame", "pl.Series... method extract_row (line 33) | def extract_row(self, pa_table: pa.Table) -> "pl.DataFrame": method extract_column (line 44) | def extract_column(self, pa_table: pa.Table) -> "pl.Series": method extract_batch (line 55) | def extract_batch(self, pa_table: pa.Table) -> "pl.DataFrame": class PolarsFeaturesDecoder (line 67) | class PolarsFeaturesDecoder: method __init__ (line 68) | def __init__(self, features: Optional[Features]): method decode_row (line 72) | def decode_row(self, row: "pl.DataFrame") -> "pl.DataFrame": method decode_column (line 86) | def decode_column(self, column: "pl.Series", column_name: str) -> "pl.... method decode_batch (line 96) | def decode_batch(self, batch: "pl.DataFrame") -> "pl.DataFrame": class PolarsFormatter (line 100) | class PolarsFormatter(TableFormatter["pl.DataFrame", "pl.Series", "pl.Da... method __init__ (line 104) | def __init__(self, features=None, **np_array_kwargs): method format_row (line 111) | def format_row(self, pa_table: pa.Table) -> "pl.DataFrame": method format_column (line 116) | def format_column(self, pa_table: pa.Table) -> "pl.Series": method format_batch (line 121) | def format_batch(self, pa_table: pa.Table) -> "pl.DataFrame": FILE: src/datasets/formatting/tf_formatter.py class TFFormatter (line 32) | class TFFormatter(TensorFormatter[Mapping, "tf.Tensor", Mapping]): method __init__ (line 33) | def __init__(self, features=None, token_per_repo_id=None, **tf_tensor_... method _consolidate (line 38) | def _consolidate(self, column): method _tensorize (line 55) | def _tensorize(self, value): method _recursive_tensorize (line 86) | def _recursive_tensorize(self, data_struct): method recursive_tensorize (line 105) | def recursive_tensorize(self, data_struct: dict): method format_row (line 108) | def format_row(self, pa_table: pa.Table) -> Mapping: method format_column (line 113) | def format_column(self, pa_table: pa.Table) -> "tf.Tensor": method format_batch (line 120) | def format_batch(self, pa_table: pa.Table) -> Mapping: FILE: src/datasets/formatting/torch_formatter.py class TorchFormatter (line 32) | class TorchFormatter(TensorFormatter[Mapping, "torch.Tensor", Mapping]): method __init__ (line 33) | def __init__(self, features=None, token_per_repo_id=None, **torch_tens... method _consolidate (line 38) | def _consolidate(self, column): method _tensorize (line 49) | def _tensorize(self, value): method _recursive_tensorize (line 92) | def _recursive_tensorize(self, data_struct): method recursive_tensorize (line 106) | def recursive_tensorize(self, data_struct: dict): method format_row (line 109) | def format_row(self, pa_table: pa.Table) -> Mapping: method format_column (line 114) | def format_column(self, pa_table: pa.Table) -> "torch.Tensor": method format_batch (line 121) | def format_batch(self, pa_table: pa.Table) -> Mapping: FILE: src/datasets/hub.py function delete_from_hub (line 20) | def delete_from_hub( function _delete_files (line 92) | def _delete_files(dataset_id, revision=None, token=None): FILE: src/datasets/info.py class SupervisedKeysData (line 56) | class SupervisedKeysData: class DownloadChecksumsEntryData (line 62) | class DownloadChecksumsEntryData: class MissingCachedSizesConfigError (line 67) | class MissingCachedSizesConfigError(Exception): class NonMatchingCachedSizesError (line 71) | class NonMatchingCachedSizesError(Exception): class PostProcessedInfo (line 76) | class PostProcessedInfo: method __post_init__ (line 80) | def __post_init__(self): method from_dict (line 86) | def from_dict(cls, post_processed_info_dict: dict) -> "PostProcessedIn... class DatasetInfo (line 92) | class DatasetInfo: method __post_init__ (line 167) | def __post_init__(self): method write_to_directory (line 186) | def write_to_directory(self, dataset_info_dir, pretty_print=False, sto... method _dump_info (line 215) | def _dump_info(self, file, pretty_print=False): method _dump_license (line 219) | def _dump_license(self, file): method from_merge (line 224) | def from_merge(cls, dataset_infos: list["DatasetInfo"]): method from_directory (line 248) | def from_directory(cls, dataset_info_dir: str, storage_options: Option... method from_dict (line 282) | def from_dict(cls, dataset_info_dict: dict) -> "DatasetInfo": method update (line 286) | def update(self, other_dataset_info: "DatasetInfo", ignore_none=True): method copy (line 296) | def copy(self) -> "DatasetInfo": method _to_yaml_dict (line 299) | def _to_yaml_dict(self) -> dict: method _from_yaml_dict (line 314) | def _from_yaml_dict(cls, yaml_data: dict) -> "DatasetInfo": class DatasetInfosDict (line 324) | class DatasetInfosDict(dict[str, DatasetInfo]): method write_to_directory (line 325) | def write_to_directory(self, dataset_infos_dir, overwrite=False, prett... method from_directory (line 354) | def from_directory(cls, dataset_infos_dir) -> "DatasetInfosDict": method from_dataset_card_data (line 374) | def from_dataset_card_data(cls, dataset_card_data: DatasetCardData) ->... method to_dataset_card_data (line 392) | def to_dataset_card_data(self, dataset_card_data: DatasetCardData) -> ... FILE: src/datasets/inspect.py class SplitsNotFoundError (line 38) | class SplitsNotFoundError(ValueError): function get_dataset_infos (line 42) | def get_dataset_infos( function get_dataset_config_names (line 109) | def get_dataset_config_names( function get_dataset_default_config_name (line 175) | def get_dataset_default_config_name( function get_dataset_config_info (line 237) | def get_dataset_config_info( function get_dataset_split_names (line 295) | def get_dataset_split_names( FILE: src/datasets/io/abc.py class AbstractDatasetReader (line 8) | class AbstractDatasetReader(ABC): method __init__ (line 9) | def __init__( method read (line 30) | def read(self) -> Union[Dataset, DatasetDict, IterableDataset, Iterabl... class AbstractDatasetInputStream (line 34) | class AbstractDatasetInputStream(ABC): method __init__ (line 35) | def __init__( method read (line 52) | def read(self) -> Union[Dataset, IterableDataset]: FILE: src/datasets/io/csv.py class CsvDatasetReader (line 15) | class CsvDatasetReader(AbstractDatasetReader): method __init__ (line 16) | def __init__( method read (line 45) | def read(self): class CsvDatasetWriter (line 69) | class CsvDatasetWriter: method __init__ (line 70) | def __init__( method write (line 90) | def write(self) -> int: method _batch_csv (line 102) | def _batch_csv(self, args): method _write (line 115) | def _write(self, file_obj: BinaryIO, header, index, **to_csv_kwargs) -... FILE: src/datasets/io/generator.py class GeneratorDatasetInputStream (line 8) | class GeneratorDatasetInputStream(AbstractDatasetInputStream): method __init__ (line 9) | def __init__( method read (line 41) | def read(self): FILE: src/datasets/io/json.py class JsonDatasetReader (line 15) | class JsonDatasetReader(AbstractDatasetReader): method __init__ (line 16) | def __init__( method read (line 48) | def read(self): class JsonDatasetWriter (line 72) | class JsonDatasetWriter: method __init__ (line 73) | def __init__( method write (line 93) | def write(self) -> int: method _batch_json (line 126) | def _batch_json(self, args): method _write (line 139) | def _write( FILE: src/datasets/io/parquet.py class ParquetDatasetReader (line 19) | class ParquetDatasetReader(AbstractDatasetReader): method __init__ (line 20) | def __init__( method read (line 51) | def read(self): class ParquetDatasetWriter (line 75) | class ParquetDatasetWriter: method __init__ (line 76) | def __init__( method write (line 100) | def write(self) -> int: method _write (line 116) | def _write(self, file_obj: BinaryIO, batch_size: int, **parquet_writer... FILE: src/datasets/io/spark.py class SparkDatasetReader (line 11) | class SparkDatasetReader(AbstractDatasetReader): method __init__ (line 18) | def __init__( method read (line 49) | def read(self): FILE: src/datasets/io/sql.py class SqlDatasetReader (line 17) | class SqlDatasetReader(AbstractDatasetInputStream): method __init__ (line 18) | def __init__( method read (line 36) | def read(self): class SqlDatasetWriter (line 56) | class SqlDatasetWriter: method __init__ (line 57) | def __init__( method write (line 76) | def write(self) -> int: method _batch_sql (line 84) | def _batch_sql(self, args): method _write (line 96) | def _write(self, index, **to_sql_kwargs) -> int: FILE: src/datasets/io/text.py class TextDatasetReader (line 9) | class TextDatasetReader(AbstractDatasetReader): method __init__ (line 10) | def __init__( method read (line 39) | def read(self): FILE: src/datasets/iterable_dataset.py function identity_func (line 96) | def identity_func(x): function _rename_columns_fn (line 100) | def _rename_columns_fn(example: dict, column_mapping: dict[str, str]): function add_column_fn (line 115) | def add_column_fn(example: dict, idx: int, name: str, column: list[dict]): function _infer_features_from_batch (line 121) | def _infer_features_from_batch(batch: dict[str, list], try_features: Opt... function _examples_to_batch (line 131) | def _examples_to_batch(examples: list[dict[str, Any]]) -> dict[str, list]: function _batch_to_examples (line 140) | def _batch_to_examples(batch: dict[str, list]) -> Iterator[dict[str, Any]]: function _convert_to_arrow (line 147) | def _convert_to_arrow( function shift_ex_examples_rngs (line 179) | def shift_ex_examples_rngs(ex_iterable: "_BaseExamplesIterable", value: ... class _BaseExamplesIterable (line 194) | class _BaseExamplesIterable: method __init__ (line 197) | def __init__(self) -> None: method __iter__ (line 200) | def __iter__(self) -> Iterator[tuple[Key, dict]]: method iter_arrow (line 205) | def iter_arrow(self) -> Optional[Callable[[], Iterator[tuple[Key, pa.T... method is_typed (line 209) | def is_typed(self) -> bool: method features (line 213) | def features(self) -> Optional[Features]: method shuffle_data_sources (line 216) | def shuffle_data_sources(self, generator: np.random.Generator) -> "_Ba... method shard_data_sources (line 223) | def shard_data_sources(self, num_shards: int, index: int, contiguous=T... method reshard_data_sources (line 227) | def reshard_data_sources(self) -> "_BaseExamplesIterable": method split_shard_indices_by_worker (line 235) | def split_shard_indices_by_worker(self, num_shards: int, index: int, c... method num_shards (line 246) | def num_shards(self) -> int: method _init_state_dict (line 249) | def _init_state_dict(self) -> dict: method load_state_dict (line 252) | def load_state_dict(self, state_dict: dict) -> dict: method state_dict (line 266) | def state_dict(self) -> dict: class ExamplesIterable (line 272) | class ExamplesIterable(_BaseExamplesIterable): method __init__ (line 273) | def __init__( method _init_state_dict (line 286) | def _init_state_dict(self) -> dict: method __iter__ (line 290) | def __iter__(self): method shuffle_data_sources (line 302) | def shuffle_data_sources(self, generator: np.random.Generator) -> "Exa... method shard_data_sources (line 309) | def shard_data_sources(self, num_shards: int, index: int, contiguous=T... method reshard_data_sources (line 316) | def reshard_data_sources(self) -> "ExamplesIterable": method num_shards (line 331) | def num_shards(self) -> int: class ArrowExamplesIterable (line 335) | class ArrowExamplesIterable(_BaseExamplesIterable): method __init__ (line 336) | def __init__( method iter_arrow (line 350) | def iter_arrow(self): method _init_state_dict (line 353) | def _init_state_dict(self) -> dict: method __iter__ (line 357) | def __iter__(self): method _iter_arrow (line 379) | def _iter_arrow(self): method shuffle_data_sources (line 395) | def shuffle_data_sources(self, generator: np.random.Generator) -> "Arr... method shard_data_sources (line 400) | def shard_data_sources(self, num_shards: int, index: int, contiguous=T... method reshard_data_sources (line 407) | def reshard_data_sources(self) -> "ArrowExamplesIterable": method num_shards (line 422) | def num_shards(self) -> int: class RebatchedArrowExamplesIterable (line 426) | class RebatchedArrowExamplesIterable(_BaseExamplesIterable): method __init__ (line 427) | def __init__( method iter_arrow (line 441) | def iter_arrow(self): method is_typed (line 445) | def is_typed(self): method features (line 449) | def features(self): method _init_state_dict (line 452) | def _init_state_dict(self) -> dict: method __iter__ (line 463) | def __iter__(self): method _iter_arrow (line 466) | def _iter_arrow(self) -> Iterator[tuple[Key, pa.Table]]: method shuffle_data_sources (line 556) | def shuffle_data_sources(self, generator: np.random.Generator) -> "Reb... method shard_data_sources (line 564) | def shard_data_sources(self, num_shards: int, index: int, contiguous=T... method reshard_data_sources (line 572) | def reshard_data_sources(self) -> "RebatchedArrowExamplesIterable": method num_shards (line 578) | def num_shards(self) -> int: class SelectColumnsIterable (line 582) | class SelectColumnsIterable(_BaseExamplesIterable): method __init__ (line 583) | def __init__(self, ex_iterable: _BaseExamplesIterable, column_names: l... method iter_arrow (line 589) | def iter_arrow(self): method is_typed (line 594) | def is_typed(self): method features (line 598) | def features(self): method _init_state_dict (line 601) | def _init_state_dict(self) -> dict: method __iter__ (line 605) | def __iter__(self): method _iter_arrow (line 609) | def _iter_arrow(self) -> Iterator[tuple[Key, pa.Table]]: method shuffle_data_sources (line 614) | def shuffle_data_sources(self, generator: np.random.Generator) -> "Sel... method shard_data_sources (line 617) | def shard_data_sources(self, num_shards: int, index: int, contiguous=T... method reshard_data_sources (line 622) | def reshard_data_sources(self) -> "SelectColumnsIterable": method num_shards (line 626) | def num_shards(self) -> int: class StepExamplesIterable (line 630) | class StepExamplesIterable(_BaseExamplesIterable): method __init__ (line 631) | def __init__(self, ex_iterable: _BaseExamplesIterable, step: int, offs... method iter_arrow (line 638) | def iter_arrow(self): method is_typed (line 642) | def is_typed(self): method features (line 646) | def features(self): method _init_state_dict (line 649) | def _init_state_dict(self) -> dict: method __iter__ (line 657) | def __iter__(self): method _iter_arrow (line 666) | def _iter_arrow(self): method shuffle_data_sources (line 677) | def shuffle_data_sources(self, generator: np.random.Generator) -> "Ste... method shard_data_sources (line 682) | def shard_data_sources(self, num_shards: int, index: int, contiguous=T... method reshard_data_sources (line 689) | def reshard_data_sources(self) -> "StepExamplesIterable": method num_shards (line 697) | def num_shards(self) -> int: class CyclingMultiSourcesExamplesIterable (line 701) | class CyclingMultiSourcesExamplesIterable(_BaseExamplesIterable): method __init__ (line 702) | def __init__( method is_typed (line 721) | def is_typed(self): method features (line 725) | def features(self): method iter_arrow (line 729) | def iter_arrow(self): method _get_indices_iterator (line 733) | def _get_indices_iterator(self): method _init_state_dict (line 742) | def _init_state_dict(self) -> dict: method _iter_arrow (line 752) | def _iter_arrow(self): method __iter__ (line 798) | def __iter__(self): method shuffle_data_sources (line 842) | def shuffle_data_sources(self, generator: np.random.Generator) -> "Cyc... method num_shards (line 848) | def num_shards(self) -> int: method shard_data_sources (line 851) | def shard_data_sources( method reshard_data_sources (line 877) | def reshard_data_sources(self) -> "CyclingMultiSourcesExamplesIterable": class VerticallyConcatenatedMultiSourcesExamplesIterable (line 884) | class VerticallyConcatenatedMultiSourcesExamplesIterable(_BaseExamplesIt... method __init__ (line 897) | def __init__(self, ex_iterables: list[_BaseExamplesIterable]): method is_typed (line 902) | def is_typed(self): method features (line 906) | def features(self): method iter_arrow (line 910) | def iter_arrow(self): method _init_state_dict (line 914) | def _init_state_dict(self) -> dict: method __iter__ (line 922) | def __iter__(self): method _iter_arrow (line 929) | def _iter_arrow(self): method shuffle_data_sources (line 936) | def shuffle_data_sources( method num_shards (line 950) | def num_shards(self) -> int: method shard_data_sources (line 953) | def shard_data_sources( method reshard_data_sources (line 967) | def reshard_data_sources(self) -> "VerticallyConcatenatedMultiSourcesE... function _check_column_names (line 973) | def _check_column_names(column_names: list[str]): class HorizontallyConcatenatedMultiSourcesExamplesIterable (line 983) | class HorizontallyConcatenatedMultiSourcesExamplesIterable(_BaseExamples... method __init__ (line 999) | def __init__(self, ex_iterables: list[_BaseExamplesIterable]): method iter_arrow (line 1004) | def iter_arrow(self): method is_typed (line 1016) | def is_typed(self): method features (line 1020) | def features(self): method _init_state_dict (line 1023) | def _init_state_dict(self) -> dict: method __iter__ (line 1030) | def __iter__(self): method _iter_arrow (line 1053) | def _iter_arrow(self): method shuffle_data_sources (line 1081) | def shuffle_data_sources( method num_shards (line 1088) | def num_shards(self) -> int: method shard_data_sources (line 1091) | def shard_data_sources( method reshard_data_sources (line 1097) | def reshard_data_sources(self) -> "HorizontallyConcatenatedMultiSource... class RandomlyCyclingMultiSourcesExamplesIterable (line 1102) | class RandomlyCyclingMultiSourcesExamplesIterable(CyclingMultiSourcesExa... method __init__ (line 1103) | def __init__( method shift_rngs (line 1116) | def shift_rngs(self, value: int) -> "_BaseExamplesIterable": method is_typed (line 1127) | def is_typed(self): method features (line 1131) | def features(self): method _get_indices_iterator (line 1134) | def _get_indices_iterator(self): method _init_state_dict (line 1163) | def _init_state_dict(self) -> dict: method shuffle_data_sources (line 1174) | def shuffle_data_sources(self, generator: np.random.Generator) -> "Ran... method shard_data_sources (line 1184) | def shard_data_sources( method reshard_data_sources (line 1216) | def reshard_data_sources(self) -> "RandomlyCyclingMultiSourcesExamples... function _table_output_to_arrow (line 1226) | def _table_output_to_arrow(output) -> pa.Table: class MappedExamplesIterable (line 1239) | class MappedExamplesIterable(_BaseExamplesIterable): method __init__ (line 1240) | def __init__( method iter_arrow (line 1293) | def iter_arrow(self): method is_typed (line 1298) | def is_typed(self): method features (line 1302) | def features(self): method _init_state_dict (line 1305) | def _init_state_dict(self) -> dict: method __iter__ (line 1315) | def __iter__(self): method _iter (line 1323) | def _iter(self): method _iter_arrow (line 1518) | def _iter_arrow(self, max_chunksize: Optional[int] = None) -> Iterator... method shuffle_data_sources (line 1590) | def shuffle_data_sources(self, generator: np.random.Generator) -> "Map... method shard_data_sources (line 1607) | def shard_data_sources(self, num_shards: int, index: int, contiguous=T... method reshard_data_sources (line 1624) | def reshard_data_sources(self) -> "MappedExamplesIterable": method num_shards (line 1641) | def num_shards(self) -> int: function _add_mask (line 1645) | def _add_mask( function add_mask (line 1658) | def add_mask(mask_function: Callable, input: Union[dict, pa.Table], *arg... function async_add_mask (line 1663) | async def async_add_mask( class FilteredExamplesIterable (line 1670) | class FilteredExamplesIterable(MappedExamplesIterable): method __init__ (line 1673) | def __init__( method _iter (line 1705) | def _iter(self): method _iter_arrow (line 1711) | def _iter_arrow(self, max_chunksize: Optional[int] = None): method shuffle_data_sources (line 1716) | def shuffle_data_sources(self, seed: Optional[int]) -> "FilteredExampl... method shard_data_sources (line 1729) | def shard_data_sources(self, num_shards: int, index: int, contiguous=T... method reshard_data_sources (line 1742) | def reshard_data_sources(self) -> "FilteredExamplesIterable": method num_shards (line 1755) | def num_shards(self) -> int: class BufferShuffledExamplesIterable (line 1759) | class BufferShuffledExamplesIterable(_BaseExamplesIterable): method __init__ (line 1760) | def __init__(self, ex_iterable: _BaseExamplesIterable, buffer_size: in... method shift_rngs (line 1766) | def shift_rngs(self, value: int) -> "_BaseExamplesIterable": method is_typed (line 1776) | def is_typed(self): method features (line 1780) | def features(self): method iter_arrow (line 1784) | def iter_arrow(self): method _init_state_dict (line 1787) | def _init_state_dict(self) -> dict: method load_state_dict (line 1792) | def load_state_dict(self, state_dict: dict) -> dict: method _iter_random_indices (line 1802) | def _iter_random_indices(rng: np.random.Generator, buffer_size: int, r... method __iter__ (line 1806) | def __iter__(self): method _iter_arrow (line 1823) | def _iter_arrow(self): method shuffle_data_sources (line 1840) | def shuffle_data_sources(self, generator: np.random.Generator) -> "Buf... method shard_data_sources (line 1846) | def shard_data_sources(self, num_shards: int, index: int, contiguous=T... method reshard_data_sources (line 1854) | def reshard_data_sources(self) -> "BufferShuffledExamplesIterable": method num_shards (line 1862) | def num_shards(self) -> int: class SkipExamplesIterable (line 1866) | class SkipExamplesIterable(_BaseExamplesIterable): method __init__ (line 1867) | def __init__( method iter_arrow (line 1881) | def iter_arrow(self): method is_typed (line 1885) | def is_typed(self): method features (line 1889) | def features(self): method _init_state_dict (line 1892) | def _init_state_dict(self) -> dict: method __iter__ (line 1900) | def __iter__(self): method _iter_arrow (line 1910) | def _iter_arrow(self): method split_number (line 1929) | def split_number(num, n): method shuffle_data_sources (line 1937) | def shuffle_data_sources(self, generator: np.random.Generator) -> "Ski... method shard_data_sources (line 1949) | def shard_data_sources(self, num_shards: int, index: int, contiguous=T... method reshard_data_sources (line 1961) | def reshard_data_sources(self) -> "SkipExamplesIterable": method num_shards (line 1970) | def num_shards(self) -> int: class RepeatExamplesIterable (line 1974) | class RepeatExamplesIterable(_BaseExamplesIterable): method __init__ (line 1979) | def __init__( method _init_state_dict (line 1988) | def _init_state_dict(self) -> dict: method __iter__ (line 1996) | def __iter__(self): method shuffle_data_sources (line 2007) | def shuffle_data_sources(self, generator: np.random.Generator) -> "Rep... method shard_data_sources (line 2011) | def shard_data_sources(self, num_shards: int, index: int, contiguous=T... method reshard_data_sources (line 2018) | def reshard_data_sources(self) -> "RepeatExamplesIterable": method num_shards (line 2025) | def num_shards(self) -> int: class TakeExamplesIterable (line 2029) | class TakeExamplesIterable(_BaseExamplesIterable): method __init__ (line 2030) | def __init__( method iter_arrow (line 2044) | def iter_arrow(self): method is_typed (line 2048) | def is_typed(self): method features (line 2052) | def features(self): method _init_state_dict (line 2055) | def _init_state_dict(self) -> dict: method __iter__ (line 2063) | def __iter__(self): method _iter_arrow (line 2076) | def _iter_arrow(self): method split_number (line 2098) | def split_number(num, n): method shuffle_data_sources (line 2106) | def shuffle_data_sources(self, generator: np.random.Generator) -> "Tak... method shard_data_sources (line 2118) | def shard_data_sources(self, num_shards: int, index: int, contiguous=T... method reshard_data_sources (line 2135) | def reshard_data_sources(self) -> "TakeExamplesIterable": method num_shards (line 2144) | def num_shards(self) -> int: function _apply_feature_types_on_example (line 2148) | def _apply_feature_types_on_example( class FormattingConfig (line 2164) | class FormattingConfig: method is_table (line 2168) | def is_table(self) -> bool: method is_tensor (line 2172) | def is_tensor(self) -> bool: class FormattedExamplesIterable (line 2176) | class FormattedExamplesIterable(_BaseExamplesIterable): method __init__ (line 2177) | def __init__( method iter_arrow (line 2193) | def iter_arrow(self): method is_typed (line 2198) | def is_typed(self): method features (line 2202) | def features(self): method _init_state_dict (line 2205) | def _init_state_dict(self) -> dict: method __iter__ (line 2209) | def __iter__(self): method _iter_arrow (line 2247) | def _iter_arrow(self) -> Iterator[tuple[Key, pa.Table]]: method shuffle_data_sources (line 2263) | def shuffle_data_sources(self, generator: np.random.Generator) -> "For... method shard_data_sources (line 2273) | def shard_data_sources(self, num_shards: int, index: int, contiguous=T... method reshard_data_sources (line 2283) | def reshard_data_sources(self) -> "FormattedExamplesIterable": method num_shards (line 2293) | def num_shards(self) -> int: class DistributedConfig (line 2298) | class DistributedConfig: function _maybe_add_torch_iterable_dataset_parent_class (line 2303) | def _maybe_add_torch_iterable_dataset_parent_class(cls): function _maybe_share_with_torch_persistent_workers (line 2312) | def _maybe_share_with_torch_persistent_workers(value: Union[int, "torch.... class IterableColumn (line 2324) | class IterableColumn: method __init__ (line 2345) | def __init__(self, source: Union["IterableDataset", "IterableColumn"],... method __iter__ (line 2349) | def __iter__(self) -> Iterator[Any]: method __getitem__ (line 2353) | def __getitem__(self, column_name: str) -> "IterableColumn": class IterableDataset (line 2357) | class IterableDataset(DatasetInfoMixin): method __init__ (line 2360) | def __init__( method num_columns (line 2383) | def num_columns(self) -> Optional[int]: method column_names (line 2399) | def column_names(self) -> Optional[list[str]]: method state_dict (line 2414) | def state_dict(self) -> dict: method load_state_dict (line 2467) | def load_state_dict(self, state_dict: dict) -> None: method __repr__ (line 2517) | def __repr__(self): method __getstate__ (line 2520) | def __getstate__(self): method __setstate__ (line 2523) | def __setstate__(self, d): method _head (line 2532) | def _head(self, n=5): method epoch (line 2536) | def epoch(self) -> int: method num_shards (line 2540) | def num_shards(self) -> int: method n_shards (line 2546) | def n_shards(self) -> int: # backward compatibility method _iter_pytorch (line 2549) | def _iter_pytorch(self): method _is_main_process (line 2605) | def _is_main_process(self): method _prepare_ex_iterable_for_iteration (line 2616) | def _prepare_ex_iterable_for_iteration( method __iter__ (line 2673) | def __iter__(self): method iter (line 2694) | def iter(self, batch_size: int, drop_last_batch: bool = False): method __getitem__ (line 2725) | def __getitem__(self, column_name: str) -> IterableColumn: method from_generator (line 2729) | def from_generator( method from_spark (line 2784) | def from_spark( method from_file (line 2827) | def from_file(filename: str) -> "IterableDataset": method from_pandas (line 2843) | def from_pandas( method from_polars (line 2904) | def from_polars( method from_dict (line 2955) | def from_dict( method from_list (line 2992) | def from_list( method from_csv (line 3031) | def from_csv( method from_json (line 3074) | def from_json( method from_parquet (line 3121) | def from_parquet( method from_text (line 3206) | def from_text( method with_format (line 3257) | def with_format( method map (line 3314) | def map( method filter (line 3469) | def filter( method shuffle (line 3554) | def shuffle( method set_epoch (line 3623) | def set_epoch(self, epoch: int): method skip (line 3626) | def skip(self, n: int) -> "IterableDataset": method repeat (line 3668) | def repeat(self, num_times: Optional[int]) -> "IterableDataset": method take (line 3710) | def take(self, n: int) -> "IterableDataset": method shard (line 3745) | def shard( method reshard (line 3801) | def reshard(self) -> "IterableDataset": method add_column (line 3841) | def add_column(self, name: str, column: Union[list, np.array]) -> "Ite... method rename_column (line 3853) | def rename_column(self, original_column_name: str, new_column_name: st... method rename_columns (line 3883) | def rename_columns(self, column_mapping: dict[str, str]) -> "IterableD... method remove_columns (line 3908) | def remove_columns(self, column_names: Union[str, list[str]]) -> "Iter... method select_columns (line 3943) | def select_columns(self, column_names: Union[str, list[str]]) -> "Iter... method cast_column (line 3993) | def cast_column(self, column: str, feature: FeatureType) -> "IterableD... method cast (line 4039) | def cast( method decode (line 4085) | def decode(self, enable: bool = True, num_threads: int = 0) -> "Iterab... method _step (line 4178) | def _step(self, step: int, offset: int) -> "IterableDataset": method _resolve_features (line 4189) | def _resolve_features(self): method batch (line 4207) | def batch(self, batch_size: int, drop_last_batch: bool = False) -> "It... method to_dict (line 4230) | def to_dict(self, batch_size: Optional[int] = None, batched: bool = Fa... method to_list (line 4253) | def to_list(self) -> list: method to_pandas (line 4268) | def to_pandas( method to_polars (line 4308) | def to_polars( method to_csv (line 4345) | def to_csv( method to_json (line 4388) | def to_json( method to_sql (line 4442) | def to_sql( method to_parquet (line 4484) | def to_parquet( method _push_parquet_shards_to_hub_single (line 4532) | def _push_parquet_shards_to_hub_single( method _push_parquet_shards_to_hub (line 4625) | def _push_parquet_shards_to_hub( method push_to_hub (line 4743) | def push_to_hub( function _concatenate_iterable_datasets (line 4939) | def _concatenate_iterable_datasets( function _interleave_iterable_datasets (line 5031) | def _interleave_iterable_datasets( function _split_by_node_iterable_dataset (line 5116) | def _split_by_node_iterable_dataset(dataset: IterableDataset, rank: int,... function _apply_async (line 5149) | async def _apply_async(pool, func, x): function _batch_fn (line 5158) | def _batch_fn(unbatched): function _generate_tables_from_polars (line 5162) | def _generate_tables_from_polars(df: Union["pl.DataFrame", "pl.LazyFrame... FILE: src/datasets/load.py class _InitializeConfiguredDatasetBuilder (line 105) | class _InitializeConfiguredDatasetBuilder: method __call__ (line 114) | def __call__(self, builder_cls, metadata_configs, default_config_name,... function configure_builder_class (line 123) | def configure_builder_class( function import_main_class (line 163) | def import_main_class(module_path) -> Optional[type[DatasetBuilder]]: function get_dataset_builder_class (line 180) | def get_dataset_builder_class( function increase_load_count (line 197) | def increase_load_count(name: str): function infer_module_for_data_files_list (line 210) | def infer_module_for_data_files_list( function infer_module_for_data_files_list_in_archives (line 247) | def infer_module_for_data_files_list_in_archives( function infer_module_for_data_files (line 282) | def infer_module_for_data_files( function create_builder_configs_from_metadata_configs (line 310) | def create_builder_configs_from_metadata_configs( class BuilderConfigsParameters (line 368) | class BuilderConfigsParameters: class DatasetModule (line 386) | class DatasetModule: class _DatasetModuleFactory (line 394) | class _DatasetModuleFactory: method get_module (line 395) | def get_module(self) -> DatasetModule: class LocalDatasetModuleFactory (line 399) | class LocalDatasetModuleFactory(_DatasetModuleFactory): method __init__ (line 403) | def __init__( method get_module (line 419) | def get_module(self) -> DatasetModule: class PackagedDatasetModuleFactory (line 508) | class PackagedDatasetModuleFactory(_DatasetModuleFactory): method __init__ (line 511) | def __init__( method get_module (line 526) | def get_module(self) -> DatasetModule: class HubDatasetModuleFactory (line 549) | class HubDatasetModuleFactory(_DatasetModuleFactory): method __init__ (line 555) | def __init__( method get_module (line 574) | def get_module(self) -> DatasetModule: class HubDatasetModuleFactoryWithParquetExport (line 718) | class HubDatasetModuleFactoryWithParquetExport(_DatasetModuleFactory): method __init__ (line 723) | def __init__( method get_module (line 734) | def get_module(self) -> DatasetModule: class CachedDatasetModuleFactory (line 792) | class CachedDatasetModuleFactory(_DatasetModuleFactory): method __init__ (line 797) | def __init__( method get_module (line 806) | def get_module(self) -> DatasetModule: class HubBucketDatasetModuleFactory (line 834) | class HubBucketDatasetModuleFactory(_DatasetModuleFactory): method __init__ (line 840) | def __init__( method get_module (line 855) | def get_module(self) -> DatasetModule: function dataset_module_factory (line 955) | def dataset_module_factory( function load_dataset_builder (line 1212) | def load_dataset_builder( function load_dataset (line 1373) | def load_dataset( function load_dataset (line 1396) | def load_dataset( function load_dataset (line 1420) | def load_dataset( function load_dataset (line 1444) | def load_dataset( function load_dataset (line 1467) | def load_dataset( function load_from_disk (line 1725) | def load_from_disk( FILE: src/datasets/naming.py function camelcase_to_snakecase (line 34) | def camelcase_to_snakecase(name): function snakecase_to_camelcase (line 41) | def snakecase_to_camelcase(name): function filename_prefix_for_name (line 48) | def filename_prefix_for_name(name): function filename_prefix_for_split (line 54) | def filename_prefix_for_split(name, split): function filepattern_for_dataset_split (line 62) | def filepattern_for_dataset_split(dataset_name, split, data_dir, filetyp... function filenames_for_dataset_split (line 70) | def filenames_for_dataset_split(path, dataset_name, split, filetype_suff... FILE: src/datasets/packaged_modules/__init__.py function _hash_python_lines (line 27) | def _hash_python_lines(lines: list[str]) -> str: FILE: src/datasets/packaged_modules/arrow/arrow.py class ArrowConfig (line 15) | class ArrowConfig(datasets.BuilderConfig): method __post_init__ (line 20) | def __post_init__(self): class Arrow (line 24) | class Arrow(datasets.ArrowBasedBuilder): method _info (line 27) | def _info(self): method _split_generators (line 30) | def _split_generators(self, dl_manager): method _cast_table (line 50) | def _cast_table(self, pa_table: pa.Table) -> pa.Table: method _generate_shards (line 57) | def _generate_shards(self, files): method _generate_tables (line 60) | def _generate_tables(self, files): FILE: src/datasets/packaged_modules/audiofolder/audiofolder.py class AudioFolderConfig (line 9) | class AudioFolderConfig(folder_based_builder.FolderBasedBuilderConfig): method __post_init__ (line 15) | def __post_init__(self): class AudioFolder (line 19) | class AudioFolder(folder_based_builder.FolderBasedBuilder): FILE: src/datasets/packaged_modules/cache/cache.py function _get_modification_time (line 21) | def _get_modification_time(cached_directory_path): function _find_hash_in_cache (line 25) | def _find_hash_in_cache( class Cache (line 99) | class Cache(datasets.ArrowBasedBuilder): method __init__ (line 100) | def __init__( method _info (line 148) | def _info(self) -> datasets.DatasetInfo: method download_and_prepare (line 151) | def download_and_prepare(self, output_dir: Optional[str] = None, *args... method _split_generators (line 157) | def _split_generators(self, dl_manager): method _generate_shards (line 179) | def _generate_shards(self, files): method _generate_tables (line 182) | def _generate_tables(self, files): FILE: src/datasets/packaged_modules/csv/csv.py class CsvConfig (line 25) | class CsvConfig(datasets.BuilderConfig): method __post_init__ (line 70) | def __post_init__(self): method pd_read_csv_kwargs (line 78) | def pd_read_csv_kwargs(self): class Csv (line 145) | class Csv(datasets.ArrowBasedBuilder): method _info (line 148) | def _info(self): method _split_generators (line 151) | def _split_generators(self, dl_manager): method _cast_table (line 169) | def _cast_table(self, pa_table: pa.Table) -> pa.Table: method _generate_shards (line 180) | def _generate_shards(self, base_files, files_iterables): method _generate_tables (line 183) | def _generate_tables(self, base_files, files_iterables): FILE: src/datasets/packaged_modules/eval/eval.py class Eval (line 15) | class Eval(datasets.GeneratorBasedBuilder): method _info (line 18) | def _info(self): method _split_generators (line 21) | def _split_generators(self, dl_manager): method _sort_samples_key (line 53) | def _sort_samples_key(self, sample_path: str): method _iter_samples_from_log_files (line 58) | def _iter_samples_from_log_files(self, log_files: Iterable[str]): method _generate_shards (line 71) | def _generate_shards(self, base_files, logs_files_iterables): method _generate_examples (line 74) | def _generate_examples(self, base_files, logs_files_iterables): FILE: src/datasets/packaged_modules/folder_based_builder/folder_based_builder.py function count_path_segments (line 24) | def count_path_segments(path): class FolderBasedBuilderConfig (line 29) | class FolderBasedBuilderConfig(datasets.BuilderConfig): method __post_init__ (line 38) | def __post_init__(self): class FolderBasedBuilder (line 42) | class FolderBasedBuilder(datasets.GeneratorBasedBuilder): method _info (line 62) | def _info(self): method _split_generators (line 71) | def _split_generators(self, dl_manager): method _split_files_and_metadata_and_archives (line 269) | def _split_files_and_metadata_and_archives(self, data_files): method _read_metadata (line 283) | def _read_metadata(self, metadata_file: str, metadata_ext: str = "") -... method _generate_shards (line 366) | def _generate_shards(self, files, metadata_files, add_metadata, add_la... method _generate_examples (line 374) | def _generate_examples(self, files, metadata_files, add_metadata, add_... function _nested_apply (line 433) | def _nested_apply(item: Any, feature_path: _VisitPath, func: Callable[[A... FILE: src/datasets/packaged_modules/generator/generator.py class GeneratorConfig (line 10) | class GeneratorConfig(datasets.BuilderConfig): method __post_init__ (line 16) | def __post_init__(self): class Generator (line 25) | class Generator(datasets.GeneratorBasedBuilder): method _info (line 28) | def _info(self): method _split_generators (line 31) | def _split_generators(self, dl_manager): method _generate_examples (line 34) | def _generate_examples(self, **gen_kwargs): FILE: src/datasets/packaged_modules/hdf5/hdf5.py class HDF5Config (line 33) | class HDF5Config(datasets.BuilderConfig): class HDF5 (line 40) | class HDF5(datasets.ArrowBasedBuilder): method _info (line 45) | def _info(self): method _split_generators (line 48) | def _split_generators(self, dl_manager): method _generate_shards (line 66) | def _generate_shards(self, files): method _generate_tables (line 69) | def _generate_tables(self, files): function _is_complex_dtype (line 102) | def _is_complex_dtype(dtype: np.dtype) -> bool: function _create_complex_features (line 110) | def _create_complex_features(dset) -> Features: function _convert_complex_to_nested (line 135) | def _convert_complex_to_nested(arr: np.ndarray) -> pa.StructArray: function _is_compound_dtype (line 148) | def _is_compound_dtype(dtype: np.dtype) -> bool: class _CompoundGroup (line 153) | class _CompoundGroup: method items (line 157) | def items(self): class _CompoundField (line 164) | class _CompoundField: method __post_init__ (line 170) | def __post_init__(self): method __getitem__ (line 173) | def __getitem__(self, key): function _create_compound_features (line 177) | def _create_compound_features(dset) -> Features: function _convert_compound_to_nested (line 182) | def _convert_compound_to_nested(arr, dset) -> pa.StructArray: function _is_vlen_dtype (line 193) | def _is_vlen_dtype(dtype: np.dtype) -> bool: function _create_vlen_features (line 199) | def _create_vlen_features(dset) -> Features: function _convert_vlen_to_array (line 207) | def _convert_vlen_to_array(arr: np.ndarray) -> pa.Array: function _recursive_infer_features (line 216) | def _recursive_infer_features(h5_obj) -> Features: function _infer_feature (line 231) | def _infer_feature(dset): function _load_array (line 241) | def _load_array(dset, path: str, start: int, end: int) -> pa.Array: function _recursive_load_arrays (line 267) | def _recursive_load_arrays(h5_obj, features: Features, start: int, end: ... function _create_sized_feature (line 303) | def _create_sized_feature(dset): function _create_sized_feature_impl (line 309) | def _create_sized_feature_impl(dset_shape, value_feature): function _sized_arrayxd (line 328) | def _sized_arrayxd(rank: int): function _np_to_pa_to_hf_value (line 332) | def _np_to_pa_to_hf_value(numpy_dtype: np.dtype) -> Value: function _first_dataset (line 336) | def _first_dataset(h5_obj, features: Features, prefix=""): function _check_dataset_lengths (line 348) | def _check_dataset_lengths(h5_obj, features: Features) -> int: function _is_group (line 363) | def _is_group(h5_obj) -> bool: function _is_dataset (line 369) | def _is_dataset(h5_obj) -> bool: function _is_file (line 375) | def _is_file(h5_obj) -> bool: function _has_zero_dimensions (line 381) | def _has_zero_dimensions(feature): FILE: src/datasets/packaged_modules/imagefolder/imagefolder.py class ImageFolderConfig (line 9) | class ImageFolderConfig(folder_based_builder.FolderBasedBuilderConfig): method __post_init__ (line 15) | def __post_init__(self): class ImageFolder (line 19) | class ImageFolder(folder_based_builder.FolderBasedBuilder): FILE: src/datasets/packaged_modules/json/json.py function pandas_read_json (line 30) | def pandas_read_json(path_or_buf, **kwargs): class FullReadDisallowed (line 36) | class FullReadDisallowed(Exception): class JsonConfig (line 41) | class JsonConfig(datasets.BuilderConfig): method __post_init__ (line 54) | def __post_init__(self): class Json (line 58) | class Json(datasets.ArrowBasedBuilder): method _info (line 61) | def _info(self): method _split_generators (line 73) | def _split_generators(self, dl_manager): method _cast_table (line 97) | def _cast_table(self, pa_table: pa.Table, json_field_paths=()) -> pa.T... method _generate_shards (line 127) | def _generate_shards(self, base_files, files_iterables): method _generate_tables (line 130) | def _generate_tables(self, base_files, files_iterables, allow_full_rea... FILE: src/datasets/packaged_modules/lance/lance.py class LanceConfig (line 41) | class LanceConfig(datasets.BuilderConfig): function resolve_dataset_uris (line 62) | def resolve_dataset_uris(files: List[str]) -> Dict[str, List[str]]: function _fix_hf_uri (line 72) | def _fix_hf_uri(uri: str) -> str: function _fix_local_version_file (line 81) | def _fix_local_version_file(uri: str) -> str: class Lance (line 92) | class Lance(datasets.ArrowBasedBuilder, datasets.builder._CountableBuild... method _info (line 96) | def _info(self): method _split_generators (line 99) | def _split_generators(self, dl_manager): method _cast_table (line 183) | def _cast_table(self, pa_table: pa.Table) -> pa.Table: method _generate_shards (line 190) | def _generate_shards( method _generate_num_examples (line 203) | def _generate_num_examples( method _generate_tables (line 216) | def _generate_tables( FILE: src/datasets/packaged_modules/niftifolder/niftifolder.py class NiftiFolderConfig (line 9) | class NiftiFolderConfig(folder_based_builder.FolderBasedBuilderConfig): method __post_init__ (line 15) | def __post_init__(self): class NiftiFolder (line 19) | class NiftiFolder(folder_based_builder.FolderBasedBuilder): FILE: src/datasets/packaged_modules/pandas/pandas.py class PandasConfig (line 14) | class PandasConfig(datasets.BuilderConfig): method __post_init__ (line 19) | def __post_init__(self): class Pandas (line 23) | class Pandas(datasets.ArrowBasedBuilder): method _info (line 26) | def _info(self): method _split_generators (line 33) | def _split_generators(self, dl_manager): method _cast_table (line 43) | def _cast_table(self, pa_table: pa.Table) -> pa.Table: method _generate_shards (line 50) | def _generate_shards(self, files): method _generate_tables (line 53) | def _generate_tables(self, files): FILE: src/datasets/packaged_modules/parquet/parquet.py class ParquetConfig (line 17) | class ParquetConfig(datasets.BuilderConfig): method __post_init__ (line 86) | def __post_init__(self): class Parquet (line 90) | class Parquet(datasets.ArrowBasedBuilder): method _info (line 93) | def _info(self): method _split_generators (line 105) | def _split_generators(self, dl_manager): method _cast_table (line 143) | def _cast_table(self, pa_table: pa.Table) -> pa.Table: method _generate_shards (line 150) | def _generate_shards(self, files, row_groups_list): method _generate_more_gen_kwargs (line 160) | def _generate_more_gen_kwargs(self, files, row_groups_list): method _generate_tables (line 176) | def _generate_tables(self, files, row_groups_list): FILE: src/datasets/packaged_modules/pdffolder/pdffolder.py class PdfFolderConfig (line 9) | class PdfFolderConfig(folder_based_builder.FolderBasedBuilderConfig): method __post_init__ (line 15) | def __post_init__(self): class PdfFolder (line 19) | class PdfFolder(folder_based_builder.FolderBasedBuilder): FILE: src/datasets/packaged_modules/spark/spark.py class SparkConfig (line 32) | class SparkConfig(datasets.BuilderConfig): method __post_init__ (line 37) | def __post_init__(self): function _reorder_dataframe_by_partition (line 41) | def _reorder_dataframe_by_partition(df: "pyspark.sql.DataFrame", new_par... function _generate_iterable_examples (line 49) | def _generate_iterable_examples( class SparkExamplesIterable (line 78) | class SparkExamplesIterable(_BaseExamplesIterable): method __init__ (line 79) | def __init__( method _init_state_dict (line 88) | def _init_state_dict(self) -> dict: method load_state_dict (line 93) | def load_state_dict(self, state_dict: dict) -> dict: method __iter__ (line 96) | def __iter__(self): method shuffle_data_sources (line 99) | def shuffle_data_sources(self, generator: np.random.Generator) -> "Spa... method shard_data_sources (line 104) | def shard_data_sources(self, num_shards: int, index: int, contiguous=T... method num_shards (line 109) | def num_shards(self) -> int: class Spark (line 113) | class Spark(datasets.DatasetBuilder): method __init__ (line 116) | def __init__( method _validate_cache_dir (line 135) | def _validate_cache_dir(self): method _info (line 168) | def _info(self): method _split_generators (line 171) | def _split_generators(self, dl_manager: datasets.download.download_man... method _repartition_df_if_needed (line 174) | def _repartition_df_if_needed(self, max_shard_size): method _prepare_split_single (line 199) | def _prepare_split_single( method _prepare_split (line 283) | def _prepare_split( method _get_examples_iterable_for_split (line 363) | def _get_examples_iterable_for_split( FILE: src/datasets/packaged_modules/sql/sql.py class SqlConfig (line 25) | class SqlConfig(datasets.BuilderConfig): method __post_init__ (line 38) | def __post_init__(self): method create_config_id (line 45) | def create_config_id( method pd_read_sql_kwargs (line 81) | def pd_read_sql_kwargs(self): class Sql (line 92) | class Sql(datasets.ArrowBasedBuilder): method _info (line 95) | def _info(self): method _split_generators (line 98) | def _split_generators(self, dl_manager): method _cast_table (line 101) | def _cast_table(self, pa_table: pa.Table) -> pa.Table: method _generate_tables (line 112) | def _generate_tables(self): FILE: src/datasets/packaged_modules/text/text.py class TextConfig (line 17) | class TextConfig(datasets.BuilderConfig): class Text (line 45) | class Text(datasets.ArrowBasedBuilder): method _info (line 48) | def _info(self): method _split_generators (line 51) | def _split_generators(self, dl_manager): method _cast_table (line 73) | def _cast_table(self, pa_table: pa.Table) -> pa.Table: method _generate_shards (line 86) | def _generate_shards(self, base_files, files_iterables): method _generate_tables (line 89) | def _generate_tables(self, base_files, files_iterables): FILE: src/datasets/packaged_modules/videofolder/videofolder.py class VideoFolderConfig (line 9) | class VideoFolderConfig(folder_based_builder.FolderBasedBuilderConfig): method __post_init__ (line 15) | def __post_init__(self): class VideoFolder (line 19) | class VideoFolder(folder_based_builder.FolderBasedBuilder): FILE: src/datasets/packaged_modules/webdataset/_tenbin.py function bytelen (line 40) | def bytelen(a): function bytedata (line 50) | def bytedata(a): function check_acceptable_input_type (line 80) | def check_acceptable_input_type(data, allow64): function str64 (line 93) | def str64(s): function unstr64 (line 100) | def unstr64(i): function check_infos (line 106) | def check_infos(data, infos, required_infos=None): function encode_header (line 119) | def encode_header(a, info=""): function decode_header (line 131) | def decode_header(h): function encode_list (line 143) | def encode_list(l, infos=None): # noqa: E741 function decode_list (line 157) | def decode_list(l, infos=False): # noqa: E741 function roundup (line 174) | def roundup(n, k=64): function encode_chunks (line 179) | def encode_chunks(l): # noqa: E741 function decode_chunks (line 194) | def decode_chunks(buf): function encode_buffer (line 211) | def encode_buffer(l, infos=None): # noqa: E741 function decode_buffer (line 218) | def decode_buffer(buf, infos=False): function write_chunk (line 223) | def write_chunk(stream, buf): function read_chunk (line 234) | def read_chunk(stream): function write (line 252) | def write(stream, l, infos=None): # noqa: E741 function read (line 258) | def read(stream, n=sys.maxsize, infos=False): function save (line 272) | def save(fname, *args, infos=None, nocheck=False): function load (line 280) | def load(fname, infos=False, nocheck=False): FILE: src/datasets/packaged_modules/webdataset/webdataset.py class WebDataset (line 20) | class WebDataset(datasets.GeneratorBasedBuilder): method _get_pipeline_from_tar (line 29) | def _get_pipeline_from_tar(cls, tar_path, tar_iterator): method _info (line 60) | def _info(self) -> datasets.DatasetInfo: method _split_generators (line 63) | def _split_generators(self, dl_manager): method _generate_shards (line 108) | def _generate_shards(self, tar_paths, tar_iterators): method _generate_examples (line 111) | def _generate_examples(self, tar_paths, tar_iterators): function base_plus_ext (line 134) | def base_plus_ext(path): function text_loads (line 281) | def text_loads(data: bytes): function tenbin_loads (line 285) | def tenbin_loads(data: bytes): function msgpack_loads (line 291) | def msgpack_loads(data: bytes): function npy_loads (line 297) | def npy_loads(data: bytes): function npz_loads (line 304) | def npz_loads(data: bytes): function cbor_loads (line 308) | def cbor_loads(data: bytes): function torch_loads (line 314) | def torch_loads(data: bytes): FILE: src/datasets/packaged_modules/xml/xml.py class XmlConfig (line 15) | class XmlConfig(datasets.BuilderConfig): class Xml (line 23) | class Xml(datasets.ArrowBasedBuilder): method _info (line 26) | def _info(self): method _split_generators (line 29) | def _split_generators(self, dl_manager): method _cast_table (line 47) | def _cast_table(self, pa_table: pa.Table) -> pa.Table: method _generate_shards (line 60) | def _generate_shards(self, files): method _generate_tables (line 63) | def _generate_tables(self, files): FILE: src/datasets/parallel/parallel.py class ParallelBackendConfig (line 12) | class ParallelBackendConfig: function parallel_map (line 17) | def parallel_map(function, iterable, num_proc, batched, batch_size, type... function _map_with_multiprocessing_pool (line 43) | def _map_with_multiprocessing_pool( function _map_with_joblib (line 77) | def _map_with_joblib( function parallel_backend (line 93) | def parallel_backend(backend_name: str): FILE: src/datasets/search.py class MissingIndex (line 36) | class MissingIndex(Exception): class SearchResults (line 40) | class SearchResults(NamedTuple): class BatchedSearchResults (line 45) | class BatchedSearchResults(NamedTuple): class NearestExamplesResults (line 50) | class NearestExamplesResults(NamedTuple): class BatchedNearestExamplesResults (line 55) | class BatchedNearestExamplesResults(NamedTuple): class BaseIndex (line 60) | class BaseIndex: method search (line 63) | def search(self, query, k: int = 10, **kwargs) -> SearchResults: method search_batch (line 70) | def search_batch(self, queries, k: int = 10, **kwargs) -> BatchedSearc... method save (line 88) | def save(self, file: Union[str, PurePath]): method load (line 93) | def load(cls, file: Union[str, PurePath]) -> "BaseIndex": class ElasticSearchIndex (line 98) | class ElasticSearchIndex(BaseIndex): method __init__ (line 108) | def __init__( method add_documents (line 146) | def add_documents(self, documents: Union[list[str], "Dataset"], column... method search (line 182) | def search(self, query: str, k=10, **kwargs) -> SearchResults: method search_batch (line 201) | def search_batch(self, queries, k: int = 10, max_workers=10, **kwargs)... class FaissIndex (line 215) | class FaissIndex(BaseIndex): method __init__ (line 225) | def __init__( method add_vectors (line 255) | def add_vectors( method _faiss_index_to_device (line 316) | def _faiss_index_to_device(index: "faiss.Index", device: Optional[Unio... method search (line 349) | def search(self, query: np.array, k=10, **kwargs) -> SearchResults: method search_batch (line 369) | def search_batch(self, queries: np.array, k=10, **kwargs) -> BatchedSe... method save (line 387) | def save(self, file: Union[str, PurePath], storage_options: Optional[d... method load (line 400) | def load( class IndexableMixin (line 417) | class IndexableMixin: method __init__ (line 420) | def __init__(self): method __len__ (line 423) | def __len__(self): method __getitem__ (line 426) | def __getitem__(self, key): method is_index_initialized (line 429) | def is_index_initialized(self, index_name: str) -> bool: method _check_index_is_initialized (line 432) | def _check_index_is_initialized(self, index_name: str): method list_indexes (line 438) | def list_indexes(self) -> list[str]: method get_index (line 442) | def get_index(self, index_name: str) -> BaseIndex: method add_faiss_index (line 454) | def add_faiss_index( method add_faiss_index_from_external_arrays (line 495) | def add_faiss_index_from_external_arrays( method save_faiss_index (line 535) | def save_faiss_index(self, index_name: str, file: Union[str, PurePath]... method load_faiss_index (line 553) | def load_faiss_index( method add_elasticsearch_index (line 585) | def add_elasticsearch_index( method load_elasticsearch_index (line 637) | def load_elasticsearch_index( method drop_index (line 684) | def drop_index(self, index_name: str): method search (line 693) | def search(self, index_name: str, query: Union[str, np.array], k: int ... method search_batch (line 713) | def search_batch( method get_nearest_examples (line 735) | def get_nearest_examples( method get_nearest_examples_batch (line 759) | def get_nearest_examples_batch( FILE: src/datasets/splits.py class SplitInfo (line 32) | class SplitInfo: method file_instructions (line 48) | def file_instructions(self): class SubSplitInfo (line 60) | class SubSplitInfo: method num_examples (line 72) | def num_examples(self): method file_instructions (line 77) | def file_instructions(self): class SplitBase (line 82) | class SplitBase(metaclass=abc.ABCMeta): method get_read_instruction (line 118) | def get_read_instruction(self, split_dict): method __eq__ (line 129) | def __eq__(self, other): method __ne__ (line 135) | def __ne__(self, other): method __add__ (line 139) | def __add__(self, other): method subsplit (line 143) | def subsplit(self, arg=None, k=None, percent=None, weighted=None): # ... class PercentSliceMeta (line 254) | class PercentSliceMeta(type): method __getitem__ (line 255) | def __getitem__(cls, slice_value): class PercentSlice (line 261) | class PercentSlice(metaclass=PercentSliceMeta): class _SplitMerged (line 277) | class _SplitMerged(SplitBase): method __init__ (line 280) | def __init__(self, split1, split2): method get_read_instruction (line 284) | def get_read_instruction(self, split_dict): method __repr__ (line 289) | def __repr__(self): class _SubSplit (line 293) | class _SubSplit(SplitBase): method __init__ (line 296) | def __init__(self, split, slice_value): method get_read_instruction (line 300) | def get_read_instruction(self, split_dict): method __repr__ (line 303) | def __repr__(self): class NamedSplit (line 315) | class NamedSplit(SplitBase): method __init__ (line 358) | def __init__(self, name: str): method __str__ (line 365) | def __str__(self): method __repr__ (line 368) | def __repr__(self): method __eq__ (line 371) | def __eq__(self, other): method __lt__ (line 382) | def __lt__(self, other): method __hash__ (line 385) | def __hash__(self): method get_read_instruction (line 388) | def get_read_instruction(self, split_dict): class NamedSplitAll (line 392) | class NamedSplitAll(NamedSplit): method __init__ (line 395) | def __init__(self): method __repr__ (line 398) | def __repr__(self): method get_read_instruction (line 401) | def get_read_instruction(self, split_dict): class Split (line 407) | class Split: method __new__ (line 450) | def __new__(cls, name): class SplitReadInstruction (line 465) | class SplitReadInstruction: method __init__ (line 481) | def __init__(self, split_info=None): method add (line 487) | def add(self, sliced_split): method __add__ (line 494) | def __add__(self, other): method __getitem__ (line 504) | def __getitem__(self, slice_value): method get_list_sliced_split_info (line 516) | def get_list_sliced_split_info(self): class SplitDict (line 520) | class SplitDict(dict[str, SplitInfo]): method __init__ (line 523) | def __init__(self, *args, dataset_name=None, **kwargs): method __getitem__ (line 527) | def __getitem__(self, key: Union[SplitBase, str]): method __setitem__ (line 540) | def __setitem__(self, key: Union[SplitBase, str], value: SplitInfo): method add (line 545) | def add(self, split_info: SplitInfo): method total_num_examples (line 553) | def total_num_examples(self): method from_split_dict (line 558) | def from_split_dict(cls, split_infos: Union[list, dict], dataset_name:... method to_split_dict (line 575) | def to_split_dict(self): method copy (line 584) | def copy(self): method _to_yaml_list (line 587) | def _to_yaml_list(self) -> list: method _from_yaml_list (line 599) | def _from_yaml_list(cls, yaml_data: list) -> "SplitDict": class SplitGenerator (line 604) | class SplitGenerator: method __post_init__ (line 634) | def __post_init__(self): FILE: src/datasets/streaming.py function extend_module_for_streaming (line 42) | def extend_module_for_streaming(module_path, download_config: Optional[D... function extend_dataset_builder_for_streaming (line 110) | def extend_dataset_builder_for_streaming(builder: "DatasetBuilder"): FILE: src/datasets/table.py function inject_arrow_table_documentation (line 22) | def inject_arrow_table_documentation(arrow_table_method): function _in_memory_arrow_table_from_file (line 33) | def _in_memory_arrow_table_from_file(filename: str) -> pa.Table: function _in_memory_arrow_table_from_buffer (line 40) | def _in_memory_arrow_table_from_buffer(buffer: pa.Buffer) -> pa.Table: function _memory_mapped_record_batch_reader_from_file (line 47) | def _memory_mapped_record_batch_reader_from_file(filename: str) -> pa.Re... function read_schema_from_file (line 52) | def read_schema_from_file(filename: str) -> pa.Schema: function _memory_mapped_arrow_table_from_file (line 62) | def _memory_mapped_arrow_table_from_file(filename: str) -> pa.Table: function _deepcopy (line 68) | def _deepcopy(x, memo: dict): function _interpolation_search (line 78) | def _interpolation_search(arr: list[int], x: int) -> int: class IndexedTableMixin (line 104) | class IndexedTableMixin: method __init__ (line 105) | def __init__(self, table: pa.Table): method fast_gather (line 112) | def fast_gather(self, indices: Union[list[int], np.ndarray]) -> pa.Table: method fast_slice (line 129) | def fast_slice(self, offset=0, length=None) -> pa.Table: class Table (line 153) | class Table(IndexedTableMixin): method __init__ (line 165) | def __init__(self, table: pa.Table): method __deepcopy__ (line 169) | def __deepcopy__(self, memo: dict): method validate (line 178) | def validate(self, *args, **kwargs): method equals (line 194) | def equals(self, *args, **kwargs): method to_batches (line 211) | def to_batches(self, *args, **kwargs): method to_pydict (line 225) | def to_pydict(self, *args, **kwargs): method to_pylist (line 234) | def to_pylist(self, *args, **kwargs): method to_pandas (line 243) | def to_pandas(self, *args, **kwargs): method to_string (line 305) | def to_string(self, *args, **kwargs): method to_reader (line 308) | def to_reader(self, max_chunksize: Optional[int] = None): method field (line 324) | def field(self, *args, **kwargs): method column (line 337) | def column(self, *args, **kwargs): method itercolumns (line 350) | def itercolumns(self, *args, **kwargs): method schema (line 360) | def schema(self): method columns (line 370) | def columns(self): method num_columns (line 380) | def num_columns(self): method num_rows (line 390) | def num_rows(self): method shape (line 403) | def shape(self): method nbytes (line 413) | def nbytes(self): method column_names (line 420) | def column_names(self): method __eq__ (line 426) | def __eq__(self, other): method __getitem__ (line 429) | def __getitem__(self, i): method __len__ (line 432) | def __len__(self): method __repr__ (line 435) | def __repr__(self): method __str__ (line 438) | def __str__(self): method slice (line 441) | def slice(self, *args, **kwargs): method filter (line 457) | def filter(self, *args, **kwargs): method flatten (line 463) | def flatten(self, *args, **kwargs): method combine_chunks (line 477) | def combine_chunks(self, *args, **kwargs): method cast (line 493) | def cast(self, *args, **kwargs): method replace_schema_metadata (line 508) | def replace_schema_metadata(self, *args, **kwargs): method add_column (line 522) | def add_column(self, *args, **kwargs): method append_column (line 543) | def append_column(self, *args, **kwargs): method remove_column (line 559) | def remove_column(self, *args, **kwargs): method set_column (line 572) | def set_column(self, *args, **kwargs): method rename_columns (line 590) | def rename_columns(self, *args, **kwargs): method drop (line 596) | def drop(self, *args, **kwargs): method select (line 612) | def select(self, *args, **kwargs): class TableBlock (line 628) | class TableBlock(Table): class InMemoryTable (line 638) | class InMemoryTable(TableBlock): method from_file (line 654) | def from_file(cls, filename: str): method from_buffer (line 659) | def from_buffer(cls, buffer: pa.Buffer): method from_pandas (line 664) | def from_pandas(cls, *args, **kwargs): method from_arrays (line 722) | def from_arrays(cls, *args, **kwargs): method from_pydict (line 742) | def from_pydict(cls, *args, **kwargs): method from_pylist (line 760) | def from_pylist(cls, mapping, *args, **kwargs): method from_batches (line 778) | def from_batches(cls, *args, **kwargs): method slice (line 793) | def slice(self, offset=0, length=None): method filter (line 810) | def filter(self, *args, **kwargs): method flatten (line 816) | def flatten(self, *args, **kwargs): method combine_chunks (line 830) | def combine_chunks(self, *args, **kwargs): method cast (line 846) | def cast(self, *args, **kwargs): method replace_schema_metadata (line 861) | def replace_schema_metadata(self, *args, **kwargs): method add_column (line 875) | def add_column(self, *args, **kwargs): method append_column (line 896) | def append_column(self, *args, **kwargs): method remove_column (line 913) | def remove_column(self, *args, **kwargs): method set_column (line 927) | def set_column(self, *args, **kwargs): method rename_columns (line 946) | def rename_columns(self, *args, **kwargs): method drop (line 952) | def drop(self, *args, **kwargs): method select (line 969) | def select(self, *args, **kwargs): class MemoryMappedTable (line 989) | class MemoryMappedTable(TableBlock): method __init__ (line 1010) | def __init__(self, table: pa.Table, path: str, replays: Optional[list[... method from_file (line 1016) | def from_file(cls, filename: str, replays=None): method __getstate__ (line 1021) | def __getstate__(self): method __setstate__ (line 1024) | def __setstate__(self, state): method _apply_replays (line 1032) | def _apply_replays(table: pa.Table, replays: Optional[list[Replay]] = ... method _append_replay (line 1043) | def _append_replay(self, replay: Replay) -> list[Replay]: method slice (line 1048) | def slice(self, offset=0, length=None): method filter (line 1067) | def filter(self, *args, **kwargs): method flatten (line 1075) | def flatten(self, *args, **kwargs): method combine_chunks (line 1091) | def combine_chunks(self, *args, **kwargs): method cast (line 1109) | def cast(self, *args, **kwargs): method replace_schema_metadata (line 1126) | def replace_schema_metadata(self, *args, **kwargs): method add_column (line 1142) | def add_column(self, *args, **kwargs): method append_column (line 1165) | def append_column(self, *args, **kwargs): method remove_column (line 1184) | def remove_column(self, *args, **kwargs): method set_column (line 1200) | def set_column(self, *args, **kwargs): method rename_columns (line 1221) | def rename_columns(self, *args, **kwargs): method drop (line 1229) | def drop(self, *args, **kwargs): method select (line 1248) | def select(self, *args, **kwargs): class ConcatenationTable (line 1273) | class ConcatenationTable(Table): method __init__ (line 1299) | def __init__(self, table: pa.Table, blocks: list[list[TableBlock]]): method __getstate__ (line 1312) | def __getstate__(self): method __setstate__ (line 1315) | def __setstate__(self, state): method _concat_blocks (line 1327) | def _concat_blocks(blocks: list[Union[TableBlock, pa.Table]], axis: in... method _concat_blocks_horizontally_and_vertically (line 1344) | def _concat_blocks_horizontally_and_vertically(cls, blocks: list[list[... method _merge_blocks (line 1354) | def _merge_blocks(cls, blocks: TableBlockContainer, axis: Optional[int... method _consolidate_blocks (line 1370) | def _consolidate_blocks(cls, blocks: TableBlockContainer) -> TableBloc... method from_blocks (line 1379) | def from_blocks(cls, blocks: TableBlockContainer) -> "ConcatenationTab... method from_tables (line 1393) | def from_tables(cls, tables: list[Union[pa.Table, Table]], axis: int =... method _slices (line 1475) | def _slices(self): method slice (line 1482) | def slice(self, offset=0, length=None): method filter (line 1513) | def filter(self, mask, *args, **kwargs): method flatten (line 1524) | def flatten(self, *args, **kwargs): method combine_chunks (line 1542) | def combine_chunks(self, *args, **kwargs): method cast (line 1562) | def cast(self, target_schema, *args, **kwargs): method replace_schema_metadata (line 1593) | def replace_schema_metadata(self, *args, **kwargs): method add_column (line 1611) | def add_column(self, *args, **kwargs): method append_column (line 1632) | def append_column(self, *args, **kwargs): method remove_column (line 1649) | def remove_column(self, i, *args, **kwargs): method set_column (line 1673) | def set_column(self, *args, **kwargs): method rename_columns (line 1692) | def rename_columns(self, names, *args, **kwargs): method drop (line 1705) | def drop(self, columns, *args, **kwargs): method select (line 1726) | def select(self, columns, *args, **kwargs): function concat_tables (line 1746) | def concat_tables(tables: list[Table], axis: int = 0) -> Table: function list_table_cache_files (line 1769) | def list_table_cache_files(table: Table) -> list[str]: function _wrap_for_chunked_arrays (line 1790) | def _wrap_for_chunked_arrays(func): function _are_list_values_of_length (line 1802) | def _are_list_values_of_length(array: pa.ListArray, length: int) -> bool: function _combine_list_array_offsets_with_mask (line 1807) | def _combine_list_array_offsets_with_mask(array: pa.ListArray) -> pa.Array: function _storage_type (line 1820) | def _storage_type(type: pa.DataType) -> pa.DataType: function _short_str (line 1833) | def _short_str(value: Any) -> str: function array_cast (line 1841) | def array_cast( function cast_array_to_feature (line 1954) | def cast_array_to_feature( function embed_array_storage (line 2096) | def embed_array_storage(array: pa.Array, feature: "FeatureType", token_p... class CastError (line 2154) | class CastError(ValueError): method __init__ (line 2157) | def __init__(self, *args, table_column_names: list[str], requested_col... method __reduce__ (line 2162) | def __reduce__(self): method details (line 2168) | def details(self): function cast_table_to_features (line 2179) | def cast_table_to_features(table: pa.Table, features: "Features"): function cast_table_to_schema (line 2201) | def cast_table_to_schema(table: pa.Table, schema: pa.Schema): function embed_table_storage (line 2233) | def embed_table_storage(table: pa.Table, token_per_repo_id=None): function table_cast (line 2257) | def table_cast(table: pa.Table, schema: pa.Schema): function table_flatten (line 2279) | def table_flatten(table: pa.Table): function table_visitor (line 2321) | def table_visitor(table: pa.Table, function: Callable[[pa.Array], None]): function table_iter (line 2353) | def table_iter(table: Table, batch_size: int, drop_last_batch=False) -> ... FILE: src/datasets/utils/_dataset_viewer.py class DatasetViewerError (line 16) | class DatasetViewerError(DatasetsError): function get_exported_parquet_files (line 26) | def get_exported_parquet_files( function get_exported_dataset_infos (line 62) | def get_exported_dataset_infos( FILE: src/datasets/utils/_dill.py class Pickler (line 27) | class Pickler(dill.Pickler): method save (line 31) | def save(self, obj, save_persistent_id=True): method _batch_setitems (line 72) | def _batch_setitems(self, items, *args, **kwargs): method memoize (line 83) | def memoize(self, obj): function pklregister (line 89) | def pklregister(t): function _is_supported_dill_version (line 99) | def _is_supported_dill_version(): function dump (line 111) | def dump(obj, file): function dumps (line 116) | def dumps(obj): function log (line 125) | def log(pickler, msg): function log (line 130) | def log(pickler, msg): function _save_set (line 135) | def _save_set(pickler, obj): function _save_regexPattern (line 149) | def _save_regexPattern(pickler, obj): function _save_tiktokenEncoding (line 158) | def _save_tiktokenEncoding(pickler, obj): function _save_torchTensor (line 167) | def _save_torchTensor(pickler, obj): function _save_torchGenerator (line 186) | def _save_torchGenerator(pickler, obj): function _save_spacyLanguage (line 200) | def _save_spacyLanguage(pickler, obj): function _save_transformersPreTrainedTokenizerBase (line 214) | def _save_transformersPreTrainedTokenizerBase(pickler, obj): function _save_code (line 264) | def _save_code(pickler, obj): function save_code (line 361) | def save_code(pickler, obj): FILE: src/datasets/utils/_filelock.py class FileLock (line 25) | class FileLock(FileLock_): method __init__ (line 33) | def __init__(self, lock_file, *args, **kwargs): method hash_filename_if_too_long (line 44) | def hash_filename_if_too_long(cls, path: str) -> str: FILE: src/datasets/utils/deprecation_utils.py function deprecated (line 14) | def deprecated(help_message: Optional[str] = None): class OnAccess (line 59) | class OnAccess(enum.EnumMeta): method __getattribute__ (line 64) | def __getattribute__(cls, name): method __getitem__ (line 70) | def __getitem__(cls, name): method __call__ (line 76) | def __call__(cls, value, names=None, *, module=None, qualname=None, ty... class DeprecatedEnum (line 83) | class DeprecatedEnum(enum.Enum, metaclass=OnAccess): method __new__ (line 88) | def __new__(cls, value): method help_message (line 95) | def help_message(self): method deprecate (line 98) | def deprecate(self): FILE: src/datasets/utils/doc_utils.py function is_documented_by (line 4) | def is_documented_by(function_with_docstring: Callable): FILE: src/datasets/utils/experimental.py function experimental (line 8) | def experimental(fn: Callable) -> Callable: FILE: src/datasets/utils/extract.py class ExtractManager (line 27) | class ExtractManager: method __init__ (line 28) | def __init__(self, cache_dir: Optional[str] = None): method _get_output_path (line 34) | def _get_output_path(self, path: str) -> str: method _do_extract (line 42) | def _do_extract(self, output_path: str, force_extract: bool) -> bool: method extract (line 47) | def extract(self, input_path: str, force_extract: bool = False) -> str: class BaseExtractor (line 57) | class BaseExtractor(ABC): method is_extractable (line 60) | def is_extractable(cls, path: Union[Path, str], **kwargs) -> bool: ... method extract (line 64) | def extract(input_path: Union[Path, str], output_path: Union[Path, str... class MagicNumberBaseExtractor (line 67) | class MagicNumberBaseExtractor(BaseExtractor, ABC): method read_magic_number (line 71) | def read_magic_number(path: Union[Path, str], magic_number_length: int): method is_extractable (line 76) | def is_extractable(cls, path: Union[Path, str], magic_number: bytes = ... class TarExtractor (line 86) | class TarExtractor(BaseExtractor): method is_extractable (line 88) | def is_extractable(cls, path: Union[Path, str], **kwargs) -> bool: method safemembers (line 92) | def safemembers(members: tarfile.TarFile, output_path: Union[Path, str]): method extract (line 128) | def extract(input_path: Union[Path, str], output_path: Union[Path, str... class GzipExtractor (line 135) | class GzipExtractor(MagicNumberBaseExtractor): method extract (line 139) | def extract(input_path: Union[Path, str], output_path: Union[Path, str... class ZipExtractor (line 145) | class ZipExtractor(MagicNumberBaseExtractor): method is_extractable (line 153) | def is_extractable(cls, path: Union[Path, str], magic_number: bytes = ... method safemembers (line 190) | def safemembers(members: list[zipfile.ZipInfo], output_path: Union[Pat... method extract (line 222) | def extract(input_path: Union[Path, str], output_path: Union[Path, str... class XzExtractor (line 229) | class XzExtractor(MagicNumberBaseExtractor): method extract (line 233) | def extract(input_path: Union[Path, str], output_path: Union[Path, str... class RarExtractor (line 239) | class RarExtractor(MagicNumberBaseExtractor): method safemembers (line 243) | def safemembers(members: list["rarfile.RarInfo"], output_path: Union[P... method extract (line 280) | def extract(input_path: Union[Path, str], output_path: Union[Path, str... class ZstdExtractor (line 291) | class ZstdExtractor(MagicNumberBaseExtractor): method extract (line 295) | def extract(input_path: Union[Path, str], output_path: Union[Path, str... class Bzip2Extractor (line 305) | class Bzip2Extractor(MagicNumberBaseExtractor): method extract (line 309) | def extract(input_path: Union[Path, str], output_path: Union[Path, str... class SevenZipExtractor (line 315) | class SevenZipExtractor(MagicNumberBaseExtractor): method safemembers (line 319) | def safemembers(members: list["py7zr.FileInfo"], output_path: Union[Pa... method extract (line 356) | def extract(input_path: Union[Path, str], output_path: Union[Path, str... class Lz4Extractor (line 367) | class Lz4Extractor(MagicNumberBaseExtractor): method extract (line 371) | def extract(input_path: Union[Path, str], output_path: Union[Path, str... class Extractor (line 381) | class Extractor: method _get_magic_number_max_length (line 396) | def _get_magic_number_max_length(cls): method _read_magic_number (line 405) | def _read_magic_number(path: Union[Path, str], magic_number_length: int): method is_extractable (line 412) | def is_extractable(cls, path: Union[Path, str], return_extractor: bool... method infer_extractor_format (line 424) | def infer_extractor_format(cls, path: Union[Path, str]) -> Optional[st... method extract (line 432) | def extract( FILE: src/datasets/utils/file_utils.py class _AiohttpClientError (line 54) | class _AiohttpClientError(Exception): function is_remote_url (line 75) | def is_remote_url(url_or_filename: str) -> bool: function is_local_path (line 79) | def is_local_path(url_or_filename: str) -> bool: function is_relative_path (line 86) | def is_relative_path(url_or_filename: str) -> bool: function relative_to_absolute_path (line 90) | def relative_to_absolute_path(path: T) -> T: function url_or_path_join (line 96) | def url_or_path_join(base_name: str, *pathnames: str) -> str: function url_or_path_parent (line 103) | def url_or_path_parent(url_or_path: str) -> str: function hash_url_to_filename (line 110) | def hash_url_to_filename(url, etag=None): function cached_path (line 134) | def cached_path( function get_datasets_user_agent (line 257) | def get_datasets_user_agent(user_agent: Optional[Union[str, dict]] = Non... function get_authentication_headers_for_url (line 275) | def get_authentication_headers_for_url(url: str, token: Optional[Union[s... function _raise_if_offline_mode_is_enabled (line 285) | def _raise_if_offline_mode_is_enabled(msg: Optional[str] = None): function fsspec_head (line 293) | def fsspec_head(url, storage_options=None): function stack_multiprocessing_download_progress_bars (line 299) | def stack_multiprocessing_download_progress_bars(): class TqdmCallback (line 305) | class TqdmCallback(fsspec.callbacks.TqdmCallback): method __init__ (line 306) | def __init__(self, tqdm_kwargs=None, *args, **kwargs): function fsspec_get (line 315) | def fsspec_get(url, temp_file, storage_options=None, desc=None, disable_... function get_from_cache (line 333) | def get_from_cache( function add_start_docstrings (line 420) | def add_start_docstrings(*docstr): function add_end_docstrings (line 428) | def add_end_docstrings(*docstr): function estimate_dataset_size (line 436) | def estimate_dataset_size(paths): function readline (line 440) | def readline(f: io.RawIOBase): class NonStreamableDatasetError (line 512) | class NonStreamableDatasetError(Exception): function _get_path_extension (line 516) | def _get_path_extension(path: str) -> str: function _get_extraction_protocol_with_magic_number (line 526) | def _get_extraction_protocol_with_magic_number(f) -> Optional[str]: function _get_extraction_protocol (line 544) | def _get_extraction_protocol(urlpath: str, download_config: Optional[Dow... function xjoin (line 570) | def xjoin(a, *p): function xdirname (line 597) | def xdirname(a): function xexists (line 628) | def xexists(urlpath: str, download_config: Optional[DownloadConfig] = No... function xbasename (line 649) | def xbasename(a): function xsplit (line 675) | def xsplit(a): function xsplitext (line 702) | def xsplitext(a): function xisfile (line 729) | def xisfile(path, download_config: Optional[DownloadConfig] = None) -> b... function xgetsize (line 749) | def xgetsize(path, download_config: Optional[DownloadConfig] = None) -> ... function xisdir (line 777) | def xisdir(path, download_config: Optional[DownloadConfig] = None) -> bool: function xrelpath (line 800) | def xrelpath(path, start=None): class _OverridableIOWrapper (line 817) | class _OverridableIOWrapper(io.RawIOBase): method __init__ (line 818) | def __init__(self, f): method __getattribute__ (line 822) | def __getattribute__(self, attr): method __setattr__ (line 829) | def __setattr__(self, attr, value): function _add_retries_to_file_obj_read_method (line 836) | def _add_retries_to_file_obj_read_method(file_obj): function _prepare_path_and_storage_options (line 880) | def _prepare_path_and_storage_options( function _prepare_single_hop_path_and_storage_options (line 892) | def _prepare_single_hop_path_and_storage_options( function xopen (line 945) | def xopen(file: str, mode="r", *args, download_config: Optional[Download... function xlistdir (line 1006) | def xlistdir(path: str, download_config: Optional[DownloadConfig] = None... function xglob (line 1031) | def xglob(urlpath, *, recursive=False, download_config: Optional[Downloa... function xwalk (line 1057) | def xwalk(urlpath, download_config: Optional[DownloadConfig] = None, **k... class xPath (line 1085) | class xPath(type(Path())): method __str__ (line 1088) | def __str__(self): method exists (line 1098) | def exists(self, download_config: Optional[DownloadConfig] = None): method glob (line 1109) | def glob(self, pattern, download_config: Optional[DownloadConfig] = No... method rglob (line 1137) | def rglob(self, pattern, **kwargs): method parent (line 1149) | def parent(self) -> "xPath": method name (line 1158) | def name(self) -> str: method stem (line 1167) | def stem(self) -> str: method suffix (line 1176) | def suffix(self) -> str: method open (line 1184) | def open(self, *args, **kwargs): method joinpath (line 1196) | def joinpath(self, *p: tuple[str, ...]) -> "xPath": method __truediv__ (line 1207) | def __truediv__(self, p: str) -> "xPath": method with_suffix (line 1210) | def with_suffix(self, suffix): function _as_str (line 1217) | def _as_str(path: Union[str, Path, xPath]): function xgzip_open (line 1221) | def xgzip_open(filepath_or_buffer, *args, download_config: Optional[Down... function xnumpy_load (line 1231) | def xnumpy_load(filepath_or_buffer, *args, download_config: Optional[Dow... function xpandas_read_csv (line 1241) | def xpandas_read_csv(filepath_or_buffer, download_config: Optional[Downl... function xpandas_read_excel (line 1253) | def xpandas_read_excel(filepath_or_buffer, download_config: Optional[Dow... function xpyarrow_parquet_read_table (line 1271) | def xpyarrow_parquet_read_table(filepath_or_buffer, download_config: Opt... function xsio_loadmat (line 1281) | def xsio_loadmat(filepath_or_buffer, download_config: Optional[DownloadC... function xet_parse (line 1290) | def xet_parse(source, parser=None, download_config: Optional[DownloadCon... function xxml_dom_minidom_parse (line 1308) | def xxml_dom_minidom_parse(filename_or_file, download_config: Optional[D... class ArchiveIterable (line 1326) | class ArchiveIterable(TrackedIterableFromGenerator): method _iter_tar (line 1330) | def _iter_tar(f): method _iter_zip (line 1347) | def _iter_zip(f): method _iter_from_fileobj (line 1362) | def _iter_from_fileobj(cls, f) -> Generator[tuple, None, None]: method _iter_from_urlpath (line 1370) | def _iter_from_urlpath( method from_buf (line 1383) | def from_buf(cls, fileobj) -> "ArchiveIterable": method from_urlpath (line 1387) | def from_urlpath(cls, urlpath_or_buf, download_config: Optional[Downlo... class FilesIterable (line 1391) | class FilesIterable(TrackedIterableFromGenerator): method _iter_from_urlpaths (line 1395) | def _iter_from_urlpaths( method from_urlpaths (line 1419) | def from_urlpaths(cls, urlpaths, download_config: Optional[DownloadCon... FILE: src/datasets/utils/info_utils.py class VerificationMode (line 22) | class VerificationMode(enum.Enum): function verify_checksums (line 43) | def verify_checksums(expected_checksums: Optional[dict], recorded_checks... function verify_splits (line 62) | def verify_splits(expected_splits: Optional[dict], recorded_splits: dict): function get_size_checksum_dict (line 80) | def get_size_checksum_dict(path: str, record_checksum: bool = True) -> d... function is_small_dataset (line 93) | def is_small_dataset(dataset_size): FILE: src/datasets/utils/json.py function ujson_dumps (line 10) | def ujson_dumps(*args, **kwargs): function ujson_loads (line 18) | def ujson_loads(*args, **kwargs): function json_encode_field (line 26) | def json_encode_field(example: Any, json_field_path: str) -> Any: function find_mixed_struct_types_field_paths (line 45) | def find_mixed_struct_types_field_paths(examples: list, allow_root=False... function get_json_field_path_from_pyarrow_json_error (line 72) | def get_json_field_path_from_pyarrow_json_error(err_str: str) -> str: function insert_json_field_path (line 80) | def insert_json_field_path(json_field_paths: list[str], json_field_path:... function json_encode_fields_in_json_lines (line 90) | def json_encode_fields_in_json_lines(original_batch: bytes, json_field_p... function get_json_field_paths_from_feature (line 98) | def get_json_field_paths_from_feature(feature: "FeatureType") -> list[str]: function set_json_types_in_feature (line 112) | def set_json_types_in_feature(feature: "FeatureType", json_field_paths: ... FILE: src/datasets/utils/logging.py function _get_default_logging_level (line 49) | def _get_default_logging_level(): function _get_library_name (line 65) | def _get_library_name() -> str: function _get_library_root_logger (line 69) | def _get_library_root_logger() -> logging.Logger: function _configure_library_root_logger (line 73) | def _configure_library_root_logger() -> None: function _reset_library_root_logger (line 80) | def _reset_library_root_logger() -> None: function get_logger (line 85) | def get_logger(name: Optional[str] = None) -> logging.Logger: function get_verbosity (line 94) | def get_verbosity() -> int: function set_verbosity (line 110) | def set_verbosity(verbosity: int) -> None: function set_verbosity_info (line 119) | def set_verbosity_info(): function set_verbosity_warning (line 129) | def set_verbosity_warning(): function set_verbosity_debug (line 139) | def set_verbosity_debug(): function set_verbosity_error (line 149) | def set_verbosity_error(): function disable_propagation (line 159) | def disable_propagation() -> None: function enable_propagation (line 166) | def enable_propagation() -> None: FILE: src/datasets/utils/metadata.py class _NoDuplicateSafeLoader (line 21) | class _NoDuplicateSafeLoader(yaml.SafeLoader): method _check_no_duplicates_on_constructed_node (line 22) | def _check_no_duplicates_on_constructed_node(self, node): method construct_mapping (line 30) | def construct_mapping(self, node, deep=False): function _split_yaml_from_readme (line 36) | def _split_yaml_from_readme(readme_content: str) -> tuple[Optional[str],... class MetadataConfigs (line 46) | class MetadataConfigs(dict[str, dict[str, Any]]): method _raise_if_data_files_field_not_valid (line 52) | def _raise_if_data_files_field_not_valid(metadata_config: dict): method _from_exported_parquet_files_and_dataset_infos (line 103) | def _from_exported_parquet_files_and_dataset_infos( method from_dataset_card_data (line 142) | def from_dataset_card_data(cls, dataset_card_data: DatasetCardData) ->... method to_dataset_card_data (line 166) | def to_dataset_card_data(self, dataset_card_data: DatasetCardData) -> ... method get_default_config_name (line 179) | def get_default_config_name(self) -> Optional[str]: FILE: src/datasets/utils/patching.py class _PatchedModuleObj (line 9) | class _PatchedModuleObj: method __init__ (line 12) | def __init__(self, module, attrs=None): class patch_submodule (line 21) | class patch_submodule: method __init__ (line 40) | def __init__(self, obj, target: str, new, attrs=None): method __enter__ (line 48) | def __enter__(self): method __exit__ (line 102) | def __exit__(self, *exc_info): method start (line 106) | def start(self): method stop (line 111) | def stop(self): FILE: src/datasets/utils/py_utils.py function size_str (line 71) | def size_str(size_in_bytes): function convert_file_size_to_int (line 95) | def convert_file_size_to_int(size: Union[int, str]) -> int: function glob_pattern_to_regex (line 139) | def glob_pattern_to_regex(pattern): function string_to_dict (line 158) | def string_to_dict(string: str, pattern: str) -> Optional[dict[str, str]]: function asdict (line 192) | def asdict(obj): function temporary_assignment (line 232) | def temporary_assignment(obj, attr, value): function temp_seed (line 243) | def temp_seed(seed: int, set_pytorch=False, set_tensorflow=False): function unique_values (line 296) | def unique_values(values): function no_op_if_value_is_null (line 305) | def no_op_if_value_is_null(func): function first_non_null_value (line 314) | def first_non_null_value(iterable): function first_non_null_non_empty_value (line 322) | def first_non_null_non_empty_value(iterable): function zip_dict (line 330) | def zip_dict(*dicts): class NonMutableDict (line 337) | class NonMutableDict(dict): method __init__ (line 345) | def __init__(self, *args, **kwargs): method __setitem__ (line 354) | def __setitem__(self, key, value): method update (line 359) | def update(self, other): class classproperty (line 365) | class classproperty(property): # pylint: disable=invalid-name method __get__ (line 368) | def __get__(self, obj, objtype=None): function _single_map_nested (line 372) | def _single_map_nested(args): function map_nested (line 416) | def map_nested( class NestedDataStructure (line 553) | class NestedDataStructure: method __init__ (line 554) | def __init__(self, data=None): method flatten (line 557) | def flatten(self, data=None): function has_sufficient_disk_space (line 567) | def has_sufficient_disk_space(needed_bytes, directory="."): function copyfunc (line 575) | def copyfunc(func): function _write_generator_to_queue (line 584) | def _write_generator_to_queue(queue: queue.Queue, func: Callable[..., It... function _get_pool_pid (line 590) | def _get_pool_pid(pool: Union[multiprocessing.pool.Pool, multiprocess.po... function iflatmap_unordered (line 594) | def iflatmap_unordered( function iter_batched (line 630) | def iter_batched(iterable: Iterable[T], n: int) -> Iterable[list[T]]: FILE: src/datasets/utils/sharding.py function _number_of_shards_in_gen_kwargs (line 4) | def _number_of_shards_in_gen_kwargs(gen_kwargs: dict) -> int: function _distribute_shards (line 21) | def _distribute_shards(num_shards: int, max_num_jobs: int) -> list[range]: function _split_gen_kwargs (line 48) | def _split_gen_kwargs(gen_kwargs: dict, max_num_jobs: int) -> list[dict]: function _merge_gen_kwargs (line 67) | def _merge_gen_kwargs(gen_kwargs_list: list[dict]) -> dict: function _shuffle_gen_kwargs (line 76) | def _shuffle_gen_kwargs(rng: np.random.Generator, gen_kwargs: dict) -> d... FILE: src/datasets/utils/stratify.py function approximate_mode (line 4) | def approximate_mode(class_counts, n_draws, rng): function stratified_shuffle_split_generate_indices (line 54) | def stratified_shuffle_split_generate_indices(y, n_train, n_test, rng, n... FILE: src/datasets/utils/tf_utils.py function minimal_tf_collate_fn (line 36) | def minimal_tf_collate_fn(features): function minimal_tf_collate_fn_with_renaming (line 56) | def minimal_tf_collate_fn_with_renaming(features): function is_numeric_pa_type (line 64) | def is_numeric_pa_type(pa_type): function np_get_batch (line 70) | def np_get_batch( function dataset_to_tf (line 118) | def dataset_to_tf( class SharedMemoryContext (line 231) | class SharedMemoryContext: method __init__ (line 234) | def __init__(self): method get_shm (line 238) | def get_shm(self, name, size, create): method get_array (line 248) | def get_array(self, name, shape, dtype, create): method __enter__ (line 252) | def __enter__(self): method __exit__ (line 255) | def __exit__(self, exc_type, exc_value, traceback): class NumpyMultiprocessingGenerator (line 263) | class NumpyMultiprocessingGenerator: method __init__ (line 264) | def __init__( method __iter__ (line 298) | def __iter__(self): method __call__ (line 395) | def __call__(self): method worker_loop (line 399) | def worker_loop( method distribute_batches (line 471) | def distribute_batches(dataset, batch_size, drop_remainder, num_worker... function multiprocess_dataset_to_tf (line 503) | def multiprocess_dataset_to_tf( FILE: src/datasets/utils/tqdm.py function disable_progress_bars (line 61) | def disable_progress_bars() -> None: function enable_progress_bars (line 78) | def enable_progress_bars() -> None: function are_progress_bars_disabled (line 95) | def are_progress_bars_disabled() -> bool: class tqdm (line 105) | class tqdm(old_tqdm): method __init__ (line 112) | def __init__(self, *args, **kwargs): method __delattr__ (line 120) | def __delattr__(self, attr: str) -> None: function is_progress_bar_enabled (line 134) | def is_progress_bar_enabled(): FILE: src/datasets/utils/track.py class tracked_str (line 4) | class tracked_str(str): method set_origin (line 7) | def set_origin(self, origin: str): method get_origin (line 11) | def get_origin(self): method __repr__ (line 14) | def __repr__(self) -> str: class tracked_list (line 21) | class tracked_list(list): method __init__ (line 22) | def __init__(self, *args, **kwargs) -> None: method __iter__ (line 26) | def __iter__(self) -> Iterator: method __repr__ (line 32) | def __repr__(self) -> str: class TrackedIterableFromGenerator (line 39) | class TrackedIterableFromGenerator(Iterable): method __init__ (line 42) | def __init__(self, generator, *args): method __iter__ (line 48) | def __iter__(self): method __repr__ (line 54) | def __repr__(self) -> str: method __reduce__ (line 60) | def __reduce__(self): FILE: src/datasets/utils/version.py class Version (line 30) | class Version: method __post_init__ (line 55) | def __post_init__(self): method __repr__ (line 58) | def __repr__(self): method tuple (line 62) | def tuple(self): method _validate_operand (line 65) | def _validate_operand(self, other): method __eq__ (line 72) | def __eq__(self, other): method __lt__ (line 80) | def __lt__(self, other): method __hash__ (line 84) | def __hash__(self): method from_dict (line 88) | def from_dict(cls, dic): method _to_yaml_string (line 92) | def _to_yaml_string(self) -> str: function _str_to_version_tuple (line 96) | def _str_to_version_tuple(version_str): function _version_tuple_to_str (line 104) | def _version_tuple_to_str(version_tuple): FILE: tests/commands/conftest.py function dataset_dir (line 6) | def dataset_dir(tmp_path): FILE: tests/commands/test_test.py function is_1percent_close (line 29) | def is_1percent_close(source, target): function test_test_command (line 34) | def test_test_command(dataset_dir): FILE: tests/conftest.py function pytest_collection_modifyitems (line 11) | def pytest_collection_modifyitems(config, items): function set_test_cache_config (line 20) | def set_test_cache_config(tmp_path_factory, monkeypatch): function disable_implicit_token (line 35) | def disable_implicit_token(monkeypatch): function disable_tqdm_output (line 40) | def disable_tqdm_output(): function set_update_download_counts_to_false (line 45) | def set_update_download_counts_to_false(monkeypatch): function set_sqlalchemy_silence_uber_warning (line 51) | def set_sqlalchemy_silence_uber_warning(monkeypatch): function zero_time_out_for_remote_code (line 61) | def zero_time_out_for_remote_code(): FILE: tests/distributed_scripts/run_torch_distributed.py class FailedTestError (line 15) | class FailedTestError(RuntimeError): function gen (line 19) | def gen(shards: List[str]): function main (line 25) | def main(): FILE: tests/features/test_array_xd.py function generate_examples (line 34) | def generate_examples(features: dict, num_examples=100, seq_shapes=None): class ExtensionTypeCompatibilityTest (line 59) | class ExtensionTypeCompatibilityTest(unittest.TestCase): method test_array2d_nonspecific_shape (line 60) | def test_array2d_nonspecific_shape(self): method test_multiple_extensions_same_row (line 81) | def test_multiple_extensions_same_row(self): method test_compatability_with_string_values (line 100) | def test_compatability_with_string_values(self): method test_extension_indexing (line 113) | def test_extension_indexing(self): function get_array_feature_types (line 129) | def get_array_feature_types(): class ArrayXDTest (line 144) | class ArrayXDTest(unittest.TestCase): method get_features (line 145) | def get_features(self, array_feature, shape_1, shape_2): method get_dict_example_0 (line 154) | def get_dict_example_0(self, shape_1, shape_2): method get_dict_example_1 (line 161) | def get_dict_example_1(self, shape_1, shape_2): method get_dict_examples (line 168) | def get_dict_examples(self, shape_1, shape_2): method _check_getitem_output_type (line 175) | def _check_getitem_output_type(self, dataset, shape_1, shape_2, first_... method test_write (line 207) | def test_write(self, array_feature, shape_1, shape_2): method test_write_batch (line 223) | def test_write_batch(self, array_feature, shape_1, shape_2): method test_from_dict (line 235) | def test_from_dict(self, array_feature, shape_1, shape_2): class ArrayXDDynamicTest (line 244) | class ArrayXDDynamicTest(unittest.TestCase): method get_one_col_dataset (line 245) | def get_one_col_dataset(self, first_dim_list, fixed_shape): method get_two_col_datasset (line 251) | def get_two_col_datasset(self, first_dim_list, fixed_shape): method test_to_pylist (line 262) | def test_to_pylist(self): method test_to_numpy (line 274) | def test_to_numpy(self): method test_iter_dataset (line 307) | def test_iter_dataset(self): method test_to_pandas (line 317) | def test_to_pandas(self): method test_map_dataset (line 346) | def test_map_dataset(self): function test_table_to_pandas (line 361) | def test_table_to_pandas(dtype, dummy_value): function test_array_xd_numpy_arrow_extractor (line 371) | def test_array_xd_numpy_arrow_extractor(dtype, dummy_value): function test_array_xd_with_none (line 379) | def test_array_xd_with_none(): function test_array_xd_with_np (line 419) | def test_array_xd_with_np(seq_type, dtype, shape, feature_class): function test_dataset_map (line 436) | def test_dataset_map(with_none): FILE: tests/features/test_audio.py function tar_wav_path (line 17) | def tar_wav_path(shared_datadir, tmp_path_factory): function tar_mp3_path (line 26) | def tar_mp3_path(shared_datadir, tmp_path_factory): function iter_archive (line 34) | def iter_archive(archive_path): function test_audio_instantiation (line 42) | def test_audio_instantiation(): function test_audio_feature_type_to_arrow (line 53) | def test_audio_feature_type_to_arrow(): function test_audio_feature_encode_example (line 77) | def test_audio_feature_encode_example(shared_datadir, build_example): function test_audio_feature_encode_example_pcm (line 100) | def test_audio_feature_encode_example_pcm(shared_datadir, build_example): function test_audio_feature_encode_example_audiodecoder (line 121) | def test_audio_feature_encode_example_audiodecoder(shared_datadir, in_sa... function test_audio_decode_example (line 136) | def test_audio_decode_example(shared_datadir): function test_audio_resampling (line 152) | def test_audio_resampling(shared_datadir): function test_audio_decode_example_mp3 (line 165) | def test_audio_decode_example_mp3(shared_datadir): function test_audio_decode_example_opus (line 179) | def test_audio_decode_example_opus(shared_datadir): function test_audio_decode_example_pcm (line 193) | def test_audio_decode_example_pcm(shared_datadir, sampling_rate): function test_audio_resampling_mp3_different_sampling_rates (line 207) | def test_audio_resampling_mp3_different_sampling_rates(shared_datadir): function test_backwards_compatibility (line 228) | def test_backwards_compatibility(shared_datadir): function test_dataset_with_audio_feature (line 251) | def test_dataset_with_audio_feature(shared_datadir): function test_dataset_with_audio_feature_tar_wav (line 280) | def test_dataset_with_audio_feature_tar_wav(tar_wav_path): function test_dataset_with_audio_feature_tar_mp3 (line 314) | def test_dataset_with_audio_feature_tar_mp3(tar_mp3_path): function test_dataset_with_audio_feature_with_none (line 348) | def test_dataset_with_audio_feature_with_none(): function test_resampling_at_loading_dataset_with_audio_feature (line 382) | def test_resampling_at_loading_dataset_with_audio_feature(shared_datadir): function test_resampling_at_loading_dataset_with_audio_feature_mp3 (line 411) | def test_resampling_at_loading_dataset_with_audio_feature_mp3(shared_dat... function test_resampling_after_loading_dataset_with_audio_feature (line 440) | def test_resampling_after_loading_dataset_with_audio_feature(shared_data... function test_resampling_after_loading_dataset_with_audio_feature_mp3 (line 473) | def test_resampling_after_loading_dataset_with_audio_feature_mp3(shared_... function test_dataset_cast_to_audio_features (line 518) | def test_dataset_cast_to_audio_features(shared_datadir, build_data): function test_dataset_concatenate_audio_features (line 533) | def test_dataset_concatenate_audio_features(shared_datadir): function test_dataset_concatenate_nested_audio_features (line 551) | def test_dataset_concatenate_nested_audio_features(shared_datadir): function test_dataset_with_audio_feature_map_is_not_decoded (line 572) | def test_dataset_with_audio_feature_map_is_not_decoded(shared_datadir): function test_dataset_with_audio_feature_map_is_decoded (line 594) | def test_dataset_with_audio_feature_map_is_decoded(shared_datadir): function test_formatted_dataset_with_audio_feature (line 624) | def test_formatted_dataset_with_audio_feature(shared_datadir): function jsonl_audio_dataset_path (line 676) | def jsonl_audio_dataset_path(shared_datadir, tmp_path_factory): function test_load_dataset_with_audio_feature (line 690) | def test_load_dataset_with_audio_feature(streaming, jsonl_audio_dataset_... function test_dataset_with_audio_feature_loaded_from_cache (line 708) | def test_dataset_with_audio_feature_loaded_from_cache(): function test_dataset_with_audio_feature_undecoded (line 717) | def test_dataset_with_audio_feature_undecoded(shared_datadir): function test_formatted_dataset_with_audio_feature_undecoded (line 735) | def test_formatted_dataset_with_audio_feature_undecoded(shared_datadir): function test_dataset_with_audio_feature_map_undecoded (line 767) | def test_dataset_with_audio_feature_map_undecoded(shared_datadir): function test_audio_embed_storage (line 785) | def test_audio_embed_storage(shared_datadir): function test_audio_decode_example_opus_convert_to_stereo (line 795) | def test_audio_decode_example_opus_convert_to_stereo(shared_datadir): function test_audio_decode_example_opus_convert_to_mono (line 809) | def test_audio_decode_example_opus_convert_to_mono(shared_datadir): FILE: tests/features/test_features.py function list_with (line 39) | def list_with(item): class FeaturesTest (line 43) | class FeaturesTest(TestCase): method test_from_arrow_schema_simple (line 44) | def test_from_arrow_schema_simple(self): method test_from_arrow_schema_with_sequence (line 54) | def test_from_arrow_schema_with_sequence(self): method test_string_to_arrow_bijection_for_primitive_types (line 64) | def test_string_to_arrow_bijection_for_primitive_types(self): method test_categorical_one_way (line 113) | def test_categorical_one_way(self): method test_feature_named_type (line 119) | def test_feature_named_type(self): method test_feature_named_self_as_kwarg (line 126) | def test_feature_named_self_as_kwarg(self): method test_class_label_feature_with_no_labels (line 133) | def test_class_label_feature_with_no_labels(self): method test_reorder_fields_as (line 140) | def test_reorder_fields_as(self): method test_flatten (line 258) | def test_flatten(self): method test_flatten_with_sequence (line 265) | def test_flatten_with_sequence(self): method test_features_dicts_are_synced (line 272) | def test_features_dicts_are_synced(self): function test_classlabel_init (line 297) | def test_classlabel_init(tmp_path_factory): function test_classlabel_str2int (line 320) | def test_classlabel_str2int(): function test_classlabel_int2str (line 333) | def test_classlabel_int2str(): function test_classlabel_cast_storage (line 346) | def test_classlabel_cast_storage(): function test_class_label_to_and_from_dict (line 386) | def test_class_label_to_and_from_dict(class_label_arg, tmp_path_factory): function test_decode_nested_example_with_list_types (line 403) | def test_decode_nested_example_with_list_types(schema, monkeypatch): function test_encode_nested_example_with_list_types (line 416) | def test_encode_nested_example_with_list_types(schema): function test_encode_nested_example_sequence_with_none (line 422) | def test_encode_nested_example_sequence_with_none(inner_type): function test_encode_example (line 438) | def test_encode_example(features_dict, example, expected_encoded_example): function test_encode_batch_with_example_with_empty_first_elem (line 444) | def test_encode_batch_with_example_with_empty_first_elem(): function test_encode_column_dict_with_none (line 461) | def test_encode_column_dict_with_none(): function test_dataset_feature_with_none (line 480) | def test_dataset_feature_with_none(feature): function iternumpy (line 513) | def iternumpy(key1, value1, value2): function dict_diff (line 520) | def dict_diff(d1: dict, d2: dict): # check if 2 dictionaries are equal class CastToPythonObjectsTest (line 536) | class CastToPythonObjectsTest(TestCase): method test_cast_to_python_objects_list (line 537) | def test_cast_to_python_objects_list(self): method test_cast_to_python_objects_tuple (line 543) | def test_cast_to_python_objects_tuple(self): method test_cast_to_python_or_numpy (line 549) | def test_cast_to_python_or_numpy(self): method test_cast_to_python_objects_series (line 558) | def test_cast_to_python_objects_series(self): method test_cast_to_python_objects_dataframe (line 567) | def test_cast_to_python_objects_dataframe(self): method test_cast_to_python_objects_pandas_timestamp (line 573) | def test_cast_to_python_objects_pandas_timestamp(self): method test_cast_to_python_objects_pandas_timedelta (line 583) | def test_cast_to_python_objects_pandas_timedelta(self): method test_cast_to_python_objects_torch (line 595) | def test_cast_to_python_objects_torch(self): method test_cast_to_python_objects_tf (line 610) | def test_cast_to_python_objects_tf(self): method test_cast_to_python_objects_jax (line 625) | def test_cast_to_python_objects_jax(self): method test_dont_iterate_over_each_element_in_a_list (line 641) | def test_dont_iterate_over_each_element_in_a_list(self, mocked_cast): function test_features_to_dict_and_from_dict_round_trip (line 688) | def test_features_to_dict_and_from_dict_round_trip(features: Features): function test_features_to_yaml_list (line 696) | def test_features_to_yaml_list(features: Features): function test_features_flatten_with_list_types (line 711) | def test_features_flatten_with_list_types(features_dict, expected_featur... function test_features_from_dict_with_list_types (line 742) | def test_features_from_dict_with_list_types(deserialized_features_dict, ... function test_generate_from_dict_with_list_types (line 772) | def test_generate_from_dict_with_list_types(deserialized_feature_dict, e... function test_features_to_yaml_list_with_large_list (line 787) | def test_features_to_yaml_list_with_large_list(features_dict, expected_f... function test_features_from_yaml_list_with_large_list (line 803) | def test_features_from_yaml_list_with_large_list(features_yaml_list, exp... function test_features_to_arrow_schema (line 809) | def test_features_to_arrow_schema(features: Features): function test_features_alignment (line 893) | def test_features_alignment(features: tuple[list[Features], list[Feature... function test_features_from_arrow_schema_primitive_data_type (line 900) | def test_features_from_arrow_schema_primitive_data_type(dtype): function test_features_from_arrow_schema_list_data_type (line 907) | def test_features_from_arrow_schema_list_data_type(list_dtype, scalar_dt... function test_features_reorder_fields_as_with_list_types (line 932) | def test_features_reorder_fields_as_with_list_types(feature, other_featu... function test_get_nested_type_with_scalar_feature (line 942) | def test_get_nested_type_with_scalar_feature(feature, expected_arrow_dat... function test_get_nested_type_with_list_feature (line 954) | def test_get_nested_type_with_list_feature( function test_generate_from_arrow_type_with_arrow_primitive_data_type (line 965) | def test_generate_from_arrow_type_with_arrow_primitive_data_type(arrow_p... function test_generate_from_arrow_type_with_arrow_nested_data_type (line 977) | def test_generate_from_arrow_type_with_arrow_nested_data_type( function test_check_non_null_non_empty_recursive_with_list_types (line 990) | def test_check_non_null_non_empty_recursive_with_list_types(schema): function test_check_non_null_non_empty_recursive_with_nested_list_types (line 1002) | def test_check_non_null_non_empty_recursive_with_nested_list_types(schema): function test_require_decoding_with_list_types (line 1007) | def test_require_decoding_with_list_types(feature): function test_require_storage_cast_with_list_types (line 1012) | def test_require_storage_cast_with_list_types(feature): function test_require_storage_embed_with_list_types (line 1017) | def test_require_storage_embed_with_list_types(feature): function test_visit_with_list_types (line 1025) | def test_visit_with_list_types(feature, expected): function test_is_null_feature (line 1048) | def test_is_null_feature(feature, expected): FILE: tests/features/test_image.py function tar_jpg_path (line 20) | def tar_jpg_path(shared_datadir, tmp_path_factory): function iter_archive (line 28) | def iter_archive(archive_path): function test_image_instantiation (line 36) | def test_image_instantiation(): function test_image_feature_type_to_arrow (line 44) | def test_image_feature_type_to_arrow(): function test_image_feature_encode_example (line 67) | def test_image_feature_encode_example(shared_datadir, build_example): function test_image_decode_example (line 81) | def test_image_decode_example(shared_datadir): function test_image_decode_example_with_exif_orientation_tag (line 98) | def test_image_decode_example_with_exif_orientation_tag(shared_datadir): function test_image_change_mode (line 116) | def test_image_change_mode(shared_datadir): function test_dataset_with_image_feature (line 130) | def test_dataset_with_image_feature(shared_datadir): function test_dataset_with_image_feature_from_pil_image (line 163) | def test_dataset_with_image_feature_from_pil_image(infer_feature, shared... function test_dataset_with_image_feature_from_np_array (line 195) | def test_dataset_with_image_feature_from_np_array(): function test_dataset_with_image_feature_tar_jpg (line 228) | def test_dataset_with_image_feature_tar_jpg(tar_jpg_path): function test_dataset_with_image_feature_with_none (line 263) | def test_dataset_with_image_feature_with_none(): function test_dataset_cast_to_image_features (line 309) | def test_dataset_cast_to_image_features(shared_datadir, build_data): function test_dataset_cast_to_image_features_polars (line 323) | def test_dataset_cast_to_image_features_polars(shared_datadir): function test_dataset_concatenate_image_features (line 336) | def test_dataset_concatenate_image_features(shared_datadir): function test_dataset_concatenate_nested_image_features (line 350) | def test_dataset_concatenate_nested_image_features(shared_datadir): function test_dataset_with_image_feature_map (line 371) | def test_dataset_with_image_feature_map(shared_datadir): function test_formatted_dataset_with_image_feature_map (line 420) | def test_formatted_dataset_with_image_feature_map(shared_datadir): function test_dataset_with_image_feature_map_change_image (line 455) | def test_dataset_with_image_feature_map_change_image(shared_datadir): function test_formatted_dataset_with_image_feature (line 525) | def test_formatted_dataset_with_image_feature(shared_datadir): function img_dataset_dir (line 576) | def img_dataset_dir(shared_datadir, tmp_path): function test_load_dataset_with_image_feature (line 587) | def test_load_dataset_with_image_feature(shared_datadir, img_dataset_dir... function test_dataset_with_image_feature_undecoded (line 602) | def test_dataset_with_image_feature_undecoded(shared_datadir): function test_formatted_dataset_with_image_feature_undecoded (line 620) | def test_formatted_dataset_with_image_feature_undecoded(shared_datadir): function test_dataset_with_image_feature_map_undecoded (line 652) | def test_dataset_with_image_feature_map_undecoded(shared_datadir): function test_image_embed_storage (line 671) | def test_image_embed_storage(shared_datadir): function test_encode_np_array (line 693) | def test_encode_np_array(array, dtype_cast, expected_image_format): FILE: tests/features/test_nifti.py function test_nifti_feature_encode_example (line 29) | def test_nifti_feature_encode_example(shared_datadir, nifti_file, build_... function test_dataset_with_nifti_feature (line 44) | def test_dataset_with_nifti_feature(shared_datadir, nifti_file): function test_encode_nibabel_image (line 74) | def test_encode_nibabel_image(shared_datadir): function test_embed_storage (line 96) | def test_embed_storage(shared_datadir): function test_load_zipped_file_locally (line 124) | def test_load_zipped_file_locally(shared_datadir): function test_nifti_lazy_loading (line 134) | def test_nifti_lazy_loading(shared_datadir): FILE: tests/features/test_pdf.py function test_pdf_feature_encode_example (line 24) | def test_pdf_feature_encode_example(shared_datadir, build_example): function test_dataset_with_pdf_feature (line 38) | def test_dataset_with_pdf_feature(shared_datadir): FILE: tests/features/test_video.py function test_video_feature_encode_example (line 24) | def test_video_feature_encode_example(shared_datadir, build_example): function test_dataset_with_video_feature (line 38) | def test_dataset_with_video_feature(shared_datadir): function test_dataset_with_video_map_and_formatted (line 76) | def test_dataset_with_video_map_and_formatted(shared_datadir): function test_dataset_with_video_feature_map_is_decoded (line 100) | def test_dataset_with_video_feature_map_is_decoded(shared_datadir): function jsonl_video_dataset_path (line 130) | def jsonl_video_dataset_path(shared_datadir, tmp_path_factory): function test_load_dataset_with_video_feature (line 144) | def test_load_dataset_with_video_feature(streaming, jsonl_video_dataset_... FILE: tests/fixtures/files.py function dataset (line 23) | def dataset(): function arrow_file (line 49) | def arrow_file(tmp_path_factory, dataset): function text_file_content (line 64) | def text_file_content(): function text_file (line 69) | def text_file(tmp_path_factory): function bz2_file (line 78) | def bz2_file(tmp_path_factory): function gz_file (line 89) | def gz_file(tmp_path_factory): function lz4_file (line 100) | def lz4_file(tmp_path_factory): function seven_zip_file (line 112) | def seven_zip_file(tmp_path_factory, text_file): function tar_file (line 123) | def tar_file(tmp_path_factory, text_file): function xz_file (line 133) | def xz_file(tmp_path_factory): function zip_file (line 144) | def zip_file(tmp_path_factory, text_file): function zstd_file (line 154) | def zstd_file(tmp_path_factory): function xml_file (line 169) | def xml_file(tmp_path_factory): function dataset_dict (line 246) | def dataset_dict(): function arrow_path (line 251) | def arrow_path(tmp_path_factory): function sqlite_path (line 259) | def sqlite_path(tmp_path_factory): function csv_path (line 271) | def csv_path(tmp_path_factory): function csv2_path (line 282) | def csv2_path(tmp_path_factory): function bz2_csv_path (line 293) | def bz2_csv_path(csv_path, tmp_path_factory): function zip_csv_path (line 306) | def zip_csv_path(csv_path, csv2_path, tmp_path_factory): function zip_uppercase_csv_path (line 315) | def zip_uppercase_csv_path(csv_path, csv2_path, tmp_path_factory): function zip_csv_with_dir_path (line 324) | def zip_csv_with_dir_path(csv_path, csv2_path, tmp_path_factory): function parquet_path (line 333) | def parquet_path(tmp_path_factory): function geoparquet_path (line 351) | def geoparquet_path(tmp_path_factory): function json_list_of_dicts_path (line 359) | def json_list_of_dicts_path(tmp_path_factory): function json_dict_of_lists_path (line 368) | def json_dict_of_lists_path(tmp_path_factory): function jsonl_path (line 377) | def jsonl_path(tmp_path_factory): function jsonl2_path (line 386) | def jsonl2_path(tmp_path_factory): function jsonl_312_path (line 395) | def jsonl_312_path(tmp_path_factory): function jsonl_str_path (line 404) | def jsonl_str_path(tmp_path_factory): function jsonl_missing_fields_path (line 413) | def jsonl_missing_fields_path(tmp_path_factory): function jsonl_mixed_types_path (line 422) | def jsonl_mixed_types_path(tmp_path_factory): function text_gz_path (line 431) | def text_gz_path(tmp_path_factory, text_path): function jsonl_gz_path (line 442) | def jsonl_gz_path(tmp_path_factory, jsonl_path): function zip_jsonl_path (line 453) | def zip_jsonl_path(jsonl_path, jsonl2_path, tmp_path_factory): function zip_nested_jsonl_path (line 462) | def zip_nested_jsonl_path(zip_jsonl_path, jsonl_path, jsonl2_path, tmp_p... function zip_jsonl_with_dir_path (line 470) | def zip_jsonl_with_dir_path(jsonl_path, jsonl2_path, tmp_path_factory): function tar_jsonl_path (line 479) | def tar_jsonl_path(jsonl_path, jsonl2_path, tmp_path_factory): function tar_nested_jsonl_path (line 488) | def tar_nested_jsonl_path(tar_jsonl_path, jsonl_path, jsonl2_path, tmp_p... function text_path (line 496) | def text_path(tmp_path_factory): function text2_path (line 506) | def text2_path(tmp_path_factory): function text_dir (line 516) | def text_dir(tmp_path_factory): function text_dir_with_unsupported_extension (line 526) | def text_dir_with_unsupported_extension(tmp_path_factory): function zip_text_path (line 536) | def zip_text_path(text_path, text2_path, tmp_path_factory): function zip_text_with_dir_path (line 545) | def zip_text_with_dir_path(text_path, text2_path, tmp_path_factory): function zip_unsupported_ext_path (line 554) | def zip_unsupported_ext_path(text_path, text2_path, tmp_path_factory): function text_path_with_unicode_new_lines (line 563) | def text_path_with_unicode_new_lines(tmp_path_factory): function image_file (line 572) | def image_file(): function audio_file (line 577) | def audio_file(): function audio_file_44100 (line 582) | def audio_file_44100(): function audio_file_16000 (line 587) | def audio_file_16000(): function tensor_file (line 592) | def tensor_file(tmp_path_factory): function zip_image_path (line 602) | def zip_image_path(image_file, tmp_path_factory): function data_dir_with_hidden_files (line 611) | def data_dir_with_hidden_files(tmp_path_factory): FILE: tests/fixtures/fsspec.py class MockFileSystem (line 10) | class MockFileSystem(AbstractFileSystem): method __init__ (line 13) | def __init__(self, *args, local_root_dir, **kwargs): method mkdir (line 18) | def mkdir(self, path, *args, **kwargs): method makedirs (line 22) | def makedirs(self, path, *args, **kwargs): method rmdir (line 26) | def rmdir(self, path): method ls (line 30) | def ls(self, path, detail=True, *args, **kwargs): method info (line 38) | def info(self, path, *args, **kwargs): method cp_file (line 44) | def cp_file(self, path1, path2, *args, **kwargs): method rm_file (line 49) | def rm_file(self, path, *args, **kwargs): method rm (line 53) | def rm(self, path, *args, **kwargs): method _open (line 57) | def _open(self, path, *args, **kwargs): method created (line 61) | def created(self, path): method modified (line 65) | def modified(self, path): method _strip_protocol (line 70) | def _strip_protocol(cls, path): class TmpDirFileSystem (line 77) | class TmpDirFileSystem(MockFileSystem): method __init__ (line 81) | def __init__(self, *args, **kwargs): method _strip_protocol (line 86) | def _strip_protocol(cls, path): function mock_fsspec (line 94) | def mock_fsspec(): function mockfs (line 103) | def mockfs(tmp_path_factory, mock_fsspec): function tmpfs (line 109) | def tmpfs(tmp_path_factory, mock_fsspec): FILE: tests/fixtures/hub.py function ci_hub_config (line 32) | def ci_hub_config(monkeypatch): function set_ci_hub_access_token (line 51) | def set_ci_hub_access_token(ci_hub_config, monkeypatch): function _http_ci_user_agent (line 62) | def _http_ci_user_agent(*args, **kwargs): function set_hf_ci_headers (line 68) | def set_hf_ci_headers(monkeypatch): function hf_api (line 78) | def hf_api(): function hf_token (line 83) | def hf_token(): function cleanup_repo (line 88) | def cleanup_repo(hf_api: HfApi): function cleanup_bucket (line 96) | def cleanup_bucket(hf_api: HfApi): function temporary_repo (line 104) | def temporary_repo(cleanup_repo): function temporary_bucket (line 120) | def temporary_bucket(cleanup_bucket): function _hf_gated_dataset_repo_txt_data (line 136) | def _hf_gated_dataset_repo_txt_data(hf_api: HfApi, hf_token, text_file_c... function hf_gated_dataset_repo_txt_data (line 156) | def hf_gated_dataset_repo_txt_data(_hf_gated_dataset_repo_txt_data, ci_h... function hf_private_dataset_repo_txt_data_ (line 161) | def hf_private_dataset_repo_txt_data_(hf_api: HfApi, hf_token, text_file... function hf_private_dataset_repo_txt_data (line 180) | def hf_private_dataset_repo_txt_data(hf_private_dataset_repo_txt_data_, ... function hf_private_dataset_repo_zipped_txt_data_ (line 185) | def hf_private_dataset_repo_zipped_txt_data_(hf_api: HfApi, hf_token, zi... function hf_private_dataset_repo_zipped_txt_data (line 204) | def hf_private_dataset_repo_zipped_txt_data(hf_private_dataset_repo_zipp... function hf_private_dataset_repo_zipped_img_data_ (line 209) | def hf_private_dataset_repo_zipped_img_data_(hf_api: HfApi, hf_token, zi... function hf_private_dataset_repo_zipped_img_data (line 228) | def hf_private_dataset_repo_zipped_img_data(hf_private_dataset_repo_zipp... FILE: tests/io/test_csv.py function _check_csv_dataset (line 13) | def _check_csv_dataset(dataset, expected_features): function test_dataset_from_csv_keep_in_memory (line 23) | def test_dataset_from_csv_keep_in_memory(keep_in_memory, csv_path, tmp_p... function test_dataset_from_csv_features (line 41) | def test_dataset_from_csv_features(features, csv_path, tmp_path): function test_dataset_from_csv_split (line 54) | def test_dataset_from_csv_split(split, csv_path, tmp_path): function test_dataset_from_csv_path_type (line 63) | def test_dataset_from_csv_path_type(path_type, csv_path, tmp_path): function _check_csv_datasetdict (line 74) | def _check_csv_datasetdict(dataset_dict, expected_features, splits=("tra... function test_csv_datasetdict_reader_keep_in_memory (line 86) | def test_csv_datasetdict_reader_keep_in_memory(keep_in_memory, csv_path,... function test_csv_datasetdict_reader_features (line 104) | def test_csv_datasetdict_reader_features(features, csv_path, tmp_path): function test_csv_datasetdict_reader_split (line 117) | def test_csv_datasetdict_reader_split(split, csv_path, tmp_path): function iter_csv_file (line 129) | def iter_csv_file(csv_path): function test_dataset_to_csv (line 134) | def test_dataset_to_csv(csv_path, tmp_path): function test_dataset_to_csv_multiproc (line 147) | def test_dataset_to_csv_multiproc(csv_path, tmp_path): function test_dataset_to_csv_invalidproc (line 160) | def test_dataset_to_csv_invalidproc(csv_path, tmp_path): function test_dataset_to_csv_fsspec (line 168) | def test_dataset_to_csv_fsspec(dataset, mockfs): FILE: tests/io/test_json.py function _check_json_dataset (line 14) | def _check_json_dataset(dataset, expected_features): function test_dataset_from_json_keep_in_memory (line 24) | def test_dataset_from_json_keep_in_memory(keep_in_memory, jsonl_path, tm... function test_dataset_from_json_features (line 42) | def test_dataset_from_json_features(features, jsonl_path, tmp_path): function test_dataset_from_json_with_unsorted_column_names (line 60) | def test_dataset_from_json_with_unsorted_column_names(features, jsonl_31... function test_dataset_from_json_with_mismatched_features (line 76) | def test_dataset_from_json_with_mismatched_features(jsonl_312_path, tmp_... function test_dataset_from_json_with_missing_fields (line 93) | def test_dataset_from_json_with_missing_fields(jsonl_missing_fields_path... function test_dataset_from_json_with_mixed_types (line 108) | def test_dataset_from_json_with_mixed_types(jsonl_mixed_types_path, tmp_... function test_dataset_from_json_split (line 121) | def test_dataset_from_json_split(split, jsonl_path, tmp_path): function test_dataset_from_json_path_type (line 130) | def test_dataset_from_json_path_type(path_type, jsonl_path, tmp_path): function _check_json_datasetdict (line 141) | def _check_json_datasetdict(dataset_dict, expected_features, splits=("tr... function test_datasetdict_from_json_keep_in_memory (line 153) | def test_datasetdict_from_json_keep_in_memory(keep_in_memory, jsonl_path... function test_datasetdict_from_json_features (line 171) | def test_datasetdict_from_json_features(features, jsonl_path, tmp_path): function test_datasetdict_from_json_splits (line 183) | def test_datasetdict_from_json_splits(split, jsonl_path, tmp_path): function load_json (line 196) | def load_json(buffer): function load_json_lines (line 200) | def load_json_lines(buffer): class TestJsonDatasetWriter (line 204) | class TestJsonDatasetWriter: method test_dataset_to_json_lines (line 206) | def test_dataset_to_json_lines(self, lines, load_json_function, dataset): method test_dataset_to_json_orient (line 226) | def test_dataset_to_json_orient(self, orient, container, keys, len_at,... method test_dataset_to_json_lines_multiproc (line 245) | def test_dataset_to_json_lines_multiproc(self, lines, load_json_functi... method test_dataset_to_json_orient_multiproc (line 265) | def test_dataset_to_json_orient_multiproc(self, orient, container, key... method test_dataset_to_json_orient_invalidproc (line 283) | def test_dataset_to_json_orient_invalidproc(self, dataset): method test_dataset_to_json_compression (line 289) | def test_dataset_to_json_compression(self, shared_datadir, tmp_path_fa... method test_dataset_to_json_fsspec (line 300) | def test_dataset_to_json_fsspec(self, dataset, mockfs): FILE: tests/io/test_parquet.py function _check_parquet_dataset (line 21) | def _check_parquet_dataset(dataset, expected_features): function test_dataset_from_parquet_keep_in_memory (line 31) | def test_dataset_from_parquet_keep_in_memory(keep_in_memory, parquet_pat... function test_dataset_from_parquet_features (line 49) | def test_dataset_from_parquet_features(features, parquet_path, tmp_path): function test_dataset_from_parquet_split (line 61) | def test_dataset_from_parquet_split(split, parquet_path, tmp_path): function test_dataset_from_parquet_path_type (line 70) | def test_dataset_from_parquet_path_type(path_type, parquet_path, tmp_path): function test_parquet_read_geoparquet (line 81) | def test_parquet_read_geoparquet(geoparquet_path, tmp_path): function test_parquet_read_filters (line 100) | def test_parquet_read_filters(parquet_path, tmp_path): function _check_parquet_datasetdict (line 110) | def _check_parquet_datasetdict(dataset_dict, expected_features, splits=(... function test_parquet_datasetdict_reader_keep_in_memory (line 122) | def test_parquet_datasetdict_reader_keep_in_memory(keep_in_memory, parqu... function test_parquet_datasetdict_reader_features (line 143) | def test_parquet_datasetdict_reader_features(streaming, features, parque... function test_parquet_datasetdict_reader_columns (line 160) | def test_parquet_datasetdict_reader_columns(streaming, columns, pass_fea... function test_parquet_datasetdict_reader_split (line 189) | def test_parquet_datasetdict_reader_split(split, parquet_path, tmp_path): function test_parquet_write (line 202) | def test_parquet_write(dataset, tmp_path): function test_parquet_write_uses_content_defined_chunking (line 210) | def test_parquet_write_uses_content_defined_chunking(dataset, tmp_path): function test_parquet_writer_persist_cdc_options_as_metadata (line 227) | def test_parquet_writer_persist_cdc_options_as_metadata(dataset, tmp_path): function test_dataset_to_parquet_keeps_features (line 261) | def test_dataset_to_parquet_keeps_features(shared_datadir, tmp_path): function test_dataset_to_parquet_json_for_empty_struct (line 276) | def test_dataset_to_parquet_json_for_empty_struct(shared_datadir, tmp_pa... function test_get_arrow_writer_batch_size_from_features (line 300) | def test_get_arrow_writer_batch_size_from_features(feature, expected): function test_dataset_to_parquet_fsspec (line 304) | def test_dataset_to_parquet_fsspec(dataset, mockfs): FILE: tests/io/test_sql.py function _check_sql_dataset (line 17) | def _check_sql_dataset(dataset, expected_features): function test_dataset_from_sql_keep_in_memory (line 28) | def test_dataset_from_sql_keep_in_memory(keep_in_memory, sqlite_path, tm... function test_dataset_from_sql_features (line 49) | def test_dataset_from_sql_features(features, sqlite_path, tmp_path, set_... function iter_sql_file (line 60) | def iter_sql_file(sqlite_path): function test_dataset_to_sql (line 69) | def test_dataset_to_sql(sqlite_path, tmp_path, set_sqlalchemy_silence_ub... function test_dataset_to_sql_multiproc (line 83) | def test_dataset_to_sql_multiproc(sqlite_path, tmp_path, set_sqlalchemy_... function test_dataset_to_sql_invalidproc (line 97) | def test_dataset_to_sql_invalidproc(sqlite_path, tmp_path, set_sqlalchem... FILE: tests/io/test_text.py function _check_text_dataset (line 9) | def _check_text_dataset(dataset, expected_features): function test_dataset_from_text_keep_in_memory (line 19) | def test_dataset_from_text_keep_in_memory(keep_in_memory, text_path, tmp... function test_dataset_from_text_features (line 36) | def test_dataset_from_text_features(features, text_path, tmp_path): function test_dataset_from_text_split (line 48) | def test_dataset_from_text_split(split, text_path, tmp_path): function test_dataset_from_text_path_type (line 57) | def test_dataset_from_text_path_type(path_type, text_path, tmp_path): function _check_text_datasetdict (line 68) | def _check_text_datasetdict(dataset_dict, expected_features, splits=("tr... function test_datasetdict_from_text_keep_in_memory (line 80) | def test_datasetdict_from_text_keep_in_memory(keep_in_memory, text_path,... function test_datasetdict_from_text_features (line 97) | def test_datasetdict_from_text_features(features, text_path, tmp_path): function test_datasetdict_from_text_split (line 110) | def test_datasetdict_from_text_split(split, text_path, tmp_path): FILE: tests/packaged_modules/test_arrow.py function arrow_file_streaming_format (line 10) | def arrow_file_streaming_format(tmp_path): function arrow_file_file_format (line 24) | def arrow_file_file_format(tmp_path): function test_arrow_generate_tables (line 44) | def test_arrow_generate_tables(file_fixture, config_kwargs, request): function test_config_raises_when_invalid_name (line 53) | def test_config_raises_when_invalid_name() -> None: function test_config_raises_when_invalid_data_files (line 59) | def test_config_raises_when_invalid_data_files(data_files) -> None: FILE: tests/packaged_modules/test_audiofolder.py function cache_dir (line 17) | def cache_dir(tmp_path): function data_files_with_labels_no_metadata (line 22) | def data_files_with_labels_no_metadata(tmp_path, audio_file): function audio_file_with_metadata (line 43) | def audio_file_with_metadata(tmp_path, audio_file): function data_files_with_one_split_and_metadata (line 58) | def data_files_with_one_split_and_metadata(tmp_path, audio_file): function data_files_with_two_splits_and_metadata (line 90) | def data_files_with_two_splits_and_metadata(request, tmp_path, audio_file): function data_files_with_zip_archives (line 151) | def data_files_with_zip_archives(tmp_path, audio_file_44100, audio_file_... function test_config_raises_when_invalid_name (line 185) | def test_config_raises_when_invalid_name() -> None: function test_config_raises_when_invalid_data_files (line 191) | def test_config_raises_when_invalid_data_files(data_files) -> None: function test_generate_examples_with_labels (line 198) | def test_generate_examples_with_labels(data_files_with_labels_no_metadat... function test_generate_examples_drop_labels (line 213) | def test_generate_examples_drop_labels(data_files_with_labels_no_metadat... function test_generate_examples_drop_metadata (line 237) | def test_generate_examples_drop_metadata(audio_file_with_metadata, drop_... function test_data_files_with_metadata_and_single_split (line 263) | def test_data_files_with_metadata_and_single_split(streaming, cache_dir,... function test_data_files_with_metadata_and_multiple_splits (line 281) | def test_data_files_with_metadata_and_multiple_splits(streaming, cache_d... function test_data_files_with_metadata_and_archives (line 299) | def test_data_files_with_metadata_and_archives(streaming, cache_dir, dat... function test_data_files_with_wrong_metadata_file_name (line 324) | def test_data_files_with_wrong_metadata_file_name(cache_dir, tmp_path, a... function test_data_files_with_custom_audio_file_name_column_in_metadata_file (line 346) | def test_data_files_with_custom_audio_file_name_column_in_metadata_file(... function test_data_files_with_with_metadata_in_different_formats (line 368) | def test_data_files_with_with_metadata_in_different_formats(cache_dir, t... FILE: tests/packaged_modules/test_cache.py function test_cache (line 14) | def test_cache(text_dir: Path, tmp_path: Path): function test_cache_streaming (line 24) | def test_cache_streaming(text_dir: Path, tmp_path: Path): function test_cache_auto_hash (line 34) | def test_cache_auto_hash(text_dir: Path, tmp_path: Path): function test_cache_auto_hash_with_custom_config (line 43) | def test_cache_auto_hash_with_custom_config(text_dir: Path, tmp_path: Pa... function test_cache_missing (line 61) | def test_cache_missing(text_dir: Path, tmp_path: Path): function test_cache_multi_configs (line 76) | def test_cache_multi_configs(tmp_path: Path): function test_cache_single_config (line 106) | def test_cache_single_config(tmp_path: Path): function test_cache_capital_letters (line 140) | def test_cache_capital_letters(tmp_path: Path): FILE: tests/packaged_modules/test_csv.py function csv_file (line 16) | def csv_file(tmp_path): function malformed_csv_file (line 31) | def malformed_csv_file(tmp_path): function csv_file_with_image (line 46) | def csv_file_with_image(tmp_path, image_file): function csv_file_with_label (line 60) | def csv_file_with_label(tmp_path): function csv_file_with_int_list (line 76) | def csv_file_with_int_list(tmp_path): function test_config_raises_when_invalid_name (line 91) | def test_config_raises_when_invalid_name() -> None: function test_config_raises_when_invalid_data_files (line 97) | def test_config_raises_when_invalid_data_files(data_files) -> None: function test_csv_generate_tables_raises_error_with_malformed_csv (line 102) | def test_csv_generate_tables_raises_error_with_malformed_csv(csv_file, m... function test_csv_cast_image (line 119) | def test_csv_cast_image(csv_file_with_image): function test_csv_cast_label (line 132) | def test_csv_cast_label(csv_file_with_label): function test_csv_convert_int_list (line 145) | def test_csv_convert_int_list(csv_file_with_int_list): FILE: tests/packaged_modules/test_folder_based_builder.py class DummyFeature (line 25) | class DummyFeature: class DummyFolderBasedBuilder (line 29) | class DummyFolderBasedBuilder(FolderBasedBuilder): function cache_dir (line 37) | def cache_dir(tmp_path): function auto_text_file (line 42) | def auto_text_file(text_file): function data_files_with_labels_no_metadata (line 47) | def data_files_with_labels_no_metadata(tmp_path, auto_text_file): function data_files_with_different_levels_no_metadata (line 68) | def data_files_with_different_levels_no_metadata(tmp_path, auto_text_file): function data_files_with_one_label_no_metadata (line 89) | def data_files_with_one_label_no_metadata(tmp_path, auto_text_file): function files_with_labels_and_duplicated_label_key_in_metadata (line 105) | def files_with_labels_and_duplicated_label_key_in_metadata(tmp_path, aut... function file_with_metadata (line 132) | def file_with_metadata(tmp_path, text_file): function data_files_with_one_split_and_metadata (line 147) | def data_files_with_one_split_and_metadata(tmp_path, auto_text_file): function data_files_with_two_splits_and_metadata (line 179) | def data_files_with_two_splits_and_metadata(tmp_path, auto_text_file): function data_files_with_zip_archives (line 221) | def data_files_with_zip_archives(tmp_path, auto_text_file): function test_config_raises_when_invalid_name (line 254) | def test_config_raises_when_invalid_name() -> None: function test_config_raises_when_invalid_data_files (line 260) | def test_config_raises_when_invalid_data_files(data_files) -> None: function test_inferring_labels_from_data_dirs (line 265) | def test_inferring_labels_from_data_dirs(data_files_with_labels_no_metad... function test_default_folder_builder_not_usable (line 275) | def test_default_folder_builder_not_usable(data_files_with_labels_no_met... function test_streaming_patched (line 287) | def test_streaming_patched(): function test_generate_examples_drop_labels (line 296) | def test_generate_examples_drop_labels( function test_generate_examples_drop_metadata (line 323) | def test_generate_examples_drop_metadata(file_with_metadata, drop_metada... function test_data_files_with_different_levels_no_metadata (line 352) | def test_data_files_with_different_levels_no_metadata( function test_data_files_with_one_label_no_metadata (line 375) | def test_data_files_with_one_label_no_metadata(data_files_with_one_label... function test_data_files_with_metadata_and_splits (line 396) | def test_data_files_with_metadata_and_splits( function test_data_files_with_metadata_and_archives (line 417) | def test_data_files_with_metadata_and_archives(streaming, cache_dir, dat... function test_data_files_with_wrong_metadata_file_name (line 432) | def test_data_files_with_wrong_metadata_file_name(cache_dir, tmp_path, a... function test_data_files_with_custom_file_name_column_in_metadata_file (line 452) | def test_data_files_with_custom_file_name_column_in_metadata_file(cache_... function test_data_files_with_custom_file_names_column_in_metadata_file_large_string_list (line 472) | def test_data_files_with_custom_file_names_column_in_metadata_file_large... FILE: tests/packaged_modules/test_hdf5.py function hdf5_file (line 13) | def hdf5_file(tmp_path): function hdf5_file_with_groups (line 27) | def hdf5_file_with_groups(tmp_path): function hdf5_file_with_arrays (line 43) | def hdf5_file_with_arrays(tmp_path): function hdf5_file_with_different_dtypes (line 64) | def hdf5_file_with_different_dtypes(tmp_path): function hdf5_file_with_vlen_arrays (line 85) | def hdf5_file_with_vlen_arrays(tmp_path): function hdf5_file_with_variable_length_strings (line 115) | def hdf5_file_with_variable_length_strings(tmp_path): function hdf5_file_with_complex_data (line 140) | def hdf5_file_with_complex_data(tmp_path): function hdf5_file_with_compound_data (line 163) | def hdf5_file_with_compound_data(tmp_path): function hdf5_file_with_compound_complex_arrays (line 187) | def hdf5_file_with_compound_complex_arrays(tmp_path): function hdf5_file_with_mismatched_lengths (line 233) | def hdf5_file_with_mismatched_lengths(tmp_path): function hdf5_file_with_zero_dimensions (line 254) | def hdf5_file_with_zero_dimensions(tmp_path): function empty_hdf5_file (line 270) | def empty_hdf5_file(tmp_path): function hdf5_file_with_mixed_data_types (line 284) | def hdf5_file_with_mixed_data_types(tmp_path): function test_config_raises_when_invalid_name (line 306) | def test_config_raises_when_invalid_name(): function test_config_raises_when_invalid_data_files (line 313) | def test_config_raises_when_invalid_data_files(data_files): function test_hdf5_basic_functionality (line 319) | def test_hdf5_basic_functionality(hdf5_file): function test_hdf5_nested_groups (line 337) | def test_hdf5_nested_groups(hdf5_file_with_groups): function test_hdf5_multi_dimensional_arrays (line 355) | def test_hdf5_multi_dimensional_arrays(hdf5_file_with_arrays): function test_hdf5_vlen_arrays (line 369) | def test_hdf5_vlen_arrays(hdf5_file_with_vlen_arrays): function test_hdf5_variable_length_strings (line 393) | def test_hdf5_variable_length_strings(hdf5_file_with_variable_length_str... function test_hdf5_different_dtypes (line 416) | def test_hdf5_different_dtypes(hdf5_file_with_different_dtypes): function test_hdf5_batch_processing (line 430) | def test_hdf5_batch_processing(hdf5_file): function test_hdf5_column_filtering (line 450) | def test_hdf5_column_filtering(hdf5_file_with_groups): function test_hdf5_feature_specification (line 467) | def test_hdf5_feature_specification(hdf5_file): function test_hdf5_mismatched_lengths_error (line 478) | def test_hdf5_mismatched_lengths_error(hdf5_file_with_mismatched_lengths): function test_hdf5_zero_dimensions_handling (line 487) | def test_hdf5_zero_dimensions_handling(hdf5_file_with_zero_dimensions, c... function test_hdf5_empty_file_warning (line 515) | def test_hdf5_empty_file_warning(empty_hdf5_file, hdf5_file_with_arrays,... function test_hdf5_feature_inference (line 525) | def test_hdf5_feature_inference(hdf5_file_with_arrays): function test_hdf5_vlen_feature_inference (line 548) | def test_hdf5_vlen_feature_inference(hdf5_file_with_vlen_arrays): function test_hdf5_variable_string_feature_inference (line 568) | def test_hdf5_variable_string_feature_inference(hdf5_file_with_variable_... function test_hdf5_invalid_features (line 586) | def test_hdf5_invalid_features(hdf5_file_with_arrays): function test_hdf5_no_data_files_error (line 598) | def test_hdf5_no_data_files_error(): function test_hdf5_complex_numbers (line 608) | def test_hdf5_complex_numbers(hdf5_file_with_complex_data): function test_hdf5_compound_types (line 643) | def test_hdf5_compound_types(hdf5_file_with_compound_data): function test_hdf5_feature_inference_complex (line 663) | def test_hdf5_feature_inference_complex(hdf5_file_with_complex_data): function test_hdf5_feature_inference_compound (line 679) | def test_hdf5_feature_inference_compound(hdf5_file_with_compound_data): function test_hdf5_mixed_data_types (line 695) | def test_hdf5_mixed_data_types(hdf5_file_with_mixed_data_types): function test_hdf5_mismatched_lengths_with_column_filtering (line 714) | def test_hdf5_mismatched_lengths_with_column_filtering(hdf5_file_with_mi... function test_hdf5_compound_with_complex_arrays (line 762) | def test_hdf5_compound_with_complex_arrays(hdf5_file_with_compound_compl... function test_hdf5_feature_inference_compound_complex_arrays (line 797) | def test_hdf5_feature_inference_compound_complex_arrays(hdf5_file_with_c... FILE: tests/packaged_modules/test_imagefolder.py function cache_dir (line 17) | def cache_dir(tmp_path): function data_files_with_labels_no_metadata (line 22) | def data_files_with_labels_no_metadata(tmp_path, image_file): function image_files_with_labels_and_duplicated_label_key_in_metadata (line 43) | def image_files_with_labels_and_duplicated_label_key_in_metadata(tmp_pat... function image_file_with_metadata (line 70) | def image_file_with_metadata(tmp_path, image_file): function image_files_with_metadata_that_misses_one_image (line 85) | def image_files_with_metadata_that_misses_one_image(tmp_path, image_file): function data_files_with_one_split_and_metadata (line 102) | def data_files_with_one_split_and_metadata(request, tmp_path, image_file): function data_files_with_two_splits_and_metadata (line 145) | def data_files_with_two_splits_and_metadata(request, tmp_path, image_file): function data_files_with_zip_archives (line 206) | def data_files_with_zip_archives(tmp_path, image_file): function test_config_raises_when_invalid_name (line 243) | def test_config_raises_when_invalid_name() -> None: function test_config_raises_when_invalid_data_files (line 249) | def test_config_raises_when_invalid_data_files(data_files) -> None: function test_generate_examples_with_labels (line 256) | def test_generate_examples_with_labels(data_files_with_labels_no_metadat... function test_generate_examples_drop_labels (line 271) | def test_generate_examples_drop_labels(data_files_with_labels_no_metadat... function test_generate_examples_drop_metadata (line 295) | def test_generate_examples_drop_metadata(image_file_with_metadata, drop_... function test_data_files_with_metadata_and_single_split (line 321) | def test_data_files_with_metadata_and_single_split(streaming, cache_dir,... function test_data_files_with_metadata_and_multiple_splits (line 339) | def test_data_files_with_metadata_and_multiple_splits(streaming, cache_d... function test_data_files_with_metadata_and_archives (line 357) | def test_data_files_with_metadata_and_archives(streaming, cache_dir, dat... function test_data_files_with_wrong_metadata_file_name (line 374) | def test_data_files_with_wrong_metadata_file_name(cache_dir, tmp_path, i... function test_data_files_with_custom_image_file_name_column_in_metadata_file (line 396) | def test_data_files_with_custom_image_file_name_column_in_metadata_file(... function test_data_files_with_with_metadata_in_different_formats (line 418) | def test_data_files_with_with_metadata_in_different_formats(cache_dir, t... FILE: tests/packaged_modules/test_json.py function jsonl_file (line 14) | def jsonl_file(tmp_path): function ndjson_file (line 30) | def ndjson_file(tmp_path): function jsonl_file_utf16_encoded (line 45) | def jsonl_file_utf16_encoded(tmp_path): function json_file_with_list_of_dicts (line 60) | def json_file_with_list_of_dicts(tmp_path): function json_file_with_list_of_strings (line 77) | def json_file_with_list_of_strings(tmp_path): function json_file_with_list_of_dicts_field (line 94) | def json_file_with_list_of_dicts_field(tmp_path): function json_file_with_list_of_strings_field (line 115) | def json_file_with_list_of_strings_field(tmp_path): function json_file_with_dict_of_lists_field (line 136) | def json_file_with_dict_of_lists_field(tmp_path): function json_file_with_list_of_dicts_with_sorted_columns (line 156) | def json_file_with_list_of_dicts_with_sorted_columns(tmp_path): function json_file_with_list_of_dicts_with_sorted_columns_field (line 173) | def json_file_with_list_of_dicts_with_sorted_columns_field(tmp_path): function jsonl_file_with_mix_of_str_and_int (line 194) | def jsonl_file_with_mix_of_str_and_int(tmp_path): function jsonl_file_with_dicts_of_varying_keys (line 209) | def jsonl_file_with_dicts_of_varying_keys(tmp_path): function jsonl_file_with_lists_of_dicts_of_varying_keys (line 224) | def jsonl_file_with_lists_of_dicts_of_varying_keys(tmp_path): function jsonl_file_with_messages (line 271) | def jsonl_file_with_messages(tmp_path): function test_config_raises_when_invalid_name (line 279) | def test_config_raises_when_invalid_name() -> None: function test_config_raises_when_invalid_data_files (line 285) | def test_config_raises_when_invalid_data_files(data_files) -> None: function test_json_generate_tables (line 307) | def test_json_generate_tables(file_fixture, config_kwargs, expected, req... function test_json_generate_tables_with_missing_features (line 339) | def test_json_generate_tables_with_missing_features(file_fixture, config... function test_json_generate_tables_with_sorted_columns (line 355) | def test_json_generate_tables_with_sorted_columns(file_fixture, config_k... FILE: tests/packaged_modules/test_lance.py function lance_dataset (line 10) | def lance_dataset(tmp_path) -> str: function lance_hf_dataset (line 25) | def lance_hf_dataset(tmp_path) -> str: function test_load_lance_dataset (line 53) | def test_load_lance_dataset(lance_dataset): function test_load_hf_dataset (line 67) | def test_load_hf_dataset(lance_hf_dataset, streaming): function test_load_vectors (line 84) | def test_load_vectors(lance_hf_dataset): function test_load_lance_streaming_modes (line 95) | def test_load_lance_streaming_modes(lance_hf_dataset, streaming): FILE: tests/packaged_modules/test_pandas.py function test_config_raises_when_invalid_name (line 8) | def test_config_raises_when_invalid_name() -> None: function test_config_raises_when_invalid_data_files (line 14) | def test_config_raises_when_invalid_data_files(data_files) -> None: FILE: tests/packaged_modules/test_parquet.py function test_config_raises_when_invalid_name (line 8) | def test_config_raises_when_invalid_name() -> None: function test_config_raises_when_invalid_data_files (line 14) | def test_config_raises_when_invalid_data_files(data_files) -> None: FILE: tests/packaged_modules/test_spark.py function _get_expected_row_ids_and_row_dicts_for_partition_order (line 23) | def _get_expected_row_ids_and_row_dicts_for_partition_order(df, partitio... function test_config_raises_when_invalid_name (line 32) | def test_config_raises_when_invalid_name() -> None: function test_config_raises_when_invalid_data_files (line 38) | def test_config_raises_when_invalid_data_files(data_files) -> None: function test_repartition_df_if_needed (line 45) | def test_repartition_df_if_needed(): function test_generate_iterable_examples (line 58) | def test_generate_iterable_examples(): function test_spark_examples_iterable (line 73) | def test_spark_examples_iterable(): function test_spark_examples_iterable_shuffle (line 85) | def test_spark_examples_iterable_shuffle(): function test_spark_examples_iterable_shard (line 103) | def test_spark_examples_iterable_shard(): function test_repartition_df_if_needed_max_num_df_rows (line 128) | def test_repartition_df_if_needed_max_num_df_rows(): function test_iterable_image_features (line 140) | def test_iterable_image_features(): function test_iterable_image_features_decode (line 154) | def test_iterable_image_features_decode(): FILE: tests/packaged_modules/test_sql.py function test_config_raises_when_invalid_name (line 8) | def test_config_raises_when_invalid_name() -> None: function test_config_raises_when_invalid_data_files (line 14) | def test_config_raises_when_invalid_data_files(data_files) -> None: FILE: tests/packaged_modules/test_text.py function text_file (line 15) | def text_file(tmp_path): function text_file_with_image (line 37) | def text_file_with_image(tmp_path, image_file): function test_config_raises_when_invalid_name (line 44) | def test_config_raises_when_invalid_name() -> None: function test_config_raises_when_invalid_data_files (line 50) | def test_config_raises_when_invalid_data_files(data_files) -> None: function test_text_linebreaks (line 56) | def test_text_linebreaks(text_file, keep_linebreaks): function test_text_cast_image (line 66) | def test_text_cast_image(text_file_with_image): function test_text_sample_by (line 78) | def test_text_sample_by(sample_by, text_file): FILE: tests/packaged_modules/test_videofolder.py function cache_dir (line 15) | def cache_dir(tmp_path): function video_file_path (line 20) | def video_file_path(): function data_files_with_labels_no_metadata (line 25) | def data_files_with_labels_no_metadata(tmp_path, video_file_path): function video_file_with_metadata (line 43) | def video_file_with_metadata(tmp_path, video_file_path): function data_files_with_zip_archives (line 58) | def data_files_with_zip_archives(tmp_path, video_file_path): function test_config_raises_when_invalid_name (line 90) | def test_config_raises_when_invalid_name() -> None: function test_config_raises_when_invalid_data_files (line 96) | def test_config_raises_when_invalid_data_files(data_files) -> None: function test_generate_examples_with_labels (line 101) | def test_generate_examples_with_labels(data_files_with_labels_no_metadat... function test_generate_examples_with_metadata (line 109) | def test_generate_examples_with_metadata(video_file_with_metadata, cache... function test_data_files_with_metadata_and_archives (line 121) | def test_data_files_with_metadata_and_archives(streaming, cache_dir, dat... FILE: tests/packaged_modules/test_webdataset.py function gzipped_text_wds_file (line 19) | def gzipped_text_wds_file(tmp_path, text_gz_path): function image_wds_file (line 29) | def image_wds_file(tmp_path, image_file): function upper_lower_case_file (line 43) | def upper_lower_case_file(tmp_path): function audio_wds_file (line 64) | def audio_wds_file(tmp_path, audio_file): function video_wds_file (line 78) | def video_wds_file(tmp_path): function bad_wds_file (line 93) | def bad_wds_file(tmp_path, image_file, text_file): function tensor_wds_file (line 105) | def tensor_wds_file(tmp_path, tensor_file): function test_gzipped_text_webdataset (line 119) | def test_gzipped_text_webdataset(gzipped_text_wds_file, text_path): function test_image_webdataset (line 142) | def test_image_webdataset(image_wds_file): function test_upper_lower_case (line 172) | def test_upper_lower_case(upper_lower_case_file): function test_image_webdataset_missing_keys (line 216) | def test_image_webdataset_missing_keys(image_wds_file): function test_audio_webdataset (line 247) | def test_audio_webdataset(audio_wds_file): function test_video_webdataset (line 278) | def test_video_webdataset(video_wds_file): function test_webdataset_errors_on_bad_file (line 301) | def test_webdataset_errors_on_bad_file(bad_wds_file): function test_webdataset_with_features (line 309) | def test_webdataset_with_features(image_wds_file): function test_tensor_webdataset (line 338) | def test_tensor_webdataset(tensor_wds_file): FILE: tests/test_arrow_dataset.py class PickableMagicMock (line 69) | class PickableMagicMock(MagicMock): method __reduce__ (line 70) | def __reduce__(self): class Unpicklable (line 74) | class Unpicklable: method __init__ (line 75) | def __init__(self, **kwargs): method __getstate__ (line 79) | def __getstate__(self): function picklable_map_function (line 83) | def picklable_map_function(x): function picklable_map_function_with_indices (line 87) | def picklable_map_function_with_indices(x, i): function picklable_map_function_with_rank (line 91) | def picklable_map_function_with_rank(x, r): function picklable_map_function_with_indices_and_rank (line 95) | def picklable_map_function_with_indices_and_rank(x, i, r): function picklable_filter_function (line 99) | def picklable_filter_function(x): function picklable_filter_function_with_rank (line 103) | def picklable_filter_function_with_rank(x, r): function assert_arrow_metadata_are_synced_with_dataset_features (line 107) | def assert_arrow_metadata_are_synced_with_dataset_features(dataset: Data... class BaseDatasetTest (line 128) | class BaseDatasetTest(TestCase): method inject_fixtures (line 130) | def inject_fixtures(self, caplog, set_sqlalchemy_silence_uber_warning): method _create_dummy_dataset (line 133) | def _create_dummy_dataset( method _to (line 176) | def _to(self, in_memory, tmp_dir, *datasets): method test_dummy_dataset (line 189) | def test_dummy_dataset(self, in_memory): method test_dataset_getitem (line 219) | def test_dataset_getitem(self, in_memory): method test_dummy_dataset_deepcopy (line 243) | def test_dummy_dataset_deepcopy(self, in_memory): method test_dummy_dataset_pickle (line 255) | def test_dummy_dataset_pickle(self, in_memory): method test_dummy_dataset_serialize (line 286) | def test_dummy_dataset_serialize(self, in_memory): method test_dummy_dataset_load_from_disk (line 376) | def test_dummy_dataset_load_from_disk(self, in_memory): method test_restore_saved_format (line 388) | def test_restore_saved_format(self, in_memory): method test_set_format_numpy_multiple_columns (line 398) | def test_set_format_numpy_multiple_columns(self, in_memory): method test_set_format_torch (line 437) | def test_set_format_torch(self, in_memory): method test_set_format_tf (line 467) | def test_set_format_tf(self, in_memory): method test_set_format_pandas (line 487) | def test_set_format_pandas(self, in_memory): method test_set_format_polars (line 501) | def test_set_format_polars(self, in_memory): method test_set_transform (line 516) | def test_set_transform(self, in_memory): method test_transmit_format (line 537) | def test_transmit_format(self, in_memory): method test_cast (line 553) | def test_cast(self, in_memory): method test_class_encode_column (line 569) | def test_class_encode_column(self, in_memory): method test_remove_columns (line 605) | def test_remove_columns(self, in_memory): method test_rename_column (line 630) | def test_rename_column(self, in_memory): method test_rename_columns (line 641) | def test_rename_columns(self, in_memory): method test_select_columns (line 669) | def test_select_columns(self, in_memory): method test_concatenate (line 710) | def test_concatenate(self, in_memory): method test_concatenate_formatted (line 730) | def test_concatenate_formatted(self, in_memory): method test_concatenate_with_indices (line 751) | def test_concatenate_with_indices(self, in_memory): method test_concatenate_with_indices_from_disk (line 807) | def test_concatenate_with_indices_from_disk(self, in_memory): method test_concatenate_pickle (line 838) | def test_concatenate_pickle(self, in_memory): method test_repeat (line 890) | def test_repeat(self, in_memory): method test_flatten (line 916) | def test_flatten(self, in_memory): method test_flatten_complex_image (line 979) | def test_flatten_complex_image(self, in_memory): method test_map (line 1052) | def test_map(self, in_memory): method test_map_multiprocessing (line 1164) | def test_map_multiprocessing(self, in_memory): method test_map_new_features (line 1286) | def test_map_new_features(self, in_memory): method test_map_batched (line 1300) | def test_map_batched(self, in_memory): method test_map_nested (line 1377) | def test_map_nested(self, in_memory): method test_map_return_example_as_dict_value (line 1385) | def test_map_return_example_as_dict_value(self, in_memory): method test_map_fn_kwargs (line 1392) | def test_map_fn_kwargs(self, in_memory): method test_map_caching (line 1413) | def test_map_caching(self, in_memory): method test_suffix_template_format (line 1491) | def test_suffix_template_format(self, in_memory): method test_cache_file_name_no_ext_raises_error (line 1504) | def test_cache_file_name_no_ext_raises_error(self, in_memory): method test_map_caching_reuses_cache_with_different_num_proc (line 1514) | def test_map_caching_reuses_cache_with_different_num_proc(self, in_mem... method test_map_caching_partial_remap (line 1534) | def test_map_caching_partial_remap(self, in_memory): method test_map_return_pa_table (line 1588) | def test_map_return_pa_table(self, in_memory): method test_map_return_pd_dataframe (line 1645) | def test_map_return_pd_dataframe(self, in_memory): method test_map_return_pl_dataframe (line 1685) | def test_map_return_pl_dataframe(self, in_memory): method test_map_torch (line 1728) | def test_map_torch(self, in_memory): method test_map_tf (line 1745) | def test_map_tf(self, in_memory): method test_map_jax (line 1762) | def test_map_jax(self, in_memory): method test_map_numpy (line 1778) | def test_map_numpy(self, in_memory): method test_map_tensor_batched (line 1794) | def test_map_tensor_batched(self, in_memory): method test_map_input_columns (line 1810) | def test_map_input_columns(self, in_memory): method test_map_remove_columns (line 1827) | def test_map_remove_columns(self, in_memory): method test_map_stateful_callable (line 1859) | def test_map_stateful_callable(self, in_memory): method test_map_crash_subprocess (line 1886) | def test_map_crash_subprocess(self, in_memory): method test_map_on_mixed_types (line 1905) | def test_map_on_mixed_types(self, in_memory): method test_filter (line 1920) | def test_filter(self, in_memory): method test_filter_with_indices_mapping (line 1944) | def test_filter_with_indices_mapping(self, in_memory): method test_filter_empty (line 1953) | def test_filter_empty(self, in_memory): method test_filter_batched (line 1968) | def test_filter_batched(self, in_memory): method test_filter_input_columns (line 1977) | def test_filter_input_columns(self, in_memory): method test_filter_fn_kwargs (line 1986) | def test_filter_fn_kwargs(self, in_memory): method test_filter_multiprocessing (line 2007) | def test_filter_multiprocessing(self, in_memory): method test_filter_caching (line 2030) | def test_filter_caching(self, in_memory): method test_keep_features_after_transform_specified (line 2042) | def test_keep_features_after_transform_specified(self, in_memory): method test_keep_features_after_transform_unspecified (line 2063) | def test_keep_features_after_transform_unspecified(self, in_memory): method test_keep_features_after_transform_to_file (line 2084) | def test_keep_features_after_transform_to_file(self, in_memory): method test_keep_features_after_transform_to_memory (line 2106) | def test_keep_features_after_transform_to_memory(self, in_memory): method test_keep_features_after_loading_from_cache (line 2126) | def test_keep_features_after_loading_from_cache(self, in_memory): method test_keep_features_with_new_features (line 2152) | def test_keep_features_with_new_features(self, in_memory): method test_select (line 2181) | def test_select(self, in_memory): method test_select_then_map (line 2275) | def test_select_then_map(self, in_memory): method test_pickle_after_many_transforms_on_disk (line 2294) | def test_pickle_after_many_transforms_on_disk(self, in_memory): method test_shuffle (line 2324) | def test_shuffle(self, in_memory): method test_sort (line 2359) | def test_sort(self, in_memory): method test_to_csv (line 2458) | def test_to_csv(self, in_memory): method test_to_dict (line 2510) | def test_to_dict(self, in_memory): method test_to_list (line 2532) | def test_to_list(self, in_memory): method test_to_pandas (line 2550) | def test_to_pandas(self, in_memory): method test_to_polars (line 2581) | def test_to_polars(self, in_memory): method test_to_parquet (line 2612) | def test_to_parquet(self, in_memory): method test_to_sql (line 2665) | def test_to_sql(self, in_memory): method test_train_test_split (line 2732) | def test_train_test_split(self, in_memory): method test_shard (line 2803) | def test_shard(self, in_memory): method test_flatten_indices (line 2840) | def test_flatten_indices(self, in_memory): method test_format_vectors (line 2890) | def test_format_vectors(self, in_memory): method test_format_ragged_vectors (line 2943) | def test_format_ragged_vectors(self, in_memory): method test_format_nested (line 3003) | def test_format_nested(self, in_memory): method test_format_pandas (line 3038) | def test_format_pandas(self, in_memory): method test_format_polars (line 3049) | def test_format_polars(self, in_memory): method test_transmit_format_single (line 3059) | def test_transmit_format_single(self, in_memory): method test_transmit_format_dict (line 3074) | def test_transmit_format_dict(self, in_memory): method test_with_format (line 3091) | def test_with_format(self, in_memory): method test_with_transform (line 3102) | def test_with_transform(self, in_memory): method test_tf_dataset_conversion (line 3115) | def test_tf_dataset_conversion(self, in_memory): method test_tf_index_reshuffling (line 3153) | def test_tf_index_reshuffling(self, in_memory): method test_tf_label_renaming (line 3180) | def test_tf_label_renaming(self, in_memory): method test_tf_dataset_options (line 3240) | def test_tf_dataset_options(self, in_memory): class MiscellaneousDatasetTest (line 3323) | class MiscellaneousDatasetTest(TestCase): method test_from_pandas (line 3324) | def test_from_pandas(self): method test_from_polars (line 3353) | def test_from_polars(self): method test_from_dict (line 3381) | def test_from_dict(self): method test_from_dict_on_mixed_types (line 3426) | def test_from_dict_on_mixed_types(self): method test_concatenate_mixed_memory_and_disk (line 3447) | def test_concatenate_mixed_memory_and_disk(self): method test_set_format_encode (line 3463) | def test_set_format_encode(self): method test_tf_string_encoding (line 3476) | def test_tf_string_encoding(self): function test_cast_with_sliced_list (line 3493) | def test_cast_with_sliced_list(): function test_class_encode_column_with_none (line 3503) | def test_class_encode_column_with_none(include_nulls): function test_sort_with_none (line 3515) | def test_sort_with_none(null_placement): function test_update_metadata_with_features (line 3524) | def test_update_metadata_with_features(dataset_dict): function test_concatenate_datasets (line 3542) | def test_concatenate_datasets(dataset_type, axis, expected_shape, datase... function test_concatenate_datasets_new_columns (line 3559) | def test_concatenate_datasets_new_columns(): function test_concatenate_datasets_complex_features (line 3578) | def test_concatenate_datasets_complex_features(axis): function test_concatenate_datasets_with_concatenation_tables (line 3595) | def test_concatenate_datasets_with_concatenation_tables( function test_concatenate_datasets_duplicate_columns (line 3639) | def test_concatenate_datasets_duplicate_columns(dataset): function test_interleave_datasets (line 3645) | def test_interleave_datasets(): function test_interleave_datasets_probabilities (line 3658) | def test_interleave_datasets_probabilities(): function test_interleave_datasets_oversampling_strategy (line 3675) | def test_interleave_datasets_oversampling_strategy(): function test_interleave_datasets_probabilities_oversampling_strategy (line 3688) | def test_interleave_datasets_probabilities_oversampling_strategy(): function test_dataset_iter_batch (line 3712) | def test_dataset_iter_batch(batch_size, drop_last_batch): function test_dataset_add_column (line 3742) | def test_dataset_add_column(column, expected_dtype, in_memory, transform... function test_dataset_add_item (line 3791) | def test_dataset_add_item(item, in_memory, dataset_dict, arrow_path, tra... function test_dataset_add_item_new_columns (line 3819) | def test_dataset_add_item_new_columns(): function test_dataset_add_item_introduce_feature_type (line 3835) | def test_dataset_add_item_introduce_feature_type(): function test_dataset_filter_batched_indices (line 3843) | def test_dataset_filter_batched_indices(): function test_dataset_from_file (line 3850) | def test_dataset_from_file(in_memory, dataset, arrow_file): function _check_csv_dataset (line 3859) | def _check_csv_dataset(dataset, expected_features): function test_dataset_from_csv_keep_in_memory (line 3869) | def test_dataset_from_csv_keep_in_memory(keep_in_memory, csv_path, tmp_p... function test_dataset_from_csv_features (line 3887) | def test_dataset_from_csv_features(features, csv_path, tmp_path): function test_dataset_from_csv_split (line 3900) | def test_dataset_from_csv_split(split, csv_path, tmp_path): function test_dataset_from_csv_path_type (line 3909) | def test_dataset_from_csv_path_type(path_type, csv_path, tmp_path): function _check_json_dataset (line 3920) | def _check_json_dataset(dataset, expected_features): function test_dataset_from_json_keep_in_memory (line 3930) | def test_dataset_from_json_keep_in_memory(keep_in_memory, jsonl_path, tm... function test_dataset_from_json_features (line 3948) | def test_dataset_from_json_features(features, jsonl_path, tmp_path): function test_dataset_from_json_with_class_label_feature (line 3959) | def test_dataset_from_json_with_class_label_feature(jsonl_str_path, tmp_... function test_dataset_from_json_split (line 3973) | def test_dataset_from_json_split(split, jsonl_path, tmp_path): function test_dataset_from_json_path_type (line 3982) | def test_dataset_from_json_path_type(path_type, jsonl_path, tmp_path): function _check_parquet_dataset (line 3993) | def _check_parquet_dataset(dataset, expected_features): function test_dataset_from_parquet_keep_in_memory (line 4003) | def test_dataset_from_parquet_keep_in_memory(keep_in_memory, parquet_pat... function test_dataset_from_parquet_features (line 4021) | def test_dataset_from_parquet_features(features, parquet_path, tmp_path): function test_dataset_from_parquet_split (line 4033) | def test_dataset_from_parquet_split(split, parquet_path, tmp_path): function test_dataset_from_parquet_path_type (line 4042) | def test_dataset_from_parquet_path_type(path_type, parquet_path, tmp_path): function _check_text_dataset (line 4053) | def _check_text_dataset(dataset, expected_features): function test_dataset_from_text_keep_in_memory (line 4063) | def test_dataset_from_text_keep_in_memory(keep_in_memory, text_path, tmp... function test_dataset_from_text_features (line 4080) | def test_dataset_from_text_features(features, text_path, tmp_path): function test_dataset_from_text_split (line 4092) | def test_dataset_from_text_split(split, text_path, tmp_path): function test_dataset_from_text_path_type (line 4101) | def test_dataset_from_text_path_type(path_type, text_path, tmp_path): function data_generator (line 4113) | def data_generator(): function _check_generator_dataset (line 4127) | def _check_generator_dataset(dataset, expected_features, split): function test_dataset_from_generator_keep_in_memory (line 4138) | def test_dataset_from_generator_keep_in_memory(keep_in_memory, data_gene... function test_dataset_from_generator_features (line 4156) | def test_dataset_from_generator_features(features, data_generator, tmp_p... function test_dataset_from_generator_split (line 4171) | def test_dataset_from_generator_split(split, data_generator, tmp_path): function test_dataset_from_generator_fingerprint (line 4184) | def test_dataset_from_generator_fingerprint(fingerprint, data_generator,... function test_from_spark (line 4196) | def test_from_spark(): function test_from_spark_features (line 4217) | def test_from_spark_features(): function test_from_spark_different_cache (line 4241) | def test_from_spark_different_cache(): function _check_sql_dataset (line 4256) | def _check_sql_dataset(dataset, expected_features): function test_dataset_from_sql_con_type (line 4267) | def test_dataset_from_sql_con_type(con_type, sqlite_path, tmp_path, set_... function test_dataset_from_sql_features (line 4305) | def test_dataset_from_sql_features(features, sqlite_path, tmp_path, set_... function test_dataset_from_sql_keep_in_memory (line 4318) | def test_dataset_from_sql_keep_in_memory(keep_in_memory, sqlite_path, tm... function test_dataset_to_json (line 4328) | def test_dataset_to_json(dataset, tmp_path): function test_pickle_dataset_after_transforming_the_table (line 4364) | def test_pickle_dataset_after_transforming_the_table(in_memory, method_a... function test_dummy_dataset_serialize_fs (line 4379) | def test_dummy_dataset_serialize_fs(dataset, mockfs): function test_build_local_temp_path (line 4400) | def test_build_local_temp_path(uri_or_path): class StratifiedTest (line 4415) | class StratifiedTest(TestCase): method test_errors_train_test_split_stratify (line 4416) | def test_errors_train_test_split_stratify(self): method test_train_test_split_startify (line 4450) | def test_train_test_split_startify(self): function test_dataset_estimate_nbytes (line 4481) | def test_dataset_estimate_nbytes(): function test_dataset_to_iterable_dataset (line 4498) | def test_dataset_to_iterable_dataset(dataset: Dataset): function test_dataset_format_with_unformatted_image (line 4516) | def test_dataset_format_with_unformatted_image(): function test_dataset_with_torch_dataloader (line 4530) | def test_dataset_with_torch_dataloader(dataset, batch_size): function test_map_cases (line 4546) | def test_map_cases(return_lazy_dict): function test_map_async (line 4652) | def test_map_async(): function test_filter_async (line 4674) | def test_filter_async(): function test_dataset_getitem_int_np_equivalence (line 4696) | def test_dataset_getitem_int_np_equivalence(): function test_dataset_getitem_raises (line 4702) | def test_dataset_getitem_raises(): function test_categorical_dataset (line 4714) | def test_categorical_dataset(tmpdir): function test_dataset_batch (line 4732) | def test_dataset_batch(): function test_dataset_from_dict_with_large_list (line 4783) | def test_dataset_from_dict_with_large_list(): function test_dataset_save_to_disk_with_large_list (line 4791) | def test_dataset_save_to_disk_with_large_list(tmp_path): function test_dataset_save_to_disk_and_load_from_disk_round_trip_with_large_list (line 4800) | def test_dataset_save_to_disk_and_load_from_disk_round_trip_with_large_l... function test_from_polars_with_large_list (line 4814) | def test_from_polars_with_large_list(): function test_from_polars_save_to_disk_with_large_list (line 4823) | def test_from_polars_save_to_disk_with_large_list(tmp_path): function test_from_polars_save_to_disk_and_load_from_disk_round_trip_with_large_list (line 4834) | def test_from_polars_save_to_disk_and_load_from_disk_round_trip_with_lar... function test_polars_round_trip (line 4849) | def test_polars_round_trip(): function test_add_column (line 4854) | def test_add_column(): function test_process_large_few_examples (line 4864) | def test_process_large_few_examples(tmp_path): FILE: tests/test_arrow_reader.py class ReaderTest (line 17) | class ReaderTest(BaseReader): method _get_table_from_filename (line 23) | def _get_table_from_filename(self, filename_skip_take, in_memory=False): class BaseReaderTest (line 39) | class BaseReaderTest(TestCase): method test_read (line 40) | def test_read(self): method test_read_sharded (line 76) | def test_read_sharded(self): method test_read_files (line 94) | def test_read_files(self): function test_read_table (line 116) | def test_read_table(in_memory, dataset, arrow_file): function test_read_files (line 126) | def test_read_files(in_memory, dataset, arrow_file): function test_read_instruction_spec (line 138) | def test_read_instruction_spec(): function test_make_file_instructions_basic (line 161) | def test_make_file_instructions_basic(): function test_make_file_instructions (line 200) | def test_make_file_instructions(split_name, instruction, shard_lengths, ... function test_make_file_instructions_raises (line 263) | def test_make_file_instructions_raises(name, expected_exception): FILE: tests/test_arrow_writer.py class TypedSequenceTest (line 21) | class TypedSequenceTest(TestCase): method test_no_type (line 22) | def test_no_type(self): method test_array_type_forbidden (line 26) | def test_array_type_forbidden(self): method test_try_type_and_type_forbidden (line 30) | def test_try_type_and_type_forbidden(self): method test_compatible_type (line 34) | def test_compatible_type(self): method test_incompatible_type (line 38) | def test_incompatible_type(self): method test_try_compatible_type (line 42) | def test_try_compatible_type(self): method test_try_incompatible_type (line 46) | def test_try_incompatible_type(self): method test_compatible_extension_type (line 50) | def test_compatible_extension_type(self): method test_incompatible_extension_type (line 54) | def test_incompatible_extension_type(self): method test_try_compatible_extension_type (line 58) | def test_try_compatible_extension_type(self): method test_try_incompatible_extension_type (line 62) | def test_try_incompatible_extension_type(self): method test_exhaustive_cast (line 67) | def test_exhaustive_cast(self): function _check_output (line 80) | def _check_output(output, expected_num_chunks: int): function test_write (line 99) | def test_write(fields, writer_batch_size): function test_write_with_features (line 114) | def test_write_with_features(): function test_write_with_keys (line 136) | def test_write_with_keys(writer_batch_size): function test_write_batch (line 154) | def test_write_batch(fields, writer_batch_size): function test_write_table (line 173) | def test_write_table(fields, writer_batch_size): function test_write_row (line 191) | def test_write_row(fields, writer_batch_size): function test_write_file (line 206) | def test_write_file(): function get_base_dtype (line 219) | def get_base_dtype(arr_type): function change_first_primitive_element_in_list (line 226) | def change_first_primitive_element_in_list(lst, value): function test_optimized_int_type_for_typed_sequence (line 235) | def test_optimized_int_type_for_typed_sequence(sequence, optimized_int_t... function test_optimized_typed_sequence (line 251) | def test_optimized_typed_sequence(sequence, col, expected_dtype): function test_arrow_writer_closes_stream (line 267) | def test_arrow_writer_closes_stream(raise_exception, tmp_path): function test_arrow_writer_with_filesystem (line 281) | def test_arrow_writer_with_filesystem(mockfs): function test_parquet_writer_write (line 294) | def test_parquet_writer_write(): function test_parquet_writer_uses_content_defined_chunking (line 307) | def test_parquet_writer_uses_content_defined_chunking(): function test_parquet_writer_writes_page_index (line 368) | def test_parquet_writer_writes_page_index(): function test_writer_embed_local_files (line 383) | def test_writer_embed_local_files(tmp_path, embed_local_files): function test_always_nullable (line 406) | def test_always_nullable(): FILE: tests/test_builder.py class DummyBuilder (line 48) | class DummyBuilder(DatasetBuilder): method _info (line 49) | def _info(self): method _split_generators (line 52) | def _split_generators(self, dl_manager): method _prepare_split (line 55) | def _prepare_split(self, split_generator, **kwargs): class DummyGeneratorBasedBuilder (line 64) | class DummyGeneratorBasedBuilder(GeneratorBasedBuilder): method _info (line 65) | def _info(self): method _split_generators (line 68) | def _split_generators(self, dl_manager): method _generate_examples (line 71) | def _generate_examples(self): class DummyArrowBasedBuilder (line 76) | class DummyArrowBasedBuilder(ArrowBasedBuilder): method _info (line 77) | def _info(self): method _split_generators (line 80) | def _split_generators(self, dl_manager): method _generate_tables (line 83) | def _generate_tables(self): class DummyGeneratorBasedBuilderWithIntegers (line 88) | class DummyGeneratorBasedBuilderWithIntegers(GeneratorBasedBuilder): method _info (line 89) | def _info(self): method _split_generators (line 92) | def _split_generators(self, dl_manager): method _generate_examples (line 95) | def _generate_examples(self): class DummyGeneratorBasedBuilderConfig (line 100) | class DummyGeneratorBasedBuilderConfig(BuilderConfig): method __init__ (line 101) | def __init__(self, content="foo", times=2, *args, **kwargs): class DummyGeneratorBasedBuilderWithConfig (line 107) | class DummyGeneratorBasedBuilderWithConfig(GeneratorBasedBuilder): method _info (line 110) | def _info(self): method _split_generators (line 113) | def _split_generators(self, dl_manager): method _generate_examples (line 116) | def _generate_examples(self): class DummyBuilderWithMultipleConfigs (line 121) | class DummyBuilderWithMultipleConfigs(DummyBuilder): class DummyBuilderWithDefaultConfig (line 128) | class DummyBuilderWithDefaultConfig(DummyBuilderWithMultipleConfigs): class DummyBuilderWithDownload (line 132) | class DummyBuilderWithDownload(DummyBuilder): method __init__ (line 133) | def __init__(self, *args, rel_path=None, abs_path=None, **kwargs): method _split_generators (line 138) | def _split_generators(self, dl_manager): class DummyArrowBasedBuilderWithShards (line 146) | class DummyArrowBasedBuilderWithShards(ArrowBasedBuilder): method _info (line 147) | def _info(self): method _split_generators (line 150) | def _split_generators(self, dl_manager): method _generate_tables (line 153) | def _generate_tables(self, filepaths): class DummyGeneratorBasedBuilderWithShards (line 159) | class DummyGeneratorBasedBuilderWithShards(GeneratorBasedBuilder): method _info (line 160) | def _info(self): method _split_generators (line 163) | def _split_generators(self, dl_manager): method _generate_examples (line 166) | def _generate_examples(self, filepaths): class DummyArrowBasedBuilderWithAmbiguousShards (line 172) | class DummyArrowBasedBuilderWithAmbiguousShards(ArrowBasedBuilder): method _info (line 173) | def _info(self): method _split_generators (line 176) | def _split_generators(self, dl_manager): method _generate_tables (line 187) | def _generate_tables(self, filepaths, dummy_kwarg_with_different_length): class DummyGeneratorBasedBuilderWithAmbiguousShards (line 193) | class DummyGeneratorBasedBuilderWithAmbiguousShards(GeneratorBasedBuilder): method _info (line 194) | def _info(self): method _split_generators (line 197) | def _split_generators(self, dl_manager): method _generate_examples (line 208) | def _generate_examples(self, filepaths, dummy_kwarg_with_different_len... function _run_concurrent_download_and_prepare (line 214) | def _run_concurrent_download_and_prepare(tmp_dir): function check_streaming (line 220) | def check_streaming(builder): class BuilderTest (line 226) | class BuilderTest(TestCase): method test_download_and_prepare (line 227) | def test_download_and_prepare(self): method test_download_and_prepare_checksum_computation (line 244) | def test_download_and_prepare_checksum_computation(self): method test_concurrent_download_and_prepare (line 260) | def test_concurrent_download_and_prepare(self): method test_download_and_prepare_with_base_path (line 289) | def test_download_and_prepare_with_base_path(self): method test_as_dataset_with_post_process (line 318) | def test_as_dataset_with_post_process(self): method test_as_dataset_with_post_process_with_index (line 454) | def test_as_dataset_with_post_process_with_index(self): method test_download_and_prepare_with_post_process (line 527) | def test_download_and_prepare_with_post_process(self): method test_error_download_and_prepare (line 616) | def test_error_download_and_prepare(self): method test_generator_based_download_and_prepare (line 630) | def test_generator_based_download_and_prepare(self): method test_cache_dir_no_args (line 651) | def test_cache_dir_no_args(self): method test_cache_dir_for_data_files (line 657) | def test_cache_dir_for_data_files(self): method test_cache_dir_for_features (line 713) | def test_cache_dir_for_features(self): method test_cache_dir_for_config_kwargs (line 723) | def test_cache_dir_for_config_kwargs(self): method test_config_names (line 748) | def test_config_names(self): method test_cache_dir_for_data_dir (line 766) | def test_cache_dir_for_data_dir(self): method test_cache_dir_for_configured_builder (line 774) | def test_cache_dir_for_configured_builder(self): function test_config_raises_when_invalid_name (line 789) | def test_config_raises_when_invalid_name() -> None: function test_config_raises_when_invalid_data_files (line 795) | def test_config_raises_when_invalid_data_files(data_files) -> None: function test_arrow_based_download_and_prepare (line 800) | def test_arrow_based_download_and_prepare(tmp_path): function test_builder_as_dataset (line 826) | def test_builder_as_dataset(split, expected_dataset_class, expected_data... function test_generator_based_builder_as_dataset (line 861) | def test_generator_based_builder_as_dataset(in_memory, tmp_path): function test_custom_writer_batch_size (line 875) | def test_custom_writer_batch_size(tmp_path, writer_batch_size, default_w... function test_builder_as_streaming_dataset (line 886) | def test_builder_as_streaming_dataset(tmp_path): function _run_test_builder_streaming_works_in_subprocesses (line 898) | def _run_test_builder_streaming_works_in_subprocesses(builder): function test_builder_streaming_works_in_subprocess (line 905) | def test_builder_streaming_works_in_subprocess(tmp_path): class DummyBuilderWithVersion (line 912) | class DummyBuilderWithVersion(GeneratorBasedBuilder): method _info (line 915) | def _info(self): method _split_generators (line 918) | def _split_generators(self, dl_manager): method _generate_examples (line 921) | def _generate_examples(self): class DummyBuilderWithBuilderConfigs (line 925) | class DummyBuilderWithBuilderConfigs(GeneratorBasedBuilder): method _info (line 928) | def _info(self): method _split_generators (line 931) | def _split_generators(self, dl_manager): method _generate_examples (line 934) | def _generate_examples(self): class CustomBuilderConfig (line 938) | class CustomBuilderConfig(BuilderConfig): method __init__ (line 939) | def __init__(self, date=None, language=None, version="2.0.0", **kwargs): class DummyBuilderWithCustomBuilderConfigs (line 946) | class DummyBuilderWithCustomBuilderConfigs(GeneratorBasedBuilder): method _info (line 950) | def _info(self): method _split_generators (line 953) | def _split_generators(self, dl_manager): method _generate_examples (line 956) | def _generate_examples(self): function test_builder_config_version (line 969) | def test_builder_config_version(builder_class, kwargs, tmp_path): function test_builder_download_and_prepare_with_absolute_output_dir (line 975) | def test_builder_download_and_prepare_with_absolute_output_dir(tmp_path): function test_builder_download_and_prepare_with_relative_output_dir (line 985) | def test_builder_download_and_prepare_with_relative_output_dir(): function test_builder_with_filesystem_download_and_prepare (line 996) | def test_builder_with_filesystem_download_and_prepare(tmp_path, mockfs): function test_builder_with_filesystem_download_and_prepare_reload (line 1008) | def test_builder_with_filesystem_download_and_prepare_reload(tmp_path, m... function test_generator_based_builder_download_and_prepare_as_parquet (line 1019) | def test_generator_based_builder_download_and_prepare_as_parquet(tmp_path): function test_generator_based_builder_download_and_prepare_sharded (line 1030) | def test_generator_based_builder_download_and_prepare_sharded(tmp_path): function test_generator_based_builder_download_and_prepare_with_max_shard_size (line 1057) | def test_generator_based_builder_download_and_prepare_with_max_shard_siz... function test_generator_based_builder_download_and_prepare_with_num_proc (line 1081) | def test_generator_based_builder_download_and_prepare_with_num_proc(tmp_... function test_generator_based_builder_download_and_prepare_with_ambiguous_shards (line 1107) | def test_generator_based_builder_download_and_prepare_with_ambiguous_sha... function test_arrow_based_builder_download_and_prepare_as_parquet (line 1113) | def test_arrow_based_builder_download_and_prepare_as_parquet(tmp_path): function test_arrow_based_builder_download_and_prepare_sharded (line 1124) | def test_arrow_based_builder_download_and_prepare_sharded(tmp_path): function test_arrow_based_builder_download_and_prepare_with_max_shard_size (line 1150) | def test_arrow_based_builder_download_and_prepare_with_max_shard_size(tm... function test_arrow_based_builder_download_and_prepare_with_num_proc (line 1173) | def test_arrow_based_builder_download_and_prepare_with_num_proc(tmp_path): function test_arrow_based_builder_download_and_prepare_with_ambiguous_shards (line 1199) | def test_arrow_based_builder_download_and_prepare_with_ambiguous_shards(... FILE: tests/test_data_files.py function complex_data_dir (line 44) | def complex_data_dir(tmp_path): function is_relative_to (line 78) | def is_relative_to(path, *other): function pattern_results (line 88) | def pattern_results(complex_data_dir): function hub_dataset_repo_path (line 116) | def hub_dataset_repo_path(tmpfs, complex_data_dir): function hub_dataset_repo_patterns_results (line 125) | def hub_dataset_repo_patterns_results(hub_dataset_repo_path, complex_dat... function test_is_inside_unrequested_special_dir (line 135) | def test_is_inside_unrequested_special_dir(complex_data_dir, pattern_res... function test_is_unrequested_hidden_file_or_is_inside_unrequested_hidden_dir (line 151) | def test_is_unrequested_hidden_file_or_is_inside_unrequested_hidden_dir(... function test_pattern_results_fixture (line 172) | def test_pattern_results_fixture(pattern_results, pattern): function test_resolve_pattern_locally (line 178) | def test_resolve_pattern_locally(complex_data_dir, pattern, pattern_resu... function test_resolve_pattern_locally_with_dot_in_base_path (line 186) | def test_resolve_pattern_locally_with_dot_in_base_path(complex_data_dir): function test_resolve_pattern_locally_prefixed_archive_glob (line 193) | def test_resolve_pattern_locally_prefixed_archive_glob(archive_jsonl, re... function test_resolve_pattern_locally_with_absolute_path (line 204) | def test_resolve_pattern_locally_with_absolute_path(tmp_path, complex_da... function test_resolve_pattern_locally_with_double_dots (line 210) | def test_resolve_pattern_locally_with_double_dots(tmp_path, complex_data... function test_resolve_pattern_locally_returns_hidden_file_only_if_requested (line 216) | def test_resolve_pattern_locally_returns_hidden_file_only_if_requested(c... function test_resolve_pattern_locally_hidden_base_path (line 223) | def test_resolve_pattern_locally_hidden_base_path(tmp_path): function test_resolve_pattern_locallyreturns_hidden_dir_only_if_requested (line 231) | def test_resolve_pattern_locallyreturns_hidden_dir_only_if_requested(com... function test_resolve_pattern_locally_returns_special_dir_only_if_requested (line 240) | def test_resolve_pattern_locally_returns_special_dir_only_if_requested(c... function test_resolve_pattern_locally_special_base_path (line 249) | def test_resolve_pattern_locally_special_base_path(tmp_path): function test_resolve_pattern_locally_with_extensions (line 258) | def test_resolve_pattern_locally_with_extensions(complex_data_dir, patte... function test_fail_resolve_pattern_locally (line 267) | def test_fail_resolve_pattern_locally(complex_data_dir): function test_resolve_pattern_locally_does_not_resolve_symbolic_links (line 273) | def test_resolve_pattern_locally_does_not_resolve_symbolic_links(tmp_pat... function test_resolve_pattern_locally_sorted_files (line 280) | def test_resolve_pattern_locally_sorted_files(tmp_path_factory): function test_resolve_pattern_in_dataset_repository (line 292) | def test_resolve_pattern_in_dataset_repository(hub_dataset_repo_path, pa... function test_resolve_pattern_in_dataset_repository_with_base_path (line 303) | def test_resolve_pattern_in_dataset_repository_with_base_path(hub_datase... function test_resolve_pattern_in_dataset_repository_with_extensions (line 314) | def test_resolve_pattern_in_dataset_repository_with_extensions(hub_datas... function test_fail_resolve_pattern_in_dataset_repository (line 323) | def test_fail_resolve_pattern_in_dataset_repository(hub_dataset_repo_path): function test_resolve_pattern_in_dataset_repository_returns_hidden_file_only_if_requested (line 328) | def test_resolve_pattern_in_dataset_repository_returns_hidden_file_only_... function test_resolve_pattern_in_dataset_repository_hidden_base_path (line 335) | def test_resolve_pattern_in_dataset_repository_hidden_base_path(tmpfs): function test_resolve_pattern_in_dataset_repository_returns_hidden_dir_only_if_requested (line 341) | def test_resolve_pattern_in_dataset_repository_returns_hidden_dir_only_i... function test_resolve_pattern_in_dataset_repository_returns_special_dir_only_if_requested (line 350) | def test_resolve_pattern_in_dataset_repository_returns_special_dir_only_... function test_resolve_pattern_in_dataset_repository_special_base_path (line 359) | def test_resolve_pattern_in_dataset_repository_special_base_path(tmpfs): function dummy_fs (line 366) | def dummy_fs(): function test_resolve_pattern_fs (line 375) | def test_resolve_pattern_fs(dummy_fs): function test_DataFilesList_from_patterns_in_dataset_repository_ (line 381) | def test_DataFilesList_from_patterns_in_dataset_repository_( function test_DataFilesList_from_patterns_locally_with_extra_files (line 392) | def test_DataFilesList_from_patterns_locally_with_extra_files(complex_da... function test_DataFilesList_from_patterns_raises_FileNotFoundError (line 398) | def test_DataFilesList_from_patterns_raises_FileNotFoundError(complex_da... class TestDataFilesDict (line 403) | class TestDataFilesDict: method test_key_order_after_copy (line 404) | def test_key_order_after_copy(self): function test_DataFilesDict_from_patterns_in_dataset_repository (line 411) | def test_DataFilesDict_from_patterns_in_dataset_repository( function test_DataFilesDict_from_patterns_in_dataset_repository_with_base_path (line 432) | def test_DataFilesDict_from_patterns_in_dataset_repository_with_base_path( function test_DataFilesDict_from_patterns_locally (line 445) | def test_DataFilesDict_from_patterns_locally(complex_data_dir, pattern_r... function test_DataFilesDict_from_patterns_in_dataset_repository_hashing (line 455) | def test_DataFilesDict_from_patterns_in_dataset_repository_hashing(hub_d... function test_DataFilesDict_from_patterns_locally_or_remote_hashing (line 471) | def test_DataFilesDict_from_patterns_locally_or_remote_hashing(text_file): function test_DataFilesPatternsList (line 493) | def test_DataFilesPatternsList(text_file): function test_DataFilesPatternsDict (line 514) | def test_DataFilesPatternsDict(text_file): function mock_fs (line 524) | def mock_fs(file_paths: List[str]): function test_get_data_files_patterns (line 653) | def test_get_data_files_patterns(base_path, data_file_per_split): function test_get_data_patterns_from_directory_with_the_word_data_twice (line 685) | def test_get_data_patterns_from_directory_with_the_word_data_twice(tmp_p... FILE: tests/test_dataset_dict.py class DatasetDictTest (line 29) | class DatasetDictTest(TestCase): method _create_dummy_dataset (line 30) | def _create_dummy_dataset(self, multiple_columns=False, int_to_float=F... method _create_dummy_dataset_dict (line 46) | def _create_dummy_dataset_dict(self, multiple_columns=False, int_to_fl... method _create_dummy_iterable_dataset (line 54) | def _create_dummy_iterable_dataset(self, multiple_columns=False) -> It... method _create_dummy_iterable_dataset_dict (line 66) | def _create_dummy_iterable_dataset_dict(self, multiple_columns=False) ... method test_flatten (line 74) | def test_flatten(self): method test_set_format_numpy (line 86) | def test_set_format_numpy(self): method test_set_format_torch (line 122) | def test_set_format_torch(self): method test_set_format_tf (line 150) | def test_set_format_tf(self): method test_set_format_pandas (line 173) | def test_set_format_pandas(self): method test_set_format_polars (line 189) | def test_set_format_polars(self): method test_set_transform (line 206) | def test_set_transform(self): method test_with_format (line 230) | def test_with_format(self): method test_with_transform (line 238) | def test_with_transform(self): method test_cast (line 249) | def test_cast(self): method test_remove_columns (line 260) | def test_remove_columns(self): method test_rename_column (line 282) | def test_rename_column(self): method test_select_columns (line 290) | def test_select_columns(self): method test_map (line 316) | def test_map(self): method test_iterable_map (line 357) | def test_iterable_map(self): method test_filter (line 370) | def test_filter(self): method test_iterable_filter (line 395) | def test_iterable_filter(self): method test_iterable_dataset_dict_push_to_hub_max_shard_size_and_num_shards_are_mutually_exclusive (line 407) | def test_iterable_dataset_dict_push_to_hub_max_shard_size_and_num_shar... method test_iterable_dataset_dict_push_to_hub_forwards_max_shard_size_to_each_split (line 412) | def test_iterable_dataset_dict_push_to_hub_forwards_max_shard_size_to_... method test_sort (line 467) | def test_sort(self): method test_shuffle (line 492) | def test_shuffle(self): method test_flatten_indices (line 549) | def test_flatten_indices(self): method test_check_values_type (line 571) | def test_check_values_type(self): method test_serialization (line 580) | def test_serialization(self): method test_load_from_disk (line 624) | def test_load_from_disk(self): method test_align_labels_with_mapping (line 637) | def test_align_labels_with_mapping(self): function test_dummy_datasetdict_serialize_fs (line 677) | def test_dummy_datasetdict_serialize_fs(mockfs): function _check_csv_datasetdict (line 695) | def _check_csv_datasetdict(dataset_dict, expected_features, splits=("tra... function test_datasetdict_from_csv_keep_in_memory (line 707) | def test_datasetdict_from_csv_keep_in_memory(keep_in_memory, csv_path, t... function test_datasetdict_from_csv_features (line 725) | def test_datasetdict_from_csv_features(features, csv_path, tmp_path): function test_datasetdict_from_csv_split (line 738) | def test_datasetdict_from_csv_split(split, csv_path, tmp_path): function _check_json_datasetdict (line 751) | def _check_json_datasetdict(dataset_dict, expected_features, splits=("tr... function test_datasetdict_from_json_keep_in_memory (line 763) | def test_datasetdict_from_json_keep_in_memory(keep_in_memory, jsonl_path... function test_datasetdict_from_json_features (line 781) | def test_datasetdict_from_json_features(features, jsonl_path, tmp_path): function test_datasetdict_from_json_splits (line 793) | def test_datasetdict_from_json_splits(split, jsonl_path, tmp_path): function _check_parquet_datasetdict (line 806) | def _check_parquet_datasetdict(dataset_dict, expected_features, splits=(... function test_datasetdict_from_parquet_keep_in_memory (line 818) | def test_datasetdict_from_parquet_keep_in_memory(keep_in_memory, parquet... function test_datasetdict_from_parquet_features (line 836) | def test_datasetdict_from_parquet_features(features, parquet_path, tmp_p... function test_datasetdict_from_parquet_split (line 848) | def test_datasetdict_from_parquet_split(split, parquet_path, tmp_path): function _check_text_datasetdict (line 861) | def _check_text_datasetdict(dataset_dict, expected_features, splits=("tr... function test_datasetdict_from_text_keep_in_memory (line 873) | def test_datasetdict_from_text_keep_in_memory(keep_in_memory, text_path,... function test_datasetdict_from_text_features (line 890) | def test_datasetdict_from_text_features(features, text_path, tmp_path): function test_datasetdict_from_text_split (line 902) | def test_datasetdict_from_text_split(split, text_path, tmp_path): FILE: tests/test_dataset_list.py class DatasetListTest (line 7) | class DatasetListTest(TestCase): method _create_example_records (line 8) | def _create_example_records(self): method _create_example_dict (line 16) | def _create_example_dict(self): method test_create (line 20) | def test_create(self): method test_list_dict_equivalent (line 27) | def test_list_dict_equivalent(self): method test_uneven_records (line 33) | def test_uneven_records(self): # checks what happens with missing col... method test_variable_list_records (line 39) | def test_variable_list_records(self): # checks if the type can be inf... method test_create_empty (line 44) | def test_create_empty(self): FILE: tests/test_distributed.py function test_split_dataset_by_node_map_style (line 13) | def test_split_dataset_by_node_map_style(): function test_split_dataset_by_node_iterable (line 24) | def test_split_dataset_by_node_iterable(): function test_split_dataset_by_node_iterable_sharded (line 39) | def test_split_dataset_by_node_iterable_sharded(shards_per_node): function test_split_dataset_by_node_iterable_distributed (line 58) | def test_split_dataset_by_node_iterable_distributed(): function test_distributed_shuffle_iterable (line 78) | def test_distributed_shuffle_iterable(): function test_torch_distributed_run (line 97) | def test_torch_distributed_run(streaming): function test_torch_distributed_run_streaming_with_num_workers (line 124) | def test_torch_distributed_run_streaming_with_num_workers(nproc_per_node... FILE: tests/test_download_manager.py class MockResponse (line 19) | class MockResponse: method iter_content (line 24) | def iter_content(self, **kwargs): function mock_request (line 28) | def mock_request(*args, **kwargs): function test_download_manager_download (line 33) | def test_download_manager_download(urls_type, tmp_path, tmpfs): function test_download_manager_extract (line 75) | def test_download_manager_extract(paths_type, xz_file, text_file, extrac... function test_download_manager_delete_extracted_files (line 121) | def test_download_manager_delete_extracted_files(xz_file): function _test_jsonl (line 142) | def _test_jsonl(path, file): function test_iter_archive_path (line 151) | def test_iter_archive_path(archive_jsonl, request): function test_iter_archive_file (line 160) | def test_iter_archive_file(archive_nested_jsonl, request): function test_iter_files (line 170) | def test_iter_files(data_dir_with_hidden_files): FILE: tests/test_exceptions.py function test_error_not_deprecated (line 31) | def test_error_not_deprecated(error, monkeypatch): FILE: tests/test_experimental.py function dummy_function (line 8) | def dummy_function(): class TestExperimentalFlag (line 12) | class TestExperimentalFlag(unittest.TestCase): method test_experimental_warning (line 13) | def test_experimental_warning(self): FILE: tests/test_extract.py function test_base_extractors (line 33) | def test_base_extractors( function test_extractor (line 94) | def test_extractor( function tar_file_with_dot_dot (line 144) | def tar_file_with_dot_dot(tmp_path, text_file): function tar_file_with_sym_link (line 156) | def tar_file_with_sym_link(tmp_path): function test_tar_extract_insecure_files (line 172) | def test_tar_extract_insecure_files( function test_is_zipfile_false_positive (line 188) | def test_is_zipfile_false_positive(tmpdir): FILE: tests/test_file_utils.py function zstd_path (line 62) | def zstd_path(tmp_path_factory): function tmpfs_file (line 71) | def tmpfs_file(tmpfs): function test_cached_path_extract (line 78) | def test_cached_path_extract(compression_format, gz_file, xz_file, zstd_... function test_extracted_datasets_path (line 93) | def test_extracted_datasets_path(default_extracted, default_cache_dir, x... function test_cached_path_local (line 114) | def test_cached_path_local(text_file): function test_cached_path_missing_local (line 125) | def test_cached_path_missing_local(tmp_path): function test_get_from_cache_fsspec (line 136) | def test_get_from_cache_fsspec(tmpfs_file): function test_cached_path_offline (line 144) | def test_cached_path_offline(): function test_fsspec_offline (line 150) | def test_fsspec_offline(tmp_path_factory): function test_prepare_single_hop_path_and_storage_options (line 216) | def test_prepare_single_hop_path_and_storage_options( class DummyTestFS (line 227) | class DummyTestFS(AbstractFileSystem): method __getitem__ (line 269) | def __getitem__(self, name): method ls (line 275) | def ls(self, path, detail=True, refresh=True, **kwargs): method _open (line 289) | def _open( function mock_fsspec2 (line 310) | def mock_fsspec2(): # to avoid the name collision with `mock_fsspec` fr... function _readd_double_slash_removed_by_path (line 316) | def _readd_double_slash_removed_by_path(path_as_posix: str) -> str: function test_xjoin (line 368) | def test_xjoin(input_path, paths_to_join, expected_path): function test_xdirname (line 390) | def test_xdirname(input_path, expected_path): function test_xexists (line 405) | def test_xexists(input_path, exists, tmp_path, mock_fsspec2): function test_xexists_private (line 413) | def test_xexists_private(hf_private_dataset_repo_txt_data, hf_token): function test_xsplit (line 433) | def test_xsplit(input_path, expected_head_and_tail): function test_xsplitext (line 455) | def test_xsplitext(input_path, expected_path_and_ext): function test_xopen_local (line 464) | def test_xopen_local(text_path): function test_xopen_remote (line 472) | def test_xopen_remote(): function test_xlistdir (line 488) | def test_xlistdir(input_path, expected_paths, tmp_path, mock_fsspec2): function test_xlistdir_private (line 498) | def test_xlistdir_private(hf_private_dataset_repo_zipped_txt_data, hf_to... function test_xisdir (line 519) | def test_xisdir(input_path, isdir, tmp_path, mock_fsspec2): function test_xisdir_private (line 527) | def test_xisdir_private(hf_private_dataset_repo_zipped_txt_data, hf_token): function test_xisfile (line 545) | def test_xisfile(input_path, isfile, tmp_path, mock_fsspec2): function test_xisfile_private (line 553) | def test_xisfile_private(hf_private_dataset_repo_txt_data, hf_token): function test_xgetsize (line 568) | def test_xgetsize(input_path, size, tmp_path, mock_fsspec2): function test_xgetsize_private (line 577) | def test_xgetsize_private(hf_private_dataset_repo_txt_data, hf_token): function test_xglob (line 610) | def test_xglob(input_path, expected_paths, tmp_path, mock_fsspec2): function test_xglob_private (line 621) | def test_xglob_private(hf_private_dataset_repo_zipped_txt_data, hf_token): function test_xwalk (line 643) | def test_xwalk(input_path, expected_outputs, tmp_path, mock_fsspec2): function test_xwalk_private (line 660) | def test_xwalk_private(hf_private_dataset_repo_zipped_txt_data, hf_token): function test_xrelpath (line 686) | def test_xrelpath(input_path, start_path, expected_path): class TestxPath (line 691) | class TestxPath: method test_xpath_str (line 702) | def test_xpath_str(self, input_path): method test_xpath_as_posix (line 715) | def test_xpath_as_posix(self, input_path, expected_path): method test_xpath_exists (line 727) | def test_xpath_exists(self, input_path, exists, tmp_path, mock_fsspec2): method test_xpath_glob (line 760) | def test_xpath_glob(self, input_path, pattern, expected_paths, tmp_pat... method test_xpath_rglob (line 813) | def test_xpath_rglob(self, input_path, pattern, expected_paths, tmp_pa... method test_xpath_parent (line 834) | def test_xpath_parent(self, input_path, expected_path): method test_xpath_name (line 847) | def test_xpath_name(self, input_path, expected): method test_xpath_stem (line 860) | def test_xpath_stem(self, input_path, expected): method test_xpath_suffix (line 873) | def test_xpath_suffix(self, input_path, expected): method test_xpath_with_suffix (line 890) | def test_xpath_with_suffix(self, input_path, suffix, expected): function test_get_extraction_protocol (line 906) | def test_get_extraction_protocol(urlpath, expected_protocol): function test_get_extraction_protocol_gg_drive (line 918) | def test_get_extraction_protocol_gg_drive(urlpath, expected_protocol): function test_streaming_gg_drive (line 924) | def test_streaming_gg_drive(): function test_xnumpy_load (line 929) | def test_xnumpy_load(tmp_path): FILE: tests/test_filelock.py function test_long_path (line 6) | def test_long_path(tmpdir): FILE: tests/test_filesystem.py function test_mockfs (line 13) | def test_mockfs(mockfs): function test_non_mockfs (line 18) | def test_non_mockfs(): function test_is_remote_filesystem (line 23) | def test_is_remote_filesystem(mockfs): function test_compression_filesystems (line 34) | def test_compression_filesystems(compression_fs_class, gz_file, bz2_file... function test_fs_isfile (line 53) | def test_fs_isfile(protocol, zip_jsonl_path, jsonl_gz_path): FILE: tests/test_fingerprint.py class Foo (line 34) | class Foo: method __init__ (line 35) | def __init__(self, foo): method __call__ (line 38) | def __call__(self): class DatasetChild (line 42) | class DatasetChild(datasets.Dataset): method func1 (line 44) | def func1(self, new_fingerprint, *args, **kwargs): method func2 (line 48) | def func2(self, new_fingerprint, *args, **kwargs): class UnpicklableCallable (line 52) | class UnpicklableCallable: method __init__ (line 53) | def __init__(self, callable): method __call__ (line 56) | def __call__(self, *args, **kwargs): method __getstate__ (line 60) | def __getstate__(self): class TorchModule (line 69) | class TorchModule(nn.Module): method __init__ (line 70) | def __init__(self): method forward (line 75) | def forward(self, x): class TokenizersHashTest (line 82) | class TokenizersHashTest(TestCase): method test_hash_tokenizer (line 85) | def test_hash_tokenizer(self): method test_hash_tokenizer_with_cache (line 113) | def test_hash_tokenizer_with_cache(self): method test_hash_regex (line 123) | def test_hash_regex(self): class RecurseHashTest (line 136) | class RecurseHashTest(TestCase): method test_recurse_hash_for_function (line 137) | def test_recurse_hash_for_function(self): method test_hash_ignores_line_definition_of_function (line 150) | def test_hash_ignores_line_definition_of_function(self): method test_recurse_hash_for_class (line 162) | def test_recurse_hash_for_class(self): method test_recurse_hash_for_method (line 169) | def test_recurse_hash_for_method(self): method test_hash_ipython_function (line 176) | def test_hash_ipython_function(self): method test_recurse_hash_for_function_with_shuffled_globals (line 204) | def test_recurse_hash_for_function_with_shuffled_globals(self): class HashingTest (line 227) | class HashingTest(TestCase): method test_hash_simple (line 228) | def test_hash_simple(self): method test_hash_class_instance (line 235) | def test_hash_class_instance(self): method test_hash_update (line 242) | def test_hash_update(self): method test_hash_unpicklable (line 258) | def test_hash_unpicklable(self): method test_hash_same_strings (line 262) | def test_hash_same_strings(self): method test_set_stable (line 279) | def test_set_stable(self): method test_set_doesnt_depend_on_order (line 285) | def test_set_doesnt_depend_on_order(self): method test_hash_tiktoken_encoding (line 296) | def test_hash_tiktoken_encoding(self): method test_hash_torch_tensor (line 310) | def test_hash_torch_tensor(self): method test_hash_torch_generator (line 324) | def test_hash_torch_generator(self): method test_hash_spacy_model (line 338) | def test_hash_spacy_model(self): method test_hash_torch_compiled_function (line 352) | def test_hash_torch_compiled_function(self): method test_hash_torch_compiled_module (line 365) | def test_hash_torch_compiled_module(self): function test_move_script_doesnt_change_hash (line 381) | def test_move_script_doesnt_change_hash(tmp_path: Path): function test_fingerprint_in_multiprocessing (line 406) | def test_fingerprint_in_multiprocessing(): function test_temp_cache_dir_with_tmpdir_nonexistent (line 417) | def test_temp_cache_dir_with_tmpdir_nonexistent(tmp_path, caplog): function test_temp_cache_dir_with_tmpdir_existing (line 465) | def test_temp_cache_dir_with_tmpdir_existing(tmp_path, monkeypatch): function test_temp_cache_dir_without_tmpdir (line 485) | def test_temp_cache_dir_without_tmpdir(monkeypatch): function test_temp_cache_dir_tmpdir_creation_failure (line 506) | def test_temp_cache_dir_tmpdir_creation_failure(tmp_path, monkeypatch, c... function test_temp_cache_dir_tmpdir_not_directory (line 528) | def test_temp_cache_dir_tmpdir_not_directory(tmp_path, monkeypatch): function test_fingerprint_when_transform_version_changes (line 545) | def test_fingerprint_when_transform_version_changes(): function test_dependency_on_dill (line 572) | def test_dependency_on_dill(): FILE: tests/test_fingerprint_tokenizer_stability.py function _make_mutable_backend_tokenizer (line 10) | def _make_mutable_backend_tokenizer() -> PreTrainedTokenizerFast: function test_hasher_hash_tokenizer_stable_after_call (line 18) | def test_hasher_hash_tokenizer_stable_after_call(): function test_map_cache_reused_with_tokenizer_after_call (line 26) | def test_map_cache_reused_with_tokenizer_after_call(tmp_path): FILE: tests/test_formatting.py class AnyArray (line 32) | class AnyArray: method __init__ (line 33) | def __init__(self, data) -> None: method __array__ (line 36) | def __array__(self) -> np.ndarray: function _gen_any_arrays (line 40) | def _gen_any_arrays(): function any_arrays_dataset (line 46) | def any_arrays_dataset(): class ArrowExtractorTest (line 62) | class ArrowExtractorTest(TestCase): method _create_dummy_table (line 63) | def _create_dummy_table(self): method test_python_extractor (line 66) | def test_python_extractor(self): method test_numpy_extractor (line 76) | def test_numpy_extractor(self): method test_numpy_extractor_nested (line 86) | def test_numpy_extractor_nested(self): method test_numpy_extractor_temporal (line 101) | def test_numpy_extractor_temporal(self): method test_pandas_extractor (line 113) | def test_pandas_extractor(self): method test_pandas_extractor_nested (line 127) | def test_pandas_extractor_nested(self): method test_pandas_extractor_temporal (line 142) | def test_pandas_extractor_temporal(self): method test_polars_extractor (line 155) | def test_polars_extractor(self): method test_polars_nested (line 174) | def test_polars_nested(self): method test_polars_temporal (line 194) | def test_polars_temporal(self): class LazyDictTest (line 209) | class LazyDictTest(TestCase): method _create_dummy_table (line 210) | def _create_dummy_table(self): method _create_dummy_formatter (line 213) | def _create_dummy_formatter(self): method test_lazy_dict_copy (line 216) | def test_lazy_dict_copy(self): class FormatterTest (line 227) | class FormatterTest(TestCase): method _create_dummy_table (line 228) | def _create_dummy_table(self): method test_python_formatter (line 231) | def test_python_formatter(self): method test_python_formatter_lazy (line 241) | def test_python_formatter_lazy(self): method test_numpy_formatter (line 255) | def test_numpy_formatter(self): method test_numpy_formatter_np_array_kwargs (line 266) | def test_numpy_formatter_np_array_kwargs(self): method test_numpy_formatter_image (line 278) | def test_numpy_formatter_image(self): method test_numpy_formatter_audio (line 312) | def test_numpy_formatter_audio(self): method test_pandas_formatter (line 322) | def test_pandas_formatter(self): method test_polars_formatter (line 337) | def test_polars_formatter(self): method test_torch_formatter (line 357) | def test_torch_formatter(self): method test_torch_formatter_torch_tensor_kwargs (line 378) | def test_torch_formatter_torch_tensor_kwargs(self): method test_torch_formatter_image (line 396) | def test_torch_formatter_image(self): method test_torch_formatter_audio (line 434) | def test_torch_formatter_audio(self): method test_tf_formatter (line 449) | def test_tf_formatter(self): method test_tf_formatter_tf_tensor_kwargs (line 473) | def test_tf_formatter_tf_tensor_kwargs(self): method test_tf_formatter_image (line 490) | def test_tf_formatter_image(self): method test_tf_formatter_audio (line 526) | def test_tf_formatter_audio(self): method test_jax_formatter (line 544) | def test_jax_formatter(self): method test_jax_formatter_jnp_array_kwargs (line 565) | def test_jax_formatter_jnp_array_kwargs(self): method test_jax_formatter_image (line 582) | def test_jax_formatter_image(self): method test_jax_formatter_audio (line 619) | def test_jax_formatter_audio(self): method test_jax_formatter_device (line 634) | def test_jax_formatter_device(self): class QueryTest (line 652) | class QueryTest(TestCase): method _create_dummy_table (line 653) | def _create_dummy_table(self): method _create_dummy_arrow_indices (line 656) | def _create_dummy_arrow_indices(self): method assertTableEqual (line 659) | def assertTableEqual(self, first: pa.Table, second: pa.Table): method test_query_table_int (line 665) | def test_query_table_int(self): method test_query_table_slice (line 692) | def test_query_table_slice(self): method test_query_table_range (line 747) | def test_query_table_range(self): method test_query_table_str (line 831) | def test_query_table_str(self): method test_query_table_iterable (line 842) | def test_query_table_iterable(self): method test_query_table_indexable_type (line 895) | def test_query_table_indexable_type(self): method test_query_table_invalid_key_type (line 922) | def test_query_table_invalid_key_type(self): function arrow_table (line 942) | def arrow_table(): function test_tf_formatter_sets_default_dtypes (line 956) | def test_tf_formatter_sets_default_dtypes(cast_schema, arrow_table): function test_torch_formatter_sets_default_dtypes (line 991) | def test_torch_formatter_sets_default_dtypes(cast_schema, arrow_table): function test_iterable_dataset_of_arrays_format_to_arrow (line 1015) | def test_iterable_dataset_of_arrays_format_to_arrow(any_arrays_dataset: ... function test_iterable_dataset_of_arrays_format_to_numpy (line 1020) | def test_iterable_dataset_of_arrays_format_to_numpy(any_arrays_dataset: ... function test_iterable_dataset_of_arrays_format_to_torch (line 1026) | def test_iterable_dataset_of_arrays_format_to_torch(any_arrays_dataset: ... function test_iterable_dataset_of_arrays_format_to_tf (line 1034) | def test_iterable_dataset_of_arrays_format_to_tf(any_arrays_dataset: Ite... function test_iterable_dataset_of_arrays_format_to_jax (line 1042) | def test_iterable_dataset_of_arrays_format_to_jax(any_arrays_dataset: It... FILE: tests/test_hub.py function test_dataset_url (line 18) | def test_dataset_url(repo_id, filename, revision): function test_delete_from_hub (line 23) | def test_delete_from_hub(temporary_repo, hf_api, hf_token, csv_path, ci_... FILE: tests/test_info.py function test_from_dir (line 20) | def test_from_dir(files, tmp_path_factory): function test_dataset_info_dump_and_reload (line 52) | def test_dataset_info_dump_and_reload(tmp_path, dataset_info: DatasetInfo): function test_dataset_info_to_yaml_dict (line 60) | def test_dataset_info_to_yaml_dict(): function test_dataset_info_to_yaml_dict_empty (line 89) | def test_dataset_info_to_yaml_dict_empty(): function test_dataset_infos_dict_dump_and_reload (line 122) | def test_dataset_infos_dict_dump_and_reload(tmp_path, dataset_infos_dict... function test_from_merge_same_dataset_infos (line 156) | def test_from_merge_same_dataset_infos(dataset_info): function test_dataset_info_from_dict_with_large_list (line 169) | def test_dataset_info_from_dict_with_large_list(): FILE: tests/test_info_utils.py function test_is_small_dataset (line 9) | def test_is_small_dataset(dataset_size, input_in_memory_max_size, monkey... FILE: tests/test_inspect.py function test_get_dataset_config_info (line 24) | def test_get_dataset_config_info(path, config_name, expected_splits): function test_get_dataset_config_info_private (line 30) | def test_get_dataset_config_info_private(hf_token, hf_private_dataset_re... function test_get_dataset_config_info_raises (line 47) | def test_get_dataset_config_info_raises(path, config_name, expected_exce... function test_get_dataset_config_names (line 64) | def test_get_dataset_config_names(path, expected): function test_get_dataset_default_config_name (line 81) | def test_get_dataset_default_config_name(path, expected): function test_get_dataset_info (line 97) | def test_get_dataset_info(path, expected_configs, expected_splits_in_fir... function test_get_dataset_split_names (line 115) | def test_get_dataset_split_names(path, expected_config, expected_splits): function test_get_dataset_split_names_error (line 129) | def test_get_dataset_split_names_error(path, config_name, expected_excep... FILE: tests/test_iterable_dataset.py function generate_examples_fn (line 85) | def generate_examples_fn(**kwargs): function generate_tables_fn (line 96) | def generate_tables_fn(**kwargs): function dataset (line 116) | def dataset(): function dataset_with_several_columns (line 122) | def dataset_with_several_columns(): function arrow_file (line 131) | def arrow_file(tmp_path_factory, dataset: IterableDataset): function assert_load_state_dict_resumes_iteration (line 137) | def assert_load_state_dict_resumes_iteration(ex_iterable: _BaseExamplesI... function assert_load_state_dict_resumes_arrow_iteration (line 150) | def assert_load_state_dict_resumes_arrow_iteration(ex_iterable: _BaseExa... function test_convert_to_arrow (line 177) | def test_convert_to_arrow(batch_size, drop_last_batch): function test_examples_iterable (line 207) | def test_examples_iterable(): function test_examples_iterable_with_kwargs (line 216) | def test_examples_iterable_with_kwargs(): function test_examples_iterable_shuffle_data_sources (line 225) | def test_examples_iterable_shuffle_data_sources(): function test_examples_iterable_shuffle_shards_and_metadata (line 233) | def test_examples_iterable_shuffle_shards_and_metadata(): function test_arrow_examples_iterable (line 253) | def test_arrow_examples_iterable(): function test_arrow_examples_iterable_with_kwargs (line 263) | def test_arrow_examples_iterable_with_kwargs(): function test_arrow_examples_iterable_shuffle_data_sources (line 276) | def test_arrow_examples_iterable_shuffle_data_sources(): function test_rebatched_arrow_examples_iterable (line 299) | def test_rebatched_arrow_examples_iterable(tables, batch_size, drop_last... function test_buffer_shuffled_examples_iterable (line 325) | def test_buffer_shuffled_examples_iterable(seed): function test_cycling_multi_sources_examples_iterable (line 358) | def test_cycling_multi_sources_examples_iterable(): function test_randomly_cycling_multi_sources_examples_iterable (line 374) | def test_randomly_cycling_multi_sources_examples_iterable(probabilities): function test_randomly_cycling_multi_sources_examples_iterable_state (line 407) | def test_randomly_cycling_multi_sources_examples_iterable_state(probabil... function test_mapped_examples_iterable (line 442) | def test_mapped_examples_iterable(n, func, batched, batch_size): function test_mapped_examples_iterable_drop_last_batch (line 479) | def test_mapped_examples_iterable_drop_last_batch(n, func, batched, batc... function _wrap_async (line 518) | def _wrap_async(func, *args, **kwargs): function test_mapped_examples_iterable_with_indices (line 540) | def test_mapped_examples_iterable_with_indices(n, func, batched, batch_s... function test_mapped_examples_iterable_remove_columns (line 584) | def test_mapped_examples_iterable_remove_columns(n, func, batched, batch... function test_iterable_dataset_vs_dataset_map (line 618) | def test_iterable_dataset_vs_dataset_map(batched, batch_size, input_colu... function test_mapped_examples_iterable_fn_kwargs (line 675) | def test_mapped_examples_iterable_fn_kwargs(n, func, batched, batch_size... function test_mapped_examples_iterable_input_columns (line 713) | def test_mapped_examples_iterable_input_columns(n, func, batched, batch_... function test_mapped_examples_iterable_arrow_format (line 753) | def test_mapped_examples_iterable_arrow_format(n, func, batched, batch_s... function test_mapped_examples_iterable_arrow_format_from_arrow_examples_iterable (line 795) | def test_mapped_examples_iterable_arrow_format_from_arrow_examples_itera... function test_mapped_examples_iterable_drop_last_batch_and_arrow_format (line 835) | def test_mapped_examples_iterable_drop_last_batch_and_arrow_format(n, fu... function test_mapped_examples_iterable_with_indices_and_arrow_format (line 900) | def test_mapped_examples_iterable_with_indices_and_arrow_format(n, func,... function test_mapped_examples_iterable_remove_columns_arrow_format (line 953) | def test_mapped_examples_iterable_remove_columns_arrow_format(n, func, b... function test_mapped_examples_iterable_fn_kwargs_and_arrow_format (line 1000) | def test_mapped_examples_iterable_fn_kwargs_and_arrow_format(n, func, ba... function test_mapped_examples_iterable_input_columns_and_arrow_format (line 1042) | def test_mapped_examples_iterable_input_columns_and_arrow_format(n, func... function test_filtered_examples_iterable (line 1088) | def test_filtered_examples_iterable(n, func, batched, batch_size): function test_filtered_examples_iterable_with_indices (line 1120) | def test_filtered_examples_iterable_with_indices(n, func, batched, batch... function test_filtered_examples_iterable_input_columns (line 1154) | def test_filtered_examples_iterable_input_columns(n, func, batched, batc... function test_map_async (line 1179) | def test_map_async(): function test_filter_async (line 1201) | def test_filter_async(): function test_skip_examples_iterable (line 1223) | def test_skip_examples_iterable(): function test_take_examples_iterable (line 1235) | def test_take_examples_iterable(): function test_step_examples_iterable (line 1247) | def test_step_examples_iterable(): function test_skip_arrow_examples_iterable (line 1256) | def test_skip_arrow_examples_iterable(): function test_take_arrow_examples_iterable (line 1268) | def test_take_arrow_examples_iterable(): function test_step_arrow_examples_iterable (line 1280) | def test_step_arrow_examples_iterable(): function test_repeat_examples_iterable (line 1297) | def test_repeat_examples_iterable(n, num_times): function test_vertically_concatenated_examples_iterable (line 1311) | def test_vertically_concatenated_examples_iterable(): function test_vertically_concatenated_examples_iterable_with_different_columns (line 1320) | def test_vertically_concatenated_examples_iterable_with_different_column... function test_vertically_concatenated_examples_iterable_shuffle_data_sources (line 1331) | def test_vertically_concatenated_examples_iterable_shuffle_data_sources(): function test_horizontally_concatenated_examples_iterable (line 1345) | def test_horizontally_concatenated_examples_iterable(): function test_no_iter_arrow (line 1385) | def test_no_iter_arrow(ex_iterable: _BaseExamplesIterable): function test_iter_arrow (line 1435) | def test_iter_arrow(ex_iterable: _BaseExamplesIterable): function test_iterable_dataset (line 1449) | def test_iterable_dataset(): function test_iterable_dataset_push_to_hub_max_shard_size_and_num_shards_are_mutually_exclusive (line 1456) | def test_iterable_dataset_push_to_hub_max_shard_size_and_num_shards_are_... function test_iterable_dataset_push_to_hub_single_shard_disables_multiprocessing (line 1462) | def test_iterable_dataset_push_to_hub_single_shard_disables_multiprocess... function test_iterable_dataset_push_to_hub_default_num_shards_uses_dataset_num_shards (line 1499) | def test_iterable_dataset_push_to_hub_default_num_shards_uses_dataset_nu... function test_iterable_dataset_push_to_hub_max_shard_size_computes_num_shards_from_estimated_size (line 1529) | def test_iterable_dataset_push_to_hub_max_shard_size_computes_num_shards... function test_iterable_dataset_push_to_hub_max_shard_size_respects_num_proc_floor (line 1560) | def test_iterable_dataset_push_to_hub_max_shard_size_respects_num_proc_f... function test_iterable_dataset_from_generator (line 1597) | def test_iterable_dataset_from_generator(): function test_iterable_dataset_from_generator_with_shards (line 1613) | def test_iterable_dataset_from_generator_with_shards(): function test_iterable_dataset_to_pandas_preserves_declared_features (line 1625) | def test_iterable_dataset_to_pandas_preserves_declared_features(): function test_iterable_dataset_to_pandas_casts_when_schema_mismatch (line 1640) | def test_iterable_dataset_to_pandas_casts_when_schema_mismatch(): function test_iterable_dataset_from_file (line 1663) | def test_iterable_dataset_from_file(dataset: IterableDataset, arrow_file... function test_from_spark_streaming (line 1676) | def test_from_spark_streaming(): function test_from_spark_streaming_features (line 1703) | def test_from_spark_streaming_features(): function test_iterable_dataset_torch_integration (line 1724) | def test_iterable_dataset_torch_integration(): function test_iterable_dataset_torch_picklable (line 1734) | def test_iterable_dataset_torch_picklable(): function test_iterable_dataset_with_format_torch (line 1750) | def test_iterable_dataset_with_format_torch(): function test_iterable_dataset_torch_dataloader_parallel (line 1760) | def test_iterable_dataset_torch_dataloader_parallel(): function test_sharded_iterable_dataset_torch_dataloader_parallel (line 1775) | def test_sharded_iterable_dataset_torch_dataloader_parallel(num_shards, ... function test_iterable_dataset_from_hub_torch_dataloader_parallel (line 1790) | def test_iterable_dataset_from_hub_torch_dataloader_parallel(num_workers... function gen_with_worker_info (line 1799) | def gen_with_worker_info(shard): function test_iterable_dataset_shuffle_with_multiple_workers_different_rng (line 1808) | def test_iterable_dataset_shuffle_with_multiple_workers_different_rng(): function gen_with_value (line 1824) | def gen_with_value(shard, value): function test_iterable_dataset_interleave_dataset_with_multiple_workers (line 1830) | def test_iterable_dataset_interleave_dataset_with_multiple_workers(): function gen_with_id (line 1848) | def gen_with_id(shard, value): function test_iterable_dataset_interleave_dataset_deterministic_across_iterations (line 1854) | def test_iterable_dataset_interleave_dataset_deterministic_across_iterat... function test_iterable_dataset_iter_batch (line 1877) | def test_iterable_dataset_iter_batch(batch_size, drop_last_batch): function test_iterable_dataset_info (line 1890) | def test_iterable_dataset_info(): function test_iterable_dataset_set_epoch (line 1900) | def test_iterable_dataset_set_epoch(dataset: IterableDataset): function test_iterable_dataset_set_epoch_resuming (line 1906) | def test_iterable_dataset_set_epoch_resuming(dataset: IterableDataset): function test_iterable_dataset_map (line 1917) | def test_iterable_dataset_map( function test_iterable_dataset_map_batched (line 1928) | def test_iterable_dataset_map_batched( function test_iterable_dataset_map_complex_features (line 1940) | def test_iterable_dataset_map_complex_features( function test_iterable_dataset_map_with_features (line 1961) | def test_iterable_dataset_map_with_features(dataset: IterableDataset) ->... function test_iterable_dataset_map_with_fn_kwargs (line 1985) | def test_iterable_dataset_map_with_fn_kwargs(dataset: IterableDataset) -... function test_iterable_dataset_filter (line 1999) | def test_iterable_dataset_filter(dataset: IterableDataset) -> None: function test_iterable_dataset_filter_chaining_does_not_raise (line 2006) | def test_iterable_dataset_filter_chaining_does_not_raise() -> None: function test_iterable_dataset_shuffle (line 2036) | def test_iterable_dataset_shuffle(dataset: IterableDataset, seed, epoch): function test_iterable_dataset_features (line 2080) | def test_iterable_dataset_features(features): function test_iterable_dataset_features_cast_to_python (line 2090) | def test_iterable_dataset_features_cast_to_python(): function test_iterable_dataset_with_format (line 2111) | def test_iterable_dataset_with_format(dataset: IterableDataset, format_t... function test_iterable_dataset_is_torch_iterable_dataset (line 2117) | def test_iterable_dataset_is_torch_iterable_dataset(dataset: IterableDat... function test_iterable_dataset_persists_epoch_in_torch_workers (line 2127) | def test_iterable_dataset_persists_epoch_in_torch_workers(dataset: Itera... function test_iterable_dataset_skip (line 2149) | def test_iterable_dataset_skip(dataset: IterableDataset, n): function test_iterable_dataset_take (line 2157) | def test_iterable_dataset_take(dataset: IterableDataset, n): function test_iterable_dataset_repeat (line 2165) | def test_iterable_dataset_repeat(dataset: IterableDataset, n): function test_iterable_dataset_shard (line 2172) | def test_iterable_dataset_shard(): function test_iterable_dataset_skip_or_take_after_shuffle (line 2196) | def test_iterable_dataset_skip_or_take_after_shuffle(method, after_shuff... function test_iterable_dataset_skip_or_take_after_split_by_node (line 2227) | def test_iterable_dataset_skip_or_take_after_split_by_node(method, after... function test_iterable_dataset_add_column (line 2254) | def test_iterable_dataset_add_column(dataset_with_several_columns: Itera... function test_iterable_dataset_rename_column (line 2264) | def test_iterable_dataset_rename_column(dataset_with_several_columns: It... function test_iterable_dataset_rename_columns (line 2279) | def test_iterable_dataset_rename_columns(dataset_with_several_columns: I... function test_iterable_dataset_remove_columns (line 2295) | def test_iterable_dataset_remove_columns(dataset_with_several_columns: I... function test_iterable_dataset_select_columns (line 2315) | def test_iterable_dataset_select_columns(dataset_with_several_columns: I... function test_iterable_dataset_cast_column (line 2334) | def test_iterable_dataset_cast_column(): function test_iterable_dataset_cast (line 2344) | def test_iterable_dataset_cast(): function test_iterable_dataset_resolve_features (line 2353) | def test_iterable_dataset_resolve_features(): function test_iterable_dataset_resolve_features_keep_order (line 2367) | def test_iterable_dataset_resolve_features_keep_order(): function test_iterable_dataset_with_features_fill_with_none (line 2378) | def test_iterable_dataset_with_features_fill_with_none(): function test_concatenate_datasets (line 2388) | def test_concatenate_datasets(): function test_concatenate_datasets_resolves_features (line 2397) | def test_concatenate_datasets_resolves_features(): function test_concatenate_datasets_with_different_columns (line 2407) | def test_concatenate_datasets_with_different_columns(): function test_concatenate_datasets_axis_1 (line 2422) | def test_concatenate_datasets_axis_1(): function test_concatenate_datasets_axis_1_resolves_features (line 2433) | def test_concatenate_datasets_axis_1_resolves_features(): function test_concatenate_datasets_axis_1_with_different_lengths (line 2443) | def test_concatenate_datasets_axis_1_with_different_lengths(): function test_concatenate_datasets_with_format (line 2466) | def test_concatenate_datasets_with_format(dataset: IterableDataset, form... function test_interleave_datasets (line 2487) | def test_interleave_datasets(dataset: IterableDataset, probas, seed, exp... function test_interleave_datasets_with_features (line 2534) | def test_interleave_datasets_with_features( function test_interleave_datasets_with_oversampling (line 2550) | def test_interleave_datasets_with_oversampling(): function test_with_format_torch (line 2575) | def test_with_format_torch(dataset_with_several_columns: IterableDataset): function test_with_format_tf (line 2597) | def test_with_format_tf(dataset_with_several_columns: IterableDataset): function test_map_array_are_not_converted_back_to_lists (line 2615) | def test_map_array_are_not_converted_back_to_lists(dataset: IterableData... function test_formatted_map (line 2625) | def test_formatted_map(dataset: IterableDataset): function test_format_from_arrow (line 2642) | def test_format_from_arrow(): function test_format_arrow (line 2667) | def test_format_arrow(dataset: IterableDataset): function test_format_pandas (line 2679) | def test_format_pandas(dataset: IterableDataset): function test_format_polars (line 2692) | def test_format_polars(dataset: IterableDataset): function test_interleave_dataset_with_sharding (line 2707) | def test_interleave_dataset_with_sharding(num_shards1, num_shards2, num_... function filter_func (line 2727) | def filter_func(batch): function map_func (line 2731) | def map_func(batch): function test_pickle_after_many_transforms (line 2736) | def test_pickle_after_many_transforms(dataset_with_several_columns): function test_resume_dataloader (line 2755) | def test_resume_dataloader(dataset: IterableDataset): function test_iterable_dataset_batch (line 2770) | def test_iterable_dataset_batch(): class DecodableFeature (line 2834) | class DecodableFeature: method __init__ (line 2837) | def __init__(self): method decode_example (line 2840) | def decode_example(self, example, token_per_repo_id=None): method __call__ (line 2844) | def __call__(self): function test_decode (line 2848) | def test_decode(): class TestIterableColumn (line 2872) | class TestIterableColumn: method test_simple_getitem (line 2873) | def test_simple_getitem(self): method test_chained_getitem (line 2887) | def test_chained_getitem(self): method test_getitem_for_batched_dataset (line 2901) | def test_getitem_for_batched_dataset(self): FILE: tests/test_load.py function data_dir (line 83) | def data_dir(tmp_path): function data_dir_with_arrow (line 94) | def data_dir_with_arrow(tmp_path): function data_dir_with_metadata (line 113) | def data_dir_with_metadata(tmp_path): function data_dir_with_single_config_in_metadata (line 138) | def data_dir_with_single_config_in_metadata(tmp_path): function data_dir_with_config_and_data_files (line 164) | def data_dir_with_config_and_data_files(tmp_path): function data_dir_with_two_config_in_metadata (line 190) | def data_dir_with_two_config_in_metadata(tmp_path): function data_dir_with_data_dir_configs_in_metadata (line 219) | def data_dir_with_data_dir_configs_in_metadata(tmp_path): function sub_data_dirs (line 233) | def sub_data_dirs(tmp_path): function complex_data_dir (line 255) | def complex_data_dir(tmp_path): function test_infer_module_for_data_files (line 287) | def test_infer_module_for_data_files(data_files, expected_module, expect... function test_infer_module_for_data_files_in_archives (line 302) | def test_infer_module_for_data_files_in_archives( class ModuleFactoryTest (line 316) | class ModuleFactoryTest(TestCase): method inject_fixtures (line 318) | def inject_fixtures( method setUp (line 337) | def setUp(self): method test_LocalDatasetModuleFactory (line 341) | def test_LocalDatasetModuleFactory(self): method test_LocalDatasetModuleFactory_with_data_dir (line 347) | def test_LocalDatasetModuleFactory_with_data_dir(self): method test_LocalDatasetModuleFactory_with_metadata (line 362) | def test_LocalDatasetModuleFactory_with_metadata(self): method test_LocalDatasetModuleFactory_with_single_config_in_metadata (line 375) | def test_LocalDatasetModuleFactory_with_single_config_in_metadata(self): method test_LocalDatasetModuleFactory_with_config_and_data_files (line 408) | def test_LocalDatasetModuleFactory_with_config_and_data_files(self): method test_LocalDatasetModuleFactory_data_dir_with_config_and_data_files (line 424) | def test_LocalDatasetModuleFactory_data_dir_with_config_and_data_files... method test_LocalDatasetModuleFactory_with_two_configs_in_metadata (line 440) | def test_LocalDatasetModuleFactory_with_two_configs_in_metadata(self): method test_PackagedDatasetModuleFactory (line 484) | def test_PackagedDatasetModuleFactory(self): method test_PackagedDatasetModuleFactory_with_data_dir (line 491) | def test_PackagedDatasetModuleFactory_with_data_dir(self): method test_PackagedDatasetModuleFactory_with_data_dir_and_metadata (line 500) | def test_PackagedDatasetModuleFactory_with_data_dir_and_metadata(self): method test_HubDatasetModuleFactory (line 514) | def test_HubDatasetModuleFactory(self): method test_HubDatasetModuleFactory_with_data_dir (line 523) | def test_HubDatasetModuleFactory_with_data_dir(self): method test_HubDatasetModuleFactory_with_metadata (line 546) | def test_HubDatasetModuleFactory_with_metadata(self): method test_HubDatasetModuleFactory_with_one_default_config_in_metadata (line 577) | def test_HubDatasetModuleFactory_with_one_default_config_in_metadata(s... method test_HubDatasetModuleFactory_with_two_configs_in_metadata (line 617) | def test_HubDatasetModuleFactory_with_two_configs_in_metadata(self): method test_CachedDatasetModuleFactory (line 676) | def test_CachedDatasetModuleFactory(self): function test_module_factories (line 698) | def test_module_factories(factory_class, requires_commit_hash): class LoadTest (line 708) | class LoadTest(TestCase): method inject_fixtures (line 710) | def inject_fixtures(self, caplog): method setUp (line 713) | def setUp(self): method tearDown (line 716) | def tearDown(self): method test_offline_dataset_module_factory (line 720) | def test_offline_dataset_module_factory(self): method test_offline_dataset_module_factory_with_capital_letters_in_name (line 733) | def test_offline_dataset_module_factory_with_capital_letters_in_name(s... method test_load_dataset_from_hub (line 745) | def test_load_dataset_from_hub(self): method test_load_dataset_invalid_revision_with_cache (line 769) | def test_load_dataset_invalid_revision_with_cache(self): method test_load_dataset_namespace (line 780) | def test_load_dataset_namespace(self): function test_load_dataset_builder_with_metadata (line 792) | def test_load_dataset_builder_with_metadata(): function test_load_dataset_builder_config_kwargs_passed_as_arguments (line 803) | def test_load_dataset_builder_config_kwargs_passed_as_arguments(): function test_load_dataset_builder_config_kwargs_override_builder_kwargs (line 810) | def test_load_dataset_builder_config_kwargs_override_builder_kwargs(): function test_load_dataset_builder_with_two_configs_in_metadata (line 839) | def test_load_dataset_builder_with_two_configs_in_metadata(): function test_load_dataset_builder_with_metadata_configs_pickable (line 851) | def test_load_dataset_builder_with_metadata_configs_pickable(serializer): function test_load_dataset_builder_for_absolute_data_dir (line 866) | def test_load_dataset_builder_for_absolute_data_dir(complex_data_dir): function test_load_dataset_builder_for_relative_data_dir (line 877) | def test_load_dataset_builder_for_relative_data_dir(complex_data_dir): function test_load_dataset_builder_for_community_dataset (line 892) | def test_load_dataset_builder_for_community_dataset(): function test_load_dataset_builder_fail (line 903) | def test_load_dataset_builder_fail(): function test_load_dataset_from_hub (line 917) | def test_load_dataset_from_hub(kwargs, expected_train_num_rows, expected... function test_load_dataset_cached_from_hub (line 925) | def test_load_dataset_cached_from_hub(stream_from_cache, caplog): function test_load_dataset_streaming_gz_json (line 944) | def test_load_dataset_streaming_gz_json(jsonl_gz_path): function test_load_dataset_streaming_compressed_files (line 964) | def test_load_dataset_streaming_compressed_files(path): function test_load_dataset_streaming_csv (line 982) | def test_load_dataset_streaming_csv(path_extension, streaming, csv_path,... function test_load_dataset_zip_csv (line 994) | def test_load_dataset_zip_csv(data_file, streaming, zip_csv_path, zip_cs... function test_load_dataset_zip_jsonl (line 1019) | def test_load_dataset_zip_jsonl(data_file, streaming, zip_jsonl_path, zi... function test_load_dataset_zip_text (line 1044) | def test_load_dataset_zip_text(data_file, streaming, zip_text_path, zip_... function test_load_dataset_arrow (line 1067) | def test_load_dataset_arrow(streaming, data_dir_with_arrow): function test_load_dataset_text_with_unicode_new_lines (line 1084) | def test_load_dataset_text_with_unicode_new_lines(text_path_with_unicode... function test_load_dataset_with_unsupported_extensions (line 1090) | def test_load_dataset_with_unsupported_extensions(text_dir_with_unsuppor... function test_loading_from_the_datasets_hub_with_token (line 1097) | def test_loading_from_the_datasets_hub_with_token(): function test_load_streaming_private_dataset (line 1111) | def test_load_streaming_private_dataset(hf_token, hf_private_dataset_rep... function test_load_dataset_builder_private_dataset (line 1117) | def test_load_dataset_builder_private_dataset(hf_token, hf_private_datas... function test_load_streaming_private_dataset_with_zipped_data (line 1123) | def test_load_streaming_private_dataset_with_zipped_data(hf_token, hf_pr... function test_load_dataset_config_kwargs_passed_as_arguments (line 1130) | def test_load_dataset_config_kwargs_passed_as_arguments(): function test_load_hub_dataset_with_single_config_in_metadata (line 1139) | def test_load_hub_dataset_with_single_config_in_metadata(): function test_load_hub_dataset_with_two_config_in_metadata (line 1160) | def test_load_hub_dataset_with_two_config_in_metadata(): function test_load_hub_dataset_with_metadata_config_in_parallel (line 1188) | def test_load_hub_dataset_with_metadata_config_in_parallel(): function test_load_dataset_private_zipped_images (line 1206) | def test_load_dataset_private_zipped_images(hf_private_dataset_repo_zipp... function test_load_dataset_then_move_then_reload (line 1213) | def test_load_dataset_then_move_then_reload(data_dir, tmp_path, caplog): function test_load_dataset_builder_then_edit_then_load_again (line 1229) | def test_load_dataset_builder_then_edit_then_load_again(tmp_path: Path): function test_load_dataset_local_with_default_in_memory (line 1242) | def test_load_dataset_local_with_default_in_memory(max_in_memory_dataset... function test_remote_data_files (line 1259) | def test_remote_data_files(): function distributed_load_dataset (line 1269) | def distributed_load_dataset(args): function test_load_dataset_distributed (line 1275) | def test_load_dataset_distributed(tmp_path, csv_path): function test_load_dataset_with_storage_options (line 1286) | def test_load_dataset_with_storage_options(mockfs): function test_load_dataset_with_storage_options_with_decoding (line 1296) | def test_load_dataset_with_storage_options_with_decoding(mockfs, image_f... function test_load_dataset_with_zip (line 1309) | def test_load_dataset_with_zip(zip_csv_path): function test_reload_old_cache_from_2_15 (line 1319) | def test_reload_old_cache_from_2_15(tmp_path: Path): function test_update_dataset_card_data_with_standalone_yaml (line 1350) | def test_update_dataset_card_data_with_standalone_yaml(): FILE: tests/test_metadata_util.py function _dedent (line 17) | def _dedent(string: str) -> str: function data_dir_with_two_subdirs (line 131) | def data_dir_with_two_subdirs(tmp_path): class TestMetadataUtils (line 146) | class TestMetadataUtils(unittest.TestCase): method test_metadata_dict_from_readme (line 147) | def test_metadata_dict_from_readme(self): method test_from_yaml_string (line 170) | def test_from_yaml_string(self): function test_metadata_configs_dataset_card_data (line 256) | def test_metadata_configs_dataset_card_data( function test_metadata_configs_incorrect_yaml (line 269) | def test_metadata_configs_incorrect_yaml(): function test_split_order_in_metadata_configs_from_exported_parquet_files_and_dataset_infos (line 279) | def test_split_order_in_metadata_configs_from_exported_parquet_files_and... FILE: tests/test_offline_util.py function test_offline_with_timeout (line 22) | def test_offline_with_timeout(): function test_offline_with_connection_error (line 37) | def test_offline_with_connection_error(): function test_offline_with_datasets_offline_mode_enabled (line 47) | def test_offline_with_datasets_offline_mode_enabled(): FILE: tests/test_parallel.py function add_one (line 9) | def add_one(i): # picklable for multiprocessing function test_parallel_backend_input (line 16) | def test_parallel_backend_input(): function test_parallel_backend_map_nested (line 34) | def test_parallel_backend_map_nested(num_proc): FILE: tests/test_patching.py function test_patch_submodule (line 6) | def test_patch_submodule(): function test_patch_submodule_builtin (line 72) | def test_patch_submodule_builtin(): function test_patch_submodule_missing (line 86) | def test_patch_submodule_missing(): function test_patch_submodule_missing_builtin (line 93) | def test_patch_submodule_missing_builtin(): function test_patch_submodule_start_and_stop (line 104) | def test_patch_submodule_start_and_stop(): function test_patch_submodule_successive (line 114) | def test_patch_submodule_successive(): function test_patch_submodule_doesnt_exist (line 146) | def test_patch_submodule_doesnt_exist(): FILE: tests/test_py_utils.py function np_sum (line 27) | def np_sum(x): # picklable for multiprocessing function add_one (line 31) | def add_one(i): # picklable for multiprocessing function add_one_to_batch (line 35) | def add_one_to_batch(batch): # picklable for multiprocessing class A (line 40) | class A: function test_map_nested (line 61) | def test_map_nested(data_struct, expected_result, num_proc, batched, fun... class PyUtilsTest (line 65) | class PyUtilsTest(TestCase): method test_map_nested (line 66) | def test_map_nested(self): method test_zip_dict (line 88) | def test_zip_dict(self): method test_temporary_assignment (line 95) | def test_temporary_assignment(self): function test_map_nested_num_proc (line 121) | def test_map_nested_num_proc(iterable_length, num_proc, expected_num_proc): class TempSeedTest (line 137) | class TempSeedTest(TestCase): method test_tensorflow (line 139) | def test_tensorflow(self): method test_torch (line 160) | def test_torch(self): method test_numpy (line 177) | def test_numpy(self): function test_nested_data_structure_data (line 192) | def test_nested_data_structure_data(input_data): function test_flatten (line 220) | def test_flatten(data, expected_output): function test_asdict (line 225) | def test_asdict(): function _split_text (line 238) | def _split_text(text: str): function _2seconds_generator_of_2items_with_timing (line 242) | def _2seconds_generator_of_2items_with_timing(content): function test_iflatmap_unordered (line 248) | def test_iflatmap_unordered(): function test_string_to_dict (line 275) | def test_string_to_dict(): FILE: tests/test_search.py class IndexableDatasetTest (line 20) | class IndexableDatasetTest(TestCase): method _create_dummy_dataset (line 21) | def _create_dummy_dataset(self): method test_add_faiss_index (line 25) | def test_add_faiss_index(self): method test_add_faiss_index_errors (line 37) | def test_add_faiss_index_errors(self): method test_add_faiss_index_from_external_arrays (line 44) | def test_add_faiss_index_from_external_arrays(self): method test_serialization (line 57) | def test_serialization(self): method test_drop_index (line 79) | def test_drop_index(self): method test_add_elasticsearch_index (line 87) | def test_add_elasticsearch_index(self): class FaissIndexTest (line 107) | class FaissIndexTest(TestCase): method test_flat_ip (line 108) | def test_flat_ip(self): method test_factory (line 137) | def test_factory(self): method test_custom (line 149) | def test_custom(self): method test_serialization (line 157) | def test_serialization(self): function test_serialization_fs (line 180) | def test_serialization_fs(mockfs): class ElasticSearchIndexTest (line 199) | class ElasticSearchIndexTest(TestCase): method test_elasticsearch (line 200) | def test_elasticsearch(self): FILE: tests/test_sharding_utils.py function test_distribute_shards (line 17) | def test_distribute_shards(kwargs, expected): function test_split_gen_kwargs (line 32) | def test_split_gen_kwargs(gen_kwargs, max_num_jobs, expected): function test_number_of_shards_in_gen_kwargs (line 48) | def test_number_of_shards_in_gen_kwargs(gen_kwargs, expected): FILE: tests/test_splits.py function test_split_dict_to_yaml_list (line 18) | def test_split_dict_to_yaml_list(split_dict: SplitDict): function test_split_dict_asdict_has_dataset_name (line 33) | def test_split_dict_asdict_has_dataset_name(split_info): function test_named_split_inequality (line 41) | def test_named_split_inequality(): FILE: tests/test_streaming_download_manager.py function test_streaming_dl_manager_download_dummy_path (line 30) | def test_streaming_dl_manager_download_dummy_path(): function test_streaming_dl_manager_download_dummy_url (line 36) | def test_streaming_dl_manager_download_dummy_url(): function test_streaming_dl_manager_extract_throws (line 51) | def test_streaming_dl_manager_extract_throws(urlpath): function test_streaming_dl_manager_download (line 56) | def test_streaming_dl_manager_download(text_path): function test_streaming_dl_manager_download_and_extract_no_extraction_dummy_path (line 64) | def test_streaming_dl_manager_download_and_extract_no_extraction_dummy_p... function test_streaming_dl_manager_download_and_extract_no_extraction_dummy_url (line 70) | def test_streaming_dl_manager_download_and_extract_no_extraction_dummy_u... function test_streaming_dl_manager_extract (line 76) | def test_streaming_dl_manager_extract(text_gz_path, text_path): function test_iter_files_in_archive (line 88) | def test_iter_files_in_archive(archive_jsonl, request): function test_streaming_dl_manager_download_and_extract_with_extraction (line 96) | def test_streaming_dl_manager_download_and_extract_with_extraction(text_... function test_streaming_dl_manager_download_and_extract_with_join (line 111) | def test_streaming_dl_manager_download_and_extract_with_join(input_path,... function test_streaming_dl_manager_extract_all_supported_single_file_compression_types (line 119) | def test_streaming_dl_manager_extract_all_supported_single_file_compress... function test_streaming_gg_drive_no_extract (line 143) | def test_streaming_gg_drive_no_extract(): function test_streaming_gg_drive_gzipped (line 151) | def test_streaming_gg_drive_gzipped(): function test_streaming_gg_drive_zipped (line 159) | def test_streaming_gg_drive_zipped(): function _test_jsonl (line 168) | def _test_jsonl(path, file): function test_iter_archive_path (line 177) | def test_iter_archive_path(archive_jsonl, request): function test_iter_archive_file (line 193) | def test_iter_archive_file(archive_nested_jsonl, request): function test_iter_files (line 212) | def test_iter_files(data_dir_with_hidden_files): FILE: tests/test_table.py function in_memory_pa_table (line 39) | def in_memory_pa_table(arrow_file) -> pa.Table: function _to_testing_blocks (line 43) | def _to_testing_blocks(table: TableBlock) -> list[list[TableBlock]]: function in_memory_blocks (line 53) | def in_memory_blocks(in_memory_pa_table): function memory_mapped_blocks (line 59) | def memory_mapped_blocks(arrow_file): function mixed_in_memory_and_memory_mapped_blocks (line 65) | def mixed_in_memory_and_memory_mapped_blocks(in_memory_blocks, memory_ma... function assert_deepcopy_without_bringing_data_in_memory (line 69) | def assert_deepcopy_without_bringing_data_in_memory(table: MemoryMappedT... function assert_deepcopy_does_bring_data_in_memory (line 76) | def assert_deepcopy_does_bring_data_in_memory(table: MemoryMappedTable): function assert_pickle_without_bringing_data_in_memory (line 83) | def assert_pickle_without_bringing_data_in_memory(table: MemoryMappedTab... function assert_pickle_does_bring_data_in_memory (line 91) | def assert_pickle_does_bring_data_in_memory(table: MemoryMappedTable): function assert_index_attributes_equal (line 99) | def assert_index_attributes_equal(table: Table, other: Table): function add_suffix_to_column_names (line 105) | def add_suffix_to_column_names(table, suffix): function test_inject_arrow_table_documentation (line 109) | def test_inject_arrow_table_documentation(in_memory_pa_table): function test_in_memory_arrow_table_from_file (line 122) | def test_in_memory_arrow_table_from_file(arrow_file, in_memory_pa_table): function test_in_memory_arrow_table_from_buffer (line 128) | def test_in_memory_arrow_table_from_buffer(in_memory_pa_table): function test_memory_mapped_arrow_table_from_file (line 139) | def test_memory_mapped_arrow_table_from_file(arrow_file, in_memory_pa_ta... function test_table_init (line 145) | def test_table_init(in_memory_pa_table): function test_table_validate (line 150) | def test_table_validate(in_memory_pa_table): function test_table_equals (line 155) | def test_table_equals(in_memory_pa_table): function test_table_to_batches (line 160) | def test_table_to_batches(in_memory_pa_table): function test_table_to_pydict (line 165) | def test_table_to_pydict(in_memory_pa_table): function test_table_to_string (line 170) | def test_table_to_string(in_memory_pa_table): function test_table_field (line 175) | def test_table_field(in_memory_pa_table): function test_table_column (line 181) | def test_table_column(in_memory_pa_table): function test_table_itercolumns (line 187) | def test_table_itercolumns(in_memory_pa_table): function test_table_getitem (line 193) | def test_table_getitem(in_memory_pa_table): function test_table_len (line 198) | def test_table_len(in_memory_pa_table): function test_table_str (line 203) | def test_table_str(in_memory_pa_table): function test_table_attributes (line 212) | def test_table_attributes(in_memory_pa_table, attribute): function test_in_memory_table_from_file (line 217) | def test_in_memory_table_from_file(arrow_file, in_memory_pa_table): function test_in_memory_table_from_buffer (line 224) | def test_in_memory_table_from_buffer(in_memory_pa_table): function test_in_memory_table_from_pandas (line 236) | def test_in_memory_table_from_pandas(in_memory_pa_table): function test_in_memory_table_from_arrays (line 248) | def test_in_memory_table_from_arrays(in_memory_pa_table): function test_in_memory_table_from_pydict (line 256) | def test_in_memory_table_from_pydict(in_memory_pa_table): function test_in_memory_table_from_pylist (line 264) | def test_in_memory_table_from_pylist(in_memory_pa_table): function test_in_memory_table_from_batches (line 271) | def test_in_memory_table_from_batches(in_memory_pa_table): function test_in_memory_table_deepcopy (line 278) | def test_in_memory_table_deepcopy(in_memory_pa_table): function test_in_memory_table_pickle (line 288) | def test_in_memory_table_pickle(in_memory_pa_table): function test_in_memory_table_pickle_big_table (line 297) | def test_in_memory_table_pickle_big_table(): function test_in_memory_table_slice (line 305) | def test_in_memory_table_slice(in_memory_pa_table): function test_in_memory_table_filter (line 311) | def test_in_memory_table_filter(in_memory_pa_table): function test_in_memory_table_flatten (line 318) | def test_in_memory_table_flatten(in_memory_pa_table): function test_in_memory_table_combine_chunks (line 324) | def test_in_memory_table_combine_chunks(in_memory_pa_table): function test_in_memory_table_cast (line 330) | def test_in_memory_table_cast(in_memory_pa_table): function test_in_memory_table_cast_reorder_struct (line 343) | def test_in_memory_table_cast_reorder_struct(): function test_in_memory_table_cast_with_hf_features (line 360) | def test_in_memory_table_cast_with_hf_features(): function test_in_memory_table_replace_schema_metadata (line 368) | def test_in_memory_table_replace_schema_metadata(in_memory_pa_table): function test_in_memory_table_add_column (line 375) | def test_in_memory_table_add_column(in_memory_pa_table): function test_in_memory_table_append_column (line 384) | def test_in_memory_table_append_column(in_memory_pa_table): function test_in_memory_table_remove_column (line 392) | def test_in_memory_table_remove_column(in_memory_pa_table): function test_in_memory_table_set_column (line 398) | def test_in_memory_table_set_column(in_memory_pa_table): function test_in_memory_table_rename_columns (line 407) | def test_in_memory_table_rename_columns(in_memory_pa_table): function test_in_memory_table_drop (line 415) | def test_in_memory_table_drop(in_memory_pa_table): function test_memory_mapped_table_init (line 422) | def test_memory_mapped_table_init(arrow_file, in_memory_pa_table): function test_memory_mapped_table_from_file (line 430) | def test_memory_mapped_table_from_file(arrow_file, in_memory_pa_table): function test_memory_mapped_table_from_file_with_replay (line 439) | def test_memory_mapped_table_from_file_with_replay(arrow_file, in_memory... function test_memory_mapped_table_deepcopy (line 451) | def test_memory_mapped_table_deepcopy(arrow_file): function test_memory_mapped_table_pickle (line 462) | def test_memory_mapped_table_pickle(arrow_file): function test_memory_mapped_table_pickle_doesnt_fill_memory (line 471) | def test_memory_mapped_table_pickle_doesnt_fill_memory(arrow_file): function test_memory_mapped_table_pickle_applies_replay (line 478) | def test_memory_mapped_table_pickle_applies_replay(arrow_file): function test_memory_mapped_table_slice (line 488) | def test_memory_mapped_table_slice(arrow_file, in_memory_pa_table): function test_memory_mapped_table_filter (line 497) | def test_memory_mapped_table_filter(arrow_file, in_memory_pa_table): function test_memory_mapped_table_flatten (line 509) | def test_memory_mapped_table_flatten(arrow_file, in_memory_pa_table): function test_memory_mapped_table_combine_chunks (line 518) | def test_memory_mapped_table_combine_chunks(arrow_file, in_memory_pa_tab... function test_memory_mapped_table_cast (line 527) | def test_memory_mapped_table_cast(arrow_file, in_memory_pa_table): function test_memory_mapped_table_replace_schema_metadata (line 545) | def test_memory_mapped_table_replace_schema_metadata(arrow_file, in_memo... function test_memory_mapped_table_add_column (line 555) | def test_memory_mapped_table_add_column(arrow_file, in_memory_pa_table): function test_memory_mapped_table_append_column (line 567) | def test_memory_mapped_table_append_column(arrow_file, in_memory_pa_table): function test_memory_mapped_table_remove_column (line 578) | def test_memory_mapped_table_remove_column(arrow_file, in_memory_pa_table): function test_memory_mapped_table_set_column (line 587) | def test_memory_mapped_table_set_column(arrow_file, in_memory_pa_table): function test_memory_mapped_table_rename_columns (line 599) | def test_memory_mapped_table_rename_columns(arrow_file, in_memory_pa_tab... function test_memory_mapped_table_drop (line 610) | def test_memory_mapped_table_drop(arrow_file, in_memory_pa_table): function test_concatenation_table_init (line 621) | def test_concatenation_table_init( function test_concatenation_table_from_blocks (line 636) | def test_concatenation_table_from_blocks(in_memory_pa_table, in_memory_b... function test_concatenation_table_from_blocks_doesnt_increase_memory (line 659) | def test_concatenation_table_from_blocks_doesnt_increase_memory( function test_concatenation_table_from_tables (line 678) | def test_concatenation_table_from_tables(axis, in_memory_pa_table, arrow... function test_concatenation_table_from_tables_axis1_misaligned_blocks (line 705) | def test_concatenation_table_from_tables_axis1_misaligned_blocks(arrow_f... function test_concatenation_table_deepcopy (line 730) | def test_concatenation_table_deepcopy( function test_concatenation_table_pickle (line 749) | def test_concatenation_table_pickle( function test_concat_tables_with_features_metadata (line 765) | def test_concat_tables_with_features_metadata(arrow_file, in_memory_pa_t... function test_concatenation_table_slice (line 782) | def test_concatenation_table_slice( function test_concatenation_table_slice_mixed_schemas_vertically (line 795) | def test_concatenation_table_slice_mixed_schemas_vertically(arrow_file): function test_concatenation_table_filter (line 817) | def test_concatenation_table_filter( function test_concatenation_table_flatten (line 832) | def test_concatenation_table_flatten( function test_concatenation_table_combine_chunks (line 846) | def test_concatenation_table_combine_chunks( function test_concatenation_table_cast (line 860) | def test_concatenation_table_cast( function test_concat_tables_cast_with_features_metadata (line 891) | def test_concat_tables_cast_with_features_metadata( function test_concatenation_table_replace_schema_metadata (line 912) | def test_concatenation_table_replace_schema_metadata( function test_concatenation_table_add_column (line 927) | def test_concatenation_table_add_column( function test_concatenation_table_append_column (line 946) | def test_concatenation_table_append_column( function test_concatenation_table_remove_column (line 964) | def test_concatenation_table_remove_column( function test_concatenation_table_set_column (line 978) | def test_concatenation_table_set_column( function test_concatenation_table_rename_columns (line 997) | def test_concatenation_table_rename_columns( function test_concatenation_table_drop (line 1013) | def test_concatenation_table_drop( function test_concat_tables (line 1027) | def test_concat_tables(arrow_file, in_memory_pa_table): function _interpolation_search_ground_truth (line 1052) | def _interpolation_search_ground_truth(arr: list[int], x: int) -> Union[... class _ListWithGetitemCounter (line 1059) | class _ListWithGetitemCounter(list): method __init__ (line 1060) | def __init__(self, *args, **kwargs): method __getitem__ (line 1064) | def __getitem__(self, i): method getitem_unique_count (line 1070) | def getitem_unique_count(self): function test_interpolation_search (line 1081) | def test_interpolation_search(arr, x): function test_indexed_table_mixin (line 1095) | def test_indexed_table_mixin(): function test_cast_integer_array_to_features (line 1106) | def test_cast_integer_array_to_features(): function test_cast_float_array_to_features (line 1114) | def test_cast_float_array_to_features(): function test_cast_boolean_array_to_features (line 1122) | def test_cast_boolean_array_to_features(): function test_cast_decimal_array_to_features (line 1130) | def test_cast_decimal_array_to_features(): function test_cast_array_to_feature_with_struct_with_missing_fields (line 1147) | def test_cast_array_to_feature_with_struct_with_missing_fields(array_lis... function test_cast_array_to_features_nested (line 1155) | def test_cast_array_to_features_nested(): function test_cast_array_to_features_to_nested_with_no_fields (line 1162) | def test_cast_array_to_features_to_nested_with_no_fields(): function test_cast_array_to_features_nested_with_nulls (line 1168) | def test_cast_array_to_features_nested_with_nulls(): function test_cast_array_to_features_to_null_type (line 1181) | def test_cast_array_to_features_to_null_type(): function test_cast_array_to_features_array_xd (line 1192) | def test_cast_array_to_features_array_xd(): function test_cast_array_to_features_sequence_classlabel (line 1202) | def test_cast_array_to_features_sequence_classlabel(): function test_cast_fixed_size_list_array_to_features_sequence (line 1234) | def test_cast_fixed_size_list_array_to_features_sequence(arr, slice, tar... function test_cast_list_array_to_features_sequence (line 1259) | def test_cast_list_array_to_features_sequence(arr, slice, target_value_f... function test_cast_array_to_feature_with_list_array_and_sequence_feature (line 1278) | def test_cast_array_to_feature_with_list_array_and_sequence_feature( function test_cast_array_to_feature_with_list_array_and_large_list_feature (line 1316) | def test_cast_array_to_feature_with_list_array_and_large_list_feature(fr... function test_cast_array_xd_to_features_sequence (line 1337) | def test_cast_array_xd_to_features_sequence(): function test_embed_array_storage (line 1351) | def test_embed_array_storage(image_file): function test_embed_array_storage_nested (line 1359) | def test_embed_array_storage_nested(image_file): function test_embed_array_storage_with_list_types (line 1385) | def test_embed_array_storage_with_list_types(array, feature, expected_em... function test_embed_table_storage (line 1398) | def test_embed_table_storage(image_file): function test_table_iter (line 1416) | def test_table_iter(table, batch_size, drop_last_batch): function test_array_cast (line 1433) | def test_array_cast(from_type, to_type): function test_cast_table_to_schema_with_missing_fields (line 1445) | def test_cast_table_to_schema_with_missing_fields(): FILE: tests/test_tqdm.py class TestTqdmUtils (line 15) | class TestTqdmUtils(unittest.TestCase): method capsys (line 17) | def capsys(self, capsys: CaptureFixture) -> None: method setUp (line 27) | def setUp(self) -> None: method tearDown (line 32) | def tearDown(self) -> None: method test_tqdm_helpers (line 40) | def test_tqdm_helpers(self) -> None: method test_cannot_enable_tqdm_when_env_variable_is_set (line 49) | def test_cannot_enable_tqdm_when_env_variable_is_set(self) -> None: method test_cannot_disable_tqdm_when_env_variable_is_set (line 62) | def test_cannot_disable_tqdm_when_env_variable_is_set(self) -> None: method test_tqdm_disabled (line 75) | def test_tqdm_disabled(self) -> None: method test_tqdm_disabled_cannot_be_forced (line 86) | def test_tqdm_disabled_cannot_be_forced(self) -> None: method test_tqdm_can_be_disabled_when_globally_enabled (line 97) | def test_tqdm_can_be_disabled_when_globally_enabled(self) -> None: method test_tqdm_enabled (line 108) | def test_tqdm_enabled(self) -> None: FILE: tests/test_upstream_hub.py class TestPushToHub (line 55) | class TestPushToHub: method test_push_dataset_dict_to_hub_no_token (line 59) | def test_push_dataset_dict_to_hub_no_token(self, temporary_repo, set_c... method test_push_dataset_dict_to_hub_name_without_namespace (line 76) | def test_push_dataset_dict_to_hub_name_without_namespace(self, tempora... method test_push_dataset_dict_to_hub_datasets_with_different_features (line 93) | def test_push_dataset_dict_to_hub_datasets_with_different_features(sel... method test_push_dataset_dict_to_hub_private (line 107) | def test_push_dataset_dict_to_hub_private(self, temporary_repo): method test_push_dataset_dict_to_hub (line 124) | def test_push_dataset_dict_to_hub(self, temporary_repo): method test_push_dataset_dict_to_hub_with_pull_request (line 141) | def test_push_dataset_dict_to_hub_with_pull_request(self, temporary_re... method test_push_dataset_dict_to_hub_with_revision (line 160) | def test_push_dataset_dict_to_hub_with_revision(self, temporary_repo): method test_push_dataset_dict_to_hub_multiple_files (line 177) | def test_push_dataset_dict_to_hub_multiple_files(self, temporary_repo): method test_push_dataset_dict_to_hub_multiple_files_with_max_shard_size (line 200) | def test_push_dataset_dict_to_hub_multiple_files_with_max_shard_size(s... method test_push_dataset_dict_to_hub_multiple_files_with_num_shards (line 222) | def test_push_dataset_dict_to_hub_multiple_files_with_num_shards(self,... method test_push_dataset_dict_to_hub_with_multiple_commits (line 244) | def test_push_dataset_dict_to_hub_with_multiple_commits(self, temporar... method test_push_dataset_dict_to_hub_overwrite_files (line 275) | def test_push_dataset_dict_to_hub_overwrite_files(self, temporary_repo): method test_push_dataset_dict_to_hub_bucket (line 367) | def test_push_dataset_dict_to_hub_bucket(self, temporary_bucket): method test_push_dataset_dict_to_hub_bucket_inside_dir (line 386) | def test_push_dataset_dict_to_hub_bucket_inside_dir(self, temporary_bu... method test_push_dataset_to_hub_bucket (line 407) | def test_push_dataset_to_hub_bucket(self, temporary_bucket): method test_push_dataset_to_hub_bucket_inside_dir (line 424) | def test_push_dataset_to_hub_bucket_inside_dir(self, temporary_bucket): method test_push_dataset_to_hub (line 442) | def test_push_dataset_to_hub(self, temporary_repo): method test_push_dataset_to_hub_custom_features (line 459) | def test_push_dataset_to_hub_custom_features(self, temporary_repo): method test_push_dataset_to_hub_custom_features_audio (line 474) | def test_push_dataset_to_hub_custom_features_audio(self, temporary_repo): method test_push_dataset_to_hub_custom_features_image (line 501) | def test_push_dataset_to_hub_custom_features_image(self, temporary_repo): method test_push_dataset_to_hub_custom_features_image_list (line 523) | def test_push_dataset_to_hub_custom_features_image_list(self, temporar... method test_push_dataset_dict_to_hub_custom_features (line 544) | def test_push_dataset_dict_to_hub_custom_features(self, temporary_repo): method test_push_dataset_to_hub_custom_splits (line 558) | def test_push_dataset_to_hub_custom_splits(self, temporary_repo): method test_push_dataset_to_hub_multiple_splits_one_by_one (line 569) | def test_push_dataset_to_hub_multiple_splits_one_by_one(self, temporar... method test_push_dataset_dict_to_hub_custom_splits (line 580) | def test_push_dataset_dict_to_hub_custom_splits(self, temporary_repo): method test_push_multiple_dataset_configs_to_hub_load_dataset_builder (line 593) | def test_push_multiple_dataset_configs_to_hub_load_dataset_builder(sel... method test_push_multiple_dataset_configs_to_hub_load_dataset (line 627) | def test_push_multiple_dataset_configs_to_hub_load_dataset(self, tempo... method test_push_multiple_dataset_configs_to_hub_readme_metadata_content (line 669) | def test_push_multiple_dataset_configs_to_hub_readme_metadata_content( method test_push_multiple_dataset_dict_configs_to_hub_load_dataset_builder (line 727) | def test_push_multiple_dataset_dict_configs_to_hub_load_dataset_builde... method test_push_multiple_dataset_dict_configs_to_hub_load_dataset (line 764) | def test_push_multiple_dataset_dict_configs_to_hub_load_dataset(self, ... method test_push_multiple_dataset_dict_configs_to_hub_readme_metadata_content (line 815) | def test_push_multiple_dataset_dict_configs_to_hub_readme_metadata_con... method test_push_dataset_to_hub_with_config_no_metadata_configs (line 880) | def test_push_dataset_to_hub_with_config_no_metadata_configs(self, tem... method test_push_dataset_dict_to_hub_with_config_no_metadata_configs (line 912) | def test_push_dataset_dict_to_hub_with_config_no_metadata_configs(self... method test_push_dataset_dict_to_hub_num_proc (line 946) | def test_push_dataset_dict_to_hub_num_proc(self, temporary_repo, set_c... method test_push_iterable_dataset_dict_to_hub (line 968) | def test_push_iterable_dataset_dict_to_hub(self, temporary_repo, set_c... method test_push_iterable_dataset_dict_to_hub_num_proc (line 985) | def test_push_iterable_dataset_dict_to_hub_num_proc(self, temporary_re... method test_push_iterable_dataset_to_hub (line 1008) | def test_push_iterable_dataset_to_hub(self, temporary_repo): method test_push_iterable_dataset_dict_to_hub_bucket (line 1024) | def test_push_iterable_dataset_dict_to_hub_bucket(self, temporary_buck... method test_push_iterable_dataset_to_hub_bucket (line 1042) | def test_push_iterable_dataset_to_hub_bucket(self, temporary_bucket): method test_push_sharded_iterable_dataset_to_hub_bucket (line 1059) | def test_push_sharded_iterable_dataset_to_hub_bucket(self, temporary_b... class DummyFolderBasedBuilder (line 1081) | class DummyFolderBasedBuilder(FolderBasedBuilder): function text_file_with_metadata (line 1090) | def text_file_with_metadata(request, tmp_path, text_file): class TestLoadFromHub (line 1114) | class TestLoadFromHub: method test_load_dataset_with_metadata_file (line 1118) | def test_load_dataset_with_metadata_file(self, temporary_repo, text_fi... method test_get_data_patterns (line 1144) | def test_get_data_patterns(self, temporary_repo, tmp_path): method test_load_dataset_raises_for_unauthenticated_user (line 1164) | def test_load_dataset_raises_for_unauthenticated_user( FILE: tests/test_version.py function test_version_equality_and_hash (line 19) | def test_version_equality_and_hash(other, expected_equality): FILE: tests/utils.py function parse_flag_from_env (line 25) | def parse_flag_from_env(key, default=False): function require_buckets_support_in_huggingface_hub (line 74) | def require_buckets_support_in_huggingface_hub(test_case): function require_regex (line 88) | def require_regex(test_case): function require_elasticsearch (line 102) | def require_elasticsearch(test_case): function require_sqlalchemy (line 116) | def require_sqlalchemy(test_case): function require_torch (line 130) | def require_torch(test_case): function require_torch_compile (line 142) | def require_torch_compile(test_case): function require_polars (line 156) | def require_polars(test_case): function require_tf (line 168) | def require_tf(test_case): function require_jax (line 180) | def require_jax(test_case): function require_pil (line 192) | def require_pil(test_case): function require_torchvision (line 204) | def require_torchvision(test_case): function require_torchcodec (line 216) | def require_torchcodec(test_case): function require_pdfplumber (line 228) | def require_pdfplumber(test_case): function require_nibabel (line 240) | def require_nibabel(test_case): function require_transformers (line 252) | def require_transformers(test_case): function require_tiktoken (line 267) | def require_tiktoken(test_case): function require_spacy (line 282) | def require_spacy(test_case): function require_pyspark (line 297) | def require_pyspark(test_case): function require_joblibspark (line 312) | def require_joblibspark(test_case): function require_torchdata_stateful_dataloader (line 327) | def require_torchdata_stateful_dataloader(test_case): function slow (line 342) | def slow(test_case): function local (line 355) | def local(test_case): function packaged (line 367) | def packaged(test_case): function remote (line 379) | def remote(test_case): function for_all_test_methods (line 391) | def for_all_test_methods(*decorators): class RequestWouldHangIndefinitelyError (line 403) | class RequestWouldHangIndefinitelyError(Exception): class OfflineSimulationMode (line 407) | class OfflineSimulationMode(Enum): function offline (line 414) | def offline(mode: OfflineSimulationMode): function set_current_working_directory_to_temp_dir (line 468) | def set_current_working_directory_to_temp_dir(*args, **kwargs): function assert_arrow_memory_increases (line 479) | def assert_arrow_memory_increases(): function assert_arrow_memory_doesnt_increase (line 489) | def assert_arrow_memory_doesnt_increase(): function is_rng_equal (line 498) | def is_rng_equal(rng1, rng2): function xfail_if_500_502_http_error (line 502) | def xfail_if_500_502_http_error(func): class _RunOutput (line 522) | class _RunOutput: method __init__ (line 523) | def __init__(self, returncode, stdout, stderr): function _read_stream (line 529) | async def _read_stream(stream, callback): function _stream_subprocess (line 538) | async def _stream_subprocess(cmd, env=None, stdin=None, timeout=None, qu... function execute_subprocess_async (line 579) | def execute_subprocess_async(cmd, env=None, stdin=None, timeout=180, qui... function pytest_xdist_worker_id (line 601) | def pytest_xdist_worker_id(): function get_torch_dist_unique_port (line 611) | def get_torch_dist_unique_port(): FILE: utils/release.py function update_version_in_file (line 31) | def update_version_in_file(fname, version, pattern): function global_version_update (line 42) | def global_version_update(version): function get_version (line 48) | def get_version(): function pre_release_work (line 56) | def pre_release_work(patch=False): function post_release_work (line 78) | def post_release_work():