Repository: olirice/flupy
Branch: master
Commit: 1bc446fd3efc
Files: 25
Total size: 61.7 KB
Directory structure:
gitextract_nws95u0a/
├── .coveragerc
├── .github/
│ └── workflows/
│ ├── pre-commit_hooks.yaml
│ └── test.yml
├── .gitignore
├── .pre-commit-config.yaml
├── .readthedocs.yml
├── .version
├── CONTRIBUTING.md
├── LICENSE.md
├── README.md
├── benchmark/
│ └── test_benchmark.py
├── mypy.ini
├── pyproject.toml
├── pytest.ini
├── setup.cfg
└── src/
├── flupy/
│ ├── __init__.py
│ ├── cli/
│ │ ├── __init__.py
│ │ ├── cli.py
│ │ └── utils.py
│ ├── fluent.py
│ └── py.typed
└── tests/
├── test_cli.py
├── test_cli_utils.py
├── test_flu.py
└── test_version.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .coveragerc
================================================
[report]
exclude_lines =
pragma: no cover
if TYPE_CHECKING:
raise AssertionError
raise NotImplementedError
@overload
pass
================================================
FILE: .github/workflows/pre-commit_hooks.yaml
================================================
name: pre-commit hooks
on: [push]
jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v1
- name: python setup 3.9
uses: actions/setup-python@v1
with:
python-version: '3.9'
- name: Install Poetry
uses: snok/install-poetry@v1
with:
version: 1.7.1
virtualenvs-create: true
virtualenvs-in-project: true
- name: Install dependencies
run: |
poetry install --with dev
- name: run tests
run: |
poetry run pre-commit run --all
================================================
FILE: .github/workflows/test.yml
================================================
name: tests
on: [push]
jobs:
build:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
steps:
- uses: actions/checkout@v1
- name: python setup ${{ matrix.python-version }}
uses: actions/setup-python@v1
with:
python-version: ${{ matrix.python-version }}
- name: Install Poetry
uses: snok/install-poetry@v1
with:
version: 1.7.1
virtualenvs-create: true
virtualenvs-in-project: true
- name: Install dependencies
run: |
poetry install --with dev
- name: run tests
run: |
poetry run pytest --cov=src/flupy src/tests --cov-report=xml
- name: upload coverage to codecov
uses: codecov/codecov-action@v1
with:
token: ${{ secrets.CODECOV_TOKEN }}
file: ./coverage.xml
flags: unittests
name: codecov-umbrella
fail_ci_if_error: true
================================================
FILE: .gitignore
================================================
docs/*
# Temporary Python files
*.pyc
*.egg-info
__pycache__
.ipynb_checkpoints
# pyenv
.python-version
.benchmarks
poetry.lock
pip-wheel-metadata/
.vscode
# Temporary OS files
Icon*
# Pytest cache
.pytest_cache/*
# Virtual environment
venv/*
# Temporary virtual environment files
/.cache/
/.venv/
# Temporary server files
.env
*.pid
*.swp
# Generated documentation
/docs/gen/
/docs/apidocs/
/docs/_build/
/site/
/*.html
/*.rst
/docs/*.png
# Google Drive
*.gdoc
*.gsheet
*.gslides
*.gdraw
# Testing and coverage results
/.pytest/
/.coverage
/.coverage.*
/htmlcov/
/xmlreport/
/pyunit.xml
/tmp/
*.tmp
# Build and release directories
/build/
/dist/
*.spec
# Sublime Text
*.sublime-workspace
# Eclipse
.settings
# LLMs
CLAUDE.md
================================================
FILE: .pre-commit-config.yaml
================================================
repos:
- repo: https://github.com/pre-commit/mirrors-isort
rev: v5.10.1
hooks:
- id: isort
args: ['--multi-line=3', '--trailing-comma', '--force-grid-wrap=0', '--use-parentheses', '--line-width=88']
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0
hooks:
- id: trailing-whitespace
- id: check-added-large-files
- id: check-yaml
- id: mixed-line-ending
args: ['--fix=lf']
- repo: https://github.com/humitos/mirrors-autoflake.git
rev: v1.1
hooks:
- id: autoflake
args: ['--in-place', '--remove-all-unused-imports']
- repo: https://github.com/psf/black
rev: 25.1.0
hooks:
- id: black
language_version: python3.9
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.17.0
hooks:
- id: mypy
files: flupy/
args: ["--config-file", "mypy.ini"]
================================================
FILE: .readthedocs.yml
================================================
# Read the Docs configuration file
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
# Required
version: 2
# Set the OS, Python version and other tools you might need
build:
os: ubuntu-22.04
tools:
python: "3.11"
jobs:
post_create_environment:
# Install poetry
- pip install poetry
post_install:
# Install dependencies with Poetry
# VIRTUAL_ENV needs to be set manually for Poetry to work correctly
- VIRTUAL_ENV=$READTHEDOCS_VIRTUALENV_PATH poetry install --with dev
# Build documentation in the "docs/" directory with Sphinx
sphinx:
configuration: docs/conf.py
================================================
FILE: .version
================================================
1.0.11
================================================
FILE: CONTRIBUTING.md
================================================
# For Contributors
## Setup
### Requirements
* Make:
* Windows: http://mingw.org/download/installer
* Mac: http://developer.apple.com/xcode
* Linux: http://www.gnu.org/software/make
* pipenv: http://docs.pipenv.org
* Pandoc: http://johnmacfarlane.net/pandoc/installing.html
* Graphviz: http://www.graphviz.org/Download.php
To confirm these system dependencies are configured correctly:
```sh
$ make doctor
```
### Installation
Install project dependencies into a virtual environment:
```sh
$ make install
```
## Development Tasks
### Testing
Manually run the tests:
```sh
$ make test
```
or keep them running on change:
```sh
$ make watch
```
> In order to have OS X notifications, `brew install terminal-notifier`.
### Documentation
Build the documentation:
```sh
$ make docs
```
### Static Analysis
Run linters and static analyzers:
```sh
$ make pylint
$ make pycodestyle
$ make pydocstyle
$ make check # includes all checks
```
## Continuous Integration
The CI server will report overall build status:
```sh
$ make ci
```
## Release Tasks
Release to PyPI:
```sh
$ make upload
```
================================================
FILE: LICENSE.md
================================================
# License
**The MIT License (MIT)**
Copyright © 2017, Oliver Rice
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
================================================
FILE: README.md
================================================
# flupy
---
**Documentation**: https://flupy.readthedocs.io/en/latest/
**Source Code**: https://github.com/olirice/flupy
---
## Overview
Flupy implements a [fluent interface](https://en.wikipedia.org/wiki/Fluent_interface) for operating on python iterables. All flupy methods return generators and are evaluated lazily. This allows expressions to transform arbitrary size data in extremely limited memory.
You can think of flupy as a light weight, 0 dependency, pure python alternative to the excellent [Apache Spark](https://spark.apache.org/) project.
## Setup
### Requirements
* Python 3.9+
### Installation
Install flupy with pip:
```sh
$ pip install flupy
```
### Library
```python
from itertools import count
from flupy import flu
# Processing an infinite sequence in constant memory
pipeline = (
flu(count())
.map(lambda x: x**2)
.filter(lambda x: x % 517 == 0)
.chunk(5)
.take(3)
)
for item in pipeline:
print(item)
# Returns:
# [0, 267289, 1069156, 2405601, 4276624]
# [6682225, 9622404, 13097161, 17106496, 21650409]
# [26728900, 32341969, 38489616, 45171841, 52388644]
```
### CLI
The flupy command line interface brings the same syntax for lazy pipelines to your shell. Inputs to the `flu` command are auto-populated into a `Fluent` context named `_`.
````
$ flu -h
usage: flu [-h] [-f FILE] [-i [IMPORT [IMPORT ...]]] command
flupy: a fluent interface for python
positional arguments:
command flupy command to execute on input
optional arguments:
-h, --help show this help message and exit
-f FILE, --file FILE path to input file
-i [IMPORT [IMPORT ...]], --import [IMPORT [IMPORT ...]]
modules to import
Syntax: ::
Examples:
'import os' = '-i os'
'import os as op_sys' = '-i os::op_sys'
'from os import environ' = '-i os:environ'
'from os import environ as env' = '-i os:environ:env'
````
================================================
FILE: benchmark/test_benchmark.py
================================================
"""Benchmarks for the flupy fluent interface, driven by pytest-benchmark.

Each test hands a small pipeline to the *benchmark* fixture; the fixture
calls the decorated ``work`` function repeatedly and records timings.
"""
from itertools import cycle

from flupy import flu


def test_integration(benchmark):
    # End-to-end pipeline touching several chained operators
    @benchmark
    def work():
        (flu(range(100000)).chunk(100).chunk(2).map_item(0).count())


def test_max(benchmark):
    @benchmark
    def work():
        flu(range(300000)).max()


def test_initialize(benchmark):
    # Measures construction cost only; the range is never consumed
    @benchmark
    def work():
        flu(range(10))


def test_collect(benchmark):
    @benchmark
    def work():
        flu(range(3)).collect()


def test___getitem__(benchmark):
    @benchmark
    def work():
        flu(range(350))[1:3].collect()


def test_sum(benchmark):
    @benchmark
    def work():
        flu(range(1000)).sum()


def test_reduce(benchmark):
    @benchmark
    def work():
        flu(range(50)).reduce(lambda x, y: x + y)


def test_fold_left(benchmark):
    @benchmark
    def work():
        flu(range(5)).fold_left(lambda x, y: x + y, 0)


def test_count(benchmark):
    @benchmark
    def work():
        flu(range(3000)).count()


def test_min(benchmark):
    @benchmark
    def work():
        flu(range(3000)).min()


def test_first(benchmark):
    @benchmark
    def work():
        flu(range(3)).first()


def test_last(benchmark):
    @benchmark
    def work():
        flu(range(3000)).last()


def test_head(benchmark):
    @benchmark
    def work():
        flu(range(30000)).head(n=10)


def test_tail(benchmark):
    @benchmark
    def work():
        flu(range(30000)).tail(n=10)


def test_unique(benchmark):
    # NOTE(review): NoHash and its instances are unused by the benchmark body;
    # retained to avoid changing what this benchmark has historically set up.
    class NoHash:
        def __init__(self, letter, keyf):
            self.letter = letter
            self.keyf = keyf

    a = NoHash("a", 1)
    b = NoHash("b", 1)
    c = NoHash("c", 2)

    data = [x % 500 for x in range(10000)]

    @benchmark
    def work():
        flu(data).unique().collect()


def test_sort(benchmark):
    @benchmark
    def work():
        flu(range(3000, 0, -1)).sort().collect()


def test_shuffle(benchmark):
    original_order = list(range(10000))

    @benchmark
    def work():
        flu(original_order).shuffle().collect()


def test_map(benchmark):
    @benchmark
    def work():
        flu(range(3)).map(lambda x: x + 2).collect()


def test_rate_limit(benchmark):
    # A huge per_second value makes the sleep effectively zero, so this
    # measures the rate-limiting machinery itself rather than waiting.
    @benchmark
    def work():
        flu(range(300)).rate_limit(50000000000000).collect()


def test_map_item(benchmark):
    data = flu(range(300)).map(lambda x: {"a": x})

    @benchmark
    def work():
        flu(data).map_item("a")


def test_map_attr(benchmark):
    class Person:
        def __init__(self, age: int) -> None:
            self.age = age

    people = flu(range(200)).map(Person).collect()

    @benchmark
    def work():
        flu(people).map_attr("age").collect()


def test_filter(benchmark):
    @benchmark
    def work():
        flu(range(3)).filter(lambda x: 0 < x < 2).collect()


def test_take(benchmark):
    @benchmark
    def work():
        flu(range(10)).take(5).collect()


def test_take_while(benchmark):
    @benchmark
    def work():
        flu(cycle(range(10))).take_while(lambda x: x < 4).collect()


def test_drop_while(benchmark):
    @benchmark
    def work():
        flu([1, 2, 3, 4, 3, 2, 1]).drop_while(lambda x: x < 4).collect()


def test_group_by(benchmark):
    @benchmark
    def work():
        flu([1, 1, 1, 2, 2, 2, 2, 3]).zip(range(100)).group_by(lambda x: x[0]).collect()


def test_chunk(benchmark):
    @benchmark
    def work():
        flu(range(500)).chunk(2).collect()


def test_enumerate(benchmark):
    @benchmark
    def work():
        flu(range(3)).enumerate(start=1).collect()


def test_zip(benchmark):
    @benchmark
    def work():
        flu(range(3)).zip(range(3)).collect()


def test_zip_longest(benchmark):
    @benchmark
    def work():
        flu(range(3)).zip_longest(range(5)).collect()


def test_window(benchmark):
    @benchmark
    def work():
        # BUG FIX: previously ended with ".collect" (no call), which only
        # fetched the bound method and never materialized the windows.
        flu(range(5)).window(n=3, step=3).collect()


def test_flatten(benchmark):
    nested = [1, [2, (3, [4])], ["rbsd", "abc"], (7,)]

    @benchmark
    def work():
        flu(nested).flatten(depth=2, base_type=tuple).collect()


def test_tee(benchmark):
    @benchmark
    def work():
        gen1, gen2, gen3 = flu(range(100)).tee(3)


def test_join_left(benchmark):
    @benchmark
    def work():
        flu(range(6)).join_left(range(0, 6, 2)).collect()


def test_join_inner(benchmark):
    @benchmark
    def work():
        flu(range(6)).join_inner(range(0, 6, 2)).collect()
================================================
FILE: mypy.ini
================================================
[mypy]
ignore_missing_imports = True
strict_optional = True
follow_imports = skip
warn_redundant_casts = True
warn_unused_ignores = False
check_untyped_defs = True
no_implicit_reexport = True
# Strict Mode:
disallow_untyped_defs = True
disallow_any_generics = True
================================================
FILE: pyproject.toml
================================================
[tool.poetry]
name = "flupy"
version = "1.2.3"
description = "Fluent data processing in Python - a chainable stream processing library for expressive data manipulation using method chaining"
authors = ["Oliver Rice "]
license = "MIT"
readme = "README.md"
repository = "https://github.com/olirice/flupy"
packages = [{include = "flupy", from = "src"}]
classifiers = [
"Development Status :: 5 - Production/Stable",
"Natural Language :: English",
"Operating System :: OS Independent",
"Programming Language :: Python",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
]
[tool.poetry.dependencies]
python = ">=3.9"
typing_extensions = ">=4"
[tool.poetry.group.dev.dependencies]
pytest = "*"
pytest-cov = "*"
pytest-benchmark = "*"
pre-commit = "*"
pylint = "*"
black = "*"
mypy = "*"
sphinx = "*"
sphinx-rtd-theme = "*"
[tool.poetry.scripts]
flu = "flupy.cli.cli:main"
flu_precommit = "flupy.cli.cli:precommit"
[build-system]
requires = ["poetry-core>=2.0.0"]
build-backend = "poetry.core.masonry.api"
[tool.black]
line-length = 120
exclude = '''
/(
\.git
| \.hg
| \.mypy_cache
| \.tox
| \.venv
| _build
| buck-out
| build
| dist
)/
'''
[tool.mypy]
python_version = "3.9"
ignore_missing_imports = true
strict_optional = true
follow_imports = "skip"
warn_redundant_casts = true
warn_unused_ignores = false
check_untyped_defs = true
no_implicit_reexport = true
disallow_untyped_defs = true
disallow_any_generics = true
[tool.pytest.ini_options]
addopts = "--cov=src/flupy src/tests"
[tool.coverage.report]
exclude_lines = [
"pragma: no cover",
"if TYPE_CHECKING:",
"raise AssertionError",
"raise NotImplementedError",
"@overload",
"pass",
]
================================================
FILE: pytest.ini
================================================
[pytest]
addopts = --cov=src/flupy src/tests
================================================
FILE: setup.cfg
================================================
[metadata]
description_file = README.md
================================================
FILE: src/flupy/__init__.py
================================================
# Public package surface for flupy.
from importlib.metadata import version

from flupy.cli.utils import walk_dirs, walk_files
from flupy.fluent import flu

__project__ = "flupy"
# Resolve the installed distribution's version from package metadata so
# __version__ never drifts from what pip/poetry installed.
__version__ = version(__project__)

__all__ = ["flu", "walk_files", "walk_dirs"]
================================================
FILE: src/flupy/cli/__init__.py
================================================
================================================
FILE: src/flupy/cli/cli.py
================================================
import argparse
import importlib
import sys
from typing import Any, Dict, Generator, List, Optional
from flupy import __version__, flu, walk_dirs, walk_files
def read_file(path: str) -> Generator[str, None, None]:
    """Lazily yield each line (trailing newline included) from the file at *path*."""
    with open(path, "r") as source:
        for line in source:
            yield line
def parse_args(args: List[str]) -> argparse.Namespace:
    """Parse input arguments

    :param args: argv-style list of tokens, excluding the program name
    :return: namespace exposing ``command``, ``file``, and ``import`` attributes
    """
    parser = argparse.ArgumentParser(
        description="flupy: a fluent interface for python collections",
        # RawTextHelpFormatter preserves the literal newlines/tabs embedded in
        # the multi-line help text of --import below.
        formatter_class=argparse.RawTextHelpFormatter,
    )
    parser.add_argument("-v", "--version", action="version", version="%(prog)s " + __version__)
    parser.add_argument("command", help="command to execute against input")
    parser.add_argument("-f", "--file", help="path to input file")
    parser.add_argument(
        "-i",
        "--import",
        nargs="*",
        default=[],
        help="modules to import\n"
        "Syntax: ::\n"
        "Examples:\n"
        "\t'import os' = '-i os'\n"
        "\t'import os as op_sys' = '-i os::op_sys'\n"
        "\t'from os import environ' = '-i os:environ'\n"
        "\t'from os import environ as env' = '-i os:environ:env'\n",
    )
    return parser.parse_args(args)
def build_import_dict(imps: List[str]) -> Dict[str, Any]:
    """Resolve CLI import specs into a name -> object mapping.

    Each spec has the shape ``module[:attribute[:alias]]``; an empty attribute
    segment (``module::alias``) aliases the module itself.
    """
    namespace: Dict[str, Any] = {}
    for spec in imps:
        module_name, _, remainder = spec.partition(":")
        attr_name, _, alias = remainder.partition(":")
        loaded = importlib.import_module(module_name)
        if attr_name:
            namespace[alias or attr_name] = getattr(loaded, attr_name)
        else:
            namespace[alias or module_name] = loaded
    return namespace
def main(argv: Optional[List[str]] = None) -> None:
    """CLI Entrypoint

    Builds a lazy pipeline ``_`` from --file (or stdin), evaluates the
    user-supplied command string against it, and prints the result.
    """
    args = parse_args(argv[1:] if argv is not None else sys.argv[1:])

    _command = args.command
    _file = args.file
    # "import" is a Python keyword, so the attribute cannot be read as args.import
    _import = getattr(args, "import")

    import_dict = build_import_dict(_import)

    if _file:
        _ = flu(read_file(_file)).map(str.rstrip)
    else:
        try:
            # Restore the default SIGPIPE handler so piping into e.g. `head`
            # terminates cleanly instead of raising BrokenPipeError.
            from signal import SIG_DFL, SIGPIPE, signal

            signal(SIGPIPE, SIG_DFL)
        except ImportError:
            # SIGPIPE not available on platform (e.g. Windows), nothing to do
            pass
        _ = flu(sys.stdin).map(str.rstrip)

    # Names available to the evaluated command, with `_` bound to the input pipeline
    locals_dict = {
        "flu": flu,
        "_": _,
        "walk_files": walk_files,
        "walk_dirs": walk_dirs,
    }

    # NOTE(review): eval of the user-supplied string is the point of this CLI
    # (the command IS the program), but never feed it untrusted input.
    pipeline = eval(_command, import_dict, locals_dict)

    # Iterables (other than strings/bytes) stream one line per element; None
    # (e.g. a side-effecting command) prints nothing; scalars print directly.
    if hasattr(pipeline, "__iter__") and not isinstance(pipeline, (str, bytes)):
        for r in pipeline:
            sys.stdout.write(str(r) + "\n")
    elif pipeline is None:
        pass
    else:
        sys.stdout.write(str(pipeline) + "\n")
================================================
FILE: src/flupy/cli/utils.py
================================================
# pylint: disable=invalid-name
import os
from typing import Generator
from flupy.fluent import Fluent, flu
def walk_files(*pathes: str, abspath: bool = True) -> "Fluent[str]":
    """Yield files recursively starting from each location in *pathes*

    Defaults to the current directory when no paths are given. When *abspath*
    is True (the default), absolute paths are yielded.
    """
    search_roots = pathes if pathes != () else (".",)

    def _gen() -> Generator[str, None, None]:
        for root in search_roots:
            for dirpath, _, filenames in os.walk(root):
                for filename in filenames:
                    joined = os.path.join(dirpath, filename)
                    yield os.path.abspath(joined) if abspath else joined

    return flu(_gen())
def walk_dirs(path: str = ".") -> "Fluent[str]":
"""Yield files recursively starting from *path"""
def _impl() -> Generator[str, None, None]:
for d, _, _ in os.walk(path):
yield d
return flu(_impl())
================================================
FILE: src/flupy/fluent.py
================================================
# pylint: disable=invalid-name
import time
from collections import defaultdict, deque
from collections.abc import Iterable as IterableType
from functools import reduce
from itertools import dropwhile, groupby, islice, product, takewhile, tee, zip_longest
from random import sample
from typing import (
Any,
Callable,
Collection,
Deque,
Dict,
Generator,
Generic,
Hashable,
Iterable,
Iterator,
List,
Optional,
Set,
Tuple,
Type,
TypeVar,
Union,
overload,
)
from typing_extensions import Concatenate, ParamSpec, Protocol
__all__ = ["flu"]

# Type variables shared across the fluent interface
T = TypeVar("T")
T_co = TypeVar("T_co", covariant=True)
T_contra = TypeVar("T_contra", contravariant=True)
_T1 = TypeVar("_T1")
_T2 = TypeVar("_T2")
_T3 = TypeVar("_T3")
S = TypeVar("S")
P = ParamSpec("P")

# A callable that materializes an iterable into a concrete container (list, set, tuple, ...)
CallableTakesIterable = Callable[[Iterable[T]], Collection[T]]


class SupportsEquality(Protocol):
    """Structural type for objects comparable with ``==``."""

    def __eq__(self, __other: object) -> bool:
        pass


class SupportsGetItem(Protocol[T_co]):
    """Structural type for objects supporting ``obj[key]`` subscripting."""

    def __getitem__(self, __k: Hashable) -> T_co:
        pass


class SupportsIteration(Protocol[T_co]):
    """Structural type for iterable objects."""

    # Annotation corrected: the return type must use the protocol's covariant
    # parameter T_co (was Iterator[T], an unrelated type variable).
    def __iter__(self) -> Iterator[T_co]:
        pass


class SupportsLessThan(Protocol):
    """Structural type for objects orderable with ``<``."""

    def __lt__(self, __other: Any) -> bool:
        pass


SupportsLessThanT = TypeVar("SupportsLessThanT", bound="SupportsLessThan")


class Empty:
    """Sentinel type distinguishing "no value supplied" from ``None``."""

    pass


def identity(x: T) -> T:
    """Return *x* unchanged; the default key function."""
    return x
class Fluent(Generic[T]):
"""A fluent interface to lazy generator functions
>>> from flupy import flu
>>> (
flu(range(100))
.map(lambda x: x**2)
.filter(lambda x: x % 3 == 0)
.chunk(3)
.take(2)
.to_list()
)
[[0, 9, 36], [81, 144, 225]]
"""
def __init__(self, iterable: Iterable[T]) -> None:
    """Wrap *iterable* in the fluent interface, coercing it to an iterator."""
    self._iterator: Iterator[T] = iter(iterable)
@overload
def __getitem__(self, index: int) -> T:
    pass

@overload
def __getitem__(self, index: slice) -> "Fluent[T]":
    pass

def __getitem__(self, key: Union[int, slice]) -> Union[T, "Fluent[T]"]:
    """Index or slice the stream, consuming the underlying iterator as needed.

    :param key: a non-negative int (returns the element at that offset) or a
        slice (returns a new lazy ``Fluent`` over the sliced elements)
    :raises IndexError: when an int index is beyond the end of the iterator
    :raises TypeError: for negative ints or keys that are not int/slice
    """
    if isinstance(key, int) and key >= 0:
        try:
            return next(islice(self._iterator, key, key + 1))
        except StopIteration:
            # Suppress the StopIteration context (PEP 409) so callers see a
            # clean IndexError, matching built-in sequence behavior.
            raise IndexError("flu index out of range") from None
    elif isinstance(key, slice):
        return flu(islice(self._iterator, key.start, key.stop, key.step))
    else:
        raise TypeError(f"Indices must be non-negative integers or slices, not {type(key).__name__}")
### Summary ###
def collect(self, n: Optional[int] = None, container_type: CallableTakesIterable[T] = list) -> Collection[T]:
    """Collect items from iterable into a container

    :param n: maximum number of items to collect; ``None`` collects everything
    :param container_type: constructor for the returned container

    >>> flu(range(4)).collect()
    [0, 1, 2, 3]

    >>> flu(range(4)).collect(container_type=set)
    {0, 1, 2, 3}

    >>> flu(range(4)).collect(n=2)
    [0, 1]
    """
    # take(None) passes the whole stream through, so one code path serves both cases
    return container_type(self.take(n))
def to_list(self) -> List[T]:
    """Collect items from iterable into a list, exhausting the stream

    >>> flu(range(4)).to_list()
    [0, 1, 2, 3]
    """
    return list(self)
def sum(self) -> Union[T, int]:
    """Sum of elements in the iterable

    The ``int`` in the return union covers the empty stream, where the
    built-in ``sum`` returns 0.

    >>> flu([1,2,3]).sum()
    6
    """
    return sum(self)  # type: ignore
def count(self) -> int:
    """Count of elements in the iterable, exhausting the stream

    >>> flu(['a','b','c']).count()
    3
    """
    total = 0
    for _ in self:
        total += 1
    return total
def min(self: "Fluent[SupportsLessThanT]") -> SupportsLessThanT:
    """Smallest element in the iterable

    :raises ValueError: when the iterable is empty (from built-in ``min``)

    >>> flu([1, 3, 0, 2]).min()
    0
    """
    return min(self)
def max(self: "Fluent[SupportsLessThanT]") -> SupportsLessThanT:
    """Largest element in the iterable

    :raises ValueError: when the iterable is empty (from built-in ``max``)

    >>> flu([0, 3, 2, 1]).max()
    3
    """
    return max(self)
def first(self, default: Any = Empty()) -> T:
    """Return the first item of the iterable. Raise IndexError if empty, or return default if provided.

    >>> flu([0, 1, 2, 3]).first()
    0

    >>> flu([]).first(default="some_default")
    'some_default'
    """
    for item in self:
        return item
    # Stream was empty: an Empty sentinel means no default was supplied
    if isinstance(default, Empty):
        raise IndexError("Empty iterator")
    return default
def last(self, default: Any = Empty()) -> T:
    """Return the last item of the iterable. Raise IndexError if empty, or return default if provided.

    >>> flu([0, 1, 2, 3]).last()
    3

    >>> flu([]).last(default='some_default')
    'some_default'
    """
    final: Union[Empty, T] = default
    # A length-1 deque retains only the most recent element in O(1) memory
    buffer: Deque[T] = deque(self, maxlen=1)
    if buffer:
        final = buffer[0]
    if isinstance(final, Empty):
        raise IndexError("Empty iterator")
    return final
def head(self, n: int = 10, container_type: CallableTakesIterable[T] = list) -> Collection[T]:
    """Returns up to the first *n* elements from the iterable.

    :param n: maximum number of leading elements to return
    :param container_type: constructor for the returned container

    >>> flu(range(20)).head()
    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

    >>> flu(range(15)).head(n=2)
    [0, 1]

    >>> flu([]).head()
    []
    """
    return self.take(n).collect(container_type=container_type)
def tail(self, n: int = 10, container_type: CallableTakesIterable[T] = list) -> Collection[T]:
    """Return up to the last *n* elements from the iterable

    >>> flu(range(20)).tail()
    [10, 11, 12, 13, 14, 15, 16, 17, 18, 19]

    >>> flu(range(15)).tail(n=2)
    [13, 14]
    """
    # Slide a window of size n (padded with Empty sentinels) across the
    # stream; after exhaustion, the final window holds the last n elements.
    val: Union[List[Empty], Tuple[Any, ...]] = [Empty()]
    for val in self.window(n, fill_value=Empty()):
        pass
    # Strip sentinel padding, present when the stream was shorter than n
    return container_type([x for x in val if not isinstance(x, Empty)])
### End Summary ###
### Non-Constant Memory ###
def sort(
    self: "Fluent[SupportsLessThanT]",
    key: Optional[Callable[[Any], Any]] = None,
    reverse: bool = False,
) -> "Fluent[SupportsLessThanT]":
    """Sort iterable by *key* function if provided or identity otherwise

    Note: sorting loads the entire iterable into memory

    >>> flu([3,6,1]).sort().to_list()
    [1, 3, 6]

    >>> flu([3,6,1]).sort(reverse=True).to_list()
    [6, 3, 1]

    >>> flu([3,-6,1]).sort(key=abs).to_list()
    [1, 3, -6]
    """
    ordered = sorted(self, key=key, reverse=reverse)
    return Fluent(ordered)
def join_left(
    self,
    other: Iterable[_T1],
    key: Callable[[T], Hashable] = identity,
    other_key: Callable[[_T1], Hashable] = identity,
) -> "Fluent[Tuple[T, Union[_T1, None]]]":
    """Join the iterable with another iterable using equality between *key* applied to self and *other_key* applied to *other* to identify matching entries

    When no matching entry is found in *other*, entries in the iterable are paired with None

    Note: join_left loads *other* into memory

    >>> flu(range(6)).join_left(range(0, 6, 2)).to_list()
    [(0, 0), (1, None), (2, 2), (3, None), (4, 4), (5, None)]
    """

    def _impl() -> Generator[Tuple[T, Union[_T1, None]], None, None]:
        # Hash join: index *other* by key once, then stream over self
        other_lookup = defaultdict(list)
        for entry_other in other:
            other_lookup[other_key(entry_other)].append(entry_other)

        for entry in self:
            matches: Optional[List[_T1]] = other_lookup.get(key(entry))
            if matches:
                # One output pair per matching right-hand entry
                for match in matches:
                    yield (entry, match)
            else:
                yield (entry, None)

    return Fluent(_impl())
def join_inner(
    self,
    other: Iterable[_T1],
    key: Callable[[T], Hashable] = identity,
    other_key: Callable[[_T1], Hashable] = identity,
) -> "Fluent[Tuple[T, _T1]]":
    """Join the iterable with another iterable using equality between *key* applied to self and *other_key* applied to *other* to identify matching entries

    When no matching entry is found in *other*, entries in the iterable are filtered from the results

    Note: join_inner loads *other* into memory

    >>> flu(range(6)).join_inner(range(0, 6, 2)).to_list()
    [(0, 0), (2, 2), (4, 4)]
    """

    def _impl() -> Generator[Tuple[T, _T1], None, None]:
        # Hash join: index *other* by key once, then stream over self
        other_lookup = defaultdict(list)
        for entry_other in other:
            other_lookup[other_key(entry_other)].append(entry_other)

        for entry in self:
            # defaultdict returns [] for unmatched keys, so the inner loop
            # silently skips entries with no right-hand match
            matches: List[_T1] = other_lookup[key(entry)]
            for match in matches:
                yield (entry, match)

    return Fluent(_impl())
def join_full(
    self,
    other: Iterable[_T1],
    key: Callable[[T], Hashable] = identity,
    other_key: Callable[[_T1], Hashable] = identity,
) -> "Fluent[Tuple[Union[T, None], Union[_T1, None]]]":
    """Join the iterable with another iterable using equality between *key* applied to self and *other_key* applied to *other* to identify matching entries

    Returns all entries from both iterables. When no matching entry is found, entries are paired with None

    Note: join_full loads both *self* and *other* into memory

    >>> flu(range(4)).join_full(range(2, 6)).to_list()
    [(0, None), (1, None), (2, 2), (3, 3), (None, 4), (None, 5)]
    """

    def _impl() -> Generator[Tuple[Union[T, None], Union[_T1, None]], None, None]:
        # Build lookup for other
        other_lookup: Dict[Hashable, List[_T1]] = defaultdict(list)
        other_keys_seen: Set[Hashable] = set()
        for entry_other in other:
            other_key_val = other_key(entry_other)
            other_lookup[other_key_val].append(entry_other)
            other_keys_seen.add(other_key_val)

        # Track which keys from other have been matched
        matched_other_keys: Set[Hashable] = set()

        # Process all entries from self (left-join half of the output)
        for entry in self:
            entry_key = key(entry)
            matches: Optional[List[_T1]] = other_lookup.get(entry_key)
            if matches:
                matched_other_keys.add(entry_key)
                for match in matches:
                    yield (entry, match)
            else:
                yield (entry, None)

        # Yield unmatched entries from other, paired with None on the left.
        # Set difference order is not guaranteed; rows within one key keep
        # their original order from *other*.
        unmatched_keys = other_keys_seen - matched_other_keys
        for unmatched_key in unmatched_keys:
            for entry_other in other_lookup[unmatched_key]:
                yield (None, entry_other)

    return Fluent(_impl())
def shuffle(self) -> "Fluent[T]":
    """Randomize the order of elements in the iterable

    Note: shuffle loads the entire iterable into memory

    >>> flu([3,6,1]).shuffle().to_list()
    [6, 1, 3]
    """
    # sample() with k == len(pool) yields a full random permutation
    pool: List[T] = self.to_list()
    return Fluent(sample(pool, len(pool)))
def group_by(
    self, key: Callable[[T], Union[T, _T1]] = identity, sort: bool = True
) -> "Fluent[Tuple[Union[T,_T1], Fluent[T]]]":
    """Yield consecutive keys and groups from the iterable

    *key* is a function to compute a key value used in grouping and sorting for each element. *key* defaults to an identity function which returns the unchanged element

    When the iterable is pre-sorted according to *key*, setting *sort* to False will prevent loading the dataset into memory and improve performance

    >>> flu([2, 4, 2, 4]).group_by().to_list()
    [(2, ), (4, )]

    Or, if the iterable is pre-sorted

    >>> flu([2, 2, 5, 5]).group_by(sort=False).to_list()
    [(2, ), (5, )]

    Using a key function

    >>> points = [
        {'x': 1, 'y': 0},
        {'x': 4, 'y': 3},
        {'x': 1, 'y': 5}
    ]
    >>> key_func = lambda u: u['x']
    >>> flu(points).group_by(key=key_func, sort=True).to_list()
    [(1, ), (4, )]
    """
    # itertools.groupby only groups *consecutive* equal keys, so the stream
    # must be sorted by the same key unless the caller promises it already is
    gen = self.sort(key) if sort else self
    # Materialize each group eagerly: groupby shares a single underlying
    # iterator, so a group becomes invalid once the next key is fetched
    return Fluent(groupby(gen, key)).map(lambda x: (x[0], flu([y for y in x[1]])))
def unique(self, key: Callable[[T], Hashable] = identity) -> "Fluent[T]":
    """Yield elements that are unique by a *key*.

    Note: the set of seen keys grows with the number of distinct keys

    >>> flu([2, 3, 2, 3]).unique().to_list()
    [2, 3]

    >>> flu([2, -3, -2, 3]).unique(key=abs).to_list()
    [2, -3]
    """

    def _deduplicated() -> Generator[T, None, None]:
        observed: Set[Any] = set()
        for element in self:
            marker = key(element)
            if marker not in observed:
                observed.add(marker)
                yield element

    return Fluent(_deduplicated())
### End Non-Constant Memory ###
### Side Effect ###
def rate_limit(self, per_second: Union[int, float] = 100) -> "Fluent[T]":
    """Restrict consumption of iterable to n item *per_second*

    >>> import time
    >>> start_time = time.time()
    >>> _ = flu(range(3)).rate_limit(3).to_list()
    >>> print('Runtime', int(time.time() - start_time))
    1.00126 # approximately 1 second for 3 items
    """

    def _impl() -> Generator[T, None, None]:
        wait_time = 1.0 / per_second
        for val in self:
            start_time = time.time()
            yield val
            # Sleep only for the remainder of the per-item budget, crediting
            # time the consumer already spent processing the yielded value
            call_duration = time.time() - start_time
            time.sleep(max(wait_time - call_duration, 0.0))

    return Fluent(_impl())
def side_effect(
    self,
    func: Callable[[T], Any],
    before: Optional[Callable[[], Any]] = None,
    after: Optional[Callable[[], Any]] = None,
) -> "Fluent[T]":
    """Invoke *func* for each item in the iterable before yielding the item.

    *func* takes a single argument and the output is discarded

    *before* and *after* are optional functions that take no parameters and are executed once before iteration begins
    and after iteration ends respectively. Each will be called exactly once.

    >>> flu(range(2)).side_effect(lambda x: print(f'Collected {x}')).to_list()
    Collected 0
    Collected 1
    [0, 1]
    """

    def _impl() -> Generator[T, None, None]:
        try:
            if before is not None:
                before()

            for x in self:
                func(x)
                yield x

        finally:
            # try/finally guarantees *after* runs even when the consumer
            # abandons the generator early or an exception propagates
            if after is not None:
                after()

    return Fluent(_impl())
### End Side Effect ###
def map(self, func: Callable[Concatenate[T, P], _T1], *args: Any, **kwargs: Any) -> "Fluent[_T1]":
    """Apply *func* to each element of iterable

    Extra positional and keyword arguments are forwarded to every call of *func*.

    >>> flu(range(5)).map(lambda x: x*x).to_list()
    [0, 1, 4, 9, 16]
    """

    def _transformed() -> Generator[_T1, None, None]:
        for element in self._iterator:
            yield func(element, *args, **kwargs)

    return Fluent(_transformed())
def map_item(self: "Fluent[SupportsGetItem[T]]", item: Hashable) -> "Fluent[T]":
    """Extracts *item* from every element of the iterable

    >>> flu([(2, 4), (2, 5)]).map_item(1).to_list()
    [4, 5]

    >>> flu([{'mykey': 8}, {'mykey': 5}]).map_item('mykey').to_list()
    [8, 5]
    """

    def _picked() -> Generator[T, None, None]:
        for record in self:
            yield record[item]

    return Fluent(_picked())
def map_attr(self, attr: str) -> "Fluent[Any]":
"""Extracts the attribute *attr* from each element of the iterable
>>> from collections import namedtuple
>>> MyTup = namedtuple('MyTup', ['value', 'backup_val'])
>>> flu([MyTup(1, 5), MyTup(2, 4)]).map_attr('value').to_list()
[1, 2]
"""
return self.map(lambda x: getattr(x, attr))
def filter(self, func: Callable[Concatenate[T, P], bool], *args: Any, **kwargs: Any) -> "Fluent[T]":
"""Yield elements of iterable where *func* returns truthy
>>> flu(range(10)).filter(lambda x: x % 2 == 0).to_list()
[0, 2, 4, 6, 8]
"""
def _impl() -> Generator[T, None, None]:
for val in self._iterator:
if func(val, *args, **kwargs):
yield val
return Fluent(_impl())
def reduce(self, func: Callable[[T, T], T]) -> T:
"""Apply a function of two arguments cumulatively to the items of the iterable,
from left to right, so as to reduce the sequence to a single value
>>> flu(range(5)).reduce(lambda x, y: x + y)
10
"""
return reduce(func, self)
def fold_left(self, func: Callable[[S, T], S], initial: S) -> S:
"""Apply a function of two arguments cumulatively to the items of the iterable,
from left to right, starting with *initial*, so as to fold the sequence to
a single value
>>> flu(range(5)).fold_left(lambda x, y: x + str(y), "")
'01234'
"""
return reduce(func, self, initial)
@overload
def zip(self, __iter1: Iterable[_T1]) -> "Fluent[Tuple[T, _T1]]": ...
@overload
def zip(self, __iter1: Iterable[_T1], __iter2: Iterable[_T2]) -> "Fluent[Tuple[T, _T1, _T2]]": ...
@overload
def zip(
self, __iter1: Iterable[_T1], __iter2: Iterable[_T2], __iter3: Iterable[_T3]
) -> "Fluent[Tuple[T, _T1, _T2, _T3]]": ...
@overload
def zip(
self,
__iter1: Iterable[Any],
__iter2: Iterable[Any],
__iter3: Iterable[Any],
__iter4: Iterable[Any],
*iterable: Iterable[Any],
) -> "Fluent[Tuple[T, ...]]": ...
def zip(self, *iterable: Iterable[Any]) -> Union[
"Fluent[Tuple[T, ...]]",
"Fluent[Tuple[T, _T1]]",
"Fluent[Tuple[T, _T1, _T2]]",
"Fluent[Tuple[T, _T1, _T2, _T3]]",
]:
"""Yields tuples containing the i-th element from the i-th
argument in the instance, and the iterable
>>> flu(range(5)).zip(range(3, 0, -1)).to_list()
[(0, 3), (1, 2), (2, 1)]
"""
# @self_to_flu is not compatible with @overload
# make sure any usage of self supports arbitrary iterables
tup_iter = zip(iter(self), *iterable)
return Fluent(tup_iter)
def zip_longest(self, *iterable: Iterable[_T1], fill_value: Any = None) -> "Fluent[Tuple[T, ...]]":
"""Yields tuples containing the i-th element from the i-th
argument in the instance, and the iterable
Iteration continues until the longest iterable is exhaused.
If iterables are uneven in length, missing values are filled in with fill value
>>> flu(range(5)).zip_longest(range(3, 0, -1)).to_list()
[(0, 3), (1, 2), (2, 1), (3, None), (4, None)]
>>> flu(range(5)).zip_longest(range(3, 0, -1), fill_value='a').to_list()
[(0, 3), (1, 2), (2, 1), (3, 'a'), (4, 'a')]
"""
return Fluent(zip_longest(self, *iterable, fillvalue=fill_value))
def enumerate(self, start: int = 0) -> "Fluent[Tuple[int, T]]":
"""Yields tuples from the instance where the first element
is a count from initial value *start*.
>>> flu([3,4,5]).enumerate().to_list()
[(0, 3), (1, 4), (2, 5)]
"""
return Fluent(enumerate(self, start=start))
def take(self, n: Optional[int] = None) -> "Fluent[T]":
"""Yield first *n* items of the iterable
>>> flu(range(10)).take(2).to_list()
[0, 1]
"""
return Fluent(islice(self._iterator, n))
def take_while(self, predicate: Callable[[T], bool]) -> "Fluent[T]":
"""Yield elements from the chainable so long as the predicate is true
>>> flu(range(10)).take_while(lambda x: x < 3).to_list()
[0, 1, 2]
"""
return Fluent(takewhile(predicate, self._iterator))
def drop_while(self, predicate: Callable[[T], bool]) -> "Fluent[T]":
"""Drop elements from the chainable as long as the predicate is true;
afterwards, return every element
>>> flu(range(10)).drop_while(lambda x: x < 3).to_list()
[3, 4, 5, 6, 7, 8, 9]
"""
return Fluent(dropwhile(predicate, self._iterator))
def chunk(self, n: int) -> "Fluent[List[T]]":
"""Yield lists of elements from iterable in groups of *n*
if the iterable is not evenly divisiible by *n*, the final list will be shorter
>>> flu(range(10)).chunk(3).to_list()
[[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
"""
def _impl() -> Generator[List[T], None, None]:
while True:
vals: List[T] = list(self.take(n))
if vals:
yield vals
else:
return
return Fluent(_impl())
def flatten(
self,
depth: int = 1,
base_type: Optional[Type[object]] = None,
iterate_strings: bool = False,
) -> "Fluent[Any]":
"""Recursively flatten nested iterables (e.g., a list of lists of tuples)
into non-iterable type or an optional user-defined base_type
Strings are treated as non-iterable for convenience. set iterate_string=True
to change that behavior.
>>> flu([[0, 1, 2], [3, 4, 5]]).flatten().to_list()
[0, 1, 2, 3, 4, 5]
>>> flu([[0, [1, 2]], [[3, 4], 5]]).flatten().to_list()
[0, [1, 2], [3, 4], 5]
>>> flu([[0, [1, 2]], [[3, 4], 5]]).flatten(depth=2).to_list()
[0, 1, 2, 3, 4, 5]
>>> flu([[0, [1, 2]], [[3, 4], 5]]).flatten(depth=2).to_list()
[0, 1, 2, 3, 4, 5]
>>> flu([1, (2, 2), 4, [5, (6, 6, 6)]]).flatten(base_type=tuple).to_list()
[1, (2, 2), 4, 5, (6, 6, 6)]
>>> flu([[2, 0], 'abc', 3, [4]]).flatten(iterate_strings=True).to_list()
[2, 0, 'a', 'b', 'c', 3, 4]
"""
# TODO(OR): Reimplement with strong types
def walk(node: Any, level: int) -> Generator[T, None, None]:
if (
((depth is not None) and (level > depth))
or (isinstance(node, str) and not iterate_strings)
or ((base_type is not None) and isinstance(node, base_type))
):
yield node
return
try:
tree = iter(node)
except TypeError:
yield node
return
else:
for child in tree:
for val in walk(child, level + 1):
yield val
return Fluent(walk(self, level=0))
def denormalize(self: "Fluent[SupportsIteration[Any]]", iterate_strings: bool = False) -> "Fluent[Tuple[Any, ...]]":
"""Denormalize iterable components of each record
>>> flu([("abc", [1, 2, 3])]).denormalize().to_list()
[('abc', 1), ('abc', 2), ('abc', 3)]
>>> flu([("abc", [1, 2])]).denormalize(iterate_strings=True).to_list()
[('a', 1), ('a', 2), ('b', 1), ('b', 2), ('c', 1), ('c', 2)]
>>> flu([("abc", [])]).denormalize().to_list()
[]
"""
def _impl() -> Generator[Tuple[Any, ...], None, None]:
for record in self:
iter_elements: List[Iterable[Any]] = []
element: Any
for element in record:
# Check for string and string iteration is allowed
if isinstance(element, str) and iterate_strings:
iter_elements.append(element)
# Check for string and string iteration is not allowed
elif isinstance(element, str):
iter_elements.append([element])
# Check for iterable
elif isinstance(element, IterableType):
iter_elements.append(element)
# Check for non-iterable
else:
iter_elements.append([element])
for row in product(*iter_elements):
yield row
return Fluent(_impl())
def window(self, n: int, step: int = 1, fill_value: Any = None) -> "Fluent[Tuple[Any, ...]]":
"""Yield a sliding window of width *n* over the given iterable.
Each window will advance in increments of *step*:
If the length of the iterable does not evenly divide by the *step*
the final output is padded with *fill_value*
>>> flu(range(5)).window(3).to_list()
[(0, 1, 2), (1, 2, 3), (2, 3, 4)]
>>> flu(range(5)).window(n=3, step=2).to_list()
[(0, 1, 2), (2, 3, 4)]
>>> flu(range(9)).window(n=4, step=3).to_list()
[(0, 1, 2, 3), (3, 4, 5, 6), (6, 7, 8, None)]
>>> flu(range(9)).window(n=4, step=3, fill_value=-1).to_list()
[(0, 1, 2, 3), (3, 4, 5, 6), (6, 7, 8, -1)]
"""
def _impl() -> Generator[Tuple[Any, ...], None, None]:
if n < 0:
raise ValueError("n must be >= 0")
elif n == 0:
yield tuple()
return
if step < 1:
raise ValueError("step must be >= 1")
window: Deque[Any] = deque([], n)
append = window.append
# Initial deque fill
for _ in range(n):
append(next(self, fill_value))
yield tuple(window)
# Appending new items to the right causes old items to fall off the left
i = 0
for item in self:
append(item)
i = (i + 1) % step
if i % step == 0:
yield tuple(window)
# If there are items from the iterable in the window, pad with the given
# value and emit them.
if (i % step) and (step - i < n):
for _ in range(step - i):
append(fill_value)
yield tuple(window)
return Fluent(_impl())
def __iter__(self) -> "Fluent[T]":
return self
def __next__(self) -> T:
return next(self._iterator)
def tee(self, n: int = 2) -> "Fluent[Fluent[T]]":
"""Return n independent iterators from a single iterable
once tee() has made a split, the original iterable should not be used
anywhere else; otherwise, the iterable could get advanced without the
tee objects being informed
>>> copy1, copy2 = flu(range(5)).tee()
>>> copy1.sum()
10
>>> copy2.to_list()
[0, 1, 2, 3, 4]
"""
return Fluent((Fluent(x) for x in tee(self, n)))
class flu(Fluent[T]):
"""A fluent interface to lazy generator functions
>>> from flupy import flu
>>> (
flu(range(100))
.map(lambda x: x**2)
.filter(lambda x: x % 3 == 0)
.chunk(3)
.take(2)
.to_list()
)
[[0, 9, 36], [81, 144, 225]]
"""
================================================
FILE: src/flupy/py.typed
================================================
================================================
FILE: src/tests/test_cli.py
================================================
from tempfile import NamedTemporaryFile
import pytest
from flupy.cli.cli import build_import_dict, main, parse_args
def test_parse_args():
with pytest.raises(SystemExit) as cm:
parse_args([])
assert cm.exception.code == 2
args = parse_args(["_"])
assert args.command == "_"
args = parse_args(["_", "-i", "os:environ:env"])
assert "os:environ:env" in getattr(args, "import")
assert args.command == "_"
import_dict = build_import_dict(["json"])
assert "json" in import_dict
def test_build_import_dict():
import json
import_dict = build_import_dict(["json"])
assert "json" in import_dict
assert import_dict["json"] == json
import_dict = build_import_dict(["json:dumps"])
assert "dumps" in import_dict
assert import_dict["dumps"] == json.dumps
import_dict = build_import_dict(["json:dumps:ds"])
assert "ds" in import_dict
assert import_dict["ds"] == json.dumps
import_dict = build_import_dict(["json::j"])
assert "j" in import_dict
assert import_dict["j"] == json
def test_show_help(capsys):
with pytest.raises(SystemExit):
main(["flu", "-h"])
result = capsys.readouterr()
stdout = result.out
assert stdout.startswith("usage")
def test_show_version(capsys):
main(["flu", "flu(range(5)).collect()"])
result = capsys.readouterr()
stdout = result.out.replace("\n", "")
assert stdout.startswith("0")
def test_basic_pipeline(capsys):
main(["flu", "flu(range(5)).collect()"])
result = capsys.readouterr()
stdout = result.out.replace("\n", "")
assert stdout.startswith("0")
def test_pass_on_none_pipeline(capsys):
main(["flu", "None"])
result = capsys.readouterr()
stdout = result.out
assert stdout == ""
def test_non_iterable_non_none_pipeline(capsys):
main(["flu", '"hello_world"'])
result = capsys.readouterr()
stdout = result.out.strip("\n")
assert stdout == "hello_world"
def test_cli_walk_files(capsys):
main(["flu", "walk_files().head(2)"])
result = capsys.readouterr()
stdout = result.out.strip("\n").split("\n")
assert len(stdout) == 2
def test_cli_walk_dirs(capsys):
main(["flu", "walk_dirs().head(2)"])
result = capsys.readouterr()
stdout = result.out.strip("\n").split("\n")
assert len(stdout) == 2
def test_from_file(capsys):
with NamedTemporaryFile("w+") as f:
f.write("hello")
f.read()
f_name = f.name
main(["flu", "-f", f_name, "_.map(str.upper)"])
result = capsys.readouterr()
stdout = result.out.strip("\n")
assert stdout == "HELLO"
def test_glob_imports(capsys):
main(["flu", "flu(env).count()", "-i", "os:environ:env"])
result = capsys.readouterr()
stdout = result.out
assert stdout
================================================
FILE: src/tests/test_cli_utils.py
================================================
from flupy.cli.utils import walk_dirs, walk_files
def test_walk_files():
assert walk_files().head()
assert walk_files(abspath=False).head()
def test_walk_dirs():
assert walk_dirs().head()
================================================
FILE: src/tests/test_flu.py
================================================
import sys
from itertools import count, cycle
import pytest
from flupy import flu
def test_collect():
assert flu(range(3)).collect() == [0, 1, 2]
assert flu(range(3)).collect(container_type=tuple) == (0, 1, 2)
assert flu(range(3)).collect(n=2) == [0, 1]
def test_to_list():
assert flu(range(3)).to_list() == [0, 1, 2]
def test___getitem__():
assert flu(range(3))[1] == 1
assert flu(range(3))[1:].collect() == [1, 2]
assert flu(range(35))[1:2].collect() == [1]
assert flu(range(35))[1:3].collect() == [1, 2]
with pytest.raises(IndexError):
flu([1])[4]
with pytest.raises((KeyError, TypeError)):
flu([1])["not an index"]
def test_sum():
gen = flu(range(3))
assert gen.sum() == 3
def test_reduce():
gen = flu(range(5))
assert gen.reduce(lambda x, y: x + y) == 10
def test_fold_left():
assert flu(range(5)).fold_left(lambda x, y: x + y, 0) == 10
assert flu(range(5)).fold_left(lambda x, y: x + str(y), "") == "01234"
def test_count():
gen = flu(range(3))
assert gen.count() == 3
def test_min():
gen = flu(range(3))
assert gen.min() == 0
def test_first():
gen = flu(range(3))
assert gen.first() == 0
gen = flu([])
with pytest.raises(IndexError):
gen.first()
gen = flu([])
assert gen.first(default=1) == 1
def test_last():
gen = flu(range(3))
assert gen.last() == 2
gen = flu([])
with pytest.raises(IndexError):
gen.last()
gen = flu([])
assert gen.last(default=1) == 1
def test_head():
gen = flu(range(30))
assert gen.head(n=2) == [0, 1]
gen = flu(range(30))
assert gen.head(n=3, container_type=set) == set([0, 1, 2])
gen = flu(range(3))
assert gen.head(n=50) == [0, 1, 2]
def test_tail():
gen = flu(range(30))
assert gen.tail(n=2) == [28, 29]
gen = flu(range(30))
assert gen.tail(n=3, container_type=set) == set([27, 28, 29])
gen = flu(range(3))
assert gen.tail(n=50) == [0, 1, 2]
def test_max():
gen = flu(range(3))
assert gen.max() == 2
def test_unique():
class NoHash:
def __init__(self, letter, keyf):
self.letter = letter
self.keyf = keyf
a = NoHash("a", 1)
b = NoHash("b", 1)
c = NoHash("c", 2)
gen = flu([a, b, c]).unique()
assert gen.collect() == [a, b, c]
gen = flu([a, b, c]).unique(lambda x: x.letter)
assert gen.collect() == [a, b, c]
gen = flu([a, b, c]).unique(lambda x: x.keyf)
assert gen.collect() == [a, c]
def test_side_effect():
class FakeFile:
def __init__(self):
self.is_open = False
self.content = []
def write(self, text):
if self.is_open:
self.content.append(text)
else:
raise IOError("fake file is not open for writing")
def open(self):
self.is_open = True
def close(self):
self.is_open = False
# Test the fake file
ffile = FakeFile()
ffile.open()
ffile.write("should be there")
ffile.close()
assert ffile.content[0] == "should be there"
with pytest.raises(IOError):
ffile.write("should fail")
# Reset fake file
ffile = FakeFile()
with pytest.raises(IOError):
flu(range(5)).side_effect(ffile.write).collect()
gen_result = flu(range(5)).side_effect(ffile.write, before=ffile.open, after=ffile.close).collect()
assert ffile.is_open == False
assert ffile.content == [0, 1, 2, 3, 4]
assert gen_result == [0, 1, 2, 3, 4]
def test_sort():
gen = flu(range(3, 0, -1)).sort()
assert gen.collect() == [1, 2, 3]
def test_shuffle():
original_order = list(range(10000))
new_order = flu(original_order).shuffle().collect()
assert new_order != original_order
assert len(new_order) == len(original_order)
assert sum(new_order) == sum(original_order)
def test_map():
gen = flu(range(3)).map(lambda x: x + 2)
assert gen.collect() == [2, 3, 4]
def test_rate_limit():
resA = flu(range(3)).collect()
resB = flu(range(3)).rate_limit(5000).collect()
assert resA == resB
def test_map_item():
gen = flu(range(3)).map(lambda x: {"a": x}).map_item("a")
assert gen.collect() == [0, 1, 2]
def test_map_attr():
class Person:
def __init__(self, age: int) -> None:
self.age = age
gen = flu(range(3)).map(lambda x: Person(x)).map_attr("age")
assert gen.collect() == [0, 1, 2]
def test_filter():
gen = flu(range(3)).filter(lambda x: 0 < x < 2)
assert gen.collect() == [1]
def test_take():
gen = flu(range(10)).take(5)
assert gen.collect() == [0, 1, 2, 3, 4]
def test_take_while():
gen = flu(cycle(range(10))).take_while(lambda x: x < 4)
assert gen.collect() == [0, 1, 2, 3]
def test_drop_while():
gen = flu([1, 2, 3, 4, 3, 2, 1]).drop_while(lambda x: x < 4)
assert gen.collect() == [4, 3, 2, 1]
def test_group_by():
gen = flu([1, 1, 1, 2, 2, 2, 2, 3]).zip(range(100)).group_by(lambda x: x[0])
g1, g2, g3 = gen.map(lambda x: (x[0], x[1].collect())).collect()
# Standard usage
assert g1 == (1, [(1, 0), (1, 1), (1, 2)])
assert g2 == (2, [(2, 3), (2, 4), (2, 5), (2, 6)])
assert g3 == (3, [(3, 7)])
# No param usage
v1 = flu(range(10)).group_by().map(lambda x: (x[0], list(x[1])))
v2 = flu(range(10)).map(lambda x: (x, [x]))
assert v1.collect() == v2.collect()
# Sort
gen = flu([1, 2, 1, 2]).group_by(lambda x: x, sort=False)
assert gen.count() == 4
gen = flu([1, 2, 1, 2]).group_by(lambda x: x, sort=True)
assert gen.count() == 2
# Identity Function
points = [{"x": 1, "y": 0}, {"x": 4, "y": 3}, {"x": 1, "y": 5}]
key_func = lambda u: u["x"]
gen = flu(points).group_by(key=key_func, sort=True).collect()
assert len(gen) == 2
assert gen[0][0] == 1
assert gen[1][0] == 4
assert len(gen[0][1].collect()) == 2
assert len(gen[1][1].collect()) == 1
def test_chunk():
gen = flu(range(5)).chunk(2)
assert gen.collect() == [[0, 1], [2, 3], [4]]
def test_next():
gen = flu(range(5))
assert next(gen) == 0
def test_iter():
gen = flu(range(5))
assert next(iter(gen)) == 0
def test_enumerate():
# Check default
gen = flu(range(3)).enumerate()
assert gen.collect() == [(0, 0), (1, 1), (2, 2)]
# Check start param
gen = flu(range(3)).enumerate(start=1)
assert gen.collect() == [(1, 0), (2, 1), (3, 2)]
def test_zip():
gen = flu(range(3)).zip(range(3))
assert gen.collect() == [(0, 0), (1, 1), (2, 2)]
gen2 = flu(range(3)).zip(range(3), range(2))
assert gen2.collect() == [(0, 0, 0), (1, 1, 1)]
def test_zip_longest():
gen = flu(range(3)).zip_longest(range(5))
assert gen.collect() == [(0, 0), (1, 1), (2, 2), (None, 3), (None, 4)]
gen = flu(range(3)).zip_longest(range(5), fill_value="a")
assert gen.collect() == [(0, 0), (1, 1), (2, 2), ("a", 3), ("a", 4)]
gen = flu(range(3)).zip_longest(range(5), range(4), fill_value="a")
assert gen.collect() == [(0, 0, 0), (1, 1, 1), (2, 2, 2), ("a", 3, 3), ("a", 4, "a")]
def test_window():
# Check default
gen = flu(range(5)).window(n=3)
assert gen.collect() == [(0, 1, 2), (1, 2, 3), (2, 3, 4)]
# Check step param
gen = flu(range(5)).window(n=3, step=3)
assert gen.collect() == [(0, 1, 2), (3, 4, None)]
# Check fill_value param
gen = flu(range(5)).window(n=3, step=3, fill_value="i")
assert gen.collect() == [(0, 1, 2), (3, 4, "i")]
assert flu(range(4)).window(n=0).collect() == [tuple()]
with pytest.raises(ValueError):
flu(range(5)).window(n=-1).collect()
with pytest.raises(ValueError):
flu(range(5)).window(3, step=0).collect()
def test_flu():
gen = flu(count()).map(lambda x: x**2).filter(lambda x: x % 517 == 0).chunk(5).take(3)
assert next(gen) == [0, 267289, 1069156, 2405601, 4276624]
def test_flatten():
nested = [1, [2, (3, [4])], ["rbsd", "abc"], (7,)]
# Defaults with depth of 1
gen = flu(nested).flatten()
assert [x for x in gen] == [1, 2, (3, [4]), "rbsd", "abc", 7]
# Depth 2
gen = flu(nested).flatten(depth=2)
assert [x for x in gen] == [1, 2, 3, [4], "rbsd", "abc", 7]
# Depth 3
gen = flu(nested).flatten(depth=3)
assert [x for x in gen] == [1, 2, 3, 4, "rbsd", "abc", 7]
# Depth infinite
gen = flu(nested).flatten(depth=sys.maxsize)
assert [x for x in gen] == [1, 2, 3, 4, "rbsd", "abc", 7]
# Depth 2 with tuple base_type
gen = flu(nested).flatten(depth=2, base_type=tuple)
assert [x for x in gen] == [1, 2, (3, [4]), "rbsd", "abc", (7,)]
# Depth 2 with iterate strings
gen = flu(nested).flatten(depth=2, base_type=tuple, iterate_strings=True)
assert [x for x in gen] == [1, 2, (3, [4]), "r", "b", "s", "d", "a", "b", "c", (7,)]
def test_denormalize():
content = [
["abc", [1, 2, 3]],
]
assert flu(content).denormalize().collect() == [("abc", 1), ("abc", 2), ("abc", 3)]
assert (flu(content).denormalize(iterate_strings=True).collect()) == [
("a", 1),
("a", 2),
("a", 3),
("b", 1),
("b", 2),
("b", 3),
("c", 1),
("c", 2),
("c", 3),
]
assert (flu([[[1], [1, 2], None]]).denormalize().collect()) == [
(1, 1, None),
(1, 2, None),
]
assert (flu([[[1], [1, 2], []]]).denormalize().collect()) == []
def test_tee():
# Default unpacking
gen1, gen2 = flu(range(100)).tee()
assert gen1.sum() == gen2.sum()
# adjusting *n* paramter
gen1, gen2, gen3 = flu(range(100)).tee(3)
assert gen1.sum() == gen3.sum()
# No sync progress
gen1, gen2 = flu(range(100)).tee()
assert next(gen1) == next(gen2)
# No break chaining
assert flu(range(5)).tee().map(sum).sum() == 20
def test_join_left():
# Default unpacking
res = flu(range(6)).join_left(range(0, 6, 2)).collect()
assert res == [(0, 0), (1, None), (2, 2), (3, None), (4, 4), (5, None)]
def test_join_inner():
# Default unpacking
res = flu(range(6)).join_inner(range(0, 6, 2)).collect()
assert res == [(0, 0), (2, 2), (4, 4)]
def test_join_full():
# Basic full join
res = flu(range(4)).join_full(range(2, 6)).collect()
assert res == [(0, None), (1, None), (2, 2), (3, 3), (None, 4), (None, 5)]
# Full join with custom keys
left = [{"id": 1, "name": "Alice"}, {"id": 2, "name": "Bob"}]
right = [{"id": 2, "value": 100}, {"id": 3, "value": 200}]
res = flu(left).join_full(right, key=lambda x: x["id"], other_key=lambda x: x["id"]).collect()
assert res == [
({"id": 1, "name": "Alice"}, None),
({"id": 2, "name": "Bob"}, {"id": 2, "value": 100}),
(None, {"id": 3, "value": 200}),
]
# Full join with empty left
res = flu([]).join_full(range(3)).collect()
assert res == [(None, 0), (None, 1), (None, 2)]
# Full join with empty right
res = flu(range(3)).join_full([]).collect()
assert res == [(0, None), (1, None), (2, None)]
# Full join with both empty
res = flu([]).join_full([]).collect()
assert res == []
# Full join with duplicates
res = flu([1, 2, 2, 3]).join_full([2, 2, 4]).collect()
expected = [(1, None), (2, 2), (2, 2), (2, 2), (2, 2), (3, None), (None, 4)] # 2x2 cartesian product
# Sort with custom key to handle None values
sort_key = lambda x: (
x[0] is None,
x[0] if x[0] is not None else -1,
x[1] is None,
x[1] if x[1] is not None else -1,
)
assert sorted(res, key=sort_key) == sorted(expected, key=sort_key)
================================================
FILE: src/tests/test_version.py
================================================
"""
Tests for version information.
"""
import re
import flupy
def test_version_format():
"""Test that __version__ follows semantic versioning format (MAJOR.MINOR.PATCH)."""
# Standard semver regex pattern
semver_pattern = r"^(?P0|[1-9]\d*)\.(?P0|[1-9]\d*)\.(?P0|[1-9]\d*)(?:-(?P(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+(?P[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$"
assert re.match(
semver_pattern, flupy.__version__
), f"Version '{flupy.__version__}' does not match semantic versioning format"
# Ensure version parts can be parsed as integers
major, minor, patch = flupy.__version__.split("-")[0].split("+")[0].split(".")[:3]
assert major.isdigit(), f"Major version '{major}' is not a valid integer"
assert minor.isdigit(), f"Minor version '{minor}' is not a valid integer"
assert patch.isdigit(), f"Patch version '{patch}' is not a valid integer"