[
  {
    "path": ".coveragerc",
    "content": "[report]\nexclude_lines =\n    pragma: no cover\n    if TYPE_CHECKING:\n    raise AssertionError\n    raise NotImplementedError\n    @overload\n    pass\n"
  },
  {
    "path": ".github/workflows/pre-commit_hooks.yaml",
    "content": "name: pre-commit hooks\n\non: [push]\n\njobs:\n  build:\n    runs-on: ubuntu-latest\n\n    steps:\n\n    - uses: actions/checkout@v1\n\n    - name: python setup 3.9\n      uses: actions/setup-python@v1\n      with:\n        python-version: '3.9'\n\n    - name: Install Poetry\n      uses: snok/install-poetry@v1\n      with:\n        version: 1.7.1\n        virtualenvs-create: true\n        virtualenvs-in-project: true\n\n    - name: Install dependencies\n      run: |\n        poetry install --with dev\n\n    - name: run tests\n      run: |\n        poetry run pre-commit run --all"
  },
  {
    "path": ".github/workflows/test.yml",
    "content": "name: tests\n\non: [push]\n\njobs:\n  build:\n    runs-on: ubuntu-latest\n\n    strategy:\n        matrix:\n            python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']\n\n    steps:\n\n    - uses: actions/checkout@v1\n\n    - name: python setup ${{ matrix.python-version }}\n      uses: actions/setup-python@v1\n      with:\n        python-version: ${{ matrix.python-version }}\n\n    - name: Install Poetry\n      uses: snok/install-poetry@v1\n      with:\n        version: 1.7.1\n        virtualenvs-create: true\n        virtualenvs-in-project: true\n\n    - name: Install dependencies\n      run: |\n        poetry install --with dev\n\n    - name: run tests\n      run: |\n        poetry run pytest --cov=src/flupy src/tests --cov-report=xml\n\n    - name: upload coverage to codecov\n      uses: codecov/codecov-action@v1\n      with:\n        token: ${{ secrets.CODECOV_TOKEN }}\n        file: ./coverage.xml\n        flags: unittests\n        name: codecov-umbrella\n        fail_ci_if_error: true\n"
  },
  {
    "path": ".gitignore",
    "content": "docs/*\n# Temporary Python files\n*.pyc\n*.egg-info\n__pycache__\n.ipynb_checkpoints\n\n# pyenv\n.python-version\n\n.benchmarks\npoetry.lock\n\npip-wheel-metadata/\n\n.vscode\n\n# Temporary OS files\nIcon*\n\n# Pytest cache\n.pytest_cache/*\n\n# Virtual environment\nvenv/*\n\n# Temporary virtual environment files\n/.cache/\n/.venv/\n\n# Temporary server files\n.env\n*.pid\n*.swp\n\n# Generated documentation\n/docs/gen/\n/docs/apidocs/\n/docs/_build/\n/site/\n/*.html\n/*.rst\n/docs/*.png\n\n# Google Drive\n*.gdoc\n*.gsheet\n*.gslides\n*.gdraw\n\n# Testing and coverage results\n/.pytest/\n/.coverage\n/.coverage.*\n/htmlcov/\n/xmlreport/\n/pyunit.xml\n/tmp/\n*.tmp\n\n# Build and release directories\n/build/\n/dist/\n*.spec\n\n# Sublime Text\n*.sublime-workspace\n\n# Eclipse\n.settings\n\n# LLMs\nCLAUDE.md\n"
  },
  {
    "path": ".pre-commit-config.yaml",
    "content": "repos:\n-   repo: https://github.com/pre-commit/mirrors-isort\n    rev: v5.10.1\n    hooks:\n    -   id: isort\n        args: ['--multi-line=3', '--trailing-comma', '--force-grid-wrap=0', '--use-parentheses', '--line-width=88']\n\n\n-   repo: https://github.com/pre-commit/pre-commit-hooks\n    rev: v5.0.0\n    hooks:\n    - id: trailing-whitespace\n    - id: check-added-large-files\n    - id: check-yaml\n    - id: mixed-line-ending\n      args: ['--fix=lf']\n\n-   repo: https://github.com/humitos/mirrors-autoflake.git\n    rev: v1.1\n    hooks:\n    -   id: autoflake\n        args: ['--in-place', '--remove-all-unused-imports']\n\n-   repo: https://github.com/psf/black\n    rev: 25.1.0\n    hooks:\n    - id: black\n      language_version: python3.9\n\n-   repo: https://github.com/pre-commit/mirrors-mypy\n    rev: v1.17.0\n    hooks:\n    -   id: mypy\n        files: flupy/\n        args: [\"--config-file\", \"mypy.ini\"]\n\n"
  },
  {
    "path": ".readthedocs.yml",
    "content": "# Read the Docs configuration file\n# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details\n\n# Required\nversion: 2\n\n# Set the OS, Python version and other tools you might need\nbuild:\n  os: ubuntu-22.04\n  tools:\n    python: \"3.11\"\n  jobs:\n    post_create_environment:\n      # Install poetry\n      - pip install poetry\n    post_install:\n      # Install dependencies with Poetry\n      # VIRTUAL_ENV needs to be set manually for Poetry to work correctly\n      - VIRTUAL_ENV=$READTHEDOCS_VIRTUALENV_PATH poetry install --with dev\n\n# Build documentation in the \"docs/\" directory with Sphinx\nsphinx:\n  configuration: docs/conf.py\n"
  },
  {
    "path": ".version",
    "content": "1.0.11\n"
  },
  {
    "path": "CONTRIBUTING.md",
    "content": "# For Contributors\n\n## Setup\n\n### Requirements\n\n* Make:\n    * Windows: http://mingw.org/download/installer\n    * Mac: http://developer.apple.com/xcode\n    * Linux: http://www.gnu.org/software/make\n* pipenv: http://docs.pipenv.org\n* Pandoc: http://johnmacfarlane.net/pandoc/installing.html\n* Graphviz: http://www.graphviz.org/Download.php\n\nTo confirm these system dependencies are configured correctly:\n\n```sh\n$ make doctor\n```\n\n### Installation\n\nInstall project dependencies into a virtual environment:\n\n```sh\n$ make install\n```\n\n## Development Tasks\n\n### Testing\n\nManually run the tests:\n\n```sh\n$ make test\n```\n\nor keep them running on change:\n\n```sh\n$ make watch\n```\n\n> In order to have OS X notifications, `brew install terminal-notifier`.\n\n### Documentation\n\nBuild the documentation:\n\n```sh\n$ make docs\n```\n\n### Static Analysis\n\nRun linters and static analyzers:\n\n```sh\n$ make pylint\n$ make pycodestyle\n$ make pydocstyle\n$ make check  # includes all checks\n```\n\n## Continuous Integration\n\nThe CI server will report overall build status:\n\n```sh\n$ make ci\n```\n\n## Release Tasks\n\nRelease to PyPI:\n\n```sh\n$ make upload\n```\n"
  },
  {
    "path": "LICENSE.md",
    "content": "# License\n\n**The MIT License (MIT)**\n\nCopyright &copy; 2017, Oliver Rice\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in\nall copies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\nTHE SOFTWARE.\n"
  },
  {
    "path": "README.md",
    "content": "# flupy\n\n<p>\n\n<a href=\"https://flupy.readthedocs.io/en/latest/?badge=latest\"><img src=\"https://readthedocs.org/projects/flupy/badge/?version=latest\" alt=\"Tests\" height=\"18\"></a>\n<a href=\"https://codecov.io/gh/olirice/flupy\"><img src=\"https://codecov.io/gh/olirice/flupy/branch/master/graph/badge.svg\" height=\"18\"></a>\n<a href=\"https://github.com/psf/black\">\n        <img src=\"https://img.shields.io/badge/code%20style-black-000000.svg\" alt=\"Codestyle Black\" height=\"18\">\n    </a>\n</p>\n\n<p>\n    <a href=\"https://www.python.org/downloads/\"><img src=\"https://img.shields.io/badge/python-3.9+-blue.svg\" alt=\"Python version\" height=\"18\"></a>\n  <a href=\"https://badge.fury.io/py/flupy\"><img src=\"https://badge.fury.io/py/flupy.svg\" alt=\"PyPI version\" height=\"18\"></a>\n    <a href=\"https://github.com/olirice/flupy/blob/master/LICENSE\"><img src=\"https://img.shields.io/pypi/l/markdown-subtemplate.svg\" alt=\"License\" height=\"18\"></a>\n    <a href=\"https://pypi.org/project/flupy/\"><img src=\"https://img.shields.io/pypi/dm/flupy.svg\" alt=\"Download count\" height=\"18\"></a>\n</p>\n\n---\n\n**Documentation**: <a href=\"https://flupy.readthedocs.io/en/latest/\" target=\"_blank\">https://flupy.readthedocs.io/en/latest/</a>\n\n**Source Code**: <a href=\"https://github.com/olirice/flupy\" target=\"_blank\">https://github.com/olirice/flupy</a>\n\n---\n\n## Overview\nFlupy implements a [fluent interface](https://en.wikipedia.org/wiki/Fluent_interface) for operating on python iterables. All flupy methods return generators and are evaluated lazily. This allows expressions to transform arbitrary size data in extremely limited memory.\n\nYou can think of flupy as a light weight, 0 dependency, pure python alternative to the excellent [Apache Spark](https://spark.apache.org/) project.\n\n## Setup\n\n### Requirements\n\n* Python 3.9+\n\n### Installation\n\nInstall flupy with pip:\n```sh\n$ pip install flupy\n```\n\n### Library\n```python\nfrom itertools import count\nfrom flupy import flu\n\n# Processing an infinite sequence in constant memory\npipeline = (\n    flu(count())\n    .map(lambda x: x**2)\n    .filter(lambda x: x % 517 == 0)\n    .chunk(5)\n    .take(3)\n)\n\nfor item in pipeline:\n  print(item)\n\n# Returns:\n# [0, 267289, 1069156, 2405601, 4276624]\n# [6682225, 9622404, 13097161, 17106496, 21650409]\n# [26728900, 32341969, 38489616, 45171841, 52388644]\n```\n\n### CLI\nThe flupy command line interface brings the same syntax for lazy piplines to your shell. Inputs to the `flu` command are auto-populated into a `Fluent` context named `_`.\n````\n$ flu -h\nusage: flu [-h] [-f FILE] [-i [IMPORT [IMPORT ...]]] command\n\nflupy: a fluent interface for python\n\npositional arguments:\n  command               flupy command to execute on input\n\noptional arguments:\n  -h, --help            show this help message and exit\n  -f FILE, --file FILE  path to input file\n  -i [IMPORT [IMPORT ...]], --import [IMPORT [IMPORT ...]]\n                        modules to import\n                        Syntax: <module>:<object>:<alias>\n                        Examples:\n                                'import os' = '-i os'\n                                'import os as op_sys' = '-i os::op_sys'\n                                'from os import environ' = '-i os:environ'\n                                'from os import environ as env' = '-i os:environ:env'\n````\n"
  },
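  {
    "path": "examples/readme_pipeline.py",
    "content": "\"\"\"Illustrative sketch of the README pipeline (an added example, not shipped with the package).\n\nShows how flupy evaluates lazily: the pipeline below is defined over an\ninfinite sequence, but only the three requested chunks are ever computed.\nUses only the public API demonstrated in README.md.\n\"\"\"\nfrom itertools import count\n\nfrom flupy import flu\n\n# Nothing is computed at definition time; each step returns a new lazy Fluent.\npipeline = (\n    flu(count())                      # 0, 1, 2, ... (infinite)\n    .map(lambda x: x**2)              # square each element lazily\n    .filter(lambda x: x % 517 == 0)   # keep multiples of 517\n    .chunk(5)                         # group results into lists of 5\n    .take(3)                          # materialize at most 3 chunks\n)\n\nif __name__ == \"__main__\":\n    for item in pipeline:\n        print(item)\n"
  },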
  {
    "path": "benchmark/test_benchmark.py",
    "content": "from itertools import cycle\n\nfrom flupy import flu\n\n\ndef test_integration(benchmark):\n    @benchmark\n    def work():\n        (flu(range(100000)).chunk(100).chunk(2).map_item(0).count())\n\n\ndef test_max(benchmark):\n    @benchmark\n    def work():\n        flu(range(300000)).max()\n\n\ndef test_initialize(benchmark):\n    @benchmark\n    def work():\n        flu(range(10))\n\n\ndef test_collect(benchmark):\n    @benchmark\n    def work():\n        flu(range(3)).collect()\n\n\ndef test___getitem__(benchmark):\n    @benchmark\n    def work():\n        flu(range(350))[1:3].collect()\n\n\ndef test_sum(benchmark):\n    @benchmark\n    def work():\n        gen = flu(range(1000)).sum()\n\n\ndef test_reduce(benchmark):\n    @benchmark\n    def work():\n        flu(range(50)).reduce(lambda x, y: x + y)\n\n\ndef test_fold_left(benchmark):\n    @benchmark\n    def work():\n        flu(range(5)).fold_left(lambda x, y: x + y, 0)\n\n\ndef test_count(benchmark):\n    @benchmark\n    def work():\n        gen = flu(range(3000)).count()\n\n\ndef test_min(benchmark):\n    @benchmark\n    def work():\n        flu(range(3000)).min()\n\n\ndef test_first(benchmark):\n    @benchmark\n    def work():\n        flu(range(3)).first()\n\n\ndef test_last(benchmark):\n    @benchmark\n    def work():\n        flu(range(3000)).last()\n\n\ndef test_head(benchmark):\n    @benchmark\n    def work():\n        flu(range(30000)).head(n=10)\n\n\ndef test_tail(benchmark):\n    @benchmark\n    def work():\n        gen = flu(range(30000)).tail(n=10)\n\n\ndef test_unique(benchmark):\n    class NoHash:\n        def __init__(self, letter, keyf):\n            self.letter = letter\n            self.keyf = keyf\n\n    a = NoHash(\"a\", 1)\n    b = NoHash(\"b\", 1)\n    c = NoHash(\"c\", 2)\n\n    data = [x % 500 for x in range(10000)]\n\n    @benchmark\n    def work():\n        gen = flu(data).unique().collect()\n\n\ndef test_sort(benchmark):\n    @benchmark\n    def work():\n        flu(range(3000, 0, -1)).sort().collect()\n\n\ndef test_shuffle(benchmark):\n    original_order = list(range(10000))\n\n    @benchmark\n    def work():\n        flu(original_order).shuffle().collect()\n\n\ndef test_map(benchmark):\n    @benchmark\n    def work():\n        flu(range(3)).map(lambda x: x + 2).collect()\n\n\ndef test_rate_limit(benchmark):\n    @benchmark\n    def work():\n        flu(range(300)).rate_limit(50000000000000).collect()\n\n\ndef test_map_item(benchmark):\n    data = flu(range(300)).map(lambda x: {\"a\": x})\n\n    @benchmark\n    def work():\n        gen = flu(data).map_item(\"a\")\n\n\ndef test_map_attr(benchmark):\n    class Person:\n        def __init__(self, age: int) -> None:\n            self.age = age\n\n    people = flu(range(200)).map(Person).collect()\n\n    @benchmark\n    def work():\n        flu(people).map_attr(\"age\").collect()\n\n\ndef test_filter(benchmark):\n    @benchmark\n    def work():\n        flu(range(3)).filter(lambda x: 0 < x < 2).collect()\n\n\ndef test_take(benchmark):\n    @benchmark\n    def work():\n        flu(range(10)).take(5).collect()\n\n\ndef test_take_while(benchmark):\n    @benchmark\n    def work():\n        flu(cycle(range(10))).take_while(lambda x: x < 4).collect()\n\n\ndef test_drop_while(benchmark):\n    @benchmark\n    def work():\n        flu([1, 2, 3, 4, 3, 2, 1]).drop_while(lambda x: x < 4).collect()\n\n\ndef test_group_by(benchmark):\n    @benchmark\n    def work():\n        flu([1, 1, 1, 2, 2, 2, 2, 3]).zip(range(100)).group_by(lambda x: 
x[0]).collect()\n\n\ndef test_chunk(benchmark):\n    @benchmark\n    def work():\n        flu(range(500)).chunk(2).collect()\n\n\ndef test_enumerate(benchmark):\n    @benchmark\n    def work():\n        flu(range(3)).enumerate(start=1).collect()\n\n\ndef test_zip(benchmark):\n    @benchmark\n    def work():\n        flu(range(3)).zip(range(3)).collect()\n\n\ndef test_zip_longest(benchmark):\n    @benchmark\n    def work():\n        flu(range(3)).zip_longest(range(5)).collect()\n\n\ndef test_window(benchmark):\n    @benchmark\n    def work():\n        gen = flu(range(5)).window(n=3, step=3).collect\n\n\ndef test_flatten(benchmark):\n    nested = [1, [2, (3, [4])], [\"rbsd\", \"abc\"], (7,)]\n\n    @benchmark\n    def work():\n        gen = flu(nested).flatten(depth=2, base_type=tuple).collect()\n\n\ndef test_tee(benchmark):\n    @benchmark\n    def work():\n        gen1, gen2, gen3 = flu(range(100)).tee(3)\n\n\ndef test_join_left(benchmark):\n    @benchmark\n    def work():\n        flu(range(6)).join_left(range(0, 6, 2)).collect()\n\n\ndef test_join_inner(benchmark):\n    @benchmark\n    def work():\n        flu(range(6)).join_inner(range(0, 6, 2)).collect()\n"
  },
  {
    "path": "mypy.ini",
    "content": "[mypy]\nignore_missing_imports = True\nstrict_optional = True\nfollow_imports = skip\nwarn_redundant_casts = True\nwarn_unused_ignores = False\ncheck_untyped_defs = True\nno_implicit_reexport = True\n\n# Strict Mode:\ndisallow_untyped_defs = True\ndisallow_any_generics = True\n"
  },
  {
    "path": "pyproject.toml",
    "content": "[tool.poetry]\nname = \"flupy\"\nversion = \"1.2.3\"\ndescription = \"Fluent data processing in Python - a chainable stream processing library for expressive data manipulation using method chaining\"\nauthors = [\"Oliver Rice <oliver@oliverrice.com>\"]\nlicense = \"MIT\"\nreadme = \"README.md\"\nrepository = \"https://github.com/olirice/flupy\"\npackages = [{include = \"flupy\", from = \"src\"}]\nclassifiers = [\n    \"Development Status :: 5 - Production/Stable\",\n    \"Natural Language :: English\",\n    \"Operating System :: OS Independent\",\n    \"Programming Language :: Python\",\n    \"Programming Language :: Python :: 3\",\n    \"Programming Language :: Python :: 3.9\",\n    \"Programming Language :: Python :: 3.10\",\n    \"Programming Language :: Python :: 3.11\",\n    \"Programming Language :: Python :: 3.12\",\n    \"Programming Language :: Python :: 3.13\",\n]\n\n[tool.poetry.dependencies]\npython = \">=3.9\"\ntyping_extensions = \">=4\"\n\n[tool.poetry.group.dev.dependencies]\npytest = \"*\"\npytest-cov = \"*\"\npytest-benchmark = \"*\"\npre-commit = \"*\"\npylint = \"*\"\nblack = \"*\"\nmypy = \"*\"\nsphinx = \"*\"\nsphinx-rtd-theme = \"*\"\n\n[tool.poetry.scripts]\nflu = \"flupy.cli.cli:main\"\nflu_precommit = \"flupy.cli.cli:precommit\"\n\n[build-system]\nrequires = [\"poetry-core>=2.0.0\"]\nbuild-backend = \"poetry.core.masonry.api\"\n\n[tool.black]\nline-length = 120\nexclude = '''\n/(\n    \\.git\n  | \\.hg\n  | \\.mypy_cache\n  | \\.tox\n  | \\.venv\n  | _build\n  | buck-out\n  | build\n  | dist\n)/\n'''\n\n[tool.mypy]\npython_version = \"3.9\"\nignore_missing_imports = true\nstrict_optional = true\nfollow_imports = \"skip\"\nwarn_redundant_casts = true\nwarn_unused_ignores = false\ncheck_untyped_defs = true\nno_implicit_reexport = true\ndisallow_untyped_defs = true\ndisallow_any_generics = true\n\n[tool.pytest.ini_options]\naddopts = \"--cov=src/flupy src/tests\"\n\n[tool.coverage.report]\nexclude_lines = [\n    \"pragma: no cover\",\n    \"if TYPE_CHECKING:\",\n    \"raise AssertionError\",\n    \"raise NotImplementedError\",\n    \"@overload\",\n    \"pass\",\n]\n"
  },
  {
    "path": "pytest.ini",
    "content": "[pytest]\naddopts = --cov=src/flupy src/tests\n"
  },
  {
    "path": "setup.cfg",
    "content": "[metadata]\ndescription_file = README.md\n"
  },
  {
    "path": "src/flupy/__init__.py",
    "content": "from importlib.metadata import version\n\nfrom flupy.cli.utils import walk_dirs, walk_files\nfrom flupy.fluent import flu\n\n__project__ = \"flupy\"\n__version__ = version(__project__)\n\n__all__ = [\"flu\", \"walk_files\", \"walk_dirs\"]\n"
  },
  {
    "path": "src/flupy/cli/__init__.py",
    "content": ""
  },
  {
    "path": "src/flupy/cli/cli.py",
    "content": "import argparse\nimport importlib\nimport sys\nfrom typing import Any, Dict, Generator, List, Optional\n\nfrom flupy import __version__, flu, walk_dirs, walk_files\n\n\ndef read_file(path: str) -> Generator[str, None, None]:\n    \"\"\"Yield lines from a file given its path\"\"\"\n    with open(path, \"r\") as f:\n        yield from f\n\n\ndef parse_args(args: List[str]) -> argparse.Namespace:\n    \"\"\"Parse input arguments\"\"\"\n    parser = argparse.ArgumentParser(\n        description=\"flupy: a fluent interface for python collections\",\n        formatter_class=argparse.RawTextHelpFormatter,\n    )\n    parser.add_argument(\"-v\", \"--version\", action=\"version\", version=\"%(prog)s \" + __version__)\n    parser.add_argument(\"command\", help=\"command to execute against input\")\n    parser.add_argument(\"-f\", \"--file\", help=\"path to input file\")\n    parser.add_argument(\n        \"-i\",\n        \"--import\",\n        nargs=\"*\",\n        default=[],\n        help=\"modules to import\\n\"\n        \"Syntax: <module>:<object>:<alias>\\n\"\n        \"Examples:\\n\"\n        \"\\t'import os' = '-i os'\\n\"\n        \"\\t'import os as op_sys' = '-i os::op_sys'\\n\"\n        \"\\t'from os import environ' = '-i os:environ'\\n\"\n        \"\\t'from os import environ as env' = '-i os:environ:env'\\n\",\n    )\n    return parser.parse_args(args)\n\n\ndef build_import_dict(imps: List[str]) -> Dict[str, Any]:\n    \"\"\"Execute CLI scoped imports\"\"\"\n    import_dict = {}\n    for imp_stx in imps:\n        module, _, obj_alias = imp_stx.partition(\":\")\n        obj, _, alias = obj_alias.partition(\":\")\n\n        if not obj:\n            import_dict[alias or module] = importlib.import_module(module)\n        else:\n            _garb = importlib.import_module(module)\n            import_dict[alias or obj] = getattr(_garb, obj)\n    return import_dict\n\n\ndef main(argv: Optional[List[str]] = None) -> None:\n    \"\"\"CLI Entrypoint\"\"\"\n    args = parse_args(argv[1:] if argv is not None else sys.argv[1:])\n\n    _command = args.command\n    _file = args.file\n    _import = getattr(args, \"import\")\n\n    import_dict = build_import_dict(_import)\n\n    if _file:\n        _ = flu(read_file(_file)).map(str.rstrip)\n    else:\n        try:\n            # Restore the default SIGPIPE handler\n            from signal import SIG_DFL, SIGPIPE, signal\n\n            signal(SIGPIPE, SIG_DFL)\n        except ImportError:\n            # SIGPIPE not available on platform (e.g. Windows), nothing to do\n            pass\n\n        _ = flu(sys.stdin).map(str.rstrip)\n\n    locals_dict = {\n        \"flu\": flu,\n        \"_\": _,\n        \"walk_files\": walk_files,\n        \"walk_dirs\": walk_dirs,\n    }\n\n    pipeline = eval(_command, import_dict, locals_dict)\n\n    if hasattr(pipeline, \"__iter__\") and not isinstance(pipeline, (str, bytes)):\n        for r in pipeline:\n            sys.stdout.write(str(r) + \"\\n\")\n\n    elif pipeline is None:\n        pass\n    else:\n        sys.stdout.write(str(pipeline) + \"\\n\")\n"
  },
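  {
    "path": "examples/cli_import_syntax.py",
    "content": "\"\"\"Illustrative sketch of the CLI's import syntax (an added example, not shipped with the package).\n\nThe `flu` command accepts `-i <module>:<object>:<alias>` arguments; this file\nexercises build_import_dict, the helper in flupy.cli.cli that parses them,\nusing only the stdlib `json` module.\n\"\"\"\nimport json\n\nfrom flupy.cli.cli import build_import_dict\n\n# 'import json'                 -> bound under the module name\nassert build_import_dict([\"json\"])[\"json\"] is json\n\n# 'import json as j'            -> empty object slot, alias provided\nassert build_import_dict([\"json::j\"])[\"j\"] is json\n\n# 'from json import dumps'      -> bound under the object name\nassert build_import_dict([\"json:dumps\"])[\"dumps\"] is json.dumps\n\n# 'from json import dumps as d' -> bound under the alias\nassert build_import_dict([\"json:dumps:d\"])[\"d\"] is json.dumps\n\nprint(\"import syntax examples passed\")\n"
  },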
  {
    "path": "src/flupy/cli/utils.py",
    "content": "# pylint: disable=invalid-name\nimport os\nfrom typing import Generator\n\nfrom flupy.fluent import Fluent, flu\n\n\ndef walk_files(*pathes: str, abspath: bool = True) -> \"Fluent[str]\":\n    \"\"\"Yield files recursively starting from each location in *pathes\"\"\"\n\n    if pathes == ():\n        pathes = (\".\",)\n\n    def _impl() -> Generator[str, None, None]:\n        for path in pathes:\n            for d, _, files in os.walk(path):\n                for x in files:\n                    rel_path = os.path.join(d, x)\n                    if abspath:\n                        yield os.path.abspath(rel_path)\n                    else:\n                        yield rel_path\n\n    return flu(_impl())\n\n\ndef walk_dirs(path: str = \".\") -> \"Fluent[str]\":\n    \"\"\"Yield files recursively starting from *path\"\"\"\n\n    def _impl() -> Generator[str, None, None]:\n        for d, _, _ in os.walk(path):\n            yield d\n\n    return flu(_impl())\n"
  },
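  {
    "path": "examples/walk_demo.py",
    "content": "\"\"\"Illustrative sketch of the filesystem helpers (an added example, not shipped with the package).\n\nwalk_files and walk_dirs both return Fluent[str] streams, so every flu\nmethod chains onto them; the directory walk itself stays lazy.\n\"\"\"\nfrom flupy import walk_dirs, walk_files\n\nif __name__ == \"__main__\":\n    # First five Python files under the current directory.\n    for path in walk_files(\".\").filter(lambda p: p.endswith(\".py\")).take(5):\n        print(path)\n\n    # Count directories without building an intermediate list.\n    print(walk_dirs(\".\").count())\n"
  },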
  {
    "path": "src/flupy/fluent.py",
    "content": "# pylint: disable=invalid-name\nimport time\nfrom collections import defaultdict, deque\nfrom collections.abc import Iterable as IterableType\nfrom functools import reduce\nfrom itertools import dropwhile, groupby, islice, product, takewhile, tee, zip_longest\nfrom random import sample\nfrom typing import (\n    Any,\n    Callable,\n    Collection,\n    Deque,\n    Dict,\n    Generator,\n    Generic,\n    Hashable,\n    Iterable,\n    Iterator,\n    List,\n    Optional,\n    Set,\n    Tuple,\n    Type,\n    TypeVar,\n    Union,\n    overload,\n)\n\nfrom typing_extensions import Concatenate, ParamSpec, Protocol\n\n__all__ = [\"flu\"]\n\n\nT = TypeVar(\"T\")\nT_co = TypeVar(\"T_co\", covariant=True)\nT_contra = TypeVar(\"T_contra\", contravariant=True)\n_T1 = TypeVar(\"_T1\")\n_T2 = TypeVar(\"_T2\")\n_T3 = TypeVar(\"_T3\")\nS = TypeVar(\"S\")\nP = ParamSpec(\"P\")\n\nCallableTakesIterable = Callable[[Iterable[T]], Collection[T]]\n\n\nclass SupportsEquality(Protocol):\n    def __eq__(self, __other: object) -> bool:\n        pass\n\n\nclass SupportsGetItem(Protocol[T_co]):\n    def __getitem__(self, __k: Hashable) -> T_co:\n        pass\n\n\nclass SupportsIteration(Protocol[T_co]):\n    def __iter__(self) -> Iterator[T]:\n        pass\n\n\nclass SupportsLessThan(Protocol):\n    def __lt__(self, __other: Any) -> bool:\n        pass\n\n\nSupportsLessThanT = TypeVar(\"SupportsLessThanT\", bound=\"SupportsLessThan\")\n\n\nclass Empty:\n    pass\n\n\ndef identity(x: T) -> T:\n    return x\n\n\nclass Fluent(Generic[T]):\n    \"\"\"A fluent interface to lazy generator functions\n\n    >>> from flupy import flu\n    >>> (\n        flu(range(100))\n        .map(lambda x: x**2)\n        .filter(lambda x: x % 3 == 0)\n        .chunk(3)\n        .take(2)\n        .to_list()\n    )\n    [[0, 9, 36], [81, 144, 225]]\n    \"\"\"\n\n    def __init__(self, iterable: Iterable[T]) -> None:\n        iterator = iter(iterable)\n        self._iterator: Iterator[T] = iterator\n\n    @overload\n    def __getitem__(self, index: int) -> T:\n        pass\n\n    @overload\n    def __getitem__(self, index: slice) -> \"Fluent[T]\":\n        pass\n\n    def __getitem__(self, key: Union[int, slice]) -> Union[T, \"Fluent[T]\"]:\n        if isinstance(key, int) and key >= 0:\n            try:\n                return next(islice(self._iterator, key, key + 1))\n            except StopIteration:\n                raise IndexError(\"flu index out of range\")\n        elif isinstance(key, slice):\n            return flu(islice(self._iterator, key.start, key.stop, key.step))\n        else:\n            raise TypeError(f\"Indices must be non-negative integers or slices, not {type(key).__name__}\")\n\n    ### Summary ###\n    def collect(self, n: Optional[int] = None, container_type: CallableTakesIterable[T] = list) -> Collection[T]:\n        \"\"\"Collect items from iterable into a container\n\n        >>> flu(range(4)).collect()\n        [0, 1, 2, 3]\n\n        >>> flu(range(4)).collect(container_type=set)\n        {0, 1, 2, 3}\n\n        >>> flu(range(4)).collect(n=2)\n        [0, 1]\n        \"\"\"\n        return container_type(self.take(n))\n\n    def to_list(self) -> List[T]:\n        \"\"\"Collect items from iterable into a list\n\n        >>> flu(range(4)).to_list()\n        [0, 1, 2, 3]\n        \"\"\"\n        return list(self)\n\n    def sum(self) -> Union[T, int]:\n        \"\"\"Sum of elements in the iterable\n\n        >>> flu([1,2,3]).sum()\n        6\n\n        \"\"\"\n        return sum(self)  # type: 
ignore\n\n    def count(self) -> int:\n        \"\"\"Count of elements in the iterable\n\n        >>> flu(['a','b','c']).count()\n        3\n        \"\"\"\n        return sum(1 for _ in self)\n\n    def min(self: \"Fluent[SupportsLessThanT]\") -> SupportsLessThanT:\n        \"\"\"Smallest element in the interable\n\n        >>> flu([1, 3, 0, 2]).min()\n        0\n        \"\"\"\n        return min(self)\n\n    def max(self: \"Fluent[SupportsLessThanT]\") -> SupportsLessThanT:\n        \"\"\"Largest element in the interable\n\n        >>> flu([0, 3, 2, 1]).max()\n        3\n        \"\"\"\n        return max(self)\n\n    def first(self, default: Any = Empty()) -> T:\n        \"\"\"Return the first item of the iterable. Raise IndexError if empty, or return default if provided.\n\n        >>> flu([0, 1, 2, 3]).first()\n        0\n        >>> flu([]).first(default=\"some_default\")\n        'some_default'\n        \"\"\"\n        x: Union[Empty, T] = default\n        for x in self:\n            return x\n        if isinstance(x, Empty):\n            raise IndexError(\"Empty iterator\")\n        return x\n\n    def last(self, default: Any = Empty()) -> T:\n        \"\"\"Return the last item of the iterble. Raise IndexError if empty or default if provided.\n\n        >>> flu([0, 1, 2, 3]).last()\n        3\n        >>> flu([]).last(default='some_default')\n        'some_default'\n        \"\"\"\n        x: Union[Empty, T] = default\n        for x in self:\n            pass\n        if isinstance(x, Empty):\n            raise IndexError(\"Empty iterator\")\n        return x\n\n    def head(self, n: int = 10, container_type: CallableTakesIterable[T] = list) -> Collection[T]:\n        \"\"\"Returns up to the first *n* elements from the iterable.\n\n        >>> flu(range(20)).head()\n        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n\n        >>> flu(range(15)).head(n=2)\n        [0, 1]\n\n        >>> flu([]).head()\n        []\n        \"\"\"\n        return self.take(n).collect(container_type=container_type)\n\n    def tail(self, n: int = 10, container_type: CallableTakesIterable[T] = list) -> Collection[T]:\n        \"\"\"Return up to the last *n* elements from the iterable\n\n        >>> flu(range(20)).tail()\n        [10, 11, 12, 13, 14, 15, 16, 17, 18, 19]\n\n        >>> flu(range(15)).tail(n=2)\n        [13, 14]\n        \"\"\"\n        val: Union[List[Empty], Tuple[Any, ...]] = [Empty()]\n        for val in self.window(n, fill_value=Empty()):\n            pass\n        return container_type([x for x in val if not isinstance(x, Empty)])\n\n    ### End Summary ###\n\n    ### Non-Constant Memory ###\n    def sort(\n        self: \"Fluent[SupportsLessThanT]\",\n        key: Optional[Callable[[Any], Any]] = None,\n        reverse: bool = False,\n    ) -> \"Fluent[SupportsLessThanT]\":\n        \"\"\"Sort iterable by *key* function if provided or identity otherwise\n\n        Note: sorting loads the entire iterable into memory\n\n        >>> flu([3,6,1]).sort().to_list()\n        [1, 3, 6]\n\n        >>> flu([3,6,1]).sort(reverse=True).to_list()\n        [6, 3, 1]\n\n        >>> flu([3,-6,1]).sort(key=abs).to_list()\n        [1, 3, -6]\n        \"\"\"\n        return Fluent(sorted(self, key=key, reverse=reverse))\n\n    def join_left(\n        self,\n        other: Iterable[_T1],\n        key: Callable[[T], Hashable] = identity,\n        other_key: Callable[[_T1], Hashable] = identity,\n    ) -> \"Fluent[Tuple[T, Union[_T1, None]]]\":\n        \"\"\"Join the iterable with another iterable using equality 
between *key* applied to self and *other_key* applied to *other* to identify matching entries\n\n        When no matching entry is found in *other*, entries in the iterable are paired with None\n\n        Note: join_left loads *other* into memory\n\n        >>> flu(range(6)).join_left(range(0, 6, 2)).to_list()\n        [(0, 0), (1, None), (2, 2), (3, None), (4, 4), (5, None)]\n        \"\"\"\n\n        def _impl() -> Generator[Tuple[T, Union[_T1, None]], None, None]:\n\n            other_lookup = defaultdict(list)\n\n            for entry_other in other:\n                other_lookup[other_key(entry_other)].append(entry_other)\n\n            for entry in self:\n                matches: Optional[List[_T1]] = other_lookup.get(key(entry))\n\n                if matches:\n                    for match in matches:\n                        yield (entry, match)\n                else:\n                    yield (entry, None)\n\n        return Fluent(_impl())\n\n    def join_inner(\n        self,\n        other: Iterable[_T1],\n        key: Callable[[T], Hashable] = identity,\n        other_key: Callable[[_T1], Hashable] = identity,\n    ) -> \"Fluent[Tuple[T, _T1]]\":\n        \"\"\"Join the iterable with another iterable using equality between *key* applied to self and *other_key* applied to *other* to identify matching entries\n\n        When no matching entry is found in *other*, entries in the iterable are filtered from the results\n\n        Note: join_inner loads *other* into memory\n\n        >>> flu(range(6)).join_inner(range(0, 6, 2)).to_list()\n        [(0, 0), (2, 2), (4, 4)]\n\n        \"\"\"\n\n        def _impl() -> Generator[Tuple[T, _T1], None, None]:\n\n            other_lookup = defaultdict(list)\n\n            for entry_other in other:\n                other_lookup[other_key(entry_other)].append(entry_other)\n\n            for entry in self:\n                matches: List[_T1] = other_lookup[key(entry)]\n\n                for match in matches:\n                    yield (entry, match)\n\n        return Fluent(_impl())\n\n    def join_full(\n        self,\n        other: Iterable[_T1],\n        key: Callable[[T], Hashable] = identity,\n        other_key: Callable[[_T1], Hashable] = identity,\n    ) -> \"Fluent[Tuple[Union[T, None], Union[_T1, None]]]\":\n        \"\"\"Join the iterable with another iterable using equality between *key* applied to self and *other_key* applied to *other* to identify matching entries\n\n        Returns all entries from both iterables. 
When no matching entry is found, entries are paired with None\n\n        Note: join_full loads both *self* and *other* into memory\n\n        >>> flu(range(4)).join_full(range(2, 6)).to_list()\n        [(0, None), (1, None), (2, 2), (3, 3), (None, 4), (None, 5)]\n        \"\"\"\n\n        def _impl() -> Generator[Tuple[Union[T, None], Union[_T1, None]], None, None]:\n\n            # Build lookup for other\n            other_lookup: Dict[Hashable, List[_T1]] = defaultdict(list)\n            other_keys_seen: Set[Hashable] = set()\n\n            for entry_other in other:\n                other_key_val = other_key(entry_other)\n                other_lookup[other_key_val].append(entry_other)\n                other_keys_seen.add(other_key_val)\n\n            # Track which keys from other have been matched\n            matched_other_keys: Set[Hashable] = set()\n\n            # Process all entries from self\n            for entry in self:\n                entry_key = key(entry)\n                matches: Optional[List[_T1]] = other_lookup.get(entry_key)\n\n                if matches:\n                    matched_other_keys.add(entry_key)\n                    for match in matches:\n                        yield (entry, match)\n                else:\n                    yield (entry, None)\n\n            # Yield unmatched entries from other\n            unmatched_keys = other_keys_seen - matched_other_keys\n            for unmatched_key in unmatched_keys:\n                for entry_other in other_lookup[unmatched_key]:\n                    yield (None, entry_other)\n\n        return Fluent(_impl())\n\n    def shuffle(self) -> \"Fluent[T]\":\n        \"\"\"Randomize the order of elements in the interable\n\n        Note: shuffle loads the entire iterable into memory\n\n        >>> flu([3,6,1]).shuffle().to_list()\n        [6, 1, 3]\n        \"\"\"\n        dat: List[T] = self.to_list()\n        return Fluent(sample(dat, len(dat)))\n\n    def group_by(\n        self, key: Callable[[T], Union[T, _T1]] = identity, sort: bool = True\n    ) -> \"Fluent[Tuple[Union[T,_T1], Fluent[T]]]\":\n        \"\"\"Yield consecutive keys and groups from the iterable\n\n        *key* is a function to compute a key value used in grouping and sorting for each element. 
*key* defaults to an identity function which returns the unchaged element\n\n        When the iterable is pre-sorted according to *key*, setting *sort* to False will prevent loading the dataset into memory and improve performance\n\n        >>> flu([2, 4, 2, 4]).group_by().to_list()\n        [(2, <flu object>), (4, <flu object>)]\n\n        Or, if the iterable is pre-sorted\n\n        >>> flu([2, 2, 5, 5]).group_by(sort=False).to_list()\n        [(2, <flu object>), (5, <flu object>)]\n\n        Using a key function\n\n        >>> points = [\n            {'x': 1, 'y': 0},\n            {'x': 4, 'y': 3},\n            {'x': 1, 'y': 5}\n        ]\n        >>> key_func = lambda u: u['x']\n        >>> flu(points).group_by(key=key_func, sort=True).to_list()\n        [(1, <flu object>), (4, <flu object>)]\n        \"\"\"\n\n        gen = self.sort(key) if sort else self\n        return Fluent(groupby(gen, key)).map(lambda x: (x[0], flu([y for y in x[1]])))\n\n    def unique(self, key: Callable[[T], Hashable] = identity) -> \"Fluent[T]\":\n        \"\"\"Yield elements that are unique by a *key*.\n\n        >>> flu([2, 3, 2, 3]).unique().to_list()\n        [2, 3]\n\n        >>> flu([2, -3, -2, 3]).unique(key=abs).to_list()\n        [2, -3]\n        \"\"\"\n\n        def _impl() -> Generator[T, None, None]:\n            seen: Set[Any] = set()\n            for x in self:\n                x_hash = key(x)\n                if x_hash in seen:\n                    continue\n                else:\n                    seen.add(x_hash)\n                    yield x\n\n        return Fluent(_impl())\n\n    ### End Non-Constant Memory ###\n\n    ### Side Effect ###\n    def rate_limit(self, per_second: Union[int, float] = 100) -> \"Fluent[T]\":\n        \"\"\"Restrict consumption of iterable to n item  *per_second*\n\n        >>> import time\n        >>> start_time = time.time()\n        >>> _ = flu(range(3)).rate_limit(3).to_list()\n        >>> print('Runtime', int(time.time() - start_time))\n        1.00126 # approximately 1 second for 3 items\n        \"\"\"\n\n        def _impl() -> Generator[T, None, None]:\n            wait_time = 1.0 / per_second\n            for val in self:\n                start_time = time.time()\n                yield val\n                call_duration = time.time() - start_time\n                time.sleep(max(wait_time - call_duration, 0.0))\n\n        return Fluent(_impl())\n\n    def side_effect(\n        self,\n        func: Callable[[T], Any],\n        before: Optional[Callable[[], Any]] = None,\n        after: Optional[Callable[[], Any]] = None,\n    ) -> \"Fluent[T]\":\n        \"\"\"Invoke *func* for each item in the iterable before yielding the item.\n        *func* takes a single argument and the output is discarded\n        *before* and *after* are optional functions that take no parameters and are executed once before iteration begins\n        and after iteration ends respectively. 
Each will be called exactly once.\n\n\n        >>> flu(range(2)).side_effect(lambda x: print(f'Collected {x}')).to_list()\n        Collected 0\n        Collected 1\n        [0, 1]\n        \"\"\"\n\n        def _impl() -> Generator[T, None, None]:\n            try:\n                if before is not None:\n                    before()\n\n                for x in self:\n                    func(x)\n                    yield x\n\n            finally:\n                if after is not None:\n                    after()\n\n        return Fluent(_impl())\n\n    ### End Side Effect ###\n\n    def map(self, func: Callable[Concatenate[T, P], _T1], *args: Any, **kwargs: Any) -> \"Fluent[_T1]\":\n        \"\"\"Apply *func* to each element of iterable\n\n        >>> flu(range(5)).map(lambda x: x*x).to_list()\n        [0, 1, 4, 9, 16]\n        \"\"\"\n\n        def _impl() -> Generator[_T1, None, None]:\n            for val in self._iterator:\n                yield func(val, *args, **kwargs)\n\n        return Fluent(_impl())\n\n    def map_item(self: \"Fluent[SupportsGetItem[T]]\", item: Hashable) -> \"Fluent[T]\":\n        \"\"\"Extracts *item* from every element of the iterable\n\n        >>> flu([(2, 4), (2, 5)]).map_item(1).to_list()\n        [4, 5]\n\n        >>> flu([{'mykey': 8}, {'mykey': 5}]).map_item('mykey').to_list()\n        [8, 5]\n        \"\"\"\n\n        def _impl() -> Generator[T, None, None]:\n            for x in self:\n                yield x[item]\n\n        return Fluent(_impl())\n\n    def map_attr(self, attr: str) -> \"Fluent[Any]\":\n        \"\"\"Extracts the attribute *attr* from each element of the iterable\n\n        >>> from collections import namedtuple\n        >>> MyTup = namedtuple('MyTup', ['value', 'backup_val'])\n        >>> flu([MyTup(1, 5), MyTup(2, 4)]).map_attr('value').to_list()\n        [1, 2]\n        \"\"\"\n        return self.map(lambda x: getattr(x, attr))\n\n    def filter(self, func: Callable[Concatenate[T, P], bool], *args: Any, **kwargs: Any) -> \"Fluent[T]\":\n        \"\"\"Yield elements of iterable where *func* returns truthy\n\n        >>> flu(range(10)).filter(lambda x: x % 2 == 0).to_list()\n        [0, 2, 4, 6, 8]\n        \"\"\"\n\n        def _impl() -> Generator[T, None, None]:\n            for val in self._iterator:\n                if func(val, *args, **kwargs):\n                    yield val\n\n        return Fluent(_impl())\n\n    def reduce(self, func: Callable[[T, T], T]) -> T:\n        \"\"\"Apply a function of two arguments cumulatively to the items of the iterable,\n        from left to right, so as to reduce the sequence to a single value\n\n        >>> flu(range(5)).reduce(lambda x, y: x + y)\n        10\n        \"\"\"\n        return reduce(func, self)\n\n    def fold_left(self, func: Callable[[S, T], S], initial: S) -> S:\n        \"\"\"Apply a function of two arguments cumulatively to the items of the iterable,\n        from left to right, starting with *initial*, so as to fold the sequence to\n        a single value\n\n        >>> flu(range(5)).fold_left(lambda x, y: x + str(y), \"\")\n        '01234'\n        \"\"\"\n        return reduce(func, self, initial)\n\n    @overload\n    def zip(self, __iter1: Iterable[_T1]) -> \"Fluent[Tuple[T, _T1]]\": ...\n\n    @overload\n    def zip(self, __iter1: Iterable[_T1], __iter2: Iterable[_T2]) -> \"Fluent[Tuple[T, _T1, _T2]]\": ...\n\n    @overload\n    def zip(\n        self, __iter1: Iterable[_T1], __iter2: Iterable[_T2], __iter3: Iterable[_T3]\n    ) -> \"Fluent[Tuple[T, _T1, _T2, 
_T3]]\": ...\n\n    @overload\n    def zip(\n        self,\n        __iter1: Iterable[Any],\n        __iter2: Iterable[Any],\n        __iter3: Iterable[Any],\n        __iter4: Iterable[Any],\n        *iterable: Iterable[Any],\n    ) -> \"Fluent[Tuple[T, ...]]\": ...\n\n    def zip(self, *iterable: Iterable[Any]) -> Union[\n        \"Fluent[Tuple[T, ...]]\",\n        \"Fluent[Tuple[T, _T1]]\",\n        \"Fluent[Tuple[T, _T1, _T2]]\",\n        \"Fluent[Tuple[T, _T1, _T2, _T3]]\",\n    ]:\n        \"\"\"Yields tuples containing the i-th element from the i-th\n        argument in the instance, and the iterable\n\n        >>> flu(range(5)).zip(range(3, 0, -1)).to_list()\n        [(0, 3), (1, 2), (2, 1)]\n        \"\"\"\n        # @self_to_flu is not compatible with @overload\n        # make sure any usage of self supports arbitrary iterables\n        tup_iter = zip(iter(self), *iterable)\n        return Fluent(tup_iter)\n\n    def zip_longest(self, *iterable: Iterable[_T1], fill_value: Any = None) -> \"Fluent[Tuple[T, ...]]\":\n        \"\"\"Yields tuples containing the i-th element from the i-th\n        argument in the instance, and the iterable\n        Iteration continues until the longest iterable is exhaused.\n        If iterables are uneven in length, missing values are filled in with fill value\n\n        >>> flu(range(5)).zip_longest(range(3, 0, -1)).to_list()\n        [(0, 3), (1, 2), (2, 1), (3, None), (4, None)]\n\n\n        >>> flu(range(5)).zip_longest(range(3, 0, -1), fill_value='a').to_list()\n        [(0, 3), (1, 2), (2, 1), (3, 'a'), (4, 'a')]\n        \"\"\"\n        return Fluent(zip_longest(self, *iterable, fillvalue=fill_value))\n\n    def enumerate(self, start: int = 0) -> \"Fluent[Tuple[int, T]]\":\n        \"\"\"Yields tuples from the instance where the first element\n        is a count from initial value *start*.\n\n        >>> flu([3,4,5]).enumerate().to_list()\n        [(0, 3), (1, 4), (2, 5)]\n        \"\"\"\n        return Fluent(enumerate(self, start=start))\n\n    def take(self, n: Optional[int] = None) -> \"Fluent[T]\":\n        \"\"\"Yield first *n* items of the iterable\n\n        >>> flu(range(10)).take(2).to_list()\n        [0, 1]\n        \"\"\"\n        return Fluent(islice(self._iterator, n))\n\n    def take_while(self, predicate: Callable[[T], bool]) -> \"Fluent[T]\":\n        \"\"\"Yield elements from the chainable so long as the predicate is true\n\n        >>> flu(range(10)).take_while(lambda x: x < 3).to_list()\n        [0, 1, 2]\n        \"\"\"\n        return Fluent(takewhile(predicate, self._iterator))\n\n    def drop_while(self, predicate: Callable[[T], bool]) -> \"Fluent[T]\":\n        \"\"\"Drop elements from the chainable as long as the predicate is true;\n        afterwards, return every element\n\n        >>> flu(range(10)).drop_while(lambda x: x < 3).to_list()\n        [3, 4, 5, 6, 7, 8, 9]\n        \"\"\"\n        return Fluent(dropwhile(predicate, self._iterator))\n\n    def chunk(self, n: int) -> \"Fluent[List[T]]\":\n        \"\"\"Yield lists of elements from iterable in groups of *n*\n\n        if the iterable is not evenly divisiible by *n*, the final list will be shorter\n\n        >>> flu(range(10)).chunk(3).to_list()\n        [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]\n        \"\"\"\n\n        def _impl() -> Generator[List[T], None, None]:\n\n            while True:\n                vals: List[T] = list(self.take(n))\n                if vals:\n                    yield vals\n                else:\n                    return\n\n        
return Fluent(_impl())\n\n    def flatten(\n        self,\n        depth: int = 1,\n        base_type: Optional[Type[object]] = None,\n        iterate_strings: bool = False,\n    ) -> \"Fluent[Any]\":\n        \"\"\"Recursively flatten nested iterables (e.g., a list of lists of tuples)\n        into non-iterable type or an optional user-defined base_type\n\n        Strings are treated as non-iterable for convenience. set iterate_string=True\n        to change that behavior.\n\n        >>> flu([[0, 1, 2], [3, 4, 5]]).flatten().to_list()\n        [0, 1, 2, 3, 4, 5]\n\n        >>> flu([[0, [1, 2]], [[3, 4], 5]]).flatten().to_list()\n        [0, [1, 2], [3, 4], 5]\n\n        >>> flu([[0, [1, 2]], [[3, 4], 5]]).flatten(depth=2).to_list()\n        [0, 1, 2, 3, 4, 5]\n\n        >>> flu([[0, [1, 2]], [[3, 4], 5]]).flatten(depth=2).to_list()\n        [0, 1, 2, 3, 4, 5]\n\n        >>> flu([1, (2, 2), 4, [5, (6, 6, 6)]]).flatten(base_type=tuple).to_list()\n        [1, (2, 2), 4, 5, (6, 6, 6)]\n\n        >>> flu([[2, 0], 'abc', 3, [4]]).flatten(iterate_strings=True).to_list()\n        [2, 0, 'a', 'b', 'c', 3, 4]\n        \"\"\"\n\n        # TODO(OR): Reimplement with strong types\n        def walk(node: Any, level: int) -> Generator[T, None, None]:\n            if (\n                ((depth is not None) and (level > depth))\n                or (isinstance(node, str) and not iterate_strings)\n                or ((base_type is not None) and isinstance(node, base_type))\n            ):\n                yield node\n                return\n            try:\n                tree = iter(node)\n            except TypeError:\n                yield node\n                return\n            else:\n                for child in tree:\n                    for val in walk(child, level + 1):\n                        yield val\n\n        return Fluent(walk(self, level=0))\n\n    def denormalize(self: \"Fluent[SupportsIteration[Any]]\", iterate_strings: bool = False) -> \"Fluent[Tuple[Any, ...]]\":\n        \"\"\"Denormalize iterable components of each record\n\n        >>> flu([(\"abc\", [1, 2, 3])]).denormalize().to_list()\n        [('abc', 1), ('abc', 2), ('abc', 3)]\n\n        >>> flu([(\"abc\", [1, 2])]).denormalize(iterate_strings=True).to_list()\n        [('a', 1), ('a', 2), ('b', 1), ('b', 2), ('c', 1), ('c', 2)]\n\n        >>> flu([(\"abc\", [])]).denormalize().to_list()\n        []\n        \"\"\"\n\n        def _impl() -> Generator[Tuple[Any, ...], None, None]:\n            for record in self:\n                iter_elements: List[Iterable[Any]] = []\n                element: Any\n                for element in record:\n\n                    # Check for string and string iteration is allowed\n                    if isinstance(element, str) and iterate_strings:\n                        iter_elements.append(element)\n\n                    # Check for string and string iteration is not allowed\n                    elif isinstance(element, str):\n                        iter_elements.append([element])\n\n                    # Check for iterable\n                    elif isinstance(element, IterableType):\n                        iter_elements.append(element)\n\n                    # Check for non-iterable\n                    else:\n                        iter_elements.append([element])\n\n                for row in product(*iter_elements):\n                    yield row\n\n        return Fluent(_impl())\n\n    def window(self, n: int, step: int = 1, fill_value: Any = None) -> \"Fluent[Tuple[Any, ...]]\":\n      
  \"\"\"Yield a sliding window of width *n* over the given iterable.\n\n        Each window will advance in increments of *step*:\n\n        If the length of the iterable does not evenly divide by the *step*\n        the final output is padded with *fill_value*\n\n        >>> flu(range(5)).window(3).to_list()\n        [(0, 1, 2), (1, 2, 3), (2, 3, 4)]\n\n        >>> flu(range(5)).window(n=3, step=2).to_list()\n        [(0, 1, 2), (2, 3, 4)]\n\n        >>> flu(range(9)).window(n=4, step=3).to_list()\n        [(0, 1, 2, 3), (3, 4, 5, 6), (6, 7, 8, None)]\n\n        >>> flu(range(9)).window(n=4, step=3, fill_value=-1).to_list()\n        [(0, 1, 2, 3), (3, 4, 5, 6), (6, 7, 8, -1)]\n        \"\"\"\n\n        def _impl() -> Generator[Tuple[Any, ...], None, None]:\n            if n < 0:\n                raise ValueError(\"n must be >= 0\")\n            elif n == 0:\n                yield tuple()\n                return\n            if step < 1:\n                raise ValueError(\"step must be >= 1\")\n\n            window: Deque[Any] = deque([], n)\n            append = window.append\n\n            # Initial deque fill\n            for _ in range(n):\n                append(next(self, fill_value))\n            yield tuple(window)\n\n            # Appending new items to the right causes old items to fall off the left\n            i = 0\n            for item in self:\n                append(item)\n                i = (i + 1) % step\n                if i % step == 0:\n                    yield tuple(window)\n\n            # If there are items from the iterable in the window, pad with the given\n            # value and emit them.\n            if (i % step) and (step - i < n):\n                for _ in range(step - i):\n                    append(fill_value)\n                yield tuple(window)\n\n        return Fluent(_impl())\n\n    def __iter__(self) -> \"Fluent[T]\":\n        return self\n\n    def __next__(self) -> T:\n        return next(self._iterator)\n\n    def tee(self, n: int = 2) -> \"Fluent[Fluent[T]]\":\n        \"\"\"Return n independent iterators from a single iterable\n\n        once tee() has made a split, the original iterable should not be used\n        anywhere else; otherwise, the iterable could get advanced without the\n        tee objects being informed\n\n        >>> copy1, copy2 = flu(range(5)).tee()\n        >>> copy1.sum()\n        10\n        >>> copy2.to_list()\n        [0, 1, 2, 3, 4]\n        \"\"\"\n        return Fluent((Fluent(x) for x in tee(self, n)))\n\n\nclass flu(Fluent[T]):\n    \"\"\"A fluent interface to lazy generator functions\n\n    >>> from flupy import flu\n    >>> (\n            flu(range(100))\n            .map(lambda x: x**2)\n            .filter(lambda x: x % 3 == 0)\n            .chunk(3)\n            .take(2)\n            .to_list()\n        )\n    [[0, 9, 36], [81, 144, 225]]\n    \"\"\"\n"
  },
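  {
    "path": "examples/fluent_semantics.py",
    "content": "\"\"\"Illustrative checks of a few Fluent behaviors (an added example, not shipped with the package).\n\nEach assertion restates an example from the docstrings in flupy.fluent\nagainst concrete inputs: window padding, group_by's default sort, and\ntee's independent copies.\n\"\"\"\nfrom flupy import flu\n\n# window slides in increments of *step* and pads the final tuple with fill_value.\nassert flu(range(5)).window(3).to_list() == [(0, 1, 2), (1, 2, 3), (2, 3, 4)]\nassert flu(range(9)).window(n=4, step=3).to_list() == [\n    (0, 1, 2, 3),\n    (3, 4, 5, 6),\n    (6, 7, 8, None),\n]\n\n# group_by sorts by *key* first (unless sort=False), then groups consecutive runs.\ngroups = flu([2, 4, 2, 4]).group_by().map(lambda g: (g[0], g[1].to_list())).to_list()\nassert groups == [(2, [2, 2]), (4, [4, 4])]\n\n# tee yields independent iterators over a single source.\na, b = flu(range(5)).tee()\nassert a.sum() == b.sum() == 10\n\nprint(\"fluent semantics examples passed\")\n"
  },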
  {
    "path": "src/flupy/py.typed",
    "content": ""
  },
  {
    "path": "src/tests/test_cli.py",
    "content": "from tempfile import NamedTemporaryFile\n\nimport pytest\n\nfrom flupy.cli.cli import build_import_dict, main, parse_args\n\n\ndef test_parse_args():\n    with pytest.raises(SystemExit) as cm:\n        parse_args([])\n        assert cm.exception.code == 2\n\n    args = parse_args([\"_\"])\n    assert args.command == \"_\"\n\n    args = parse_args([\"_\", \"-i\", \"os:environ:env\"])\n    assert \"os:environ:env\" in getattr(args, \"import\")\n    assert args.command == \"_\"\n\n    import_dict = build_import_dict([\"json\"])\n    assert \"json\" in import_dict\n\n\ndef test_build_import_dict():\n    import json\n\n    import_dict = build_import_dict([\"json\"])\n    assert \"json\" in import_dict\n    assert import_dict[\"json\"] == json\n\n    import_dict = build_import_dict([\"json:dumps\"])\n    assert \"dumps\" in import_dict\n    assert import_dict[\"dumps\"] == json.dumps\n\n    import_dict = build_import_dict([\"json:dumps:ds\"])\n    assert \"ds\" in import_dict\n    assert import_dict[\"ds\"] == json.dumps\n\n    import_dict = build_import_dict([\"json::j\"])\n    assert \"j\" in import_dict\n    assert import_dict[\"j\"] == json\n\n\ndef test_show_help(capsys):\n    with pytest.raises(SystemExit):\n        main([\"flu\", \"-h\"])\n\n    result = capsys.readouterr()\n    stdout = result.out\n    assert stdout.startswith(\"usage\")\n\n\ndef test_show_version(capsys):\n    main([\"flu\", \"flu(range(5)).collect()\"])\n\n    result = capsys.readouterr()\n    stdout = result.out.replace(\"\\n\", \"\")\n    assert stdout.startswith(\"0\")\n\n\ndef test_basic_pipeline(capsys):\n    main([\"flu\", \"flu(range(5)).collect()\"])\n    result = capsys.readouterr()\n    stdout = result.out.replace(\"\\n\", \"\")\n    assert stdout.startswith(\"0\")\n\n\ndef test_pass_on_none_pipeline(capsys):\n    main([\"flu\", \"None\"])\n    result = capsys.readouterr()\n    stdout = result.out\n    assert stdout == \"\"\n\n\ndef test_non_iterable_non_none_pipeline(capsys):\n    main([\"flu\", '\"hello_world\"'])\n    result = capsys.readouterr()\n    stdout = result.out.strip(\"\\n\")\n    assert stdout == \"hello_world\"\n\n\ndef test_cli_walk_files(capsys):\n    main([\"flu\", \"walk_files().head(2)\"])\n    result = capsys.readouterr()\n    stdout = result.out.strip(\"\\n\").split(\"\\n\")\n    assert len(stdout) == 2\n\n\ndef test_cli_walk_dirs(capsys):\n    main([\"flu\", \"walk_dirs().head(2)\"])\n    result = capsys.readouterr()\n    stdout = result.out.strip(\"\\n\").split(\"\\n\")\n    assert len(stdout) == 2\n\n\ndef test_from_file(capsys):\n    with NamedTemporaryFile(\"w+\") as f:\n        f.write(\"hello\")\n        f.read()\n        f_name = f.name\n        main([\"flu\", \"-f\", f_name, \"_.map(str.upper)\"])\n    result = capsys.readouterr()\n    stdout = result.out.strip(\"\\n\")\n    assert stdout == \"HELLO\"\n\n\ndef test_glob_imports(capsys):\n    main([\"flu\", \"flu(env).count()\", \"-i\", \"os:environ:env\"])\n    result = capsys.readouterr()\n    stdout = result.out\n    assert stdout\n"
  },
  {
    "path": "src/tests/test_cli_utils.py",
    "content": "from flupy.cli.utils import walk_dirs, walk_files\n\n\ndef test_walk_files():\n    assert walk_files().head()\n    assert walk_files(abspath=False).head()\n\n\ndef test_walk_dirs():\n    assert walk_dirs().head()\n"
  },
  {
    "path": "src/tests/test_flu.py",
    "content": "import sys\nfrom itertools import count, cycle\n\nimport pytest\n\nfrom flupy import flu\n\n\ndef test_collect():\n    assert flu(range(3)).collect() == [0, 1, 2]\n    assert flu(range(3)).collect(container_type=tuple) == (0, 1, 2)\n    assert flu(range(3)).collect(n=2) == [0, 1]\n\n\ndef test_to_list():\n    assert flu(range(3)).to_list() == [0, 1, 2]\n\n\ndef test___getitem__():\n    assert flu(range(3))[1] == 1\n    assert flu(range(3))[1:].collect() == [1, 2]\n    assert flu(range(35))[1:2].collect() == [1]\n    assert flu(range(35))[1:3].collect() == [1, 2]\n    with pytest.raises(IndexError):\n        flu([1])[4]\n    with pytest.raises((KeyError, TypeError)):\n        flu([1])[\"not an index\"]\n\n\ndef test_sum():\n    gen = flu(range(3))\n    assert gen.sum() == 3\n\n\ndef test_reduce():\n    gen = flu(range(5))\n    assert gen.reduce(lambda x, y: x + y) == 10\n\n\ndef test_fold_left():\n    assert flu(range(5)).fold_left(lambda x, y: x + y, 0) == 10\n    assert flu(range(5)).fold_left(lambda x, y: x + str(y), \"\") == \"01234\"\n\n\ndef test_count():\n    gen = flu(range(3))\n    assert gen.count() == 3\n\n\ndef test_min():\n    gen = flu(range(3))\n    assert gen.min() == 0\n\n\ndef test_first():\n    gen = flu(range(3))\n    assert gen.first() == 0\n    gen = flu([])\n    with pytest.raises(IndexError):\n        gen.first()\n    gen = flu([])\n    assert gen.first(default=1) == 1\n\n\ndef test_last():\n    gen = flu(range(3))\n    assert gen.last() == 2\n    gen = flu([])\n    with pytest.raises(IndexError):\n        gen.last()\n    gen = flu([])\n    assert gen.last(default=1) == 1\n\n\ndef test_head():\n    gen = flu(range(30))\n    assert gen.head(n=2) == [0, 1]\n    gen = flu(range(30))\n    assert gen.head(n=3, container_type=set) == set([0, 1, 2])\n    gen = flu(range(3))\n    assert gen.head(n=50) == [0, 1, 2]\n\n\ndef test_tail():\n    gen = flu(range(30))\n    assert gen.tail(n=2) == [28, 29]\n    gen = flu(range(30))\n    assert gen.tail(n=3, container_type=set) == set([27, 28, 29])\n    gen = flu(range(3))\n    assert gen.tail(n=50) == [0, 1, 2]\n\n\ndef test_max():\n    gen = flu(range(3))\n    assert gen.max() == 2\n\n\ndef test_unique():\n    class NoHash:\n        def __init__(self, letter, keyf):\n            self.letter = letter\n            self.keyf = keyf\n\n    a = NoHash(\"a\", 1)\n    b = NoHash(\"b\", 1)\n    c = NoHash(\"c\", 2)\n\n    gen = flu([a, b, c]).unique()\n    assert gen.collect() == [a, b, c]\n    gen = flu([a, b, c]).unique(lambda x: x.letter)\n    assert gen.collect() == [a, b, c]\n    gen = flu([a, b, c]).unique(lambda x: x.keyf)\n    assert gen.collect() == [a, c]\n\n\ndef test_side_effect():\n    class FakeFile:\n        def __init__(self):\n            self.is_open = False\n            self.content = []\n\n        def write(self, text):\n            if self.is_open:\n                self.content.append(text)\n            else:\n                raise IOError(\"fake file is not open for writing\")\n\n        def open(self):\n            self.is_open = True\n\n        def close(self):\n            self.is_open = False\n\n    # Test the fake file\n    ffile = FakeFile()\n    ffile.open()\n    ffile.write(\"should be there\")\n    ffile.close()\n    assert ffile.content[0] == \"should be there\"\n    with pytest.raises(IOError):\n        ffile.write(\"should fail\")\n\n    # Reset fake file\n    ffile = FakeFile()\n\n    with pytest.raises(IOError):\n        flu(range(5)).side_effect(ffile.write).collect()\n\n    gen_result = 
flu(range(5)).side_effect(ffile.write, before=ffile.open, after=ffile.close).collect()\n    assert ffile.is_open is False\n    assert ffile.content == [0, 1, 2, 3, 4]\n    assert gen_result == [0, 1, 2, 3, 4]\n\n\ndef test_sort():\n    gen = flu(range(3, 0, -1)).sort()\n    assert gen.collect() == [1, 2, 3]\n\n\ndef test_shuffle():\n    # With 10,000 elements, an unchanged ordering after shuffling is astronomically unlikely\n    original_order = list(range(10000))\n    new_order = flu(original_order).shuffle().collect()\n    assert new_order != original_order\n    assert len(new_order) == len(original_order)\n    assert sum(new_order) == sum(original_order)\n\n\ndef test_map():\n    gen = flu(range(3)).map(lambda x: x + 2)\n    assert gen.collect() == [2, 3, 4]\n\n\ndef test_rate_limit():\n    resA = flu(range(3)).collect()\n    resB = flu(range(3)).rate_limit(5000).collect()\n    assert resA == resB\n\n\ndef test_map_item():\n    gen = flu(range(3)).map(lambda x: {\"a\": x}).map_item(\"a\")\n    assert gen.collect() == [0, 1, 2]\n\n\ndef test_map_attr():\n    class Person:\n        def __init__(self, age: int) -> None:\n            self.age = age\n\n    gen = flu(range(3)).map(lambda x: Person(x)).map_attr(\"age\")\n    assert gen.collect() == [0, 1, 2]\n\n\ndef test_filter():\n    gen = flu(range(3)).filter(lambda x: 0 < x < 2)\n    assert gen.collect() == [1]\n\n\ndef test_take():\n    gen = flu(range(10)).take(5)\n    assert gen.collect() == [0, 1, 2, 3, 4]\n\n\ndef test_take_while():\n    gen = flu(cycle(range(10))).take_while(lambda x: x < 4)\n    assert gen.collect() == [0, 1, 2, 3]\n\n\ndef test_drop_while():\n    gen = flu([1, 2, 3, 4, 3, 2, 1]).drop_while(lambda x: x < 4)\n    assert gen.collect() == [4, 3, 2, 1]\n\n\ndef test_group_by():\n    gen = flu([1, 1, 1, 2, 2, 2, 2, 3]).zip(range(100)).group_by(lambda x: x[0])\n    g1, g2, g3 = gen.map(lambda x: (x[0], x[1].collect())).collect()\n    # Standard usage\n    assert g1 == (1, [(1, 0), (1, 1), (1, 2)])\n    assert g2 == (2, [(2, 3), (2, 4), (2, 5), (2, 6)])\n    assert g3 == (3, [(3, 7)])\n    # No param usage\n    v1 = flu(range(10)).group_by().map(lambda x: (x[0], list(x[1])))\n    v2 = flu(range(10)).map(lambda x: (x, [x]))\n    assert v1.collect() == v2.collect()\n    # Sort\n    gen = flu([1, 2, 1, 2]).group_by(lambda x: x, sort=False)\n    assert gen.count() == 4\n    gen = flu([1, 2, 1, 2]).group_by(lambda x: x, sort=True)\n    assert gen.count() == 2\n\n    # Custom key function with sort\n    points = [{\"x\": 1, \"y\": 0}, {\"x\": 4, \"y\": 3}, {\"x\": 1, \"y\": 5}]\n    key_func = lambda u: u[\"x\"]\n    gen = flu(points).group_by(key=key_func, sort=True).collect()\n    assert len(gen) == 2\n    assert gen[0][0] == 1\n    assert gen[1][0] == 4\n    assert len(gen[0][1].collect()) == 2\n    assert len(gen[1][1].collect()) == 1\n\n\ndef test_chunk():\n    gen = flu(range(5)).chunk(2)\n    assert gen.collect() == [[0, 1], [2, 3], [4]]\n\n\ndef test_next():\n    gen = flu(range(5))\n    assert next(gen) == 0\n\n\ndef test_iter():\n    gen = flu(range(5))\n    assert next(iter(gen)) == 0\n\n\ndef test_enumerate():\n    # Check default\n    gen = flu(range(3)).enumerate()\n    assert gen.collect() == [(0, 0), (1, 1), (2, 2)]\n\n    # Check start param\n    gen = flu(range(3)).enumerate(start=1)\n    assert gen.collect() == [(1, 0), (2, 1), (3, 2)]\n\n\ndef test_zip():\n    gen = flu(range(3)).zip(range(3))\n    assert gen.collect() == [(0, 0), (1, 1), (2, 2)]\n\n    gen2 = flu(range(3)).zip(range(3), range(2))\n    assert gen2.collect() == [(0, 0, 0), (1, 1, 1)]\n\n\ndef test_zip_longest():\n    gen = 
flu(range(3)).zip_longest(range(5))\n    assert gen.collect() == [(0, 0), (1, 1), (2, 2), (None, 3), (None, 4)]\n    gen = flu(range(3)).zip_longest(range(5), fill_value=\"a\")\n    assert gen.collect() == [(0, 0), (1, 1), (2, 2), (\"a\", 3), (\"a\", 4)]\n    gen = flu(range(3)).zip_longest(range(5), range(4), fill_value=\"a\")\n    assert gen.collect() == [(0, 0, 0), (1, 1, 1), (2, 2, 2), (\"a\", 3, 3), (\"a\", 4, \"a\")]\n\n\ndef test_window():\n    # Check default\n    gen = flu(range(5)).window(n=3)\n    assert gen.collect() == [(0, 1, 2), (1, 2, 3), (2, 3, 4)]\n\n    # Check step param\n    gen = flu(range(5)).window(n=3, step=3)\n    assert gen.collect() == [(0, 1, 2), (3, 4, None)]\n\n    # Check fill_value param\n    gen = flu(range(5)).window(n=3, step=3, fill_value=\"i\")\n    assert gen.collect() == [(0, 1, 2), (3, 4, \"i\")]\n\n    assert flu(range(4)).window(n=0).collect() == [tuple()]\n\n    with pytest.raises(ValueError):\n        flu(range(5)).window(n=-1).collect()\n\n    with pytest.raises(ValueError):\n        flu(range(5)).window(3, step=0).collect()\n\n\ndef test_flu():\n    # Operations are lazy, so chaining over an infinite iterator is safe once take() bounds it\n    gen = flu(count()).map(lambda x: x**2).filter(lambda x: x % 517 == 0).chunk(5).take(3)\n    assert next(gen) == [0, 267289, 1069156, 2405601, 4276624]\n\n\ndef test_flatten():\n    nested = [1, [2, (3, [4])], [\"rbsd\", \"abc\"], (7,)]\n\n    # Defaults with depth of 1\n    gen = flu(nested).flatten()\n    assert [x for x in gen] == [1, 2, (3, [4]), \"rbsd\", \"abc\", 7]\n\n    # Depth 2\n    gen = flu(nested).flatten(depth=2)\n    assert [x for x in gen] == [1, 2, 3, [4], \"rbsd\", \"abc\", 7]\n\n    # Depth 3\n    gen = flu(nested).flatten(depth=3)\n    assert [x for x in gen] == [1, 2, 3, 4, \"rbsd\", \"abc\", 7]\n\n    # Depth infinite\n    gen = flu(nested).flatten(depth=sys.maxsize)\n    assert [x for x in gen] == [1, 2, 3, 4, \"rbsd\", \"abc\", 7]\n\n    # Depth 2 with tuple base_type\n    gen = flu(nested).flatten(depth=2, base_type=tuple)\n    assert [x for x in gen] == [1, 2, (3, [4]), \"rbsd\", \"abc\", (7,)]\n\n    # Depth 2 with iterate_strings\n    gen = flu(nested).flatten(depth=2, base_type=tuple, iterate_strings=True)\n    assert [x for x in gen] == [1, 2, (3, [4]), \"r\", \"b\", \"s\", \"d\", \"a\", \"b\", \"c\", (7,)]\n\n\ndef test_denormalize():\n    content = [\n        [\"abc\", [1, 2, 3]],\n    ]\n    assert flu(content).denormalize().collect() == [(\"abc\", 1), (\"abc\", 2), (\"abc\", 3)]\n    assert (flu(content).denormalize(iterate_strings=True).collect()) == [\n        (\"a\", 1),\n        (\"a\", 2),\n        (\"a\", 3),\n        (\"b\", 1),\n        (\"b\", 2),\n        (\"b\", 3),\n        (\"c\", 1),\n        (\"c\", 2),\n        (\"c\", 3),\n    ]\n\n    assert (flu([[[1], [1, 2], None]]).denormalize().collect()) == [\n        (1, 1, None),\n        (1, 2, None),\n    ]\n\n    assert (flu([[[1], [1, 2], []]]).denormalize().collect()) == []\n\n\ndef test_tee():\n    # Default unpacking\n    gen1, gen2 = flu(range(100)).tee()\n    assert gen1.sum() == gen2.sum()\n\n    # Adjusting the *n* parameter\n    gen1, gen2, gen3 = flu(range(100)).tee(3)\n    assert gen1.sum() == gen3.sum()\n\n    # Copies may be advanced independently\n    gen1, gen2 = flu(range(100)).tee()\n    assert next(gen1) == next(gen2)\n\n    # tee() does not break the method chain\n    assert flu(range(5)).tee().map(sum).sum() == 20\n\n\ndef test_join_left():\n    # Default key joins on the elements themselves\n    res = flu(range(6)).join_left(range(0, 6, 2)).collect()\n    assert res == [(0, 0), (1, None), (2, 2), (3, None), (4, 4), (5, None)]\n\n\ndef 
test_join_inner():\n    # Default key joins on the elements themselves\n    res = flu(range(6)).join_inner(range(0, 6, 2)).collect()\n    assert res == [(0, 0), (2, 2), (4, 4)]\n\n\ndef test_join_full():\n    # Basic full join\n    res = flu(range(4)).join_full(range(2, 6)).collect()\n    assert res == [(0, None), (1, None), (2, 2), (3, 3), (None, 4), (None, 5)]\n\n    # Full join with custom keys\n    left = [{\"id\": 1, \"name\": \"Alice\"}, {\"id\": 2, \"name\": \"Bob\"}]\n    right = [{\"id\": 2, \"value\": 100}, {\"id\": 3, \"value\": 200}]\n    res = flu(left).join_full(right, key=lambda x: x[\"id\"], other_key=lambda x: x[\"id\"]).collect()\n    assert res == [\n        ({\"id\": 1, \"name\": \"Alice\"}, None),\n        ({\"id\": 2, \"name\": \"Bob\"}, {\"id\": 2, \"value\": 100}),\n        (None, {\"id\": 3, \"value\": 200}),\n    ]\n\n    # Full join with empty left\n    res = flu([]).join_full(range(3)).collect()\n    assert res == [(None, 0), (None, 1), (None, 2)]\n\n    # Full join with empty right\n    res = flu(range(3)).join_full([]).collect()\n    assert res == [(0, None), (1, None), (2, None)]\n\n    # Full join with both empty\n    res = flu([]).join_full([]).collect()\n    assert res == []\n\n    # Full join with duplicates\n    res = flu([1, 2, 2, 3]).join_full([2, 2, 4]).collect()\n    expected = [(1, None), (2, 2), (2, 2), (2, 2), (2, 2), (3, None), (None, 4)]  # 2x2 Cartesian product\n    # Sort with a custom key so None values compare cleanly\n    sort_key = lambda x: (\n        x[0] is None,\n        x[0] if x[0] is not None else -1,\n        x[1] is None,\n        x[1] if x[1] is not None else -1,\n    )\n    assert sorted(res, key=sort_key) == sorted(expected, key=sort_key)\n
  },
  {
    "path": "src/tests/test_version.py",
    "content": "\"\"\"\nTests for version information.\n\"\"\"\n\nimport re\n\nimport flupy\n\n\ndef test_version_format():\n    \"\"\"Test that __version__ follows semantic versioning format (MAJOR.MINOR.PATCH).\"\"\"\n    # Standard semver regex pattern\n    semver_pattern = r\"^(?P<major>0|[1-9]\\d*)\\.(?P<minor>0|[1-9]\\d*)\\.(?P<patch>0|[1-9]\\d*)(?:-(?P<prerelease>(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\\.(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\\+(?P<buildmetadata>[0-9a-zA-Z-]+(?:\\.[0-9a-zA-Z-]+)*))?$\"\n\n    assert re.match(\n        semver_pattern, flupy.__version__\n    ), f\"Version '{flupy.__version__}' does not match semantic versioning format\"\n\n    # Ensure version parts can be parsed as integers\n    major, minor, patch = flupy.__version__.split(\"-\")[0].split(\"+\")[0].split(\".\")[:3]\n    assert major.isdigit(), f\"Major version '{major}' is not a valid integer\"\n    assert minor.isdigit(), f\"Minor version '{minor}' is not a valid integer\"\n    assert patch.isdigit(), f\"Patch version '{patch}' is not a valid integer\"\n"
  }
]