Repository: olirice/flupy Branch: master Commit: 1bc446fd3efc Files: 25 Total size: 61.7 KB Directory structure: gitextract_nws95u0a/ ├── .coveragerc ├── .github/ │ └── workflows/ │ ├── pre-commit_hooks.yaml │ └── test.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yml ├── .version ├── CONTRIBUTING.md ├── LICENSE.md ├── README.md ├── benchmark/ │ └── test_benchmark.py ├── mypy.ini ├── pyproject.toml ├── pytest.ini ├── setup.cfg └── src/ ├── flupy/ │ ├── __init__.py │ ├── cli/ │ │ ├── __init__.py │ │ ├── cli.py │ │ └── utils.py │ ├── fluent.py │ └── py.typed └── tests/ ├── test_cli.py ├── test_cli_utils.py ├── test_flu.py └── test_version.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .coveragerc ================================================ [report] exclude_lines = pragma: no cover if TYPE_CHECKING: raise AssertionError raise NotImplementedError @overload pass ================================================ FILE: .github/workflows/pre-commit_hooks.yaml ================================================ name: pre-commit hooks on: [push] jobs: build: runs-on: ubuntu-latest steps: - uses: actions/checkout@v1 - name: python setup 3.9 uses: actions/setup-python@v1 with: python-version: '3.9' - name: Install Poetry uses: snok/install-poetry@v1 with: version: 1.7.1 virtualenvs-create: true virtualenvs-in-project: true - name: Install dependencies run: | poetry install --with dev - name: run tests run: | poetry run pre-commit run --all ================================================ FILE: .github/workflows/test.yml ================================================ name: tests on: [push] jobs: build: runs-on: ubuntu-latest strategy: matrix: python-version: ['3.9', '3.10', '3.11', '3.12', '3.13'] steps: - uses: actions/checkout@v1 - name: python setup ${{ matrix.python-version }} uses: actions/setup-python@v1 with: python-version: ${{ 
matrix.python-version }} - name: Install Poetry uses: snok/install-poetry@v1 with: version: 1.7.1 virtualenvs-create: true virtualenvs-in-project: true - name: Install dependencies run: | poetry install --with dev - name: run tests run: | poetry run pytest --cov=src/flupy src/tests --cov-report=xml - name: upload coverage to codecov uses: codecov/codecov-action@v1 with: token: ${{ secrets.CODECOV_TOKEN }} file: ./coverage.xml flags: unittests name: codecov-umbrella fail_ci_if_error: true ================================================ FILE: .gitignore ================================================ docs/* # Temporary Python files *.pyc *.egg-info __pycache__ .ipynb_checkpoints # pyenv .python-version .benchmarks poetry.lock pip-wheel-metadata/ .vscode # Temporary OS files Icon* # Pytest cache .pytest_cache/* # Virtual environment venv/* # Temporary virtual environment files /.cache/ /.venv/ # Temporary server files .env *.pid *.swp # Generated documentation /docs/gen/ /docs/apidocs/ /docs/_build/ /site/ /*.html /*.rst /docs/*.png # Google Drive *.gdoc *.gsheet *.gslides *.gdraw # Testing and coverage results /.pytest/ /.coverage /.coverage.* /htmlcov/ /xmlreport/ /pyunit.xml /tmp/ *.tmp # Build and release directories /build/ /dist/ *.spec # Sublime Text *.sublime-workspace # Eclipse .settings # LLMs CLAUDE.md ================================================ FILE: .pre-commit-config.yaml ================================================ repos: - repo: https://github.com/pre-commit/mirrors-isort rev: v5.10.1 hooks: - id: isort args: ['--multi-line=3', '--trailing-comma', '--force-grid-wrap=0', '--use-parentheses', '--line-width=88'] - repo: https://github.com/pre-commit/pre-commit-hooks rev: v5.0.0 hooks: - id: trailing-whitespace - id: check-added-large-files - id: check-yaml - id: mixed-line-ending args: ['--fix=lf'] - repo: https://github.com/humitos/mirrors-autoflake.git rev: v1.1 hooks: - id: autoflake args: ['--in-place', '--remove-all-unused-imports'] - 
repo: https://github.com/psf/black rev: 25.1.0 hooks: - id: black language_version: python3.9 - repo: https://github.com/pre-commit/mirrors-mypy rev: v1.17.0 hooks: - id: mypy files: flupy/ args: ["--config-file", "mypy.ini"] ================================================ FILE: .readthedocs.yml ================================================ # Read the Docs configuration file # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details # Required version: 2 # Set the OS, Python version and other tools you might need build: os: ubuntu-22.04 tools: python: "3.11" jobs: post_create_environment: # Install poetry - pip install poetry post_install: # Install dependencies with Poetry # VIRTUAL_ENV needs to be set manually for Poetry to work correctly - VIRTUAL_ENV=$READTHEDOCS_VIRTUALENV_PATH poetry install --with dev # Build documentation in the "docs/" directory with Sphinx sphinx: configuration: docs/conf.py ================================================ FILE: .version ================================================ 1.0.11 ================================================ FILE: CONTRIBUTING.md ================================================ # For Contributors ## Setup ### Requirements * Make: * Windows: http://mingw.org/download/installer * Mac: http://developer.apple.com/xcode * Linux: http://www.gnu.org/software/make * pipenv: http://docs.pipenv.org * Pandoc: http://johnmacfarlane.net/pandoc/installing.html * Graphviz: http://www.graphviz.org/Download.php To confirm these system dependencies are configured correctly: ```sh $ make doctor ``` ### Installation Install project dependencies into a virtual environment: ```sh $ make install ``` ## Development Tasks ### Testing Manually run the tests: ```sh $ make test ``` or keep them running on change: ```sh $ make watch ``` > In order to have OS X notifications, `brew install terminal-notifier`. 
### Documentation Build the documentation: ```sh $ make docs ``` ### Static Analysis Run linters and static analyzers: ```sh $ make pylint $ make pycodestyle $ make pydocstyle $ make check # includes all checks ``` ## Continuous Integration The CI server will report overall build status: ```sh $ make ci ``` ## Release Tasks Release to PyPI: ```sh $ make upload ``` ================================================ FILE: LICENSE.md ================================================ # License **The MIT License (MIT)** Copyright © 2017, Oliver Rice Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: README.md ================================================ # flupy

Tests Codestyle Black

Python version PyPI version License Download count

--- **Documentation**: https://flupy.readthedocs.io/en/latest/ **Source Code**: https://github.com/olirice/flupy --- ## Overview Flupy implements a [fluent interface](https://en.wikipedia.org/wiki/Fluent_interface) for operating on python iterables. All flupy methods return generators and are evaluated lazily. This allows expressions to transform arbitrary size data in extremely limited memory. You can think of flupy as a lightweight, zero-dependency, pure python alternative to the excellent [Apache Spark](https://spark.apache.org/) project. ## Setup ### Requirements * Python 3.9+ ### Installation Install flupy with pip: ```sh $ pip install flupy ``` ### Library ```python from itertools import count from flupy import flu # Processing an infinite sequence in constant memory pipeline = ( flu(count()) .map(lambda x: x**2) .filter(lambda x: x % 517 == 0) .chunk(5) .take(3) ) for item in pipeline: print(item) # Returns: # [0, 267289, 1069156, 2405601, 4276624] # [6682225, 9622404, 13097161, 17106496, 21650409] # [26728900, 32341969, 38489616, 45171841, 52388644] ``` ### CLI The flupy command line interface brings the same syntax for lazy pipelines to your shell. Inputs to the `flu` command are auto-populated into a `Fluent` context named `_`.
```` $ flu -h usage: flu [-h] [-f FILE] [-i [IMPORT [IMPORT ...]]] command flupy: a fluent interface for python positional arguments: command flupy command to execute on input optional arguments: -h, --help show this help message and exit -f FILE, --file FILE path to input file -i [IMPORT [IMPORT ...]], --import [IMPORT [IMPORT ...]] modules to import Syntax: :: Examples: 'import os' = '-i os' 'import os as op_sys' = '-i os::op_sys' 'from os import environ' = '-i os:environ' 'from os import environ as env' = '-i os:environ:env' ```` ================================================ FILE: benchmark/test_benchmark.py ================================================ from itertools import cycle from flupy import flu def test_integration(benchmark): @benchmark def work(): (flu(range(100000)).chunk(100).chunk(2).map_item(0).count()) def test_max(benchmark): @benchmark def work(): flu(range(300000)).max() def test_initialize(benchmark): @benchmark def work(): flu(range(10)) def test_collect(benchmark): @benchmark def work(): flu(range(3)).collect() def test___getitem__(benchmark): @benchmark def work(): flu(range(350))[1:3].collect() def test_sum(benchmark): @benchmark def work(): gen = flu(range(1000)).sum() def test_reduce(benchmark): @benchmark def work(): flu(range(50)).reduce(lambda x, y: x + y) def test_fold_left(benchmark): @benchmark def work(): flu(range(5)).fold_left(lambda x, y: x + y, 0) def test_count(benchmark): @benchmark def work(): gen = flu(range(3000)).count() def test_min(benchmark): @benchmark def work(): flu(range(3000)).min() def test_first(benchmark): @benchmark def work(): flu(range(3)).first() def test_last(benchmark): @benchmark def work(): flu(range(3000)).last() def test_head(benchmark): @benchmark def work(): flu(range(30000)).head(n=10) def test_tail(benchmark): @benchmark def work(): gen = flu(range(30000)).tail(n=10) def test_unique(benchmark): class NoHash: def __init__(self, letter, keyf): self.letter = letter self.keyf = keyf a = 
NoHash("a", 1) b = NoHash("b", 1) c = NoHash("c", 2) data = [x % 500 for x in range(10000)] @benchmark def work(): gen = flu(data).unique().collect() def test_sort(benchmark): @benchmark def work(): flu(range(3000, 0, -1)).sort().collect() def test_shuffle(benchmark): original_order = list(range(10000)) @benchmark def work(): flu(original_order).shuffle().collect() def test_map(benchmark): @benchmark def work(): flu(range(3)).map(lambda x: x + 2).collect() def test_rate_limit(benchmark): @benchmark def work(): flu(range(300)).rate_limit(50000000000000).collect() def test_map_item(benchmark): data = flu(range(300)).map(lambda x: {"a": x}) @benchmark def work(): gen = flu(data).map_item("a") def test_map_attr(benchmark): class Person: def __init__(self, age: int) -> None: self.age = age people = flu(range(200)).map(Person).collect() @benchmark def work(): flu(people).map_attr("age").collect() def test_filter(benchmark): @benchmark def work(): flu(range(3)).filter(lambda x: 0 < x < 2).collect() def test_take(benchmark): @benchmark def work(): flu(range(10)).take(5).collect() def test_take_while(benchmark): @benchmark def work(): flu(cycle(range(10))).take_while(lambda x: x < 4).collect() def test_drop_while(benchmark): @benchmark def work(): flu([1, 2, 3, 4, 3, 2, 1]).drop_while(lambda x: x < 4).collect() def test_group_by(benchmark): @benchmark def work(): flu([1, 1, 1, 2, 2, 2, 2, 3]).zip(range(100)).group_by(lambda x: x[0]).collect() def test_chunk(benchmark): @benchmark def work(): flu(range(500)).chunk(2).collect() def test_enumerate(benchmark): @benchmark def work(): flu(range(3)).enumerate(start=1).collect() def test_zip(benchmark): @benchmark def work(): flu(range(3)).zip(range(3)).collect() def test_zip_longest(benchmark): @benchmark def work(): flu(range(3)).zip_longest(range(5)).collect() def test_window(benchmark): @benchmark def work(): gen = flu(range(5)).window(n=3, step=3).collect def test_flatten(benchmark): nested = [1, [2, (3, [4])], ["rbsd", 
"abc"], (7,)] @benchmark def work(): gen = flu(nested).flatten(depth=2, base_type=tuple).collect() def test_tee(benchmark): @benchmark def work(): gen1, gen2, gen3 = flu(range(100)).tee(3) def test_join_left(benchmark): @benchmark def work(): flu(range(6)).join_left(range(0, 6, 2)).collect() def test_join_inner(benchmark): @benchmark def work(): flu(range(6)).join_inner(range(0, 6, 2)).collect() ================================================ FILE: mypy.ini ================================================ [mypy] ignore_missing_imports = True strict_optional = True follow_imports = skip warn_redundant_casts = True warn_unused_ignores = False check_untyped_defs = True no_implicit_reexport = True # Strict Mode: disallow_untyped_defs = True disallow_any_generics = True ================================================ FILE: pyproject.toml ================================================ [tool.poetry] name = "flupy" version = "1.2.3" description = "Fluent data processing in Python - a chainable stream processing library for expressive data manipulation using method chaining" authors = ["Oliver Rice "] license = "MIT" readme = "README.md" repository = "https://github.com/olirice/flupy" packages = [{include = "flupy", from = "src"}] classifiers = [ "Development Status :: 5 - Production/Stable", "Natural Language :: English", "Operating System :: OS Independent", "Programming Language :: Python", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", ] [tool.poetry.dependencies] python = ">=3.9" typing_extensions = ">=4" [tool.poetry.group.dev.dependencies] pytest = "*" pytest-cov = "*" pytest-benchmark = "*" pre-commit = "*" pylint = "*" black = "*" mypy = "*" sphinx = "*" sphinx-rtd-theme = "*" [tool.poetry.scripts] flu = "flupy.cli.cli:main" flu_precommit = 
"flupy.cli.cli:precommit" [build-system] requires = ["poetry-core>=2.0.0"] build-backend = "poetry.core.masonry.api" [tool.black] line-length = 120 exclude = ''' /( \.git | \.hg | \.mypy_cache | \.tox | \.venv | _build | buck-out | build | dist )/ ''' [tool.mypy] python_version = "3.9" ignore_missing_imports = true strict_optional = true follow_imports = "skip" warn_redundant_casts = true warn_unused_ignores = false check_untyped_defs = true no_implicit_reexport = true disallow_untyped_defs = true disallow_any_generics = true [tool.pytest.ini_options] addopts = "--cov=src/flupy src/tests" [tool.coverage.report] exclude_lines = [ "pragma: no cover", "if TYPE_CHECKING:", "raise AssertionError", "raise NotImplementedError", "@overload", "pass", ] ================================================ FILE: pytest.ini ================================================ [pytest] addopts = --cov=src/flupy src/tests ================================================ FILE: setup.cfg ================================================ [metadata] description_file = README.md ================================================ FILE: src/flupy/__init__.py ================================================ from importlib.metadata import version from flupy.cli.utils import walk_dirs, walk_files from flupy.fluent import flu __project__ = "flupy" __version__ = version(__project__) __all__ = ["flu", "walk_files", "walk_dirs"] ================================================ FILE: src/flupy/cli/__init__.py ================================================ ================================================ FILE: src/flupy/cli/cli.py ================================================ import argparse import importlib import sys from typing import Any, Dict, Generator, List, Optional from flupy import __version__, flu, walk_dirs, walk_files def read_file(path: str) -> Generator[str, None, None]: """Yield lines from a file given its path""" with open(path, "r") as f: yield from f def parse_args(args: List[str]) 
def parse_args(args: List[str]) -> argparse.Namespace:
    """Build the flupy CLI argument parser and parse *args* (program name excluded)."""
    arg_parser = argparse.ArgumentParser(
        description="flupy: a fluent interface for python collections",
        formatter_class=argparse.RawTextHelpFormatter,
    )
    arg_parser.add_argument("-v", "--version", action="version", version="%(prog)s " + __version__)
    arg_parser.add_argument("command", help="command to execute against input")
    arg_parser.add_argument("-f", "--file", help="path to input file")
    import_help = (
        "modules to import\n"
        "Syntax: ::\n"
        "Examples:\n"
        "\t'import os' = '-i os'\n"
        "\t'import os as op_sys' = '-i os::op_sys'\n"
        "\t'from os import environ' = '-i os:environ'\n"
        "\t'from os import environ as env' = '-i os:environ:env'\n"
    )
    arg_parser.add_argument("-i", "--import", nargs="*", default=[], help=import_help)
    return arg_parser.parse_args(args)


def build_import_dict(imps: List[str]) -> Dict[str, Any]:
    """Resolve each CLI import directive into a name -> object mapping.

    Directive grammar: "module[:attribute[:alias]]", with "module::alias"
    (empty attribute segment) meaning "import module as alias".
    """
    resolved: Dict[str, Any] = {}
    for directive in imps:
        module_name, _, remainder = directive.partition(":")
        attr_name, _, alias = remainder.partition(":")
        loaded = importlib.import_module(module_name)
        if attr_name:
            # "from module import attr [as alias]"
            resolved[alias or attr_name] = getattr(loaded, attr_name)
        else:
            # "import module [as alias]"
            resolved[alias or module_name] = loaded
    return resolved


def main(argv: Optional[List[str]] = None) -> None:
    """CLI Entrypoint: evaluate the user's flupy expression against a file or stdin."""
    args = parse_args(sys.argv[1:] if argv is None else argv[1:])
    import_dict = build_import_dict(getattr(args, "import"))
    if args.file:
        _ = flu(read_file(args.file)).map(str.rstrip)
    else:
        try:
            # Restore the default SIGPIPE handler so piping into e.g. `head` exits quietly
            from signal import SIG_DFL, SIGPIPE, signal

            signal(SIGPIPE, SIG_DFL)
        except ImportError:
            # SIGPIPE not available on platform (e.g. Windows), nothing to do
            pass
        _ = flu(sys.stdin).map(str.rstrip)
    locals_dict = {
        "flu": flu,
        "_": _,
        "walk_files": walk_files,
        "walk_dirs": walk_dirs,
    }
    # eval of the user-supplied command is this CLI's purpose; the input is
    # assumed to be trusted, locally-typed shell input.
    pipeline = eval(args.command, import_dict, locals_dict)
    if hasattr(pipeline, "__iter__") and not isinstance(pipeline, (str, bytes)):
        for row in pipeline:
            sys.stdout.write(str(row) + "\n")
    elif pipeline is not None:
        sys.stdout.write(str(pipeline) + "\n")
# pylint: disable=invalid-name
def walk_files(*pathes: str, abspath: bool = True) -> "Fluent[str]":
    """Yield files recursively starting from each location in *pathes*

    When *pathes* is empty, the search starts from the current working
    directory. If *abspath* is True (the default), absolute paths are
    yielded; otherwise paths are joined relative to each search root.
    """
    if pathes == ():
        pathes = (".",)

    def _impl() -> Generator[str, None, None]:
        for path in pathes:
            for d, _, files in os.walk(path):
                for x in files:
                    rel_path = os.path.join(d, x)
                    if abspath:
                        yield os.path.abspath(rel_path)
                    else:
                        yield rel_path

    return flu(_impl())


def walk_dirs(path: str = ".") -> "Fluent[str]":
    """Yield directories recursively starting from *path*"""
    # Fix: previous docstring incorrectly said "Yield files" — os.walk's first
    # tuple element (yielded here) is the directory path.

    def _impl() -> Generator[str, None, None]:
        for d, _, _ in os.walk(path):
            yield d

    return flu(_impl())
contravariant=True) _T1 = TypeVar("_T1") _T2 = TypeVar("_T2") _T3 = TypeVar("_T3") S = TypeVar("S") P = ParamSpec("P") CallableTakesIterable = Callable[[Iterable[T]], Collection[T]] class SupportsEquality(Protocol): def __eq__(self, __other: object) -> bool: pass class SupportsGetItem(Protocol[T_co]): def __getitem__(self, __k: Hashable) -> T_co: pass class SupportsIteration(Protocol[T_co]): def __iter__(self) -> Iterator[T]: pass class SupportsLessThan(Protocol): def __lt__(self, __other: Any) -> bool: pass SupportsLessThanT = TypeVar("SupportsLessThanT", bound="SupportsLessThan") class Empty: pass def identity(x: T) -> T: return x class Fluent(Generic[T]): """A fluent interface to lazy generator functions >>> from flupy import flu >>> ( flu(range(100)) .map(lambda x: x**2) .filter(lambda x: x % 3 == 0) .chunk(3) .take(2) .to_list() ) [[0, 9, 36], [81, 144, 225]] """ def __init__(self, iterable: Iterable[T]) -> None: iterator = iter(iterable) self._iterator: Iterator[T] = iterator @overload def __getitem__(self, index: int) -> T: pass @overload def __getitem__(self, index: slice) -> "Fluent[T]": pass def __getitem__(self, key: Union[int, slice]) -> Union[T, "Fluent[T]"]: if isinstance(key, int) and key >= 0: try: return next(islice(self._iterator, key, key + 1)) except StopIteration: raise IndexError("flu index out of range") elif isinstance(key, slice): return flu(islice(self._iterator, key.start, key.stop, key.step)) else: raise TypeError(f"Indices must be non-negative integers or slices, not {type(key).__name__}") ### Summary ### def collect(self, n: Optional[int] = None, container_type: CallableTakesIterable[T] = list) -> Collection[T]: """Collect items from iterable into a container >>> flu(range(4)).collect() [0, 1, 2, 3] >>> flu(range(4)).collect(container_type=set) {0, 1, 2, 3} >>> flu(range(4)).collect(n=2) [0, 1] """ return container_type(self.take(n)) def to_list(self) -> List[T]: """Collect items from iterable into a list >>> flu(range(4)).to_list() 
[0, 1, 2, 3] """ return list(self) def sum(self) -> Union[T, int]: """Sum of elements in the iterable >>> flu([1,2,3]).sum() 6 """ return sum(self) # type: ignore def count(self) -> int: """Count of elements in the iterable >>> flu(['a','b','c']).count() 3 """ return sum(1 for _ in self) def min(self: "Fluent[SupportsLessThanT]") -> SupportsLessThanT: """Smallest element in the interable >>> flu([1, 3, 0, 2]).min() 0 """ return min(self) def max(self: "Fluent[SupportsLessThanT]") -> SupportsLessThanT: """Largest element in the interable >>> flu([0, 3, 2, 1]).max() 3 """ return max(self) def first(self, default: Any = Empty()) -> T: """Return the first item of the iterable. Raise IndexError if empty, or return default if provided. >>> flu([0, 1, 2, 3]).first() 0 >>> flu([]).first(default="some_default") 'some_default' """ x: Union[Empty, T] = default for x in self: return x if isinstance(x, Empty): raise IndexError("Empty iterator") return x def last(self, default: Any = Empty()) -> T: """Return the last item of the iterble. Raise IndexError if empty or default if provided. >>> flu([0, 1, 2, 3]).last() 3 >>> flu([]).last(default='some_default') 'some_default' """ x: Union[Empty, T] = default for x in self: pass if isinstance(x, Empty): raise IndexError("Empty iterator") return x def head(self, n: int = 10, container_type: CallableTakesIterable[T] = list) -> Collection[T]: """Returns up to the first *n* elements from the iterable. 
>>> flu(range(20)).head() [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] >>> flu(range(15)).head(n=2) [0, 1] >>> flu([]).head() [] """ return self.take(n).collect(container_type=container_type) def tail(self, n: int = 10, container_type: CallableTakesIterable[T] = list) -> Collection[T]: """Return up to the last *n* elements from the iterable >>> flu(range(20)).tail() [10, 11, 12, 13, 14, 15, 16, 17, 18, 19] >>> flu(range(15)).tail(n=2) [13, 14] """ val: Union[List[Empty], Tuple[Any, ...]] = [Empty()] for val in self.window(n, fill_value=Empty()): pass return container_type([x for x in val if not isinstance(x, Empty)]) ### End Summary ### ### Non-Constant Memory ### def sort( self: "Fluent[SupportsLessThanT]", key: Optional[Callable[[Any], Any]] = None, reverse: bool = False, ) -> "Fluent[SupportsLessThanT]": """Sort iterable by *key* function if provided or identity otherwise Note: sorting loads the entire iterable into memory >>> flu([3,6,1]).sort().to_list() [1, 3, 6] >>> flu([3,6,1]).sort(reverse=True).to_list() [6, 3, 1] >>> flu([3,-6,1]).sort(key=abs).to_list() [1, 3, -6] """ return Fluent(sorted(self, key=key, reverse=reverse)) def join_left( self, other: Iterable[_T1], key: Callable[[T], Hashable] = identity, other_key: Callable[[_T1], Hashable] = identity, ) -> "Fluent[Tuple[T, Union[_T1, None]]]": """Join the iterable with another iterable using equality between *key* applied to self and *other_key* applied to *other* to identify matching entries When no matching entry is found in *other*, entries in the iterable are paired with None Note: join_left loads *other* into memory >>> flu(range(6)).join_left(range(0, 6, 2)).to_list() [(0, 0), (1, None), (2, 2), (3, None), (4, 4), (5, None)] """ def _impl() -> Generator[Tuple[T, Union[_T1, None]], None, None]: other_lookup = defaultdict(list) for entry_other in other: other_lookup[other_key(entry_other)].append(entry_other) for entry in self: matches: Optional[List[_T1]] = other_lookup.get(key(entry)) if matches: for match in 
matches: yield (entry, match) else: yield (entry, None) return Fluent(_impl()) def join_inner( self, other: Iterable[_T1], key: Callable[[T], Hashable] = identity, other_key: Callable[[_T1], Hashable] = identity, ) -> "Fluent[Tuple[T, _T1]]": """Join the iterable with another iterable using equality between *key* applied to self and *other_key* applied to *other* to identify matching entries When no matching entry is found in *other*, entries in the iterable are filtered from the results Note: join_inner loads *other* into memory >>> flu(range(6)).join_inner(range(0, 6, 2)).to_list() [(0, 0), (2, 2), (4, 4)] """ def _impl() -> Generator[Tuple[T, _T1], None, None]: other_lookup = defaultdict(list) for entry_other in other: other_lookup[other_key(entry_other)].append(entry_other) for entry in self: matches: List[_T1] = other_lookup[key(entry)] for match in matches: yield (entry, match) return Fluent(_impl()) def join_full( self, other: Iterable[_T1], key: Callable[[T], Hashable] = identity, other_key: Callable[[_T1], Hashable] = identity, ) -> "Fluent[Tuple[Union[T, None], Union[_T1, None]]]": """Join the iterable with another iterable using equality between *key* applied to self and *other_key* applied to *other* to identify matching entries Returns all entries from both iterables. 
When no matching entry is found, entries are paired with None Note: join_full loads both *self* and *other* into memory >>> flu(range(4)).join_full(range(2, 6)).to_list() [(0, None), (1, None), (2, 2), (3, 3), (None, 4), (None, 5)] """ def _impl() -> Generator[Tuple[Union[T, None], Union[_T1, None]], None, None]: # Build lookup for other other_lookup: Dict[Hashable, List[_T1]] = defaultdict(list) other_keys_seen: Set[Hashable] = set() for entry_other in other: other_key_val = other_key(entry_other) other_lookup[other_key_val].append(entry_other) other_keys_seen.add(other_key_val) # Track which keys from other have been matched matched_other_keys: Set[Hashable] = set() # Process all entries from self for entry in self: entry_key = key(entry) matches: Optional[List[_T1]] = other_lookup.get(entry_key) if matches: matched_other_keys.add(entry_key) for match in matches: yield (entry, match) else: yield (entry, None) # Yield unmatched entries from other unmatched_keys = other_keys_seen - matched_other_keys for unmatched_key in unmatched_keys: for entry_other in other_lookup[unmatched_key]: yield (None, entry_other) return Fluent(_impl()) def shuffle(self) -> "Fluent[T]": """Randomize the order of elements in the interable Note: shuffle loads the entire iterable into memory >>> flu([3,6,1]).shuffle().to_list() [6, 1, 3] """ dat: List[T] = self.to_list() return Fluent(sample(dat, len(dat))) def group_by( self, key: Callable[[T], Union[T, _T1]] = identity, sort: bool = True ) -> "Fluent[Tuple[Union[T,_T1], Fluent[T]]]": """Yield consecutive keys and groups from the iterable *key* is a function to compute a key value used in grouping and sorting for each element. 
*key* defaults to an identity function which returns the unchaged element When the iterable is pre-sorted according to *key*, setting *sort* to False will prevent loading the dataset into memory and improve performance >>> flu([2, 4, 2, 4]).group_by().to_list() [(2, ), (4, )] Or, if the iterable is pre-sorted >>> flu([2, 2, 5, 5]).group_by(sort=False).to_list() [(2, ), (5, )] Using a key function >>> points = [ {'x': 1, 'y': 0}, {'x': 4, 'y': 3}, {'x': 1, 'y': 5} ] >>> key_func = lambda u: u['x'] >>> flu(points).group_by(key=key_func, sort=True).to_list() [(1, ), (4, )] """ gen = self.sort(key) if sort else self return Fluent(groupby(gen, key)).map(lambda x: (x[0], flu([y for y in x[1]]))) def unique(self, key: Callable[[T], Hashable] = identity) -> "Fluent[T]": """Yield elements that are unique by a *key*. >>> flu([2, 3, 2, 3]).unique().to_list() [2, 3] >>> flu([2, -3, -2, 3]).unique(key=abs).to_list() [2, -3] """ def _impl() -> Generator[T, None, None]: seen: Set[Any] = set() for x in self: x_hash = key(x) if x_hash in seen: continue else: seen.add(x_hash) yield x return Fluent(_impl()) ### End Non-Constant Memory ### ### Side Effect ### def rate_limit(self, per_second: Union[int, float] = 100) -> "Fluent[T]": """Restrict consumption of iterable to n item *per_second* >>> import time >>> start_time = time.time() >>> _ = flu(range(3)).rate_limit(3).to_list() >>> print('Runtime', int(time.time() - start_time)) 1.00126 # approximately 1 second for 3 items """ def _impl() -> Generator[T, None, None]: wait_time = 1.0 / per_second for val in self: start_time = time.time() yield val call_duration = time.time() - start_time time.sleep(max(wait_time - call_duration, 0.0)) return Fluent(_impl()) def side_effect( self, func: Callable[[T], Any], before: Optional[Callable[[], Any]] = None, after: Optional[Callable[[], Any]] = None, ) -> "Fluent[T]": """Invoke *func* for each item in the iterable before yielding the item. 
*func* takes a single argument and the output is discarded *before* and *after* are optional functions that take no parameters and are executed once before iteration begins and after iteration ends respectively. Each will be called exactly once. >>> flu(range(2)).side_effect(lambda x: print(f'Collected {x}')).to_list() Collected 0 Collected 1 [0, 1] """ def _impl() -> Generator[T, None, None]: try: if before is not None: before() for x in self: func(x) yield x finally: if after is not None: after() return Fluent(_impl()) ### End Side Effect ### def map(self, func: Callable[Concatenate[T, P], _T1], *args: Any, **kwargs: Any) -> "Fluent[_T1]": """Apply *func* to each element of iterable >>> flu(range(5)).map(lambda x: x*x).to_list() [0, 1, 4, 9, 16] """ def _impl() -> Generator[_T1, None, None]: for val in self._iterator: yield func(val, *args, **kwargs) return Fluent(_impl()) def map_item(self: "Fluent[SupportsGetItem[T]]", item: Hashable) -> "Fluent[T]": """Extracts *item* from every element of the iterable >>> flu([(2, 4), (2, 5)]).map_item(1).to_list() [4, 5] >>> flu([{'mykey': 8}, {'mykey': 5}]).map_item('mykey').to_list() [8, 5] """ def _impl() -> Generator[T, None, None]: for x in self: yield x[item] return Fluent(_impl()) def map_attr(self, attr: str) -> "Fluent[Any]": """Extracts the attribute *attr* from each element of the iterable >>> from collections import namedtuple >>> MyTup = namedtuple('MyTup', ['value', 'backup_val']) >>> flu([MyTup(1, 5), MyTup(2, 4)]).map_attr('value').to_list() [1, 2] """ return self.map(lambda x: getattr(x, attr)) def filter(self, func: Callable[Concatenate[T, P], bool], *args: Any, **kwargs: Any) -> "Fluent[T]": """Yield elements of iterable where *func* returns truthy >>> flu(range(10)).filter(lambda x: x % 2 == 0).to_list() [0, 2, 4, 6, 8] """ def _impl() -> Generator[T, None, None]: for val in self._iterator: if func(val, *args, **kwargs): yield val return Fluent(_impl()) def reduce(self, func: Callable[[T, T], T]) -> T: 
"""Apply a function of two arguments cumulatively to the items of the iterable, from left to right, so as to reduce the sequence to a single value >>> flu(range(5)).reduce(lambda x, y: x + y) 10 """ return reduce(func, self) def fold_left(self, func: Callable[[S, T], S], initial: S) -> S: """Apply a function of two arguments cumulatively to the items of the iterable, from left to right, starting with *initial*, so as to fold the sequence to a single value >>> flu(range(5)).fold_left(lambda x, y: x + str(y), "") '01234' """ return reduce(func, self, initial) @overload def zip(self, __iter1: Iterable[_T1]) -> "Fluent[Tuple[T, _T1]]": ... @overload def zip(self, __iter1: Iterable[_T1], __iter2: Iterable[_T2]) -> "Fluent[Tuple[T, _T1, _T2]]": ... @overload def zip( self, __iter1: Iterable[_T1], __iter2: Iterable[_T2], __iter3: Iterable[_T3] ) -> "Fluent[Tuple[T, _T1, _T2, _T3]]": ... @overload def zip( self, __iter1: Iterable[Any], __iter2: Iterable[Any], __iter3: Iterable[Any], __iter4: Iterable[Any], *iterable: Iterable[Any], ) -> "Fluent[Tuple[T, ...]]": ... def zip(self, *iterable: Iterable[Any]) -> Union[ "Fluent[Tuple[T, ...]]", "Fluent[Tuple[T, _T1]]", "Fluent[Tuple[T, _T1, _T2]]", "Fluent[Tuple[T, _T1, _T2, _T3]]", ]: """Yields tuples containing the i-th element from the i-th argument in the instance, and the iterable >>> flu(range(5)).zip(range(3, 0, -1)).to_list() [(0, 3), (1, 2), (2, 1)] """ # @self_to_flu is not compatible with @overload # make sure any usage of self supports arbitrary iterables tup_iter = zip(iter(self), *iterable) return Fluent(tup_iter) def zip_longest(self, *iterable: Iterable[_T1], fill_value: Any = None) -> "Fluent[Tuple[T, ...]]": """Yields tuples containing the i-th element from the i-th argument in the instance, and the iterable Iteration continues until the longest iterable is exhaused. 
If iterables are uneven in length, missing values are filled in with fill value >>> flu(range(5)).zip_longest(range(3, 0, -1)).to_list() [(0, 3), (1, 2), (2, 1), (3, None), (4, None)] >>> flu(range(5)).zip_longest(range(3, 0, -1), fill_value='a').to_list() [(0, 3), (1, 2), (2, 1), (3, 'a'), (4, 'a')] """ return Fluent(zip_longest(self, *iterable, fillvalue=fill_value)) def enumerate(self, start: int = 0) -> "Fluent[Tuple[int, T]]": """Yields tuples from the instance where the first element is a count from initial value *start*. >>> flu([3,4,5]).enumerate().to_list() [(0, 3), (1, 4), (2, 5)] """ return Fluent(enumerate(self, start=start)) def take(self, n: Optional[int] = None) -> "Fluent[T]": """Yield first *n* items of the iterable >>> flu(range(10)).take(2).to_list() [0, 1] """ return Fluent(islice(self._iterator, n)) def take_while(self, predicate: Callable[[T], bool]) -> "Fluent[T]": """Yield elements from the chainable so long as the predicate is true >>> flu(range(10)).take_while(lambda x: x < 3).to_list() [0, 1, 2] """ return Fluent(takewhile(predicate, self._iterator)) def drop_while(self, predicate: Callable[[T], bool]) -> "Fluent[T]": """Drop elements from the chainable as long as the predicate is true; afterwards, return every element >>> flu(range(10)).drop_while(lambda x: x < 3).to_list() [3, 4, 5, 6, 7, 8, 9] """ return Fluent(dropwhile(predicate, self._iterator)) def chunk(self, n: int) -> "Fluent[List[T]]": """Yield lists of elements from iterable in groups of *n* if the iterable is not evenly divisiible by *n*, the final list will be shorter >>> flu(range(10)).chunk(3).to_list() [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]] """ def _impl() -> Generator[List[T], None, None]: while True: vals: List[T] = list(self.take(n)) if vals: yield vals else: return return Fluent(_impl()) def flatten( self, depth: int = 1, base_type: Optional[Type[object]] = None, iterate_strings: bool = False, ) -> "Fluent[Any]": """Recursively flatten nested iterables (e.g., a list 
        of lists of tuples) into non-iterable type or an optional
        user-defined base_type

        Strings are treated as non-iterable for convenience. set iterate_string=True
        to change that behavior.

        >>> flu([[0, 1, 2], [3, 4, 5]]).flatten().to_list()
        [0, 1, 2, 3, 4, 5]

        >>> flu([[0, [1, 2]], [[3, 4], 5]]).flatten().to_list()
        [0, [1, 2], [3, 4], 5]

        >>> flu([[0, [1, 2]], [[3, 4], 5]]).flatten(depth=2).to_list()
        [0, 1, 2, 3, 4, 5]

        >>> flu([1, (2, 2), 4, [5, (6, 6, 6)]]).flatten(base_type=tuple).to_list()
        [1, (2, 2), 4, 5, (6, 6, 6)]

        >>> flu([[2, 0], 'abc', 3, [4]]).flatten(iterate_strings=True).to_list()
        [2, 0, 'a', 'b', 'c', 3, 4]
        """

        # TODO(OR): Reimplement with strong types
        def walk(node: Any, level: int) -> Generator[T, None, None]:
            # Stop descending past *depth*, at strings (unless requested),
            # or at the caller-supplied atomic *base_type*.
            if (
                ((depth is not None) and (level > depth))
                or (isinstance(node, str) and not iterate_strings)
                or ((base_type is not None) and isinstance(node, base_type))
            ):
                yield node
                return
            try:
                tree = iter(node)
            except TypeError:
                # Non-iterable leaf.
                yield node
                return
            else:
                for child in tree:
                    for val in walk(child, level + 1):
                        yield val

        return Fluent(walk(self, level=0))

    def denormalize(self: "Fluent[SupportsIteration[Any]]", iterate_strings: bool = False) -> "Fluent[Tuple[Any, ...]]":
        """Denormalize iterable components of each record

        Yields the cartesian product of the fields of each record; an empty
        iterable field produces no rows for that record.

        >>> flu([("abc", [1, 2, 3])]).denormalize().to_list()
        [('abc', 1), ('abc', 2), ('abc', 3)]

        >>> flu([("abc", [1, 2])]).denormalize(iterate_strings=True).to_list()
        [('a', 1), ('a', 2), ('b', 1), ('b', 2), ('c', 1), ('c', 2)]

        >>> flu([("abc", [])]).denormalize().to_list()
        []
        """

        def _impl() -> Generator[Tuple[Any, ...], None, None]:
            for record in self:
                # Each field is normalized to an iterable; scalar fields are
                # wrapped in a one-element list so product() can expand them.
                iter_elements: List[Iterable[Any]] = []

                element: Any
                for element in record:
                    # Check for string and string iteration is allowed
                    if isinstance(element, str) and iterate_strings:
                        iter_elements.append(element)
                    # Check for string and string iteration is not allowed
                    elif isinstance(element, str):
                        iter_elements.append([element])
                    # Check for iterable
                    elif isinstance(element, IterableType):
                        iter_elements.append(element)
                    # Check for non-iterable
                    else:
                        iter_elements.append([element])

                for row in product(*iter_elements):
                    yield row

        return Fluent(_impl())

    def window(self, n: int, step: int = 1, fill_value: Any = None) -> "Fluent[Tuple[Any, ...]]":
        """Yield a sliding window of width *n* over the given iterable.

        Each window will advance in increments of *step*:

        If the length of the iterable does not evenly divide by the *step*
        the final output is padded with *fill_value*

        >>> flu(range(5)).window(3).to_list()
        [(0, 1, 2), (1, 2, 3), (2, 3, 4)]

        >>> flu(range(5)).window(n=3, step=2).to_list()
        [(0, 1, 2), (2, 3, 4)]

        >>> flu(range(9)).window(n=4, step=3).to_list()
        [(0, 1, 2, 3), (3, 4, 5, 6), (6, 7, 8, None)]

        >>> flu(range(9)).window(n=4, step=3, fill_value=-1).to_list()
        [(0, 1, 2, 3), (3, 4, 5, 6), (6, 7, 8, -1)]
        """

        def _impl() -> Generator[Tuple[Any, ...], None, None]:
            if n < 0:
                raise ValueError("n must be >= 0")
            elif n == 0:
                # Width-zero window: emit a single empty tuple.
                yield tuple()
                return
            if step < 1:
                raise ValueError("step must be >= 1")

            # deque with maxlen=n: appending when full drops from the left.
            window: Deque[Any] = deque([], n)
            append = window.append

            # Initial deque fill; a source shorter than n is padded with
            # *fill_value*.
            for _ in range(n):
                append(next(self, fill_value))
            yield tuple(window)

            # Appending new items to the right causes old items to fall off the left
            i = 0
            for item in self:
                append(item)
                i = (i + 1) % step
                if i % step == 0:
                    yield tuple(window)

            # If there are items from the iterable in the window, pad with the given
            # value and emit them.
            if (i % step) and (step - i < n):
                for _ in range(step - i):
                    append(fill_value)
                yield tuple(window)

        return Fluent(_impl())

    def __iter__(self) -> "Fluent[T]":
        # A Fluent is its own iterator.
        return self

    def __next__(self) -> T:
        return next(self._iterator)

    def tee(self, n: int = 2) -> "Fluent[Fluent[T]]":
        """Return n independent iterators from a single iterable

        once tee() has made a split, the original iterable should not be used
        anywhere else; otherwise, the iterable could get advanced without the
        tee objects being informed

        >>> copy1, copy2 = flu(range(5)).tee()
        >>> copy1.sum()
        10
        >>> copy2.to_list()
        [0, 1, 2, 3, 4]
        """
        # Wrap each itertools.tee branch so the copies keep the fluent API.
        return Fluent((Fluent(x) for x in tee(self, n)))


class flu(Fluent[T]):
    """A fluent interface to lazy generator functions

    >>> from flupy import flu
    >>> (
            flu(range(100))
            .map(lambda x: x**2)
            .filter(lambda x: x % 3 == 0)
            .chunk(3)
            .take(2)
            .to_list()
    )
    [[0, 9, 36], [81, 144, 225]]
    """


================================================ FILE: src/flupy/py.typed ================================================


================================================ FILE: src/tests/test_cli.py ================================================
from tempfile import NamedTemporaryFile

import pytest

from flupy.cli.cli import build_import_dict, main, parse_args


def test_parse_args():
    # No arguments: argparse exits with status 2.
    with pytest.raises(SystemExit) as cm:
        parse_args([])
        # NOTE(review): unreachable — parse_args raises before this line, and
        # pytest's ExceptionInfo exposes `.value`, not `.exception`. Confirm
        # whether the exit-code check was ever intended to run.
        assert cm.exception.code == 2

    args = parse_args(["_"])
    assert args.command == "_"

    # "import" is a Python keyword, so the namespace attribute is read
    # via getattr.
    args = parse_args(["_", "-i", "os:environ:env"])
    assert "os:environ:env" in getattr(args, "import")
    assert args.command == "_"

    import_dict = build_import_dict(["json"])
    assert "json" in import_dict


def test_build_import_dict():
    import json

    # Bare "module" form.
    import_dict = build_import_dict(["json"])
    assert "json" in import_dict
    assert import_dict["json"] == json

    # "module:attribute" form.
    import_dict = build_import_dict(["json:dumps"])
    assert "dumps" in import_dict
    assert import_dict["dumps"] == json.dumps

    # "module:attribute:alias" form.
    import_dict = build_import_dict(["json:dumps:ds"])
    assert "ds" in import_dict
    assert import_dict["ds"] == json.dumps

    # "module::alias" form: alias for the module itself.
    import_dict = build_import_dict(["json::j"])
    assert "j" in import_dict
    assert import_dict["j"] == json


def test_show_help(capsys):
    with pytest.raises(SystemExit):
        main(["flu", "-h"])
    result = capsys.readouterr()
    stdout = result.out
    assert stdout.startswith("usage")


def test_show_version(capsys):
    # NOTE(review): body is identical to test_basic_pipeline and never passes
    # a version flag — presumably a placeholder. Confirm intent.
    main(["flu", "flu(range(5)).collect()"])
    result = capsys.readouterr()
    stdout = result.out.replace("\n", "")
    assert stdout.startswith("0")


def test_basic_pipeline(capsys):
    main(["flu", "flu(range(5)).collect()"])
    result = capsys.readouterr()
    stdout = result.out.replace("\n", "")
    assert stdout.startswith("0")


def test_pass_on_none_pipeline(capsys):
    # A pipeline evaluating to None prints nothing.
    main(["flu", "None"])
    result = capsys.readouterr()
    stdout = result.out
    assert stdout == ""


def test_non_iterable_non_none_pipeline(capsys):
    # A non-iterable, non-None result is printed as-is.
    main(["flu", '"hello_world"'])
    result = capsys.readouterr()
    stdout = result.out.strip("\n")
    assert stdout == "hello_world"


def test_cli_walk_files(capsys):
    main(["flu", "walk_files().head(2)"])
    result = capsys.readouterr()
    stdout = result.out.strip("\n").split("\n")
    assert len(stdout) == 2


def test_cli_walk_dirs(capsys):
    main(["flu", "walk_dirs().head(2)"])
    result = capsys.readouterr()
    stdout = result.out.strip("\n").split("\n")
    assert len(stdout) == 2


def test_from_file(capsys):
    with NamedTemporaryFile("w+") as f:
        f.write("hello")
        # NOTE(review): presumably forces the buffered write out to disk
        # before the CLI reopens the file by name — confirm.
        f.read()
        f_name = f.name
        main(["flu", "-f", f_name, "_.map(str.upper)"])
    result = capsys.readouterr()
    stdout = result.out.strip("\n")
    assert stdout == "HELLO"


def test_glob_imports(capsys):
    main(["flu", "flu(env).count()", "-i", "os:environ:env"])
    result = capsys.readouterr()
    stdout = result.out
    assert stdout


================================================ FILE: src/tests/test_cli_utils.py ================================================
from flupy.cli.utils import walk_dirs, walk_files


def test_walk_files():
    assert walk_files().head()
    assert walk_files(abspath=False).head()


def test_walk_dirs():
    assert walk_dirs().head()


================================================ FILE:
src/tests/test_flu.py ================================================ import sys from itertools import count, cycle import pytest from flupy import flu def test_collect(): assert flu(range(3)).collect() == [0, 1, 2] assert flu(range(3)).collect(container_type=tuple) == (0, 1, 2) assert flu(range(3)).collect(n=2) == [0, 1] def test_to_list(): assert flu(range(3)).to_list() == [0, 1, 2] def test___getitem__(): assert flu(range(3))[1] == 1 assert flu(range(3))[1:].collect() == [1, 2] assert flu(range(35))[1:2].collect() == [1] assert flu(range(35))[1:3].collect() == [1, 2] with pytest.raises(IndexError): flu([1])[4] with pytest.raises((KeyError, TypeError)): flu([1])["not an index"] def test_sum(): gen = flu(range(3)) assert gen.sum() == 3 def test_reduce(): gen = flu(range(5)) assert gen.reduce(lambda x, y: x + y) == 10 def test_fold_left(): assert flu(range(5)).fold_left(lambda x, y: x + y, 0) == 10 assert flu(range(5)).fold_left(lambda x, y: x + str(y), "") == "01234" def test_count(): gen = flu(range(3)) assert gen.count() == 3 def test_min(): gen = flu(range(3)) assert gen.min() == 0 def test_first(): gen = flu(range(3)) assert gen.first() == 0 gen = flu([]) with pytest.raises(IndexError): gen.first() gen = flu([]) assert gen.first(default=1) == 1 def test_last(): gen = flu(range(3)) assert gen.last() == 2 gen = flu([]) with pytest.raises(IndexError): gen.last() gen = flu([]) assert gen.last(default=1) == 1 def test_head(): gen = flu(range(30)) assert gen.head(n=2) == [0, 1] gen = flu(range(30)) assert gen.head(n=3, container_type=set) == set([0, 1, 2]) gen = flu(range(3)) assert gen.head(n=50) == [0, 1, 2] def test_tail(): gen = flu(range(30)) assert gen.tail(n=2) == [28, 29] gen = flu(range(30)) assert gen.tail(n=3, container_type=set) == set([27, 28, 29]) gen = flu(range(3)) assert gen.tail(n=50) == [0, 1, 2] def test_max(): gen = flu(range(3)) assert gen.max() == 2 def test_unique(): class NoHash: def __init__(self, letter, keyf): self.letter = letter 
self.keyf = keyf a = NoHash("a", 1) b = NoHash("b", 1) c = NoHash("c", 2) gen = flu([a, b, c]).unique() assert gen.collect() == [a, b, c] gen = flu([a, b, c]).unique(lambda x: x.letter) assert gen.collect() == [a, b, c] gen = flu([a, b, c]).unique(lambda x: x.keyf) assert gen.collect() == [a, c] def test_side_effect(): class FakeFile: def __init__(self): self.is_open = False self.content = [] def write(self, text): if self.is_open: self.content.append(text) else: raise IOError("fake file is not open for writing") def open(self): self.is_open = True def close(self): self.is_open = False # Test the fake file ffile = FakeFile() ffile.open() ffile.write("should be there") ffile.close() assert ffile.content[0] == "should be there" with pytest.raises(IOError): ffile.write("should fail") # Reset fake file ffile = FakeFile() with pytest.raises(IOError): flu(range(5)).side_effect(ffile.write).collect() gen_result = flu(range(5)).side_effect(ffile.write, before=ffile.open, after=ffile.close).collect() assert ffile.is_open == False assert ffile.content == [0, 1, 2, 3, 4] assert gen_result == [0, 1, 2, 3, 4] def test_sort(): gen = flu(range(3, 0, -1)).sort() assert gen.collect() == [1, 2, 3] def test_shuffle(): original_order = list(range(10000)) new_order = flu(original_order).shuffle().collect() assert new_order != original_order assert len(new_order) == len(original_order) assert sum(new_order) == sum(original_order) def test_map(): gen = flu(range(3)).map(lambda x: x + 2) assert gen.collect() == [2, 3, 4] def test_rate_limit(): resA = flu(range(3)).collect() resB = flu(range(3)).rate_limit(5000).collect() assert resA == resB def test_map_item(): gen = flu(range(3)).map(lambda x: {"a": x}).map_item("a") assert gen.collect() == [0, 1, 2] def test_map_attr(): class Person: def __init__(self, age: int) -> None: self.age = age gen = flu(range(3)).map(lambda x: Person(x)).map_attr("age") assert gen.collect() == [0, 1, 2] def test_filter(): gen = flu(range(3)).filter(lambda x: 0 
< x < 2) assert gen.collect() == [1] def test_take(): gen = flu(range(10)).take(5) assert gen.collect() == [0, 1, 2, 3, 4] def test_take_while(): gen = flu(cycle(range(10))).take_while(lambda x: x < 4) assert gen.collect() == [0, 1, 2, 3] def test_drop_while(): gen = flu([1, 2, 3, 4, 3, 2, 1]).drop_while(lambda x: x < 4) assert gen.collect() == [4, 3, 2, 1] def test_group_by(): gen = flu([1, 1, 1, 2, 2, 2, 2, 3]).zip(range(100)).group_by(lambda x: x[0]) g1, g2, g3 = gen.map(lambda x: (x[0], x[1].collect())).collect() # Standard usage assert g1 == (1, [(1, 0), (1, 1), (1, 2)]) assert g2 == (2, [(2, 3), (2, 4), (2, 5), (2, 6)]) assert g3 == (3, [(3, 7)]) # No param usage v1 = flu(range(10)).group_by().map(lambda x: (x[0], list(x[1]))) v2 = flu(range(10)).map(lambda x: (x, [x])) assert v1.collect() == v2.collect() # Sort gen = flu([1, 2, 1, 2]).group_by(lambda x: x, sort=False) assert gen.count() == 4 gen = flu([1, 2, 1, 2]).group_by(lambda x: x, sort=True) assert gen.count() == 2 # Identity Function points = [{"x": 1, "y": 0}, {"x": 4, "y": 3}, {"x": 1, "y": 5}] key_func = lambda u: u["x"] gen = flu(points).group_by(key=key_func, sort=True).collect() assert len(gen) == 2 assert gen[0][0] == 1 assert gen[1][0] == 4 assert len(gen[0][1].collect()) == 2 assert len(gen[1][1].collect()) == 1 def test_chunk(): gen = flu(range(5)).chunk(2) assert gen.collect() == [[0, 1], [2, 3], [4]] def test_next(): gen = flu(range(5)) assert next(gen) == 0 def test_iter(): gen = flu(range(5)) assert next(iter(gen)) == 0 def test_enumerate(): # Check default gen = flu(range(3)).enumerate() assert gen.collect() == [(0, 0), (1, 1), (2, 2)] # Check start param gen = flu(range(3)).enumerate(start=1) assert gen.collect() == [(1, 0), (2, 1), (3, 2)] def test_zip(): gen = flu(range(3)).zip(range(3)) assert gen.collect() == [(0, 0), (1, 1), (2, 2)] gen2 = flu(range(3)).zip(range(3), range(2)) assert gen2.collect() == [(0, 0, 0), (1, 1, 1)] def test_zip_longest(): gen = 
flu(range(3)).zip_longest(range(5)) assert gen.collect() == [(0, 0), (1, 1), (2, 2), (None, 3), (None, 4)] gen = flu(range(3)).zip_longest(range(5), fill_value="a") assert gen.collect() == [(0, 0), (1, 1), (2, 2), ("a", 3), ("a", 4)] gen = flu(range(3)).zip_longest(range(5), range(4), fill_value="a") assert gen.collect() == [(0, 0, 0), (1, 1, 1), (2, 2, 2), ("a", 3, 3), ("a", 4, "a")] def test_window(): # Check default gen = flu(range(5)).window(n=3) assert gen.collect() == [(0, 1, 2), (1, 2, 3), (2, 3, 4)] # Check step param gen = flu(range(5)).window(n=3, step=3) assert gen.collect() == [(0, 1, 2), (3, 4, None)] # Check fill_value param gen = flu(range(5)).window(n=3, step=3, fill_value="i") assert gen.collect() == [(0, 1, 2), (3, 4, "i")] assert flu(range(4)).window(n=0).collect() == [tuple()] with pytest.raises(ValueError): flu(range(5)).window(n=-1).collect() with pytest.raises(ValueError): flu(range(5)).window(3, step=0).collect() def test_flu(): gen = flu(count()).map(lambda x: x**2).filter(lambda x: x % 517 == 0).chunk(5).take(3) assert next(gen) == [0, 267289, 1069156, 2405601, 4276624] def test_flatten(): nested = [1, [2, (3, [4])], ["rbsd", "abc"], (7,)] # Defaults with depth of 1 gen = flu(nested).flatten() assert [x for x in gen] == [1, 2, (3, [4]), "rbsd", "abc", 7] # Depth 2 gen = flu(nested).flatten(depth=2) assert [x for x in gen] == [1, 2, 3, [4], "rbsd", "abc", 7] # Depth 3 gen = flu(nested).flatten(depth=3) assert [x for x in gen] == [1, 2, 3, 4, "rbsd", "abc", 7] # Depth infinite gen = flu(nested).flatten(depth=sys.maxsize) assert [x for x in gen] == [1, 2, 3, 4, "rbsd", "abc", 7] # Depth 2 with tuple base_type gen = flu(nested).flatten(depth=2, base_type=tuple) assert [x for x in gen] == [1, 2, (3, [4]), "rbsd", "abc", (7,)] # Depth 2 with iterate strings gen = flu(nested).flatten(depth=2, base_type=tuple, iterate_strings=True) assert [x for x in gen] == [1, 2, (3, [4]), "r", "b", "s", "d", "a", "b", "c", (7,)] def test_denormalize(): content 
= [ ["abc", [1, 2, 3]], ] assert flu(content).denormalize().collect() == [("abc", 1), ("abc", 2), ("abc", 3)] assert (flu(content).denormalize(iterate_strings=True).collect()) == [ ("a", 1), ("a", 2), ("a", 3), ("b", 1), ("b", 2), ("b", 3), ("c", 1), ("c", 2), ("c", 3), ] assert (flu([[[1], [1, 2], None]]).denormalize().collect()) == [ (1, 1, None), (1, 2, None), ] assert (flu([[[1], [1, 2], []]]).denormalize().collect()) == [] def test_tee(): # Default unpacking gen1, gen2 = flu(range(100)).tee() assert gen1.sum() == gen2.sum() # adjusting *n* paramter gen1, gen2, gen3 = flu(range(100)).tee(3) assert gen1.sum() == gen3.sum() # No sync progress gen1, gen2 = flu(range(100)).tee() assert next(gen1) == next(gen2) # No break chaining assert flu(range(5)).tee().map(sum).sum() == 20 def test_join_left(): # Default unpacking res = flu(range(6)).join_left(range(0, 6, 2)).collect() assert res == [(0, 0), (1, None), (2, 2), (3, None), (4, 4), (5, None)] def test_join_inner(): # Default unpacking res = flu(range(6)).join_inner(range(0, 6, 2)).collect() assert res == [(0, 0), (2, 2), (4, 4)] def test_join_full(): # Basic full join res = flu(range(4)).join_full(range(2, 6)).collect() assert res == [(0, None), (1, None), (2, 2), (3, 3), (None, 4), (None, 5)] # Full join with custom keys left = [{"id": 1, "name": "Alice"}, {"id": 2, "name": "Bob"}] right = [{"id": 2, "value": 100}, {"id": 3, "value": 200}] res = flu(left).join_full(right, key=lambda x: x["id"], other_key=lambda x: x["id"]).collect() assert res == [ ({"id": 1, "name": "Alice"}, None), ({"id": 2, "name": "Bob"}, {"id": 2, "value": 100}), (None, {"id": 3, "value": 200}), ] # Full join with empty left res = flu([]).join_full(range(3)).collect() assert res == [(None, 0), (None, 1), (None, 2)] # Full join with empty right res = flu(range(3)).join_full([]).collect() assert res == [(0, None), (1, None), (2, None)] # Full join with both empty res = flu([]).join_full([]).collect() assert res == [] # Full join with 
duplicates res = flu([1, 2, 2, 3]).join_full([2, 2, 4]).collect() expected = [(1, None), (2, 2), (2, 2), (2, 2), (2, 2), (3, None), (None, 4)] # 2x2 cartesian product # Sort with custom key to handle None values sort_key = lambda x: ( x[0] is None, x[0] if x[0] is not None else -1, x[1] is None, x[1] if x[1] is not None else -1, ) assert sorted(res, key=sort_key) == sorted(expected, key=sort_key) ================================================ FILE: src/tests/test_version.py ================================================ """ Tests for version information. """ import re import flupy def test_version_format(): """Test that __version__ follows semantic versioning format (MAJOR.MINOR.PATCH).""" # Standard semver regex pattern semver_pattern = r"^(?P0|[1-9]\d*)\.(?P0|[1-9]\d*)\.(?P0|[1-9]\d*)(?:-(?P(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+(?P[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$" assert re.match( semver_pattern, flupy.__version__ ), f"Version '{flupy.__version__}' does not match semantic versioning format" # Ensure version parts can be parsed as integers major, minor, patch = flupy.__version__.split("-")[0].split("+")[0].split(".")[:3] assert major.isdigit(), f"Major version '{major}' is not a valid integer" assert minor.isdigit(), f"Minor version '{minor}' is not a valid integer" assert patch.isdigit(), f"Patch version '{patch}' is not a valid integer"