Repository: olirice/flupy
Branch: master
Commit: 1bc446fd3efc
Files: 25
Total size: 61.7 KB
Directory structure:
gitextract_nws95u0a/
├── .coveragerc
├── .github/
│ └── workflows/
│ ├── pre-commit_hooks.yaml
│ └── test.yml
├── .gitignore
├── .pre-commit-config.yaml
├── .readthedocs.yml
├── .version
├── CONTRIBUTING.md
├── LICENSE.md
├── README.md
├── benchmark/
│ └── test_benchmark.py
├── mypy.ini
├── pyproject.toml
├── pytest.ini
├── setup.cfg
└── src/
├── flupy/
│ ├── __init__.py
│ ├── cli/
│ │ ├── __init__.py
│ │ ├── cli.py
│ │ └── utils.py
│ ├── fluent.py
│ └── py.typed
└── tests/
├── test_cli.py
├── test_cli_utils.py
├── test_flu.py
└── test_version.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .coveragerc
================================================
[report]
exclude_lines =
pragma: no cover
if TYPE_CHECKING:
raise AssertionError
raise NotImplementedError
@overload
pass
================================================
FILE: .github/workflows/pre-commit_hooks.yaml
================================================
name: pre-commit hooks
on: [push]
jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v1
- name: python setup 3.9
uses: actions/setup-python@v1
with:
python-version: '3.9'
- name: Install Poetry
uses: snok/install-poetry@v1
with:
version: 1.7.1
virtualenvs-create: true
virtualenvs-in-project: true
- name: Install dependencies
run: |
poetry install --with dev
- name: run tests
run: |
poetry run pre-commit run --all
================================================
FILE: .github/workflows/test.yml
================================================
name: tests
on: [push]
jobs:
build:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
steps:
- uses: actions/checkout@v1
- name: python setup ${{ matrix.python-version }}
uses: actions/setup-python@v1
with:
python-version: ${{ matrix.python-version }}
- name: Install Poetry
uses: snok/install-poetry@v1
with:
version: 1.7.1
virtualenvs-create: true
virtualenvs-in-project: true
- name: Install dependencies
run: |
poetry install --with dev
- name: run tests
run: |
poetry run pytest --cov=src/flupy src/tests --cov-report=xml
- name: upload coverage to codecov
uses: codecov/codecov-action@v1
with:
token: ${{ secrets.CODECOV_TOKEN }}
file: ./coverage.xml
flags: unittests
name: codecov-umbrella
fail_ci_if_error: true
================================================
FILE: .gitignore
================================================
docs/*
# Temporary Python files
*.pyc
*.egg-info
__pycache__
.ipynb_checkpoints
# pyenv
.python-version
.benchmarks
poetry.lock
pip-wheel-metadata/
.vscode
# Temporary OS files
Icon*
# Pytest cache
.pytest_cache/*
# Virtual environment
venv/*
# Temporary virtual environment files
/.cache/
/.venv/
# Temporary server files
.env
*.pid
*.swp
# Generated documentation
/docs/gen/
/docs/apidocs/
/docs/_build/
/site/
/*.html
/*.rst
/docs/*.png
# Google Drive
*.gdoc
*.gsheet
*.gslides
*.gdraw
# Testing and coverage results
/.pytest/
/.coverage
/.coverage.*
/htmlcov/
/xmlreport/
/pyunit.xml
/tmp/
*.tmp
# Build and release directories
/build/
/dist/
*.spec
# Sublime Text
*.sublime-workspace
# Eclipse
.settings
# LLMs
CLAUDE.md
================================================
FILE: .pre-commit-config.yaml
================================================
repos:
- repo: https://github.com/pre-commit/mirrors-isort
rev: v5.10.1
hooks:
- id: isort
args: ['--multi-line=3', '--trailing-comma', '--force-grid-wrap=0', '--use-parentheses', '--line-width=88']
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0
hooks:
- id: trailing-whitespace
- id: check-added-large-files
- id: check-yaml
- id: mixed-line-ending
args: ['--fix=lf']
- repo: https://github.com/humitos/mirrors-autoflake.git
rev: v1.1
hooks:
- id: autoflake
args: ['--in-place', '--remove-all-unused-imports']
- repo: https://github.com/psf/black
rev: 25.1.0
hooks:
- id: black
language_version: python3.9
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.17.0
hooks:
- id: mypy
files: flupy/
args: ["--config-file", "mypy.ini"]
================================================
FILE: .readthedocs.yml
================================================
# Read the Docs configuration file
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
# Required
version: 2
# Set the OS, Python version and other tools you might need
build:
os: ubuntu-22.04
tools:
python: "3.11"
jobs:
post_create_environment:
# Install poetry
- pip install poetry
post_install:
# Install dependencies with Poetry
# VIRTUAL_ENV needs to be set manually for Poetry to work correctly
- VIRTUAL_ENV=$READTHEDOCS_VIRTUALENV_PATH poetry install --with dev
# Build documentation in the "docs/" directory with Sphinx
sphinx:
configuration: docs/conf.py
================================================
FILE: .version
================================================
1.0.11
================================================
FILE: CONTRIBUTING.md
================================================
# For Contributors
## Setup
### Requirements
* Make:
* Windows: http://mingw.org/download/installer
* Mac: http://developer.apple.com/xcode
* Linux: http://www.gnu.org/software/make
* pipenv: http://docs.pipenv.org
* Pandoc: http://johnmacfarlane.net/pandoc/installing.html
* Graphviz: http://www.graphviz.org/Download.php
To confirm these system dependencies are configured correctly:
```sh
$ make doctor
```
### Installation
Install project dependencies into a virtual environment:
```sh
$ make install
```
## Development Tasks
### Testing
Manually run the tests:
```sh
$ make test
```
or keep them running on change:
```sh
$ make watch
```
> In order to have OS X notifications, `brew install terminal-notifier`.
### Documentation
Build the documentation:
```sh
$ make docs
```
### Static Analysis
Run linters and static analyzers:
```sh
$ make pylint
$ make pycodestyle
$ make pydocstyle
$ make check # includes all checks
```
## Continuous Integration
The CI server will report overall build status:
```sh
$ make ci
```
## Release Tasks
Release to PyPI:
```sh
$ make upload
```
================================================
FILE: LICENSE.md
================================================
# License
**The MIT License (MIT)**
Copyright © 2017, Oliver Rice
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
================================================
FILE: README.md
================================================
# flupy
---
**Documentation**: https://flupy.readthedocs.io/en/latest/
**Source Code**: https://github.com/olirice/flupy
---
## Overview
Flupy implements a [fluent interface](https://en.wikipedia.org/wiki/Fluent_interface) for operating on python iterables. All flupy methods return generators and are evaluated lazily. This allows expressions to transform arbitrary size data in extremely limited memory.
You can think of flupy as a light weight, 0 dependency, pure python alternative to the excellent [Apache Spark](https://spark.apache.org/) project.
## Setup
### Requirements
* Python 3.9+
### Installation
Install flupy with pip:
```sh
$ pip install flupy
```
### Library
```python
from itertools import count
from flupy import flu
# Processing an infinite sequence in constant memory
pipeline = (
flu(count())
.map(lambda x: x**2)
.filter(lambda x: x % 517 == 0)
.chunk(5)
.take(3)
)
for item in pipeline:
print(item)
# Returns:
# [0, 267289, 1069156, 2405601, 4276624]
# [6682225, 9622404, 13097161, 17106496, 21650409]
# [26728900, 32341969, 38489616, 45171841, 52388644]
```
### CLI
The flupy command line interface brings the same syntax for lazy pipelines to your shell. Inputs to the `flu` command are auto-populated into a `Fluent` context named `_`.
````
$ flu -h
usage: flu [-h] [-f FILE] [-i [IMPORT [IMPORT ...]]] command
flupy: a fluent interface for python
positional arguments:
command flupy command to execute on input
optional arguments:
-h, --help show this help message and exit
-f FILE, --file FILE path to input file
-i [IMPORT [IMPORT ...]], --import [IMPORT [IMPORT ...]]
modules to import
Syntax: ::
Examples:
'import os' = '-i os'
'import os as op_sys' = '-i os::op_sys'
'from os import environ' = '-i os:environ'
'from os import environ as env' = '-i os:environ:env'
````
================================================
FILE: benchmark/test_benchmark.py
================================================
"""Benchmarks for the flupy fluent interface, driven by pytest-benchmark.

Each test hands a small pipeline to the *benchmark* fixture; the fixture
calls the decorated ``work`` function repeatedly and records timings.
"""
from itertools import cycle

from flupy import flu


def test_integration(benchmark):
    # End-to-end pipeline touching several chained operators
    @benchmark
    def work():
        (flu(range(100000)).chunk(100).chunk(2).map_item(0).count())


def test_max(benchmark):
    @benchmark
    def work():
        flu(range(300000)).max()


def test_initialize(benchmark):
    # Measures construction cost only; the range is never consumed
    @benchmark
    def work():
        flu(range(10))


def test_collect(benchmark):
    @benchmark
    def work():
        flu(range(3)).collect()


def test___getitem__(benchmark):
    @benchmark
    def work():
        flu(range(350))[1:3].collect()


def test_sum(benchmark):
    @benchmark
    def work():
        flu(range(1000)).sum()


def test_reduce(benchmark):
    @benchmark
    def work():
        flu(range(50)).reduce(lambda x, y: x + y)


def test_fold_left(benchmark):
    @benchmark
    def work():
        flu(range(5)).fold_left(lambda x, y: x + y, 0)


def test_count(benchmark):
    @benchmark
    def work():
        flu(range(3000)).count()


def test_min(benchmark):
    @benchmark
    def work():
        flu(range(3000)).min()


def test_first(benchmark):
    @benchmark
    def work():
        flu(range(3)).first()


def test_last(benchmark):
    @benchmark
    def work():
        flu(range(3000)).last()


def test_head(benchmark):
    @benchmark
    def work():
        flu(range(30000)).head(n=10)


def test_tail(benchmark):
    @benchmark
    def work():
        flu(range(30000)).tail(n=10)


def test_unique(benchmark):
    # NOTE(review): NoHash and its instances are unused by the benchmark body;
    # retained to avoid changing what this benchmark has historically set up.
    class NoHash:
        def __init__(self, letter, keyf):
            self.letter = letter
            self.keyf = keyf

    a = NoHash("a", 1)
    b = NoHash("b", 1)
    c = NoHash("c", 2)

    data = [x % 500 for x in range(10000)]

    @benchmark
    def work():
        flu(data).unique().collect()


def test_sort(benchmark):
    @benchmark
    def work():
        flu(range(3000, 0, -1)).sort().collect()


def test_shuffle(benchmark):
    original_order = list(range(10000))

    @benchmark
    def work():
        flu(original_order).shuffle().collect()


def test_map(benchmark):
    @benchmark
    def work():
        flu(range(3)).map(lambda x: x + 2).collect()


def test_rate_limit(benchmark):
    # A huge per_second value makes the sleep effectively zero, so this
    # measures the rate-limiting machinery itself rather than waiting.
    @benchmark
    def work():
        flu(range(300)).rate_limit(50000000000000).collect()


def test_map_item(benchmark):
    data = flu(range(300)).map(lambda x: {"a": x})

    @benchmark
    def work():
        flu(data).map_item("a")


def test_map_attr(benchmark):
    class Person:
        def __init__(self, age: int) -> None:
            self.age = age

    people = flu(range(200)).map(Person).collect()

    @benchmark
    def work():
        flu(people).map_attr("age").collect()


def test_filter(benchmark):
    @benchmark
    def work():
        flu(range(3)).filter(lambda x: 0 < x < 2).collect()


def test_take(benchmark):
    @benchmark
    def work():
        flu(range(10)).take(5).collect()


def test_take_while(benchmark):
    @benchmark
    def work():
        flu(cycle(range(10))).take_while(lambda x: x < 4).collect()


def test_drop_while(benchmark):
    @benchmark
    def work():
        flu([1, 2, 3, 4, 3, 2, 1]).drop_while(lambda x: x < 4).collect()


def test_group_by(benchmark):
    @benchmark
    def work():
        flu([1, 1, 1, 2, 2, 2, 2, 3]).zip(range(100)).group_by(lambda x: x[0]).collect()


def test_chunk(benchmark):
    @benchmark
    def work():
        flu(range(500)).chunk(2).collect()


def test_enumerate(benchmark):
    @benchmark
    def work():
        flu(range(3)).enumerate(start=1).collect()


def test_zip(benchmark):
    @benchmark
    def work():
        flu(range(3)).zip(range(3)).collect()


def test_zip_longest(benchmark):
    @benchmark
    def work():
        flu(range(3)).zip_longest(range(5)).collect()


def test_window(benchmark):
    @benchmark
    def work():
        # BUG FIX: previously ended with ".collect" (no call), which only
        # fetched the bound method and never materialized the windows.
        flu(range(5)).window(n=3, step=3).collect()


def test_flatten(benchmark):
    nested = [1, [2, (3, [4])], ["rbsd", "abc"], (7,)]

    @benchmark
    def work():
        flu(nested).flatten(depth=2, base_type=tuple).collect()


def test_tee(benchmark):
    @benchmark
    def work():
        gen1, gen2, gen3 = flu(range(100)).tee(3)


def test_join_left(benchmark):
    @benchmark
    def work():
        flu(range(6)).join_left(range(0, 6, 2)).collect()


def test_join_inner(benchmark):
    @benchmark
    def work():
        flu(range(6)).join_inner(range(0, 6, 2)).collect()
================================================
FILE: mypy.ini
================================================
[mypy]
ignore_missing_imports = True
strict_optional = True
follow_imports = skip
warn_redundant_casts = True
warn_unused_ignores = False
check_untyped_defs = True
no_implicit_reexport = True
# Strict Mode:
disallow_untyped_defs = True
disallow_any_generics = True
================================================
FILE: pyproject.toml
================================================
[tool.poetry]
name = "flupy"
version = "1.2.3"
description = "Fluent data processing in Python - a chainable stream processing library for expressive data manipulation using method chaining"
authors = ["Oliver Rice "]
license = "MIT"
readme = "README.md"
repository = "https://github.com/olirice/flupy"
packages = [{include = "flupy", from = "src"}]
classifiers = [
"Development Status :: 5 - Production/Stable",
"Natural Language :: English",
"Operating System :: OS Independent",
"Programming Language :: Python",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
]
[tool.poetry.dependencies]
python = ">=3.9"
typing_extensions = ">=4"
[tool.poetry.group.dev.dependencies]
pytest = "*"
pytest-cov = "*"
pytest-benchmark = "*"
pre-commit = "*"
pylint = "*"
black = "*"
mypy = "*"
sphinx = "*"
sphinx-rtd-theme = "*"
[tool.poetry.scripts]
flu = "flupy.cli.cli:main"
flu_precommit = "flupy.cli.cli:precommit"
[build-system]
requires = ["poetry-core>=2.0.0"]
build-backend = "poetry.core.masonry.api"
[tool.black]
line-length = 120
exclude = '''
/(
\.git
| \.hg
| \.mypy_cache
| \.tox
| \.venv
| _build
| buck-out
| build
| dist
)/
'''
[tool.mypy]
python_version = "3.9"
ignore_missing_imports = true
strict_optional = true
follow_imports = "skip"
warn_redundant_casts = true
warn_unused_ignores = false
check_untyped_defs = true
no_implicit_reexport = true
disallow_untyped_defs = true
disallow_any_generics = true
[tool.pytest.ini_options]
addopts = "--cov=src/flupy src/tests"
[tool.coverage.report]
exclude_lines = [
"pragma: no cover",
"if TYPE_CHECKING:",
"raise AssertionError",
"raise NotImplementedError",
"@overload",
"pass",
]
================================================
FILE: pytest.ini
================================================
[pytest]
addopts = --cov=src/flupy src/tests
================================================
FILE: setup.cfg
================================================
[metadata]
description_file = README.md
================================================
FILE: src/flupy/__init__.py
================================================
# Public package surface for flupy.
from importlib.metadata import version

from flupy.cli.utils import walk_dirs, walk_files
from flupy.fluent import flu

__project__ = "flupy"
# Resolve the installed distribution's version from package metadata so
# __version__ never drifts from what pip/poetry installed.
__version__ = version(__project__)

__all__ = ["flu", "walk_files", "walk_dirs"]
================================================
FILE: src/flupy/cli/__init__.py
================================================
================================================
FILE: src/flupy/cli/cli.py
================================================
import argparse
import importlib
import sys
from typing import Any, Dict, Generator, List, Optional
from flupy import __version__, flu, walk_dirs, walk_files
def read_file(path: str) -> Generator[str, None, None]:
    """Lazily yield each line (trailing newline included) from the file at *path*."""
    with open(path, "r") as source:
        for line in source:
            yield line
def parse_args(args: List[str]) -> argparse.Namespace:
    """Parse input arguments

    :param args: argv-style list of tokens, excluding the program name
    :return: namespace exposing ``command``, ``file``, and ``import`` attributes
    """
    parser = argparse.ArgumentParser(
        description="flupy: a fluent interface for python collections",
        # RawTextHelpFormatter preserves the literal newlines/tabs embedded in
        # the multi-line help text of --import below.
        formatter_class=argparse.RawTextHelpFormatter,
    )
    parser.add_argument("-v", "--version", action="version", version="%(prog)s " + __version__)
    parser.add_argument("command", help="command to execute against input")
    parser.add_argument("-f", "--file", help="path to input file")
    parser.add_argument(
        "-i",
        "--import",
        nargs="*",
        default=[],
        help="modules to import\n"
        "Syntax: ::\n"
        "Examples:\n"
        "\t'import os' = '-i os'\n"
        "\t'import os as op_sys' = '-i os::op_sys'\n"
        "\t'from os import environ' = '-i os:environ'\n"
        "\t'from os import environ as env' = '-i os:environ:env'\n",
    )
    return parser.parse_args(args)
def build_import_dict(imps: List[str]) -> Dict[str, Any]:
    """Resolve CLI import specs into a name -> object mapping.

    Each spec has the shape ``module[:attribute[:alias]]``; an empty attribute
    segment (``module::alias``) aliases the module itself.
    """
    namespace: Dict[str, Any] = {}
    for spec in imps:
        module_name, _, remainder = spec.partition(":")
        attr_name, _, alias = remainder.partition(":")
        loaded = importlib.import_module(module_name)
        if attr_name:
            namespace[alias or attr_name] = getattr(loaded, attr_name)
        else:
            namespace[alias or module_name] = loaded
    return namespace
def main(argv: Optional[List[str]] = None) -> None:
    """CLI Entrypoint

    Builds a lazy pipeline ``_`` from --file (or stdin), evaluates the
    user-supplied command string against it, and prints the result.
    """
    args = parse_args(argv[1:] if argv is not None else sys.argv[1:])

    _command = args.command
    _file = args.file
    # "import" is a Python keyword, so the attribute cannot be read as args.import
    _import = getattr(args, "import")

    import_dict = build_import_dict(_import)

    if _file:
        _ = flu(read_file(_file)).map(str.rstrip)
    else:
        try:
            # Restore the default SIGPIPE handler so piping into e.g. `head`
            # terminates cleanly instead of raising BrokenPipeError.
            from signal import SIG_DFL, SIGPIPE, signal

            signal(SIGPIPE, SIG_DFL)
        except ImportError:
            # SIGPIPE not available on platform (e.g. Windows), nothing to do
            pass
        _ = flu(sys.stdin).map(str.rstrip)

    # Names available to the evaluated command, with `_` bound to the input pipeline
    locals_dict = {
        "flu": flu,
        "_": _,
        "walk_files": walk_files,
        "walk_dirs": walk_dirs,
    }

    # NOTE(review): eval of the user-supplied string is the point of this CLI
    # (the command IS the program), but never feed it untrusted input.
    pipeline = eval(_command, import_dict, locals_dict)

    # Iterables (other than strings/bytes) stream one line per element; None
    # (e.g. a side-effecting command) prints nothing; scalars print directly.
    if hasattr(pipeline, "__iter__") and not isinstance(pipeline, (str, bytes)):
        for r in pipeline:
            sys.stdout.write(str(r) + "\n")
    elif pipeline is None:
        pass
    else:
        sys.stdout.write(str(pipeline) + "\n")
================================================
FILE: src/flupy/cli/utils.py
================================================
# pylint: disable=invalid-name
import os
from typing import Generator
from flupy.fluent import Fluent, flu
def walk_files(*pathes: str, abspath: bool = True) -> "Fluent[str]":
    """Yield files recursively starting from each location in *pathes*

    Defaults to the current directory when no paths are given. When *abspath*
    is True (the default), absolute paths are yielded.
    """
    search_roots = pathes if pathes != () else (".",)

    def _gen() -> Generator[str, None, None]:
        for root in search_roots:
            for dirpath, _, filenames in os.walk(root):
                for filename in filenames:
                    joined = os.path.join(dirpath, filename)
                    yield os.path.abspath(joined) if abspath else joined

    return flu(_gen())
def walk_dirs(path: str = ".") -> "Fluent[str]":
"""Yield files recursively starting from *path"""
def _impl() -> Generator[str, None, None]:
for d, _, _ in os.walk(path):
yield d
return flu(_impl())
================================================
FILE: src/flupy/fluent.py
================================================
# pylint: disable=invalid-name
import time
from collections import defaultdict, deque
from collections.abc import Iterable as IterableType
from functools import reduce
from itertools import dropwhile, groupby, islice, product, takewhile, tee, zip_longest
from random import sample
from typing import (
Any,
Callable,
Collection,
Deque,
Dict,
Generator,
Generic,
Hashable,
Iterable,
Iterator,
List,
Optional,
Set,
Tuple,
Type,
TypeVar,
Union,
overload,
)
from typing_extensions import Concatenate, ParamSpec, Protocol
__all__ = ["flu"]

# Type variables shared across the fluent interface
T = TypeVar("T")
T_co = TypeVar("T_co", covariant=True)
T_contra = TypeVar("T_contra", contravariant=True)
_T1 = TypeVar("_T1")
_T2 = TypeVar("_T2")
_T3 = TypeVar("_T3")
S = TypeVar("S")
P = ParamSpec("P")

# A callable that materializes an iterable into a concrete container (list, set, tuple, ...)
CallableTakesIterable = Callable[[Iterable[T]], Collection[T]]


class SupportsEquality(Protocol):
    """Structural type for objects comparable with ``==``."""

    def __eq__(self, __other: object) -> bool:
        pass


class SupportsGetItem(Protocol[T_co]):
    """Structural type for objects supporting ``obj[key]`` subscripting."""

    def __getitem__(self, __k: Hashable) -> T_co:
        pass


class SupportsIteration(Protocol[T_co]):
    """Structural type for iterable objects."""

    # Annotation corrected: the return type must use the protocol's covariant
    # parameter T_co (was Iterator[T], an unrelated type variable).
    def __iter__(self) -> Iterator[T_co]:
        pass


class SupportsLessThan(Protocol):
    """Structural type for objects orderable with ``<``."""

    def __lt__(self, __other: Any) -> bool:
        pass


SupportsLessThanT = TypeVar("SupportsLessThanT", bound="SupportsLessThan")


class Empty:
    """Sentinel type distinguishing "no value supplied" from ``None``."""

    pass


def identity(x: T) -> T:
    """Return *x* unchanged; the default key function."""
    return x
class Fluent(Generic[T]):
"""A fluent interface to lazy generator functions
>>> from flupy import flu
>>> (
flu(range(100))
.map(lambda x: x**2)
.filter(lambda x: x % 3 == 0)
.chunk(3)
.take(2)
.to_list()
)
[[0, 9, 36], [81, 144, 225]]
"""
def __init__(self, iterable: Iterable[T]) -> None:
    """Wrap *iterable* in the fluent interface, coercing it to an iterator."""
    self._iterator: Iterator[T] = iter(iterable)
@overload
def __getitem__(self, index: int) -> T:
    pass

@overload
def __getitem__(self, index: slice) -> "Fluent[T]":
    pass

def __getitem__(self, key: Union[int, slice]) -> Union[T, "Fluent[T]"]:
    """Index or slice the stream, consuming the underlying iterator as needed.

    :param key: a non-negative int (returns the element at that offset) or a
        slice (returns a new lazy ``Fluent`` over the sliced elements)
    :raises IndexError: when an int index is beyond the end of the iterator
    :raises TypeError: for negative ints or keys that are not int/slice
    """
    if isinstance(key, int) and key >= 0:
        try:
            return next(islice(self._iterator, key, key + 1))
        except StopIteration:
            # Suppress the StopIteration context (PEP 409) so callers see a
            # clean IndexError, matching built-in sequence behavior.
            raise IndexError("flu index out of range") from None
    elif isinstance(key, slice):
        return flu(islice(self._iterator, key.start, key.stop, key.step))
    else:
        raise TypeError(f"Indices must be non-negative integers or slices, not {type(key).__name__}")
### Summary ###
def collect(self, n: Optional[int] = None, container_type: CallableTakesIterable[T] = list) -> Collection[T]:
    """Collect items from iterable into a container

    :param n: maximum number of items to collect; ``None`` collects everything
    :param container_type: constructor for the returned container

    >>> flu(range(4)).collect()
    [0, 1, 2, 3]

    >>> flu(range(4)).collect(container_type=set)
    {0, 1, 2, 3}

    >>> flu(range(4)).collect(n=2)
    [0, 1]
    """
    # take(None) passes the whole stream through, so one code path serves both cases
    return container_type(self.take(n))
def to_list(self) -> List[T]:
    """Collect items from iterable into a list, exhausting the stream

    >>> flu(range(4)).to_list()
    [0, 1, 2, 3]
    """
    return list(self)
def sum(self) -> Union[T, int]:
    """Sum of elements in the iterable

    The ``int`` in the return union covers the empty stream, where the
    built-in ``sum`` returns 0.

    >>> flu([1,2,3]).sum()
    6
    """
    return sum(self)  # type: ignore
def count(self) -> int:
    """Count of elements in the iterable, exhausting the stream

    >>> flu(['a','b','c']).count()
    3
    """
    total = 0
    for _ in self:
        total += 1
    return total
def min(self: "Fluent[SupportsLessThanT]") -> SupportsLessThanT:
    """Smallest element in the iterable

    :raises ValueError: when the iterable is empty (from built-in ``min``)

    >>> flu([1, 3, 0, 2]).min()
    0
    """
    return min(self)
def max(self: "Fluent[SupportsLessThanT]") -> SupportsLessThanT:
    """Largest element in the iterable

    :raises ValueError: when the iterable is empty (from built-in ``max``)

    >>> flu([0, 3, 2, 1]).max()
    3
    """
    return max(self)
def first(self, default: Any = Empty()) -> T:
    """Return the first item of the iterable. Raise IndexError if empty, or return default if provided.

    >>> flu([0, 1, 2, 3]).first()
    0

    >>> flu([]).first(default="some_default")
    'some_default'
    """
    for item in self:
        return item
    # Stream was empty: an Empty sentinel means no default was supplied
    if isinstance(default, Empty):
        raise IndexError("Empty iterator")
    return default
def last(self, default: Any = Empty()) -> T:
    """Return the last item of the iterable. Raise IndexError if empty, or return default if provided.

    >>> flu([0, 1, 2, 3]).last()
    3

    >>> flu([]).last(default='some_default')
    'some_default'
    """
    final: Union[Empty, T] = default
    # A length-1 deque retains only the most recent element in O(1) memory
    buffer: Deque[T] = deque(self, maxlen=1)
    if buffer:
        final = buffer[0]
    if isinstance(final, Empty):
        raise IndexError("Empty iterator")
    return final
def head(self, n: int = 10, container_type: CallableTakesIterable[T] = list) -> Collection[T]:
    """Returns up to the first *n* elements from the iterable.

    :param n: maximum number of leading elements to return
    :param container_type: constructor for the returned container

    >>> flu(range(20)).head()
    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

    >>> flu(range(15)).head(n=2)
    [0, 1]

    >>> flu([]).head()
    []
    """
    return self.take(n).collect(container_type=container_type)
def tail(self, n: int = 10, container_type: CallableTakesIterable[T] = list) -> Collection[T]:
    """Return up to the last *n* elements from the iterable

    >>> flu(range(20)).tail()
    [10, 11, 12, 13, 14, 15, 16, 17, 18, 19]

    >>> flu(range(15)).tail(n=2)
    [13, 14]
    """
    # Slide a window of size n (padded with Empty sentinels) across the
    # stream; after exhaustion, the final window holds the last n elements.
    val: Union[List[Empty], Tuple[Any, ...]] = [Empty()]
    for val in self.window(n, fill_value=Empty()):
        pass
    # Strip sentinel padding, present when the stream was shorter than n
    return container_type([x for x in val if not isinstance(x, Empty)])
### End Summary ###
### Non-Constant Memory ###
def sort(
    self: "Fluent[SupportsLessThanT]",
    key: Optional[Callable[[Any], Any]] = None,
    reverse: bool = False,
) -> "Fluent[SupportsLessThanT]":
    """Sort iterable by *key* function if provided or identity otherwise

    Note: sorting loads the entire iterable into memory

    >>> flu([3,6,1]).sort().to_list()
    [1, 3, 6]

    >>> flu([3,6,1]).sort(reverse=True).to_list()
    [6, 3, 1]

    >>> flu([3,-6,1]).sort(key=abs).to_list()
    [1, 3, -6]
    """
    ordered = sorted(self, key=key, reverse=reverse)
    return Fluent(ordered)
def join_left(
    self,
    other: Iterable[_T1],
    key: Callable[[T], Hashable] = identity,
    other_key: Callable[[_T1], Hashable] = identity,
) -> "Fluent[Tuple[T, Union[_T1, None]]]":
    """Join the iterable with another iterable using equality between *key* applied to self and *other_key* applied to *other* to identify matching entries

    When no matching entry is found in *other*, entries in the iterable are paired with None

    Note: join_left loads *other* into memory

    >>> flu(range(6)).join_left(range(0, 6, 2)).to_list()
    [(0, 0), (1, None), (2, 2), (3, None), (4, 4), (5, None)]
    """

    def _impl() -> Generator[Tuple[T, Union[_T1, None]], None, None]:
        # Hash join: index *other* by key once, then stream over self
        other_lookup = defaultdict(list)
        for entry_other in other:
            other_lookup[other_key(entry_other)].append(entry_other)

        for entry in self:
            matches: Optional[List[_T1]] = other_lookup.get(key(entry))
            if matches:
                # One output pair per matching right-hand entry
                for match in matches:
                    yield (entry, match)
            else:
                yield (entry, None)

    return Fluent(_impl())
def join_inner(
    self,
    other: Iterable[_T1],
    key: Callable[[T], Hashable] = identity,
    other_key: Callable[[_T1], Hashable] = identity,
) -> "Fluent[Tuple[T, _T1]]":
    """Join the iterable with another iterable using equality between *key* applied to self and *other_key* applied to *other* to identify matching entries

    When no matching entry is found in *other*, entries in the iterable are filtered from the results

    Note: join_inner loads *other* into memory

    >>> flu(range(6)).join_inner(range(0, 6, 2)).to_list()
    [(0, 0), (2, 2), (4, 4)]
    """

    def _impl() -> Generator[Tuple[T, _T1], None, None]:
        # Hash join: index *other* by key once, then stream over self
        other_lookup = defaultdict(list)
        for entry_other in other:
            other_lookup[other_key(entry_other)].append(entry_other)

        for entry in self:
            # defaultdict returns [] for unmatched keys, so the inner loop
            # silently skips entries with no right-hand match
            matches: List[_T1] = other_lookup[key(entry)]
            for match in matches:
                yield (entry, match)

    return Fluent(_impl())
def join_full(
    self,
    other: Iterable[_T1],
    key: Callable[[T], Hashable] = identity,
    other_key: Callable[[_T1], Hashable] = identity,
) -> "Fluent[Tuple[Union[T, None], Union[_T1, None]]]":
    """Join the iterable with another iterable using equality between *key* applied to self and *other_key* applied to *other* to identify matching entries

    Returns all entries from both iterables. When no matching entry is found, entries are paired with None

    Note: join_full loads both *self* and *other* into memory

    >>> flu(range(4)).join_full(range(2, 6)).to_list()
    [(0, None), (1, None), (2, 2), (3, 3), (None, 4), (None, 5)]
    """

    def _impl() -> Generator[Tuple[Union[T, None], Union[_T1, None]], None, None]:
        # Build lookup for other
        other_lookup: Dict[Hashable, List[_T1]] = defaultdict(list)
        other_keys_seen: Set[Hashable] = set()
        for entry_other in other:
            other_key_val = other_key(entry_other)
            other_lookup[other_key_val].append(entry_other)
            other_keys_seen.add(other_key_val)

        # Track which keys from other have been matched
        matched_other_keys: Set[Hashable] = set()

        # Process all entries from self (left-join half of the output)
        for entry in self:
            entry_key = key(entry)
            matches: Optional[List[_T1]] = other_lookup.get(entry_key)
            if matches:
                matched_other_keys.add(entry_key)
                for match in matches:
                    yield (entry, match)
            else:
                yield (entry, None)

        # Yield unmatched entries from other, paired with None on the left.
        # Set difference order is not guaranteed; rows within one key keep
        # their original order from *other*.
        unmatched_keys = other_keys_seen - matched_other_keys
        for unmatched_key in unmatched_keys:
            for entry_other in other_lookup[unmatched_key]:
                yield (None, entry_other)

    return Fluent(_impl())
def shuffle(self) -> "Fluent[T]":
    """Randomize the order of elements in the iterable

    Note: shuffle loads the entire iterable into memory

    >>> flu([3,6,1]).shuffle().to_list()
    [6, 1, 3]
    """
    # sample() with k == len(pool) yields a full random permutation
    pool: List[T] = self.to_list()
    return Fluent(sample(pool, len(pool)))
def group_by(
    self, key: Callable[[T], Union[T, _T1]] = identity, sort: bool = True
) -> "Fluent[Tuple[Union[T,_T1], Fluent[T]]]":
    """Yield consecutive keys and groups from the iterable

    *key* is a function to compute a key value used in grouping and sorting for each element. *key* defaults to an identity function which returns the unchanged element

    When the iterable is pre-sorted according to *key*, setting *sort* to False will prevent loading the dataset into memory and improve performance

    >>> flu([2, 4, 2, 4]).group_by().to_list()
    [(2, ), (4, )]

    Or, if the iterable is pre-sorted

    >>> flu([2, 2, 5, 5]).group_by(sort=False).to_list()
    [(2, ), (5, )]

    Using a key function

    >>> points = [
        {'x': 1, 'y': 0},
        {'x': 4, 'y': 3},
        {'x': 1, 'y': 5}
    ]
    >>> key_func = lambda u: u['x']
    >>> flu(points).group_by(key=key_func, sort=True).to_list()
    [(1, ), (4, )]
    """
    # itertools.groupby only groups *consecutive* equal keys, so the stream
    # must be sorted by the same key unless the caller promises it already is
    gen = self.sort(key) if sort else self
    # Materialize each group eagerly: groupby shares a single underlying
    # iterator, so a group becomes invalid once the next key is fetched
    return Fluent(groupby(gen, key)).map(lambda x: (x[0], flu([y for y in x[1]])))
def unique(self, key: Callable[[T], Hashable] = identity) -> "Fluent[T]":
    """Yield elements that are unique by a *key*.

    Note: the set of seen keys grows with the number of distinct keys

    >>> flu([2, 3, 2, 3]).unique().to_list()
    [2, 3]

    >>> flu([2, -3, -2, 3]).unique(key=abs).to_list()
    [2, -3]
    """

    def _deduplicated() -> Generator[T, None, None]:
        observed: Set[Any] = set()
        for element in self:
            marker = key(element)
            if marker not in observed:
                observed.add(marker)
                yield element

    return Fluent(_deduplicated())
### End Non-Constant Memory ###
### Side Effect ###
def rate_limit(self, per_second: Union[int, float] = 100) -> "Fluent[T]":
    """Restrict consumption of iterable to n item *per_second*

    >>> import time
    >>> start_time = time.time()
    >>> _ = flu(range(3)).rate_limit(3).to_list()
    >>> print('Runtime', int(time.time() - start_time))
    1.00126 # approximately 1 second for 3 items
    """

    def _impl() -> Generator[T, None, None]:
        wait_time = 1.0 / per_second
        for val in self:
            start_time = time.time()
            yield val
            # Sleep only for the remainder of the per-item budget, crediting
            # time the consumer already spent processing the yielded value
            call_duration = time.time() - start_time
            time.sleep(max(wait_time - call_duration, 0.0))

    return Fluent(_impl())
def side_effect(
    self,
    func: Callable[[T], Any],
    before: Optional[Callable[[], Any]] = None,
    after: Optional[Callable[[], Any]] = None,
) -> "Fluent[T]":
    """Invoke *func* for each item in the iterable before yielding the item.

    *func* takes a single argument and the output is discarded

    *before* and *after* are optional functions that take no parameters and are executed once before iteration begins
    and after iteration ends respectively. Each will be called exactly once.

    >>> flu(range(2)).side_effect(lambda x: print(f'Collected {x}')).to_list()
    Collected 0
    Collected 1
    [0, 1]
    """

    def _impl() -> Generator[T, None, None]:
        try:
            if before is not None:
                before()

            for x in self:
                func(x)
                yield x

        finally:
            # try/finally guarantees *after* runs even when the consumer
            # abandons the generator early or an exception propagates
            if after is not None:
                after()

    return Fluent(_impl())
### End Side Effect ###
def map(self, func: Callable[Concatenate[T, P], _T1], *args: Any, **kwargs: Any) -> "Fluent[_T1]":
    """Apply *func* to each element of iterable

    Extra positional and keyword arguments are forwarded to every call of *func*.

    >>> flu(range(5)).map(lambda x: x*x).to_list()
    [0, 1, 4, 9, 16]
    """

    def _transformed() -> Generator[_T1, None, None]:
        for element in self._iterator:
            yield func(element, *args, **kwargs)

    return Fluent(_transformed())
def map_item(self: "Fluent[SupportsGetItem[T]]", item: Hashable) -> "Fluent[T]":
    """Extracts *item* from every element of the iterable

    >>> flu([(2, 4), (2, 5)]).map_item(1).to_list()
    [4, 5]

    >>> flu([{'mykey': 8}, {'mykey': 5}]).map_item('mykey').to_list()
    [8, 5]
    """

    def _picked() -> Generator[T, None, None]:
        for record in self:
            yield record[item]

    return Fluent(_picked())
def map_attr(self, attr: str) -> "Fluent[Any]":
"""Extracts the attribute *attr* from each element of the iterable
>>> from collections import namedtuple
>>> MyTup = namedtuple('MyTup', ['value', 'backup_val'])
>>> flu([MyTup(1, 5), MyTup(2, 4)]).map_attr('value').to_list()
[1, 2]
"""
return self.map(lambda x: getattr(x, attr))
def filter(self, func: Callable[Concatenate[T, P], bool], *args: Any, **kwargs: Any) -> "Fluent[T]":
"""Yield elements of iterable where *func* returns truthy
>>> flu(range(10)).filter(lambda x: x % 2 == 0).to_list()
[0, 2, 4, 6, 8]
"""
def _impl() -> Generator[T, None, None]:
for val in self._iterator:
if func(val, *args, **kwargs):
yield val
return Fluent(_impl())
def reduce(self, func: Callable[[T, T], T]) -> T:
"""Apply a function of two arguments cumulatively to the items of the iterable,
from left to right, so as to reduce the sequence to a single value
>>> flu(range(5)).reduce(lambda x, y: x + y)
10
"""
return reduce(func, self)
def fold_left(self, func: Callable[[S, T], S], initial: S) -> S:
"""Apply a function of two arguments cumulatively to the items of the iterable,
from left to right, starting with *initial*, so as to fold the sequence to
a single value
>>> flu(range(5)).fold_left(lambda x, y: x + str(y), "")
'01234'
"""
return reduce(func, self, initial)
@overload
def zip(self, __iter1: Iterable[_T1]) -> "Fluent[Tuple[T, _T1]]": ...
@overload
def zip(self, __iter1: Iterable[_T1], __iter2: Iterable[_T2]) -> "Fluent[Tuple[T, _T1, _T2]]": ...
@overload
def zip(
self, __iter1: Iterable[_T1], __iter2: Iterable[_T2], __iter3: Iterable[_T3]
) -> "Fluent[Tuple[T, _T1, _T2, _T3]]": ...
@overload
def zip(
self,
__iter1: Iterable[Any],
__iter2: Iterable[Any],
__iter3: Iterable[Any],
__iter4: Iterable[Any],
*iterable: Iterable[Any],
) -> "Fluent[Tuple[T, ...]]": ...
def zip(self, *iterable: Iterable[Any]) -> Union[
"Fluent[Tuple[T, ...]]",
"Fluent[Tuple[T, _T1]]",
"Fluent[Tuple[T, _T1, _T2]]",
"Fluent[Tuple[T, _T1, _T2, _T3]]",
]:
"""Yields tuples containing the i-th element from the i-th
argument in the instance, and the iterable
>>> flu(range(5)).zip(range(3, 0, -1)).to_list()
[(0, 3), (1, 2), (2, 1)]
"""
# @self_to_flu is not compatible with @overload
# make sure any usage of self supports arbitrary iterables
tup_iter = zip(iter(self), *iterable)
return Fluent(tup_iter)
def zip_longest(self, *iterable: Iterable[_T1], fill_value: Any = None) -> "Fluent[Tuple[T, ...]]":
"""Yields tuples containing the i-th element from the i-th
argument in the instance, and the iterable
Iteration continues until the longest iterable is exhaused.
If iterables are uneven in length, missing values are filled in with fill value
>>> flu(range(5)).zip_longest(range(3, 0, -1)).to_list()
[(0, 3), (1, 2), (2, 1), (3, None), (4, None)]
>>> flu(range(5)).zip_longest(range(3, 0, -1), fill_value='a').to_list()
[(0, 3), (1, 2), (2, 1), (3, 'a'), (4, 'a')]
"""
return Fluent(zip_longest(self, *iterable, fillvalue=fill_value))
def enumerate(self, start: int = 0) -> "Fluent[Tuple[int, T]]":
"""Yields tuples from the instance where the first element
is a count from initial value *start*.
>>> flu([3,4,5]).enumerate().to_list()
[(0, 3), (1, 4), (2, 5)]
"""
return Fluent(enumerate(self, start=start))
def take(self, n: Optional[int] = None) -> "Fluent[T]":
"""Yield first *n* items of the iterable
>>> flu(range(10)).take(2).to_list()
[0, 1]
"""
return Fluent(islice(self._iterator, n))
def take_while(self, predicate: Callable[[T], bool]) -> "Fluent[T]":
"""Yield elements from the chainable so long as the predicate is true
>>> flu(range(10)).take_while(lambda x: x < 3).to_list()
[0, 1, 2]
"""
return Fluent(takewhile(predicate, self._iterator))
def drop_while(self, predicate: Callable[[T], bool]) -> "Fluent[T]":
"""Drop elements from the chainable as long as the predicate is true;
afterwards, return every element
>>> flu(range(10)).drop_while(lambda x: x < 3).to_list()
[3, 4, 5, 6, 7, 8, 9]
"""
return Fluent(dropwhile(predicate, self._iterator))
def chunk(self, n: int) -> "Fluent[List[T]]":
"""Yield lists of elements from iterable in groups of *n*
if the iterable is not evenly divisiible by *n*, the final list will be shorter
>>> flu(range(10)).chunk(3).to_list()
[[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
"""
def _impl() -> Generator[List[T], None, None]:
while True:
vals: List[T] = list(self.take(n))
if vals:
yield vals
else:
return
return Fluent(_impl())
def flatten(
self,
depth: int = 1,
base_type: Optional[Type[object]] = None,
iterate_strings: bool = False,
) -> "Fluent[Any]":
"""Recursively flatten nested iterables (e.g., a list of lists of tuples)
into non-iterable type or an optional user-defined base_type
Strings are treated as non-iterable for convenience. set iterate_string=True
to change that behavior.
>>> flu([[0, 1, 2], [3, 4, 5]]).flatten().to_list()
[0, 1, 2, 3, 4, 5]
>>> flu([[0, [1, 2]], [[3, 4], 5]]).flatten().to_list()
[0, [1, 2], [3, 4], 5]
>>> flu([[0, [1, 2]], [[3, 4], 5]]).flatten(depth=2).to_list()
[0, 1, 2, 3, 4, 5]
>>> flu([[0, [1, 2]], [[3, 4], 5]]).flatten(depth=2).to_list()
[0, 1, 2, 3, 4, 5]
>>> flu([1, (2, 2), 4, [5, (6, 6, 6)]]).flatten(base_type=tuple).to_list()
[1, (2, 2), 4, 5, (6, 6, 6)]
>>> flu([[2, 0], 'abc', 3, [4]]).flatten(iterate_strings=True).to_list()
[2, 0, 'a', 'b', 'c', 3, 4]
"""
# TODO(OR): Reimplement with strong types
def walk(node: Any, level: int) -> Generator[T, None, None]:
if (
((depth is not None) and (level > depth))
or (isinstance(node, str) and not iterate_strings)
or ((base_type is not None) and isinstance(node, base_type))
):
yield node
return
try:
tree = iter(node)
except TypeError:
yield node
return
else:
for child in tree:
for val in walk(child, level + 1):
yield val
return Fluent(walk(self, level=0))
def denormalize(self: "Fluent[SupportsIteration[Any]]", iterate_strings: bool = False) -> "Fluent[Tuple[Any, ...]]":
"""Denormalize iterable components of each record
>>> flu([("abc", [1, 2, 3])]).denormalize().to_list()
[('abc', 1), ('abc', 2), ('abc', 3)]
>>> flu([("abc", [1, 2])]).denormalize(iterate_strings=True).to_list()
[('a', 1), ('a', 2), ('b', 1), ('b', 2), ('c', 1), ('c', 2)]
>>> flu([("abc", [])]).denormalize().to_list()
[]
"""
def _impl() -> Generator[Tuple[Any, ...], None, None]:
for record in self:
iter_elements: List[Iterable[Any]] = []
element: Any
for element in record:
# Check for string and string iteration is allowed
if isinstance(element, str) and iterate_strings:
iter_elements.append(element)
# Check for string and string iteration is not allowed
elif isinstance(element, str):
iter_elements.append([element])
# Check for iterable
elif isinstance(element, IterableType):
iter_elements.append(element)
# Check for non-iterable
else:
iter_elements.append([element])
for row in product(*iter_elements):
yield row
return Fluent(_impl())
def window(self, n: int, step: int = 1, fill_value: Any = None) -> "Fluent[Tuple[Any, ...]]":
"""Yield a sliding window of width *n* over the given iterable.
Each window will advance in increments of *step*:
If the length of the iterable does not evenly divide by the *step*
the final output is padded with *fill_value*
>>> flu(range(5)).window(3).to_list()
[(0, 1, 2), (1, 2, 3), (2, 3, 4)]
>>> flu(range(5)).window(n=3, step=2).to_list()
[(0, 1, 2), (2, 3, 4)]
>>> flu(range(9)).window(n=4, step=3).to_list()
[(0, 1, 2, 3), (3, 4, 5, 6), (6, 7, 8, None)]
>>> flu(range(9)).window(n=4, step=3, fill_value=-1).to_list()
[(0, 1, 2, 3), (3, 4, 5, 6), (6, 7, 8, -1)]
"""
def _impl() -> Generator[Tuple[Any, ...], None, None]:
if n < 0:
raise ValueError("n must be >= 0")
elif n == 0:
yield tuple()
return
if step < 1:
raise ValueError("step must be >= 1")
window: Deque[Any] = deque([], n)
append = window.append
# Initial deque fill
for _ in range(n):
append(next(self, fill_value))
yield tuple(window)
# Appending new items to the right causes old items to fall off the left
i = 0
for item in self:
append(item)
i = (i + 1) % step
if i % step == 0:
yield tuple(window)
# If there are items from the iterable in the window, pad with the given
# value and emit them.
if (i % step) and (step - i < n):
for _ in range(step - i):
append(fill_value)
yield tuple(window)
return Fluent(_impl())
def __iter__(self) -> "Fluent[T]":
return self
def __next__(self) -> T:
return next(self._iterator)
def tee(self, n: int = 2) -> "Fluent[Fluent[T]]":
"""Return n independent iterators from a single iterable
once tee() has made a split, the original iterable should not be used
anywhere else; otherwise, the iterable could get advanced without the
tee objects being informed
>>> copy1, copy2 = flu(range(5)).tee()
>>> copy1.sum()
10
>>> copy2.to_list()
[0, 1, 2, 3, 4]
"""
return Fluent((Fluent(x) for x in tee(self, n)))
class flu(Fluent[T]):
"""A fluent interface to lazy generator functions
>>> from flupy import flu
>>> (
flu(range(100))
.map(lambda x: x**2)
.filter(lambda x: x % 3 == 0)
.chunk(3)
.take(2)
.to_list()
)
[[0, 9, 36], [81, 144, 225]]
"""
================================================
FILE: src/flupy/py.typed
================================================
================================================
FILE: src/tests/test_cli.py
================================================
from tempfile import NamedTemporaryFile
import pytest
from flupy.cli.cli import build_import_dict, main, parse_args
def test_parse_args():
with pytest.raises(SystemExit) as cm:
parse_args([])
assert cm.exception.code == 2
args = parse_args(["_"])
assert args.command == "_"
args = parse_args(["_", "-i", "os:environ:env"])
assert "os:environ:env" in getattr(args, "import")
assert args.command == "_"
import_dict = build_import_dict(["json"])
assert "json" in import_dict
def test_build_import_dict():
import json
import_dict = build_import_dict(["json"])
assert "json" in import_dict
assert import_dict["json"] == json
import_dict = build_import_dict(["json:dumps"])
assert "dumps" in import_dict
assert import_dict["dumps"] == json.dumps
import_dict = build_import_dict(["json:dumps:ds"])
assert "ds" in import_dict
assert import_dict["ds"] == json.dumps
import_dict = build_import_dict(["json::j"])
assert "j" in import_dict
assert import_dict["j"] == json
def test_show_help(capsys):
with pytest.raises(SystemExit):
main(["flu", "-h"])
result = capsys.readouterr()
stdout = result.out
assert stdout.startswith("usage")
def test_show_version(capsys):
main(["flu", "flu(range(5)).collect()"])
result = capsys.readouterr()
stdout = result.out.replace("\n", "")
assert stdout.startswith("0")
def test_basic_pipeline(capsys):
main(["flu", "flu(range(5)).collect()"])
result = capsys.readouterr()
stdout = result.out.replace("\n", "")
assert stdout.startswith("0")
def test_pass_on_none_pipeline(capsys):
main(["flu", "None"])
result = capsys.readouterr()
stdout = result.out
assert stdout == ""
def test_non_iterable_non_none_pipeline(capsys):
main(["flu", '"hello_world"'])
result = capsys.readouterr()
stdout = result.out.strip("\n")
assert stdout == "hello_world"
def test_cli_walk_files(capsys):
main(["flu", "walk_files().head(2)"])
result = capsys.readouterr()
stdout = result.out.strip("\n").split("\n")
assert len(stdout) == 2
def test_cli_walk_dirs(capsys):
main(["flu", "walk_dirs().head(2)"])
result = capsys.readouterr()
stdout = result.out.strip("\n").split("\n")
assert len(stdout) == 2
def test_from_file(capsys):
with NamedTemporaryFile("w+") as f:
f.write("hello")
f.read()
f_name = f.name
main(["flu", "-f", f_name, "_.map(str.upper)"])
result = capsys.readouterr()
stdout = result.out.strip("\n")
assert stdout == "HELLO"
def test_glob_imports(capsys):
main(["flu", "flu(env).count()", "-i", "os:environ:env"])
result = capsys.readouterr()
stdout = result.out
assert stdout
================================================
FILE: src/tests/test_cli_utils.py
================================================
from flupy.cli.utils import walk_dirs, walk_files
def test_walk_files():
assert walk_files().head()
assert walk_files(abspath=False).head()
def test_walk_dirs():
assert walk_dirs().head()
================================================
FILE: src/tests/test_flu.py
================================================
import sys
from itertools import count, cycle
import pytest
from flupy import flu
def test_collect():
assert flu(range(3)).collect() == [0, 1, 2]
assert flu(range(3)).collect(container_type=tuple) == (0, 1, 2)
assert flu(range(3)).collect(n=2) == [0, 1]
def test_to_list():
assert flu(range(3)).to_list() == [0, 1, 2]
def test___getitem__():
assert flu(range(3))[1] == 1
assert flu(range(3))[1:].collect() == [1, 2]
assert flu(range(35))[1:2].collect() == [1]
assert flu(range(35))[1:3].collect() == [1, 2]
with pytest.raises(IndexError):
flu([1])[4]
with pytest.raises((KeyError, TypeError)):
flu([1])["not an index"]
def test_sum():
gen = flu(range(3))
assert gen.sum() == 3
def test_reduce():
gen = flu(range(5))
assert gen.reduce(lambda x, y: x + y) == 10
def test_fold_left():
assert flu(range(5)).fold_left(lambda x, y: x + y, 0) == 10
assert flu(range(5)).fold_left(lambda x, y: x + str(y), "") == "01234"
def test_count():
gen = flu(range(3))
assert gen.count() == 3
def test_min():
gen = flu(range(3))
assert gen.min() == 0
def test_first():
gen = flu(range(3))
assert gen.first() == 0
gen = flu([])
with pytest.raises(IndexError):
gen.first()
gen = flu([])
assert gen.first(default=1) == 1
def test_last():
gen = flu(range(3))
assert gen.last() == 2
gen = flu([])
with pytest.raises(IndexError):
gen.last()
gen = flu([])
assert gen.last(default=1) == 1
def test_head():
gen = flu(range(30))
assert gen.head(n=2) == [0, 1]
gen = flu(range(30))
assert gen.head(n=3, container_type=set) == set([0, 1, 2])
gen = flu(range(3))
assert gen.head(n=50) == [0, 1, 2]
def test_tail():
gen = flu(range(30))
assert gen.tail(n=2) == [28, 29]
gen = flu(range(30))
assert gen.tail(n=3, container_type=set) == set([27, 28, 29])
gen = flu(range(3))
assert gen.tail(n=50) == [0, 1, 2]
def test_max():
gen = flu(range(3))
assert gen.max() == 2
def test_unique():
class NoHash:
def __init__(self, letter, keyf):
self.letter = letter
self.keyf = keyf
a = NoHash("a", 1)
b = NoHash("b", 1)
c = NoHash("c", 2)
gen = flu([a, b, c]).unique()
assert gen.collect() == [a, b, c]
gen = flu([a, b, c]).unique(lambda x: x.letter)
assert gen.collect() == [a, b, c]
gen = flu([a, b, c]).unique(lambda x: x.keyf)
assert gen.collect() == [a, c]
def test_side_effect():
class FakeFile:
def __init__(self):
self.is_open = False
self.content = []
def write(self, text):
if self.is_open:
self.content.append(text)
else:
raise IOError("fake file is not open for writing")
def open(self):
self.is_open = True
def close(self):
self.is_open = False
# Test the fake file
ffile = FakeFile()
ffile.open()
ffile.write("should be there")
ffile.close()
assert ffile.content[0] == "should be there"
with pytest.raises(IOError):
ffile.write("should fail")
# Reset fake file
ffile = FakeFile()
with pytest.raises(IOError):
flu(range(5)).side_effect(ffile.write).collect()
gen_result = flu(range(5)).side_effect(ffile.write, before=ffile.open, after=ffile.close).collect()
assert ffile.is_open == False
assert ffile.content == [0, 1, 2, 3, 4]
assert gen_result == [0, 1, 2, 3, 4]
def test_sort():
gen = flu(range(3, 0, -1)).sort()
assert gen.collect() == [1, 2, 3]
def test_shuffle():
original_order = list(range(10000))
new_order = flu(original_order).shuffle().collect()
assert new_order != original_order
assert len(new_order) == len(original_order)
assert sum(new_order) == sum(original_order)
def test_map():
gen = flu(range(3)).map(lambda x: x + 2)
assert gen.collect() == [2, 3, 4]
def test_rate_limit():
resA = flu(range(3)).collect()
resB = flu(range(3)).rate_limit(5000).collect()
assert resA == resB
def test_map_item():
gen = flu(range(3)).map(lambda x: {"a": x}).map_item("a")
assert gen.collect() == [0, 1, 2]
def test_map_attr():
class Person:
def __init__(self, age: int) -> None:
self.age = age
gen = flu(range(3)).map(lambda x: Person(x)).map_attr("age")
assert gen.collect() == [0, 1, 2]
def test_filter():
gen = flu(range(3)).filter(lambda x: 0 < x < 2)
assert gen.collect() == [1]
def test_take():
gen = flu(range(10)).take(5)
assert gen.collect() == [0, 1, 2, 3, 4]
def test_take_while():
gen = flu(cycle(range(10))).take_while(lambda x: x < 4)
assert gen.collect() == [0, 1, 2, 3]
def test_drop_while():
gen = flu([1, 2, 3, 4, 3, 2, 1]).drop_while(lambda x: x < 4)
assert gen.collect() == [4, 3, 2, 1]
def test_group_by():
gen = flu([1, 1, 1, 2, 2, 2, 2, 3]).zip(range(100)).group_by(lambda x: x[0])
g1, g2, g3 = gen.map(lambda x: (x[0], x[1].collect())).collect()
# Standard usage
assert g1 == (1, [(1, 0), (1, 1), (1, 2)])
assert g2 == (2, [(2, 3), (2, 4), (2, 5), (2, 6)])
assert g3 == (3, [(3, 7)])
# No param usage
v1 = flu(range(10)).group_by().map(lambda x: (x[0], list(x[1])))
v2 = flu(range(10)).map(lambda x: (x, [x]))
assert v1.collect() == v2.collect()
# Sort
gen = flu([1, 2, 1, 2]).group_by(lambda x: x, sort=False)
assert gen.count() == 4
gen = flu([1, 2, 1, 2]).group_by(lambda x: x, sort=True)
assert gen.count() == 2
# Identity Function
points = [{"x": 1, "y": 0}, {"x": 4, "y": 3}, {"x": 1, "y": 5}]
key_func = lambda u: u["x"]
gen = flu(points).group_by(key=key_func, sort=True).collect()
assert len(gen) == 2
assert gen[0][0] == 1
assert gen[1][0] == 4
assert len(gen[0][1].collect()) == 2
assert len(gen[1][1].collect()) == 1
def test_chunk():
gen = flu(range(5)).chunk(2)
assert gen.collect() == [[0, 1], [2, 3], [4]]
def test_next():
gen = flu(range(5))
assert next(gen) == 0
def test_iter():
gen = flu(range(5))
assert next(iter(gen)) == 0
def test_enumerate():
# Check default
gen = flu(range(3)).enumerate()
assert gen.collect() == [(0, 0), (1, 1), (2, 2)]
# Check start param
gen = flu(range(3)).enumerate(start=1)
assert gen.collect() == [(1, 0), (2, 1), (3, 2)]
def test_zip():
gen = flu(range(3)).zip(range(3))
assert gen.collect() == [(0, 0), (1, 1), (2, 2)]
gen2 = flu(range(3)).zip(range(3), range(2))
assert gen2.collect() == [(0, 0, 0), (1, 1, 1)]
def test_zip_longest():
gen = flu(range(3)).zip_longest(range(5))
assert gen.collect() == [(0, 0), (1, 1), (2, 2), (None, 3), (None, 4)]
gen = flu(range(3)).zip_longest(range(5), fill_value="a")
assert gen.collect() == [(0, 0), (1, 1), (2, 2), ("a", 3), ("a", 4)]
gen = flu(range(3)).zip_longest(range(5), range(4), fill_value="a")
assert gen.collect() == [(0, 0, 0), (1, 1, 1), (2, 2, 2), ("a", 3, 3), ("a", 4, "a")]
def test_window():
# Check default
gen = flu(range(5)).window(n=3)
assert gen.collect() == [(0, 1, 2), (1, 2, 3), (2, 3, 4)]
# Check step param
gen = flu(range(5)).window(n=3, step=3)
assert gen.collect() == [(0, 1, 2), (3, 4, None)]
# Check fill_value param
gen = flu(range(5)).window(n=3, step=3, fill_value="i")
assert gen.collect() == [(0, 1, 2), (3, 4, "i")]
assert flu(range(4)).window(n=0).collect() == [tuple()]
with pytest.raises(ValueError):
flu(range(5)).window(n=-1).collect()
with pytest.raises(ValueError):
flu(range(5)).window(3, step=0).collect()
def test_flu():
gen = flu(count()).map(lambda x: x**2).filter(lambda x: x % 517 == 0).chunk(5).take(3)
assert next(gen) == [0, 267289, 1069156, 2405601, 4276624]
def test_flatten():
nested = [1, [2, (3, [4])], ["rbsd", "abc"], (7,)]
# Defaults with depth of 1
gen = flu(nested).flatten()
assert [x for x in gen] == [1, 2, (3, [4]), "rbsd", "abc", 7]
# Depth 2
gen = flu(nested).flatten(depth=2)
assert [x for x in gen] == [1, 2, 3, [4], "rbsd", "abc", 7]
# Depth 3
gen = flu(nested).flatten(depth=3)
assert [x for x in gen] == [1, 2, 3, 4, "rbsd", "abc", 7]
# Depth infinite
gen = flu(nested).flatten(depth=sys.maxsize)
assert [x for x in gen] == [1, 2, 3, 4, "rbsd", "abc", 7]
# Depth 2 with tuple base_type
gen = flu(nested).flatten(depth=2, base_type=tuple)
assert [x for x in gen] == [1, 2, (3, [4]), "rbsd", "abc", (7,)]
# Depth 2 with iterate strings
gen = flu(nested).flatten(depth=2, base_type=tuple, iterate_strings=True)
assert [x for x in gen] == [1, 2, (3, [4]), "r", "b", "s", "d", "a", "b", "c", (7,)]
def test_denormalize():
content = [
["abc", [1, 2, 3]],
]
assert flu(content).denormalize().collect() == [("abc", 1), ("abc", 2), ("abc", 3)]
assert (flu(content).denormalize(iterate_strings=True).collect()) == [
("a", 1),
("a", 2),
("a", 3),
("b", 1),
("b", 2),
("b", 3),
("c", 1),
("c", 2),
("c", 3),
]
assert (flu([[[1], [1, 2], None]]).denormalize().collect()) == [
(1, 1, None),
(1, 2, None),
]
assert (flu([[[1], [1, 2], []]]).denormalize().collect()) == []
def test_tee():
# Default unpacking
gen1, gen2 = flu(range(100)).tee()
assert gen1.sum() == gen2.sum()
# adjusting *n* paramter
gen1, gen2, gen3 = flu(range(100)).tee(3)
assert gen1.sum() == gen3.sum()
# No sync progress
gen1, gen2 = flu(range(100)).tee()
assert next(gen1) == next(gen2)
# No break chaining
assert flu(range(5)).tee().map(sum).sum() == 20
def test_join_left():
# Default unpacking
res = flu(range(6)).join_left(range(0, 6, 2)).collect()
assert res == [(0, 0), (1, None), (2, 2), (3, None), (4, 4), (5, None)]
def test_join_inner():
# Default unpacking
res = flu(range(6)).join_inner(range(0, 6, 2)).collect()
assert res == [(0, 0), (2, 2), (4, 4)]
def test_join_full():
# Basic full join
res = flu(range(4)).join_full(range(2, 6)).collect()
assert res == [(0, None), (1, None), (2, 2), (3, 3), (None, 4), (None, 5)]
# Full join with custom keys
left = [{"id": 1, "name": "Alice"}, {"id": 2, "name": "Bob"}]
right = [{"id": 2, "value": 100}, {"id": 3, "value": 200}]
res = flu(left).join_full(right, key=lambda x: x["id"], other_key=lambda x: x["id"]).collect()
assert res == [
({"id": 1, "name": "Alice"}, None),
({"id": 2, "name": "Bob"}, {"id": 2, "value": 100}),
(None, {"id": 3, "value": 200}),
]
# Full join with empty left
res = flu([]).join_full(range(3)).collect()
assert res == [(None, 0), (None, 1), (None, 2)]
# Full join with empty right
res = flu(range(3)).join_full([]).collect()
assert res == [(0, None), (1, None), (2, None)]
# Full join with both empty
res = flu([]).join_full([]).collect()
assert res == []
# Full join with duplicates
res = flu([1, 2, 2, 3]).join_full([2, 2, 4]).collect()
expected = [(1, None), (2, 2), (2, 2), (2, 2), (2, 2), (3, None), (None, 4)] # 2x2 cartesian product
# Sort with custom key to handle None values
sort_key = lambda x: (
x[0] is None,
x[0] if x[0] is not None else -1,
x[1] is None,
x[1] if x[1] is not None else -1,
)
assert sorted(res, key=sort_key) == sorted(expected, key=sort_key)
================================================
FILE: src/tests/test_version.py
================================================
"""
Tests for version information.
"""
import re
import flupy
def test_version_format():
"""Test that __version__ follows semantic versioning format (MAJOR.MINOR.PATCH)."""
# Standard semver regex pattern
semver_pattern = r"^(?P0|[1-9]\d*)\.(?P0|[1-9]\d*)\.(?P0|[1-9]\d*)(?:-(?P(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+(?P[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$"
assert re.match(
semver_pattern, flupy.__version__
), f"Version '{flupy.__version__}' does not match semantic versioning format"
# Ensure version parts can be parsed as integers
major, minor, patch = flupy.__version__.split("-")[0].split("+")[0].split(".")[:3]
assert major.isdigit(), f"Major version '{major}' is not a valid integer"
assert minor.isdigit(), f"Minor version '{minor}' is not a valid integer"
assert patch.isdigit(), f"Patch version '{patch}' is not a valid integer"