Repository: fopina/django-bulk-update-or-create Branch: develop Commit: 8b3f852398cc Files: 30 Total size: 45.0 KB Directory structure: gitextract_7tn2bvqh/ ├── .github/ │ └── workflows/ │ ├── publish-dev.yml │ ├── publish.yml │ └── test.yml ├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── bulk_update_or_create/ │ ├── __init__.py │ ├── __version__.py │ ├── apps.py │ └── query.py ├── setup.cfg ├── setup.py ├── tests/ │ ├── README.md │ ├── manage.py │ ├── pytest.ini │ ├── requirements.txt │ ├── settings.py │ ├── settings_mysql.py │ ├── settings_postgresql.py │ ├── tests/ │ │ ├── __init__.py │ │ ├── management/ │ │ │ ├── __init__.py │ │ │ └── commands/ │ │ │ ├── __init__.py │ │ │ └── bulk_it.py │ │ ├── migrations/ │ │ │ ├── 0001_initial.py │ │ │ └── __init__.py │ │ ├── models.py │ │ └── tests.py │ └── urls.py └── tox.ini ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/workflows/publish-dev.yml ================================================ # This workflows will upload a Python Package using Twine when a release is created # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries name: publish dev on: push: branches: - develop jobs: deploy: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - name: Set up Python uses: actions/setup-python@v2 with: python-version: '3.x' - name: Install dependencies run: | python -m pip install --upgrade pip pip install setuptools wheel twine - name: set dev version run: | sed -i "s/^\(__version__.*\)'/\1.dev.${{github.run_number}}'/g" bulk_update_or_create/__version__.py grep dev bulk_update_or_create/__version__.py - name: Build and publish env: TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} run: | python setup.py sdist bdist_wheel twine upload dist/* ================================================ FILE: .github/workflows/publish.yml ================================================ # This workflows will upload a Python Package using Twine when a release is created # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries name: publish on: release: types: [created] jobs: deploy: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - name: Set up Python uses: actions/setup-python@v2 with: python-version: '3.x' - name: Install dependencies run: | python -m pip install --upgrade pip pip install setuptools wheel twine - name: Build and publish env: TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} run: | python setup.py sdist bdist_wheel twine upload dist/* ================================================ FILE: .github/workflows/test.yml ================================================ # This workflow will install Python dependencies, run tests and lint with a variety of Python versions # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions name: tests on: push: branches: [ main ] pull_request: branches: [ main ] jobs: style: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - name: Set up Python 3.6 uses: actions/setup-python@v2 with: python-version: 3.6 - name: Install tox run: pip install tox - name: Style check run : tox -e style test: runs-on: ubuntu-latest strategy: matrix: python-version: [3.6, 3.7, 3.8, 3.9] database: [sqlite, mysql, postgresql] services: mysql: image: mysql:5 env: MYSQL_ROOT_PASSWORD: root ports: - 8877:3306 # needed because the container does not provide a healthcheck options: --health-cmd "mysqladmin ping" --health-interval 10s --health-timeout 5s --health-retries=5 postgres: image: postgres:10 env: POSTGRES_USER: postgres POSTGRES_PASSWORD: postgres POSTGRES_DB: postgres ports: - 8878:5432 # needed because the postgres container does not provide a healthcheck options: --health-cmd pg_isready --health-interval 10s --health-timeout 5s --health-retries 5 steps: - uses: actions/checkout@v2 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v2 with: python-version: ${{ matrix.python-version }} - name: Install tox run: pip install tox - name: Toxit run: tox -e py-${{ matrix.database }} -v - name: coverage xml run: .tox/py-mysql/bin/coverage xml if: ${{ matrix.python-version == 3.7 && matrix.database == 'mysql' }} - uses: codecov/codecov-action@v1 with: fail_ci_if_error: true if: ${{ matrix.python-version == 3.7 && matrix.database == 'mysql' }} ================================================ FILE: .gitignore ================================================ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ pip-wheel-metadata/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover *.py,cover .hypothesis/ .pytest_cache/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 db.sqlite3-journal # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # Jupyter Notebook .ipynb_checkpoints # IPython profile_default/ ipython_config.py # pyenv .python-version # pipenv # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. # However, in case of collaboration, if having platform-specific dependencies or dependencies # having no cross-platform support, pipenv may install dependencies that don't work, or not # install all needed dependencies. #Pipfile.lock # PEP 582; used by e.g. github.com/David-OConnor/pyflow __pypackages__/ # Celery stuff celerybeat-schedule celerybeat.pid # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .dmypy.json dmypy.json # Pyre type checker .pyre/ .vscode .DS_Store ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2020 Filipe Pina Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: Makefile ================================================ .PHONY: flake8 test coverage style style_check style: black --target-version=py36 \ --line-length=120 \ --skip-string-normalization \ bulk_update_or_create tests setup.py flake8 bulk_update_or_create tests style_check: flake8 black --target-version=py36 \ --line-length=120 \ --skip-string-normalization \ --check \ bulk_update_or_create tests setup.py flake8: flake8 bulk_update_or_create tests startmysql: @docker inspect django-bulk_update_or_create-mysql | grep -q '"Running": true' || \ docker run --name django-bulk_update_or_create-mysql \ -e MYSQL_ROOT_PASSWORD=root \ --rm -p 8877:3306 -d \ --health-cmd "mysqladmin ping" \ --health-interval 10s \ --health-timeout 5s \ --health-retries=5 \ mysql:5 # TODO: wait for healthy startpg: @docker inspect django-bulk_update_or_create-pg | grep -q '"Running": true' || \ docker run --name django-bulk_update_or_create-pg \ -e POSTGRES_USER=postgres \ -e POSTGRES_PASSWORD=postgres \ -e POSTGRES_DB=postgres \ --rm -p 8878:5432 -d \ --health-cmd pg_isready \ --health-interval 10s \ --health-timeout 5s \ --health-retries 5 \ postgres:10 # TODO: wait for healthy test: startmysql DJANGO_SETTINGS_MODULE=settings_mysql \ tests/manage.py test $${TEST_ARGS:-tests} testpg: startpg DJANGO_SETTINGS_MODULE=settings_postgresql \ tests/manage.py test $${TEST_ARGS:-tests} testcmd: startpg startmysql # default - sqlite DJANGO_SETTINGS_MODULE=settings tests/manage.py migrate DJANGO_SETTINGS_MODULE=settings tests/manage.py bulk_it # mysql DJANGO_SETTINGS_MODULE=settings_mysql tests/manage.py migrate DJANGO_SETTINGS_MODULE=settings_mysql tests/manage.py bulk_it # postgres DJANGO_SETTINGS_MODULE=settings_postgresql tests/manage.py migrate DJANGO_SETTINGS_MODULE=settings_postgresql tests/manage.py bulk_it coverage: PYTHONPATH="tests" \ python -b -W always -m coverage run tests/manage.py test $${TEST_ARGS:-tests} coverage report ================================================ FILE: README.md ================================================ # django-bulk-update-or-create [![tests](https://github.com/fopina/django-bulk-update-or-create/workflows/tests/badge.svg)](https://github.com/fopina/django-bulk-update-or-create/actions?query=workflow%3Atests) [![Test coverage status](https://codecov.io/gh/fopina/django-bulk-update-or-create/branch/main/graph/badge.svg)](https://codecov.io/gh/fopina/django-bulk-update-or-create) [![Current version on PyPi](https://img.shields.io/pypi/v/django-bulk-update-or-create)](https://pypi.org/project/django-bulk-update-or-create/) [![monthly downloads](https://img.shields.io/pypi/dm/django-bulk-update-or-create)](https://pypi.org/project/django-bulk-update-or-create/) ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/django-bulk-update-or-create) ![PyPI - Django Version](https://img.shields.io/pypi/djversions/django-bulk-update-or-create) Everyone using Django ORM will eventually find himself doing batch `update_or_create` operations: ingest files from external sources, sync with external APIs, etc. If the number of records is big, the slowliness of `QuerySet.update_or_create` will stand out: it is very practical to use but it always does one `SELECT` and then one `INSERT` (if select didn't return anything) or `UPDATE`/`.save` (if it did). Searching online shows that this does indeed happen to quite a few people though it doesn't seem to be any good solution: * `bulk_create` is really fast if you know all records are new (and you're not using multi-table inheritance) * `bulk_update` does some nice voodoo to update several records with the same `UPDATE` statement (using a huge `WHERE` condition together with `CASE`), but you need to be sure they all exist * UPSERTs [(INSERT .. ON DUPLICATE KEY UPDATE](https://dev.mysql.com/doc/refman/8.0/en/insert-on-duplicate.html)) look interesting (TODO on different package) but they will be retricted by `bulk_create` limitations ==> cannot use on models with multi-table inheritance This package tries to tackle this introducing `bulk_update_or_create` to model QuerySet/Manager: * `update_or_create`: `(1 SELECT + 1 INSERT/UPDATE) * N` * `bulk_update_or_create`: `1 BIG_SELECT + 1 BIG_UPDATE + (lte_N) INSERT` For a batch of records: * `SELECT` all from database (based on the `match_field` parameter) * Update records in memory * Use `bulk_update` for those * Use `INSERT`/`.create` on each of the remaining The (*SOFTCORE*) [performance test](tests/tests/management/commands/bulk_it.py) looks promising, more than 70% less time (average): ```shell $ make testcmd # default - sqlite DJANGO_SETTINGS_MODULE=settings tests/manage.py bulk_it loop of update_or_create - all creates: 3.966486692428589 loop of update_or_create - all updates: 4.020653247833252 loop of update_or_create - half half: 3.9968857765197754 bulk_update_or_create - all creates: 2.949239730834961 bulk_update_or_create - all updates: 0.15633511543273926 bulk_update_or_create - half half: 1.4585723876953125 # mysql DJANGO_SETTINGS_MODULE=settings_mysql tests/manage.py bulk_it loop of update_or_create - all creates: 5.511938571929932 loop of update_or_create - all updates: 5.321666955947876 loop of update_or_create - half half: 5.391834735870361 bulk_update_or_create - all creates: 1.5671980381011963 bulk_update_or_create - all updates: 0.14612770080566406 bulk_update_or_create - half half: 0.7262606620788574 # postgres DJANGO_SETTINGS_MODULE=settings_postgresql tests/manage.py bulk_it loop of update_or_create - all creates: 4.3584535121917725 loop of update_or_create - all updates: 3.6183276176452637 loop of update_or_create - half half: 4.145816087722778 bulk_update_or_create - all creates: 1.044851541519165 bulk_update_or_create - all updates: 0.14954638481140137 bulk_update_or_create - half half: 0.8407495021820068 ``` Installation ============ ``` pip install django-bulk-update-or-create ``` ```py INSTALLED_APPS = [ ... 'bulk_update_or_create', ... ] ``` Usage ===== * use `BulkUpdateOrCreateQuerySet` as manager of your model(s) ```python from django.db import models from bulk_update_or_create import BulkUpdateOrCreateQuerySet class RandomData(models.Model): objects = BulkUpdateOrCreateQuerySet.as_manager() uuid = models.IntegerField(unique=True) data = models.CharField(max_length=200, null=True, blank=True) ``` * call `bulk_update_or_create` ```python items = [ RandomData(uuid=1, data='data for 1'), RandomData(uuid=2, data='data for 2'), ] RandomData.objects.bulk_update_or_create(items, ['data'], match_field='uuid') ``` * or use the context manager, if you are updating a big number of items, as it manages a batch queue ```python with RandomData.objects.bulk_update_or_create_context(['data'], match_field='uuid', batch_size=10) as bulkit: for i in range(10000): bulkit.queue(RandomData(uuid=i, data=i + 20)) ``` `bulk_update_or_create` supports `yield_objects=True` so you can iterate over the created/updated objects. `bulk_update_or_create_context` provides the same information to the callback function specified as `status_cb` Docs ==== WIP ToDo ==== * [ ] Docs! * [ ] Add option to use `bulk_create` for creates: assert model is not multi-table, if enabled * [ ] Fix the collation mess: the keyword arg `case_insensitive_match` should be dropped and collation detected in runtime * [x] Add support for multiple `match_field` - probably will need to use `WHERE (K1=X and K2=Y) or (K1=.. and K2 =..)` instead of `IN` for those, as that SQL standard doesn't seem widely adopted yet * [ ] Link to `UPSERT` alternative package once done! ================================================ FILE: bulk_update_or_create/__init__.py ================================================ from .__version__ import __version__ from .query import BulkUpdateOrCreateQuerySet, BulkUpdateOrCreateMixin __all__ = ['BulkUpdateOrCreateQuerySet', 'BulkUpdateOrCreateMixin'] default_app_config = 'bulk_update_or_create.apps.BulkUpdateOrCreateConfig' ================================================ FILE: bulk_update_or_create/__version__.py ================================================ __version__ = '1.0.0' ================================================ FILE: bulk_update_or_create/apps.py ================================================ from django.apps import AppConfig class BulkUpdateOrCreateConfig(AppConfig): name = 'bulk_update_or_create' ================================================ FILE: bulk_update_or_create/query.py ================================================ from types import TracebackType from typing import Any, Callable, Generator, List, Optional, Tuple, Type, Union from django.db import models from django.db.models import Model, QuerySet class BulkUpdateOrCreateMixin: def bulk_update_or_create_context( self, update_fields: List[str], match_field: str = 'pk', batch_size: int = 100, case_insensitive_match: bool = False, status_cb: Optional[ Callable[[Tuple[List[Model], List[Model]]], Any] ] = None, ): """ Helper method that returns a context manager (_BulkUpdateOrCreateContextManager) that makes it easier to handle a stream of objects with unknown size. Call `.queue(obj)` and whenever `batch_size` is reached or the context terminates, this context manager will call `bulk_update_or_create` on the queue :param update_fields: fields that will be updated if record already exists (passed on to bulk_update) :param match_field: model field that will match existing records (defaults to "pk") :param batch_size: number of records to process in each batch (defaults to 100) :param case_insensitive_match: set to True if using MySQL with "ci" collations (defaults to False) :param status_cb: if set to a callable, status_cb is called a tuple of lists with ([created], [updated]) objects as they're yielded """ return _BulkUpdateOrCreateContextManager( self, update_fields, batch_size=batch_size, status_cb=status_cb, match_field=match_field, case_insensitive_match=case_insensitive_match, ) def bulk_update_or_create( self, objs: List[Model], update_fields: List[str], match_field: str = 'pk', batch_size: int = 100, case_insensitive_match: bool = False, yield_objects: bool = False, ) -> Union[ Generator[Tuple[List[Model], List[Model]], None, None], List[Tuple[List[Model], List[Model]]] ]: """ :param objs: model instances to be updated or created :param update_fields: fields that will be updated if record already exists (passed on to bulk_update) :param match_field: model fields that will match existing records (defaults to ["pk"]) :param batch_size: number of records to process in each batch (defaults to len(objs)) :param case_insensitive_match: set to True if using MySQL with "ci" collations (defaults to False) :param yield_objects: if True, method becomes a generator that will yield a tuple of lists with ([created], [updated]) objects. This is one tuple per each `batch`. If this is False, a single tuple of lists with ([created], [updated]) will be returned. """ r = self.__bulk_update_or_create( objs, update_fields, match_field, batch_size, case_insensitive_match, yield_objects, ) if yield_objects: return r return list(r) def __bulk_update_or_create_inner_methods(self, match_fields, case_insensitive_match): single_match_field = len(match_fields) == 1 def _obj_key_getter_sensitive(obj): # use to_python to coerce value same way it's done when fetched from DB # https://github.com/fopina/django-bulk-update-or-create/issues/11 # k = _match_field.to_python(_match_field.value_from_object(obj)) return tuple(match_field.to_python(match_field.value_from_object(obj)) for match_field in match_fields) _obj_key_getter = _obj_key_getter_sensitive if case_insensitive_match: def _obj_key_getter(obj): return tuple( map( lambda v: v.lower() if hasattr(v, 'lower') else v, _obj_key_getter_sensitive(obj), ) ) if single_match_field: def _obj_filter(obj_map): return models.Q(**{f'{match_fields[0].name}__in': obj_map.keys()}) def _obj_key_getter_single(obj): return _obj_key_getter(obj)[0] return _obj_key_getter_single, _obj_filter else: def _obj_filter(obj_map): return models.Q( *( models.Q(**{k.name: obj_key[i] for i, k in enumerate(match_fields)}) for obj_key in obj_map.keys() ), _connector=models.Q.OR, ) return _obj_key_getter, _obj_filter def __bulk_update_or_create( self, objs: List[Model], update_fields: List[str], match_field: str = 'pk', batch_size: Optional[int] = None, case_insensitive_match: bool = False, yield_objects: bool = False, ) -> Union[ Generator[Tuple[List[Model], List[Model]], None, None], None ]: # validations like bulk_update if batch_size is not None and batch_size < 0: raise ValueError('Batch size must be a positive integer.') if not update_fields: raise ValueError('update_fields cannot be empty') match_field = (match_field,) if isinstance(match_field, str) else match_field _match_fields = [self.model._meta.get_field(name) for name in match_field] _update_fields = [self.model._meta.get_field(name) for name in update_fields] if any(not f.concrete or f.many_to_many for f in _update_fields): raise ValueError('bulk_update_or_create() can only be used with concrete fields.') if any(f.primary_key for f in _update_fields): raise ValueError('bulk_update_or_create() cannot be used with primary key fields.') # generators not supported (for now?), as bulk_update doesn't either objs = list(objs) if not objs: return if batch_size is None: batch_size = len(objs) batches = (objs[i : i + batch_size] for i in range(0, len(objs), batch_size)) _obj_key_getter, _obj_filter = self.__bulk_update_or_create_inner_methods(_match_fields, case_insensitive_match) for batch in batches: obj_map = {_obj_key_getter(obj): obj for obj in batch} # mass select for bulk_update on existing ones to_update = self.filter(_obj_filter(obj_map)) for to_u in to_update: obj = obj_map[_obj_key_getter(to_u)] for _f in update_fields: setattr(to_u, _f, getattr(obj, _f)) del obj_map[_obj_key_getter(to_u)] self.bulk_update(to_update, update_fields) # .create on the remaining (bulk_create won't work on multi-table inheritance models...) created_objs = [] for obj in obj_map.values(): obj.save() created_objs.append(obj) if yield_objects: yield created_objs, to_update return created_objs, to_update class BulkUpdateOrCreateQuerySet(BulkUpdateOrCreateMixin, models.QuerySet): pass class _BulkUpdateOrCreateContextManager: def __init__( self, queryset: QuerySet, update_fields: List[str], batch_size: int = 500, status_cb: Optional[ Callable[[Tuple[List[Model], List[Model]]], Any] ] = None, **kwargs: Optional[Any] ): self._queue = [] self._queryset = queryset self._batch_size = batch_size assert status_cb is None or callable(status_cb) self._cb = status_cb self._fields = update_fields self._kwargs = kwargs def queue(self, obj: Model): self._queue.append(obj) if len(self._queue) >= self._batch_size: self.dump_queue() def queue_obj(self, **kwargs): """ proxy method to forward kwargs to self.model instantiation before calling queue() """ return self.queue(self._queryset.model(**kwargs)) def dump_queue(self): if not self._queue: return r = self._queryset.bulk_update_or_create( self._queue, self._fields, yield_objects=self._cb is not None, **self._kwargs, ) if self._cb is not None: for st in r: self._cb(st) self._queue = [] def __enter__(self): return self def __exit__( self, type: Optional[Type[BaseException]], value: Optional[BaseException], traceback: Optional[TracebackType] ): self.dump_queue() ================================================ FILE: setup.cfg ================================================ [metadata] name = django-bulk-update-or-create version = attr: bulk_update_or_create.__version__ description = bulk_update_or_create for Django model managers long_description = file: README.md long_description_content_type = text/markdown author = Filipe Pina author_email = fopina@gmail.com url = https://github.com/fopina/django-bulk-update-or-create/ download_url = https://pypi.org/project/django-bulk-update-or-create/ license = BSD license_files = LICENSE classifiers = Development Status :: 5 - Production/Stable Environment :: Web Environment Framework :: Django Framework :: Django :: 2.2 Framework :: Django :: 3.0 Intended Audience :: Developers License :: OSI Approved :: BSD License Operating System :: OS Independent Programming Language :: Python Programming Language :: Python :: 3 Programming Language :: Python :: 3 :: Only Programming Language :: Python :: 3.6 Programming Language :: Python :: 3.7 Programming Language :: Python :: 3.8 Programming Language :: Python :: 3.9 Topic :: Software Development :: Libraries :: Python Modules [options] python_requires = >=3.6 install_requires = Django >= 2.2 packages = find: include_package_data = true zip_safe = false [options.packages.find] exclude = tests tests.* [flake8] exclude = conf.py ignore = E203,W503 max-line-length = 120 [coverage:run] source = bulk_update_or_create ================================================ FILE: setup.py ================================================ #!/usr/bin/env python3 from setuptools import setup setup() ================================================ FILE: tests/README.md ================================================ # tests This is a django app to run tests on `bulk_update_or_create`. `manage.py` has been patched to include parent directory in `sys.path` so you can simply run: ``` ./manage.py test ``` `pytest.ini` added to make it easier to run tests from IDEs (such as VSCode), thanks to [pytest-django](https://github.com/pytest-dev/pytest-django/). `pytest` needs to be executed inside this directory (where `manage.py` is) and [requirements.txt](requirements.txt) need to be installed: ``` pip install -r requirements.txt ``` Use `make -f ../Makefile startmysql` to spin up a mysql docker (or set `DJANGO_SETTINGS_MODULE` env var to different settings). ## VSCode To run/debug the tests in VSCode: * make sure to open this folder (not parent) as workspace * or use multi-project workspaces: open parent and then select "Add Folder to Workspace" and add this one * select `Python > Configure Tests` and choose `pytest` :heavy_check_mark: ================================================ FILE: tests/manage.py ================================================ #!/usr/bin/env python """Django's command-line utility for administrative tasks.""" import os import sys sys.path.insert(0, os.path.join(os.path.dirname(__file__), os.path.pardir)) def main(): os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'settings') try: from django.core.management import execute_from_command_line except ImportError as exc: raise ImportError( "Couldn't import Django. Are you sure it's installed and " "available on your PYTHONPATH environment variable? Did you " "forget to activate a virtual environment?" ) from exc execute_from_command_line(sys.argv) if __name__ == '__main__': main() ================================================ FILE: tests/pytest.ini ================================================ # pytest.ini [pytest] DJANGO_SETTINGS_MODULE = settings_mysql minversion = 6.0 addopts = -ra -q testpaths = tests python_files = tests.py test_*.py ================================================ FILE: tests/requirements.txt ================================================ -e .. pytest==6.2.4 pytest-django==4.3.0 ================================================ FILE: tests/settings.py ================================================ """ Django settings for tests project. Generated by 'django-admin startproject' using Django 2.2. For more information on this file, see https://docs.djangoproject.com/en/2.2/topics/settings/ For the full list of settings and their values, see https://docs.djangoproject.com/en/2.2/ref/settings/ """ import os # Build paths inside the project like this: os.path.join(BASE_DIR, ...) BASE_DIR = os.path.dirname(os.path.abspath(__file__)) # Quick-start development settings - unsuitable for production # See https://docs.djangoproject.com/en/2.2/howto/deployment/checklist/ # SECURITY WARNING: keep the secret key used in production secret! SECRET_KEY = 'nf5e4!s1s+kjxd58j(z1b8il#520m9!-j+*2#1*h0m_hv_-is8' # SECURITY WARNING: don't run with debug turned on in production! DEBUG = True ALLOWED_HOSTS = [] # Application definition INSTALLED_APPS = [ 'django.contrib.admin', 'django.contrib.auth', 'django.contrib.contenttypes', 'django.contrib.sessions', 'django.contrib.messages', 'django.contrib.staticfiles', 'bulk_update_or_create', 'tests', ] MIDDLEWARE = [ 'django.middleware.security.SecurityMiddleware', 'django.contrib.sessions.middleware.SessionMiddleware', 'django.middleware.common.CommonMiddleware', 'django.middleware.csrf.CsrfViewMiddleware', 'django.contrib.auth.middleware.AuthenticationMiddleware', 'django.contrib.messages.middleware.MessageMiddleware', 'django.middleware.clickjacking.XFrameOptionsMiddleware', ] ROOT_URLCONF = 'urls' TEMPLATES = [ { 'BACKEND': 'django.template.backends.django.DjangoTemplates', 'DIRS': [], 'APP_DIRS': True, 'OPTIONS': { 'context_processors': [ 'django.template.context_processors.debug', 'django.template.context_processors.request', 'django.contrib.auth.context_processors.auth', 'django.contrib.messages.context_processors.messages', ], }, }, ] # Database # https://docs.djangoproject.com/en/2.2/ref/settings/#databases DATABASES = { 'default': { 'ENGINE': 'django.db.backends.sqlite3', 'NAME': os.path.join(BASE_DIR, 'db.sqlite3'), } } # Password validation # https://docs.djangoproject.com/en/2.2/ref/settings/#auth-password-validators AUTH_PASSWORD_VALIDATORS = [ { 'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator', }, { 'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator', }, { 'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator', }, { 'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator', }, ] # Internationalization # https://docs.djangoproject.com/en/2.2/topics/i18n/ LANGUAGE_CODE = 'en-us' TIME_ZONE = 'UTC' USE_I18N = True USE_L10N = True USE_TZ = True # Static files (CSS, JavaScript, Images) # https://docs.djangoproject.com/en/2.2/howto/static-files/ STATIC_URL = '/static/' ================================================ FILE: tests/settings_mysql.py ================================================ from settings import * # noqa DATABASES = { 'default': { 'ENGINE': 'django.db.backends.mysql', 'NAME': 'mysql', 'USER': 'root', 'PASSWORD': 'root', 'HOST': '127.0.0.1', 'PORT': '8877', 'TEST': {'CHARSET': 'utf8mb4', 'COLLATION': 'utf8mb4_bin'}, } } ================================================ FILE: tests/settings_postgresql.py ================================================ from settings import * # noqa DATABASES = { 'default': { 'ENGINE': 'django.db.backends.postgresql_psycopg2', 'NAME': 'postgres', 'USER': 'postgres', 'PASSWORD': 'postgres', 'HOST': '127.0.0.1', 'PORT': '8878', } } ================================================ FILE: tests/tests/__init__.py ================================================ ================================================ FILE: tests/tests/management/__init__.py ================================================ ================================================ FILE: tests/tests/management/commands/__init__.py ================================================ ================================================ FILE: tests/tests/management/commands/bulk_it.py ================================================ from time import time from django.core.management.base import BaseCommand from tests.models import RandomData from contextlib import contextmanager @contextmanager def timing(description: str) -> None: start = time() yield ellapsed_time = time() - start print(f"{description}: {ellapsed_time}") class Command(BaseCommand): help = 'Lock it!' def _loop(self, n=1000, offset=0, data_offset=0): for i in range(n): RandomData.objects.update_or_create( uuid=i + offset, defaults={'data': str(i + offset + data_offset)}, ) def _bulk(self, n=1000, offset=0, data_offset=0): items = [RandomData(uuid=i + offset, data=str(i + offset + data_offset)) for i in range(n)] RandomData.objects.bulk_update_or_create(items, ['data'], match_field='uuid') def _clear(self): RandomData.objects.all().delete() def _check(self, n=1000, min=0, max=999): values = sorted([int(x.data) for x in RandomData.objects.all()]) assert len(values) == n assert values[0] == min assert values[-1] == max def handle(self, *args, **options): self._clear() with timing('loop of update_or_create - all creates'): self._loop() self._check() with timing('loop of update_or_create - all updates'): self._loop(data_offset=1) self._check(1000, 1, 1000) with timing('loop of update_or_create - half half'): self._loop(offset=500, data_offset=2) self._check(1500, 1, 1501) self._clear() with timing('bulk_update_or_create - all creates'): self._bulk() self._check() with timing('bulk_update_or_create - all updates'): self._bulk(data_offset=1) self._check(1000, 1, 1000) with timing('bulk_update_or_create - half half'): self._bulk(offset=500, data_offset=2) self._check(1500, 1, 1501) ================================================ FILE: tests/tests/migrations/0001_initial.py ================================================ # Generated by Django 2.2 on 2020-07-14 10:04 from django.db import migrations, models class Migration(migrations.Migration): initial = True dependencies = [] operations = [ migrations.CreateModel( name='RandomData', fields=[ ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), ('uuid', models.IntegerField(unique=True)), ('value', models.IntegerField(default=0)), ('data', models.CharField(blank=True, max_length=200, null=True)), ], ), ] ================================================ FILE: tests/tests/migrations/__init__.py ================================================ ================================================ FILE: tests/tests/models.py ================================================ from django.db import models from bulk_update_or_create import BulkUpdateOrCreateQuerySet class RandomData(models.Model): objects = BulkUpdateOrCreateQuerySet.as_manager() uuid = models.IntegerField(unique=True) value = models.IntegerField(default=0) data = models.CharField(max_length=200, null=True, blank=True) def __str__(self): return f'{self.uuid} - {self.data} - {self.value}' ================================================ FILE: tests/tests/tests.py ================================================ from django.test import TestCase from django.core.exceptions import FieldDoesNotExist from tests.models import RandomData class Test(TestCase): def test_all_create(self): items = [RandomData(uuid=i, data=i) for i in range(10)] # 1 select + 10 creates, all new with self.assertNumQueries(11): RandomData.objects.bulk_update_or_create(items, ['data'], match_field='uuid') self.assertEqual(RandomData.objects.count(), 10) self.assertEqual(sorted(int(x.data) for x in RandomData.objects.all()), list(range(10))) def test_update_some(self): self.test_all_create() items = [RandomData(uuid=i + 5, data=i + 10) for i in range(10)] # 1 select, 1 bulk update, 5 create with self.assertNumQueries(7): RandomData.objects.bulk_update_or_create(items, ['data'], match_field='uuid') self.assertEqual(RandomData.objects.count(), 15) self.assertEqual( sorted(int(x.data) for x in RandomData.objects.all()), list(range(5)) + list(range(10, 20)), ) def test_all_update(self): self.test_all_create() items = [RandomData(uuid=i, data=i + 10) for i in range(10)] # 1 select, 1 bulk update with self.assertNumQueries(2): RandomData.objects.bulk_update_or_create(items, ['data'], match_field='uuid') self.assertEqual(RandomData.objects.count(), 10) self.assertEqual( sorted(int(x.data) for x in RandomData.objects.all()), list(range(10, 20)), ) def test_update_some_generator(self): self.test_all_create() items = [RandomData(uuid=i + 5, data=i + 10) for i in range(10)] updated_items = RandomData.objects.bulk_update_or_create( items, ['data'], match_field='uuid', yield_objects=True ) # not executed yet, just generator self.assertEqual(RandomData.objects.count(), 10) updated_items = list(updated_items) self.assertEqual(RandomData.objects.count(), 15) self.assertEqual( sorted(int(x.data) for x in RandomData.objects.all()), list(range(5)) + list(range(10, 20)), ) # one batch self.assertEqual(len(updated_items), 1) # tuple with (created, updated) self.assertEqual(len(updated_items[0]), 2) # 5 were created - 15 to 19 self.assertEqual(len(updated_items[0][0]), 5) self.assertEqual( sorted(int(x.data) for x in updated_items[0][0]), list(range(15, 20)), ) for x in updated_items[0][0]: self.assertIsNotNone(x.pk) # 5 were updated - 10 to 14 (from 5 to 9) self.assertEqual(len(updated_items[0][1]), 5) self.assertEqual( sorted(int(x.data) for x in updated_items[0][1]), list(range(10, 15)), ) for x in updated_items[0][1]: self.assertIsNotNone(x.pk) def test_errors(self): with self.assertRaises(ValueError) as cm: RandomData.objects.bulk_update_or_create([None], []) self.assertEqual(cm.exception.args, ('update_fields cannot be empty',)) with self.assertRaises(ValueError) as cm: RandomData.objects.bulk_update_or_create([None], ['data'], batch_size=-1) self.assertEqual(cm.exception.args, ('Batch size must be a positive integer.',)) with self.assertRaises(FieldDoesNotExist) as cm: RandomData.objects.bulk_update_or_create([RandomData(uuid=1, data='x')], ['data'], match_field='x') self.assertEqual(cm.exception.args, ("RandomData has no field named 'x'",)) with self.assertRaises(FieldDoesNotExist) as cm: RandomData.objects.bulk_update_or_create([RandomData(uuid=1, data='x')], ['x'], match_field='uuid') self.assertEqual(cm.exception.args, ("RandomData has no field named 'x'",)) def test_case_sensitivity(self): """ match_fields should always be unique but for test simplicity (no extra model), using RandomData.data """ RandomData.objects.bulk_update_or_create( [ RandomData(uuid=1, data='x'), ], ['uuid'], match_field='data', ) self.assertEqual(RandomData.objects.count(), 1) self.assertEqual(sorted(x.data for x in RandomData.objects.all()), ['x']) RandomData.objects.bulk_update_or_create( [ RandomData(uuid=2, data='X'), ], ['uuid'], match_field='data', case_insensitive_match=True, ) self.assertEqual(RandomData.objects.count(), 1) self.assertEqual(sorted(x.data for x in RandomData.objects.all()), ['x']) RandomData.objects.bulk_update_or_create( [ RandomData(uuid=3, data='X'), ], ['uuid'], match_field='data', ) self.assertEqual(RandomData.objects.count(), 2) self.assertEqual(sorted(x.data for x in RandomData.objects.all()), ['X', 'x']) def test_update_some_with_context_manager(self): self.test_all_create() with self.assertNumQueries(7): with RandomData.objects.bulk_update_or_create_context( ['data'], match_field='uuid', batch_size=500 ) as bulkit: for i in range(10): bulkit.queue(RandomData(uuid=i + 5, data=i + 10)) self.assertEqual(RandomData.objects.count(), 15) self.assertEqual( sorted(int(x.data) for x in RandomData.objects.all()), list(range(5)) + list(range(10, 20)), ) # smaller batch_size to test more than 1 batch and test status_cb cb_calls = [] def _cb(x): # nothing created self.assertEqual(x[0], []) cb_calls.extend(x[1]) # 4 all-update batches = 8 queries with self.assertNumQueries(8): with RandomData.objects.bulk_update_or_create_context( ['data'], match_field='uuid', batch_size=3, status_cb=_cb ) as bulkit: for i in range(10): bulkit.queue(RandomData(uuid=i, data=i + 20)) self.assertEqual(RandomData.objects.count(), 15) self.assertEqual( # 20 to 29 ... 15 to 19 sorted(int(x.data) for x in RandomData.objects.all()), list(range(15, 30)), ) self.assertEqual(len(cb_calls), 10) for i in range(10): self.assertEqual(cb_calls[i].uuid, i) self.assertEqual(cb_calls[i].data, i + 20) def test_context_manager_exact_batch_size(self): # test made to hit *empty* queue on context manager __exit__()! with self.assertNumQueries(11): with RandomData.objects.bulk_update_or_create_context( ['data'], match_field='uuid', batch_size=10 ) as bulkit: for i in range(10): bulkit.queue(RandomData(uuid=i + 5, data=i + 10)) self.assertSum(145) def test_context_manager_queue_kwargs(self): with self.assertNumQueries(11): with RandomData.objects.bulk_update_or_create_context( ['data'], match_field='uuid', batch_size=10 ) as bulkit: for i in range(10): bulkit.queue_obj(uuid=i + 5, data=i + 10) self.assertSum(145) def test_empty_objs(self): """ test change of behaviour for empty objs to match bulk_update https://github.com/fopina/django-bulk-update-or-create/issues/10 """ with self.assertNumQueries(0): RandomData.objects.bulk_update([], fields=['data']) with self.assertNumQueries(0): RandomData.objects.bulk_update_or_create([], ['data'], match_field='uuid') def test_keyerror(self): """ test for issue https://github.com/fopina/django-bulk-update-or-create/issues/11 eg: using string values in model IntegerFields cause obj_map lookups to fail on existing objects """ self.test_all_create() self.assertSum(45) # this works RandomData.objects.bulk_update_or_create( [RandomData(uuid=i, data=i + 1) for i in range(10)], ['data'], match_field='uuid' ) self.assertSum(55) # but this *DID* not - it does now though! RandomData.objects.bulk_update_or_create( [RandomData(uuid=str(i), data=i + 2) for i in range(10)], ['data'], match_field='uuid' ) self.assertSum(65) def assertSum(self, total): self.assertEqual(sum(int(x.data) for x in RandomData.objects.all()), total) def test_multiple_match_fields_update(self): items = [RandomData(uuid=i, value=i % 5, data=i) for i in range(10)] RandomData.objects.bulk_create(items) items = [RandomData(uuid=i, value=i % 5, data=i + 10) for i in range(10)] # 1 select, 1 bulk update with self.assertNumQueries(2): RandomData.objects.bulk_update_or_create(items, ['data'], match_field=('uuid', 'value')) self.assertEqual(RandomData.objects.count(), 10) self.assertEqual( sorted(int(x.data) for x in RandomData.objects.all()), list(range(10, 20)), ) def test_multiple_match_fields_update_create(self): items = [RandomData(uuid=i, value=i % 5, data=i) for i in range(10)] RandomData.objects.bulk_create(items) items = [RandomData(uuid=i + 5, value=i % 5, data=i + 10) for i in range(10)] # 1 select, 1 bulk update, 5 inserts with self.assertNumQueries(7): RandomData.objects.bulk_update_or_create(items, ['data'], match_field=('uuid', 'value')) self.assertEqual(RandomData.objects.count(), 15) self.assertEqual( list(int(x.data) for x in RandomData.objects.order_by('uuid')), [*range(5), *range(10, 15), *range(15, 20)], ) def test_multiple_match_fields_update_pk(self): items = [RandomData(uuid=i, value=i % 5, data=str(i)) for i in range(10)] RandomData.objects.bulk_create(items) items = [RandomData(uuid=i + 100, value=i % 5, data=str(i)) for i in range(10)] # 1 select, 1 bulk update with self.assertNumQueries(2): RandomData.objects.bulk_update_or_create(items, ['uuid'], match_field=('data', 'value')) self.assertEqual(RandomData.objects.count(), 10) self.assertEqual( list(x.uuid for x in RandomData.objects.order_by('data', 'value')), list(range(100, 110)), ) ================================================ FILE: tests/urls.py ================================================ """tests URL Configuration The `urlpatterns` list routes URLs to views. For more information please see: https://docs.djangoproject.com/en/2.2/topics/http/urls/ Examples: Function views 1. Add an import: from my_app import views 2. Add a URL to urlpatterns: path('', views.home, name='home') Class-based views 1. Add an import: from other_app.views import Home 2. Add a URL to urlpatterns: path('', Home.as_view(), name='home') Including another URLconf 1. Import the include() function: from django.urls import include, path 2. Add a URL to urlpatterns: path('blog/', include('blog.urls')) """ from django.contrib import admin from django.urls import path urlpatterns = [ path('admin/', admin.site.urls), ] ================================================ FILE: tox.ini ================================================ [tox] envlist = flake8 py{37,38,39}-dj{22,30,32}-{sqlite,postgresql,mysql} [testenv] deps = dj22: Django==2.2.* dj30: Django==3.0.* dj32: Django==3.2.* postgresql: psycopg2-binary mysql: mysqlclient coverage setenv = PYTHONPATH = {toxinidir} sqlite: DJANGO_SETTINGS_MODULE = settings postgresql: DJANGO_SETTINGS_MODULE = settings_postgresql mysql: DJANGO_SETTINGS_MODULE = settings_mysql whitelist_externals = make pip_pre = True commands = make coverage TEST_ARGS='{posargs:tests}' [testenv:flake8] basepython = python3 commands = make flake8 deps = flake8 skip_install = true [testenv:style] basepython = python3 commands = make style_check deps = black>=19.10b0 flake8 skip_install = true