Repository: mkalioby/leopards Branch: main Commit: 3e86850c1f88 Files: 13 Total size: 41.2 KB Directory structure: gitextract_8g1d786o/ ├── .github/ │ └── workflows/ │ └── workflow.yml ├── .gitignore ├── LICENSE ├── README.md ├── USAGE.md ├── leopards/ │ ├── Q.py │ ├── Query.py │ └── __init__.py ├── setup.py ├── tests/ │ ├── __init__.py │ ├── test_file.py │ └── test_leopards.py └── tox.ini ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/workflows/workflow.yml ================================================ name: test on: [push] env: MODE: Test jobs: tests: name: "Python ${{ matrix.python-version }} on ${{ matrix.os }}" runs-on: "${{ matrix.os }}" strategy: fail-fast: false matrix: os: - ubuntu-latest - macos-latest - windows-latest python-version: - "3.8" - "3.9" - "3.10" - "3.11" - "3.12" - "3.13" steps: # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it - name: Checkout code uses: actions/checkout@v2 - name: Setup Python ${{ matrix.python-version }} uses: actions/setup-python@v2 with: python-version: "${{ matrix.python-version }}" - name: Install requirements run: | pip install pytest pytest-cov - name: Run tests run: | pytest --cov=leopards tests/ --cov-report term-missing ================================================ FILE: .gitignore ================================================ env/ .idea/ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ pip-wheel-metadata/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. 
*.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover *.py,cover .hypothesis/ .pytest_cache/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 db.sqlite3-journal # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # Jupyter Notebook .ipynb_checkpoints # IPython profile_default/ ipython_config.py # pyenv .python-version # pipenv # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. # However, in case of collaboration, if having platform-specific dependencies or dependencies # having no cross-platform support, pipenv may install dependencies that don't work, or not # install all needed dependencies. #Pipfile.lock # PEP 582; used by e.g. github.com/David-OConnor/pyflow __pypackages__/ # Celery stuff celerybeat-schedule celerybeat.pid # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .dmypy.json dmypy.json # Pyre type checker .pyre/ ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2022 Mohamed El-Kalioby Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial 
portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: README.md ================================================ # Leopards [![PyPI version](https://badge.fury.io/py/leopards.svg)](https://badge.fury.io/py/leopards) [![Python Versions](https://img.shields.io/pypi/pyversions/leopards.svg)](https://img.shields.io/pypi/pyversions/leopards.svg) ![Coverage](https://img.shields.io/badge/coverage-100%25-success) ![build status](https://github.com/mkalioby/leopards/actions/workflows/workflow.yml/badge.svg) Leopards is a way to query list of dictionaries or objects as if you are filtering in DBMS. You can get dicts/objects that are matched by OR, AND or NOT or all of them. As you can see in the comparison they are much faster than Pandas. ## Installation ```shell pip install leopards ``` ## Usage ```python from leopards import Q l = [{"name":"John","age":"16"}, {"name":"Mike","age":"19"},{"name":"Sarah","age":"21"}] filtered= Q(l,{'name__contains':"k", "age__lt":20}) print(list(filtered)) ``` output ```python [{'name': 'Mike', 'age': '19'}] ``` The above filtration can be written as ```python from leopards import Q l = [{"name": "John", "age": "16"}, {"name": "Mike", "age": "19"}, {"name": "Sarah", "age": "21"}] filtered = Q(l, name__contains="k", age__lt=20) ``` **Notes:** 1. `Q` returns an iterator which can be converted to a list by calling `list`. 2. 
Even though `age` was a `str` in the dict, the value in the query dict was an `int`, so Leopards automatically converted the value in the dict to match the query data type. This behaviour can be disabled by passing `False` to the `convert_types` parameter. ## Supported filters * `eq`: equals; this is the default filter. * `gt`: greater than. * `gte`: greater than or equal. * `lt`: less than. * `lte`: less than or equal. * `in`: the value is in a list or a tuple. * e.g. age__in=[10,20,30] * `contains`: contains a substring as in the example. * `icontains`: case-insensitive `contains`. * `startswith`: checks if a value starts with the query string. * `istartswith`: case-insensitive `startswith`. * `endswith`: checks if a value ends with the query string. * `iendswith`: case-insensitive `endswith`. * `isnull`: checks if the value matches any of NULL_VALUES which are `('', '.', None, "None", "null", "NULL")` * e.g. `filter__isnull=True` or `filter__isnull=False` For `eq`,`gt`,`gte`,`lt`,`lte`, `in`, `contains`, `icontains`, `startswith`,`istartswith`, `endswith` and `iendswith`, you can prefix the filter with `n` to negate the result, e.g. `nin`, which is equivalent to `not in`. ## Advanced examples This section covers the use of `OR`, `AND` and `NOT`. ### Usage of `OR` `OR` or `__or__` takes a list of query dictionaries and matches as soon as one of them evaluates to `True`. ```python from leopards import Q l = [{"name": "John", "age": "16"}, {"name": "Mike", "age": "19"}, {"name": "Sarah", "age": "21"}] filtered = Q(l, {"OR": [{"name__contains": "k"}, {"age__gte": 21}]}) print(list(filtered)) ``` output ```python [{'name': 'Mike', 'age': '19'}, {'name': 'Sarah', 'age': '21'}] ``` ### Usage of `NOT` `NOT` or `__not__` takes a query dict and negates its result. 
```python from leopards import Q l = [{"name": "John", "age": "16"}, {"name": "Mike", "age": "19"}, {"name": "Sarah", "age": "21"}] filtered = Q(l, {"age__gt": 15, "NOT": {"age__eq": 19}}) print(list(filtered)) ``` output ```python [{'name': 'John', 'age': '16'}, {'name': 'Sarah', 'age': '21'}] ``` ### Usage of `AND` `AND` or `__and__` takes a list of query dictionaries and rejects the item as soon as one of them evaluates to `False`. ```python from leopards import Q l = [{"name": "John", "age": "16"}, {"name": "Mike", "age": "19"}, {"name": "Sarah", "age": "21"}] filtered = Q(l, {"__and__": [{"age__gte": 15}, {"age__lt": 21}]}) print(list(filtered)) ``` output ```python [{'name': 'John', 'age': '16'}, {'name': 'Mike', 'age': '19'}] ``` ## Aggregating Data You can run the following aggregations * Count * Max * Min * Sum * Avg ### Count Count the records for each combination of values in the aggregated columns. ```python l = [{"name": "John", "age": "16"}, {"name": "Mike", "age": "19"}, {"name": "Sarah", "age": "21"},{"name":"John","age":"19"}] from leopards import Count count = Count(l,['age']) ``` output ```python [{"age":"16","count":1},{"age":"19","count":2}, {"age":"21","count":1}] ``` ### Max Find the maximum value of a column within each group of the aggregated columns. ```python l = [{"name": "John", "age": "16"}, {"name": "Mike", "age": "19"}, {"name": "Sarah", "age": "21"},{"name":"John","age":"19"}] from leopards import Max count = Max(l,"age",['name'],dtype=int) ``` output ```python [{'name': 'John', 'age': 19}, {'name': 'Mike', 'age': 19}, {'name': 'Sarah', 'age': 21}] ``` **Notes:** * If you don't pass the aggregation columns, the maximum will be found across the whole dataset. 
* You can pass the datatype of the column to convert it on the fly while evaluating. ```python l = [{"name": "John", "age": "16"}, {"name": "Mike", "age": "19"}, {"name": "Sarah", "age": "21"},{"name":"John","age":"19"}] from leopards import Max m = Max(l,"age",dtype=int) ``` output ```python [{'age': 21}] ``` ### Min Find the minimum value of a column within each group of the aggregated columns. ```python l = [{"name": "John", "age": "16"}, {"name": "Mike", "age": "19"}, {"name": "Sarah", "age": "21"},{"name":"John","age":"19"}] from leopards import Min m = Min(l,"age",['name']) ``` output ```python [{'name': 'John', 'age': '16'}, {'name': 'Mike', 'age': '19'}, {'name': 'Sarah', 'age': '21'}] ``` **Notes:** * If you don't pass the aggregation columns, the minimum will be found across the whole dataset. * You can pass the datatype of the column to convert it on the fly while evaluating. ### Sum and Avg Like Min and Max but only works with integers and floats. ## Comparison with Pandas This was measured on Python 3.8 running on Ubuntu 22.04 on i7 11th generation and 32 GB of RAM. | Comparison | Pandas | Leopards | |-------------------------------------------------------------|----------|-------------| | Package Size
(Lower is better) | 29.8 MB | **7.5 KB** | | import Time (Worst)
(Lower is better) | 146 ms | **1.05 ms** | | load 10k CSV lines
(Lower is better) [1] | 0.295s | **0.138s** | | get first matched record
(Lower is better) | 0.310s | **0.017s** | | print all filtered records (10/10k)
(Lower is better) | 0.310s | **0.137s** | | filter by integers
(Lower is better) | 0.316s | **0.138s** | [1] This loaded the whole CSV into memory for the sake of a fair comparison. Nevertheless, Leopards can work with `DictReader` as an iterable, which executes in **0.014s** because it then handles the file line by line. Thanks to [Asma Tahir](https://github.com/tahirasma) for the Pandas stats. ## Contributors * [saeedesmaili](https://github.com/saeedesmaili) ## Tutorials * [Usage with different file types](https://dev.to/mkalioby/leopards-with-different-file-types-1d3) * [Work on CSV Files with Leopards](https://dev.to/mkalioby/working-with-csv-by-leopards-5bmd) ================================================ FILE: USAGE.md ================================================ # Usage This document covers how to use leopards with different file types. ## CSV `DictReader` from the `csv` module can be used to read csv files as dictionaries as shown below. ```python import csv from leopards import Q data = csv.DictReader(open("data.csv")) res = Q(data, {"age__gt": 15}) ``` ## TSV `DictReader` from the `csv` module can be used to read tsv files as dictionaries as shown below. ```python import csv from leopards import Q data = csv.DictReader(open("data.csv"), delimiter="\t") res = Q(data, {"age__gt": 15}) ``` ## JSON `json.load` can be used to read json files as dictionaries as shown below. ```python import json from leopards import Q data = json.load(open("data.json")) res = Q(data, {"age__gt": 15}) ``` ## XLS The `xlrd` library can be used to read xls files as dictionaries as shown below. ```python import xlrd from leopards import Q wb = xlrd.open_workbook("data.xls") sh = wb.sheets()[0] keys = sh.row_values(0) data =[] for n in range(1, sh.nrows): data.append({key: sh.row_values(n)[n2] for n2, key in enumerate(keys)}) res = Q(data, {"age__gt": 15}) ``` ## ClickHouse The `clickhouse_connect` library can be used to read data from ClickHouse as dictionaries as shown below. 
```python import clickhouse_connect client = clickhouse_connect.get_client( host='localhost', username='default',password='' ) rows = client.execute("SELECT * FROM TABLE") data = rows.named_results() res = Q(data, {"age__gt": 15}) ``` ## MySQL `mysql-client` library can be used to read data from MySQL as dictionaries as shown below. ```python import MySQLdb from MySQLdb.cursors import DictCursor from leopards import Q db=MySQLdb.connect(user='root',password='PASS', database="db", cursorclass=DictCursor) cursor = db.cursor() cursor.execute("SELECT * FROM TABLE") data = cursor.fetchall() res = Q(data, {"age__gt": 15}) ``` ================================================ FILE: leopards/Q.py ================================================ NULL_VALUES = ('', '.', None, "None", "null", "NULL") def get_key_op(key:str): """Separate at the key to name and op""" if "__" in key: cols = key.split("__") k = cols[0] op = cols[1] else: k = key op = "eq" return k, op def convert_value(v:str, value_type:type): """Converts str to the data type of the value""" from decimal import Decimal if value_type is float: return float(v) elif value_type is int: return int(v) elif value_type is bytes: return bytes(v, 'ascii') elif value_type is Decimal: return Decimal(v) return str(v) def evaluate(value:type, op:str, qv:type): """Evaluate the current value again the query value based on type.""" if op == "eq": return value == qv elif op == "gt": return value > qv elif op == "gte": return value >= qv elif op == "lt": return value < qv elif op == "lte": return value <= qv elif op == "in": return value in qv elif op == "contains": return qv in value elif op == "icontains": return qv.lower() in value.lower() elif op == "startswith": return value.startswith(qv) elif op == "istartswith": return value.lower().startswith(qv.lower()) elif op == "endswith": return value.endswith(qv) elif op == "iendswith": return value.lower().endswith(qv.lower()) elif op == "isnull": res = value in NULL_VALUES return 
res == qv def check(value:type, op:str, qv:type): """Checks for negation""" if op.startswith("n"): op = op[1:] return not evaluate(value, op, qv) return evaluate(value, op, qv) ================================================ FILE: leopards/Query.py ================================================ from functools import partial try: from Q import get_key_op, convert_value,check except ModuleNotFoundError: # pragma: no cover from .Q import get_key_op, convert_value,check # pragma: no cover def Q(iterable:list, query:dict=None, convert_types=True, **kwargs): """ Query a list of dictionary or objects by a query dict. :param iterable: the iterable of dicts :param query: dictionary holding your query :param convert_types: try to convert the field in data to match query type :return: Iterable of type filter """ def filter_list(item, **kwargs): if type(item) in (str, int, float, list, tuple): raise TypeError("The item in the list shall be dict or object") if type(item) is not dict: item= item.__dict__ for k,v in kwargs.items(): if k in("OR" , "__or__"): for q in v: if filter_list(item,**q): return True return False elif k in("AND","__and__"): for q in v: if not filter_list(item,**q): return False return True elif k in ("NOT", "__not__"): return not filter_list(item,**v) key, op = get_key_op(k) value = item[key] if convert_types and type(v) != type(value): value = convert_value(value, type(v)) if not check(value, op, v): return False return True if query is None: query={} query.update(kwargs) p = partial(filter_list, **query) return filter(p, iterable) def Count(iterable:list, cols:list=None, col_name:str='count'): """ :param iterable: iterable to count :param cols: columns used to aggregated :param col_name: the name of count column, default: count :return: iterable of dicts """ new_dict={} for item in iterable: if type(item) is not dict: item=item.__dict__ # pragma: no cover if cols: d={k:v for k,v in item.items() if k in cols} k = ":".join(d.values()) else: k="ALL" d={} 
if not k in new_dict: new_dict[k]=d new_dict[k][col_name]=0 new_dict[k][col_name]+=1 return new_dict.values() def Max(iterable:list, col_name:str, cols:list=None, dtype=str): """ :param iterable: iterable to loop through :param col_name: the name of the column that Max shall be computed against :param cols: columns to aggregate on :return: iterable of dicts """ new_dict={} for item in iterable: if type(item) is not dict: item=item.__dict__ # pragma: no cover if cols: d={k:v for k,v in item.items() if k in cols} k = ":".join(d.values()) else: k='ALL' d={} v: str = item[col_name] if dtype!=str: v = dtype(v) if not k in new_dict: new_dict[k]=d new_dict[k][col_name]=v if v>new_dict[k][col_name]: new_dict[k][col_name] = v return new_dict.values() def Min(iterable:list, col_name:str, cols:list=None, dtype=str): """ :param iterable: iterable to loop through :param col_name: the name of the column that Min shall be computed against :param cols: columns to aggregate on :param dtype: data type of the cols :return: iterable of dicts """ new_dict={} for item in iterable: if type(item) is not dict: item=item.__dict__ # pragma: no cover if cols: d={k:v for k,v in item.items() if k in cols} k = ":".join(d.values()) else: k="ALL" d={} v:str=item[col_name] if v != str: v=dtype(v) if not k in new_dict: new_dict[k]=d new_dict[k][col_name]=v if vC:p.Lys695Asn,PTPN22:NM_001308297:exon17:c.2178G>C:p.Lys726Asn,PTPN22:NM_001193431:exon18:c.2166G>C:p.Lys722Asn,PTPN22:NM_015967:exon18:c.2250G>C:p.Lys750Asn", "refGene exonic;splicing Info": "NM_012411:exon16:114372213-114372329:0:115;NM_015967:exon18:114372213-114372329:0:115;NM_001308297:exon17:114372213-114372329:0:115;NM_001193431:exon18:114372213-114372329:0:115"}, {"chromosome": "chr1", "start pos": "145360608", "end pos": "145360608", "reference": "T", "observed": "A", "quality": "52.12", "filter": "PASS", "zygosity": "hom", "refGene function": "exonic", "refGene gene": "NBPF19", "refGene splice info": "", "refGene exonic function": 
"nonsynonymous SNV", "refGene AA change": "NBPF19:NM_001351365:exon69:c.8414T>A:p.Leu2805Gln", "refGene exonic;splicing Info": ""}, {"chromosome": "chr1", "start pos": "145360700", "end pos": "145360700", "reference": "C", "observed": "T", "quality": "9.32", "filter": "MG_SNP_Filter", "zygosity": "hom", "refGene function": "exonic", "refGene gene": "NBPF19", "refGene splice info": "", "refGene exonic function": "nonsynonymous SNV", "refGene AA change": "NBPF19:NM_001351365:exon69:c.8506C>T:p.Arg2836Cys", "refGene exonic;splicing Info": ""}, {"chromosome": "chr10", "start pos": "125780762", "end pos": "125780762", "reference": "-", "observed": "GGGC", "quality": "159.95", "filter": ".", "zygosity": "hom", "refGene function": "exonic;splicing", "refGene gene": "CHST15;CHST15", "refGene splice info": "NM_015892:exon6:c.1347+10->GCCC;NM_001270764:exon6:c.1347+10->GCCC", "refGene exonic function": "frameshift insertion", "refGene AA change": "CHST15:NM_001270765:exon6:c.1356_1357insGCCC:p.Pro453fs,CHST15:NM_014863:exon6:c.1356_1357insGCCC:p.Pro453fs", "refGene exonic;splicing Info": "NM_015892:exon6:125780771-125780928:-10:166;NM_001270765:exon6:125779168-125780928:1593:166;NM_014863:exon6:125779168-125780928:1593:166;NM_001270764:exon6:125780771-125780928:-10:166"}, {"chromosome": "chr10", "start pos": "126714752", "end pos": "126714752", "reference": "G", "observed": "A", "quality": "140.3", "filter": ".", "zygosity": "hom", "refGene function": "exonic", "refGene gene": "CTBP2", "refGene splice info": "", "refGene exonic function": "nonsynonymous SNV", "refGene AA change": "CTBP2:NM_022802:exon1:c.1577C>T:p.Pro526Leu", "refGene exonic;splicing Info": ""}, {"chromosome": "chr11", "start pos": "12316344", "end pos": "12316344", "reference": "-", "observed": "CTCCTCCTCCTC", "quality": "68.98", "filter": "PASS", "zygosity": "hom", "refGene function": "exonic", "refGene gene": "MICALCL", "refGene splice info": "", "refGene exonic function": "nonframeshift insertion", 
"refGene AA change": "MICALCL:NM_032867:exon3:c.1366_1367insCTCCTCCTCCTC:p.Ala456delinsAlaProProProPro", "refGene exonic;splicing Info": ""}, {"chromosome": "chr11", "start pos": "60899366", "end pos": "60899366", "reference": "G", "observed": "T", "quality": "7.79", "filter": ".", "zygosity": "hom", "refGene function": "exonic", "refGene gene": "VPS37C", "refGene splice info": "", "refGene exonic function": "nonsynonymous SNV", "refGene AA change": "VPS37C:NM_017966:exon5:c.994C>A:p.Pro332Thr", "refGene exonic;splicing Info": ""}, {"chromosome": "chr11", "start pos": "60899396", "end pos": "60899396", "reference": "C", "observed": "T", "quality": "7.79", "filter": ".", "zygosity": "hom", "refGene function": "exonic", "refGene gene": "VPS37C", "refGene splice info": "", "refGene exonic function": "nonsynonymous SNV", "refGene AA change": "VPS37C:NM_017966:exon5:c.964G>A:p.Gly322Ser", "refGene exonic;splicing Info": ""}, {"chromosome": "chr12", "start pos": "21375307", "end pos": "21375309", "reference": "AAA", "observed": "-", "quality": "63.71", "filter": ".", "zygosity": "hom", "refGene function": "splicing", "refGene gene": "SLCO1B1", "refGene splice info": "NM_006446:exon13:r.spl", "refGene exonic function": "", "refGene AA change": "", "refGene exonic;splicing Info": "NM_006446:exon13:21375233-21375298:73:+9"}, {"chromosome": "chr13", "start pos": "32972752", "end pos": "32972752", "reference": "T", "observed": "C", "quality": "245.66", "filter": ".", "zygosity": "hom", "refGene function": "exonic", "refGene gene": "BRCA2", "refGene splice info": "", "refGene exonic function": "nonsynonymous SNV", "refGene AA change": "BRCA2:NM_000059:exon27:c.10102T>C:p.Ser3368Pro", "refGene exonic;splicing Info": ""}, {"chromosome": "chr19", "start pos": "43865321", "end pos": "43865321", "reference": "T", "observed": "A", "quality": "17.17", "filter": "PASS", "zygosity": "het", "refGene function": "exonic", "refGene gene": "CD177", "refGene splice info": "", "refGene exonic 
function": "unknown", "refGene AA change": "UNKNOWN", "refGene exonic;splicing Info": ""}, {"chromosome": "chr20", "start pos": "10389422", "end pos": "10389422", "reference": "T", "observed": "C", "quality": "49.79", "filter": "PASS", "zygosity": "het", "refGene function": "exonic", "refGene gene": "MKKS", "refGene splice info": "", "refGene exonic function": "nonsynonymous SNV", "refGene AA change": "MKKS:NM_018848:exon4:c.1015A>G:p.Ile339Val,MKKS:NM_170784:exon4:c.1015A>G:p.Ile339Val", "refGene exonic;splicing Info": ""}, {"chromosome": "chr20", "start pos": "31672703", "end pos": "31672703", "reference": "G", "observed": "A", "quality": "175.97", "filter": ".", "zygosity": "hom", "refGene function": "exonic;splicing", "refGene gene": "BPIFB4;BPIFB4", "refGene splice info": "", "refGene exonic function": "nonsynonymous SNV", "refGene AA change": "BPIFB4:NM_182519:exon4:c.683G>A:p.Arg228His", "refGene exonic;splicing Info": "NM_182519:exon4:31672697-31672802:5:99"}, {"chromosome": "chr22", "start pos": "29885581", "end pos": "29885604", "reference": "AGGCCAAGTCCCCAGAGAAGGAAG", "observed": "-", "quality": "32.62", "filter": "PASS", "zygosity": "het", "refGene function": "exonic", "refGene gene": "NEFH", "refGene splice info": "", "refGene exonic function": "nonframeshift deletion", "refGene AA change": "NEFH:NM_021076:exon4:c.1952_1975del:p.651_659del", "refGene exonic;splicing Info": ""}, {"chromosome": "chrX", "start pos": "7811643", "end pos": "7811643", "reference": "C", "observed": "A", "quality": "118.3", "filter": ".", "zygosity": "hom", "refGene function": "exonic", "refGene gene": "VCX", "refGene splice info": "", "refGene exonic function": "nonsynonymous SNV", "refGene AA change": "VCX:NM_013452:exon3:c.207C>A:p.Ser69Arg", "refGene exonic;splicing Info": ""}, {"chromosome": "chrX", "start pos": "38145640", "end pos": "38145640", "reference": "C", "observed": "T", "quality": "7.77", "filter": ".", "zygosity": "hom", "refGene function": "exonic", "refGene 
gene": "RPGR", "refGene splice info": "", "refGene exonic function": "nonsynonymous SNV", "refGene AA change": "RPGR:NM_001034853:exon15:c.2612G>A:p.Gly871Glu", "refGene exonic;splicing Info": ""}, {"chromosome": "chrX", "start pos": "66765158", "end pos": "66765158", "reference": "-", "observed": "GCA", "quality": "73.91", "filter": "PASS", "zygosity": "hom", "refGene function": "exonic", "refGene gene": "AR", "refGene splice info": "", "refGene exonic function": "nonframeshift insertion", "refGene AA change": "AR:NM_000044:exon1:c.170_171insGCA:p.Leu57delinsLeuGln,AR:NM_001348061:exon1:c.170_171insGCA:p.Leu57delinsLeuGln,AR:NM_001348063:exon1:c.170_171insGCA:p.Leu57delinsLeuGln,AR:NM_001348064:exon1:c.170_171insGCA:p.Leu57delinsLeuGln", "refGene exonic;splicing Info": ""}, {"chromosome": "chrX", "start pos": "71379704", "end pos": "71379704", "reference": "T", "observed": "C", "quality": "80.11", "filter": ".", "zygosity": "hom", "refGene function": "exonic;splicing", "refGene gene": "FLJ44635;FLJ44635", "refGene splice info": "", "refGene exonic function": "nonsynonymous SNV", "refGene AA change": "FLJ44635:NM_207422:exon2:c.25T>C:p.Tyr9His", "refGene exonic;splicing Info": "NM_207422:exon2:71379686-71381600:17:1896"}, {"chromosome": "chrX", "start pos": "100749053", "end pos": "100749053", "reference": "-", "observed": "GACTGA", "quality": "34.48", "filter": ".", "zygosity": "hom", "refGene function": "exonic", "refGene gene": "ARMCX4", "refGene splice info": "", "refGene exonic function": "nonframeshift insertion", "refGene AA change": "ARMCX4:NM_001256155:exon2:c.5477_5478insGACTGA:p.Gly1826delinsGlyThrGlu", "refGene exonic;splicing Info": ""}, {"chromosome": "chrX", "start pos": "100749065", "end pos": "100749065", "reference": "A", "observed": "G", "quality": "44.99", "filter": "PASS", "zygosity": "hom", "refGene function": "exonic", "refGene gene": "ARMCX4", "refGene splice info": "", "refGene exonic function": "nonsynonymous SNV", "refGene AA change": 
"ARMCX4:NM_001256155:exon2:c.5489A>G:p.Glu1830Gly", "refGene exonic;splicing Info": ""}, {"chromosome": "chrX", "start pos": "101573515", "end pos": "101573515", "reference": "C", "observed": "T", "quality": "6.99", "filter": ".", "zygosity": "hom", "refGene function": "splicing", "refGene gene": "NXF2", "refGene splice info": "NM_022053:exon9:c.724+14C>T", "refGene exonic function": "", "refGene AA change": "", "refGene exonic;splicing Info": "NM_022053:exon9:101573431-101573501:83:+14"}, {"chromosome": "chrX", "start pos": "103349504", "end pos": "103349504", "reference": "C", "observed": "T", "quality": "115.48", "filter": ".", "zygosity": "hom", "refGene function": "exonic", "refGene gene": "SLC25A53", "refGene splice info": "", "refGene exonic function": "nonsynonymous SNV", "refGene AA change": "SLC25A53:NM_001012755:exon2:c.437G>A:p.Arg146His", "refGene exonic;splicing Info": ""}, {"chromosome": "chrX", "start pos": "111000973", "end pos": "111000973", "reference": "C", "observed": "T", "quality": "77.11", "filter": ".", "zygosity": "hom", "refGene function": "exonic;splicing", "refGene gene": "ALG13;ALG13", "refGene splice info": "", "refGene exonic function": "nonsynonymous SNV", "refGene AA change": "ALG13:NM_001324293:exon23:c.2408C>T:p.Ala803Val,ALG13:NM_001257230:exon25:c.2582C>T:p.Ala861Val,ALG13:NM_001257234:exon25:c.2582C>T:p.Ala861Val,ALG13:NM_001257237:exon25:c.2582C>T:p.Ala861Val,ALG13:NM_001324292:exon25:c.2894C>T:p.Ala965Val,ALG13:NM_001099922:exon26:c.3131C>T:p.Ala1044Val,ALG13:NM_001257231:exon26:c.2897C>T:p.Ala966Val", "refGene exonic;splicing Info": "NM_001099922:exon26:111000815-111000990:157:17;NM_001257230:exon25:111000815-111000990:157:17;NM_001324292:exon25:111000815-111000990:157:17;NM_001257231:exon26:111000815-111000990:157:17;NM_001257234:exon25:111000815-111000990:157:17;NM_001257237:exon25:111000815-111000990:157:17;NM_001324293:exon23:111000815-111000990:157:17"}, {"chromosome": "chrX", "start pos": "118985711", "end pos": 
"118985711", "reference": "-", "observed": "TTTTTTT", "quality": "93.71", "filter": "PASS", "zygosity": "hom", "refGene function": "splicing", "refGene gene": "UPF3B", "refGene splice info": "NM_023010:exon2:c.263+19->AAAAAAA", "refGene exonic function": "", "refGene AA change": "", "refGene exonic;splicing Info": "NM_023010:exon2:118985729-118985836:-19:125;NM_080632:exon2:118985729-118985836:-19:125"}, {"chromosome": "chrX", "start pos": "128896018", "end pos": "128896018", "reference": "C", "observed": "G", "quality": "133.37", "filter": "PASS", "zygosity": "hom", "refGene function": "exonic", "refGene gene": "XPNPEP2", "refGene splice info": "", "refGene exonic function": "nonsynonymous SNV", "refGene AA change": "XPNPEP2:NM_003399:exon18:c.1640C>G:p.Ala547Gly", "refGene exonic;splicing Info": ""}, {"chromosome": "chrX", "start pos": "135429119", "end pos": "135429119", "reference": "T", "observed": "G", "quality": "142.39", "filter": ".", "zygosity": "hom", "refGene function": "exonic", "refGene gene": "ADGRG4", "refGene splice info": "", "refGene exonic function": "nonsynonymous SNV", "refGene AA change": "ADGRG4:NM_153834:exon6:c.3254T>G:p.Ile1085Ser", "refGene exonic;splicing Info": ""}, {"chromosome": "chrY", "start pos": "9175204", "end pos": "9175204", "reference": "T", "observed": "G", "quality": "7.77", "filter": ".", "zygosity": "hom", "refGene function": "exonic;splicing", "refGene gene": "TSPY1;TSPY10", "refGene splice info": "", "refGene exonic function": "nonsynonymous SNV", "refGene AA change": "TSPY10:NM_001282469:exon1:c.86T>G:p.Leu29Trp,TSPY10:NM_001320962:exon1:c.86T>G:p.Leu29Trp,TSPY1:NM_001320964:exon1:c.86T>G:p.Leu29Trp", "refGene exonic;splicing Info": "NM_001320962:exon1:9175072-9175337:131:133;NM_001282469:exon1:9175072-9175337:131:133;NM_001320964:exon1:9175072-9175205:131:1"}] ================================================ FILE: tests/test_leopards.py ================================================ #! 
/usr/bin/env python import unittest from decimal import Decimal try: from .test_file import * except ModuleNotFoundError: from test_file import * def Q(data,query=None,convert_types=True,**kwargs): from leopards.Query import Q return list(Q(data,query,convert_types,**kwargs)) class Employee: def __init__(self,name,age): self.name = name self.age = age employees = [Employee('Ahmed',12),Employee('Mohamed',24)] class TestConditions(unittest.TestCase): def test_eq(self): self.assertEqual(len(Q(l, {"chromosome": 'chr1', 'start pos': '114372214', 'end pos': '114372214'})), 1) def test_neq(self): self.assertEqual(len(Q(l, chromosome__neq='chr1')), 24) def test_neq2(self): self.assertEqual(len(Q(l, {"chromosome": 'chr1', 'start pos__neq': '114372214'})), 3) def test_gt(self): self.assertEqual(len(Q(l, {"chromosome": 'chr1', 'start pos__gt': '114372214', 'end pos__gt': '114372214'})), 2) def test_gte(self): self.assertEqual(len(Q(l, {"chromosome": 'chr1', 'start pos__gte': '114372214', 'end pos__gte': '114372214'})), 3) def test_lt(self): self.assertEqual(len(Q(l, {"chromosome": 'chr1', 'start pos__lt': '114372214'})), 1) def test_lte(self): self.assertEqual(len(Q(l, {"chromosome": 'chr1', 'end pos__lte': '114372214'})), 2) def test_in(self): self.assertEqual(len(Q(l, chromosome__in=('chr1', 'chr22'))), 5) def test_nin(self): self.assertEqual(len(Q(l, chromosome__nin=('chr1', 'chr22'))), 23) def test_contains(self): self.assertEqual(len(Q(l, chromosome__contains='chr2')), 3) def test_ncontains(self): self.assertEqual(len(Q(l, chromosome__ncontains='chr1')), 16) def test_contains2(self): self.assertEqual(len(Q(l, chromosome__contains='Chr2')), 0) def test_ncontains2(self): self.assertEqual(len(Q(l, chromosome__ncontains='Chr2')), 28) def test_icontains(self): self.assertEqual(len(Q(l, chromosome__icontains='Chr2')), 3) def test_icontains2(self): self.assertEqual(len(Q(l, chromosome__icontains='chr2')), 3) def test_nicontains(self): self.assertEqual(len(Q(l, 
chromosome__nicontains='Chr2')), 25) def test_nicontains2(self): self.assertEqual(len(Q(l, chromosome__nicontains='chr2')), 25) def test_null(self): self.assertEqual(len(Q(l, filter__isnull=True)), 16) def test_null2(self): self.assertEqual(len(Q(l, {"filter__isnull": False})), 12) def test_startswith(self): self.assertEqual(len(Q(l, {"reference__startswith": 'T'})), 7) def test_istartswith(self): self.assertEqual(len(Q(l, {"reference__istartswith": 't'})), 7) def test_nstartswith(self): self.assertEqual(len(Q(l, {"reference__nstartswith": 'T'})), 21) def test_endswith(self): self.assertEqual(len(Q(l, {"observed__endswith": 'C'})), 5) def test_iendsswith(self): self.assertEqual(len(Q(l, {"observed__iendswith": 'C'})), 5) def test_nendswith(self): self.assertEqual(len(Q(l, {"observed__nendswith": 'C'})), 23) class TestConverations(unittest.TestCase): def test_int(self): self.assertEqual(len(Q(l, {"chromosome": 'chr1', 'start pos': 114372214, 'end pos': 114372214})), 1) def test_bytes(self): self.assertEqual(len(Q(l, {"chromosome": b'chr1', 'start pos': 114372214, 'end pos': 114372214})), 1) def test_float(self): res = Q(l, {"quality__gte": 133.47, "quality__lt": 142.39}) self.assertEqual(len(res), 1) def test_decimal(self): res = Q(l, {"quality__gte": Decimal(133.47), "quality__lte": Decimal(142.39)}) self.assertEqual(len(res), 1) class TestCombination(unittest.TestCase): def test_or(self): res = Q(l, {"OR": [{"chromosome": 'chr1', 'start pos': 114372214, 'end pos': 114372214}, {"reference": 'C', 'observed': 'A'}]}) self.assertEqual(len(res), 2) def test_and(self): res = Q(l, {"__and__": [{"chromosome": 'chr1', 'start pos': 114372214, 'end pos': 114372214}, {"reference__neq": 'G'}]}) self.assertEqual(len(res), 1) def test_not(self): self.assertEqual(len(Q(l, {"reference": "T", "NOT": {"observed": "C"}})), 4) class TestObjects(unittest.TestCase): def test_obj(self): self.assertEqual(len(Q(employees,age__gt=13)),1) class TestException(unittest.TestCase): def 
testInt(self): self.assertRaises(TypeError, Q, [1,2,3],{"i__gt":1}) class TestAgg(unittest.TestCase): def testCountDefault(self): from leopards import Count res = Count(l, ["chromosome"]) self.assertEqual(list(Q(res, chromosome="chr20"))[0]["count"], 2) def testCountDefault2(self): from leopards import Count res = Count(l) self.assertEqual(list(res)[0]["count"], 28) def testCountCol(self): from leopards import Count res= Count(l,["chromosome"],"chr_count") self.assertEqual(list(Q(res,chromosome="chr1"))[0]["chr_count"],4) def testCountCol2(self): from leopards import Count res= Count(l,col_name="chr_count") self.assertEqual(list(res)[0]['chr_count'],28) def testMax(self): from leopards import Max res = Max(l, "start pos", ["chromosome"]) self.assertEqual(list(Q(res, chromosome="chr10"))[0]["start pos"], "126714752") def testMax2(self): from leopards import Max res = Max(l, "quality",dtype=float) self.assertEqual(list(res)[0]['quality'], 245.66) def testMin(self): from leopards import Min res = Min(l, "end pos", ["chromosome"],dtype=float) self.assertEqual(list(Q(res, chromosome="chrX"))[0]["end pos"], 7811643) def testMin2(self): from leopards import Min res = Min(l, "quality", dtype=float) self.assertEqual(list(res)[0]["quality"], 6.99) def testSum(self): from leopards import Sum res = Sum(l, "quality", ["chromosome"]) v="%.2f" % list(Q(res, chromosome="chr11"))[0]["quality"] self.assertEqual(v, "84.56") def testSum2(self): from leopards import Sum res = Sum(l, "quality") v="%.2f" % list(res)[0]["quality"] self.assertEqual(v, "2133.04") def testAvg(self): from leopards import Avg res = Avg(l, "quality", ["chromosome"]) v="%.2f" % list(Q(res, chromosome="chr11"))[0]["quality"] self.assertEqual(v, "28.19") def testAvg2(self): from leopards import Avg res = Avg(l, "quality") v="%.2f" % list(res)[0]["quality"] self.assertEqual(v, "76.18") if __name__ == '__main__': unittest.main() ================================================ FILE: tox.ini 
================================================ [tox] envlist= py37, py38, py39 py310 py311 py312 py313 [testenv] deps = allowlist_externals = pytest commands = pytest --cov=leopards tests/ --cov-report term-missing