Repository: shenweichen/DeepCTR
Branch: master
Commit: e8f4d818f9b4
Files: 220
Total size: 867.4 KB
Directory structure:
gitextract_0q19foz8/
├── .gitattributes
├── .github/
│ ├── ISSUE_TEMPLATE/
│ │ ├── bug_report.md
│ │ ├── feature_request.md
│ │ └── question.md
│ └── workflows/
│ ├── ci.yml
│ └── ci2.yml
├── .gitignore
├── .readthedocs.yml
├── .travis.yml
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── deepctr/
│ ├── __init__.py
│ ├── contrib/
│ │ ├── __init__.py
│ │ ├── rnn.py
│ │ ├── rnn_v2.py
│ │ └── utils.py
│ ├── estimator/
│ │ ├── __init__.py
│ │ ├── feature_column.py
│ │ ├── inputs.py
│ │ ├── models/
│ │ │ ├── __init__.py
│ │ │ ├── afm.py
│ │ │ ├── autoint.py
│ │ │ ├── ccpm.py
│ │ │ ├── dcn.py
│ │ │ ├── deepfefm.py
│ │ │ ├── deepfm.py
│ │ │ ├── fibinet.py
│ │ │ ├── fnn.py
│ │ │ ├── fwfm.py
│ │ │ ├── nfm.py
│ │ │ ├── pnn.py
│ │ │ ├── wdl.py
│ │ │ └── xdeepfm.py
│ │ └── utils.py
│ ├── feature_column.py
│ ├── inputs.py
│ ├── layers/
│ │ ├── __init__.py
│ │ ├── activation.py
│ │ ├── core.py
│ │ ├── interaction.py
│ │ ├── normalization.py
│ │ ├── sequence.py
│ │ └── utils.py
│ ├── models/
│ │ ├── __init__.py
│ │ ├── afm.py
│ │ ├── autoint.py
│ │ ├── ccpm.py
│ │ ├── dcn.py
│ │ ├── dcnmix.py
│ │ ├── deepfefm.py
│ │ ├── deepfm.py
│ │ ├── difm.py
│ │ ├── edcn.py
│ │ ├── fgcnn.py
│ │ ├── fibinet.py
│ │ ├── flen.py
│ │ ├── fnn.py
│ │ ├── fwfm.py
│ │ ├── ifm.py
│ │ ├── mlr.py
│ │ ├── multitask/
│ │ │ ├── __init__.py
│ │ │ ├── esmm.py
│ │ │ ├── mmoe.py
│ │ │ ├── ple.py
│ │ │ └── sharedbottom.py
│ │ ├── nfm.py
│ │ ├── onn.py
│ │ ├── pnn.py
│ │ ├── sequence/
│ │ │ ├── __init__.py
│ │ │ ├── bst.py
│ │ │ ├── dien.py
│ │ │ ├── din.py
│ │ │ └── dsin.py
│ │ ├── wdl.py
│ │ └── xdeepfm.py
│ └── utils.py
├── docs/
│ ├── Makefile
│ ├── make.bat
│ ├── requirements.readthedocs.txt
│ └── source/
│ ├── Estimators.rst
│ ├── Examples.md
│ ├── FAQ.md
│ ├── Features.md
│ ├── History.md
│ ├── Layers.rst
│ ├── Model_Methods.md
│ ├── Models.rst
│ ├── Quick-Start.md
│ ├── conf.py
│ ├── deepctr.contrib.rnn.rst
│ ├── deepctr.contrib.rst
│ ├── deepctr.contrib.utils.rst
│ ├── deepctr.estimator.feature_column.rst
│ ├── deepctr.estimator.inputs.rst
│ ├── deepctr.estimator.models.afm.rst
│ ├── deepctr.estimator.models.autoint.rst
│ ├── deepctr.estimator.models.ccpm.rst
│ ├── deepctr.estimator.models.dcn.rst
│ ├── deepctr.estimator.models.deepfefm.rst
│ ├── deepctr.estimator.models.deepfm.rst
│ ├── deepctr.estimator.models.fibinet.rst
│ ├── deepctr.estimator.models.fnn.rst
│ ├── deepctr.estimator.models.fwfm.rst
│ ├── deepctr.estimator.models.nfm.rst
│ ├── deepctr.estimator.models.pnn.rst
│ ├── deepctr.estimator.models.rst
│ ├── deepctr.estimator.models.wdl.rst
│ ├── deepctr.estimator.models.xdeepfm.rst
│ ├── deepctr.estimator.rst
│ ├── deepctr.estimator.utils.rst
│ ├── deepctr.feature_column.rst
│ ├── deepctr.inputs.rst
│ ├── deepctr.layers.activation.rst
│ ├── deepctr.layers.core.rst
│ ├── deepctr.layers.interaction.rst
│ ├── deepctr.layers.normalization.rst
│ ├── deepctr.layers.rst
│ ├── deepctr.layers.sequence.rst
│ ├── deepctr.layers.utils.rst
│ ├── deepctr.models.afm.rst
│ ├── deepctr.models.autoint.rst
│ ├── deepctr.models.ccpm.rst
│ ├── deepctr.models.dcn.rst
│ ├── deepctr.models.dcnmix.rst
│ ├── deepctr.models.deepfefm.rst
│ ├── deepctr.models.deepfm.rst
│ ├── deepctr.models.deepfwfm.rst
│ ├── deepctr.models.difm.rst
│ ├── deepctr.models.edcn.rst
│ ├── deepctr.models.fgcnn.rst
│ ├── deepctr.models.fibinet.rst
│ ├── deepctr.models.flen.rst
│ ├── deepctr.models.fnn.rst
│ ├── deepctr.models.ifm.rst
│ ├── deepctr.models.mlr.rst
│ ├── deepctr.models.multitask.esmm.rst
│ ├── deepctr.models.multitask.mmoe.rst
│ ├── deepctr.models.multitask.ple.rst
│ ├── deepctr.models.multitask.sharedbottom.rst
│ ├── deepctr.models.nfm.rst
│ ├── deepctr.models.onn.rst
│ ├── deepctr.models.pnn.rst
│ ├── deepctr.models.rst
│ ├── deepctr.models.sequence.bst.rst
│ ├── deepctr.models.sequence.dien.rst
│ ├── deepctr.models.sequence.din.rst
│ ├── deepctr.models.sequence.dsin.rst
│ ├── deepctr.models.wdl.rst
│ ├── deepctr.models.xdeepfm.rst
│ ├── deepctr.rst
│ ├── deepctr.utils.rst
│ ├── index.rst
│ └── modules.rst
├── examples/
│ ├── avazu_sample.txt
│ ├── census-income.sample
│ ├── criteo_sample.te.tfrecords
│ ├── criteo_sample.tr.tfrecords
│ ├── criteo_sample.txt
│ ├── gen_tfrecords.py
│ ├── movielens_age_vocabulary.csv
│ ├── movielens_sample.txt
│ ├── run_all.sh
│ ├── run_classification_criteo.py
│ ├── run_classification_criteo_hash.py
│ ├── run_classification_criteo_multi_gpu.py
│ ├── run_dien.py
│ ├── run_din.py
│ ├── run_dsin.py
│ ├── run_estimator_pandas_classification.py
│ ├── run_estimator_tfrecord_classification.py
│ ├── run_flen.py
│ ├── run_mtl.py
│ ├── run_multivalue_movielens.py
│ ├── run_multivalue_movielens_hash.py
│ ├── run_multivalue_movielens_vocab_hash.py
│ └── run_regression_movielens.py
├── setup.cfg
├── setup.py
└── tests/
├── README.md
├── __init__.py
├── feature_test.py
├── layers/
│ ├── __init__.py
│ ├── activations_test.py
│ ├── core_test.py
│ ├── interaction_test.py
│ ├── normalization_test.py
│ ├── sequence_test.py
│ ├── utils_test.py
│ └── vocabulary_example.csv
├── models/
│ ├── AFM_test.py
│ ├── AutoInt_test.py
│ ├── BST_test.py
│ ├── CCPM_test.py
│ ├── DCNMix_test.py
│ ├── DCN_test.py
│ ├── DIEN_test.py
│ ├── DIFM_test.py
│ ├── DIN_test.py
│ ├── DSIN_test.py
│ ├── DeepFEFM_test.py
│ ├── DeepFM_test.py
│ ├── EDCN_test.py
│ ├── FGCNN_test.py
│ ├── FLEN_test.py
│ ├── FNN_test.py
│ ├── FiBiNET_test.py
│ ├── FwFM_test.py
│ ├── IFM_test.py
│ ├── MLR_test.py
│ ├── MTL_test.py
│ ├── NFM_test.py
│ ├── ONN_test.py
│ ├── PNN_test.py
│ ├── WDL_test.py
│ ├── __init__.py
│ └── xDeepFM_test.py
├── utils.py
├── utils_mtl.py
└── utils_test.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitattributes
================================================
# Auto detect text files and perform LF normalization
* text=auto
# Custom for Visual Studio
*.cs diff=csharp
# Standard to msysgit
*.doc diff=astextplain
*.DOC diff=astextplain
*.docx diff=astextplain
*.DOCX diff=astextplain
*.dot diff=astextplain
*.DOT diff=astextplain
*.pdf diff=astextplain
*.PDF diff=astextplain
*.rtf diff=astextplain
*.RTF diff=astextplain
================================================
FILE: .github/ISSUE_TEMPLATE/bug_report.md
================================================
---
name: Bug report
about: Create a report to help us improve
title: ''
labels: ''
assignees: ''
---
**Describe the bug(问题描述)**
A clear and concise description of what the bug is. Standalone code that reproduces the issue is especially helpful.
**To Reproduce(复现步骤)**
Steps to reproduce the behavior:
1. Go to '...'
2. Click on '....'
3. Scroll down to '....'
4. See error
**Operating environment(运行环境):**
- python version [e.g. 3.6, 3.7]
- tensorflow version [e.g. 1.4.0, 1.15.0, 2.10.0]
- deepctr version [e.g. 0.9.2]
**Additional context**
Add any other context about the problem here.
================================================
FILE: .github/ISSUE_TEMPLATE/feature_request.md
================================================
---
name: Feature request
about: Suggest an idea for this project
title: ''
labels: enhancement&feature request
assignees: ''
---
**Is your feature request related to a problem? Please describe.**
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
**Describe the solution you'd like**
A clear and concise description of what you want to happen.
**Describe alternatives you've considered**
A clear and concise description of any alternative solutions or features you've considered.
**Additional context**
Add any other context or screenshots about the feature request here.
================================================
FILE: .github/ISSUE_TEMPLATE/question.md
================================================
---
name: Question
about: Ask any question ~
title: ''
labels: question
assignees: ''
---
Please refer to the [FAQ](https://deepctr-doc.readthedocs.io/en/latest/FAQ.html) in doc and search for the [related issues](https://github.com/shenweichen/DeepCTR/issues) before you ask the question.
**Describe the question(问题描述)**
A clear and concise description of what the question is.
**Additional context**
Add any other context about the problem here.
**Operating environment(运行环境):**
- python version [e.g. 3.6]
- tensorflow version [e.g. 1.4.0, 1.15.0, 2.10.0]
- deepctr version [e.g. 0.9.2]
================================================
FILE: .github/workflows/ci.yml
================================================
name: CI_TF2
on:
push:
path:
- 'deepctr/*'
- 'tests/*'
pull_request:
path:
- 'deepctr/*'
- 'tests/*'
jobs:
build:
runs-on: ubuntu-latest
timeout-minutes: 180
strategy:
matrix:
python-version: [ 3.6,3.7,3.8, 3.9,3.10.7 ]
tf-version: [ 2.6.0,2.7.0,2.8.0,2.9.0,2.10.0 ]
exclude:
- python-version: 3.7
tf-version: 1.4.0
- python-version: 3.7
tf-version: 1.15.0
- python-version: 3.8
tf-version: 1.4.0
- python-version: 3.8
tf-version: 1.14.0
- python-version: 3.8
tf-version: 1.15.0
- python-version: 3.6
tf-version: 2.7.0
- python-version: 3.6
tf-version: 2.8.0
- python-version: 3.6
tf-version: 2.9.0
- python-version: 3.6
tf-version: 2.10.0
- python-version: 3.9
tf-version: 1.4.0
- python-version: 3.9
tf-version: 1.15.0
- python-version: 3.9
tf-version: 2.2.0
- python-version: 3.9
tf-version: 2.5.0
- python-version: 3.9
tf-version: 2.6.0
- python-version: 3.9
tf-version: 2.7.0
- python-version: 3.10.7
tf-version: 1.4.0
- python-version: 3.10.7
tf-version: 1.15.0
- python-version: 3.10.7
tf-version: 2.2.0
- python-version: 3.10.7
tf-version: 2.5.0
- python-version: 3.10.7
tf-version: 2.6.0
- python-version: 3.10.7
tf-version: 2.7.0
steps:
- uses: actions/checkout@v3
- name: Setup python environment
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
pip3 install -q tensorflow==${{ matrix.tf-version }}
pip install -q protobuf==3.19.0
pip install -q requests
pip install -e .
- name: Test with pytest
timeout-minutes: 180
run: |
pip install -q pytest
pip install -q pytest-cov
pip install -q python-coveralls
pytest --cov=deepctr --cov-report=xml
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3.1.0
with:
token: ${{secrets.CODECOV_TOKEN}}
file: ./coverage.xml
flags: pytest
name: py${{ matrix.python-version }}-tf${{ matrix.tf-version }}
================================================
FILE: .github/workflows/ci2.yml
================================================
name: CI_TF1
on:
push:
path:
- 'deepctr/*'
- 'tests/*'
pull_request:
path:
- 'deepctr/*'
- 'tests/*'
jobs:
build:
runs-on: ubuntu-latest
timeout-minutes: 360
strategy:
matrix:
python-version: [ 3.6,3.7 ]
tf-version: [ 1.15.0 ]
exclude:
- python-version: 3.7
tf-version: 1.4.0
- python-version: 3.7
tf-version: 1.12.0
- python-version: 3.7
tf-version: 1.15.0
- python-version: 3.8
tf-version: 1.4.0
- python-version: 3.8
tf-version: 1.14.0
- python-version: 3.8
tf-version: 1.15.0
- python-version: 3.6
tf-version: 2.7.0
- python-version: 3.6
tf-version: 2.8.0
- python-version: 3.6
tf-version: 2.9.0
- python-version: 3.6
tf-version: 2.10.0
- python-version: 3.9
tf-version: 1.4.0
- python-version: 3.9
tf-version: 1.15.0
- python-version: 3.9
tf-version: 2.2.0
- python-version: 3.9
tf-version: 2.5.0
- python-version: 3.9
tf-version: 2.6.0
- python-version: 3.9
tf-version: 2.7.0
- python-version: 3.10.7
tf-version: 1.4.0
- python-version: 3.10.7
tf-version: 1.15.0
- python-version: 3.10.7
tf-version: 2.2.0
- python-version: 3.10.7
tf-version: 2.5.0
- python-version: 3.10.7
tf-version: 2.6.0
- python-version: 3.10.7
tf-version: 2.7.0
steps:
- uses: actions/checkout@v3
- name: Setup python environment
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
pip3 install -q tensorflow==${{ matrix.tf-version }}
pip install -q protobuf==3.19.0
pip install -q requests
pip install -e .
- name: Test with pytest
timeout-minutes: 360
run: |
pip install -q pytest
pip install -q pytest-cov
pip install -q python-coveralls
pytest --cov=deepctr --cov-report=xml
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3.1.0
with:
token: ${{secrets.CODECOV_TOKEN}}
file: ./coverage.xml
flags: pytest
name: py${{ matrix.python-version }}-tf${{ matrix.tf-version }}
================================================
FILE: .gitignore
================================================
*.h5
*.ipynb
.pytest_cache/
.vscode/
tests/unused/*
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
.idea/
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask instance folder
instance/
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# IPython Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# dotenv
.env
# virtualenv
venv/
ENV/
# Spyder project settings
.spyderproject
# Rope project settings
.ropeproject
# =========================
# Operating System Files
# =========================
# OSX
# =========================
.DS_Store
.AppleDouble
.LSOverride
# Thumbnails
._*
# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk
# Windows
# =========================
# Windows image file caches
Thumbs.db
ehthumbs.db
# Folder config file
Desktop.ini
# Recycle Bin used on file shares
$RECYCLE.BIN/
# Windows Installer files
*.cab
*.msi
*.msm
*.msp
# Windows shortcuts
*.lnk
================================================
FILE: .readthedocs.yml
================================================
build:
image: latest
python:
version: 3.6
================================================
FILE: .travis.yml
================================================
#sudo: required
#dist: trusty xenial
language: python
python:
- "2.7" #time out
#- "3.4"
- "3.5"
- "3.6"
#- "3.7"
env:
# - TF_VERSION=1.13.1
# - TF_VERSION=1.12.2
- TF_VERSION=1.4.0
#Not Support- TF_VERSION=1.7.0
#Not Support- TF_VERSION=1.7.1
#Not Support- TF_VERSION=1.8.0
#- TF_VERSION=1.8.0
# - TF_VERSION=1.11.0
#- TF_VERSION=1.6.0
- TF_VERSION=2.0.0b1
#- TF_VERSION=1.13.2
- TF_VERSION=1.14.0
matrix:
allow_failures:
- python: "2.7"
env: TF_VERSION=1.6.0 # to speed up
- python: "2.7"
env: TF_VERSION=2.0.0b1
- python: "3.4"
- python: "3.5"
- python: "3.7"
- env: TF_VERSION=1.5.0 # passes locally, but sometimes fails on CI
- env: TF_VERSION=1.7.0
- env: TF_VERSION=1.7.1
- env: TF_VERSION=1.8.0
- env: TF_VERSION=1.12.0 # too slow
- env: TF_VERSION=1.13.1 # too slow
- env: TF_VERSION=1.13.2 # too slow
- env: TF_VERSION=1.14.0 # too slow
fast_finish: true
cache: pip
# command to install dependencies
install:
- pip install -q pytest-cov==2.4.0 #>=2.4.0,<2.6
- pip install -q python-coveralls
- pip install -q codacy-coverage
- pip install -q tensorflow==$TF_VERSION
- pip install -q pandas
- pip install -q packaging
- pip install -e .
# command to run tests
script:
- pytest --cov=deepctr
notifications:
recipients:
- weichenswc@163.com
on_success: change
on_failure: change
after_success:
- coveralls
- coverage xml
- python-codacy-coverage -r coverage.xml
================================================
FILE: CONTRIBUTING.md
================================================
This project is under active development, and we welcome developers who would like to participate.
# Join us
If you
- are familiar with and interested in CTR prediction algorithms
- are familiar with TensorFlow
- have spare time to learn and develop
- are familiar with git
please send a brief introduction of your background and experience to weichenswc@163.com. You are welcome to join us!
# Creating a pull request
1. **Become a collaborator**: Send an email with an introduction and your GitHub account name to weichenswc@163.com and wait for an invitation to become a collaborator.
2. **Fork&Dev**: Fork your own branch (`dev_yourname`) in `DeepCTR` from the `master` branch for development. If `master` is updated during the development process, remember to merge it into `dev_yourname` regularly.
3. **Testing**: Test the logical correctness and effectiveness of your changes once code development on the `dev_yourname` branch is finished.
4. **Pre-release**: After testing, contact weichenswc@163.com for pre-release integration; usually your branch `dev_yourname` will be merged into the `release` branch by squash merge.
5. **Release a new version**: After confirming that no further changes are needed, the `release` branch will be merged into `master` and a new Python package will be released on PyPI.
# Discussions
https://github.com/shenweichen/DeepCTR/discussions
================================================
FILE: LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright 2017-present Weichen Shen
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: README.md
================================================
# DeepCTR
[](https://pypi.org/project/deepctr)
[](https://pypi.org/project/deepctr)
[](https://pepy.tech/project/deepctr)
[](https://pypi.org/project/deepctr)
[](https://github.com/shenweichen/deepctr/issues)
[](https://deepctr-doc.readthedocs.io/)

[](https://codecov.io/gh/shenweichen/DeepCTR)
[](https://www.codacy.com/gh/shenweichen/DeepCTR?utm_source=github.com&utm_medium=referral&utm_content=shenweichen/DeepCTR&utm_campaign=Badge_Grade)
[](./README.md#DisscussionGroup)
[](https://github.com/shenweichen/deepctr/blob/master/LICENSE)
DeepCTR is an **Easy-to-use**, **Modular** and **Extendible** package of deep-learning based CTR models along with lots of
core component layers which can be used to easily build custom models. You can use any complex model with `model.fit()`
and `model.predict()`.
- Provide `tf.keras.Model` like interfaces for **quick experiment**. [example](https://deepctr-doc.readthedocs.io/en/latest/Quick-Start.html#getting-started-4-steps-to-deepctr)
- Provide `tensorflow estimator` interface for **large scale data** and **distributed training**. [example](https://deepctr-doc.readthedocs.io/en/latest/Quick-Start.html#getting-started-4-steps-to-deepctr-estimator-with-tfrecord)
- It is compatible with both `tf 1.x` and `tf 2.x`.
Some related projects:
- DeepMatch: https://github.com/shenweichen/DeepMatch
- DeepCTR-Torch: https://github.com/shenweichen/DeepCTR-Torch
Let's [**Get Started!**](https://deepctr-doc.readthedocs.io/en/latest/Quick-Start.html)([Chinese
Introduction](https://zhuanlan.zhihu.com/p/53231955)) and [welcome to join us!](./CONTRIBUTING.md)
## Models List
| Model | Paper |
| :------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Convolutional Click Prediction Model | [CIKM 2015][A Convolutional Click Prediction Model](http://ir.ia.ac.cn/bitstream/173211/12337/1/A%20Convolutional%20Click%20Prediction%20Model.pdf) |
| Factorization-supported Neural Network | [ECIR 2016][Deep Learning over Multi-field Categorical Data: A Case Study on User Response Prediction](https://arxiv.org/pdf/1601.02376.pdf) |
| Product-based Neural Network | [ICDM 2016][Product-based neural networks for user response prediction](https://arxiv.org/pdf/1611.00144.pdf) |
| Wide & Deep | [DLRS 2016][Wide & Deep Learning for Recommender Systems](https://arxiv.org/pdf/1606.07792.pdf) |
| DeepFM | [IJCAI 2017][DeepFM: A Factorization-Machine based Neural Network for CTR Prediction](http://www.ijcai.org/proceedings/2017/0239.pdf) |
| Piece-wise Linear Model | [arxiv 2017][Learning Piece-wise Linear Models from Large Scale Data for Ad Click Prediction](https://arxiv.org/abs/1704.05194) |
| Deep & Cross Network | [ADKDD 2017][Deep & Cross Network for Ad Click Predictions](https://arxiv.org/abs/1708.05123) |
| Attentional Factorization Machine | [IJCAI 2017][Attentional Factorization Machines: Learning the Weight of Feature Interactions via Attention Networks](http://www.ijcai.org/proceedings/2017/435) |
| Neural Factorization Machine | [SIGIR 2017][Neural Factorization Machines for Sparse Predictive Analytics](https://arxiv.org/pdf/1708.05027.pdf) |
| xDeepFM | [KDD 2018][xDeepFM: Combining Explicit and Implicit Feature Interactions for Recommender Systems](https://arxiv.org/pdf/1803.05170.pdf) |
| Deep Interest Network | [KDD 2018][Deep Interest Network for Click-Through Rate Prediction](https://arxiv.org/pdf/1706.06978.pdf) |
| AutoInt | [CIKM 2019][AutoInt: Automatic Feature Interaction Learning via Self-Attentive Neural Networks](https://arxiv.org/abs/1810.11921) |
| Deep Interest Evolution Network | [AAAI 2019][Deep Interest Evolution Network for Click-Through Rate Prediction](https://arxiv.org/pdf/1809.03672.pdf) |
| FwFM | [WWW 2018][Field-weighted Factorization Machines for Click-Through Rate Prediction in Display Advertising](https://arxiv.org/pdf/1806.03514.pdf) |
| ONN | [arxiv 2019][Operation-aware Neural Networks for User Response Prediction](https://arxiv.org/pdf/1904.12579.pdf) |
| FGCNN | [WWW 2019][Feature Generation by Convolutional Neural Network for Click-Through Rate Prediction ](https://arxiv.org/pdf/1904.04447) |
| Deep Session Interest Network | [IJCAI 2019][Deep Session Interest Network for Click-Through Rate Prediction ](https://arxiv.org/abs/1905.06482) |
| FiBiNET | [RecSys 2019][FiBiNET: Combining Feature Importance and Bilinear feature Interaction for Click-Through Rate Prediction](https://arxiv.org/pdf/1905.09433.pdf) |
| FLEN | [arxiv 2019][FLEN: Leveraging Field for Scalable CTR Prediction](https://arxiv.org/pdf/1911.04690.pdf) |
| BST | [DLP-KDD 2019][Behavior sequence transformer for e-commerce recommendation in Alibaba](https://arxiv.org/pdf/1905.06874.pdf) |
| IFM | [IJCAI 2019][An Input-aware Factorization Machine for Sparse Prediction](https://www.ijcai.org/Proceedings/2019/0203.pdf) |
| DCN V2 | [arxiv 2020][DCN V2: Improved Deep & Cross Network and Practical Lessons for Web-scale Learning to Rank Systems](https://arxiv.org/abs/2008.13535) |
| DIFM | [IJCAI 2020][A Dual Input-aware Factorization Machine for CTR Prediction](https://www.ijcai.org/Proceedings/2020/0434.pdf) |
| FEFM and DeepFEFM | [arxiv 2020][Field-Embedded Factorization Machines for Click-through rate prediction](https://arxiv.org/abs/2009.09931) |
| SharedBottom | [arxiv 2017][An Overview of Multi-Task Learning in Deep Neural Networks](https://arxiv.org/pdf/1706.05098.pdf) |
| ESMM | [SIGIR 2018][Entire Space Multi-Task Model: An Effective Approach for Estimating Post-Click Conversion Rate](https://arxiv.org/abs/1804.07931) |
| MMOE | [KDD 2018][Modeling Task Relationships in Multi-task Learning with Multi-gate Mixture-of-Experts](https://dl.acm.org/doi/abs/10.1145/3219819.3220007) |
| PLE | [RecSys 2020][Progressive Layered Extraction (PLE): A Novel Multi-Task Learning (MTL) Model for Personalized Recommendations](https://dl.acm.org/doi/10.1145/3383313.3412236) |
| EDCN | [KDD 2021][Enhancing Explicit and Implicit Feature Interactions via Information Sharing for Parallel Deep CTR Models](https://dlp-kdd.github.io/assets/pdf/DLP-KDD_2021_paper_12.pdf) |
## Citation
- Weichen Shen. (2017). DeepCTR: Easy-to-use,Modular and Extendible package of deep-learning based CTR
models. https://github.com/shenweichen/deepctr.
If you find this code useful in your research, please cite it using the following BibTeX:
```bibtex
@misc{shen2017deepctr,
author = {Weichen Shen},
title = {DeepCTR: Easy-to-use,Modular and Extendible package of deep-learning based CTR models},
year = {2017},
publisher = {GitHub},
journal = {GitHub Repository},
howpublished = {\url{https://github.com/shenweichen/deepctr}},
}
```
## DiscussionGroup
- [Github Discussions](https://github.com/shenweichen/DeepCTR/discussions)
- Wechat Discussions
|公众号:浅梦学习笔记|微信:deepctrbot|学习小组 [加入](https://t.zsxq.com/026UJEuzv) [主题集合](https://mp.weixin.qq.com/mp/appmsgalbum?__biz=MjM5MzY4NzE3MA==&action=getalbum&album_id=1361647041096843265&scene=126#wechat_redirect)|
|:--:|:--:|:--:|
| [](https://github.com/shenweichen/AlgoNotes)| [](https://github.com/shenweichen/AlgoNotes)|[](https://t.zsxq.com/026UJEuzv)|
## Main contributors([welcome to join us!](./CONTRIBUTING.md))
================================================
FILE: deepctr/__init__.py
================================================
from .utils import check_version

# Version string of the deepctr package.
__version__ = '0.9.3'
# Runs the version-check helper from .utils as a side effect of importing
# the package. NOTE(review): what the helper does with the version (e.g.
# compare against the latest release) is defined in deepctr/utils.py and is
# not visible here.
check_version(__version__)
================================================
FILE: deepctr/contrib/__init__.py
================================================
================================================
FILE: deepctr/contrib/rnn.py
================================================
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""RNN helpers for TensorFlow models.
@@bidirectional_dynamic_rnn
@@dynamic_rnn
@@raw_rnn
@@static_rnn
@@static_state_saving_rnn
@@static_bidirectional_rnn
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_shape
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import rnn_cell_impl
from tensorflow.python.ops import tensor_array_ops
from tensorflow.python.ops import variable_scope as vs
from tensorflow.python.util import nest
import tensorflow as tf
def _like_rnncell_(cell):
"""Checks that a given object is an RNNCell by using duck typing."""
conditions = [hasattr(cell, "output_size"), hasattr(cell, "state_size"),
hasattr(cell, "zero_state"), callable(cell)]
return all(conditions)
# pylint: disable=protected-access
# Re-export TensorFlow's private shape-concatenation helper for local use.
_concat = rnn_cell_impl._concat
# Use TensorFlow's own RNNCell check when this TensorFlow build exposes it;
# otherwise fall back to the local duck-typing implementation.
try:
    _like_rnncell = rnn_cell_impl._like_rnncell
except Exception:
    _like_rnncell = _like_rnncell_
# pylint: enable=protected-access
def _transpose_batch_time(x):
"""Transpose the batch and time dimensions of a Tensor.
Retains as much of the static shape information as possible.
Args:
x: A tensor of rank 2 or higher.
Returns:
x transposed along the first two dimensions.
Raises:
ValueError: if `x` is rank 1 or lower.
"""
x_static_shape = x.get_shape()
if x_static_shape.ndims is not None and x_static_shape.ndims < 2:
raise ValueError(
"Expected input tensor %s to have rank at least 2, but saw shape: %s" %
(x, x_static_shape))
x_rank = array_ops.rank(x)
x_t = array_ops.transpose(
x, array_ops.concat(
([1, 0], math_ops.range(2, x_rank)), axis=0))
x_t.set_shape(
tensor_shape.TensorShape([
x_static_shape[1].value, x_static_shape[0].value
]).concatenate(x_static_shape[2:]))
return x_t
def _best_effort_input_batch_size(flat_input):
"""Get static input batch size if available, with fallback to the dynamic one.
Args:
flat_input: An iterable of time major input Tensors of shape [max_time,
batch_size, ...]. All inputs should have compatible batch sizes.
Returns:
The batch size in Python integer if available, or a scalar Tensor otherwise.
Raises:
ValueError: if there is any input with an invalid shape.
"""
for input_ in flat_input:
shape = input_.shape
if shape.ndims is None:
continue
if shape.ndims < 2:
raise ValueError(
"Expected input tensor %s to have rank at least 2" % input_)
batch_size = shape[1].value
if batch_size is not None:
return batch_size
# Fallback to the dynamic batch size of the first input.
return array_ops.shape(flat_input[0])[1]
def _infer_state_dtype(explicit_dtype, state):
"""Infer the dtype of an RNN state.
Args:
explicit_dtype: explicitly declared dtype or None.
state: RNN's hidden state. Must be a Tensor or a nested iterable containing
Tensors.
Returns:
dtype: inferred dtype of hidden state.
Raises:
ValueError: if `state` has heterogeneous dtypes or is empty.
"""
if explicit_dtype is not None:
return explicit_dtype
elif nest.is_sequence(state):
inferred_dtypes = [element.dtype for element in nest.flatten(state)]
if not inferred_dtypes:
raise ValueError("Unable to infer dtype from empty state.")
all_same = all([x == inferred_dtypes[0] for x in inferred_dtypes])
if not all_same:
raise ValueError(
"State has tensors of different inferred_dtypes. Unable to infer a "
"single representative dtype.")
return inferred_dtypes[0]
else:
return state.dtype
# pylint: disable=unused-argument
def _rnn_step(
        time, sequence_length, min_sequence_length, max_sequence_length,
        zero_output, state, call_cell, state_size, skip_conditionals=False):
    """Calculate one step of a dynamic RNN minibatch.

    Returns an (output, state) pair conditioned on the sequence_lengths.
    When skip_conditionals=False, the pseudocode is something like:

    if t >= max_sequence_length:
      return (zero_output, state)
    if t < min_sequence_length:
      return call_cell()

    # Selectively output zeros or output, old state or new state depending
    # on if we've finished calculating each row.
    new_output, new_state = call_cell()
    final_output = np.vstack([
      zero_output if time >= sequence_lengths[r] else new_output_r
      for r, new_output_r in enumerate(new_output)
    ])
    final_state = np.vstack([
      state[r] if time >= sequence_lengths[r] else new_state_r
      for r, new_state_r in enumerate(new_state)
    ])
    return (final_output, final_state)

    When skip_conditionals=True the cell is always called and the per-row
    selection above is applied at every step, without any `cond`.

    Args:
      time: the current time step. The caller in this file
        (`_dynamic_rnn_loop`) passes an int32 scalar `Tensor`.
      sequence_length: int32 `Tensor` vector of size [batch_size]
      min_sequence_length: int32 `Tensor` scalar, min of sequence_length
      max_sequence_length: int32 `Tensor` scalar, max of sequence_length
      zero_output: `Tensor` (or nested structure of tensors) used as the
        output for rows that are already finished. The caller in this file
        builds it with shape `[batch_size, output_size]`.
      state: Either a single `Tensor` matrix of shape `[batch_size, state_size]`,
        or a list/tuple of such tensors.
      call_cell: lambda returning tuple of (new_output, new_state) where
        new_output is a `Tensor` matrix of shape `[batch_size, output_size]`.
        new_state is a `Tensor` matrix of shape `[batch_size, state_size]`.
      state_size: The `cell.state_size` associated with the state. Not read
        by this function (hence the pylint `unused-argument` suppression
        just above the definition).
      skip_conditionals: Python bool, whether to skip using the conditional
        calculations.  This is useful for `dynamic_rnn`, where the input tensor
        matches `max_sequence_length`, and using conditionals just slows
        everything down.

    Returns:
      A tuple of (`final_output`, `final_state`) as given by the pseudocode above:
        final_output is packed with the same structure as `zero_output`
        final_state is either a single `Tensor` matrix, or a tuple of such
          matrices (matching length and shapes of input `state`).

    Raises:
      ValueError: If the number of flattened outputs plus states produced by
        the step does not match the number expected from `zero_output` and
        `state`. `nest.assert_same_structure` additionally rejects a new
        state whose structure differs from `state`.
    """

    # Convert state to a list for ease of use
    flat_state = nest.flatten(state)
    flat_zero_output = nest.flatten(zero_output)

    def _copy_one_through(output, new_output):
        # Per-row select: rows whose sequence is finished keep `output`,
        # the others take `new_output`.
        # If the state contains a scalar value we simply pass it through.
        if output.shape.ndims == 0:
            return new_output
        copy_cond = (time >= sequence_length)
        with ops.colocate_with(new_output):
            return array_ops.where(copy_cond, output, new_output)

    def _copy_some_through(flat_new_output, flat_new_state):
        # Use broadcasting select to determine which values should get
        # the previous state & zero output, and which values should get
        # a calculated state & output.
        flat_new_output = [
            _copy_one_through(zero_output, new_output)
            for zero_output, new_output in zip(flat_zero_output, flat_new_output)]
        flat_new_state = [
            _copy_one_through(state, new_state)
            for state, new_state in zip(flat_state, flat_new_state)]
        # Outputs first, then states; the caller splits on
        # len(flat_zero_output).
        return flat_new_output + flat_new_state

    def _maybe_copy_some_through():
        """Run RNN step.  Pass through either no or some past state."""
        new_output, new_state = call_cell()

        nest.assert_same_structure(state, new_state)

        flat_new_state = nest.flatten(new_state)
        flat_new_output = nest.flatten(new_output)
        return control_flow_ops.cond(
            # if t < min_seq_len: calculate and return everything
            time < min_sequence_length, lambda: flat_new_output + flat_new_state,
            # else copy some of it through
            lambda: _copy_some_through(flat_new_output, flat_new_state))

    # TODO(ebrevdo): skipping these conditionals may cause a slowdown,
    # but benefits from removing cond() and its gradient.  We should
    # profile with and without this switch here.
    if skip_conditionals:
        # Instead of using conditionals, perform the selective copy at all time
        # steps.  This is faster when max_seq_len is equal to the number of unrolls
        # (which is typical for dynamic_rnn).
        new_output, new_state = call_cell()
        nest.assert_same_structure(state, new_state)
        new_state = nest.flatten(new_state)
        new_output = nest.flatten(new_output)
        final_output_and_state = _copy_some_through(new_output, new_state)
    else:
        empty_update = lambda: flat_zero_output + flat_state
        final_output_and_state = control_flow_ops.cond(
            # if t >= max_seq_len: copy all state through, output zeros
            time >= max_sequence_length, empty_update,
            # otherwise calculation is required: copy some or all of it through
            _maybe_copy_some_through)

    # Sanity check before splitting the flat list back into outputs/states.
    if len(final_output_and_state) != len(flat_zero_output) + len(flat_state):
        raise ValueError("Internal error: state and output were not concatenated "
                         "correctly.")
    final_output = final_output_and_state[:len(flat_zero_output)]
    final_state = final_output_and_state[len(flat_zero_output):]

    # Re-attach the static shapes of the reference tensors to the results.
    for output, flat_output in zip(final_output, flat_zero_output):
        output.set_shape(flat_output.get_shape())
    for substate, flat_substate in zip(final_state, flat_state):
        substate.set_shape(flat_substate.get_shape())

    # Restore the original (possibly nested) structures.
    final_output = nest.pack_sequence_as(
        structure=zero_output, flat_sequence=final_output)
    final_state = nest.pack_sequence_as(
        structure=state, flat_sequence=final_state)

    return final_output, final_state
def _reverse_seq(input_seq, lengths):
"""Reverse a list of Tensors up to specified lengths.
Args:
input_seq: Sequence of seq_len tensors of dimension (batch_size, n_features)
or nested tuples of tensors.
lengths: A `Tensor` of dimension batch_size, containing lengths for each
sequence in the batch. If "None" is specified, simply reverses
the list.
Returns:
time-reversed sequence
"""
if lengths is None:
return list(reversed(input_seq))
flat_input_seq = tuple(nest.flatten(input_) for input_ in input_seq)
flat_results = [[] for _ in range(len(input_seq))]
for sequence in zip(*flat_input_seq):
input_shape = tensor_shape.unknown_shape(
ndims=sequence[0].get_shape().ndims)
for input_ in sequence:
input_shape.merge_with(input_.get_shape())
input_.set_shape(input_shape)
# Join into (time, batch_size, depth)
s_joined = array_ops.stack(sequence)
# Reverse along dimension 0
s_reversed = array_ops.reverse_sequence(s_joined, lengths, 0, 1)
# Split again into list
result = array_ops.unstack(s_reversed)
for r, flat_result in zip(result, flat_results):
r.set_shape(input_shape)
flat_result.append(r)
results = [nest.pack_sequence_as(structure=input_, flat_sequence=flat_result)
for input_, flat_result in zip(input_seq, flat_results)]
return results
#
# def bidirectional_dynamic_rnn(cell_fw, cell_bw, inputs, sequence_length=None,
#
# initial_state_fw=None, initial_state_bw=None,
#
# dtype=None, parallel_iterations=None,
#
# swap_memory=False, time_major=False, scope=None):
#
# """Creates a dynamic version of bidirectional recurrent neural network.
#
#
#
# Takes input and builds independent forward and backward RNNs. The input_size
#
# of forward and backward cell must match. The initial state for both directions
#
# is zero by default (but can be set optionally) and no intermediate states are
#
# ever returned -- the network is fully unrolled for the given (passed in)
#
# length(s) of the sequence(s) or completely unrolled if length(s) is not
#
# given.
#
#
#
# Args:
#
# cell_fw: An instance of RNNCell, to be used for forward direction.
#
# cell_bw: An instance of RNNCell, to be used for backward direction.
#
# inputs: The RNN inputs.
#
# If time_major == False (default), this must be a tensor of shape:
#
# `[batch_size, max_time, ...]`, or a nested tuple of such elements.
#
# If time_major == True, this must be a tensor of shape:
#
# `[max_time, batch_size, ...]`, or a nested tuple of such elements.
#
# sequence_length: (optional) An int32/int64 vector, size `[batch_size]`,
#
# containing the actual lengths for each of the sequences in the batch.
#
# If not provided, all batch entries are assumed to be full sequences; and
#
# time reversal is applied from time `0` to `max_time` for each sequence.
#
# initial_state_fw: (optional) An initial state for the forward RNN.
#
# This must be a tensor of appropriate type and shape
#
# `[batch_size, cell_fw.state_size]`.
#
# If `cell_fw.state_size` is a tuple, this should be a tuple of
#
# tensors having shapes `[batch_size, s] for s in cell_fw.state_size`.
#
# initial_state_bw: (optional) Same as for `initial_state_fw`, but using
#
# the corresponding properties of `cell_bw`.
#
# dtype: (optional) The data type for the initial states and expected output.
#
# Required if initial_states are not provided or RNN states have a
#
# heterogeneous dtype.
#
# parallel_iterations: (Default: 32). The number of iterations to run in
#
# parallel. Those operations which do not have any temporal dependency
#
# and can be run in parallel, will be. This parameter trades off
#
# time for space. Values >> 1 use more memory but take less time,
#
# while smaller values use less memory but computations take longer.
#
# swap_memory: Transparently swap the tensors produced in forward inference
#
# but needed for back prop from GPU to CPU. This allows training RNNs
#
# which would typically not fit on a single GPU, with very minimal (or no)
#
# performance penalty.
#
# time_major: The shape format of the `inputs` and `outputs` Tensors.
#
# If true, these `Tensors` must be shaped `[max_time, batch_size, depth]`.
#
# If false, these `Tensors` must be shaped `[batch_size, max_time, depth]`.
#
# Using `time_major = True` is a bit more efficient because it avoids
#
# transposes at the beginning and end of the RNN calculation. However,
#
# most TensorFlow data is batch-major, so by default this function
#
# accepts input and emits output in batch-major form.
#
# scope: VariableScope for the created subgraph; defaults to
#
# "bidirectional_rnn"
#
#
#
# Returns:
#
# A tuple (outputs, output_states) where:
#
# outputs: A tuple (output_fw, output_bw) containing the forward and
#
# the backward rnn output `Tensor`.
#
# If time_major == False (default),
#
# output_fw will be a `Tensor` shaped:
#
# `[batch_size, max_time, cell_fw.output_size]`
#
# and output_bw will be a `Tensor` shaped:
#
# `[batch_size, max_time, cell_bw.output_size]`.
#
# If time_major == True,
#
# output_fw will be a `Tensor` shaped:
#
# `[max_time, batch_size, cell_fw.output_size]`
#
# and output_bw will be a `Tensor` shaped:
#
# `[max_time, batch_size, cell_bw.output_size]`.
#
# It returns a tuple instead of a single concatenated `Tensor`, unlike
#
# in the `bidirectional_rnn`. If the concatenated one is preferred,
#
# the forward and backward outputs can be concatenated as
#
# `tf.concat(outputs, 2)`.
#
# output_states: A tuple (output_state_fw, output_state_bw) containing
#
# the forward and the backward final states of bidirectional rnn.
#
#
#
# Raises:
#
# TypeError: If `cell_fw` or `cell_bw` is not an instance of `RNNCell`.
#
# """
#
#
#
# if not _like_rnncell(cell_fw):
#
# raise TypeError("cell_fw must be an instance of RNNCell")
#
# if not _like_rnncell(cell_bw):
#
# raise TypeError("cell_bw must be an instance of RNNCell")
#
#
#
# with vs.variable_scope(scope or "bidirectional_rnn"):
#
# # Forward direction
#
# with vs.variable_scope("fw") as fw_scope:
#
# output_fw, output_state_fw = dynamic_rnn(
#
# cell=cell_fw, inputs=inputs, sequence_length=sequence_length,
#
# initial_state=initial_state_fw, dtype=dtype,
#
# parallel_iterations=parallel_iterations, swap_memory=swap_memory,
#
# time_major=time_major, scope=fw_scope)
#
#
#
# # Backward direction
#
# if not time_major:
#
# time_dim = 1
#
# batch_dim = 0
#
# else:
#
# time_dim = 0
#
# batch_dim = 1
#
#
#
# def _reverse(input_, seq_lengths, seq_dim, batch_dim):
#
# if seq_lengths is not None:
#
# return array_ops.reverse_sequence(
#
# input=input_, seq_lengths=seq_lengths,
#
# seq_dim=seq_dim, batch_dim=batch_dim)
#
# else:
#
# return array_ops.reverse(input_, axis=[seq_dim])
#
#
#
# with vs.variable_scope("bw") as bw_scope:
#
# inputs_reverse = _reverse(
#
# inputs, seq_lengths=sequence_length,
#
# seq_dim=time_dim, batch_dim=batch_dim)
#
# tmp, output_state_bw = dynamic_rnn(
#
# cell=cell_bw, inputs=inputs_reverse, sequence_length=sequence_length,
#
# initial_state=initial_state_bw, dtype=dtype,
#
# parallel_iterations=parallel_iterations, swap_memory=swap_memory,
#
# time_major=time_major, scope=bw_scope)
#
#
#
# output_bw = _reverse(
#
# tmp, seq_lengths=sequence_length,
#
# seq_dim=time_dim, batch_dim=batch_dim)
#
#
#
# outputs = (output_fw, output_bw)
#
# output_states = (output_state_fw, output_state_bw)
#
#
#
# return (outputs, output_states)
#
def dynamic_rnn(cell, inputs, att_scores=None, sequence_length=None, initial_state=None,
                dtype=None, parallel_iterations=None, swap_memory=False,
                time_major=False, scope=None):
    """Creates a recurrent neural network specified by RNNCell `cell`.

    Performs fully dynamic unrolling of `inputs`. Compared with a plain
    dynamic RNN this variant accepts an optional `att_scores` tensor whose
    per-time-step slice is handed to `cell` as an extra argument.

    Example:

    ```python
    # create a BasicRNNCell
    rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size)

    # 'outputs' is a tensor of shape [batch_size, max_time, cell_state_size]

    # defining initial state
    initial_state = rnn_cell.zero_state(batch_size, dtype=tf.float32)

    # 'state' is a tensor of shape [batch_size, cell_state_size]
    outputs, state = tf.nn.dynamic_rnn(rnn_cell, input_data,
                                       initial_state=initial_state,
                                       dtype=tf.float32)
    ```

    ```python
    # create 2 LSTMCells
    rnn_layers = [tf.nn.rnn_cell.LSTMCell(size) for size in [128, 256]]

    # create a RNN cell composed sequentially of a number of RNNCells
    multi_rnn_cell = tf.nn.rnn_cell.MultiRNNCell(rnn_layers)

    # 'outputs' is a tensor of shape [batch_size, max_time, 256]
    # 'state' is a N-tuple where N is the number of LSTMCells containing a
    # tf.contrib.rnn.LSTMStateTuple for each cell
    outputs, state = tf.nn.dynamic_rnn(cell=multi_rnn_cell,
                                       inputs=data,
                                       dtype=tf.float32)
    ```


    Args:
      cell: An instance of RNNCell.
      inputs: The RNN inputs.
        If `time_major == False` (default), this must be a `Tensor` of shape:
          `[batch_size, max_time, ...]`, or a nested tuple of such
          elements.
        If `time_major == True`, this must be a `Tensor` of shape:
          `[max_time, batch_size, ...]`, or a nested tuple of such
          elements.
        This may also be a (possibly nested) tuple of Tensors satisfying
        this property.  The first two dimensions must match across all the inputs,
        but otherwise the ranks and other shape components may differ.
        In this case, input to `cell` at each time-step will replicate the
        structure of these tuples, except for the time dimension (from which the
        time is taken).
        The input to `cell` at each time step will be a `Tensor` or (possibly
        nested) tuple of Tensors each with dimensions `[batch_size, ...]`.
      att_scores: (optional) Tensor of per-step scores. When given, the slice
        `att_scores[:, time, :]` is passed to `cell` as a third positional
        argument at every step. It is indexed with time on its second axis
        and is not transposed here, regardless of `time_major`.
      sequence_length: (optional) An int32/int64 vector sized `[batch_size]`.
        Used to copy-through state and zero-out outputs when past a batch
        element's sequence length.  So it's more for correctness than performance.
      initial_state: (optional) An initial state for the RNN.
        If `cell.state_size` is an integer, this must be
        a `Tensor` of appropriate type and shape `[batch_size, cell.state_size]`.
        If `cell.state_size` is a tuple, this should be a tuple of
        tensors having shapes `[batch_size, s] for s in cell.state_size`.
      dtype: (optional) The data type for the initial state and expected output.
        Required if initial_state is not provided or RNN state has a heterogeneous
        dtype.
      parallel_iterations: (Default: 32).  The number of iterations to run in
        parallel.  Those operations which do not have any temporal dependency
        and can be run in parallel, will be.  This parameter trades off
        time for space.  Values >> 1 use more memory but take less time,
        while smaller values use less memory but computations take longer.
      swap_memory: Transparently swap the tensors produced in forward inference
        but needed for back prop from GPU to CPU.  This allows training RNNs
        which would typically not fit on a single GPU, with very minimal (or no)
        performance penalty.
      time_major: The shape format of the `inputs` and `outputs` Tensors.
        If true, these `Tensors` must be shaped `[max_time, batch_size, depth]`.
        If false, these `Tensors` must be shaped `[batch_size, max_time, depth]`.
        Using `time_major = True` is a bit more efficient because it avoids
        transposes at the beginning and end of the RNN calculation.  However,
        most TensorFlow data is batch-major, so by default this function
        accepts input and emits output in batch-major form.
      scope: VariableScope for the created subgraph; defaults to "rnn". The
        scope is always opened with `reuse=tf.AUTO_REUSE`.

    Returns:
      A pair (outputs, state) where:

      outputs: The RNN output `Tensor`.

        If time_major == False (default), this will be a `Tensor` shaped:
          `[batch_size, max_time, cell.output_size]`.

        If time_major == True, this will be a `Tensor` shaped:
          `[max_time, batch_size, cell.output_size]`.

        Note, if `cell.output_size` is a (possibly nested) tuple of integers
        or `TensorShape` objects, then `outputs` will be a tuple having the
        same structure as `cell.output_size`, containing Tensors having shapes
        corresponding to the shape data in `cell.output_size`.

      state: The final state.  If `cell.state_size` is an int, this
        will be shaped `[batch_size, cell.state_size]`.  If it is a
        `TensorShape`, this will be shaped `[batch_size] + cell.state_size`.
        If it is a (possibly nested) tuple of ints or `TensorShape`, this will
        be a tuple having the corresponding shapes. If cells are `LSTMCells`
        `state` will be a tuple containing a `LSTMStateTuple` for each cell.

    Raises:
      TypeError: If `cell` is not an instance of RNNCell.
      ValueError: If inputs is None or an empty list, if `sequence_length`
        has a known rank other than 1, or if neither `initial_state` nor
        `dtype` is given.
    """

    if not _like_rnncell(cell):
        raise TypeError("cell must be an instance of RNNCell")

    # By default, time_major==False and inputs are batch-major: shaped
    #   [batch, time, depth]
    # For internal calculations, we transpose to [time, batch, depth]
    flat_input = nest.flatten(inputs)

    if not time_major:
        # (B,T,D) => (T,B,D)
        flat_input = [ops.convert_to_tensor(input_) for input_ in flat_input]
        flat_input = tuple(_transpose_batch_time(input_) for input_ in flat_input)

    # None (or 0) falls back to the documented default of 32.
    parallel_iterations = parallel_iterations or 32
    if sequence_length is not None:
        sequence_length = math_ops.to_int32(sequence_length)
        if sequence_length.get_shape().ndims not in (None, 1):
            raise ValueError(
                "sequence_length must be a vector of length batch_size, "
                "but saw shape: %s" % sequence_length.get_shape())
        sequence_length = array_ops.identity(  # Just to find it in the graph.
            sequence_length, name="sequence_length")

    # Create a new scope in which the caching device is either
    # determined by the parent scope, or is set to place the cached
    # Variable using the same placement as for the rest of the RNN.
    with vs.variable_scope(scope or "rnn",reuse=tf.AUTO_REUSE) as varscope:#TODO:user defined reuse
        if varscope.caching_device is None:
            varscope.set_caching_device(lambda op: op.device)
        batch_size = _best_effort_input_batch_size(flat_input)

        if initial_state is not None:
            state = initial_state
        else:
            # Without an initial state the zero state must be built, which
            # needs a dtype.
            if not dtype:
                raise ValueError("If there is no initial_state, you must give a dtype.")
            state = cell.zero_state(batch_size, dtype)

        def _assert_has_shape(x, shape):
            # Graph-time assertion that the dynamic shape of `x` equals
            # `shape`.
            x_shape = array_ops.shape(x)
            packed_shape = array_ops.stack(shape)
            return control_flow_ops.Assert(
                math_ops.reduce_all(math_ops.equal(x_shape, packed_shape)),
                ["Expected shape for Tensor %s is " % x.name,
                 packed_shape, " but saw shape: ", x_shape])

        if sequence_length is not None:
            # Perform some shape validation
            with ops.control_dependencies(
                    [_assert_has_shape(sequence_length, [batch_size])]):
                sequence_length = array_ops.identity(
                    sequence_length, name="CheckSeqLen")

        # Re-pack the (now time-major) tensors into the caller's structure.
        inputs = nest.pack_sequence_as(structure=inputs, flat_sequence=flat_input)

        (outputs, final_state) = _dynamic_rnn_loop(
            cell,
            inputs,
            state,
            parallel_iterations=parallel_iterations,
            swap_memory=swap_memory,
            att_scores=att_scores,
            sequence_length=sequence_length,
            dtype=dtype)

        # Outputs of _dynamic_rnn_loop are always shaped [time, batch, depth].
        # If we are performing batch-major calculations, transpose output back
        # to shape [batch, time, depth]
        if not time_major:
            # (T,B,D) => (B,T,D)
            outputs = nest.map_structure(_transpose_batch_time, outputs)

        return (outputs, final_state)
def _dynamic_rnn_loop(cell,
                      inputs,
                      initial_state,
                      parallel_iterations,
                      swap_memory,
                      att_scores=None,
                      sequence_length=None,
                      dtype=None):
    """Internal implementation of Dynamic RNN.

    Args:
      cell: An instance of RNNCell.
      inputs: A `Tensor` of shape [time, batch_size, input_size], or a nested
        tuple of such elements.
      initial_state: A `Tensor` of shape `[batch_size, state_size]`, or if
        `cell.state_size` is a tuple, then this should be a tuple of
        tensors having shapes `[batch_size, s] for s in cell.state_size`.
      parallel_iterations: Positive Python int.
      swap_memory: A Python boolean
      att_scores: (optional) Tensor carried through the while loop unchanged.
        At each step the slice `att_scores[:, time, :]` is passed to `cell`
        as a third positional argument.
      sequence_length: (optional) An `int32` `Tensor` of shape [batch_size].
      dtype: (optional) Expected dtype of output. If not specified, inferred from
        initial_state.

    Returns:
      Tuple `(final_outputs, final_state)`.
      final_outputs:
        A `Tensor` of shape `[time, batch_size, cell.output_size]`.  If
        `cell.output_size` is a (possibly nested) tuple of ints or `TensorShape`
        objects, then this returns a (possibly nested) tuple of Tensors matching
        the corresponding shapes.
      final_state:
        A `Tensor`, or possibly nested tuple of Tensors, matching in length
        and shapes to `initial_state`.

    Raises:
      ValueError: If the input depth cannot be inferred via shape inference
        from the inputs, or if the inputs disagree on their static time or
        batch dimension.
    """
    state = initial_state
    # NOTE(review): validation via `assert` is skipped when Python runs with
    # -O.
    assert isinstance(parallel_iterations, int), "parallel_iterations must be int"

    state_size = cell.state_size

    flat_input = nest.flatten(inputs)
    flat_output_size = nest.flatten(cell.output_size)

    # Construct an initial output
    input_shape = array_ops.shape(flat_input[0])
    time_steps = input_shape[0]
    batch_size = _best_effort_input_batch_size(flat_input)

    inputs_got_shape = tuple(input_.get_shape().with_rank_at_least(3)
                             for input_ in flat_input)

    # Static time/batch sizes of the first input, used as the reference
    # for the consistency checks below.
    const_time_steps, const_batch_size = inputs_got_shape[0].as_list()[:2]

    for shape in inputs_got_shape:
        if not shape[2:].is_fully_defined():
            raise ValueError(
                "Input size (depth of inputs) must be accessible via shape inference,"
                " but saw value None.")
        got_time_steps = shape[0].value
        got_batch_size = shape[1].value
        if const_time_steps != got_time_steps:
            raise ValueError(
                "Time steps is not the same for all the elements in the input in a "
                "batch.")
        if const_batch_size != got_batch_size:
            raise ValueError(
                "Batch_size is not the same for all the elements in the input.")

    # Prepare dynamic conditional copying of state & output
    def _create_zero_arrays(size):
        # Zero tensor of shape [batch_size] + size, in the state dtype.
        size = _concat(batch_size, size)
        return array_ops.zeros(
            array_ops.stack(size), _infer_state_dtype(dtype, state))

    flat_zero_output = tuple(_create_zero_arrays(output)
                             for output in flat_output_size)
    zero_output = nest.pack_sequence_as(structure=cell.output_size,
                                        flat_sequence=flat_zero_output)

    # Only defined when sequence_length is given; `_time_step` reads them
    # under the same condition.
    if sequence_length is not None:
        min_sequence_length = math_ops.reduce_min(sequence_length)
        max_sequence_length = math_ops.reduce_max(sequence_length)

    time = array_ops.constant(0, dtype=dtypes.int32, name="time")

    with ops.name_scope("dynamic_rnn") as scope:
        base_name = scope

    def _create_ta(name, dtype):
        # TensorArray with one slot per time step, named under `base_name`.
        return tensor_array_ops.TensorArray(dtype=dtype,
                                            size=time_steps,
                                            tensor_array_name=base_name + name)

    output_ta = tuple(_create_ta("output_%d" % i,
                                 _infer_state_dtype(dtype, state))
                      for i in range(len(flat_output_size)))
    input_ta = tuple(_create_ta("input_%d" % i, flat_input[i].dtype)
                     for i in range(len(flat_input)))

    # Split each time-major input along its first axis into its array.
    input_ta = tuple(ta.unstack(input_)
                     for ta, input_ in zip(input_ta, flat_input))

    def _time_step(time, output_ta_t, state, att_scores=None):
        """Take a time step of the dynamic RNN.

        Args:
          time: int32 scalar Tensor.
          output_ta_t: List of `TensorArray`s that represent the output.
          state: nested tuple of vector tensors that represent the state.
          att_scores: (optional) Tensor sliced as `att_scores[:, time, :]`
            and passed to `cell` as a third argument.

        Returns:
          The tuple (time + 1, output_ta_t with updated flow, new_state),
          with `att_scores` appended as a fourth element when it was given.
        """

        input_t = tuple(ta.read(time) for ta in input_ta)
        # Restore some shape information
        for input_, shape in zip(input_t, inputs_got_shape):
            input_.set_shape(shape[1:])

        input_t = nest.pack_sequence_as(structure=inputs, flat_sequence=input_t)
        if att_scores is not None:
            # att_scores is indexed with time on its second axis, unlike the
            # time-major inputs.
            att_score = att_scores[:, time, :]
            call_cell = lambda: cell(input_t, state, att_score)
        else:
            call_cell = lambda: cell(input_t, state)

        if sequence_length is not None:
            # skip_conditionals=True: always run the cell and apply the
            # per-row copy-through.
            (output, new_state) = _rnn_step(
                time=time,
                sequence_length=sequence_length,
                min_sequence_length=min_sequence_length,
                max_sequence_length=max_sequence_length,
                zero_output=zero_output,
                state=state,
                call_cell=call_cell,
                state_size=state_size,
                skip_conditionals=True)
        else:
            (output, new_state) = call_cell()

        # Pack state if using state tuples
        output = nest.flatten(output)

        output_ta_t = tuple(
            ta.write(time, out) for ta, out in zip(output_ta_t, output))
        # The returned tuple must mirror the loop_vars given to while_loop.
        if att_scores is not None:
            return (time + 1, output_ta_t, new_state, att_scores)
        else:
            return (time + 1, output_ta_t, new_state)

    # att_scores, when present, is carried as an extra loop variable.
    if att_scores is not None:
        _, output_final_ta, final_state, _ = control_flow_ops.while_loop(
            cond=lambda time, *_: time < time_steps,
            body=_time_step,
            loop_vars=(time, output_ta, state, att_scores),
            parallel_iterations=parallel_iterations,
            swap_memory=swap_memory)
    else:
        _, output_final_ta, final_state = control_flow_ops.while_loop(
            cond=lambda time, *_: time < time_steps,
            body=_time_step,
            loop_vars=(time, output_ta, state),
            parallel_iterations=parallel_iterations,
            swap_memory=swap_memory)

    # Unpack final output if not using output tuples.
    final_outputs = tuple(ta.stack() for ta in output_final_ta)

    # Restore some shape information
    for output, output_size in zip(final_outputs, flat_output_size):
        shape = _concat(
            [const_time_steps, const_batch_size], output_size, static=True)
        output.set_shape(shape)

    final_outputs = nest.pack_sequence_as(
        structure=cell.output_size, flat_sequence=final_outputs)

    return (final_outputs, final_state)
================================================
FILE: deepctr/contrib/rnn_v2.py
================================================
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""RNN helpers for TensorFlow models.
@@bidirectional_dynamic_rnn
@@dynamic_rnn
@@raw_rnn
@@static_rnn
@@static_state_saving_rnn
@@static_bidirectional_rnn
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_shape
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import rnn_cell_impl
from tensorflow.python.ops import tensor_array_ops
from tensorflow.python.ops import variable_scope as vs
from tensorflow.python.util import nest
import tensorflow as tf
def _like_rnncell_(cell):
"""Checks that a given object is an RNNCell by using duck typing."""
conditions = [hasattr(cell, "output_size"), hasattr(cell, "state_size"),
hasattr(cell, "zero_state"), callable(cell)]
return all(conditions)
# pylint: disable=protected-access
_concat = rnn_cell_impl._concat
# Use TensorFlow's private duck-typing helper when `rnn_cell_impl` still
# exposes it, otherwise fall back to the local `_like_rnncell_`. Only a missing
# attribute triggers the fallback, so unrelated failures (including
# KeyboardInterrupt/SystemExit) are no longer swallowed.
try:
    _like_rnncell = rnn_cell_impl._like_rnncell
except AttributeError:
    _like_rnncell = _like_rnncell_
# pylint: enable=protected-access
def _transpose_batch_time(x):
    """Swap the first two axes of `x`, keeping its static shape when known.

    Args:
        x: A tensor of rank 2 or higher.

    Returns:
        `x` with axes 0 and 1 exchanged; all remaining axes keep their order.

    Raises:
        ValueError: if the static rank of `x` is known and smaller than 2.
    """
    static_shape = x.get_shape()
    rank_is_known = static_shape.ndims is not None
    if rank_is_known and static_shape.ndims < 2:
        raise ValueError(
            "Expected input tensor %s to have rank at least 2, but saw shape: %s"
            % (x, static_shape))

    # Permutation [1, 0, 2, 3, ...] built from the dynamic rank so that tensors
    # whose rank is unknown at graph-construction time are handled too.
    dynamic_rank = array_ops.rank(x)
    trailing_axes = math_ops.range(2, dynamic_rank)
    permutation = array_ops.concat(([1, 0], trailing_axes), axis=0)
    transposed = array_ops.transpose(x, permutation)

    # Re-attach the static shape with its first two dimensions exchanged.
    swapped_head = tensor_shape.TensorShape([static_shape[1], static_shape[0]])
    transposed.set_shape(swapped_head.concatenate(static_shape[2:]))
    return transposed
def _best_effort_input_batch_size(flat_input):
"""Get static input batch size if available, with fallback to the dynamic one.
Args:
flat_input: An iterable of time major input Tensors of shape [max_time,
batch_size, ...]. All inputs should have compatible batch sizes.
Returns:
The batch size in Python integer if available, or a scalar Tensor otherwise.
Raises:
ValueError: if there is any input with an invalid shape.
"""
for input_ in flat_input:
shape = input_.shape
if shape.ndims is None:
continue
if shape.ndims < 2:
raise ValueError(
"Expected input tensor %s to have rank at least 2" % input_)
batch_size = shape[1]
if batch_size is not None:
return batch_size
# Fallback to the dynamic batch size of the first input.
return array_ops.shape(flat_input[0])[1]
def _infer_state_dtype(explicit_dtype, state):
"""Infer the dtype of an RNN state.
Args:
explicit_dtype: explicitly declared dtype or None.
state: RNN's hidden state. Must be a Tensor or a nested iterable containing
Tensors.
Returns:
dtype: inferred dtype of hidden state.
Raises:
ValueError: if `state` has heterogeneous dtypes or is empty.
"""
if explicit_dtype is not None:
return explicit_dtype
elif nest.is_sequence(state):
inferred_dtypes = [element.dtype for element in nest.flatten(state)]
if not inferred_dtypes:
raise ValueError("Unable to infer dtype from empty state.")
all_same = all([x == inferred_dtypes[0] for x in inferred_dtypes])
if not all_same:
raise ValueError(
"State has tensors of different inferred_dtypes. Unable to infer a "
"single representative dtype.")
return inferred_dtypes[0]
else:
return state.dtype
# pylint: disable=unused-argument
def _rnn_step(
        time, sequence_length, min_sequence_length, max_sequence_length,
        zero_output, state, call_cell, state_size, skip_conditionals=False):
    """Compute one step of a dynamic RNN minibatch, honouring sequence lengths.

    For every batch row whose sequence has already ended
    (`time >= sequence_length[row]`) the output is taken from `zero_output`
    and the previous `state` is carried over; all other rows receive the
    values returned by `call_cell()`.

    With `skip_conditionals=False` the work is wrapped in two `cond` ops:

        if time >= max_sequence_length:
            return (zero_output, state)          # cell is not used
        if time < min_sequence_length:
            return call_cell()                   # no row has finished yet
        return per-row selection as described above

    With `skip_conditionals=True` the cell is always called and the per-row
    selection is always applied, without any `cond`.

    Args:
        time: current time step (scalar; `_dynamic_rnn_loop` passes an int32
            `Tensor`).
        sequence_length: int32 `Tensor` vector of size [batch_size].
        min_sequence_length: int32 `Tensor` scalar, min of sequence_length.
        max_sequence_length: int32 `Tensor` scalar, max of sequence_length.
        zero_output: `Tensor` (or nested structure of tensors) used as the
            output of rows that are already finished.
        state: Either a single `Tensor`, or a nested structure of tensors,
            holding the state from the previous step.
        call_cell: zero-argument callable returning `(new_output, new_state)`.
        state_size: The `cell.state_size` associated with the state. Not used
            by this function.
        skip_conditionals: Python bool, whether to skip the `cond` wrappers.

    Returns:
        A tuple `(final_output, final_state)` packed with the same structure
        as `zero_output` and `state` respectively.

    Raises:
        ValueError: if the combined output/state list does not have the
            expected length. `nest.assert_same_structure` is also applied to
            `state` and the state returned by the cell.
    """
    prev_state_flat = nest.flatten(state)
    zero_output_flat = nest.flatten(zero_output)
    num_outputs = len(zero_output_flat)

    def _select_per_row(finished_value, computed_value):
        # Scalars have no batch dimension to select over: take the new value.
        if finished_value.shape.ndims == 0:
            return computed_value
        row_is_finished = (time >= sequence_length)
        with ops.colocate_with(computed_value):
            return array_ops.where(
                row_is_finished, finished_value, computed_value)

    def _mask_finished_rows(outputs_flat, states_flat):
        # Outputs first, then states, all in one flat list.
        merged = []
        for zeros, computed in zip(zero_output_flat, outputs_flat):
            merged.append(_select_per_row(zeros, computed))
        for previous, computed in zip(prev_state_flat, states_flat):
            merged.append(_select_per_row(previous, computed))
        return merged

    def _run_cell_flat():
        # Invoke the cell and return its results as flat lists.
        cell_output, cell_state = call_cell()
        nest.assert_same_structure(state, cell_state)
        states_flat = nest.flatten(cell_state)
        outputs_flat = nest.flatten(cell_output)
        return outputs_flat, states_flat

    def _step_with_partial_copy():
        outputs_flat, states_flat = _run_cell_flat()
        return control_flow_ops.cond(
            # No row has finished yet: everything the cell produced is kept.
            time < min_sequence_length,
            lambda: outputs_flat + states_flat,
            # Some rows may have finished: select row by row.
            lambda: _mask_finished_rows(outputs_flat, states_flat))

    # TODO(ebrevdo): skipping these conditionals may cause a slowdown,
    # but benefits from removing cond() and its gradient. We should
    # profile with and without this switch here.
    if skip_conditionals:
        outputs_flat, states_flat = _run_cell_flat()
        combined = _mask_finished_rows(outputs_flat, states_flat)
    else:
        combined = control_flow_ops.cond(
            # Every row has finished: emit zeros and keep the old state.
            time >= max_sequence_length,
            lambda: zero_output_flat + prev_state_flat,
            # Otherwise the cell has to run.
            _step_with_partial_copy)

    if len(combined) != num_outputs + len(prev_state_flat):
        raise ValueError("Internal error: state and output were not concatenated "
                         "correctly.")

    outputs_out = combined[:num_outputs]
    states_out = combined[num_outputs:]

    # Re-apply the static shapes of the reference tensors to the results.
    for tensor, reference in zip(outputs_out, zero_output_flat):
        tensor.set_shape(reference.get_shape())
    for tensor, reference in zip(states_out, prev_state_flat):
        tensor.set_shape(reference.get_shape())

    packed_output = nest.pack_sequence_as(
        structure=zero_output, flat_sequence=outputs_out)
    packed_state = nest.pack_sequence_as(
        structure=state, flat_sequence=states_out)
    return packed_output, packed_state
def _reverse_seq(input_seq, lengths):
    """Reverse a list of per-time-step Tensors up to the given lengths.

    Args:
        input_seq: Sequence of seq_len tensors of dimension
            (batch_size, n_features), or nested tuples of tensors.
        lengths: A `Tensor` of dimension batch_size, containing lengths for
            each sequence in the batch. If None, the list is simply reversed.

    Returns:
        The time-reversed sequence, as a list with one entry per time step
        and the same nesting as the entries of `input_seq`.
    """
    if lengths is None:
        return list(reversed(input_seq))

    flattened_steps = tuple(nest.flatten(step) for step in input_seq)
    per_step_results = [[] for _ in range(len(input_seq))]

    # Each `component` holds the same flat element taken at every time step.
    for component in zip(*flattened_steps):
        common_shape = tensor_shape.unknown_shape(
            ndims=component[0].get_shape().ndims)
        for tensor in component:
            # The value returned by merge_with is not kept, so `common_shape`
            # itself is what gets applied to each tensor.
            common_shape.merge_with(tensor.get_shape())
            tensor.set_shape(common_shape)

        # (time, batch_size, depth) -> reversed along axis 0 per batch row.
        stacked = array_ops.stack(component)
        flipped = array_ops.reverse_sequence(stacked, lengths, 0, 1)

        # Back to one tensor per time step.
        unstacked = array_ops.unstack(flipped)
        for step_tensor, bucket in zip(unstacked, per_step_results):
            step_tensor.set_shape(common_shape)
            bucket.append(step_tensor)

    reversed_steps = []
    for step, bucket in zip(input_seq, per_step_results):
        reversed_steps.append(
            nest.pack_sequence_as(structure=step, flat_sequence=bucket))
    return reversed_steps
#
# def bidirectional_dynamic_rnn(cell_fw, cell_bw, inputs, sequence_length=None,
#
# initial_state_fw=None, initial_state_bw=None,
#
# dtype=None, parallel_iterations=None,
#
# swap_memory=False, time_major=False, scope=None):
#
# """Creates a dynamic version of bidirectional recurrent neural network.
#
#
#
# Takes input and builds independent forward and backward RNNs. The input_size
#
# of forward and backward cell must match. The initial state for both directions
#
# is zero by default (but can be set optionally) and no intermediate states are
#
# ever returned -- the network is fully unrolled for the given (passed in)
#
# length(s) of the sequence(s) or completely unrolled if length(s) is not
#
# given.
#
#
#
# Args:
#
# cell_fw: An instance of RNNCell, to be used for forward direction.
#
# cell_bw: An instance of RNNCell, to be used for backward direction.
#
# inputs: The RNN inputs.
#
# If time_major == False (default), this must be a tensor of shape:
#
# `[batch_size, max_time, ...]`, or a nested tuple of such elements.
#
# If time_major == True, this must be a tensor of shape:
#
# `[max_time, batch_size, ...]`, or a nested tuple of such elements.
#
# sequence_length: (optional) An int32/int64 vector, size `[batch_size]`,
#
# containing the actual lengths for each of the sequences in the batch.
#
# If not provided, all batch entries are assumed to be full sequences; and
#
# time reversal is applied from time `0` to `max_time` for each sequence.
#
# initial_state_fw: (optional) An initial state for the forward RNN.
#
# This must be a tensor of appropriate type and shape
#
# `[batch_size, cell_fw.state_size]`.
#
# If `cell_fw.state_size` is a tuple, this should be a tuple of
#
# tensors having shapes `[batch_size, s] for s in cell_fw.state_size`.
#
# initial_state_bw: (optional) Same as for `initial_state_fw`, but using
#
# the corresponding properties of `cell_bw`.
#
# dtype: (optional) The data type for the initial states and expected output.
#
# Required if initial_states are not provided or RNN states have a
#
# heterogeneous dtype.
#
# parallel_iterations: (Default: 32). The number of iterations to run in
#
# parallel. Those operations which do not have any temporal dependency
#
# and can be run in parallel, will be. This parameter trades off
#
# time for space. Values >> 1 use more memory but take less time,
#
# while smaller values use less memory but computations take longer.
#
# swap_memory: Transparently swap the tensors produced in forward inference
#
# but needed for back prop from GPU to CPU. This allows training RNNs
#
# which would typically not fit on a single GPU, with very minimal (or no)
#
# performance penalty.
#
# time_major: The shape format of the `inputs` and `outputs` Tensors.
#
# If true, these `Tensors` must be shaped `[max_time, batch_size, depth]`.
#
# If false, these `Tensors` must be shaped `[batch_size, max_time, depth]`.
#
# Using `time_major = True` is a bit more efficient because it avoids
#
# transposes at the beginning and end of the RNN calculation. However,
#
# most TensorFlow data is batch-major, so by default this function
#
# accepts input and emits output in batch-major form.
#
# scope: VariableScope for the created subgraph; defaults to
#
# "bidirectional_rnn"
#
#
#
# Returns:
#
# A tuple (outputs, output_states) where:
#
# outputs: A tuple (output_fw, output_bw) containing the forward and
#
# the backward rnn output `Tensor`.
#
# If time_major == False (default),
#
# output_fw will be a `Tensor` shaped:
#
# `[batch_size, max_time, cell_fw.output_size]`
#
# and output_bw will be a `Tensor` shaped:
#
# `[batch_size, max_time, cell_bw.output_size]`.
#
# If time_major == True,
#
# output_fw will be a `Tensor` shaped:
#
# `[max_time, batch_size, cell_fw.output_size]`
#
# and output_bw will be a `Tensor` shaped:
#
# `[max_time, batch_size, cell_bw.output_size]`.
#
# It returns a tuple instead of a single concatenated `Tensor`, unlike
#
# in the `bidirectional_rnn`. If the concatenated one is preferred,
#
# the forward and backward outputs can be concatenated as
#
# `tf.concat(outputs, 2)`.
#
# output_states: A tuple (output_state_fw, output_state_bw) containing
#
# the forward and the backward final states of bidirectional rnn.
#
#
#
# Raises:
#
# TypeError: If `cell_fw` or `cell_bw` is not an instance of `RNNCell`.
#
# """
#
#
#
# if not _like_rnncell(cell_fw):
#
# raise TypeError("cell_fw must be an instance of RNNCell")
#
# if not _like_rnncell(cell_bw):
#
# raise TypeError("cell_bw must be an instance of RNNCell")
#
#
#
# with vs.variable_scope(scope or "bidirectional_rnn"):
#
# # Forward direction
#
# with vs.variable_scope("fw") as fw_scope:
#
# output_fw, output_state_fw = dynamic_rnn(
#
# cell=cell_fw, inputs=inputs, sequence_length=sequence_length,
#
# initial_state=initial_state_fw, dtype=dtype,
#
# parallel_iterations=parallel_iterations, swap_memory=swap_memory,
#
# time_major=time_major, scope=fw_scope)
#
#
#
# # Backward direction
#
# if not time_major:
#
# time_dim = 1
#
# batch_dim = 0
#
# else:
#
# time_dim = 0
#
# batch_dim = 1
#
#
#
# def _reverse(input_, seq_lengths, seq_dim, batch_dim):
#
# if seq_lengths is not None:
#
# return array_ops.reverse_sequence(
#
# input=input_, seq_lengths=seq_lengths,
#
# seq_dim=seq_dim, batch_dim=batch_dim)
#
# else:
#
# return array_ops.reverse(input_, axis=[seq_dim])
#
#
#
# with vs.variable_scope("bw") as bw_scope:
#
# inputs_reverse = _reverse(
#
# inputs, seq_lengths=sequence_length,
#
# seq_dim=time_dim, batch_dim=batch_dim)
#
# tmp, output_state_bw = dynamic_rnn(
#
# cell=cell_bw, inputs=inputs_reverse, sequence_length=sequence_length,
#
# initial_state=initial_state_bw, dtype=dtype,
#
# parallel_iterations=parallel_iterations, swap_memory=swap_memory,
#
# time_major=time_major, scope=bw_scope)
#
#
#
# output_bw = _reverse(
#
# tmp, seq_lengths=sequence_length,
#
# seq_dim=time_dim, batch_dim=batch_dim)
#
#
#
# outputs = (output_fw, output_bw)
#
# output_states = (output_state_fw, output_state_bw)
#
#
#
# return (outputs, output_states)
#
def dynamic_rnn(cell, inputs, att_scores=None, sequence_length=None, initial_state=None,
                dtype=None, parallel_iterations=None, swap_memory=False,
                time_major=False, scope=None):
    """Creates a recurrent neural network specified by RNNCell `cell`.

    Performs fully dynamic unrolling of `inputs`. Compared with a plain
    dynamic RNN this variant accepts an optional `att_scores` tensor whose
    per-step slice is handed to the cell as an extra argument.

    Args:
        cell: An instance of RNNCell (checked by duck typing).
        inputs: The RNN inputs.
            If `time_major == False` (default), this must be a `Tensor` of
            shape `[batch_size, max_time, ...]`, or a nested tuple of such
            elements.
            If `time_major == True`, this must be a `Tensor` of shape
            `[max_time, batch_size, ...]`, or a nested tuple of such elements.
            The first two dimensions must match across all the inputs, but
            otherwise the ranks and other shape components may differ.
            The input to `cell` at each time step will be a `Tensor` or
            (possibly nested) tuple of Tensors each with dimensions
            `[batch_size, ...]`.
        att_scores: (optional) Tensor forwarded unchanged to the RNN loop.
            When given, the cell is called as
            `cell(input_t, state, att_scores[:, t, :])` at step `t`. It is
            always indexed with the time axis in position 1 and is not
            transposed here, whatever the value of `time_major`.
        sequence_length: (optional) An int32/int64 vector sized
            `[batch_size]`. Used to copy-through state and zero-out outputs
            when past a batch element's sequence length. So it's more for
            correctness than performance.
        initial_state: (optional) An initial state for the RNN.
            If `cell.state_size` is an integer, this must be a `Tensor` of
            appropriate type and shape `[batch_size, cell.state_size]`.
            If `cell.state_size` is a tuple, this should be a tuple of
            tensors having shapes `[batch_size, s] for s in cell.state_size`.
            When omitted, `cell.zero_state(batch_size, dtype)` is used.
        dtype: (optional) The data type for the initial state and expected
            output. Required if `initial_state` is not provided.
        parallel_iterations: The number of iterations to run in parallel;
            falls back to 32 when None (or any other falsy value). Values
            >> 1 use more memory but take less time, while smaller values use
            less memory but computations take longer.
        swap_memory: Transparently swap the tensors produced in forward
            inference but needed for back prop from GPU to CPU.
        time_major: The shape format of the `inputs` and `outputs` Tensors.
            If true, these `Tensors` must be shaped
            `[max_time, batch_size, depth]`. If false, these `Tensors` must
            be shaped `[batch_size, max_time, depth]`; the inputs are
            transposed before the loop and the outputs transposed back after.
        scope: VariableScope for the created subgraph; defaults to "rnn".
            The scope is always opened with `AUTO_REUSE`.

    Returns:
        A pair (outputs, state) where:
        outputs: The RNN output `Tensor`, shaped
            `[batch_size, max_time, cell.output_size]` when
            `time_major == False` and
            `[max_time, batch_size, cell.output_size]` otherwise. If
            `cell.output_size` is a (possibly nested) tuple, `outputs` has the
            same structure.
        state: The final state, with the same structure as the initial state.

    Raises:
        TypeError: If `cell` does not look like an RNNCell.
        ValueError: If `sequence_length` has a known rank other than 1, or if
            neither `initial_state` nor `dtype` is given. The helpers used
            here (`_transpose_batch_time`, `_dynamic_rnn_loop`) raise
            ValueError for inputs whose static shape is unsuitable.
    """
    if not _like_rnncell(cell):
        raise TypeError("cell must be an instance of RNNCell")

    # By default, time_major==False and inputs are batch-major: shaped
    #   [batch, time, depth]
    # For internal calculations, we transpose to [time, batch, depth]
    flat_input = nest.flatten(inputs)

    if not time_major:
        # (B,T,D) => (T,B,D)
        flat_input = [ops.convert_to_tensor(input_) for input_ in flat_input]
        flat_input = tuple(_transpose_batch_time(input_) for input_ in flat_input)

    parallel_iterations = parallel_iterations or 32
    if sequence_length is not None:
        sequence_length = math_ops.to_int32(sequence_length)
        if sequence_length.get_shape().ndims not in (None, 1):
            raise ValueError(
                "sequence_length must be a vector of length batch_size, "
                "but saw shape: %s" % sequence_length.get_shape())
        sequence_length = array_ops.identity(  # Just to find it in the graph.
            sequence_length, name="sequence_length")

    # `AUTO_REUSE` is looked up on the top-level `tf` module first and on
    # `tf.compat.v1` when that attribute is missing. Only the missing
    # attribute is handled, so other failures are not masked.
    try:
        reuse = tf.AUTO_REUSE
    except AttributeError:
        reuse = tf.compat.v1.AUTO_REUSE

    # Create a new scope in which the caching device is either
    # determined by the parent scope, or is set to place the cached
    # Variable using the same placement as for the rest of the RNN.
    with vs.variable_scope(scope or "rnn", reuse=reuse) as varscope:  # TODO: user defined reuse
        if varscope.caching_device is None:
            varscope.set_caching_device(lambda op: op.device)
        batch_size = _best_effort_input_batch_size(flat_input)

        if initial_state is not None:
            state = initial_state
        else:
            if not dtype:
                raise ValueError("If there is no initial_state, you must give a dtype.")
            state = cell.zero_state(batch_size, dtype)

        def _assert_has_shape(x, shape):
            # Runtime assertion that the dynamic shape of `x` equals `shape`.
            x_shape = array_ops.shape(x)
            packed_shape = array_ops.stack(shape)
            return control_flow_ops.Assert(
                math_ops.reduce_all(math_ops.equal(x_shape, packed_shape)),
                ["Expected shape for Tensor %s is " % x.name,
                 packed_shape, " but saw shape: ", x_shape])

        if sequence_length is not None:
            # Perform some shape validation
            with ops.control_dependencies(
                    [_assert_has_shape(sequence_length, [batch_size])]):
                sequence_length = array_ops.identity(
                    sequence_length, name="CheckSeqLen")

        inputs = nest.pack_sequence_as(structure=inputs, flat_sequence=flat_input)

        (outputs, final_state) = _dynamic_rnn_loop(
            cell,
            inputs,
            state,
            parallel_iterations=parallel_iterations,
            swap_memory=swap_memory,
            att_scores=att_scores,
            sequence_length=sequence_length,
            dtype=dtype)

        # Outputs of _dynamic_rnn_loop are always shaped [time, batch, depth].
        # If we are performing batch-major calculations, transpose output back
        # to shape [batch, time, depth]
        if not time_major:
            # (T,B,D) => (B,T,D)
            outputs = nest.map_structure(_transpose_batch_time, outputs)

        return (outputs, final_state)
def _dynamic_rnn_loop(cell,
                      inputs,
                      initial_state,
                      parallel_iterations,
                      swap_memory,
                      att_scores=None,
                      sequence_length=None,
                      dtype=None):
    """Internal implementation of Dynamic RNN.

    Args:
        cell: An instance of RNNCell.
        inputs: A `Tensor` of shape [time, batch_size, input_size], or a
            nested tuple of such elements.
        initial_state: A `Tensor` of shape `[batch_size, state_size]`, or if
            `cell.state_size` is a tuple, then this should be a tuple of
            tensors having shapes `[batch_size, s] for s in cell.state_size`.
        parallel_iterations: Positive Python int.
        swap_memory: A Python boolean.
        att_scores: (optional) Tensor carried through the loop unchanged.
            When given, step `t` calls
            `cell(input_t, state, att_scores[:, t, :])`; otherwise the cell
            is called as `cell(input_t, state)`.
        sequence_length: (optional) An `int32` `Tensor` of shape [batch_size].
        dtype: (optional) Expected dtype of output. If not specified,
            inferred from initial_state.

    Returns:
        Tuple `(final_outputs, final_state)`.
        final_outputs:
            A `Tensor` of shape `[time, batch_size, cell.output_size]`. If
            `cell.output_size` is a (possibly nested) tuple of ints or
            `TensorShape` objects, then this returns a (possibly nested)
            tuple of Tensors matching the corresponding shapes.
        final_state:
            A `Tensor`, or possibly nested tuple of Tensors, matching in
            length and shapes to `initial_state`.

    Raises:
        ValueError: If the input depth cannot be inferred via shape inference
            from the inputs, or if the inputs disagree on their static time
            or batch dimension.
    """
    state = initial_state
    assert isinstance(parallel_iterations, int), "parallel_iterations must be int"

    state_size = cell.state_size

    flat_input = nest.flatten(inputs)
    flat_output_size = nest.flatten(cell.output_size)

    # Dynamic number of time steps and best-effort batch size.
    time_steps = array_ops.shape(flat_input[0])[0]
    batch_size = _best_effort_input_batch_size(flat_input)

    inputs_got_shape = tuple(
        tensor.get_shape().with_rank_at_least(3) for tensor in flat_input)

    const_time_steps, const_batch_size = inputs_got_shape[0].as_list()[:2]

    # Every input must have a fully defined depth and agree with the first
    # input on its static time and batch dimensions.
    for static_shape in inputs_got_shape:
        if not static_shape[2:].is_fully_defined():
            raise ValueError(
                "Input size (depth of inputs) must be accessible via shape inference,"
                " but saw value None.")
        if const_time_steps != static_shape[0]:
            raise ValueError(
                "Time steps is not the same for all the elements in the input in a "
                "batch.")
        if const_batch_size != static_shape[1]:
            raise ValueError(
                "Batch_size is not the same for all the elements in the input.")

    # Prepare dynamic conditional copying of state & output
    def _make_zero_output(output_size):
        full_shape = _concat(batch_size, output_size)
        return array_ops.zeros(
            array_ops.stack(full_shape), _infer_state_dtype(dtype, state))

    flat_zero_output = tuple(
        _make_zero_output(output_size) for output_size in flat_output_size)
    zero_output = nest.pack_sequence_as(structure=cell.output_size,
                                        flat_sequence=flat_zero_output)

    if sequence_length is not None:
        min_sequence_length = math_ops.reduce_min(sequence_length)
        max_sequence_length = math_ops.reduce_max(sequence_length)

    time = array_ops.constant(0, dtype=dtypes.int32, name="time")

    with ops.name_scope("dynamic_rnn") as scope:
        base_name = scope

    def _new_tensor_array(name, element_dtype):
        return tensor_array_ops.TensorArray(dtype=element_dtype,
                                            size=time_steps,
                                            tensor_array_name=base_name + name)

    output_ta = tuple(
        _new_tensor_array("output_%d" % index, _infer_state_dtype(dtype, state))
        for index, _ in enumerate(flat_output_size))
    empty_input_ta = tuple(
        _new_tensor_array("input_%d" % index, tensor.dtype)
        for index, tensor in enumerate(flat_input))
    # One TensorArray per flat input, holding that input split along time.
    input_ta = tuple(
        array.unstack(tensor)
        for array, tensor in zip(empty_input_ta, flat_input))

    def _time_step(time, output_ta_t, state, att_scores=None):
        """Take a time step of the dynamic RNN.

        Args:
            time: int32 scalar Tensor.
            output_ta_t: List of `TensorArray`s that represent the output.
            state: nested tuple of vector tensors that represent the state.
            att_scores: (optional) tensor whose slice `[:, time, :]` is given
                to the cell as a third argument.

        Returns:
            The tuple (time + 1, output_ta_t with updated flow, new_state),
            with `att_scores` appended when it was supplied.
        """
        step_inputs = tuple(array.read(time) for array in input_ta)
        # Restore some shape information
        for tensor, static_shape in zip(step_inputs, inputs_got_shape):
            tensor.set_shape(static_shape[1:])
        input_t = nest.pack_sequence_as(
            structure=inputs, flat_sequence=step_inputs)

        if att_scores is None:
            call_cell = lambda: cell(input_t, state)
        else:
            att_score = att_scores[:, time, :]
            call_cell = lambda: cell(input_t, state, att_score)

        if sequence_length is None:
            (output, new_state) = call_cell()
        else:
            (output, new_state) = _rnn_step(
                time=time,
                sequence_length=sequence_length,
                min_sequence_length=min_sequence_length,
                max_sequence_length=max_sequence_length,
                zero_output=zero_output,
                state=state,
                call_cell=call_cell,
                state_size=state_size,
                skip_conditionals=True)

        # Write each flat output into its TensorArray at the current step.
        flat_output = nest.flatten(output)
        output_ta_t = tuple(
            array.write(time, value)
            for array, value in zip(output_ta_t, flat_output))

        next_vars = (time + 1, output_ta_t, new_state)
        if att_scores is not None:
            next_vars = next_vars + (att_scores,)
        return next_vars

    # `att_scores` rides along as a fourth loop variable only when provided.
    loop_vars = (time, output_ta, state)
    if att_scores is not None:
        loop_vars = loop_vars + (att_scores,)

    loop_result = control_flow_ops.while_loop(
        cond=lambda time, *_: time < time_steps,
        body=_time_step,
        loop_vars=loop_vars,
        parallel_iterations=parallel_iterations,
        swap_memory=swap_memory)
    output_final_ta = loop_result[1]
    final_state = loop_result[2]

    # Unpack final output if not using output tuples.
    final_outputs = tuple(array.stack() for array in output_final_ta)

    # Restore some shape information
    for stacked, output_size in zip(final_outputs, flat_output_size):
        static_shape = _concat(
            [const_time_steps, const_batch_size], output_size, static=True)
        stacked.set_shape(static_shape)

    final_outputs = nest.pack_sequence_as(
        structure=cell.output_size, flat_sequence=final_outputs)

    return (final_outputs, final_state)
================================================
FILE: deepctr/contrib/utils.py
================================================
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import variable_scope as vs
from tensorflow.python.ops.rnn_cell import *
from tensorflow.python.util import nest
# Variable names passed to `vs.get_variable` by `_Linear_`: the first names the
# bias vector, the second names the weight matrix.
_BIAS_VARIABLE_NAME = "bias"
_WEIGHTS_VARIABLE_NAME = "kernel"
class _Linear_(object):
    """Linear map: sum_i(args[i] * W[i]), where W[i] is a variable.

    The weight matrix (and, optionally, the bias) are created once in the
    constructor inside the current variable scope; calling the instance
    applies them to tensors shaped like the ones given at construction time.
    The variables take the dtype of the first element of `args`.

    Args:
        args: a 2D Tensor or a list of 2D, batch x n, Tensors.
        output_size: int, second dimension of weight variable.
        build_bias: boolean, whether to build a bias variable.
        bias_initializer: starting value to initialize the bias
            (default is all zeros).
        kernel_initializer: starting value to initialize the weight.

    Raises:
        ValueError: if `args` is None or an empty sequence, if any argument
            is not 2D, or if the second dimension of any argument is unknown.
    """

    def __init__(self,
                 args,
                 output_size,
                 build_bias,
                 bias_initializer=None,
                 kernel_initializer=None):
        self._build_bias = build_bias

        if args is None or (nest.is_sequence(args) and not args):
            raise ValueError("`args` must be specified")
        if not nest.is_sequence(args):
            args = [args]
            self._is_sequence = False
        else:
            self._is_sequence = True

        # Calculate the total size of arguments on dimension 1.
        total_arg_size = 0
        shapes = [a.get_shape() for a in args]
        for shape in shapes:
            if shape.ndims != 2:
                raise ValueError(
                    "linear is expecting 2D arguments: %s" % shapes)
            # `shape[1]` is either a plain int/None or a `Dimension` object
            # depending on the TensorFlow shape behaviour in effect. Unwrap
            # `.value` when present so an unknown depth is detected both ways.
            depth = getattr(shape[1], "value", shape[1])
            if depth is None:
                raise ValueError("linear expects shape[1] to be provided for shape %s, "
                                 "but saw %s" % (shape, shape[1]))
            total_arg_size += int(depth)

        # All variables take the dtype of the first argument.
        dtype = args[0].dtype

        scope = vs.get_variable_scope()
        with vs.variable_scope(scope) as outer_scope:
            self._weights = vs.get_variable(
                _WEIGHTS_VARIABLE_NAME, [total_arg_size, output_size],
                dtype=dtype,
                initializer=kernel_initializer)
            if build_bias:
                with vs.variable_scope(outer_scope) as inner_scope:
                    # The bias is created with the partitioner cleared.
                    inner_scope.set_partitioner(None)
                    if bias_initializer is None:
                        bias_initializer = init_ops.constant_initializer(
                            0.0, dtype=dtype)
                    self._biases = vs.get_variable(
                        _BIAS_VARIABLE_NAME, [output_size],
                        dtype=dtype,
                        initializer=bias_initializer)

    def __call__(self, args):
        """Apply the linear map.

        Args:
            args: a single 2D Tensor if the instance was built from a single
                Tensor, otherwise a sequence of 2D Tensors that are
                concatenated along axis 1 before the multiplication.

        Returns:
            The product with the weight matrix, plus the bias when
            `build_bias` was true.
        """
        if not self._is_sequence:
            args = [args]

        if len(args) == 1:
            res = math_ops.matmul(args[0], self._weights)
        else:
            res = math_ops.matmul(array_ops.concat(args, 1), self._weights)
        if self._build_bias:
            res = nn_ops.bias_add(res, self._biases)
        return res
# Use TensorFlow's own `_Linear` when `rnn_cell_impl` still provides it and
# fall back to the local `_Linear_` otherwise. Only import failures trigger the
# fallback, so unrelated errors raised while importing are not silenced.
try:
    from tensorflow.python.ops.rnn_cell_impl import _Linear
except ImportError:
    _Linear = _Linear_
class QAAttGRUCell(RNNCell):
    """GRU-style cell whose state update is weighted by an attention score.

    The reset gate and the candidate state are computed as in a Gated
    Recurrent Unit (cf. http://arxiv.org/abs/1406.1078). The new state is
    then ``(1 - att_score) * state + att_score * candidate``; the update gate
    produced by the gate projection is not used.

    Args:
      num_units: int, The number of units in the cell.
      activation: Nonlinearity to use.  Default: `tanh`.
      reuse: (optional) value forwarded to the base cell as `_reuse`.
      kernel_initializer: (optional) The initializer handed to the gate and
        candidate projections for their weights.
      bias_initializer: (optional) The initializer for the biases. When it is
        None the gate biases are initialised to 1.0 and None is passed through
        to the candidate projection.
    """

    def __init__(self,
                 num_units,
                 activation=None,
                 reuse=None,
                 kernel_initializer=None,
                 bias_initializer=None):
        super(QAAttGRUCell, self).__init__(_reuse=reuse)
        self._num_units = num_units
        self._activation = activation or math_ops.tanh
        self._kernel_initializer = kernel_initializer
        self._bias_initializer = bias_initializer
        # The projections are created lazily on the first `call`.
        self._gate_linear = None
        self._candidate_linear = None

    @property
    def state_size(self):
        """Size of the recurrent state (equal to `num_units`)."""
        return self._num_units

    @property
    def output_size(self):
        """Size of the cell output (equal to `num_units`)."""
        return self._num_units

    def __call__(self, inputs, state, att_score):
        """Run one step; delegates directly to `call`."""
        return self.call(inputs, state, att_score)

    def call(self, inputs, state, att_score=None):
        """Run one attention-weighted GRU step.

        Args:
          inputs: input tensor for this step.
          state: previous state tensor.
          att_score: attention score used to blend the previous state with
            the candidate state.

        Returns:
          A pair ``(new_h, new_h)``: the new state, returned both as output
          and as next state.
        """
        if self._gate_linear is None:
            gate_bias_init = self._bias_initializer
            if gate_bias_init is None:
                gate_bias_init = init_ops.constant_initializer(
                    1.0, dtype=inputs.dtype)
            # Reset gate and update gate share one projection.
            with vs.variable_scope("gates"):
                self._gate_linear = _Linear(
                    [inputs, state],
                    2 * self._num_units,
                    True,
                    bias_initializer=gate_bias_init,
                    kernel_initializer=self._kernel_initializer)

        gates = math_ops.sigmoid(self._gate_linear([inputs, state]))
        # Only the reset gate is used; att_score takes the place of the
        # update gate in the final blend.
        reset_gate, _unused_update_gate = array_ops.split(
            value=gates, num_or_size_splits=2, axis=1)
        reset_state = reset_gate * state

        if self._candidate_linear is None:
            with vs.variable_scope("candidate"):
                self._candidate_linear = _Linear(
                    [inputs, reset_state],
                    self._num_units,
                    True,
                    bias_initializer=self._bias_initializer,
                    kernel_initializer=self._kernel_initializer)

        candidate = self._activation(
            self._candidate_linear([inputs, reset_state]))
        next_state = (1. - att_score) * state + att_score * candidate
        return next_state, next_state
class VecAttGRUCell(RNNCell):
    """GRU-style cell whose update gate is rescaled by an attention score.

    The gates and the candidate state are computed as in a Gated Recurrent
    Unit (cf. http://arxiv.org/abs/1406.1078). Before the final blend the
    update gate is replaced by ``(1 - att_score) * u`` and the new state is
    ``u * state + (1 - u) * candidate``.

    Args:
      num_units: int, The number of units in the cell.
      activation: Nonlinearity to use.  Default: `tanh`.
      reuse: (optional) value forwarded to the base cell as `_reuse`.
      kernel_initializer: (optional) The initializer handed to the gate and
        candidate projections for their weights.
      bias_initializer: (optional) The initializer for the biases. When it is
        None the gate biases are initialised to 1.0 and None is passed through
        to the candidate projection.
    """

    def __init__(self,
                 num_units,
                 activation=None,
                 reuse=None,
                 kernel_initializer=None,
                 bias_initializer=None):
        super(VecAttGRUCell, self).__init__(_reuse=reuse)
        self._num_units = num_units
        self._activation = activation or math_ops.tanh
        self._kernel_initializer = kernel_initializer
        self._bias_initializer = bias_initializer
        # The projections are created lazily on the first `call`.
        self._gate_linear = None
        self._candidate_linear = None

    @property
    def state_size(self):
        """Size of the recurrent state (equal to `num_units`)."""
        return self._num_units

    @property
    def output_size(self):
        """Size of the cell output (equal to `num_units`)."""
        return self._num_units

    def __call__(self, inputs, state, att_score):
        """Run one step; delegates directly to `call`."""
        return self.call(inputs, state, att_score)

    def call(self, inputs, state, att_score=None):
        """Run one GRU step with an attention-scaled update gate.

        Args:
          inputs: input tensor for this step.
          state: previous state tensor.
          att_score: attention score used to rescale the update gate.

        Returns:
          A pair ``(new_h, new_h)``: the new state, returned both as output
          and as next state.
        """
        if self._gate_linear is None:
            gate_bias_init = self._bias_initializer
            if gate_bias_init is None:
                gate_bias_init = init_ops.constant_initializer(
                    1.0, dtype=inputs.dtype)
            # Reset gate and update gate share one projection.
            with vs.variable_scope("gates"):
                self._gate_linear = _Linear(
                    [inputs, state],
                    2 * self._num_units,
                    True,
                    bias_initializer=gate_bias_init,
                    kernel_initializer=self._kernel_initializer)

        gates = math_ops.sigmoid(self._gate_linear([inputs, state]))
        reset_gate, update_gate = array_ops.split(
            value=gates, num_or_size_splits=2, axis=1)
        reset_state = reset_gate * state

        if self._candidate_linear is None:
            with vs.variable_scope("candidate"):
                self._candidate_linear = _Linear(
                    [inputs, reset_state],
                    self._num_units,
                    True,
                    bias_initializer=self._bias_initializer,
                    kernel_initializer=self._kernel_initializer)

        candidate = self._activation(
            self._candidate_linear([inputs, reset_state]))
        # Scale the update gate by the attention score before blending.
        update_gate = (1.0 - att_score) * update_gate
        next_state = update_gate * state + (1 - update_gate) * candidate
        return next_state, next_state
================================================
FILE: deepctr/estimator/__init__.py
================================================
from .models import *
================================================
FILE: deepctr/estimator/feature_column.py
================================================
import tensorflow as tf
from tensorflow.python.feature_column.feature_column import _EmbeddingColumn
from .utils import LINEAR_SCOPE_NAME, variable_scope, get_collection, get_GraphKeys, input_layer, get_losses
def linear_model(features, linear_feature_columns):
    """Build the linear-model logits with the API matching the TF version.

    :param features: feature tensors passed on to ``linear_model``.
    :param linear_feature_columns: feature columns used by the linear model.
    :return: the tensor returned by ``feature_column.linear_model``.
    """
    # Compare the numeric major version. Comparing the raw version strings is
    # lexicographic, so a version such as "10.0.0" would sort below "2.0.0".
    tf_major_version = int(tf.__version__.split('.')[0])
    if tf_major_version >= 2:
        return tf.compat.v1.feature_column.linear_model(features, linear_feature_columns)
    return tf.feature_column.linear_model(features, linear_feature_columns)
def get_linear_logit(features, linear_feature_columns, l2_reg_linear=0):
    """Compute the logits of the linear part inside the linear variable scope.

    :param features: feature tensors passed on to ``linear_model``.
    :param linear_feature_columns: feature columns of the linear part; when
        empty, a single ``bias_weights`` variable is used as the logit.
    :param l2_reg_linear: L2 regularisation strength; when positive, an L2
        loss is registered for every trainable variable of the linear scope
        except the last one in the collection.
    :return: the linear logits.
    """
    with variable_scope(LINEAR_SCOPE_NAME):
        if linear_feature_columns:
            linear_logits = linear_model(features, linear_feature_columns)
        else:
            linear_logits = tf.Variable([[0.0]], name='bias_weights')

        if l2_reg_linear > 0:
            linear_vars = get_collection(get_GraphKeys().TRAINABLE_VARIABLES, LINEAR_SCOPE_NAME)
            # The last collected variable is left unregularised.
            for var in linear_vars[:-1]:
                loss_name = var.name.split(":")[0] + "_l2loss"
                penalty = l2_reg_linear * tf.nn.l2_loss(var, name=loss_name)
                get_losses().add_loss(penalty, get_GraphKeys().REGULARIZATION_LOSSES)
    return linear_logits
def input_from_feature_columns(features, feature_columns, l2_reg_embedding=0.0):
    """Split feature columns into embedding tensors and dense value tensors.

    :param features: feature tensors passed on to ``input_layer``.
    :param feature_columns: iterable of feature columns to materialise.
    :param l2_reg_embedding: L2 regularisation strength; when positive, an L2
        loss on each embedding output is added to the regularisation losses.
    :return: ``(sparse_emb_list, dense_value_list)``; every embedding tensor
        gets an extra axis inserted at position 1.
    """
    sparse_emb_list = []
    dense_value_list = []
    for column in feature_columns:
        column_output = input_layer(features, [column])
        if not is_embedding(column):
            dense_value_list.append(column_output)
            continue

        embedding = tf.expand_dims(column_output, axis=1)
        sparse_emb_list.append(embedding)
        if l2_reg_embedding > 0:
            penalty = l2_reg_embedding * tf.nn.l2_loss(embedding, name=column.name + "_l2loss")
            get_losses().add_loss(penalty, get_GraphKeys().REGULARIZATION_LOSSES)
    return sparse_emb_list, dense_value_list
def is_embedding(feature_column):
    """Return True when ``feature_column`` is an embedding column.

    Both the v1 ``_EmbeddingColumn`` class and, when it can be imported, the
    v2 ``EmbeddingColumn`` class are accepted.
    """
    embedding_types = [_EmbeddingColumn]
    try:
        from tensorflow.python.feature_column.feature_column_v2 import EmbeddingColumn
    except ImportError:
        # No v2 feature columns available: only the v1 class is checked.
        pass
    else:
        embedding_types.append(EmbeddingColumn)
    return isinstance(feature_column, tuple(embedding_types))
================================================
FILE: deepctr/estimator/inputs.py
================================================
import tensorflow as tf
def input_fn_pandas(df, features, label=None, batch_size=256, num_epochs=1, shuffle=False, queue_capacity_factor=10,
                    num_threads=1):
    """Create an estimator input function from a pandas DataFrame.

    :param df: DataFrame holding the feature columns (and the label column).
    :param features: column selection applied to ``df`` to get the inputs.
    :param label: name of the label column, or None for unlabeled input.
    :param batch_size: batch size passed to ``pandas_input_fn``.
    :param num_epochs: number of epochs passed to ``pandas_input_fn``.
    :param shuffle: whether the input is shuffled.
    :param queue_capacity_factor: the queue capacity is
        ``batch_size * queue_capacity_factor``.
    :param num_threads: number of reader threads.
    :return: the function returned by ``pandas_input_fn``.
    """
    y = df[label] if label is not None else None

    # Compare the numeric major version. Comparing the raw version strings is
    # lexicographic, so a version such as "10.0.0" would sort below "2.0.0".
    tf_major_version = int(tf.__version__.split('.')[0])
    if tf_major_version >= 2:
        inputs_api = tf.compat.v1.estimator.inputs
    else:
        inputs_api = tf.estimator.inputs

    return inputs_api.pandas_input_fn(df[features], y, batch_size=batch_size,
                                      num_epochs=num_epochs,
                                      shuffle=shuffle,
                                      queue_capacity=batch_size * queue_capacity_factor,
                                      num_threads=num_threads)
def input_fn_tfrecord(filenames, feature_description, label=None, batch_size=256, num_epochs=1, num_parallel_calls=8,
                      shuffle_factor=10, prefetch_factor=1,
                      ):
    """Create an estimator input function that reads TFRecord files.

    :param filenames: file name(s) handed to ``tf.data.TFRecordDataset``.
    :param feature_description: feature spec used to parse each example.
    :param label: key of the label inside the parsed example, or None.
    :param batch_size: batch size of the dataset.
    :param num_epochs: number of times the dataset is repeated.
    :param num_parallel_calls: parallelism of the parsing ``map`` step.
    :param shuffle_factor: when positive, the dataset is shuffled with a
        buffer of ``batch_size * shuffle_factor`` elements.
    :param prefetch_factor: when positive, the dataset prefetches with a
        buffer size of ``batch_size * prefetch_factor``.
    :return: a zero-argument function returning the next element of a
        one-shot iterator over the dataset.
    """

    def _parse_examples(serial_exmp):
        # Fall back to the `tf.io` location when the top-level function is
        # not available.
        try:
            parsed = tf.parse_single_example(serial_exmp, features=feature_description)
        except AttributeError:
            parsed = tf.io.parse_single_example(serial_exmp, features=feature_description)
        if label is None:
            return parsed
        target = parsed.pop(label)
        return parsed, target

    def input_fn():
        records = tf.data.TFRecordDataset(filenames)
        records = records.map(_parse_examples, num_parallel_calls=num_parallel_calls)
        if shuffle_factor > 0:
            records = records.shuffle(buffer_size=batch_size * shuffle_factor)

        records = records.repeat(num_epochs)
        records = records.batch(batch_size)

        if prefetch_factor > 0:
            records = records.prefetch(buffer_size=batch_size * prefetch_factor)

        # Fall back to the compat.v1 helper when the dataset has no
        # `make_one_shot_iterator` method.
        try:
            one_shot = records.make_one_shot_iterator()
        except AttributeError:
            one_shot = tf.compat.v1.data.make_one_shot_iterator(records)
        return one_shot.get_next()

    return input_fn
================================================
FILE: deepctr/estimator/models/__init__.py
================================================
from .afm import AFMEstimator
from .autoint import AutoIntEstimator
from .ccpm import CCPMEstimator
from .dcn import DCNEstimator
from .deepfm import DeepFMEstimator
from .fwfm import FwFMEstimator
from .fibinet import FiBiNETEstimator
from .fnn import FNNEstimator
from .nfm import NFMEstimator
from .pnn import PNNEstimator
from .wdl import WDLEstimator
from .xdeepfm import xDeepFMEstimator
from .deepfefm import DeepFEFMEstimator
================================================
FILE: deepctr/estimator/models/afm.py
================================================
# -*- coding:utf-8 -*-
"""
Author:
Weichen Shen, weichenswc@163.com
Reference:
[1] Xiao J, Ye H, He X, et al. Attentional factorization machines: Learning the weight of feature interactions via attention networks[J]. arXiv preprint arXiv:1708.04617, 2017.
(https://arxiv.org/abs/1708.04617)
"""
import tensorflow as tf
from ..feature_column import get_linear_logit, input_from_feature_columns
from ..utils import deepctr_model_fn, DNN_SCOPE_NAME, variable_scope
from ...layers.interaction import AFMLayer, FM
from ...layers.utils import concat_func
def AFMEstimator(linear_feature_columns, dnn_feature_columns, use_attention=True, attention_factor=8,
                 l2_reg_linear=1e-5, l2_reg_embedding=1e-5, l2_reg_att=1e-5, afm_dropout=0, seed=1024,
                 task='binary', model_dir=None, config=None, linear_optimizer='Ftrl',
                 dnn_optimizer='Adagrad', training_chief_hooks=None):
    """Build an estimator for the Attentional Factorization Machine.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param use_attention: bool. When ``False`` a plain ``FM`` layer replaces the attention layer.
    :param attention_factor: positive integer, units in attention net
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_att: float. L2 regularizer strength applied to attention net
    :param afm_dropout: float in [0,1), Fraction of the attention net output units to dropout.
    :param seed: integer, to use as random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :param model_dir: model directory forwarded to ``tf.estimator.Estimator``.
    :param config: run configuration forwarded to ``tf.estimator.Estimator``.
    :param linear_optimizer: optimizer for the linear part, forwarded to ``deepctr_model_fn``. Defaults to ``'Ftrl'``.
    :param dnn_optimizer: optimizer for the deep part, forwarded to ``deepctr_model_fn``. Defaults to ``'Adagrad'``.
    :param training_chief_hooks: hooks forwarded to ``deepctr_model_fn``.
    :return: A Tensorflow Estimator instance.
    """

    def _model_fn(features, labels, mode, config):
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        linear_logits = get_linear_logit(features, linear_feature_columns, l2_reg_linear=l2_reg_linear)

        with variable_scope(DNN_SCOPE_NAME):
            embeddings, _ = input_from_feature_columns(features, dnn_feature_columns,
                                                       l2_reg_embedding=l2_reg_embedding)
            if not use_attention:
                # Without attention the interaction is a plain FM.
                fm_logit = FM()(concat_func(embeddings, axis=1))
            else:
                afm_layer = AFMLayer(attention_factor, l2_reg_att, afm_dropout, seed)
                fm_logit = afm_layer(embeddings, training=is_training)

        logits = linear_logits + fm_logit

        return deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer,
                                training_chief_hooks=training_chief_hooks)

    return tf.estimator.Estimator(_model_fn, model_dir=model_dir, config=config)
================================================
FILE: deepctr/estimator/models/autoint.py
================================================
# -*- coding:utf-8 -*-
"""
Author:
Weichen Shen, weichenswc@163.com
Reference:
[1] Song W, Shi C, Xiao Z, et al. AutoInt: Automatic Feature Interaction Learning via Self-Attentive Neural Networks[J]. arXiv preprint arXiv:1810.11921, 2018.(https://arxiv.org/abs/1810.11921)
"""
import tensorflow as tf
from ..feature_column import get_linear_logit, input_from_feature_columns
from ..utils import deepctr_model_fn, DNN_SCOPE_NAME, variable_scope
from ...layers.core import DNN
from ...layers.interaction import InteractingLayer
from ...layers.utils import concat_func, combined_dnn_input
def AutoIntEstimator(linear_feature_columns, dnn_feature_columns, att_layer_num=3, att_embedding_size=8, att_head_num=2,
                     att_res=True,
                     dnn_hidden_units=(256, 128, 64), dnn_activation='relu', l2_reg_linear=1e-5,
                     l2_reg_embedding=1e-5, l2_reg_dnn=0, dnn_use_bn=False, dnn_dropout=0, seed=1024,
                     task='binary', model_dir=None, config=None, linear_optimizer='Ftrl',
                     dnn_optimizer='Adagrad', training_chief_hooks=None):
    """Build an estimator for the AutoInt network.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param att_layer_num: int. The number of stacked InteractingLayer instances.
    :param att_embedding_size: int. The embedding size in multi-head self-attention network.
    :param att_head_num: int. The head number in multi-head self-attention network.
    :param att_res: bool. Whether or not use standard residual connections before output.
    :param dnn_hidden_units: list, list of positive integer or empty list, the layer number and units in each layer of DNN
    :param dnn_activation: Activation function to use in DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param seed: integer, to use as random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :param model_dir: model directory forwarded to ``tf.estimator.Estimator``.
    :param config: run configuration forwarded to ``tf.estimator.Estimator``.
    :param linear_optimizer: optimizer for the linear part, forwarded to ``deepctr_model_fn``. Defaults to ``'Ftrl'``.
    :param dnn_optimizer: optimizer for the deep part, forwarded to ``deepctr_model_fn``. Defaults to ``'Adagrad'``.
    :param training_chief_hooks: hooks forwarded to ``deepctr_model_fn``.
    :return: A Tensorflow Estimator instance.
    """

    def _model_fn(features, labels, mode, config):
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        linear_logits = get_linear_logit(features, linear_feature_columns, l2_reg_linear=l2_reg_linear)

        with variable_scope(DNN_SCOPE_NAME):
            sparse_embedding_list, dense_value_list = input_from_feature_columns(features, dnn_feature_columns,
                                                                                 l2_reg_embedding=l2_reg_embedding)
            # Stack the interacting layers on the concatenated embeddings.
            att_hidden = concat_func(sparse_embedding_list, axis=1)
            for _ in range(att_layer_num):
                interacting = InteractingLayer(att_embedding_size, att_head_num, att_res)
                att_hidden = interacting(att_hidden)
            att_output = tf.keras.layers.Flatten()(att_hidden)

            dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)

            use_deep = len(dnn_hidden_units) > 0
            use_att = att_layer_num > 0
            if not (use_deep or use_att):  # Neither branch is enabled
                raise NotImplementedError

            if use_deep:
                deep_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)(
                    dnn_input, training=is_training)

            if use_deep and use_att:  # Deep & Interacting Layer
                head_input = tf.keras.layers.Concatenate()([att_output, deep_out])
            elif use_deep:  # Only Deep
                head_input = deep_out
            else:  # Only Interacting Layer
                head_input = att_output

            final_logit = tf.keras.layers.Dense(
                1, use_bias=False, kernel_initializer=tf.keras.initializers.glorot_normal(seed))(head_input)

        logits = linear_logits + final_logit

        return deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer,
                                training_chief_hooks=training_chief_hooks)

    return tf.estimator.Estimator(_model_fn, model_dir=model_dir, config=config)
================================================
FILE: deepctr/estimator/models/ccpm.py
================================================
# -*- coding:utf-8 -*-
"""
Author:
Weichen Shen, weichenswc@163.com
Reference:
[1] Liu Q, Yu F, Wu S, et al. A convolutional click prediction model[C]//Proceedings of the 24th ACM International on Conference on Information and Knowledge Management. ACM, 2015: 1743-1746.
(http://ir.ia.ac.cn/bitstream/173211/12337/1/A%20Convolutional%20Click%20Prediction%20Model.pdf)
"""
import tensorflow as tf
from ..feature_column import get_linear_logit, input_from_feature_columns
from ..utils import deepctr_model_fn, DNN_SCOPE_NAME, variable_scope
from ...layers.core import DNN
from ...layers.sequence import KMaxPooling
from ...layers.utils import concat_func
def CCPMEstimator(linear_feature_columns, dnn_feature_columns, conv_kernel_width=(6, 5), conv_filters=(4, 4),
                  dnn_hidden_units=(128, 64), l2_reg_linear=1e-5, l2_reg_embedding=1e-5, l2_reg_dnn=0, dnn_dropout=0,
                  seed=1024, task='binary', model_dir=None, config=None, linear_optimizer='Ftrl',
                  dnn_optimizer='Adagrad', training_chief_hooks=None):
    """Build an estimator for the Convolutional Click Prediction Model.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param conv_kernel_width: list, list of positive integer or empty list, the width of filter in each conv layer.
    :param conv_filters: list, list of positive integer or empty list, the number of filters in each conv layer.
    :param dnn_hidden_units: list, list of positive integer or empty list, the layer number and units in each layer of DNN.
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param seed: integer, to use as random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :param model_dir: model directory forwarded to ``tf.estimator.Estimator``.
    :param config: run configuration forwarded to ``tf.estimator.Estimator``.
    :param linear_optimizer: optimizer for the linear part, forwarded to ``deepctr_model_fn``. Defaults to ``'Ftrl'``.
    :param dnn_optimizer: optimizer for the deep part, forwarded to ``deepctr_model_fn``. Defaults to ``'Adagrad'``.
    :param training_chief_hooks: hooks forwarded to ``deepctr_model_fn``.
    :return: A Tensorflow Estimator instance.
    :raises ValueError: if ``conv_kernel_width`` and ``conv_filters`` differ in length.
    """

    if len(conv_kernel_width) != len(conv_filters):
        raise ValueError(
            "conv_kernel_width must have same element with conv_filters")

    def _model_fn(features, labels, mode, config):
        train_flag = (mode == tf.estimator.ModeKeys.TRAIN)

        linear_logits = get_linear_logit(features, linear_feature_columns, l2_reg_linear=l2_reg_linear)

        with variable_scope(DNN_SCOPE_NAME):
            sparse_embedding_list, _ = input_from_feature_columns(features, dnn_feature_columns,
                                                                  l2_reg_embedding=l2_reg_embedding)
            field_num = len(sparse_embedding_list)
            conv_layer_num = len(conv_filters)

            conv_input = concat_func(sparse_embedding_list, axis=1)
            # Add a trailing channel axis so the input can be fed to Conv2D.
            pooling_result = tf.keras.layers.Lambda(
                lambda x: tf.expand_dims(x, axis=3))(conv_input)

            for layer_idx, (filters, width) in enumerate(zip(conv_filters, conv_kernel_width), start=1):
                if layer_idx < conv_layer_num:
                    # float() keeps this a true division on Python 2 as well;
                    # integer division would make the ratio 0 for every
                    # non-final layer.
                    ratio = float(layer_idx) / conv_layer_num
                    k = max(1, int((1 - pow(ratio, conv_layer_num - layer_idx)) * field_num))
                else:
                    # The last convolution layer always keeps 3 values.
                    k = 3
                conv_result = tf.keras.layers.Conv2D(filters=filters, kernel_size=(width, 1), strides=(1, 1),
                                                     padding='same',
                                                     activation='tanh', use_bias=True, )(pooling_result)
                # k never exceeds the size of the pooled axis.
                pooling_result = KMaxPooling(
                    k=min(k, int(conv_result.shape[1])), axis=1)(conv_result)

            flatten_result = tf.keras.layers.Flatten()(pooling_result)
            dnn_out = DNN(dnn_hidden_units, l2_reg=l2_reg_dnn, dropout_rate=dnn_dropout, seed=seed)(
                flatten_result, training=train_flag)
            dnn_logit = tf.keras.layers.Dense(
                1, use_bias=False, kernel_initializer=tf.keras.initializers.glorot_normal(seed))(dnn_out)

        logits = linear_logits + dnn_logit

        return deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer,
                                training_chief_hooks=training_chief_hooks)

    return tf.estimator.Estimator(_model_fn, model_dir=model_dir, config=config)
================================================
FILE: deepctr/estimator/models/dcn.py
================================================
# -*- coding:utf-8 -*-
"""
Author:
Weichen Shen, weichenswc@163.com
Reference:
[1] Wang R, Fu B, Fu G, et al. Deep & cross network for ad click predictions[C]//Proceedings of the ADKDD'17. ACM, 2017: 12. (https://arxiv.org/abs/1708.05123)
"""
import tensorflow as tf
from ..feature_column import get_linear_logit, input_from_feature_columns
from ..utils import deepctr_model_fn, DNN_SCOPE_NAME, variable_scope
from ...layers.core import DNN
from ...layers.interaction import CrossNet
from ...layers.utils import combined_dnn_input
def DCNEstimator(linear_feature_columns, dnn_feature_columns, cross_num=2, dnn_hidden_units=(256, 128, 64),
                 l2_reg_linear=1e-5,
                 l2_reg_embedding=1e-5,
                 l2_reg_cross=1e-5, l2_reg_dnn=0, seed=1024, dnn_dropout=0, dnn_use_bn=False,
                 dnn_activation='relu', task='binary', model_dir=None, config=None, linear_optimizer='Ftrl',
                 dnn_optimizer='Adagrad', training_chief_hooks=None):
    """Build an estimator for the Deep & Cross Network.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param cross_num: non-negative integer, cross layer number
    :param dnn_hidden_units: list, list of positive integer or empty list, the layer number and units in each layer of DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_cross: float. L2 regularizer strength applied to cross net
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param dnn_activation: Activation function to use in DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :param model_dir: model directory forwarded to ``tf.estimator.Estimator``.
    :param config: run configuration forwarded to ``tf.estimator.Estimator``.
    :param linear_optimizer: optimizer for the linear part, forwarded to ``deepctr_model_fn``. Defaults to ``'Ftrl'``.
    :param dnn_optimizer: optimizer for the deep part, forwarded to ``deepctr_model_fn``. Defaults to ``'Adagrad'``.
    :param training_chief_hooks: hooks forwarded to ``deepctr_model_fn``.
    :return: A Tensorflow Estimator instance.
    :raises ValueError: if ``dnn_hidden_units`` is empty and ``cross_num`` is 0.
    """
    if len(dnn_hidden_units) == 0 and cross_num == 0:
        raise ValueError("Either hidden_layer or cross layer must > 0")

    def _model_fn(features, labels, mode, config):
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        linear_logits = get_linear_logit(features, linear_feature_columns, l2_reg_linear=l2_reg_linear)

        with variable_scope(DNN_SCOPE_NAME):
            sparse_embedding_list, dense_value_list = input_from_feature_columns(features, dnn_feature_columns,
                                                                                 l2_reg_embedding=l2_reg_embedding)
            dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)

            use_deep = len(dnn_hidden_units) > 0
            use_cross = cross_num > 0
            if not (use_deep or use_cross):  # Neither branch is enabled
                raise NotImplementedError

            if use_deep:
                deep_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)(
                    dnn_input, training=is_training)
            if use_cross:
                cross_out = CrossNet(cross_num, l2_reg=l2_reg_cross)(dnn_input)

            if use_deep and use_cross:  # Deep & Cross
                head_input = tf.keras.layers.Concatenate()([cross_out, deep_out])
            elif use_deep:  # Only Deep
                head_input = deep_out
            else:  # Only Cross
                head_input = cross_out

            final_logit = tf.keras.layers.Dense(
                1, use_bias=False, kernel_initializer=tf.keras.initializers.glorot_normal(seed))(head_input)

        logits = linear_logits + final_logit

        return deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer,
                                training_chief_hooks=training_chief_hooks)

    return tf.estimator.Estimator(_model_fn, model_dir=model_dir, config=config)
================================================
FILE: deepctr/estimator/models/deepfefm.py
================================================
# -*- coding:utf-8 -*-
"""
Author:
Harshit Pande
Reference:
[1] Field-Embedded Factorization Machines for Click-through Rate Prediction
(https://arxiv.org/abs/2009.09931)
"""
import tensorflow as tf
from ..feature_column import get_linear_logit, input_from_feature_columns
from ..utils import DNN_SCOPE_NAME, deepctr_model_fn, variable_scope
from ...layers.core import DNN
from ...layers.interaction import FEFMLayer
from ...layers.utils import concat_func, add_func, combined_dnn_input, reduce_sum
def DeepFEFMEstimator(linear_feature_columns, dnn_feature_columns,
                      dnn_hidden_units=(256, 128, 64), l2_reg_linear=0.00001, l2_reg_embedding_feat=0.00001,
                      l2_reg_embedding_field=0.00001, l2_reg_dnn=0, seed=1024, dnn_dropout=0.0,
                      dnn_activation='relu', dnn_use_bn=False, task='binary', model_dir=None,
                      config=None, linear_optimizer='Ftrl', dnn_optimizer='Adagrad', training_chief_hooks=None):
    """Build an estimator for the DeepFEFM network or the shallow FEFM model.

    The DNN branch is added only when ``dnn_hidden_units`` is non-empty;
    otherwise the logit is the sum of the linear part and the FEFM part.
    This estimator has no ablation options.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param dnn_hidden_units: list, list of positive integer or empty list, the layer number and units in each layer of DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding_feat: float. L2 regularizer strength applied to embedding vector of features
    :param l2_reg_embedding_field: float. L2 regularizer strength passed to the FEFM layer for field embeddings
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :param model_dir: model directory forwarded to ``tf.estimator.Estimator``.
    :param config: run configuration forwarded to ``tf.estimator.Estimator``.
    :param linear_optimizer: optimizer for the linear part, forwarded to ``deepctr_model_fn``. Defaults to ``'Ftrl'``.
    :param dnn_optimizer: optimizer for the deep part, forwarded to ``deepctr_model_fn``. Defaults to ``'Adagrad'``.
    :param training_chief_hooks: hooks forwarded to ``deepctr_model_fn``.
    :return: A Tensorflow Estimator instance.
    """

    def _model_fn(features, labels, mode, config):
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        linear_logits = get_linear_logit(features, linear_feature_columns, l2_reg_linear=l2_reg_linear)
        logit_parts = [linear_logits]

        with variable_scope(DNN_SCOPE_NAME):
            sparse_embedding_list, dense_value_list = input_from_feature_columns(features, dnn_feature_columns,
                                                                                 l2_reg_embedding=l2_reg_embedding_feat)

            fefm_layer = FEFMLayer(regularizer=l2_reg_embedding_field)
            fefm_interaction_embedding = fefm_layer(concat_func(sparse_embedding_list, axis=1))

            # The FEFM logit is the sum of the interaction embedding over axis 1.
            sum_over_interactions = tf.keras.layers.Lambda(lambda x: reduce_sum(x, axis=1, keep_dims=True))
            logit_parts.append(sum_over_interactions(fefm_interaction_embedding))

            if dnn_hidden_units:
                # The DNN sees the raw inputs together with the interaction embedding.
                dnn_input = concat_func(
                    [combined_dnn_input(sparse_embedding_list, dense_value_list), fefm_interaction_embedding],
                    axis=1)
                dnn_output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)(
                    dnn_input, training=is_training)
                dnn_logit = tf.keras.layers.Dense(
                    1, use_bias=False, kernel_initializer=tf.keras.initializers.glorot_normal(seed))(dnn_output)
                logit_parts.append(dnn_logit)

        logits = add_func(logit_parts)

        return deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer,
                                training_chief_hooks=training_chief_hooks)

    return tf.estimator.Estimator(_model_fn, model_dir=model_dir, config=config)
================================================
FILE: deepctr/estimator/models/deepfm.py
================================================
# -*- coding:utf-8 -*-
"""
Author:
Weichen Shen, weichenswc@163.com
Reference:
[1] Guo H, Tang R, Ye Y, et al. Deepfm: a factorization-machine based neural network for ctr prediction[J]. arXiv preprint arXiv:1703.04247, 2017.(https://arxiv.org/abs/1703.04247)
"""
import tensorflow as tf
from ..feature_column import get_linear_logit, input_from_feature_columns
from ..utils import deepctr_model_fn, DNN_SCOPE_NAME, variable_scope
from ...layers.core import DNN
from ...layers.interaction import FM
from ...layers.utils import concat_func, combined_dnn_input
def DeepFMEstimator(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(256, 128, 64),
                    l2_reg_linear=0.00001, l2_reg_embedding=0.00001, l2_reg_dnn=0, seed=1024, dnn_dropout=0,
                    dnn_activation='relu', dnn_use_bn=False, task='binary', model_dir=None, config=None,
                    linear_optimizer='Ftrl',
                    dnn_optimizer='Adagrad', training_chief_hooks=None):
    """Create an Estimator for DeepFM: the logit is the sum of a linear, an FM and a DNN term.

    :param linear_feature_columns: An iterable of the feature columns consumed by the linear part.
    :param dnn_feature_columns: An iterable of the feature columns consumed by the deep part
        (their embeddings also feed the FM term).
    :param dnn_hidden_units: list of positive integers or empty list, the number of layers and the
        units of each layer of the DNN.
    :param l2_reg_linear: float. L2 regularizer strength applied to the linear part.
    :param l2_reg_embedding: float. L2 regularizer strength applied to the embedding vectors.
    :param l2_reg_dnn: float. L2 regularizer strength applied to the DNN.
    :param seed: integer, used as random seed.
    :param dnn_dropout: float in [0,1), the probability of dropping a given DNN coordinate.
    :param dnn_activation: Activation function used in the DNN.
    :param dnn_use_bn: bool. Whether to use BatchNormalization before activation in the DNN.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss.
    :param model_dir: Directory to save model parameters, graph and etc. It can also be used to
        load checkpoints from the directory to continue training a previously saved model.
    :param config: tf.RunConfig object to configure the runtime settings.
    :param linear_optimizer: Optimizer for the linear part, forwarded to ``deepctr_model_fn``.
        Defaults to ``'Ftrl'``.
    :param dnn_optimizer: Optimizer for the deep part, forwarded to ``deepctr_model_fn``.
        Defaults to ``'Adagrad'``.
    :param training_chief_hooks: Iterable of `tf.train.SessionRunHook` objects to run on the
        chief worker during training.
    :return: A Tensorflow Estimator instance.
    """

    def _model_fn(features, labels, mode, config):
        is_training = mode == tf.estimator.ModeKeys.TRAIN

        # First-order (linear) term.
        wide_logit = get_linear_logit(features, linear_feature_columns, l2_reg_linear=l2_reg_linear)

        with variable_scope(DNN_SCOPE_NAME):
            embeddings, dense_values = input_from_feature_columns(
                features, dnn_feature_columns, l2_reg_embedding=l2_reg_embedding)
            deep_input = combined_dnn_input(embeddings, dense_values)

            # FM term over the embeddings concatenated along axis 1.
            fm_layer = FM()
            fm_logit = fm_layer(concat_func(embeddings, axis=1))

            # Deep tower followed by a bias-free single-unit projection.
            tower = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)
            tower_out = tower(deep_input, training=is_training)
            projection = tf.keras.layers.Dense(
                1, use_bias=False, kernel_initializer=tf.keras.initializers.glorot_normal(seed=seed))
            deep_logit = projection(tower_out)

        logits = wide_logit + fm_logit + deep_logit

        return deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer,
                                training_chief_hooks=training_chief_hooks)

    return tf.estimator.Estimator(_model_fn, model_dir=model_dir, config=config)
================================================
FILE: deepctr/estimator/models/fibinet.py
================================================
# -*- coding:utf-8 -*-
"""
Author:
Weichen Shen, weichenswc@163.com
Reference:
[1] Huang T, Zhang Z, Zhang J. FiBiNET: Combining Feature Importance and Bilinear feature Interaction for Click-Through Rate Prediction[J]. arXiv preprint arXiv:1905.09433, 2019.
"""
import tensorflow as tf
from tensorflow.python.keras.layers import Dense, Flatten
from ..feature_column import get_linear_logit, input_from_feature_columns
from ..utils import deepctr_model_fn, DNN_SCOPE_NAME, variable_scope
from ...layers.core import DNN
from ...layers.interaction import SENETLayer, BilinearInteraction
from ...layers.utils import concat_func, combined_dnn_input
def FiBiNETEstimator(linear_feature_columns, dnn_feature_columns, bilinear_type='interaction', reduction_ratio=3,
                     dnn_hidden_units=(256, 128, 64), l2_reg_linear=1e-5,
                     l2_reg_embedding=1e-5, l2_reg_dnn=0, seed=1024, dnn_dropout=0, dnn_activation='relu',
                     task='binary', model_dir=None, config=None, linear_optimizer='Ftrl',
                     dnn_optimizer='Adagrad', training_chief_hooks=None):
    """Create an Estimator for FiBiNET (SENET re-weighting + bilinear interaction + DNN).

    :param linear_feature_columns: An iterable of the feature columns consumed by the linear part.
    :param dnn_feature_columns: An iterable of the feature columns consumed by the deep part.
    :param bilinear_type: str, bilinear function type used in the Bilinear Interaction Layer,
        can be ``'all'`` , ``'each'`` or ``'interaction'``.
    :param reduction_ratio: integer in [1,inf), reduction ratio used in the SENET Layer.
    :param dnn_hidden_units: list of positive integers or empty list, the number of layers and the
        units of each layer of the DNN.
    :param l2_reg_linear: float. L2 regularizer strength applied to the wide part.
    :param l2_reg_embedding: float. L2 regularizer strength applied to the embedding vectors.
    :param l2_reg_dnn: float. L2 regularizer strength applied to the DNN.
    :param seed: integer, used as random seed.
    :param dnn_dropout: float in [0,1), the probability of dropping a given DNN coordinate.
    :param dnn_activation: Activation function used in the DNN.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss.
    :param model_dir: Directory to save model parameters, graph and etc. It can also be used to
        load checkpoints from the directory to continue training a previously saved model.
    :param config: tf.RunConfig object to configure the runtime settings.
    :param linear_optimizer: Optimizer for the linear part, forwarded to ``deepctr_model_fn``.
        Defaults to ``'Ftrl'``.
    :param dnn_optimizer: Optimizer for the deep part, forwarded to ``deepctr_model_fn``.
        Defaults to ``'Adagrad'``.
    :param training_chief_hooks: Iterable of `tf.train.SessionRunHook` objects to run on the
        chief worker during training.
    :return: A Tensorflow Estimator instance.
    """

    def _model_fn(features, labels, mode, config):
        is_training = mode == tf.estimator.ModeKeys.TRAIN

        wide_logit = get_linear_logit(features, linear_feature_columns, l2_reg_linear=l2_reg_linear)

        with variable_scope(DNN_SCOPE_NAME):
            embeddings, dense_values = input_from_feature_columns(
                features, dnn_feature_columns, l2_reg_embedding=l2_reg_embedding)

            # SENET-processed embeddings, produced from the raw embedding list.
            senet_layer = SENETLayer(reduction_ratio, seed)
            senet_embeddings = senet_layer(embeddings)

            # Two separate bilinear layers: one over the SENET output, one over the raw embeddings.
            senet_bilinear = BilinearInteraction(bilinear_type=bilinear_type, seed=seed)
            senet_pairs = senet_bilinear(senet_embeddings)
            raw_bilinear = BilinearInteraction(bilinear_type=bilinear_type, seed=seed)
            raw_pairs = raw_bilinear(embeddings)

            flatten = Flatten()
            interaction_vector = flatten(concat_func([senet_pairs, raw_pairs]))
            deep_input = combined_dnn_input([interaction_vector], dense_values)

            # The fifth positional DNN argument (passed as dnn_use_bn by other estimators) is fixed to False.
            tower = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, False, seed=seed)
            tower_out = tower(deep_input, training=is_training)
            projection = Dense(
                1, use_bias=False, kernel_initializer=tf.keras.initializers.glorot_normal(seed=seed))
            deep_logit = projection(tower_out)

        logits = wide_logit + deep_logit

        return deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer,
                                training_chief_hooks=training_chief_hooks)

    return tf.estimator.Estimator(_model_fn, model_dir=model_dir, config=config)
================================================
FILE: deepctr/estimator/models/fnn.py
================================================
# -*- coding:utf-8 -*-
"""
Author:
Weichen Shen, weichenswc@163.com
Reference:
[1] Zhang W, Du T, Wang J. Deep learning over multi-field categorical data[C]//European conference on information retrieval. Springer, Cham, 2016: 45-57.(https://arxiv.org/pdf/1601.02376.pdf)
"""
import tensorflow as tf
from ..feature_column import get_linear_logit, input_from_feature_columns
from ..utils import deepctr_model_fn, DNN_SCOPE_NAME, variable_scope
from ...layers.core import DNN
from ...layers.utils import combined_dnn_input
def FNNEstimator(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(256, 128, 64),
                 l2_reg_embedding=1e-5, l2_reg_linear=1e-5, l2_reg_dnn=0, seed=1024, dnn_dropout=0,
                 dnn_activation='relu', task='binary', model_dir=None, config=None, linear_optimizer='Ftrl',
                 dnn_optimizer='Adagrad', training_chief_hooks=None):
    """Create an Estimator for the Factorization-supported Neural Network (linear logit + DNN logit).

    :param linear_feature_columns: An iterable of the feature columns consumed by the linear part.
    :param dnn_feature_columns: An iterable of the feature columns consumed by the deep part.
    :param dnn_hidden_units: list of positive integers or empty list, the number of layers and the
        units of each layer of the deep net.
    :param l2_reg_embedding: float. L2 regularizer strength applied to the embedding vectors.
    :param l2_reg_linear: float. L2 regularizer strength applied to the linear weights.
    :param l2_reg_dnn: float. L2 regularizer strength applied to the DNN.
    :param seed: integer, used as random seed.
    :param dnn_dropout: float in [0,1), the probability of dropping a given DNN coordinate.
    :param dnn_activation: Activation function used in the DNN.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss.
    :param model_dir: Directory to save model parameters, graph and etc. It can also be used to
        load checkpoints from the directory to continue training a previously saved model.
    :param config: tf.RunConfig object to configure the runtime settings.
    :param linear_optimizer: Optimizer for the linear part, forwarded to ``deepctr_model_fn``.
        Defaults to ``'Ftrl'``.
    :param dnn_optimizer: Optimizer for the deep part, forwarded to ``deepctr_model_fn``.
        Defaults to ``'Adagrad'``.
    :param training_chief_hooks: Iterable of `tf.train.SessionRunHook` objects to run on the
        chief worker during training.
    :return: A Tensorflow Estimator instance.
    """

    def _model_fn(features, labels, mode, config):
        is_training = mode == tf.estimator.ModeKeys.TRAIN

        wide_logit = get_linear_logit(features, linear_feature_columns, l2_reg_linear=l2_reg_linear)

        with variable_scope(DNN_SCOPE_NAME):
            embeddings, dense_values = input_from_feature_columns(
                features, dnn_feature_columns, l2_reg_embedding=l2_reg_embedding)
            deep_input = combined_dnn_input(embeddings, dense_values)

            # The fifth positional DNN argument (passed as dnn_use_bn by other estimators) is fixed to False.
            tower = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, False, seed=seed)
            tower_out = tower(deep_input, training=is_training)
            projection = tf.keras.layers.Dense(
                1, use_bias=False, kernel_initializer=tf.keras.initializers.glorot_normal(seed=seed))
            deep_logit = projection(tower_out)

        logits = wide_logit + deep_logit

        return deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer,
                                training_chief_hooks=training_chief_hooks)

    return tf.estimator.Estimator(_model_fn, model_dir=model_dir, config=config)
================================================
FILE: deepctr/estimator/models/fwfm.py
================================================
# -*- coding:utf-8 -*-
"""
Author:
Weichen Shen, weichenswc@163.com
Harshit Pande
Reference:
[1] Field-weighted Factorization Machines for Click-Through Rate Prediction in Display Advertising
(https://arxiv.org/pdf/1806.03514.pdf)
"""
import tensorflow as tf
from ..feature_column import get_linear_logit, input_from_feature_columns
from ..utils import DNN_SCOPE_NAME, deepctr_model_fn, variable_scope
from ...layers.core import DNN
from ...layers.interaction import FwFMLayer
from ...layers.utils import concat_func, add_func, combined_dnn_input
def FwFMEstimator(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(256, 128, 64),
                  l2_reg_linear=0.00001, l2_reg_embedding=0.00001, l2_reg_field_strength=0.00001, l2_reg_dnn=0,
                  seed=1024, dnn_dropout=0, dnn_activation='relu', dnn_use_bn=False, task='binary', model_dir=None,
                  config=None, linear_optimizer='Ftrl',
                  dnn_optimizer='Adagrad', training_chief_hooks=None):
    """Create an Estimator for DeepFwFM: linear logit + FwFM logit + optional DNN logit.

    :param linear_feature_columns: An iterable of the feature columns consumed by the linear part.
    :param dnn_feature_columns: An iterable of the feature columns consumed by the deep part
        (their embeddings also feed the FwFM term).
    :param dnn_hidden_units: list of positive integers, or an empty list to leave the DNN term out;
        the number of layers and the units of each layer of the DNN.
    :param l2_reg_linear: float. L2 regularizer strength applied to the linear part.
    :param l2_reg_embedding: float. L2 regularizer strength applied to the embedding vectors.
    :param l2_reg_field_strength: float. L2 regularizer strength applied to the field pair
        strength parameters.
    :param l2_reg_dnn: float. L2 regularizer strength applied to the DNN.
    :param seed: integer, used as random seed.
    :param dnn_dropout: float in [0,1), the probability of dropping a given DNN coordinate.
    :param dnn_activation: Activation function used in the DNN.
    :param dnn_use_bn: bool. Whether to use BatchNormalization before activation in the DNN.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss.
    :param model_dir: Directory to save model parameters, graph and etc. It can also be used to
        load checkpoints from the directory to continue training a previously saved model.
    :param config: tf.RunConfig object to configure the runtime settings.
    :param linear_optimizer: Optimizer for the linear part, forwarded to ``deepctr_model_fn``.
        Defaults to ``'Ftrl'``.
    :param dnn_optimizer: Optimizer for the deep part, forwarded to ``deepctr_model_fn``.
        Defaults to ``'Adagrad'``.
    :param training_chief_hooks: Iterable of `tf.train.SessionRunHook` objects to run on the
        chief worker during training.
    :return: A Tensorflow Estimator instance.
    """

    def _model_fn(features, labels, mode, config):
        is_training = mode == tf.estimator.ModeKeys.TRAIN

        wide_logit = get_linear_logit(features, linear_feature_columns, l2_reg_linear=l2_reg_linear)
        logit_terms = [wide_logit]

        with variable_scope(DNN_SCOPE_NAME):
            embeddings, dense_values = input_from_feature_columns(
                features, dnn_feature_columns, l2_reg_embedding=l2_reg_embedding)

            # Field-weighted FM term; one field per sparse embedding in the list.
            fwfm_layer = FwFMLayer(num_fields=len(embeddings), regularizer=l2_reg_field_strength)
            logit_terms.append(fwfm_layer(concat_func(embeddings, axis=1)))

            # The deep tower is built only when hidden units were requested.
            if dnn_hidden_units:
                deep_input = combined_dnn_input(embeddings, dense_values)
                tower = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)
                tower_out = tower(deep_input, training=is_training)
                projection = tf.keras.layers.Dense(
                    1, use_bias=False, kernel_initializer=tf.keras.initializers.glorot_normal(seed=seed))
                logit_terms.append(projection(tower_out))

        logits = add_func(logit_terms)

        return deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer,
                                training_chief_hooks=training_chief_hooks)

    return tf.estimator.Estimator(_model_fn, model_dir=model_dir, config=config)
================================================
FILE: deepctr/estimator/models/nfm.py
================================================
# -*- coding:utf-8 -*-
"""
Author:
Weichen Shen, weichenswc@163.com
Reference:
[1] He X, Chua T S. Neural factorization machines for sparse predictive analytics[C]//Proceedings of the 40th International ACM SIGIR conference on Research and Development in Information Retrieval. ACM, 2017: 355-364. (https://arxiv.org/abs/1708.05027)
"""
import tensorflow as tf
from ..feature_column import get_linear_logit, input_from_feature_columns
from ..utils import deepctr_model_fn, DNN_SCOPE_NAME, variable_scope
from ...layers.core import DNN
from ...layers.interaction import BiInteractionPooling
from ...layers.utils import concat_func, combined_dnn_input
def NFMEstimator(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(256, 128, 64),
                 l2_reg_embedding=1e-5, l2_reg_linear=1e-5, l2_reg_dnn=0, seed=1024, bi_dropout=0,
                 dnn_dropout=0, dnn_activation='relu', task='binary', model_dir=None, config=None,
                 linear_optimizer='Ftrl',
                 dnn_optimizer='Adagrad', training_chief_hooks=None):
    """Instantiates the Neural Factorization Machine architecture.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of deep net
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part.
    :param l2_reg_dnn: float . L2 regularizer strength applied to DNN
    :param seed: integer ,to use as random seed.
    :param bi_dropout: When truthy, the probability we will drop out the output of BiInteractionPooling Layer
        (applied in training mode only).
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in deep net
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :param model_dir: Directory to save model parameters, graph and etc. This can
        also be used to load checkpoints from the directory into a estimator
        to continue training a previously saved model.
    :param config: tf.RunConfig object to configure the runtime settings.
    :param linear_optimizer: Optimizer for the linear part, forwarded to ``deepctr_model_fn``.
        Defaults to ``'Ftrl'``.
    :param dnn_optimizer: Optimizer for the deep part, forwarded to ``deepctr_model_fn``.
        Defaults to ``'Adagrad'``.
    :param training_chief_hooks: Iterable of `tf.train.SessionRunHook` objects to
        run on the chief worker during training.
    :return: A Tensorflow Estimator instance.

    """

    def _model_fn(features, labels, mode, config):
        train_flag = (mode == tf.estimator.ModeKeys.TRAIN)

        linear_logits = get_linear_logit(features, linear_feature_columns, l2_reg_linear=l2_reg_linear)

        with variable_scope(DNN_SCOPE_NAME):
            sparse_embedding_list, dense_value_list = input_from_feature_columns(features, dnn_feature_columns,
                                                                                 l2_reg_embedding=l2_reg_embedding)

            fm_input = concat_func(sparse_embedding_list, axis=1)
            bi_out = BiInteractionPooling()(fm_input)
            if bi_dropout:
                # Drive dropout from the estimator mode, like the DNN below. Passing
                # training=None would leave the decision to Keras' global learning
                # phase, which this model_fn never sets from `mode`.
                bi_out = tf.keras.layers.Dropout(bi_dropout)(bi_out, training=train_flag)
            dnn_input = combined_dnn_input([bi_out], dense_value_list)
            # The fifth positional DNN argument (passed as dnn_use_bn by other estimators) is fixed to False.
            dnn_output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, False, seed=seed)(
                dnn_input, training=train_flag)
            dnn_logit = tf.keras.layers.Dense(
                1, use_bias=False, kernel_initializer=tf.keras.initializers.glorot_normal(seed))(dnn_output)

        logits = linear_logits + dnn_logit

        return deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer,
                                training_chief_hooks=training_chief_hooks)

    return tf.estimator.Estimator(_model_fn, model_dir=model_dir, config=config)
================================================
FILE: deepctr/estimator/models/pnn.py
================================================
# -*- coding:utf-8 -*-
"""
Author:
Weichen Shen, weichenswc@163.com
Reference:
[1] Qu Y, Cai H, Ren K, et al. Product-based neural networks for user response prediction[C]//Data Mining (ICDM), 2016 IEEE 16th International Conference on. IEEE, 2016: 1149-1154.(https://arxiv.org/pdf/1611.00144.pdf)
"""
import tensorflow as tf
from ..feature_column import get_linear_logit, input_from_feature_columns
from ..utils import deepctr_model_fn, DNN_SCOPE_NAME, variable_scope
from ...layers.core import DNN
from ...layers.interaction import InnerProductLayer, OutterProductLayer
from ...layers.utils import concat_func, combined_dnn_input
def PNNEstimator(dnn_feature_columns, dnn_hidden_units=(256, 128, 64), l2_reg_embedding=1e-5, l2_reg_dnn=0,
                 seed=1024, dnn_dropout=0, dnn_activation='relu', use_inner=True, use_outter=False, kernel_type='mat',
                 task='binary', model_dir=None, config=None,
                 linear_optimizer='Ftrl',
                 dnn_optimizer='Adagrad', training_chief_hooks=None):
    """Create an Estimator for the Product-based Neural Network.

    :param dnn_feature_columns: An iterable of the feature columns consumed by the deep part.
    :param dnn_hidden_units: list of positive integers or empty list, the number of layers and the
        units of each layer of the deep net.
    :param l2_reg_embedding: float. L2 regularizer strength applied to the embedding vectors.
    :param l2_reg_dnn: float. L2 regularizer strength applied to the DNN.
    :param seed: integer, used as random seed.
    :param dnn_dropout: float in [0,1), the probability of dropping a given DNN coordinate.
    :param dnn_activation: Activation function used in the DNN.
    :param use_inner: bool, whether the inner-product signal is fed to the DNN.
    :param use_outter: bool, whether the outer-product signal is fed to the DNN.
    :param kernel_type: str, kernel type used in the outer product, can be ``'mat'`` , ``'vec'`` or ``'num'``.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss.
    :param model_dir: Directory to save model parameters, graph and etc. It can also be used to
        load checkpoints from the directory to continue training a previously saved model.
    :param config: tf.RunConfig object to configure the runtime settings.
    :param linear_optimizer: Optimizer for the linear part, forwarded to ``deepctr_model_fn``.
        Defaults to ``'Ftrl'``.
    :param dnn_optimizer: Optimizer for the deep part, forwarded to ``deepctr_model_fn``.
        Defaults to ``'Adagrad'``.
    :param training_chief_hooks: Iterable of `tf.train.SessionRunHook` objects to run on the
        chief worker during training.
    :return: A Tensorflow Estimator instance.
    :raises ValueError: if ``kernel_type`` is not one of ``'mat'``, ``'vec'`` or ``'num'``.
    """
    # Validate eagerly, before any model function is built.
    if kernel_type not in ('mat', 'vec', 'num'):
        raise ValueError("kernel_type must be mat,vec or num")

    def _model_fn(features, labels, mode, config):
        is_training = mode == tf.estimator.ModeKeys.TRAIN

        # The linear logit is requested with an empty column list and no regularization.
        wide_logit = get_linear_logit(features, [], l2_reg_linear=0)

        with variable_scope(DNN_SCOPE_NAME):
            embeddings, dense_values = input_from_feature_columns(
                features, dnn_feature_columns, l2_reg_embedding=l2_reg_embedding)

            # Both product layers are always built; the flags below only decide what reaches the DNN.
            flatten = tf.keras.layers.Flatten()
            inner_product = flatten(InnerProductLayer()(embeddings))
            outer_layer = OutterProductLayer(kernel_type)
            outter_product = outer_layer(embeddings)

            # ipnn deep input: the embeddings reshaped to the sum of their last dimensions.
            flat_width = sum(int(emb.shape[-1]) for emb in embeddings)
            reshape = tf.keras.layers.Reshape([flat_width])
            linear_signal = reshape(concat_func(embeddings))

            signals = [linear_signal]
            if use_inner:
                signals.append(inner_product)
            if use_outter:
                signals.append(outter_product)

            if len(signals) > 1:
                deep_input = tf.keras.layers.Concatenate()(signals)
            else:
                deep_input = linear_signal

            dnn_input = combined_dnn_input([deep_input], dense_values)
            # The fifth positional DNN argument (passed as dnn_use_bn by other estimators) is fixed to False.
            tower = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, False, seed=seed)
            tower_out = tower(dnn_input, training=is_training)
            projection = tf.keras.layers.Dense(
                1, use_bias=False, kernel_initializer=tf.keras.initializers.glorot_normal(seed=seed))
            deep_logit = projection(tower_out)

        logits = wide_logit + deep_logit

        return deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer,
                                training_chief_hooks=training_chief_hooks)

    return tf.estimator.Estimator(_model_fn, model_dir=model_dir, config=config)
================================================
FILE: deepctr/estimator/models/wdl.py
================================================
# -*- coding:utf-8 -*-
"""
Author:
Weichen Shen, weichenswc@163.com
Reference:
[1] Cheng H T, Koc L, Harmsen J, et al. Wide & deep learning for recommender systems[C]//Proceedings of the 1st Workshop on Deep Learning for Recommender Systems. ACM, 2016: 7-10.(https://arxiv.org/pdf/1606.07792.pdf)
"""
import tensorflow as tf
from tensorflow.python.keras.layers import Dense
from ..feature_column import get_linear_logit, input_from_feature_columns
from ..utils import deepctr_model_fn, DNN_SCOPE_NAME, variable_scope
from ...layers import DNN, combined_dnn_input
def WDLEstimator(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(256, 128, 64), l2_reg_linear=1e-5,
                 l2_reg_embedding=1e-5, l2_reg_dnn=0, seed=1024, dnn_dropout=0, dnn_activation='relu',
                 task='binary', model_dir=None, config=None, linear_optimizer='Ftrl',
                 dnn_optimizer='Adagrad', training_chief_hooks=None):
    """Create an Estimator for Wide&Deep Learning (linear logit + DNN logit).

    :param linear_feature_columns: An iterable of the feature columns consumed by the linear part.
    :param dnn_feature_columns: An iterable of the feature columns consumed by the deep part.
    :param dnn_hidden_units: list of positive integers or empty list, the number of layers and the
        units of each layer of the DNN.
    :param l2_reg_linear: float. L2 regularizer strength applied to the wide part.
    :param l2_reg_embedding: float. L2 regularizer strength applied to the embedding vectors.
    :param l2_reg_dnn: float. L2 regularizer strength applied to the DNN.
    :param seed: integer, used as random seed.
    :param dnn_dropout: float in [0,1), the probability of dropping a given DNN coordinate.
    :param dnn_activation: Activation function used in the DNN.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss.
    :param model_dir: Directory to save model parameters, graph and etc. It can also be used to
        load checkpoints from the directory to continue training a previously saved model.
    :param config: tf.RunConfig object to configure the runtime settings.
    :param linear_optimizer: Optimizer for the linear part, forwarded to ``deepctr_model_fn``.
        Defaults to ``'Ftrl'``.
    :param dnn_optimizer: Optimizer for the deep part, forwarded to ``deepctr_model_fn``.
        Defaults to ``'Adagrad'``.
    :param training_chief_hooks: Iterable of `tf.train.SessionRunHook` objects to run on the
        chief worker during training.
    :return: A Tensorflow Estimator instance.
    """

    def _model_fn(features, labels, mode, config):
        is_training = mode == tf.estimator.ModeKeys.TRAIN

        wide_logit = get_linear_logit(features, linear_feature_columns, l2_reg_linear=l2_reg_linear)

        with variable_scope(DNN_SCOPE_NAME):
            embeddings, dense_values = input_from_feature_columns(
                features, dnn_feature_columns, l2_reg_embedding=l2_reg_embedding)
            deep_input = combined_dnn_input(embeddings, dense_values)

            # The fifth positional DNN argument (passed as dnn_use_bn by other estimators) is fixed to False.
            tower = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, False, seed=seed)
            tower_out = tower(deep_input, training=is_training)
            projection = Dense(
                1, use_bias=False, kernel_initializer=tf.keras.initializers.glorot_normal(seed=seed))
            deep_logit = projection(tower_out)

        logits = wide_logit + deep_logit

        return deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer,
                                training_chief_hooks=training_chief_hooks)

    return tf.estimator.Estimator(_model_fn, model_dir=model_dir, config=config)
================================================
FILE: deepctr/estimator/models/xdeepfm.py
================================================
# -*- coding:utf-8 -*-
"""
Author:
Weichen Shen, weichenswc@163.com
Reference:
[1] Lian J, Zhou X, Zhang F, et al. xDeepFM: Combining Explicit and Implicit Feature Interactions for Recommender Systems[J]. arXiv preprint arXiv:1803.05170, 2018.(https://arxiv.org/pdf/1803.05170.pdf)
"""
import tensorflow as tf
from ..feature_column import get_linear_logit, input_from_feature_columns
from ..utils import deepctr_model_fn, DNN_SCOPE_NAME, variable_scope
from ...layers.core import DNN
from ...layers.interaction import CIN
from ...layers.utils import concat_func, add_func, combined_dnn_input
def xDeepFMEstimator(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(256, 128, 64),
                     cin_layer_size=(128, 128,), cin_split_half=True, cin_activation='relu', l2_reg_linear=0.00001,
                     l2_reg_embedding=0.00001, l2_reg_dnn=0, l2_reg_cin=0, seed=1024, dnn_dropout=0,
                     dnn_activation='relu', dnn_use_bn=False, task='binary', model_dir=None, config=None,
                     linear_optimizer='Ftrl',
                     dnn_optimizer='Adagrad', training_chief_hooks=None):
    """Create an Estimator for xDeepFM: linear logit + DNN logit + optional CIN logit.

    :param linear_feature_columns: An iterable of the feature columns consumed by the linear part.
    :param dnn_feature_columns: An iterable of the feature columns consumed by the deep part
        (their embeddings also feed the CIN).
    :param dnn_hidden_units: list of positive integers or empty list, the number of layers and the
        units of each layer of the deep net.
    :param cin_layer_size: list of positive integers or empty list, the feature maps in each hidden
        layer of the Compressed Interaction Network; when empty the CIN term is left out.
    :param cin_split_half: bool. If set to True, half of the feature maps in each hidden layer
        connect to the output unit.
    :param cin_activation: activation function used on the feature maps.
    :param l2_reg_linear: float. L2 regularizer strength applied to the linear part.
    :param l2_reg_embedding: L2 regularizer strength applied to the embedding vectors.
    :param l2_reg_dnn: L2 regularizer strength applied to the deep net.
    :param l2_reg_cin: L2 regularizer strength applied to the CIN.
    :param seed: integer, used as random seed.
    :param dnn_dropout: float in [0,1), the probability of dropping a given DNN coordinate.
    :param dnn_activation: Activation function used in the DNN.
    :param dnn_use_bn: bool. Whether to use BatchNormalization before activation in the DNN.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss.
    :param model_dir: Directory to save model parameters, graph and etc. It can also be used to
        load checkpoints from the directory to continue training a previously saved model.
    :param config: tf.RunConfig object to configure the runtime settings.
    :param linear_optimizer: Optimizer for the linear part, forwarded to ``deepctr_model_fn``.
        Defaults to ``'Ftrl'``.
    :param dnn_optimizer: Optimizer for the deep part, forwarded to ``deepctr_model_fn``.
        Defaults to ``'Adagrad'``.
    :param training_chief_hooks: Iterable of `tf.train.SessionRunHook` objects to run on the
        chief worker during training.
    :return: A Tensorflow Estimator instance.
    """

    def _model_fn(features, labels, mode, config):
        is_training = mode == tf.estimator.ModeKeys.TRAIN

        wide_logit = get_linear_logit(features, linear_feature_columns, l2_reg_linear=l2_reg_linear)
        logit_terms = [wide_logit]

        with variable_scope(DNN_SCOPE_NAME):
            embeddings, dense_values = input_from_feature_columns(
                features, dnn_feature_columns, l2_reg_embedding=l2_reg_embedding)

            # Embeddings concatenated along axis 1; consumed by the CIN branch below.
            cin_input = concat_func(embeddings, axis=1)

            # Deep tower: always built, regardless of the CIN configuration.
            deep_input = combined_dnn_input(embeddings, dense_values)
            tower = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)
            tower_out = tower(deep_input, training=is_training)
            projection = tf.keras.layers.Dense(
                1, use_bias=False, kernel_initializer=tf.keras.initializers.glorot_normal(seed=seed))
            logit_terms.append(projection(tower_out))

            # The CIN branch is added only when at least one CIN layer was requested.
            if len(cin_layer_size) > 0:
                cin = CIN(cin_layer_size, cin_activation, cin_split_half, l2_reg_cin, seed)
                cin_out = cin(cin_input, training=is_training)
                # Unlike the DNN projection, this Dense keeps its default bias setting.
                cin_projection = tf.keras.layers.Dense(
                    1, kernel_initializer=tf.keras.initializers.glorot_normal(seed=seed))
                logit_terms.append(cin_projection(cin_out))

        logits = add_func(logit_terms)

        return deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer,
                                training_chief_hooks=training_chief_hooks)

    return tf.estimator.Estimator(_model_fn, model_dir=model_dir, config=config)
================================================
FILE: deepctr/estimator/utils.py
================================================
import tensorflow as tf
from tensorflow.python.estimator.canned.head import _Head
from tensorflow.python.estimator.canned.optimizers import get_optimizer_instance
# Scope name for the linear part of the estimators.
# NOTE(review): its use is not visible in this part of the file - confirm against get_linear_logit.
LINEAR_SCOPE_NAME = 'linear'
# Variable-scope name under which the estimator model functions build their embedding/DNN sub-graph.
DNN_SCOPE_NAME = 'dnn'
def _summary_key(head_name, val):
return '%s/%s' % (val, head_name) if head_name else val
class Head(_Head):
    """Estimator head that maps model logits to predictions, loss and evaluation metrics.

    :param task: ``"binary"`` applies a sigmoid to the logits and uses sigmoid
        cross-entropy; every other value is handled as regression with mean squared error.
    :param name: Optional head name. When set it is appended to every metric key.
    """

    def __init__(self, task, name=None):
        self._task = task
        self._name = name

    @property
    def name(self):
        """Name given to this head, or ``None``."""
        return self._name

    @property
    def logits_dimension(self):
        """Size of the logits' last dimension; this head always reports 1."""
        return 1

    def _eval_metric_ops(self, labels, logits, predictions, unweighted_loss, weights=None):
        """Create the metric ops and scalar summaries for the configured task.

        :param labels: Label tensor; cast to float before use.
        :param logits: Not used by this method.
        :param predictions: Prediction tensor; cast to float before use.
        :param unweighted_loss: Per-example loss, averaged here for the loss summaries.
        :param weights: Optional weights forwarded to every metric function.
        :return: Dict mapping each metric key to the value returned by its metric function.
        """
        labels = to_float(labels)
        predictions = to_float(predictions)

        metrics = get_metrics()
        losses = get_losses()

        pred_mean_key = _summary_key(self._name, "prediction/mean")
        label_mean_key = _summary_key(self._name, "label/mean")

        metric_ops = {}
        metric_ops[pred_mean_key] = metrics.mean(predictions, weights=weights)
        metric_ops[label_mean_key] = metrics.mean(labels, weights=weights)

        # Index 1 of each metric result is what gets written to the summaries.
        summary_scalar("prediction/mean", metric_ops[pred_mean_key][1])
        summary_scalar("label/mean", metric_ops[label_mean_key][1])

        mean_loss = losses.compute_weighted_loss(
            unweighted_loss, weights=1.0, reduction=losses.Reduction.MEAN)

        if self._task == "binary":
            auc_key = _summary_key(self._name, "AUC")
            metric_ops[_summary_key(self._name, "LogLoss")] = metrics.mean(mean_loss, weights=weights)
            summary_scalar("LogLoss", mean_loss)
            metric_ops[auc_key] = metrics.auc(labels, predictions, weights=weights)
            summary_scalar("AUC", metric_ops[auc_key][1])
        else:
            mae_key = _summary_key(self._name, "MAE")
            metric_ops[_summary_key(self._name, "MSE")] = metrics.mean_squared_error(
                labels, predictions, weights=weights)
            summary_scalar("MSE", mean_loss)
            metric_ops[mae_key] = metrics.mean_absolute_error(labels, predictions, weights=weights)
            summary_scalar("MAE", metric_ops[mae_key][1])

        return metric_ops

    def create_loss(self, features, mode, logits, labels):
        """Return the unreduced loss for the configured task.

        :param features: Unused for this head.
        :param mode: Unused for this head.
        :param logits: Logits tensor.
        :param labels: Label tensor.
        :return: Sigmoid cross-entropy for the ``"binary"`` task, mean squared error
            otherwise, both requested with ``Reduction.NONE``.
        """
        del mode, features  # Unused for this head.
        losses = get_losses()
        if self._task == "binary":
            loss_fn = losses.sigmoid_cross_entropy
        else:
            loss_fn = losses.mean_squared_error
        return loss_fn(labels, logits, reduction=losses.Reduction.NONE)

    def create_estimator_spec(
            self, features, mode, logits, labels=None, train_op_fn=None, training_chief_hooks=None):
        """Build the ``tf.estimator.EstimatorSpec`` for ``mode``.

        :param features: Input features, forwarded to :meth:`create_loss`.
        :param mode: A ``tf.estimator.ModeKeys`` value.
        :param logits: Model logits; reshaped to ``[-1, 1]``.
        :param labels: Labels; reshaped to ``[-1, 1]``. Not used in PREDICT mode.
        :param train_op_fn: Callable taking the training loss and returning the train op.
            It is invoked for every mode other than PREDICT.
        :param training_chief_hooks: Hooks forwarded to the returned spec.
        :return: A ``tf.estimator.EstimatorSpec``.
        """
        logits = tf.reshape(logits, [-1, 1])
        pred = tf.sigmoid(logits) if self._task == 'binary' else logits

        predictions = {"pred": pred, "logits": logits}
        export_outputs = {"predict": tf.estimator.export.PredictOutput(predictions)}

        if mode == tf.estimator.ModeKeys.PREDICT:
            return tf.estimator.EstimatorSpec(
                mode=mode,
                predictions=predictions,
                export_outputs=export_outputs)

        labels = tf.reshape(labels, [-1, 1])
        unweighted_loss = self.create_loss(features, mode, logits, labels)

        losses = get_losses()
        summed_loss = losses.compute_weighted_loss(
            unweighted_loss, weights=1.0, reduction=losses.Reduction.SUM)
        # The training objective is the summed example loss plus the regularization loss.
        training_loss = summed_loss + losses.get_regularization_loss()

        eval_metric_ops = self._eval_metric_ops(labels, logits, pred, unweighted_loss)

        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            loss=training_loss,
            train_op=train_op_fn(training_loss),
            eval_metric_ops=eval_metric_ops,
            training_chief_hooks=training_chief_hooks)
def deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer, training_chief_hooks):
    """Assemble the ``EstimatorSpec`` shared by the estimator models.

    :param features: Input features passed to the head.
    :param mode: A ``tf.estimator.ModeKeys`` value.
    :param logits: Model logits.
    :param labels: Labels.
    :param task: Task name handed to :class:`Head`.
    :param linear_optimizer: Optimizer spec for the linear part; resolved with
        ``get_optimizer_instance`` and a learning rate of 0.005.
    :param dnn_optimizer: Optimizer spec for the deep part; resolved with
        ``get_optimizer_instance`` and a learning rate of 0.01.
    :param training_chief_hooks: Hooks forwarded to the resulting spec.
    :return: The spec produced by ``Head.create_estimator_spec``.
    """
    resolved_linear = get_optimizer_instance(linear_optimizer, 0.005)
    resolved_dnn = get_optimizer_instance(dnn_optimizer, 0.01)
    train_op_fn = get_train_op_fn(resolved_linear, resolved_dnn)

    return Head(task).create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        train_op_fn=train_op_fn,
        logits=logits,
        training_chief_hooks=training_chief_hooks)
def get_train_op_fn(linear_optimizer, dnn_optimizer):
    """Return a function that builds the train op from a loss tensor.

    The returned function minimizes the loss with ``dnn_optimizer`` over the trainable
    variables collected under ``DNN_SCOPE_NAME`` and with ``linear_optimizer`` over those
    collected under ``LINEAR_SCOPE_NAME``, then increments the global step by one.

    :param linear_optimizer: Optimizer instance for the linear variables.
    :param dnn_optimizer: Optimizer instance for the DNN variables.
    :return: Callable ``loss -> train_op``.
    """

    def _train_op_fn(loss):
        try:
            global_step = tf.train.get_global_step()
        except AttributeError:
            global_step = tf.compat.v1.train.get_global_step()

        linear_vars = get_collection(get_GraphKeys().TRAINABLE_VARIABLES, LINEAR_SCOPE_NAME)
        dnn_vars = get_collection(get_GraphKeys().TRAINABLE_VARIABLES, DNN_SCOPE_NAME)

        # The DNN minimize op is created before the linear one; a part with no
        # variables gets no op at all.
        minimize_ops = []
        for optimizer, var_list in ((dnn_optimizer, dnn_vars), (linear_optimizer, linear_vars)):
            if len(var_list) > 0:
                minimize_ops.append(optimizer.minimize(loss, var_list=var_list))

        grouped = tf.group(*minimize_ops)
        # The global step is only bumped after both minimize ops have run.
        with tf.control_dependencies([grouped]):
            try:
                return tf.assign_add(global_step, 1).op
            except AttributeError:
                return tf.compat.v1.assign_add(global_step, 1).op

    return _train_op_fn
def variable_scope(name_or_scope):
    """Open a variable scope via ``tf.variable_scope``, falling back to ``tf.compat.v1``.

    :param name_or_scope: Scope name or scope object.
    :return: The variable-scope context manager.
    """
    try:
        scope = tf.variable_scope(name_or_scope)
    except AttributeError:
        scope = tf.compat.v1.variable_scope(name_or_scope)
    return scope
def get_collection(key, scope=None):
    """Fetch a graph collection via ``tf.get_collection``, falling back to ``tf.compat.v1``.

    :param key: Collection key.
    :param scope: Optional scope filter forwarded to TensorFlow.
    :return: The collection returned by TensorFlow.
    """
    try:
        collection = tf.get_collection(key, scope=scope)
    except AttributeError:
        collection = tf.compat.v1.get_collection(key, scope=scope)
    return collection
def get_GraphKeys():
    """Return ``tf.GraphKeys``, or ``tf.compat.v1.GraphKeys`` when the former is missing."""
    try:
        graph_keys = tf.GraphKeys
    except AttributeError:
        graph_keys = tf.compat.v1.GraphKeys
    return graph_keys
def get_losses():
    """Return the losses module: ``tf.compat.v1.losses`` if available, else ``tf.losses``."""
    try:
        losses_module = tf.compat.v1.losses
    except AttributeError:
        losses_module = tf.losses
    return losses_module
def input_layer(features, feature_columns):
    """Call ``tf.feature_column.input_layer``, falling back to the ``tf.compat.v1`` version.

    :param features: Features forwarded to TensorFlow.
    :param feature_columns: Feature columns forwarded to TensorFlow.
    :return: The value returned by ``input_layer``.
    """
    try:
        dense_tensor = tf.feature_column.input_layer(features, feature_columns)
    except AttributeError:
        dense_tensor = tf.compat.v1.feature_column.input_layer(features, feature_columns)
    return dense_tensor
def get_metrics():
    """Return the metrics module: ``tf.compat.v1.metrics`` if available, else ``tf.metrics``."""
    try:
        metrics_module = tf.compat.v1.metrics
    except AttributeError:
        metrics_module = tf.metrics
    return metrics_module
def to_float(x, name="ToFloat"):
    """Cast ``x`` with ``tf.to_float``, falling back to ``tf.compat.v1.to_float``.

    :param x: Value to cast.
    :param name: Name passed on to the cast op.
    :return: The casted tensor.
    """
    try:
        casted = tf.to_float(x, name)
    except AttributeError:
        casted = tf.compat.v1.to_float(x, name)
    return casted
def summary_scalar(name, data):
    """Record a scalar summary, preferring ``tf.summary.scalar``.

    :param name: Summary name.
    :param data: Scalar tensor to record.
    """
    try:
        tf.summary.scalar(name, data)
    except AttributeError:
        # tf version 2.5.0+ raises:
        # AttributeError: module 'tensorflow._api.v2.summary' has no attribute 'scalar'
        tf.compat.v1.summary.scalar(name, data)
================================================
FILE: deepctr/feature_column.py
================================================
import tensorflow as tf
from collections import namedtuple, OrderedDict
from copy import copy
from itertools import chain
from tensorflow.python.keras.initializers import RandomNormal, Zeros
from tensorflow.python.keras.layers import Input, Lambda
from .inputs import create_embedding_matrix, embedding_lookup, get_dense_input, varlen_embedding_lookup, \
get_varlen_pooling_list, mergeDict
from .layers import Linear
from .layers.utils import concat_func
# Group name assigned to a SparseFeat when the caller does not pass ``group_name``.
DEFAULT_GROUP_NAME = "default_group"
class SparseFeat(namedtuple('SparseFeat',
                            ['name', 'vocabulary_size', 'embedding_dim', 'use_hash', 'vocabulary_path', 'dtype',
                             'embeddings_initializer', 'embedding_name', 'group_name', 'trainable'])):
    """ Sparse (categorical) feature

    Args:
        name: feature name.
        vocabulary_size: vocabulary size of the feature.
        embedding_dim: embedding dimension, default = 4. The string ``"auto"`` is
            replaced by ``6 * int(vocabulary_size ** 0.25)``.
        use_hash: stored as given, default = False.
        vocabulary_path: stored as given, default = None.
        dtype: dtype of the feature, default="int32".
        embeddings_initializer: embedding initializer. If `None`,
            ``RandomNormal(mean=0.0, stddev=0.0001, seed=2020)`` is used.
        embedding_name: name of the embedding. If `None`, the feature name is used.
        group_name: feature group name, default = ``DEFAULT_GROUP_NAME``.
        trainable: stored as given, default = True.
    """
    __slots__ = ()

    def __new__(cls, name, vocabulary_size, embedding_dim=4, use_hash=False, vocabulary_path=None, dtype="int32",
                embeddings_initializer=None, embedding_name=None, group_name=DEFAULT_GROUP_NAME, trainable=True):
        if embedding_dim == "auto":
            embedding_dim = 6 * int(pow(vocabulary_size, 0.25))

        initializer = embeddings_initializer
        if initializer is None:
            initializer = RandomNormal(mean=0.0, stddev=0.0001, seed=2020)

        resolved_embedding_name = name if embedding_name is None else embedding_name

        return super(SparseFeat, cls).__new__(
            cls, name, vocabulary_size, embedding_dim, use_hash, vocabulary_path, dtype,
            initializer, resolved_embedding_name, group_name, trainable)

    def __hash__(self):
        # Hash on the feature name only.
        return self.name.__hash__()
class VarLenSparseFeat(namedtuple('VarLenSparseFeat',
                                  ['sparsefeat', 'maxlen', 'combiner', 'length_name', 'weight_name',
                                   'weight_norm'])):
    """ Variable-length sparse feature wrapping a ``SparseFeat``

    Args:
        sparsefeat: the wrapped sparse feature; the read-only properties below
            forward to its attributes.
        maxlen: stored as given.
        combiner: stored as given, default="mean".
        length_name: stored as given, default = None.
        weight_name: stored as given, default = None.
        weight_norm: stored as given, default = True.
    """
    __slots__ = ()

    def __new__(cls, sparsefeat, maxlen, combiner="mean", length_name=None, weight_name=None, weight_norm=True):
        return super(VarLenSparseFeat, cls).__new__(
            cls, sparsefeat, maxlen, combiner, length_name, weight_name, weight_norm)

    @property
    def name(self):
        """``name`` of the wrapped sparse feature."""
        return self.sparsefeat.name

    @property
    def vocabulary_size(self):
        """``vocabulary_size`` of the wrapped sparse feature."""
        return self.sparsefeat.vocabulary_size

    @property
    def embedding_dim(self):
        """``embedding_dim`` of the wrapped sparse feature."""
        return self.sparsefeat.embedding_dim

    @property
    def use_hash(self):
        """``use_hash`` of the wrapped sparse feature."""
        return self.sparsefeat.use_hash

    @property
    def vocabulary_path(self):
        """``vocabulary_path`` of the wrapped sparse feature."""
        return self.sparsefeat.vocabulary_path

    @property
    def dtype(self):
        """``dtype`` of the wrapped sparse feature."""
        return self.sparsefeat.dtype

    @property
    def embeddings_initializer(self):
        """``embeddings_initializer`` of the wrapped sparse feature."""
        return self.sparsefeat.embeddings_initializer

    @property
    def embedding_name(self):
        """``embedding_name`` of the wrapped sparse feature."""
        return self.sparsefeat.embedding_name

    @property
    def group_name(self):
        """``group_name`` of the wrapped sparse feature."""
        return self.sparsefeat.group_name

    @property
    def trainable(self):
        """``trainable`` of the wrapped sparse feature."""
        return self.sparsefeat.trainable

    def __hash__(self):
        # Hash on the (forwarded) feature name only.
        return self.name.__hash__()
class DenseFeat(namedtuple('DenseFeat', ['name', 'dimension', 'dtype', 'transform_fn'])):
    """ Dense feature
    Args:
        name: feature name.
        dimension: dimension of the feature, default = 1.
        dtype: dtype of the feature, default="float32".
        transform_fn: If not `None` , a function that can be used to transform
            values of the feature.  the function takes the input Tensor as its
            argument, and returns the output Tensor.
            (e.g. lambda x: (x - 3.0) / 4.2).
    """
    __slots__ = ()

    def __new__(cls, name, dimension=1, dtype="float32", transform_fn=None):
        fields = (name, dimension, dtype, transform_fn)
        return super(DenseFeat, cls).__new__(cls, *fields)

    def __hash__(self):
        # Hash on the feature name only.
        return self.name.__hash__()

    # def __eq__(self, other):
    #     if self.name == other.name:
    #         return True
    #     return False

    # def __repr__(self):
    #     return 'DenseFeat:'+self.name
def get_feature_names(feature_columns):
    """Return the input names produced by ``build_input_features``, in creation order.

    Args:
        feature_columns: iterable of feature columns.

    Returns:
        list of the keys of the dict built by ``build_input_features``.
    """
    return list(build_input_features(feature_columns).keys())
def build_input_features(feature_columns, prefix=''):
    """Create one Keras ``Input`` per feature column.

    Args:
        feature_columns: iterable of ``SparseFeat``, ``DenseFeat`` or
            ``VarLenSparseFeat`` instances.
        prefix: string prepended to every ``Input`` layer name (the dict keys are
            not prefixed).

    Returns:
        ``OrderedDict`` mapping feature name to its ``Input``. A ``VarLenSparseFeat``
        additionally contributes an entry for ``weight_name`` and ``length_name``
        when those are set.

    Raises:
        TypeError: if a column is none of the supported feature types.
    """
    input_features = OrderedDict()
    for fc in feature_columns:
        if isinstance(fc, SparseFeat):
            input_features[fc.name] = Input(
                shape=(1,), name=prefix + fc.name, dtype=fc.dtype)
        elif isinstance(fc, DenseFeat):
            input_features[fc.name] = Input(
                shape=(fc.dimension,), name=prefix + fc.name, dtype=fc.dtype)
        elif isinstance(fc, VarLenSparseFeat):
            input_features[fc.name] = Input(shape=(fc.maxlen,), name=prefix + fc.name,
                                            dtype=fc.dtype)
            if fc.weight_name is not None:
                input_features[fc.weight_name] = Input(shape=(fc.maxlen, 1), name=prefix + fc.weight_name,
                                                       dtype="float32")
            if fc.length_name is not None:
                input_features[fc.length_name] = Input((1,), name=prefix + fc.length_name, dtype='int32')
        else:
            # Format a single message; passing the type as a second positional
            # argument made the error render as a tuple.
            raise TypeError("Invalid feature column type,got %s" % (type(fc),))

    return input_features
def get_linear_logit(features, feature_columns, units=1, use_bias=False, seed=1024, prefix='linear',
                     l2_reg=0, sparse_feat_refine_weight=None):
    """Build the linear logit of a model from its feature columns.

    Every sparse / variable-length sparse column is copied with ``embedding_dim=1`` and a
    ``Zeros`` initializer, so each embedding acts as a scalar weight.

    Args:
        features: dict of input tensors keyed by feature name.
        feature_columns: feature columns of the linear part (copied, then modified by index).
        units: number of linear outputs; one set of embeddings is created per unit.
        use_bias: forwarded to ``Linear``.
        seed: random seed forwarded to the embedding creation and to ``Linear``.
        prefix: name prefix for the created embeddings (``prefix + str(unit)`` per unit).
        l2_reg: L2 strength forwarded to the embeddings and to ``Linear``.
        sparse_feat_refine_weight: optional tensor multiplied into the concatenated
            sparse input after ``tf.expand_dims(..., axis=1)``.

    Returns:
        The concatenation of the per-unit linear logits, or a constant ``[[0.0]]``
        tensor when there are neither sparse nor dense inputs.
    """
    linear_feature_columns = copy(feature_columns)
    for idx, column in enumerate(linear_feature_columns):
        if isinstance(column, SparseFeat):
            linear_feature_columns[idx] = column._replace(embedding_dim=1,
                                                          embeddings_initializer=Zeros())
        elif isinstance(column, VarLenSparseFeat):
            linear_feature_columns[idx] = column._replace(
                sparsefeat=column.sparsefeat._replace(embedding_dim=1,
                                                      embeddings_initializer=Zeros()))

    linear_emb_list = []
    for unit in range(units):
        unit_embeddings, _ = input_from_feature_columns(features, linear_feature_columns, l2_reg, seed,
                                                        prefix=prefix + str(unit))
        linear_emb_list.append(unit_embeddings)
    _, dense_input_list = input_from_feature_columns(features, linear_feature_columns, l2_reg, seed, prefix=prefix)

    has_dense = len(dense_input_list) > 0
    linear_logit_list = []
    for unit_embeddings in linear_emb_list:
        has_sparse = len(unit_embeddings) > 0

        if not has_sparse and not has_dense:  # empty feature_columns
            return Lambda(lambda x: tf.constant([[0.0]]))(list(features.values())[0])

        # Layers are created in the order: sparse concat, dense concat, refine, Linear.
        if has_sparse:
            sparse_input = concat_func(unit_embeddings)
        if has_dense:
            dense_input = concat_func(dense_input_list)
        if has_sparse and sparse_feat_refine_weight is not None:
            sparse_input = Lambda(lambda x: x[0] * tf.expand_dims(x[1], axis=1))(
                [sparse_input, sparse_feat_refine_weight])

        if has_sparse and has_dense:
            linear_logit = Linear(l2_reg, mode=2, use_bias=use_bias, seed=seed)([sparse_input, dense_input])
        elif has_sparse:
            linear_logit = Linear(l2_reg, mode=0, use_bias=use_bias, seed=seed)(sparse_input)
        else:
            linear_logit = Linear(l2_reg, mode=1, use_bias=use_bias, seed=seed)(dense_input)
        linear_logit_list.append(linear_logit)

    return concat_func(linear_logit_list)
def input_from_feature_columns(features, feature_columns, l2_reg, seed, prefix='', seq_mask_zero=True,
                               support_dense=True, support_group=False):
    """Turn feature columns into embedding tensors and dense value tensors.

    Args:
        features: dict of input tensors keyed by feature name.
        feature_columns: feature columns to process; may be empty or `None`.
        l2_reg: L2 strength forwarded to ``create_embedding_matrix``.
        seed: random seed forwarded to ``create_embedding_matrix``.
        prefix: name prefix forwarded to ``create_embedding_matrix``.
        seq_mask_zero: forwarded to ``create_embedding_matrix``.
        support_dense: when False, the presence of any dense feature raises.
        support_group: when True the embeddings are returned grouped by group name,
            otherwise flattened into one list.

    Returns:
        Tuple ``(embeddings, dense_value_list)`` where ``embeddings`` is a dict of
        lists (``support_group=True``) or a flat list.

    Raises:
        ValueError: if dense features are present while ``support_dense`` is False.
    """
    columns = feature_columns if feature_columns else []
    sparse_feature_columns = [fc for fc in columns if isinstance(fc, SparseFeat)]
    varlen_sparse_feature_columns = [fc for fc in columns if isinstance(fc, VarLenSparseFeat)]

    embedding_matrix_dict = create_embedding_matrix(feature_columns, l2_reg, seed, prefix=prefix,
                                                    seq_mask_zero=seq_mask_zero)
    group_sparse_embedding_dict = embedding_lookup(embedding_matrix_dict, features, sparse_feature_columns)

    dense_value_list = get_dense_input(features, feature_columns)
    if len(dense_value_list) > 0 and not support_dense:
        raise ValueError("DenseFeat is not supported in dnn_feature_columns")

    sequence_embed_dict = varlen_embedding_lookup(embedding_matrix_dict, features, varlen_sparse_feature_columns)
    group_varlen_sparse_embedding_dict = get_varlen_pooling_list(sequence_embed_dict, features,
                                                                 varlen_sparse_feature_columns)

    group_embedding_dict = mergeDict(group_sparse_embedding_dict, group_varlen_sparse_embedding_dict)
    if support_group:
        return group_embedding_dict, dense_value_list
    return list(chain.from_iterable(group_embedding_dict.values())), dense_value_list
================================================
FILE: deepctr/inputs.py
================================================
# -*- coding:utf-8 -*-
"""
Author:
Weichen Shen,weichenswc@163.com
"""
from collections import defaultdict
from itertools import chain
from tensorflow.python.keras.layers import Embedding, Lambda
from tensorflow.python.keras.regularizers import l2
from .layers.sequence import SequencePoolingLayer, WeightedSequenceLayer
from .layers.utils import Hash
def get_inputs_list(inputs):
    """Flatten the values of several dicts into one list, skipping ``None`` entries.

    :param inputs: iterable of dicts (or ``None``).
    :return: list with the values of every non-``None`` dict, in order.
    """
    return list(chain.from_iterable(d.values() for d in inputs if d is not None))
def create_embedding_dict(sparse_feature_columns, varlen_sparse_feature_columns, seed, l2_reg,
                          prefix='sparse_', seq_mask_zero=True):
    """Create one ``Embedding`` layer per sparse / variable-length sparse feature.

    Layers are keyed by ``embedding_name``; a variable-length feature that shares its
    ``embedding_name`` with an earlier entry replaces that entry.

    :param sparse_feature_columns: sparse feature columns.
    :param varlen_sparse_feature_columns: variable-length sparse feature columns; may be
        empty or ``None``.
    :param seed: not used by this function.
    :param l2_reg: L2 strength of the embedding regularizer.
    :param prefix: prefix of the created layer names.
    :param seq_mask_zero: ``mask_zero`` value of the variable-length embeddings.
    :return: dict mapping ``embedding_name`` to its ``Embedding`` layer.
    """
    embedding_layers = {}

    for column in sparse_feature_columns:
        layer = Embedding(column.vocabulary_size, column.embedding_dim,
                          embeddings_initializer=column.embeddings_initializer,
                          embeddings_regularizer=l2(l2_reg),
                          name=prefix + '_emb_' + column.embedding_name)
        layer.trainable = column.trainable
        embedding_layers[column.embedding_name] = layer

    if varlen_sparse_feature_columns and len(varlen_sparse_feature_columns) > 0:
        for column in varlen_sparse_feature_columns:
            # Note: the layer name uses the feature name, the dict key the embedding name.
            layer = Embedding(column.vocabulary_size, column.embedding_dim,
                              embeddings_initializer=column.embeddings_initializer,
                              embeddings_regularizer=l2(l2_reg),
                              name=prefix + '_seq_emb_' + column.name,
                              mask_zero=seq_mask_zero)
            layer.trainable = column.trainable
            embedding_layers[column.embedding_name] = layer

    return embedding_layers
def get_embedding_vec_list(embedding_dict, input_dict, sparse_feature_columns, return_feat_list=(), mask_feat_list=()):
    """Look up the embedding of each selected sparse feature.

    :param embedding_dict: dict mapping feature name to a callable embedding layer.
    :param input_dict: dict mapping feature name to its input.
    :param sparse_feature_columns: sparse feature columns to process.
    :param return_feat_list: names of the features to return; empty means all.
    :param mask_feat_list: names of hashed features for which ``mask_zero`` is enabled.
    :return: list of embedding outputs, in column order.
    """
    take_all = len(return_feat_list) == 0
    vectors = []
    for column in sparse_feature_columns:
        name = column.name
        if not (take_all or name in return_feat_list):
            continue
        if column.use_hash:
            hasher = Hash(column.vocabulary_size, mask_zero=(name in mask_feat_list),
                          vocabulary_path=column.vocabulary_path)
            indices = hasher(input_dict[name])
        else:
            indices = input_dict[name]
        vectors.append(embedding_dict[name](indices))
    return vectors
def create_embedding_matrix(feature_columns, l2_reg, seed, prefix="", seq_mask_zero=True):
    """Create the embedding layers for all sparse and variable-length sparse columns.

    :param feature_columns: feature columns; may be empty or ``None``.
    :param l2_reg: L2 strength forwarded to ``create_embedding_dict``.
    :param seed: random seed forwarded to ``create_embedding_dict``.
    :param prefix: name prefix; ``'sparse'`` is appended before it is forwarded.
    :param seq_mask_zero: forwarded to ``create_embedding_dict``.
    :return: dict of embedding layers built by ``create_embedding_dict``.
    """
    # Imported here rather than at module level (feature_column imports this module).
    from . import feature_column as fc_lib

    columns = feature_columns if feature_columns else []
    sparse_feature_columns = [fc for fc in columns if isinstance(fc, fc_lib.SparseFeat)]
    varlen_sparse_feature_columns = [fc for fc in columns if isinstance(fc, fc_lib.VarLenSparseFeat)]

    return create_embedding_dict(sparse_feature_columns, varlen_sparse_feature_columns, seed,
                                 l2_reg, prefix=prefix + 'sparse', seq_mask_zero=seq_mask_zero)
def embedding_lookup(sparse_embedding_dict, sparse_input_dict, sparse_feature_columns, return_feat_list=(),
                     mask_feat_list=(), to_list=False):
    """Look up the embeddings of the selected sparse features, grouped by ``group_name``.

    :param sparse_embedding_dict: dict mapping ``embedding_name`` to a callable embedding layer.
    :param sparse_input_dict: dict mapping feature name to its input.
    :param sparse_feature_columns: sparse feature columns to process.
    :param return_feat_list: names of the features to return; empty means all.
    :param mask_feat_list: names of hashed features for which ``mask_zero`` is enabled.
    :param to_list: when True, return one flat list instead of the grouped dict.
    :return: ``defaultdict(list)`` keyed by group name, or a flat list.
    """
    take_all = len(return_feat_list) == 0
    grouped = defaultdict(list)
    for column in sparse_feature_columns:
        name = column.name
        if not (take_all or name in return_feat_list):
            continue
        if column.use_hash:
            hasher = Hash(column.vocabulary_size, mask_zero=(name in mask_feat_list),
                          vocabulary_path=column.vocabulary_path)
            indices = hasher(sparse_input_dict[name])
        else:
            indices = sparse_input_dict[name]
        grouped[column.group_name].append(sparse_embedding_dict[column.embedding_name](indices))

    if not to_list:
        return grouped
    return list(chain.from_iterable(grouped.values()))
def varlen_embedding_lookup(embedding_dict, sequence_input_dict, varlen_sparse_feature_columns):
    """Look up the sequence embedding of each variable-length sparse feature.

    :param embedding_dict: dict mapping ``embedding_name`` to a callable embedding layer.
    :param sequence_input_dict: dict mapping feature name to its sequence input.
    :param varlen_sparse_feature_columns: variable-length sparse feature columns.
    :return: dict mapping feature name to its embedded sequence.
    """
    embedded = {}
    for column in varlen_sparse_feature_columns:
        name = column.name
        if column.use_hash:
            hasher = Hash(column.vocabulary_size, mask_zero=True, vocabulary_path=column.vocabulary_path)
            indices = hasher(sequence_input_dict[name])
        else:
            indices = sequence_input_dict[name]
        embedded[name] = embedding_dict[column.embedding_name](indices)
    return embedded
def get_varlen_pooling_list(embedding_dict, features, varlen_sparse_feature_columns, to_list=False):
    """Pool every variable-length sequence embedding into one vector, grouped by ``group_name``.

    For each column the sequence embedding is optionally re-weighted with a
    ``WeightedSequenceLayer`` (when ``weight_name`` is set) and then reduced with a
    ``SequencePoolingLayer`` using the column's ``combiner``. When ``length_name`` is set
    the length input is passed to the layers explicitly; otherwise the layers are
    created with ``supports_masking=True``.

    :param embedding_dict: dict mapping feature name to its embedded sequence.
    :param features: dict of input tensors (used for the length and weight inputs).
    :param varlen_sparse_feature_columns: variable-length sparse feature columns.
    :param to_list: when True, return one flat list instead of the grouped dict.
    :return: ``defaultdict(list)`` keyed by group name, or a flat ``list`` of pooled vectors.
    """
    pooling_vec_list = defaultdict(list)
    for fc in varlen_sparse_feature_columns:
        feature_name = fc.name
        combiner = fc.combiner
        feature_length_name = fc.length_name
        if feature_length_name is not None:
            if fc.weight_name is not None:
                seq_input = WeightedSequenceLayer(weight_normalization=fc.weight_norm)(
                    [embedding_dict[feature_name], features[feature_length_name], features[fc.weight_name]])
            else:
                seq_input = embedding_dict[feature_name]
            vec = SequencePoolingLayer(combiner, supports_masking=False)(
                [seq_input, features[feature_length_name]])
        else:
            if fc.weight_name is not None:
                seq_input = WeightedSequenceLayer(weight_normalization=fc.weight_norm, supports_masking=True)(
                    [embedding_dict[feature_name], features[fc.weight_name]])
            else:
                seq_input = embedding_dict[feature_name]
            vec = SequencePoolingLayer(combiner, supports_masking=True)(
                seq_input)
        pooling_vec_list[fc.group_name].append(vec)

    if to_list:
        # Materialize the result: a bare chain object is a one-shot iterator with no
        # len()/indexing, and embedding_lookup(to_list=True) already returns a list.
        return list(chain.from_iterable(pooling_vec_list.values()))
    return pooling_vec_list
def get_dense_input(features, feature_columns):
    """Collect the input tensors of all dense features, applying ``transform_fn`` if set.

    :param features: dict of input tensors keyed by feature name.
    :param feature_columns: feature columns; may be empty or ``None``.
    :return: list with one tensor per ``DenseFeat``; a column with a ``transform_fn``
        contributes the output of ``Lambda(transform_fn)`` instead of the raw input.
    """
    # Imported here rather than at module level (feature_column imports this module).
    from . import feature_column as fc_lib

    columns = feature_columns if feature_columns else []
    dense_inputs = []
    for column in columns:
        if not isinstance(column, fc_lib.DenseFeat):
            continue
        value = features[column.name]
        if column.transform_fn is not None:
            value = Lambda(column.transform_fn)(value)
        dense_inputs.append(value)
    return dense_inputs
def mergeDict(a, b):
    """Merge two dicts of lists into a new ``defaultdict(list)``.

    :param a: first mapping of key to list.
    :param b: second mapping of key to list.
    :return: new ``defaultdict(list)`` where each key holds the items from ``a``
        followed by the items from ``b``. The inputs are not modified.
    """
    merged = defaultdict(list)
    for source in (a, b):
        for key, values in source.items():
            merged[key].extend(values)
    return merged
================================================
FILE: deepctr/layers/__init__.py
================================================
import tensorflow as tf
from .activation import Dice
from .core import DNN, LocalActivationUnit, PredictionLayer, RegulationModule
from .interaction import (CIN, FM, AFMLayer, BiInteractionPooling, CrossNet, CrossNetMix,
InnerProductLayer, InteractingLayer,
OutterProductLayer, FGCNNLayer, SENETLayer, BilinearInteraction,
FieldWiseBiInteraction, FwFMLayer, FEFMLayer, BridgeModule)
from .normalization import LayerNormalization
from .sequence import (AttentionSequencePoolingLayer, BiasEncoding, BiLSTM,
KMaxPooling, SequencePoolingLayer, WeightedSequenceLayer,
Transformer, DynamicGRU, PositionEncoding)
from .utils import NoMask, Hash, Linear, _Add, combined_dnn_input, softmax, reduce_sum, Concat
# Name -> object registry of the layers and helper functions imported above, plus the
# ``tf`` module itself.
# NOTE(review): presumably meant to be passed as ``custom_objects`` when loading saved
# Keras models -- confirm against callers.
custom_objects = {'tf': tf,
                  'InnerProductLayer': InnerProductLayer,
                  'OutterProductLayer': OutterProductLayer,
                  'DNN': DNN,
                  'PredictionLayer': PredictionLayer,
                  'FM': FM,
                  'AFMLayer': AFMLayer,
                  'CrossNet': CrossNet,
                  'CrossNetMix': CrossNetMix,
                  'BiInteractionPooling': BiInteractionPooling,
                  'LocalActivationUnit': LocalActivationUnit,
                  'Dice': Dice,
                  'SequencePoolingLayer': SequencePoolingLayer,
                  'AttentionSequencePoolingLayer': AttentionSequencePoolingLayer,
                  'CIN': CIN,
                  'InteractingLayer': InteractingLayer,
                  'LayerNormalization': LayerNormalization,
                  'BiLSTM': BiLSTM,
                  'Transformer': Transformer,
                  'NoMask': NoMask,
                  'BiasEncoding': BiasEncoding,
                  'KMaxPooling': KMaxPooling,
                  'FGCNNLayer': FGCNNLayer,
                  'Hash': Hash,
                  'Linear': Linear,
                  'Concat': Concat,
                  'DynamicGRU': DynamicGRU,
                  'SENETLayer': SENETLayer,
                  'BilinearInteraction': BilinearInteraction,
                  'WeightedSequenceLayer': WeightedSequenceLayer,
                  '_Add': _Add,
                  'FieldWiseBiInteraction': FieldWiseBiInteraction,
                  'FwFMLayer': FwFMLayer,
                  'softmax': softmax,
                  'FEFMLayer': FEFMLayer,
                  'reduce_sum': reduce_sum,
                  'PositionEncoding': PositionEncoding,
                  'RegulationModule': RegulationModule,
                  'BridgeModule': BridgeModule
                  }
================================================
FILE: deepctr/layers/activation.py
================================================
# -*- coding:utf-8 -*-
"""
Author:
Weichen Shen,weichenswc@163.com
"""
import tensorflow as tf
try:
from tensorflow.python.ops.init_ops import Zeros
except ImportError:
from tensorflow.python.ops.init_ops_v2 import Zeros
from tensorflow.python.keras.layers import Layer, Activation
try:
from tensorflow.python.keras.layers import BatchNormalization
except ImportError:
BatchNormalization = tf.keras.layers.BatchNormalization
try:
unicode
except NameError:
unicode = str
class Dice(Layer):
    """The Data Adaptive Activation Function in DIN,which can be viewed as a generalization of PReLu and can adaptively adjust the rectified point according to distribution of input data.

      Input shape
        - Arbitrary. Use the keyword argument `input_shape` (tuple of integers, does not include the samples axis) when using this layer as the first layer in a model.

      Output shape
        - Same shape as the input.

      Arguments
        - **axis** : Integer, the axis that should be used to compute data distribution (typically the features axis).

        - **epsilon** : Small float added to variance to avoid dividing by zero.

      References
        - [Zhou G, Zhu X, Song C, et al. Deep interest network for click-through rate prediction[C]//Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining. ACM, 2018: 1059-1068.](https://arxiv.org/pdf/1706.06978.pdf)
    """

    def __init__(self, axis=-1, epsilon=1e-9, **kwargs):
        self.axis = axis
        self.epsilon = epsilon
        super(Dice, self).__init__(**kwargs)

    def build(self, input_shape):
        # Batch normalization without learned offset/scale, used only to normalize the gate input.
        self.bn = BatchNormalization(
            axis=self.axis, epsilon=self.epsilon, center=False, scale=False)
        # One learnable slope per element of the last input dimension, initialized to zero.
        self.alphas = self.add_weight(shape=(input_shape[-1],),
                                      initializer=Zeros(),
                                      dtype=tf.float32,
                                      name='dice_alpha')
        super(Dice, self).build(input_shape)  # Be sure to call this somewhere!
        self.uses_learning_phase = True

    def call(self, inputs, training=None, **kwargs):
        normalized = self.bn(inputs, training=training)
        gate = tf.sigmoid(normalized)
        # Inputs pass through where the gate is open and are scaled by alpha where it is closed.
        return self.alphas * (1.0 - gate) * inputs + gate * inputs

    def compute_output_shape(self, input_shape):
        return input_shape

    def get_config(self, ):
        config = super(Dice, self).get_config()
        merged = dict(list(config.items()))
        merged.update({'axis': self.axis, 'epsilon': self.epsilon})
        return merged
def activation_layer(activation):
    """Create a new activation layer from a name or a ``Layer`` subclass.

    :param activation: ``"dice"``/``"Dice"`` for a :class:`Dice` layer, any other string
        for ``Activation(activation)``, or a ``Layer`` subclass which is instantiated
        without arguments.
    :return: the created layer instance.
    :raises ValueError: if ``activation`` is neither a string nor a ``Layer`` subclass
        (this includes ``Layer`` instances and plain callables).
    """
    if activation in ("dice", "Dice"):
        act_layer = Dice()
    elif isinstance(activation, (str, unicode)):
        act_layer = Activation(activation)
    # issubclass() raises a bare TypeError for non-class arguments, so check for a
    # class first and let everything else reach the descriptive ValueError below.
    elif isinstance(activation, type) and issubclass(activation, Layer):
        act_layer = activation()
    else:
        # The one-element tuple keeps %-formatting correct when `activation` is itself a tuple.
        raise ValueError(
            "Invalid activation,found %s.You should use a str or a Activation Layer Class." % (activation,))
    return act_layer
================================================
FILE: deepctr/layers/core.py
================================================
# -*- coding:utf-8 -*-
"""
Author:
Weichen Shen,weichenswc@163.com
"""
import tensorflow as tf
from tensorflow.python.keras import backend as K
try:
from tensorflow.python.ops.init_ops_v2 import Zeros, Ones, glorot_normal
except ImportError:
from tensorflow.python.ops.init_ops import Zeros, Ones, glorot_normal_initializer as glorot_normal
from tensorflow.python.keras.layers import Layer, Dropout
try:
from tensorflow.python.keras.layers import BatchNormalization
except ImportError:
BatchNormalization = tf.keras.layers.BatchNormalization
from tensorflow.python.keras.regularizers import l2
from .activation import activation_layer
class LocalActivationUnit(Layer):
    """The LocalActivationUnit used in DIN with which the representation of
    user interests varies adaptively given different candidate items.

      Input shape
        - A list of two 3D tensor with shape:  ``(batch_size, 1, embedding_size)`` and ``(batch_size, T, embedding_size)``

      Output shape
        - 3D tensor with shape: ``(batch_size, T, 1)``.

      Arguments
        - **hidden_units**:list of positive integer, the attention net layer number and units in each layer.

        - **activation**: Activation function to use in attention net.

        - **l2_reg**: float between 0 and 1. L2 regularizer strength applied to the kernel weights matrix of attention net.

        - **dropout_rate**: float in [0,1). Fraction of the units to dropout in attention net.

        - **use_bn**: bool. Whether use BatchNormalization before activation or not in attention net.

        - **seed**: A Python integer to use as random seed.

      References
        - [Zhou G, Zhu X, Song C, et al. Deep interest network for click-through rate prediction[C]//Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining. ACM, 2018: 1059-1068.](https://arxiv.org/pdf/1706.06978.pdf)
    """

    def __init__(self, hidden_units=(64, 32), activation='sigmoid', l2_reg=0, dropout_rate=0, use_bn=False, seed=1024,
                 **kwargs):
        self.hidden_units = hidden_units
        self.activation = activation
        self.l2_reg = l2_reg
        self.dropout_rate = dropout_rate
        self.use_bn = use_bn
        self.seed = seed
        super(LocalActivationUnit, self).__init__(**kwargs)
        self.supports_masking = True

    def build(self, input_shape):
        if not isinstance(input_shape, list) or len(input_shape) != 2:
            raise ValueError('A `LocalActivationUnit` layer should be called '
                             'on a list of 2 inputs')

        query_shape, keys_shape = input_shape[0], input_shape[1]

        if len(query_shape) != 3 or len(keys_shape) != 3:
            raise ValueError("Unexpected inputs dimensions %d and %d, expect to be 3 dimensions" % (
                len(query_shape), len(keys_shape)))

        if query_shape[-1] != keys_shape[-1] or query_shape[1] != 1:
            raise ValueError('A `LocalActivationUnit` layer requires '
                             'inputs of a two inputs with shape (None,1,embedding_size) and (None,T,embedding_size)'
                             'Got different shapes: %s,%s' % (query_shape, keys_shape))

        # Without hidden layers the projection acts directly on the 4-way concatenated
        # attention input; otherwise on the output of the last hidden layer.
        if len(self.hidden_units) == 0:
            size = 4 * int(query_shape[-1])
        else:
            size = self.hidden_units[-1]

        self.kernel = self.add_weight(shape=(size, 1),
                                      initializer=glorot_normal(seed=self.seed),
                                      name="kernel")
        self.bias = self.add_weight(shape=(1,), initializer=Zeros(), name="bias")
        self.dnn = DNN(self.hidden_units, self.activation, self.l2_reg, self.dropout_rate, self.use_bn,
                       seed=self.seed)

        super(LocalActivationUnit, self).build(input_shape)  # Be sure to call this somewhere!

    def call(self, inputs, training=None, **kwargs):
        query, keys = inputs

        # Repeat the query along axis 1 so it lines up with every key position.
        seq_len = keys.get_shape()[1]
        queries = K.repeat_elements(query, seq_len, 1)

        att_input = tf.concat([queries, keys, queries - keys, queries * keys], axis=-1)
        att_out = self.dnn(att_input, training=training)

        projected = tf.tensordot(att_out, self.kernel, axes=(-1, 0))
        return tf.nn.bias_add(projected, self.bias)

    def compute_output_shape(self, input_shape):
        return input_shape[1][:2] + (1,)

    def compute_mask(self, inputs, mask):
        return mask

    def get_config(self, ):
        merged = dict(list(super(LocalActivationUnit, self).get_config().items()))
        merged.update({'activation': self.activation, 'hidden_units': self.hidden_units,
                       'l2_reg': self.l2_reg, 'dropout_rate': self.dropout_rate, 'use_bn': self.use_bn,
                       'seed': self.seed})
        return merged
class DNN(Layer):
    """The Multi Layer Perceptron

      Input shape
        - nD tensor with shape: ``(batch_size, ..., input_dim)``. The most common situation would be a 2D input with shape ``(batch_size, input_dim)``.

      Output shape
        - nD tensor with shape: ``(batch_size, ..., hidden_size[-1])``. For instance, for a 2D input with shape ``(batch_size, input_dim)``, the output would have shape ``(batch_size, hidden_size[-1])``.

      Arguments
        - **hidden_units**:list of positive integer, the layer number and units in each layer. May be empty, in which case the layer returns its input unchanged.

        - **activation**: Activation function to use.

        - **l2_reg**: float between 0 and 1. L2 regularizer strength applied to the kernel weights matrix.

        - **dropout_rate**: float in [0,1). Fraction of the units to dropout.

        - **use_bn**: bool. Whether use BatchNormalization before activation or not.

        - **output_activation**: Activation function to use in the last layer.If ``None``,it will be same as ``activation``. Ignored when ``hidden_units`` is empty.

        - **seed**: A Python integer to use as random seed.
    """

    def __init__(self, hidden_units, activation='relu', l2_reg=0, dropout_rate=0, use_bn=False, output_activation=None,
                 seed=1024, **kwargs):
        self.hidden_units = hidden_units
        self.activation = activation
        self.l2_reg = l2_reg
        self.dropout_rate = dropout_rate
        self.use_bn = use_bn
        self.output_activation = output_activation
        self.seed = seed

        super(DNN, self).__init__(**kwargs)

    def build(self, input_shape):
        # An empty `hidden_units` is allowed: no weights are created and call() is the identity.
        input_size = input_shape[-1]
        hidden_units = [int(input_size)] + list(self.hidden_units)
        self.kernels = [self.add_weight(name='kernel' + str(i),
                                        shape=(
                                            hidden_units[i], hidden_units[i + 1]),
                                        initializer=glorot_normal(
                                            seed=self.seed),
                                        regularizer=l2(self.l2_reg),
                                        trainable=True) for i in range(len(self.hidden_units))]
        self.bias = [self.add_weight(name='bias' + str(i),
                                     shape=(self.hidden_units[i],),
                                     initializer=Zeros(),
                                     trainable=True) for i in range(len(self.hidden_units))]
        if self.use_bn:
            self.bn_layers = [BatchNormalization() for _ in range(len(self.hidden_units))]

        # Each hidden layer gets its own dropout layer with a distinct seed.
        self.dropout_layers = [Dropout(self.dropout_rate, seed=self.seed + i) for i in
                               range(len(self.hidden_units))]

        self.activation_layers = [activation_layer(self.activation) for _ in range(len(self.hidden_units))]

        # Only replace the last activation when there is one; with no hidden layers the
        # unguarded assignment raised IndexError.
        if self.output_activation and self.activation_layers:
            self.activation_layers[-1] = activation_layer(self.output_activation)

        super(DNN, self).build(input_shape)  # Be sure to call this somewhere!

    def call(self, inputs, training=None, **kwargs):
        deep_input = inputs

        for i in range(len(self.hidden_units)):
            fc = tf.nn.bias_add(tf.tensordot(
                deep_input, self.kernels[i], axes=(-1, 0)), self.bias[i])

            if self.use_bn:
                fc = self.bn_layers[i](fc, training=training)
            try:
                fc = self.activation_layers[i](fc, training=training)
            except TypeError as e:  # TypeError: call() got an unexpected keyword argument 'training'
                print("make sure the activation function use training flag properly", e)
                fc = self.activation_layers[i](fc)

            fc = self.dropout_layers[i](fc, training=training)
            deep_input = fc

        return deep_input

    def compute_output_shape(self, input_shape):
        if len(self.hidden_units) > 0:
            shape = input_shape[:-1] + (self.hidden_units[-1],)
        else:
            shape = input_shape

        return tuple(shape)

    def get_config(self, ):
        config = {'activation': self.activation, 'hidden_units': self.hidden_units,
                  'l2_reg': self.l2_reg, 'use_bn': self.use_bn, 'dropout_rate': self.dropout_rate,
                  'output_activation': self.output_activation, 'seed': self.seed}
        base_config = super(DNN, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
class PredictionLayer(Layer):
    """Final layer that optionally adds a global bias and applies the task's output function.

      Arguments
         - **task**: str, ``"binary"`` for binary logloss or  ``"regression"`` for regression loss. ``"multiclass"`` is also accepted by the constructor; only ``"binary"`` applies a sigmoid in ``call``.

         - **use_bias**: bool.Whether add bias term or not.
    """

    def __init__(self, task='binary', use_bias=True, **kwargs):
        supported_tasks = ["binary", "multiclass", "regression"]
        if task not in supported_tasks:
            raise ValueError("task must be binary,multiclass or regression")
        self.task = task
        self.use_bias = use_bias
        super(PredictionLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        if self.use_bias:
            self.global_bias = self.add_weight(shape=(1,), initializer=Zeros(), name="global_bias")

        # Be sure to call this somewhere!
        super(PredictionLayer, self).build(input_shape)

    def call(self, inputs, **kwargs):
        logits = inputs
        if self.use_bias:
            logits = tf.nn.bias_add(logits, self.global_bias, data_format='NHWC')
        if self.task == "binary":
            logits = tf.sigmoid(logits)

        # The result is always returned as a column of shape (-1, 1).
        return tf.reshape(logits, (-1, 1))

    def compute_output_shape(self, input_shape):
        return (None, 1)

    def get_config(self, ):
        merged = dict(list(super(PredictionLayer, self).get_config().items()))
        merged.update({'task': self.task, 'use_bias': self.use_bias})
        return merged
class RegulationModule(Layer):
    """Regulation module used in EDCN.

      Input shape
        - 3D tensor with shape: ``(batch_size,field_size,embedding_size)``.

      Output shape
        - 2D tensor with shape: ``(batch_size,field_size * embedding_size)``.

      Arguments
        - **tau** : Non-zero float, the temperature coefficient to control
          distribution of field-wise gating unit.

      References
        - [Enhancing Explicit and Implicit Feature Interactions via Information Sharing for Parallel Deep CTR Models.](https://dlp-kdd.github.io/assets/pdf/DLP-KDD_2021_paper_12.pdf)
    """

    def __init__(self, tau=1.0, **kwargs):
        if tau == 0:
            raise ValueError("RegulationModule tau can not be zero.")
        # ``self.tau`` stores the reciprocal so ``call`` can multiply by it.
        # The value the caller passed is kept separately so ``get_config``
        # can serialise it unchanged (serialising the reciprocal would invert
        # the temperature on every get_config/from_config round trip).
        self._temperature = tau
        self.tau = 1.0 / tau
        super(RegulationModule, self).__init__(**kwargs)

    def build(self, input_shape):
        self.field_size = int(input_shape[1])
        self.embedding_size = int(input_shape[2])
        # One gating weight per field, broadcast over batch and embedding axes.
        self.g = self.add_weight(
            shape=(1, self.field_size, 1),
            initializer=Ones(),
            name=self.name + '_field_weight')

        # Be sure to call this somewhere!
        super(RegulationModule, self).build(input_shape)

    def call(self, inputs, **kwargs):
        """Scale every field by its softmax gating score and flatten the result.

        Raises ``ValueError`` when ``inputs`` is not a 3D tensor.
        """
        if K.ndim(inputs) != 3:
            raise ValueError(
                "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs)))

        # Softmax over the field axis, sharpened or flattened by 1 / tau.
        field_gating_score = tf.nn.softmax(self.g * self.tau, 1)
        gated = inputs * field_gating_score
        return tf.reshape(gated, [-1, self.field_size * self.embedding_size])

    def compute_output_shape(self, input_shape):
        return (None, self.field_size * self.embedding_size)

    def get_config(self):
        """Return the layer config with ``tau`` exactly as given to ``__init__``."""
        config = {'tau': self._temperature}
        base_config = super(RegulationModule, self).get_config()
        base_config.update(config)
        return base_config
================================================
FILE: deepctr/layers/interaction.py
================================================
# -*- coding:utf-8 -*-
"""
Authors:
Weichen Shen,weichenswc@163.com,
Harshit Pande,
Yi He, heyi_jack@163.com
"""
import itertools
import tensorflow as tf
from tensorflow.python.keras import backend as K
from tensorflow.python.keras.backend import batch_dot
try:
from tensorflow.python.ops.init_ops import Zeros, Ones, Constant, TruncatedNormal, \
glorot_normal_initializer as glorot_normal, \
glorot_uniform_initializer as glorot_uniform
except ImportError:
from tensorflow.python.ops.init_ops_v2 import Zeros, Ones, Constant, TruncatedNormal, glorot_normal, glorot_uniform
from tensorflow.python.keras.layers import Layer, MaxPooling2D, Conv2D, Dropout, Lambda, Dense, Flatten
from tensorflow.python.keras.regularizers import l2
from tensorflow.python.layers import utils
from .activation import activation_layer
from .utils import concat_func, reduce_sum, softmax, reduce_mean
from .core import DNN
class AFMLayer(Layer):
    """Attentional Factorization Machine models pairwise (order-2) feature
    interactions without linear term and bias.

      Input shape
        - A list of 3D tensor with shape: ``(batch_size,1,embedding_size)``.

      Output shape
        - 2D tensor with shape: ``(batch_size, 1)``.

      Arguments
        - **attention_factor** : Positive integer, dimensionality of the
         attention network output space.

        - **l2_reg_w** : float between 0 and 1. L2 regularizer strength
         applied to attention network.

        - **dropout_rate** : float between in [0,1). Fraction of the attention net output units to dropout.

        - **seed** : A Python integer to use as random seed.

      References
        - [Attentional Factorization Machines : Learning the Weight of Feature
        Interactions via Attention Networks](https://arxiv.org/pdf/1708.04617.pdf)
    """

    def __init__(self, attention_factor=4, l2_reg_w=0, dropout_rate=0, seed=1024, **kwargs):
        self.attention_factor = attention_factor
        self.l2_reg_w = l2_reg_w
        self.dropout_rate = dropout_rate
        self.seed = seed
        super(AFMLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        """Validate the input shapes and create the attention and projection weights.

        Raises ``ValueError`` when the input is not a list of at least two
        identically shaped ``(None, 1, embedding_size)`` tensors.
        """
        if not isinstance(input_shape, list) or len(input_shape) < 2:
            raise ValueError('A `AttentionalFM` layer should be called '
                             'on a list of at least 2 inputs')

        shape_set = {tuple(shape.as_list()) for shape in input_shape}
        if len(shape_set) > 1:
            raise ValueError('A `AttentionalFM` layer requires '
                             'inputs with same shapes '
                             'Got different shapes: %s' % (shape_set,))

        if len(input_shape[0]) != 3 or input_shape[0][1] != 1:
            raise ValueError('A `AttentionalFM` layer requires '
                             'inputs of a list with same shape tensor like '
                             '(None, 1, embedding_size). '
                             'Got different shapes: %s' % (input_shape[0],))

        embedding_size = int(input_shape[0][-1])

        self.attention_W = self.add_weight(shape=(embedding_size, self.attention_factor),
                                           initializer=glorot_normal(seed=self.seed),
                                           regularizer=l2(self.l2_reg_w), name="attention_W")
        self.attention_b = self.add_weight(
            shape=(self.attention_factor,), initializer=Zeros(), name="attention_b")
        self.projection_h = self.add_weight(shape=(self.attention_factor, 1),
                                            initializer=glorot_normal(seed=self.seed), name="projection_h")
        self.projection_p = self.add_weight(shape=(embedding_size, 1),
                                            initializer=glorot_normal(seed=self.seed), name="projection_p")
        self.dropout = Dropout(self.dropout_rate, seed=self.seed)

        self.tensordot = Lambda(
            lambda x: tf.tensordot(x[0], x[1], axes=(-1, 0)))

        # Be sure to call this somewhere!
        super(AFMLayer, self).build(input_shape)

    def call(self, inputs, training=None, **kwargs):
        """Compute the attention-weighted sum of all pairwise element-wise products."""
        if K.ndim(inputs[0]) != 3:
            # ``inputs`` is a list, so the rank must be read from an element;
            # taking it from the list itself would fail inside this error path.
            raise ValueError(
                "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs[0])))

        # Every unordered pair of field embeddings, stacked along axis 1.
        pairs = list(itertools.combinations(inputs, 2))
        p = tf.concat([left for left, _ in pairs], axis=1)
        q = tf.concat([right for _, right in pairs], axis=1)
        bi_interaction = p * q

        attention_temp = tf.nn.relu(tf.nn.bias_add(tf.tensordot(
            bi_interaction, self.attention_W, axes=(-1, 0)), self.attention_b))
        # Kept on the instance, as in the original code, so the scores can be inspected.
        self.normalized_att_score = softmax(tf.tensordot(
            attention_temp, self.projection_h, axes=(-1, 0)), dim=1)
        attention_output = reduce_sum(
            self.normalized_att_score * bi_interaction, axis=1)

        attention_output = self.dropout(attention_output, training=training)

        afm_out = self.tensordot([attention_output, self.projection_p])
        return afm_out

    def compute_output_shape(self, input_shape):
        if not isinstance(input_shape, list):
            raise ValueError('A `AFMLayer` layer should be called '
                             'on a list of inputs.')
        return (None, 1)

    def get_config(self, ):
        config = {'attention_factor': self.attention_factor,
                  'l2_reg_w': self.l2_reg_w, 'dropout_rate': self.dropout_rate, 'seed': self.seed}
        base_config = super(AFMLayer, self).get_config()
        base_config.update(config)
        return base_config
class BiInteractionPooling(Layer):
    """Bi-Interaction Layer used in Neural FM,compress the
     pairwise element-wise product of features into one single vector.

      Input shape
        - A 3D tensor with shape:``(batch_size,field_size,embedding_size)``.

      Output shape
        - 3D tensor with shape: ``(batch_size,1,embedding_size)``.

      References
        - [He X, Chua T S. Neural factorization machines for sparse predictive analytics[C]//Proceedings of the 40th International ACM SIGIR conference on Research and Development in Information Retrieval. ACM, 2017: 355-364.](http://arxiv.org/abs/1708.05027)
    """

    def __init__(self, **kwargs):
        super(BiInteractionPooling, self).__init__(**kwargs)

    def build(self, input_shape):
        rank = len(input_shape)
        if rank != 3:
            raise ValueError(
                "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (rank))

        # Be sure to call this somewhere!
        super(BiInteractionPooling, self).build(input_shape)

    def call(self, inputs, **kwargs):
        """Return ``0.5 * ((sum of fields)^2 - sum of (fields^2))`` taken over the field axis."""
        if K.ndim(inputs) != 3:
            raise ValueError(
                "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs)))

        field_sum = reduce_sum(inputs, axis=1, keep_dims=True)
        square_of_sum = tf.square(field_sum)
        sum_of_square = reduce_sum(inputs * inputs, axis=1, keep_dims=True)
        return 0.5 * (square_of_sum - sum_of_square)

    def compute_output_shape(self, input_shape):
        return (None, 1, input_shape[-1])
class CIN(Layer):
    """Compressed Interaction Network used in xDeepFM.This implemention is
    adapted from code that the author of the paper published on https://github.com/Leavingseason/xDeepFM.

      Input shape
        - 3D tensor with shape: ``(batch_size,field_size,embedding_size)``.

      Output shape
        - 2D tensor with shape: ``(batch_size, featuremap_num)`` ``featuremap_num =  sum(self.layer_size[:-1]) // 2 + self.layer_size[-1]`` if ``split_half=True``,else  ``sum(layer_size)`` .

      Arguments
        - **layer_size** : list of int.Feature maps in each layer.

        - **activation** : activation function used on feature maps.

        - **split_half** : bool.if set to True, half of the feature maps in each hidden layer (except the last) will connect to output unit and the other half feeds the next layer.

        - **l2_reg** : float. L2 regularizer strength applied to the filters.

        - **seed** : A Python integer to use as random seed.

      References
        - [Lian J, Zhou X, Zhang F, et al. xDeepFM: Combining Explicit and Implicit Feature Interactions for Recommender Systems[J]. arXiv preprint arXiv:1803.05170, 2018.] (https://arxiv.org/pdf/1803.05170.pdf)
    """

    def __init__(self, layer_size=(128, 128), activation='relu', split_half=True, l2_reg=1e-5, seed=1024, **kwargs):
        if len(layer_size) == 0:
            raise ValueError(
                "layer_size must be a list(tuple) of length greater than 0")
        self.layer_size = layer_size
        self.split_half = split_half
        self.activation = activation
        self.l2_reg = l2_reg
        self.seed = seed
        super(CIN, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create one filter and one bias per CIN layer.

        Raises ``ValueError`` for non-3D input, or when ``split_half`` is set
        and a non-final layer size is odd.
        """
        if len(input_shape) != 3:
            raise ValueError(
                "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (len(input_shape)))

        # field_nums[k] is the number of feature maps fed into layer k.
        self.field_nums = [int(input_shape[1])]
        self.filters = []
        self.bias = []
        last_idx = len(self.layer_size) - 1
        for i, size in enumerate(self.layer_size):
            self.filters.append(self.add_weight(name='filter' + str(i),
                                                shape=[1, self.field_nums[-1] * self.field_nums[0], size],
                                                dtype=tf.float32,
                                                initializer=glorot_uniform(seed=self.seed + i),
                                                regularizer=l2(self.l2_reg)))
            self.bias.append(self.add_weight(name='bias' + str(i), shape=[size], dtype=tf.float32,
                                             initializer=Zeros()))

            if self.split_half:
                if i != last_idx and size % 2 > 0:
                    raise ValueError(
                        "layer_size must be even number except for the last layer when split_half=True")
                self.field_nums.append(size // 2)
            else:
                self.field_nums.append(size)

        self.activation_layers = [activation_layer(self.activation) for _ in self.layer_size]

        super(CIN, self).build(input_shape)  # Be sure to call this somewhere!

    def call(self, inputs, **kwargs):
        """Run the stacked compressed interactions and sum-pool every feature map."""
        if K.ndim(inputs) != 3:
            raise ValueError(
                "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs)))

        dim = int(inputs.get_shape()[-1])
        hidden_nn_layers = [inputs]
        final_result = []
        last_idx = len(self.layer_size) - 1

        # Split along the embedding axis so interactions are computed per dimension.
        split_tensor0 = tf.split(hidden_nn_layers[0], dim * [1], 2)
        for idx, layer_size in enumerate(self.layer_size):
            split_tensor = tf.split(hidden_nn_layers[-1], dim * [1], 2)

            dot_result_m = tf.matmul(split_tensor0, split_tensor, transpose_b=True)
            dot_result_o = tf.reshape(
                dot_result_m, shape=[dim, -1, self.field_nums[0] * self.field_nums[idx]])
            dot_result = tf.transpose(dot_result_o, perm=[1, 0, 2])

            curr_out = tf.nn.conv1d(
                dot_result, filters=self.filters[idx], stride=1, padding='VALID')
            curr_out = tf.nn.bias_add(curr_out, self.bias[idx])
            curr_out = self.activation_layers[idx](curr_out)
            curr_out = tf.transpose(curr_out, perm=[0, 2, 1])

            if self.split_half and idx != last_idx:
                # Half feeds the next layer, half goes straight to the output.
                next_hidden, direct_connect = tf.split(
                    curr_out, 2 * [layer_size // 2], 1)
            elif self.split_half:
                # The last layer contributes all of its maps; nothing follows it.
                direct_connect = curr_out
                next_hidden = 0
            else:
                direct_connect = curr_out
                next_hidden = curr_out

            final_result.append(direct_connect)
            hidden_nn_layers.append(next_hidden)

        result = tf.concat(final_result, axis=1)
        result = reduce_sum(result, -1, keep_dims=False)
        return result

    def compute_output_shape(self, input_shape):
        if self.split_half:
            featuremap_num = sum(self.layer_size[:-1]) // 2 + self.layer_size[-1]
        else:
            featuremap_num = sum(self.layer_size)
        return (None, featuremap_num)

    def get_config(self, ):
        """Return the layer config, including ``l2_reg`` so a reloaded layer keeps its regularization."""
        config = {'layer_size': self.layer_size, 'split_half': self.split_half, 'activation': self.activation,
                  'l2_reg': self.l2_reg, 'seed': self.seed}
        base_config = super(CIN, self).get_config()
        base_config.update(config)
        return base_config
class CrossNet(Layer):
    """The Cross Network part of Deep&Cross Network model,
    which leans both low and high degree cross feature.

      Input shape
        - 2D tensor with shape: ``(batch_size, units)``.

      Output shape
        - 2D tensor with shape: ``(batch_size, units)``.

      Arguments
        - **layer_num**: Positive integer, the cross layer number

        - **l2_reg**: float between 0 and 1. L2 regularizer strength applied to the kernel weights matrix

        - **parameterization**: string, ``"vector"``  or ``"matrix"`` ,  way to parameterize the cross network.

        - **seed**: A Python integer to use as random seed.

      References
        - [Wang R, Fu B, Fu G, et al. Deep & cross network for ad click predictions[C]//Proceedings of the ADKDD'17. ACM, 2017: 12.](https://arxiv.org/abs/1708.05123)
    """

    def __init__(self, layer_num=2, parameterization='vector', l2_reg=0, seed=1024, **kwargs):
        self.layer_num = layer_num
        self.parameterization = parameterization
        self.l2_reg = l2_reg
        self.seed = seed
        print('CrossNet parameterization:', self.parameterization)
        super(CrossNet, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create one kernel and one bias per cross layer.

        The kernel is ``(dim, 1)`` for the vector form and ``(dim, dim)`` for
        the matrix form; any other ``parameterization`` raises ``ValueError``.
        """
        if len(input_shape) != 2:
            raise ValueError(
                "Unexpected inputs dimensions %d, expect to be 2 dimensions" % (len(input_shape),))

        dim = int(input_shape[-1])
        if self.parameterization == 'vector':
            kernel_shape = (dim, 1)
        elif self.parameterization == 'matrix':
            kernel_shape = (dim, dim)
        else:  # error
            raise ValueError("parameterization should be 'vector' or 'matrix'")

        self.kernels = [self.add_weight(name='kernel' + str(layer_idx),
                                        shape=kernel_shape,
                                        initializer=glorot_normal(seed=self.seed),
                                        regularizer=l2(self.l2_reg),
                                        trainable=True)
                        for layer_idx in range(self.layer_num)]
        self.bias = [self.add_weight(name='bias' + str(layer_idx),
                                     shape=(dim, 1),
                                     initializer=Zeros(),
                                     trainable=True)
                     for layer_idx in range(self.layer_num)]

        # Be sure to call this somewhere!
        super(CrossNet, self).build(input_shape)

    def call(self, inputs, **kwargs):
        """Apply ``layer_num`` cross layers and return a tensor shaped like ``inputs``."""
        if K.ndim(inputs) != 2:
            raise ValueError(
                "Unexpected inputs dimensions %d, expect to be 2 dimensions" % (K.ndim(inputs)))

        # Work on column vectors: (bs, dim, 1).
        x_0 = tf.expand_dims(inputs, axis=2)
        x_l = x_0
        for layer_idx in range(self.layer_num):
            kernel = self.kernels[layer_idx]
            bias = self.bias[layer_idx]
            if self.parameterization == 'vector':
                projected = tf.tensordot(x_l, kernel, axes=(1, 0))
                crossed = tf.matmul(x_0, projected)
                x_l = crossed + bias + x_l
            elif self.parameterization == 'matrix':
                projected = tf.einsum('ij,bjk->bik', kernel, x_l)  # W * xi  (bs, dim, 1)
                shifted = projected + bias  # W * xi + b
                x_l = x_0 * shifted + x_l  # x0 · (W * xi + b) + xl  Hadamard-product
            else:  # error
                raise ValueError("parameterization should be 'vector' or 'matrix'")

        return tf.squeeze(x_l, axis=2)

    def get_config(self, ):
        base_config = super(CrossNet, self).get_config()
        base_config.update({
            'layer_num': self.layer_num,
            'parameterization': self.parameterization,
            'l2_reg': self.l2_reg,
            'seed': self.seed,
        })
        return base_config

    def compute_output_shape(self, input_shape):
        return input_shape
class CrossNetMix(Layer):
    """The Cross Network part of DCN-Mix model, which improves DCN-M by:
      1 add MOE to learn feature interactions in different subspaces
      2 add nonlinear transformations in low-dimensional space

      Input shape
        - 2D tensor with shape: ``(batch_size, units)``.

      Output shape
        - 2D tensor with shape: ``(batch_size, units)``.

      Arguments
        - **low_rank** : Positive integer, dimensionality of low-rank sapce.

        - **num_experts** : Positive integer, number of experts.

        - **layer_num**: Positive integer, the cross layer number

        - **l2_reg**: float between 0 and 1. L2 regularizer strength applied to the kernel weights matrix

        - **seed**: A Python integer to use as random seed.

      References
        - [Wang R, Shivanna R, Cheng D Z, et al. DCN-M: Improved Deep & Cross Network for Feature Cross Learning in Web-scale Learning to Rank Systems[J]. 2020.](https://arxiv.org/abs/2008.13535)
    """

    def __init__(self, low_rank=32, num_experts=4, layer_num=2, l2_reg=0, seed=1024, **kwargs):
        self.low_rank = low_rank
        self.num_experts = num_experts
        self.layer_num = layer_num
        self.l2_reg = l2_reg
        self.seed = seed
        super(CrossNetMix, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the per-layer expert weights (U, V, C), the gating layers and the biases."""
        if len(input_shape) != 2:
            raise ValueError(
                "Unexpected inputs dimensions %d, expect to be 2 dimensions" % (len(input_shape),))

        dim = int(input_shape[-1])

        def expert_weights(prefix, shape):
            # One regularized weight per cross layer, holding all experts at once.
            return [self.add_weight(name=prefix + str(layer_idx),
                                    shape=shape,
                                    initializer=glorot_normal(seed=self.seed),
                                    regularizer=l2(self.l2_reg),
                                    trainable=True)
                    for layer_idx in range(self.layer_num)]

        # U: (dim, low_rank)
        self.U_list = expert_weights('U_list', (self.num_experts, dim, self.low_rank))
        # V: (dim, low_rank)
        self.V_list = expert_weights('V_list', (self.num_experts, dim, self.low_rank))
        # C: (low_rank, low_rank)
        self.C_list = expert_weights('C_list', (self.num_experts, self.low_rank, self.low_rank))

        self.gating = [Dense(1, use_bias=False) for _ in range(self.num_experts)]

        self.bias = [self.add_weight(name='bias' + str(layer_idx),
                                     shape=(dim, 1),
                                     initializer=Zeros(),
                                     trainable=True)
                     for layer_idx in range(self.layer_num)]

        # Be sure to call this somewhere!
        super(CrossNetMix, self).build(input_shape)

    def call(self, inputs, **kwargs):
        """Apply ``layer_num`` mixture-of-low-rank-experts cross layers."""
        if K.ndim(inputs) != 2:
            raise ValueError(
                "Unexpected inputs dimensions %d, expect to be 2 dimensions" % (K.ndim(inputs)))

        x_0 = tf.expand_dims(inputs, axis=2)
        x_l = x_0
        for layer_idx in range(self.layer_num):
            expert_outputs = []
            gating_scores = []
            for expert_id in range(self.num_experts):
                # (1) G(x_l)
                # compute the gating score by x_l
                gating_scores.append(self.gating[expert_id](tf.squeeze(x_l, axis=2)))

                # (2) E(x_l)
                # project the input x_l to $\mathbb{R}^{r}$
                v_weight = tf.transpose(self.V_list[layer_idx][expert_id])
                low = tf.einsum('ij,bjk->bik', v_weight, x_l)  # (bs, low_rank, 1)

                # nonlinear activation in low rank space
                low = tf.nn.tanh(low)
                low = tf.einsum('ij,bjk->bik', self.C_list[layer_idx][expert_id], low)  # (bs, low_rank, 1)
                low = tf.nn.tanh(low)

                # project back to $\mathbb{R}^{d}$
                restored = tf.einsum('ij,bjk->bik', self.U_list[layer_idx][expert_id], low)  # (bs, dim, 1)

                crossed = restored + self.bias[layer_idx]
                crossed = x_0 * crossed  # Hadamard-product

                expert_outputs.append(tf.squeeze(crossed, axis=2))

            # (3) mixture of low-rank experts
            stacked_outputs = tf.stack(expert_outputs, 2)  # (bs, dim, num_experts)
            stacked_scores = tf.stack(gating_scores, 1)  # (bs, num_experts, 1)
            moe_out = tf.matmul(stacked_outputs, tf.nn.softmax(stacked_scores, 1))
            x_l = moe_out + x_l  # (bs, dim, 1)

        return tf.squeeze(x_l, axis=2)

    def get_config(self, ):
        base_config = super(CrossNetMix, self).get_config()
        base_config.update({
            'low_rank': self.low_rank,
            'num_experts': self.num_experts,
            'layer_num': self.layer_num,
            'l2_reg': self.l2_reg,
            'seed': self.seed,
        })
        return base_config

    def compute_output_shape(self, input_shape):
        return input_shape
class FM(Layer):
    """Factorization Machine models pairwise (order-2) feature interactions
     without linear term and bias.

      Input shape
        - 3D tensor with shape: ``(batch_size,field_size,embedding_size)``.

      Output shape
        - 2D tensor with shape: ``(batch_size, 1)``.

      References
        - [Factorization Machines](https://www.csie.ntu.edu.tw/~b97053/paper/Rendle2010FM.pdf)
    """

    def __init__(self, **kwargs):
        super(FM, self).__init__(**kwargs)

    def build(self, input_shape):
        """Check that the input is 3D; raises ``ValueError`` otherwise."""
        if len(input_shape) != 3:
            # Same wording as ``call``. The previous message used a backslash
            # continuation that embedded source whitespace in the text.
            raise ValueError(
                "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (len(input_shape)))

        super(FM, self).build(input_shape)  # Be sure to call this somewhere!

    def call(self, inputs, **kwargs):
        """Return ``0.5 * sum((sum of fields)^2 - sum of (fields^2))`` per sample."""
        if K.ndim(inputs) != 3:
            raise ValueError(
                "Unexpected inputs dimensions %d, expect to be 3 dimensions"
                % (K.ndim(inputs)))

        concated_embeds_value = inputs

        square_of_sum = tf.square(reduce_sum(
            concated_embeds_value, axis=1, keep_dims=True))
        sum_of_square = reduce_sum(
            concated_embeds_value * concated_embeds_value, axis=1, keep_dims=True)
        cross_term = square_of_sum - sum_of_square
        # Collapse the embedding axis; the kept field axis yields (batch_size, 1).
        cross_term = 0.5 * reduce_sum(cross_term, axis=2, keep_dims=False)

        return cross_term

    def compute_output_shape(self, input_shape):
        return (None, 1)
class InnerProductLayer(Layer):
    """InnerProduct Layer used in PNN that compute the element-wise
    product or inner product between feature vectors.

      Input shape
        - a list of 3D tensor with shape: ``(batch_size,1,embedding_size)``.

      Output shape
        - 3D tensor with shape: ``(batch_size, N*(N-1)/2 ,1)`` if use reduce_sum. or 3D tensor with shape: ``(batch_size, N*(N-1)/2, embedding_size )`` if not use reduce_sum.

      Arguments
        - **reduce_sum**: bool. Whether return inner product or element-wise product

      References
            - [Qu Y, Cai H, Ren K, et al. Product-based neural networks for user response prediction[C]//Data Mining (ICDM), 2016 IEEE 16th International Conference on. IEEE, 2016: 1149-1154.](https://arxiv.org/pdf/1611.00144.pdf)
    """

    def __init__(self, reduce_sum=True, **kwargs):
        self.reduce_sum = reduce_sum
        super(InnerProductLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        """Validate that the input is a list of >= 2 identical ``(None, 1, embedding_size)`` shapes."""
        if not isinstance(input_shape, list) or len(input_shape) < 2:
            raise ValueError('A `InnerProductLayer` layer should be called '
                             'on a list of at least 2 inputs')

        shape_set = {tuple(shape.as_list()) for shape in input_shape}
        if len(shape_set) > 1:
            raise ValueError('A `InnerProductLayer` layer requires '
                             'inputs with same shapes '
                             'Got different shapes: %s' % (shape_set,))

        if len(input_shape[0]) != 3 or input_shape[0][1] != 1:
            raise ValueError('A `InnerProductLayer` layer requires '
                             'inputs of a list with same shape tensor like (None,1,embedding_size). '
                             'Got different shapes: %s' % (input_shape[0],))

        super(InnerProductLayer, self).build(
            input_shape)  # Be sure to call this somewhere!

    def call(self, inputs, **kwargs):
        """Return the pairwise element-wise products, summed over the embedding axis when ``reduce_sum`` is set."""
        if K.ndim(inputs[0]) != 3:
            # ``inputs`` is a list, so the rank must be read from an element;
            # taking it from the list itself would fail inside this error path.
            raise ValueError(
                "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs[0])))

        embed_list = inputs
        # All index pairs (i, j) with i < j, in row-major order.
        pairs = list(itertools.combinations(range(len(embed_list)), 2))
        p = tf.concat([embed_list[i] for i, _ in pairs], axis=1)  # batch num_pairs k
        q = tf.concat([embed_list[j] for _, j in pairs], axis=1)

        inner_product = p * q
        if self.reduce_sum:
            inner_product = reduce_sum(
                inner_product, axis=2, keep_dims=True)
        return inner_product

    def compute_output_shape(self, input_shape):
        num_inputs = len(input_shape)
        num_pairs = num_inputs * (num_inputs - 1) // 2
        first_shape = input_shape[0]
        embed_size = first_shape[-1]
        if self.reduce_sum:
            return (first_shape[0], num_pairs, 1)
        return (first_shape[0], num_pairs, embed_size)

    def get_config(self, ):
        config = {'reduce_sum': self.reduce_sum, }
        base_config = super(InnerProductLayer, self).get_config()
        base_config.update(config)
        return base_config
class InteractingLayer(Layer):
    """A Layer used in AutoInt that model the correlations between different feature fields by multi-head self-attention mechanism.

      Input shape
            - A 3D tensor with shape: ``(batch_size,field_size,embedding_size)``.

      Output shape
            - 3D tensor with shape:``(batch_size,field_size,att_embedding_size * head_num)``.

      Arguments
            - **att_embedding_size**: int.The embedding size in multi-head self-attention network.
            - **head_num**: int.The head number in multi-head self-attention network.
            - **use_res**: bool.Whether or not use standard residual connections before output.
            - **scaling**: bool.Whether or not divide the attention scores by ``sqrt(att_embedding_size)`` before softmax.
            - **seed**: A Python integer to use as random seed.

      References
            - [Song W, Shi C, Xiao Z, et al. AutoInt: Automatic Feature Interaction Learning via Self-Attentive Neural Networks[J]. arXiv preprint arXiv:1810.11921, 2018.](https://arxiv.org/abs/1810.11921)
    """

    def __init__(self, att_embedding_size=8, head_num=2, use_res=True, scaling=False, seed=1024, **kwargs):
        if head_num <= 0:
            raise ValueError('head_num must be a int > 0')
        self.att_embedding_size = att_embedding_size
        self.head_num = head_num
        self.use_res = use_res
        self.seed = seed
        self.scaling = scaling
        super(InteractingLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the query/key/value projections and, if ``use_res``, the residual projection."""
        if len(input_shape) != 3:
            raise ValueError(
                "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (len(input_shape)))

        embedding_size = int(input_shape[-1])
        projection_shape = [embedding_size, self.att_embedding_size * self.head_num]

        self.W_Query = self.add_weight(name='query', shape=projection_shape,
                                       dtype=tf.float32,
                                       initializer=TruncatedNormal(seed=self.seed))
        self.W_key = self.add_weight(name='key', shape=projection_shape,
                                     dtype=tf.float32,
                                     initializer=TruncatedNormal(seed=self.seed + 1))
        self.W_Value = self.add_weight(name='value', shape=projection_shape,
                                       dtype=tf.float32,
                                       initializer=TruncatedNormal(seed=self.seed + 2))
        if self.use_res:
            self.W_Res = self.add_weight(name='res', shape=projection_shape,
                                         dtype=tf.float32,
                                         initializer=TruncatedNormal(seed=self.seed))

        # Be sure to call this somewhere!
        super(InteractingLayer, self).build(input_shape)

    def call(self, inputs, **kwargs):
        """Apply multi-head self-attention across fields, add the optional residual and apply ReLU."""
        if K.ndim(inputs) != 3:
            raise ValueError(
                "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs)))

        querys = tf.tensordot(inputs, self.W_Query,
                              axes=(-1, 0))  # None F D*head_num
        keys = tf.tensordot(inputs, self.W_key, axes=(-1, 0))
        values = tf.tensordot(inputs, self.W_Value, axes=(-1, 0))

        # head_num None F D
        querys = tf.stack(tf.split(querys, self.head_num, axis=2))
        keys = tf.stack(tf.split(keys, self.head_num, axis=2))
        values = tf.stack(tf.split(values, self.head_num, axis=2))

        inner_product = tf.matmul(
            querys, keys, transpose_b=True)  # head_num None F F
        if self.scaling:
            inner_product /= self.att_embedding_size ** 0.5
        # Kept on the instance, as in the original code, so the scores can be inspected.
        self.normalized_att_scores = softmax(inner_product)

        result = tf.matmul(self.normalized_att_scores,
                           values)  # head_num None F D
        # Move the heads from the leading axis back onto the feature axis.
        result = tf.concat(tf.split(result, self.head_num, ), axis=-1)
        result = tf.squeeze(result, axis=0)  # None F D*head_num

        if self.use_res:
            result += tf.tensordot(inputs, self.W_Res, axes=(-1, 0))
        result = tf.nn.relu(result)

        return result

    def compute_output_shape(self, input_shape):
        return (None, input_shape[1], self.att_embedding_size * self.head_num)

    def get_config(self, ):
        """Return the layer config, including ``scaling`` so a reloaded layer keeps the same attention scaling."""
        config = {'att_embedding_size': self.att_embedding_size, 'head_num': self.head_num, 'use_res': self.use_res,
                  'scaling': self.scaling, 'seed': self.seed}
        base_config = super(InteractingLayer, self).get_config()
        base_config.update(config)
        return base_config
class OutterProductLayer(Layer):
    """OutterProduct Layer used in PNN.This implemention is
    adapted from code that the author of the paper published on https://github.com/Atomu2014/product-nets.

      Input shape
            - A list of N 3D tensor with shape: ``(batch_size,1,embedding_size)``.

      Output shape
            - 2D tensor with shape:``(batch_size,N*(N-1)/2 )``.

      Arguments
            - **kernel_type**: str. The kernel weight matrix type to use,can be mat,vec or num

            - **seed**: A Python integer to use as random seed.

      References
            - [Qu Y, Cai H, Ren K, et al. Product-based neural networks for user response prediction[C]//Data Mining (ICDM), 2016 IEEE 16th International Conference on. IEEE, 2016: 1149-1154.](https://arxiv.org/pdf/1611.00144.pdf)
    """

    def __init__(self, kernel_type='mat', seed=1024, **kwargs):
        if kernel_type not in ['mat', 'vec', 'num']:
            raise ValueError("kernel_type must be mat,vec or num")
        self.kernel_type = kernel_type
        self.seed = seed
        super(OutterProductLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        """Validate the input shapes and create the kernel matching ``kernel_type``."""
        if not isinstance(input_shape, list) or len(input_shape) < 2:
            raise ValueError('A `OutterProductLayer` layer should be called '
                             'on a list of at least 2 inputs')

        shape_set = {tuple(shape.as_list()) for shape in input_shape}
        if len(shape_set) > 1:
            raise ValueError('A `OutterProductLayer` layer requires '
                             'inputs with same shapes '
                             'Got different shapes: %s' % (shape_set,))

        if len(input_shape[0]) != 3 or input_shape[0][1] != 1:
            raise ValueError('A `OutterProductLayer` layer requires '
                             'inputs of a list with same shape tensor like (None,1,embedding_size). '
                             'Got different shapes: %s' % (input_shape[0],))

        num_inputs = len(input_shape)
        num_pairs = num_inputs * (num_inputs - 1) // 2
        first_shape = input_shape[0]
        embed_size = int(first_shape[-1])

        if self.kernel_type == 'mat':
            self.kernel = self.add_weight(shape=(embed_size, num_pairs, embed_size),
                                          initializer=glorot_uniform(seed=self.seed),
                                          name='kernel')
        elif self.kernel_type == 'vec':
            self.kernel = self.add_weight(shape=(num_pairs, embed_size,),
                                          initializer=glorot_uniform(self.seed),
                                          name='kernel')
        elif self.kernel_type == 'num':
            self.kernel = self.add_weight(
                shape=(num_pairs, 1), initializer=glorot_uniform(self.seed), name='kernel')

        # As in the original code, only the first input's shape is handed to the base class.
        super(OutterProductLayer, self).build(
            first_shape)  # Be sure to call this somewhere!

    def call(self, inputs, **kwargs):
        """Return one kernel-weighted product per feature pair, shape ``(batch, num_pairs)``."""
        if K.ndim(inputs[0]) != 3:
            # ``inputs`` is a list, so the rank must be read from an element;
            # taking it from the list itself would fail inside this error path.
            raise ValueError(
                "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs[0])))

        embed_list = inputs
        row = []
        col = []
        num_inputs = len(embed_list)
        for i in range(num_inputs - 1):
            for j in range(i + 1, num_inputs):
                row.append(i)
                col.append(j)
        p = tf.concat([embed_list[idx] for idx in row], axis=1)  # batch * pair * k
        q = tf.concat([embed_list[idx] for idx in col], axis=1)  # batch * pair * k

        if self.kernel_type == 'mat':
            # batch * 1 * pair * k, broadcast against the k * pair * k kernel
            p = tf.expand_dims(p, 1)
            # batch * k * pair
            p_kernel = reduce_sum(tf.multiply(p, self.kernel), -1)
            # batch * pair * k
            p_kernel = tf.transpose(p_kernel, [0, 2, 1])
            # batch * pair
            kp = reduce_sum(tf.multiply(p_kernel, q), -1)
        else:
            # 1 * pair * (k or 1)
            k = tf.expand_dims(self.kernel, 0)
            # batch * pair
            kp = reduce_sum(p * q * k, -1)

        return kp

    def compute_output_shape(self, input_shape):
        num_inputs = len(input_shape)
        num_pairs = num_inputs * (num_inputs - 1) // 2
        return (None, num_pairs)

    def get_config(self, ):
        config = {'kernel_type': self.kernel_type, 'seed': self.seed}
        base_config = super(OutterProductLayer, self).get_config()
        base_config.update(config)
        return base_config
class FGCNNLayer(Layer):
    """Feature Generation Layer used in FGCNN,including Convolution,MaxPooling and Recombination.

      Input shape
        - A 3D tensor with shape:``(batch_size,field_size,embedding_size)``.

      Output shape
        - 3D tensor with shape: ``(batch_size,new_feture_num,embedding_size)``.

      Arguments
        - **filters** : sequence of int, number of convolution filters of each stage.
        - **kernel_width** : sequence of int, convolution kernel size along the field axis of each stage.
        - **new_maps** : sequence of int, number of new feature maps generated by the Dense recombination of each stage.
        - **pooling_width** : sequence of int, max-pooling size along the field axis of each stage.

      All four sequences must have the same length (one entry per stage).

      References
        - [Liu B, Tang R, Chen Y, et al. Feature Generation by Convolutional Neural Network for Click-Through Rate Prediction[J]. arXiv preprint arXiv:1904.04447, 2019.](https://arxiv.org/pdf/1904.04447)
    """

    def __init__(self, filters=(14, 16,), kernel_width=(7, 7,), new_maps=(3, 3,), pooling_width=(2, 2),
                 **kwargs):
        if not (len(filters) == len(kernel_width) == len(new_maps) == len(pooling_width)):
            raise ValueError("length of argument must be equal")
        self.filters = filters
        self.kernel_width = kernel_width
        self.new_maps = new_maps
        self.pooling_width = pooling_width

        super(FGCNNLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create one Conv2D / MaxPooling2D / Dense triple per stage."""
        if len(input_shape) != 3:
            raise ValueError(
                "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (len(input_shape)))
        self.conv_layers = []
        self.pooling_layers = []
        self.dense_layers = []

        # TensorShape objects expose as_list(); plain tuples/lists are copied directly.
        if hasattr(input_shape, 'as_list'):
            base_shape = input_shape.as_list()
        else:
            base_shape = list(input_shape)
        # A trailing channel axis is appended because call() expands the input to 4D.
        pooling_shape = base_shape + [1, ]
        embedding_size = int(input_shape[-1])

        for filters, width, new_filters, pooling_width in zip(
                self.filters, self.kernel_width, self.new_maps, self.pooling_width):
            # Track the static shape through conv + pooling so the Dense layer
            # can be sized from the pooled number of rows.
            conv_output_shape = self._conv_output_shape(
                pooling_shape, (width, 1), filters)
            pooling_shape = self._pooling_output_shape(
                conv_output_shape, (pooling_width, 1))
            self.conv_layers.append(Conv2D(filters=filters, kernel_size=(width, 1), strides=(1, 1),
                                           padding='same',
                                           activation='tanh', use_bias=True, ))
            self.pooling_layers.append(
                MaxPooling2D(pool_size=(pooling_width, 1)))
            self.dense_layers.append(Dense(pooling_shape[1] * embedding_size * new_filters,
                                           activation='tanh', use_bias=True))

        self.flatten = Flatten()

        super(FGCNNLayer, self).build(
            input_shape)  # Be sure to call this somewhere!

    def call(self, inputs, **kwargs):
        """Run every stage and concatenate the generated features along axis 1."""
        if K.ndim(inputs) != 3:
            raise ValueError(
                "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs)))

        embedding_size = int(inputs.shape[-1])
        pooling_result = tf.expand_dims(inputs, axis=3)

        new_feature_list = []

        for stage, new_filters in enumerate(self.new_maps):
            conv_result = self.conv_layers[stage](pooling_result)
            # The pooled tensor feeds both the recombination and the next stage.
            pooling_result = self.pooling_layers[stage](conv_result)

            flatten_result = self.flatten(pooling_result)
            new_result = self.dense_layers[stage](flatten_result)

            new_feature_list.append(
                tf.reshape(new_result, (-1, int(pooling_result.shape[1]) * new_filters, embedding_size)))

        new_features = concat_func(new_feature_list, axis=1)
        return new_features

    def compute_output_shape(self, input_shape):
        """Return ``(None, new_features_num, embedding_size)``."""
        new_features_num = 0
        features_num = input_shape[1]

        for i in range(0, len(self.kernel_width)):
            pooled_features_num = features_num // self.pooling_width[i]
            new_features_num += self.new_maps[i] * pooled_features_num
            features_num = pooled_features_num

        return (None, new_features_num, input_shape[-1])

    def get_config(self, ):
        config = {'kernel_width': self.kernel_width, 'filters': self.filters, 'new_maps': self.new_maps,
                  'pooling_width': self.pooling_width}
        base_config = super(FGCNNLayer, self).get_config()
        base_config.update(config)
        return base_config

    def _conv_output_shape(self, input_shape, kernel_size, filters=None):
        """Static output shape of a 'same'-padded, stride-1 convolution (channels_last).

        ``filters`` is the channel count of the convolution; when omitted the
        previous behaviour (appending ``self.filters`` as-is) is kept.
        """
        # channels_last
        space = input_shape[1:-1]
        new_space = []
        for i in range(len(space)):
            new_dim = utils.conv_output_length(
                space[i],
                kernel_size[i],
                padding='same',
                stride=1,
                dilation=1)
            new_space.append(new_dim)
        channels = self.filters if filters is None else filters
        return ([input_shape[0]] + new_space + [channels])

    def _pooling_output_shape(self, input_shape, pool_size):
        """Static output shape of a 'valid' pooling whose stride equals its pool size."""
        # channels_last
        rows = input_shape[1]
        cols = input_shape[2]
        rows = utils.conv_output_length(rows, pool_size[0], 'valid',
                                        pool_size[0])
        cols = utils.conv_output_length(cols, pool_size[1], 'valid',
                                        pool_size[1])
        return [input_shape[0], rows, cols, input_shape[3]]
class SENETLayer(Layer):
    """SENETLayer used in FiBiNET.

      Input shape
        - A list of 3D tensor with shape: ``(batch_size,1,embedding_size)``.

      Output shape
        - A list of 3D tensor with shape: ``(batch_size,1,embedding_size)``.

      Arguments
        - **reduction_ratio** : Positive integer, the number of fields is divided by it
          (floor, at least 1) to get the hidden size of the attention network.

        - **seed** : A Python integer to use as random seed.

      References
        - [FiBiNET: Combining Feature Importance and Bilinear feature Interaction for Click-Through Rate Prediction](https://arxiv.org/pdf/1905.09433.pdf)
    """

    def __init__(self, reduction_ratio=3, seed=1024, **kwargs):
        self.reduction_ratio = reduction_ratio

        self.seed = seed
        super(SENETLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the two excitation weight matrices ``W_1`` and ``W_2``."""
        if not isinstance(input_shape, list) or len(input_shape) < 2:
            raise ValueError('A `SENETLayer` layer should be called '
                             'on a list of at least 2 inputs')

        self.filed_size = len(input_shape)
        self.embedding_size = input_shape[0][-1]
        # Never let the bottleneck collapse to zero units.
        reduction_size = max(1, self.filed_size // self.reduction_ratio)

        self.W_1 = self.add_weight(shape=(
            self.filed_size, reduction_size), initializer=glorot_normal(seed=self.seed), name="W_1")
        self.W_2 = self.add_weight(shape=(
            reduction_size, self.filed_size), initializer=glorot_normal(seed=self.seed), name="W_2")

        self.tensordot = Lambda(
            lambda x: tf.tensordot(x[0], x[1], axes=(-1, 0)))

        # Be sure to call this somewhere!
        super(SENETLayer, self).build(input_shape)

    def call(self, inputs, training=None, **kwargs):
        """Re-weight every field embedding and return them as a list again."""
        if K.ndim(inputs[0]) != 3:
            # Report the rank of the offending tensor; ``inputs`` itself is a list.
            raise ValueError(
                "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs[0])))

        inputs = concat_func(inputs, axis=1)
        # Squeeze: one mean value per field.
        Z = reduce_mean(inputs, axis=-1, )

        # Excitation: two ReLU projections yield one weight per field.
        A_1 = tf.nn.relu(self.tensordot([Z, self.W_1]))
        A_2 = tf.nn.relu(self.tensordot([A_1, self.W_2]))
        V = tf.multiply(inputs, tf.expand_dims(A_2, axis=2))

        return tf.split(V, self.filed_size, axis=1)

    def compute_output_shape(self, input_shape):
        return input_shape

    def compute_mask(self, inputs, mask=None):
        return [None] * self.filed_size

    def get_config(self, ):
        config = {'reduction_ratio': self.reduction_ratio, 'seed': self.seed}
        base_config = super(SENETLayer, self).get_config()
        base_config.update(config)
        return base_config
class BilinearInteraction(Layer):
    """BilinearInteraction Layer used in FiBiNET.

      Input shape
        - A list of 3D tensor with shape: ``(batch_size,1,embedding_size)``. Its length is ``filed_size``.

      Output shape
        - 3D tensor with shape: ``(batch_size,filed_size*(filed_size-1)/2,embedding_size)``.

      Arguments
        - **bilinear_type** : String, types of bilinear functions used in this layer.
          One of ``"all"`` (a single shared matrix), ``"each"`` (one matrix per left-hand field)
          or ``"interaction"`` (one matrix per field pair).

        - **seed** : A Python integer to use as random seed.

      References
        - [FiBiNET: Combining Feature Importance and Bilinear feature Interaction for Click-Through Rate Prediction](https://arxiv.org/pdf/1905.09433.pdf)

    """

    def __init__(self, bilinear_type="interaction", seed=1024, **kwargs):
        self.bilinear_type = bilinear_type
        self.seed = seed

        super(BilinearInteraction, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the bilinear weight(s) required by ``bilinear_type``."""
        if not isinstance(input_shape, list) or len(input_shape) < 2:
            raise ValueError('A `BilinearInteraction` layer should be called '
                             'on a list of at least 2 inputs')
        embedding_size = int(input_shape[0][-1])

        if self.bilinear_type == "all":
            self.W = self.add_weight(shape=(embedding_size, embedding_size), initializer=glorot_normal(
                seed=self.seed), name="bilinear_weight")
        elif self.bilinear_type == "each":
            # The last field never appears on the left of a pair, so n - 1 matrices suffice.
            self.W_list = [self.add_weight(shape=(embedding_size, embedding_size), initializer=glorot_normal(
                seed=self.seed), name="bilinear_weight" + str(i)) for i in range(len(input_shape) - 1)]
        elif self.bilinear_type == "interaction":
            self.W_list = [self.add_weight(shape=(embedding_size, embedding_size), initializer=glorot_normal(
                seed=self.seed), name="bilinear_weight" + str(i) + '_' + str(j)) for i, j in
                           itertools.combinations(range(len(input_shape)), 2)]
        else:
            raise NotImplementedError

        super(BilinearInteraction, self).build(
            input_shape)  # Be sure to call this somewhere!

    def call(self, inputs, **kwargs):
        """Return the concatenated bilinear products of all field pairs."""
        if K.ndim(inputs[0]) != 3:
            # Report the rank of the offending tensor; ``inputs`` itself is a list.
            raise ValueError(
                "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs[0])))

        n = len(inputs)
        if self.bilinear_type == "all":
            vidots = [tf.tensordot(inputs[i], self.W, axes=(-1, 0)) for i in range(n)]
            p = [tf.multiply(vidots[i], inputs[j]) for i, j in itertools.combinations(range(n), 2)]
        elif self.bilinear_type == "each":
            vidots = [tf.tensordot(inputs[i], self.W_list[i], axes=(-1, 0)) for i in range(n - 1)]
            p = [tf.multiply(vidots[i], inputs[j]) for i, j in itertools.combinations(range(n), 2)]
        elif self.bilinear_type == "interaction":
            # W_list was built in the same combinations() order, so zip pairs them up.
            p = [tf.multiply(tf.tensordot(v[0], w, axes=(-1, 0)), v[1])
                 for v, w in zip(itertools.combinations(inputs, 2), self.W_list)]
        else:
            raise NotImplementedError
        output = concat_func(p, axis=1)
        return output

    def compute_output_shape(self, input_shape):
        filed_size = len(input_shape)
        embedding_size = input_shape[0][-1]

        return (None, filed_size * (filed_size - 1) // 2, embedding_size)

    def get_config(self, ):
        config = {'bilinear_type': self.bilinear_type, 'seed': self.seed}
        base_config = super(BilinearInteraction, self).get_config()
        base_config.update(config)
        return base_config
class FieldWiseBiInteraction(Layer):
    """Field-Wise Bi-Interaction Layer used in FLEN,compress the
     pairwise element-wise product of features into one single vector.

      Input shape
        - A list of 3D tensor with shape:``(batch_size,field_size,embedding_size)``.

      Output shape
        - 2D tensor with shape: ``(batch_size,embedding_size)``.

      Arguments
        - **use_bias** : Boolean, if use bias.
        - **seed** : A Python integer to use as random seed.

      References
        - [FLEN: Leveraging Field for Scalable CTR Prediction](https://arxiv.org/pdf/1911.04690)

    """

    def __init__(self, use_bias=True, seed=1024, **kwargs):
        self.use_bias = use_bias
        self.seed = seed

        super(FieldWiseBiInteraction, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the pair weights (MF part), field weights (FM part) and optional biases."""
        if not isinstance(input_shape, list) or len(input_shape) < 2:
            raise ValueError(
                'A `Field-Wise Bi-Interaction` layer should be called '
                'on a list of at least 2 inputs')

        self.num_fields = len(input_shape)
        embedding_size = int(input_shape[0][-1])
        num_pairs = self.num_fields * (self.num_fields - 1) // 2

        self.kernel_mf = self.add_weight(
            name='kernel_mf',
            shape=(num_pairs, 1),
            initializer=Ones(),
            regularizer=None,
            trainable=True)

        self.kernel_fm = self.add_weight(
            name='kernel_fm',
            shape=(self.num_fields, 1),
            initializer=Constant(value=0.5),
            regularizer=None,
            trainable=True)
        if self.use_bias:
            # The shape must be a real 1-tuple; ``(embedding_size)`` is just a scalar.
            self.bias_mf = self.add_weight(name='bias_mf',
                                           shape=(embedding_size,),
                                           initializer=Zeros())
            self.bias_fm = self.add_weight(name='bias_fm',
                                           shape=(embedding_size,),
                                           initializer=Zeros())

        super(FieldWiseBiInteraction,
              self).build(input_shape)  # Be sure to call this somewhere!

    def call(self, inputs, **kwargs):
        """Return ``h_mf + h_fm``, a ``(batch_size, embedding_size)`` tensor."""
        if K.ndim(inputs[0]) != 3:
            # Report the rank of the offending tensor; ``inputs`` itself is a list.
            raise ValueError(
                "Unexpected inputs dimensions %d, expect to be 3 dimensions" %
                (K.ndim(inputs[0])))

        field_wise_embeds_list = inputs

        # MF module
        # Every field is first collapsed to a single vector by summing its features.
        field_wise_vectors = tf.concat([
            reduce_sum(field_i_vectors, axis=1, keep_dims=True)
            for field_i_vectors in field_wise_embeds_list
        ], 1)

        left = []
        right = []

        for i, j in itertools.combinations(list(range(self.num_fields)), 2):
            left.append(i)
            right.append(j)

        embeddings_left = tf.gather(params=field_wise_vectors,
                                    indices=left,
                                    axis=1)
        embeddings_right = tf.gather(params=field_wise_vectors,
                                     indices=right,
                                     axis=1)

        embeddings_prod = embeddings_left * embeddings_right
        field_weighted_embedding = embeddings_prod * self.kernel_mf
        h_mf = reduce_sum(field_weighted_embedding, axis=1)
        if self.use_bias:
            h_mf = tf.nn.bias_add(h_mf, self.bias_mf)

        # FM module
        # Per field: (sum of vectors)^2 - sum of (vectors^2).
        square_of_sum_list = [
            tf.square(reduce_sum(field_i_vectors, axis=1, keep_dims=True))
            for field_i_vectors in field_wise_embeds_list
        ]
        sum_of_square_list = [
            reduce_sum(field_i_vectors * field_i_vectors,
                       axis=1,
                       keep_dims=True)
            for field_i_vectors in field_wise_embeds_list
        ]

        field_fm = tf.concat([
            square_of_sum - sum_of_square for square_of_sum, sum_of_square in
            zip(square_of_sum_list, sum_of_square_list)
        ], 1)

        h_fm = reduce_sum(field_fm * self.kernel_fm, axis=1)
        if self.use_bias:
            h_fm = tf.nn.bias_add(h_fm, self.bias_fm)

        return h_mf + h_fm

    def compute_output_shape(self, input_shape):
        return (None, input_shape[0][-1])

    def get_config(self, ):
        config = {'use_bias': self.use_bias, 'seed': self.seed}
        base_config = super(FieldWiseBiInteraction, self).get_config()
        base_config.update(config)
        return base_config
class FwFMLayer(Layer):
    """Field-weighted Factorization Machines

      Input shape
        - 3D tensor with shape: ``(batch_size,field_size,embedding_size)``.

      Output shape
        - 2D tensor with shape: ``(batch_size, 1)``.

      Arguments
        - **num_fields** : integer for number of fields
        - **regularizer** : L2 regularizer weight for the field strength parameters of FwFM

      References
        - [Field-weighted Factorization Machines for Click-Through Rate Prediction in Display Advertising]
        https://arxiv.org/pdf/1806.03514.pdf
    """

    def __init__(self, num_fields=4, regularizer=0.000001, **kwargs):
        self.num_fields = num_fields
        self.regularizer = regularizer
        super(FwFMLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        """Validate the input shape and create the field-pair strength matrix."""
        # Messages use implicit string concatenation: a backslash continuation
        # inside the literal would leak source indentation into the text.
        if len(input_shape) != 3:
            raise ValueError(
                "Unexpected inputs dimensions %d, expect to be 3 dimensions"
                % (len(input_shape)))

        if input_shape[1] != self.num_fields:
            raise ValueError(
                "Mismatch in number of fields {} and "
                "concatenated embeddings dims {}".format(self.num_fields, input_shape[1]))

        self.field_strengths = self.add_weight(name='field_pair_strengths',
                                               shape=(self.num_fields, self.num_fields),
                                               initializer=TruncatedNormal(),
                                               regularizer=l2(self.regularizer),
                                               trainable=True)

        super(FwFMLayer, self).build(input_shape)  # Be sure to call this somewhere!

    def call(self, inputs, **kwargs):
        """Return the sum over field pairs of ``r_ij * <v_i, v_j>``."""
        if K.ndim(inputs) != 3:
            raise ValueError(
                "Unexpected inputs dimensions %d, expect to be 3 dimensions"
                % (K.ndim(inputs)))

        if inputs.shape[1] != self.num_fields:
            raise ValueError(
                "Mismatch in number of fields {} and "
                "concatenated embeddings dims {}".format(self.num_fields, inputs.shape[1]))

        pairwise_inner_prods = []
        for fi, fj in itertools.combinations(range(self.num_fields), 2):
            # get field strength for pair fi and fj
            r_ij = self.field_strengths[fi, fj]

            # get embeddings for the features of both the fields
            feat_embed_i = tf.squeeze(inputs[0:, fi:fi + 1, 0:], axis=1)
            feat_embed_j = tf.squeeze(inputs[0:, fj:fj + 1, 0:], axis=1)

            f = tf.scalar_mul(r_ij, batch_dot(feat_embed_i, feat_embed_j, axes=1))
            pairwise_inner_prods.append(f)

        sum_ = tf.add_n(pairwise_inner_prods)
        return sum_

    def compute_output_shape(self, input_shape):
        return (None, 1)

    def get_config(self):
        config = super(FwFMLayer, self).get_config().copy()
        config.update({
            'num_fields': self.num_fields,
            'regularizer': self.regularizer
        })
        return config
class FEFMLayer(Layer):
    """Field-Embedded Factorization Machines

      Input shape
        - 3D tensor with shape: ``(batch_size,field_size,embedding_size)``.

      Output shape
        - 2D tensor with shape:
            ``(batch_size, (num_fields * (num_fields-1))/2)`` # concatenated FEFM interaction embeddings

      Arguments
        - **regularizer** : L2 regularizer weight for the field pair matrix embeddings parameters of FEFM

      References
        - [Field-Embedded Factorization Machines for Click-through Rate Prediction]
         https://arxiv.org/pdf/2009.09931.pdf
    """

    def __init__(self, regularizer, **kwargs):
        self.regularizer = regularizer
        super(FEFMLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create one ``(embedding_size, embedding_size)`` matrix per field pair."""
        if len(input_shape) != 3:
            # Implicit concatenation keeps source indentation out of the message.
            raise ValueError(
                "Unexpected inputs dimensions %d, expect to be 3 dimensions"
                % (len(input_shape)))

        self.num_fields = int(input_shape[1])
        embedding_size = int(input_shape[2])

        # Keyed by "i-j" with i < j, the same ids that call() looks up.
        self.field_embeddings = {}
        for fi, fj in itertools.combinations(range(self.num_fields), 2):
            field_pair_id = str(fi) + "-" + str(fj)
            self.field_embeddings[field_pair_id] = self.add_weight(name='field_embeddings' + field_pair_id,
                                                                   shape=(embedding_size, embedding_size),
                                                                   initializer=TruncatedNormal(),
                                                                   regularizer=l2(self.regularizer),
                                                                   trainable=True)

        super(FEFMLayer, self).build(input_shape)  # Be sure to call this somewhere!

    def call(self, inputs, **kwargs):
        """Return the concatenated pairwise interaction scores."""
        if K.ndim(inputs) != 3:
            raise ValueError(
                "Unexpected inputs dimensions %d, expect to be 3 dimensions"
                % (K.ndim(inputs)))

        pairwise_inner_prods = []
        for fi, fj in itertools.combinations(range(self.num_fields), 2):
            field_pair_id = str(fi) + "-" + str(fj)
            feat_embed_i = tf.squeeze(inputs[0:, fi:fi + 1, 0:], axis=1)
            feat_embed_j = tf.squeeze(inputs[0:, fj:fj + 1, 0:], axis=1)
            field_pair_embed_ij = self.field_embeddings[field_pair_id]

            # W + W^T makes the pair matrix symmetric before it is applied.
            feat_embed_i_tr = tf.matmul(feat_embed_i, field_pair_embed_ij + tf.transpose(field_pair_embed_ij))

            f = batch_dot(feat_embed_i_tr, feat_embed_j, axes=1)
            pairwise_inner_prods.append(f)

        concat_vec = tf.concat(pairwise_inner_prods, axis=1)
        return concat_vec

    def compute_output_shape(self, input_shape):
        """Return ``(None, num_pairs)`` with an integer ``num_pairs``."""
        num_fields = int(input_shape[1])
        # Floor division: a shape entry must be an int, and n*(n-1) is always even.
        return (None, (num_fields * (num_fields - 1)) // 2)

    def get_config(self):
        config = super(FEFMLayer, self).get_config().copy()
        config.update({
            'regularizer': self.regularizer,
        })
        return config
class BridgeModule(Layer):
    """Bridge Module used in EDCN

      Input shape
        - A list of two 2D tensor with shape: ``(batch_size, units)``.

      Output shape
        - 2D tensor with shape: ``(batch_size, units)``.

      Arguments
        - **bridge_type**: The type of bridge interaction, one of 'pointwise_addition', 'hadamard_product', 'concatenation', 'attention_pooling'

        - **activation**: Activation function to use.

      Raises
        - ``ValueError`` if ``bridge_type`` is not one of the supported values.

      References
        - [Enhancing Explicit and Implicit Feature Interactions via Information Sharing for Parallel Deep CTR Models.](https://dlp-kdd.github.io/assets/pdf/DLP-KDD_2021_paper_12.pdf)

    """

    _BRIDGE_TYPES = ('pointwise_addition', 'hadamard_product', 'concatenation', 'attention_pooling')

    def __init__(self, bridge_type='hadamard_product', activation='relu', **kwargs):
        # Fail early: an unknown type would otherwise make call() return None.
        if bridge_type not in self._BRIDGE_TYPES:
            raise ValueError(
                'Invalid bridge_type %r. bridge_type should be one of %s'
                % (bridge_type, ', '.join(self._BRIDGE_TYPES)))
        self.bridge_type = bridge_type
        self.activation = activation

        super(BridgeModule, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the sub-layers needed by the selected bridge type."""
        # call() unpacks exactly two tensors, so any other count is rejected here.
        if not isinstance(input_shape, list) or len(input_shape) != 2:
            raise ValueError(
                'A `BridgeModule` layer should be called '
                'on a list of 2 inputs')

        self.dnn_dim = int(input_shape[0][-1])
        if self.bridge_type == "concatenation":
            self.dense = Dense(self.dnn_dim, self.activation)
        elif self.bridge_type == "attention_pooling":
            self.dense_x = DNN([self.dnn_dim, self.dnn_dim], self.activation, output_activation='softmax')
            self.dense_h = DNN([self.dnn_dim, self.dnn_dim], self.activation, output_activation='softmax')

        super(BridgeModule, self).build(input_shape)  # Be sure to call this somewhere!

    def call(self, inputs, **kwargs):
        """Combine the two input tensors according to ``bridge_type``."""
        x, h = inputs
        if self.bridge_type == "pointwise_addition":
            return x + h
        if self.bridge_type == "hadamard_product":
            return x * h
        if self.bridge_type == "concatenation":
            return self.dense(tf.concat([x, h], axis=-1))
        if self.bridge_type == "attention_pooling":
            a_x = self.dense_x(x)
            a_h = self.dense_h(h)
            return a_x * x + a_h * h
        # Only reachable if bridge_type was changed after construction.
        raise ValueError('Invalid bridge_type %r' % (self.bridge_type,))

    def compute_output_shape(self, input_shape):
        return (None, self.dnn_dim)

    def get_config(self):
        base_config = super(BridgeModule, self).get_config().copy()
        config = {
            'bridge_type': self.bridge_type,
            'activation': self.activation
        }
        config.update(base_config)
        return config
================================================
FILE: deepctr/layers/normalization.py
================================================
# -*- coding:utf-8 -*-
"""
Author:
Weichen Shen,weichenswc@163.com
"""
from tensorflow.python.keras import backend as K
from tensorflow.python.keras.layers import Layer
try:
from tensorflow.python.ops.init_ops import Zeros, Ones
except ImportError:
from tensorflow.python.ops.init_ops_v2 import Zeros, Ones
class LayerNormalization(Layer):
    """Normalize the inputs to zero mean and unit variance along ``axis``.

      Input shape
        - Arbitrary tensor.

      Output shape
        - Same shape as the input.

      Arguments
        - **axis** : int, axis over which mean and variance are computed.
        - **eps** : float, added to the variance before the square root for numerical stability.
        - **center** : bool, whether to add the learned offset ``beta``.
        - **scale** : bool, whether to multiply by the learned scale ``gamma``.

      Note: ``gamma`` and ``beta`` are created with the size of the last input dimension.
    """

    def __init__(self, axis=-1, eps=1e-9, center=True,
                 scale=True, **kwargs):
        self.axis = axis
        self.eps = eps
        self.center = center
        self.scale = scale
        super(LayerNormalization, self).__init__(**kwargs)

    def build(self, input_shape):
        self.gamma = self.add_weight(name='gamma', shape=input_shape[-1:],
                                     initializer=Ones(), trainable=True)
        self.beta = self.add_weight(name='beta', shape=input_shape[-1:],
                                    initializer=Zeros(), trainable=True)
        super(LayerNormalization, self).build(input_shape)

    def call(self, inputs):
        mean = K.mean(inputs, axis=self.axis, keepdims=True)
        # The variance must be reduced over the same axis as the mean.
        variance = K.mean(K.square(inputs - mean), axis=self.axis, keepdims=True)
        std = K.sqrt(variance + self.eps)
        outputs = (inputs - mean) / std
        if self.scale:
            outputs *= self.gamma
        if self.center:
            outputs += self.beta
        return outputs

    def compute_output_shape(self, input_shape):
        return input_shape

    def get_config(self, ):
        config = {'axis': self.axis, 'eps': self.eps, 'center': self.center, 'scale': self.scale}
        base_config = super(LayerNormalization, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
================================================
FILE: deepctr/layers/sequence.py
================================================
# -*- coding:utf-8 -*-
"""
Author:
Weichen Shen,weichenswc@163.com
"""
import numpy as np
import tensorflow as tf
from tensorflow.python.keras import backend as K
try:
from tensorflow.python.ops.init_ops import TruncatedNormal, Constant, glorot_uniform_initializer as glorot_uniform
except ImportError:
from tensorflow.python.ops.init_ops_v2 import TruncatedNormal, Constant, glorot_uniform
from tensorflow.python.keras.layers import LSTM, Lambda, Layer, Dropout
from .core import LocalActivationUnit
from .normalization import LayerNormalization
if tf.__version__ >= '2.0.0':
from ..contrib.rnn_v2 import dynamic_rnn
else:
from ..contrib.rnn import dynamic_rnn
from ..contrib.utils import QAAttGRUCell, VecAttGRUCell
from .utils import reduce_sum, reduce_max, div, softmax, reduce_mean
class SequencePoolingLayer(Layer):
    """The SequencePoolingLayer is used to apply pooling operation(sum,mean,max) on variable-length sequence feature/multi-value feature.

      Input shape
        - A list of two  tensor [seq_value,seq_len]

        - seq_value is a 3D tensor with shape: ``(batch_size, T, embedding_size)``

        - seq_len is a 2D tensor with shape : ``(batch_size, 1)``,indicate valid length of each sequence.

      Output shape
        - 3D tensor with shape: ``(batch_size, 1, embedding_size)``.

      Arguments
        - **mode**:str.Pooling operation to be used,can be sum,mean or max.

        - **supports_masking**:If True,the input need to support masking.
    """

    def __init__(self, mode='mean', supports_masking=False, **kwargs):

        if mode not in ['sum', 'mean', 'max']:
            # The message lists every accepted mode, including 'max'.
            raise ValueError("mode must be sum, mean or max")
        self.mode = mode
        self.eps = tf.constant(1e-8, tf.float32)
        super(SequencePoolingLayer, self).__init__(**kwargs)

        self.supports_masking = supports_masking

    def build(self, input_shape):
        if not self.supports_masking:
            # The static sequence length is needed to build the mask in call().
            self.seq_len_max = int(input_shape[0][1])
        super(SequencePoolingLayer, self).build(
            input_shape)  # Be sure to call this somewhere!

    def call(self, seq_value_len_list, mask=None, **kwargs):
        """Pool the sequence over its time axis while ignoring padded steps."""
        if self.supports_masking:
            if mask is None:
                raise ValueError(
                    "When supports_masking=True,input must support masking")
            uiseq_embed_list = seq_value_len_list
            mask = tf.cast(mask, tf.float32)  # tf.to_float(mask)
            user_behavior_length = reduce_sum(mask, axis=-1, keep_dims=True)
            mask = tf.expand_dims(mask, axis=2)
        else:
            uiseq_embed_list, user_behavior_length = seq_value_len_list

            mask = tf.sequence_mask(user_behavior_length,
                                    self.seq_len_max, dtype=tf.float32)
            mask = tf.transpose(mask, (0, 2, 1))

        embedding_size = uiseq_embed_list.shape[-1]

        mask = tf.tile(mask, [1, 1, embedding_size])

        if self.mode == "max":
            # Padded steps are pushed far below any real value so they never win the max.
            hist = uiseq_embed_list - (1 - mask) * 1e9
            return reduce_max(hist, 1, keep_dims=True)

        hist = reduce_sum(uiseq_embed_list * mask, 1, keep_dims=False)

        if self.mode == "mean":
            # eps guards against division by zero for empty sequences.
            hist = div(hist, tf.cast(user_behavior_length, tf.float32) + self.eps)

        hist = tf.expand_dims(hist, axis=1)
        return hist

    def compute_output_shape(self, input_shape):
        if self.supports_masking:
            return (None, 1, input_shape[-1])
        else:
            return (None, 1, input_shape[0][-1])

    def compute_mask(self, inputs, mask):
        return None

    def get_config(self, ):
        config = {'mode': self.mode, 'supports_masking': self.supports_masking}
        base_config = super(SequencePoolingLayer, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
class WeightedSequenceLayer(Layer):
    """The WeightedSequenceLayer is used to apply weight score on variable-length sequence feature/multi-value feature.

      Input shape
        - A list of three tensor [seq_value,seq_len,seq_weight]
          (only [seq_value,seq_weight] when ``supports_masking=True``)

        - seq_value is a 3D tensor with shape: ``(batch_size, T, embedding_size)``

        - seq_len is a 2D tensor with shape : ``(batch_size, 1)``,indicate valid length of each sequence.

        - seq_weight is a 3D tensor with shape: ``(batch_size, T, 1)``

      Output shape
        - 3D tensor with shape: ``(batch_size, T, embedding_size)``.

      Arguments
        - **weight_normalization**: bool.Whether normalize the weight score before applying to sequence.

        - **supports_masking**:If True,the input need to support masking.
    """

    def __init__(self, weight_normalization=True, supports_masking=False, **kwargs):
        super(WeightedSequenceLayer, self).__init__(**kwargs)
        self.weight_normalization = weight_normalization
        self.supports_masking = supports_masking

    def build(self, input_shape):
        # Without Keras masking the mask is derived from seq_len, which needs
        # the static sequence length of seq_value.
        if not self.supports_masking:
            self.seq_len_max = int(input_shape[0][1])
        super(WeightedSequenceLayer, self).build(
            input_shape)  # Be sure to call this somewhere!

    def call(self, input_list, mask=None, **kwargs):
        """Multiply every sequence step by its (optionally softmax-normalized) weight."""
        if not self.supports_masking:
            key_input, key_length_input, value_input = input_list
            length_mask = tf.sequence_mask(key_length_input,
                                           self.seq_len_max, dtype=tf.bool)
            mask = tf.transpose(length_mask, (0, 2, 1))
        elif mask is None:
            raise ValueError(
                "When supports_masking=True,input must support masking")
        else:
            key_input, value_input = input_list
            mask = tf.expand_dims(mask[0], axis=2)

        embedding_size = key_input.shape[-1]

        normalize = self.weight_normalization
        # Padded steps get a very negative score before softmax, or zero weight otherwise.
        paddings = (tf.ones_like(value_input) * (-2 ** 32 + 1)
                    if normalize else tf.zeros_like(value_input))
        value_input = tf.where(mask, value_input, paddings)

        if normalize:
            value_input = softmax(value_input, dim=1)

        if len(value_input.shape) == 2:
            value_input = tf.expand_dims(value_input, axis=2)
            value_input = tf.tile(value_input, [1, 1, embedding_size])

        return tf.multiply(key_input, value_input)

    def compute_output_shape(self, input_shape):
        return input_shape[0]

    def compute_mask(self, inputs, mask):
        # The mask of seq_value is passed through only in masking mode.
        return mask[0] if self.supports_masking else None

    def get_config(self, ):
        base_config = super(WeightedSequenceLayer, self).get_config()
        own_config = {'weight_normalization': self.weight_normalization,
                      'supports_masking': self.supports_masking}
        return dict(list(base_config.items()) + list(own_config.items()))
class AttentionSequencePoolingLayer(Layer):
    """The Attentional sequence pooling operation used in DIN.

      Input shape
        - A list of three tensor: [query,keys,keys_length]

        - query is a 3D tensor with shape:  ``(batch_size, 1, embedding_size)``

        - keys is a 3D tensor with shape:   ``(batch_size, T, embedding_size)``

        - keys_length is a 2D tensor with shape: ``(batch_size, 1)``

      Output shape
        - 3D tensor with shape: ``(batch_size, 1, embedding_size)``.

      Arguments
        - **att_hidden_units**:list of positive integer, the attention net layer number and units in each layer.

        - **att_activation**: Activation function to use in attention net.

        - **weight_normalization**: bool.Whether normalize the attention score of local activation unit.

        - **return_score**: bool.If True,return the attention scores instead of the pooled keys.

        - **supports_masking**:If True,the input need to support masking.

      References
        - [Zhou G, Zhu X, Song C, et al. Deep interest network for click-through rate prediction[C]//Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining. ACM, 2018: 1059-1068.](https://arxiv.org/pdf/1706.06978.pdf)
    """

    def __init__(self, att_hidden_units=(80, 40), att_activation='sigmoid', weight_normalization=False,
                 return_score=False,
                 supports_masking=False, **kwargs):

        self.att_hidden_units = att_hidden_units
        self.att_activation = att_activation
        self.weight_normalization = weight_normalization
        self.return_score = return_score
        super(AttentionSequencePoolingLayer, self).__init__(**kwargs)
        self.supports_masking = supports_masking

    def build(self, input_shape):
        """Validate the three input shapes (non-masking mode) and create the attention net."""
        if not self.supports_masking:
            if not isinstance(input_shape, list) or len(input_shape) != 3:
                raise ValueError('A `AttentionSequencePoolingLayer` layer should be called '
                                 'on a list of 3 inputs')

            if len(input_shape[0]) != 3 or len(input_shape[1]) != 3 or len(input_shape[2]) != 2:
                raise ValueError(
                    "Unexpected inputs dimensions,the 3 tensor dimensions are %d,%d and %d , expect to be 3,3 and 2" % (
                        len(input_shape[0]), len(input_shape[1]), len(input_shape[2])))

            if input_shape[0][-1] != input_shape[1][-1] or input_shape[0][1] != 1 or input_shape[2][1] != 1:
                raise ValueError('A `AttentionSequencePoolingLayer` layer requires '
                                 'inputs of a 3 tensor with shape (None,1,embedding_size),(None,T,embedding_size) and (None,1)'
                                 'Got different shapes: %s' % (input_shape))
        else:
            pass
        self.local_att = LocalActivationUnit(
            self.att_hidden_units, self.att_activation, l2_reg=0, dropout_rate=0, use_bn=False, seed=1024, )
        super(AttentionSequencePoolingLayer, self).build(
            input_shape)  # Be sure to call this somewhere!

    @staticmethod
    def _tf_version_before(major, minor):
        """Return True when the running TensorFlow is older than ``major.minor``.

        The comparison is numeric: comparing version strings directly is
        lexicographic and ranks e.g. '1.9.0' above '1.13.0'.
        """
        parts = tf.__version__.split('.')
        try:
            current = (int(parts[0]), int(parts[1]))
        except (IndexError, ValueError):
            # Unparseable version string: treat it as a modern release.
            return False
        return current < (major, minor)

    def call(self, inputs, mask=None, training=None, **kwargs):
        """Score every key against the query and pool the keys with those scores."""
        if self.supports_masking:
            if mask is None:
                raise ValueError(
                    "When supports_masking=True,input must support masking")
            queries, keys = inputs
            key_masks = tf.expand_dims(mask[-1], axis=1)

        else:

            queries, keys, keys_length = inputs
            hist_len = keys.get_shape()[1]
            key_masks = tf.sequence_mask(keys_length, hist_len)

        attention_score = self.local_att([queries, keys], training=training)

        outputs = tf.transpose(attention_score, (0, 2, 1))

        # Padded keys get a very negative score before softmax, or zero weight otherwise.
        if self.weight_normalization:
            paddings = tf.ones_like(outputs) * (-2 ** 32 + 1)
        else:
            paddings = tf.zeros_like(outputs)

        outputs = tf.where(key_masks, outputs, paddings)

        if self.weight_normalization:
            outputs = softmax(outputs)

        if not self.return_score:
            outputs = tf.matmul(outputs, keys)

        if self._tf_version_before(1, 13):
            # Fall back to the newer rule if the attribute is absent on the score tensor.
            outputs._uses_learning_phase = getattr(
                attention_score, '_uses_learning_phase', training is not None)
        else:
            outputs._uses_learning_phase = training is not None

        return outputs

    def compute_output_shape(self, input_shape):
        if self.return_score:
            return (None, 1, input_shape[1][1])
        else:
            return (None, 1, input_shape[0][-1])

    def compute_mask(self, inputs, mask):
        return None

    def get_config(self, ):

        config = {'att_hidden_units': self.att_hidden_units, 'att_activation': self.att_activation,
                  'weight_normalization': self.weight_normalization, 'return_score': self.return_score,
                  'supports_masking': self.supports_masking}
        base_config = super(AttentionSequencePoolingLayer, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
class BiLSTM(Layer):
"""A multiple layer Bidirectional Residual LSTM Layer.
Input shape
- 3D tensor with shape ``(batch_size, timesteps, input_dim)``.
Output shape
- 3D tensor with shape: ``(batch_size, timesteps, units)``.
Arguments
- **units**: Positive integer, dimensionality of the output space.
- **layers**:Positive integer, number of LSTM layers to stacked.
- **res_layers**: Positive integer, number of residual connection to used in last ``res_layers``.
- **dropout_rate**: Float between 0 and 1. Fraction of the units to drop for the linear transformation of the inputs.
- **merge_mode**: merge_mode: Mode by which outputs of the forward and backward RNNs will be combined. One of { ``'fw'`` , ``'bw'`` , ``'sum'`` , ``'mul'`` , ``'concat'`` , ``'ave'`` , ``None`` }. If None, the outputs will not be combined, they will be returned as a list.
"""
def __init__(self, units, layers=2, res_layers=0, dropout_rate=0.2, merge_mode='ave', **kwargs):
if merge_mode not in ['fw', 'bw', 'sum', 'mul', 'ave', 'concat', None]:
raise ValueError('Invalid merge mode. '
'Merge mode should be one of '
'{"fw","bw","sum", "mul", "ave", "concat", None}')
self.units = units
self.layers = layers
self.res_layers = res_layers
self.dropout_rate = dropout_rate
self.merge_mode = merge_mode
super(BiLSTM, self).__init__(**kwargs)
self.supports_masking = True
def build(self, input_shape):
if len(input_shape) != 3:
raise ValueError(
"Unexpected inputs dimensions %d, expect to be 3 dimensions" % (len(input_shape)))
self.fw_lstm = []
self.bw_lstm = []
for _ in range(self.layers):
self.fw_lstm.append(
LSTM(self.units, dropout=self.dropout_rate, bias_initializer='ones', return_sequences=True,
unroll=True))
self.bw_lstm.append(
LSTM(self.units, dropout=self.dropout_rate, bias_initializer='ones', return_sequences=True,
go_backwards=True, unroll=True))
super(BiLSTM, self).build(
input_shape) # Be sure to call this somewhere!
def call(self, inputs, mask=None, **kwargs):
input_fw = inputs
input_bw = inputs
for i in range(self.layers):
output_fw = self.fw_lstm[i](input_fw)
output_bw = self.bw_lstm[i](input_bw)
output_bw = Lambda(lambda x: K.reverse(
x, 1), mask=lambda inputs, mask: mask)(output_bw)
if i >= self.layers - self.res_layers:
output_fw += input_fw
output_bw += input_bw
input_fw = output_fw
input_bw = output_bw
output_fw = input_fw
output_bw = input_bw
if self.merge_mode == "fw":
output = output_fw
elif self.merge_mode == "bw":
output = output_bw
elif self.merge_mode == 'concat':
output = tf.concat([output_fw, output_bw], axis=-1)
elif self.merge_mode == 'sum':
output = output_fw + output_bw
elif self.merge_mode == 'ave':
output = (output_fw + output_bw) / 2
elif self.merge_mode == 'mul':
output = output_fw * output_bw
elif self.merge_mode is None:
output = [output_fw, output_bw]
return output
def compute_output_shape(self, input_shape):
    """Return the output shape implied by ``merge_mode``.

    :param input_shape: shape of the layer input.
    :return: ``[input_shape, input_shape]`` when ``merge_mode`` is ``None``,
        the input shape with its last dimension doubled for ``'concat'``,
        otherwise ``input_shape`` unchanged.
    """
    # NOTE(review): the shapes are derived from the input's last dimension, not
    # from ``self.units``; this looks correct only when both are equal - confirm.
    if self.merge_mode is None:
        return [input_shape, input_shape]
    if self.merge_mode == 'concat':
        return input_shape[:-1] + (input_shape[-1] * 2,)
    return input_shape
def compute_mask(self, inputs, mask):
    """Propagate the incoming mask unchanged."""
    return mask
def get_config(self, ):
    """Return the base layer config extended with this layer's constructor arguments."""
    merged = dict(super(BiLSTM, self).get_config())
    merged.update({
        'units': self.units,
        'layers': self.layers,
        'res_layers': self.res_layers,
        'dropout_rate': self.dropout_rate,
        'merge_mode': self.merge_mode,
    })
    return merged
class Transformer(Layer):
    """ Simplified version of Transformer proposed in 《Attention is all you need》

      Input shape
        - a list of two 3D tensors (queries, keys) with shape ``(batch_size, timesteps, input_dim)`` if ``supports_masking=True`` .
        - a list of four tensors if ``supports_masking=False`` : the first two (queries, keys) with shape ``(batch_size, timesteps, input_dim)`` , the last two (query lengths, key lengths) with shape ``(batch_size, 1)`` .

      Output shape
        - 3D tensor with shape: ``(batch_size, 1, input_dim)`` if ``output_type='mean'`` or ``output_type='sum'`` , else ``(batch_size, timesteps, input_dim)`` .

      Arguments
            - **att_embedding_size**: int.The embedding size in multi-head self-attention network.
            - **head_num**: int.The head number in multi-head self-attention network.
            - **dropout_rate**: float between 0 and 1. Fraction of the units to drop.
            - **use_positional_encoding**: bool. Whether or not use positional_encoding
            - **use_res**: bool. Whether or not use standard residual connections before output.
            - **use_feed_forward**: bool. Whether or not use pointwise feed forward network.
            - **use_layer_norm**: bool. Whether or not use Layer Normalization.
            - **blinding**: bool. Whether or not use blinding.
            - **seed**: A Python integer to use as random seed.
            - **supports_masking**:bool. Whether or not support masking.
            - **attention_type**: str, Type of attention, the value must be one of { ``'scaled_dot_product'`` , ``'cos'`` , ``'ln'`` , ``'additive'`` }.
            - **output_type**: ``'mean'`` , ``'sum'`` or `None`. Whether or not use average/sum pooling for output.

      References
            - [Vaswani, Ashish, et al. "Attention is all you need." Advances in Neural Information Processing Systems. 2017.](https://papers.nips.cc/paper/7181-attention-is-all-you-need.pdf)
    """

    def __init__(self, att_embedding_size=1, head_num=8, dropout_rate=0.0, use_positional_encoding=True, use_res=True,
                 use_feed_forward=True, use_layer_norm=False, blinding=True, seed=1024, supports_masking=False,
                 attention_type="scaled_dot_product", output_type="mean", **kwargs):
        if head_num <= 0:
            raise ValueError('head_num must be a int > 0')
        self.att_embedding_size = att_embedding_size
        self.head_num = head_num
        # Total projection width; ``build`` requires it to equal the input's last dimension.
        self.num_units = att_embedding_size * head_num
        self.use_res = use_res
        self.use_feed_forward = use_feed_forward
        self.seed = seed
        self.use_positional_encoding = use_positional_encoding
        self.dropout_rate = dropout_rate
        self.use_layer_norm = use_layer_norm
        self.blinding = blinding
        self.attention_type = attention_type
        self.output_type = output_type
        super(Transformer, self).__init__(**kwargs)
        self.supports_masking = supports_masking

    def build(self, input_shape):
        """Create projection, attention and feed-forward weights.

        :raises ValueError: if ``att_embedding_size * head_num`` differs from the
            last dimension of the first input.
        """
        embedding_size = int(input_shape[0][-1])
        if self.num_units != embedding_size:
            raise ValueError(
                "att_embedding_size * head_num must equal the last dimension size of inputs,got %d * %d != %d" % (
                    self.att_embedding_size, self.head_num, embedding_size))
        self.seq_len_max = int(input_shape[0][-2])
        self.W_Query = self.add_weight(name='query', shape=[embedding_size, self.att_embedding_size * self.head_num],
                                       dtype=tf.float32,
                                       initializer=TruncatedNormal(seed=self.seed))
        self.W_key = self.add_weight(name='key', shape=[embedding_size, self.att_embedding_size * self.head_num],
                                     dtype=tf.float32,
                                     initializer=TruncatedNormal(seed=self.seed + 1))
        self.W_Value = self.add_weight(name='value', shape=[embedding_size, self.att_embedding_size * self.head_num],
                                       dtype=tf.float32,
                                       initializer=TruncatedNormal(seed=self.seed + 2))
        if self.attention_type == "additive":
            self.b = self.add_weight('b', shape=[self.att_embedding_size], dtype=tf.float32,
                                     initializer=glorot_uniform(seed=self.seed))
            self.v = self.add_weight('v', shape=[self.att_embedding_size], dtype=tf.float32,
                                     initializer=glorot_uniform(seed=self.seed))
        elif self.attention_type == "ln":
            self.att_ln_q = LayerNormalization()
            self.att_ln_k = LayerNormalization()
        if self.use_feed_forward:
            self.fw1 = self.add_weight('fw1', shape=[self.num_units, 4 * self.num_units], dtype=tf.float32,
                                       initializer=glorot_uniform(seed=self.seed))
            self.fw2 = self.add_weight('fw2', shape=[4 * self.num_units, self.num_units], dtype=tf.float32,
                                       initializer=glorot_uniform(seed=self.seed))

        self.dropout = Dropout(
            self.dropout_rate, seed=self.seed)
        self.ln = LayerNormalization()
        if self.use_positional_encoding:
            self.query_pe = PositionEncoding()
            self.key_pe = PositionEncoding()
        # Be sure to call this somewhere!
        super(Transformer, self).build(input_shape)

    def call(self, inputs, mask=None, training=None, **kwargs):
        """Apply multi-head attention (plus optional residual, layer norm and
        feed-forward) and pool the result according to ``output_type``.

        :raises ValueError: if ``attention_type`` is not a supported value.
        """
        if self.supports_masking:
            queries, keys = inputs
            query_masks, key_masks = mask
            query_masks = tf.cast(query_masks, tf.float32)
            key_masks = tf.cast(key_masks, tf.float32)
        else:
            # Masks are derived from the sequence lengths passed as extra inputs.
            queries, keys, query_masks, key_masks = inputs

            query_masks = tf.sequence_mask(
                query_masks, self.seq_len_max, dtype=tf.float32)
            key_masks = tf.sequence_mask(
                key_masks, self.seq_len_max, dtype=tf.float32)
            query_masks = tf.squeeze(query_masks, axis=1)
            key_masks = tf.squeeze(key_masks, axis=1)

        if self.use_positional_encoding:
            queries = self.query_pe(queries)
            keys = self.key_pe(keys)

        Q = tf.tensordot(queries, self.W_Query,
                         axes=(-1, 0))  # N T_q D*h
        K = tf.tensordot(keys, self.W_key, axes=(-1, 0))
        V = tf.tensordot(keys, self.W_Value, axes=(-1, 0))

        # Split the heads along the last axis and stack them on the batch axis.
        # h*N T_q D
        Q_ = tf.concat(tf.split(Q, self.head_num, axis=2), axis=0)
        K_ = tf.concat(tf.split(K, self.head_num, axis=2), axis=0)
        V_ = tf.concat(tf.split(V, self.head_num, axis=2), axis=0)

        if self.attention_type == "scaled_dot_product":
            # h*N T_q T_k
            outputs = tf.matmul(Q_, K_, transpose_b=True)

            outputs = outputs / (K_.get_shape().as_list()[-1] ** 0.5)
        elif self.attention_type == "cos":
            Q_cos = tf.nn.l2_normalize(Q_, dim=-1)
            K_cos = tf.nn.l2_normalize(K_, dim=-1)

            outputs = tf.matmul(Q_cos, K_cos, transpose_b=True)  # h*N T_q T_k

            outputs = outputs * 20  # Scale
        elif self.attention_type == 'ln':
            Q_ = self.att_ln_q(Q_)
            K_ = self.att_ln_k(K_)

            outputs = tf.matmul(Q_, K_, transpose_b=True)  # h*N T_q T_k
            # Scale
            outputs = outputs / (K_.get_shape().as_list()[-1] ** 0.5)
        elif self.attention_type == "additive":
            Q_reshaped = tf.expand_dims(Q_, axis=-2)
            K_reshaped = tf.expand_dims(K_, axis=-3)
            outputs = tf.tanh(tf.nn.bias_add(Q_reshaped + K_reshaped, self.b))
            outputs = tf.squeeze(tf.tensordot(outputs, tf.expand_dims(self.v, axis=-1), axes=[-1, 0]), axis=-1)
        else:
            raise ValueError("attention_type must be [scaled_dot_product,cos,ln,additive]")

        key_masks = tf.tile(key_masks, [self.head_num, 1])

        # (h*N, T_q, T_k)
        key_masks = tf.tile(tf.expand_dims(key_masks, 1),
                            [1, tf.shape(queries)[1], 1])

        # Masked key positions get a large negative logit before the softmax.
        paddings = tf.ones_like(outputs) * (-2 ** 32 + 1)

        # (h*N, T_q, T_k)
        outputs = tf.where(tf.equal(key_masks, 1), outputs, paddings, )
        if self.blinding:
            # Overwrite the diagonal of the logits with the same large negative value.
            try:
                outputs = tf.matrix_set_diag(outputs, tf.ones_like(outputs)[
                                                      :, :, 0] * (-2 ** 32 + 1))
            except AttributeError:
                outputs = tf.compat.v1.matrix_set_diag(outputs, tf.ones_like(outputs)[
                                                                :, :, 0] * (-2 ** 32 + 1))

        # Subtract the row-wise maximum before the softmax.
        outputs -= reduce_max(outputs, axis=-1, keep_dims=True)
        outputs = softmax(outputs)
        query_masks = tf.tile(query_masks, [self.head_num, 1])  # (h*N, T_q)
        # (h*N, T_q, T_k)
        query_masks = tf.tile(tf.expand_dims(
            query_masks, -1), [1, 1, tf.shape(keys)[1]])

        outputs *= query_masks

        outputs = self.dropout(outputs, training=training)
        # Weighted sum
        # ( h*N, T_q, C/h)
        result = tf.matmul(outputs, V_)
        # Undo the head stacking: back to (N, T_q, C).
        result = tf.concat(tf.split(result, self.head_num, axis=0), axis=2)

        if self.use_res:
            result += queries
        if self.use_layer_norm:
            result = self.ln(result)

        if self.use_feed_forward:
            fw1 = tf.nn.relu(tf.tensordot(result, self.fw1, axes=[-1, 0]))
            fw1 = self.dropout(fw1, training=training)
            fw2 = tf.tensordot(fw1, self.fw2, axes=[-1, 0])
            # NOTE(review): when ``use_res`` is False ``fw2`` is never used, so the
            # feed-forward branch has no effect on the output - confirm whether
            # this is intended before changing it (it would alter trained models).
            if self.use_res:
                result += fw2
            if self.use_layer_norm:
                result = self.ln(result)

        if self.output_type == "mean":
            return reduce_mean(result, axis=1, keep_dims=True)
        elif self.output_type == "sum":
            return reduce_sum(result, axis=1, keep_dims=True)
        else:
            return result

    def compute_output_shape(self, input_shape):
        """Return ``(None, 1, D)`` for pooled output, else ``(None, T_q, D)``.

        ``call`` only pools over the time axis when ``output_type`` is ``'mean'``
        or ``'sum'``; otherwise the query time dimension is preserved.
        """
        if self.output_type in ("mean", "sum"):
            timesteps = 1
        else:
            timesteps = input_shape[0][1]
        return (None, timesteps, self.att_embedding_size * self.head_num)

    def compute_mask(self, inputs, mask=None):
        """This layer does not propagate a mask."""
        return None

    def get_config(self, ):
        config = {'att_embedding_size': self.att_embedding_size, 'head_num': self.head_num,
                  'dropout_rate': self.dropout_rate, 'use_res': self.use_res,
                  'use_positional_encoding': self.use_positional_encoding, 'use_feed_forward': self.use_feed_forward,
                  'use_layer_norm': self.use_layer_norm, 'seed': self.seed, 'supports_masking': self.supports_masking,
                  'blinding': self.blinding, 'attention_type': self.attention_type, 'output_type': self.output_type}
        base_config = super(Transformer, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
class PositionEncoding(Layer):
    """Adds a sinusoid-initialised position lookup table to a 3D input.

      Arguments
        - **pos_embedding_trainable**: bool. Passed as ``trainable`` of the lookup table weight.
        - **zero_pad**: bool. If True, the table row for position 0 is set to zeros.
        - **scale**: bool. If True, the looked-up encoding is multiplied by ``num_units ** 0.5``.
    """

    def __init__(self, pos_embedding_trainable=True,
                 zero_pad=False,
                 scale=True, **kwargs):
        self.scale = scale
        self.zero_pad = zero_pad
        self.pos_embedding_trainable = pos_embedding_trainable
        super(PositionEncoding, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the ``(T, num_units)`` lookup table weight."""
        _, seq_len, num_units = input_shape.as_list()

        # Argument of the sin/cos functions for every (position, channel) pair.
        rows = []
        for pos in range(seq_len):
            rows.append([pos / np.power(10000, 2. * (dim // 2) / num_units)
                         for dim in range(num_units)])
        table = np.array(rows)

        # sin on even columns, cos on odd columns.
        table[:, 0::2] = np.sin(table[:, 0::2])  # dim 2i
        table[:, 1::2] = np.cos(table[:, 1::2])  # dim 2i+1

        if self.zero_pad:
            table[0, :] = np.zeros(num_units)

        self.lookup_table = self.add_weight("lookup_table", (seq_len, num_units),
                                            initializer=Constant(table),
                                            trainable=self.pos_embedding_trainable)

        # Be sure to call this somewhere!
        super(PositionEncoding, self).build(input_shape)

    def call(self, inputs, mask=None):
        """Return ``inputs`` plus the (optionally scaled) position encoding."""
        _, seq_len, num_units = inputs.get_shape().as_list()
        positions = tf.expand_dims(tf.range(seq_len), 0)
        encoding = tf.nn.embedding_lookup(self.lookup_table, positions)
        if self.scale:
            encoding = encoding * num_units ** 0.5
        return encoding + inputs

    def compute_output_shape(self, input_shape):
        return input_shape

    def compute_mask(self, inputs, mask=None):
        return mask

    def get_config(self, ):
        merged = dict(super(PositionEncoding, self).get_config())
        merged.update({'pos_embedding_trainable': self.pos_embedding_trainable,
                       'zero_pad': self.zero_pad,
                       'scale': self.scale})
        return merged
class BiasEncoding(Layer):
    """Adds learned session, position and item bias terms to each session tensor.

      Arguments
        - **sess_max_count**: int. Number of session tensors processed in ``call``.
        - **seed**: A Python integer to use as random seed.
    """

    def __init__(self, sess_max_count, seed=1024, **kwargs):
        self.sess_max_count = sess_max_count
        self.seed = seed
        super(BiasEncoding, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the three bias weights.

        With ``sess_max_count == 1`` ``input_shape`` is read as a single shape,
        otherwise the first entry of ``input_shape`` is used.
        """
        session_shape = input_shape if self.sess_max_count == 1 else input_shape[0]

        # Dimensions may be objects exposing ``.value`` or plain ints; both
        # branches now accept either form.
        embed_size = getattr(session_shape[2], 'value', session_shape[2])
        seq_len_max = getattr(session_shape[1], 'value', session_shape[1])

        self.sess_bias_embedding = self.add_weight('sess_bias_embedding', shape=(self.sess_max_count, 1, 1),
                                                   initializer=TruncatedNormal(
                                                       mean=0.0, stddev=0.0001, seed=self.seed))
        self.seq_bias_embedding = self.add_weight('seq_bias_embedding', shape=(1, seq_len_max, 1),
                                                  initializer=TruncatedNormal(
                                                      mean=0.0, stddev=0.0001, seed=self.seed))
        self.item_bias_embedding = self.add_weight('item_bias_embedding', shape=(1, 1, embed_size),
                                                   initializer=TruncatedNormal(
                                                       mean=0.0, stddev=0.0001, seed=self.seed))

        # Be sure to call this somewhere!
        super(BiasEncoding, self).build(input_shape)

    def call(self, inputs, mask=None):
        """Add the bias terms to each of the first ``sess_max_count`` entries of ``inputs``.

        :param inputs: indexable collection; ``inputs[i]`` is used for session ``i``.
        :return: list of ``sess_max_count`` tensors, ``inputs[i]`` plus the item,
            position and i-th session bias.
        """
        transformer_out = []
        for i in range(self.sess_max_count):
            transformer_out.append(
                inputs[i] + self.item_bias_embedding + self.seq_bias_embedding + self.sess_bias_embedding[i])
        return transformer_out

    def compute_output_shape(self, input_shape):
        return input_shape

    def compute_mask(self, inputs, mask=None):
        return mask

    def get_config(self, ):
        config = {'sess_max_count': self.sess_max_count, 'seed': self.seed, }
        base_config = super(BiasEncoding, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
class DynamicGRU(Layer):
    """GRU run through ``dynamic_rnn`` with an optional attention-aware cell.

      Arguments
        - **num_units**: int or None. Cell size; defaults to the last dimension of the first input.
        - **gru_type**: str. ``"AGRU"`` and ``"AUGRU"`` select attention cells; any other value uses a plain ``GRUCell``.
        - **return_sequence**: bool. Return every step's output, or only the final state with an added time axis.
    """

    def __init__(self, num_units=None, gru_type='GRU', return_sequence=True, **kwargs):
        self.num_units = num_units
        self.return_sequence = return_sequence
        self.gru_type = gru_type
        super(DynamicGRU, self).__init__(**kwargs)

    def build(self, input_shape):
        """Instantiate the recurrent cell selected by ``gru_type``."""
        input_seq_shape = input_shape[0]
        if self.num_units is None:
            self.num_units = input_seq_shape.as_list()[-1]
        if self.gru_type == "AGRU":
            self.gru_cell = QAAttGRUCell(self.num_units)
        elif self.gru_type == "AUGRU":
            self.gru_cell = VecAttGRUCell(self.num_units)
        else:
            try:
                self.gru_cell = tf.nn.rnn_cell.GRUCell(self.num_units)  # GRUCell
            except AttributeError:
                self.gru_cell = tf.compat.v1.nn.rnn_cell.GRUCell(self.num_units)

        # Be sure to call this somewhere!
        super(DynamicGRU, self).build(input_shape)

    def call(self, input_list):
        """Run the cell over the input sequence.

        :param input_list: ``[rnn_input, sequence_length]`` for ``"GRU"`` / ``"AIGRU"``,
            otherwise ``[rnn_input, sequence_length, att_score]``.
        :return: the per-step outputs if ``return_sequence`` is True, else the
            final hidden state with an extra axis inserted at position 1.
        """
        if self.gru_type == "GRU" or self.gru_type == "AIGRU":
            rnn_input, sequence_length = input_list
            att_score = None
        else:
            rnn_input, sequence_length, att_score = input_list

        # Flatten to a 1-D vector. An axis-less ``tf.squeeze`` would also drop
        # the batch axis when the batch size is 1 and yield a scalar.
        # assumes sequence_length holds one length per example - TODO confirm
        sequence_length = tf.reshape(sequence_length, [-1])

        rnn_output, hidden_state = dynamic_rnn(self.gru_cell, inputs=rnn_input, att_scores=att_score,
                                               sequence_length=sequence_length, dtype=tf.float32,
                                               scope=self.name)
        if self.return_sequence:
            return rnn_output
        else:
            return tf.expand_dims(hidden_state, axis=1)

    def compute_output_shape(self, input_shape):
        rnn_input_shape = input_shape[0]
        if self.return_sequence:
            return rnn_input_shape
        else:
            return (None, 1, rnn_input_shape[2])

    def get_config(self, ):
        config = {'num_units': self.num_units, 'gru_type': self.gru_type, 'return_sequence': self.return_sequence}
        base_config = super(DynamicGRU, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
class KMaxPooling(Layer):
    """K Max pooling that selects the k biggest value along the specific axis.

      Input shape
        -  nD tensor with shape: ``(batch_size, ..., input_dim)``.

      Output shape
        - nD tensor with shape: ``(batch_size, ..., output_dim)``.

      Arguments
        - **k**: positive integer, number of top elements to look for along the ``axis`` dimension.

        - **axis**: integer, the dimension to look for elements. May be negative
          (counted from the end); it must not refer to dimension 0.
    """

    def __init__(self, k=1, axis=-1, **kwargs):
        self.k = k
        self.axis = axis
        super(KMaxPooling, self).__init__(**kwargs)

    def build(self, input_shape):
        """Validate ``axis`` and ``k`` against the input shape.

        :raises ValueError: if ``axis`` does not resolve to a dimension in
            ``1 .. ndim-1`` or ``k`` is outside ``1 .. size of that dimension``.
        """
        dims = len(input_shape)
        # Resolve a negative axis for validation only; ``self.axis`` is kept as
        # given because the indexing in ``call`` works with negative values too.
        axis = self.axis + dims if self.axis < 0 else self.axis
        if axis < 1 or axis >= dims:
            raise ValueError("axis must be 1~%d,now is %d" %
                             (dims - 1, self.axis))

        axis_size = input_shape[axis]
        axis_size = getattr(axis_size, 'value', axis_size)
        # An unknown (None) dimension cannot be checked here; skip the upper bound.
        if self.k < 1 or (axis_size is not None and self.k > axis_size):
            raise ValueError("k must be in 1 ~ %d,now k is %d" %
                             (axis_size, self.k))
        self.dims = dims
        # Be sure to call this somewhere!
        super(KMaxPooling, self).build(input_shape)

    def call(self, inputs):
        # swap the last and the axis dimensions since top_k will be applied along the last dimension
        perm = list(range(self.dims))
        perm[-1], perm[self.axis] = perm[self.axis], perm[-1]
        shifted_input = tf.transpose(inputs, perm)

        # extract top_k, returns two tensors [values, indices]
        top_k = tf.nn.top_k(shifted_input, k=self.k, sorted=True, name=None)[0]
        # the same permutation swaps the two axes back
        output = tf.transpose(top_k, perm)

        return output

    def compute_output_shape(self, input_shape):
        output_shape = list(input_shape)
        output_shape[self.axis] = self.k
        return tuple(output_shape)

    def get_config(self, ):
        config = {'k': self.k, 'axis': self.axis}
        base_config = super(KMaxPooling, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
================================================
FILE: deepctr/layers/utils.py
================================================
# -*- coding:utf-8 -*-
"""
Author:
Weichen Shen,weichenswc@163.com
"""
import tensorflow as tf
from tensorflow.python.keras import backend as K
from tensorflow.python.keras.layers import Flatten, Layer, Add
from tensorflow.python.ops.lookup_ops import TextFileInitializer
try:
from tensorflow.python.ops.init_ops import Zeros, glorot_normal_initializer as glorot_normal
except ImportError:
from tensorflow.python.ops.init_ops_v2 import Zeros, glorot_normal
from tensorflow.python.keras.regularizers import l2
try:
from tensorflow.python.ops.lookup_ops import StaticHashTable
except ImportError:
from tensorflow.python.ops.lookup_ops import HashTable as StaticHashTable
class NoMask(Layer):
    """Identity layer: returns its input unchanged and reports no output mask."""

    def __init__(self, **kwargs):
        super(NoMask, self).__init__(**kwargs)

    def build(self, input_shape):
        # No weights of its own; only the base-class bookkeeping is needed.
        super(NoMask, self).build(input_shape)

    def call(self, x, mask=None, **kwargs):
        """Return ``x`` as is."""
        return x

    def compute_mask(self, inputs, mask):
        """Always return ``None`` so no mask is passed on."""
        return None
class Hash(Layer):
    """Looks up keys in a table when setup `vocabulary_path`, which outputs the corresponding values.
    If `vocabulary_path` is not set, `Hash` will hash the input to [0,num_buckets). When `mask_zero` = True,
    input value `0` or `0.0` will be set to `0`, and other value will be set in range [1,num_buckets).

    The following snippet initializes a `Hash` with `vocabulary_path` file with the first column as values and
    second column as keys:

    * `1,emerson`
    * `2,lake`
    * `3,palmer`

    >>> hash = Hash(
    ...   num_buckets=3+1,
    ...   vocabulary_path=filename,
    ...   default_value=0)
    >>> hash(tf.constant('lake')).numpy()
    2
    >>> hash(tf.constant('lakeemerson')).numpy()
    0

    Args:
        num_buckets: An `int` that is >= 1. The number of buckets or the vocabulary size + 1
            when `vocabulary_path` is setup.
        mask_zero: default is False. The `Hash` value will hash input `0` or `0.0` to value `0` when
            the `mask_zero` is `True`. `mask_zero` is not used when `vocabulary_path` is setup.
        vocabulary_path: default `None`. The `CSV` text file path of the vocabulary hash, which contains
            two columns separated by delimiter `comma`, the first column is the value and the second is
            the key. The key data type is `string`, the value data type is `int`. The path must
            be accessible from wherever `Hash` is initialized.
        default_value: default '0'. The default value if a key is missing in the table.
        **kwargs: Additional keyword arguments.
    """

    def __init__(self, num_buckets, mask_zero=False, vocabulary_path=None, default_value=0, **kwargs):
        self.num_buckets = num_buckets
        self.mask_zero = mask_zero
        self.vocabulary_path = vocabulary_path
        self.default_value = default_value
        if self.vocabulary_path:
            # Keys are read from column 1 (string), values from column 0 (int64).
            table_init = TextFileInitializer(vocabulary_path, 'string', 1, 'int64', 0, delimiter=',')
            self.hash_table = StaticHashTable(table_init, default_value=self.default_value)
        super(Hash, self).__init__(**kwargs)

    def build(self, input_shape):
        # Be sure to call this somewhere!
        super(Hash, self).build(input_shape)

    def call(self, x, mask=None, **kwargs):
        """Map ``x`` to table values (vocabulary mode) or to hash buckets."""
        already_string = x.dtype == tf.string
        # String form of zero in the input's dtype, used for the mask_zero comparison.
        zero_dtype = 'int32' if already_string else x.dtype
        zero = tf.as_string(tf.zeros([1], dtype=zero_dtype))
        if not already_string:
            x = tf.as_string(x, )

        if self.vocabulary_path:
            return self.hash_table.lookup(x)

        # With mask_zero one bucket is reserved for the value 0.
        bucket_count = self.num_buckets - 1 if self.mask_zero else self.num_buckets
        try:
            hashed = tf.string_to_hash_bucket_fast(x, bucket_count,
                                                   name=None)  # weak hash
        except AttributeError:
            hashed = tf.strings.to_hash_bucket_fast(x, bucket_count,
                                                    name=None)  # weak hash
        if self.mask_zero:
            nonzero = tf.cast(tf.not_equal(x, zero), dtype='int64')
            hashed = (hashed + 1) * nonzero

        return hashed

    def compute_output_shape(self, input_shape):
        return input_shape

    def get_config(self, ):
        merged = dict(super(Hash, self).get_config())
        merged.update({'num_buckets': self.num_buckets, 'mask_zero': self.mask_zero,
                       'vocabulary_path': self.vocabulary_path,
                       'default_value': self.default_value})
        return merged
class Linear(Layer):
    """Linear logit layer with three input modes.

      Arguments
        - **l2_reg**: float. L2 regularizer strength applied to the kernel.
        - **mode**: 0 sums the input over its last axis; 1 multiplies the input by a learned kernel; 2 takes ``[sparse_input, dense_input]`` and adds both terms.
        - **use_bias**: bool. Whether to add a scalar bias.
        - **seed**: A Python integer to use as random seed.
    """

    def __init__(self, l2_reg=0.0, mode=0, use_bias=False, seed=1024, **kwargs):
        if mode not in [0, 1, 2]:
            raise ValueError("mode must be 0,1 or 2")

        self.l2_reg = l2_reg
        self.mode = mode
        self.use_bias = use_bias
        self.seed = seed
        super(Linear, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the optional bias and, for modes 1 and 2, the kernel."""
        if self.use_bias:
            self.bias = self.add_weight(name='linear_bias',
                                        shape=(1,),
                                        initializer=Zeros(),
                                        trainable=True)

        if self.mode == 1 or self.mode == 2:
            # In mode 2 the dense input is the second element of the input list.
            dense_shape = input_shape if self.mode == 1 else input_shape[1]
            self.kernel = self.add_weight(
                'linear_kernel',
                shape=[int(dense_shape[-1]), 1],
                initializer=glorot_normal(self.seed),
                regularizer=l2(self.l2_reg),
                trainable=True)

        super(Linear, self).build(input_shape)  # Be sure to call this somewhere!

    def call(self, inputs, **kwargs):
        """Compute the linear logit for the configured mode."""
        if self.mode == 0:
            linear_logit = reduce_sum(inputs, axis=-1, keep_dims=True)
        elif self.mode == 1:
            linear_logit = tf.tensordot(inputs, self.kernel, axes=(-1, 0))
        else:
            sparse_input, dense_input = inputs
            dense_term = tf.tensordot(dense_input, self.kernel, axes=(-1, 0))
            linear_logit = reduce_sum(sparse_input, axis=-1, keep_dims=False) + dense_term

        if self.use_bias:
            linear_logit += self.bias

        return linear_logit

    def compute_output_shape(self, input_shape):
        return (None, 1)

    def compute_mask(self, inputs, mask):
        return None

    def get_config(self, ):
        merged = dict(super(Linear, self).get_config())
        merged.update({'mode': self.mode, 'l2_reg': self.l2_reg,
                       'use_bias': self.use_bias, 'seed': self.seed})
        return merged
class Concat(Layer):
    """Concatenates a list of tensors along ``axis`` with optional mask support.

      Arguments
        - **axis**: int. Axis passed to ``tf.concat``.
        - **supports_masking**: bool. If False, ``compute_mask`` always returns ``None``.
    """

    def __init__(self, axis, supports_masking=True, **kwargs):
        super(Concat, self).__init__(**kwargs)
        self.axis = axis
        self.supports_masking = supports_masking

    def call(self, inputs):
        return tf.concat(inputs, axis=self.axis)

    def compute_mask(self, inputs, mask=None):
        """Combine the per-input masks into one output mask.

        :raises ValueError: if ``mask`` or ``inputs`` is not a list, or their lengths differ.
        :return: ``None`` if masking is disabled or no input carries a mask,
            otherwise the concatenated masks reduced with logical AND over the last axis.
        """
        if not self.supports_masking:
            return None
        if mask is None:
            # Fall back to the masks attached to the input tensors, if any.
            mask = [getattr(inputs_i, "_keras_mask", None) for inputs_i in inputs]
        if not isinstance(mask, list):
            raise ValueError('`mask` should be a list.')
        if not isinstance(inputs, list):
            raise ValueError('`inputs` should be a list.')
        if len(mask) != len(inputs):
            raise ValueError('The lists `inputs` and `mask` '
                             'should have the same length.')
        if all([m is None for m in mask]):
            return None
        # Make a list of masks while making sure
        # the dimensionality of each mask
        # is the same as the corresponding input.
        masks = []
        for input_i, mask_i in zip(inputs, mask):
            if mask_i is None:
                # Input is unmasked. Append all 1s to masks,
                masks.append(tf.ones_like(input_i, dtype='bool'))
            elif K.ndim(mask_i) < K.ndim(input_i):
                # Mask is smaller than the input, expand it
                masks.append(tf.expand_dims(mask_i, axis=-1))
            else:
                masks.append(mask_i)
        concatenated = K.concatenate(masks, axis=self.axis)
        return K.all(concatenated, axis=-1, keepdims=False)

    def get_config(self, ):
        config = {'axis': self.axis, 'supports_masking': self.supports_masking}
        base_config = super(Concat, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
def concat_func(inputs, axis=-1, mask=False):
    """Concatenate ``inputs`` along ``axis``.

    A single-element input is returned directly, wrapped in ``NoMask`` when
    ``mask`` is falsy; otherwise a ``Concat`` layer is applied.
    """
    if len(inputs) != 1:
        return Concat(axis, supports_masking=mask)(inputs)

    only_input = inputs[0]
    if mask:
        return only_input
    return NoMask()(only_input)
def reduce_mean(input_tensor,
                axis=None,
                keep_dims=False,
                name=None,
                reduction_indices=None):
    """Call ``tf.reduce_mean`` with whichever keyword spelling it accepts.

    The ``keep_dims`` / ``reduction_indices`` form is tried first; on
    ``TypeError`` the call is retried with ``keepdims`` and without
    ``reduction_indices``.
    """
    try:
        result = tf.reduce_mean(input_tensor,
                                axis=axis,
                                keep_dims=keep_dims,
                                name=name,
                                reduction_indices=reduction_indices)
    except TypeError:
        result = tf.reduce_mean(input_tensor,
                                axis=axis,
                                keepdims=keep_dims,
                                name=name)
    return result
def reduce_sum(input_tensor,
               axis=None,
               keep_dims=False,
               name=None,
               reduction_indices=None):
    """Call ``tf.reduce_sum`` with whichever keyword spelling it accepts.

    The ``keep_dims`` / ``reduction_indices`` form is tried first; on
    ``TypeError`` the call is retried with ``keepdims`` and without
    ``reduction_indices``.
    """
    try:
        result = tf.reduce_sum(input_tensor,
                               axis=axis,
                               keep_dims=keep_dims,
                               name=name,
                               reduction_indices=reduction_indices)
    except TypeError:
        result = tf.reduce_sum(input_tensor,
                               axis=axis,
                               keepdims=keep_dims,
                               name=name)
    return result
def reduce_max(input_tensor,
               axis=None,
               keep_dims=False,
               name=None,
               reduction_indices=None):
    """Call ``tf.reduce_max`` with whichever keyword spelling it accepts.

    The ``keep_dims`` / ``reduction_indices`` form is tried first; on
    ``TypeError`` the call is retried with ``keepdims`` and without
    ``reduction_indices``.
    """
    try:
        result = tf.reduce_max(input_tensor,
                               axis=axis,
                               keep_dims=keep_dims,
                               name=name,
                               reduction_indices=reduction_indices)
    except TypeError:
        result = tf.reduce_max(input_tensor,
                               axis=axis,
                               keepdims=keep_dims,
                               name=name)
    return result
def div(x, y, name=None):
    """Divide ``x`` by ``y`` via ``tf.div``, falling back to ``tf.divide`` on ``AttributeError``."""
    try:
        quotient = tf.div(x, y, name=name)
    except AttributeError:
        quotient = tf.divide(x, y, name=name)
    return quotient
def softmax(logits, dim=-1, name=None):
    """Apply ``tf.nn.softmax`` along ``dim``.

    The ``dim=`` keyword is tried first; on ``TypeError`` the call is retried
    with ``axis=``.
    """
    try:
        probabilities = tf.nn.softmax(logits, dim=dim, name=name)
    except TypeError:
        probabilities = tf.nn.softmax(logits, axis=dim, name=name)
    return probabilities
class _Add(Layer):
    """Sums a list of tensors; an empty list yields the constant ``[[0.0]]``."""

    def __init__(self, **kwargs):
        super(_Add, self).__init__(**kwargs)

    def build(self, input_shape):
        # Be sure to call this somewhere!
        super(_Add, self).build(input_shape)

    def call(self, inputs, **kwargs):
        if len(inputs) != 0:
            return Add()(inputs)
        return tf.constant([[0.0]])
def add_func(inputs):
    """Sum a list of tensors.

    Non-list inputs are returned unchanged, a one-element list returns its only
    element, and any other list is passed through an ``_Add`` layer.
    """
    if isinstance(inputs, list):
        if len(inputs) == 1:
            return inputs[0]
        return _Add()(inputs)
    return inputs
def combined_dnn_input(sparse_embedding_list, dense_value_list):
    """Flatten and concatenate sparse embeddings and dense values into one DNN input.

    :param sparse_embedding_list: list of sparse embedding tensors (may be empty).
    :param dense_value_list: list of dense value tensors (may be empty).
    :raises NotImplementedError: if both lists are empty.
    """
    has_sparse = len(sparse_embedding_list) > 0
    has_dense = len(dense_value_list) > 0

    if not has_sparse and not has_dense:
        raise NotImplementedError("dnn_feature_columns can not be empty list")

    if has_sparse and has_dense:
        sparse_part = Flatten()(concat_func(sparse_embedding_list))
        dense_part = Flatten()(concat_func(dense_value_list))
        return concat_func([sparse_part, dense_part])

    present = sparse_embedding_list if has_sparse else dense_value_list
    return Flatten()(concat_func(present))
================================================
FILE: deepctr/models/__init__.py
================================================
from .afm import AFM
from .autoint import AutoInt
from .ccpm import CCPM
from .dcn import DCN
from .dcnmix import DCNMix
from .deepfefm import DeepFEFM
from .deepfm import DeepFM
from .difm import DIFM
from .fgcnn import FGCNN
from .fibinet import FiBiNET
from .flen import FLEN
from .fnn import FNN
from .fwfm import FwFM
from .ifm import IFM
from .mlr import MLR
from .multitask import SharedBottom, ESMM, MMOE, PLE
from .nfm import NFM
from .onn import ONN
from .pnn import PNN
from .sequence import DIN, DIEN, DSIN, BST
from .wdl import WDL
from .xdeepfm import xDeepFM
from .edcn import EDCN
# Public model constructors re-exported by this package (each name listed once).
__all__ = ["AFM", "CCPM", "DCN", "IFM", "DIFM", "DCNMix", "MLR", "DeepFM", "NFM", "DIN", "DIEN", "FNN", "PNN",
           "WDL", "xDeepFM", "AutoInt", "ONN", "FGCNN", "DSIN", "FiBiNET", 'FLEN', "FwFM", "BST", "DeepFEFM",
           "SharedBottom", "ESMM", "MMOE", "PLE", 'EDCN']
================================================
FILE: deepctr/models/afm.py
================================================
# -*- coding:utf-8 -*-
"""
Author:
Weichen Shen, weichenswc@163.com
Reference:
[1] Xiao J, Ye H, He X, et al. Attentional factorization machines: Learning the weight of feature interactions via attention networks[J]. arXiv preprint arXiv:1708.04617, 2017.
(https://arxiv.org/abs/1708.04617)
"""
from tensorflow.python.keras.models import Model
from ..feature_column import build_input_features, get_linear_logit, DEFAULT_GROUP_NAME, input_from_feature_columns
from ..layers.core import PredictionLayer
from ..layers.interaction import AFMLayer, FM
from ..layers.utils import concat_func, add_func
def AFM(linear_feature_columns, dnn_feature_columns, fm_group=DEFAULT_GROUP_NAME, use_attention=True,
        attention_factor=8,
        l2_reg_linear=1e-5, l2_reg_embedding=1e-5, l2_reg_att=1e-5, afm_dropout=0, seed=1024,
        task='binary'):
    """Instantiates the Attentional Factorization Machine architecture.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param fm_group: list, group_name of features that will be used to do feature interactions. A single group name given as a string is also accepted.
    :param use_attention: bool,whether use attention or not,if set to ``False``.it is the same as **standard Factorization Machine**
    :param attention_factor: positive integer,units in attention net
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_att: float. L2 regularizer strength applied to attention net
    :param afm_dropout: float in [0,1), Fraction of the attention net output units to dropout.
    :param seed: integer ,to use as random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    # A bare string would turn ``k in fm_group`` into a substring test, so a
    # group whose name is contained in the string would match by accident.
    if isinstance(fm_group, str):
        fm_group = (fm_group,)

    features = build_input_features(
        linear_feature_columns + dnn_feature_columns)

    inputs_list = list(features.values())

    group_embedding_dict, _ = input_from_feature_columns(features, dnn_feature_columns, l2_reg_embedding,
                                                         seed, support_dense=False, support_group=True)

    linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed, prefix='linear',
                                    l2_reg=l2_reg_linear)

    # One interaction logit per selected feature group, summed afterwards.
    if use_attention:
        fm_logit = add_func([AFMLayer(attention_factor, l2_reg_att, afm_dropout,
                                      seed)(list(v)) for k, v in group_embedding_dict.items() if k in fm_group])
    else:
        fm_logit = add_func([FM()(concat_func(v, axis=1))
                             for k, v in group_embedding_dict.items() if k in fm_group])

    final_logit = add_func([linear_logit, fm_logit])
    output = PredictionLayer(task)(final_logit)

    model = Model(inputs=inputs_list, outputs=output)
    return model
================================================
FILE: deepctr/models/autoint.py
================================================
# -*- coding:utf-8 -*-
"""
Author:
Weichen Shen, weichenswc@163.com
Reference:
[1] Song W, Shi C, Xiao Z, et al. AutoInt: Automatic Feature Interaction Learning via Self-Attentive Neural Networks[J]. arXiv preprint arXiv:1810.11921, 2018.(https://arxiv.org/abs/1810.11921)
"""
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.layers import Flatten, Concatenate, Dense
from ..feature_column import build_input_features, get_linear_logit, input_from_feature_columns
from ..layers.core import PredictionLayer, DNN
from ..layers.interaction import InteractingLayer
from ..layers.utils import concat_func, add_func, combined_dnn_input
def AutoInt(linear_feature_columns, dnn_feature_columns, att_layer_num=3, att_embedding_size=8, att_head_num=2,
            att_res=True,
            dnn_hidden_units=(256, 128, 64), dnn_activation='relu', l2_reg_linear=1e-5,
            l2_reg_embedding=1e-5, l2_reg_dnn=0, dnn_use_bn=False, dnn_dropout=0, seed=1024,
            task='binary', ):
    """Build the AutoInt model: stacked interacting (self-attention) layers, optionally combined with a DNN.

    The interacting branch and the DNN branch are concatenated when both are enabled; whichever
    branch is enabled alone is used directly. The result is projected to one unit and added to
    the linear logit.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param att_layer_num: int. Number of InteractingLayer blocks to stack.
    :param att_embedding_size: int. The embedding size in multi-head self-attention network.
    :param att_head_num: int. The head number in multi-head self-attention network.
    :param att_res: bool. Whether or not use standard residual connections before output.
    :param dnn_hidden_units: list, list of positive integer or empty list, the layer number and units in each layer of DNN
    :param dnn_activation: Activation function to use in DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param seed: integer, to use as random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    :raises ValueError: if ``dnn_hidden_units`` is empty and ``att_layer_num`` is not positive.
    """
    has_dnn = len(dnn_hidden_units) > 0
    has_att = att_layer_num > 0
    if not (has_dnn or has_att):
        raise ValueError("Either hidden_layer or att_layer_num must > 0")

    # NOTE(review): inputs are built from dnn_feature_columns only, so every linear feature
    # presumably has to appear in dnn_feature_columns as well - confirm against callers.
    features = build_input_features(dnn_feature_columns)
    inputs_list = list(features.values())

    linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed, prefix='linear',
                                    l2_reg=l2_reg_linear)
    sparse_embedding_list, dense_value_list = input_from_feature_columns(
        features, dnn_feature_columns, l2_reg_embedding, seed)

    # Interacting branch: stack att_layer_num attention blocks, then flatten.
    interacted = concat_func(sparse_embedding_list, axis=1)
    for _layer_idx in range(att_layer_num):
        interacted = InteractingLayer(att_embedding_size, att_head_num, att_res)(interacted)
    att_output = Flatten()(interacted)

    dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)

    if has_dnn:
        deep_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn,
                       seed=seed)(dnn_input)
        # Deep & Interacting when attention is enabled, otherwise Deep only.
        head_input = Concatenate()([att_output, deep_out]) if has_att else deep_out
    else:
        # Only the interacting branch is active (guaranteed by the guard above).
        head_input = att_output

    branch_logit = Dense(1, use_bias=False)(head_input)
    final_logit = add_func([branch_logit, linear_logit])

    output = PredictionLayer(task)(final_logit)
    return Model(inputs=inputs_list, outputs=output)
================================================
FILE: deepctr/models/ccpm.py
================================================
# -*- coding:utf-8 -*-
"""
Author:
Weichen Shen, weichenswc@163.com
Reference:
[1] Liu Q, Yu F, Wu S, et al. A convolutional click prediction model[C]//Proceedings of the 24th ACM International on Conference on Information and Knowledge Management. ACM, 2015: 1743-1746.
(http://ir.ia.ac.cn/bitstream/173211/12337/1/A%20Convolutional%20Click%20Prediction%20Model.pdf)
"""
import tensorflow as tf
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.layers import Dense, Flatten, Conv2D, Lambda
from ..feature_column import build_input_features, get_linear_logit, input_from_feature_columns
from ..layers.core import DNN, PredictionLayer
from ..layers.sequence import KMaxPooling
from ..layers.utils import concat_func, add_func
def CCPM(linear_feature_columns, dnn_feature_columns, conv_kernel_width=(6, 5), conv_filters=(4, 4),
         dnn_hidden_units=(128, 64), l2_reg_linear=1e-5, l2_reg_embedding=1e-5, l2_reg_dnn=0, dnn_dropout=0,
         seed=1024, task='binary'):
    """Instantiates the Convolutional Click Prediction Model architecture.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param conv_kernel_width: list, list of positive integer or empty list, the width of filter in each conv layer.
    :param conv_filters: list, list of positive integer or empty list, the number of filters in each conv layer.
    :param dnn_hidden_units: list, list of positive integer or empty list, the layer number and units in each layer of DNN.
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param seed: integer, to use as random seed (forwarded to the linear part, the embeddings and the DNN).
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    :raises ValueError: if ``conv_kernel_width`` and ``conv_filters`` differ in length.
    """
    if len(conv_kernel_width) != len(conv_filters):
        raise ValueError(
            "conv_kernel_width must have same element with conv_filters")

    features = build_input_features(
        linear_feature_columns + dnn_feature_columns)
    inputs_list = list(features.values())

    linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed,
                                    l2_reg=l2_reg_linear)

    # Dense features are not supported by this model (support_dense=False).
    sparse_embedding_list, _ = input_from_feature_columns(features, dnn_feature_columns, l2_reg_embedding,
                                                          seed, support_dense=False)

    field_num = len(sparse_embedding_list)
    conv_layer_num = len(conv_filters)

    conv_input = concat_func(sparse_embedding_list, axis=1)
    # Append a trailing axis so the concatenated embeddings can be fed to Conv2D.
    pooling_result = Lambda(
        lambda x: tf.expand_dims(x, axis=3))(conv_input)

    for i, (filters, width) in enumerate(zip(conv_filters, conv_kernel_width), start=1):
        # k for k-max pooling: derived from the layer position for intermediate layers
        # (never below 1), fixed to 3 for the last conv layer.
        if i < conv_layer_num:
            k = max(1, int((1 - pow(i / conv_layer_num, conv_layer_num - i)) * field_num))
        else:
            k = 3

        conv_result = Conv2D(filters=filters, kernel_size=(width, 1), strides=(1, 1), padding='same',
                             activation='tanh', use_bias=True, )(pooling_result)
        # Never ask for more elements than the axis actually holds.
        pooling_result = KMaxPooling(
            k=min(k, int(conv_result.shape[1])), axis=1)(conv_result)

    flatten_result = Flatten()(pooling_result)
    # Forward the caller's seed so it also governs the DNN, as it does for the other sub-networks.
    dnn_out = DNN(dnn_hidden_units, l2_reg=l2_reg_dnn, dropout_rate=dnn_dropout, seed=seed)(flatten_result)
    dnn_logit = Dense(1, use_bias=False)(dnn_out)

    final_logit = add_func([dnn_logit, linear_logit])

    output = PredictionLayer(task)(final_logit)
    model = Model(inputs=inputs_list, outputs=output)
    return model
================================================
FILE: deepctr/models/dcn.py
================================================
# -*- coding:utf-8 -*-
"""
Author:
Weichen Shen, weichenswc@163.com
Shuxun Zan, zanshuxun@aliyun.com
Reference:
[1] Wang R, Fu B, Fu G, et al. Deep & cross network for ad click predictions[C]//Proceedings of the ADKDD'17. ACM, 2017: 12. (https://arxiv.org/abs/1708.05123)
[2] Wang R, Shivanna R, Cheng D Z, et al. DCN-M: Improved Deep & Cross Network for Feature Cross Learning in Web-scale Learning to Rank Systems[J]. 2020. (https://arxiv.org/abs/2008.13535)
"""
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.layers import Dense, Concatenate
from ..feature_column import build_input_features, get_linear_logit, input_from_feature_columns
from ..layers.core import PredictionLayer, DNN
from ..layers.interaction import CrossNet
from ..layers.utils import add_func, combined_dnn_input
def DCN(linear_feature_columns, dnn_feature_columns, cross_num=2, cross_parameterization='vector',
        dnn_hidden_units=(256, 128, 64), l2_reg_linear=1e-5, l2_reg_embedding=1e-5,
        l2_reg_cross=1e-5, l2_reg_dnn=0, seed=1024, dnn_dropout=0, dnn_use_bn=False,
        dnn_activation='relu', task='binary'):
    """Build the Deep & Cross Network: a cross network and/or a DNN over the same input, plus a linear part.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param cross_num: positive integer, cross layer number
    :param cross_parameterization: str, ``"vector"`` or ``"matrix"``, how to parameterize the cross network.
    :param dnn_hidden_units: list, list of positive integer or empty list, the layer number and units in each layer of DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_cross: float. L2 regularizer strength applied to cross net
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not DNN
    :param dnn_activation: Activation function to use in DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    :raises ValueError: if ``dnn_hidden_units`` is empty and ``cross_num`` equals 0.
    :raises NotImplementedError: if neither branch is usable (empty ``dnn_hidden_units`` with a negative ``cross_num``).
    """
    if cross_num == 0 and len(dnn_hidden_units) == 0:
        raise ValueError("Either hidden_layer or cross layer must > 0")

    # NOTE(review): inputs are built from dnn_feature_columns only, so every linear feature
    # presumably has to appear in dnn_feature_columns as well - confirm against callers.
    features = build_input_features(dnn_feature_columns)
    inputs_list = list(features.values())

    linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed, prefix='linear',
                                    l2_reg=l2_reg_linear)
    sparse_embedding_list, dense_value_list = input_from_feature_columns(
        features, dnn_feature_columns, l2_reg_embedding, seed)
    dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)

    has_deep = len(dnn_hidden_units) > 0
    has_cross = cross_num > 0

    if has_deep:
        deep_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn,
                       seed=seed)(dnn_input)
        if has_cross:
            # Deep & Cross: both branches read the same input and are concatenated.
            cross_out = CrossNet(cross_num, parameterization=cross_parameterization,
                                 l2_reg=l2_reg_cross)(dnn_input)
            head_input = Concatenate()([cross_out, deep_out])
        else:
            # Only Deep
            head_input = deep_out
    elif has_cross:
        # Only Cross
        head_input = CrossNet(cross_num, parameterization=cross_parameterization,
                              l2_reg=l2_reg_cross)(dnn_input)
    else:
        raise NotImplementedError

    branch_logit = Dense(1, use_bias=False)(head_input)
    final_logit = add_func([branch_logit, linear_logit])

    output = PredictionLayer(task)(final_logit)
    return Model(inputs=inputs_list, outputs=output)
================================================
FILE: deepctr/models/dcnmix.py
================================================
# -*- coding:utf-8 -*-
"""
Author:
Weichen Shen, weichenswc@163.com
Shuxun Zan, zanshuxun@aliyun.com
Reference:
[1] Wang R, Fu B, Fu G, et al. Deep & cross network for ad click predictions[C]//Proceedings of the ADKDD'17. ACM, 2017: 12. (https://arxiv.org/abs/1708.05123)
[2] Wang R, Shivanna R, Cheng D Z, et al. DCN V2: Improved Deep & Cross Network and Practical Lessons for Web-scale Learning to Rank Systems[J]. 2020. (https://arxiv.org/abs/2008.13535)
"""
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.layers import Dense, Concatenate
from ..feature_column import build_input_features, get_linear_logit, input_from_feature_columns
from ..layers.core import PredictionLayer, DNN
from ..layers.interaction import CrossNetMix
from ..layers.utils import add_func, combined_dnn_input
def DCNMix(linear_feature_columns, dnn_feature_columns, cross_num=2,
           dnn_hidden_units=(256, 128, 64), l2_reg_linear=1e-5, l2_reg_embedding=1e-5, low_rank=32, num_experts=4,
           l2_reg_cross=1e-5, l2_reg_dnn=0, seed=1024, dnn_dropout=0, dnn_use_bn=False,
           dnn_activation='relu', task='binary'):
    """Build the Deep & Cross Network whose cross part is a low-rank mixture of experts (``CrossNetMix``).

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param cross_num: positive integer, cross layer number
    :param dnn_hidden_units: list, list of positive integer or empty list, the layer number and units in each layer of DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param low_rank: Positive integer, dimensionality of low-rank space.
    :param num_experts: Positive integer, number of experts.
    :param l2_reg_cross: float. L2 regularizer strength applied to cross net
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not DNN
    :param dnn_activation: Activation function to use in DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    :raises ValueError: if ``dnn_hidden_units`` is empty and ``cross_num`` equals 0.
    :raises NotImplementedError: if neither branch is usable (empty ``dnn_hidden_units`` with a negative ``cross_num``).
    """
    if cross_num == 0 and len(dnn_hidden_units) == 0:
        raise ValueError("Either hidden_layer or cross layer must > 0")

    # NOTE(review): inputs are built from dnn_feature_columns only, so every linear feature
    # presumably has to appear in dnn_feature_columns as well - confirm against callers.
    features = build_input_features(dnn_feature_columns)
    inputs_list = list(features.values())

    linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed, prefix='linear',
                                    l2_reg=l2_reg_linear)
    sparse_embedding_list, dense_value_list = input_from_feature_columns(
        features, dnn_feature_columns, l2_reg_embedding, seed)
    dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)

    has_deep = len(dnn_hidden_units) > 0
    has_cross = cross_num > 0

    if has_deep:
        deep_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn,
                       seed=seed)(dnn_input)
        if has_cross:
            # Deep & Cross: both branches read the same input and are concatenated.
            cross_out = CrossNetMix(low_rank=low_rank, num_experts=num_experts, layer_num=cross_num,
                                    l2_reg=l2_reg_cross)(dnn_input)
            head_input = Concatenate()([cross_out, deep_out])
        else:
            # Only Deep
            head_input = deep_out
    elif has_cross:
        # Only Cross
        head_input = CrossNetMix(low_rank=low_rank, num_experts=num_experts, layer_num=cross_num,
                                 l2_reg=l2_reg_cross)(dnn_input)
    else:
        raise NotImplementedError

    branch_logit = Dense(1, use_bias=False)(head_input)
    final_logit = add_func([branch_logit, linear_logit])

    output = PredictionLayer(task)(final_logit)
    return Model(inputs=inputs_list, outputs=output)
================================================
FILE: deepctr/models/deepfefm.py
================================================
# -*- coding:utf-8 -*-
"""
Author:
Harshit Pande
Reference:
[1] Field-Embedded Factorization Machines for Click-through Rate Prediction
(https://arxiv.org/pdf/2009.09931.pdf)
this file also supports all the possible Ablation studies for reproducibility
"""
from itertools import chain
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.layers import Dense, Lambda
from ..feature_column import input_from_feature_columns, get_linear_logit, build_input_features, DEFAULT_GROUP_NAME
from ..layers.core import PredictionLayer, DNN
from ..layers.interaction import FEFMLayer
from ..layers.utils import concat_func, combined_dnn_input, reduce_sum, add_func
def DeepFEFM(linear_feature_columns, dnn_feature_columns, use_fefm=True,
             dnn_hidden_units=(256, 128, 64), l2_reg_linear=0.00001, l2_reg_embedding_feat=0.00001,
             l2_reg_embedding_field=0.00001, l2_reg_dnn=0, seed=1024, dnn_dropout=0.0,
             exclude_feature_embed_in_dnn=False,
             use_linear=True, use_fefm_embed_in_dnn=True, dnn_activation='relu', dnn_use_bn=False, task='binary'):
    """Instantiates the DeepFEFM Network architecture or the shallow FEFM architecture (Ablation studies supported)

    The final logit is the sum of whichever of the linear, FEFM and DNN logits are enabled
    (``use_linear``, ``use_fefm``, non-empty ``dnn_hidden_units``).

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param use_fefm: bool, use FEFM logit or not (doesn't affect FEFM embeddings in DNN, controls only the use of final FEFM logit)
    :param dnn_hidden_units: list, list of positive integer or empty list, the layer number and units in each layer of DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding_feat: float. L2 regularizer strength applied to embedding vector of features
    :param l2_reg_embedding_field: float, L2 regularizer to field embeddings
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param exclude_feature_embed_in_dnn: bool, used in ablation studies for removing feature embeddings in DNN
        (only takes effect when ``use_fefm_embed_in_dnn`` is true)
    :param use_linear: bool, used in ablation studies
    :param use_fefm_embed_in_dnn: bool, True if FEFM interaction embeddings are to be used in the DNN input (set False for Ablation)
    :param dnn_activation: Activation function to use in DNN
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    :raises NotImplementedError: if ``dnn_hidden_units`` is empty and both ``use_fefm`` and ``use_linear`` are false.
    """
    use_dnn = len(dnn_hidden_units) > 0
    # Flags are evaluated by truthiness so that non-``bool`` values such as ``numpy.bool_``
    # behave like their ``bool`` counterparts. Fail before building any layer when no
    # component would contribute to the logit.
    if not (use_dnn or use_fefm or use_linear):
        raise NotImplementedError

    features = build_input_features(linear_feature_columns + dnn_feature_columns)
    inputs_list = list(features.values())

    linear_logit = get_linear_logit(features, linear_feature_columns, l2_reg=l2_reg_linear, seed=seed, prefix='linear')

    group_embedding_dict, dense_value_list = input_from_feature_columns(features, dnn_feature_columns,
                                                                        l2_reg_embedding_feat,
                                                                        seed, support_group=True)

    # FEFM interactions are computed on the default feature group only.
    fefm_interaction_embedding = concat_func([FEFMLayer(
        regularizer=l2_reg_embedding_field)(concat_func(v, axis=1))
                                              for k, v in group_embedding_dict.items() if k in [DEFAULT_GROUP_NAME]],
                                             axis=1)

    dnn_input = combined_dnn_input(list(chain.from_iterable(group_embedding_dict.values())), dense_value_list)

    # if use_fefm_embed_in_dnn is set to False it is Ablation4 (Use false only for Ablation)
    if use_fefm_embed_in_dnn:
        if exclude_feature_embed_in_dnn:
            # Ablation3: remove feature vector embeddings from the DNN input
            dnn_input = fefm_interaction_embedding
        else:
            # No ablation
            dnn_input = concat_func([dnn_input, fefm_interaction_embedding], axis=1)

    dnn_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)(dnn_input)
    dnn_logit = Dense(1, use_bias=False, )(dnn_out)

    fefm_logit = Lambda(lambda x: reduce_sum(x, axis=1, keep_dims=True))(fefm_interaction_embedding)

    # Collect the enabled components in the fixed order linear -> FEFM -> Deep.
    # linear + Deep is Ablation1, FEFM + Deep is Ablation2, FEFM alone is the shallow model.
    logit_terms = []
    if use_linear:
        logit_terms.append(linear_logit)
    if use_fefm:
        logit_terms.append(fefm_logit)
    if use_dnn:
        logit_terms.append(dnn_logit)

    # A single enabled component is used as-is; several are summed.
    final_logit = logit_terms[0] if len(logit_terms) == 1 else add_func(logit_terms)

    output = PredictionLayer(task)(final_logit)
    model = Model(inputs=inputs_list, outputs=output)
    return model
================================================
FILE: deepctr/models/deepfm.py
================================================
# -*- coding:utf-8 -*-
"""
Author:
Weichen Shen, weichenswc@163.com
Reference:
[1] Guo H, Tang R, Ye Y, et al. Deepfm: a factorization-machine based neural network for ctr prediction[J]. arXiv preprint arXiv:1703.04247, 2017.(https://arxiv.org/abs/1703.04247)
"""
from itertools import chain
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.layers import Dense
from ..feature_column import build_input_features, get_linear_logit, DEFAULT_GROUP_NAME, input_from_feature_columns
from ..layers.core import PredictionLayer, DNN
from ..layers.interaction import FM
from ..layers.utils import concat_func, add_func, combined_dnn_input
def DeepFM(linear_feature_columns, dnn_feature_columns, fm_group=(DEFAULT_GROUP_NAME,), dnn_hidden_units=(256, 128, 64),
           l2_reg_linear=0.00001, l2_reg_embedding=0.00001, l2_reg_dnn=0, seed=1024, dnn_dropout=0,
           dnn_activation='relu', dnn_use_bn=False, task='binary'):
    """Build the DeepFM model: the sum of a linear logit, an FM logit and a DNN logit.

    :param linear_feature_columns: An iterable containing all the features used by the linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by the deep part of the model.
    :param fm_group: list, group_name of features that will be used to do feature interactions.
    :param dnn_hidden_units: list, list of positive integer or empty list, the layer number and units in each layer of DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    all_columns = linear_feature_columns + dnn_feature_columns
    features = build_input_features(all_columns)
    inputs_list = list(features.values())

    linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed, prefix='linear',
                                    l2_reg=l2_reg_linear)

    group_embedding_dict, dense_value_list = input_from_feature_columns(
        features, dnn_feature_columns, l2_reg_embedding, seed, support_group=True)

    # One FM term per embedding group selected by fm_group; the terms are then summed.
    fm_terms = []
    for group_name, group_embeddings in group_embedding_dict.items():
        if group_name in fm_group:
            fm_terms.append(FM()(concat_func(group_embeddings, axis=1)))
    fm_logit = add_func(fm_terms)

    # The DNN sees the embeddings of every group, not only those in fm_group.
    every_embedding = list(chain.from_iterable(group_embedding_dict.values()))
    dnn_input = combined_dnn_input(every_embedding, dense_value_list)
    dnn_output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn,
                     seed=seed)(dnn_input)
    dnn_logit = Dense(1, use_bias=False)(dnn_output)

    final_logit = add_func([linear_logit, fm_logit, dnn_logit])

    output = PredictionLayer(task)(final_logit)
    return Model(inputs=inputs_list, outputs=output)
================================================
FILE: deepctr/models/difm.py
================================================
# -*- coding:utf-8 -*-
"""
Author:
zanshuxun, zanshuxun@aliyun.com
Reference:
[1] Lu W, Yu Y, Chang Y, et al. A Dual Input-aware Factorization Machine for CTR Prediction[C]
//IJCAI. 2020: 3139-3145.(https://www.ijcai.org/Proceedings/2020/0434.pdf)
"""
import tensorflow as tf
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.layers import Dense, Lambda, Flatten
from ..feature_column import build_input_features, get_linear_logit, input_from_feature_columns, SparseFeat, \
VarLenSparseFeat
from ..layers.core import PredictionLayer, DNN
from ..layers.interaction import FM, InteractingLayer
from ..layers.utils import concat_func, add_func, combined_dnn_input
def DIFM(linear_feature_columns, dnn_feature_columns,
         att_embedding_size=8, att_head_num=8, att_res=True, dnn_hidden_units=(256, 128, 64),
         l2_reg_linear=0.00001, l2_reg_embedding=0.00001, l2_reg_dnn=0, seed=1024, dnn_dropout=0,
         dnn_activation='relu', dnn_use_bn=False, task='binary'):
    """Build the DIFM model: an FM whose linear weights and embeddings are rescaled by an input-aware factor.

    The factor is the sum of a vector-wise part (from an interacting layer) and a bit-wise part
    (from a DNN); it has one entry per sparse / variable-length sparse feature column.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param att_embedding_size: integer, the embedding size in multi-head self-attention network.
    :param att_head_num: int. The head number in multi-head self-attention network.
    :param att_res: bool. Whether or not use standard residual connections before output.
    :param dnn_hidden_units: list, list of positive integer, the layer number and units in each layer of DNN (must not be empty)
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    :raises ValueError: if ``dnn_hidden_units`` is empty, or if no sparse embedding is produced.
    """
    if len(dnn_hidden_units) == 0:
        raise ValueError("dnn_hidden_units is null!")

    features = build_input_features(
        linear_feature_columns + dnn_feature_columns)
    inputs_list = list(features.values())

    # Width of the input-aware factor: one weight per (var-len) sparse feature column.
    sparse_feat_num = sum(
        1 for column in dnn_feature_columns
        if isinstance(column, (SparseFeat, VarLenSparseFeat)))

    sparse_embedding_list, _ = input_from_feature_columns(features, dnn_feature_columns,
                                                          l2_reg_embedding, seed)
    if len(sparse_embedding_list) == 0:
        raise ValueError("there are no sparse features")

    # Vector-wise part of the factor.
    stacked_embeddings = concat_func(sparse_embedding_list, axis=1)
    att_out = InteractingLayer(att_embedding_size, att_head_num, att_res, scaling=True)(stacked_embeddings)
    att_out = Flatten()(att_out)
    m_vec = Dense(sparse_feat_num, use_bias=False)(att_out)

    # Bit-wise part of the factor; only the sparse embeddings are fed to the DNN (no dense values).
    dnn_input = combined_dnn_input(sparse_embedding_list, [])
    dnn_output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn,
                     seed=seed)(dnn_input)
    m_bit = Dense(sparse_feat_num, use_bias=False)(dnn_output)

    input_aware_factor = add_func([m_vec, m_bit])  # the complete input-aware factor m_x

    linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed, prefix='linear',
                                    l2_reg=l2_reg_linear, sparse_feat_refine_weight=input_aware_factor)

    # Scale every field embedding by its factor entry before the FM interaction.
    fm_input = concat_func(sparse_embedding_list, axis=1)
    refined_fm_input = Lambda(lambda x: x[0] * tf.expand_dims(x[1], axis=-1))(
        [fm_input, input_aware_factor])
    fm_logit = FM()(refined_fm_input)

    final_logit = add_func([linear_logit, fm_logit])

    output = PredictionLayer(task)(final_logit)
    return Model(inputs=inputs_list, outputs=output)
================================================
FILE: deepctr/models/edcn.py
================================================
# -*- coding:utf-8 -*-
"""
Author:
Yi He, heyi_jack@163.com
Reference:
[1] Chen, B., Wang, Y., Liu, et al. Enhancing Explicit and Implicit Feature Interactions via Information Sharing for Parallel Deep CTR Models. CIKM, 2021, October (https://dlp-kdd.github.io/assets/pdf/DLP-KDD_2021_paper_12.pdf)
"""
from tensorflow.python.keras.layers import Dense, Reshape, Concatenate
from tensorflow.python.keras.models import Model
from ..feature_column import build_input_features, get_linear_logit, input_from_feature_columns
from ..layers.core import PredictionLayer, DNN, RegulationModule
from ..layers.interaction import CrossNet, BridgeModule
from ..layers.utils import add_func, concat_func
def EDCN(linear_feature_columns,
         dnn_feature_columns,
         cross_num=2,
         cross_parameterization='vector',
         bridge_type='concatenation',
         tau=1.0,
         l2_reg_linear=1e-5,
         l2_reg_embedding=1e-5,
         l2_reg_cross=1e-5,
         l2_reg_dnn=0,
         seed=1024,
         dnn_dropout=0,
         dnn_use_bn=False,
         dnn_activation='relu',
         task='binary'):
    """Instantiates the Enhanced Deep&Cross Network architecture.

    Each of the ``cross_num`` stages runs one cross layer and one single-layer DNN in parallel,
    fuses them with a bridge module, and (except after the last stage) feeds the fused output
    through two regulation modules to produce the next stage's inputs.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param cross_num: positive integer, cross layer number
    :param cross_parameterization: str, ``"vector"`` or ``"matrix"``, how to parameterize the cross network.
    :param bridge_type: The type of bridge interaction, one of ``"pointwise_addition"``, ``"hadamard_product"``, ``"concatenation"`` , ``"attention_pooling"``
    :param tau: Positive float, the temperature coefficient to control distribution of field-wise gating unit
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_cross: float. L2 regularizer strength applied to cross net
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not DNN
    :param dnn_activation: Activation function to use in DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    :raises ValueError: if ``cross_num`` is not positive.
    """
    # Reject negative values too: with no stage executed there would be no outputs to stack.
    if cross_num <= 0:
        raise ValueError("Cross layer num must > 0")

    # NOTE(review): inputs are built from dnn_feature_columns only, so every linear feature
    # presumably has to appear in dnn_feature_columns as well - confirm against callers.
    features = build_input_features(dnn_feature_columns)
    inputs_list = list(features.values())

    linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed, prefix='linear', l2_reg=l2_reg_linear)

    # Dense features are not supported by this model (support_dense=False).
    sparse_embedding_list, _ = input_from_feature_columns(
        features, dnn_feature_columns, l2_reg_embedding, seed, support_dense=False)

    emb_input = concat_func(sparse_embedding_list, axis=1)
    deep_in = RegulationModule(tau)(emb_input)
    cross_in = RegulationModule(tau)(emb_input)

    field_size = len(sparse_embedding_list)
    # The embedding width is read from the first field only.
    # NOTE(review): presumably all fields must share this width for the Reshape below - confirm.
    embedding_size = int(sparse_embedding_list[0].shape[-1])
    cross_dim = field_size * embedding_size

    for i in range(cross_num):
        cross_out = CrossNet(1, parameterization=cross_parameterization,
                             l2_reg=l2_reg_cross)(cross_in)
        # The deep branch keeps the same width as the cross branch so the bridge can combine them.
        deep_out = DNN([cross_dim], dnn_activation, l2_reg_dnn,
                       dnn_dropout, dnn_use_bn, seed=seed)(deep_in)
        bridge_out = BridgeModule(bridge_type)([cross_out, deep_out])
        if i + 1 < cross_num:
            # Restore the (field, embedding) layout before regulating the next stage's inputs.
            bridge_out_list = Reshape([field_size, embedding_size])(bridge_out)
            deep_in = RegulationModule(tau)(bridge_out_list)
            cross_in = RegulationModule(tau)(bridge_out_list)

    # Only the outputs of the last stage are stacked for the final projection.
    stack_out = Concatenate()([cross_out, deep_out, bridge_out])
    final_logit = Dense(1, use_bias=False)(stack_out)

    final_logit = add_func([final_logit, linear_logit])
    output = PredictionLayer(task)(final_logit)

    model = Model(inputs=inputs_list, outputs=output)

    return model
================================================
FILE: deepctr/models/fgcnn.py
================================================
# -*- coding:utf-8 -*-
"""
Author:
Weichen Shen, weichenswc@163.com
Reference:
[1] Liu B, Tang R, Chen Y, et al. Feature Generation by Convolutional Neural Network for Click-Through Rate Prediction[J]. arXiv preprint arXiv:1904.04447, 2019.
(https://arxiv.org/pdf/1904.04447)
"""
import tensorflow as tf
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.layers import Dense, Lambda, Flatten, Concatenate
from ..feature_column import build_input_features, get_linear_logit, input_from_feature_columns
from ..layers.core import PredictionLayer, DNN
from ..layers.interaction import InnerProductLayer, FGCNNLayer
from ..layers.utils import concat_func, add_func
def unstack(input_tensor):
    """Insert a new axis at position 2, then split the result along axis 1.

    :param input_tensor: tensor to split; the number of pieces is read from ``.shape[1]`` of the expanded tensor.
    :return: the value returned by ``tf.unstack`` for the expanded tensor along axis 1.
    """
    expanded = tf.expand_dims(input_tensor, axis=2)
    piece_count = expanded.shape[1]
    return tf.unstack(expanded, num=piece_count, axis=1)
def FGCNN(linear_feature_columns, dnn_feature_columns, conv_kernel_width=(7, 7, 7, 7), conv_filters=(14, 16, 18, 20),
          new_maps=(3, 3, 3, 3),
          pooling_width=(2, 2, 2, 2), dnn_hidden_units=(256, 128, 64), l2_reg_linear=1e-5, l2_reg_embedding=1e-5,
          l2_reg_dnn=0,
          dnn_dropout=0,
          seed=1024,
          task='binary', ):
    """Instantiates the Feature Generation by Convolutional Neural Network architecture.

    Two embedding sets are built from ``dnn_feature_columns``: one that is used
    as-is and one (prefix ``'fg'``) that feeds the feature-generation CNN. The
    generated features are concatenated with the original embeddings, pairwise
    inner products are computed over the combined fields, and the flattened
    result is passed through a DNN whose logit is added to the linear logit.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param conv_kernel_width: list,list of positive integer or empty list,the width of filter in each conv layer.
    :param conv_filters: list,list of positive integer or empty list,the number of filters in each conv layer.
    :param new_maps: list, list of positive integer or empty list, the feature maps of generated features.
    :param pooling_width: list, list of positive integer or empty list,the width of pooling layer.
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of deep net.
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param seed: integer ,to use as random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    :raises ValueError: if ``conv_kernel_width``, ``conv_filters``, ``new_maps`` and ``pooling_width``
        do not all have the same length.
    """
    if not (len(conv_kernel_width) == len(conv_filters) == len(new_maps) == len(pooling_width)):
        raise ValueError(
            "conv_kernel_width,conv_filters,new_maps and pooling_width must have same length")

    # Build inputs for the linear columns as well as the dnn columns, like the
    # other models with a linear part do; otherwise a feature used only by the
    # linear part has no input to read from.  The dnn columns are listed first
    # so that models whose linear columns are a subset of the dnn columns keep
    # their input order (presumably the mapping is keyed by feature name --
    # verify against build_input_features).
    features = build_input_features(list(dnn_feature_columns) + list(linear_feature_columns))

    inputs_list = list(features.values())

    linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed, prefix='linear',
                                    l2_reg=l2_reg_linear)

    # Two independent embedding sets: the plain one and the 'fg'-prefixed one
    # that is consumed only by the feature-generation layer.
    deep_emb_list, _ = input_from_feature_columns(features, dnn_feature_columns, l2_reg_embedding, seed)
    fg_deep_emb_list, _ = input_from_feature_columns(features, dnn_feature_columns, l2_reg_embedding, seed,
                                                     prefix='fg')

    fg_input = concat_func(fg_deep_emb_list, axis=1)
    origin_input = concat_func(deep_emb_list, axis=1)

    if len(conv_filters) > 0:
        new_features = FGCNNLayer(
            conv_filters, conv_kernel_width, new_maps, pooling_width)(fg_input)
        combined_input = concat_func([origin_input, new_features], axis=1)
    else:
        # No conv layers configured: fall back to the original embeddings only.
        combined_input = origin_input

    # Split the combined tensor back into per-field tensors (one mask entry per
    # field) so that InnerProductLayer receives a list.
    inner_product = Flatten()(
        InnerProductLayer()(Lambda(unstack, mask=[None] * int(combined_input.shape[1]))(combined_input)))
    linear_signal = Flatten()(combined_input)
    dnn_input = Concatenate()([linear_signal, inner_product])
    dnn_input = Flatten()(dnn_input)

    # Forward the seed so the documented ``seed`` argument also controls the
    # deep tower, as in the sibling models.
    final_logit = DNN(dnn_hidden_units, l2_reg=l2_reg_dnn, dropout_rate=dnn_dropout, seed=seed)(dnn_input)
    final_logit = Dense(1, use_bias=False)(final_logit)

    final_logit = add_func([final_logit, linear_logit])
    output = PredictionLayer(task)(final_logit)

    model = Model(inputs=inputs_list, outputs=output)
    return model
================================================
FILE: deepctr/models/fibinet.py
================================================
# -*- coding:utf-8 -*-
"""
Author:
Weichen Shen, weichenswc@163.com
Reference:
[1] Huang T, Zhang Z, Zhang J. FiBiNET: Combining Feature Importance and Bilinear feature Interaction for Click-Through Rate Prediction[J]. arXiv preprint arXiv:1905.09433, 2019.
"""
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.layers import Dense, Flatten
from ..feature_column import build_input_features, get_linear_logit, input_from_feature_columns
from ..layers.core import PredictionLayer, DNN
from ..layers.interaction import SENETLayer, BilinearInteraction
from ..layers.utils import concat_func, add_func, combined_dnn_input
def FiBiNET(linear_feature_columns, dnn_feature_columns, bilinear_type='interaction', reduction_ratio=3,
            dnn_hidden_units=(256, 128, 64), l2_reg_linear=1e-5,
            l2_reg_embedding=1e-5, l2_reg_dnn=0, seed=1024, dnn_dropout=0, dnn_activation='relu',
            task='binary'):
    """Build the FiBiNET (Feature Importance and Bilinear feature Interaction NETwork) model.

    The sparse embeddings are passed through a SENET layer; bilinear
    interactions are computed on both the SENET output and the raw embeddings,
    concatenated, flattened and fed (together with the dense values) to a DNN.
    The DNN logit is added to the linear logit.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param bilinear_type: str, bilinear function type used in Bilinear Interaction Layer, can be ``'all'`` , ``'each'`` or ``'interaction'``
    :param reduction_ratio: integer in [1,inf), reduction ratio used in SENET Layer
    :param dnn_hidden_units: list, list of positive integer or empty list, the layer number and units in each layer of DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to wide part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    features = build_input_features(linear_feature_columns + dnn_feature_columns)
    model_inputs = list(features.values())

    linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed, prefix='linear',
                                    l2_reg=l2_reg_linear)

    sparse_embedding_list, dense_value_list = input_from_feature_columns(
        features, dnn_feature_columns, l2_reg_embedding, seed)

    # Re-weighted embeddings produced by the SENET layer.
    senet_layer = SENETLayer(reduction_ratio, seed)
    senet_embedding_list = senet_layer(sparse_embedding_list)

    # One bilinear layer for the SENET branch, a separate one for the raw branch.
    senet_bilinear = BilinearInteraction(bilinear_type=bilinear_type, seed=seed)
    senet_bilinear_out = senet_bilinear(senet_embedding_list)
    raw_bilinear = BilinearInteraction(bilinear_type=bilinear_type, seed=seed)
    bilinear_out = raw_bilinear(sparse_embedding_list)

    flatten = Flatten()
    merged_interactions = concat_func([senet_bilinear_out, bilinear_out])
    dnn_input = combined_dnn_input([flatten(merged_interactions)], dense_value_list)

    dnn_tower = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, False, seed=seed)
    dnn_out = dnn_tower(dnn_input)
    dnn_logit = Dense(1, use_bias=False)(dnn_out)

    final_logit = add_func([linear_logit, dnn_logit])
    output = PredictionLayer(task)(final_logit)

    return Model(inputs=model_inputs, outputs=output)
================================================
FILE: deepctr/models/flen.py
================================================
# -*- coding:utf-8 -*-
"""
Author:
Tingyi Tan, 5636374@qq.com
Reference:
[1] Chen W, Zhan L, Ci Y, Lin C. FLEN: Leveraging Field for Scalable CTR Prediction. arXiv preprint arXiv:1911.04690, 2019. (https://arxiv.org/pdf/1911.04690)
"""
from itertools import chain
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.layers import Dense
from ..feature_column import build_input_features, get_linear_logit, input_from_feature_columns
from ..layers.core import PredictionLayer, DNN
from ..layers.interaction import FieldWiseBiInteraction
from ..layers.utils import concat_func, add_func, combined_dnn_input
def FLEN(linear_feature_columns,
         dnn_feature_columns,
         dnn_hidden_units=(256, 128, 64),
         l2_reg_linear=0.00001,
         l2_reg_embedding=0.00001,
         l2_reg_dnn=0,
         seed=1024,
         dnn_dropout=0.0,
         dnn_activation='relu',
         dnn_use_bn=False,
         task='binary'):
    """Build the FLEN model.

    Embeddings are fetched grouped by feature group. Each group is
    concatenated and handed to a field-wise bi-interaction layer, while all
    embeddings plus the dense values feed a DNN. Both outputs are concatenated
    and projected to one logit, which is added to the linear logit.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param dnn_hidden_units: list, list of positive integer or empty list, the layer number and units in each layer of deep net
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    all_columns = linear_feature_columns + dnn_feature_columns
    features = build_input_features(all_columns)
    model_inputs = list(features.values())

    # support_group=True returns the embeddings as a mapping: group -> list of embeddings.
    group_embedding_dict, dense_value_list = input_from_feature_columns(
        features, dnn_feature_columns, l2_reg_embedding, seed, support_group=True)

    linear_logit = get_linear_logit(
        features, linear_feature_columns, seed=seed, prefix='linear', l2_reg=l2_reg_linear)

    # Field-wise interaction over one concatenated tensor per group.
    field_interaction = FieldWiseBiInteraction(seed=seed)
    per_group_inputs = [concat_func(embeddings, axis=1) for embeddings in group_embedding_dict.values()]
    fm_mf_out = field_interaction(per_group_inputs)

    # The DNN sees every embedding regardless of its group.
    flat_embeddings = list(chain.from_iterable(group_embedding_dict.values()))
    dnn_input = combined_dnn_input(flat_embeddings, dense_value_list)
    dnn_tower = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)
    dnn_output = dnn_tower(dnn_input)

    logit_layer = Dense(1, use_bias=False)
    dnn_logit = logit_layer(concat_func([fm_mf_out, dnn_output]))

    final_logit = add_func([linear_logit, dnn_logit])
    output = PredictionLayer(task)(final_logit)

    return Model(inputs=model_inputs, outputs=output)
================================================
FILE: deepctr/models/fnn.py
================================================
# -*- coding:utf-8 -*-
"""
Author:
Weichen Shen, weichenswc@163.com
Reference:
[1] Zhang W, Du T, Wang J. Deep learning over multi-field categorical data[C]//European conference on information retrieval. Springer, Cham, 2016: 45-57.(https://arxiv.org/pdf/1601.02376.pdf)
"""
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.layers import Dense
from ..feature_column import build_input_features, get_linear_logit, input_from_feature_columns
from ..layers.core import PredictionLayer, DNN
from ..layers.utils import add_func, combined_dnn_input
def FNN(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(256, 128, 64),
        l2_reg_embedding=1e-5, l2_reg_linear=1e-5, l2_reg_dnn=0, seed=1024, dnn_dropout=0,
        dnn_activation='relu', task='binary'):
    """Build the Factorization-supported Neural Network model.

    The sparse embeddings and dense values of the deep columns feed a DNN; its
    single-unit projection is added to the linear logit before the prediction
    layer.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param dnn_hidden_units: list, list of positive integer or empty list, the layer number and units in each layer of deep net
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_linear: float. L2 regularizer strength applied to linear weight
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    all_columns = linear_feature_columns + dnn_feature_columns
    features = build_input_features(all_columns)
    model_inputs = list(features.values())

    linear_logit = get_linear_logit(
        features, linear_feature_columns, seed=seed, prefix='linear', l2_reg=l2_reg_linear)

    sparse_embedding_list, dense_value_list = input_from_feature_columns(
        features, dnn_feature_columns, l2_reg_embedding, seed)
    dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)

    dnn_tower = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, False, seed=seed)
    deep_out = dnn_tower(dnn_input)
    dnn_logit = Dense(1, use_bias=False)(deep_out)

    # Sum of the deep and linear contributions.
    final_logit = add_func([dnn_logit, linear_logit])
    output = PredictionLayer(task)(final_logit)

    return Model(inputs=model_inputs, outputs=output)
================================================
FILE: deepctr/models/fwfm.py
================================================
# -*- coding:utf-8 -*-
"""
Author:
Harshit Pande
Reference:
[1] Field-weighted Factorization Machines for Click-Through Rate Prediction in Display Advertising
(https://arxiv.org/pdf/1806.03514.pdf)
"""
from itertools import chain
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.layers import Dense
from ..feature_column import build_input_features, get_linear_logit, DEFAULT_GROUP_NAME, input_from_feature_columns
from ..layers.core import PredictionLayer, DNN
from ..layers.interaction import FwFMLayer
from ..layers.utils import concat_func, add_func, combined_dnn_input
def FwFM(linear_feature_columns, dnn_feature_columns, fm_group=(DEFAULT_GROUP_NAME,), dnn_hidden_units=(256, 128, 64),
         l2_reg_linear=0.00001, l2_reg_embedding=0.00001, l2_reg_field_strength=0.00001, l2_reg_dnn=0,
         seed=1024, dnn_dropout=0, dnn_activation='relu', dnn_use_bn=False, task='binary'):
    """Build the Field-weighted Factorization Machine (FwFM) model.

    A FwFM interaction logit is computed for every embedding group listed in
    ``fm_group`` and the group logits are summed. The result is added to the
    linear logit and, when ``dnn_hidden_units`` is non-empty, to a DNN logit
    computed over all embeddings and dense values.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param fm_group: list, group_name of features that will be used to do feature interactions.
    :param dnn_hidden_units: list, list of positive integer or empty list if do not want DNN, the layer number and units
        in each layer of DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_field_strength: float. L2 regularizer strength applied to the field pair strength parameters
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    features = build_input_features(linear_feature_columns + dnn_feature_columns)
    model_inputs = list(features.values())

    linear_logit = get_linear_logit(
        features, linear_feature_columns, seed=seed, prefix='linear', l2_reg=l2_reg_linear)

    group_embedding_dict, dense_value_list = input_from_feature_columns(
        features, dnn_feature_columns, l2_reg_embedding, seed, support_group=True)

    # One FwFM layer per selected group; groups outside fm_group are skipped.
    group_logits = []
    for group_name, group_embeddings in group_embedding_dict.items():
        if group_name not in fm_group:
            continue
        fwfm_layer = FwFMLayer(num_fields=len(group_embeddings), regularizer=l2_reg_field_strength)
        group_logits.append(fwfm_layer(concat_func(group_embeddings, axis=1)))
    fwfm_logit = add_func(group_logits)

    final_logit_components = [linear_logit, fwfm_logit]

    # The deep part is optional: an empty dnn_hidden_units disables it.
    if dnn_hidden_units:
        flat_embeddings = list(chain.from_iterable(group_embedding_dict.values()))
        dnn_input = combined_dnn_input(flat_embeddings, dense_value_list)
        dnn_tower = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)
        dnn_output = dnn_tower(dnn_input)
        final_logit_components.append(Dense(1, use_bias=False)(dnn_output))

    final_logit = add_func(final_logit_components)
    output = PredictionLayer(task)(final_logit)

    return Model(inputs=model_inputs, outputs=output)
================================================
FILE: deepctr/models/ifm.py
================================================
# -*- coding:utf-8 -*-
"""
Author:
zanshuxun, zanshuxun@aliyun.com
Reference:
[1] Yu Y, Wang Z, Yuan B. An Input-aware Factorization Machine for Sparse Prediction[C]//IJCAI. 2019: 1466-1472.
(https://www.ijcai.org/Proceedings/2019/0203.pdf)
"""
import tensorflow as tf
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.layers import Dense, Lambda
from ..feature_column import build_input_features, get_linear_logit, input_from_feature_columns, SparseFeat, \
VarLenSparseFeat
from ..layers.core import PredictionLayer, DNN
from ..layers.interaction import FM
from ..layers.utils import concat_func, add_func, combined_dnn_input, softmax
def IFM(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(256, 128, 64),
        l2_reg_linear=0.00001, l2_reg_embedding=0.00001, l2_reg_dnn=0, seed=1024, dnn_dropout=0,
        dnn_activation='relu', dnn_use_bn=False, task='binary'):
    """Build the Input-aware Factorization Machine (IFM) model.

    A DNN over the sparse embeddings produces one score per sparse feature;
    the scores are softmax-normalised and scaled into input-aware factors
    that re-weight both the linear part and the FM input.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param dnn_hidden_units: list, list of positive integer, the layer number and units in each layer of DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    :raises ValueError: if ``dnn_hidden_units`` is empty or no sparse embeddings are produced.
    """
    if len(dnn_hidden_units) == 0:
        raise ValueError("dnn_hidden_units is null!")

    features = build_input_features(linear_feature_columns + dnn_feature_columns)

    # Number of sparse (fixed- or variable-length) columns in the deep part.
    sparse_feat_num = sum(
        1 for column in dnn_feature_columns if isinstance(column, (SparseFeat, VarLenSparseFeat)))
    model_inputs = list(features.values())

    sparse_embedding_list, _ = input_from_feature_columns(
        features, dnn_feature_columns, l2_reg_embedding, seed)
    if len(sparse_embedding_list) == 0:
        raise ValueError("there are no sparse features")

    # Only the sparse embeddings feed the factor-estimating network.
    dnn_input = combined_dnn_input(sparse_embedding_list, [])
    dnn_tower = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)
    dnn_output = dnn_tower(dnn_input)
    # After this projection dnn_output is m'_{x}: one value per sparse feature.
    dnn_output = Dense(sparse_feat_num, use_bias=False)(dnn_output)
    # Input-aware factor m_{x,i}: softmax over axis 1 scaled by the size of the last axis.
    input_aware_factor = Lambda(lambda x: tf.cast(tf.shape(x)[-1], tf.float32) * softmax(x, dim=1))(dnn_output)

    linear_logit = get_linear_logit(
        features, linear_feature_columns, seed=seed, prefix='linear',
        l2_reg=l2_reg_linear, sparse_feat_refine_weight=input_aware_factor)

    # Scale every field embedding by its factor before the FM interaction.
    fm_input = concat_func(sparse_embedding_list, axis=1)
    refine = Lambda(lambda x: x[0] * tf.expand_dims(x[1], axis=-1))
    refined_fm_input = refine([fm_input, input_aware_factor])
    fm_logit = FM()(refined_fm_input)

    final_logit = add_func([linear_logit, fm_logit])
    output = PredictionLayer(task)(final_logit)

    return Model(inputs=model_inputs, outputs=output)
================================================
FILE: deepctr/models/mlr.py
================================================
# -*- coding:utf-8 -*-
"""
Author:
Weichen Shen, weichenswc@163.com
Reference:
[1] Gai K, Zhu X, Li H, et al. Learning Piece-wise Linear Models from Large Scale Data for Ad Click Prediction[J]. arXiv preprint arXiv:1704.05194, 2017.(https://arxiv.org/abs/1704.05194)
"""
from tensorflow.python.keras.layers import Activation, dot
from tensorflow.python.keras.models import Model
from ..feature_column import build_input_features, get_linear_logit
from ..layers.core import PredictionLayer
from ..layers.utils import concat_func
def MLR(region_feature_columns, base_feature_columns=None, region_num=4,
        l2_reg_linear=1e-5, seed=1024, task='binary',
        bias_feature_columns=None):
    """Build the Mixed Logistic Regression / piece-wise linear model.

    The model output is the dot product of the region scores and the learner
    scores; when bias feature columns are supplied, that result is further
    combined (dot product) with a single bias learner score.

    :param region_feature_columns: An iterable containing all the features used by region part of the model.
    :param base_feature_columns: An iterable containing all the features used by base part of the model.
        When ``None`` or empty, ``region_feature_columns`` is used instead.
    :param region_num: integer > 1, indicate the piece number
    :param l2_reg_linear: float. L2 regularizer strength applied to weight
    :param seed: integer, to use as random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :param bias_feature_columns: An iterable containing all the features used by bias part of the model.
    :return: A Keras model instance.
    :raises ValueError: if ``region_num`` is not greater than 1.
    """
    if region_num <= 1:
        raise ValueError("region_num must > 1")

    base_is_missing = base_feature_columns is None or len(base_feature_columns) == 0
    if base_is_missing:
        base_feature_columns = region_feature_columns
    bias_feature_columns = [] if bias_feature_columns is None else bias_feature_columns

    all_columns = region_feature_columns + base_feature_columns + bias_feature_columns
    features = build_input_features(all_columns)
    model_inputs = list(features.values())

    region_score = get_region_score(features, region_feature_columns, region_num, l2_reg_linear, seed)
    learner_score = get_learner_score(features, base_feature_columns, region_num, l2_reg_linear, seed, task=task)
    final_logit = dot([region_score, learner_score], axes=-1)

    # bias_feature_columns can no longer be None here, so only emptiness is checked.
    if len(bias_feature_columns) > 0:
        # The bias learner is a single region and always uses the 'binary' task.
        bias_score = get_learner_score(features, bias_feature_columns, 1, l2_reg_linear, seed, prefix='bias_',
                                       task='binary')
        final_logit = dot([final_logit, bias_score], axes=-1)

    return Model(inputs=model_inputs, outputs=final_logit)
def get_region_score(features, feature_columns, region_number, l2_reg, seed, prefix='region_', seq_mask_zero=True):
    """Return softmax-normalised region scores.

    One linear logit is built per region (seed ``seed + i`` and name prefix
    ``prefix + str(i + 1)`` for region index ``i``); the logits are
    concatenated and passed through a softmax activation.

    :param features: input features passed through to ``get_linear_logit``.
    :param feature_columns: feature columns used for every region logit.
    :param region_number: number of region logits to build.
    :param l2_reg: L2 regularizer strength passed to ``get_linear_logit``.
    :param seed: base random seed; region ``i`` uses ``seed + i``.
    :param prefix: name prefix for the per-region linear logits.
    :param seq_mask_zero: accepted for interface compatibility; not used in this function.
    :return: softmax output over the concatenated region logits.
    """
    region_logits = []
    for idx in range(region_number):
        region_logits.append(
            get_linear_logit(features, feature_columns, seed=seed + idx,
                             prefix=prefix + str(idx + 1), l2_reg=l2_reg))
    merged_logits = concat_func(region_logits)
    return Activation('softmax')(merged_logits)
def get_learner_score(features, feature_columns, region_number, l2_reg, seed, prefix='learner_', seq_mask_zero=True,
                      task='binary'):
    """Return the concatenated per-region learner scores.

    For each region index ``i`` a linear logit is built (seed ``seed + i``,
    name prefix ``prefix + str(i + 1)``) and passed through a bias-free
    ``PredictionLayer`` for ``task``; the resulting scores are concatenated.

    :param features: input features passed through to ``get_linear_logit``.
    :param feature_columns: feature columns used for every learner logit.
    :param region_number: number of learners to build.
    :param l2_reg: L2 regularizer strength passed to ``get_linear_logit``.
    :param seed: base random seed; learner ``i`` uses ``seed + i``.
    :param prefix: name prefix for the per-learner linear logits.
    :param seq_mask_zero: accepted for interface compatibility; not used in this function.
    :param task: task passed to ``PredictionLayer``.
    :return: concatenation of the per-region prediction outputs.
    """
    learner_scores = []
    for idx in range(region_number):
        prediction = PredictionLayer(task=task, use_bias=False)
        learner_logit = get_linear_logit(features, feature_columns, seed=seed + idx,
                                         prefix=prefix + str(idx + 1), l2_reg=l2_reg)
        learner_scores.append(prediction(learner_logit))
    return concat_func(learner_scores)
================================================
FILE: deepctr/models/multitask/__init__.py
================================================
from .esmm import ESMM
from .mmoe import MMOE
from .ple import PLE
from .sharedbottom import SharedBottom
================================================
FILE: deepctr/models/multitask/esmm.py
================================================
"""
Author:
Mincai Lai, laimc@shanghaitech.edu.cn
Weichen Shen, weichenswc@163.com
Reference:
[1] Ma X, Zhao L, Huang G, et al. Entire space multi-task model: An effective approach for estimating post-click conversion rate[C]//The 41st International ACM SIGIR Conference on Research & Development in Information Retrieval. 2018.(https://arxiv.org/abs/1804.07931)
"""
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.layers import Dense, Multiply
from ...feature_column import build_input_features, input_from_feature_columns
from ...layers.core import PredictionLayer, DNN
from ...layers.utils import combined_dnn_input
def ESMM(dnn_feature_columns, tower_dnn_hidden_units=(256, 128, 64), l2_reg_embedding=0.00001, l2_reg_dnn=0,
         seed=1024, dnn_dropout=0, dnn_activation='relu', dnn_use_bn=False, task_types=('binary', 'binary'),
         task_names=('ctr', 'ctcvr')):
    """Build the Entire Space Multi-Task Model.

    Two DNN towers share the same input. The first yields the CTR prediction,
    the second a CVR prediction; the model outputs the CTR prediction and the
    product CTR * CVR (CTCVR), named after ``task_names``.

    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param tower_dnn_hidden_units: list, list of positive integer or empty list, the layer number and units in each layer of task DNN.
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector.
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN.
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param task_types: list of str, the loss type of each task; every entry must be ``"binary"``.
    :param task_names: list of str, indicating the predict target of each tasks. default value is ['ctr', 'ctcvr']
    :return: A Keras model instance.
    :raises ValueError: if ``task_names`` does not have exactly two entries or any task type is not ``"binary"``.
    """
    if len(task_names) != 2:
        raise ValueError("the length of task_names must be equal to 2")

    for declared_type in task_types:
        if declared_type != 'binary':
            raise ValueError("task must be binary in ESMM, {} is illegal".format(declared_type))

    features = build_input_features(dnn_feature_columns)
    model_inputs = list(features.values())

    sparse_embedding_list, dense_value_list = input_from_feature_columns(
        features, dnn_feature_columns, l2_reg_embedding, seed)
    dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)

    # Two towers with identical configuration over the shared input.
    ctr_tower = DNN(tower_dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)
    ctr_output = ctr_tower(dnn_input)
    cvr_tower = DNN(tower_dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)
    cvr_output = cvr_tower(dnn_input)

    ctr_logit = Dense(1, use_bias=False)(ctr_output)
    cvr_logit = Dense(1, use_bias=False)(cvr_output)

    ctr_name, ctcvr_name = task_names[0], task_names[1]
    ctr_pred = PredictionLayer('binary', name=ctr_name)(ctr_logit)
    # The CVR prediction is internal only; it is not a model output.
    cvr_pred = PredictionLayer('binary')(cvr_logit)

    # CTCVR = CTR * CVR
    ctcvr_pred = Multiply(name=ctcvr_name)([ctr_pred, cvr_pred])

    return Model(inputs=model_inputs, outputs=[ctr_pred, ctcvr_pred])
================================================
FILE: deepctr/models/multitask/mmoe.py
================================================
"""
Author:
Mincai Lai, laimc@shanghaitech.edu.cn
Weichen Shen, weichenswc@163.com
Reference:
[1] Ma J, Zhao Z, Yi X, et al. Modeling task relationships in multi-task learning with multi-gate mixture-of-experts[C]//Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining. 2018.(https://dl.acm.org/doi/abs/10.1145/3219819.3220007)
"""
import tensorflow as tf
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.layers import Dense, Lambda
from ...feature_column import build_input_features, input_from_feature_columns
from ...layers.core import PredictionLayer, DNN
from ...layers.utils import combined_dnn_input, reduce_sum
def MMOE(dnn_feature_columns, num_experts=3, expert_dnn_hidden_units=(256, 128), tower_dnn_hidden_units=(64,),
         gate_dnn_hidden_units=(), l2_reg_embedding=0.00001, l2_reg_dnn=0, seed=1024, dnn_dropout=0,
         dnn_activation='relu',
         dnn_use_bn=False, task_types=('binary', 'binary'), task_names=('ctr', 'ctcvr')):
    """Build the Multi-gate Mixture-of-Experts multi-task model.

    ``num_experts`` expert DNNs share the same input. Each task owns a softmax
    gate over the experts; the gate-weighted sum of expert outputs feeds a
    task-specific tower followed by a prediction layer.

    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param num_experts: integer, number of experts.
    :param expert_dnn_hidden_units: list, list of positive integer or empty list, the layer number and units in each layer of expert DNN.
    :param tower_dnn_hidden_units: list, list of positive integer or empty list, the layer number and units in each layer of task-specific DNN.
    :param gate_dnn_hidden_units: list, list of positive integer or empty list, the layer number and units in each layer of gate DNN.
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param task_types: list of str, indicating the loss of each tasks, ``"binary"`` for binary logloss, ``"regression"`` for regression loss. e.g. ['binary', 'regression']
    :param task_names: list of str, indicating the predict target of each tasks
    :return: a Keras model instance
    :raises ValueError: if fewer than two tasks or experts are requested, if ``task_types`` and
        ``task_names`` differ in length, or if a task type is neither ``"binary"`` nor ``"regression"``.
    """
    num_tasks = len(task_names)
    if num_tasks <= 1:
        raise ValueError("num_tasks must be greater than 1")
    if num_experts <= 1:
        raise ValueError("num_experts must be greater than 1")
    if len(task_types) != num_tasks:
        raise ValueError("num_tasks must be equal to the length of task_types")

    supported_types = ('binary', 'regression')
    for declared_type in task_types:
        if declared_type not in supported_types:
            raise ValueError("task must be binary or regression, {} is illegal".format(declared_type))

    features = build_input_features(dnn_feature_columns)
    model_inputs = list(features.values())

    sparse_embedding_list, dense_value_list = input_from_feature_columns(
        features, dnn_feature_columns, l2_reg_embedding, seed)
    dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)

    # Expert networks, all fed with the same input.
    expert_outs = [
        DNN(expert_dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed,
            name='expert_' + str(expert_idx))(dnn_input)
        for expert_idx in range(num_experts)
    ]
    expert_concat = Lambda(lambda x: tf.stack(x, axis=1))(expert_outs)  # None,num_experts,dim

    # One gate per task (number of gates == number of tasks).
    mmoe_outs = []
    for gate_task in task_names:
        gate_network = DNN(gate_dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed,
                           name='gate_' + gate_task)
        gate_input = gate_network(dnn_input)
        gate_softmax = Dense(num_experts, use_bias=False, activation='softmax',
                             name='gate_softmax_' + gate_task)
        gate_out = gate_softmax(gate_input)
        # Add a trailing axis so the gate weights broadcast against the stacked experts.
        gate_out = Lambda(lambda x: tf.expand_dims(x, axis=-1))(gate_out)

        # Gate-weighted sum over the expert axis.
        mix_layer = Lambda(lambda x: reduce_sum(x[0] * x[1], axis=1, keep_dims=False),
                           name='gate_mul_expert_' + gate_task)
        mmoe_outs.append(mix_layer([expert_concat, gate_out]))

    # Task towers and prediction heads.
    task_outs = []
    for task_type, task_name, mmoe_out in zip(task_types, task_names, mmoe_outs):
        tower = DNN(tower_dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed,
                    name='tower_' + task_name)
        tower_output = tower(mmoe_out)
        logit = Dense(1, use_bias=False)(tower_output)
        task_outs.append(PredictionLayer(task_type, name=task_name)(logit))

    return Model(inputs=model_inputs, outputs=task_outs)
================================================
FILE: deepctr/models/multitask/ple.py
================================================
"""
Author:
Mincai Lai, laimc@shanghaitech.edu.cn
Weichen Shen, weichenswc@163.com
Reference:
[1] Tang H, Liu J, Zhao M, et al. Progressive layered extraction (ple): A novel multi-task learning (mtl) model for personalized recommendations[C]//Fourteenth ACM Conference on Recommender Systems. 2020.(https://dl.acm.org/doi/10.1145/3383313.3412236)
"""
import tensorflow as tf
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.layers import Dense, Lambda
from ...feature_column import build_input_features, input_from_feature_columns
from ...layers.core import PredictionLayer, DNN
from ...layers.utils import combined_dnn_input, reduce_sum
def PLE(dnn_feature_columns, shared_expert_num=1, specific_expert_num=1, num_levels=2,
        expert_dnn_hidden_units=(256,), tower_dnn_hidden_units=(64,), gate_dnn_hidden_units=(),
        l2_reg_embedding=0.00001,
        l2_reg_dnn=0, seed=1024, dnn_dropout=0, dnn_activation='relu', dnn_use_bn=False,
        task_types=('binary', 'binary'), task_names=('ctr', 'ctcvr')):
    """Build the Progressive Layered Extraction model as a stack of Customized Gate Control levels.

    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param shared_expert_num: integer, number of task-shared experts per level.
    :param specific_expert_num: integer, number of task-specific experts per task and per level.
    :param num_levels: integer, number of stacked CGC levels.
    :param expert_dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of expert DNN.
    :param tower_dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of task-specific DNN.
    :param gate_dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of gate DNN.
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector.
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN.
    :param seed: integer ,to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN.
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN.
    :param task_types: list of str, one of ``"binary"`` or ``"regression"`` per task, e.g. ['binary', 'regression']
    :param task_names: list of str, the prediction target of each task; also used to name the layers.
    :raises ValueError: if fewer than two tasks are given, if ``task_types`` and ``task_names`` differ in
        length, or if a task type is neither ``"binary"`` nor ``"regression"``.
    :return: a Keras model instance with one output per task.
    """
    num_tasks = len(task_names)
    if num_tasks <= 1:
        raise ValueError("num_tasks must be greater than 1")
    if len(task_types) != num_tasks:
        raise ValueError("num_tasks must be equal to the length of task_types")
    for declared_type in task_types:
        if declared_type not in ['binary', 'regression']:
            raise ValueError("task must be binary or regression, {} is illegal".format(declared_type))

    features = build_input_features(dnn_feature_columns)
    inputs_list = list(features.values())
    sparse_embedding_list, dense_value_list = input_from_feature_columns(features, dnn_feature_columns,
                                                                         l2_reg_embedding, seed)
    dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)

    def make_dnn(hidden_units, layer_name):
        # Every expert, gate and tower shares the same DNN hyper-parameters.
        return DNN(hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed,
                   name=layer_name)

    def cgc_net(inputs, level_name, is_last=False):
        """Build one extraction level.

        ``inputs`` is ordered [task1, task2, ... taskn, shared]; the returned list has the
        same layout, except that the trailing shared entry is omitted when ``is_last`` is set.
        """
        # Task-specific experts read the task's own input (task-major, then expert index).
        specific_expert_outputs = [
            make_dnn(expert_dnn_hidden_units,
                     level_name + 'task_' + task_names[i] + '_expert_specific_' + str(j))(inputs[i])
            for i in range(num_tasks)
            for j in range(specific_expert_num)
        ]
        # Task-shared experts read the shared input (last entry).
        shared_expert_outputs = [
            make_dnn(expert_dnn_hidden_units, level_name + 'expert_shared_' + str(k))(inputs[-1])
            for k in range(shared_expert_num)
        ]

        def gated_mixture(experts, expert_count, gate_source, tag):
            # Stack experts on a new axis 1, weight them with a softmax gate, sum over that axis.
            stacked = Lambda(lambda x: tf.stack(x, axis=1))(experts)
            gate_hidden = make_dnn(gate_dnn_hidden_units, level_name + 'gate_' + tag)(gate_source)
            gate_weights = Dense(expert_count, use_bias=False, activation='softmax',
                                 name=level_name + 'gate_softmax_' + tag)(gate_hidden)
            gate_weights = Lambda(lambda x: tf.expand_dims(x, axis=-1))(gate_weights)
            return Lambda(lambda x: reduce_sum(x[0] * x[1], axis=1, keep_dims=False),
                          name=level_name + 'gate_mul_expert_' + tag)([stacked, gate_weights])

        cgc_outs = []
        # One gate per task: mixes that task's own experts with the shared experts.
        for i in range(num_tasks):
            own_experts = specific_expert_outputs[i * specific_expert_num:(i + 1) * specific_expert_num]
            cgc_outs.append(gated_mixture(own_experts + shared_expert_outputs,
                                          specific_expert_num + shared_expert_num,
                                          inputs[i],
                                          'specific_' + task_names[i]))
        # Intermediate levels add a shared gate over every expert, feeding the next level's shared input.
        if not is_last:
            cgc_outs.append(gated_mixture(specific_expert_outputs + shared_expert_outputs,
                                          num_tasks * specific_expert_num + shared_expert_num,
                                          inputs[-1],
                                          'shared'))
        return cgc_outs

    # Level 0 feeds the same DNN input to every task slot and to the shared slot.
    ple_inputs = [dnn_input] * (num_tasks + 1)
    ple_outputs = []
    for level in range(num_levels):
        ple_outputs = cgc_net(inputs=ple_inputs, level_name='level_' + str(level) + '_',
                              is_last=(level == num_levels - 1))
        ple_inputs = ple_outputs

    task_outs = []
    for task_type, task_name, ple_out in zip(task_types, task_names, ple_outputs):
        # Per-task tower followed by a bias-free logit and the prediction head.
        tower_output = make_dnn(tower_dnn_hidden_units, 'tower_' + task_name)(ple_out)
        logit = Dense(1, use_bias=False)(tower_output)
        task_outs.append(PredictionLayer(task_type, name=task_name)(logit))

    return Model(inputs=inputs_list, outputs=task_outs)
================================================
FILE: deepctr/models/multitask/sharedbottom.py
================================================
"""
Author:
Mincai Lai, laimc@shanghaitech.edu.cn
Weichen Shen, weichenswc@163.com
Reference:
[1] Ruder S. An overview of multi-task learning in deep neural networks[J]. arXiv preprint arXiv:1706.05098, 2017.(https://arxiv.org/pdf/1706.05098.pdf)
"""
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.layers import Dense
from ...feature_column import build_input_features, input_from_feature_columns
from ...layers.core import PredictionLayer, DNN
from ...layers.utils import combined_dnn_input
def SharedBottom(dnn_feature_columns, bottom_dnn_hidden_units=(256, 128), tower_dnn_hidden_units=(64,),
                 l2_reg_embedding=0.00001, l2_reg_dnn=0, seed=1024, dnn_dropout=0, dnn_activation='relu',
                 dnn_use_bn=False, task_types=('binary', 'binary'), task_names=('ctr', 'ctcvr')):
    """Build the SharedBottom multi-task network: one shared DNN followed by one tower per task.

    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param bottom_dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of shared bottom DNN.
    :param tower_dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of task-specific DNN.
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param seed: integer ,to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param task_types: list of str, one of ``"binary"`` or ``"regression"`` per task, e.g. ['binary', 'regression']
    :param task_names: list of str, the prediction target of each task; also used to name the towers and outputs.
    :raises ValueError: if fewer than two tasks are given, if ``task_types`` and ``task_names`` differ in
        length, or if a task type is neither ``"binary"`` nor ``"regression"``.
    :return: A Keras model instance with one output per task.
    """
    num_tasks = len(task_names)
    if num_tasks <= 1:
        raise ValueError("num_tasks must be greater than 1")
    if len(task_types) != num_tasks:
        raise ValueError("num_tasks must be equal to the length of task_types")
    for declared_type in task_types:
        if declared_type not in ['binary', 'regression']:
            raise ValueError("task must be binary or regression, {} is illegal".format(declared_type))

    features = build_input_features(dnn_feature_columns)
    inputs_list = list(features.values())
    sparse_embedding_list, dense_value_list = input_from_feature_columns(features, dnn_feature_columns,
                                                                         l2_reg_embedding, seed)
    dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)

    def make_dnn(hidden_units, **layer_kwargs):
        # The bottom and every tower share the same DNN hyper-parameters.
        return DNN(hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed,
                   **layer_kwargs)

    # The bottom representation is computed once and reused by every tower.
    shared_bottom_output = make_dnn(bottom_dnn_hidden_units)(dnn_input)

    tasks_output = []
    for task_type, task_name in zip(task_types, task_names):
        tower_output = make_dnn(tower_dnn_hidden_units, name='tower_' + task_name)(shared_bottom_output)
        logit = Dense(1, use_bias=False)(tower_output)
        tasks_output.append(PredictionLayer(task_type, name=task_name)(logit))

    return Model(inputs=inputs_list, outputs=tasks_output)
================================================
FILE: deepctr/models/nfm.py
================================================
# -*- coding:utf-8 -*-
"""
Author:
Weichen Shen, weichenswc@163.com
Reference:
[1] He X, Chua T S. Neural factorization machines for sparse predictive analytics[C]//Proceedings of the 40th International ACM SIGIR conference on Research and Development in Information Retrieval. ACM, 2017: 355-364. (https://arxiv.org/abs/1708.05027)
"""
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.layers import Dense, Dropout
from ..feature_column import build_input_features, get_linear_logit, input_from_feature_columns
from ..layers.core import PredictionLayer, DNN
from ..layers.interaction import BiInteractionPooling
from ..layers.utils import concat_func, add_func, combined_dnn_input
def NFM(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(256, 128, 64),
        l2_reg_embedding=1e-5, l2_reg_linear=1e-5, l2_reg_dnn=0, seed=1024, bi_dropout=0,
        dnn_dropout=0, dnn_activation='relu', task='binary'):
    """Build the Neural Factorization Machine: a linear part plus a DNN over Bi-Interaction pooled embeddings.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of deep net
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part.
    :param l2_reg_dnn: float . L2 regularizer strength applied to DNN
    :param seed: integer ,to use as random seed.
    :param bi_dropout: dropout rate applied to the output of the BiInteractionPooling layer; the Dropout
        layer is only added when this value is truthy (so ``0`` and ``None`` both disable it).
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in deep net
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    all_columns = linear_feature_columns + dnn_feature_columns
    features = build_input_features(all_columns)
    inputs_list = list(features.values())

    # Wide part.
    linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed, prefix='linear',
                                    l2_reg=l2_reg_linear)

    # Deep part: embeddings are concatenated along axis 1 and pooled by the Bi-Interaction layer.
    sparse_embedding_list, dense_value_list = input_from_feature_columns(features, dnn_feature_columns,
                                                                         l2_reg_embedding, seed)
    stacked_embeddings = concat_func(sparse_embedding_list, axis=1)
    pooled = BiInteractionPooling()(stacked_embeddings)
    if bi_dropout:
        pooled = Dropout(bi_dropout)(pooled, training=None)

    dnn_input = combined_dnn_input([pooled], dense_value_list)
    # Batch normalisation is always disabled for this DNN (fifth positional argument).
    hidden = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, False, seed=seed)(dnn_input)
    dnn_logit = Dense(1, use_bias=False)(hidden)

    final_logit = add_func([linear_logit, dnn_logit])
    prediction = PredictionLayer(task)(final_logit)
    return Model(inputs=inputs_list, outputs=prediction)
================================================
FILE: deepctr/models/onn.py
================================================
# -*- coding:utf-8 -*-
"""
Author:
Weichen Shen, weichenswc@163.com
Reference:
[1] Yang Y, Xu B, Shen F, et al. Operation-aware Neural Networks for User Response Prediction[J]. arXiv preprint arXiv:1904.12579, 2019. (https://arxiv.org/pdf/1904.12579)
"""
import itertools
from tensorflow.python.keras import backend as K
from tensorflow.python.keras.layers import (Dense, Embedding, Lambda,
multiply, Flatten)
try:
from tensorflow.python.keras.layers import BatchNormalization
except ImportError:
import tensorflow as tf
BatchNormalization = tf.keras.layers.BatchNormalization
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.regularizers import l2
from ..feature_column import SparseFeat, VarLenSparseFeat, build_input_features, get_linear_logit
from ..inputs import get_dense_input
from ..layers.core import DNN, PredictionLayer
from ..layers.sequence import SequencePoolingLayer
from ..layers.utils import concat_func, Hash, NoMask, add_func, combined_dnn_input
def ONN(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(256, 128, 64),
        l2_reg_embedding=1e-5, l2_reg_linear=1e-5, l2_reg_dnn=0, dnn_dropout=0,
        seed=1024, use_bn=True, reduce_sum=False, task='binary',
        ):
    """Instantiates the Operation-aware Neural Networks architecture.

    Every sparse (or variable-length sparse) field owns one embedding table per partner field;
    each unordered pair of fields contributes the element-wise product of the two
    partner-specific embeddings to the DNN input.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of deep net
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part.
    :param l2_reg_dnn: float . L2 regularizer strength applied to DNN
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param seed: integer ,to use as random seed (forwarded to the linear part and to the DNN).
    :param use_bn: bool,whether use bn after ffm out or not
    :param reduce_sum: bool,whether apply reduce_sum on cross vector
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    features = build_input_features(linear_feature_columns + dnn_feature_columns)
    inputs_list = list(features.values())

    linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed, prefix='linear',
                                    l2_reg=l2_reg_linear)

    sparse_feature_columns = [
        fc for fc in dnn_feature_columns if isinstance(fc, SparseFeat)] if dnn_feature_columns else []
    varlen_sparse_feature_columns = [
        fc for fc in dnn_feature_columns if isinstance(fc, VarLenSparseFeat)] if dnn_feature_columns else []
    field_columns = sparse_feature_columns + varlen_sparse_feature_columns

    # sparse_embedding[a][b] is the table used to embed field ``a`` when it interacts with field ``b``.
    # Keys are embedding names, so columns sharing an embedding name share tables.
    sparse_embedding = {
        fc_j.embedding_name: {
            fc_i.embedding_name: Embedding(
                fc_j.vocabulary_size, fc_j.embedding_dim,
                embeddings_initializer=fc_j.embeddings_initializer,
                embeddings_regularizer=l2(l2_reg_embedding),
                mask_zero=isinstance(fc_j, VarLenSparseFeat),
                name='sparse_emb_' + str(fc_j.embedding_name) + '_' + fc_i.embedding_name)
            for fc_i in field_columns}
        for fc_j in field_columns}

    dense_value_list = get_dense_input(features, dnn_feature_columns)

    embed_list = []
    for fc_i, fc_j in itertools.combinations(field_columns, 2):
        i_input = features[fc_i.name]
        if fc_i.use_hash:
            i_input = Hash(fc_i.vocabulary_size)(i_input)
        j_input = features[fc_j.name]
        if fc_j.use_hash:
            j_input = Hash(fc_j.vocabulary_size)(j_input)

        fc_i_embedding = feature_embedding(fc_i, fc_j, sparse_embedding, i_input)
        fc_j_embedding = feature_embedding(fc_j, fc_i, sparse_embedding, j_input)

        element_wise_prod = multiply([fc_i_embedding, fc_j_embedding])
        if reduce_sum:
            # Collapse each cross vector over its last axis.
            element_wise_prod = Lambda(lambda x: K.sum(x, axis=-1))(element_wise_prod)
        embed_list.append(element_wise_prod)

    ffm_out = Flatten()(concat_func(embed_list, axis=1))
    if use_bn:
        ffm_out = BatchNormalization()(ffm_out)

    dnn_input = combined_dnn_input([ffm_out], dense_value_list)
    # Forward the caller's seed so the deep tower honours it like the other models' DNNs do.
    dnn_out = DNN(dnn_hidden_units, l2_reg=l2_reg_dnn, dropout_rate=dnn_dropout, seed=seed)(dnn_input)
    dnn_logit = Dense(1, use_bias=False)(dnn_out)

    final_logit = add_func([dnn_logit, linear_logit])
    output = PredictionLayer(task)(final_logit)

    model = Model(inputs=inputs_list, outputs=output)
    return model
def feature_embedding(fc_i, fc_j, embedding_dict, input_feature):
    """Embed ``input_feature`` of field ``fc_i`` with the table dedicated to its interaction with ``fc_j``.

    :param fc_i: feature column whose input is being embedded.
    :param fc_j: partner feature column of the interaction.
    :param embedding_dict: two-level dict of embedding layers, keyed first by the embedded column's
        ``embedding_name`` and then by the partner column's ``embedding_name``.
    :param input_feature: input tensor of ``fc_i`` (already hashed by the caller when required).
    :return: the embedding with its mask removed for a ``SparseFeat``; otherwise the embedding
        pooled by a ``SequencePoolingLayer`` using ``fc_i.combiner``.
    """
    # The dictionary is keyed by embedding_name, which may differ from the column's name;
    # indexing by name would raise KeyError in that case.
    embedding_layer = embedding_dict[fc_i.embedding_name][fc_j.embedding_name]
    fc_i_embedding = embedding_layer(input_feature)
    if isinstance(fc_i, SparseFeat):
        return NoMask()(fc_i_embedding)
    return SequencePoolingLayer(fc_i.combiner, supports_masking=True)(fc_i_embedding)
================================================
FILE: deepctr/models/pnn.py
================================================
# -*- coding:utf-8 -*-
"""
Author:
Weichen Shen, weichenswc@163.com
Reference:
[1] Qu Y, Cai H, Ren K, et al. Product-based neural networks for user response prediction[C]//Data Mining (ICDM), 2016 IEEE 16th International Conference on. IEEE, 2016: 1149-1154.(https://arxiv.org/pdf/1611.00144.pdf)
"""
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.layers import Dense, Reshape, Flatten
from ..feature_column import build_input_features, input_from_feature_columns
from ..layers.core import PredictionLayer, DNN
from ..layers.interaction import InnerProductLayer, OutterProductLayer
from ..layers.utils import concat_func, combined_dnn_input
def PNN(dnn_feature_columns, dnn_hidden_units=(256, 128, 64), l2_reg_embedding=0.00001, l2_reg_dnn=0,
        seed=1024, dnn_dropout=0, dnn_activation='relu', use_inner=True, use_outter=False, kernel_type='mat',
        task='binary'):
    """Build the Product-based Neural Network.

    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of deep net
    :param l2_reg_embedding: float . L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param seed: integer ,to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param use_inner: bool,whether the inner-product signal is fed to the DNN.
    :param use_outter: bool,whether the outter-product signal is fed to the DNN.
    :param kernel_type: str,kernel_type used in outter-product,can be ``'mat'`` , ``'vec'`` or ``'num'``
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :raises ValueError: if ``kernel_type`` is not one of ``'mat'``, ``'vec'`` or ``'num'``.
    :return: A Keras model instance.
    """
    if kernel_type not in ['mat', 'vec', 'num']:
        raise ValueError("kernel_type must be mat,vec or num")

    features = build_input_features(dnn_feature_columns)
    inputs_list = list(features.values())
    sparse_embedding_list, dense_value_list = input_from_feature_columns(features, dnn_feature_columns,
                                                                         l2_reg_embedding, seed)

    # Both product layers are always built; the flags below only decide which ones reach the DNN.
    inner_product = Flatten()(InnerProductLayer()(sparse_embedding_list))
    outter_product = OutterProductLayer(kernel_type)(sparse_embedding_list)

    # Linear signal: all embeddings concatenated and reshaped to the sum of their last dimensions.
    flat_width = sum(int(embedding.shape[-1]) for embedding in sparse_embedding_list)
    linear_signal = Reshape([flat_width])(concat_func(sparse_embedding_list))

    deep_signals = [linear_signal]
    if use_inner:
        deep_signals.append(inner_product)
    if use_outter:
        deep_signals.append(outter_product)
    # With no product signal selected the linear signal is used as is, without concatenation.
    deep_input = concat_func(deep_signals) if len(deep_signals) > 1 else linear_signal

    dnn_input = combined_dnn_input([deep_input], dense_value_list)
    # Batch normalisation is always disabled for this DNN (fifth positional argument).
    dnn_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, False, seed=seed)(dnn_input)
    dnn_logit = Dense(1, use_bias=False)(dnn_out)

    prediction = PredictionLayer(task)(dnn_logit)
    return Model(inputs=inputs_list, outputs=prediction)
================================================
FILE: deepctr/models/sequence/__init__.py
================================================
from .bst import BST
from .dien import DIEN
from .din import DIN
from .dsin import DSIN
================================================
FILE: deepctr/models/sequence/bst.py
================================================
# -*- coding:utf-8 -*-
"""
Author:
Zichao Li, 2843656167@qq.com
Reference:
Qiwei Chen, Huan Zhao, Wei Li, Pipei Huang, and Wenwu Ou. 2019. Behavior sequence transformer for e-commerce recommendation in Alibaba. In Proceedings of the 1st International Workshop on Deep Learning Practice for High-Dimensional Sparse Data (DLP-KDD '19). Association for Computing Machinery, New York, NY, USA, Article 12, 1–4. DOI:https://doi.org/10.1145/3326937.3341261
"""
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.layers import (Dense, Flatten)
from ...feature_column import SparseFeat, VarLenSparseFeat, DenseFeat, build_input_features
from ...inputs import get_varlen_pooling_list, create_embedding_matrix, embedding_lookup, varlen_embedding_lookup, \
get_dense_input
from ...layers.core import DNN, PredictionLayer
from ...layers.sequence import Transformer, AttentionSequencePoolingLayer
from ...layers.utils import concat_func, combined_dnn_input
def BST(dnn_feature_columns, history_feature_list, transformer_num=1, att_head_num=8,
        use_bn=False, dnn_hidden_units=(256, 128, 64), dnn_activation='relu', l2_reg_dnn=0,
        l2_reg_embedding=1e-6, dnn_dropout=0.0, seed=1024, task='binary'):
    """Build the Behavior Sequence Transformer model.

    The built inputs must contain a ``"seq_length"`` entry, which is read as the length of the
    user behaviour sequence.

    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param history_feature_list: list, names of the sparse fields that have a behaviour sequence; the
        matching sequence columns are expected to be named ``"hist_" + name``.
    :param transformer_num: int, the number of transformer layer.
    :param att_head_num: int, the number of heads in multi-head self attention.
    :param use_bn: bool. Whether use BatchNormalization before activation or not in deep net
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of DNN
    :param dnn_activation: Activation function to use in DNN
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate;
        also used as the dropout rate of the transformer layers.
    :param seed: integer ,to use as random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    features = build_input_features(dnn_feature_columns)
    inputs_list = list(features.values())
    user_behavior_length = features["seq_length"]

    def columns_of(column_type):
        # Select the columns of one type; an empty/None column collection yields no columns.
        if not dnn_feature_columns:
            return []
        return [fc for fc in dnn_feature_columns if isinstance(fc, column_type)]

    sparse_feature_columns = columns_of(SparseFeat)
    dense_feature_columns = columns_of(DenseFeat)
    varlen_sparse_feature_columns = columns_of(VarLenSparseFeat)

    # Split variable-length columns into behaviour histories and ordinary pooled sequences.
    history_fc_names = ["hist_" + feat for feat in history_feature_list]
    history_feature_columns = []
    sparse_varlen_feature_columns = []
    for fc in varlen_sparse_feature_columns:
        bucket = history_feature_columns if fc.name in history_fc_names else sparse_varlen_feature_columns
        bucket.append(fc)

    embedding_dict = create_embedding_matrix(dnn_feature_columns, l2_reg_embedding, seed, prefix="",
                                             seq_mask_zero=True)

    query_emb_list = embedding_lookup(embedding_dict, features, sparse_feature_columns,
                                      return_feat_list=history_feature_list, to_list=True)
    hist_emb_list = embedding_lookup(embedding_dict, features, history_feature_columns,
                                     return_feat_list=history_fc_names, to_list=True)
    dnn_input_emb_list = embedding_lookup(embedding_dict, features, sparse_feature_columns,
                                          mask_feat_list=history_feature_list, to_list=True)
    dense_value_list = get_dense_input(features, dense_feature_columns)

    sequence_embed_dict = varlen_embedding_lookup(embedding_dict, features, sparse_varlen_feature_columns)
    sequence_embed_list = get_varlen_pooling_list(sequence_embed_dict, features, sparse_varlen_feature_columns,
                                                  to_list=True)
    dnn_input_emb_list += sequence_embed_list

    query_emb = concat_func(query_emb_list)
    deep_input_emb = concat_func(dnn_input_emb_list)
    hist_emb = concat_func(hist_emb_list)

    # Stack the transformer layers over the history embedding, used as both query and key.
    transformer_output = hist_emb
    for _ in range(transformer_num):
        # Per-head size is the current last dimension split evenly (integer division) across heads.
        last_dim = transformer_output.get_shape().as_list()[-1]
        transformer_layer = Transformer(att_embedding_size=last_dim // att_head_num, head_num=att_head_num,
                                        dropout_rate=dnn_dropout, use_positional_encoding=True, use_res=True,
                                        use_feed_forward=True, use_layer_norm=True, blinding=False, seed=seed,
                                        supports_masking=False, output_type=None)
        transformer_output = transformer_layer([transformer_output, transformer_output,
                                                user_behavior_length, user_behavior_length])

    # Pool the transformed sequence with attention against the candidate (query) embedding.
    attention_pooling = AttentionSequencePoolingLayer(att_hidden_units=(64, 16), weight_normalization=True,
                                                      supports_masking=False)
    attn_output = attention_pooling([query_emb, transformer_output, user_behavior_length])

    deep_input_emb = concat_func([deep_input_emb, attn_output], axis=-1)
    deep_input_emb = Flatten()(deep_input_emb)

    dnn_input = combined_dnn_input([deep_input_emb], dense_value_list)
    dnn_output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, use_bn, seed=seed)(dnn_input)
    final_logit = Dense(1, use_bias=False)(dnn_output)
    prediction = PredictionLayer(task)(final_logit)

    return Model(inputs=inputs_list, outputs=prediction)
================================================
FILE: deepctr/models/sequence/dien.py
================================================
# -*- coding:utf-8 -*-
"""
Author:
Weichen Shen, weichenswc@163.com
Reference:
[1] Zhou G, Mou N, Fan Y, et al. Deep Interest Evolution Network for Click-Through Rate Prediction[J]. arXiv preprint arXiv:1809.03672, 2018. (https://arxiv.org/pdf/1809.03672.pdf)
"""
import tensorflow as tf
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.layers import (Concatenate, Dense, Permute, multiply, Flatten)
from ...feature_column import SparseFeat, VarLenSparseFeat, DenseFeat, build_input_features
from ...inputs import get_varlen_pooling_list, create_embedding_matrix, embedding_lookup, varlen_embedding_lookup, \
get_dense_input
from ...layers.core import DNN, PredictionLayer
from ...layers.sequence import AttentionSequencePoolingLayer, DynamicGRU
from ...layers.utils import concat_func, reduce_mean, combined_dnn_input
def auxiliary_loss(h_states, click_seq, noclick_seq, mask, stag=None):
    """Compute the auxiliary click / no-click loss over hidden states.

    :param h_states: hidden states, concatenated on the last axis with each behaviour sequence.
    :param click_seq: clicked behaviour embeddings; must be rank 3, its second dimension is the
        history length used to build the mask.
    :param noclick_seq: negative behaviour embeddings (noted as [B,T-1,E] by the original author).
    :param mask: sequence lengths (noted as [B,1] by the original author); turned into a
        0/1 float mask over the history length.
    :param stag: forwarded as a keyword argument to the auxiliary DNN call.
    :return: scalar tensor, mean of the masked click and no-click negative log-likelihoods.
    """
    hist_len, _ = click_seq.get_shape().as_list()[1:]
    # sequence_mask adds a trailing axis of size hist_len; index 0 on axis 1 drops the middle axis.
    mask = tf.sequence_mask(mask, hist_len)
    mask = mask[:, 0, :]
    mask = tf.cast(mask, tf.float32)

    click_input_ = tf.concat([h_states, click_seq], -1)
    noclick_input_ = tf.concat([h_states, noclick_seq], -1)

    # One shared network scores both positive and negative pairs; the last unit is the probability.
    auxiliary_nn = DNN([100, 50, 1], activation='sigmoid')
    click_prop_ = auxiliary_nn(click_input_, stag=stag)[:, :, 0]
    noclick_prop_ = auxiliary_nn(noclick_input_, stag=stag)[:, :, 0]  # [B,T-1]

    # ``tf.log`` is missing on newer TensorFlow releases; fall back to the compat.v1 alias.
    try:
        log_fn = tf.log
    except AttributeError:
        log_fn = tf.compat.v1.log

    def masked_neg_log(probabilities, reference_seq):
        # Negative log-probability reshaped to [-1, time steps of reference_seq], zeroed on padding.
        return - tf.reshape(log_fn(probabilities), [-1, tf.shape(reference_seq)[1]]) * mask

    click_loss_ = masked_neg_log(click_prop_, click_seq)
    noclick_loss_ = masked_neg_log(1.0 - noclick_prop_, noclick_seq)

    return reduce_mean(click_loss_ + noclick_loss_)
def interest_evolution(concat_behavior, deep_input_item, user_behavior_length, gru_type="GRU", use_neg=False,
                       neg_concat_behavior=None, att_hidden_size=(64, 16), att_activation='sigmoid',
                       att_weight_normalization=False, ):
    """Run the two-stage GRU interest extraction / evolution over a behaviour sequence.

    :param concat_behavior: behaviour sequence embeddings fed to the first GRU.
    :param deep_input_item: candidate item embedding used as the attention query.
    :param user_behavior_length: behaviour sequence lengths.
    :param gru_type: str, one of ``"GRU"``, ``"AIGRU"``, ``"AGRU"`` or ``"AUGRU"``.
    :param use_neg: bool, when set together with ``gru_type="AUGRU"`` the auxiliary loss is computed.
    :param neg_concat_behavior: negative behaviour sequence embeddings, only read for the auxiliary loss.
    :param att_hidden_size: hidden units of the attention network.
    :param att_activation: activation of the attention network.
    :param att_weight_normalization: bool, whether the attention scores are normalized.
    :raises ValueError: if ``gru_type`` is not one of the supported values.
    :return: tuple ``(hist, aux_loss)``; ``aux_loss`` is ``None`` unless the auxiliary loss was computed.
    """
    if gru_type not in ["GRU", "AIGRU", "AGRU", "AUGRU"]:
        raise ValueError("gru_type error ")

    aux_loss_1 = None
    embedding_size = None  # passed unchanged to every DynamicGRU below

    def attention_layer(return_score):
        # Attention over the GRU outputs; only the kind of result (pooled vs. scores) varies.
        return AttentionSequencePoolingLayer(att_hidden_units=att_hidden_size, att_activation=att_activation,
                                             weight_normalization=att_weight_normalization,
                                             return_score=return_score)

    # First stage: a plain GRU over the behaviour sequence.
    rnn_outputs = DynamicGRU(embedding_size, return_sequence=True,
                             name="gru1")([concat_behavior, user_behavior_length])

    if gru_type == "AUGRU" and use_neg:
        # State at step t is scored against the behaviour at step t + 1, hence the shifted slices
        # and the length reduced by one.
        aux_loss_1 = auxiliary_loss(rnn_outputs[:, :-1, :], concat_behavior[:, 1:, :],
                                    neg_concat_behavior[:, 1:, :],
                                    tf.subtract(user_behavior_length, 1), stag="gru")

    if gru_type == "GRU":
        # Second plain GRU, then attention pooling over its outputs.
        rnn_outputs2 = DynamicGRU(embedding_size, return_sequence=True,
                                  name="gru2")([rnn_outputs, user_behavior_length])
        pooled = attention_layer(return_score=False)([deep_input_item, rnn_outputs2, user_behavior_length])
        return pooled, aux_loss_1

    # AIGRU / AGRU / AUGRU: attention scores are computed on the first GRU's outputs.
    scores = attention_layer(return_score=True)([deep_input_item, rnn_outputs, user_behavior_length])

    if gru_type == "AIGRU":
        # Scores rescale the GRU outputs, which then go through a plain GRU.
        weighted_outputs = multiply([rnn_outputs, Permute([2, 1])(scores)])
        final_state2 = DynamicGRU(embedding_size, gru_type="GRU", return_sequence=False, name='gru2')(
            [weighted_outputs, user_behavior_length])
    else:
        # AGRU / AUGRU: scores are handed to the attention-aware GRU as a third input.
        final_state2 = DynamicGRU(embedding_size, gru_type=gru_type, return_sequence=False,
                                  name='gru2')([rnn_outputs, user_behavior_length, Permute([2, 1])(scores)])

    return final_state2, aux_loss_1
def DIEN(dnn_feature_columns, history_feature_list,
         gru_type="GRU", use_negsampling=False, alpha=1.0, use_bn=False, dnn_hidden_units=(256, 128, 64),
         dnn_activation='relu',
         att_hidden_units=(64, 16), att_activation="dice", att_weight_normalization=True,
         l2_reg_dnn=0, l2_reg_embedding=1e-6, dnn_dropout=0, seed=1024, task='binary'):
    """Instantiates the Deep Interest Evolution Network architecture.

    The feature columns must include a feature named ``"seq_length"`` (it is read
    from the built inputs). History sequences are the ``VarLenSparseFeat`` columns
    named ``"hist_" + name`` and negative-sampled sequences those named
    ``"neg_hist_" + name``, for each ``name`` in ``history_feature_list``.

    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param history_feature_list: list, to indicate sequence sparse field
    :param gru_type: str, can be GRU AIGRU AUGRU AGRU
    :param use_negsampling: bool, whether or not to use negative sampling. The auxiliary loss is only
        produced by ``interest_evolution`` when ``gru_type == "AUGRU"``; for the other types no
        auxiliary loss is added to the model.
    :param alpha: float, weight of auxiliary_loss
    :param use_bn: bool. Whether use BatchNormalization before activation or not in deep net
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of DNN
    :param dnn_activation: Activation function to use in DNN
    :param att_hidden_units: list,list of positive integer , the layer number and units in each layer of attention net
    :param att_activation: Activation function to use in attention net
    :param att_weight_normalization: bool. Whether normalize the attention score of local activation unit.
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param seed: integer, to use as random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    features = build_input_features(dnn_feature_columns)
    user_behavior_length = features["seq_length"]

    sparse_feature_columns = list(
        filter(lambda x: isinstance(x, SparseFeat), dnn_feature_columns)) if dnn_feature_columns else []
    dense_feature_columns = list(
        filter(lambda x: isinstance(x, DenseFeat), dnn_feature_columns)) if dnn_feature_columns else []
    varlen_sparse_feature_columns = list(
        filter(lambda x: isinstance(x, VarLenSparseFeat), dnn_feature_columns)) if dnn_feature_columns else []

    # Split the variable-length columns into history, negative history and the
    # remaining (pooled) sequence features, based purely on their names.
    history_feature_columns = []
    neg_history_feature_columns = []
    sparse_varlen_feature_columns = []
    history_fc_names = list(map(lambda x: "hist_" + x, history_feature_list))
    neg_history_fc_names = list(map(lambda x: "neg_" + x, history_fc_names))
    for fc in varlen_sparse_feature_columns:
        feature_name = fc.name
        if feature_name in history_fc_names:
            history_feature_columns.append(fc)
        elif feature_name in neg_history_fc_names:
            neg_history_feature_columns.append(fc)
        else:
            sparse_varlen_feature_columns.append(fc)

    inputs_list = list(features.values())

    embedding_dict = create_embedding_matrix(dnn_feature_columns, l2_reg_embedding, seed, prefix="",
                                             seq_mask_zero=False)

    query_emb_list = embedding_lookup(embedding_dict, features, sparse_feature_columns,
                                      return_feat_list=history_feature_list, to_list=True)
    keys_emb_list = embedding_lookup(embedding_dict, features, history_feature_columns,
                                     return_feat_list=history_fc_names, to_list=True)
    dnn_input_emb_list = embedding_lookup(embedding_dict, features, sparse_feature_columns,
                                          mask_feat_list=history_feature_list, to_list=True)
    dense_value_list = get_dense_input(features, dense_feature_columns)

    sequence_embed_dict = varlen_embedding_lookup(embedding_dict, features, sparse_varlen_feature_columns)
    sequence_embed_list = get_varlen_pooling_list(sequence_embed_dict, features, sparse_varlen_feature_columns,
                                                  to_list=True)
    dnn_input_emb_list += sequence_embed_list

    keys_emb = concat_func(keys_emb_list)
    deep_input_emb = concat_func(dnn_input_emb_list)
    query_emb = concat_func(query_emb_list)

    if use_negsampling:
        neg_uiseq_embed_list = embedding_lookup(embedding_dict, features, neg_history_feature_columns,
                                                neg_history_fc_names, to_list=True)
        neg_concat_behavior = concat_func(neg_uiseq_embed_list)
    else:
        neg_concat_behavior = None

    hist, aux_loss_1 = interest_evolution(keys_emb, query_emb, user_behavior_length, gru_type=gru_type,
                                          use_neg=use_negsampling, neg_concat_behavior=neg_concat_behavior,
                                          att_hidden_size=att_hidden_units,
                                          att_activation=att_activation,
                                          att_weight_normalization=att_weight_normalization, )

    deep_input_emb = Concatenate()([deep_input_emb, hist])
    deep_input_emb = Flatten()(deep_input_emb)

    dnn_input = combined_dnn_input([deep_input_emb], dense_value_list)
    output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, use_bn, seed=seed)(dnn_input)
    final_logit = Dense(1, use_bias=False, kernel_initializer=tf.keras.initializers.glorot_normal(seed))(output)
    output = PredictionLayer(task)(final_logit)

    model = Model(inputs=inputs_list, outputs=output)

    # interest_evolution returns aux_loss_1 = None for every gru_type except
    # "AUGRU"; multiplying None by alpha would raise TypeError, so only register
    # the auxiliary loss when one was actually built.
    if use_negsampling and aux_loss_1 is not None:
        model.add_loss(alpha * aux_loss_1)

    try:
        tf.keras.backend.get_session().run(tf.global_variables_initializer())
    except AttributeError:
        # Fallback for TensorFlow builds where the names above are missing and
        # only the compat.v1 API is available.
        tf.compat.v1.keras.backend.get_session().run(tf.compat.v1.global_variables_initializer())
        tf.compat.v1.experimental.output_all_intermediates(True)
    return model
================================================
FILE: deepctr/models/sequence/din.py
================================================
# -*- coding:utf-8 -*-
"""
Author:
Weichen Shen, weichenswc@163.com
Reference:
[1] Zhou G, Zhu X, Song C, et al. Deep interest network for click-through rate prediction[C]//Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining. ACM, 2018: 1059-1068. (https://arxiv.org/pdf/1706.06978.pdf)
"""
from tensorflow.python.keras.layers import Dense, Flatten
from tensorflow.python.keras.models import Model
from ...feature_column import SparseFeat, VarLenSparseFeat, DenseFeat, build_input_features
from ...inputs import create_embedding_matrix, embedding_lookup, get_dense_input, varlen_embedding_lookup, \
get_varlen_pooling_list
from ...layers.core import DNN, PredictionLayer
from ...layers.sequence import AttentionSequencePoolingLayer
from ...layers.utils import concat_func, combined_dnn_input
def DIN(dnn_feature_columns, history_feature_list, dnn_use_bn=False,
        dnn_hidden_units=(256, 128, 64), dnn_activation='relu', att_hidden_size=(80, 40), att_activation="dice",
        att_weight_normalization=False, l2_reg_dnn=0, l2_reg_embedding=1e-6, dnn_dropout=0, seed=1024,
        task='binary'):
    """Instantiates the Deep Interest Network architecture.

    History sequences are the ``VarLenSparseFeat`` columns whose name is
    ``"hist_" + name`` for some ``name`` in ``history_feature_list``; every other
    ``VarLenSparseFeat`` column is pooled and appended to the DNN input.

    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param history_feature_list: list, to indicate sequence sparse field
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in deep net
    :param dnn_hidden_units: list, list of positive integer or empty list, the layer number and units in each layer of deep net
    :param dnn_activation: Activation function to use in deep net
    :param att_hidden_size: list, list of positive integer, the layer number and units in each layer of attention net
    :param att_activation: Activation function to use in attention net
    :param att_weight_normalization: bool. Whether normalize the attention score of local activation unit.
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param seed: integer, to use as random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    features = build_input_features(dnn_feature_columns)

    # Bucket the columns by type.
    if dnn_feature_columns:
        sparse_feature_columns = [col for col in dnn_feature_columns if isinstance(col, SparseFeat)]
        dense_feature_columns = [col for col in dnn_feature_columns if isinstance(col, DenseFeat)]
        varlen_sparse_feature_columns = [col for col in dnn_feature_columns if isinstance(col, VarLenSparseFeat)]
    else:
        sparse_feature_columns = []
        dense_feature_columns = []
        varlen_sparse_feature_columns = []

    # Variable-length columns are either behavior history or ordinary sequences.
    history_fc_names = ["hist_" + name for name in history_feature_list]
    history_feature_columns = []
    sparse_varlen_feature_columns = []
    for col in varlen_sparse_feature_columns:
        target = history_feature_columns if col.name in history_fc_names else sparse_varlen_feature_columns
        target.append(col)

    inputs_list = list(features.values())

    embedding_dict = create_embedding_matrix(dnn_feature_columns, l2_reg_embedding, seed, prefix="")

    query_emb_list = embedding_lookup(embedding_dict, features, sparse_feature_columns, history_feature_list,
                                      history_feature_list, to_list=True)
    keys_emb_list = embedding_lookup(embedding_dict, features, history_feature_columns, history_fc_names,
                                     history_fc_names, to_list=True)
    dnn_input_emb_list = embedding_lookup(embedding_dict, features, sparse_feature_columns,
                                          mask_feat_list=history_feature_list, to_list=True)
    dense_value_list = get_dense_input(features, dense_feature_columns)

    # Pooled embeddings of the non-history sequences join the DNN inputs.
    sequence_embed_dict = varlen_embedding_lookup(embedding_dict, features, sparse_varlen_feature_columns)
    sequence_embed_list = get_varlen_pooling_list(sequence_embed_dict, features, sparse_varlen_feature_columns,
                                                  to_list=True)
    dnn_input_emb_list += sequence_embed_list

    keys_emb = concat_func(keys_emb_list, mask=True)
    deep_input_emb = concat_func(dnn_input_emb_list)
    query_emb = concat_func(query_emb_list, mask=True)

    attention_pooling = AttentionSequencePoolingLayer(att_hidden_size, att_activation,
                                                      weight_normalization=att_weight_normalization,
                                                      supports_masking=True)
    hist = attention_pooling([query_emb, keys_emb])

    deep_input_emb = Flatten()(concat_func([deep_input_emb, hist]))
    dnn_input = combined_dnn_input([deep_input_emb], dense_value_list)

    dnn_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)(dnn_input)
    final_logit = Dense(1, use_bias=False)(dnn_out)
    prediction = PredictionLayer(task)(final_logit)

    return Model(inputs=inputs_list, outputs=prediction)
================================================
FILE: deepctr/models/sequence/dsin.py
================================================
# coding: utf-8
"""
Author:
Weichen Shen, weichenswc@163.com
Reference:
[1] Feng Y, Lv F, Shen W, et al. Deep Session Interest Network for Click-Through Rate Prediction[J]. arXiv preprint arXiv:1905.06482, 2019.(https://arxiv.org/abs/1905.06482)
"""
from collections import OrderedDict
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.layers import (Concatenate, Dense, Embedding,
Flatten, Input)
from tensorflow.python.keras.regularizers import l2
from ...feature_column import SparseFeat, VarLenSparseFeat, DenseFeat, build_input_features
from ...inputs import (get_embedding_vec_list, get_inputs_list, embedding_lookup, get_dense_input)
from ...layers.core import DNN, PredictionLayer
from ...layers.sequence import (AttentionSequencePoolingLayer, BiasEncoding,
BiLSTM, Transformer)
from ...layers.utils import concat_func, combined_dnn_input
def DSIN(dnn_feature_columns, sess_feature_list, sess_max_count=5, bias_encoding=False,
         att_embedding_size=1, att_head_num=8, dnn_hidden_units=(256, 128, 64), dnn_activation='relu', dnn_dropout=0,
         dnn_use_bn=False, l2_reg_dnn=0, l2_reg_embedding=1e-6, seed=1024, task='binary',
         ):
    """Instantiates the Deep Session Interest Network architecture.

    For every session index ``idx`` in ``range(sess_max_count)`` and every ``feat``
    in ``sess_feature_list`` the built inputs must contain a feature named
    ``"sess_<idx>_<feat>"``. An extra ``Input`` named ``"sess_length"`` with shape
    ``(1,)`` is created here and appended to the model inputs.

    NOTE(review): only ``SparseFeat`` and ``DenseFeat`` columns (plus the session
    inputs above) reach the network in this function; other ``VarLenSparseFeat``
    columns are not pooled into the DNN input - confirm whether that is intended.

    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param sess_feature_list: list, to indicate sequence sparse field
    :param sess_max_count: positive int, to indicate the max number of sessions
    :param bias_encoding: bool. Whether use bias encoding or positional encoding
    :param att_embedding_size: positive int, the embedding size of each attention head
    :param att_head_num: positive int, the number of attention head
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of deep net
    :param dnn_activation: Activation function to use in deep net
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in deep net
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param seed: integer, to use as random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    :raises ValueError: if the summed ``embedding_dim`` of the columns named in
        ``sess_feature_list`` differs from ``att_embedding_size * att_head_num``.
    """
    # The concatenated session embedding is passed through the Transformer, whose
    # width is att_embedding_size * att_head_num, so the two must agree.
    hist_emb_size = sum(fc.embedding_dim for fc in dnn_feature_columns if fc.name in sess_feature_list)
    if att_embedding_size * att_head_num != hist_emb_size:
        raise ValueError(
            "hist_emb_size must equal to att_embedding_size * att_head_num ,got %d != %d *%d" % (
                hist_emb_size, att_embedding_size, att_head_num))

    features = build_input_features(dnn_feature_columns)

    sparse_feature_columns = list(
        filter(lambda x: isinstance(x, SparseFeat), dnn_feature_columns)) if dnn_feature_columns else []
    dense_feature_columns = list(
        filter(lambda x: isinstance(x, DenseFeat), dnn_feature_columns)) if dnn_feature_columns else []

    inputs_list = list(features.values())

    # Group the per-session inputs: {"sess_<idx>": {feat: features["sess_<idx>_<feat>"]}}.
    user_behavior_input_dict = {}
    for idx in range(sess_max_count):
        sess_name = "sess_" + str(idx)
        sess_input = OrderedDict()
        for feat in sess_feature_list:
            sess_input[feat] = features[sess_name + "_" + feat]
        user_behavior_input_dict[sess_name] = sess_input

    user_sess_length = Input(shape=(1,), name='sess_length')

    # Embeddings of session features use mask_zero=True; all others do not.
    embedding_dict = {feat.embedding_name: Embedding(feat.vocabulary_size, feat.embedding_dim,
                                                     embeddings_initializer=feat.embeddings_initializer,
                                                     embeddings_regularizer=l2(
                                                         l2_reg_embedding),
                                                     name='sparse_emb_' +
                                                          str(i) + '-' + feat.name,
                                                     mask_zero=(feat.name in sess_feature_list)) for i, feat in
                      enumerate(sparse_feature_columns)}

    query_emb_list = embedding_lookup(embedding_dict, features, sparse_feature_columns, sess_feature_list,
                                      sess_feature_list, to_list=True)
    dnn_input_emb_list = embedding_lookup(embedding_dict, features, sparse_feature_columns,
                                          mask_feat_list=sess_feature_list, to_list=True)
    dense_value_list = get_dense_input(features, dense_feature_columns)

    query_emb = concat_func(query_emb_list, mask=True)
    dnn_input_emb = Flatten()(concat_func(dnn_input_emb_list))

    tr_input = sess_interest_division(embedding_dict, user_behavior_input_dict, sparse_feature_columns,
                                      sess_feature_list, sess_max_count, bias_encoding=bias_encoding)

    # One Transformer instance is shared by every session.
    self_attention = Transformer(att_embedding_size, att_head_num, dropout_rate=0, use_layer_norm=False,
                                 use_positional_encoding=(not bias_encoding), seed=seed, supports_masking=True,
                                 blinding=True)
    sess_fea = sess_interest_extractor(
        tr_input, sess_max_count, self_attention)

    interest_attention_layer = AttentionSequencePoolingLayer(att_hidden_units=(64, 16), weight_normalization=True,
                                                             supports_masking=False)(
        [query_emb, sess_fea, user_sess_length])

    lstm_outputs = BiLSTM(hist_emb_size,
                          layers=2, res_layers=0, dropout_rate=0.2, )(sess_fea)
    lstm_attention_layer = AttentionSequencePoolingLayer(att_hidden_units=(64, 16), weight_normalization=True)(
        [query_emb, lstm_outputs, user_sess_length])

    dnn_input_emb = Concatenate()(
        [dnn_input_emb, Flatten()(interest_attention_layer), Flatten()(lstm_attention_layer)])

    dnn_input_emb = combined_dnn_input([dnn_input_emb], dense_value_list)
    output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)(dnn_input_emb)
    output = Dense(1, use_bias=False)(output)
    output = PredictionLayer(task)(output)

    model = Model(inputs=inputs_list + [user_sess_length], outputs=output)

    return model
def sess_interest_division(sparse_embedding_dict, user_behavior_input_dict, sparse_fg_list, sess_feture_list,
                           sess_max_count,
                           bias_encoding=True):
    """Build one concatenated embedding per session.

    For each index ``i`` in ``range(sess_max_count)`` the inputs stored under
    ``user_behavior_input_dict["sess_<i>"]`` are embedded with
    ``get_embedding_vec_list`` and concatenated with ``concat_func(..., mask=True)``.

    :param sparse_embedding_dict: embedding layers, forwarded to ``get_embedding_vec_list``.
    :param user_behavior_input_dict: dict keyed by ``"sess_<i>"`` holding each session's inputs.
    :param sparse_fg_list: feature columns, forwarded to ``get_embedding_vec_list``.
    :param sess_feture_list: session feature names, forwarded twice to ``get_embedding_vec_list``.
    :param sess_max_count: int, number of sessions to process.
    :param bias_encoding: bool, when truthy the list is passed through ``BiasEncoding(sess_max_count)``.
    :return: the list of per-session embeddings, or the ``BiasEncoding`` output when ``bias_encoding`` is truthy.
    """
    session_embeddings = []
    for sess_idx in range(sess_max_count):
        session_inputs = user_behavior_input_dict["sess_" + str(sess_idx)]
        embedding_vecs = get_embedding_vec_list(sparse_embedding_dict, session_inputs,
                                                sparse_fg_list, sess_feture_list, sess_feture_list)
        session_embeddings.append(concat_func(embedding_vecs, mask=True))

    if not bias_encoding:
        return session_embeddings
    return BiasEncoding(sess_max_count)(session_embeddings)
def sess_interest_extractor(tr_input, sess_max_count, TR):
    """Apply ``TR`` to every session and concatenate the results on axis 1.

    :param tr_input: indexable collection holding one entry per session.
    :param sess_max_count: int, number of sessions to process.
    :param TR: callable invoked as ``TR([x, x])`` with each session entry ``x``.
    :return: ``concat_func`` of the per-session outputs with ``axis=1``.
    """
    # Each session entry is handed to TR twice, as a two-element list.
    per_session = [TR([tr_input[idx], tr_input[idx]]) for idx in range(sess_max_count)]
    return concat_func(per_session, axis=1)
================================================
FILE: deepctr/models/wdl.py
================================================
# -*- coding:utf-8 -*-
"""
Author:
Weichen Shen, weichenswc@163.com
Reference:
[1] Cheng H T, Koc L, Harmsen J, et al. Wide & deep learning for recommender systems[C]//Proceedings of the 1st Workshop on Deep Learning for Recommender Systems. ACM, 2016: 7-10.(https://arxiv.org/pdf/1606.07792.pdf)
"""
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.layers import Dense
from ..feature_column import build_input_features, get_linear_logit, input_from_feature_columns
from ..layers.core import PredictionLayer, DNN
from ..layers.utils import add_func, combined_dnn_input
def WDL(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(256, 128, 64), l2_reg_linear=0.00001,
        l2_reg_embedding=0.00001, l2_reg_dnn=0, seed=1024, dnn_dropout=0, dnn_activation='relu',
        task='binary'):
    """Instantiates the Wide&Deep Learning architecture.

    The final logit is the sum of a linear (wide) logit and the logit of a DNN
    fed with the embedded/dense features (deep part).

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param dnn_hidden_units: list, list of positive integer or empty list, the layer number and units in each layer of DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to wide part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    all_feature_columns = linear_feature_columns + dnn_feature_columns
    features = build_input_features(all_feature_columns)
    inputs_list = list(features.values())

    # Wide part.
    linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed, prefix='linear',
                                    l2_reg=l2_reg_linear)

    # Deep part; batch normalization is always disabled here (fifth DNN argument is False).
    embeddings, dense_values = input_from_feature_columns(features, dnn_feature_columns,
                                                          l2_reg_embedding, seed)
    dnn_input = combined_dnn_input(embeddings, dense_values)
    deep_hidden = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, False, seed=seed)(dnn_input)
    dnn_logit = Dense(1, use_bias=False)(deep_hidden)

    combined_logit = add_func([dnn_logit, linear_logit])
    prediction = PredictionLayer(task)(combined_logit)

    return Model(inputs=inputs_list, outputs=prediction)
================================================
FILE: deepctr/models/xdeepfm.py
================================================
# -*- coding:utf-8 -*-
"""
Author:
Weichen Shen, weichenswc@163.com
Reference:
[1] Lian J, Zhou X, Zhang F, et al. xDeepFM: Combining Explicit and Implicit Feature Interactions for Recommender Systems[J]. arXiv preprint arXiv:1803.05170, 2018.(https://arxiv.org/pdf/1803.05170.pdf)
"""
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.layers import Dense
from ..feature_column import build_input_features, get_linear_logit, input_from_feature_columns
from ..layers.core import PredictionLayer, DNN
from ..layers.interaction import CIN
from ..layers.utils import concat_func, add_func, combined_dnn_input
def xDeepFM(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(256, 128, 64),
            cin_layer_size=(128, 128,), cin_split_half=True, cin_activation='relu', l2_reg_linear=0.00001,
            l2_reg_embedding=0.00001, l2_reg_dnn=0, l2_reg_cin=0, seed=1024, dnn_dropout=0,
            dnn_activation='relu', dnn_use_bn=False, task='binary'):
    """Instantiates the xDeepFM architecture.

    The final logit sums a linear logit, a DNN logit and, when ``cin_layer_size``
    is non-empty, a Compressed Interaction Network (CIN) logit.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param dnn_hidden_units: list, list of positive integer or empty list, the layer number and units in each layer of deep net
    :param cin_layer_size: list, list of positive integer or empty list, the feature maps in each hidden layer of Compressed Interaction Network
    :param cin_split_half: bool. if set to True, half of the feature maps in each hidden will connect to output unit
    :param cin_activation: activation function used on feature maps
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: L2 regularizer strength applied to deep net
    :param l2_reg_cin: L2 regularizer strength applied to CIN.
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    all_feature_columns = linear_feature_columns + dnn_feature_columns
    features = build_input_features(all_feature_columns)
    inputs_list = list(features.values())

    linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed, prefix='linear',
                                    l2_reg=l2_reg_linear)

    embeddings, dense_values = input_from_feature_columns(features, dnn_feature_columns,
                                                          l2_reg_embedding, seed)

    # The CIN consumes the embeddings concatenated on axis 1.
    fm_input = concat_func(embeddings, axis=1)

    dnn_input = combined_dnn_input(embeddings, dense_values)
    deep_hidden = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)(dnn_input)
    dnn_logit = Dense(1, use_bias=False)(deep_hidden)

    logits = add_func([linear_logit, dnn_logit])

    # An empty cin_layer_size disables the explicit-interaction branch entirely.
    if len(cin_layer_size) > 0:
        cin_layer = CIN(cin_layer_size, cin_activation,
                        cin_split_half, l2_reg_cin, seed)
        cin_out = cin_layer(fm_input)
        cin_logit = Dense(1, use_bias=False)(cin_out)
        logits = add_func([logits, cin_logit])

    prediction = PredictionLayer(task)(logits)

    return Model(inputs=inputs_list, outputs=prediction)
================================================
FILE: deepctr/utils.py
================================================
# -*- coding:utf-8 -*-
"""
Author:
Weichen Shen,weichenswc@163.com
"""
import json
import logging
from threading import Thread
import requests
try:
from packaging.version import parse
except ImportError:
from pip._vendor.packaging.version import parse
def check_version(version):
    """Warn, from a background thread, if a newer DeepCTR release is on PyPI.

    The function itself returns immediately with ``None``. The spawned thread
    downloads the package metadata, picks the highest release that is neither a
    pre-release nor a post-release, and logs a warning when it is newer than
    ``version``. Any failure only prints a hint to check manually.

    :param version: str, the currently installed version.
    :return: None
    """

    def check(version):
        try:
            url_pattern = 'https://pypi.python.org/pypi/deepctr/json'
            # Bound the request so an unresponsive server cannot keep this
            # (non-daemon) thread, and hence the interpreter, alive forever.
            req = requests.get(url_pattern, timeout=10)
            latest_version = parse('0')
            version = parse(version)
            if req.status_code == requests.codes.ok:
                j = json.loads(req.text.encode('utf-8'))
                releases = j.get('releases', [])
                for release in releases:
                    ver = parse(release)
                    if ver.is_prerelease or ver.is_postrelease:
                        continue
                    latest_version = max(latest_version, ver)
                if latest_version > version:
                    logging.warning(
                        '\nDeepCTR version {0} detected. Your version is {1}.\nUse `pip install -U deepctr` to upgrade.Changelog: https://github.com/shenweichen/DeepCTR/releases/tag/v{0}'.format(
                            latest_version, version))
        except Exception:
            # Best effort only: the version check must never break the caller.
            # ``Exception`` (rather than a bare ``except``) lets KeyboardInterrupt
            # and SystemExit propagate.
            print("Please check the latest version manually on https://pypi.org/project/deepctr/#history")
            return

    Thread(target=check, args=(version,)).start()
================================================
FILE: docs/Makefile
================================================
# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line.
# SPHINXOPTS: extra options appended to every sphinx-build invocation below.
SPHINXOPTS =
# SPHINXBUILD: the command that is run for every target.
SPHINXBUILD = sphinx-build
SPHINXPROJ = DeepCTR
# SOURCEDIR / BUILDDIR: the two directories passed to sphinx-build.
SOURCEDIR = source
BUILDDIR = build
# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
.PHONY: help Makefile
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
# NOTE: recipe lines must start with a tab character.
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
================================================
FILE: docs/make.bat
================================================
@ECHO OFF
REM Switch to the directory that contains this script; undone by popd at :end.
pushd %~dp0
REM Command file for Sphinx documentation
REM Use "sphinx-build" unless SPHINXBUILD is already set in the environment.
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=source
set BUILDDIR=build
set SPHINXPROJ=DeepCTR
REM Without an argument, show the Sphinx help instead of building.
if "%1" == "" goto help
REM Probe the command with all output discarded; errorlevel 9009 is handled
REM below as "sphinx-build was not found".
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
echo.
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
echo.installed, then set the SPHINXBUILD environment variable to point
echo.to the full path of the 'sphinx-build' executable. Alternatively you
echo.may add the Sphinx directory to PATH.
echo.
echo.If you don't have Sphinx installed, grab it from
echo.http://sphinx-doc.org/
exit /b 1
)
REM Forward the requested target (first argument) to sphinx-build.
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
goto end
:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
:end
popd
================================================
FILE: docs/requirements.readthedocs.txt
================================================
tensorflow==2.6.2
recommonmark==0.7.1
================================================
FILE: docs/source/Estimators.rst
================================================
DeepCTR Estimators API
======================
.. toctree::
CCPM
FNN
PNN
WDL
DeepFM
NFM
AFM
DCN
xDeepFM
AutoInt
FiBiNET
================================================
FILE: docs/source/Examples.md
================================================
# Examples
## Classification: Criteo
The Criteo Display Ads dataset is for the purpose of predicting ads click-through rate. It has 13 integer features and
26 categorical features where each category has a high cardinality.

In this example, we simply normalize the dense features to the range [0, 1]; you can try other transformation techniques such as
log normalization or discretization. Then we use [SparseFeat](./Features.html#sparsefeat)
and [DenseFeat](./Features.html#densefeat) to generate feature columns for sparse features and dense features.
This example shows how to use ``DeepFM`` to solve a simple binary classification task. You can get the demo
data [criteo_sample.txt](https://github.com/shenweichen/DeepCTR/tree/master/examples/criteo_sample.txt)
and run the following codes.
```python
import pandas as pd
from sklearn.metrics import log_loss, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from deepctr.models import *
from deepctr.feature_column import SparseFeat, DenseFeat, get_feature_names
if __name__ == "__main__":
data = pd.read_csv('./criteo_sample.txt')
sparse_features = ['C' + str(i) for i in range(1, 27)]
dense_features = ['I' + str(i) for i in range(1, 14)]
data[sparse_features] = data[sparse_features].fillna('-1', )
data[dense_features] = data[dense_features].fillna(0, )
target = ['label']
# 1.Label Encoding for sparse features,and do simple Transformation for dense features
for feat in sparse_features:
lbe = LabelEncoder()
data[feat] = lbe.fit_transform(data[feat])
mms = MinMaxScaler(feature_range=(0, 1))
data[dense_features] = mms.fit_transform(data[dense_features])
# 2.count #unique features for each sparse field,and record dense feature field name
fixlen_feature_columns = [SparseFeat(feat, vocabulary_size=data[feat].max() + 1, embedding_dim=4)
for i, feat in enumerate(sparse_features)] + [DenseFeat(feat, 1, )
for feat in dense_features]
dnn_feature_columns = fixlen_feature_columns
linear_feature_columns = fixlen_feature_columns
feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)
# 3.generate input data for model
train, test = train_test_split(data, test_size=0.2, random_state=2020)
train_model_input = {name: train[name] for name in feature_names}
test_model_input = {name: test[name] for name in feature_names}
# 4.Define Model,train,predict and evaluate
model = DeepFM(linear_feature_columns, dnn_feature_columns, task='binary')
model.compile("adam", "binary_crossentropy",
metrics=['binary_crossentropy'], )
history = model.fit(train_model_input, train[target].values,
batch_size=256, epochs=10, verbose=2, validation_split=0.2, )
pred_ans = model.predict(test_model_input, batch_size=256)
print("test LogLoss", round(log_loss(test[target].values, pred_ans), 4))
print("test AUC", round(roc_auc_score(test[target].values, pred_ans), 4))
```
## Classification: Criteo with feature hashing on the fly
This example shows how to use ``DeepFM`` to solve a simple binary classification task using feature hashing. You can get
the demo data [criteo_sample.txt](https://github.com/shenweichen/DeepCTR/tree/master/examples/criteo_sample.txt)
and run the following codes.
```python
import pandas as pd
from sklearn.metrics import log_loss, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from deepctr.models import DeepFM
from deepctr.feature_column import SparseFeat, DenseFeat, get_feature_names
if __name__ == "__main__":
data = pd.read_csv('./criteo_sample.txt')
sparse_features = ['C' + str(i) for i in range(1, 27)]
dense_features = ['I' + str(i) for i in range(1, 14)]
data[sparse_features] = data[sparse_features].fillna('-1', )
data[dense_features] = data[dense_features].fillna(0, )
target = ['label']
# 1.do simple Transformation for dense features
mms = MinMaxScaler(feature_range=(0, 1))
data[dense_features] = mms.fit_transform(data[dense_features])
# 2.set hashing space for each sparse field,and record dense feature field name
fixlen_feature_columns = [SparseFeat(feat, vocabulary_size=1000, embedding_dim=4, use_hash=True, dtype='string')
# since the input is string
for feat in sparse_features] + [DenseFeat(feat, 1, )
for feat in dense_features]
linear_feature_columns = fixlen_feature_columns
dnn_feature_columns = fixlen_feature_columns
feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns, )
# 3.generate input data for model
train, test = train_test_split(data, test_size=0.2, random_state=2020)
train_model_input = {name: train[name] for name in feature_names}
test_model_input = {name: test[name] for name in feature_names}
# 4.Define Model,train,predict and evaluate
model = DeepFM(linear_feature_columns, dnn_feature_columns, task='binary')
model.compile("adam", "binary_crossentropy",
metrics=['binary_crossentropy'], )
history = model.fit(train_model_input, train[target].values,
batch_size=256, epochs=10, verbose=2, validation_split=0.2, )
pred_ans = model.predict(test_model_input, batch_size=256)
print("test LogLoss", round(log_loss(test[target].values, pred_ans), 4))
print("test AUC", round(roc_auc_score(test[target].values, pred_ans), 4))
```
## Regression: Movielens
The MovieLens data has been used for personalized tag recommendation; it contains 668,953 tag applications of users
on movies. Here we use a small fraction of the data that includes only sparse fields.

This example shows how to use ``DeepFM`` to solve a simple regression task. You can get the demo data
[movielens_sample.txt](https://github.com/shenweichen/DeepCTR/tree/master/examples/movielens_sample.txt) and run the
following codes.
```python
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from deepctr.models import DeepFM
from deepctr.feature_column import SparseFeat, get_feature_names
if __name__ == "__main__":
    # Regression demo: predict the `rating` column from label-encoded sparse fields.
    data = pd.read_csv("./movielens_sample.txt")
    sparse_features = ["movie_id", "user_id",
                       "gender", "age", "occupation", "zip"]
    target = ['rating']
    # 1.Label Encoding for sparse features (this example uses no dense features)
    for feat in sparse_features:
        lbe = LabelEncoder()
        data[feat] = lbe.fit_transform(data[feat])
    # 2.count #unique features for each sparse field
    # The encoded ids start at 0, so `max() + 1` is used as the vocabulary size.
    fixlen_feature_columns = [SparseFeat(feat, data[feat].max() + 1, embedding_dim=4)
                              for feat in sparse_features]
    # The linear part and the DNN part share the same feature columns.
    linear_feature_columns = fixlen_feature_columns
    dnn_feature_columns = fixlen_feature_columns
    feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)
    # 3.generate input data for model
    # 80/20 train/test split with a fixed seed; inputs are passed as {feature name: array}.
    train, test = train_test_split(data, test_size=0.2, random_state=2020)
    train_model_input = {name: train[name].values for name in feature_names}
    test_model_input = {name: test[name].values for name in feature_names}
    # 4.Define Model,train,predict and evaluate
    model = DeepFM(linear_feature_columns, dnn_feature_columns, task='regression')
    model.compile("adam", "mse", metrics=['mse'], )
    # validation_split holds out 20% of the training rows for validation during fit.
    history = model.fit(train_model_input, train[target].values,
                        batch_size=256, epochs=10, verbose=2, validation_split=0.2, )
    pred_ans = model.predict(test_model_input, batch_size=256)
    print("test MSE", round(mean_squared_error(
        test[target].values, pred_ans), 4))
```
## Multi-value Input : Movielens
The MovieLens data has been used for personalized tag recommendation, which contains 668,953 tag applications of users
on movies. Here is a small fraction of the data that includes sparse fields and a multivalent field.

There are 2 additional steps to use DeepCTR with sequence feature input.

1. Generate the padded and encoded sequence feature of the sequence input feature (**value 0 is for padding**).
2. Generate the config of the sequence feature with [VarLenSparseFeat](./Features.html#varlensparsefeat).

This example shows how to use ``DeepFM`` with sequence(multi-value) feature. You can get the demo data
[movielens_sample.txt](https://github.com/shenweichen/DeepCTR/tree/master/examples/movielens_sample.txt) and run the
following codes.
```python
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from tensorflow.python.keras.preprocessing.sequence import pad_sequences
from deepctr.models import DeepFM
from deepctr.feature_column import SparseFeat, VarLenSparseFeat, get_feature_names
def split(x):
    """Encode a ``'|'``-separated string as a list of integer ids.

    Every token not yet present in the module-level ``key2index`` mapping is
    registered there with the next free id, so the mapping grows as a side
    effect of calling this function.

    :param x: string of tokens joined by ``'|'``.
    :return: list with the id of each token, in the order the tokens appear.
    """
    tokens = x.split('|')
    for token in tokens:
        # Notice : input value 0 is a special "padding",so we do not use 0 to encode valid feature for sequence input
        # setdefault leaves already-known tokens untouched; new ids start at 1.
        key2index.setdefault(token, len(key2index) + 1)
    return [key2index[token] for token in tokens]
if __name__ == "__main__":
    # Regression demo with a multi-value (sequence) feature: `genres`.
    data = pd.read_csv("./movielens_sample.txt")
    sparse_features = ["movie_id", "user_id",
                       "gender", "age", "occupation", "zip", ]
    target = ['rating']
    # 1.Label Encoding for sparse features,and process sequence features
    for feat in sparse_features:
        lbe = LabelEncoder()
        data[feat] = lbe.fit_transform(data[feat])
    # preprocess the sequence feature
    # key2index is the global mapping that split() reads and fills while encoding.
    key2index = {}
    genres_list = list(map(split, data['genres'].values))
    genres_length = np.array(list(map(len, genres_list)))
    # The longest genre list determines the padded sequence length.
    max_len = max(genres_length)
    # Notice : padding=`post`
    genres_list = pad_sequences(genres_list, maxlen=max_len, padding='post', )
    # 2.count #unique features for each sparse field and generate feature config for sequence feature
    fixlen_feature_columns = [SparseFeat(feat, data[feat].max() + 1, embedding_dim=4)
                              for feat in sparse_features]
    # Toggle to attach a per-position weight input (`genres_weight`) to the sequence feature.
    use_weighted_sequence = False
    if use_weighted_sequence:
        # vocabulary_size is len(key2index) + 1 because ids start at 1 and 0 is the padding id.
        varlen_feature_columns = [VarLenSparseFeat(SparseFeat('genres', vocabulary_size=len(
            key2index) + 1, embedding_dim=4), maxlen=max_len, combiner='mean',
                                                   weight_name='genres_weight')]  # Notice : value 0 is for padding for sequence input feature
    else:
        varlen_feature_columns = [VarLenSparseFeat(SparseFeat('genres', vocabulary_size=len(
            key2index) + 1, embedding_dim=4), maxlen=max_len, combiner='mean',
                                                   weight_name=None)]  # Notice : value 0 is for padding for sequence input feature
    linear_feature_columns = fixlen_feature_columns + varlen_feature_columns
    dnn_feature_columns = fixlen_feature_columns + varlen_feature_columns
    feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)
    # 3.generate input data for model
    model_input = {name: data[name] for name in feature_names}
    # Replace the raw `genres` strings with the padded id matrix built above.
    model_input["genres"] = genres_list
    # Random demo weights; only referenced by the feature config when use_weighted_sequence is True.
    model_input["genres_weight"] = np.random.randn(data.shape[0], max_len, 1)
    # 4.Define Model,compile and train
    model = DeepFM(linear_feature_columns, dnn_feature_columns, task='regression')
    model.compile("adam", "mse", metrics=['mse'], )
    history = model.fit(model_input, data[target].values,
                        batch_size=256, epochs=10, verbose=2, validation_split=0.2, )
```
## Multi-value Input : Movielens with feature hashing on the fly
```python
import numpy as np
import pandas as pd
from tensorflow.python.keras.preprocessing.sequence import pad_sequences
from deepctr.feature_column import SparseFeat, VarLenSparseFeat, get_feature_names
from deepctr.models import DeepFM
if __name__ == "__main__":
    # Regression demo where raw string values are hashed by the model instead of being label-encoded.
    data = pd.read_csv("./movielens_sample.txt")
    sparse_features = ["movie_id", "user_id",
                       "gender", "age", "occupation", "zip", ]
    # The hashed feature columns below are declared with dtype='string', so cast the inputs to str.
    data[sparse_features] = data[sparse_features].astype(str)
    target = ['rating']
    # 1.Use hashing encoding on the fly for sparse features,and process sequence features
    genres_list = list(map(lambda x: x.split('|'), data['genres'].values))
    genres_length = np.array(list(map(len, genres_list)))
    max_len = max(genres_length)
    # Notice : padding=`post`
    # Pads with 0 and then casts every entry to str, so the padding token becomes the string '0'.
    genres_list = pad_sequences(genres_list, maxlen=max_len, padding='post', dtype=object, value=0).astype(str)
    # 2.set hashing space for each sparse field and generate feature config for sequence feature
    # The hashing space is set to 5x the number of distinct values of each field.
    fixlen_feature_columns = [SparseFeat(feat, data[feat].nunique() * 5, embedding_dim=4, use_hash=True, dtype='string')
                              for feat in sparse_features]
    varlen_feature_columns = [
        VarLenSparseFeat(SparseFeat('genres', vocabulary_size=100, embedding_dim=4, use_hash=True, dtype="string"),
                         maxlen=max_len, combiner='mean',
                         )]  # Notice : value 0 is for padding for sequence input feature
    linear_feature_columns = fixlen_feature_columns + varlen_feature_columns
    dnn_feature_columns = fixlen_feature_columns + varlen_feature_columns
    feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)
    # 3.generate input data for model
    model_input = {name: data[name] for name in feature_names}
    # Replace the raw `genres` strings with the padded string matrix built above.
    model_input['genres'] = genres_list
    # 4.Define Model,compile and train
    model = DeepFM(linear_feature_columns, dnn_feature_columns, task='regression')
    model.compile("adam", "mse", metrics=['mse'], )
    history = model.fit(model_input, data[target].values,
                        batch_size=256, epochs=10, verbose=2, validation_split=0.2, )
```
## Hash Layer with pre-defined key-value vocabulary
This example shows how to use a pre-defined key-value vocabulary in the `Hash` layer. `movielens_age_vocabulary.csv` stores the
key-value mapping for the `age` feature.
```python
from deepctr.models import DeepFM
from deepctr.feature_column import SparseFeat, VarLenSparseFeat, get_feature_names
import numpy as np
import pandas as pd
from tensorflow.python.keras.preprocessing.sequence import pad_sequences
try:
import tensorflow.compat.v1 as tf
except ImportError as e:
import tensorflow as tf
if __name__ == "__main__":
    # Regression demo: hashed string features, with the `age` field looked up in a
    # pre-defined key-value vocabulary file instead of being hashed on the fly.
    data = pd.read_csv("./movielens_sample.txt")
    sparse_features = ["movie_id", "user_id",
                       "gender", "age", "occupation", "zip", ]
    # The feature columns below are declared with dtype='string', so cast the inputs to str.
    data[sparse_features] = data[sparse_features].astype(str)
    target = ['rating']
    # 1.Use hashing encoding on the fly for sparse features,and process sequence features
    genres_list = list(map(lambda x: x.split('|'), data['genres'].values))
    genres_length = np.array(list(map(len, genres_list)))
    max_len = max(genres_length)
    # Notice : padding=`post`
    # Pads with 0 and then casts every entry to str, so the padding token becomes the string '0'.
    genres_list = pad_sequences(genres_list, maxlen=max_len, padding='post', dtype=object, value=0).astype(str)
    # 2.set hashing space for each sparse field and generate feature config for sequence feature
    # Only the `age` column is given a vocabulary file; every other column gets vocabulary_path=None.
    fixlen_feature_columns = [SparseFeat(feat, data[feat].nunique() * 5, embedding_dim=4, use_hash=True,
                                         vocabulary_path='./movielens_age_vocabulary.csv' if feat == 'age' else None,
                                         dtype='string')
                              for feat in sparse_features]
    varlen_feature_columns = [
        VarLenSparseFeat(SparseFeat('genres', vocabulary_size=100, embedding_dim=4,
                                    use_hash=True, dtype="string"),
                         maxlen=max_len, combiner='mean',
                         )]  # Notice : value 0 is for padding for sequence input feature
    linear_feature_columns = fixlen_feature_columns + varlen_feature_columns
    dnn_feature_columns = fixlen_feature_columns + varlen_feature_columns
    feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)
    # 3.generate input data for model
    model_input = {name: data[name] for name in feature_names}
    model_input['genres'] = genres_list
    # 4.Define Model,compile and train
    model = DeepFM(linear_feature_columns, dnn_feature_columns, task='regression')
    model.compile("adam", "mse", metrics=['mse'], )
    # Compare the major version as an integer: comparing version strings is
    # lexicographic and would misorder a two-digit major version against '2.0.0'.
    is_tf1 = not hasattr(tf, 'version') or int(tf.version.VERSION.split('.')[0]) < 2
    if is_tf1:
        # On TensorFlow 1.x the lookup tables are initialized explicitly in a session before training.
        with tf.Session() as sess:
            sess.run(tf.tables_initializer())
            history = model.fit(model_input, data[target].values,
                                batch_size=256, epochs=10, verbose=2, validation_split=0.2, )
    else:
        history = model.fit(model_input, data[target].values,
                            batch_size=256, epochs=10, verbose=2, validation_split=0.2, )
```
## Estimator with TFRecord: Classification Criteo
This example shows how to use ``DeepFMEstimator`` to solve a simple binary classification task. You can get the demo
data [criteo_sample.tr.tfrecords](https://github.com/shenweichen/DeepCTR/tree/master/examples/criteo_sample.tr.tfrecords)
and [criteo_sample.te.tfrecords](https://github.com/shenweichen/DeepCTR/tree/master/examples/criteo_sample.te.tfrecords)
and run the following codes.
```python
import tensorflow as tf
from tensorflow.python.ops.parsing_ops import FixedLenFeature
from deepctr.estimator import DeepFMEstimator
from deepctr.estimator.inputs import input_fn_tfrecord
if __name__ == "__main__":
    # Binary classification demo with DeepFMEstimator reading TFRecord files.
    # 1.generate feature_column for linear part and dnn part
    sparse_features = ['C' + str(i) for i in range(1, 27)]
    dense_features = ['I' + str(i) for i in range(1, 14)]
    dnn_feature_columns = []
    linear_feature_columns = []
    for i, feat in enumerate(sparse_features):
        # Each sparse field is an identity categorical column with 1000 buckets;
        # the DNN part wraps it in a 4-dimensional embedding column.
        dnn_feature_columns.append(tf.feature_column.embedding_column(
            tf.feature_column.categorical_column_with_identity(feat, 1000), 4))
        linear_feature_columns.append(tf.feature_column.categorical_column_with_identity(feat, 1000))
    for feat in dense_features:
        dnn_feature_columns.append(tf.feature_column.numeric_column(feat))
        linear_feature_columns.append(tf.feature_column.numeric_column(feat))
    # 2.generate input data for model
    # Parsing spec: int64 scalars for sparse fields, float32 scalars for dense fields and the label.
    feature_description = {k: FixedLenFeature(dtype=tf.int64, shape=1) for k in sparse_features}
    feature_description.update(
        {k: FixedLenFeature(dtype=tf.float32, shape=1) for k in dense_features})
    feature_description['label'] = FixedLenFeature(dtype=tf.float32, shape=1)
    train_model_input = input_fn_tfrecord('./criteo_sample.tr.tfrecords', feature_description, 'label', batch_size=256,
                                          num_epochs=1, shuffle_factor=10)
    # The evaluation input uses a larger batch and shuffle_factor=0.
    test_model_input = input_fn_tfrecord('./criteo_sample.te.tfrecords', feature_description, 'label',
                                         batch_size=2 ** 14, num_epochs=1, shuffle_factor=0)
    # 3.Define Model,train,predict and evaluate
    model = DeepFMEstimator(linear_feature_columns, dnn_feature_columns, task='binary',
                            config=tf.estimator.RunConfig(tf_random_seed=2021))
    model.train(train_model_input)
    eval_result = model.evaluate(test_model_input)
    print(eval_result)
```
## Estimator with Pandas DataFrame: Classification Criteo
This example shows how to use ``DeepFMEstimator`` to solve a simple binary classification task. You can get the demo
data [criteo_sample.txt](https://github.com/shenweichen/DeepCTR/tree/master/examples/criteo_sample.txt)
and run the following codes.
```python
import pandas as pd
import tensorflow as tf
from sklearn.metrics import log_loss, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from deepctr.estimator import DeepFMEstimator
from deepctr.estimator.inputs import input_fn_pandas
if __name__ == "__main__":
    # Binary classification demo with DeepFMEstimator fed from a pandas DataFrame.
    data = pd.read_csv('./criteo_sample.txt')
    sparse_features = ['C' + str(i) for i in range(1, 27)]
    dense_features = ['I' + str(i) for i in range(1, 14)]
    # Missing values: sparse fields get the placeholder category '-1', dense fields get 0.
    data[sparse_features] = data[sparse_features].fillna('-1', )
    data[dense_features] = data[dense_features].fillna(0, )
    target = ['label']
    # 1.Label Encoding for sparse features,and do simple Transformation for dense features
    for feat in sparse_features:
        lbe = LabelEncoder()
        data[feat] = lbe.fit_transform(data[feat])
    # Scale every dense feature into [0, 1].
    mms = MinMaxScaler(feature_range=(0, 1))
    data[dense_features] = mms.fit_transform(data[dense_features])
    # 2.count #unique features for each sparse field,and record dense feature field name
    dnn_feature_columns = []
    linear_feature_columns = []
    for i, feat in enumerate(sparse_features):
        # The encoded ids start at 0, so `max() + 1` is used as the number of buckets.
        dnn_feature_columns.append(tf.feature_column.embedding_column(
            tf.feature_column.categorical_column_with_identity(feat, data[feat].max() + 1), 4))
        linear_feature_columns.append(tf.feature_column.categorical_column_with_identity(feat, data[feat].max() + 1))
    for feat in dense_features:
        dnn_feature_columns.append(tf.feature_column.numeric_column(feat))
        linear_feature_columns.append(tf.feature_column.numeric_column(feat))
    # 3.generate input data for model
    train, test = train_test_split(data, test_size=0.2, random_state=2021)
    # NOTE(review): missing dense values were already filled with 0 above, so no further
    # imputation happens here. The test input is built without a label column (None).
    train_model_input = input_fn_pandas(train, sparse_features + dense_features, 'label', shuffle=True)
    test_model_input = input_fn_pandas(test, sparse_features + dense_features, None, shuffle=False)
    # 4.Define Model,train,predict and evaluate
    model = DeepFMEstimator(linear_feature_columns, dnn_feature_columns, task='binary',
                            config=tf.estimator.RunConfig(tf_random_seed=2021))
    model.train(train_model_input)
    # predict() yields one result per row; the predicted value is read from its 'pred' key.
    pred_ans_iter = model.predict(test_model_input)
    pred_ans = list(map(lambda x: x['pred'], pred_ans_iter))
    print("test LogLoss", round(log_loss(test[target].values, pred_ans), 4))
    print("test AUC", round(roc_auc_score(test[target].values, pred_ans), 4))
```
## MultiTask Learning:MMOE
The UCI census-income dataset is extracted from the 1994 census database. It contains 299,285 instances of demographic
information of American adults. There are 40 features in total. We construct a multi-task learning problem from this
dataset by setting some of the features as prediction targets :
- Task 1: Predict whether the income exceeds $50K;
- Task 2: Predict whether this person’s marital status is never married.
This example shows how to use ``MMOE`` to solve a multi task learning problem. You can get the demo
data [census-income.sample](https://github.com/shenweichen/DeepCTR/tree/master/examples/census-income.sample) and run
the following codes.
```python
import pandas as pd
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from deepctr.feature_column import SparseFeat, DenseFeat, get_feature_names
from deepctr.models import MMOE
if __name__ == "__main__":
    # Multi-task demo: one MMOE model predicts two binary labels derived from
    # the census-income sample (income above 50K, never married).
    # 1.Load the data and build the two task labels
    column_names = ['age', 'class_worker', 'det_ind_code', 'det_occ_code', 'education', 'wage_per_hour', 'hs_college',
                    'marital_stat', 'major_ind_code', 'major_occ_code', 'race', 'hisp_origin', 'sex', 'union_member',
                    'unemp_reason', 'full_or_part_emp', 'capital_gains', 'capital_losses', 'stock_dividends',
                    'tax_filer_stat', 'region_prev_res', 'state_prev_res', 'det_hh_fam_stat', 'det_hh_summ',
                    'instance_weight', 'mig_chg_msa', 'mig_chg_reg', 'mig_move_reg', 'mig_same', 'mig_prev_sunbelt',
                    'num_emp', 'fam_under_18', 'country_father', 'country_mother', 'country_self', 'citizenship',
                    'own_or_self', 'vet_question', 'vet_benefits', 'weeks_worked', 'year', 'income_50k']
    data = pd.read_csv('./census-income.sample', header=None, names=column_names)
    # The raw label strings carry a leading space; the literals below must match them exactly.
    data['label_income'] = data['income_50k'].map({' - 50000.': 0, ' 50000+.': 1})
    data['label_marital'] = data['marital_stat'].apply(lambda x: 1 if x == ' Never married' else 0)
    # Drop the source columns of the labels so they cannot leak into the features.
    data.drop(labels=['income_50k', 'marital_stat'], axis=1, inplace=True)
    columns = data.columns.values.tolist()
    sparse_features = ['class_worker', 'det_ind_code', 'det_occ_code', 'education', 'hs_college', 'major_ind_code',
                       'major_occ_code', 'race', 'hisp_origin', 'sex', 'union_member', 'unemp_reason',
                       'full_or_part_emp', 'tax_filer_stat', 'region_prev_res', 'state_prev_res', 'det_hh_fam_stat',
                       'det_hh_summ', 'mig_chg_msa', 'mig_chg_reg', 'mig_move_reg', 'mig_same', 'mig_prev_sunbelt',
                       'fam_under_18', 'country_father', 'country_mother', 'country_self', 'citizenship',
                       'vet_question']
    # Every remaining column that is neither sparse nor a label is treated as dense.
    dense_features = [col for col in columns if
                      col not in sparse_features and col not in ['label_income', 'label_marital']]
    data[sparse_features] = data[sparse_features].fillna('-1', )
    data[dense_features] = data[dense_features].fillna(0, )
    # 2.Scale dense features into [0, 1], label-encode sparse features and build the feature columns
    mms = MinMaxScaler(feature_range=(0, 1))
    data[dense_features] = mms.fit_transform(data[dense_features])
    for feat in sparse_features:
        lbe = LabelEncoder()
        data[feat] = lbe.fit_transform(data[feat])
    # The parentheses keep both lists in ONE expression. Without them the
    # `+ [DenseFeat ...]` line is a separate statement, so the dense columns
    # are dropped and unary `+` on a list raises TypeError.
    fixlen_feature_columns = ([SparseFeat(feat, data[feat].max() + 1, embedding_dim=4) for feat in sparse_features]
                              + [DenseFeat(feat, 1, ) for feat in dense_features])
    dnn_feature_columns = fixlen_feature_columns
    linear_feature_columns = fixlen_feature_columns
    feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)
    # 3.generate input data for model
    train, test = train_test_split(data, test_size=0.2, random_state=2020)
    train_model_input = {name: train[name] for name in feature_names}
    test_model_input = {name: test[name] for name in feature_names}
    # 4.Define Model,train,predict and evaluate
    # One task type, task name, loss and target array per task, always in the same order.
    model = MMOE(dnn_feature_columns, tower_dnn_hidden_units=[], task_types=['binary', 'binary'],
                 task_names=['label_income', 'label_marital'])
    model.compile("adam", loss=["binary_crossentropy", "binary_crossentropy"],
                  metrics=['binary_crossentropy'], )
    history = model.fit(train_model_input, [train['label_income'].values, train['label_marital'].values],
                        batch_size=256, epochs=10, verbose=2, validation_split=0.2)
    # predict() returns one prediction array per task, indexed in task order.
    pred_ans = model.predict(test_model_input, batch_size=256)
    print("test income AUC", round(roc_auc_score(test['label_income'], pred_ans[0]), 4))
    print("test marital AUC", round(roc_auc_score(test['label_marital'], pred_ans[1]), 4))
```
================================================
FILE: docs/source/FAQ.md
================================================
# FAQ
## 1. Save or load weights/models
----------------------------------------
To save/load weights, you can write code just like for any other Keras model.
```python
model = DeepFM()
model.save_weights('DeepFM_w.h5')
model.load_weights('DeepFM_w.h5')
```
To save/load models, the code is just a little different.
```python
from tensorflow.python.keras.models import save_model,load_model
model = DeepFM()
save_model(model, 'DeepFM.h5')# save_model, same as before
from deepctr.layers import custom_objects
model = load_model('DeepFM.h5',custom_objects)# load_model,just add a parameter
```
## 2. Set learning rate and use earlystopping
---------------------------------------------------
You can use any model in DeepCTR like a Keras model object.
Here is an example of how to set the learning rate and early stopping:
```python
import deepctr
from tensorflow.python.keras.optimizers import Adam,Adagrad
from tensorflow.python.keras.callbacks import EarlyStopping
model = deepctr.models.DeepFM(linear_feature_columns,dnn_feature_columns)
model.compile(Adagrad(0.1024),'binary_crossentropy',metrics=['binary_crossentropy'])
es = EarlyStopping(monitor='val_binary_crossentropy')
history = model.fit(model_input, data[target].values,batch_size=256, epochs=10, verbose=2, validation_split=0.2,callbacks=[es] )
```
If you are using Estimator models, you can set learning rate like:
```python
from deepctr.estimator import DeepFMEstimator
import tensorflow as tf
model = DeepFMEstimator(linear_feature_columns, dnn_feature_columns, task='binary',
linear_optimizer=tf.train.FtrlOptimizer(0.05), dnn_optimizer=tf.train.AdagradOptimizer(0.1)
)
```
## 3. Get the attentional weights of feature interactions in AFM
--------------------------------------------------------------------------
First, make sure that you have installed the latest version of deepctr.
Then use the following code. `attentional_weights[:,i,0]` is the attentional weight of `feature_interactions[i]` for all samples.
```python
import itertools
import deepctr
from deepctr.models import AFM
from deepctr.feature_column import get_feature_names
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.layers import Lambda
model = AFM(linear_feature_columns,dnn_feature_columns)
model.fit(model_input,target)
afmlayer = model.layers[-3]
afm_weight_model = Model(model.input,outputs=Lambda(lambda x:afmlayer.normalized_att_score)(model.input))
attentional_weights = afm_weight_model.predict(model_input,batch_size=4096)
feature_names = get_feature_names(dnn_feature_columns)
feature_interactions = list(itertools.combinations(feature_names ,2))
```
## 4. How to extract the embedding vectors in deepfm?
```python
feature_columns = [SparseFeat('user_id',120,),SparseFeat('item_id',60,),SparseFeat('cate_id',60,)]
def get_embedding_weights(dnn_feature_columns,model):
    """Collect the embedding matrix of each feature column that has an ``embedding_name``.

    Columns without an ``embedding_name`` attribute are skipped. For the others,
    the layer named ``"sparse_emb_" + <embedding_name or name>`` is fetched from
    ``model`` and the first array of its weights is stored.

    :param dnn_feature_columns: iterable of feature columns.
    :param model: object exposing ``get_layer(name)``.
    :return: dict mapping the embedding name (or the feature name when
        ``embedding_name`` is ``None``) to the first weight array of its layer.
    """
    weights_by_name = {}
    for column in dnn_feature_columns:
        if not hasattr(column, 'embedding_name'):
            continue
        # Fall back to the feature's own name when no explicit embedding name is set.
        key = column.name if column.embedding_name is None else column.embedding_name
        layer = model.get_layer("sparse_emb_" + key)
        weights_by_name[key] = layer.get_weights()[0]
    return weights_by_name
embedding_dict = get_embedding_weights(feature_columns,model)
user_id_emb = embedding_dict['user_id']
item_id_emb = embedding_dict['item_id']
```
## 5. How to add a long dense feature vector as a input to the model?
```python
from deepctr.models import DeepFM
from deepctr.feature_column import SparseFeat, DenseFeat,get_feature_names
import numpy as np
feature_columns = [SparseFeat('user_id',120,),SparseFeat('item_id',60,),DenseFeat("pic_vec",5)]
fixlen_feature_names = get_feature_names(feature_columns)
user_id = np.array([[1],[0],[1]])
item_id = np.array([[30],[20],[10]])
pic_vec = np.array([[0.1,0.5,0.4,0.3,0.2],[0.1,0.5,0.4,0.3,0.2],[0.1,0.5,0.4,0.3,0.2]])
label = np.array([1,0,1])
model_input = {'user_id':user_id,'item_id':item_id,'pic_vec':pic_vec}
model = DeepFM(feature_columns,feature_columns)
model.compile('adagrad','binary_crossentropy')
model.fit(model_input,label)
```
## 6. How to use pretrained weights to initialize embedding weights and frozen embedding weights?
-----------------------------------------------------------------------------------------------------
Use `tf.initializers.constant()` to set the `embeddings_initializer` of `SparseFeat`, and set `trainable=False` to freeze the embedding weights.
```python
import numpy as np
import tensorflow as tf
from deepctr.models import DeepFM
from deepctr.feature_column import SparseFeat,get_feature_names
pretrained_item_weights = np.random.randn(60,4)
pretrained_weights_initializer = tf.initializers.constant(pretrained_item_weights)
feature_columns = [SparseFeat('user_id',120,),SparseFeat('item_id',60,embedding_dim=4,embeddings_initializer=pretrained_weights_initializer,trainable=False)]
fixlen_feature_names = get_feature_names(feature_columns)
user_id = np.array([[1],[0],[1]])
item_id = np.array([[30],[20],[10]])
label = np.array([1,0,1])
model_input = {'user_id':user_id,'item_id':item_id,}
model = DeepFM(feature_columns,feature_columns)
model.compile('adagrad','binary_crossentropy')
model.fit(model_input,label)
```
## 7. How to run the demo with GPU ?
just install deepctr with
```bash
$ pip install deepctr[gpu]
```
## 8. How to run the demo with multiple GPUs
You can use multiple GPUs with TensorFlow versions higher than ``1.4``; see [run_classification_criteo_multi_gpu.py](https://github.com/shenweichen/DeepCTR/blob/master/examples/run_classification_criteo_multi_gpu.py)
================================================
FILE: docs/source/Features.md
================================================
# Features
## Overview
With the great success of deep learning, DNN-based techniques have been widely used in CTR prediction tasks.
DNN-based CTR prediction models usually have the following 4 modules:
`Input,Embedding,Low-order&High-order Feature Extractor,Prediction`
- Input&Embedding
> The data in a CTR estimation task usually includes highly sparse, high-cardinality categorical features and some dense numerical features.
> Since DNNs are good at handling dense numerical features, we usually map the sparse categorical features to dense numerical vectors through the `embedding technique`.
> For numerical features, we usually apply `discretization` or `normalization` to them.
- Feature Extractor
> The low-order extractor learns feature interactions through products between vectors. Factorization Machine and its variants are widely used to learn low-order feature interactions.
> The high-order extractor learns feature combinations through complex neural network functions like MLP, Cross Net, etc.
## Feature Columns
### SparseFeat
``SparseFeat`` is a namedtuple with
signature ``SparseFeat(name, vocabulary_size, embedding_dim, use_hash, vocabulary_path, dtype, embeddings_initializer, embedding_name, group_name, trainable)``
- name : feature name
- vocabulary_size : number of unique feature values for sparse feature or hashing space when `use_hash=True`
- embedding_dim : embedding dimension
- use_hash : default `False`.If `True` the input will be hashed to space of size `vocabulary_size`.
- vocabulary_path : default `None`. The `CSV` text file path of the vocabulary table used
by `tf.lookup.TextFileInitializer`, which assigns one entry in the table for each line in the file. One entry contains
two columns separated by a comma: the first is the value column, the second is the key column. The `0` value is reserved
for keys that are missing from the table, so hash values need to start from `1`.
- dtype : default `int32`.dtype of input tensor.
- embeddings_initializer : initializer for the `embeddings` matrix.
- embedding_name : default `None`. If None, the embedding_name will be same as `name`.
- group_name : feature group of this feature.
- trainable: default `True`.Whether or not the embedding is trainable.
### DenseFeat
``DenseFeat`` is a namedtuple with signature ``DenseFeat(name, dimension, dtype, transform_fn)``
- name : feature name
- dimension : dimension of dense feature vector.
- dtype : default `float32`.dtype of input tensor.
- transform_fn : If not `None`, a function that can be used to transform the values of the feature. The function takes the
input Tensor as its argument, and returns the output Tensor
(e.g. `lambda x: (x - 3.0) / 4.2`).
### VarLenSparseFeat
``VarLenSparseFeat`` is a namedtuple with
signature ``VarLenSparseFeat(sparsefeat, maxlen, combiner, length_name, weight_name,weight_norm)``
- sparsefeat : an instance of `SparseFeat`
- maxlen : maximum length of this feature for all samples
- combiner : pooling method,can be ``sum``,``mean`` or ``max``
- length_name : feature length name,if `None`, value 0 in feature is for padding.
- weight_name : default `None`. If not `None`, the sequence feature will be multiplied by the feature whose name
is `weight_name`.
- weight_norm : default `True`. Whether to normalize the weight score or not.
## Models
### CCPM (Convolutional Click Prediction Model)
CCPM can extract local-global key features from an input instance with varied elements, which can be implemented for not
only single ad impression but also sequential ad impression.
[**CCPM Model API**](./deepctr.models.ccpm.html)
[**CCPM Estimator API**](./deepctr.estimator.models.ccpm.html)

[Liu Q, Yu F, Wu S, et al. A convolutional click prediction model[C]//Proceedings of the 24th ACM International on Conference on Information and Knowledge Management. ACM, 2015: 1743-1746.](http://ir.ia.ac.cn/bitstream/173211/12337/1/A%20Convolutional%20Click%20Prediction%20Model.pdf)
### FNN (Factorization-supported Neural Network)
According to the paper, FNN learns embedding vectors of categorical data via a pre-trained FM. It uses FM's latent vectors to
initialize the embedding vectors. During the training stage, it concatenates the embedding vectors and feeds them into an
MLP (MultiLayer Perceptron).
[**FNN Model API**](./deepctr.models.fnn.html)
[**FNN Estimator API**](./deepctr.estimator.models.fnn.html)

[Zhang W, Du T, Wang J. Deep learning over multi-field categorical data[C]//European conference on information retrieval. Springer, Cham, 2016: 45-57.](https://arxiv.org/pdf/1601.02376.pdf)
### PNN (Product-based Neural Network)
PNN concatenates sparse feature embeddings and the product between embedding vectors as the input of MLP.
[**PNN Model API**](./deepctr.models.pnn.html)
[**PNN Estimator API**](./deepctr.estimator.models.pnn.html)

[Qu Y, Cai H, Ren K, et al. Product-based neural networks for user response prediction[C]//Data Mining (ICDM), 2016 IEEE 16th International Conference on. IEEE, 2016: 1149-1154.](https://arxiv.org/pdf/1611.00144.pdf)
### Wide & Deep
WDL's deep part concatenates sparse feature embeddings as the input of an MLP, while the wide part uses handcrafted features as
input. The logits of the deep part and the wide part are added to get the prediction probability.
[**WDL Model API**](./deepctr.models.wdl.html)
[**WDL Estimator API**](./deepctr.estimator.models.wdl.html)

[Cheng H T, Koc L, Harmsen J, et al. Wide & deep learning for recommender systems[C]//Proceedings of the 1st Workshop on Deep Learning for Recommender Systems. ACM, 2016: 7-10.](https://arxiv.org/pdf/1606.07792.pdf)
### DeepFM
DeepFM can be seen as an improvement of WDL and FNN. Compared with WDL, DeepFM uses FM instead of LR in the wide part and
uses the concatenation of embedding vectors as the input of the MLP in the deep part. Compared with FNN, the embedding vectors of
FM and the input to the MLP are the same. They do not need FM-pretrained vectors for initialization; they are learned end-to-end.
[**DeepFM Model API**](./deepctr.models.deepfm.html)
[**DeepFM Estimator API**](./deepctr.estimator.models.deepfm.html)

[Guo H, Tang R, Ye Y, et al. Deepfm: a factorization-machine based neural network for ctr prediction[J]. arXiv preprint arXiv:1703.04247, 2017.](http://www.ijcai.org/proceedings/2017/0239.pdf)
### MLR(Mixed Logistic Regression/Piece-wise Linear Model)
MLR can be viewed as a combination of $2m$ LR models, where $m$ is the number of pieces (regions). $m$ LR models learn the weight
with which the sample belongs to each region, and another $m$ LR models learn the sample's click probability in each region. Finally, the
sample's CTR is a weighted sum of each region's click probability. Note that the weights are normalized.
[**MLR Model API**](./deepctr.models.mlr.html)

[Gai K, Zhu X, Li H, et al. Learning Piece-wise Linear Models from Large Scale Data for Ad Click Prediction[J]. arXiv preprint arXiv:1704.05194, 2017.](http://arxiv.org/abs/1704.05194)
### NFM (Neural Factorization Machine)
NFM uses a bi-interaction pooling layer to learn feature interactions between embedding vectors and compresses the result
into a single vector which has the same size as a single embedding vector. This vector is then fed into an MLP. The output logit of
the MLP and the output logit of the linear part are added to get the prediction probability.
[**NFM Model API**](./deepctr.models.nfm.html)
[**NFM Estimator API**](./deepctr.estimator.models.nfm.html)

[He X, Chua T S. Neural factorization machines for sparse predictive analytics[C]//Proceedings of the 40th International ACM SIGIR conference on Research and Development in Information Retrieval. ACM, 2017: 355-364.](https://arxiv.org/pdf/1708.05027.pdf)
### AFM (Attentional Factorization Machine)
AFM is a variant of FM. Traditional FM sums the inner products of embedding vectors uniformly, whereas AFM can be seen as a weighted
sum of feature interactions. The weights are learned by a small MLP.
[**AFM Model API**](./deepctr.models.afm.html)
[**AFM Estimator API**](./deepctr.estimator.models.afm.html)

[Xiao J, Ye H, He X, et al. Attentional factorization machines: Learning the weight of feature interactions via attention networks[J]. arXiv preprint arXiv:1708.04617, 2017.](http://www.ijcai.org/proceedings/2017/435)
### DCN (Deep & Cross Network)
DCN uses a Cross Net to learn both low- and high-order feature interactions explicitly, and uses an MLP to learn feature
interactions implicitly. The outputs of the Cross Net and the MLP are concatenated. The concatenated vector is fed into one fully
connected layer to get the prediction probability.
[**DCN Model API**](./deepctr.models.dcn.html)
[**DCN Estimator API**](./deepctr.estimator.models.dcn.html)

[Wang R, Fu B, Fu G, et al. Deep & cross network for ad click predictions[C]//Proceedings of the ADKDD'17. ACM, 2017: 12.](https://arxiv.org/abs/1708.05123)
### DCN-Mix (Improved Deep & Cross Network with mix of experts and matrix kernel)
DCN-Mix uses a matrix kernel instead of vector kernel in CrossNet compared with DCN,and it uses mixture of experts to
learn feature interactions.
[**DCN-Mix Model API**](./deepctr.models.dcnmix.html)

[Wang R, Shivanna R, Cheng D Z, et al. DCN V2: Improved Deep & Cross Network and Practical Lessons for Web-scale Learning to Rank Systems[J]. arXiv preprint arXiv:2008.13535, 2020.
](https://arxiv.org/abs/2008.13535)
### xDeepFM
xDeepFM uses a Compressed Interaction Network (CIN) to learn both low- and high-order feature interactions explicitly, and
uses an MLP to learn feature interactions implicitly. In each layer of CIN, it first computes outer products between $x^k$ and
$x_0$ to get a tensor $Z_{k+1}$, then uses a 1DConv to learn feature maps $H_{k+1}$ on this tensor. Finally, it applies sum
pooling on all the feature maps $H_k$ to get one vector. The vector is used to compute the logit that CIN contributes.
[**xDeepFM Model API**](./deepctr.models.xdeepfm.html)
[**xDeepFM Estimator API**](./deepctr.estimator.models.xdeepfm.html)


[Lian J, Zhou X, Zhang F, et al. xDeepFM: Combining Explicit and Implicit Feature Interactions for Recommender Systems[J]. arXiv preprint arXiv:1803.05170, 2018.](https://arxiv.org/pdf/1803.05170.pdf)
### AutoInt(Automatic Feature Interaction)
AutoInt uses an interacting layer to model the interactions between different features. Within each interacting layer,
each feature is allowed to interact with all the other features and is able to automatically identify relevant features
to form meaningful higher-order features via the multi-head attention mechanism. By stacking multiple interacting
layers,AutoInt is able to model different orders of feature interactions.
[**AutoInt Model API**](./deepctr.models.autoint.html)
[**AutoInt Estimator API**](./deepctr.estimator.models.autoint.html)


[Song W, Shi C, Xiao Z, et al. Autoint: Automatic feature interaction learning via self-attentive neural networks[C]//Proceedings of the 28th ACM International Conference on Information and Knowledge Management. 2019: 1161-1170.
](https://arxiv.org/abs/1810.11921)
### ONN(Operation-aware Neural Networks for User Response Prediction)
ONN models second-order feature interactions like FFM and preserves second-order interaction information as much as
possible. Furthermore, a deep neural network is used to learn higher-order feature interactions.
[**ONN Model API**](./deepctr.models.onn.html)

[Yang Y, Xu B, Shen F, et al. Operation-aware Neural Networks for User Response Prediction[J]. arXiv preprint arXiv:1904.12579, 2019.](https://arxiv.org/pdf/1904.12579.pdf)
### FGCNN(Feature Generation by Convolutional Neural Network)
FGCNN models with two components: Feature Generation and Deep Classifier. Feature Generation leverages the strength of
CNN to generate local patterns and recombine them to generate new features. Deep Classifier adopts the structure of IPNN
to learn interactions from the augmented feature space.
[**FGCNN Model API**](./deepctr.models.fgcnn.html)

[Liu B, Tang R, Chen Y, et al. Feature Generation by Convolutional Neural Network for Click-Through Rate Prediction[J]. arXiv preprint arXiv:1904.04447, 2019.](https://arxiv.org/pdf/1904.04447)
### FiBiNET(Feature Importance and Bilinear feature Interaction NETwork)
Feature Importance and Bilinear feature Interaction NETwork is proposed to dynamically learn the feature importance and
fine-grained feature interactions. On the one hand, the FiBiNET can dynamically learn the importance of features via
the Squeeze-Excitation network (SENET) mechanism; on the other hand, it is able to effectively learn the feature
interactions via bilinear function.
[**FiBiNET Model API**](./deepctr.models.fibinet.html)
[**FiBiNET Estimator API**](./deepctr.estimator.models.fibinet.html)

[Huang T, Zhang Z, Zhang J. FiBiNET: Combining Feature Importance and Bilinear feature Interaction for Click-Through Rate Prediction[J]. arXiv preprint arXiv:1905.09433, 2019.](https://arxiv.org/pdf/1905.09433.pdf)
### FLEN(Field-Leveraged Embedding Network)
A large-scale CTR prediction model with efficient usage of field information to alleviate gradient coupling problem.
[**FLEN Model API**](./deepctr.models.flen.html)
[FLEN example](https://github.com/shenweichen/DeepCTR/tree/master/examples/run_flen.py)

[Chen W, Zhan L, Ci Y, Lin C. FLEN: Leveraging Field for Scalable CTR Prediction[J]. arXiv preprint arXiv:1911.04690, 2019.](https://arxiv.org/pdf/1911.04690.pdf)
### IFM(Input-aware Factorization Machine)
IFM improves FMs by explicitly considering the impact of each individual input upon the representation of features,
which learns a unique input-aware factor for the same feature in different instances via a neural network.
[**IFM Model API**](./deepctr.models.ifm.html)

[Yu Y, Wang Z, Yuan B. An Input-aware Factorization Machine for Sparse Prediction[C]//IJCAI. 2019: 1466-1472.](https://www.ijcai.org/Proceedings/2019/0203.pdf)
### DIFM(Dual Input-aware Factorization Machine)
Dual Input-aware Factorization Machines (DIFMs) can adaptively reweight the original feature representations at the
bit-wise and vector-wise levels simultaneously.
[**DIFM Model API**](./deepctr.models.difm.html)

[Lu W, Yu Y, Chang Y, et al. A Dual Input-aware Factorization Machine for CTR Prediction[C]//IJCAI. 2020: 3139-3145.](https://www.ijcai.org/Proceedings/2020/0434.pdf)
### DeepFEFM(Deep Field-Embedded Factorization Machine)
FEFM learns symmetric matrix embeddings for each field pair along with the usual single vector embeddings for each
feature. FEFM has significantly lower model complexity than FFM and roughly the same complexity as FwFM.
[**DeepFEFM Model API**](./deepctr.models.deepfefm.html)

[Pande H. Field-Embedded Factorization Machines for Click-through rate prediction[J]. arXiv preprint arXiv:2009.09931, 2020.](https://arxiv.org/pdf/2009.09931)
### EDCN(Enhancing Explicit and Implicit Feature Interactions DCN)
EDCN introduces two advanced modules, namely the bridge module and the regulation module, which work collaboratively to capture the layer-wise interactive signals and learn discriminative feature distributions for each hidden layer of the parallel networks.
[**EDCN Model API**](./deepctr.models.edcn.html)

[Chen B, Wang Y, Liu Z, et al. Enhancing explicit and implicit feature interactions via information sharing for parallel deep ctr models[C]//Proceedings of the 30th ACM International Conference on Information & Knowledge Management. 2021: 3757-3766.](https://dlp-kdd.github.io/assets/pdf/DLP-KDD_2021_paper_12.pdf)
## Sequence Models
### DIN (Deep Interest Network)
DIN introduces an attention method to learn from sequence (multi-valued) features. Traditional methods usually use sum/mean
pooling on sequence features. DIN uses a local activation unit to get the activation score between the candidate item and
history items. The user's interest is represented by a weighted sum of user behaviors. The user's interest vector and other
embedding vectors are concatenated and fed into an MLP to get the prediction.
[**DIN Model API**](./deepctr.models.sequence.din.html)
[DIN example](https://github.com/shenweichen/DeepCTR/tree/master/examples/run_din.py)

[Zhou G, Zhu X, Song C, et al. Deep interest network for click-through rate prediction[C]//Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining. ACM, 2018: 1059-1068.](https://arxiv.org/pdf/1706.06978.pdf)
### DIEN (Deep Interest Evolution Network)
Deep Interest Evolution Network (DIEN) uses interest extractor layer to capture temporal interests from history behavior
sequence. At this layer, an auxiliary loss is proposed to supervise interest extracting at each step. As user interests
are diverse, especially in the e-commerce system, interest evolving layer is proposed to capture interest evolving
process that is relative to the target item. At interest evolving layer, attention mechanism is embedded into the
sequential structure novelly, and the effects of relative interests are strengthened during interest evolution.
[**DIEN Model API**](./deepctr.models.sequence.dien.html)
[DIEN example](https://github.com/shenweichen/DeepCTR/tree/master/examples/run_dien.py)

[Zhou G, Mou N, Fan Y, et al. Deep Interest Evolution Network for Click-Through Rate Prediction[J]. arXiv preprint arXiv:1809.03672, 2018.](https://arxiv.org/pdf/1809.03672.pdf)
### DSIN(Deep Session Interest Network)
Deep Session Interest Network (DSIN) extracts users' multiple historical sessions in their behavior sequences. First it
uses self-attention mechanism with bias encoding to extract users' interests in each session. Then apply Bi-LSTM to
model how users' interests evolve and interact among sessions. Finally, local activation unit is used to adaptively
learn the influences of various session interests on the target item.
[**DSIN Model API**](./deepctr.models.sequence.dsin.html)
[DSIN example](https://github.com/shenweichen/DeepCTR/tree/master/examples/run_dsin.py)

[Feng Y, Lv F, Shen W, et al. Deep Session Interest Network for Click-Through Rate Prediction[J]. arXiv preprint arXiv:1905.06482, 2019.](https://arxiv.org/abs/1905.06482)
### BST(Behavior Sequence Transformer)
BST uses the powerful Transformer model to capture the sequential signals underlying users’ behavior sequences.
[**BST Model API**](./deepctr.models.sequence.bst.html)
[BST example](https://github.com/shenweichen/DeepCTR/tree/master/examples/run_din.py)

[Qiwei Chen, Huan Zhao, Wei Li, Pipei Huang, and Wenwu Ou. 2019. Behavior sequence transformer for e-commerce recommendation in Alibaba. In Proceedings of the 1st International Workshop on Deep Learning Practice for High-Dimensional Sparse Data (DLP-KDD '19). Association for Computing Machinery, New York, NY, USA, Article 12, 1–4. DOI: 10.1145/3326937.3341261](https://arxiv.org/pdf/1905.06874.pdf)
## MultiTask Models
### SharedBottom
Hard parameter sharing is the most commonly used approach to MTL in neural networks. It is generally applied by sharing the hidden layers between all tasks, while keeping several task-specific output layers.
[**SharedBottom Model API**](./deepctr.models.multitask.sharedbottom.html)

[Ruder S. An overview of multi-task learning in deep neural networks[J]. arXiv preprint arXiv:1706.05098, 2017.](https://arxiv.org/pdf/1706.05098.pdf)
### ESMM(Entire Space Multi-task Model)
ESMM models CVR in a brand-new perspective by making good use of sequential pattern of user actions, i.e., impression →
click → conversion. The proposed Entire Space Multi-task Model (ESMM) can eliminate the two problems simultaneously by
i) modeling CVR directly over the entire space, ii) employing a feature representation transfer learning strategy.
[**ESMM Model API**](./deepctr.models.multitask.esmm.html)

[Ma X, Zhao L, Huang G, et al. Entire space multi-task model: An effective approach for estimating post-click conversion rate[C]//The 41st International ACM SIGIR Conference on Research & Development in Information Retrieval. 2018.](https://arxiv.org/abs/1804.07931)
### MMOE(Multi-gate Mixture-of-Experts)
Multi-gate Mixture-of-Experts (MMoE) explicitly learns to model task relationships from data. It adapts the
Mixture-of-Experts (MoE) structure to multi-task learning by sharing the expert submodels across all tasks, while also having a
gating network trained to optimize each task.
[**MMOE Model API**](./deepctr.models.multitask.mmoe.html)

[Ma J, Zhao Z, Yi X, et al. Modeling task relationships in multi-task learning with multi-gate mixture-of-experts[C]//Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining. 2018.](https://dl.acm.org/doi/abs/10.1145/3219819.3220007)
### PLE(Progressive Layered Extraction)
PLE separates shared components and task-specific components explicitly and adopts a progressive routing mechanism to
extract and separate deeper semantic knowledge gradually, improving efficiency of joint representation learning and
information routing across tasks in a general setup.
[**PLE Model API**](./deepctr.models.multitask.ple.html)

[Tang H, Liu J, Zhao M, et al. Progressive layered extraction (ple): A novel multi-task learning (mtl) model for personalized recommendations[C]//Fourteenth ACM Conference on Recommender Systems. 2020.](https://dl.acm.org/doi/10.1145/3383313.3412236)
## Layers
The models of deepctr are modular, so you can use different modules to build your own models.
The module is a class that inherits from `tf.keras.layers.Layer`, so it has the same attributes and methods as Keras layers
like `tf.keras.layers.Dense()`, etc.
You can see layers API in [Layers](./Layers.html)
================================================
FILE: docs/source/History.md
================================================
# History
- 11/10/2022 : [v0.9.3](https://github.com/shenweichen/DeepCTR/releases/tag/v0.9.3) released.Add [EDCN](./Features.html#edcn-enhancing-explicit-and-implicit-feature-interactions-dcn).
- 10/15/2022 : [v0.9.2](https://github.com/shenweichen/DeepCTR/releases/tag/v0.9.2) released.Support python `3.9`,`3.10`.
- 06/11/2022 : [v0.9.1](https://github.com/shenweichen/DeepCTR/releases/tag/v0.9.1) released.Improve compatibility with tensorflow `2.x`.
- 09/03/2021 : [v0.9.0](https://github.com/shenweichen/DeepCTR/releases/tag/v0.9.0) released.Add multitask learning models:[SharedBottom](./Features.html#sharedbottom),[ESMM](./Features.html#esmm-entire-space-multi-task-model),[MMOE](./Features.html#mmoe-multi-gate-mixture-of-experts) and [PLE](./Features.html#ple-progressive-layered-extraction). [running example](./Examples.html#multitask-learning-mmoe)
- 07/18/2021 : [v0.8.7](https://github.com/shenweichen/DeepCTR/releases/tag/v0.8.7) released.Support pre-defined key-value vocabulary in `Hash` Layer. [example](./Examples.html#hash-layer-with-pre-defined-key-value-vocabulary)
- 06/14/2021 : [v0.8.6](https://github.com/shenweichen/DeepCTR/releases/tag/v0.8.6) released.Add [IFM](./Features.html#ifm-input-aware-factorization-machine) [DIFM](./Features.html#difm-dual-input-aware-factorization-machine), [FEFM and DeepFEFM](./Features.html#deepfefm-deep-field-embedded-factorization-machine) model.
- 03/13/2021 : [v0.8.5](https://github.com/shenweichen/DeepCTR/releases/tag/v0.8.5) released.Add [BST](./Features.html#bst-behavior-sequence-transformer) model.
- 02/12/2021 : [v0.8.4](https://github.com/shenweichen/DeepCTR/releases/tag/v0.8.4) released.Fix bug in DCN-Mix.
- 01/06/2021 : [v0.8.3](https://github.com/shenweichen/DeepCTR/releases/tag/v0.8.3) released.Add [DCN-Mix](./Features.html#dcn-mix-improved-deep-cross-network-with-mix-of-experts-and-matrix-kernel) model.Support `transform_fn` in `DenseFeat`.
- 10/11/2020 : [v0.8.2](https://github.com/shenweichen/DeepCTR/releases/tag/v0.8.2) released.Refactor `DNN` Layer.
- 09/12/2020 : [v0.8.1](https://github.com/shenweichen/DeepCTR/releases/tag/v0.8.1) released.Improve the reproducibility & fix some bugs.
- 06/27/2020 : [v0.8.0](https://github.com/shenweichen/DeepCTR/releases/tag/v0.8.0) released.
- Support `Tensorflow Estimator` for large scale data and distributed training. [example: Estimator with TFRecord](./Examples.html#estimator-with-tfrecord-classification-criteo)
- Support different initializers for different embedding weights and loading pretrained embeddings. [example](./FAQ.html#how-to-use-pretrained-weights-to-initialize-embedding-weights-and-frozen-embedding-weights)
- Add new model `FwFM`.
- 05/17/2020 : [v0.7.5](https://github.com/shenweichen/DeepCTR/releases/tag/v0.7.5) released.Fix numerical instability in `LayerNormalization`.
- 03/15/2020 : [v0.7.4](https://github.com/shenweichen/DeepCTR/releases/tag/v0.7.4) released.Add [FLEN](./Features.html#flen-field-leveraged-embedding-network) and `FieldWiseBiInteraction`.
- 03/04/2020 : [v0.7.3](https://github.com/shenweichen/DeepCTR/releases/tag/v0.7.3) released.Fix the inconsistency of prediction results when the model is loaded with trained weights.
- 02/08/2020 : [v0.7.2](https://github.com/shenweichen/DeepCTR/releases/tag/v0.7.2) released.Fix some bugs.
- 01/28/2020 : [v0.7.1](https://github.com/shenweichen/DeepCTR/releases/tag/v0.7.1) released.Simplify [VarLenSparseFeat](./Features.html#varlensparsefeat),support setting weight_normalization.Fix problem of embedding size of `SparseFeat` in `linear_feature_columns`.
- 11/24/2019 : [v0.7.0](https://github.com/shenweichen/DeepCTR/releases/tag/v0.7.0) released.Refactor [feature columns](./Features.html#feature-columns).Different features can use different `embedding_dim` and group-wise interaction is available by setting `group_name`.
- 11/06/2019 : [v0.6.3](https://github.com/shenweichen/DeepCTR/releases/tag/v0.6.3) released.Add `WeightedSequenceLayer` and support [weighted sequence feature input](./Examples.html#multi-value-input-movielens).
- 10/03/2019 : [v0.6.2](https://github.com/shenweichen/DeepCTR/releases/tag/v0.6.2) released.Simplify the input logic.
- 09/08/2019 : [v0.6.1](https://github.com/shenweichen/DeepCTR/releases/tag/v0.6.1) released.Fix bugs in `CCPM` and `DynamicGRU`.
- 08/02/2019 : [v0.6.0](https://github.com/shenweichen/DeepCTR/releases/tag/v0.6.0) released.Now DeepCTR is compatible with tensorflow `1.14` and `2.0.0`.
- 07/21/2019 : [v0.5.2](https://github.com/shenweichen/DeepCTR/releases/tag/v0.5.2) released.Refactor `Linear` Layer.
- 07/10/2019 : [v0.5.1](https://github.com/shenweichen/DeepCTR/releases/tag/v0.5.1) released.Add [FiBiNET](./Features.html#fibinet-feature-importance-and-bilinear-feature-interaction-network).
- 06/30/2019 : [v0.5.0](https://github.com/shenweichen/DeepCTR/releases/tag/v0.5.0) released.Refactor inputs module.
- 05/19/2019 : [v0.4.1](https://github.com/shenweichen/DeepCTR/releases/tag/v0.4.1) released.Add [DSIN](./Features.html#dsin-deep-session-interest-network).
- 05/04/2019 : [v0.4.0](https://github.com/shenweichen/DeepCTR/releases/tag/v0.4.0) released.Support [feature hashing on the fly](./Examples.html#classification-criteo-with-feature-hashing-on-the-fly) and python2.7.
- 04/27/2019 : [v0.3.4](https://github.com/shenweichen/DeepCTR/releases/tag/v0.3.4) released.Add [FGCNN](./Features.html#fgcnn-feature-generation-by-convolutional-neural-network) and `FGCNNLayer`.
- 04/21/2019 : [v0.3.3](https://github.com/shenweichen/DeepCTR/releases/tag/v0.3.3) released.Add [CCPM](./Features.html#ccpm-convolutional-click-prediction-model).
- 03/30/2019 : [v0.3.2](https://github.com/shenweichen/DeepCTR/releases/tag/v0.3.2) released.Add [DIEN](./Features.html#dien-deep-interest-evolution-network) and [ONN](./Features.html#onn-operation-aware-neural-networks-for-user-response-prediction) Model.
- 02/17/2019 : [v0.3.1](https://github.com/shenweichen/DeepCTR/releases/tag/v0.3.1) released.Refactor layers ,add `BiLSTM` and `Transformer`.
- 01/24/2019 : [v0.2.3](https://github.com/shenweichen/DeepCTR/releases/tag/v0.2.3) released.Use a new feature config generation method and fix bugs.
- 01/01/2019 : [v0.2.2](https://github.com/shenweichen/DeepCTR/releases/tag/v0.2.2) released.Add [sequence(multi-value) input support](./Examples.html#multi-value-input-movielens) for `AFM,AutoInt,DCN,DeepFM,FNN,NFM,PNN,xDeepFM` models.
- 12/27/2018 : [v0.2.1](https://github.com/shenweichen/DeepCTR/releases/tag/v0.2.1) released.Add [AutoInt](./Features.html#autoint-automatic-feature-interaction) Model.
- 12/22/2018 : [v0.2.0](https://github.com/shenweichen/DeepCTR/releases/tag/v0.2.0) released.Add [xDeepFM](./Features.html#xdeepfm) and automatic check for new version.
- 12/19/2018 : [v0.1.6](https://github.com/shenweichen/DeepCTR/releases/tag/v0.1.6) released.Now DeepCTR is compatible with tensorflow from `1.4-1.12` except for `1.7` and `1.8`.
- 11/29/2018 : [v0.1.4](https://github.com/shenweichen/DeepCTR/releases/tag/v0.1.4) released.Add [FAQ](./FAQ.html) in docs
- 11/24/2018 : DeepCTR first version v0.1.0 is released on [PyPi](https://pypi.org/project/deepctr/)
================================================
FILE: docs/source/Layers.rst
================================================
DeepCTR Layers API
======================
.. toctree::
:maxdepth: 3
:caption: API:
Core Layers
Interaction Layers
Activation Layers
Normalization Layers
Sequence Layers
================================================
FILE: docs/source/Model_Methods.md
================================================
# Methods
## compile
```python
compile(optimizer, loss=None, metrics=None, loss_weights=None, sample_weight_mode=None, weighted_metrics=None, target_tensors=None)
```
Configures the model for training.
**Arguments**
- **optimizer**: String (name of optimizer) or optimizer instance. See [optimizers](https://www.tensorflow.org/versions/r1.12/api_docs/python/tf/keras/optimizers/).
- **loss**: String (name of objective function) or objective function. See [losses](https://www.tensorflow.org/versions/r1.12/api_docs/python/tf/keras/losses). If the model has multiple outputs, you can use a different loss on each output by passing a dictionary or a list of losses. The loss value that will be minimized by the model will then be the sum of all individual losses.
- **metrics**: List of metrics to be evaluated by the model during training and testing. Typically you will use `metrics=['accuracy']`. To specify different metrics for different outputs of a multi-output model, you could also pass a dictionary, such as `metrics={'output_a': 'accuracy'}`.
- **loss_weights**: Optional list or dictionary specifying scalar coefficients (Python floats) to weight the loss contributions of different model outputs. The loss value that will be minimized by the model will then be the weighted sum of all individual losses, weighted by the `loss_weights` coefficients. If a list, it is expected to have a 1:1 mapping to the model's outputs. If a dict, it is expected to map output names (strings) to scalar coefficients.
- **sample_weight_mode**: If you need to do timestep-wise sample weighting (2D weights), set this to `"temporal"`. `None` defaults to sample-wise weights (1D). If the model has multiple outputs, you can use a different `sample_weight_mode` on each output by passing a dictionary or a list of modes.
- **weighted_metrics**: List of metrics to be evaluated and weighted by sample_weight or class_weight during training and testing.
- **target_tensors**: By default, Keras will create placeholders for the model's target, which will be fed with the target data during training. If instead you would like to use your own target tensors (in turn, Keras will not expect external Numpy data for these targets at training time), you can specify them via the `target_tensors` argument. It can be a single tensor (for a single-output model), a list of tensors, or a dict mapping output names to target tensors.
**Raises**
- **ValueError**: In case of invalid arguments for `optimizer`, `loss`, `metrics` or `sample_weight_mode`.
## fit
```python
fit(x=None, y=None, batch_size=None, epochs=1, verbose=1, callbacks=None, validation_split=0.0, validation_data=None, shuffle=True, class_weight=None, sample_weight=None, initial_epoch=0, steps_per_epoch=None, validation_steps=None, validation_freq=1)
```
Trains the model for a given number of epochs (iterations on a dataset).
**Arguments**
- **x**: Numpy array of training data (if the model has a single input), or list of Numpy arrays (if the model has multiple inputs). If input layers in the model are named, you can also pass a dictionary mapping input names to Numpy arrays. `x` can be `None` (default) if feeding from framework-native tensors (e.g. TensorFlow data tensors).
- **y**: Numpy array of target (label) data (if the model has a single output), or list of Numpy arrays (if the model has multiple outputs). If output layers in the model are named, you can also pass a dictionary mapping output names to Numpy arrays. `y` can be `None` (default) if feeding from framework-native tensors (e.g. TensorFlow data tensors).
- **batch_size**: Integer or `None`. Number of samples per gradient update. If unspecified, `batch_size` will default to 32.
- **epochs**: Integer. Number of epochs to train the model. An epoch is an iteration over the entire `x` and `y` data provided. Note that in conjunction with `initial_epoch`, `epochs` is to be understood as "final epoch". The model is not trained for a number of iterations given by `epochs`, but merely until the epoch of index `epochs` is reached.
- **verbose**: Integer. 0, 1, or 2. Verbosity mode. 0 = silent, 1 = progress bar, 2 = one line per epoch.
- **callbacks**: List of `tf.keras.callbacks.Callback` instances. List of callbacks to apply during training and validation (if validation is performed). See [callbacks](https://www.tensorflow.org/versions/r1.12/api_docs/python/tf/keras/callbacks).
- **validation_split**: Float between 0 and 1. Fraction of the training data to be used as validation data. The model will set apart this fraction of the training data, will not train on it, and will evaluate the loss and any model metrics on this data at the end of each epoch. The validation data is selected from the last samples in the `x` and `y` data provided, before shuffling.
- **validation_data**: tuple `(x_val, y_val)` or tuple `(x_val, y_val, val_sample_weights)` on which to evaluate the loss and any model metrics at the end of each epoch. The model will not be trained on this data. `validation_data` will override `validation_split`.
- **shuffle**: Boolean (whether to shuffle the training data before each epoch) or str (for 'batch'). 'batch' is a special option for dealing with the limitations of HDF5 data; it shuffles in batch-sized chunks. Has no effect when `steps_per_epoch` is not `None`.
- **class_weight**: Optional dictionary mapping class indices (integers) to a weight (float) value, used for weighting the loss function (during training only). This can be useful to tell the model to "pay more attention" to samples from an under-represented class.
- **sample_weight**: Optional Numpy array of weights for the training samples, used for weighting the loss function (during training only). You can either pass a flat (1D) Numpy array with the same length as the input samples (1:1 mapping between weights and samples), or in the case of temporal data, you can pass a 2D array with shape `(samples, sequence_length)`, to apply a different weight to every timestep of every sample. In this case you should make sure to specify `sample_weight_mode="temporal"` in `compile()`.
- **initial_epoch**: Integer. Epoch at which to start training (useful for resuming a previous training run).
- **steps_per_epoch**: Integer or `None`. Total number of steps (batches of samples) before declaring one epoch finished and starting the next epoch. When training with input tensors such as TensorFlow data tensors, the default `None` is equal to the number of samples in your dataset divided by the batch size, or 1 if that cannot be determined.
- **validation_steps**: Only relevant if `steps_per_epoch` is specified. Total number of steps (batches of samples) to validate before stopping.
- **validation_freq**: Only relevant if validation data is provided. Integer or list/tuple/set. If an integer, specifies how many training epochs to run before a new validation run is performed, e.g. `validation_freq=2` runs validation every 2 epochs. If a list, tuple, or set, specifies the epochs on which to run validation, e.g. `validation_freq=[1, 2, 10]` runs validation at the end of the 1st, 2nd, and 10th epochs.
**Returns**
- A `History` object. Its `History.history` attribute is a record of training loss values and metrics values at successive epochs, as well as validation loss values and validation metrics values (if applicable).
**Raises**
- **RuntimeError**: If the model was never compiled.
- **ValueError**: In case of mismatch between the provided input data and what the model expects.
## evaluate
```python
evaluate(x=None, y=None, batch_size=None, verbose=1, sample_weight=None, steps=None, callbacks=None)
```
Returns the loss value & metrics values for the model in test mode.
Computation is done in batches.
**Arguments**
- **x**: Numpy array of test data (if the model has a single input), or list of Numpy arrays (if the model has multiple inputs). If input layers in the model are named, you can also pass a dictionary mapping input names to Numpy arrays. `x` can be `None` (default) if feeding from framework-native tensors (e.g. TensorFlow data tensors).
- **y**: Numpy array of target (label) data (if the model has a single output), or list of Numpy arrays (if the model has multiple outputs). If output layers in the model are named, you can also pass a dictionary mapping output names to Numpy arrays. `y` can be `None` (default) if feeding from framework-native tensors (e.g. TensorFlow data tensors).
- **batch_size**: Integer or `None`. Number of samples per evaluation step. If unspecified, `batch_size` will default to 32.
- **verbose**: 0 or 1. Verbosity mode. 0 = silent, 1 = progress bar.
- **sample_weight**: Optional Numpy array of weights for the test samples, used for weighting the loss function. You can either pass a flat (1D) Numpy array with the same length as the input samples (1:1 mapping between weights and samples), or in the case of temporal data, you can pass a 2D array with shape `(samples, sequence_length)`, to apply a different weight to every timestep of every sample. In this case you should make sure to specify `sample_weight_mode="temporal"` in `compile()`.
- **steps**: Integer or `None`. Total number of steps (batches of samples) before declaring the evaluation round finished. Ignored with the default value of `None`.
- **callbacks**: List of `tf.keras.callbacks.Callback` instances. List of callbacks to apply during evaluation. See [callbacks](https://www.tensorflow.org/versions/r1.12/api_docs/python/tf/keras/callbacks).
**Returns**
- Scalar test loss (if the model has a single output and no metrics) or list of scalars (if the model has multiple outputs and/or metrics). The attribute `model.metrics_names` will give you the display labels for the scalar outputs.
## predict
```python
predict(x, batch_size=None, verbose=0, steps=None, callbacks=None)
```
Generates output predictions for the input samples.
Computation is done in batches.
**Arguments**
- **x**: The input data, as a Numpy array (or list of Numpy arrays if the model has multiple inputs).
- **batch_size**: Integer. If unspecified, it will default to 32.
- **verbose**: Verbosity mode, 0 or 1.
- **steps**: Total number of steps (batches of samples) before declaring the prediction round finished. Ignored with the default value of None.
- **callbacks**: List of `tf.keras.callbacks.Callback` instances. List of callbacks to apply during prediction. See [callbacks](https://www.tensorflow.org/versions/r1.12/api_docs/python/tf/keras/callbacks).
**Returns**
- Numpy array(s) of predictions.
**Raises**
- **ValueError**: In case of mismatch between the provided input data and the model's expectations, or in case a stateful model receives a number of samples that is not a multiple of the batch size.
## train_on_batch
```python
train_on_batch(x, y, sample_weight=None, class_weight=None)
```
Runs a single gradient update on a single batch of data.
**Arguments**
- **x**: Numpy array of training data, or list of Numpy arrays if the model has multiple inputs. If all inputs in the model are named, you can also pass a dictionary mapping input names to Numpy arrays.
- **y**: Numpy array of target data, or list of Numpy arrays if the model has multiple outputs. If all outputs in the model are named, you can also pass a dictionary mapping output names to Numpy arrays.
- **sample_weight**: Optional array of the same length as x, containing weights to apply to the model's loss for each sample. In the case of temporal data, you can pass a 2D array with shape (samples, sequence_length), to apply a different weight to every timestep of every sample. In this case you should make sure to specify sample_weight_mode="temporal" in compile().
- **class_weight**: Optional dictionary mapping class indices (integers) to a weight (float) to apply to the model's loss for the samples from this class during training. This can be useful to tell the model to "pay more attention" to samples from an under-represented class.
**Returns**
- Scalar training loss (if the model has a single output and no metrics) or list of scalars (if the model has multiple outputs and/or metrics). The attribute `model.metrics_names` will give you the display labels for the scalar outputs.
## test_on_batch
```python
test_on_batch(x, y, sample_weight=None)
```
Test the model on a single batch of samples.
**Arguments**
- **x**: Numpy array of test data, or list of Numpy arrays if the model has multiple inputs. If all inputs in the model are named, you can also pass a dictionary mapping input names to Numpy arrays.
- **y**: Numpy array of target data, or list of Numpy arrays if the model has multiple outputs. If all outputs in the model are named, you can also pass a dictionary mapping output names to Numpy arrays.
- **sample_weight**: Optional array of the same length as x, containing weights to apply to the model's loss for each sample. In the case of temporal data, you can pass a 2D array with shape (samples, sequence_length), to apply a different weight to every timestep of every sample. In this case you should make sure to specify `sample_weight_mode="temporal"` in `compile()`.
**Returns**
- Scalar test loss (if the model has a single output and no metrics) or list of scalars (if the model has multiple outputs and/or metrics). The attribute `model.metrics_names` will give you the display labels for the scalar outputs.
## predict_on_batch
```python
predict_on_batch(x)
```
Returns predictions for a single batch of samples.
**Arguments**
- **x**: Input samples, as a Numpy array.
**Returns**
- Numpy array(s) of predictions.
## fit_generator
```python
fit_generator(generator, steps_per_epoch=None, epochs=1, verbose=1, callbacks=None, validation_data=None, validation_steps=None, validation_freq=1, class_weight=None, max_queue_size=10, workers=1, use_multiprocessing=False, shuffle=True, initial_epoch=0)
```
Trains the model on data generated batch-by-batch by a Python generator (or an instance of `Sequence`).
The generator is run in parallel to the model, for efficiency. For instance, this allows you to do real-time data augmentation on images on CPU in parallel to training your model on GPU.
The use of `tf.keras.utils.Sequence` guarantees the ordering and guarantees the single use of every input per epoch when using `use_multiprocessing=True`.
**Arguments**
- **generator**: A generator or an instance of `Sequence` (`tf.keras.utils.Sequence`) object in order to avoid duplicate data when using multiprocessing. The output of the generator must be either
a tuple `(inputs, targets)` or
a tuple `(inputs, targets, sample_weights)`.
This tuple (a single output of the generator) makes a single batch. Therefore, all arrays in this tuple must have the same length (equal to the size of this batch). Different batches may have different sizes. For example, the last batch of the epoch is commonly smaller than the others, if the size of the dataset is not divisible by the batch size. The generator is expected to loop over its data indefinitely. An epoch finishes when `steps_per_epoch` batches have been seen by the model.
- **steps_per_epoch**: Integer. Total number of steps (batches of samples) to yield from `generator` before declaring one epoch finished and starting the next epoch. It should typically be equal to `ceil(num_samples / batch_size)`. Optional for `Sequence`: if unspecified, will use the `len(generator)` as a number of steps.
- **epochs**: Integer. Number of epochs to train the model. An epoch is an iteration over the entire data provided, as defined by `steps_per_epoch`. Note that in conjunction with `initial_epoch`, `epochs` is to be understood as "final epoch". The model is not trained for a number of iterations given by `epochs`, but merely until the epoch of index `epochs` is reached.
- **verbose**: Integer. 0, 1, or 2. Verbosity mode. 0 = silent, 1 = progress bar, 2 = one line per epoch.
- **callbacks**: List of `tf.keras.callbacks.Callback` instances. List of callbacks to apply during training. See [callbacks](https://www.tensorflow.org/versions/r1.12/api_docs/python/tf/keras/callbacks).
- **validation_data**: This can be either
a generator or a `Sequence` object for the validation data
tuple `(x_val, y_val)`
tuple `(x_val, y_val, val_sample_weights)`
on which to evaluate the loss and any model metrics at the end of each epoch. The model will not be trained on this data.
- **validation_steps**: Only relevant if `validation_data` is a generator. Total number of steps (batches of samples) to yield from `validation_data` generator before stopping at the end of every epoch. It should typically be equal to the number of samples of your validation dataset divided by the batch size. Optional for `Sequence`: if unspecified, will use the `len(validation_data)` as a number of steps.
- **validation_freq**: Only relevant if validation data is provided. Integer or `collections.Container` instance (e.g. list, tuple, etc.). If an integer, specifies how many training epochs to run before a new validation run is performed, e.g. `validation_freq=2` runs validation every 2 epochs. If a Container, specifies the epochs on which to run validation, e.g. `validation_freq=[1, 2, 10]` runs validation at the end of the 1st, 2nd, and 10th epochs.
- **class_weight**: Optional dictionary mapping class indices (integers) to a weight (float) value, used for weighting the loss function (during training only). This can be useful to tell the model to "pay more attention" to samples from an under-represented class.
- **max_queue_size**: Integer. Maximum size for the generator queue. If unspecified, `max_queue_size` will default to 10.
- **workers**: Integer. Maximum number of processes to spin up when using process-based threading. If unspecified, `workers` will default to 1. If 0, will execute the generator on the main thread.
- **use_multiprocessing**: Boolean. If `True`, use process-based threading. If unspecified, `use_multiprocessing` will default to `False`. Note that because this implementation relies on multiprocessing, you should not pass non-picklable arguments to the generator as they can't be passed easily to children processes.
- **shuffle**: Boolean. Whether to shuffle the order of the batches at the beginning of each epoch. Only used with instances of `Sequence` (`tf.keras.utils.Sequence`). Has no effect when `steps_per_epoch` is not `None`.
- **initial_epoch**: Integer. Epoch at which to start training (useful for resuming a previous training run).
**Returns**
- A `History` object. Its `History.history` attribute is a record of training loss values and metrics values at successive epochs, as well as validation loss values and validation metrics values (if applicable).
**Raises**
- **ValueError**: In case the generator yields data in an invalid format.
**Example**
```python
def generate_arrays_from_file(path):
    while True:
        with open(path) as f:
            for line in f:
                # create numpy arrays of input data
                # and labels, from each line in the file
                x1, x2, y = process_line(line)
                yield ({'input_1': x1, 'input_2': x2}, {'output': y})

model.fit_generator(generate_arrays_from_file('/my_file.txt'),
                    steps_per_epoch=10000, epochs=10)
```
## evaluate_generator
```python
evaluate_generator(generator, steps=None, callbacks=None, max_queue_size=10, workers=1, use_multiprocessing=False, verbose=0)
```
Evaluates the model on a data generator.
The generator should return the same kind of data as accepted by `test_on_batch`.
**Arguments**
- **generator**: Generator yielding tuples (inputs, targets) or (inputs, targets, sample_weights) or an instance of Sequence (tf.keras.utils.Sequence) object in order to avoid duplicate data when using multiprocessing.
- **steps**: Total number of steps (batches of samples) to yield from `generator` before stopping. Optional for `Sequence`: if unspecified, will use the `len(generator)` as a number of steps.
- **callbacks**: List of `tf.keras.callbacks.Callback` instances. List of callbacks to apply during evaluation. See [callbacks](https://www.tensorflow.org/versions/r1.12/api_docs/python/tf/keras/callbacks).
- **max_queue_size**: maximum size for the generator queue
- **workers**: Integer. Maximum number of processes to spin up when using process based threading. If unspecified, `workers` will default to 1. If 0, will execute the generator on the main thread.
- **use_multiprocessing**: if True, use process based threading. Note that because this implementation relies on multiprocessing, you should not pass non picklable arguments to the generator as they can't be passed easily to children processes.
- **verbose**: verbosity mode, 0 or 1.
**Returns**
- Scalar test loss (if the model has a single output and no metrics) or list of scalars (if the model has multiple outputs and/or metrics). The attribute `model.metrics_names` will give you the display labels for the scalar outputs.
**Raises**
- **ValueError**: In case the generator yields data in an invalid format.
## predict_generator
```python
predict_generator(generator, steps=None, callbacks=None, max_queue_size=10, workers=1, use_multiprocessing=False, verbose=0)
```
Generates predictions for the input samples from a data generator.
The generator should return the same kind of data as accepted by `predict_on_batch`.
**Arguments**
- **generator**: Generator yielding batches of input samples or an instance of Sequence (`tf.keras.utils.Sequence`) object in order to avoid duplicate data when using multiprocessing.
- **steps**: Total number of steps (batches of samples) to yield from `generator` before stopping. Optional for `Sequence`: if unspecified, will use the `len(generator)` as a number of steps.
- **callbacks**: List of `tf.keras.callbacks.Callback` instances. List of callbacks to apply during prediction. See [callbacks](https://www.tensorflow.org/versions/r1.12/api_docs/python/tf/keras/callbacks).
- **max_queue_size**: Maximum size for the generator queue.
- **workers**: Integer. Maximum number of processes to spin up when using process based threading. If unspecified, `workers` will default to 1. If 0, will execute the generator on the main thread.
- **use_multiprocessing**: If `True`, use process based threading. Note that because this implementation relies on multiprocessing, you should not pass non picklable arguments to the generator as they can't be passed easily to children processes.
- **verbose**: verbosity mode, 0 or 1.
**Returns**
- Numpy array(s) of predictions.
**Raises**
- **ValueError**: In case the generator yields data in an invalid format.
## get_layer
```python
get_layer(name=None, index=None)
```
Retrieves a layer based on either its name (unique) or index.
If `name` and `index` are both provided, `index` will take precedence.
Indices are based on order of horizontal graph traversal (bottom-up).
**Arguments**
- **name**: String, name of layer.
- **index**: Integer, index of layer.
**Returns**
- A layer instance.
**Raises**
- **ValueError**: In case of invalid layer name or index.
================================================
FILE: docs/source/Models.rst
================================================
DeepCTR Models API
======================
.. toctree::
Model Methods
CCPM
FNN
PNN
WDL
DeepFM
MLR
NFM
AFM
DCN
DCNMix
DIN
DIEN
DSIN
BST
xDeepFM
AutoInt
ONN
FGCNN
FiBiNET
FLEN
IFM
DIFM
DeepFEFM
SharedBottom
ESMM
MMOE
PLE
EDCN
================================================
FILE: docs/source/Quick-Start.md
================================================
# Quick-Start
[](https://dsw-dev.data.aliyun.com/#/?fileUrl=https://pai-public-data.oss-cn-beijing.aliyuncs.com/deep-ctr/Getting-started-4-steps-to-DeepCTR.ipynb&fileName=Getting-started-4-steps-to-DeepCTR.ipynb)
## Installation Guide
`deepctr` is currently available for Python `2.7` and `3.5, 3.6, 3.7`.
`deepctr` depends on TensorFlow; you can choose to install either the CPU version or the GPU version through `pip`.
### CPU version
```bash
$ pip install deepctr[cpu]
```
### GPU version
```bash
$ pip install deepctr[gpu]
```
## Getting started: 4 steps to DeepCTR
### Step 1: Import model
```python
import pandas as pd
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
from deepctr.models import DeepFM
from deepctr.feature_column import SparseFeat, DenseFeat,get_feature_names
data = pd.read_csv('./criteo_sample.txt')
sparse_features = ['C' + str(i) for i in range(1, 27)]
dense_features = ['I'+str(i) for i in range(1, 14)]
data[sparse_features] = data[sparse_features].fillna('-1', )
data[dense_features] = data[dense_features].fillna(0,)
target = ['label']
```
### Step 2: Simple preprocessing
Usually we have two methods to encode the sparse categorical feature for embedding
- Label Encoding: map the features to integer value from 0 ~ len(#unique) - 1
```python
for feat in sparse_features:
lbe = LabelEncoder()
data[feat] = lbe.fit_transform(data[feat])
```
- Hash Encoding: map the features to a fixed range, like 0 ~ 9999. We have 2 methods to do that:
- Do feature hashing before training
```python
for feat in sparse_features:
lbe = HashEncoder()
data[feat] = lbe.transform(data[feat])
```
- Do feature hashing on the fly in training process
We can do feature hashing by setting `use_hash=True` in `SparseFeat` or `VarlenSparseFeat` in Step3.
Dense numerical features are usually discretized into buckets; here we use normalization instead.
```python
mms = MinMaxScaler(feature_range=(0,1))
data[dense_features] = mms.fit_transform(data[dense_features])
```
### Step 3: Generate feature columns
For sparse features, we transform them into dense vectors by embedding techniques.
For dense numerical features, we concatenate them to the input tensors of fully connected layer.
For varlen (multi-valued) sparse features, you can use [VarlenSparseFeat](./Features.html#varlensparsefeat). Visit the [examples](./Examples.html#multi-value-input-movielens) of using `VarlenSparseFeat`.
- Label Encoding
```python
fixlen_feature_columns = [SparseFeat(feat, vocabulary_size=data[feat].max() + 1,embedding_dim=4)
for i,feat in enumerate(sparse_features)] + [DenseFeat(feat, 1,)
for feat in dense_features]
```
- Feature Hashing on the fly
```python
fixlen_feature_columns = [SparseFeat(feat, vocabulary_size=1e6,embedding_dim=4, use_hash=True, dtype='string') # the input is string
for feat in sparse_features] + [DenseFeat(feat, 1, )
for feat in dense_features]
```
- generate feature columns
```python
dnn_feature_columns = fixlen_feature_columns
linear_feature_columns = fixlen_feature_columns
feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)
```
### Step 4: Generate the training samples and train the model
```python
train, test = train_test_split(data, test_size=0.2)
train_model_input = {name:train[name].values for name in feature_names}
test_model_input = {name:test[name].values for name in feature_names}
model = DeepFM(linear_feature_columns,dnn_feature_columns,task='binary')
model.compile("adam", "binary_crossentropy",
metrics=['binary_crossentropy'], )
history = model.fit(train_model_input, train[target].values,
batch_size=256, epochs=10, verbose=2, validation_split=0.2, )
pred_ans = model.predict(test_model_input, batch_size=256)
```
You can check the full code [here](./Examples.html#classification-criteo).
You can also run a distributed training job with the Keras model on Kubernetes using [ElasticDL](https://github.com/sql-machine-learning/elasticdl/blob/develop/docs/tutorials/elasticdl_deepctr_keras.md).
## Getting started: 4 steps to DeepCTR Estimator with TFRecord
### Step 1: Import model
```python
import tensorflow as tf
from tensorflow.python.ops.parsing_ops import FixedLenFeature
from deepctr.estimator.inputs import input_fn_tfrecord
from deepctr.estimator.models import DeepFMEstimator
```
### Step 2: Generate feature columns for linear part and dnn part
```python
sparse_features = ['C' + str(i) for i in range(1, 27)]
dense_features = ['I' + str(i) for i in range(1, 14)]
dnn_feature_columns = []
linear_feature_columns = []
for i, feat in enumerate(sparse_features):
    dnn_feature_columns.append(tf.feature_column.embedding_column(
        tf.feature_column.categorical_column_with_identity(feat, 1000), 4))
    linear_feature_columns.append(tf.feature_column.categorical_column_with_identity(feat, 1000))
for feat in dense_features:
    dnn_feature_columns.append(tf.feature_column.numeric_column(feat))
    linear_feature_columns.append(tf.feature_column.numeric_column(feat))
```
### Step 3: Generate the training samples with TFRecord format
```python
feature_description = {k: FixedLenFeature(dtype=tf.int64, shape=1) for k in sparse_features}
feature_description.update(
{k: FixedLenFeature(dtype=tf.float32, shape=1) for k in dense_features})
feature_description['label'] = FixedLenFeature(dtype=tf.float32, shape=1)
train_model_input = input_fn_tfrecord('./criteo_sample.tr.tfrecords', feature_description, 'label', batch_size=256,
num_epochs=1, shuffle_factor=10)
test_model_input = input_fn_tfrecord('./criteo_sample.te.tfrecords', feature_description, 'label',
batch_size=2 ** 14, num_epochs=1, shuffle_factor=0)
```
### Step 4: Train and evaluate the model
```python
model = DeepFMEstimator(linear_feature_columns, dnn_feature_columns, task='binary')
model.train(train_model_input)
eval_result = model.evaluate(test_model_input)
print(eval_result)
```
You can check the full code [here](./Examples.html#estimator-with-tfrecord-classification-criteo).
You can also run a distributed training job with the estimator model on Kubernetes using [ElasticDL](https://github.com/sql-machine-learning/elasticdl/blob/develop/docs/tutorials/elasticdl_deepctr_estimator.md).
================================================
FILE: docs/source/conf.py
================================================
# -*- coding: utf-8 -*-
#
# Configuration file for the Sphinx documentation builder.
#
# This file does only contain a selection of the most common options. For a
# full list see the documentation:
# http://www.sphinx-doc.org/en/master/config

# -- Path setup --------------------------------------------------------------

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sys

# Put the repository root (two levels above this file's directory) first on
# sys.path so that autodoc can import the `deepctr` package.
sys.path.insert(0, os.path.abspath('../../'))

# -- Project information -----------------------------------------------------

project = 'DeepCTR'
copyright = '2017-present, Weichen Shen'
author = 'Weichen Shen'

# The short X.Y version
version = ''
# The full version, including alpha/beta/rc tags
release = '0.9.3'

# -- General configuration ---------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.mathjax',
    'sphinx.ext.ifconfig',
    'sphinx.ext.viewcode',
    'sphinx.ext.githubpages',
]

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
source_suffix = ['.rst', '.md']
#source_suffix = '.rst'

# The master toctree document.
master_doc = 'index'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = None

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path .
exclude_patterns = []

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'

# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
# This is the single place where the theme is chosen. Previously the default
# 'alabaster' was assigned here and then silently overridden at the bottom of
# the file; the effective value was always 'sphinx_rtd_theme'.
html_theme = 'sphinx_rtd_theme'

# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#
# html_theme_options = {}

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']

# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
# The default sidebars (for documents that don't match any pattern) are
# defined by theme itself. Builtin themes are using these templates by
# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
# 'searchbox.html']``.
#
# html_sidebars = {}

# -- Options for HTMLHelp output ---------------------------------------------

# Output file base name for HTML help builder.
htmlhelp_basename = 'DeepCTRdoc'

# -- Options for LaTeX output ------------------------------------------------

latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    #
    # 'papersize': 'letterpaper',

    # The font size ('10pt', '11pt' or '12pt').
    #
    # 'pointsize': '10pt',

    # Additional stuff for the LaTeX preamble.
    #
    # 'preamble': '',

    # Latex figure (float) alignment
    #
    # 'figure_align': 'htbp',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
latex_documents = [
    (master_doc, 'DeepCTR.tex', 'DeepCTR Documentation',
     'Weichen Shen', 'manual'),
]

# -- Options for manual page output ------------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
    (master_doc, 'deepctr', 'DeepCTR Documentation',
     [author], 1)
]

# -- Options for Texinfo output ----------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
    (master_doc, 'DeepCTR', 'DeepCTR Documentation',
     author, 'DeepCTR', 'One line description of project.',
     'Miscellaneous'),
]

# -- Extension configuration -------------------------------------------------

# NOTE(review): 'sphinx.ext.todo' is not listed in `extensions` above, so this
# setting presumably has no effect -- confirm before removing.
todo_include_todos = False

# Maps the '.md' suffix to the recommonmark CommonMark parser.
# NOTE(review): `source_parsers` is deprecated in newer Sphinx releases in
# favour of registering the parser as an extension -- verify against the
# Sphinx version pinned in docs/requirements.readthedocs.txt before changing.
source_parsers = {
    '.md': 'recommonmark.parser.CommonMarkParser',
}
================================================
FILE: docs/source/deepctr.contrib.rnn.rst
================================================
deepctr.contrib.rnn module
==========================
.. automodule:: deepctr.contrib.rnn
:members:
:undoc-members:
:show-inheritance:
================================================
FILE: docs/source/deepctr.contrib.rst
================================================
deepctr.contrib package
=======================
Submodules
----------
.. toctree::
deepctr.contrib.rnn
deepctr.contrib.utils
Module contents
---------------
.. automodule:: deepctr.contrib
:members:
:undoc-members:
:show-inheritance:
================================================
FILE: docs/source/deepctr.contrib.utils.rst
================================================
deepctr.contrib.utils module
============================
.. automodule:: deepctr.contrib.utils
:members:
:undoc-members:
:show-inheritance:
================================================
FILE: docs/source/deepctr.estimator.feature_column.rst
================================================
deepctr.estimator.feature\_column module
========================================
.. automodule:: deepctr.estimator.feature_column
:members:
:undoc-members:
:show-inheritance:
================================================
FILE: docs/source/deepctr.estimator.inputs.rst
================================================
deepctr.estimator.inputs module
===============================
.. automodule:: deepctr.estimator.inputs
:members:
:undoc-members:
:show-inheritance:
================================================
FILE: docs/source/deepctr.estimator.models.afm.rst
================================================
deepctr.estimator.models.afm module
===================================
.. automodule:: deepctr.estimator.models.afm
:members:
:no-undoc-members:
:no-show-inheritance:
================================================
FILE: docs/source/deepctr.estimator.models.autoint.rst
================================================
deepctr.estimator.models.autoint module
=======================================
.. automodule:: deepctr.estimator.models.autoint
:members:
:no-undoc-members:
:no-show-inheritance:
================================================
FILE: docs/source/deepctr.estimator.models.ccpm.rst
================================================
deepctr.estimator.models.ccpm module
====================================
.. automodule:: deepctr.estimator.models.ccpm
:members:
:no-undoc-members:
:no-show-inheritance:
================================================
FILE: docs/source/deepctr.estimator.models.dcn.rst
================================================
deepctr.estimator.models.dcn module
===================================
.. automodule:: deepctr.estimator.models.dcn
:members:
:no-undoc-members:
:no-show-inheritance:
================================================
FILE: docs/source/deepctr.estimator.models.deepfefm.rst
================================================
deepctr.estimator.models.deepfefm module
======================================
.. automodule:: deepctr.estimator.models.deepfefm
:members:
:no-undoc-members:
:no-show-inheritance:
================================================
FILE: docs/source/deepctr.estimator.models.deepfm.rst
================================================
deepctr.estimator.models.deepfm module
======================================
.. automodule:: deepctr.estimator.models.deepfm
:members:
:no-undoc-members:
:no-show-inheritance:
================================================
FILE: docs/source/deepctr.estimator.models.fibinet.rst
================================================
deepctr.estimator.models.fibinet module
=======================================
.. automodule:: deepctr.estimator.models.fibinet
:members:
:no-undoc-members:
:no-show-inheritance:
================================================
FILE: docs/source/deepctr.estimator.models.fnn.rst
================================================
deepctr.estimator.models.fnn module
===================================
.. automodule:: deepctr.estimator.models.fnn
:members:
:no-undoc-members:
:no-show-inheritance:
================================================
FILE: docs/source/deepctr.estimator.models.fwfm.rst
================================================
deepctr.estimator.models.fwfm module
========================================
.. automodule:: deepctr.estimator.models.fwfm
:members:
:no-undoc-members:
:no-show-inheritance:
================================================
FILE: docs/source/deepctr.estimator.models.nfm.rst
================================================
deepctr.estimator.models.nfm module
===================================
.. automodule:: deepctr.estimator.models.nfm
:members:
:no-undoc-members:
:no-show-inheritance:
================================================
FILE: docs/source/deepctr.estimator.models.pnn.rst
================================================
deepctr.estimator.models.pnn module
===================================
.. automodule:: deepctr.estimator.models.pnn
:members:
:no-undoc-members:
:no-show-inheritance:
================================================
FILE: docs/source/deepctr.estimator.models.rst
================================================
deepctr.estimator.models package
================================
Submodules
----------
.. toctree::

   deepctr.estimator.models.afm
   deepctr.estimator.models.autoint
   deepctr.estimator.models.ccpm
   deepctr.estimator.models.dcn
   deepctr.estimator.models.deepfefm
   deepctr.estimator.models.deepfm
   deepctr.estimator.models.fibinet
   deepctr.estimator.models.fnn
   deepctr.estimator.models.fwfm
   deepctr.estimator.models.nfm
   deepctr.estimator.models.pnn
   deepctr.estimator.models.wdl
   deepctr.estimator.models.xdeepfm

Module contents
---------------
.. automodule:: deepctr.estimator.models
:members:
:undoc-members:
:show-inheritance:
================================================
FILE: docs/source/deepctr.estimator.models.wdl.rst
================================================
deepctr.estimator.models.wdl module
===================================
.. automodule:: deepctr.estimator.models.wdl
:members:
:no-undoc-members:
:no-show-inheritance:
================================================
FILE: docs/source/deepctr.estimator.models.xdeepfm.rst
================================================
deepctr.estimator.models.xdeepfm module
=======================================
.. automodule:: deepctr.estimator.models.xdeepfm
:members:
:no-undoc-members:
:no-show-inheritance:
================================================
FILE: docs/source/deepctr.estimator.rst
================================================
deepctr.estimator package
=========================
Subpackages
-----------
.. toctree::
deepctr.estimator.models
Submodules
----------
.. toctree::
deepctr.estimator.feature_column
deepctr.estimator.inputs
deepctr.estimator.utils
Module contents
---------------
.. automodule:: deepctr.estimator
:members:
:undoc-members:
:show-inheritance:
================================================
FILE: docs/source/deepctr.estimator.utils.rst
================================================
deepctr.estimator.utils module
==============================
.. automodule:: deepctr.estimator.utils
:members:
:undoc-members:
:show-inheritance:
================================================
FILE: docs/source/deepctr.feature_column.rst
================================================
deepctr.feature\_column module
==============================
.. automodule:: deepctr.feature_column
:members:
:undoc-members:
:show-inheritance:
================================================
FILE: docs/source/deepctr.inputs.rst
================================================
deepctr.inputs module
=====================
.. automodule:: deepctr.inputs
:members:
:undoc-members:
:show-inheritance:
================================================
FILE: docs/source/deepctr.layers.activation.rst
================================================
deepctr.layers.activation module
================================
.. automodule:: deepctr.layers.activation
:members:
:no-undoc-members:
:no-show-inheritance:
================================================
FILE: docs/source/deepctr.layers.core.rst
================================================
deepctr.layers.core module
==========================
.. automodule:: deepctr.layers.core
:members:
:no-undoc-members:
:no-show-inheritance:
================================================
FILE: docs/source/deepctr.layers.interaction.rst
================================================
deepctr.layers.interaction module
=================================
.. automodule:: deepctr.layers.interaction
:members:
:no-undoc-members:
:no-show-inheritance:
================================================
FILE: docs/source/deepctr.layers.normalization.rst
================================================
deepctr.layers.normalization module
===================================
.. automodule:: deepctr.layers.normalization
:members:
:no-undoc-members:
:no-show-inheritance:
================================================
FILE: docs/source/deepctr.layers.rst
================================================
deepctr.layers package
======================
Submodules
----------
.. toctree::
deepctr.layers.activation
deepctr.layers.core
deepctr.layers.interaction
deepctr.layers.normalization
deepctr.layers.sequence
deepctr.layers.utils
Module contents
---------------
.. automodule:: deepctr.layers
:members:
:undoc-members:
:show-inheritance:
================================================
FILE: docs/source/deepctr.layers.sequence.rst
================================================
deepctr.layers.sequence module
==============================
.. automodule:: deepctr.layers.sequence
:members:
:no-undoc-members:
:no-show-inheritance:
================================================
FILE: docs/source/deepctr.layers.utils.rst
================================================
deepctr.layers.utils module
===========================
.. automodule:: deepctr.layers.utils
:members:
:undoc-members:
:show-inheritance:
================================================
FILE: docs/source/deepctr.models.afm.rst
================================================
deepctr.models.afm module
=========================
.. automodule:: deepctr.models.afm
:members:
:no-undoc-members:
:no-show-inheritance:
================================================
FILE: docs/source/deepctr.models.autoint.rst
================================================
deepctr.models.autoint module
=============================
.. automodule:: deepctr.models.autoint
:members:
:no-undoc-members:
:no-show-inheritance:
================================================
FILE: docs/source/deepctr.models.ccpm.rst
================================================
deepctr.models.ccpm module
==========================
.. automodule:: deepctr.models.ccpm
:members:
:no-undoc-members:
:no-show-inheritance:
================================================
FILE: docs/source/deepctr.models.dcn.rst
================================================
deepctr.models.dcn module
=========================
.. automodule:: deepctr.models.dcn
:members:
:no-undoc-members:
:no-show-inheritance:
================================================
FILE: docs/source/deepctr.models.dcnmix.rst
================================================
deepctr.models.dcnmix module
============================
.. automodule:: deepctr.models.dcnmix
:members:
:no-undoc-members:
:no-show-inheritance:
================================================
FILE: docs/source/deepctr.models.deepfefm.rst
================================================
deepctr.models.deepfefm module
==============================
.. automodule:: deepctr.models.deepfefm
:members:
:no-undoc-members:
:no-show-inheritance:
================================================
FILE: docs/source/deepctr.models.deepfm.rst
================================================
deepctr.models.deepfm module
============================
.. automodule:: deepctr.models.deepfm
:members:
:no-undoc-members:
:no-show-inheritance:
================================================
FILE: docs/source/deepctr.models.deepfwfm.rst
================================================
deepctr.models.fwfm module
==========================
.. automodule:: deepctr.models.fwfm
:members:
:no-undoc-members:
:no-show-inheritance:
================================================
FILE: docs/source/deepctr.models.difm.rst
================================================
deepctr.models.difm module
=============================
.. automodule:: deepctr.models.difm
:members:
:no-undoc-members:
:no-show-inheritance:
================================================
FILE: docs/source/deepctr.models.edcn.rst
================================================
deepctr.models.edcn module
==========================
.. automodule:: deepctr.models.edcn
:members:
:no-undoc-members:
:no-show-inheritance:
================================================
FILE: docs/source/deepctr.models.fgcnn.rst
================================================
deepctr.models.fgcnn module
===========================
.. automodule:: deepctr.models.fgcnn
:members:
:no-undoc-members:
:no-show-inheritance:
================================================
FILE: docs/source/deepctr.models.fibinet.rst
================================================
deepctr.models.fibinet module
=============================
.. automodule:: deepctr.models.fibinet
:members:
:no-undoc-members:
:no-show-inheritance:
================================================
FILE: docs/source/deepctr.models.flen.rst
================================================
deepctr.models.flen module
=============================
.. automodule:: deepctr.models.flen
:members:
:no-undoc-members:
:no-show-inheritance:
================================================
FILE: docs/source/deepctr.models.fnn.rst
================================================
deepctr.models.fnn module
=========================
.. automodule:: deepctr.models.fnn
:members:
:no-undoc-members:
:no-show-inheritance:
================================================
FILE: docs/source/deepctr.models.ifm.rst
================================================
deepctr.models.ifm module
=============================
.. automodule:: deepctr.models.ifm
:members:
:no-undoc-members:
:no-show-inheritance:
================================================
FILE: docs/source/deepctr.models.mlr.rst
================================================
deepctr.models.mlr module
=========================
.. automodule:: deepctr.models.mlr
:members:
:no-undoc-members:
:no-show-inheritance:
================================================
FILE: docs/source/deepctr.models.multitask.esmm.rst
================================================
deepctr.models.multitask.esmm module
====================================
.. automodule:: deepctr.models.multitask.esmm
:members:
:no-undoc-members:
:no-show-inheritance:
================================================
FILE: docs/source/deepctr.models.multitask.mmoe.rst
================================================
deepctr.models.multitask.mmoe module
====================================
.. automodule:: deepctr.models.multitask.mmoe
:members:
:no-undoc-members:
:no-show-inheritance:
================================================
FILE: docs/source/deepctr.models.multitask.ple.rst
================================================
deepctr.models.multitask.ple module
===================================
.. automodule:: deepctr.models.multitask.ple
:members:
:no-undoc-members:
:no-show-inheritance:
================================================
FILE: docs/source/deepctr.models.multitask.sharedbottom.rst
================================================
deepctr.models.multitask.sharedbottom module
============================================
.. automodule:: deepctr.models.multitask.sharedbottom
:members:
:no-undoc-members:
:no-show-inheritance:
================================================
FILE: docs/source/deepctr.models.nfm.rst
================================================
deepctr.models.nfm module
=========================
.. automodule:: deepctr.models.nfm
:members:
:no-undoc-members:
:no-show-inheritance:
================================================
FILE: docs/source/deepctr.models.onn.rst
================================================
deepctr.models.onn module
==========================
.. automodule:: deepctr.models.onn
:members:
:no-undoc-members:
:no-show-inheritance:
================================================
FILE: docs/source/deepctr.models.pnn.rst
================================================
deepctr.models.pnn module
=========================
.. automodule:: deepctr.models.pnn
:members:
:no-undoc-members:
:no-show-inheritance:
================================================
FILE: docs/source/deepctr.models.rst
================================================
deepctr.models package
======================
Submodules
----------
.. toctree::
deepctr.models.afm
deepctr.models.autoint
deepctr.models.ccpm
deepctr.models.dcn
deepctr.models.dcnmix
deepctr.models.edcn
deepctr.models.deepfm
deepctr.models.sequence.bst
deepctr.models.sequence.dien
deepctr.models.sequence.din
deepctr.models.sequence.dsin
deepctr.models.fgcnn
deepctr.models.fibinet
deepctr.models.fnn
deepctr.models.mlr
deepctr.models.onn
deepctr.models.nfm
deepctr.models.pnn
deepctr.models.wdl
deepctr.models.xdeepfm
deepctr.models.flen
deepctr.models.ifm
deepctr.models.difm
deepctr.models.deepfefm
deepctr.models.multitask.sharedbottom
deepctr.models.multitask.esmm
deepctr.models.multitask.mmoe
deepctr.models.multitask.ple
Module contents
---------------
.. automodule:: deepctr.models
:members:
:undoc-members:
:show-inheritance:
================================================
FILE: docs/source/deepctr.models.sequence.bst.rst
================================================
deepctr.models.sequence.bst module
==================================
.. automodule:: deepctr.models.sequence.bst
:members:
:no-undoc-members:
:no-show-inheritance:
================================================
FILE: docs/source/deepctr.models.sequence.dien.rst
================================================
deepctr.models.sequence.dien module
===================================
.. automodule:: deepctr.models.sequence.dien
:members:
:no-undoc-members:
:no-show-inheritance:
================================================
FILE: docs/source/deepctr.models.sequence.din.rst
================================================
deepctr.models.sequence.din module
==================================
.. automodule:: deepctr.models.sequence.din
:members:
:no-undoc-members:
:no-show-inheritance:
================================================
FILE: docs/source/deepctr.models.sequence.dsin.rst
================================================
deepctr.models.sequence.dsin module
===================================
.. automodule:: deepctr.models.sequence.dsin
:members:
:no-undoc-members:
:no-show-inheritance:
================================================
FILE: docs/source/deepctr.models.wdl.rst
================================================
deepctr.models.wdl module
=========================
.. automodule:: deepctr.models.wdl
:members:
:no-undoc-members:
:no-show-inheritance:
================================================
FILE: docs/source/deepctr.models.xdeepfm.rst
================================================
deepctr.models.xdeepfm module
=============================
.. automodule:: deepctr.models.xdeepfm
:members:
:no-undoc-members:
:no-show-inheritance:
================================================
FILE: docs/source/deepctr.rst
================================================
deepctr package
===============
Subpackages
-----------
.. toctree::
deepctr.contrib
deepctr.layers
deepctr.models
Submodules
----------
.. toctree::
deepctr.inputs
deepctr.utils
Module contents
---------------
.. automodule:: deepctr
:members:
:undoc-members:
:show-inheritance:
================================================
FILE: docs/source/deepctr.utils.rst
================================================
deepctr.utils module
====================
.. automodule:: deepctr.utils
:members:
:undoc-members:
:show-inheritance:
================================================
FILE: docs/source/index.rst
================================================
.. DeepCTR documentation master file, created by
sphinx-quickstart on Fri Nov 23 21:08:54 2018.
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
Welcome to DeepCTR's documentation!
===================================
|Downloads|_ |Stars|_ |Forks|_ |PyPii|_ |Issues|_ |Chat|_
.. |Downloads| image:: https://pepy.tech/badge/deepctr
.. _Downloads: https://pepy.tech/project/deepctr
.. |Stars| image:: https://img.shields.io/github/stars/shenweichen/deepctr.svg
.. _Stars: https://github.com/shenweichen/DeepCTR
.. |Forks| image:: https://img.shields.io/github/forks/shenweichen/deepctr.svg
.. _Forks: https://github.com/shenweichen/DeepCTR/fork
.. |PyPii| image:: https://img.shields.io/pypi/v/deepctr.svg
.. _PyPii: https://pypi.org/project/deepctr
.. |Issues| image:: https://img.shields.io/github/issues/shenweichen/deepctr.svg
.. _Issues: https://github.com/shenweichen/deepctr/issues
.. |Chat| image:: https://img.shields.io/badge/chat-wechat-brightgreen?style=flat
.. _Chat: ./#disscussiongroup
DeepCTR is an **Easy-to-use**, **Modular** and **Extendible** package of deep-learning based CTR models, along with many core component layers which can be used to easily build custom models. You can use any complex model with ``model.fit()`` and ``model.predict()``.
- Provide ``tf.keras.Model`` like interface for **quick experiment**. `example `_
- Provide ``tensorflow estimator`` interface for **large scale data** and **distributed training**. `example `_
- It is compatible with both ``tf 1.x`` and ``tf 2.x``.
Let's `Get Started! <./Quick-Start.html>`_ (`Chinese Introduction `_)
You can read the latest code and related projects
- DeepCTR: https://github.com/shenweichen/DeepCTR
- DeepMatch: https://github.com/shenweichen/DeepMatch
- DeepCTR-Torch: https://github.com/shenweichen/DeepCTR-Torch
News
-----
11/10/2022 : Add `EDCN` . `Changelog `_
10/15/2022 : Support python `3.9` , `3.10` . `Changelog `_
06/11/2022 : Improve compatibility with tensorflow `2.x`. `Changelog `_
DisscussionGroup
-----------------------
公众号:**浅梦学习笔记** wechat ID: **deepctrbot**
`Discussions `_ `学习小组主题集合 `_
.. image:: ../pics/code2.jpg
.. toctree::
:maxdepth: 2
:caption: Home:
Quick-Start
Features
Examples
FAQ
History
.. toctree::
:maxdepth: 3
:caption: API:
Models
Estimators
Layers
Indices and tables
==================
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`
================================================
FILE: docs/source/modules.rst
================================================
deepctr
=======
.. toctree::
:maxdepth: 4
deepctr
================================================
FILE: examples/avazu_sample.txt
================================================
id,click,hour,C1,banner_pos,site_id,site_domain,site_category,app_id,app_domain,app_category,device_id,device_ip,device_model,device_type,device_conn_type,C14,C15,C16,C17,C18,C19,C20,C21
1000009418151094273,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,ddd2926e,44956a24,1,2,15706,320,50,1722,0,35,-1,79
10000169349117863715,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,96809ac8,711ee120,1,0,15704,320,50,1722,0,35,100084,79
10000371904215119486,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,b3cf8def,8a4875bd,1,0,15704,320,50,1722,0,35,100084,79
10000640724480838376,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,e8275b8f,6332421a,1,0,15706,320,50,1722,0,35,100084,79
10000679056417042096,0,14102100,1005,1,fe8cc448,9166c161,0569f928,ecad2386,7801e8d9,07d7df22,a99f214a,9644d0bf,779d90c2,1,0,18993,320,50,2161,0,35,-1,157
10000720757801103869,0,14102100,1005,0,d6137915,bb1ef334,f028772b,ecad2386,7801e8d9,07d7df22,a99f214a,05241af0,8a4875bd,1,0,16920,320,50,1899,0,431,100077,117
10000724729988544911,0,14102100,1005,0,8fda644b,25d4cfcd,f028772b,ecad2386,7801e8d9,07d7df22,a99f214a,b264c159,be6db1d7,1,0,20362,320,50,2333,0,39,-1,157
10000918755742328737,0,14102100,1005,1,e151e245,7e091613,f028772b,ecad2386,7801e8d9,07d7df22,a99f214a,e6f67278,be74e6fe,1,0,20632,320,50,2374,3,39,-1,23
10000949271186029916,1,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,37e8da74,5db079b5,1,2,15707,320,50,1722,0,35,-1,79
10001264480619467364,0,14102100,1002,0,84c7ba46,c4e18dd6,50e219e0,ecad2386,7801e8d9,07d7df22,c357dbff,f1ac7184,373ecbe6,0,0,21689,320,50,2496,3,167,100191,23
10001868339616595934,0,14102100,1005,1,e151e245,7e091613,f028772b,ecad2386,7801e8d9,07d7df22,a99f214a,5d877109,8f5c9827,1,0,17747,320,50,1974,2,39,100019,33
10001966791793526909,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,6f407810,1f0bc64f,1,0,15701,320,50,1722,0,35,-1,79
10002028568167339219,0,14102100,1005,0,9e8cf15d,0d3cb7be,f028772b,ecad2386,7801e8d9,07d7df22,a99f214a,58811cdf,8326c04b,1,2,20596,320,50,2161,0,35,100148,157
10002044883120869786,0,14102100,1005,0,d6137915,bb1ef334,f028772b,ecad2386,7801e8d9,07d7df22,a99f214a,72aab6df,04258293,1,0,19771,320,50,2227,0,687,100077,48
10002518649031436658,0,14102100,1005,0,85f751fd,c4e18dd6,50e219e0,98fed791,d9b5648e,0f2161f8,a99f214a,6dec2796,aad45b01,1,0,20984,320,50,2371,0,551,-1,46
10003539039235338011,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,a4f47b2e,8a4875bd,1,0,15699,320,50,1722,0,35,100084,79
10003585669470236873,0,14102100,1005,0,d9750ee7,98572c79,f028772b,ecad2386,7801e8d9,07d7df22,a99f214a,9b1fe278,128f4ba1,1,0,17914,320,50,2043,2,39,-1,32
10004105575081229495,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,c26c53cf,be87996b,1,2,15708,320,50,1722,0,35,100084,79
10004181428767727519,0,14102100,1005,1,0c2fe9d6,27e3c518,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,b7a69808,158e4944,1,0,6558,320,50,571,2,39,-1,32
10004482643316086592,0,14102100,1005,0,85f751fd,c4e18dd6,50e219e0,66a5f0f3,d9b5648e,cef3e649,a99f214a,fa60af6b,b4b19c97,1,0,21234,320,50,2434,3,163,100088,61
10004510652136496837,0,14102100,1005,0,543a539e,c7ca3108,3e814130,ecad2386,7801e8d9,07d7df22,a99f214a,8a308c73,3223bcfe,1,0,20352,320,50,2333,0,39,-1,157
10004574413841529209,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,1b6530bc,1aa0e912,1,0,15706,320,50,1722,0,35,-1,79
10004670021948955159,0,14102100,1005,0,543a539e,c7ca3108,3e814130,ecad2386,7801e8d9,07d7df22,a99f214a,a2d12b33,607e78f2,1,0,20366,320,50,2333,0,39,-1,157
10004765361151096125,1,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,c6563308,7fdd04d2,1,0,15701,320,50,1722,0,35,-1,79
10005249248600843539,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,e99d0c2e,d25693ce,1,0,15706,320,50,1722,0,35,100083,79
10005334911727438633,0,14102100,1010,1,85f751fd,c4e18dd6,50e219e0,ffc6ffd0,7801e8d9,0f2161f8,fb23c543,69890c7f,9fef9da8,4,0,21665,320,50,2493,3,35,-1,117
10005541670676403131,0,14102100,1005,1,e151e245,7e091613,f028772b,ecad2386,7801e8d9,07d7df22,a99f214a,c62f7206,69f9dd0e,1,0,20984,320,50,2371,0,551,100217,46
10005609489911213467,1,14102100,1005,0,85f751fd,c4e18dd6,50e219e0,54c5d545,2347f47a,0f2161f8,9af87478,2a2bfc89,ecf10acf,1,0,21611,320,50,2480,3,297,100111,61
10005649443863261125,0,14102100,1005,0,543a539e,c7ca3108,3e814130,ecad2386,7801e8d9,07d7df22,a99f214a,50d86760,d787e91b,1,0,20366,320,50,2333,0,39,-1,157
10005951398749600249,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,431b3174,f39b265e,1,0,15706,320,50,1722,0,35,-1,79
10006192453619779489,0,14102100,1005,0,85f751fd,c4e18dd6,50e219e0,685d1c4c,2347f47a,8ded1f7a,6a943594,8a014cbb,81b42528,1,3,15708,320,50,1722,0,35,-1,79
10006415976094813740,0,14102100,1005,0,f84e52b6,d7e2f29b,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,a8649089,e9b8d8d7,1,0,16838,320,50,1882,3,35,-1,13
10006490708516192015,1,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,a4459495,517bef98,1,0,15708,320,50,1722,0,35,100083,79
10006557235872316145,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,ac77b71a,d787e91b,1,0,15699,320,50,1722,0,35,-1,79
10006629065800243858,0,14102100,1005,0,543a539e,c7ca3108,3e814130,ecad2386,7801e8d9,07d7df22,a99f214a,6769bdb2,d787e91b,1,0,20362,320,50,2333,0,39,-1,157
10006777279679619273,0,14102100,1005,0,85f751fd,c4e18dd6,50e219e0,d2bb6502,2347f47a,8ded1f7a,4b2309e9,22c2dcf4,d6e0e6ff,1,3,18987,320,50,2158,3,291,100193,61
10006789981076459409,0,14102100,1005,0,030440fe,08ba7db9,76b2941d,ecad2386,7801e8d9,07d7df22,a99f214a,692824c7,293291c1,1,0,20596,320,50,2161,0,35,-1,157
10006958186789044052,1,14102100,1005,0,85f751fd,c4e18dd6,50e219e0,0acbeaa3,45a51db4,f95efa07,a99f214a,ce6e6bbd,2cd8ff6d,1,0,18993,320,50,2161,0,35,100034,157
10007163879183388340,0,14102100,1005,0,030440fe,08ba7db9,76b2941d,ecad2386,7801e8d9,07d7df22,a99f214a,5035aded,3db9fde9,1,0,18993,320,50,2161,0,35,-1,157
10007164336863914220,1,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,b2b14786,36d749e5,1,0,15706,320,50,1722,0,35,-1,79
10007197383452514432,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,07f39509,49ea3580,1,0,15704,320,50,1722,0,35,100084,79
10007446479189647526,0,14102100,1005,0,6ec06dbd,d262cf1e,f66779e6,ecad2386,7801e8d9,07d7df22,a99f214a,3aea6370,6360f9ec,1,0,19870,320,50,2271,0,687,100075,48
10007768440836622373,0,14102100,1005,0,85f751fd,c4e18dd6,50e219e0,e2a1ca37,2347f47a,8ded1f7a,432cd280,45919d0d,1ccc7835,1,0,15708,320,50,1722,0,35,-1,79
10007830732992705885,0,14102100,1010,1,85f751fd,c4e18dd6,50e219e0,a607e6a7,7801e8d9,0f2161f8,890abcbb,9f02f646,e8c7729d,4,0,21665,320,50,2493,3,35,-1,117
10007847530896919634,1,14102100,1002,0,84c7ba46,c4e18dd6,50e219e0,ecad2386,7801e8d9,07d7df22,767a174e,3e805b2a,cf19f7f7,0,0,21661,320,50,2446,3,171,100228,156
10007908698866493310,0,14102100,1005,1,0eb72673,d2f72222,f028772b,ecad2386,7801e8d9,07d7df22,a99f214a,834f84b2,76dc4769,1,0,16208,320,50,1800,3,167,100075,23
10007944429976961145,1,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,07875ea4,aaffed8f,1,0,15701,320,50,1722,0,35,-1,79
10009147085943364421,0,14102100,1005,1,e151e245,7e091613,f028772b,ecad2386,7801e8d9,07d7df22,a99f214a,905d2fbc,1b13b020,1,0,17037,320,50,1934,2,39,-1,16
10009190848778773294,0,14102100,1005,1,5ee41ff2,17d996e6,f028772b,ecad2386,7801e8d9,07d7df22,a99f214a,fc7f99ee,70359270,1,0,16920,320,50,1899,0,431,-1,117
10009635774586344851,0,14102100,1005,0,543a539e,c7ca3108,3e814130,ecad2386,7801e8d9,07d7df22,a99f214a,37018b2d,24f6b932,1,0,20352,320,50,2333,0,39,-1,157
10009699694430474960,1,14102100,1005,0,4dd0a958,79cf0c8d,f028772b,ecad2386,7801e8d9,07d7df22,a99f214a,f6a5ae09,88fe1d5d,1,0,20366,320,50,2333,0,39,-1,157
10009807995169380879,0,14102100,1005,0,85f751fd,c4e18dd6,50e219e0,396df801,2347f47a,0f2161f8,a99f214a,554d9f5f,36a30aeb,1,0,15705,320,50,1722,0,35,100084,79
10009910814812262951,1,14102100,1005,0,85f751fd,c4e18dd6,50e219e0,a079ef6b,2347f47a,75d80bbe,a99f214a,f8c8df20,be87996b,1,2,18993,320,50,2161,0,35,100131,157
10010452321736390000,1,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,cede6db1,a0f5f879,1,0,15701,320,50,1722,0,35,100084,79
10010485868773711631,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,1cb5985e,1ccc7835,1,0,15701,320,50,1722,0,35,100084,79
10010504760200486071,0,14102100,1005,1,5ee41ff2,17d996e6,f028772b,ecad2386,7801e8d9,07d7df22,a99f214a,d012a1cb,ecb851b2,1,0,16615,320,50,1863,3,39,100188,23
10010730108771379386,0,14102100,1005,1,e151e245,7e091613,f028772b,ecad2386,7801e8d9,07d7df22,a99f214a,08dd2eb8,cdf6ea96,1,0,20634,320,50,2374,3,39,-1,23
10010804179216291475,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,9a5911ad,1ccc7835,1,0,15704,320,50,1722,0,35,-1,79
1001082718558099372,0,14102100,1005,0,85f751fd,c4e18dd6,50e219e0,1779deee,2347f47a,f95efa07,a99f214a,5a96d22e,9e3836ff,1,0,18993,320,50,2161,0,35,-1,157
10010924186026106882,0,14102100,1005,0,030440fe,08ba7db9,76b2941d,ecad2386,7801e8d9,07d7df22,a99f214a,8f6c30bb,744ae245,1,0,18993,320,50,2161,0,35,-1,157
10010966574628106108,1,14102100,1005,0,85f751fd,c4e18dd6,50e219e0,0acbeaa3,45a51db4,f95efa07,a99f214a,061893d4,68b900d9,1,0,20596,320,50,2161,0,35,100034,157
10011085150831357375,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,07875ea4,d787e91b,1,0,15699,320,50,1722,0,35,-1,79
10011205200760015892,0,14102100,1005,0,6256f5b4,28f93029,f028772b,ecad2386,7801e8d9,07d7df22,a99f214a,04a1662e,521f95fe,1,0,17212,320,50,1887,3,39,100202,23
1001139595064240144,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,c9758700,76dc4769,1,0,15705,320,50,1722,0,35,-1,79
10011406079394798455,0,14102100,1005,0,543a539e,c7ca3108,3e814130,ecad2386,7801e8d9,07d7df22,a99f214a,9ae68bb9,24f6b932,1,0,20362,320,50,2333,0,39,-1,157
1001156047808171144,1,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,2801fd97,575d0d2a,1,0,15708,320,50,1722,0,35,100084,79
10011561503992804801,0,14102100,1005,1,e151e245,7e091613,f028772b,ecad2386,7801e8d9,07d7df22,a99f214a,931519c4,e9b8d8d7,1,0,17747,320,50,1974,2,39,100021,33
10011650513707909570,0,14102100,1005,0,85f751fd,c4e18dd6,50e219e0,febd1138,82e27996,0f2161f8,a99f214a,1ce4451d,99e427c9,1,0,21611,320,50,2480,3,297,100111,61
10011658782619041235,1,14102100,1005,0,0aab7161,660aeadc,f028772b,ecad2386,7801e8d9,07d7df22,a99f214a,0086332e,1f0bc64f,1,0,15699,320,50,1722,0,35,-1,79
10011677979251422697,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,82310cab,f39b265e,1,0,15707,320,50,1722,0,35,-1,79
1001179289293608710,0,14102100,1005,1,e023ba3e,75f9ddc3,f028772b,ecad2386,7801e8d9,07d7df22,a99f214a,f7c9ee04,56f254f5,1,0,17914,320,50,2043,2,39,-1,32
10012212068904346443,0,14102100,1005,0,543a539e,c7ca3108,3e814130,ecad2386,7801e8d9,07d7df22,a99f214a,6769bdb2,d787e91b,1,0,20352,320,50,2333,0,39,-1,157
10012222478217629851,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,3738b922,d787e91b,1,0,15705,320,50,1722,0,35,100084,79
10012820175855462623,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,8acb1161,1f0bc64f,1,0,15707,320,50,1722,0,35,-1,79
10013076841337920650,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,ed326aa2,4ceb2e0b,1,0,15702,320,50,1722,0,35,-1,79
10013222055782902774,0,14102100,1005,0,5b08c53b,7687a86e,3e814130,ecad2386,7801e8d9,07d7df22,a99f214a,09b19f16,7eef184d,1,0,17654,300,250,1994,2,39,-1,33
10013330254346467994,0,14102100,1005,0,f5476ff8,00e1b9c0,3e814130,ecad2386,7801e8d9,07d7df22,a99f214a,da162469,8b1aa260,1,0,18993,320,50,2161,0,35,-1,157
10013378798301872145,1,14102100,1005,1,e151e245,7e091613,f028772b,ecad2386,7801e8d9,07d7df22,a99f214a,40fb49ca,be74e6fe,1,0,20362,320,50,2333,0,39,-1,157
10013493678511778479,0,14102100,1005,0,85f751fd,c4e18dd6,50e219e0,39947756,2347f47a,cef3e649,a2cbb1e0,d784a354,9f8d0424,1,2,18993,320,50,2161,0,35,-1,157
10013552540914034684,0,14102100,1005,0,85f751fd,c4e18dd6,50e219e0,e2fcccd2,5c5a694b,0f2161f8,a99f214a,c21a1e56,89416188,1,0,4687,320,50,423,2,39,100148,32
10013750748974177308,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,8eb51743,a0f5f879,1,0,15703,320,50,1722,0,35,100083,79
1001378691598807810,0,14102100,1002,0,85f751fd,c4e18dd6,50e219e0,a37bf1e4,7801e8d9,07d7df22,1ab3feec,c45c8256,8debacdb,0,0,21691,320,50,2495,2,167,-1,23
10013840276980995258,0,14102100,1005,0,85f751fd,c4e18dd6,50e219e0,e2fcccd2,5c5a694b,0f2161f8,a99f214a,07533d06,76dc4769,1,0,4687,320,50,423,2,39,100148,32
10013846047025246486,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,2e93a860,f39b265e,1,0,15702,320,50,1722,0,35,100083,79
10014026899633599058,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,9cdc12cc,711ee120,1,0,15699,320,50,1722,0,35,100084,79
10014063680973162331,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,665810f3,78d9bd10,1,0,15699,320,50,1722,0,35,100083,79
10014190212266331300,1,14102100,1005,0,85f751fd,c4e18dd6,50e219e0,9c13b419,2347f47a,f95efa07,a99f214a,ed9450c2,1f0bc64f,1,0,20633,320,50,2374,3,39,-1,23
10014285064795240866,1,14102100,1002,0,84c7ba46,c4e18dd6,50e219e0,ecad2386,7801e8d9,07d7df22,c357dbff,06f76b24,373ecbe6,0,0,21682,320,50,2496,3,167,100191,23
10014385711019128754,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,12c3d700,ef726eae,1,0,15704,320,50,1722,0,35,-1,79
10014630626523032142,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,0345a137,3bd9e8e7,1,0,15702,320,50,1722,0,35,100083,79
10014764617325763141,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,4e873691,c6263d8a,1,0,15703,320,50,1722,0,35,-1,79
10014885175555340290,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,27f3fa06,d25693ce,1,0,15705,320,50,1722,0,35,100083,79
10014887683839786798,1,14102100,1005,0,85f751fd,c4e18dd6,50e219e0,e2fcccd2,5c5a694b,0f2161f8,a99f214a,fac78767,84ebbcd4,1,0,4687,320,50,423,2,39,100148,32
10015140740686523448,0,14102100,1005,0,85f751fd,c4e18dd6,50e219e0,c51f82bc,d9b5648e,0f2161f8,a99f214a,2d227840,9b5ce758,1,0,21611,320,50,2480,3,297,100111,61
10015211672544614902,0,14102100,1005,1,e151e245,7e091613,f028772b,ecad2386,7801e8d9,07d7df22,a99f214a,42606fe6,cb0fb677,1,0,17037,320,50,1934,2,39,-1,16
10015376300289320595,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,03108db9,a0f5f879,1,0,15701,320,50,1722,0,35,100084,79
10015405794859644629,1,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,0b697be1,1f0bc64f,1,0,15701,320,50,1722,0,35,100084,79
10015629448289660116,1,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,58db4f0c,6332421a,1,0,15708,320,50,1722,0,35,-1,79
100156980486870304,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,02b9b0fc,1aa0e912,1,0,15706,320,50,1722,0,35,-1,79
10015745448500295401,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,6b9769f2,4c8aeb60,1,0,15701,320,50,1722,0,35,-1,79
================================================
FILE: examples/census-income.sample
================================================
138481,62, Private,43,23, High school graduate,0, Not in universe, Married-civilian spouse present, Education, Adm support including clerical, White, All other, Female, Not in universe, Not in universe, Full-time schedules,0,0,0, Joint both under 65, Not in universe, Not in universe, Householder, Householder,1819.08, ?, ?, ?, Not in universe under 1 year old, ?,4, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, 50000+.
91960,18, Private,40,19, 11th grade,0, High school, Never married, Entertainment, Sales, Black, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Single, Not in universe, Not in universe, Child 18+ never marr Not in a subfamily, Child 18 or older,645.07, Nonmover, Nonmover, Nonmover, Yes, Not in universe,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,94, - 50000.
112171,19, Not in universe,0,0, High school graduate,0, College or university, Never married, Not in universe or children, Not in universe, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Single, Not in universe, Not in universe, Child 18+ never marr Not in a subfamily, Child 18 or older,396.66, Nonmover, Nonmover, Nonmover, Yes, Not in universe,1, Not in universe, United-States, United-States, United-States, Native- Born in the United States,2, Not in universe,2,16,94, - 50000.
118554,9, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, Mexican-American, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,2052.26, ?, ?, ?, Not in universe under 1 year old, ?,0, Mother only present, Mexico, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,95, - 50000.
193623,31, Private,45,3, Bachelors degree(BA AB BS),0, Not in universe, Never married, Other professional services, Executive admin and managerial, Black, All other, Male, Not in universe, Not in universe, Full-time schedules,0,0,0, Single, Not in universe, Not in universe, Nonfamily householder, Householder,614.61, ?, ?, ?, Not in universe under 1 year old, ?,3, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, - 50000.
198699,29, Private,33,29, Bachelors degree(BA AB BS),0, Not in universe, Married-civilian spouse present, Retail trade, Other service, White, All other, Female, Not in universe, Not in universe, Full-time schedules,0,0,0, Joint both under 65, Not in universe, Not in universe, Householder, Householder,1971.05, ?, ?, ?, Not in universe under 1 year old, ?,2, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, - 50000.
85495,52, Not in universe,0,0, High school graduate,0, Not in universe, Married-civilian spouse present, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Not in labor force,0,0,0, Nonfiler, Not in universe, Not in universe, Spouse of householder, Spouse of householder,1079.49, ?, ?, ?, Not in universe under 1 year old, ?,0, Not in universe, Peru, Peru, Peru, Foreign born- U S citizen by naturalization,0, Not in universe,2,0,95, - 50000.
196125,0, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, Asian or Pacific Islander, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,1774.28, Not in universe, Not in universe, Not in universe, Not in universe under 1 year old, Not in universe,0, Both parents present, Taiwan, Taiwan, United-States, Native- Born in the United States,0, Not in universe,0,0,94, - 50000.
132109,16, Private,33,41, 9th grade,0, High school, Never married, Retail trade, Handlers equip cleaners etc , White, All other, Male, Not in universe, Job loser - on layoff, Children or Armed Forces,0,0,0, Single, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,368.31, Nonmover, Nonmover, Nonmover, Yes, Not in universe,4, Both parents present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,94, - 50000.
31996,6, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,1272.86, ?, ?, ?, Not in universe under 1 year old, ?,0, Both parents present, United-States, Italy, United-States, Native- Born in the United States,0, Not in universe,0,0,95, - 50000.
197276,25, Private,8,36, 12th grade no diploma,0, Not in universe, Married-civilian spouse present, Manufacturing-durable goods, Machine operators assmblrs & inspctrs, White, Central or South American, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Joint both under 65, West, California, Householder, Householder,1964.79, MSA to MSA, Same county, Same county, No, Yes,2, Not in universe, El-Salvador, El-Salvador, El-Salvador, Foreign born- Not a citizen of U S ,0, Not in universe,2,20,94, - 50000.
43637,52, Private,37,31, 11th grade,0, Not in universe, Never married, Business and repair services, Other service, Black, All other, Female, Not in universe, Not in universe, Full-time schedules,0,0,0, Single, Not in universe, Not in universe, Nonfamily householder, Householder,4059.47, ?, ?, ?, Not in universe under 1 year old, ?,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,2, Not in universe,2,52,95, - 50000.
160024,3, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, Mexican-American, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,927.49, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Both parents present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,94, - 50000.
184841,7, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, NA, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,1516.17, ?, ?, ?, Not in universe under 1 year old, ?,0, Both parents present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,95, - 50000.
90343,2, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, Asian or Pacific Islander, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,890.26, ?, ?, ?, Not in universe under 1 year old, ?,0, Both parents present, Philippines, Philippines, United-States, Native- Born in the United States,0, Not in universe,0,0,95, - 50000.
196773,72, Not in universe,0,0, High school graduate,0, Not in universe, Widowed, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Not in labor force,0,0,0, Nonfiler, Not in universe, Not in universe, Nonfamily householder, Householder,589.54, ?, ?, ?, Not in universe under 1 year old, ?,0, Not in universe, Germany, Germany, Germany, Foreign born- U S citizen by naturalization,0, Not in universe,2,0,95, - 50000.
102326,61, Private,35,26, High school graduate,0, Not in universe, Divorced, Finance insurance and real estate, Adm support including clerical, White, All other, Female, No, Not in universe, Full-time schedules,0,0,0, Single, Not in universe, Not in universe, Nonfamily householder, Householder,1042.72, ?, ?, ?, Not in universe under 1 year old, ?,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, - 50000.
94179,45, Self-employed-not incorporated,33,19, Associates degree-occup /vocational,0, Not in universe, Divorced, Retail trade, Sales, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,1602,0, Single, Not in universe, Not in universe, Child 18+ ever marr Not in a subfamily, Child 18 or older,4184.67, Nonmover, Nonmover, Nonmover, Yes, Not in universe,1, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,94, - 50000.
115094,45, Private,3,39, Some college but no degree,725, Not in universe, Married-civilian spouse present, Mining, Transportation and material moving, White, All other, Male, No, Not in universe, Children or Armed Forces,0,0,0, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,1361.67, Nonmover, Nonmover, Nonmover, Yes, Not in universe,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,2, Not in universe,2,48,94, - 50000.
139808,13, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, Other, Mexican-American, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,1749.06, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Both parents present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,94, - 50000.
10547,12, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,2473.12, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Mother only present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,94, - 50000.
140760,27, Not in universe,0,0, 5th or 6th grade,0, Not in universe, Married-civilian spouse present, Not in universe or children, Not in universe, White, Mexican (Mexicano), Female, Not in universe, Not in universe, Not in labor force,0,0,0, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,2523.97, ?, ?, ?, Not in universe under 1 year old, ?,0, Not in universe, Mexico, Mexico, Mexico, Foreign born- Not a citizen of U S ,0, Not in universe,2,0,95, - 50000.
143136,11, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,2195.61, ?, ?, ?, Not in universe under 1 year old, ?,0, Both parents present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,95, - 50000.
198740,25, Private,37,2, Bachelors degree(BA AB BS),0, Not in universe, Never married, Business and repair services, Executive admin and managerial, Asian or Pacific Islander, All other, Female, Not in universe, Not in universe, Full-time schedules,0,0,10, Single, Not in universe, Not in universe, Other Rel 18+ never marr not in subfamily, Other relative of householder,1152.64, ?, ?, ?, Not in universe under 1 year old, ?,3, Not in universe, Philippines, Philippines, Philippines, Foreign born- Not a citizen of U S ,0, Not in universe,2,50,95, - 50000.
171302,5, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, Black, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,467.65, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Both parents present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,94, - 50000.
51270,45, Private,38,31, High school graduate,0, Not in universe, Married-civilian spouse present, Business and repair services, Other service, White, All other, Female, Not in universe, Not in universe, Full-time schedules,0,0,0, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,1155.2, ?, ?, ?, Not in universe under 1 year old, ?,4, Not in universe, Poland, Poland, Poland, Foreign born- Not a citizen of U S ,0, Not in universe,2,16,95, - 50000.
102571,16, Private,33,19, 10th grade,0, High school, Never married, Retail trade, Sales, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Single, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,2072.15, Nonmover, Nonmover, Nonmover, Yes, Not in universe,6, Mother only present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,20,94, - 50000.
87901,46, Private,45,4, Bachelors degree(BA AB BS),0, Not in universe, Never married, Other professional services, Professional specialty, White, All other, Male, No, Not in universe, Full-time schedules,0,0,0, Single, Not in universe, Not in universe, Nonfamily householder, Householder,2405.49, ?, ?, ?, Not in universe under 1 year old, ?,2, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,45,95, 50000+.
40034,37, Private,39,2, High school graduate,0, Not in universe, Divorced, Personal services except private HH, Executive admin and managerial, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Head of household, Not in universe, Not in universe, Householder, Householder,1456.55, Nonmover, Nonmover, Nonmover, Yes, Not in universe,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,94, - 50000.
91671,42, Self-employed-not incorporated,44,32, High school graduate,0, Not in universe, Married-civilian spouse present, Social services, Other service, White, All other, Female, Not in universe, Not in universe, Full-time schedules,0,0,0, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,1141.93, ?, ?, ?, Not in universe under 1 year old, ?,1, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,50,95, - 50000.
97009,14, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,900.5, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Mother only present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,94, - 50000.
178794,76, Not in universe,0,0, 10th grade,0, Not in universe, Married-civilian spouse present, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Not in labor force,0,0,0, Nonfiler, Not in universe, Not in universe, Spouse of householder, Spouse of householder,1131.39, ?, ?, ?, Not in universe under 1 year old, ?,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,95, - 50000.
84772,30, Not in universe,0,0, Bachelors degree(BA AB BS),0, Not in universe, Married-civilian spouse present, Not in universe or children, Not in universe, White, Mexican (Mexicano), Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Joint both under 65, West, California, Spouse of householder, Spouse of householder,1707.88, MSA to MSA, Same county, Same county, No, Yes,0, Not in universe, Mexico, Mexico, Mexico, Foreign born- Not a citizen of U S ,0, Not in universe,2,0,94, - 50000.
7953,79, Not in universe,0,0, 11th grade,0, Not in universe, Widowed, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,119, Head of household, Not in universe, Not in universe, Householder, Householder,1644.11, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,94, - 50000.
56916,27, Private,39,32, High school graduate,0, Not in universe, Never married, Personal services except private HH, Other service, Black, All other, Female, Not in universe, Not in universe, Full-time schedules,0,0,0, Head of household, Not in universe, Not in universe, RP of unrelated subfamily, Nonrelative of householder,1717.06, ?, ?, ?, Not in universe under 1 year old, ?,2, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, - 50000.
150887,5, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, Black, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child under 18 of RP of unrel subfamily, Nonrelative of householder,4578.98, ?, ?, ?, Not in universe under 1 year old, ?,0, Mother only present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,95, - 50000.
182649,5, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, Amer Indian Aleut or Eskimo, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,1020.52, ?, ?, ?, Not in universe under 1 year old, ?,0, Both parents present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,95, - 50000.
177755,69, State government,50,28, High school graduate,0, Not in universe, Married-civilian spouse present, Public administration, Protective services, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,5, Joint one under 65 & one 65+, Not in universe, Not in universe, Householder, Householder,404.72, Nonmover, Nonmover, Nonmover, Yes, Not in universe,1, Not in universe, ?, ?, United-States, Native- Born in the United States,0, Not in universe,2,6,94, - 50000.
143031,69, Not in universe,0,0, 7th and 8th grade,0, Not in universe, Married-civilian spouse present, Not in universe or children, Not in universe, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,400, Nonfiler, Not in universe, Not in universe, Householder, Householder,1723.61, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Not in universe, Poland, Poland, Poland, Foreign born- U S citizen by naturalization,0, Not in universe,2,0,94, - 50000.
17047,46, Local government,43,10, Masters degree(MA MS MEng MEd MSW MBA),0, Not in universe, Divorced, Education, Professional specialty, White, All other, Female, Yes, Not in universe, Children or Armed Forces,0,1876,139, Single, Not in universe, Not in universe, Nonfamily householder, Householder,1722.26, Nonmover, Nonmover, Nonmover, Yes, Not in universe,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,36,94, - 50000.
5446,57, Private,42,13, Associates degree-occup /vocational,1329, Not in universe, Divorced, Medical except hospital, Technicians and related support, White, All other, Female, No, Not in universe, Children or Armed Forces,2202,0,0, Single, Not in universe, Not in universe, Nonfamily householder, Householder,1168.63, Nonmover, Nonmover, Nonmover, Yes, Not in universe,2, Not in universe, United-States, United-States, United-States, Native- Born in the United States,2, Not in universe,2,52,94, - 50000.
171213,14, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,1793.11, ?, ?, ?, Not in universe under 1 year old, ?,0, Mother only present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,95, - 50000.
173292,43, Private,21,26, High school graduate,0, Not in universe, Married-civilian spouse present, Manufacturing-nondurable goods, Adm support including clerical, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,3762.14, Nonmover, Nonmover, Nonmover, Yes, Not in universe,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,94, - 50000.
79813,5, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,3050.97, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Both parents present, ?, ?, United-States, Native- Born in the United States,0, Not in universe,0,0,94, - 50000.
181506,57, Private,27,35, High school graduate,0, Not in universe, Married-civilian spouse present, Manufacturing-nondurable goods, Precision production craft & repair, White, Puerto Rican, Male, Not in universe, Not in universe, Full-time schedules,0,0,0, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,1101.85, ?, ?, ?, Not in universe under 1 year old, ?,3, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, - 50000.
67884,0, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,970.2, Not in universe, Not in universe, Not in universe, Not in universe under 1 year old, Not in universe,0, Both parents present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,94, - 50000.
1095,0, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, Asian or Pacific Islander, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,1952.21, ?, ?, ?, Not in universe under 1 year old, ?,0, Both parents present, Poland, ?, United-States, Native- Born in the United States,0, Not in universe,0,0,95, - 50000.
47621,47, Private,39,31, 11th grade,0, Not in universe, Married-civilian spouse present, Personal services except private HH, Other service, White, Central or South American, Male, Not in universe, Not in universe, Full-time schedules,0,0,0, Joint both under 65, Not in universe, Not in universe, Householder, Householder,791.11, ?, ?, ?, Not in universe under 1 year old, ?,6, Not in universe, United-States, United-States, Columbia, Native- Born abroad of American Parent(s),0, Not in universe,2,52,95, - 50000.
65460,49, State government,43,3, Bachelors degree(BA AB BS),0, Not in universe, Divorced, Education, Executive admin and managerial, White, All other, Male, Not in universe, Not in universe, Full-time schedules,0,0,0, Single, Not in universe, Not in universe, Householder, Householder,251.25, ?, ?, ?, Not in universe under 1 year old, ?,6, Not in universe, Canada, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, - 50000.
140996,47, Private,33,26, 5th or 6th grade,0, Not in universe, Married-civilian spouse present, Retail trade, Adm support including clerical, White, Mexican-American, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Spouse of householder, Spouse of householder,1283.79, Nonmover, Nonmover, Nonmover, Yes, Not in universe,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,1,94, - 50000.
23431,31, Self-employed-not incorporated,2,43, High school graduate,0, Not in universe, Married-civilian spouse present, Agriculture, Farming forestry and fishing, White, All other, Female, Not in universe, Not in universe, PT for non-econ reasons usually FT,0,0,0, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,823.78, ?, ?, ?, Not in universe under 1 year old, ?,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,95, - 50000.
18488,57, Not in universe,0,0, High school graduate,0, Not in universe, Married-civilian spouse present, Not in universe or children, Not in universe, Black, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Joint both under 65, Not in universe, Not in universe, Householder, Householder,548.37, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,94, - 50000.
63908,19, Private,33,29, Some college but no degree,0, College or university, Never married, Retail trade, Other service, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Single, Not in universe, Not in universe, Grandchild 18+ never marr not in subfamily, Other relative of householder,942.2, Nonmover, Nonmover, Nonmover, Yes, Not in universe,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,2, Not in universe,2,52,94, - 50000.
147955,25, Not in universe,0,0, Bachelors degree(BA AB BS),0, Not in universe, Never married, Not in universe or children, Not in universe, White, Other Spanish, Male, Not in universe, Not in universe, Not in labor force,0,0,0, Nonfiler, Not in universe, Not in universe, Nonfamily householder, Householder,1087.39, ?, ?, ?, Not in universe under 1 year old, ?,0, Not in universe, Mexico, Puerto-Rico, Mexico, Native- Born abroad of American Parent(s),0, Not in universe,2,0,95, - 50000.
1219,43, Private,33,26, High school graduate,0, Not in universe, Married-civilian spouse present, Retail trade, Adm support including clerical, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,50, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,3440.67, Nonmover, Nonmover, Nonmover, Yes, Not in universe,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,94, - 50000.
98929,44, Private,30,26, Bachelors degree(BA AB BS),0, Not in universe, Never married, Communications, Adm support including clerical, Black, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Head of household, Not in universe, Not in universe, Householder, Householder,1040.96, Nonmover, Nonmover, Nonmover, Yes, Not in universe,5, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,94, - 50000.
64415,34, Local government,47,28, Some college but no degree,0, Not in universe, Never married, Public administration, Protective services, White, All other, Male, Not in universe, Not in universe, Full-time schedules,0,0,0, Single, Not in universe, Not in universe, Nonfamily householder, Householder,1161.47, ?, ?, ?, Not in universe under 1 year old, ?,3, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, No,1,52,95, - 50000.
197617,14, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,2177.31, ?, ?, ?, Not in universe under 1 year old, ?,0, Both parents present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,95, - 50000.
178368,35, Not in universe,0,0, 9th grade,0, Not in universe, Never married, Not in universe or children, Not in universe, Black, All other, Male, Not in universe, Not in universe, Not in labor force,0,0,0, Nonfiler, Not in universe, Not in universe, Nonfamily householder, Householder,1864.42, ?, ?, ?, Not in universe under 1 year old, ?,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,95, - 50000.
40399,19, Not in universe,0,0, Some college but no degree,0, College or university, Never married, Not in universe or children, Not in universe, Asian or Pacific Islander, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child 18+ never marr Not in a subfamily, Child 18 or older,598.21, Nonmover, Nonmover, Nonmover, Yes, Not in universe,2, Not in universe, United-States, ?, United-States, Native- Born in the United States,0, Not in universe,2,13,94, - 50000.
157159,22, Self-employed-not incorporated,37,15, Associates degree-occup /vocational,0, Not in universe, Never married, Business and repair services, Technicians and related support, White, All other, Male, Not in universe, Not in universe, Full-time schedules,0,0,0, Single, Not in universe, Not in universe, Secondary individual, Nonrelative of householder,4074.15, ?, ?, ?, Not in universe under 1 year old, ?,1, Not in universe, United-States, United-States, Holand-Netherlands, Native- Born abroad of American Parent(s),0, Not in universe,2,36,95, - 50000.
39951,45, Federal government,49,1, Masters degree(MA MS MEng MEd MSW MBA),0, Not in universe, Divorced, Public administration, Executive admin and managerial, White, All other, Female, Not in universe, Not in universe, Full-time schedules,0,1980,0, Single, Not in universe, Not in universe, Householder, Householder,1632.8, ?, ?, ?, Not in universe under 1 year old, ?,3, Not in universe, United-States, United-States, United-States, Native- Born in the United States,2, Not in universe,2,52,95, - 50000.
80149,28, Private,39,31, 5th or 6th grade,0, Not in universe, Never married, Personal services except private HH, Other service, White, Mexican (Mexicano), Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Single, Not in universe, Not in universe, Other Rel 18+ never marr not in subfamily, Other relative of householder,2028.73, Nonmover, Nonmover, Nonmover, Yes, Not in universe,6, Not in universe, Mexico, Mexico, Mexico, Foreign born- U S citizen by naturalization,2, Not in universe,2,52,94, - 50000.
33078,70, Not in universe,0,0, High school graduate,0, Not in universe, Widowed, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Not in labor force,401,0,0, Single, Not in universe, Not in universe, Nonfamily householder, Householder,983.2, ?, ?, ?, Not in universe under 1 year old, ?,0, Not in universe, Canada, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,95, - 50000.
118945,6, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,1702.46, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Both parents present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,94, - 50000.
173073,17, Not in universe,0,0, 11th grade,0, High school, Never married, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Not in labor force,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,1522.83, ?, ?, ?, Not in universe under 1 year old, ?,0, Both parents present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,95, - 50000.
154955,33, Private,42,13, Some college but no degree,0, Not in universe, Divorced, Medical except hospital, Technicians and related support, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,177, Single, Not in universe, Not in universe, Nonfamily householder, Householder,2359.01, Nonmover, Nonmover, Nonmover, Yes, Not in universe,4, Not in universe, United-States, Germany, United-States, Native- Born in the United States,0, Not in universe,2,52,94, - 50000.
22221,63, Not in universe,0,0, 10th grade,0, Not in universe, Married-civilian spouse present, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,7959.51, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,94, - 50000.
38335,33, Not in universe,0,0, 5th or 6th grade,0, Not in universe, Married-civilian spouse present, Not in universe or children, Not in universe, White, Mexican-American, Female, Not in universe, Not in universe, Not in labor force,0,0,0, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,1363.13, ?, ?, ?, Not in universe under 1 year old, ?,4, Not in universe, Mexico, Mexico, Mexico, Foreign born- Not a citizen of U S ,0, Not in universe,2,52,95, - 50000.
123934,10, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,1778.48, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Both parents present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,94, - 50000.
185904,64, Not in universe,0,0, High school graduate,0, Not in universe, Married-civilian spouse present, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Not in labor force,0,0,24, Joint one under 65 & one 65+, Not in universe, Not in universe, Spouse of householder, Spouse of householder,2461.72, ?, ?, ?, Not in universe under 1 year old, ?,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,95, - 50000.
71771,39, Private,29,38, Some college but no degree,0, Not in universe, Never married, Transportation, Transportation and material moving, White, Mexican-American, Male, Not in universe, Not in universe, Full-time schedules,0,0,0, Single, Not in universe, Not in universe, Nonfamily householder, Householder,702.43, ?, ?, ?, Not in universe under 1 year old, ?,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, - 50000.
69160,2, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,926.58, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Both parents present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,94, - 50000.
147725,77, Not in universe,0,0, Prof school degree (MD DDS DVM LLB JD),0, Not in universe, Married-civilian spouse present, Not in universe or children, Not in universe, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,1455,0,0, Joint both 65+, Not in universe, Not in universe, Householder, Householder,1623.8, Nonmover, Nonmover, Nonmover, Yes, Not in universe,1, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,1,94, - 50000.
84225,6, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,2589.81, ?, ?, ?, Not in universe under 1 year old, ?,0, Both parents present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,95, - 50000.
58184,42, Private,5,36, High school graduate,0, Not in universe, Married-civilian spouse present, Manufacturing-durable goods, Machine operators assmblrs & inspctrs, White, All other, Male, Not in universe, Not in universe, Full-time schedules,0,0,0, Joint both under 65, Not in universe, Not in universe, Householder, Householder,2553.09, ?, ?, ?, Not in universe under 1 year old, ?,4, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, - 50000.
191708,30, Private,33,19, High school graduate,0, Not in universe, Never married, Retail trade, Sales, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Single, Midwest, Tennessee, Child 18+ never marr Not in a subfamily, Child 18 or older,433.4, NonMSA to nonMSA, Same county, Same county, No, No,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,94, - 50000.
73103,48, Private,33,12, Some college but no degree,0, Not in universe, Married-civilian spouse present, Retail trade, Professional specialty, White, All other, Female, Not in universe, Not in universe, Full-time schedules,0,0,0, Joint both under 65, Not in universe, Not in universe, Householder, Householder,281.59, ?, ?, ?, Not in universe under 1 year old, ?,2, Not in universe, United-States, United-States, United-States, Native- Born in the United States,1, Not in universe,2,25,95, - 50000.
25855,20, Never worked,0,0, Some college but no degree,0, College or university, Never married, Not in universe or children, Not in universe, Asian or Pacific Islander, All other, Female, Not in universe, New entrant, Unemployed part- time,0,0,0, Nonfiler, Not in universe, Not in universe, In group quarters, Group Quarters- Secondary individual,1394.7, ?, ?, ?, Not in universe under 1 year old, ?,0, Not in universe, South Korea, South Korea, South Korea, Foreign born- Not a citizen of U S ,0, Not in universe,2,0,95, - 50000.
20809,65, State government,43,9, Doctorate degree(PhD EdD),0, Not in universe, Married-civilian spouse present, Education, Professional specialty, White, All other, Male, Not in universe, Not in universe, Full-time schedules,0,2174,250, Joint both 65+, Not in universe, Not in universe, Householder, Householder,1580.56, ?, ?, ?, Not in universe under 1 year old, ?,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, 50000+.
121724,31, Local government,43,10, Bachelors degree(BA AB BS),0, Not in universe, Never married, Education, Professional specialty, White, All other, Male, Yes, Not in universe, Children or Armed Forces,0,0,0, Single, Not in universe, Not in universe, Nonfamily householder, Householder,2220.04, Nonmover, Nonmover, Nonmover, Yes, Not in universe,4, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,43,94, - 50000.
87147,51, Not in universe,0,0, 9th grade,0, Not in universe, Widowed, Not in universe or children, Not in universe, Black, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, South, Texas, Nonfamily householder, Householder,2542.38, MSA to MSA, Same county, Same county, No, Yes,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,94, - 50000.
45361,6, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, Black, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,1423.77, ?, ?, ?, Not in universe under 1 year old, ?,0, Both parents present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,95, - 50000.
10963,42, Private,38,42, Some college but no degree,0, Not in universe, Married-civilian spouse present, Business and repair services, Handlers equip cleaners etc , White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Joint both under 65, Midwest, Montana, Spouse of householder, Spouse of householder,6282.42, MSA to MSA, Different county same state, Different county same state, No, No,6, Not in universe, El-Salvador, United-States, United-States, Native- Born in the United States,2, Not in universe,2,52,94, - 50000.
43878,20, Private,2,44, High school graduate,0, Not in universe, Never married, Agriculture, Farming forestry and fishing, White, All other, Male, Not in universe, Re-entrant, Unemployed full-time,0,0,0, Single, Not in universe, Not in universe, Child 18+ never marr Not in a subfamily, Child 18 or older,258.24, ?, ?, ?, Not in universe under 1 year old, ?,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,2, Not in universe,2,4,95, - 50000.
19256,9, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,1509.08, ?, ?, ?, Not in universe under 1 year old, ?,0, Mother only present, United-States, United-States, Germany, Native- Born abroad of American Parent(s),0, Not in universe,0,0,95, - 50000.
71391,48, Private,38,42, 1st 2nd 3rd or 4th grade,0, Not in universe, Married-civilian spouse present, Business and repair services, Handlers equip cleaners etc , Asian or Pacific Islander, All other, Male, Not in universe, Not in universe, Full-time schedules,0,0,0, Joint both under 65, Not in universe, Not in universe, Householder, Householder,2395.72, ?, ?, ?, Not in universe under 1 year old, ?,1, Not in universe, ?, ?, ?, Foreign born- Not a citizen of U S ,0, Not in universe,2,52,95, - 50000.
138769,17, Not in universe,0,0, 10th grade,0, High school, Never married, Not in universe or children, Not in universe, White, All other, Male, Not in universe, Not in universe, Not in labor force,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,588.0, ?, ?, ?, Not in universe under 1 year old, ?,0, Both parents present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,95, - 50000.
98200,33, Private,42,30, High school graduate,0, Not in universe, Married-civilian spouse present, Medical except hospital, Other service, White, Chicano, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Joint both under 65, West, New York, Householder, Householder,438.7, MSA to MSA, Same county, Same county, No, Yes,3, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,15,94, - 50000.
7213,2, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,1043.07, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Mother only present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,94, - 50000.
891,15, Not in universe,0,0, 9th grade,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,1206.13, Nonmover, Nonmover, Nonmover, Yes, Not in universe,1, Both parents present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,2,94, - 50000.
45910,68, Not in universe,0,0, High school graduate,0, Not in universe, Married-civilian spouse present, Not in universe or children, Not in universe, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Joint one under 65 & one 65+, Not in universe, Not in universe, Householder, Householder,1634.16, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,94, - 50000.
68156,16, Not in universe,0,0, 9th grade,0, High school, Never married, Not in universe or children, Not in universe, White, All other, Male, Not in universe, Not in universe, Not in labor force,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,662.39, ?, ?, ?, Not in universe under 1 year old, ?,0, Both parents present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,95, - 50000.
111042,52, Not in universe,0,0, Masters degree(MA MS MEng MEd MSW MBA),0, Not in universe, Married-civilian spouse present, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Not in labor force,0,0,10000, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,1024.89, ?, ?, ?, Not in universe under 1 year old, ?,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,95, 50000+.
197422,67, Private,34,2, High school graduate,0, Not in universe, Widowed, Finance insurance and real estate, Executive admin and managerial, White, All other, Male, No, Not in universe, Children or Armed Forces,0,0,0, Single, Not in universe, Not in universe, Householder, Householder,1539.89, Nonmover, Nonmover, Nonmover, Yes, Not in universe,4, Not in universe, Ireland, Ireland, United-States, Native- Born in the United States,0, Not in universe,2,52,94, 50000+.
10440,8, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Grandchild <18 never marr not in subfamily, Other relative of householder,938.92, ?, ?, ?, Not in universe under 1 year old, ?,0, Neither parent present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,95, - 50000.
9427,42, Not in universe,0,0, 10th grade,0, Not in universe, Married-civilian spouse present, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,2701.7, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,94, - 50000.
7449,48, Private,12,2, High school graduate,0, Not in universe, Married-civilian spouse present, Manufacturing-durable goods, Executive admin and managerial, Asian or Pacific Islander, All other, Female, Not in universe, Not in universe, Full-time schedules,0,0,0, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,1965.34, ?, ?, ?, Not in universe under 1 year old, ?,1, Not in universe, China, Vietnam, Vietnam, Foreign born- U S citizen by naturalization,0, Not in universe,2,52,95, - 50000.
128836,8, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,2298.82, ?, ?, ?, Not in universe under 1 year old, ?,0, Both parents present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,95, - 50000.
48918,72, Not in universe,0,0, 7th and 8th grade,0, Not in universe, Never married, Not in universe or children, Not in universe, Black, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Nonfamily householder, Householder,419.51, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,94, - 50000.
93667,23, Private,33,19, Associates degree-academic program,825, Not in universe, Married-civilian spouse present, Retail trade, Sales, White, All other, Female, No, Not in universe, Full-time schedules,0,0,75, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,2615.23, ?, ?, ?, Not in universe under 1 year old, ?,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, - 50000.
29020,42, Private,45,15, Associates degree-academic program,0, Not in universe, Widowed, Other professional services, Technicians and related support, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,290, Head of household, Not in universe, Not in universe, Householder, Householder,1552.03, Nonmover, Nonmover, Nonmover, Yes, Not in universe,1, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,94, - 50000.
109337,10, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,1640.4, ?, ?, ?, Not in universe under 1 year old, ?,0, Both parents present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,95, - 50000.
40199,4, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,2397.57, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Both parents present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,94, - 50000.
39475,37, Private,41,8, Associates degree-occup /vocational,2355, Not in universe, Never married, Hospital services, Professional specialty, White, All other, Female, No, Not in universe, Children or Armed Forces,0,0,0, Single, Not in universe, Not in universe, Nonfamily householder, Householder,1196.52, Nonmover, Nonmover, Nonmover, Yes, Not in universe,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Yes,1,52,94, 50000+.
159112,63, Without pay,6,35, High school graduate,0, Not in universe, Married-civilian spouse present, Manufacturing-durable goods, Precision production craft & repair, White, All other, Male, Not in universe, Not in universe, PT for non-econ reasons usually FT,0,0,0, Joint both under 65, Not in universe, Not in universe, Householder, Householder,4441.94, ?, ?, ?, Not in universe under 1 year old, ?,1, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, - 50000.
152918,41, Not in universe,0,0, 1st 2nd 3rd or 4th grade,0, Not in universe, Separated, Not in universe or children, Not in universe, Black, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, South, ?, Secondary individual, Nonrelative of householder,2745.08, NonMSA to nonMSA, Different county same state, Different county same state, No, No,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,94, - 50000.
88096,4, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, Asian or Pacific Islander, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,777.43, ?, ?, ?, Not in universe under 1 year old, ?,0, Both parents present, Philippines, Philippines, Philippines, Foreign born- Not a citizen of U S ,0, Not in universe,0,0,95, - 50000.
175317,43, Private,44,12, Masters degree(MA MS MEng MEd MSW MBA),0, Not in universe, Divorced, Social services, Professional specialty, White, All other, Female, Not in universe, Not in universe, Full-time schedules,0,0,0, Head of household, Not in universe, Not in universe, Householder, Householder,2639.54, ?, ?, ?, Not in universe under 1 year old, ?,5, Not in universe, United-States, United-States, United-States, Native- Born in the United States,2, Not in universe,2,52,95, - 50000.
80470,49, Private,34,17, Bachelors degree(BA AB BS),0, Not in universe, Divorced, Finance insurance and real estate, Sales, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,500, Single, Not in universe, Not in universe, Nonfamily householder, Householder,1811.45, Nonmover, Nonmover, Nonmover, Yes, Not in universe,5, Not in universe, United-States, ?, United-States, Native- Born in the United States,0, Not in universe,2,52,94, 50000+.
161690,6, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,281.98, ?, ?, ?, Not in universe under 1 year old, ?,0, Both parents present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,95, - 50000.
3630,41, Not in universe,0,0, High school graduate,0, Not in universe, Divorced, Not in universe or children, Not in universe, Black, All other, Female, Not in universe, Not in universe, Not in labor force,0,0,0, Nonfiler, Not in universe, Not in universe, Householder, Householder,1689.66, ?, ?, ?, Not in universe under 1 year old, ?,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,95, - 50000.
12305,46, State government,43,29, High school graduate,840, Not in universe, Married-civilian spouse present, Education, Other service, White, All other, Female, No, Not in universe, Full-time schedules,0,0,0, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,1227.32, ?, ?, ?, Not in universe under 1 year old, ?,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,36,95, - 50000.
100405,33, Not in universe,0,0, Some college but no degree,0, Not in universe, Divorced, Not in universe or children, Not in universe, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child 18+ ever marr Not in a subfamily, Child 18 or older,2798.03, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,94, - 50000.
181953,35, Private,11,37, 11th grade,0, Not in universe, Married-civilian spouse present, Manufacturing-durable goods, Machine operators assmblrs & inspctrs, White, All other, Female, Not in universe, Not in universe, Full-time schedules,0,0,0, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,3603.1, ?, ?, ?, Not in universe under 1 year old, ?,4, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, - 50000.
165427,43, Private,35,23, Some college but no degree,0, Not in universe, Divorced, Finance insurance and real estate, Adm support including clerical, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Single, South, Utah, Secondary individual, Nonrelative of householder,450.49, MSA to MSA, Different region, Different state in South, No, Yes,1, Not in universe, United-States, United-States, United-States, Native- Born in the United States,2, Not in universe,2,48,94, - 50000.
48964,25, Private,34,3, Bachelors degree(BA AB BS),0, Not in universe, Never married, Finance insurance and real estate, Executive admin and managerial, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,10, Single, South, Utah, Nonfamily householder, Householder,2776.11, MSA to MSA, Same county, Same county, No, Yes,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,2, Not in universe,2,52,94, - 50000.
111549,80, Not in universe,0,0, 11th grade,0, Not in universe, Widowed, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Not in labor force,0,0,0, Single, Not in universe, Not in universe, Nonfamily householder, Householder,2674.96, ?, ?, ?, Not in universe under 1 year old, ?,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,95, - 50000.
12284,37, Local government,40,23, High school graduate,0, Not in universe, Married-civilian spouse present, Entertainment, Adm support including clerical, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,2434.3, Nonmover, Nonmover, Nonmover, Yes, Not in universe,3, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,94, - 50000.
111003,24, Private,1,44, 11th grade,0, Not in universe, Never married, Agriculture, Farming forestry and fishing, White, Puerto Rican, Male, Not in universe, Job loser - on layoff, Children or Armed Forces,2463,0,0, Single, Not in universe, Not in universe, Householder, Householder,895.49, Nonmover, Nonmover, Nonmover, Yes, Not in universe,4, Not in universe, Puerto-Rico, Puerto-Rico, United-States, Native- Born in the United States,0, Not in universe,2,40,94, - 50000.
4035,52, State government,43,10, Masters degree(MA MS MEng MEd MSW MBA),0, Not in universe, Married-civilian spouse present, Education, Professional specialty, White, All other, Female, Not in universe, Not in universe, Full-time schedules,0,0,3000, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,1559.39, ?, ?, ?, Not in universe under 1 year old, ?,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,2, Not in universe,2,15,95, 50000+.
57559,34, Private,24,26, High school graduate,0, Not in universe, Divorced, Manufacturing-nondurable goods, Adm support including clerical, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Single, Not in universe, Not in universe, Child 18+ ever marr Not in a subfamily, Child 18 or older,2878.31, Nonmover, Nonmover, Nonmover, Yes, Not in universe,4, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,94, - 50000.
197612,6, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, Black, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,1985.13, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Mother only present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,94, - 50000.
186539,35, Not in universe,0,0, 1st 2nd 3rd or 4th grade,0, Not in universe, Separated, Not in universe or children, Not in universe, White, Mexican (Mexicano), Female, Not in universe, Not in universe, Not in labor force,0,0,0, Nonfiler, Not in universe, Not in universe, Householder, Householder,1346.86, ?, ?, ?, Not in universe under 1 year old, ?,0, Not in universe, Mexico, Mexico, Mexico, Foreign born- Not a citizen of U S ,0, Not in universe,2,0,95, - 50000.
80242,45, Private,22,36, 5th or 6th grade,0, Not in universe, Married-civilian spouse present, Manufacturing-nondurable goods, Machine operators assmblrs & inspctrs, White, Mexican (Mexicano), Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,1108.95, Nonmover, Nonmover, Nonmover, Yes, Not in universe,2, Not in universe, Mexico, Mexico, Mexico, Foreign born- Not a citizen of U S ,0, Not in universe,2,52,94, - 50000.
180617,14, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,1932.0, ?, ?, ?, Not in universe under 1 year old, ?,0, Both parents present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,95, - 50000.
88587,3, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child under 18 of RP of unrel subfamily, Nonrelative of householder,4108.89, ?, ?, ?, Not in universe under 1 year old, ?,0, Mother only present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,95, - 50000.
7041,45, Not in universe,0,0, Some college but no degree,0, Not in universe, Married-civilian spouse present, Not in universe or children, Not in universe, Black, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Joint both under 65, Midwest, Oklahoma, Spouse of householder, Spouse of householder,1443.81, MSA to MSA, Same county, Same county, No, No,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,94, - 50000.
139291,44, Private,44,41, 5th or 6th grade,0, Not in universe, Never married, Social services, Handlers equip cleaners etc , White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, South, Delaware, Secondary individual, Nonrelative of householder,982.19, NonMSA to nonMSA, Different county same state, Different county same state, No, No,5, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,94, - 50000.
184023,49, Local government,42,30, High school graduate,0, Not in universe, Widowed, Medical except hospital, Other service, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Single, Not in universe, Not in universe, Nonfamily householder, Householder,993.85, Nonmover, Nonmover, Nonmover, Yes, Not in universe,4, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,94, - 50000.
9438,69, Not in universe,0,0, High school graduate,0, Not in universe, Widowed, Not in universe or children, Not in universe, Black, All other, Female, Not in universe, Not in universe, Not in labor force,0,0,0, Single, Not in universe, Not in universe, Householder, Householder,2296.9, ?, ?, ?, Not in universe under 1 year old, ?,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,95, - 50000.
33628,65, Not in universe,0,0, High school graduate,0, Not in universe, Married-civilian spouse present, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Joint one under 65 & one 65+, Not in universe, Not in universe, Householder, Householder,2588.07, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,94, - 50000.
129715,43, Private,31,42, High school graduate,0, Not in universe, Married-civilian spouse present, Utilities and sanitary services, Handlers equip cleaners etc , White, All other, Male, Not in universe, Not in universe, Full-time schedules,0,0,0, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,1036.94, ?, ?, ?, Not in universe under 1 year old, ?,3, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, - 50000.
13495,19, Private,33,19, Some college but no degree,0, College or university, Never married, Retail trade, Sales, White, Puerto Rican, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Single, Not in universe, Not in universe, Child 18+ never marr Not in a subfamily, Child 18 or older,1243.04, Nonmover, Nonmover, Nonmover, Yes, Not in universe,2, Not in universe, United-States, ?, United-States, Native- Born in the United States,0, Not in universe,2,52,94, - 50000.
50850,10, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, Asian or Pacific Islander, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,2245.99, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Both parents present, Philippines, Philippines, United-States, Native- Born in the United States,0, Not in universe,0,0,94, - 50000.
138847,46, Private,34,25, Some college but no degree,0, Not in universe, Married-civilian spouse present, Finance insurance and real estate, Adm support including clerical, Black, All other, Male, Not in universe, Not in universe, Full-time schedules,0,0,0, Joint both under 65, Not in universe, Not in universe, Householder, Householder,688.01, ?, ?, ?, Not in universe under 1 year old, ?,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, - 50000.
150171,34, Private,33,19, Associates degree-academic program,0, Not in universe, Divorced, Retail trade, Sales, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Single, Not in universe, Not in universe, Nonfamily householder, Householder,2227.01, Nonmover, Nonmover, Nonmover, Yes, Not in universe,4, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,94, - 50000.
156089,48, State government,40,23, High school graduate,0, Not in universe, Married-civilian spouse present, Entertainment, Adm support including clerical, White, All other, Female, Not in universe, Not in universe, Full-time schedules,0,0,0, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,607.6, ?, ?, ?, Not in universe under 1 year old, ?,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, - 50000.
197936,19, Private,33,19, High school graduate,0, College or university, Never married, Retail trade, Sales, White, All other, Female, Not in universe, Not in universe, Full-time schedules,0,0,0, Single, Not in universe, Not in universe, Child 18+ never marr Not in a subfamily, Child 18 or older,2578.61, ?, ?, ?, Not in universe under 1 year old, ?,4, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, - 50000.
78488,45, Local government,48,21, Bachelors degree(BA AB BS),0, Not in universe, Separated, Public administration, Adm support including clerical, Black, All other, Female, Yes, Not in universe, Full-time schedules,0,0,0, Single, Not in universe, Not in universe, Householder, Householder,1569.36, ?, ?, ?, Not in universe under 1 year old, ?,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, - 50000.
44829,38, Private,33,16, High school graduate,0, Not in universe, Never married, Retail trade, Sales, White, All other, Female, Not in universe, Not in universe, Full-time schedules,0,0,268, Single, Not in universe, Not in universe, Child 18+ never marr Not in a subfamily, Child 18 or older,3254.97, ?, ?, ?, Not in universe under 1 year old, ?,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, - 50000.
132454,41, Private,36,27, High school graduate,0, Not in universe, Married-civilian spouse present, Private household services, Private household services, White, Central or South American, Female, No, Not in universe, Full-time schedules,0,0,0, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,812.57, ?, ?, ?, Not in universe under 1 year old, ?,1, Not in universe, ?, ?, ?, Foreign born- Not a citizen of U S ,0, Not in universe,2,52,95, - 50000.
52840,71, Not in universe,0,0, 5th or 6th grade,0, Not in universe, Married-civilian spouse present, Not in universe or children, Not in universe, Black, All other, Male, Not in universe, Not in universe, Not in labor force,0,0,0, Joint both 65+, Not in universe, Not in universe, Householder, Householder,1823.75, ?, ?, ?, Not in universe under 1 year old, ?,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,95, - 50000.
121217,27, Federal government,29,25, Associates degree-occup /vocational,1575, Not in universe, Married-civilian spouse present, Transportation, Adm support including clerical, White, All other, Male, Yes, Not in universe, Children or Armed Forces,7298,0,0, Joint both under 65, Northeast, Michigan, Householder, Householder,1031.69, MSA to MSA, Same county, Same county, No, No,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,94, - 50000.
198823,29, Not in universe,0,0, Bachelors degree(BA AB BS),0, Not in universe, Married-civilian spouse present, Not in universe or children, Not in universe, Asian or Pacific Islander, All other, Female, Not in universe, Not in universe, Not in labor force,0,0,0, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,1205.55, ?, ?, ?, Not in universe under 1 year old, ?,0, Not in universe, India, India, India, Foreign born- Not a citizen of U S ,0, Not in universe,2,0,95, - 50000.
148775,36, Not in universe,0,0, High school graduate,0, Not in universe, Separated, Not in universe or children, Not in universe, White, Mexican (Mexicano), Female, Not in universe, Not in universe, Not in labor force,0,0,0, Single, Not in universe, Not in universe, Child 18+ ever marr Not in a subfamily, Child 18 or older,1307.46, ?, ?, ?, Not in universe under 1 year old, ?,3, Not in universe, Mexico, Mexico, United-States, Native- Born in the United States,0, Not in universe,2,45,95, - 50000.
1702,52, Self-employed-not incorporated,39,32, Bachelors degree(BA AB BS),0, Not in universe, Divorced, Personal services except private HH, Other service, White, All other, Female, Not in universe, Not in universe, Full-time schedules,0,0,2000, Single, Not in universe, Not in universe, Nonfamily householder, Householder,984.25, ?, ?, ?, Not in universe under 1 year old, ?,1, Not in universe, United-States, United-States, United-States, Native- Born in the United States,2, Not in universe,2,46,95, - 50000.
120926,2, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,2596.51, ?, ?, ?, Not in universe under 1 year old, ?,0, Mother only present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,95, - 50000.
125722,40, Self-employed-not incorporated,33,2, Associates degree-occup /vocational,0, Not in universe, Married-civilian spouse present, Retail trade, Executive admin and managerial, White, All other, Male, Not in universe, Not in universe, Full-time schedules,0,0,0, Nonfiler, Not in universe, Not in universe, Spouse of householder, Spouse of householder,198.29, ?, ?, ?, Not in universe under 1 year old, ?,1, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, - 50000.
110416,31, Private,45,12, Bachelors degree(BA AB BS),0, Not in universe, Married-civilian spouse present, Other professional services, Professional specialty, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,300, Joint both under 65, Not in universe, Not in universe, Householder, Householder,1920.41, Nonmover, Nonmover, Nonmover, Yes, Not in universe,4, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,94, - 50000.
47866,5, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,2154.9, ?, ?, ?, Not in universe under 1 year old, ?,0, Both parents present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,95, - 50000.
35144,31, Not in universe,0,0, Associates degree-occup /vocational,0, Not in universe, Married-civilian spouse present, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Joint both under 65, Northeast, Connecticut, Householder, Householder,2491.83, MSA to MSA, Different county same state, Different county same state, No, No,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,94, - 50000.
167869,1, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, Asian or Pacific Islander, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,2046.83, ?, ?, ?, Not in universe under 1 year old, ?,0, Mother only present, Philippines, Philippines, United-States, Native- Born in the United States,0, Not in universe,0,0,95, - 50000.
12432,32, Federal government,49,26, High school graduate,0, Not in universe, Married-civilian spouse present, Public administration, Adm support including clerical, White, Puerto Rican, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,1020.27, Nonmover, Nonmover, Nonmover, Yes, Not in universe,3, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,94, - 50000.
71994,35, Self-employed-not incorporated,37,10, Bachelors degree(BA AB BS),0, Not in universe, Married-civilian spouse present, Business and repair services, Professional specialty, White, All other, Male, Not in universe, Other job loser, Children or Armed Forces,0,0,0, Joint both under 65, Not in universe, Not in universe, Householder, Householder,1132.61, Nonmover, Nonmover, Nonmover, Yes, Not in universe,1, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,27,94, - 50000.
190244,34, Not in universe,0,0, Masters degree(MA MS MEng MEd MSW MBA),0, Not in universe, Married-civilian spouse present, Not in universe or children, Not in universe, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Joint both under 65, South, District of Columbia, Householder, Householder,2031.36, MSA to MSA, Different state same division, Different state in South, No, No,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,94, - 50000.
1881,45, Private,33,16, Some college but no degree,0, Not in universe, Never married, Retail trade, Sales, White, Mexican-American, Female, Not in universe, Not in universe, Full-time schedules,0,0,0, Head of household, Not in universe, Not in universe, Householder, Householder,1537.21, ?, ?, ?, Not in universe under 1 year old, ?,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, - 50000.
48449,40, Private,4,34, Some college but no degree,0, Not in universe, Married-civilian spouse present, Construction, Precision production craft & repair, White, All other, Male, Not in universe, Not in universe, Full-time schedules,0,0,0, Joint both under 65, Not in universe, Not in universe, Householder, Householder,1631.75, ?, ?, ?, Not in universe under 1 year old, ?,4, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, - 50000.
99405,59, Private,4,2, Some college but no degree,2100, Not in universe, Married-civilian spouse present, Construction, Executive admin and managerial, White, All other, Male, Yes, Not in universe, Full-time schedules,0,0,200, Joint both under 65, Not in universe, Not in universe, Householder, Householder,2477.26, ?, ?, ?, Not in universe under 1 year old, ?,1, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, 50000+.
71526,26, State government,43,12, Masters degree(MA MS MEng MEd MSW MBA),0, Not in universe, Married-civilian spouse present, Education, Professional specialty, White, All other, Female, Not in universe, Not in universe, PT for econ reasons usually PT,0,0,0, Joint both under 65, Not in universe, Not in universe, In group quarters, Householder,1108.83, ?, ?, ?, Not in universe under 1 year old, ?,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, - 50000.
107493,14, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,1651.17, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Both parents present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,94, - 50000.
142743,54, Federal government,45,4, Masters degree(MA MS MEng MEd MSW MBA),0, Not in universe, Married-civilian spouse present, Other professional services, Professional specialty, White, All other, Male, Not in universe, Not in universe, Full-time schedules,0,0,0, Joint both under 65, Not in universe, Not in universe, Householder, Householder,1081.54, ?, ?, ?, Not in universe under 1 year old, ?,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, 50000+.
2258,30, Private,35,2, High school graduate,0, Not in universe, Divorced, Finance insurance and real estate, Executive admin and managerial, White, All other, Female, No, Not in universe, Children or Armed Forces,2354,0,0, Single, Not in universe, Not in universe, Nonfamily householder, Householder,2924.14, Nonmover, Nonmover, Nonmover, Yes, Not in universe,2, Not in universe, United-States, United-States, United-States, Native- Born in the United States,1, Not in universe,2,52,94, - 50000.
66048,68, Not in universe,0,0, Some college but no degree,0, Not in universe, Married-civilian spouse present, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Not in labor force,0,0,0, Joint both 65+, Not in universe, Not in universe, Spouse of householder, Spouse of householder,2467.44, ?, ?, ?, Not in universe under 1 year old, ?,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,95, - 50000.
174145,57, Local government,50,5, Bachelors degree(BA AB BS),0, Not in universe, Married-civilian spouse present, Public administration, Professional specialty, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,1902,0, Joint both under 65, Not in universe, Not in universe, Householder, Householder,1455.29, Nonmover, Nonmover, Nonmover, Yes, Not in universe,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,94, - 50000.
7609,28, Private,43,44, Associates degree-occup /vocational,0, Not in universe, Separated, Education, Farming forestry and fishing, White, All other, Male, Not in universe, Not in universe, Full-time schedules,0,0,0, Single, Not in universe, Not in universe, Nonfamily householder, Householder,4173.77, ?, ?, ?, Not in universe under 1 year old, ?,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, - 50000.
1906,33, Private,41,7, Prof school degree (MD DDS DVM LLB JD),0, Not in universe, Married-civilian spouse present, Hospital services, Professional specialty, White, Central or South American, Male, Not in universe, Not in universe, Children or Armed Forces,3103,0,0, Joint both under 65, Not in universe, Not in universe, Householder, Householder,2406.32, Nonmover, Nonmover, Nonmover, Yes, Not in universe,6, Not in universe, ?, ?, ?, Foreign born- Not a citizen of U S ,0, Not in universe,2,52,94, - 50000.
8197,51, Private,14,37, High school graduate,0, Not in universe, Married-civilian spouse present, Manufacturing-durable goods, Machine operators assmblrs & inspctrs, Black, All other, Male, Not in universe, Not in universe, Children or Armed Forces,3137,0,0, Joint both under 65, Not in universe, Not in universe, Householder, Householder,2659.34, Nonmover, Nonmover, Nonmover, Yes, Not in universe,2, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,42,94, - 50000.
7752,59, Private,9,36, High school graduate,0, Not in universe, Married-civilian spouse present, Manufacturing-durable goods, Machine operators assmblrs & inspctrs, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Joint both under 65, Not in universe, Not in universe, Householder, Householder,761.06, Nonmover, Nonmover, Nonmover, Yes, Not in universe,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,94, - 50000.
74808,19, Private,40,28, Some college but no degree,0, College or university, Never married, Entertainment, Protective services, Asian or Pacific Islander, All other, Male, Not in universe, Not in universe, Full-time schedules,0,0,0, Single, Not in universe, Not in universe, Child 18+ never marr Not in a subfamily, Child 18 or older,1264.75, ?, ?, ?, Not in universe under 1 year old, ?,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,2, Not in universe,2,52,95, - 50000.
194746,64, Not in universe,0,0, High school graduate,0, Not in universe, Divorced, Not in universe or children, Not in universe, White, Other Spanish, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Single, Not in universe, Not in universe, Nonfamily householder, Householder,915.28, Nonmover, Nonmover, Nonmover, Yes, Not in universe,4, Not in universe, ?, United-States, United-States, Native- Born in the United States,0, Not in universe,2,4,94, - 50000.
156141,38, Private,41,8, Bachelors degree(BA AB BS),0, Not in universe, Married-civilian spouse present, Hospital services, Professional specialty, White, All other, Female, Not in universe, Not in universe, Full-time schedules,0,0,0, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,991.45, ?, ?, ?, Not in universe under 1 year old, ?,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,2, Not in universe,2,52,95, - 50000.
132259,41, Not in universe,0,0, High school graduate,0, Not in universe, Never married, Not in universe or children, Not in universe, Black, All other, Male, Not in universe, Not in universe, Not in labor force,0,0,0, Nonfiler, Not in universe, Not in universe, Child 18+ never marr Not in a subfamily, Child 18 or older,3270.26, ?, ?, ?, Not in universe under 1 year old, ?,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,95, - 50000.
90484,0, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, Black, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,2294.02, Not in universe, Not in universe, Not in universe, Not in universe under 1 year old, Not in universe,0, Both parents present, United-States, ?, United-States, Native- Born in the United States,0, Not in universe,0,0,94, - 50000.
78109,9, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, Black, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,4408.46, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Mother only present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,94, - 50000.
145093,60, Private,13,37, Some college but no degree,0, Not in universe, Married-spouse absent, Manufacturing-durable goods, Machine operators assmblrs & inspctrs, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,500, Single, Not in universe, Not in universe, Nonfamily householder, Householder,1392.3, Nonmover, Nonmover, Nonmover, Yes, Not in universe,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,94, 50000+.
108692,52, Not in universe,0,0, 11th grade,0, Not in universe, Divorced, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Not in labor force,0,0,0, Nonfiler, Not in universe, Not in universe, Householder, Householder,1476.96, ?, ?, ?, Not in universe under 1 year old, ?,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,95, - 50000.
155779,70, Not in universe,0,0, High school graduate,0, Not in universe, Married-civilian spouse present, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,67, Joint both 65+, Not in universe, Not in universe, Spouse of householder, Spouse of householder,1385.67, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,94, - 50000.
38262,14, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, Puerto Rican, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Northeast, North Carolina, Child <18 never marr not in subfamily, Child under 18 never married,1153.13, MSA to MSA, Same county, Same county, No, No,0, Mother only present, Puerto-Rico, Puerto-Rico, United-States, Native- Born in the United States,0, Not in universe,0,0,94, - 50000.
89021,30, Not in universe,0,0, High school graduate,0, Not in universe, Married-civilian spouse present, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Joint both under 65, West, California, Spouse of householder, Spouse of householder,463.68, MSA to nonMSA, Different division same region, Different state in West, No, No,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,94, - 50000.
177664,74, Not in universe,0,0, Bachelors degree(BA AB BS),0, Not in universe, Widowed, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,4882, Single, Northeast, ?, Nonfamily householder, Householder,1591.41, MSA to MSA, Different county same state, Different county same state, No, No,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,94, - 50000.
188163,10, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,776.08, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Both parents present, ?, ?, United-States, Native- Born in the United States,0, Not in universe,0,0,94, - 50000.
125830,46, Local government,43,10, Bachelors degree(BA AB BS),0, Not in universe, Married-civilian spouse present, Education, Professional specialty, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,3103,0,100, Joint both under 65, Not in universe, Not in universe, Householder, Householder,1006.86, Nonmover, Nonmover, Nonmover, Yes, Not in universe,3, Not in universe, United-States, United-States, United-States, Native- Born in the United States,1, Not in universe,2,52,94, - 50000.
78253,26, Federal government,29,25, High school graduate,0, Not in universe, Never married, Transportation, Adm support including clerical, Asian or Pacific Islander, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,1000, Single, Not in universe, Not in universe, Nonfamily householder, Householder,915.75, Nonmover, Nonmover, Nonmover, Yes, Not in universe,6, Not in universe, ?, ?, United-States, Native- Born in the United States,0, Not in universe,2,52,94, - 50000.
171521,25, Private,31,37, Some college but no degree,0, Not in universe, Never married, Utilities and sanitary services, Machine operators assmblrs & inspctrs, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Single, Midwest, Kentucky, Nonfamily householder, Householder,1417.25, MSA to MSA, Same county, Same county, No, No,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,94, - 50000.
122703,30, Private,45,31, 5th or 6th grade,0, Not in universe, Married-civilian spouse present, Other professional services, Other service, White, Mexican (Mexicano), Male, Not in universe, Not in universe, Full-time schedules,2885,0,0, Joint both under 65, Not in universe, Not in universe, Householder, Householder,1207.48, ?, ?, ?, Not in universe under 1 year old, ?,2, Not in universe, Mexico, Mexico, Mexico, Foreign born- Not a citizen of U S ,0, Not in universe,2,52,95, - 50000.
57986,62, State government,41,36, High school graduate,0, Not in universe, Married-civilian spouse present, Hospital services, Machine operators assmblrs & inspctrs, White, All other, Female, No, Not in universe, Full-time schedules,0,0,0, Joint one under 65 & one 65+, Not in universe, Not in universe, Spouse of householder, Spouse of householder,1252.17, ?, ?, ?, Not in universe under 1 year old, ?,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, - 50000.
100807,58, Not in universe,0,0, Some college but no degree,0, Not in universe, Married-civilian spouse present, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Not in labor force,0,0,330, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,1550.66, ?, ?, ?, Not in universe under 1 year old, ?,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,95, - 50000.
199197,39, Not in universe,0,0, Bachelors degree(BA AB BS),0, Not in universe, Married-civilian spouse present, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Not in labor force,0,0,0, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,3802.81, ?, ?, ?, Not in universe under 1 year old, ?,0, Not in universe, ?, ?, ?, Foreign born- Not a citizen of U S ,0, Not in universe,2,0,95, - 50000.
44919,55, Private,33,16, High school graduate,1400, Not in universe, Married-civilian spouse present, Retail trade, Sales, White, All other, Female, No, Not in universe, Children or Armed Forces,0,0,100, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,2392.55, Nonmover, Nonmover, Nonmover, Yes, Not in universe,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,94, - 50000.
48655,74, Not in universe,0,0, Some college but no degree,0, Not in universe, Married-civilian spouse present, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Spouse of householder, Spouse of householder,2367.66, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,94, - 50000.
37451,76, Not in universe,0,0, High school graduate,0, Not in universe, Widowed, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Not in labor force,0,0,0, Nonfiler, Not in universe, Not in universe, Householder, Householder,1551.72, ?, ?, ?, Not in universe under 1 year old, ?,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,95, - 50000.
9376,39, Private,37,3, Some college but no degree,0, Not in universe, Never married, Business and repair services, Executive admin and managerial, White, Puerto Rican, Female, Not in universe, Not in universe, Full-time schedules,0,0,0, Head of household, Not in universe, Not in universe, Householder, Householder,774.83, ?, ?, ?, Not in universe under 1 year old, ?,2, Not in universe, Puerto-Rico, Puerto-Rico, Puerto-Rico, Native- Born in Puerto Rico or U S Outlying,0, Not in universe,2,52,95, - 50000.
176075,71, Not in universe,0,0, 9th grade,0, Not in universe, Divorced, Not in universe or children, Not in universe, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Nonfamily householder, Householder,609.05, Nonmover, Nonmover, Nonmover, Yes, Not in universe,1, Not in universe, United-States, United-States, United-States, Native- Born in the United States,2, Not in universe,2,20,94, - 50000.
40950,37, Private,42,2, Associates degree-academic program,0, Not in universe, Married-civilian spouse present, Medical except hospital, Executive admin and managerial, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,4000, Joint both under 65, Midwest, Mississippi, Spouse of householder, Spouse of householder,1532.26, MSA to nonMSA, Different region, Different state in Midwest, No, No,4, Not in universe, United-States, United-States, United-States, Native- Born in the United States,1, Not in universe,2,52,94, - 50000.
187455,31, Private,33,19, 11th grade,0, Not in universe, Married-spouse absent, Retail trade, Sales, Asian or Pacific Islander, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Single, Not in universe, Not in universe, Child 18+ ever marr Not in a subfamily, Child 18 or older,1366.06, Nonmover, Nonmover, Nonmover, Yes, Not in universe,4, Not in universe, India, India, India, Foreign born- Not a citizen of U S ,0, Not in universe,2,52,94, - 50000.
94473,0, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, Black, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,558.42, Not in universe, Not in universe, Not in universe, Not in universe under 1 year old, Not in universe,0, Mother only present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,94, - 50000.
177027,77, Not in universe,0,0, High school graduate,0, Not in universe, Divorced, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Not in labor force,0,0,0, Nonfiler, Not in universe, Not in universe, Nonfamily householder, Householder,3316.65, ?, ?, ?, Not in universe under 1 year old, ?,0, Not in universe, ?, ?, United-States, Native- Born in the United States,0, Not in universe,2,0,95, - 50000.
98120,76, Private,21,31, 7th and 8th grade,0, Not in universe, Married-civilian spouse present, Manufacturing-nondurable goods, Other service, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Joint both 65+, Not in universe, Not in universe, Householder, Householder,785.0, Nonmover, Nonmover, Nonmover, Yes, Not in universe,6, Not in universe, Canada, Canada, United-States, Native- Born in the United States,0, No,1,52,94, - 50000.
179503,34, Private,25,37, High school graduate,0, Not in universe, Married-civilian spouse present, Manufacturing-nondurable goods, Machine operators assmblrs & inspctrs, White, All other, Female, Not in universe, Not in universe, Full-time schedules,0,0,0, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,1515.34, ?, ?, ?, Not in universe under 1 year old, ?,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, 50000+.
================================================
FILE: examples/criteo_sample.txt
================================================
label,I1,I2,I3,I4,I5,I6,I7,I8,I9,I10,I11,I12,I13,C1,C2,C3,C4,C5,C6,C7,C8,C9,C10,C11,C12,C13,C14,C15,C16,C17,C18,C19,C20,C21,C22,C23,C24,C25,C26
0,,3,260.0,,17668.0,,,33.0,,,,0.0,,05db9164,08d6d899,9143c832,f56b7dd5,25c83c98,7e0ccccf,df5c2d18,0b153874,a73ee510,8f48ce11,a7b606c4,ae1bb660,eae197fd,b28479f6,bfef54b3,bad5ee18,e5ba7672,87c6f83c,,,0429f84b,,3a171ecb,c0d61a5c,,
0,,-1,19.0,35.0,30251.0,247.0,1.0,35.0,160.0,,1.0,,35.0,68fd1e64,04e09220,95e13fd4,a1e6a194,25c83c98,fe6b92e5,f819e175,062b5529,a73ee510,ab9456b4,6153cf57,8882c6cd,769a1844,b28479f6,69f825dd,23056e4f,d4bb7bd8,6fc84bfb,,,5155d8a3,,be7c41b4,ded4aac9,,
0,0.0,0,2.0,12.0,2013.0,164.0,6.0,35.0,523.0,0.0,3.0,,18.0,05db9164,38a947a1,3f55fb72,5de245c7,30903e74,7e0ccccf,b72ec13d,1f89b562,a73ee510,acce978c,3547565f,a5b0521a,12880350,b28479f6,c12fc269,95a8919c,e5ba7672,675c9258,,,2e01979f,,bcdee96c,6d5d1302,,
0,,13,1.0,4.0,16836.0,200.0,5.0,4.0,29.0,,2.0,,4.0,05db9164,8084ee93,02cf9876,c18be181,25c83c98,,e14874c9,0b153874,7cc72ec2,2462946f,636405ac,8fe001f4,31b42deb,07d13a8f,422c8577,36103458,e5ba7672,52e44668,,,e587c466,,32c7478e,3b183c5c,,
0,0.0,0,104.0,27.0,1990.0,142.0,4.0,32.0,37.0,0.0,1.0,,27.0,05db9164,207b2d81,5d076085,862b5ba0,25c83c98,fbad5c96,17c22666,0b153874,a73ee510,534fc986,feb49a68,f24b551c,8978af5c,64c94865,32ec6582,b6d021e8,e5ba7672,25c88e42,21ddcdc9,b1252a9d,0e8585d2,,32c7478e,0d4a6d1a,001f3601,92c878de
0,0.0,-1,63.0,40.0,1470.0,61.0,4.0,37.0,46.0,0.0,1.0,,40.0,68fd1e64,207b2d81,9dd3c4fc,a09fab49,25c83c98,,271190b7,5b392875,a73ee510,49d5fa15,26a64614,3c5900b5,51351dd6,b28479f6,c38116c9,0decd005,e5ba7672,d3303ea5,21ddcdc9,b1252a9d,7633c7c8,,32c7478e,17f458f7,001f3601,71236095
0,0.0,370,4.0,1.0,1787.0,65.0,14.0,25.0,489.0,0.0,7.0,,25.0,05db9164,2a69d406,fcae8bfa,13508380,25c83c98,,cd846c62,0b153874,a73ee510,3b08e48b,0ec1e215,18917580,44af41ef,07d13a8f,3b2d8705,51b69881,3486227d,642f2610,55dd3565,b1252a9d,5c8dc711,,423fab69,45ab94c8,2bf691b1,c84c4aec
1,19.0,10,30.0,10.0,1.0,3.0,33.0,47.0,126.0,3.0,5.0,,2.0,05db9164,403ea497,2cbec47f,3e2bfbda,30903e74,,7227c706,0b153874,a73ee510,5fcee6b1,9625b211,21a23bfe,dccbd94b,b28479f6,91f74a64,587267a3,e5ba7672,a78bd508,21ddcdc9,5840adea,c2a93b37,,32c7478e,1793a828,e8b83407,2fede552
0,0.0,0,36.0,22.0,4684.0,217.0,9.0,35.0,135.0,0.0,1.0,0.0,43.0,8cf07265,0aadb108,c798ded6,91e6318a,25c83c98,fe6b92e5,2aef1419,0b153874,a73ee510,3b08e48b,d027c970,1b2022a0,00e20e7b,1adce6ef,2de5271c,b74e1eb0,e5ba7672,7ce63c71,,,af5dc647,,dbb486d7,1793a828,,
0,2.0,11,8.0,23.0,30.0,11.0,2.0,8.0,23.0,1.0,1.0,,11.0,05db9164,58e67aaf,ea997bbe,72bea89f,384874ce,7e0ccccf,5b18f3d9,0b153874,a73ee510,012f45e7,720446f5,33ec1af8,034e5f3b,051219e6,d83fb924,4558136f,07c540c4,c21c3e4c,21ddcdc9,a458ea53,31c8e642,,c7dc6720,3e983c86,9b3e8820,d597922b
0,2.0,1,190.0,25.0,8.0,26.0,2.0,27.0,25.0,1.0,1.0,,25.0,05db9164,e77e5e6e,c23785fe,67dd8a70,25c83c98,7e0ccccf,0c41b6a1,37e4aa92,a73ee510,78d5c363,4ba74619,d8acd6f9,879fa878,07d13a8f,2eb18840,df604f5b,e5ba7672,449d6705,6f3756eb,5840adea,07b6c66f,,423fab69,246f2e7f,e8b83407,350a6bdb
0,,2,2.0,1.0,5533.0,1.0,41.0,1.0,33.0,,5.0,0.0,1.0,05db9164,d7988e72,25111132,d13862c2,25c83c98,6f6d9be8,84c427f0,5b392875,a73ee510,00f2b452,41b3f655,7c5cd1c7,ce5114a2,64c94865,846fb5bd,696fb81d,e5ba7672,0f2f9850,b6baba3f,a458ea53,06e40c52,8ec974f4,32c7478e,3fdb382b,e8b83407,49d68486
0,0.0,5,,,18424.0,461.0,23.0,4.0,231.0,0.0,2.0,,,05db9164,ed7b1c58,b063fe4e,4b972461,25c83c98,7e0ccccf,afa309bd,5b392875,a73ee510,23de5a4a,77212bd7,8cdc4941,7203f04e,b28479f6,298421a5,3084c78b,e5ba7672,8814ed47,,,514b7308,,c7dc6720,2fd70e1c,,
0,8.0,-1,,,732.0,2.0,22.0,2.0,2.0,1.0,4.0,,,68fd1e64,38a947a1,,,25c83c98,7e0ccccf,1c86e0eb,0b153874,a73ee510,e8f7c7e8,755e4a50,,5978055e,b28479f6,7ba31d46,,e5ba7672,9b82aca5,,,,,32c7478e,,,
1,0.0,0,24.0,36.0,5022.0,436.0,25.0,32.0,192.0,0.0,9.0,0.0,36.0,5bfa8ab5,84b4e42f,45f68c2a,39547932,384874ce,fbad5c96,85e1a170,0b153874,a73ee510,2bf8bed1,a4ea009a,78a16776,1e9339bc,91233270,cdb87fb5,e15ad623,8efede7f,67bd0ece,,,78c1dd4b,,c7dc6720,4f7b7578,,
0,,82,20.0,4.0,507333.0,,0.0,4.0,4.0,,0.0,,4.0,05db9164,38d50e09,5d0ec1e8,e63708e9,25c83c98,fbad5c96,bc324536,0b153874,7cc72ec2,f6540b40,2bcfb78f,506bb280,e6fc496d,07d13a8f,ee569ce2,81db2bec,e5ba7672,582152eb,21ddcdc9,5840adea,4a8f0a7f,c9d4222a,32c7478e,1989e165,001f3601,09929967
0,,24,3.0,2.0,10195.0,,0.0,32.0,55.0,,0.0,,2.0,5a9ed9b0,68b3edbf,b00d1501,d16679b9,4cf72387,7e0ccccf,36b796aa,0b153874,a73ee510,8b7e0638,7373475d,e0d76380,cfbfce5c,b28479f6,f511c49f,1203a270,e5ba7672,752d8b8a,,,73d06dde,,3a171ecb,aee52b6f,,
0,,105,4.0,1.0,2200.0,,0.0,1.0,1.0,,0.0,,1.0,05db9164,38d50e09,fc1cad4b,40ed41e5,25c83c98,7e0ccccf,88afd773,51d76abe,a73ee510,3b08e48b,c6cb726f,153ff04a,176d07bc,b28479f6,42b3012c,1bf03082,776ce399,582152eb,21ddcdc9,5840adea,84ec2c79,,be7c41b4,a415643d,001f3601,c4304c4b
1,5.0,85,52.0,6.0,36.0,36.0,30.0,24.0,281.0,1.0,5.0,2.0,6.0,9a89b36c,1cfdf714,9d427ddf,4eadb673,25c83c98,7e0ccccf,2555b4d9,0b153874,a73ee510,4c89c3af,0e4ebdac,cf724373,779f824b,07d13a8f,f775a6d5,6512dce6,8efede7f,e88ffc9d,21ddcdc9,b1252a9d,361a1080,,423fab69,3fdb382b,cb079c2d,49d68486
0,2.0,3,4.0,1.0,4.0,1.0,2.0,1.0,1.0,1.0,1.0,,1.0,68fd1e64,2eb7b10e,378112d3,684abf7b,25c83c98,fbad5c96,0d15142a,5b392875,a73ee510,ac473633,df7e8e0b,38176faa,84c02464,1adce6ef,0816fba2,f2c6a810,07c540c4,21eb63af,,,8b7fb864,,423fab69,45b2acf4,,
0,,1,5.0,36.0,239721.0,,0.0,0.0,123.0,,0.0,,62.0,8cf07265,4f25e98b,a68b0bcf,c194aaab,25c83c98,fbad5c96,a2f7459e,0b153874,7cc72ec2,b393caa5,15eced00,ab1307ec,bd251a95,64c94865,40e29d2a,65a31309,e5ba7672,7ef5affa,738584ec,a458ea53,fca82615,,32c7478e,74f7ceeb,9d93af03,d14e41ff
0,,4,,,1572.0,,0.0,17.0,55.0,,0.0,,,05db9164,8947f767,6bbe880c,feb6eb1a,4cf72387,7e0ccccf,3babeb61,0b153874,a73ee510,3b08e48b,565788d0,d06dc48e,8e7ad399,1adce6ef,ba8b8b16,30e6420c,776ce399,bd17c3da,ba92e49d,b1252a9d,65f3080f,,be7c41b4,42a310e6,010f6491,0eabc199
0,0.0,0,,,1464.0,4.0,5.0,3.0,4.0,0.0,1.0,,,68fd1e64,38a947a1,dd8e6407,db4eb846,25c83c98,13718bbd,963d99df,062b5529,a73ee510,3b08e48b,bffe9c30,eb43b195,e62d6c68,07d13a8f,3d2c6113,de815c2d,776ce399,d3c7daaa,,,5def73cb,,32c7478e,aa5529de,,
1,0.0,43,2.0,3.0,1700.0,21.0,6.0,10.0,21.0,0.0,1.0,,7.0,5a9ed9b0,46bbf321,c5d94b65,5cc8f91d,25c83c98,7e0ccccf,4157815a,1f89b562,a73ee510,4e979b5e,7056d78a,75c79158,08775c1b,e8dce07a,80d1ee72,208d4baf,e5ba7672,906ff5cb,,,6a909d9a,,3a171ecb,1f68c81f,,
0,0.0,1,2.0,1.0,2939.0,39.0,17.0,3.0,437.0,0.0,7.0,,1.0,68fd1e64,38a947a1,98351ee6,811ce8e8,25c83c98,fbad5c96,4a6c02fb,37e4aa92,a73ee510,3b08e48b,0cb221d0,617c70e9,ea18ebd8,07d13a8f,31b59ad3,121f63c9,e5ba7672,065917ca,,,c3739d01,,423fab69,d4af2638,,
1,9.0,1,2.0,5.0,18.0,5.0,9.0,5.0,5.0,1.0,1.0,0.0,5.0,5a9ed9b0,9819deea,6813d33b,f922efad,25c83c98,fbad5c96,34cbc0af,0b153874,a73ee510,bac95df6,88196a93,b99ddbc8,1211c647,b28479f6,1150f5ed,87acb535,07c540c4,7e32f7a4,,,a4b7004c,,32c7478e,b34f3128,,
0,,1,2.0,16.0,14404.0,79.0,2.0,16.0,103.0,,1.0,,16.0,05db9164,38a947a1,5492524f,ae59cd56,25c83c98,7e0ccccf,7925e09b,5b392875,7cc72ec2,56c80038,1cba690a,e00462bb,1d0f2da8,64c94865,51c5d5ca,ebbb82d7,07c540c4,be5810bd,,,bd1f6272,c9d4222a,32c7478e,043a382b,,
0,0.0,26,7.0,1.0,3412.0,104.0,10.0,2.0,6.0,0.0,1.0,1.0,1.0,05db9164,287130e0,5e25fa67,dd47ba3b,25c83c98,13718bbd,412cb2ce,0b153874,a73ee510,3b08e48b,b9ec9192,8ebd48c3,df5886ca,07d13a8f,10040656,e05d680b,3486227d,891589e7,ff6cdd42,a458ea53,a2b7caec,,c7dc6720,1481ceb4,e8b83407,988b0775
0,8.0,-1,60.0,11.0,11.0,7.0,9.0,30.0,39.0,1.0,2.0,,7.0,2d4ea12b,d97d4ce8,c725873a,d0189e5a,25c83c98,fe6b92e5,07d75b52,1f89b562,a73ee510,4f1c6ae7,a2c1d2d9,49fee879,ea31804b,1adce6ef,46218630,3b87fa92,e5ba7672,fb342121,7be4df37,5840adea,d90f665b,,32c7478e,6c1cdd05,ea9a246c,1219b447
0,,1,13.0,1.0,3150.0,163.0,1.0,1.0,32.0,,1.0,,1.0,39af2607,c44e8a72,3f7f3d24,8eb89744,4cf72387,7e0ccccf,86651165,0b153874,a73ee510,3b08e48b,39dd23e7,538a49e7,0159bf9f,b28479f6,1addf65e,0596b5be,07c540c4,456d734d,af1445c4,a458ea53,cf79f8fa,c9d4222a,3a171ecb,d5b4ea7d,010f6491,deffd9e3
0,1.0,302,71.0,3.0,270.0,19.0,1.0,6.0,19.0,1.0,1.0,,19.0,68fd1e64,876465ad,da89f77a,37ee624b,43b19349,fe6b92e5,2b3ce8b7,5b392875,a73ee510,8a99abc1,4352b29b,8065cc64,5f4de855,b28479f6,9c382f7a,a14df6f7,d4bb7bd8,08154af3,21ddcdc9,5840adea,e7f0c6dc,,bcdee96c,3e30919e,f55c04b6,2fede552
1,1.0,0,1.0,0.0,2.0,0.0,4.0,0.0,0.0,1.0,2.0,,0.0,241546e0,6887a43c,9b792af9,9c6d05a0,25c83c98,6f6d9be8,adbcc874,0b153874,a73ee510,fbbf2c95,46031dab,6532318c,377af8aa,1adce6ef,ef6b7bdf,2c9d222f,e5ba7672,8f0f692f,21ddcdc9,a458ea53,cc6a9262,,32c7478e,a5862ce8,445bbe3b,b6a3490e
0,11.0,251,9.0,5.0,21.0,6.0,34.0,5.0,5.0,1.0,4.0,,5.0,05db9164,4322636e,e007dfac,77b99936,4ea20c7d,fe6b92e5,2be44e4e,25239412,a73ee510,18e09007,364e8b48,9c841b74,34cbb1bc,07d13a8f,14674f9b,9b3f7aa2,e5ba7672,9d3171e9,21ddcdc9,a458ea53,61b4555a,ad3062eb,32c7478e,38b97a31,ea9a246c,074bb89f
1,10.0,1,4.0,4.0,1.0,0.0,10.0,4.0,4.0,1.0,1.0,,0.0,09ca0b81,4f25e98b,0b2640f7,4badfc0c,4cf72387,fe6b92e5,df5c2d18,0b153874,a73ee510,da272362,a7b606c4,33c282f5,eae197fd,07d13a8f,dfab705f,635c3e13,e5ba7672,7ef5affa,2f4b9dd2,b1252a9d,cff19dc6,,c7dc6720,8535db9f,001f3601,b98a5b90
0,0.0,-1,1.0,23.0,3169.0,147.0,62.0,0.0,753.0,0.0,9.0,1.0,39.0,05db9164,942f9a8d,69b028e3,003ceb8c,25c83c98,7e0ccccf,3f4ec687,1f89b562,a73ee510,c5fe5cb9,c4adf918,424ba327,85dbe138,b28479f6,ac182643,169f1150,8efede7f,1f868fdd,1d04f4a4,b1252a9d,15414e28,,32c7478e,aa9b9ab9,9d93af03,c73ed234
0,0.0,35,13.0,5.0,4939.0,140.0,1.0,22.0,61.0,0.0,1.0,,11.0,05db9164,4f25e98b,5e25fa67,dd47ba3b,a9411994,7e0ccccf,2e62d414,0b153874,a73ee510,4b415bb3,258875ea,8ebd48c3,dcc8f90a,07d13a8f,5be89da3,e05d680b,d4bb7bd8,bc5a0ff7,ff6cdd42,a458ea53,a2b7caec,,32c7478e,1481ceb4,e8b83407,988b0775
0,,1,13.0,2.0,59865.0,292.0,0.0,2.0,87.0,,0.0,0.0,2.0,68fd1e64,287130e0,b87cffc0,ffacf4e8,43b19349,,04277bf9,5b392875,7cc72ec2,4ea0d483,7e2c5c15,5ea407f3,91a1b611,b28479f6,9efd8b77,9906d656,07c540c4,891589e7,55dd3565,a458ea53,37a23b2d,,32c7478e,3fdb382b,ea9a246c,49d68486
1,,0,,1.0,16732.0,2.0,1.0,1.0,1.0,,1.0,,1.0,87552397,6e638bbc,598b72ce,3c7eb23c,25c83c98,fbad5c96,675e81f6,0b153874,a73ee510,d9b71390,4a77ddca,f21f7d11,dc1d72e4,07d13a8f,d4525f76,e2e3cf1c,d4bb7bd8,f6a2fc70,21ddcdc9,a458ea53,605776ee,,32c7478e,f93938dd,e8b83407,322cbe58
1,0.0,212,,,1632.0,65.0,24.0,1.0,113.0,0.0,6.0,,,be589b51,b0d4a6f6,50a6bc33,335e428a,25c83c98,7e0ccccf,1171550e,1f89b562,a73ee510,23724df8,031ba22d,4baf63a1,bb7a2c12,32813e21,b0369b63,c73993da,e5ba7672,e01eacde,,,1d14288c,,3a171ecb,c9bc2384,,
0,10.0,11,3.0,3.0,1026.0,3.0,88.0,3.0,131.0,1.0,15.0,0.0,3.0,9a89b36c,1cfdf714,8b14bdd6,3bf2df8b,25c83c98,,e807f153,0b153874,a73ee510,8627508e,1054ae5c,3cd57e51,d7ce3abd,b28479f6,d345b1a0,4d664c70,27c07bd6,e88ffc9d,712d530c,b1252a9d,9ecb9e0d,,bcdee96c,a8380e43,cb079c2d,37c5e077
0,,5,22.0,5.0,10324.0,,0.0,5.0,13.0,,0.0,,5.0,f434fac1,40ed0c67,374195a1,6f5d5092,4cf72387,6f6d9be8,555d7949,1f89b562,a73ee510,3b08e48b,91e8fc27,752343e3,9ff13f22,1adce6ef,f8ebf901,c43b15fe,776ce399,2585827d,21ddcdc9,5840adea,a66e7b01,,be7c41b4,e33735a0,e8b83407,f95af538
0,,779,1.0,1.0,676.0,,0.0,4.0,4.0,,0.0,,1.0,68fd1e64,e5fb1af3,9b953c56,7be07df9,25c83c98,7e0ccccf,5e4f7d2b,0b153874,a73ee510,3b08e48b,25f4f871,6bca71b1,e67cdf97,07d13a8f,b5de5956,fb8ca891,d4bb7bd8,13145934,55dd3565,b1252a9d,b1ae3ed2,ad3062eb,423fab69,3fdb382b,9b3e8820,49d68486
0,,179,61.0,,3316.0,,,1.0,,,,,,f473b8dc,38a947a1,223b0e16,ca55061c,43b19349,7e0ccccf,7f2c5a6e,64523cfa,a73ee510,f6c6d9f8,d21494f8,156f99ef,f47f13e4,1adce6ef,0e78291e,5fbf4a84,1e88c74f,1999bae9,,,deb9605d,,32c7478e,e448275f,,
0,1.0,1,5.0,7.0,1238.0,13.0,9.0,15.0,89.0,0.0,3.0,0.0,7.0,8cf07265,09e68b86,aa8c1539,85dd697c,25c83c98,7e0ccccf,92ce5a7d,37e4aa92,a73ee510,15fa156b,e0c3cae0,d8c29807,e8df3343,8ceecbc8,d2f03b75,c64d548f,8efede7f,63cdbb21,cf99e5de,5840adea,5f957280,c9d4222a,55dd3565,1793a828,e8b83407,b7d9c3bc
0,2.0,72,20.0,11.0,4.0,11.0,24.0,14.0,69.0,1.0,7.0,,11.0,05db9164,09e68b86,6ef2aa66,20af9140,25c83c98,7e0ccccf,372a0c4c,0b153874,a73ee510,a08eee5a,ec88dd34,4df84614,94881fc3,b28479f6,52baadf5,cf3ec61f,3486227d,5aed7436,7be4df37,b1252a9d,98a79791,,bcdee96c,3fdb382b,e8b83407,49d68486
0,,57,60.0,20.0,11862.0,20.0,1.0,19.0,20.0,,1.0,,20.0,5bfa8ab5,4f25e98b,15363e12,f9e8a6fb,384874ce,,65c53f25,0b153874,a73ee510,3b08e48b,ad2bc6f4,d63df4e6,39ccb769,b28479f6,8ab5b746,a694f6ce,d4bb7bd8,7ef5affa,21ddcdc9,a458ea53,a370fd83,,32c7478e,d5b01f55,9b3e8820,85cebe8c
0,4.0,1,29.0,30.0,112.0,30.0,27.0,33.0,144.0,2.0,4.0,0.0,30.0,05db9164,58e67aaf,99815367,771966f0,4cf72387,6f6d9be8,cdc0ad95,5b392875,a73ee510,b0c25211,69926409,e802f466,2fc3058f,051219e6,d83fb924,f6613e51,e5ba7672,c21c3e4c,21ddcdc9,a458ea53,3aa05bfb,,32c7478e,9f0d87bf,9b3e8820,bde577f6
0,2.0,4,53.0,14.0,1499.0,20.0,11.0,19.0,98.0,0.0,3.0,7.0,14.0,75ac2fe6,287130e0,b264d69e,ce831e6d,25c83c98,,5aef82b1,0b153874,a73ee510,7fdb06fe,010265ac,74138b6d,0e5bc979,f7c1b33f,42793602,b49f63ab,8efede7f,891589e7,55dd3565,b1252a9d,a1229e5f,,32c7478e,3fdb382b,ea9a246c,49d68486
0,,5,3.0,5.0,17405.0,,0.0,8.0,8.0,,0.0,,6.0,05db9164,c5c1d6ae,8018e37d,d8660950,43b19349,fbad5c96,c1e20400,5b392875,a73ee510,3b08e48b,60a1c175,22cad86a,9b9e44d2,07d13a8f,b25845fd,2a27c935,776ce399,561cabfe,21ddcdc9,5840adea,d479575f,,be7c41b4,9b18ad04,7a402766,67ebe777
0,,49,1.0,1.0,3116.0,72.0,3.0,1.0,48.0,,1.0,,1.0,7e5c2ff4,2c8c5f5d,13cd0697,352cefe6,25c83c98,7e0ccccf,4fb73f5f,985e3fcb,a73ee510,3b08e48b,6a447eb3,c3cdaf85,9dfda2b9,1adce6ef,5edc1a28,08514295,e5ba7672,f5f4ae5b,,,6387fda4,,55dd3565,d36c7dbf,,
0,,2865,23.0,0.0,23584.0,,0.0,2.0,47.0,,0.0,,2.0,05db9164,0468d672,cedcacac,7967fcf5,25c83c98,7e0ccccf,33b15f2c,0b153874,a73ee510,0f6ee8ce,419d31d4,553e02c3,08961fd0,1adce6ef,4f3b3616,91a6eec5,1e88c74f,9880032b,21ddcdc9,5840adea,a97b62ca,,423fab69,727a7cc7,ea9a246c,6935065e
0,,119,4.0,4.0,13528.0,,0.0,7.0,35.0,,0.0,,4.0,87552397,38a947a1,695a85e0,d502349a,25c83c98,7e0ccccf,82f666b6,0b153874,a73ee510,631ddef6,e51ddf94,67b31aac,3516f6e6,cfef1c29,d33de6b0,d2b0336b,07c540c4,48ce336b,,,ea6a0e31,,3a171ecb,da408463,,
0,,25,5.0,4.0,0.0,,0.0,4.0,4.0,,0.0,,1.0,68fd1e64,71ca0a25,44e7b8ec,3b989466,307e775a,7e0ccccf,d0519bab,0b153874,a73ee510,3b08e48b,38914a66,d7cd5e08,c281c227,1adce6ef,ae3a9888,4032eea3,1e88c74f,9bf8ffef,21ddcdc9,5840adea,53def47b,c9d4222a,dbb486d7,8849cfac,001f3601,aa5f0a15
0,2.0,180,94.0,7.0,151.0,38.0,2.0,30.0,26.0,1.0,1.0,,25.0,5bfa8ab5,421b43cd,33ebdbb6,29998ed1,25c83c98,fbad5c96,6ad82e7a,0b153874,a73ee510,451bd4e4,c1ee56d0,6aaba33c,ebd756bd,b28479f6,2d0bb053,b041b04a,e5ba7672,2804effd,,,723b4dfd,,32c7478e,b34f3128,,
0,,2,0.0,,,,,0.0,,,,,,be589b51,38a947a1,4470baf4,8c8a4c47,307e775a,fe6b92e5,ae1dfa39,0b153874,7cc72ec2,3b08e48b,ee26f284,bb669e25,48b975db,b28479f6,717db705,2b2ce127,2005abd1,ade68c22,,,2b796e4a,,be7c41b4,8d365d3b,,
0,,0,9.0,,17907.0,59.0,2.0,0.0,98.0,,1.0,,,68fd1e64,80e26c9b,ba1947d0,85dd697c,25c83c98,fe6b92e5,3d63f4e6,0b153874,a73ee510,94e68c1d,af6a4ffc,34a238e0,2a1579a2,b28479f6,a785131a,da441c7e,e5ba7672,005c6740,21ddcdc9,5840adea,8717ea07,,32c7478e,1793a828,e8b83407,b9809574
0,7.0,84,,7.0,10.0,6.0,29.0,41.0,288.0,1.0,4.0,,5.0,05db9164,38a947a1,840eeb3a,f7263320,25c83c98,7e0ccccf,3baecfcb,0b153874,a73ee510,98d5faa2,96a54d80,317bfd7d,dbe5226f,07d13a8f,d4a5a2be,1689e4de,e5ba7672,5d961bca,,,dc55d6df,,423fab69,aa0115d2,,
0,0.0,0,1.0,,3667.0,42.0,2.0,30.0,37.0,0.0,1.0,1.0,,05db9164,e5fb1af3,909286bb,252734c9,25c83c98,7e0ccccf,b28fa88b,0b153874,a73ee510,4b8a7639,9f0003f4,233fde4c,5afd9e51,b28479f6,23287566,1871ac47,8efede7f,13145934,1d1eb838,b1252a9d,23da7042,,bcdee96c,1be0cc0a,e8b83407,f89dfbcc
0,5.0,1,46.0,6.0,1046.0,112.0,5.0,43.0,111.0,1.0,1.0,,6.0,05db9164,4f25e98b,f86649de,f56f6045,25c83c98,fe6b92e5,21c0ea1a,0b153874,a73ee510,cfa407de,bc862fb6,b9b3b7ef,4f487d87,07d13a8f,dfab705f,33301a0b,e5ba7672,7ef5affa,92524a76,a458ea53,d5a53bc3,c9d4222a,423fab69,3fdb382b,001f3601,79883c16
0,,7,4.0,3.0,75211.0,,0.0,3.0,3.0,,0.0,,3.0,8cf07265,0468d672,00d3cdb7,d4125c6f,25c83c98,7e0ccccf,71ccc25b,0b153874,7cc72ec2,e89812b3,5cab60cb,d286aff3,ce418dc9,07d13a8f,a888f201,7d9d720d,1e88c74f,9880032b,21ddcdc9,5840adea,8443660f,,3a171ecb,52d7797f,e8b83407,ddf88ddd
1,,54,1.0,1.0,,,0.0,1.0,1.0,,0.0,,1.0,68fd1e64,38a947a1,0d15d9b5,bfe24cb7,b0530c50,,d9aa9d97,0b153874,7cc72ec2,3b08e48b,6e647667,72a52d4c,85dbe138,b28479f6,06809048,58cacba8,2005abd1,670f513e,,,b7ba6151,,32c7478e,7b80ab11,,
0,,0,34.0,3.0,,,0.0,3.0,3.0,,0.0,,3.0,68fd1e64,287130e0,38610f2f,28d2973d,25c83c98,,88002ee1,0b153874,7cc72ec2,3b08e48b,f1b78ab4,b345f76c,6e5da64f,b28479f6,9efd8b77,569a0480,2005abd1,891589e7,712d530c,b1252a9d,c2af6d9f,,32c7478e,58e38a64,ea9a246c,70451962
1,,1,1.0,,7814.0,119.0,1.0,19.0,30.0,,1.0,,,05db9164,80e26c9b,eb08d440,f922efad,25c83c98,fe6b92e5,41e1828d,0b153874,a73ee510,3b08e48b,b6358cf2,654bb16a,61c65daf,1adce6ef,0f942372,87acb535,d4bb7bd8,005c6740,21ddcdc9,5840adea,a4b7004c,,32c7478e,b34f3128,e8b83407,9904c656
0,2.0,5,11.0,9.0,24.0,9.0,110.0,9.0,148.0,1.0,10.0,0.0,9.0,be30ca83,8f5b4275,b009d929,c7043c4b,30903e74,fbad5c96,a90a99c5,51d76abe,a73ee510,e6003298,c804061c,3563ab62,1cc9ac51,1adce6ef,a6bf53df,b688c8cc,8efede7f,65c9624a,21ddcdc9,5840adea,2754aaf1,c9d4222a,55dd3565,3b183c5c,e8b83407,adb5d234
0,,19,1.0,1.0,7476.0,9.0,9.0,1.0,9.0,,1.0,,1.0,8cf07265,537e899b,5037b88e,9dde01fd,25c83c98,fbad5c96,aafae983,0b153874,a73ee510,dc790dda,c3a20c8d,680d7261,7ce5cdf0,07d13a8f,6d68e99c,c0673b44,e5ba7672,b34aa802,,,e049c839,,32c7478e,6095f986,,
0,4.0,0,131.0,1.0,0.0,1.0,14.0,10.0,40.0,1.0,3.0,,0.0,05db9164,80e26c9b,13193952,f922efad,25c83c98,fe6b92e5,124131fa,1f89b562,a73ee510,a1ee64a6,9ba53fcc,654bb16a,42156eb4,1adce6ef,0f942372,87acb535,e5ba7672,005c6740,21ddcdc9,5840adea,a4b7004c,ad3062eb,bcdee96c,b34f3128,e8b83407,9904c656
1,0.0,5,2.0,1.0,1526.0,3.0,9.0,2.0,2.0,0.0,1.0,,1.0,05db9164,38a947a1,60c37737,8a77aa30,25c83c98,fe6b92e5,1c63b114,1f89b562,a73ee510,f6f942d1,67841877,94a1cc80,781f4d92,b28479f6,962bbefe,3eef319d,e5ba7672,0ad1cc71,,,1c63c71e,c9d4222a,3a171ecb,ad80aaa7,,
0,1.0,1,5.0,18.0,475.0,63.0,15.0,4.0,803.0,1.0,4.0,,63.0,05db9164,3e4b7926,7442ec70,bb8645c3,0942e0a7,7e0ccccf,3a7402e7,51d76abe,a73ee510,aa91245c,b4bb4248,a5ab10e6,3eb2f9dc,07d13a8f,e6863a8e,1cdb3603,e5ba7672,e261f8d8,21ddcdc9,5840adea,1380864e,,32c7478e,be2f0db5,47907db5,68d9ada1
0,,1,1.0,18.0,10791.0,,0.0,1.0,281.0,,0.0,,18.0,05db9164,46bbf321,c5d94b65,5cc8f91d,4cf72387,7e0ccccf,2773eaab,5b392875,a73ee510,1a428761,06474f17,75c79158,2ec4b007,91233270,cddd56a1,208d4baf,1e88c74f,906ff5cb,,,6a909d9a,ad3062eb,3a171ecb,1f68c81f,,
0,1.0,-1,,,528.0,15.0,8.0,2.0,585.0,1.0,4.0,,,05db9164,ef69887a,3fea0364,9c32fadc,30903e74,,ec1a1856,0b153874,a73ee510,22a99f9d,a04e019f,cc606cbe,07a906b4,b28479f6,902a109f,0ab5ee0c,e5ba7672,4bcc9449,083e89d9,b1252a9d,6c38450e,,32c7478e,394c5a53,47907db5,1d7b6578
0,,18,9.0,0.0,,,0.0,7.0,16.0,,0.0,,7.0,68fd1e64,38a947a1,2273663d,3beb8147,25c83c98,fbad5c96,88002ee1,985e3fcb,7cc72ec2,3b08e48b,f1b78ab4,c47972c1,6e5da64f,1adce6ef,8d3c9c0c,e638c51d,2005abd1,35176a17,,,0370bc83,ad3062eb,55dd3565,cde6fafb,,
0,,5,,13.0,10467.0,170.0,4.0,13.0,96.0,,1.0,,13.0,be589b51,8084ee93,02cf9876,c18be181,0942e0a7,7e0ccccf,ad82323c,37e4aa92,a73ee510,bdfd8a02,7ca25fd2,8fe001f4,d3802338,b28479f6,b2ff8c6b,36103458,e5ba7672,52e44668,,,e587c466,,32c7478e,3b183c5c,,
1,,27,,,27753.0,,,3.0,,,,,,05db9164,efb7db0e,bf05882d,9e3f04df,25c83c98,7e0ccccf,73e2fc5e,062b5529,a73ee510,f8f0e86f,4e46b019,9da0a604,07c072b7,b28479f6,5ab7247d,929eef3c,d4bb7bd8,a863ac26,,,fb19a39b,ad3062eb,3a171ecb,cc4079ea,,
0,0.0,49,,,3732.0,20.0,1.0,3.0,20.0,0.0,1.0,,,17f69355,09e68b86,5be9b239,ace52998,25c83c98,,82cfb145,0b153874,a73ee510,9b8e7680,3f31bb3e,e5b118b4,c6378246,b28479f6,52baadf5,f68bd494,d4bb7bd8,5aed7436,21ddcdc9,a458ea53,ba3c688b,,32c7478e,3fdb382b,b9266ff0,49d68486
1,1.0,19,18.0,16.0,178.0,32.0,34.0,34.0,200.0,0.0,9.0,,16.0,05db9164,ea3a5818,7ee60f5f,bebc14b3,25c83c98,6f6d9be8,4f900c22,f0e5818a,a73ee510,47e01053,7c4f062c,cc22efeb,76dfc898,b28479f6,0a069322,606df1fe,e5ba7672,a1d0cc4f,21ddcdc9,b1252a9d,aebdd3c2,8ec974f4,32c7478e,e4e10900,b9266ff0,7a1ac642
1,0.0,1,2.0,5.0,6613.0,104.0,1.0,17.0,74.0,0.0,1.0,,5.0,8cf07265,8db5bc37,,,25c83c98,7e0ccccf,5a103f30,0b153874,a73ee510,3b08e48b,8487a168,,636195f8,64c94865,00e52733,,d4bb7bd8,821c30b8,,,,,32c7478e,,,
0,,1,,,29111.0,,,0.0,,,,,,ae82ea21,5dac953d,d032c263,c18be181,384874ce,,6b406125,5b392875,a73ee510,f1311559,278636c9,dfbb09fb,b87a829f,b28479f6,78e3b025,84898b2a,e5ba7672,35a9ed38,,,0014c32a,c0061c6d,32c7478e,3b183c5c,,
0,,58,,20.0,21659.0,1033.0,9.0,1.0,151.0,,2.0,,43.0,05db9164,80e26c9b,,,25c83c98,7e0ccccf,622305e6,5b392875,a73ee510,e70742b0,319687c9,,62036f49,07d13a8f,f3635baf,,e5ba7672,f54016b9,21ddcdc9,5840adea,,,3a171ecb,,e8b83407,00ed90d0
0,0.0,11,11.0,5.0,4325.0,61.0,4.0,14.0,68.0,0.0,2.0,0.0,5.0,68fd1e64,d8fc04df,f652979e,32a55192,25c83c98,7e0ccccf,19d92932,5b392875,a73ee510,f710483a,d54a5851,ed5cfa27,a36387e6,b28479f6,9da6bb5f,3141102a,1e88c74f,cbadff99,21ddcdc9,5840adea,3df2213d,,3a171ecb,42998020,010f6491,dd8b4f5c
1,,2560,2.0,0.0,63552.0,398.0,0.0,7.0,122.0,,0.0,,1.0,9a89b36c,39dfaa0d,a17519ab,5b392af8,25c83c98,fbad5c96,14ba4967,64523cfa,7cc72ec2,9ffc445a,c21c44c8,834b5edc,5b3fc509,07d13a8f,60fa10e5,e66306df,d4bb7bd8,df4fffb7,21ddcdc9,5840adea,9988d803,,c7dc6720,abe3a684,010f6491,f3737bd0
0,0.0,30,2.0,15.0,2712.0,210.0,5.0,43.0,242.0,0.0,2.0,,15.0,05db9164,207b2d81,2b280564,ad5ffc6b,25c83c98,fe6b92e5,559eb1e1,0b153874,a73ee510,51e04895,91875c79,2a064dba,ea519e47,64c94865,11b2ae92,7d9b60c8,e5ba7672,395856b0,21ddcdc9,a458ea53,9c3eb598,,32c7478e,c0b8dfd6,001f3601,81be451e
0,0.0,49,,3.0,1732.0,20.0,1.0,14.0,16.0,0.0,1.0,,3.0,8cf07265,e112a9de,4e1c9eda,22504558,25c83c98,fbad5c96,01620311,0b153874,a73ee510,66c281d9,922bbb91,23bc90a1,ad61640d,1adce6ef,6da7d68c,776f5665,e5ba7672,d495a339,,,5a5953a2,,32c7478e,8f079aa5,,
0,,-1,,,357.0,,0.0,10.0,11.0,,0.0,,,68fd1e64,403ea497,2cbec47f,3e2bfbda,25c83c98,7e0ccccf,9d8d7034,0b153874,a73ee510,b3d657b8,51ef0313,21a23bfe,e8f6ccfe,07d13a8f,e3209fc2,587267a3,e5ba7672,a78bd508,21ddcdc9,5840adea,c2a93b37,,32c7478e,1793a828,e8b83407,2fede552
0,2.0,7,,22.0,37.0,22.0,4.0,1.0,135.0,1.0,3.0,,22.0,98237733,b26462db,dad8b3db,06b1cf6e,25c83c98,7e0ccccf,ade953a9,5b392875,a73ee510,0eca1729,29e4ad33,422e8212,80467802,07d13a8f,72fbc65c,25b075e4,e5ba7672,35ee3e9e,,,a13bd40d,,3a171ecb,0ff91809,,
0,,68,1.0,1.0,24513.0,43.0,4.0,12.0,62.0,,1.0,,1.0,fc9c62bb,80e26c9b,,,25c83c98,6f6d9be8,e746fe19,1f89b562,a73ee510,c9ac91cb,0bc63bd0,,ef007ecc,b28479f6,4c1df281,,e5ba7672,f54016b9,21ddcdc9,5840adea,,,32c7478e,,e8b83407,c4e4eabb
1,0.0,304,1.0,,13599.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,,68fd1e64,064c8f31,70168f62,585ab217,25c83c98,fe6b92e5,b3a5258d,0b153874,a73ee510,7cda6c86,30b2a438,eb83af8a,aebdb575,07d13a8f,81d3f724,69f67894,3486227d,d4a314a2,21ddcdc9,5840adea,e1627e2c,,32c7478e,a6e7d8d3,001f3601,2fede552
0,0.0,2,4.0,7.0,1568.0,70.0,4.0,42.0,117.0,0.0,1.0,,36.0,de4dac42,b7ca2abd,022a0b3c,d6b6e0bf,25c83c98,13718bbd,33cca6fa,0b153874,a73ee510,fb999b75,9f7c4fc1,05e68866,2b9fb512,07d13a8f,2f453358,6de617d3,e5ba7672,4771e483,,,df66957b,,3a171ecb,b34f3128,,
0,,0,3.0,2.0,,,0.0,3.0,13.0,,0.0,,2.0,05db9164,38a947a1,d125aecd,82a61820,25c83c98,7e0ccccf,d18f8f99,0b153874,7cc72ec2,3b08e48b,6c27619d,49507531,61e43922,07d13a8f,bb1e9ca8,0fd6d3ca,2005abd1,e96a7df2,,,7eefff0d,,be7c41b4,cafb4e4d,,
0,0.0,0,5.0,1.0,1751.0,37.0,1.0,8.0,11.0,0.0,1.0,,1.0,8cf07265,09e68b86,fc25ffd0,991a22ae,25c83c98,fbad5c96,6da2fbd6,f0e5818a,a73ee510,78ed0c4d,7bbe6c06,c35b992b,ea1f21b7,1adce6ef,dbc5e126,068a2c9f,e5ba7672,5aed7436,21ddcdc9,b1252a9d,df9de95c,,423fab69,3fdb382b,cb079c2d,49d68486
1,3.0,22,7.0,9.0,269.0,11.0,12.0,15.0,573.0,1.0,7.0,,9.0,05db9164,558b4efb,1b5e2c32,8a2b280f,25c83c98,13718bbd,6d51a5b0,966033bc,a73ee510,2e48a61d,61af8052,733bbdf2,2f3ee7fb,64c94865,2cd24ac0,8ac5e229,e5ba7672,c68ebaa0,21ddcdc9,5840adea,0be61dd1,,32c7478e,3b183c5c,ea9a246c,9973f80f
1,,1,,,14447.0,328.0,15.0,0.0,432.0,,9.0,0.0,,5bfa8ab5,26ece8a8,58ca7e87,3db5e097,25c83c98,fbad5c96,877d7f71,0b153874,a73ee510,afc4d756,5bd8a4ae,91f87a19,7a3043c0,07d13a8f,102fc449,834b85f5,3486227d,87fd936e,,,e339163e,,423fab69,c9a8db2a,,
0,,1,4.0,1.0,235065.0,,0.0,3.0,1.0,,0.0,,1.0,5a9ed9b0,a8da270e,6392b1c1,4e1c036b,25c83c98,6f6d9be8,863329da,0b153874,7cc72ec2,fbc2dc95,a89c45cb,4ea4e9d5,a4fafa5b,b28479f6,f2252b1c,b7f61016,e5ba7672,130ebfcd,,,f15fe1ee,,32c7478e,2896ad66,,
0,1.0,4,75.0,21.0,246.0,69.0,1.0,33.0,33.0,1.0,1.0,,31.0,3b65d647,512fdf0c,b3ee24fe,631a0f79,25c83c98,7e0ccccf,86b374da,1f89b562,a73ee510,3b08e48b,07678d3e,9b665b9c,0159bf9f,b28479f6,fc29c5a9,b7a016ed,e5ba7672,fd3919f9,21ddcdc9,5840adea,1df3ad93,,3a171ecb,3aebd96a,724b04da,56be3401
1,,64,3.0,7.0,14747.0,38.0,4.0,16.0,25.0,,3.0,,17.0,05db9164,8b0005b7,62acd884,7736c782,25c83c98,fbad5c96,b01d50d5,5b392875,a73ee510,3b08e48b,cd1b7031,0b7afe9e,4d8657a2,07d13a8f,715f1291,7d0949a5,07c540c4,dff11f14,,,c12eabbb,,3a171ecb,af0cb2c3,,
0,,0,2.0,,4317.0,0.0,8.0,0.0,0.0,,1.0,,,68fd1e64,09e68b86,29dbbee7,15c721d8,4cf72387,,f33e4fa1,5b392875,a73ee510,e5330e23,7b5deffb,526eb908,269889be,b28479f6,52baadf5,e71dfc2d,e5ba7672,5aed7436,39e30682,b1252a9d,b4770b64,,32c7478e,2f34b1ef,e8b83407,4a449e4c
0,0.0,1,5.0,0.0,11738.0,490.0,10.0,13.0,140.0,0.0,1.0,,1.0,52f1e825,9819deea,a2b48926,f922efad,4cf72387,7e0ccccf,d385ea68,0b153874,a73ee510,3b08e48b,7940fc2a,b99ddbc8,00e20e7b,b28479f6,1150f5ed,87acb535,e5ba7672,7e32f7a4,,,a4b7004c,ad3062eb,32c7478e,b34f3128,,
1,0.0,53,17.0,4.0,1517.0,87.0,1.0,5.0,11.0,0.0,1.0,0.0,4.0,05db9164,38d50e09,948ee031,b7ab56a2,384874ce,fbad5c96,879ccac6,0b153874,a73ee510,9ca0fba4,e931c5cd,42bee2f2,580817cd,b28479f6,06373944,67b3c631,e5ba7672,fffe2a63,21ddcdc9,b1252a9d,bd074856,,32c7478e,df487a73,001f3601,c27f155b
0,,0,7.0,14.0,3751.0,646.0,0.0,37.0,432.0,,0.0,,14.0,0e78bd46,ae46a29d,770451b6,f922efad,25c83c98,fe6b92e5,01620311,0b153874,a73ee510,5a01afad,922bbb91,4bba7327,ad61640d,b28479f6,cccdd69e,e2e2fcd9,e5ba7672,e32bf683,,,b964dee0,c9d4222a,32c7478e,b34f3128,,
0,1.0,1,14.0,1.0,118.0,1.0,4.0,1.0,32.0,1.0,1.0,,1.0,05db9164,4f25e98b,79bdb97a,bdbe850d,43b19349,,38eb9cf4,0b153874,a73ee510,49d1ad89,7f8ffe57,30ed85b5,46f42a63,07d13a8f,dfab705f,e75cb6ea,e5ba7672,7ef5affa,21ddcdc9,a458ea53,72c8ca0c,,32c7478e,3fdb382b,001f3601,49d68486
0,3.0,1,25.0,9.0,1396.0,39.0,5.0,32.0,37.0,0.0,2.0,,10.0,05db9164,dde11b16,c6616b04,e6996139,25c83c98,3bf701e7,2e8a689b,0b153874,a73ee510,efea433b,e51ddf94,3a802941,3516f6e6,07d13a8f,e28388cc,f4944655,3486227d,43dfe9bd,,,81f8278e,,3a171ecb,772b286f,,
0,,0,37.0,10.0,15.0,,0.0,10.0,10.0,,0.0,,10.0,05db9164,95e2d337,da3ad2bd,a95c56ca,25c83c98,fbad5c96,d7f3ff9f,1f89b562,a73ee510,3b08e48b,29473fc8,359d194a,aa902020,051219e6,003cf364,8023d5ba,776ce399,7b06fafe,d913d8f1,a458ea53,15bb899d,,32c7478e,6c25dad0,2bf691b1,59e91663
0,,0,4.0,,11534.0,,0.0,0.0,1.0,,0.0,,,39af2607,78ccd99e,55f298ba,1de19bc2,25c83c98,fbad5c96,63b7fcf7,1f89b562,a73ee510,3b08e48b,779482a8,624029b0,7d65a908,051219e6,9917ad07,270e2a53,1e88c74f,e7e991cb,21ddcdc9,a458ea53,5ff5ac4a,ad3062eb,32c7478e,d65fa724,875ea8a7,86601e0a
0,,498,,0.0,92.0,,0.0,0.0,0.0,,0.0,,0.0,5bfa8ab5,90081f33,fd22e418,36375a46,43b19349,fbad5c96,6c338953,0b153874,a73ee510,3b08e48b,553ebda3,fb991bf5,49fe3d4e,b28479f6,50b07d60,d1a4e968,776ce399,7da6ea7e,,,9fb07dd2,,be7c41b4,359dd977,,
1,8.0,7,20.0,8.0,5.0,22.0,172.0,21.0,568.0,1.0,21.0,,0.0,05db9164,404660bb,97d1681e,ffe40d5f,25c83c98,7e0ccccf,1c86e0eb,1f89b562,a73ee510,f3b83678,755e4a50,7e7a6264,5978055e,1adce6ef,6ddbba94,e7af7559,e5ba7672,4b17f8a2,21ddcdc9,5840adea,5a49c6db,,32c7478e,faf5d8b3,f0f449dd,984e0db0
0,,4,1.0,1.0,270.0,170.0,1.0,19.0,196.0,,1.0,0.0,1.0,3b65d647,4c2bc594,d032c263,c18be181,25c83c98,fbad5c96,cd98cc3d,0b153874,a73ee510,493b74f2,dcc84468,dfbb09fb,b72482f5,8ceecbc8,7ac43a46,84898b2a,e5ba7672,bc48b783,,,0014c32a,,55dd3565,3b183c5c,,
0,,6,52.0,15.0,383.0,,0.0,21.0,21.0,,0.0,,15.0,05db9164,09e68b86,88290645,0676a23d,25c83c98,fe6b92e5,f14f1abf,0b153874,a73ee510,3b08e48b,7b5deffb,f6d35a1e,269889be,b28479f6,52baadf5,90d6ddcd,776ce399,5aed7436,21ddcdc9,b1252a9d,29d21ab1,,32c7478e,69e4f188,e8b83407,e001324a
0,0.0,57,2.0,6.0,1683.0,550.0,5.0,48.0,412.0,0.0,1.0,0.0,102.0,39af2607,c5fe64d9,fda0b584,13508380,25c83c98,7e0ccccf,295cc387,0b153874,a73ee510,3b08e48b,7d5ece85,ffcedb7a,e4b5ce61,07d13a8f,52b49730,f39f1141,d4bb7bd8,c235abed,4cc48856,a458ea53,fdc724a8,,32c7478e,45ab94c8,46fbac64,c84c4aec
0,,90,,0.0,1455.0,,0.0,6.0,10.0,,0.0,,2.0,05db9164,6f609dc9,d032c263,c18be181,25c83c98,7e0ccccf,315c76f3,37e4aa92,a73ee510,3b08e48b,e51ddf94,dfbb09fb,3516f6e6,07d13a8f,c169c458,84898b2a,776ce399,381bd833,,,0014c32a,,3a171ecb,3b183c5c,,
0,,29,4.0,4.0,12245.0,,0.0,19.0,73.0,,0.0,,4.0,05db9164,3df44d94,d032c263,c18be181,4cf72387,7e0ccccf,81bb0302,5b392875,a73ee510,f918493f,b7094596,dfbb09fb,1f9d2c38,b28479f6,e0052e65,84898b2a,07c540c4,e7648a8f,,,0014c32a,,32c7478e,3b183c5c,,
0,3.0,-1,3.0,2.0,285.0,5.0,6.0,8.0,30.0,1.0,4.0,,5.0,05db9164,73b37f46,cd82408a,eb45e6e4,25c83c98,7e0ccccf,ead731f4,0b153874,a73ee510,3b08e48b,e9c32980,d1fb0874,3fe840eb,ec19f520,f3a94039,6d87c0d4,07c540c4,d1605c46,,,ed01532f,,3a171ecb,8d49fa4b,,
1,,2,3.0,,5091.0,0.0,6.0,0.0,3.0,,5.0,,,5a9ed9b0,4f25e98b,10ee5afb,1d29846e,db679829,,1971812a,0b153874,a73ee510,aed8755c,5307d8e2,5e76bfca,8368e64b,b28479f6,8ab5b746,5fb9ff62,07c540c4,7ef5affa,2e30f394,5840adea,e208a45f,,32c7478e,3fdb382b,001f3601,49d68486
0,,78,8.0,,35203.0,853.0,2.0,0.0,98.0,,1.0,,,05db9164,c41a84c8,d627c43e,759c4a2e,25c83c98,fbad5c96,61beb1aa,0b153874,a73ee510,a5270a71,81a23494,2d15871c,3796b047,b28479f6,55d28d38,9243e635,07c540c4,2b46823a,,,ec5ac7c6,ad3062eb,32c7478e,590b856f,,
1,37.0,113,2815.0,5.0,2.0,3.0,26.0,49.0,78.0,0.0,1.0,,3.0,05db9164,c5c1d6ae,b2de8002,f9a7e394,25c83c98,7e0ccccf,0d00feb3,0b153874,a73ee510,ff4776d6,640d8b63,76517c94,18041128,b28479f6,29a18ba0,afc96aa6,e5ba7672,836a67dd,21ddcdc9,5840adea,c0cd6339,78e2e389,32c7478e,7e60320b,7a402766,ba14bbcb
0,5.0,1,28.0,22.0,11.0,24.0,5.0,22.0,22.0,3.0,3.0,,21.0,05db9164,89ddfee8,7e4ea1b2,bc17b20f,25c83c98,,a6624a99,5b392875,a73ee510,3b08e48b,f161ec47,49a5dd4f,1e18519e,051219e6,d5223973,9fa82d1c,e5ba7672,5bb2ec8e,4b1019ff,a458ea53,40b11f62,,32c7478e,eaa38671,f0f449dd,8b3e7faa
0,,0,1.0,33.0,11774.0,,0.0,1.0,502.0,,0.0,,33.0,5a9ed9b0,2ae0a573,0739daa8,4fbef8bb,4cf72387,7e0ccccf,ca4fd8f8,0b153874,a73ee510,3b08e48b,a0060bca,9148b680,22d23aac,07d13a8f,413cc8c6,64e0265f,776ce399,f2fc99b1,,,38879cfe,ad3062eb,32c7478e,7836b4d5,,
0,,1,14.0,3.0,3008.0,15.0,6.0,5.0,146.0,,3.0,,3.0,68fd1e64,a0e12995,b3693f43,f888df5a,25c83c98,7e0ccccf,fcf0132a,0b153874,a73ee510,aed3d80e,d650f1bd,63314ad3,863f8f8a,07d13a8f,73e2709e,ea1c4696,e5ba7672,1616f155,21ddcdc9,5840adea,67afd8d0,,c7dc6720,e3aea32f,9b3e8820,e75c9ae9
1,0.0,1,27.0,38.0,1499.0,73.0,14.0,35.0,269.0,0.0,4.0,0.0,38.0,8cf07265,04e09220,b1ecc6c4,5dff9b29,4cf72387,fe6b92e5,53ef84c0,0b153874,a73ee510,267caf03,643327e3,2436ff75,478ebe53,07d13a8f,f6b23a53,f4ead43c,e5ba7672,6fc84bfb,,,4f1aa25f,,423fab69,ded4aac9,,
0,,5,44.0,4.0,12143.0,,0.0,4.0,4.0,,0.0,,4.0,05db9164,38d50e09,0c7bb149,a35517fb,25c83c98,3bf701e7,e14874c9,0b153874,7cc72ec2,3b08e48b,636405ac,96fa9c01,31b42deb,07d13a8f,ee569ce2,7ce58da8,776ce399,582152eb,21ddcdc9,5840adea,d1d4f4a9,ad3062eb,3a171ecb,03955d00,001f3601,4e7af834
1,3.0,2,37.0,87.0,190.0,90.0,3.0,49.0,88.0,2.0,2.0,,88.0,68fd1e64,38a947a1,,,43b19349,,d385ea68,0b153874,a73ee510,3b08e48b,7940fc2a,,00e20e7b,07d13a8f,7f1c4567,,d4bb7bd8,95f5c722,,,,,32c7478e,,,
0,,8,8.0,5.0,25660.0,,0.0,3.0,5.0,,0.0,,5.0,05db9164,90081f33,fd22e418,36375a46,25c83c98,7e0ccccf,0bdc3959,0b153874,a73ee510,3b08e48b,c6cb726f,fb991bf5,176d07bc,b28479f6,13f8263b,d1a4e968,1e88c74f,c191a3ff,,,9fb07dd2,,32c7478e,359dd977,,
0,0.0,0,35.0,4.0,190.0,85.0,43.0,18.0,177.0,0.0,3.0,1.0,8.0,05db9164,207b2d81,2b280564,ad5ffc6b,5a3e1872,7e0ccccf,4aa938fc,0b153874,a73ee510,efea433b,7e40f08a,2a064dba,1aa94af3,07d13a8f,0c67c4ca,7d9b60c8,3486227d,395856b0,21ddcdc9,a458ea53,9c3eb598,,32c7478e,c0b8dfd6,001f3601,7a2fb9af
1,2.0,1,19.0,20.0,1.0,20.0,2.0,14.0,20.0,1.0,1.0,0.0,12.0,68fd1e64,06174070,a3829614,b0ed6de7,4cf72387,fe6b92e5,71c23d74,0b153874,a73ee510,c6c8dd7c,ae4c531b,3b917db0,01c2bbc7,cfef1c29,73438c3b,12e989e9,07c540c4,836a11e3,a34d2cf6,5840adea,9179411e,,32c7478e,1793a828,e8b83407,fa3124de
0,1.0,1849,4.0,0.0,28.0,0.0,1.0,0.0,0.0,1.0,1.0,,0.0,be589b51,ef69887a,771a1642,2e946ee2,4cf72387,,5d7d417f,0b153874,a73ee510,50c56209,52d28861,77f29381,a4b04123,b28479f6,902a109f,9fe6f065,07c540c4,4bcc9449,566c492c,5840adea,7b6393e8,,32c7478e,3fdb382b,47907db5,2fc5e3d4
0,0.0,65,,7.0,10346.0,67.0,1.0,16.0,67.0,0.0,1.0,0.0,7.0,8cf07265,68b3edbf,77f2f2e5,d16679b9,4cf72387,7e0ccccf,e465eb54,5b392875,a73ee510,f0c8b1be,01a88896,9f32b866,dfb2a8fa,07d13a8f,fd888b80,31ca40b6,d4bb7bd8,cf1cde40,,,dfcfc3fa,,93bad2c0,aee52b6f,,
0,7.0,164,33.0,12.0,84.0,63.0,8.0,19.0,18.0,1.0,2.0,,18.0,87773c45,58e67aaf,104c93d5,90b69619,25c83c98,7e0ccccf,e3b8f237,0b153874,a73ee510,aed3d80e,1aa6cf31,61ea5878,3b03d76e,1adce6ef,d002b6d9,33a55538,e5ba7672,c21c3e4c,444a605d,b1252a9d,37c3d851,,32c7478e,364442f6,9b3e8820,bdc8589e
0,,10,5.0,3.0,8913.0,68.0,2.0,42.0,168.0,,2.0,0.0,3.0,68fd1e64,1cfdf714,3f850fa0,db781543,25c83c98,7e0ccccf,2555b4d9,0b153874,a73ee510,f9065d00,98579192,3317996d,779f824b,d2dfe871,ca8b2a1a,bc3ccba9,27c07bd6,e88ffc9d,e27c6abe,a458ea53,6b4fc63c,,423fab69,c94ffa50,cb079c2d,d5ca783a
0,,15,9.0,1.0,20553.0,,,12.0,,,,,4.0,05db9164,0b8e9caf,6858baef,3f647607,4cf72387,fbad5c96,b647358a,0b153874,a73ee510,3b08e48b,88731e13,f6148255,2723b688,b28479f6,5340cb84,03b5b1e2,07c540c4,ca6a63cf,,,3b66cfcf,,bcdee96c,08b0ce98,,
0,0.0,-1,,,1539.0,115.0,17.0,20.0,276.0,0.0,5.0,,,68fd1e64,287130e0,9dfde63d,9c9a6068,25c83c98,6f6d9be8,32da4b59,5b392875,a73ee510,eff5602f,9ee336c5,1310a7dd,094e10ad,b28479f6,9efd8b77,b3dc5e07,e5ba7672,891589e7,bdffef68,b1252a9d,33706b2d,,32c7478e,88cba9eb,9b3e8820,1ba54abc
0,0.0,3,,5.0,1920.0,22.0,50.0,5.0,98.0,0.0,4.0,0.0,5.0,68fd1e64,3df44d94,d032c263,c18be181,25c83c98,7e0ccccf,9ec884dc,5b392875,a73ee510,aa6da1ef,5b906b78,dfbb09fb,c95c9034,b28479f6,b96e7224,84898b2a,3486227d,79a92e0a,,,0014c32a,,bcdee96c,3b183c5c,,
0,2.0,0,6.0,2.0,70.0,10.0,248.0,1.0,1034.0,1.0,32.0,,2.0,05db9164,404660bb,f1397040,09003f7b,25c83c98,7e0ccccf,1c86e0eb,0b153874,a73ee510,67eea4ef,755e4a50,0cdb9a18,5978055e,07d13a8f,633f1661,82708081,e5ba7672,4b17f8a2,21ddcdc9,5840adea,4c14738f,,32c7478e,a86c0565,f0f449dd,984e0db0
1,,1,10.0,6.0,11665.0,,0.0,10.0,6.0,,0.0,,6.0,05db9164,38a947a1,7fd859b3,19ae4fbd,25c83c98,,16401b7d,0b153874,a73ee510,3b08e48b,20ec800a,6aa4c9a8,18a5e4b8,cfef1c29,cb0f0e06,b50d9336,1e88c74f,3c4f2d82,,,cc86f2c1,,32c7478e,1793a828,,
0,12.0,1,1.0,15.0,548.0,24.0,12.0,18.0,20.0,2.0,2.0,,16.0,05db9164,0c0567c2,700014ea,560f248f,25c83c98,7e0ccccf,fe4dce68,0b153874,a73ee510,ab9e9acf,68357db6,093a009d,768f6658,07d13a8f,aa39dd42,9e6ff465,e5ba7672,bb983d97,,,5c859cae,,32c7478e,996f5a43,,
1,0.0,152,3.0,3.0,1847.0,96.0,12.0,6.0,11.0,0.0,1.0,0.0,3.0,05db9164,4f25e98b,6d1384bc,74ce146b,4cf72387,7e0ccccf,26817995,a61cc0ef,a73ee510,cf500eab,8b92652b,a4b73157,c5bc951e,b28479f6,8ab5b746,19f6b83c,e5ba7672,7ef5affa,21ddcdc9,b1252a9d,9efd5ec7,,c7dc6720,3fdb382b,001f3601,49d68486
0,0.0,1,9.0,0.0,6431.0,136.0,2.0,6.0,98.0,0.0,1.0,,2.0,05db9164,6887a43c,9b792af9,9c6d05a0,43b19349,,60d4eb86,e8663cb1,a73ee510,07c7b3f7,0ad37b4b,6532318c,f9d99d81,8ceecbc8,4e06592a,2c9d222f,e5ba7672,8f0f692f,21ddcdc9,b1252a9d,cc6a9262,,32c7478e,a5862ce8,445bbe3b,1793fb3f
0,,-1,,,20646.0,,0.0,5.0,8.0,,0.0,,,9a89b36c,09e68b86,0271c22e,caa16f04,25c83c98,,47aa6d2e,0b153874,a73ee510,9d4b7dce,c30e7b00,f993725b,4f8670dc,1adce6ef,dbc5e126,1c3a7247,e5ba7672,5aed7436,21ddcdc9,5840adea,4d2b0d06,,32c7478e,3fdb382b,e8b83407,8ded0b41
0,,14,3.0,2.0,306036.0,,0.0,2.0,105.0,,0.0,,2.0,68fd1e64,09e68b86,cce54c2c,6e8c7c0e,4cf72387,,c642e324,a6d156f4,7cc72ec2,b6900243,82af9502,9e82f486,90dca23e,07d13a8f,36721ddc,e3a83d5c,d4bb7bd8,5aed7436,2b558521,a458ea53,ebfa4c53,,32c7478e,a9d9c151,e8b83407,3a97b421
0,,-1,,,,,,0.0,,,,,,5a9ed9b0,38a947a1,,,4cf72387,7e0ccccf,e7698644,66f29b89,7cc72ec2,3b08e48b,f9d0f35e,,b55434a9,07d13a8f,681a3f32,,2005abd1,19ef42ad,,,,c9d4222a,be7c41b4,,,
1,1.0,2,6.0,2.0,8.0,9.0,1.0,2.0,2.0,1.0,1.0,0.0,2.0,05db9164,f0cf0024,619e87b2,cfc23926,384874ce,7e0ccccf,02914429,5b392875,a73ee510,575cd9b2,419d31d4,c0d8d575,08961fd0,1adce6ef,55dc357b,29a3715b,e5ba7672,b04e4670,21ddcdc9,a458ea53,e54f0804,,423fab69,936da3dd,ea9a246c,27029e68
0,0.0,17,34.0,11.0,1784.0,50.0,1.0,25.0,102.0,0.0,1.0,0.0,11.0,68fd1e64,e77e5e6e,fdd14ae2,8b7d76a3,25c83c98,fbad5c96,15ce37bc,0b153874,a73ee510,25e9e422,ff78732c,07cecd0e,9b656adc,f862f261,903024b9,d08de474,e5ba7672,449d6705,1d1eb838,a458ea53,26e36622,,55dd3565,3fdb382b,33d94071,49d68486
0,0.0,1,7.0,8.0,4501.0,184.0,2.0,4.0,184.0,0.0,1.0,,46.0,05db9164,58e67aaf,8b376137,270b5720,4cf72387,7e0ccccf,67b7679f,0b153874,a73ee510,19feb952,16faa766,8d526153,4422e246,b28479f6,62eca3c0,23c4fd37,07c540c4,c21c3e4c,6301e460,b1252a9d,632bf881,,bcdee96c,18109ace,9b3e8820,070f6cb2
0,,183,3.0,3.0,5778.0,,0.0,3.0,9.0,,0.0,,3.0,39af2607,c5c1d6ae,027b4cc5,9affccc2,25c83c98,6f6d9be8,d2bfca2c,5b392875,a73ee510,3b08e48b,f72b4bd1,7e98747a,01f32ac8,07d13a8f,99153e7d,64223df7,776ce399,836a67dd,21ddcdc9,5840adea,301fc194,,be7c41b4,365def8b,7a402766,00efb483
0,,13,3.0,10.0,48.0,16.0,11.0,10.0,163.0,,3.0,0.0,6.0,05db9164,40ed0c67,61b8caf0,5ef5cf67,25c83c98,7e0ccccf,a7565058,d7c4a8f5,a73ee510,567ba666,69afd526,765cb3ea,84def884,07d13a8f,622c34d8,5c646b1e,e5ba7672,2585827d,21ddcdc9,5840adea,c4c42074,,3a171ecb,42df8359,e8b83407,c0fca43d
0,,1,25.0,22.0,39424.0,66.0,1.0,28.0,60.0,,0.0,,29.0,5a9ed9b0,9b25e48b,f25edca2,418ae7fb,25c83c98,7e0ccccf,a5a83bdd,5b392875,a73ee510,5ea6fa93,f697a983,ad46dc69,e5643e9a,07d13a8f,054ebda1,967bc626,3486227d,7d8c03aa,2442feac,a458ea53,30244f84,,c7dc6720,3a6f67d1,010f6491,f4642e0e
0,,1,13.0,3.0,5646.0,49.0,3.0,3.0,59.0,,1.0,,3.0,8cf07265,558b4efb,40361716,f2159098,25c83c98,fbad5c96,6005554a,062b5529,a73ee510,b1442b2a,c19406bc,842839b9,07fdb6cc,07d13a8f,c1ddc990,9f1d1f70,27c07bd6,c68ebaa0,21ddcdc9,5840adea,16f71b82,ad3062eb,32c7478e,3b183c5c,ea9a246c,2f44e540
1,0.0,1,2.0,2.0,1795.0,4.0,1.0,2.0,2.0,0.0,1.0,,2.0,05db9164,38a947a1,bd4d1b8d,097de257,25c83c98,,788ff59f,0b153874,a73ee510,3b08e48b,9c9d4957,3263408b,9325eab4,07d13a8f,456583e6,c57bda3a,d4bb7bd8,4b0f5ddd,,,6fb7987f,,32c7478e,9b7eed78,,
1,1.0,2,603.0,11.0,2.0,11.0,2.0,11.0,11.0,1.0,2.0,,11.0,05db9164,58e67aaf,f5cdf14a,39cc9792,4cf72387,7e0ccccf,9ff9bbde,0b153874,a73ee510,8c8662e4,f89fe102,5d84eb4a,83e6ca2e,1adce6ef,d002b6d9,a98ec356,07c540c4,c21c3e4c,c79aad78,b1252a9d,ec4a835a,,423fab69,b44bd498,9b3e8820,8fd6bdd6
1,9.0,1,39.0,6.0,48.0,14.0,13.0,30.0,68.0,2.0,4.0,,6.0,be589b51,4f25e98b,761d2b40,5f379ae0,4cf72387,fe6b92e5,9b98e9fc,0b153874,a73ee510,2a47dab8,7f8ffe57,beb94e00,46f42a63,07d13a8f,dfab705f,9066bcfb,e5ba7672,7ef5affa,49463d54,b1252a9d,822be048,c9d4222a,32c7478e,3fdb382b,001f3601,49d68486
0,1.0,12,4.0,2.0,5.0,3.0,25.0,19.0,113.0,1.0,2.0,2.0,2.0,68fd1e64,a5b69ae3,0b793d71,813cb08c,4cf72387,7e0ccccf,468a0854,0b153874,a73ee510,3b08e48b,a60de4e5,f9bf526c,605bbc24,b28479f6,9703aa2f,9ee32e6f,8efede7f,a1654f4f,21ddcdc9,5840adea,7a380bd1,,32c7478e,08b0ce98,2bf691b1,984e0db0
0,0.0,0,21.0,5.0,2865.0,,0.0,31.0,1.0,0.0,0.0,,31.0,ae82ea21,38d50e09,01a0648b,657dc3b9,25c83c98,7e0ccccf,0c41b6a1,0b153874,a73ee510,56ef22e9,4ba74619,11fcf7fa,879fa878,07d13a8f,fa321567,5e1b6b9d,e5ba7672,52b872ed,21ddcdc9,a458ea53,bfeb50f6,,423fab69,df487a73,e8b83407,c27f155b
0,,-1,66.0,29.0,2940.0,87.0,69.0,35.0,82.0,,5.0,0.0,32.0,68fd1e64,1cfdf714,3cb0ff62,9b17f367,43b19349,7e0ccccf,e2de05d6,0b153874,a73ee510,1ce1e29d,b26d847d,59a625a9,38016f21,1adce6ef,f3002fbd,229bf6f4,3486227d,e88ffc9d,edb3d180,a458ea53,5362f5c3,,423fab69,f20c047e,cb079c2d,0facb2ea
1,,370,,3.0,357.0,,0.0,4.0,5.0,,0.0,,3.0,68fd1e64,2ae0a573,af21d90e,dc0a11c7,4cf72387,,ed0714a0,1f89b562,a73ee510,f1b39deb,b85b416c,a4425bd8,c3f71b59,07d13a8f,413cc8c6,41bec2fe,d4bb7bd8,f2fc99b1,,,95ee3d7a,,32c7478e,7836b4d5,,
0,0.0,237,1.0,1.0,4619.0,53.0,17.0,16.0,272.0,0.0,1.0,,1.0,f473b8dc,89ddfee8,f153af65,13508380,25c83c98,3bf701e7,c96de117,37e4aa92,a73ee510,995c2a7f,ad757a5a,99ec4e40,93b18cb5,07d13a8f,59a58e86,13ede1b5,3486227d,ae46962e,55dd3565,b1252a9d,8a93f0a1,ad3062eb,423fab69,45ab94c8,f0f449dd,c84c4aec
0,,0,2.0,3.0,10327.0,648.0,11.0,3.0,127.0,,3.0,,3.0,39af2607,68b3edbf,ad4b77ff,d16679b9,25c83c98,7e0ccccf,b00f5963,c8ddd494,a73ee510,ac82cac0,b91c2548,a2f4e8b5,a03da696,b28479f6,12f48803,89052618,e5ba7672,cf1cde40,,,d4703ebd,,bcdee96c,aee52b6f,,
1,,3,,24.0,1853.0,36.0,10.0,9.0,175.0,,2.0,,24.0,05db9164,38a947a1,03689820,21817e80,25c83c98,7e0ccccf,50a5390e,0b153874,a73ee510,0466803a,159499d1,79b98d3d,4ab361e1,b28479f6,72f85ad5,8e47fca6,e5ba7672,5ba7fffe,,,15fb7955,,32c7478e,71dc4ef2,,
0,4.0,1,2.0,17.0,7.0,4.0,4.0,18.0,18.0,1.0,1.0,3.0,3.0,05db9164,0a519c5c,77f2f2e5,d16679b9,43b19349,fbad5c96,c78204a1,0b153874,a73ee510,3b08e48b,5f5e6091,9f32b866,aa655a2f,07d13a8f,b812f9f2,31ca40b6,27c07bd6,2efa89c6,,,dfcfc3fa,,3a171ecb,aee52b6f,,
0,0.0,10,1.0,0.0,5781.0,164.0,5.0,6.0,160.0,0.0,5.0,,5.0,8cf07265,e112a9de,af5655e7,22504558,4cf72387,7e0ccccf,133643ef,0b153874,a73ee510,64145819,84bc66d0,252162ec,bcb2e77c,1adce6ef,11da3cff,776f5665,e5ba7672,a7cf409e,,,5c7c443c,,32c7478e,8f079aa5,,
0,,2,2.0,3.0,3379.0,,0.0,5.0,4.0,,0.0,,3.0,09ca0b81,287130e0,20fb5e45,aafb54fa,25c83c98,fbad5c96,bf115338,56563555,a73ee510,3b08e48b,41516dc9,2ea11a49,8b11c4b8,1adce6ef,310d155b,b9a4d133,776ce399,891589e7,f30f7842,a458ea53,86a8e85e,c9d4222a,be7c41b4,bc491035,e8b83407,bd2ec696
0,0.0,1,7.0,12.0,3011.0,126.0,5.0,41.0,121.0,0.0,2.0,,12.0,be589b51,d833535f,77f2f2e5,d16679b9,43b19349,fe6b92e5,6978304f,0b153874,a73ee510,fbbf2c95,78f92234,9f32b866,9be66b48,b28479f6,a66dcf27,31ca40b6,e5ba7672,7b49e3d2,,,dfcfc3fa,,3a171ecb,aee52b6f,,
1,2.0,1,3.0,1.0,63.0,1.0,21.0,2.0,108.0,2.0,9.0,2.0,1.0,68fd1e64,e5fb1af3,be0a348d,e0e934af,25c83c98,13718bbd,372a0c4c,0b153874,a73ee510,e8e8c8ac,ec88dd34,7ac672aa,94881fc3,07d13a8f,b5de5956,e3d99bf0,27c07bd6,13145934,42e59f55,5840adea,8f78192f,,3a171ecb,198d16cc,e8b83407,0e2018ec
0,,1,3.0,1.0,563.0,,0.0,5.0,3.0,,0.0,,1.0,05db9164,55e0a784,5b54e5b4,c5699aad,25c83c98,7e0ccccf,dcab49d9,0b153874,a73ee510,34dd9626,cd3a0eb4,c492212b,715b22a3,07d13a8f,45e17a48,1f55226d,1e88c74f,6c5555bd,21ddcdc9,b1252a9d,99712f38,,423fab69,167193c9,e8b83407,ae5fce01
0,,1,4.0,2.0,8684.0,11.0,1.0,3.0,7.0,,1.0,,2.0,05db9164,e5fb1af3,c8b80f97,311f127a,25c83c98,fe6b92e5,372a0c4c,0b153874,a73ee510,6f0b6a04,2e15139e,9ffdd484,94881fc3,07d13a8f,b5de5956,5891d119,d4bb7bd8,13145934,cc4c70c1,a458ea53,cd11300e,ad3062eb,3a171ecb,cf300ce9,001f3601,814b9a6b
0,8.0,1,3.0,14.0,351.0,50.0,8.0,35.0,37.0,1.0,1.0,,18.0,05db9164,e9b8a266,be3b6a18,62169fb6,0942e0a7,7e0ccccf,d55d70ca,5b392875,a73ee510,1d56e466,9cf09d42,6647ec34,f66b043c,b28479f6,fb67e61d,236709b9,e5ba7672,d452c287,,,77799c4f,c9d4222a,32c7478e,5fd07f39,,
1,0.0,-1,,,1398.0,0.0,1.0,0.0,0.0,0.0,1.0,,,05db9164,512fdf0c,98bb788f,e0a2ecca,0942e0a7,7e0ccccf,d01ba955,7b6fecd5,a73ee510,3b08e48b,c0edaa76,167ba71f,34fc0029,07d13a8f,aa322bcf,5e622e84,d4bb7bd8,fd3919f9,21ddcdc9,5840adea,43d01030,,c7dc6720,4acb8523,724b04da,c986348f
1,,74,3.0,4.0,17991.0,32.0,11.0,9.0,98.0,,10.0,,4.0,5a9ed9b0,8947f767,9ea04474,2b0aadf8,25c83c98,6f6d9be8,368f84ee,0b153874,a73ee510,3b08e48b,6dc69f41,4640585e,fca56425,f7c1b33f,7f758956,d8831736,e5ba7672,bd17c3da,bf212c4c,b1252a9d,d4f22efc,,32c7478e,0ac1b18a,010f6491,6d73203e
0,,38,14.0,46.0,6426.0,888.0,12.0,9.0,862.0,,1.0,,46.0,05db9164,95e2d337,0d71b822,3fb81b62,30903e74,7e0ccccf,8f572b5e,0b153874,a73ee510,897188be,434d6c13,28283f53,7301027a,b28479f6,17a3bcd8,9e724f87,e5ba7672,7b06fafe,21ddcdc9,5840adea,07b818d7,,c7dc6720,b2df17ed,c243e98b,33757f80
0,0.0,1,,2.0,14496.0,895.0,3.0,7.0,58.0,0.0,1.0,,2.0,05db9164,9a82ab91,d032c263,c18be181,25c83c98,7e0ccccf,d9f4e70f,0b153874,a73ee510,27f4bf82,da89cb9b,dfbb09fb,165642be,07d13a8f,33d2c881,84898b2a,07c540c4,004fdf10,,,0014c32a,,32c7478e,3b183c5c,,
0,0.0,14,15.0,11.0,4108.0,125.0,4.0,35.0,111.0,0.0,1.0,,14.0,05db9164,e3a0dc66,2ba709bb,7be47200,25c83c98,fe6b92e5,8a850658,0b153874,a73ee510,3094253e,d9b1e3ff,fa5eca9d,cd98af01,07d13a8f,c251e774,22283336,e5ba7672,b608c073,,,fd0e41ce,c9d4222a,c7dc6720,f2e9f0dd,,
1,,18,23.0,,42024.0,,,0.0,,,,,,05db9164,09e68b86,aa8c1539,85dd697c,25c83c98,,b87f4a4a,5b392875,a73ee510,e70742b0,319687c9,d8c29807,62036f49,07d13a8f,801ee1ae,c64d548f,e5ba7672,63cdbb21,cf99e5de,5840adea,5f957280,,32c7478e,1793a828,e8b83407,b7d9c3bc
1,1.0,2,76.0,4.0,0.0,4.0,1.0,4.0,4.0,1.0,1.0,,4.0,05db9164,38a947a1,f1a544c6,9c65ce26,25c83c98,fbad5c96,df5c2d18,0b153874,a73ee510,903f1f14,a7b606c4,8f1a16da,eae197fd,b28479f6,b842e9bb,789e0e3e,e5ba7672,38f08461,,,79fe2943,,bcdee96c,325bcd40,,
0,1.0,0,29.0,5.0,40.0,5.0,1.0,5.0,5.0,1.0,1.0,,5.0,8cf07265,09e68b86,8530c58f,abfc27b2,25c83c98,,197b4575,0b153874,a73ee510,6c47047a,606866a9,8a433ec1,e40e52ae,64c94865,91126f30,cc93bd1d,d4bb7bd8,5aed7436,6d82104d,a458ea53,c1429b47,,3a171ecb,a0634086,e8b83407,9c015713
0,1.0,2921,,0.0,48.0,17.0,20.0,10.0,84.0,1.0,2.0,1.0,0.0,39af2607,4f25e98b,b0874fd0,b696e406,25c83c98,fbad5c96,dc7659bd,0b153874,a73ee510,03e48276,e51ddf94,6536f6f8,3516f6e6,b28479f6,8ab5b746,271d5b6c,27c07bd6,7ef5affa,21ddcdc9,a458ea53,a716bbe2,,3a171ecb,3fdb382b,001f3601,a39e1586
0,,55,10.0,12.0,299.0,,0.0,23.0,26.0,,0.0,,26.0,17f69355,38a947a1,4470baf4,8c8a4c47,25c83c98,7e0ccccf,2a37bb01,5b392875,a73ee510,3b08e48b,61ba19ac,bb669e25,fa17cc68,b28479f6,a3443e75,2b2ce127,776ce399,ade68c22,,,2b796e4a,ad3062eb,be7c41b4,8d365d3b,,
0,2.0,8,6.0,3.0,5.0,3.0,25.0,11.0,722.0,1.0,6.0,,3.0,05db9164,09e68b86,57231f4a,c38a1d7d,25c83c98,fbad5c96,968a6688,0b153874,a73ee510,e851ff7b,f25fe7e9,2849c511,dd183b4c,f7c1b33f,5726b2dc,2b7f6e55,e5ba7672,5aed7436,4a237258,b1252a9d,fd3ca145,c9d4222a,32c7478e,0ea7be91,e8b83407,f610730e
1,1.0,493,155.0,2.0,1.0,0.0,8.0,7.0,45.0,1.0,7.0,,0.0,68fd1e64,78ccd99e,ac203f6f,13508380,25c83c98,7e0ccccf,e24d7cb8,0b153874,a73ee510,6f07d986,03458ded,2d72bfb9,8019075f,07d13a8f,162f3329,eedd265a,e5ba7672,e7e991cb,21ddcdc9,b1252a9d,56b58097,c9d4222a,423fab69,45ab94c8,e8b83407,c84c4aec
0,,35,,,293044.0,,,7.0,,,,,,05db9164,38a947a1,1678e0d8,bd6ffe0f,25c83c98,7e0ccccf,e2ec9176,0b153874,7cc72ec2,3b08e48b,6fc6ad29,704629a2,b0c30eeb,b28479f6,443b0c0b,809c9e0e,e5ba7672,f0959f21,,,6a41d841,,be7c41b4,0ee762c3,,
0,,8,8.0,12.0,39343.0,1820.0,0.0,19.0,318.0,,0.0,,12.0,05db9164,d57c0709,d032c263,c18be181,25c83c98,7e0ccccf,122c542a,0b153874,a73ee510,801e8634,7fee217f,dfbb09fb,6e2907f1,cfef1c29,487ddf17,84898b2a,e5ba7672,3ae505af,,,0014c32a,,423fab69,3b183c5c,,
0,5.0,0,1.0,,92.0,0.0,5.0,0.0,0.0,1.0,1.0,,,05db9164,78ccd99e,bf30cf68,49c94103,30903e74,7e0ccccf,a1eeac3d,1f89b562,a73ee510,12bb8262,2e9d5aa6,975f89b0,0a9ac04c,f862f261,ada14dd8,a9b56248,e5ba7672,e7e991cb,21ddcdc9,a458ea53,0d7a15fd,,32c7478e,fb890da1,33d94071,86174332
1,,0,1.0,,19088.0,11.0,11.0,0.0,89.0,,2.0,,,68fd1e64,c5fe64d9,01ac13ea,f6dbd8fb,4cf72387,6f6d9be8,6cdb3998,062b5529,a73ee510,b173a655,5874c9c9,16a886e7,740c210d,07d13a8f,52b49730,a249bde3,e5ba7672,c235abed,f30f7842,a458ea53,c4b9fb56,8ec974f4,32c7478e,44aeb111,33d94071,df46df55
0,,248,1.0,1.0,79620.0,,,1.0,,,,,1.0,da4eff0f,d833535f,77f2f2e5,d16679b9,25c83c98,fe6b92e5,8f801a1a,1f89b562,7cc72ec2,3b08e48b,f295b28a,9f32b866,f5df7ab9,07d13a8f,943169c2,31ca40b6,d4bb7bd8,281769c2,,,dfcfc3fa,,3a171ecb,aee52b6f,,
0,0.0,0,3.0,2.0,3150.0,21.0,4.0,3.0,24.0,0.0,2.0,,2.0,05db9164,80e26c9b,e346a5fd,85dd697c,4cf72387,,55fc227e,0b153874,a73ee510,b1aa986c,d8d7567b,539c5644,47d6a934,b28479f6,a785131a,aafa191e,e5ba7672,005c6740,21ddcdc9,5840adea,7e5b7cc4,,32c7478e,1793a828,e8b83407,b9809574
0,,0,10.0,2.0,41706.0,84.0,0.0,5.0,49.0,,0.0,,2.0,8cf07265,942f9a8d,d1ffd05c,9df780c1,25c83c98,7e0ccccf,49b74ebc,1f89b562,a73ee510,0e9ead52,c4adf918,f0c1019c,85dbe138,b28479f6,ac182643,52bee03d,d4bb7bd8,1f868fdd,5b885066,a458ea53,35198a67,ad3062eb,32c7478e,30ab4eb4,e8b83407,85fd868a
1,4.0,-1,6.0,6.0,872.0,31.0,37.0,42.0,334.0,1.0,16.0,,6.0,8cf07265,d4bd9877,a55127b0,90044821,4cf72387,3bf701e7,6a858837,0b153874,a73ee510,3b08e48b,eb9eb939,a0015d5d,2b54e95d,07d13a8f,10139ce3,b458da0e,e5ba7672,62acb0f3,,,d7a43622,,423fab69,dcba8699,,
0,,38,,,43205.0,680.0,0.0,2.0,20.0,,0.0,0.0,,68fd1e64,2c8c5f5d,0f09a700,38aca36b,4cf72387,fbad5c96,91282309,0b153874,7cc72ec2,dcbc7c2b,9e511730,25644e7d,04e4a7e0,64c94865,c1124d0c,4c7535f3,3486227d,f5f4ae5b,,,5b6b6b73,,3a171ecb,1793a828,,
0,,0,6.0,6.0,124027.0,,0.0,5.0,19.0,,0.0,,6.0,05db9164,38a947a1,acbabfa5,187dc42d,25c83c98,fbad5c96,e14874c9,51d76abe,7cc72ec2,ff5a1549,636405ac,8d2c704a,31b42deb,07d13a8f,55808bb2,c66a58da,e5ba7672,824dcc94,,,9308de7e,ad3062eb,3a171ecb,9d8b4082,,
1,2.0,6,,,300.0,25.0,2.0,25.0,68.0,1.0,1.0,,,5a9ed9b0,38a947a1,b1b6f323,be4cb064,25c83c98,7e0ccccf,00dd27a6,0b153874,a73ee510,98bd7a24,55065437,d28c687a,80dcea18,1adce6ef,fc42663d,f2a191bd,e5ba7672,c9da8737,,,5911ddcb,,32c7478e,1335030a,,
0,,27,,,112878.0,2106.0,0.0,2.0,95.0,,0.0,,,5a9ed9b0,38a947a1,2d8004c4,40ed41e5,25c83c98,7e0ccccf,4d9d55ae,5b392875,7cc72ec2,3b08e48b,55065437,ad972965,80dcea18,07d13a8f,c68ba31d,1206a8a1,d4bb7bd8,e96a7df2,,,54d8bb06,,3a171ecb,a415643d,,
0,0.0,3001,2.0,,3134.0,47.0,1.0,0.0,1.0,0.0,1.0,0.0,,05db9164,403ea497,2cbec47f,3e2bfbda,25c83c98,,19672560,0b153874,a73ee510,a8d1ae09,2591ca7a,21a23bfe,9b7d472e,07d13a8f,e3209fc2,587267a3,3486227d,a78bd508,21ddcdc9,5840adea,c2a93b37,,c7dc6720,1793a828,e8b83407,2fede552
1,0.0,179,5.0,1.0,1464.0,6.0,70.0,6.0,16.0,0.0,10.0,,3.0,68fd1e64,404660bb,f1397040,09003f7b,25c83c98,7e0ccccf,1c86e0eb,5b392875,a73ee510,67eea4ef,755e4a50,0cdb9a18,5978055e,1adce6ef,6ddbba94,82708081,e5ba7672,4b17f8a2,21ddcdc9,5840adea,4c14738f,,32c7478e,a86c0565,f0f449dd,984e0db0
1,,1,7.0,2.0,2910.0,2.0,301.0,3.0,54.0,,15.0,0.0,2.0,8cf07265,942f9a8d,3a3d6eeb,eabe170f,25c83c98,6f6d9be8,49b74ebc,0b153874,a73ee510,0e9ead52,c4adf918,a66cfe4b,85dbe138,07d13a8f,a8e962af,a3d7b1d6,e5ba7672,1f868fdd,fc134659,a458ea53,bbcf650c,,32c7478e,75b9c133,9d93af03,e438a496
0,0.0,0,8.0,6.0,125.0,122.0,5.0,34.0,107.0,0.0,3.0,,24.0,5a9ed9b0,c5e4f7c9,,,25c83c98,7e0ccccf,95402f9a,64523cfa,a73ee510,5162b19c,c82f1813,,949ea585,b28479f6,b16ae607,,e5ba7672,ac02dc99,,,,c9d4222a,32c7478e,,,
0,0.0,0,5.0,6.0,6461.0,93.0,19.0,7.0,37.0,0.0,1.0,1.0,7.0,68fd1e64,09e68b86,5f8d9359,2628b8d6,25c83c98,13718bbd,53e14bd5,0b153874,a73ee510,97d3ddaa,319687c9,de2ecc9c,62036f49,cfef1c29,18847041,62675893,3486227d,5aed7436,b1fb78cc,a458ea53,be01d6b1,,3a171ecb,b1aad66f,e8b83407,3df61e3d
1,0.0,2,1.0,11.0,2119.0,79.0,6.0,2.0,114.0,0.0,3.0,1.0,11.0,05db9164,2ae0a573,4993b2b2,9ab05b8f,25c83c98,7e0ccccf,9e8dab66,0b153874,a73ee510,5ba575e7,2d9eed4d,bdf9cff8,949ea585,07d13a8f,413cc8c6,fb2ac6b5,3486227d,f2fc99b1,,,0fbced35,ad3062eb,32c7478e,d91ea8bd,,
0,0.0,17,5.0,7.0,6288.0,,0.0,42.0,1.0,0.0,0.0,,35.0,5a9ed9b0,62e9e9bf,,,25c83c98,7e0ccccf,f74ed3c0,0b153874,a73ee510,39046df2,e90cbbe1,,a4c7bffd,07d13a8f,de829bed,,e5ba7672,d2651d6e,,,,,32c7478e,,,
0,,2,23.0,20.0,148.0,,0.0,20.0,20.0,,0.0,,20.0,68fd1e64,09e68b86,7edab412,f1d06e8a,43b19349,,16401b7d,0b153874,a73ee510,3b08e48b,20ec800a,0a02e48e,18a5e4b8,1adce6ef,dbc5e126,e2bc04da,776ce399,5aed7436,0053530c,a458ea53,1de5dd94,,32c7478e,43fe299c,f0f449dd,f3b1f00d
0,,19,535.0,7.0,61968.0,,0.0,7.0,2.0,,0.0,,7.0,05db9164,8ab240be,145f2f75,82a61820,25c83c98,7e0ccccf,ff08f605,0b153874,7cc72ec2,ec4d75ea,6939835e,7161e106,dc1d72e4,1adce6ef,28883800,bb6d240e,e5ba7672,ca533012,21ddcdc9,5840adea,5fe17899,,72592995,cafb4e4d,e8b83407,99f4f64c
0,,0,113.0,3.0,3036.0,575.0,2.0,3.0,214.0,,1.0,,3.0,05db9164,0468d672,628b07b0,b63c0277,25c83c98,7e0ccccf,0d339a25,c8ddd494,a73ee510,1722d4c8,7d756b25,0c87b3e9,6f833c7a,1adce6ef,4f3b3616,48af915a,07c540c4,9880032b,21ddcdc9,5840adea,34cc61bb,c9d4222a,32c7478e,e5ed7da2,ea9a246c,984e0db0
1,0.0,1,1.0,1.0,1607.0,12.0,1.0,12.0,15.0,0.0,1.0,,12.0,be589b51,aa8fcc21,4255f8fd,7501d94a,25c83c98,fe6b92e5,0492c809,1f89b562,a73ee510,13ba96b0,ba0f9e8a,887a0c20,4e4dd817,07d13a8f,a4f91020,022714ba,1e88c74f,3972b4ed,,,d1aa4512,,32c7478e,9257f75f,,
1,1.0,0,6.0,3.0,0.0,0.0,19.0,3.0,3.0,1.0,9.0,0.0,0.0,05db9164,09e68b86,db151f8b,f1b645fc,25c83c98,,b87f4a4a,0b153874,a73ee510,e70742b0,319687c9,af6ad6b6,62036f49,f862f261,1dca7862,05a97a3c,3486227d,5aed7436,54591762,a458ea53,4a2c3526,,32c7478e,1793a828,e8b83407,1a02cbe1
0,0.0,22,6.0,22.0,203.0,153.0,80.0,18.0,508.0,0.0,11.0,0.0,22.0,05db9164,e5fb1af3,7e1ad1fe,46ec0a38,43b19349,7e0ccccf,24c48926,0b153874,a73ee510,afa26c81,9f0003f4,651d80c6,5afd9e51,07d13a8f,b5de5956,72401022,3486227d,13145934,55dd3565,5840adea,bf647035,,32c7478e,1481ceb4,e8b83407,988b0775
0,1.0,-1,,,138.0,0.0,1.0,0.0,0.0,1.0,1.0,,,be589b51,b46aceb6,,,43b19349,,17cdc396,0b153874,a73ee510,75d852fc,d79cc967,,115d29f4,07d13a8f,217d99f2,,d4bb7bd8,908eaeb8,,,,,32c7478e,,,
================================================
FILE: examples/gen_tfrecords.py
================================================
import tensorflow as tf
def make_example(line, sparse_feature_name, dense_feature_name, label_name):
    """Convert one ``(index, row)`` pair into a ``tf.train.Example``.

    :param line: pair whose second element is indexable by feature name;
        ``write_tfrecord`` passes the items yielded by ``df.iterrows()``.
    :param sparse_feature_name: iterable of feature names whose values are
        cast with ``int`` and stored as single-element int64 lists.
    :param dense_feature_name: iterable of feature names whose values are
        stored as single-element float lists.
    :param label_name: name of the label entry, stored as a single-element
        float list.
    :return: ``tf.train.Example`` holding one feature per requested name.
    """
    row = line[1]
    features = {}

    # Sparse features go into int64 lists.
    for feat in sparse_feature_name:
        int_values = tf.train.Int64List(value=[int(row[feat])])
        features[feat] = tf.train.Feature(int64_list=int_values)

    # Dense features go into float lists.
    for feat in dense_feature_name:
        float_values = tf.train.FloatList(value=[row[feat]])
        features[feat] = tf.train.Feature(float_list=float_values)

    # The label is written last, so it wins if its name collides with a feature.
    label_values = tf.train.FloatList(value=[row[label_name]])
    features[label_name] = tf.train.Feature(float_list=label_values)

    return tf.train.Example(features=tf.train.Features(feature=features))
def write_tfrecord(filename, df, sparse_feature_names, dense_feature_names, label_name):
    """Serialize every row of ``df`` into a TFRecord file.

    :param filename: path of the TFRecord file to create.
    :param df: pandas DataFrame; each row becomes one serialized ``tf.train.Example``.
    :param sparse_feature_names: columns stored as int64 features.
    :param dense_feature_names: columns stored as float features.
    :param label_name: label column, stored as a float feature.
    :return: None
    """
    # ``tf.python_io`` is not available at the top level of TF 2.x, where the writer
    # lives under ``tf.io``; fall back to the old location for TF 1.x releases without it.
    io_module = getattr(tf, 'io', None)
    if io_module is not None and hasattr(io_module, 'TFRecordWriter'):
        writer_cls = io_module.TFRecordWriter
    else:
        writer_cls = tf.python_io.TFRecordWriter

    writer = writer_cls(filename)
    try:
        for line in df.iterrows():
            ex = make_example(line, sparse_feature_names, dense_feature_names, label_name)
            writer.write(ex.SerializeToString())
    finally:
        # Always release the file handle, even when a row fails to convert.
        writer.close()
# write_tfrecord('./criteo_sample.tr.tfrecords',train,sparse_features,dense_features,'label')
# write_tfrecord('./criteo_sample.te.tfrecords',test,sparse_features,dense_features,'label')
================================================
FILE: examples/movielens_age_vocabulary.csv
================================================
1,1
2,18
3,25
4,35
5,45
6,50
7,56
================================================
FILE: examples/movielens_sample.txt
================================================
user_id,movie_id,rating,timestamp,title,genres,gender,age,occupation,zip
3299,235,4,968035345,Ed Wood (1994),Comedy|Drama,F,25,4,19119
3630,3256,3,966536874,Patriot Games (1992),Action|Thriller,M,18,4,77005
517,105,4,976203603,"Bridges of Madison County, The (1995)",Drama|Romance,F,25,14,55408
785,2115,3,975430389,Indiana Jones and the Temple of Doom (1984),Action|Adventure,M,18,19,29307
5848,909,5,957782527,"Apartment, The (1960)",Comedy|Drama,M,50,20,20009
2996,2799,1,972769867,Problem Child 2 (1991),Comedy,M,18,0,63011
3087,837,5,969738869,Matilda (1996),Children's|Comedy,F,1,1,90802
872,3092,5,975273310,Chushingura (1962),Drama,M,50,1,20815
4094,529,5,966223349,Searching for Bobby Fischer (1993),Drama,M,25,17,49017
1868,3508,3,974694703,"Outlaw Josey Wales, The (1976)",Western,M,50,11,92346
2913,1387,5,971769808,Jaws (1975),Action|Horror,F,35,20,98119
380,3481,5,976316283,High Fidelity (2000),Comedy,M,25,2,92024
2073,1784,5,974759084,As Good As It Gets (1997),Comedy|Drama,F,18,4,13148
80,2059,3,977788576,"Parent Trap, The (1998)",Children's|Drama,M,56,1,49327
3679,2557,1,976298130,I Stand Alone (Seul contre tous) (1998),Drama,M,25,4,68108
2077,788,3,980013556,"Nutty Professor, The (1996)",Comedy|Fantasy|Romance|Sci-Fi,M,18,0,55112
6036,2085,4,956716684,101 Dalmatians (1961),Animation|Children's,F,25,15,32603
3675,532,3,966363610,Serial Mom (1994),Comedy|Crime|Horror,M,35,7,06680
4566,3683,4,964489599,Blood Simple (1984),Drama|Film-Noir,M,35,17,19473
2996,3763,3,972413564,F/X (1986),Action|Crime|Thriller,M,18,0,63011
5831,2458,1,957898337,Armed and Dangerous (1986),Comedy|Crime,M,25,1,92120
1869,1244,2,974695654,Manhattan (1979),Comedy|Drama|Romance,M,45,14,95148
5389,2657,3,960328279,"Rocky Horror Picture Show, The (1975)",Comedy|Horror|Musical|Sci-Fi,M,45,7,01905
1391,1535,3,974851275,Love! Valour! Compassion! (1997),Drama|Romance,M,35,15,20723
3123,2407,3,969324381,Cocoon (1985),Comedy|Sci-Fi,M,25,2,90401
4694,159,3,963602574,Clockers (1995),Drama,M,56,7,40505
1680,1988,3,974709821,Hello Mary Lou: Prom Night II (1987),Horror,M,25,20,95380
2002,1945,4,974677761,On the Waterfront (1954),Crime|Drama,F,56,13,02136-1522
3430,2690,4,979949863,"Ideal Husband, An (1999)",Comedy,F,45,1,15208
425,471,4,976284972,"Hudsucker Proxy, The (1994)",Comedy|Romance,M,25,12,55303
1841,2289,2,974699637,"Player, The (1992)",Comedy|Drama,M,18,0,95037
4964,2348,4,962619587,Sid and Nancy (1986),Drama,M,35,0,94110
4520,2160,4,964883648,Rosemary's Baby (1968),Horror|Thriller,M,25,4,45810
1265,2396,4,1011716691,Shakespeare in Love (1998),Comedy|Romance,F,18,20,49321
2496,1278,5,974435324,Young Frankenstein (1974),Comedy|Horror,M,50,1,37932
5511,2174,4,959787754,Beetlejuice (1988),Comedy|Fantasy,M,45,1,92407
621,833,1,975799925,High School High (1996),Comedy,M,18,4,93560
3045,2762,5,970189524,"Sixth Sense, The (1999)",Thriller,M,45,1,90631
2050,2546,4,975522689,"Deep End of the Ocean, The (1999)",Drama,F,35,3,99504
613,32,4,975812238,Twelve Monkeys (1995),Drama|Sci-Fi,M,35,20,10562
366,1077,5,978471241,Sleeper (1973),Comedy|Sci-Fi,M,50,15,55126
5108,367,4,962338215,"Mask, The (1994)",Comedy|Crime|Fantasy,F,25,9,93940
4502,1960,4,965094644,"Last Emperor, The (1987)",Drama|War,M,50,0,01379
5512,1801,5,959713840,"Man in the Iron Mask, The (1998)",Action|Drama|Romance,F,25,17,01701
1861,2642,2,974699627,Superman III (1983),Action|Adventure|Sci-Fi,M,50,16,92129
1667,1240,4,975016698,"Terminator, The (1984)",Action|Sci-Fi|Thriller,M,50,16,98516
753,434,3,975460449,Cliffhanger (1993),Action|Adventure|Crime,M,1,10,42754
1836,2736,5,974826228,Brighton Beach Memoirs (1986),Comedy,M,25,0,10016
5626,474,5,959052158,In the Line of Fire (1993),Action|Thriller,M,56,16,32043
1601,1396,4,978576948,Sneakers (1992),Crime|Drama|Sci-Fi,M,25,12,83001
4725,1100,4,963369546,Days of Thunder (1990),Action|Romance,M,35,5,96707-1321
2837,2396,5,972571456,Shakespeare in Love (1998),Comedy|Romance,M,18,0,49506
1776,3882,4,1001558470,Bring It On (2000),Comedy,M,25,0,45801
2820,457,2,972662398,"Fugitive, The (1993)",Action|Thriller,F,35,0,02138
1834,2288,3,1038179198,"Thing, The (1982)",Action|Horror|Sci-Fi|Thriller,M,35,5,10990
284,2716,4,976570902,Ghostbusters (1984),Comedy|Horror,M,25,12,91910
2744,588,1,973215985,Aladdin (1992),Animation|Children's|Comedy|Musical,M,18,17,53818
881,4,2,975264028,Waiting to Exhale (1995),Comedy|Drama,M,18,14,76401
2211,916,3,974607067,Roman Holiday (1953),Comedy|Romance,M,45,6,01950
2271,2671,4,1007158806,Notting Hill (1999),Comedy|Romance,M,50,14,13210
1010,2953,1,975222613,Home Alone 2: Lost in New York (1992),Children's|Comedy,M,25,0,10310
1589,2594,4,974735454,Open Your Eyes (Abre los ojos) (1997),Drama|Romance|Sci-Fi,M,25,0,95136
1724,597,5,976441106,Pretty Woman (1990),Comedy|Romance,M,18,4,00961
2590,2097,3,973840056,Something Wicked This Way Comes (1983),Children's|Horror,M,18,4,94044
1717,1352,3,1009256707,Albino Alligator (1996),Crime|Thriller,F,50,6,30307
1391,3160,2,974850796,Magnolia (1999),Drama,M,35,15,20723
1941,1263,3,974954220,"Deer Hunter, The (1978)",Drama|War,M,35,17,94550
3526,2867,4,966906064,Fright Night (1985),Comedy|Horror,M,35,2,62263-3004
5767,198,3,958192148,Strange Days (1995),Action|Crime|Sci-Fi,M,25,2,75287
5355,590,4,960596927,Dances with Wolves (1990),Adventure|Drama|Western,M,56,0,78232
5788,156,4,958108785,Blue in the Face (1995),Comedy,M,25,0,92646
1078,1307,4,974938851,When Harry Met Sally... (1989),Comedy|Romance,F,45,9,95661
3808,61,2,965973222,Eye for an Eye (1996),Drama|Thriller,M,25,7,60010
974,3897,4,975106398,Almost Famous (2000),Comedy|Drama,M,35,19,94930
5153,1290,4,961972292,Some Kind of Wonderful (1987),Drama|Romance,M,25,7,60046
5732,2115,3,958434069,Indiana Jones and the Temple of Doom (1984),Action|Adventure,F,25,11,02111
4627,2478,3,964110136,Three Amigos! (1986),Comedy|Western,M,56,1,45224
1884,1831,2,975648062,Lost in Space (1998),Action|Sci-Fi|Thriller,M,45,20,93108
4284,517,4,965277546,Rising Sun (1993),Action|Drama|Mystery,M,50,7,40601
1383,468,2,975979732,"Englishman Who Went Up a Hill, But Came Down a Mountain, The (1995)",Comedy|Romance,F,25,7,19806
2230,2873,3,974599097,Lulu on the Bridge (1998),Drama|Mystery|Romance,F,45,1,60302
2533,2266,4,974055724,"Butcher's Wife, The (1991)",Comedy|Romance,F,25,3,49423
6040,3224,5,956716750,Woman in the Dunes (Suna no onna) (1964),Drama,M,25,6,11106
4384,2918,5,965171739,Ferris Bueller's Day Off (1986),Comedy,M,25,0,43623
5156,3688,3,961946487,Porky's (1981),Comedy,M,18,14,10024
615,296,3,975805801,Pulp Fiction (1994),Crime|Drama,M,50,17,32951
2753,3045,3,973198964,Peter's Friends (1992),Comedy|Drama,F,50,20,27516
2438,1125,5,974259943,"Return of the Pink Panther, The (1974)",Comedy,M,35,1,22903
5746,1242,4,958354460,Glory (1989),Action|Drama|War,M,18,15,94061
5157,3462,5,961944604,Modern Times (1936),Comedy,M,35,1,74012
3402,1252,5,967433929,Chinatown (1974),Film-Noir|Mystery|Thriller,M,35,20,30306
76,593,5,977847255,"Silence of the Lambs, The (1991)",Drama|Thriller,M,35,7,55413
2067,1019,3,974658834,"20,000 Leagues Under the Sea (1954)",Adventure|Children's|Fantasy|Sci-Fi,M,50,16,06430
2181,2020,3,979353437,Dangerous Liaisons (1988),Drama|Romance,M,25,0,45245
3947,593,5,965691680,"Silence of the Lambs, The (1991)",Drama|Thriller,M,25,0,90019
546,218,4,976069421,Boys on the Side (1995),Comedy|Drama,F,25,0,37211
1246,3030,5,1032056405,Yojimbo (1961),Comedy|Drama|Western,M,18,4,98225
4214,3186,5,965319143,"Girl, Interrupted (1999)",Drama,F,25,0,20121
2841,680,3,982805796,Alphaville (1965),Sci-Fi,M,50,12,98056
4205,3175,4,965321085,Galaxy Quest (1999),Adventure|Comedy|Sci-Fi,F,25,15,87801
1120,1097,4,974911354,E.T. the Extra-Terrestrial (1982),Children's|Drama|Fantasy|Sci-Fi,M,18,4,95616
5371,3194,3,960481000,"Way We Were, The (1973)",Drama,M,25,11,55408
2695,1278,5,973310827,Young Frankenstein (1974),Comedy|Horror,M,35,11,46033
3312,520,2,976673070,Robin Hood: Men in Tights (1993),Comedy,F,18,4,90039
5039,1792,1,962513044,U.S. Marshalls (1998),Action|Thriller,F,35,4,97068
4655,2146,3,963903103,St. Elmo's Fire (1985),Drama|Romance,F,25,1,92037
3558,1580,5,966802528,Men in Black (1997),Action|Adventure|Comedy|Sci-Fi,M,18,17,66044
506,3354,1,976208080,Mission to Mars (2000),Sci-Fi,M,25,16,55103-1006
3568,1230,3,966745594,Annie Hall (1977),Comedy|Romance,M,25,0,98503
2943,1197,5,971319983,"Princess Bride, The (1987)",Action|Adventure|Comedy|Romance,M,35,12,95864
716,737,3,982881364,Barb Wire (1996),Action|Sci-Fi,M,18,4,98188
5964,454,3,956999469,"Firm, The (1993)",Drama|Thriller,M,18,5,97202
4802,1208,4,996034747,Apocalypse Now (1979),Drama|War,M,56,1,40601
1106,3624,4,974920622,Shanghai Noon (2000),Action,M,18,4,90241
3410,2565,3,967419652,"King and I, The (1956)",Musical,M,35,1,20653
1273,3095,5,974814536,"Grapes of Wrath, The (1940)",Drama,M,35,2,19123
1706,1916,4,974709448,Buffalo 66 (1998),Action|Comedy|Drama,M,25,20,19134
4889,590,5,962909224,Dances with Wolves (1990),Adventure|Drama|Western,M,18,4,63108
4966,2100,3,962609782,Splash (1984),Comedy|Fantasy|Romance,M,50,14,55407
4238,1884,4,965343416,Fear and Loathing in Las Vegas (1998),Comedy|Drama,M,35,16,44691
5365,1042,3,960502974,That Thing You Do! (1996),Comedy,M,18,12,90250
415,1302,3,977501743,Field of Dreams (1989),Drama,F,35,0,55406
4658,1009,5,963966553,Escape to Witch Mountain (1975),Adventure|Children's|Fantasy,M,25,4,99163
854,345,3,975357801,"Adventures of Priscilla, Queen of the Desert, The (1994)",Comedy|Drama,F,25,16,44092
2857,436,4,972509362,Color of Night (1994),Drama|Thriller,M,25,0,10469
1835,1330,4,974878241,April Fool's Day (1986),Comedy|Horror,M,25,19,11501
1321,2240,3,974778494,My Bodyguard (1980),Drama,F,25,14,34639
3274,3698,2,979767184,"Running Man, The (1987)",Action|Adventure|Sci-Fi,M,25,20,02062
5893,2144,3,957470619,Sixteen Candles (1984),Comedy,M,25,7,02139
3436,2724,3,967328026,Runaway Bride (1999),Comedy|Romance,M,35,0,98503
3315,2918,5,967942960,Ferris Bueller's Day Off (1986),Comedy,M,25,12,78731
5056,2700,5,962488280,"South Park: Bigger, Longer and Uncut (1999)",Animation|Comedy,M,45,1,16673
5256,208,2,961271616,Waterworld (1995),Action|Adventure,M,25,16,30269
4290,1193,4,965274348,One Flew Over the Cuckoo's Nest (1975),Drama,M,25,17,98661
1010,1379,2,975220259,Young Guns II (1990),Action|Comedy|Western,M,25,0,10310
829,904,4,975368038,Rear Window (1954),Mystery|Thriller,M,1,19,53711
5953,480,4,957143581,Jurassic Park (1993),Action|Adventure|Sci-Fi,M,1,10,21030
4732,3016,4,963332896,Creepshow (1982),Horror,M,25,14,24450
4815,3181,5,972240802,Titus (1999),Drama,F,50,18,04849
1164,1894,2,1004486985,Six Days Seven Nights (1998),Adventure|Comedy|Romance,F,25,19,90020
4373,3167,5,965180829,Carnal Knowledge (1971),Drama,M,50,12,32920
5293,1374,4,961055887,Star Trek: The Wrath of Khan (1982),Action|Adventure|Sci-Fi,M,25,12,95030
1579,3101,4,981272057,Fatal Attraction (1987),Thriller,M,25,0,60201
2600,3147,5,973804787,"Green Mile, The (1999)",Drama|Thriller,M,25,14,19312
1283,480,4,974793389,Jurassic Park (1993),Action|Adventure|Sci-Fi,F,18,1,94607
3242,3062,5,968341175,"Longest Day, The (1962)",Action|Drama|War,M,50,13,94089
3618,3374,3,967116272,Daughters of the Dust (1992),Drama,M,56,17,22657
3762,1337,4,966434517,"Body Snatcher, The (1945)",Horror,M,50,6,11746
1015,1184,3,975018699,Mediterraneo (1991),Comedy|War,M,35,3,11220
4645,2344,5,963976808,Runaway Train (1985),Action|Adventure|Drama|Thriller,F,50,6,48094
3184,1397,4,968709039,Bastard Out of Carolina (1996),Drama,F,25,18,21214
1285,1794,4,974833328,Love and Death on Long Island (1997),Comedy|Drama,M,35,4,98125
5521,3354,2,959833154,Mission to Mars (2000),Sci-Fi,F,25,6,02118
1472,2278,3,974767792,Ronin (1998),Action|Crime|Thriller,M,25,7,90248
5630,21,4,980085414,Get Shorty (1995),Action|Comedy|Drama,M,35,17,06854
3710,3033,5,966272980,Spaceballs (1987),Comedy|Sci-Fi,M,1,10,02818
192,761,1,977028390,"Phantom, The (1996)",Adventure,M,18,1,10977
1285,1198,5,974880310,Raiders of the Lost Ark (1981),Action|Adventure,M,35,4,98125
2174,1046,4,974613044,Beautiful Thing (1996),Drama|Romance,M,50,12,87505
635,1270,4,975768106,Back to the Future (1985),Comedy|Sci-Fi,M,56,17,33785
910,412,5,975207742,"Age of Innocence, The (1993)",Drama,F,50,0,98226
1752,2021,4,975729332,Dune (1984),Fantasy|Sci-Fi,M,25,3,96813
1408,198,4,974762924,Strange Days (1995),Action|Crime|Sci-Fi,M,25,0,90046
4738,1242,4,963279051,Glory (1989),Action|Drama|War,M,56,1,23608
1503,1971,2,974748897,"Nightmare on Elm Street 4: The Dream Master, A (1988)",Horror,M,25,12,92688
3053,1296,3,970601837,"Room with a View, A (1986)",Drama|Romance,F,25,3,55102
3471,3614,2,973297828,Honeymoon in Vegas (1992),Comedy|Romance,M,18,4,80302
678,1972,3,988638700,"Nightmare on Elm Street 5: The Dream Child, A (1989)",Horror,M,25,0,34952
3483,2561,3,986327282,True Crime (1999),Crime|Thriller,F,45,7,30260
3910,3108,5,965756244,"Fisher King, The (1991)",Comedy|Drama|Romance,M,25,20,91505
182,1089,1,977085647,Reservoir Dogs (1992),Crime|Thriller,M,18,4,03052
1755,1653,3,1036917836,Gattaca (1997),Drama|Sci-Fi|Thriller,F,18,4,77005
3589,70,2,966658567,From Dusk Till Dawn (1996),Action|Comedy|Crime|Horror|Thriller,F,45,0,80010
471,3481,4,976222483,High Fidelity (2000),Comedy,M,35,7,08904
1141,813,2,974878678,Larger Than Life (1996),Comedy,F,25,3,84770
5227,1196,2,961476022,Star Wars: Episode V - The Empire Strikes Back (1980),Action|Adventure|Drama|Sci-Fi|War,M,18,10,64050
1303,2344,2,974837844,Runaway Train (1985),Action|Adventure|Drama|Thriller,M,25,19,94111
5080,3102,5,962412804,Jagged Edge (1985),Thriller,F,50,12,95472
2023,1012,4,1006290836,Old Yeller (1957),Children's|Drama,M,18,4,56001
3759,2151,5,966094413,"Gods Must Be Crazy II, The (1989)",Comedy,M,35,6,54751
1685,2664,2,974709721,Invasion of the Body Snatchers (1956),Horror|Sci-Fi,M,35,12,95833
4715,1221,4,963508830,"Godfather: Part II, The (1974)",Action|Crime|Drama,M,25,2,97205
1591,350,5,974742941,"Client, The (1994)",Drama|Mystery|Thriller,M,50,7,26501
4227,3635,3,965411938,"Spy Who Loved Me, The (1977)",Action,M,25,19,11414-2520
1908,36,5,974697744,Dead Man Walking (1995),Drama,M,56,13,95129
5365,1892,4,960503255,"Perfect Murder, A (1998)",Mystery|Thriller,M,18,12,90250
1579,2420,4,981272235,"Karate Kid, The (1984)",Drama,M,25,0,60201
1866,3948,5,974753321,Meet the Parents (2000),Comedy,M,25,7,94043
4238,3543,4,965415533,Diner (1982),Comedy|Drama,M,35,16,44691
3590,2000,5,966657892,Lethal Weapon (1987),Action|Comedy|Crime|Drama,F,18,15,02115
3401,3256,5,980115327,Patriot Games (1992),Action|Thriller,M,35,7,76109
3705,540,2,966287116,Sliver (1993),Thriller,M,45,7,30076
4973,1246,3,962607149,Dead Poets Society (1989),Drama,F,56,2,949702
4947,380,4,962651180,True Lies (1994),Action|Adventure|Comedy|Romance,M,35,17,90035
2346,1416,4,974413811,Evita (1996),Drama|Musical,F,1,10,48105
1427,3596,3,974840560,Screwed (2000),Comedy,M,25,12,21401
3868,1626,3,965855033,Fire Down Below (1997),Action|Drama|Thriller,M,18,12,73112
249,2369,3,976730191,Desperately Seeking Susan (1985),Comedy|Romance,F,18,14,48126
5720,349,4,958503395,Clear and Present Danger (1994),Action|Adventure|Thriller,M,25,0,60610
877,1485,3,975270899,Liar Liar (1997),Comedy,M,25,0,90631
================================================
FILE: examples/run_all.sh
================================================
#!/usr/bin/env bash
function run_py(){
    # Run every *.py file in the current directory with the `python` found on PATH and
    # log each result under the $python_version label set by the caller.
    # On the first failing script the whole shell exits with status 255, which is the
    # status bash produced for the former `exit -1`.
    local code_path=./
    local file
    # Use a glob instead of `ls` + `=~ .py`: the unanchored regex also matched names such
    # as __pycache__ or *.pyc, and word-splitting `ls` output broke on names with spaces.
    for file in "$code_path"*.py
    do
        # When nothing matches, the pattern is left unexpanded; skip anything that is
        # not a regular file.
        [ -f "$file" ] || continue
        if python "$file"
        then
            echo "run $file succeed in $python_version"
        else
            echo "run $file failed in $python_version"
            exit 255
        fi
    done
}
# Pass 1: activate the "base" environment, install the package from the parent
# directory, then run every example. python_version is only the label that run_py
# prints in its log lines.
## python3
python_version=python3
source activate base
# setup.py is invoked from one directory above examples/.
cd ..
python setup.py install
cd ./examples
run_py
# Pass 2: the same steps inside the "py27" environment.
#python2
python_version=python2
source activate py27
cd ..
python setup.py install
cd ./examples
run_py
# Reached only when both passes finished: run_py exits the script on the first failure.
echo "all examples run succeed in python2.7"
echo "all examples run succeed in python3.6"
echo "all examples run succeed in python2.7 and python3.6"
================================================
FILE: examples/run_classification_criteo.py
================================================
import pandas as pd
from sklearn.metrics import log_loss, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from deepctr.models import DeepFM
from deepctr.feature_column import SparseFeat, DenseFeat, get_feature_names
if __name__ == "__main__":
    data = pd.read_csv('./criteo_sample.txt')

    # Column names used by this example: C1..C26 are treated as sparse, I1..I13 as dense.
    sparse_features = ['C%d' % idx for idx in range(1, 27)]
    dense_features = ['I%d' % idx for idx in range(1, 14)]
    target = ['label']

    # Missing sparse values become the '-1' category; missing dense values become 0.
    data[sparse_features] = data[sparse_features].fillna('-1')
    data[dense_features] = data[dense_features].fillna(0)

    # 1. Label-encode every sparse column and min-max scale the dense columns to [0, 1].
    for feat in sparse_features:
        data[feat] = LabelEncoder().fit_transform(data[feat])
    scaler = MinMaxScaler(feature_range=(0, 1))
    data[dense_features] = scaler.fit_transform(data[dense_features])

    # 2. Describe each field: the vocabulary size of a sparse field is its largest
    #    encoded id + 1; every dense field has dimension 1.
    sparse_columns = [SparseFeat(feat, vocabulary_size=data[feat].max() + 1, embedding_dim=4)
                      for feat in sparse_features]
    dense_columns = [DenseFeat(feat, 1) for feat in dense_features]
    fixlen_feature_columns = sparse_columns + dense_columns

    # The linear and the DNN part share the same feature columns.
    dnn_feature_columns = fixlen_feature_columns
    linear_feature_columns = fixlen_feature_columns
    feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

    # 3. Split the data and build the name -> column mapping fed to the model.
    train, test = train_test_split(data, test_size=0.2, random_state=2020)
    train_model_input = dict((name, train[name]) for name in feature_names)
    test_model_input = dict((name, test[name]) for name in feature_names)

    # 4. Define the model, then train, predict and evaluate.
    model = DeepFM(linear_feature_columns, dnn_feature_columns, task='binary')
    model.compile("adam", "binary_crossentropy", metrics=['binary_crossentropy'])

    train_labels = train[target].values
    history = model.fit(train_model_input, train_labels,
                        batch_size=256, epochs=10, verbose=2, validation_split=0.2)

    pred_ans = model.predict(test_model_input, batch_size=256)
    test_labels = test[target].values
    print("test LogLoss", round(log_loss(test_labels, pred_ans), 4))
    print("test AUC", round(roc_auc_score(test_labels, pred_ans), 4))
================================================
FILE: examples/run_classification_criteo_hash.py
================================================
import pandas as pd
from sklearn.metrics import log_loss, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from deepctr.models import DeepFM
from deepctr.feature_column import SparseFeat, DenseFeat,get_feature_names
if __name__ == "__main__":
    data = pd.read_csv('./criteo_sample.txt')

    # Column names used by this example: C1..C26 are treated as sparse, I1..I13 as dense.
    sparse_features = ['C{}'.format(idx) for idx in range(1, 27)]
    dense_features = ['I{}'.format(idx) for idx in range(1, 14)]
    target = ['label']

    # Missing sparse values become the '-1' string; missing dense values become 0.
    data[sparse_features] = data[sparse_features].fillna('-1')
    data[dense_features] = data[dense_features].fillna(0)

    # 1. Min-max scale the dense columns to [0, 1]; sparse columns are left as raw values.
    scaler = MinMaxScaler(feature_range=(0, 1))
    data[dense_features] = scaler.fit_transform(data[dense_features])

    # 2. Give every sparse field a hashing space of 1000 and record the dense field names.
    hashed_columns = []
    for feat in sparse_features:
        hashed_columns.append(
            SparseFeat(feat, vocabulary_size=1000, embedding_dim=4, use_hash=True,
                       dtype='string'))  # since the input is string
    dense_columns = [DenseFeat(feat, 1) for feat in dense_features]
    fixlen_feature_columns = hashed_columns + dense_columns

    # The linear and the DNN part share the same feature columns.
    linear_feature_columns = fixlen_feature_columns
    dnn_feature_columns = fixlen_feature_columns
    feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

    # 3. Split the data and build the name -> column mapping fed to the model.
    train, test = train_test_split(data, test_size=0.2, random_state=2020)
    train_model_input = {}
    test_model_input = {}
    for name in feature_names:
        train_model_input[name] = train[name]
        test_model_input[name] = test[name]

    # 4. Define the model, then train, predict and evaluate.
    model = DeepFM(linear_feature_columns, dnn_feature_columns, task='binary')
    model.compile("adam", "binary_crossentropy", metrics=['binary_crossentropy'])
    history = model.fit(train_model_input, train[target].values,
                        batch_size=256, epochs=10, verbose=2, validation_split=0.2)

    pred_ans = model.predict(test_model_input, batch_size=256)
    y_true = test[target].values
    print("test LogLoss", round(log_loss(y_true, pred_ans), 4))
    print("test AUC", round(roc_auc_score(y_true, pred_ans), 4))
================================================
FILE: examples/run_classification_criteo_multi_gpu.py
================================================
import pandas as pd
from sklearn.metrics import log_loss, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.python.keras.utils import multi_gpu_model
from deepctr.feature_column import SparseFeat, DenseFeat,get_feature_names
from deepctr.models import DeepFM
if __name__ == "__main__":
    data = pd.read_csv('./criteo_sample.txt')

    # Column names used by this example: C1..C26 are treated as sparse, I1..I13 as dense.
    sparse_features = ['C' + str(n) for n in range(1, 27)]
    dense_features = ['I' + str(n) for n in range(1, 14)]
    target = ['label']

    # Missing sparse values become the '-1' category; missing dense values become 0.
    data[sparse_features] = data[sparse_features].fillna('-1')
    data[dense_features] = data[dense_features].fillna(0)

    # 1. Label-encode every sparse column and min-max scale the dense columns to [0, 1].
    for feat in sparse_features:
        encoder = LabelEncoder()
        data[feat] = encoder.fit_transform(data[feat])
    scaler = MinMaxScaler(feature_range=(0, 1))
    data[dense_features] = scaler.fit_transform(data[dense_features])

    # 2. Describe each field: the vocabulary size of a sparse field is its largest
    #    encoded id + 1; every dense field has dimension 1.
    fixlen_feature_columns = []
    for feat in sparse_features:
        fixlen_feature_columns.append(
            SparseFeat(feat, vocabulary_size=data[feat].max() + 1, embedding_dim=4))
    for feat in dense_features:
        fixlen_feature_columns.append(DenseFeat(feat, 1))

    # The linear and the DNN part share the same feature columns.
    dnn_feature_columns = fixlen_feature_columns
    linear_feature_columns = fixlen_feature_columns
    feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

    # 3. Split the data and build the name -> column mapping fed to the model.
    train, test = train_test_split(data, test_size=0.2, random_state=2020)
    train_model_input = {name: train[name] for name in feature_names}
    test_model_input = {name: test[name] for name in feature_names}

    # 4. Define the model, wrap it for 2 GPUs, then train, predict and evaluate.
    # NOTE(review): multi_gpu_model is deprecated in newer TensorFlow releases - confirm
    # the targeted TF version still provides it.
    base_model = DeepFM(linear_feature_columns, dnn_feature_columns, task='binary')
    model = multi_gpu_model(base_model, gpus=2)
    model.compile("adam", "binary_crossentropy", metrics=['binary_crossentropy'])
    history = model.fit(train_model_input, train[target].values,
                        batch_size=256, epochs=10, verbose=2, validation_split=0.2)

    pred_ans = model.predict(test_model_input, batch_size=256)
    expected = test[target].values
    print("test LogLoss", round(log_loss(expected, pred_ans), 4))
    print("test AUC", round(roc_auc_score(expected, pred_ans), 4))
================================================
FILE: examples/run_dien.py
================================================
import numpy as np
import tensorflow as tf
from deepctr.feature_column import SparseFeat, VarLenSparseFeat, DenseFeat,get_feature_names
from deepctr.models import DIEN
def get_xy_fd(use_neg=False, hash_flag=False):
    """Build a three-sample toy dataset and the matching feature columns for DIEN.

    :param use_neg: when True, also add the ``neg_hist_*`` sequence inputs and columns.
    :param hash_flag: forwarded as ``use_hash`` to the four base sparse features.
    :return: ``(x, y, feature_columns, behavior_feature_list)``; ``x`` maps every name
        from ``get_feature_names(feature_columns)`` to its numpy array.
    """

    def _as_sequence(base_feat):
        # All sequence columns use maxlen 4 and read their length from "seq_length".
        return VarLenSparseFeat(base_feat, maxlen=4, length_name="seq_length")

    behavior_feature_list = ["item_id", "cate_id"]

    feature_columns = [
        SparseFeat('user', 3, embedding_dim=10, use_hash=hash_flag),
        SparseFeat('gender', 2, embedding_dim=4, use_hash=hash_flag),
        SparseFeat('item_id', 3 + 1, embedding_dim=8, use_hash=hash_flag),
        SparseFeat('cate_id', 2 + 1, embedding_dim=4, use_hash=hash_flag),
        DenseFeat('pay_score', 1),
    ]
    # History sequences reuse the embeddings named after the target item / category.
    feature_columns.append(_as_sequence(
        SparseFeat('hist_item_id', vocabulary_size=3 + 1, embedding_dim=8, embedding_name='item_id')))
    feature_columns.append(_as_sequence(
        SparseFeat('hist_cate_id', 2 + 1, embedding_dim=4, embedding_name='cate_id')))

    # Toy inputs; id 0 is the mask value for item and category ids.
    feature_dict = {
        'user': np.array([0, 1, 2]),
        'gender': np.array([0, 1, 0]),
        'item_id': np.array([1, 2, 3]),
        'cate_id': np.array([1, 2, 2]),
        'hist_item_id': np.array([[1, 2, 3, 0], [1, 2, 3, 0], [1, 2, 0, 0]]),
        'hist_cate_id': np.array([[1, 2, 2, 0], [1, 2, 2, 0], [1, 2, 0, 0]]),
        'pay_score': np.array([0.1, 0.2, 0.3]),
        "seq_length": np.array([3, 3, 2]),
    }

    if use_neg:
        feature_dict['neg_hist_item_id'] = np.array([[1, 2, 3, 0], [1, 2, 3, 0], [1, 2, 0, 0]])
        feature_dict['neg_hist_cate_id'] = np.array([[1, 2, 2, 0], [1, 2, 2, 0], [1, 2, 0, 0]])
        feature_columns.append(_as_sequence(
            SparseFeat('neg_hist_item_id', vocabulary_size=3 + 1, embedding_dim=8, embedding_name='item_id')))
        feature_columns.append(_as_sequence(
            SparseFeat('neg_hist_cate_id', 2 + 1, embedding_dim=4, embedding_name='cate_id')))

    # Keep only the inputs the feature columns actually ask for.
    x = dict((name, feature_dict[name]) for name in get_feature_names(feature_columns))
    y = np.array([1, 0, 1])
    return x, y, feature_columns, behavior_feature_list
if __name__ == "__main__":
    # Switch eager execution off on TensorFlow 2.x and later. The major version is
    # compared numerically: comparing version strings is lexicographic, so a release
    # such as '10.0.0' would sort before '2.0.0' and skip this branch.
    if int(tf.__version__.split('.')[0]) >= 2:
        tf.compat.v1.disable_eager_execution()

    # Train DIEN on the toy data, including the negative-sampling inputs.
    USE_NEG = True
    x, y, feature_columns, behavior_feature_list = get_xy_fd(use_neg=USE_NEG)
    model = DIEN(feature_columns, behavior_feature_list,
                 dnn_hidden_units=[4, 4, 4], dnn_dropout=0.6, gru_type="AUGRU", use_negsampling=USE_NEG)
    model.compile('adam', 'binary_crossentropy',
                  metrics=['binary_crossentropy'])
    history = model.fit(x, y, verbose=1, epochs=10, validation_split=0.5)
================================================
FILE: examples/run_din.py
================================================
import numpy as np
from deepctr.models import DIN
from deepctr.feature_column import SparseFeat, VarLenSparseFeat, DenseFeat, get_feature_names
def get_xy_fd():
    """Build a three-sample toy dataset and the matching feature columns for DIN.

    :return: ``(x, y, feature_columns, behavior_feature_list)``; ``x`` maps every name
        from ``get_feature_names(feature_columns)`` to its numpy array.
    """
    base_columns = [
        SparseFeat('user', 3, embedding_dim=10),
        SparseFeat('gender', 2, embedding_dim=4),
        SparseFeat('item_id', 3 + 1, embedding_dim=8),
        SparseFeat('cate_id', 2 + 1, embedding_dim=4),
        DenseFeat('pay_score', 1),
    ]
    # The history sequences reuse the embeddings named after the target item / category.
    history_columns = [
        VarLenSparseFeat(
            SparseFeat('hist_item_id', vocabulary_size=3 + 1, embedding_dim=8, embedding_name='item_id'),
            maxlen=4, length_name="seq_length"),
        VarLenSparseFeat(
            SparseFeat('hist_cate_id', 2 + 1, embedding_dim=4, embedding_name='cate_id'),
            maxlen=4, length_name="seq_length"),
    ]
    feature_columns = base_columns + history_columns

    # Notice: History behavior sequence feature name must start with "hist_".
    behavior_feature_list = ["item_id", "cate_id"]

    # Toy inputs; id 0 is the mask value for item and category ids.
    feature_dict = {
        'user': np.array([0, 1, 2]),
        'gender': np.array([0, 1, 0]),
        'item_id': np.array([1, 2, 3]),
        'cate_id': np.array([1, 2, 2]),
        'hist_item_id': np.array([[1, 2, 3, 0], [3, 2, 1, 0], [1, 2, 0, 0]]),
        'hist_cate_id': np.array([[1, 2, 2, 0], [2, 2, 1, 0], [1, 2, 0, 0]]),
        'pay_score': np.array([0.1, 0.2, 0.3]),
        # the actual length of the behavior sequence
        'seq_length': np.array([3, 3, 2]),
    }

    # Keep only the inputs the feature columns actually ask for.
    x = {}
    for name in get_feature_names(feature_columns):
        x[name] = feature_dict[name]
    y = np.array([1, 0, 1])
    return x, y, feature_columns, behavior_feature_list
if __name__ == "__main__":
    # Fetch the toy data together with its column definitions.
    dataset = get_xy_fd()
    x, y = dataset[0], dataset[1]
    feature_columns, behavior_feature_list = dataset[2], dataset[3]

    model = DIN(feature_columns, behavior_feature_list)
    # model = BST(feature_columns, behavior_feature_list,att_head_num=4)

    # Binary cross-entropy serves both as the loss and as the reported metric.
    model.compile('adam', 'binary_crossentropy', metrics=['binary_crossentropy'])

    # Half of the samples are held out for validation.
    history = model.fit(x, y, verbose=1, epochs=10, validation_split=0.5)
================================================
FILE: examples/run_dsin.py
================================================
import numpy as np
import tensorflow as tf
from deepctr.feature_column import SparseFeat, VarLenSparseFeat, DenseFeat,get_feature_names
from deepctr.models import DSIN
def get_xy_fd(hash_flag=False):
    """Build a three-sample toy dataset and the matching feature columns for DSIN.

    :param hash_flag: forwarded as ``use_hash`` to every sparse feature.
    :return: ``(x, y, feature_columns, behavior_feature_list)``; ``x`` maps every name
        from ``get_feature_names(feature_columns)`` to its numpy array and additionally
        carries the ``"sess_length"`` input.
    """
    behavior_feature_list = ["item", "cate_id"]

    feature_columns = [
        SparseFeat('user', 3, embedding_dim=10, use_hash=hash_flag),
        SparseFeat('gender', 2, embedding_dim=4, use_hash=hash_flag),
        SparseFeat('item', 3 + 1, embedding_dim=4, use_hash=hash_flag),
        SparseFeat('cate_id', 2 + 1, embedding_dim=4, use_hash=hash_flag),
        DenseFeat('pay_score', 1),
    ]
    # Two sessions (sess_0, sess_1), each with an item and a category sequence that
    # reuse the 'item' / 'cate_id' embeddings.
    for sess_idx in range(2):
        feature_columns.append(VarLenSparseFeat(
            SparseFeat('sess_%d_item' % sess_idx, 3 + 1, embedding_dim=4, use_hash=hash_flag,
                       embedding_name='item'),
            maxlen=4))
        feature_columns.append(VarLenSparseFeat(
            SparseFeat('sess_%d_cate_id' % sess_idx, 2 + 1, embedding_dim=4, use_hash=hash_flag,
                       embedding_name='cate_id'),
            maxlen=4))

    # Toy inputs; id 0 is the mask value for item and category ids.
    feature_dict = {
        'user': np.array([0, 1, 2]),
        'gender': np.array([0, 1, 0]),
        'item': np.array([1, 2, 3]),
        'cate_id': np.array([1, 2, 2]),
        'pay_score': np.array([0.1, 0.2, 0.3]),
        'sess_0_item': np.array([[1, 2, 3, 0], [3, 2, 1, 0], [0, 0, 0, 0]]),
        'sess_0_cate_id': np.array([[1, 2, 2, 0], [2, 2, 1, 0], [0, 0, 0, 0]]),
        'sess_1_item': np.array([[1, 2, 3, 0], [0, 0, 0, 0], [0, 0, 0, 0]]),
        'sess_1_cate_id': np.array([[1, 2, 2, 0], [0, 0, 0, 0], [0, 0, 0, 0]]),
    }

    # Keep only the inputs the feature columns ask for, then add the session count,
    # which is not described by any feature column.
    x = dict((name, feature_dict[name]) for name in get_feature_names(feature_columns))
    x["sess_length"] = np.array([2, 1, 0])

    y = np.array([1, 0, 1])
    return x, y, feature_columns, behavior_feature_list
if __name__ == "__main__":
    # Switch eager execution off on TensorFlow 2.x and later. The major version is
    # compared numerically: comparing version strings is lexicographic, so a release
    # such as '10.0.0' would sort before '2.0.0' and skip this branch.
    if int(tf.__version__.split('.')[0]) >= 2:
        tf.compat.v1.disable_eager_execution()

    # Train DSIN on the toy data with hashing enabled for the sparse features.
    x, y, feature_columns, behavior_feature_list = get_xy_fd(True)
    model = DSIN(feature_columns, behavior_feature_list, sess_max_count=2,
                 dnn_hidden_units=[4, 4, 4], dnn_dropout=0.5, )
    model.compile('adam', 'binary_crossentropy',
                  metrics=['binary_crossentropy'])
    history = model.fit(x, y, verbose=1, epochs=10, validation_split=0.5)
================================================
FILE: examples/run_estimator_pandas_classification.py
================================================
import pandas as pd
import tensorflow as tf
from sklearn.metrics import log_loss, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from deepctr.estimator import DeepFMEstimator
from deepctr.estimator.inputs import input_fn_pandas
if __name__ == "__main__":
    data = pd.read_csv('./criteo_sample.txt')

    sparse_features = ['C' + str(i) for i in range(1, 27)]
    dense_features = ['I' + str(i) for i in range(1, 14)]

    data[sparse_features] = data[sparse_features].fillna('-1', )
    data[dense_features] = data[dense_features].fillna(0, )
    target = ['label']

    # 1. Label-encode the sparse features and min-max scale the dense features into [0, 1].
    for feat in sparse_features:
        lbe = LabelEncoder()
        data[feat] = lbe.fit_transform(data[feat])
    mms = MinMaxScaler(feature_range=(0, 1))
    data[dense_features] = mms.fit_transform(data[dense_features])

    # 2. Build the tf.feature_column definitions for the linear and the DNN part.
    dnn_feature_columns = []
    linear_feature_columns = []
    for feat in sparse_features:
        # Encoded ids run from 0 to max, so the identity column needs max + 1 buckets.
        num_buckets = data[feat].max() + 1
        dnn_feature_columns.append(tf.feature_column.embedding_column(
            tf.feature_column.categorical_column_with_identity(feat, num_buckets), 4))
        linear_feature_columns.append(tf.feature_column.categorical_column_with_identity(feat, num_buckets))
    for feat in dense_features:
        dnn_feature_columns.append(tf.feature_column.numeric_column(feat))
        linear_feature_columns.append(tf.feature_column.numeric_column(feat))

    # 3. Generate input functions for the estimator.
    train, test = train_test_split(data, test_size=0.2, random_state=2021)

    # Not setting default value for continuous feature. filled with mean.
    feature_names = sparse_features + dense_features
    train_model_input = input_fn_pandas(train, feature_names, 'label', shuffle=True)
    test_model_input = input_fn_pandas(test, feature_names, None, shuffle=False)

    # 4. Define the model, then train, predict and evaluate.
    model = DeepFMEstimator(linear_feature_columns, dnn_feature_columns, task='binary',
                            config=tf.estimator.RunConfig(tf_random_seed=2021))
    model.train(train_model_input)

    # predict() yields one dict per example; keep only its 'pred' entry.
    pred_ans = [prediction['pred'] for prediction in model.predict(test_model_input)]

    print("test LogLoss", round(log_loss(test[target].values, pred_ans), 4))
    print("test AUC", round(roc_auc_score(test[target].values, pred_ans), 4))
================================================
FILE: examples/run_estimator_tfrecord_classification.py
================================================
import tensorflow as tf
from tensorflow.python.ops.parsing_ops import FixedLenFeature
from deepctr.estimator import DeepFMEstimator
from deepctr.estimator.inputs import input_fn_tfrecord
if __name__ == "__main__":
    # 1. Generate the feature columns for the linear part and the DNN part.
    sparse_features = ['C' + str(i) for i in range(1, 27)]
    dense_features = ['I' + str(i) for i in range(1, 14)]

    # Every sparse field uses the same number of identity buckets and embedding size.
    num_buckets = 1000
    embedding_dim = 4

    dnn_feature_columns = []
    linear_feature_columns = []
    for feat in sparse_features:
        dnn_feature_columns.append(tf.feature_column.embedding_column(
            tf.feature_column.categorical_column_with_identity(feat, num_buckets), embedding_dim))
        linear_feature_columns.append(tf.feature_column.categorical_column_with_identity(feat, num_buckets))
    for feat in dense_features:
        dnn_feature_columns.append(tf.feature_column.numeric_column(feat))
        linear_feature_columns.append(tf.feature_column.numeric_column(feat))

    # 2. Describe how to parse each TFRecord field, then build the input functions.
    feature_description = {k: FixedLenFeature(dtype=tf.int64, shape=1) for k in sparse_features}
    feature_description.update(
        {k: FixedLenFeature(dtype=tf.float32, shape=1) for k in dense_features})
    feature_description['label'] = FixedLenFeature(dtype=tf.float32, shape=1)

    train_model_input = input_fn_tfrecord('./criteo_sample.tr.tfrecords', feature_description, 'label',
                                          batch_size=256, num_epochs=1, shuffle_factor=10)
    test_model_input = input_fn_tfrecord('./criteo_sample.te.tfrecords', feature_description, 'label',
                                         batch_size=2 ** 14, num_epochs=1, shuffle_factor=0)

    # 3. Define the model, then train and evaluate.
    model = DeepFMEstimator(linear_feature_columns, dnn_feature_columns, task='binary',
                            config=tf.estimator.RunConfig(tf_random_seed=2021))
    model.train(train_model_input)
    eval_result = model.evaluate(test_model_input)

    print(eval_result)
================================================
FILE: examples/run_flen.py
================================================
import pandas as pd
from sklearn.metrics import log_loss, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from deepctr.feature_column import SparseFeat,get_feature_names
from deepctr.models import FLEN
if __name__ == "__main__":
    data = pd.read_csv('./avazu_sample.txt')

    # `day` must be derived first: it is sliced from the raw `hour` value, which is
    # overwritten by its own tail on the next line.
    data['day'] = data['hour'].apply(lambda raw: str(raw)[4:6])
    data['hour'] = data['hour'].apply(lambda raw: str(raw)[6:])

    sparse_features = [
        'hour', 'C1', 'banner_pos', 'site_id', 'site_domain',
        'site_category', 'app_id', 'app_domain', 'app_category', 'device_id',
        'device_model', 'device_type', 'device_conn_type',  # 'device_ip',
        'C14',
        'C15', 'C16', 'C17', 'C18', 'C19', 'C20', 'C21',
    ]

    data[sparse_features] = data[sparse_features].fillna('-1', )
    target = ['click']

    # 1. Label-encode every sparse feature in place.
    for column in sparse_features:
        encoder = LabelEncoder()
        data[column] = encoder.fit_transform(data[column])

    # 2. Map each feature to its field group and build the feature columns.
    field_info = {
        'C14': 'user', 'C15': 'user', 'C16': 'user', 'C17': 'user',
        'C18': 'user', 'C19': 'user', 'C20': 'user', 'C21': 'user', 'C1': 'user',
        'banner_pos': 'context', 'site_id': 'context',
        'site_domain': 'context', 'site_category': 'context',
        'app_id': 'item', 'app_domain': 'item', 'app_category': 'item',
        'device_model': 'user', 'device_type': 'user',
        'device_conn_type': 'context', 'hour': 'context',
        'device_id': 'user',
    }

    fixlen_feature_columns = [
        SparseFeat(column, vocabulary_size=data[column].max() + 1, embedding_dim=16, use_hash=False,
                   dtype='int32', group_name=field_info[column])
        for column in sparse_features
    ]

    # The same columns feed both the linear and the DNN part.
    dnn_feature_columns = fixlen_feature_columns
    linear_feature_columns = fixlen_feature_columns

    feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

    # 3. Split the data and build the name -> column input dicts.
    train, test = train_test_split(data, test_size=0.2, random_state=2020)
    train_model_input = {key: train[key] for key in feature_names}
    test_model_input = {key: test[key] for key in feature_names}

    # 4. Define the model, then train, predict and evaluate.
    model = FLEN(linear_feature_columns, dnn_feature_columns, task='binary')
    model.compile("adam", "binary_crossentropy",
                  metrics=['binary_crossentropy'], )

    history = model.fit(train_model_input, train[target].values,
                        batch_size=256, epochs=10, verbose=2, validation_split=0.2, )
    pred_ans = model.predict(test_model_input, batch_size=256)

    true_labels = test[target].values
    print("test LogLoss", round(log_loss(true_labels, pred_ans), 4))
    print("test AUC", round(roc_auc_score(true_labels, pred_ans), 4))
================================================
FILE: examples/run_mtl.py
================================================
import pandas as pd
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from deepctr.feature_column import SparseFeat, DenseFeat, get_feature_names
from deepctr.models import MMOE
if __name__ == "__main__":
    # The sample file has no header row, so the column names are supplied here.
    column_names = [
        'age', 'class_worker', 'det_ind_code', 'det_occ_code', 'education', 'wage_per_hour', 'hs_college',
        'marital_stat', 'major_ind_code', 'major_occ_code', 'race', 'hisp_origin', 'sex', 'union_member',
        'unemp_reason', 'full_or_part_emp', 'capital_gains', 'capital_losses', 'stock_dividends',
        'tax_filer_stat', 'region_prev_res', 'state_prev_res', 'det_hh_fam_stat', 'det_hh_summ',
        'instance_weight', 'mig_chg_msa', 'mig_chg_reg', 'mig_move_reg', 'mig_same', 'mig_prev_sunbelt',
        'num_emp', 'fam_under_18', 'country_father', 'country_mother', 'country_self', 'citizenship',
        'own_or_self', 'vet_question', 'vet_benefits', 'weeks_worked', 'year', 'income_50k',
    ]
    data = pd.read_csv('./census-income.sample', header=None, names=column_names)

    # Derive the two binary task labels, then drop the raw columns they came from.
    label_names = ['label_income', 'label_marital']
    data['label_income'] = data['income_50k'].map({' - 50000.': 0, ' 50000+.': 1})
    data['label_marital'] = data['marital_stat'].apply(
        lambda status: 1 if status == ' Never married' else 0)
    data.drop(labels=['income_50k', 'marital_stat'], axis=1, inplace=True)

    columns = data.columns.values.tolist()
    sparse_features = [
        'class_worker', 'det_ind_code', 'det_occ_code', 'education', 'hs_college', 'major_ind_code',
        'major_occ_code', 'race', 'hisp_origin', 'sex', 'union_member', 'unemp_reason',
        'full_or_part_emp', 'tax_filer_stat', 'region_prev_res', 'state_prev_res', 'det_hh_fam_stat',
        'det_hh_summ', 'mig_chg_msa', 'mig_chg_reg', 'mig_move_reg', 'mig_same', 'mig_prev_sunbelt',
        'fam_under_18', 'country_father', 'country_mother', 'country_self', 'citizenship',
        'vet_question',
    ]
    # Every remaining column that is neither sparse nor a label is treated as dense.
    dense_features = []
    for col in columns:
        if col in sparse_features or col in label_names:
            continue
        dense_features.append(col)

    data[sparse_features] = data[sparse_features].fillna('-1', )
    data[dense_features] = data[dense_features].fillna(0, )

    # Scale the dense features into [0, 1] and label-encode the sparse ones.
    scaler = MinMaxScaler(feature_range=(0, 1))
    data[dense_features] = scaler.fit_transform(data[dense_features])
    for col in sparse_features:
        encoder = LabelEncoder()
        data[col] = encoder.fit_transform(data[col])

    sparse_columns = [SparseFeat(col, data[col].max() + 1, embedding_dim=4) for col in sparse_features]
    dense_columns = [DenseFeat(col, 1, ) for col in dense_features]
    fixlen_feature_columns = sparse_columns + dense_columns

    dnn_feature_columns = fixlen_feature_columns
    linear_feature_columns = fixlen_feature_columns

    feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

    # 3. Split the data and build the name -> column input dicts.
    train, test = train_test_split(data, test_size=0.2, random_state=2020)
    train_model_input = {key: train[key] for key in feature_names}
    test_model_input = {key: test[key] for key in feature_names}

    # 4. Define the two-task model, then train, predict and evaluate each task.
    model = MMOE(dnn_feature_columns, tower_dnn_hidden_units=[], task_types=['binary', 'binary'],
                 task_names=['label_income', 'label_marital'])
    model.compile("adam", loss=["binary_crossentropy", "binary_crossentropy"],
                  metrics=['binary_crossentropy'], )

    train_labels = [train['label_income'].values, train['label_marital'].values]
    history = model.fit(train_model_input, train_labels,
                        batch_size=256, epochs=10, verbose=2, validation_split=0.2)
    pred_ans = model.predict(test_model_input, batch_size=256)

    # predict() output is indexed per task, in the order of `task_names`.
    income_pred, marital_pred = pred_ans[0], pred_ans[1]
    print("test income AUC", round(roc_auc_score(test['label_income'], income_pred), 4))
    print("test marital AUC", round(roc_auc_score(test['label_marital'], marital_pred), 4))
================================================
FILE: examples/run_multivalue_movielens.py
================================================
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from tensorflow.python.keras.preprocessing.sequence import pad_sequences
from deepctr.feature_column import SparseFeat, VarLenSparseFeat,get_feature_names
from deepctr.models import DeepFM
def split(x):
    """Encode a '|'-separated string as a list of integer ids.

    Unseen keys are registered in the module-level ``key2index`` dict with the
    next free id. Ids start at 1 because input value 0 is a special "padding"
    value for sequence inputs.

    :param x: string of keys joined by '|'.
    :return: list of ids, one per key, in the original order.
    """
    tokens = x.split('|')
    for token in tokens:
        # setdefault only stores the new id when the token is not registered yet.
        key2index.setdefault(token, len(key2index) + 1)
    return [key2index[token] for token in tokens]
if __name__ == "__main__":
    data = pd.read_csv("./movielens_sample.txt")
    sparse_features = ["movie_id", "user_id",
                       "gender", "age", "occupation", "zip", ]
    target = ['rating']

    # 1. Label-encode the sparse features and process the sequence feature.
    for column in sparse_features:
        encoder = LabelEncoder()
        data[column] = encoder.fit_transform(data[column])

    # `split` fills this module-level dict while encoding the genres.
    key2index = {}
    genres_list = [split(genres) for genres in data['genres'].values]
    genres_length = np.array([len(ids) for ids in genres_list])
    max_len = max(genres_length)
    # Notice : padding=`post`
    genres_list = pad_sequences(genres_list, maxlen=max_len, padding='post', )

    # 2. Count unique values per sparse field and configure the sequence feature.
    fixlen_feature_columns = [SparseFeat(column, data[column].max() + 1, embedding_dim=4)
                              for column in sparse_features]

    use_weighted_sequence = False
    # The weighted and unweighted variants differ only in `weight_name`.
    genres_weight_name = 'genres_weight' if use_weighted_sequence else None
    # Notice : value 0 is for padding for sequence input feature
    genres_feat = SparseFeat('genres', vocabulary_size=len(key2index) + 1, embedding_dim=4)
    varlen_feature_columns = [VarLenSparseFeat(genres_feat, maxlen=max_len, combiner='mean',
                                               weight_name=genres_weight_name)]

    linear_feature_columns = fixlen_feature_columns + varlen_feature_columns
    dnn_feature_columns = fixlen_feature_columns + varlen_feature_columns

    feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

    # 3. Build the model input dict.
    model_input = {column: data[column] for column in sparse_features}
    model_input["genres"] = genres_list
    model_input["genres_weight"] = np.random.randn(data.shape[0], max_len, 1)

    # 4. Define the model, compile and train.
    model = DeepFM(linear_feature_columns, dnn_feature_columns, task='regression')
    model.compile("adam", "mse", metrics=['mse'], )
    history = model.fit(model_input, data[target].values,
                        batch_size=256, epochs=10, verbose=2, validation_split=0.2, )
================================================
FILE: examples/run_multivalue_movielens_hash.py
================================================
import numpy as np
import pandas as pd
from tensorflow.python.keras.preprocessing.sequence import pad_sequences
from deepctr.feature_column import SparseFeat, VarLenSparseFeat,get_feature_names
from deepctr.models import DeepFM
if __name__ == "__main__":
    data = pd.read_csv("./movielens_sample.txt")
    sparse_features = ["movie_id", "user_id",
                       "gender", "age", "occupation", "zip", ]

    # The sparse features are declared with dtype='string' below, so cast them here.
    data[sparse_features] = data[sparse_features].astype(str)
    target = ['rating']

    # 1. Use hashing encoding on the fly for sparse features, and process sequence features.
    genres_list = [genres.split('|') for genres in data['genres'].values]
    genres_length = np.array([len(tokens) for tokens in genres_list])
    max_len = max(genres_length)
    # Notice : padding=`post`
    padded_genres = pad_sequences(genres_list, maxlen=max_len, padding='post', dtype=object, value=0)
    genres_list = padded_genres.astype(str)

    # 2. Set the hashing space for each sparse field and configure the sequence feature.
    fixlen_feature_columns = [
        SparseFeat(column, data[column].nunique() * 5, embedding_dim=4, use_hash=True, dtype='string')
        for column in sparse_features
    ]
    # Notice : value 0 is for padding for sequence input feature
    genres_feat = SparseFeat('genres', vocabulary_size=100, embedding_dim=4, use_hash=True, dtype="string")
    varlen_feature_columns = [VarLenSparseFeat(genres_feat, maxlen=max_len, combiner='mean', )]

    linear_feature_columns = fixlen_feature_columns + varlen_feature_columns
    dnn_feature_columns = fixlen_feature_columns + varlen_feature_columns
    feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

    # 3. Build the model input dict; the raw `genres` column is replaced by the padded array.
    model_input = {key: data[key] for key in feature_names}
    model_input['genres'] = genres_list

    # 4. Define the model, compile and train.
    model = DeepFM(linear_feature_columns, dnn_feature_columns, task='regression')
    model.compile("adam", "mse", metrics=['mse'], )
    history = model.fit(model_input, data[target].values,
                        batch_size=256, epochs=10, verbose=2, validation_split=0.2, )
================================================
FILE: examples/run_multivalue_movielens_vocab_hash.py
================================================
from deepctr.models import DeepFM
from deepctr.feature_column import SparseFeat, VarLenSparseFeat, get_feature_names
import numpy as np
import pandas as pd
from tensorflow.python.keras.preprocessing.sequence import pad_sequences
try:
import tensorflow.compat.v1 as tf
except ImportError as e:
import tensorflow as tf
if __name__ == "__main__":
    data = pd.read_csv("./movielens_sample.txt")
    sparse_features = ["movie_id", "user_id",
                       "gender", "age", "occupation", "zip", ]
    data[sparse_features] = data[sparse_features].astype(str)
    target = ['rating']

    # 1. Use hashing encoding on the fly for sparse features, and process sequence features.
    genres_list = list(map(lambda x: x.split('|'), data['genres'].values))
    genres_length = np.array(list(map(len, genres_list)))
    max_len = max(genres_length)
    # Notice : padding=`post`
    genres_list = pad_sequences(genres_list, maxlen=max_len, padding='post', dtype=object, value=0).astype(str)

    # 2. Set the hashing space for each sparse field and configure the sequence feature.
    #    Only the `age` feature is given a vocabulary file.
    fixlen_feature_columns = [SparseFeat(feat, data[feat].nunique() * 5, embedding_dim=4, use_hash=True,
                                         vocabulary_path='./movielens_age_vocabulary.csv' if feat == 'age' else None,
                                         dtype='string')
                              for feat in sparse_features]
    varlen_feature_columns = [
        VarLenSparseFeat(SparseFeat('genres', vocabulary_size=100, embedding_dim=4,
                                    use_hash=True, dtype="string"),
                         maxlen=max_len, combiner='mean',
                         )]  # Notice : value 0 is for padding for sequence input feature

    linear_feature_columns = fixlen_feature_columns + varlen_feature_columns
    dnn_feature_columns = fixlen_feature_columns + varlen_feature_columns
    feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

    # 3. Build the model input dict.
    model_input = {name: data[name] for name in feature_names}
    model_input['genres'] = genres_list

    # 4. Define the model, compile and train.
    model = DeepFM(linear_feature_columns, dnn_feature_columns, task='regression')
    model.compile("adam", "mse", metrics=['mse'], )

    # Detect TensorFlow 1.x by its numeric major version: comparing version
    # strings is lexicographic and would, for example, rank '10.0.0' below '2.0.0'.
    is_tf1 = not hasattr(tf, 'version') or int(tf.version.VERSION.split('.')[0]) < 2
    if is_tf1:
        # On TF 1.x the tables initializer is run in a session before training.
        with tf.Session() as sess:
            sess.run(tf.tables_initializer())
            history = model.fit(model_input, data[target].values,
                                batch_size=256, epochs=10, verbose=2, validation_split=0.2, )
    else:
        history = model.fit(model_input, data[target].values,
                            batch_size=256, epochs=10, verbose=2, validation_split=0.2, )
================================================
FILE: examples/run_regression_movielens.py
================================================
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from deepctr.models import DeepFM
from deepctr.feature_column import SparseFeat,get_feature_names
if __name__ == "__main__":
    data = pd.read_csv("./movielens_sample.txt")
    sparse_features = ["movie_id", "user_id",
                       "gender", "age", "occupation", "zip"]
    target = ['rating']

    # 1. Label-encode every sparse feature in place.
    for column in sparse_features:
        encoder = LabelEncoder()
        data[column] = encoder.fit_transform(data[column])

    # 2. Count unique values per sparse field (largest encoded id + 1).
    fixlen_feature_columns = [
        SparseFeat(column, data[column].max() + 1, embedding_dim=4)
        for column in sparse_features
    ]
    linear_feature_columns = fixlen_feature_columns
    dnn_feature_columns = fixlen_feature_columns
    feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

    # 3. Split the data and build the name -> array input dicts.
    train, test = train_test_split(data, test_size=0.2, random_state=2020)
    train_model_input = {key: train[key].values for key in feature_names}
    test_model_input = {key: test[key].values for key in feature_names}

    # 4. Define the model, then train, predict and evaluate.
    model = DeepFM(linear_feature_columns, dnn_feature_columns, task='regression')
    model.compile("adam", "mse", metrics=['mse'], )

    history = model.fit(train_model_input, train[target].values,
                        batch_size=256, epochs=10, verbose=2, validation_split=0.2, )
    pred_ans = model.predict(test_model_input, batch_size=256)

    test_mse = mean_squared_error(test[target].values, pred_ans)
    print("test MSE", round(test_mse, 4))
================================================
FILE: setup.cfg
================================================
# NOTE(review): the key below is spelled "desciption-file" (missing "r"). It looks like a
# typo of "description-file" — confirm whether any tool reads it before renaming.
[metadata]
desciption-file = README.md
#[coverage:run]
#branch = True
# Lines matching any pattern below are excluded from the coverage report.
[coverage:report]
exclude_lines =
# Have to re-enable the standard pragma
pragma: no cover
# Don't complain about missing debug-only code:
def __repr__
if self\.debug
# Don't complain if tests don't hit defensive assertion code:
raise ValueError
raise AssertionError
raise NotImplementedError
# Don't complain if non-runnable code isn't run:
if 0:
if False:
if __name__ == .__main__.:
[coverage:run]
omit =
# omit anything in a .local directory anywhere
#*/.local/*
# omit everything under deepctr/contrib
deepctr/contrib/*
# omit this single file
#utils/tirefire.py
================================================
FILE: setup.py
================================================
import sys
import setuptools
import io

# Read the README as UTF-8 explicitly so the result does not depend on the
# platform's default encoding. io.open accepts `encoding` on Python 2.7 as well
# as on Python 3, which matches the `python_requires` range declared below.
with io.open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

# Runtime dependencies; the h5py pin is selected by Python version through
# environment markers.
REQUIRED_PACKAGES = [
    'requests',
    'h5py==3.7.0; python_version>="3.9"',
    'h5py==2.10.0; python_version<"3.9"'
]

setuptools.setup(
    name="deepctr",
    version="0.9.3",
    author="Weichen Shen",
    author_email="weichenswc@163.com",
    description="Easy-to-use,Modular and Extendible package of deep learning based CTR(Click Through Rate) prediction models with tensorflow 1.x and 2.x .",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/shenweichen/deepctr",
    download_url='https://github.com/shenweichen/deepctr/tags',
    packages=setuptools.find_packages(
        exclude=["tests", "tests.models", "tests.layers"]),
    python_requires=">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*",  # '>=3.4',  # 3.4.6
    install_requires=REQUIRED_PACKAGES,
    # TensorFlow is not a hard requirement; it is selected through the
    # "cpu" / "gpu" extras.
    extras_require={
        "cpu": ["tensorflow>=1.4.0,!=1.7.*,!=1.8.*"],
        "gpu": ["tensorflow-gpu>=1.4.0,!=1.7.*,!=1.8.*"],
    },
    entry_points={
    },
    # `classifiers` is passed as a list: setuptools expects a list here and
    # warns when given a tuple.
    classifiers=[
        "License :: OSI Approved :: Apache Software License",
        "Operating System :: OS Independent",
        'Intended Audience :: Developers',
        'Intended Audience :: Education',
        'Intended Audience :: Science/Research',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 3.6',
        'Programming Language :: Python :: 3.7',
        'Programming Language :: Python :: 3.8',
        'Programming Language :: Python :: 3.9',
        'Programming Language :: Python :: 3.10',
        'Topic :: Scientific/Engineering',
        'Topic :: Scientific/Engineering :: Artificial Intelligence',
        'Topic :: Software Development',
        'Topic :: Software Development :: Libraries',
        'Topic :: Software Development :: Libraries :: Python Modules',
    ],
    license="Apache-2.0",
    keywords=['ctr', 'click through rate',
              'deep learning', 'tensorflow', 'tensor', 'keras'],
)
================================================
FILE: tests/README.md
================================================
================================================
FILE: tests/__init__.py
================================================
================================================
FILE: tests/feature_test.py
================================================
from deepctr.models import DeepFM
from deepctr.feature_column import SparseFeat, DenseFeat, VarLenSparseFeat, get_feature_names
import numpy as np
def test_long_dense_vector():
    """Compile and fit DeepFM once when a 5-dimensional DenseFeat is among the features.

    All three columns are passed as the first DeepFM argument; the second
    argument receives every column except the dense ``pic_vec`` one.
    """
    pic_vec_column = DenseFeat("pic_vec", 5)
    sparse_columns = [SparseFeat('user_id', 4, ), SparseFeat('item_id', 5, )]
    feature_columns = sparse_columns + [pic_vec_column]
    fixlen_feature_names = get_feature_names(feature_columns)

    # Three samples; every sample shares the same 5-value dense vector.
    input_dict = {
        'user_id': np.array([[1], [0], [1]]),
        'item_id': np.array([[3], [2], [1]]),
        'pic_vec': np.array([[0.1, 0.5, 0.4, 0.3, 0.2]] * 3),
    }
    label = np.array([1, 0, 1])

    # Inputs are passed as a list ordered like the feature names.
    model_input = []
    for name in fixlen_feature_names:
        model_input.append(input_dict[name])

    model = DeepFM(feature_columns, feature_columns[:-1])
    model.compile('adagrad', 'binary_crossentropy')
    model.fit(model_input, label)
def test_feature_column_sparsefeat_vocabulary_path():
    """Check that ``vocabulary_path`` is kept by SparseFeat and exposed by VarLenSparseFeat.

    :raises ValueError: if either feature column reports a different path.
    """
    vocab_path = "./dummy_test.csv"

    sparse_feat = SparseFeat('user_id', 4, vocabulary_path=vocab_path)
    if not sparse_feat.vocabulary_path == vocab_path:
        raise ValueError("sf.vocabulary_path is invalid")

    # The variable-length wrapper must report the path of the feature it wraps.
    varlen_feat = VarLenSparseFeat(sparse_feat, 6)
    if not varlen_feat.vocabulary_path == vocab_path:
        raise ValueError("vlsf.vocabulary_path is invalid")
================================================
FILE: tests/layers/__init__.py
================================================
================================================
FILE: tests/layers/activations_test.py
================================================
from deepctr.layers import activation
try:
from tensorflow.python.keras.utils.generic_utils import CustomObjectScope
except ImportError:
from tensorflow.python.keras.utils import CustomObjectScope
from tests.utils import layer_test
def test_dice():
    """Run ``layer_test`` on the Dice activation with default arguments and a (2, 3) input."""
    dice_cls = activation.Dice
    with CustomObjectScope({'Dice': dice_cls}):
        layer_test(dice_cls, kwargs={}, input_shape=(2, 3))
================================================
FILE: tests/layers/core_test.py
================================================
import pytest
import tensorflow as tf
from tensorflow.python.keras.layers import PReLU
try:
from tensorflow.python.keras.utils.generic_utils import CustomObjectScope
except ImportError:
from tensorflow.python.keras.utils import CustomObjectScope
from deepctr import layers
from deepctr.layers import Dice
from tests.layers.interaction_test import BATCH_SIZE, EMBEDDING_SIZE, SEQ_LENGTH
from tests.utils import layer_test
@pytest.mark.parametrize(
    'hidden_units,activation',
    [(hidden_units, activation)
     for hidden_units in [(), (10,)]
     for activation in ['sigmoid', Dice, PReLU]
     ]
)
def test_LocalActivationUnit(hidden_units, activation):
    """Run ``layer_test`` on LocalActivationUnit for each hidden-unit/activation pair.

    On TensorFlow 1.13 and newer only the 'sigmoid' activation is exercised;
    the other parametrized cases return early.
    """
    # Compare (major, minor) numerically. As plain strings, '1.4.0' >= '1.13.0'
    # is True, so the old check also skipped the non-sigmoid cases on TF 1.4-1.9.
    tf_major_minor = tuple(int(part) for part in tf.__version__.split('.')[:2])
    if tf_major_minor >= (1, 13) and activation != 'sigmoid':
        return

    with CustomObjectScope({'LocalActivationUnit': layers.LocalActivationUnit}):
        layer_test(layers.LocalActivationUnit,
                   kwargs={'hidden_units': hidden_units, 'activation': activation, 'dropout_rate': 0.5},
                   # Two inputs: one with a single step, one with SEQ_LENGTH steps.
                   input_shape=[(BATCH_SIZE, 1, EMBEDDING_SIZE), (BATCH_SIZE, SEQ_LENGTH, EMBEDDING_SIZE)])
@pytest.mark.parametrize(
    'hidden_units,use_bn',
    [
        ((), True),
        ((), False),
        ((10,), True),
        ((10,), False),
    ]
)
def test_DNN(hidden_units, use_bn):
    """Run ``layer_test`` on DNN with and without hidden units and batch normalization."""
    dnn_kwargs = {'hidden_units': hidden_units, 'use_bn': use_bn, 'dropout_rate': 0.5}
    with CustomObjectScope({'DNN': layers.DNN}):
        layer_test(layers.DNN, kwargs=dnn_kwargs, input_shape=(BATCH_SIZE, EMBEDDING_SIZE))
@pytest.mark.parametrize(
    'task,use_bias',
    [
        ('binary', True),
        ('binary', False),
        ('regression', True),
        ('regression', False),
    ]
)
def test_PredictionLayer(task, use_bias):
    """Run ``layer_test`` on PredictionLayer for both tasks, with and without bias."""
    prediction_kwargs = {'task': task, 'use_bias': use_bias}
    with CustomObjectScope({'PredictionLayer': layers.PredictionLayer}):
        layer_test(layers.PredictionLayer, kwargs=prediction_kwargs, input_shape=(BATCH_SIZE, 1))
@pytest.mark.xfail(reason="dim size must be 1 except for the batch size dim")
def test_test_PredictionLayer_invalid():
    """Feed PredictionLayer a (BATCH_SIZE, 2, 1) input; the test is marked as an expected failure."""
    invalid_input_shape = (BATCH_SIZE, 2, 1)
    with CustomObjectScope({'PredictionLayer': layers.PredictionLayer}):
        layer_test(layers.PredictionLayer, kwargs={'use_bias': True, },
                   input_shape=invalid_input_shape)
================================================
FILE: tests/layers/interaction_test.py
================================================
import pytest
try:
from tensorflow.python.keras.utils.generic_utils import CustomObjectScope
except ImportError:
from tensorflow.python.keras.utils import CustomObjectScope
from deepctr import layers
from tests.utils import layer_test
# Dimensions used to build the input shapes of the layer tests in this module;
# tests/layers/core_test.py and tests/layers/normalization_test.py import some of them too.
BATCH_SIZE = 5
FIELD_SIZE = 4
EMBEDDING_SIZE = 3
SEQ_LENGTH = 10
def test_FEFMLayer():
    """Run ``layer_test`` on FEFMLayer with a small regularizer."""
    fefm_kwargs = {'regularizer': 0.000001}
    field_embedding_shape = (BATCH_SIZE, FIELD_SIZE, EMBEDDING_SIZE)
    with CustomObjectScope({'FEFMLayer': layers.FEFMLayer}):
        layer_test(layers.FEFMLayer, kwargs=fefm_kwargs, input_shape=field_embedding_shape)
@pytest.mark.parametrize('reg_strength', [0.000001])
def test_FwFM(reg_strength):
    """Run ``layer_test`` on FwFMLayer with FIELD_SIZE fields and the given regularizer."""
    fwfm_kwargs = {'num_fields': FIELD_SIZE, 'regularizer': reg_strength}
    field_embedding_shape = (BATCH_SIZE, FIELD_SIZE, EMBEDDING_SIZE)
    with CustomObjectScope({'FwFMLayer': layers.FwFMLayer}):
        layer_test(layers.FwFMLayer, kwargs=fwfm_kwargs, input_shape=field_embedding_shape)
@pytest.mark.parametrize('layer_num', [0, 1])
def test_CrossNet(layer_num, ):
    """Run ``layer_test`` on CrossNet with zero and with one cross layer on a (2, 3) input."""
    cross_kwargs = {'layer_num': layer_num, }
    with CustomObjectScope({'CrossNet': layers.CrossNet}):
        layer_test(layers.CrossNet, kwargs=cross_kwargs, input_shape=(2, 3))
# def test_CrossNet_invalid():
# with pytest.raises(ValueError):
# with CustomObjectScope({'CrossNet': layers.CrossNet}):
# layer_test(layers.CrossNet, kwargs={
# 'layer_num': 1, 'l2_reg': 0}, input_shape=(2, 3, 4))
@pytest.mark.parametrize('reduce_sum', [True, False])
def test_InnerProductLayer(reduce_sum):
    """Run ``layer_test`` on InnerProductLayer with and without ``reduce_sum``."""
    # One (BATCH_SIZE, 1, EMBEDDING_SIZE) input per field.
    per_field_shapes = [(BATCH_SIZE, 1, EMBEDDING_SIZE)] * FIELD_SIZE
    with CustomObjectScope({'InnerProductLayer': layers.InnerProductLayer}):
        layer_test(layers.InnerProductLayer, kwargs={'reduce_sum': reduce_sum},
                   input_shape=per_field_shapes)
@pytest.mark.parametrize('kernel_type', ['mat', 'vec', 'num'])
def test_OutterProductLayer(kernel_type):
    """Run ``layer_test`` on OutterProductLayer for each supported ``kernel_type``."""
    # One (BATCH_SIZE, 1, EMBEDDING_SIZE) input per field.
    per_field_shapes = [(BATCH_SIZE, 1, EMBEDDING_SIZE)] * FIELD_SIZE
    with CustomObjectScope({'OutterProductLayer': layers.OutterProductLayer}):
        layer_test(layers.OutterProductLayer, kwargs={'kernel_type': kernel_type},
                   input_shape=per_field_shapes)
def test_BiInteractionPooling():
    """Run ``layer_test`` on BiInteractionPooling with default arguments."""
    field_embedding_shape = (BATCH_SIZE, FIELD_SIZE, EMBEDDING_SIZE)
    with CustomObjectScope({'BiInteractionPooling': layers.BiInteractionPooling}):
        layer_test(layers.BiInteractionPooling, kwargs={}, input_shape=field_embedding_shape)
def test_FM():
    """Run ``layer_test`` on the FM layer with default arguments."""
    field_embedding_shape = (BATCH_SIZE, FIELD_SIZE, EMBEDDING_SIZE)
    with CustomObjectScope({'FM': layers.FM}):
        layer_test(layers.FM, kwargs={}, input_shape=field_embedding_shape)
def test_AFMLayer():
    """Run ``layer_test`` on AFMLayer with a dropout rate of 0.5."""
    # One (BATCH_SIZE, 1, EMBEDDING_SIZE) input per field.
    per_field_shapes = [(BATCH_SIZE, 1, EMBEDDING_SIZE)] * FIELD_SIZE
    with CustomObjectScope({'AFMLayer': layers.AFMLayer}):
        layer_test(layers.AFMLayer, kwargs={'dropout_rate': 0.5}, input_shape=per_field_shapes)
@pytest.mark.parametrize(
    'layer_size,split_half',
    [
        ((10,), False),
        ((10, 8), True),
    ]
)
def test_CIN(layer_size, split_half):
    """Run ``layer_test`` on CIN for the given layer sizes and ``split_half`` setting."""
    cin_kwargs = {"layer_size": layer_size, "split_half": split_half}
    field_embedding_shape = (BATCH_SIZE, FIELD_SIZE, EMBEDDING_SIZE)
    with CustomObjectScope({'CIN': layers.CIN}):
        layer_test(layers.CIN, kwargs=cin_kwargs, input_shape=field_embedding_shape)
# @pytest.mark.parametrize(
# 'layer_size',
# [(), (3, 10)
# ]
# )
# def test_test_CIN_invalid(layer_size):
# with pytest.raises(ValueError):
# with CustomObjectScope({'CIN': layers.CIN}):
# layer_test(layers.CIN, kwargs={"layer_size": layer_size}, input_shape=(
# BATCH_SIZE, FIELD_SIZE, EMBEDDING_SIZE))
@pytest.mark.parametrize(
    'head_num,use_res',
    [
        (1, True),
        (2, False,),
    ]
)
def test_InteractingLayer(head_num, use_res, ):
    """Run ``layer_test`` on InteractingLayer for the given head count and residual setting."""
    interacting_kwargs = {"head_num": head_num, "use_res": use_res, }
    field_embedding_shape = (BATCH_SIZE, FIELD_SIZE, EMBEDDING_SIZE)
    with CustomObjectScope({'InteractingLayer': layers.InteractingLayer}):
        layer_test(layers.InteractingLayer, kwargs=interacting_kwargs,
                   input_shape=field_embedding_shape)
def test_FGCNNLayer():
    """Run ``layer_test`` on FGCNNLayer with two filter/kernel-width settings."""
    fgcnn_kwargs = {'filters': (4, 6,), 'kernel_width': (7, 7,)}
    field_embedding_shape = (BATCH_SIZE, FIELD_SIZE, EMBEDDING_SIZE)
    with CustomObjectScope({'FGCNNLayer': layers.FGCNNLayer}):
        layer_test(layers.FGCNNLayer, kwargs=fgcnn_kwargs, input_shape=field_embedding_shape)
# def test_SENETLayer():
# with CustomObjectScope({'SENETLayer': layers.SENETLayer}):
# layer_test(layers.SENETLayer, kwargs={'reduction_ratio':2}, input_shape=[(
# BATCH_SIZE, 1, EMBEDDING_SIZE)]*FIELD_SIZE)
@pytest.mark.parametrize('bilinear_type', ['all', 'each', 'interaction'])
def test_BilinearInteraction(bilinear_type):
    """Run ``layer_test`` on BilinearInteraction for each ``bilinear_type``."""
    # One (BATCH_SIZE, 1, EMBEDDING_SIZE) input per field.
    per_field_shapes = [(BATCH_SIZE, 1, EMBEDDING_SIZE)] * FIELD_SIZE
    with CustomObjectScope({'BilinearInteraction': layers.BilinearInteraction}):
        layer_test(layers.BilinearInteraction, kwargs={'bilinear_type': bilinear_type},
                   input_shape=per_field_shapes)
================================================
FILE: tests/layers/normalization_test.py
================================================
import pytest
try:
from tensorflow.python.keras.utils.generic_utils import CustomObjectScope
except ImportError:
from tensorflow.python.keras.utils import CustomObjectScope
from deepctr import layers
from tests.layers.interaction_test import BATCH_SIZE, FIELD_SIZE, EMBEDDING_SIZE
from tests.utils import layer_test
@pytest.mark.parametrize('axis', [-1, -2])
def test_LayerNormalization(axis):
    """Run ``layer_test`` on LayerNormalization over the last and second-to-last axis."""
    norm_kwargs = {"axis": axis, }
    field_embedding_shape = (BATCH_SIZE, FIELD_SIZE, EMBEDDING_SIZE)
    with CustomObjectScope({'LayerNormalization': layers.LayerNormalization}):
        layer_test(layers.LayerNormalization, kwargs=norm_kwargs,
                   input_shape=field_embedding_shape)
================================================
FILE: tests/layers/sequence_test.py
================================================
import pytest
from packaging import version
try:
from tensorflow.python.keras.utils.generic_utils import CustomObjectScope
except ImportError:
from tensorflow.python.keras.utils import CustomObjectScope
import tensorflow as tf
from deepctr.layers import sequence
from tests.utils import layer_test
# Put Keras into training mode for every test in this module.
try:
    tf.keras.backend.set_learning_phase(True)
except (ImportError, AttributeError):
    # A missing `set_learning_phase` attribute raises AttributeError rather than
    # ImportError, so both are caught before falling back to the private module.
    from tensorflow.python.keras.backend import set_learning_phase
    set_learning_phase(True)
BATCH_SIZE = 4
EMBEDDING_SIZE = 8
SEQ_LENGTH = 10
@pytest.mark.parametrize(
    'weight_normalization',
    [True, False],
)
def test_AttentionSequencePoolingLayer(weight_normalization):
    """Run ``layer_test`` on ``AttentionSequencePoolingLayer``.

    Three inputs are declared, with shapes ``(BATCH_SIZE, 1, EMBEDDING_SIZE)``,
    ``(BATCH_SIZE, SEQ_LENGTH, EMBEDDING_SIZE)`` and ``(BATCH_SIZE, 1)``.
    """
    shapes = [
        (BATCH_SIZE, 1, EMBEDDING_SIZE),
        (BATCH_SIZE, SEQ_LENGTH, EMBEDDING_SIZE),
        (BATCH_SIZE, 1),
    ]
    layer_cls = sequence.AttentionSequencePoolingLayer
    with CustomObjectScope({'AttentionSequencePoolingLayer': layer_cls}):
        layer_test(layer_cls, kwargs={'weight_normalization': weight_normalization}, input_shape=shapes)
@pytest.mark.parametrize(
    'mode,supports_masking,input_shape',
    [
        ('sum', False, [(BATCH_SIZE, SEQ_LENGTH, EMBEDDING_SIZE), (BATCH_SIZE, 1)]),
        ('mean', True, (BATCH_SIZE, SEQ_LENGTH, EMBEDDING_SIZE)),
        ('max', True, (BATCH_SIZE, SEQ_LENGTH, EMBEDDING_SIZE)),
    ],
)
def test_SequencePoolingLayer(mode, supports_masking, input_shape):
    """Run ``layer_test`` on ``SequencePoolingLayer`` for one pooling mode.

    On TensorFlow >= 1.14.0 only the ``'sum'`` mode is exercised; the other
    modes return early without testing anything.
    """
    tf_is_1_14_or_newer = version.parse(tf.__version__) >= version.parse('1.14.0')
    if mode != 'sum' and tf_is_1_14_or_newer:  # todo check further version
        return

    layer_kwargs = {'mode': mode, 'supports_masking': supports_masking}
    with CustomObjectScope({'SequencePoolingLayer': sequence.SequencePoolingLayer}):
        layer_test(
            sequence.SequencePoolingLayer,
            kwargs=layer_kwargs,
            input_shape=input_shape,
            supports_masking=supports_masking,
        )
# @pytest.mark.parametrize(
#
# 'supports_masking,input_shape',
#
# [( False, [(BATCH_SIZE, SEQ_LENGTH, EMBEDDING_SIZE), (BATCH_SIZE, 1),(BATCH_SIZE, 1)]), ( True, [(BATCH_SIZE, SEQ_LENGTH, EMBEDDING_SIZE),(BATCH_SIZE, 1)])
# ]
#
# )
# def test_WeightedSequenceLayer(supports_masking, input_shape):
# # if version.parse(tf.__version__) >= version.parse('1.14.0') : #todo check further version
# # return
# with CustomObjectScope({'WeightedSequenceLayer': sequence.WeightedSequenceLayer}):
# layer_test(sequence.WeightedSequenceLayer, kwargs={'supports_masking': supports_masking},
# input_shape=input_shape, supports_masking=supports_masking)
#
@pytest.mark.parametrize(
    'merge_mode',
    ['concat', 'ave', 'fw', 'bw', 'sum', 'mul'],
)
def test_BiLSTM(merge_mode):
    """Run ``layer_test`` on ``BiLSTM`` for the given ``merge_mode``."""
    # todo 0.5 (dropout is currently disabled for this test)
    layer_kwargs = {'merge_mode': merge_mode, 'units': EMBEDDING_SIZE, 'dropout_rate': 0.0}
    shape = (BATCH_SIZE, SEQ_LENGTH, EMBEDDING_SIZE)
    with CustomObjectScope({'BiLSTM': sequence.BiLSTM}):
        layer_test(sequence.BiLSTM, kwargs=layer_kwargs, input_shape=shape)
@pytest.mark.parametrize(
    'attention_type',
    ['scaled_dot_product', 'cos', 'ln', 'additive'],
)
def test_Transformer(attention_type):
    """Run ``layer_test`` on ``Transformer`` for the given ``attention_type``.

    Four inputs are declared: two ``(BATCH_SIZE, SEQ_LENGTH, EMBEDDING_SIZE)``
    tensors followed by two ``(BATCH_SIZE, 1)`` tensors.
    """
    seq_shape = (BATCH_SIZE, SEQ_LENGTH, EMBEDDING_SIZE)
    scalar_shape = (BATCH_SIZE, 1)
    layer_kwargs = {
        'att_embedding_size': 1,
        'head_num': 8,
        'use_layer_norm': True,
        'supports_masking': False,
        'attention_type': attention_type,
        'dropout_rate': 0.5,
        'output_type': 'sum',
    }
    with CustomObjectScope({'Transformer': sequence.Transformer}):
        layer_test(
            sequence.Transformer,
            kwargs=layer_kwargs,
            input_shape=[seq_shape, seq_shape, scalar_shape, scalar_shape],
        )
def test_KMaxPooling():
    """Run ``layer_test`` on ``KMaxPooling`` with ``k=3`` along axis 1 of a 4-D input."""
    shape = (BATCH_SIZE, SEQ_LENGTH, EMBEDDING_SIZE, 2)
    layer_kwargs = {'k': 3, 'axis': 1}
    with CustomObjectScope({'KMaxPooling': sequence.KMaxPooling}):
        layer_test(sequence.KMaxPooling, kwargs=layer_kwargs, input_shape=shape)
@pytest.mark.parametrize(
    'pos_embedding_trainable,zero_pad',
    [(True, False), (False, True)],
)
def test_PositionEncoding(pos_embedding_trainable, zero_pad):
    """Run ``layer_test`` on ``PositionEncoding`` for one flag combination.

    The ``tf`` module itself is also registered in the custom-object scope.
    """
    scope_objects = {'PositionEncoding': sequence.PositionEncoding, "tf": tf}
    layer_kwargs = {'pos_embedding_trainable': pos_embedding_trainable, 'zero_pad': zero_pad}
    with CustomObjectScope(scope_objects):
        layer_test(
            sequence.PositionEncoding,
            kwargs=layer_kwargs,
            input_shape=(BATCH_SIZE, SEQ_LENGTH, EMBEDDING_SIZE),
        )
================================================
FILE: tests/layers/utils_test.py
================================================
import numpy as np
import pytest
import tensorflow as tf
from deepctr.layers.utils import Hash, Linear
from tests.layers.interaction_test import BATCH_SIZE, EMBEDDING_SIZE
from tests.utils import layer_test
try:
from tensorflow.python.keras.utils.generic_utils import CustomObjectScope
except ImportError:
from tensorflow.python.keras.utils import CustomObjectScope
@pytest.mark.parametrize(
    'num_buckets,mask_zero,vocabulary_path,input_data,expected_output',
    [
        (3 + 1, False, None, ['lakemerson'], None),
        (3 + 1, True, None, ['lakemerson'], None),
        (
            3 + 1, False, "./tests/layers/vocabulary_example.csv", [['lake'], ['johnson'], ['lakemerson']],
            [[1], [3], [0]]),
    ]
)
def test_Hash(num_buckets, mask_zero, vocabulary_path, input_data, expected_output):
    """Run ``layer_test`` on ``Hash`` with string inputs and int64 outputs.

    The test returns early (testing nothing) when TensorFlow exposes no
    ``tf.version`` attribute or its major version is below 2.
    """
    # Compare the numeric major version: comparing the raw strings is
    # lexicographic, so e.g. '10.0.0' < '2.0.0' would evaluate to True.
    if not hasattr(tf, 'version') or int(tf.version.VERSION.split('.')[0]) < 2:
        return

    with CustomObjectScope({'Hash': Hash}):
        layer_test(Hash,
                   kwargs={'num_buckets': num_buckets, 'mask_zero': mask_zero, 'vocabulary_path': vocabulary_path},
                   input_dtype=tf.string, input_data=np.array(input_data, dtype='str'),
                   expected_output_dtype=tf.int64, expected_output=expected_output)
def test_Linear():
    """Run ``layer_test`` on ``Linear`` with ``mode=1`` and a bias term."""
    layer_kwargs = {'mode': 1, 'use_bias': True}
    shape = (BATCH_SIZE, EMBEDDING_SIZE)
    with CustomObjectScope({'Linear': Linear}):
        layer_test(Linear, kwargs=layer_kwargs, input_shape=shape)
================================================
FILE: tests/layers/vocabulary_example.csv
================================================
1,lake
2,merson
3,johnson
================================================
FILE: tests/models/AFM_test.py
================================================
import pytest
from deepctr.models import AFM
from ..utils import check_model, check_estimator, get_test_data, get_test_data_estimator, SAMPLE_SIZE, \
TEST_Estimator
@pytest.mark.parametrize(
    'use_attention,sparse_feature_num,dense_feature_num',
    [(True, 3, 0)],
)
def test_AFM(use_attention, sparse_feature_num, dense_feature_num):
    """Build an AFM model on ``get_test_data`` output and run ``check_model``."""
    x, y, feature_columns = get_test_data(
        SAMPLE_SIZE,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=dense_feature_num,
    )
    # The same columns serve as both the linear and the DNN feature columns.
    afm = AFM(feature_columns, feature_columns, use_attention=use_attention, afm_dropout=0.5)
    check_model(afm, "AFM", x, y)
@pytest.mark.parametrize(
    'use_attention,sparse_feature_num,dense_feature_num',
    [(True, 3, 0)],
)
def test_AFMEstimator(use_attention, sparse_feature_num, dense_feature_num):
    """Build an ``AFMEstimator`` and run ``check_estimator`` on it.

    Returns early without testing when ``TEST_Estimator`` is falsy.
    """
    if not TEST_Estimator:
        return
    from deepctr.estimator import AFMEstimator

    linear_cols, dnn_cols, input_fn = get_test_data_estimator(
        SAMPLE_SIZE,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=dense_feature_num,
    )
    estimator = AFMEstimator(linear_cols, dnn_cols, use_attention=use_attention, afm_dropout=0.5)
    check_estimator(estimator, input_fn)
if __name__ == "__main__":
pass
================================================
FILE: tests/models/AutoInt_test.py
================================================
import pytest
import tensorflow as tf
from packaging import version
from deepctr.models import AutoInt
from ..utils import check_model, get_test_data, SAMPLE_SIZE, get_test_data_estimator, check_estimator, \
TEST_Estimator
@pytest.mark.parametrize(
    'att_layer_num,dnn_hidden_units,sparse_feature_num',
    [(1, (), 1), (1, (4,), 1)],  # (0, (4,), 2), (2, (4, 4,), 2)
)
def test_AutoInt(att_layer_num, dnn_hidden_units, sparse_feature_num):
    """Build an AutoInt model and run ``check_model`` on it.

    On TensorFlow >= 1.14.0 the case with empty ``dnn_hidden_units`` returns
    early without testing anything.
    """
    tf_is_1_14_or_newer = version.parse(tf.__version__) >= version.parse("1.14.0")
    if tf_is_1_14_or_newer and len(dnn_hidden_units) == 0:  # todo check version
        return

    # The dense feature count deliberately mirrors the sparse feature count.
    x, y, feature_columns = get_test_data(
        SAMPLE_SIZE,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=sparse_feature_num,
    )
    autoint = AutoInt(
        feature_columns,
        feature_columns,
        att_layer_num=att_layer_num,
        dnn_hidden_units=dnn_hidden_units,
        dnn_dropout=0.5,
    )
    check_model(autoint, "AutoInt", x, y)
@pytest.mark.parametrize(
    'att_layer_num,dnn_hidden_units,sparse_feature_num',
    [(1, (4,), 1)],  # (0, (4,), 2), (2, (4, 4,), 2)
)
def test_AutoIntEstimator(att_layer_num, dnn_hidden_units, sparse_feature_num):
    """Build an ``AutoIntEstimator`` and run ``check_estimator`` on it.

    Returns early without testing when ``TEST_Estimator`` is falsy.
    """
    if not TEST_Estimator:
        return
    from deepctr.estimator import AutoIntEstimator

    linear_cols, dnn_cols, input_fn = get_test_data_estimator(
        SAMPLE_SIZE,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=sparse_feature_num,
    )
    estimator = AutoIntEstimator(
        linear_cols,
        dnn_cols,
        att_layer_num=att_layer_num,
        dnn_hidden_units=dnn_hidden_units,
        dnn_dropout=0.5,
    )
    check_estimator(estimator, input_fn)
if __name__ == "__main__":
pass
================================================
FILE: tests/models/BST_test.py
================================================
from deepctr.models import BST
from ..utils import check_model
from .DIN_test import get_xy_fd
def test_BST():
    """Build a BST model on the DIN fixture and run ``check_model`` with model I/O checks."""
    x, y, feature_columns, behavior_feature_list = get_xy_fd(hash_flag=True)
    bst = BST(
        dnn_feature_columns=feature_columns,
        history_feature_list=behavior_feature_list,
        att_head_num=4,
    )
    check_model(bst, "BST", x, y, check_model_io=True)
if __name__ == "__main__":
pass
================================================
FILE: tests/models/CCPM_test.py
================================================
import pytest
import tensorflow as tf
from deepctr.models import CCPM
from ..utils import check_model, get_test_data, SAMPLE_SIZE, check_estimator, get_test_data_estimator, TEST_Estimator
@pytest.mark.parametrize(
    'sparse_feature_num,dense_feature_num',
    [(3, 0)
     ]
)
def test_CCPM(sparse_feature_num, dense_feature_num):
    """Build a CCPM model and run ``check_model`` on it.

    The test returns early (testing nothing) on TensorFlow 2 and newer.
    """
    # Compare the numeric major version: comparing the raw strings is
    # lexicographic, so e.g. "10.0.0" >= "2.0.0" would evaluate to False.
    if int(tf.__version__.split('.')[0]) >= 2:  # todo
        return

    model_name = "CCPM"
    sample_size = SAMPLE_SIZE
    x, y, feature_columns = get_test_data(sample_size, sparse_feature_num=sparse_feature_num,
                                          dense_feature_num=dense_feature_num)

    model = CCPM(feature_columns, feature_columns, conv_kernel_width=(3, 2), conv_filters=(
        2, 1), dnn_hidden_units=[32, ], dnn_dropout=0.5)
    check_model(model, model_name, x, y)
@pytest.mark.parametrize(
    'sparse_feature_num,dense_feature_num',
    [(2, 0),
     ]
)
def test_CCPM_without_seq(sparse_feature_num, dense_feature_num):
    """Build a CCPM model on data without sequence features and run ``check_model``.

    The test returns early (testing nothing) on TensorFlow 2 and newer.
    """
    # Compare the numeric major version: comparing the raw strings is
    # lexicographic, so e.g. "10.0.0" >= "2.0.0" would evaluate to False.
    if int(tf.__version__.split('.')[0]) >= 2:
        return

    model_name = "CCPM"
    sample_size = SAMPLE_SIZE
    # sequence_feature=() asks get_test_data for no sequence features.
    x, y, feature_columns = get_test_data(sample_size, sparse_feature_num=sparse_feature_num,
                                          dense_feature_num=dense_feature_num, sequence_feature=())

    model = CCPM(feature_columns, feature_columns, conv_kernel_width=(3, 2), conv_filters=(
        2, 1), dnn_hidden_units=[32, ], dnn_dropout=0.5)
    check_model(model, model_name, x, y)
@pytest.mark.parametrize(
    'sparse_feature_num,dense_feature_num',
    [(2, 0)],
)
def test_CCPMEstimator_without_seq(sparse_feature_num, dense_feature_num):
    """Build a ``CCPMEstimator`` and run ``check_estimator`` on it.

    Returns early without testing when ``TEST_Estimator`` is falsy.
    """
    if not TEST_Estimator:
        return
    from deepctr.estimator import CCPMEstimator

    # NOTE(review): dense_feature_num is fed from sparse_feature_num, so the
    # parametrized dense_feature_num value is unused here - confirm intended.
    linear_cols, dnn_cols, input_fn = get_test_data_estimator(
        SAMPLE_SIZE,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=sparse_feature_num,
    )
    estimator = CCPMEstimator(
        linear_cols,
        dnn_cols,
        conv_kernel_width=(3, 2),
        conv_filters=(2, 1),
        dnn_hidden_units=[32, ],
        dnn_dropout=0.5,
    )
    check_estimator(estimator, input_fn)
if __name__ == "__main__":
pass
================================================
FILE: tests/models/DCNMix_test.py
================================================
import pytest
from deepctr.models import DCNMix
from ..utils import check_model, get_test_data, SAMPLE_SIZE
@pytest.mark.parametrize(
    'cross_num,hidden_size,sparse_feature_num',
    [(0, (8,), 2), (1, (), 1), (1, (8,), 3)],
)
def test_DCNMix(cross_num, hidden_size, sparse_feature_num):
    """Build a DCNMix model and run ``check_model`` on it."""
    x, y, feature_columns = get_test_data(
        SAMPLE_SIZE,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=sparse_feature_num,
    )
    dcn_mix = DCNMix(
        feature_columns,
        feature_columns,
        cross_num=cross_num,
        dnn_hidden_units=hidden_size,
        dnn_dropout=0.5,
    )
    check_model(dcn_mix, "DCNMix", x, y)
if __name__ == "__main__":
pass
================================================
FILE: tests/models/DCN_test.py
================================================
import pytest
from deepctr.models import DCN
from ..utils import check_model, get_test_data, SAMPLE_SIZE, get_test_data_estimator, check_estimator, TEST_Estimator
@pytest.mark.parametrize(
    'cross_num,hidden_size,sparse_feature_num,cross_parameterization',
    [
        (0, (8,), 2, 'vector'),
        (1, (), 1, 'vector'),
        (1, (8,), 3, 'vector'),
        (0, (8,), 2, 'matrix'),
        (1, (), 1, 'matrix'),
        (1, (8,), 3, 'matrix'),
    ],
)
def test_DCN(cross_num, hidden_size, sparse_feature_num, cross_parameterization):
    """Build a DCN model for one cross/DNN configuration and run ``check_model``."""
    x, y, feature_columns = get_test_data(
        SAMPLE_SIZE,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=sparse_feature_num,
    )
    dcn = DCN(
        feature_columns,
        feature_columns,
        cross_num=cross_num,
        cross_parameterization=cross_parameterization,
        dnn_hidden_units=hidden_size,
        dnn_dropout=0.5,
    )
    check_model(dcn, "DCN", x, y)
def test_DCN_2():
    """Build a DCN model with an empty linear feature-column list and run ``check_model``."""
    x, y, feature_columns = get_test_data(SAMPLE_SIZE, sparse_feature_num=3, dense_feature_num=2)
    dcn = DCN([], feature_columns, cross_num=1, dnn_hidden_units=(8,), dnn_dropout=0.5)
    check_model(dcn, "DCN", x, y)
@pytest.mark.parametrize(
    'cross_num,hidden_size,sparse_feature_num',
    [(1, (8,), 3)],
)
def test_DCNEstimator(cross_num, hidden_size, sparse_feature_num):
    """Build a ``DCNEstimator`` and run ``check_estimator`` on it.

    Returns early without testing when ``TEST_Estimator`` is falsy.
    """
    if not TEST_Estimator:
        return
    from deepctr.estimator import DCNEstimator

    linear_cols, dnn_cols, input_fn = get_test_data_estimator(
        SAMPLE_SIZE,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=sparse_feature_num,
    )
    estimator = DCNEstimator(
        linear_cols,
        dnn_cols,
        cross_num=cross_num,
        dnn_hidden_units=hidden_size,
        dnn_dropout=0.5,
    )
    check_estimator(estimator, input_fn)
# def test_DCN_invalid(embedding_size=8, cross_num=0, hidden_size=()):
# feature_dim_dict = {'sparse': [SparseFeat('sparse_1', 2), SparseFeat('sparse_2', 5), SparseFeat('sparse_3', 10)],
# 'dense': [SparseFeat('dense_1', 1), SparseFeat('dense_1', 1), SparseFeat('dense_1', 1)]}
# with pytest.raises(ValueError):
# _ = DCN(None, embedding_size=embedding_size, cross_num=cross_num, dnn_hidden_units=hidden_size, dnn_dropout=0.5)
if __name__ == "__main__":
pass
================================================
FILE: tests/models/DIEN_test.py
================================================
import numpy as np
import pytest
import tensorflow as tf
from packaging import version
from deepctr.feature_column import SparseFeat, VarLenSparseFeat, DenseFeat, get_feature_names
from deepctr.models import DIEN
from ..utils import check_model
def get_xy_fd(use_neg=False, hash_flag=False):
    """Assemble a three-sample fixture for the DIEN tests.

    :param use_neg: when true, negative-history sequence arrays and columns
        (``neg_hist_item`` / ``neg_hist_item_gender``) are added as well.
    :param hash_flag: forwarded to the non-sequence ``SparseFeat`` columns.
    :return: ``(x, y, feature_columns, behavior_feature_list)`` where ``x``
        maps every name reported by ``get_feature_names`` to its array.
    """
    # NOTE(review): hash_flag is passed positionally as the third SparseFeat
    # argument, whereas other fixtures pass it as use_hash= - confirm which
    # parameter is intended.
    feature_columns = [
        SparseFeat('user', 3, hash_flag),
        SparseFeat('gender', 2, hash_flag),
        SparseFeat('item', 3 + 1, hash_flag),
        SparseFeat('item_gender', 2 + 1, hash_flag),
        DenseFeat('score', 1),
    ]
    # History sequences reuse the 'item' / 'item_gender' embedding names.
    feature_columns.extend([
        VarLenSparseFeat(
            SparseFeat('hist_item', vocabulary_size=3 + 1, embedding_dim=8, embedding_name='item'),
            maxlen=4, length_name="seq_length"),
        VarLenSparseFeat(
            SparseFeat('hist_item_gender', 2 + 1, embedding_dim=4, embedding_name='item_gender'),
            maxlen=4, length_name="seq_length"),
    ])
    behavior_feature_list = ["item", "item_gender"]

    feature_dict = {
        'user': np.array([0, 1, 2]),
        'gender': np.array([0, 1, 0]),
        'item': np.array([1, 2, 3]),  # 0 is mask value
        'item_gender': np.array([1, 2, 1]),  # 0 is mask value
        'hist_item': np.array([[1, 2, 3, 0], [1, 2, 3, 0], [1, 2, 0, 0]]),
        'hist_item_gender': np.array([[1, 1, 2, 0], [2, 1, 1, 0], [2, 1, 0, 0]]),
        'score': np.array([0.1, 0.2, 0.3]),
        "seq_length": np.array([3, 3, 2]),
    }

    if use_neg:
        feature_dict['neg_hist_item'] = np.array([[1, 2, 3, 0], [1, 2, 3, 0], [1, 2, 0, 0]])
        feature_dict['neg_hist_item_gender'] = np.array([[1, 1, 2, 0], [2, 1, 1, 0], [2, 1, 0, 0]])
        feature_columns.extend([
            VarLenSparseFeat(
                SparseFeat('neg_hist_item', vocabulary_size=3 + 1, embedding_dim=8, embedding_name='item'),
                maxlen=4, length_name="seq_length"),
            VarLenSparseFeat(
                SparseFeat('neg_hist_item_gender', 2 + 1, embedding_dim=4, embedding_name='item_gender'),
                maxlen=4, length_name="seq_length"),
        ])

    x = {name: feature_dict[name] for name in get_feature_names(feature_columns)}
    y = np.array([1, 0, 1])
    return x, y, feature_columns, behavior_feature_list
# @pytest.mark.xfail(reason="There is a bug when save model use Dice")
# @pytest.mark.skip(reason="misunderstood the API")
@pytest.mark.parametrize(
    'gru_type',
    ['GRU', 'AIGRU', 'AGRU'],  # ,'AUGRU',
)
def test_DIEN(gru_type):
    """Build a DIEN model for one ``gru_type`` and run ``check_model``.

    On TensorFlow >= 2.0.0 eager execution is disabled and the test returns
    without building a model.
    """
    running_tf2 = version.parse(tf.__version__) >= version.parse('2.0.0')
    if running_tf2:
        tf.compat.v1.disable_eager_execution()  # todo
        return

    x, y, feature_columns, behavior_feature_list = get_xy_fd(hash_flag=True)
    dien = DIEN(
        feature_columns,
        behavior_feature_list,
        dnn_hidden_units=[4, 4, 4],
        dnn_dropout=0.5,
        gru_type=gru_type,
    )
    # TODO:fix bugs when load model in other type
    check_model(dien, "DIEN_" + gru_type, x, y, check_model_io=(gru_type == "GRU"))
def test_DIEN_neg():
    """Build a DIEN model with negative sampling (AUGRU) and run ``check_model``.

    Returns early without testing on TensorFlow >= 1.14.0.
    """
    if version.parse(tf.__version__) >= version.parse("1.14.0"):
        return

    x, y, feature_columns, behavior_feature_list = get_xy_fd(use_neg=True)
    dien = DIEN(
        feature_columns,
        behavior_feature_list,
        dnn_hidden_units=[4, 4, 4],
        dnn_dropout=0.5,
        gru_type="AUGRU",
        use_negsampling=True,
    )
    check_model(dien, "DIEN_neg", x, y)
if __name__ == "__main__":
pass
================================================
FILE: tests/models/DIFM_test.py
================================================
import pytest
from deepctr.models import DIFM
from ..utils import check_model, get_test_data, SAMPLE_SIZE
@pytest.mark.parametrize(
    'att_head_num,dnn_hidden_units,sparse_feature_num',
    [(1, (4,), 2), (2, (4, 4,), 2), (1, (4,), 1)]
)
def test_DIFM(att_head_num, dnn_hidden_units, sparse_feature_num):
    """Build a DIFM model for one head/DNN configuration and run ``check_model``."""
    model_name = "DIFM"
    sample_size = SAMPLE_SIZE
    x, y, feature_columns = get_test_data(sample_size, sparse_feature_num=sparse_feature_num,
                                          dense_feature_num=sparse_feature_num)

    # Forward the parametrized head count; previously it was accepted but never
    # passed on, so every case ran with the model's default number of heads.
    model = DIFM(feature_columns, feature_columns, att_head_num=att_head_num,
                 dnn_hidden_units=dnn_hidden_units, dnn_dropout=0.5)
    check_model(model, model_name, x, y)
if __name__ == "__main__":
pass
================================================
FILE: tests/models/DIN_test.py
================================================
import numpy as np
import tensorflow as tf
from packaging import version
from deepctr.feature_column import SparseFeat, VarLenSparseFeat, DenseFeat, get_feature_names
from deepctr.models.sequence.din import DIN
from ..utils import check_model
def get_xy_fd(hash_flag=False):
    """Assemble a three-sample fixture for the DIN (and BST) tests.

    :param hash_flag: accepted for signature compatibility; this fixture does
        not read it.
    :return: ``(x, y, feature_columns, behavior_feature_list)`` where ``x``
        maps every name reported by ``get_feature_names`` to its array.
    """
    feature_columns = [
        SparseFeat('user', 3, embedding_dim=10),
        SparseFeat('gender', 2, embedding_dim=4),
        SparseFeat('item_id', 3 + 1, embedding_dim=8),
        SparseFeat('cate_id', 2 + 1, embedding_dim=4),
        DenseFeat('pay_score', 1),
    ]
    feature_columns.extend([
        VarLenSparseFeat(
            SparseFeat('hist_item_id', vocabulary_size=3 + 1, embedding_dim=8, embedding_name='item_id'),
            maxlen=4, length_name="seq_length"),
        VarLenSparseFeat(
            SparseFeat('hist_cate_id', 2 + 1, embedding_dim=4, embedding_name='cate_id'),
            maxlen=4, length_name="seq_length"),
    ])
    # Notice: History behavior sequence feature name must start with "hist_".
    behavior_feature_list = ["item_id", "cate_id"]

    feature_dict = {
        'user': np.array([0, 1, 2]),
        'gender': np.array([0, 1, 0]),
        'item_id': np.array([1, 2, 3]),  # 0 is mask value
        'cate_id': np.array([1, 2, 2]),  # 0 is mask value
        'hist_item_id': np.array([[1, 2, 3, 0], [3, 2, 1, 0], [1, 2, 0, 0]]),
        'hist_cate_id': np.array([[1, 2, 2, 0], [2, 2, 1, 0], [1, 2, 0, 0]]),
        'pay_score': np.array([0.1, 0.2, 0.3]),
        # the actual length of the behavior sequence
        'seq_length': np.array([3, 3, 2]),
    }

    feature_names = get_feature_names(feature_columns)
    x = {name: feature_dict[name] for name in feature_names}
    y = np.array([1, 0, 1])
    return x, y, feature_columns, behavior_feature_list
# @pytest.mark.xfail(reason="There is a bug when save model use Dice")
# @pytest.mark.skip(reason="misunderstood the API")
def test_DIN():
    """Build a DIN model and run ``check_model`` on it.

    The attention activation is ``'sigmoid'`` on TensorFlow >= 2.8.0 and
    ``'dice'`` on older versions.
    """
    x, y, feature_columns, behavior_feature_list = get_xy_fd(True)

    # todo: test dice on TensorFlow >= 2.8.0 as well
    tf_is_2_8_or_newer = version.parse(tf.__version__) >= version.parse('2.8.0')
    att_activation = 'sigmoid' if tf_is_2_8_or_newer else 'dice'

    din = DIN(
        feature_columns,
        behavior_feature_list,
        dnn_hidden_units=[4, 4, 4],
        att_activation=att_activation,
        dnn_dropout=0.5,
    )
    check_model(din, "DIN", x, y)
if __name__ == "__main__":
pass
================================================
FILE: tests/models/DSIN_test.py
================================================
import numpy as np
import pytest
from deepctr.feature_column import SparseFeat, VarLenSparseFeat, DenseFeat, get_feature_names
from deepctr.models.sequence.dsin import DSIN
from ..utils import check_model
def get_xy_fd(hash_flag=False):
    """Assemble a three-sample, two-session fixture for the DSIN tests.

    :param hash_flag: forwarded as ``use_hash`` to every ``SparseFeat``.
    :return: ``(x, y, feature_columns, behavior_feature_list)``; ``x`` holds
        every array named by ``get_feature_names`` plus a ``"sess_length"``
        entry.
    """
    feature_columns = [
        SparseFeat('user', 3, use_hash=hash_flag),
        SparseFeat('gender', 2, use_hash=hash_flag),
        SparseFeat('item', 3 + 1, use_hash=hash_flag),
        SparseFeat('item_gender', 2 + 1, use_hash=hash_flag),
        DenseFeat('score', 1),
    ]
    # Session 0 sequence columns.
    feature_columns.extend([
        VarLenSparseFeat(
            SparseFeat('sess_0_item', 3 + 1, embedding_dim=4, use_hash=hash_flag, embedding_name='item'),
            maxlen=4),
        VarLenSparseFeat(
            SparseFeat('sess_0_item_gender', 2 + 1, embedding_dim=4, use_hash=hash_flag,
                       embedding_name='item_gender'),
            maxlen=4),
    ])
    # Session 1 sequence columns.
    feature_columns.extend([
        VarLenSparseFeat(
            SparseFeat('sess_1_item', 3 + 1, embedding_dim=4, use_hash=hash_flag, embedding_name='item'),
            maxlen=4),
        VarLenSparseFeat(
            SparseFeat('sess_1_item_gender', 2 + 1, embedding_dim=4, use_hash=hash_flag,
                       embedding_name='item_gender'),
            maxlen=4),
    ])
    behavior_feature_list = ["item", "item_gender"]

    feature_dict = {
        'user': np.array([0, 1, 2]),
        'gender': np.array([0, 1, 0]),
        'item': np.array([1, 2, 3]),  # 0 is mask value
        'item_gender': np.array([1, 2, 1]),  # 0 is mask value
        'sess_0_item': np.array([[1, 2, 3, 0], [1, 2, 3, 0], [0, 0, 0, 0]]),
        'sess_0_item_gender': np.array([[1, 1, 2, 0], [2, 1, 1, 0], [0, 0, 0, 0]]),
        'score': np.array([0.1, 0.2, 0.3]),
        'sess_1_item': np.array([[1, 2, 3, 0], [0, 0, 0, 0], [0, 0, 0, 0]]),
        'sess_1_item_gender': np.array([[1, 1, 2, 0], [0, 0, 0, 0], [0, 0, 0, 0]]),
    }

    x = {name: feature_dict[name] for name in get_feature_names(feature_columns)}
    # The session count is added under its own key after the named features.
    x["sess_length"] = np.array([2, 1, 0])
    y = np.array([1, 0, 1])
    return x, y, feature_columns, behavior_feature_list
@pytest.mark.parametrize(
    'bias_encoding',
    [True, False],
)
def test_DSIN(bias_encoding):
    """Build a DSIN model with two sessions and run ``check_model`` on it."""
    x, y, feature_columns, behavior_feature_list = get_xy_fd(True)
    dsin = DSIN(
        feature_columns,
        behavior_feature_list,
        sess_max_count=2,
        bias_encoding=bias_encoding,
        dnn_hidden_units=[4, 4],
        dnn_dropout=0.5,
    )
    check_model(dsin, "DSIN", x, y)
if __name__ == "__main__":
pass
================================================
FILE: tests/models/DeepFEFM_test.py
================================================
import pytest
import tensorflow as tf
from deepctr.models import DeepFEFM
from ..utils import check_model, get_test_data, SAMPLE_SIZE, get_test_data_estimator, check_estimator, TEST_Estimator
@pytest.mark.parametrize(
    'hidden_size,sparse_feature_num,use_fefm,use_linear,use_fefm_embed_in_dnn',
    [
        ((2,), 1, True, True, True),
        ((2,), 1, True, True, False),
        ((2,), 1, True, False, True),
        ((2,), 1, False, True, True),
        ((2,), 1, True, False, False),
        ((2,), 1, False, True, False),
        ((2,), 1, False, False, True),
        ((2,), 1, False, False, False),
        ((), 1, True, True, True),
    ],
)
def test_DeepFEFM(hidden_size, sparse_feature_num, use_fefm, use_linear, use_fefm_embed_in_dnn):
    """Build a DeepFEFM model for one flag combination and run ``check_model``.

    Returns early without testing on TensorFlow 1.15.0 and 1.4.0.
    """
    if tf.__version__ in ("1.15.0", "1.4.0"):  # slow in tf 1.15
        return

    x, y, feature_columns = get_test_data(
        SAMPLE_SIZE,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=sparse_feature_num,
    )
    deep_fefm = DeepFEFM(
        feature_columns,
        feature_columns,
        dnn_hidden_units=hidden_size,
        dnn_dropout=0.5,
        use_linear=use_linear,
        use_fefm=use_fefm,
        use_fefm_embed_in_dnn=use_fefm_embed_in_dnn,
    )
    check_model(deep_fefm, "DeepFEFM", x, y)
@pytest.mark.parametrize(
    'hidden_size,sparse_feature_num',
    [
        ((2,), 2),
        ((), 2),
    ],
)
def test_DeepFEFMEstimator(hidden_size, sparse_feature_num):
    """Build a ``DeepFEFMEstimator`` and run ``check_estimator`` on it.

    Returns early without testing when ``TEST_Estimator`` is falsy or the
    TensorFlow version is exactly 1.4.0.
    """
    import tensorflow as tf

    if not TEST_Estimator or tf.__version__ == "1.4.0":
        return
    from deepctr.estimator import DeepFEFMEstimator

    linear_cols, dnn_cols, input_fn = get_test_data_estimator(
        SAMPLE_SIZE,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=sparse_feature_num,
    )
    estimator = DeepFEFMEstimator(linear_cols, dnn_cols, dnn_hidden_units=hidden_size, dnn_dropout=0.5)
    check_estimator(estimator, input_fn)
if __name__ == "__main__":
pass
================================================
FILE: tests/models/DeepFM_test.py
================================================
import pytest
from deepctr.models import DeepFM
from ..utils import check_model, get_test_data, SAMPLE_SIZE, get_test_data_estimator, check_estimator, TEST_Estimator
@pytest.mark.parametrize(
    'hidden_size,sparse_feature_num',
    [
        ((2,), 1),
        ((3,), 2),
    ],  # (True, (32,), 3), (False, (32,), 1)
)
def test_DeepFM(hidden_size, sparse_feature_num):
    """Build a DeepFM model and run ``check_model`` on it."""
    x, y, feature_columns = get_test_data(
        SAMPLE_SIZE,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=sparse_feature_num,
    )
    deepfm = DeepFM(feature_columns, feature_columns, dnn_hidden_units=hidden_size, dnn_dropout=0.5)
    check_model(deepfm, "DeepFM", x, y)
@pytest.mark.parametrize(
    'hidden_size,sparse_feature_num',
    [
        ((3,), 2),
    ],  # (True, (32,), 3), (False, (32,), 1)
)
def test_DeepFMEstimator(hidden_size, sparse_feature_num):
    """Build a regression ``DeepFMEstimator`` and run ``check_estimator`` on it.

    Returns early without testing when ``TEST_Estimator`` is falsy.
    """
    if not TEST_Estimator:
        return
    from deepctr.estimator import DeepFMEstimator

    # Non-classification data is requested to match task="regression" below.
    linear_cols, dnn_cols, input_fn = get_test_data_estimator(
        SAMPLE_SIZE,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=sparse_feature_num,
        classification=False,
    )
    estimator = DeepFMEstimator(
        linear_cols,
        dnn_cols,
        dnn_hidden_units=hidden_size,
        dnn_dropout=0.5,
        task="regression",
    )
    check_estimator(estimator, input_fn)
if __name__ == "__main__":
pass
================================================
FILE: tests/models/EDCN_test.py
================================================
import pytest
from deepctr.models import EDCN
from ..utils import check_model, get_test_data, SAMPLE_SIZE
@pytest.mark.parametrize(
    'bridge_type, cross_num, cross_parameterization, sparse_feature_num',
    [
        ('pointwise_addition', 2, 'vector', 3),
        ('hadamard_product', 2, 'vector', 4),
        ('concatenation', 1, 'vector', 5),
        ('attention_pooling', 2, 'matrix', 6),
    ],
)
def test_EDCN(bridge_type, cross_num, cross_parameterization, sparse_feature_num):
    """Build an EDCN model for one bridge configuration and run ``check_model``.

    Only sparse features are requested (``dense_feature_num=0``).
    """
    x, y, feature_columns = get_test_data(
        SAMPLE_SIZE,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=0,
    )
    # The last three arguments are passed positionally, as EDCN declares them.
    edcn = EDCN(feature_columns, feature_columns, cross_num, cross_parameterization, bridge_type)
    check_model(edcn, "EDCN", x, y)
if __name__ == "__main__":
pass
================================================
FILE: tests/models/FGCNN_test.py
================================================
import pytest
from deepctr.models import FGCNN
from tests.utils import check_model, get_test_data, SAMPLE_SIZE
@pytest.mark.parametrize(
    'sparse_feature_num,dense_feature_num',
    [(1, 1), (3, 3)],
)
def test_FGCNN(sparse_feature_num, dense_feature_num):
    """Build an FGCNN model and run ``check_model`` with model I/O checks disabled."""
    x, y, feature_columns = get_test_data(
        SAMPLE_SIZE,
        embedding_size=8,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=dense_feature_num,
    )
    fgcnn = FGCNN(
        feature_columns,
        feature_columns,
        conv_kernel_width=(3, 2),
        conv_filters=(2, 1),
        new_maps=(2, 2),
        pooling_width=(2, 2),
        dnn_hidden_units=(32,),
        dnn_dropout=0.5,
    )
    # TODO: add model_io check
    check_model(fgcnn, "FGCNN", x, y, check_model_io=False)
# @pytest.mark.parametrize(
# 'sparse_feature_num,dense_feature_num',
# [(2, 1),
# ]
# )
# def test_FGCNN_without_seq(sparse_feature_num, dense_feature_num):
# model_name = "FGCNN_noseq"
#
# sample_size = SAMPLE_SIZE
# x, y, feature_columns = get_test_data(sample_size, sparse_feature_num=sparse_feature_num,
# dense_feature_num=dense_feature_num, sequence_feature=())
#
# model = FGCNN(feature_columns, feature_columns, conv_kernel_width=(), conv_filters=(
# ), new_maps=(), pooling_width=(), dnn_hidden_units=(32,), dnn_dropout=0.5, )
# # TODO: add model_io check
# check_model(model, model_name, x, y, check_model_io=False)
if __name__ == "__main__":
pass
================================================
FILE: tests/models/FLEN_test.py
================================================
import pytest
from deepctr.models import FLEN
from ..utils import check_model, get_test_data, SAMPLE_SIZE
@pytest.mark.parametrize(
    'hidden_size,sparse_feature_num',
    [
        ((3,), 6),
    ],  # (True, (32,), 3), (False, (32,), 1)
)
def test_FLEN(hidden_size, sparse_feature_num):
    """Build a FLEN model on grouped features and run ``check_model`` on it."""
    x, y, feature_columns = get_test_data(
        SAMPLE_SIZE,
        embedding_size=2,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=sparse_feature_num,
        use_group=True,
    )
    flen = FLEN(feature_columns, feature_columns, dnn_hidden_units=hidden_size, dnn_dropout=0.5)
    check_model(flen, "FLEN", x, y)
if __name__ == "__main__":
pass
================================================
FILE: tests/models/FNN_test.py
================================================
import pytest
import tensorflow as tf
from deepctr.models import FNN
from ..utils import check_model, get_test_data, SAMPLE_SIZE, get_test_data_estimator, check_estimator, TEST_Estimator
@pytest.mark.parametrize(
    'sparse_feature_num,dense_feature_num',
    [(1, 1), (3, 3)
     ]
)
def test_FNN(sparse_feature_num, dense_feature_num):
    """Build an FNN model and run ``check_model`` on it.

    The test returns early (testing nothing) on TensorFlow 2 and newer.
    """
    # Compare the numeric major version: comparing the raw strings is
    # lexicographic, so e.g. "10.0.0" >= "2.0.0" would evaluate to False.
    if int(tf.__version__.split('.')[0]) >= 2:
        return

    model_name = "FNN"
    sample_size = SAMPLE_SIZE
    x, y, feature_columns = get_test_data(sample_size, sparse_feature_num=sparse_feature_num,
                                          dense_feature_num=dense_feature_num)

    model = FNN(feature_columns, feature_columns, dnn_hidden_units=[8, 8], dnn_dropout=0.5)
    check_model(model, model_name, x, y)
# @pytest.mark.parametrize(
# 'sparse_feature_num,dense_feature_num',
# [(0, 1), (1, 0)
# ]
# )
# def test_FNN_without_seq(sparse_feature_num, dense_feature_num):
# model_name = "FNN"
#
# sample_size = SAMPLE_SIZE
# x, y, feature_columns = get_test_data(sample_size, sparse_feature_num, dense_feature_num, sequence_feature=())
#
# model = FNN(feature_columns,feature_columns, dnn_hidden_units=[32, 32], dnn_dropout=0.5)
# check_model(model, model_name, x, y)
@pytest.mark.parametrize(
    'sparse_feature_num,dense_feature_num',
    [(2, 2)],
)
def test_FNNEstimator(sparse_feature_num, dense_feature_num):
    """Build an ``FNNEstimator`` and run ``check_estimator`` on it.

    Returns early without testing when ``TEST_Estimator`` is falsy.
    """
    if not TEST_Estimator:
        return
    from deepctr.estimator import FNNEstimator

    linear_cols, dnn_cols, input_fn = get_test_data_estimator(
        SAMPLE_SIZE,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=dense_feature_num,
    )
    estimator = FNNEstimator(linear_cols, dnn_cols, dnn_hidden_units=[8, 8], dnn_dropout=0.5)
    check_estimator(estimator, input_fn)
if __name__ == "__main__":
pass
================================================
FILE: tests/models/FiBiNET_test.py
================================================
import pytest
from deepctr.models import FiBiNET
from ..utils import check_model, SAMPLE_SIZE, get_test_data, get_test_data_estimator, check_estimator, TEST_Estimator
@pytest.mark.parametrize(
    'bilinear_type',
    ["each", "all", "interaction"],
)
def test_FiBiNET(bilinear_type):
    """Build a FiBiNET model for one ``bilinear_type`` and run ``check_model``."""
    x, y, feature_columns = get_test_data(SAMPLE_SIZE, sparse_feature_num=2, dense_feature_num=2)
    fibinet = FiBiNET(
        feature_columns,
        feature_columns,
        bilinear_type=bilinear_type,
        dnn_hidden_units=[4, ],
        dnn_dropout=0.5,
    )
    check_model(fibinet, "FiBiNET", x, y)
@pytest.mark.parametrize(
    'bilinear_type',
    ["interaction"],
)
def test_FiBiNETEstimator(bilinear_type):
    """Build a ``FiBiNETEstimator`` and run ``check_estimator`` on it.

    Returns early without testing when ``TEST_Estimator`` is falsy.
    """
    if not TEST_Estimator:
        return
    from deepctr.estimator import FiBiNETEstimator

    linear_cols, dnn_cols, input_fn = get_test_data_estimator(
        SAMPLE_SIZE,
        sparse_feature_num=2,
        dense_feature_num=2,
    )
    estimator = FiBiNETEstimator(
        linear_cols,
        dnn_cols,
        bilinear_type=bilinear_type,
        dnn_hidden_units=[4, ],
        dnn_dropout=0.5,
    )
    check_estimator(estimator, input_fn)
# Script entry point is a deliberate no-op: nothing runs when this file is executed directly.
if __name__ == "__main__":
    pass
================================================
FILE: tests/models/FwFM_test.py
================================================
import pytest
from deepctr.models import FwFM
from ..utils import check_model, get_test_data, SAMPLE_SIZE, get_test_data_estimator, check_estimator, TEST_Estimator
@pytest.mark.parametrize(
    'hidden_size,sparse_feature_num',
    [
        ((2,), 1),
        ((), 1),
    ],
)
def test_FwFM(hidden_size, sparse_feature_num):
    """Build an FwFM model and run the shared model checks.

    ``sparse_feature_num`` is used for both the sparse and the dense feature
    count; ``hidden_size`` is forwarded as ``dnn_hidden_units``.
    """
    inputs, labels, columns = get_test_data(
        SAMPLE_SIZE,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=sparse_feature_num,
    )
    fwfm = FwFM(columns, columns, dnn_hidden_units=hidden_size, dnn_dropout=0.5)
    check_model(fwfm, "FwFM", inputs, labels)
@pytest.mark.parametrize(
    'hidden_size,sparse_feature_num',
    [((2,), 2)],
)
def test_FwFMEstimator(hidden_size, sparse_feature_num):
    """Train and evaluate an ``FwFMEstimator``; skipped when ``TEST_Estimator`` is falsy."""
    if TEST_Estimator:
        from deepctr.estimator import FwFMEstimator

        # The sparse feature count doubles as the dense feature count.
        linear_columns, dnn_columns, input_fn = get_test_data_estimator(
            SAMPLE_SIZE,
            sparse_feature_num=sparse_feature_num,
            dense_feature_num=sparse_feature_num,
        )
        estimator = FwFMEstimator(linear_columns, dnn_columns, dnn_hidden_units=hidden_size, dnn_dropout=0.5)
        check_estimator(estimator, input_fn)
# Script entry point is a deliberate no-op: nothing runs when this file is executed directly.
if __name__ == "__main__":
    pass
================================================
FILE: tests/models/IFM_test.py
================================================
import pytest
from deepctr.models import IFM
from ..utils import check_model, get_test_data, SAMPLE_SIZE
@pytest.mark.parametrize(
    'hidden_size,sparse_feature_num',
    [
        ((2,), 1),
        ((3,), 2),
    ],
)
def test_IFM(hidden_size, sparse_feature_num):
    """Build an IFM model and run the shared model checks.

    ``sparse_feature_num`` is used for both the sparse and the dense feature count.
    """
    inputs, labels, columns = get_test_data(
        SAMPLE_SIZE,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=sparse_feature_num,
    )
    ifm = IFM(columns, columns, dnn_hidden_units=hidden_size, dnn_dropout=0.5)
    check_model(ifm, "IFM", inputs, labels)
# Script entry point is a deliberate no-op: nothing runs when this file is executed directly.
if __name__ == "__main__":
    pass
================================================
FILE: tests/models/MLR_test.py
================================================
import pytest
from deepctr.models import MLR
from ..utils import check_model, SAMPLE_SIZE, get_test_data
@pytest.mark.parametrize(
    'region_sparse,region_dense,base_sparse,base_dense,bias_sparse,bias_dense',
    [(0, 2, 0, 2, 0, 1), (0, 2, 0, 1, 0, 2), (0, 2, 0, 0, 1, 0),
     # (0, 1, 1, 2, 1, 1,), (0, 1, 1, 1, 1, 2), (0, 1, 1, 0, 2, 0),
     # (1, 0, 2, 2, 2, 1), (2, 0, 2, 1, 2, 2), (2, 0, 2, 0, 0, 0)
     ]
)
def test_MLRs(region_sparse, region_dense, base_sparse, base_dense, bias_sparse, bias_dense):
    """Build and compile an MLR model with separate region, base and bias feature columns.

    Each feature group is generated with its own sparse/dense counts and its
    own name prefix. The model is only constructed and compiled here; it is
    not trained.

    :param region_sparse: number of sparse features in the region group
    :param region_dense: number of dense features in the region group
    :param base_sparse: number of sparse features in the base group
    :param base_dense: number of dense features in the base group
    :param bias_sparse: number of sparse features in the bias group
    :param bias_dense: number of dense features in the bias group
    """
    model_name = "MLRs"
    _, _, region_feature_columns = get_test_data(SAMPLE_SIZE, sparse_feature_num=region_sparse,
                                                 dense_feature_num=region_dense, prefix='region')
    # Previously the base and bias groups re-used the region counts, so the
    # base_*/bias_* parameters had no effect on the test.
    _, _, base_feature_columns = get_test_data(SAMPLE_SIZE, sparse_feature_num=base_sparse,
                                               dense_feature_num=base_dense, prefix='base')
    _, _, bias_feature_columns = get_test_data(SAMPLE_SIZE, sparse_feature_num=bias_sparse,
                                               dense_feature_num=bias_dense, prefix='bias')
    model = MLR(region_feature_columns, base_feature_columns, bias_feature_columns=bias_feature_columns)
    model.compile('adam', 'binary_crossentropy',
                  metrics=['binary_crossentropy'])
    print(model_name + " test pass!")
def test_MLR():
    """Build an MLR model from region feature columns only and run the shared model checks."""
    model_name = "MLR"
    # Only the region features are consumed by this test; the base/bias data
    # that used to be generated here was never passed to the model.
    region_x, y, region_feature_columns = get_test_data(SAMPLE_SIZE, sparse_feature_num=3, dense_feature_num=3,
                                                        prefix='region')
    model = MLR(region_feature_columns)
    model.compile('adam', 'binary_crossentropy',
                  metrics=['binary_crossentropy'])
    check_model(model, model_name, region_x, y)
    print(model_name + " test pass!")
# Script entry point is a deliberate no-op: nothing runs when this file is executed directly.
if __name__ == "__main__":
    pass
================================================
FILE: tests/models/MTL_test.py
================================================
import pytest
import tensorflow as tf
from deepctr.models.multitask import SharedBottom, ESMM, MMOE, PLE
from ..utils_mtl import get_mtl_test_data, check_mtl_model
def test_SharedBottom():
    """Build a two-task SharedBottom model and run the multi-task model checks."""
    if tf.__version__ != "1.15.0":  # slow in tf 1.15
        features, labels, columns = get_mtl_test_data()
        shared_bottom = SharedBottom(
            columns,
            bottom_dnn_hidden_units=(8,),
            tower_dnn_hidden_units=(8,),
            task_types=['binary', 'binary'],
            task_names=['label_income', 'label_marital'],
        )
        check_mtl_model(shared_bottom, "SharedBottom", features, labels, task_types=['binary', 'binary'])
def test_ESMM():
    """Build a two-task ESMM model and run the multi-task model checks."""
    if tf.__version__ != "1.15.0":  # slow in tf 1.15
        features, labels, columns = get_mtl_test_data()
        esmm = ESMM(
            columns,
            tower_dnn_hidden_units=(8,),
            task_types=['binary', 'binary'],
            task_names=['label_marital', 'label_income'],
        )
        check_mtl_model(esmm, "ESMM", features, labels, task_types=['binary', 'binary'])
def test_MMOE():
    """Build a two-task MMOE model with three experts and run the multi-task model checks."""
    if tf.__version__ != "1.15.0":  # slow in tf 1.15
        features, labels, columns = get_mtl_test_data()
        mmoe = MMOE(
            columns,
            num_experts=3,
            expert_dnn_hidden_units=(8,),
            tower_dnn_hidden_units=(8,),
            gate_dnn_hidden_units=(),
            task_types=['binary', 'binary'],
            task_names=['income', 'marital'],
        )
        check_mtl_model(mmoe, "MMOE", features, labels, task_types=['binary', 'binary'])
@pytest.mark.parametrize(
    'num_levels,gate_dnn_hidden_units',
    [
        (2, ()),
        (1, (4,)),
    ],
)
def test_PLE(num_levels, gate_dnn_hidden_units):
    """Build a two-task PLE model for each parameter combination and run the multi-task model checks."""
    if tf.__version__ != "1.15.0":  # slow in tf 1.15
        features, labels, columns = get_mtl_test_data()
        ple = PLE(
            columns,
            num_levels=num_levels,
            expert_dnn_hidden_units=(8,),
            tower_dnn_hidden_units=(8,),
            gate_dnn_hidden_units=gate_dnn_hidden_units,
            task_types=['binary', 'binary'],
            task_names=['income', 'marital'],
        )
        check_mtl_model(ple, "PLE", features, labels, task_types=['binary', 'binary'])
# Script entry point is a deliberate no-op: nothing runs when this file is executed directly.
if __name__ == "__main__":
    pass
================================================
FILE: tests/models/NFM_test.py
================================================
import pytest
from deepctr.models import NFM
from ..utils import check_model, get_test_data, SAMPLE_SIZE, get_test_data_estimator, check_estimator, TEST_Estimator
@pytest.mark.parametrize(
    'hidden_size,sparse_feature_num',
    [((8,), 1), ((8, 8,), 2)]
)
def test_NFM(hidden_size, sparse_feature_num):
    """Build an NFM model and run the shared model checks.

    :param hidden_size: DNN layer sizes, forwarded as ``dnn_hidden_units``
    :param sparse_feature_num: number of sparse features; also used as the
        number of dense features
    """
    model_name = "NFM"
    sample_size = SAMPLE_SIZE
    x, y, feature_columns = get_test_data(sample_size, sparse_feature_num=sparse_feature_num,
                                          dense_feature_num=sparse_feature_num)
    # Use the parametrized layer sizes; a hard-coded [8, 8] made the
    # ``hidden_size`` parameter a no-op.
    model = NFM(feature_columns, feature_columns, dnn_hidden_units=hidden_size, dnn_dropout=0.5)
    check_model(model, model_name, x, y)
@pytest.mark.parametrize(
    'hidden_size,sparse_feature_num',
    [((8,), 1), ((8, 8,), 2)]
)
def test_FNNEstimator(hidden_size, sparse_feature_num):
    """Train and evaluate an ``NFMEstimator``; skipped when ``TEST_Estimator`` is falsy.

    NOTE(review): despite its name this test exercises ``NFMEstimator``; the
    function name is kept unchanged here so existing test selections by name
    keep working — consider renaming to ``test_NFMEstimator``.

    :param hidden_size: DNN layer sizes, forwarded as ``dnn_hidden_units``
    :param sparse_feature_num: number of sparse features; also used as the
        number of dense features
    """
    if not TEST_Estimator:
        return
    from deepctr.estimator import NFMEstimator
    sample_size = SAMPLE_SIZE
    linear_feature_columns, dnn_feature_columns, input_fn = get_test_data_estimator(
        sample_size,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=sparse_feature_num)
    # Use the parametrized layer sizes; a hard-coded [8, 8] made the
    # ``hidden_size`` parameter a no-op.
    model = NFMEstimator(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=hidden_size,
                         dnn_dropout=0.5)
    check_estimator(model, input_fn)
# Script entry point is a deliberate no-op: nothing runs when this file is executed directly.
if __name__ == "__main__":
    pass
================================================
FILE: tests/models/ONN_test.py
================================================
import pytest
import tensorflow as tf
from packaging import version
from deepctr.models import ONN
from ..utils import check_model, get_test_data, SAMPLE_SIZE
@pytest.mark.parametrize('sparse_feature_num', [2])
def test_ONN(sparse_feature_num):
    """Build an ONN model and run the shared model checks.

    The test body is skipped on TensorFlow 1.15.0 and newer.
    """
    tf_too_new = version.parse(tf.__version__) >= version.parse('1.15.0')
    if tf_too_new:
        return

    inputs, labels, columns = get_test_data(
        SAMPLE_SIZE,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=sparse_feature_num,
        sequence_feature=('sum', 'mean', 'max',),
        hash_flag=True,
    )
    onn = ONN(columns, columns, dnn_hidden_units=[4, 4], dnn_dropout=0.5)
    check_model(onn, "ONN", inputs, labels)
# Script entry point is a deliberate no-op: nothing runs when this file is executed directly.
if __name__ == "__main__":
    pass
================================================
FILE: tests/models/PNN_test.py
================================================
import pytest
from deepctr.models import PNN
from ..utils import check_model, get_test_data, SAMPLE_SIZE, get_test_data_estimator, check_estimator, TEST_Estimator
@pytest.mark.parametrize(
    'use_inner, use_outter,sparse_feature_num',
    [
        (True, True, 3),
        (False, False, 1),
    ],
)
def test_PNN(use_inner, use_outter, sparse_feature_num):
    """Build a PNN model with the given product settings and run the shared model checks.

    ``sparse_feature_num`` is used for both the sparse and the dense feature count.
    """
    inputs, labels, columns = get_test_data(
        SAMPLE_SIZE,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=sparse_feature_num,
    )
    pnn = PNN(
        columns,
        dnn_hidden_units=[4, 4],
        dnn_dropout=0.5,
        use_inner=use_inner,
        use_outter=use_outter,
    )
    check_model(pnn, "PNN", inputs, labels)
@pytest.mark.parametrize(
    'use_inner, use_outter,sparse_feature_num',
    [(True, True, 2)],
)
def test_PNNEstimator(use_inner, use_outter, sparse_feature_num):
    """Train and evaluate a ``PNNEstimator``; skipped when ``TEST_Estimator`` is falsy."""
    if TEST_Estimator:
        from deepctr.estimator import PNNEstimator

        # Only the DNN columns are used; the linear columns are discarded.
        _, dnn_columns, input_fn = get_test_data_estimator(
            SAMPLE_SIZE,
            sparse_feature_num=sparse_feature_num,
            dense_feature_num=sparse_feature_num,
        )
        estimator = PNNEstimator(
            dnn_columns,
            dnn_hidden_units=[4, 4],
            dnn_dropout=0.5,
            use_inner=use_inner,
            use_outter=use_outter,
        )
        check_estimator(estimator, input_fn)
# Script entry point is a deliberate no-op: nothing runs when this file is executed directly.
if __name__ == "__main__":
    pass
================================================
FILE: tests/models/WDL_test.py
================================================
import pytest
import tensorflow as tf
from packaging import version
from deepctr.models import WDL
from ..utils import check_model, check_estimator, SAMPLE_SIZE, get_test_data, get_test_data_estimator, TEST_Estimator
@pytest.mark.parametrize(
    'sparse_feature_num,dense_feature_num',
    [
        (2, 0),
        (0, 2),  # ,(2, 2)
    ],
)
def test_WDL(sparse_feature_num, dense_feature_num):
    """Build a WDL model with hashed sparse features and run the shared model checks.

    The test body is skipped on TensorFlow 2.0.0 and newer.
    """
    running_tf2 = version.parse(tf.__version__) >= version.parse('2.0.0')
    if running_tf2:
        return

    inputs, labels, columns = get_test_data(
        SAMPLE_SIZE,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=dense_feature_num,
        hash_flag=True,
    )
    wdl = WDL(columns, columns, dnn_hidden_units=[4, 4], dnn_dropout=0.5)
    check_model(wdl, "WDL", inputs, labels)
@pytest.mark.parametrize(
    'sparse_feature_num,dense_feature_num',
    [(2, 1),  # (0, 2)#,(2, 2)
     ]
)
def test_WDLEstimator(sparse_feature_num, dense_feature_num):
    """Train and evaluate a ``WDLEstimator``; skipped when ``TEST_Estimator`` is falsy.

    :param sparse_feature_num: number of sparse (categorical) features to generate
    :param dense_feature_num: number of dense (numeric) features to generate
    """
    if not TEST_Estimator:
        return
    from deepctr.estimator import WDLEstimator
    sample_size = SAMPLE_SIZE
    # Pass the counts by keyword: the second positional parameter of
    # get_test_data_estimator is ``embedding_size``, so positional passing
    # silently shifted both values onto the wrong parameters.
    linear_feature_columns, dnn_feature_columns, input_fn = get_test_data_estimator(
        sample_size,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=dense_feature_num)
    model = WDLEstimator(linear_feature_columns, dnn_feature_columns,
                         dnn_hidden_units=[4, 4], dnn_dropout=0.5)
    check_estimator(model, input_fn)
# Script entry point is a deliberate no-op: nothing runs when this file is executed directly.
if __name__ == "__main__":
    pass
================================================
FILE: tests/models/__init__.py
================================================
================================================
FILE: tests/models/xDeepFM_test.py
================================================
import pytest
from deepctr.models import xDeepFM
from ..utils import check_model, get_test_data, SAMPLE_SIZE, get_test_data_estimator, check_estimator, TEST_Estimator
@pytest.mark.parametrize(
    'dnn_hidden_units,cin_layer_size,cin_split_half,cin_activation,sparse_feature_num,dense_feature_dim',
    [
        # ((), (), True, 'linear', 1, 2),
        ((8,), (), True, 'linear', 1, 1),
        ((), (8,), True, 'linear', 2, 2),
        ((8,), (8,), False, 'relu', 1, 0),
    ],
)
def test_xDeepFM(dnn_hidden_units, cin_layer_size, cin_split_half, cin_activation, sparse_feature_num,
                 dense_feature_dim):
    """Build an xDeepFM model for each DNN/CIN configuration and run the shared model checks.

    NOTE(review): ``dense_feature_dim`` is accepted but never read; the dense
    feature count is taken from ``sparse_feature_num`` — confirm whether that
    is intended.
    """
    inputs, labels, columns = get_test_data(
        SAMPLE_SIZE,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=sparse_feature_num,
    )
    xdeepfm = xDeepFM(
        columns,
        columns,
        dnn_hidden_units=dnn_hidden_units,
        cin_layer_size=cin_layer_size,
        cin_split_half=cin_split_half,
        cin_activation=cin_activation,
        dnn_dropout=0.5,
    )
    check_model(xdeepfm, "xDeepFM", inputs, labels)
# @pytest.mark.parametrize(
# 'hidden_size,cin_layer_size,',
# [((8,), (3, 8)),
# ]
# )
# def test_xDeepFM_invalid(hidden_size, cin_layer_size):
# feature_dim_dict = {'sparse': {'sparse_1': 2, 'sparse_2': 5,
# 'sparse_3': 10}, 'dense': ['dense_1', 'dense_2', 'dense_3']}
# with pytest.raises(ValueError):
# _ = xDeepFM(feature_dim_dict, None, dnn_hidden_units=hidden_size, cin_layer_size=cin_layer_size)
@pytest.mark.parametrize(
    'dnn_hidden_units,cin_layer_size,cin_split_half,cin_activation,sparse_feature_num,dense_feature_dim',
    [
        # ((), (), True, 'linear', 1, 2),
        ((8,), (8,), False, 'relu', 2, 1),
    ],
)
def test_xDeepFMEstimator(dnn_hidden_units, cin_layer_size, cin_split_half, cin_activation, sparse_feature_num,
                          dense_feature_dim):
    """Train and evaluate an ``xDeepFMEstimator``.

    Skipped when ``TEST_Estimator`` is falsy or TensorFlow reports version
    "1.4.0".

    NOTE(review): ``dense_feature_dim`` is accepted but never read; the dense
    feature count is taken from ``sparse_feature_num`` — confirm whether that
    is intended.
    """
    import tensorflow as tf

    should_run = TEST_Estimator and tf.__version__ != "1.4.0"
    if should_run:
        from deepctr.estimator import xDeepFMEstimator

        linear_columns, dnn_columns, input_fn = get_test_data_estimator(
            SAMPLE_SIZE,
            sparse_feature_num=sparse_feature_num,
            dense_feature_num=sparse_feature_num,
        )
        estimator = xDeepFMEstimator(
            linear_columns,
            dnn_columns,
            dnn_hidden_units=dnn_hidden_units,
            cin_layer_size=cin_layer_size,
            cin_split_half=cin_split_half,
            cin_activation=cin_activation,
            dnn_dropout=0.5,
        )
        check_estimator(estimator, input_fn)
# Script entry point is a deliberate no-op: nothing runs when this file is executed directly.
if __name__ == "__main__":
    pass
================================================
FILE: tests/utils.py
================================================
from __future__ import absolute_import, division, print_function
import inspect
import os
import sys
import numpy as np
import tensorflow as tf
from numpy.testing import assert_allclose
from packaging import version
from tensorflow.python.keras import backend as K
from tensorflow.python.keras.layers import Input, Masking
from tensorflow.python.keras.models import Model, load_model, save_model
from deepctr.feature_column import SparseFeat, VarLenSparseFeat, DenseFeat, DEFAULT_GROUP_NAME
from deepctr.layers import custom_objects
# Shared constant exported by this helper module for the test suites.
SAMPLE_SIZE = 8
# NOTE(review): not referenced anywhere in this module; presumably consumed by other test files — confirm.
VOCABULARY_SIZE = 4
def test_estimator_version(tf_version):
    """Tell whether the estimator tests should run for a TensorFlow version string.

    :param tf_version: version string, e.g. ``tf.__version__``
    :return: True when the version is below 2.0.0, or lies in the half-open
        range [2.2.0, 2.6.0); False otherwise
    """
    current = version.parse(tf_version)
    if current < version.parse('2.0.0'):
        return True
    lower_bound = version.parse('2.2.0')
    upper_bound = version.parse('2.6.0')
    return lower_bound <= current < upper_bound
# Evaluated once at import time: whether the installed TensorFlow version passes test_estimator_version.
TEST_Estimator = test_estimator_version(tf.__version__)
def gen_sequence(dim, max_len, sample_size):
    """Generate random integer sequences together with random sequence lengths.

    :param dim: exclusive upper bound for the sequence values (drawn from ``[0, dim)``)
    :param max_len: number of values per sequence; lengths are drawn from ``[1, max_len]``
    :param sample_size: number of sequences to generate
    :return: tuple ``(sequences, lengths)`` where ``sequences`` is an array
        built from ``sample_size`` rows of ``max_len`` values and ``lengths``
        holds ``sample_size`` integers
    """
    rows = []
    for _ in range(sample_size):
        rows.append(np.random.randint(0, dim, max_len))
    sequences = np.array(rows)
    lengths = np.random.randint(1, max_len + 1, sample_size)
    return sequences, lengths
def get_test_data(sample_size=1000, embedding_size=4, sparse_feature_num=1, dense_feature_num=1,
                  sequence_feature=None, classification=True, include_length=False,
                  hash_flag=False, prefix='', use_group=False):
    """Generate random model inputs, labels and matching feature columns for tests.

    :param sample_size: number of samples to generate
    :param embedding_size: embedding dimension given to every sparse/sequence feature
    :param sparse_feature_num: number of ``SparseFeat`` columns
    :param dense_feature_num: number of ``DenseFeat`` columns
    :param sequence_feature: iterable of combiner names, one ``VarLenSparseFeat``
        per entry; the special entry ``'weight'`` adds a weighted sequence
        feature instead. Defaults to ``['sum', 'mean', 'max', 'weight']``.
        The argument itself is never modified.
    :param classification: if True labels are random 0/1 integers, otherwise random floats
    :param include_length: if True a sequence-length input is added for sequence features
    :param hash_flag: forwarded as ``use_hash`` to every ``SparseFeat``
    :param prefix: string prepended to every feature/input name
    :param use_group: if True sparse feature ``i`` is placed in group ``str(i % 3)``,
        otherwise in ``DEFAULT_GROUP_NAME``
    :return: tuple ``(model_input, y, feature_columns)``
    """
    if sequence_feature is None:
        sequence_feature = ['sum', 'mean', 'max', 'weight']
    else:
        # Work on a private list so the caller's argument is not mutated when
        # 'weight' is removed below, and so tuples containing 'weight' work.
        sequence_feature = list(sequence_feature)

    feature_columns = []
    model_input = {}

    if 'weight' in sequence_feature:
        feature_columns.append(
            VarLenSparseFeat(SparseFeat(prefix + "weighted_seq", vocabulary_size=2, embedding_dim=embedding_size),
                             maxlen=3, length_name=prefix + "weighted_seq" + "_seq_length",
                             weight_name=prefix + "weight"))
        s_input, s_len_input = gen_sequence(
            2, 3, sample_size)

        model_input[prefix + "weighted_seq"] = s_input
        model_input[prefix + 'weight'] = np.random.randn(sample_size, 3, 1)
        model_input[prefix + "weighted_seq" + "_seq_length"] = s_len_input
        # 'weight' is handled above; the remaining entries are combiner names.
        sequence_feature.remove('weight')

    for i in range(sparse_feature_num):
        if use_group:
            group_name = str(i % 3)
        else:
            group_name = DEFAULT_GROUP_NAME
        dim = np.random.randint(1, 10)
        feature_columns.append(
            SparseFeat(prefix + 'sparse_feature_' + str(i), dim, embedding_size, use_hash=hash_flag, dtype=tf.int32,
                       group_name=group_name))

    for i in range(dense_feature_num):
        def transform_fn(x): return (x - 0.0) / 1.0

        feature_columns.append(
            DenseFeat(
                prefix + 'dense_feature_' + str(i),
                1,
                dtype=tf.float32,
                transform_fn=transform_fn
            )
        )

    for i, mode in enumerate(sequence_feature):
        dim = np.random.randint(1, 10)
        maxlen = np.random.randint(1, 10)
        feature_columns.append(
            VarLenSparseFeat(SparseFeat(prefix + 'sequence_' + mode, vocabulary_size=dim, embedding_dim=embedding_size),
                             maxlen=maxlen, combiner=mode))

    for fc in feature_columns:
        if isinstance(fc, SparseFeat):
            model_input[fc.name] = np.random.randint(0, fc.vocabulary_size, sample_size)
        elif isinstance(fc, DenseFeat):
            model_input[fc.name] = np.random.random(sample_size)
        else:
            # Sequence features (including the weighted one, whose input is
            # regenerated here).
            s_input, s_len_input = gen_sequence(
                fc.vocabulary_size, fc.maxlen, sample_size)

            model_input[fc.name] = s_input
            if include_length:
                # NOTE(review): ``i`` is whatever value the last loop above
                # left behind, so every sequence feature shares one length
                # name (and ``i`` is unbound if no loop ran) — confirm the
                # intended naming before relying on include_length=True.
                fc.length_name = prefix + "sequence_" + str(i) + '_seq_length'
                model_input[prefix + "sequence_" + str(i) + '_seq_length'] = s_len_input

    if classification:
        y = np.random.randint(0, 2, sample_size)
    else:
        y = np.random.random(sample_size)

    return model_input, y, feature_columns
def layer_test(layer_cls, kwargs=None, input_shape=None, input_dtype=None,
               input_data=None, expected_output=None,
               expected_output_dtype=None, fixed_batch_size=False, supports_masking=False):
    """Exercise a layer class through the Keras functional API.

    The layer is instantiated, wired into a ``Model``, run with ``predict``,
    round-tripped through ``get_config``/``from_config`` (both at model and at
    layer level) and, when its ``call`` accepts ``training``, trained for one
    batch.

    :param layer_cls: layer class to instantiate
    :param kwargs: keyword arguments for the layer constructor (default: none)
    :param input_shape: input shape, or a list of shapes for multi-input layers;
        ``None`` entries are replaced by a random size in ``[1, 4)``
    :param input_dtype: dtype of the generated input (default: ``K.floatx()``)
    :param input_data: explicit input data; random data is generated when omitted
    :param expected_output: if given, the prediction is compared against it
    :param expected_output_dtype: expected dtype of the layer output
        (default: the input dtype)
    :param fixed_batch_size: build the ``Input`` with ``batch_shape`` instead of ``shape``
    :param supports_masking: also feed a boolean mask whose first half of the
        second axis is True
    :return: the model prediction for the input data
    :raises AssertionError: on missing ``input_shape``, dtype mismatch or shape mismatch
    """
    # generate input data
    if kwargs is None:
        kwargs = {}

    if input_data is None:
        if not input_shape:
            raise AssertionError()
        if not input_dtype:
            input_dtype = K.floatx()

        input_data_shape = list(input_shape)
        for i, e in enumerate(input_data_shape):
            if e is None:
                input_data_shape[i] = np.random.randint(1, 4)

        input_mask = []
        if all(isinstance(e, tuple) for e in input_data_shape):
            # Multi-input layer: one random array (and optionally one mask) per shape.
            input_data = []
            for e in input_data_shape:
                input_data.append(
                    (10 * np.random.random(e)).astype(input_dtype))
                if supports_masking:
                    a = np.full(e[:2], False)
                    a[:, :e[1] // 2] = True
                    input_mask.append(a)
        else:
            input_data = (10 * np.random.random(input_data_shape))
            input_data = input_data.astype(input_dtype)
            if supports_masking:
                a = np.full(input_data_shape[:2], False)
                a[:, :input_data_shape[1] // 2] = True
                input_mask.append(a)
    else:
        # NOTE(review): no mask is generated on this path, so combining an
        # explicit ``input_data`` with ``supports_masking=True`` fails later
        # when the mask is looked up.
        if input_shape is None:
            input_shape = input_data.shape
        if input_dtype is None:
            input_dtype = input_data.dtype

    if expected_output_dtype is None:
        expected_output_dtype = input_dtype

    # instantiation
    layer = layer_cls(**kwargs)

    # test get_weights , set_weights at layer level
    weights = layer.get_weights()
    layer.set_weights(weights)

    try:
        expected_output_shape = layer.compute_output_shape(input_shape)
    except Exception:
        # Fall back to the private variant when the public method fails.
        expected_output_shape = layer._compute_output_shape(input_shape)

    # test in functional API
    if isinstance(input_shape, list):
        if fixed_batch_size:
            x = [Input(batch_shape=e, dtype=input_dtype) for e in input_shape]
            if supports_masking:
                mask = [Input(batch_shape=e[0:2], dtype=bool)
                        for e in input_shape]
        else:
            x = [Input(shape=e[1:], dtype=input_dtype) for e in input_shape]
            if supports_masking:
                mask = [Input(shape=(e[1],), dtype=bool) for e in input_shape]
    else:
        if fixed_batch_size:
            x = Input(batch_shape=input_shape, dtype=input_dtype)
            if supports_masking:
                mask = Input(batch_shape=input_shape[0:2], dtype=bool)
        else:
            x = Input(shape=input_shape[1:], dtype=input_dtype)
            if supports_masking:
                mask = Input(shape=(input_shape[1],), dtype=bool)

    if supports_masking:
        y = layer(Masking()(x), mask=mask)
    else:
        y = layer(x)

    if not (K.dtype(y) == expected_output_dtype):
        raise AssertionError()

    # check with the functional API
    if supports_masking:
        model = Model([x, mask], y)
        model_inputs = [input_data, input_mask[0]]
    else:
        model = Model(x, y)
        model_inputs = input_data
    actual_output = model.predict(model_inputs)

    actual_output_shape = actual_output.shape
    for expected_dim, actual_dim in zip(expected_output_shape,
                                        actual_output_shape):
        if expected_dim is not None:
            if not (expected_dim == actual_dim):
                raise AssertionError("expected_shape", expected_output_shape, "actual_shape", actual_output_shape)

    if expected_output is not None:
        assert_allclose(actual_output, expected_output, rtol=1e-3)

    # test serialization, weight setting at model level
    model_config = model.get_config()
    recovered_model = model.__class__.from_config(model_config)
    if model.weights:
        weights = model.get_weights()
        recovered_model.set_weights(weights)
        # Feed the same inputs as the original model: with masking enabled the
        # model has a second (mask) input that must be supplied as well.
        _output = recovered_model.predict(model_inputs)
        assert_allclose(_output, actual_output, rtol=1e-3)

    # test training mode (e.g. useful when the layer has a
    # different behavior at training and testing time).
    if has_arg(layer.call, 'training'):
        model.compile('rmsprop', 'mse')
        model.train_on_batch(model_inputs, actual_output)

    # test instantiation from layer config
    layer_config = layer.get_config()
    layer_config['batch_input_shape'] = input_shape
    layer = layer.__class__.from_config(layer_config)

    # for further checks in the caller function
    return actual_output
def has_arg(fn, name, accept_all=False):
    """Report whether ``fn`` can be called with ``name`` as a keyword argument.

    On Python 2 only the presence of an argument with that name is checked.
    On Python 3 the argument must also be passable by keyword, i.e. it must
    not be positional-only.

    # Arguments
        fn: Callable to inspect.
        name: Keyword argument name to look for.
        accept_all: Result to use when no parameter is called `name` but
            `fn` takes a `**kwargs` argument.

    # Returns
        bool, whether `fn` accepts a `name` keyword argument.
    """
    interpreter = sys.version_info

    if interpreter < (3,):
        spec = inspect.getargspec(fn)
        if accept_all and spec.keywords is not None:
            return True
        return name in spec.args

    if interpreter < (3, 3):
        spec = inspect.getfullargspec(fn)
        if accept_all and spec.varkw is not None:
            return True
        return name in spec.args or name in spec.kwonlyargs

    params = inspect.signature(fn).parameters
    if name in params:
        keyword_capable = (inspect.Parameter.POSITIONAL_OR_KEYWORD,
                           inspect.Parameter.KEYWORD_ONLY)
        return params[name].kind in keyword_capable
    if not accept_all:
        return False
    # No parameter with that name: only a **kwargs catch-all can accept it.
    return any(p.kind == inspect.Parameter.VAR_KEYWORD for p in params.values())
def check_model(model, model_name, x, y, check_model_io=True):
    """
    compile model,train and evaluate it,then save/load weight and model file.

    The temporary ``<model_name>_weights.h5`` and ``<model_name>.h5`` files are
    removed even when saving or loading raises, so a failing test does not
    leave artifacts behind.

    :param model: model to check
    :param model_name: name used in log messages and as file name prefix
    :param x: training inputs passed to ``model.fit``
    :param y: training labels passed to ``model.fit``
    :param check_model_io: test save/load model file or not
    :return: None
    """
    model.compile('adam', 'binary_crossentropy',
                  metrics=['binary_crossentropy'])
    model.fit(x, y, batch_size=100, epochs=1, validation_split=0.5)
    print(model_name + " test train valid pass!")

    weights_path = model_name + '_weights.h5'
    try:
        model.save_weights(weights_path)
        model.load_weights(weights_path)
    finally:
        # The file may be missing if saving itself failed.
        if os.path.exists(weights_path):
            os.remove(weights_path)
    print(model_name + " test save load weight pass!")

    if check_model_io:
        model_path = model_name + '.h5'
        try:
            save_model(model, model_path)
            model = load_model(model_path, custom_objects)
        finally:
            if os.path.exists(model_path):
                os.remove(model_path)
        print(model_name + " test save load model pass!")

    print(model_name + " test pass!")
def get_test_data_estimator(sample_size=1000, embedding_size=4, sparse_feature_num=1, dense_feature_num=1,
                            classification=True):
    """Generate random estimator inputs: feature columns plus a numpy input function.

    :param sample_size: number of samples to generate
    :param embedding_size: embedding dimension of every sparse feature's embedding column
    :param sparse_feature_num: number of categorical features, named ``s_<i>``
    :param dense_feature_num: number of numeric features, named ``d_<i>``
    :param classification: if True labels are random 0/1 integers, otherwise random floats
    :return: tuple ``(linear_feature_columns, dnn_feature_columns, input_fn)``
    """
    x = {}
    dnn_feature_columns = []
    linear_feature_columns = []
    voc_size = 4

    for i in range(sparse_feature_num):
        name = 's_' + str(i)
        x[name] = np.random.randint(0, voc_size, sample_size)
        dnn_feature_columns.append(
            tf.feature_column.embedding_column(tf.feature_column.categorical_column_with_identity(name, voc_size),
                                               embedding_size))
        linear_feature_columns.append(tf.feature_column.categorical_column_with_identity(name, voc_size))

    for i in range(dense_feature_num):
        name = 'd_' + str(i)
        x[name] = np.random.random(sample_size)
        dnn_feature_columns.append(tf.feature_column.numeric_column(name))
        linear_feature_columns.append(tf.feature_column.numeric_column(name))

    if classification:
        y = np.random.randint(0, 2, sample_size)
    else:
        y = np.random.random(sample_size)

    # Compare parsed versions: a plain string comparison is lexicographic and
    # would, for example, order "10.0.0" before "2.0.0".
    if version.parse(tf.__version__) >= version.parse("2.0.0"):
        input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(x, y, shuffle=False)
    else:
        input_fn = tf.estimator.inputs.numpy_input_fn(x, y, shuffle=False)

    return linear_feature_columns, dnn_feature_columns, input_fn
def check_estimator(model, input_fn):
    """Run ``model.train`` and then ``model.evaluate`` with the same input function.

    :param model: object exposing ``train(input_fn)`` and ``evaluate(input_fn)``
    :param input_fn: input function handed to both calls
    :return: None
    """
    for phase in ("train", "evaluate"):
        getattr(model, phase)(input_fn)
================================================
FILE: tests/utils_mtl.py
================================================
# test utils for multi task learning
import os
import numpy as np
import tensorflow as tf
from tensorflow.python.keras.models import load_model, save_model
from deepctr.feature_column import SparseFeat, DenseFeat, DEFAULT_GROUP_NAME
from deepctr.layers import custom_objects
def get_mtl_test_data(sample_size=10, embedding_size=4, sparse_feature_num=1,
                      dense_feature_num=1, task_types=('binary', 'binary'),
                      hash_flag=False, prefix='', use_group=False):
    """Generate random inputs, one label array per task, and feature columns.

    :param sample_size: number of samples to generate
    :param embedding_size: embedding dimension of every sparse feature
    :param sparse_feature_num: number of ``SparseFeat`` columns
    :param dense_feature_num: number of ``DenseFeat`` columns
    :param task_types: one entry per task; ``'binary'`` yields random 0/1
        integer labels, anything else yields random float labels
    :param hash_flag: forwarded as ``use_hash`` to every ``SparseFeat``
    :param prefix: string prepended to every feature name
    :param use_group: if True sparse feature ``i`` is placed in group ``str(i % 3)``,
        otherwise in ``DEFAULT_GROUP_NAME``
    :return: tuple ``(model_input, y_list, feature_columns)``
    """
    feature_columns = []

    for idx in range(sparse_feature_num):
        group_name = str(idx % 3) if use_group else DEFAULT_GROUP_NAME
        vocab = np.random.randint(1, 10)
        sparse_column = SparseFeat(prefix + 'sparse_feature_' + str(idx), vocab, embedding_size,
                                   use_hash=hash_flag, dtype=tf.int32, group_name=group_name)
        feature_columns.append(sparse_column)

    for idx in range(dense_feature_num):
        def transform_fn(x): return (x - 0.0) / 1.0

        dense_column = DenseFeat(
            prefix + 'dense_feature_' + str(idx),
            1,
            dtype=tf.float32,
            transform_fn=transform_fn,
        )
        feature_columns.append(dense_column)

    # Random input values, keyed by feature name.
    model_input = {}
    for column in feature_columns:
        if isinstance(column, SparseFeat):
            model_input[column.name] = np.random.randint(0, column.vocabulary_size, sample_size)
        elif isinstance(column, DenseFeat):
            model_input[column.name] = np.random.random(sample_size)

    # multi label: one array per task
    y_list = [
        np.random.randint(0, 2, sample_size) if task == 'binary' else np.random.random(sample_size)
        for task in task_types
    ]

    return model_input, y_list, feature_columns
def check_mtl_model(model, model_name, x, y_list, task_types, check_model_io=True):
    """
    compile model,train and evaluate it,then save/load weight and model file.

    The temporary ``<model_name>_weights.h5`` and ``<model_name>.h5`` files are
    removed even when saving or loading raises, so a failing test does not
    leave artifacts behind.

    :param model: multi-task model to check
    :param model_name: name used in log messages and as file name prefix
    :param x: training inputs passed to ``model.fit``
    :param y_list: multi label of y, one entry per task
    :param task_types: one entry per task; ``'binary'`` selects
        ``binary_crossentropy`` and ``'regression'`` selects
        ``mean_squared_error``; other values add no loss
    :param check_model_io: test save/load model file or not
    :return: None
    """
    loss_by_task_type = {
        'binary': 'binary_crossentropy',
        'regression': 'mean_squared_error',
    }
    loss_list = [loss_by_task_type[task_type] for task_type in task_types
                 if task_type in loss_by_task_type]
    # No per-task metrics are configured (accuracy / mae were disabled).
    metric_list = []

    print('loss:', loss_list)
    print('metric:', metric_list)
    model.compile('adam', loss=loss_list, metrics=metric_list)
    model.fit(x, y_list, batch_size=100, epochs=1, validation_split=0.5)
    print(model_name + " test train valid pass!")

    weights_path = model_name + '_weights.h5'
    try:
        model.save_weights(weights_path)
        model.load_weights(weights_path)
    finally:
        # The file may be missing if saving itself failed.
        if os.path.exists(weights_path):
            os.remove(weights_path)
    print(model_name + " test save load weight pass!")

    if check_model_io:
        model_path = model_name + '.h5'
        try:
            save_model(model, model_path)
            model = load_model(model_path, custom_objects)
        finally:
            if os.path.exists(model_path):
                os.remove(model_path)
        print(model_name + " test save load model pass!")

    print(model_name + " test pass!")
================================================
FILE: tests/utils_test.py
================================================
from deepctr.utils import check_version
def test_check_version():
    """Call ``check_version`` once with a version string and once with an integer."""
    for candidate in ('0.1.0', 20191231):
        check_version(candidate)