Repository: shenweichen/DeepCTR Branch: master Commit: e8f4d818f9b4 Files: 220 Total size: 867.4 KB Directory structure: gitextract_0q19foz8/ ├── .gitattributes ├── .github/ │ ├── ISSUE_TEMPLATE/ │ │ ├── bug_report.md │ │ ├── feature_request.md │ │ └── question.md │ └── workflows/ │ ├── ci.yml │ └── ci2.yml ├── .gitignore ├── .readthedocs.yml ├── .travis.yml ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── deepctr/ │ ├── __init__.py │ ├── contrib/ │ │ ├── __init__.py │ │ ├── rnn.py │ │ ├── rnn_v2.py │ │ └── utils.py │ ├── estimator/ │ │ ├── __init__.py │ │ ├── feature_column.py │ │ ├── inputs.py │ │ ├── models/ │ │ │ ├── __init__.py │ │ │ ├── afm.py │ │ │ ├── autoint.py │ │ │ ├── ccpm.py │ │ │ ├── dcn.py │ │ │ ├── deepfefm.py │ │ │ ├── deepfm.py │ │ │ ├── fibinet.py │ │ │ ├── fnn.py │ │ │ ├── fwfm.py │ │ │ ├── nfm.py │ │ │ ├── pnn.py │ │ │ ├── wdl.py │ │ │ └── xdeepfm.py │ │ └── utils.py │ ├── feature_column.py │ ├── inputs.py │ ├── layers/ │ │ ├── __init__.py │ │ ├── activation.py │ │ ├── core.py │ │ ├── interaction.py │ │ ├── normalization.py │ │ ├── sequence.py │ │ └── utils.py │ ├── models/ │ │ ├── __init__.py │ │ ├── afm.py │ │ ├── autoint.py │ │ ├── ccpm.py │ │ ├── dcn.py │ │ ├── dcnmix.py │ │ ├── deepfefm.py │ │ ├── deepfm.py │ │ ├── difm.py │ │ ├── edcn.py │ │ ├── fgcnn.py │ │ ├── fibinet.py │ │ ├── flen.py │ │ ├── fnn.py │ │ ├── fwfm.py │ │ ├── ifm.py │ │ ├── mlr.py │ │ ├── multitask/ │ │ │ ├── __init__.py │ │ │ ├── esmm.py │ │ │ ├── mmoe.py │ │ │ ├── ple.py │ │ │ └── sharedbottom.py │ │ ├── nfm.py │ │ ├── onn.py │ │ ├── pnn.py │ │ ├── sequence/ │ │ │ ├── __init__.py │ │ │ ├── bst.py │ │ │ ├── dien.py │ │ │ ├── din.py │ │ │ └── dsin.py │ │ ├── wdl.py │ │ └── xdeepfm.py │ └── utils.py ├── docs/ │ ├── Makefile │ ├── make.bat │ ├── requirements.readthedocs.txt │ └── source/ │ ├── Estimators.rst │ ├── Examples.md │ ├── FAQ.md │ ├── Features.md │ ├── History.md │ ├── Layers.rst │ ├── Model_Methods.md │ ├── Models.rst │ ├── Quick-Start.md │ ├── conf.py │ ├── 
deepctr.contrib.rnn.rst │ ├── deepctr.contrib.rst │ ├── deepctr.contrib.utils.rst │ ├── deepctr.estimator.feature_column.rst │ ├── deepctr.estimator.inputs.rst │ ├── deepctr.estimator.models.afm.rst │ ├── deepctr.estimator.models.autoint.rst │ ├── deepctr.estimator.models.ccpm.rst │ ├── deepctr.estimator.models.dcn.rst │ ├── deepctr.estimator.models.deepfefm.rst │ ├── deepctr.estimator.models.deepfm.rst │ ├── deepctr.estimator.models.fibinet.rst │ ├── deepctr.estimator.models.fnn.rst │ ├── deepctr.estimator.models.fwfm.rst │ ├── deepctr.estimator.models.nfm.rst │ ├── deepctr.estimator.models.pnn.rst │ ├── deepctr.estimator.models.rst │ ├── deepctr.estimator.models.wdl.rst │ ├── deepctr.estimator.models.xdeepfm.rst │ ├── deepctr.estimator.rst │ ├── deepctr.estimator.utils.rst │ ├── deepctr.feature_column.rst │ ├── deepctr.inputs.rst │ ├── deepctr.layers.activation.rst │ ├── deepctr.layers.core.rst │ ├── deepctr.layers.interaction.rst │ ├── deepctr.layers.normalization.rst │ ├── deepctr.layers.rst │ ├── deepctr.layers.sequence.rst │ ├── deepctr.layers.utils.rst │ ├── deepctr.models.afm.rst │ ├── deepctr.models.autoint.rst │ ├── deepctr.models.ccpm.rst │ ├── deepctr.models.dcn.rst │ ├── deepctr.models.dcnmix.rst │ ├── deepctr.models.deepfefm.rst │ ├── deepctr.models.deepfm.rst │ ├── deepctr.models.deepfwfm.rst │ ├── deepctr.models.difm.rst │ ├── deepctr.models.edcn.rst │ ├── deepctr.models.fgcnn.rst │ ├── deepctr.models.fibinet.rst │ ├── deepctr.models.flen.rst │ ├── deepctr.models.fnn.rst │ ├── deepctr.models.ifm.rst │ ├── deepctr.models.mlr.rst │ ├── deepctr.models.multitask.esmm.rst │ ├── deepctr.models.multitask.mmoe.rst │ ├── deepctr.models.multitask.ple.rst │ ├── deepctr.models.multitask.sharedbottom.rst │ ├── deepctr.models.nfm.rst │ ├── deepctr.models.onn.rst │ ├── deepctr.models.pnn.rst │ ├── deepctr.models.rst │ ├── deepctr.models.sequence.bst.rst │ ├── deepctr.models.sequence.dien.rst │ ├── deepctr.models.sequence.din.rst │ ├── 
deepctr.models.sequence.dsin.rst │ ├── deepctr.models.wdl.rst │ ├── deepctr.models.xdeepfm.rst │ ├── deepctr.rst │ ├── deepctr.utils.rst │ ├── index.rst │ └── modules.rst ├── examples/ │ ├── avazu_sample.txt │ ├── census-income.sample │ ├── criteo_sample.te.tfrecords │ ├── criteo_sample.tr.tfrecords │ ├── criteo_sample.txt │ ├── gen_tfrecords.py │ ├── movielens_age_vocabulary.csv │ ├── movielens_sample.txt │ ├── run_all.sh │ ├── run_classification_criteo.py │ ├── run_classification_criteo_hash.py │ ├── run_classification_criteo_multi_gpu.py │ ├── run_dien.py │ ├── run_din.py │ ├── run_dsin.py │ ├── run_estimator_pandas_classification.py │ ├── run_estimator_tfrecord_classification.py │ ├── run_flen.py │ ├── run_mtl.py │ ├── run_multivalue_movielens.py │ ├── run_multivalue_movielens_hash.py │ ├── run_multivalue_movielens_vocab_hash.py │ └── run_regression_movielens.py ├── setup.cfg ├── setup.py └── tests/ ├── README.md ├── __init__.py ├── feature_test.py ├── layers/ │ ├── __init__.py │ ├── activations_test.py │ ├── core_test.py │ ├── interaction_test.py │ ├── normalization_test.py │ ├── sequence_test.py │ ├── utils_test.py │ └── vocabulary_example.csv ├── models/ │ ├── AFM_test.py │ ├── AutoInt_test.py │ ├── BST_test.py │ ├── CCPM_test.py │ ├── DCNMix_test.py │ ├── DCN_test.py │ ├── DIEN_test.py │ ├── DIFM_test.py │ ├── DIN_test.py │ ├── DSIN_test.py │ ├── DeepFEFM_test.py │ ├── DeepFM_test.py │ ├── EDCN_test.py │ ├── FGCNN_test.py │ ├── FLEN_test.py │ ├── FNN_test.py │ ├── FiBiNET_test.py │ ├── FwFM_test.py │ ├── IFM_test.py │ ├── MLR_test.py │ ├── MTL_test.py │ ├── NFM_test.py │ ├── ONN_test.py │ ├── PNN_test.py │ ├── WDL_test.py │ ├── __init__.py │ └── xDeepFM_test.py ├── utils.py ├── utils_mtl.py └── utils_test.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitattributes ================================================ # Auto detect text 
files and perform LF normalization * text=auto # Custom for Visual Studio *.cs diff=csharp # Standard to msysgit *.doc diff=astextplain *.DOC diff=astextplain *.docx diff=astextplain *.DOCX diff=astextplain *.dot diff=astextplain *.DOT diff=astextplain *.pdf diff=astextplain *.PDF diff=astextplain *.rtf diff=astextplain *.RTF diff=astextplain ================================================ FILE: .github/ISSUE_TEMPLATE/bug_report.md ================================================ --- name: Bug report about: Create a report to help us improve title: '' labels: '' assignees: '' --- **Describe the bug(问题描述)** A clear and concise description of what the bug is. Standalone code that reproduces the issue is especially helpful. **To Reproduce(复现步骤)** Steps to reproduce the behavior: 1. Go to '...' 2. Click on '....' 3. Scroll down to '....' 4. See error **Operating environment(运行环境):** - python version [e.g. 3.6, 3.7] - tensorflow version [e.g. 1.4.0, 1.15.0, 2.10.0] - deepctr version [e.g. 0.9.2] **Additional context** Add any other context about the problem here. ================================================ FILE: .github/ISSUE_TEMPLATE/feature_request.md ================================================ --- name: Feature request about: Suggest an idea for this project title: '' labels: enhancement&feature request assignees: '' --- **Is your feature request related to a problem? Please describe.** A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] **Describe the solution you'd like** A clear and concise description of what you want to happen. **Describe alternatives you've considered** A clear and concise description of any alternative solutions or features you've considered. **Additional context** Add any other context or screenshots about the feature request here. 
================================================ FILE: .github/ISSUE_TEMPLATE/question.md ================================================ --- name: Question about: Ask any question ~ title: '' labels: question assignees: '' --- Please refer to the [FAQ](https://deepctr-doc.readthedocs.io/en/latest/FAQ.html) in doc and search for the [related issues](https://github.com/shenweichen/DeepCTR/issues) before you ask the question. **Describe the question(问题描述)** A clear and concise description of what the question is. **Additional context** Add any other context about the problem here. **Operating environment(运行环境):** - python version [e.g. 3.6] - tensorflow version [e.g. 1.4.0, 1.15.0, 2.10.0] - deepctr version [e.g. 0.9.2,] ================================================ FILE: .github/workflows/ci.yml ================================================ name: CI_TF2 on: push: path: - 'deepctr/*' - 'tests/*' pull_request: path: - 'deepctr/*' - 'tests/*' jobs: build: runs-on: ubuntu-latest timeout-minutes: 180 strategy: matrix: python-version: [ 3.6,3.7,3.8, 3.9,3.10.7 ] tf-version: [ 2.6.0,2.7.0,2.8.0,2.9.0,2.10.0 ] exclude: - python-version: 3.7 tf-version: 1.4.0 - python-version: 3.7 tf-version: 1.15.0 - python-version: 3.8 tf-version: 1.4.0 - python-version: 3.8 tf-version: 1.14.0 - python-version: 3.8 tf-version: 1.15.0 - python-version: 3.6 tf-version: 2.7.0 - python-version: 3.6 tf-version: 2.8.0 - python-version: 3.6 tf-version: 2.9.0 - python-version: 3.6 tf-version: 2.10.0 - python-version: 3.9 tf-version: 1.4.0 - python-version: 3.9 tf-version: 1.15.0 - python-version: 3.9 tf-version: 2.2.0 - python-version: 3.9 tf-version: 2.5.0 - python-version: 3.9 tf-version: 2.6.0 - python-version: 3.9 tf-version: 2.7.0 - python-version: 3.10.7 tf-version: 1.4.0 - python-version: 3.10.7 tf-version: 1.15.0 - python-version: 3.10.7 tf-version: 2.2.0 - python-version: 3.10.7 tf-version: 2.5.0 - python-version: 3.10.7 tf-version: 2.6.0 - python-version: 3.10.7 tf-version: 2.7.0 
steps: - uses: actions/checkout@v3 - name: Setup python environment uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | pip3 install -q tensorflow==${{ matrix.tf-version }} pip install -q protobuf==3.19.0 pip install -q requests pip install -e . - name: Test with pytest timeout-minutes: 180 run: | pip install -q pytest pip install -q pytest-cov pip install -q python-coveralls pytest --cov=deepctr --cov-report=xml - name: Upload coverage to Codecov uses: codecov/codecov-action@v3.1.0 with: token: ${{secrets.CODECOV_TOKEN}} file: ./coverage.xml flags: pytest name: py${{ matrix.python-version }}-tf${{ matrix.tf-version }} ================================================ FILE: .github/workflows/ci2.yml ================================================ name: CI_TF1 on: push: path: - 'deepctr/*' - 'tests/*' pull_request: path: - 'deepctr/*' - 'tests/*' jobs: build: runs-on: ubuntu-latest timeout-minutes: 360 strategy: matrix: python-version: [ 3.6,3.7 ] tf-version: [ 1.15.0 ] exclude: - python-version: 3.7 tf-version: 1.4.0 - python-version: 3.7 tf-version: 1.12.0 - python-version: 3.7 tf-version: 1.15.0 - python-version: 3.8 tf-version: 1.4.0 - python-version: 3.8 tf-version: 1.14.0 - python-version: 3.8 tf-version: 1.15.0 - python-version: 3.6 tf-version: 2.7.0 - python-version: 3.6 tf-version: 2.8.0 - python-version: 3.6 tf-version: 2.9.0 - python-version: 3.6 tf-version: 2.10.0 - python-version: 3.9 tf-version: 1.4.0 - python-version: 3.9 tf-version: 1.15.0 - python-version: 3.9 tf-version: 2.2.0 - python-version: 3.9 tf-version: 2.5.0 - python-version: 3.9 tf-version: 2.6.0 - python-version: 3.9 tf-version: 2.7.0 - python-version: 3.10.7 tf-version: 1.4.0 - python-version: 3.10.7 tf-version: 1.15.0 - python-version: 3.10.7 tf-version: 2.2.0 - python-version: 3.10.7 tf-version: 2.5.0 - python-version: 3.10.7 tf-version: 2.6.0 - python-version: 3.10.7 tf-version: 2.7.0 steps: - uses: actions/checkout@v3 - 
name: Setup python environment uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | pip3 install -q tensorflow==${{ matrix.tf-version }} pip install -q protobuf==3.19.0 pip install -q requests pip install -e . - name: Test with pytest timeout-minutes: 360 run: | pip install -q pytest pip install -q pytest-cov pip install -q python-coveralls pytest --cov=deepctr --cov-report=xml - name: Upload coverage to Codecov uses: codecov/codecov-action@v3.1.0 with: token: ${{secrets.CODECOV_TOKEN}} file: ./coverage.xml flags: pytest name: py${{ matrix.python-version }}-tf${{ matrix.tf-version }} ================================================ FILE: .gitignore ================================================ *.h5 *.ipynb .pytest_cache/ .vscode/ tests/unused/* # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class .idea/ # C extensions *.so # Distribution / packaging .Python env/ build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ *.egg-info/ .installed.cfg *.egg # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. 
*.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *,cover .hypothesis/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py # Flask instance folder instance/ # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # IPython Notebook .ipynb_checkpoints # pyenv .python-version # celery beat schedule file celerybeat-schedule # dotenv .env # virtualenv venv/ ENV/ # Spyder project settings .spyderproject # Rope project settings .ropeproject # ========================= # Operating System Files # ========================= # OSX # ========================= .DS_Store .AppleDouble .LSOverride # Thumbnails ._* # Files that might appear in the root of a volume .DocumentRevisions-V100 .fseventsd .Spotlight-V100 .TemporaryItems .Trashes .VolumeIcon.icns # Directories potentially created on remote AFP share .AppleDB .AppleDesktop Network Trash Folder Temporary Items .apdisk # Windows # ========================= # Windows image file caches Thumbs.db ehthumbs.db # Folder config file Desktop.ini # Recycle Bin used on file shares $RECYCLE.BIN/ # Windows Installer files *.cab *.msi *.msm *.msp # Windows shortcuts *.lnk ================================================ FILE: .readthedocs.yml ================================================ build: image: latest python: version: 3.6 ================================================ FILE: .travis.yml ================================================ #sudo: required #dist: trusty xenial language: python python: - "2.7" #time out #- "3.4" - "3.5" - "3.6" #- "3.7" env: # - TF_VERSION=1.13.1 # - TF_VERSION=1.12.2 - TF_VERSION=1.4.0 #Not Support- TF_VERSION=1.7.0 #Not Support- TF_VERSION=1.7.1 #Not Support- TF_VERSION=1.8.0 #- TF_VERSION=1.8.0 # - TF_VERSION=1.11.0 #- TF_VERSION=1.6.0 - TF_VERSION=2.0.0b1 #- TF_VERSION=1.13.2 - TF_VERSION=1.14.0 matrix: allow_failures: - python: 
"2.7" env: TF_VERSION=1.6.0 # to speed up - python: "2.7" env: TF_VERSION=2.0.0b1 - python: "3.4" - python: "3.5" - python: "3.7" - env: TF_VERSION=1.5.0 #local is ok,but sometimes CI is failed - env: TF_VERSION=1.7.0 - env: TF_VERSION=1.7.1 - env: TF_VERSION=1.8.0 - env: TF_VERSION=1.12.0 # too slow - env: TF_VERSION=1.13.1 # too slow - env: TF_VERSION=1.13.2 # too slow - env: TF_VERSION=1.14.0 # too slow fast_finish: true cache: pip # command to install dependencies install: - pip install -q pytest-cov==2.4.0 #>=2.4.0,<2.6 - pip install -q python-coveralls - pip install -q codacy-coverage - pip install -q tensorflow==$TF_VERSION - pip install -q pandas - pip install -q packaging - pip install -e . # command to run tests script: - pytest --cov=deepctr notifications: recipients: - weichenswc@163.com on_success: change on_failure: change after_success: - coveralls - coverage xml - python-codacy-coverage -r coverage.xml ================================================ FILE: CONTRIBUTING.md ================================================ This project is under development and we need developers to participate. # Join us If you - are familiar with and interested in CTR prediction algorithms - are familiar with tensorflow - have spare time to learn and develop - are familiar with git please send a brief introduction of your background and experience to weichenswc@163.com. Welcome to join us! # Creating a pull request 1. **Become a collaborator**: Send an email with an introduction and your GitHub account name to weichenswc@163.com and wait for an invitation to become a collaborator. 2. **Fork&Dev**: Fork your own branch (`dev_yourname`) in `DeepCTR` from the `master` branch for development. If the `master` branch is updated during the development process, remember to merge and update to `dev_yourname` regularly. 3. **Testing**: Test logical correctness and effect when finishing the code development of the `dev_yourname` branch. 4. 
**Pre-release**: After testing, contact weichenswc@163.com for pre-release integration. Usually your branch `dev_yourname` will be merged into the `release` branch by squash merge. 5. **Release a new version**: After confirming that no further changes are needed, the `release` branch will be merged into `master` and a new Python package will be released on PyPI. # Discussions https://github.com/shenweichen/DeepCTR/discussions ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. 
"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright 2017-present Weichen Shen Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: README.md ================================================ # DeepCTR [![Python Versions](https://img.shields.io/pypi/pyversions/deepctr.svg)](https://pypi.org/project/deepctr) [![TensorFlow Versions](https://img.shields.io/badge/TensorFlow-1.4+/2.0+-blue.svg)](https://pypi.org/project/deepctr) [![Downloads](https://pepy.tech/badge/deepctr)](https://pepy.tech/project/deepctr) [![PyPI Version](https://img.shields.io/pypi/v/deepctr.svg)](https://pypi.org/project/deepctr) [![GitHub Issues](https://img.shields.io/github/issues/shenweichen/deepctr.svg )](https://github.com/shenweichen/deepctr/issues) [![Documentation Status](https://readthedocs.org/projects/deepctr-doc/badge/?version=latest)](https://deepctr-doc.readthedocs.io/) ![CI status](https://github.com/shenweichen/deepctr/workflows/CI/badge.svg) [![codecov](https://codecov.io/gh/shenweichen/DeepCTR/branch/master/graph/badge.svg)](https://codecov.io/gh/shenweichen/DeepCTR) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/d4099734dc0e4bab91d332ead8c0bdd0)](https://www.codacy.com/gh/shenweichen/DeepCTR?utm_source=github.com&utm_medium=referral&utm_content=shenweichen/DeepCTR&utm_campaign=Badge_Grade) [![Disscussion](https://img.shields.io/badge/chat-wechat-brightgreen?style=flat)](./README.md#DisscussionGroup) 
[![License](https://img.shields.io/github/license/shenweichen/deepctr.svg)](https://github.com/shenweichen/deepctr/blob/master/LICENSE) DeepCTR is an **Easy-to-use**, **Modular** and **Extendible** package of deep-learning based CTR models along with lots of core components layers which can be used to easily build custom models. You can use any complex model with `model.fit()` and `model.predict()`. - Provide `tf.keras.Model` like interfaces for **quick experiment**. [example](https://deepctr-doc.readthedocs.io/en/latest/Quick-Start.html#getting-started-4-steps-to-deepctr) - Provide `tensorflow estimator` interface for **large scale data** and **distributed training**. [example](https://deepctr-doc.readthedocs.io/en/latest/Quick-Start.html#getting-started-4-steps-to-deepctr-estimator-with-tfrecord) - It is compatible with both `tf 1.x` and `tf 2.x`. Some related projects: - DeepMatch: https://github.com/shenweichen/DeepMatch - DeepCTR-Torch: https://github.com/shenweichen/DeepCTR-Torch Let's [**Get Started!**](https://deepctr-doc.readthedocs.io/en/latest/Quick-Start.html)([Chinese Introduction](https://zhuanlan.zhihu.com/p/53231955)) and [welcome to join us!](./CONTRIBUTING.md) ## Models List | Model | Paper | | :------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------- | | Convolutional Click Prediction Model | [CIKM 2015][A Convolutional Click Prediction Model](http://ir.ia.ac.cn/bitstream/173211/12337/1/A%20Convolutional%20Click%20Prediction%20Model.pdf) | | Factorization-supported Neural Network | [ECIR 2016][Deep Learning over Multi-field Categorical Data: A Case Study on User Response Prediction](https://arxiv.org/pdf/1601.02376.pdf) | | Product-based Neural Network | [ICDM 2016][Product-based neural networks for user response prediction](https://arxiv.org/pdf/1611.00144.pdf) | | Wide & Deep | [DLRS 2016][Wide & Deep 
Learning for Recommender Systems](https://arxiv.org/pdf/1606.07792.pdf) | | DeepFM | [IJCAI 2017][DeepFM: A Factorization-Machine based Neural Network for CTR Prediction](http://www.ijcai.org/proceedings/2017/0239.pdf) | | Piece-wise Linear Model | [arxiv 2017][Learning Piece-wise Linear Models from Large Scale Data for Ad Click Prediction](https://arxiv.org/abs/1704.05194) | | Deep & Cross Network | [ADKDD 2017][Deep & Cross Network for Ad Click Predictions](https://arxiv.org/abs/1708.05123) | | Attentional Factorization Machine | [IJCAI 2017][Attentional Factorization Machines: Learning the Weight of Feature Interactions via Attention Networks](http://www.ijcai.org/proceedings/2017/435) | | Neural Factorization Machine | [SIGIR 2017][Neural Factorization Machines for Sparse Predictive Analytics](https://arxiv.org/pdf/1708.05027.pdf) | | xDeepFM | [KDD 2018][xDeepFM: Combining Explicit and Implicit Feature Interactions for Recommender Systems](https://arxiv.org/pdf/1803.05170.pdf) | | Deep Interest Network | [KDD 2018][Deep Interest Network for Click-Through Rate Prediction](https://arxiv.org/pdf/1706.06978.pdf) | | AutoInt | [CIKM 2019][AutoInt: Automatic Feature Interaction Learning via Self-Attentive Neural Networks](https://arxiv.org/abs/1810.11921) | | Deep Interest Evolution Network | [AAAI 2019][Deep Interest Evolution Network for Click-Through Rate Prediction](https://arxiv.org/pdf/1809.03672.pdf) | | FwFM | [WWW 2018][Field-weighted Factorization Machines for Click-Through Rate Prediction in Display Advertising](https://arxiv.org/pdf/1806.03514.pdf) | | ONN | [arxiv 2019][Operation-aware Neural Networks for User Response Prediction](https://arxiv.org/pdf/1904.12579.pdf) | | FGCNN | [WWW 2019][Feature Generation by Convolutional Neural Network for Click-Through Rate Prediction ](https://arxiv.org/pdf/1904.04447) | | Deep Session Interest Network | [IJCAI 2019][Deep Session Interest Network for Click-Through Rate Prediction 
](https://arxiv.org/abs/1905.06482) | | FiBiNET | [RecSys 2019][FiBiNET: Combining Feature Importance and Bilinear feature Interaction for Click-Through Rate Prediction](https://arxiv.org/pdf/1905.09433.pdf) | | FLEN | [arxiv 2019][FLEN: Leveraging Field for Scalable CTR Prediction](https://arxiv.org/pdf/1911.04690.pdf) | | BST | [DLP-KDD 2019][Behavior sequence transformer for e-commerce recommendation in Alibaba](https://arxiv.org/pdf/1905.06874.pdf) | | IFM | [IJCAI 2019][An Input-aware Factorization Machine for Sparse Prediction](https://www.ijcai.org/Proceedings/2019/0203.pdf) | | DCN V2 | [arxiv 2020][DCN V2: Improved Deep & Cross Network and Practical Lessons for Web-scale Learning to Rank Systems](https://arxiv.org/abs/2008.13535) | | DIFM | [IJCAI 2020][A Dual Input-aware Factorization Machine for CTR Prediction](https://www.ijcai.org/Proceedings/2020/0434.pdf) | | FEFM and DeepFEFM | [arxiv 2020][Field-Embedded Factorization Machines for Click-through rate prediction](https://arxiv.org/abs/2009.09931) | | SharedBottom | [arxiv 2017][An Overview of Multi-Task Learning in Deep Neural Networks](https://arxiv.org/pdf/1706.05098.pdf) | | ESMM | [SIGIR 2018][Entire Space Multi-Task Model: An Effective Approach for Estimating Post-Click Conversion Rate](https://arxiv.org/abs/1804.07931) | | MMOE | [KDD 2018][Modeling Task Relationships in Multi-task Learning with Multi-gate Mixture-of-Experts](https://dl.acm.org/doi/abs/10.1145/3219819.3220007) | | PLE | [RecSys 2020][Progressive Layered Extraction (PLE): A Novel Multi-Task Learning (MTL) Model for Personalized Recommendations](https://dl.acm.org/doi/10.1145/3383313.3412236) | | EDCN | [KDD 2021][Enhancing Explicit and Implicit Feature Interactions via Information Sharing for Parallel Deep CTR Models](https://dlp-kdd.github.io/assets/pdf/DLP-KDD_2021_paper_12.pdf) | ## Citation - Weichen Shen. (2017). DeepCTR: Easy-to-use,Modular and Extendible package of deep-learning based CTR models. 
https://github.com/shenweichen/deepctr. If you find this code useful in your research, please cite it using the following BibTeX: ```bibtex @misc{shen2017deepctr, author = {Weichen Shen}, title = {DeepCTR: Easy-to-use,Modular and Extendible package of deep-learning based CTR models}, year = {2017}, publisher = {GitHub}, journal = {GitHub Repository}, howpublished = {\url{https://github.com/shenweichen/deepctr}}, } ``` ## Discussion Group - [GitHub Discussions](https://github.com/shenweichen/DeepCTR/discussions) - WeChat Discussions |公众号:浅梦学习笔记|微信:deepctrbot|学习小组 [加入](https://t.zsxq.com/026UJEuzv) [主题集合](https://mp.weixin.qq.com/mp/appmsgalbum?__biz=MjM5MzY4NzE3MA==&action=getalbum&album_id=1361647041096843265&scene=126#wechat_redirect)| |:--:|:--:|:--:| | [![公众号](./docs/pics/code.png)](https://github.com/shenweichen/AlgoNotes)| [![微信](./docs/pics/deepctrbot.png)](https://github.com/shenweichen/AlgoNotes)|[![学习小组](./docs/pics/planet_github.png)](https://t.zsxq.com/026UJEuzv)| ## Main contributors([welcome to join us!](./CONTRIBUTING.md))
pic
Shen Weichen

Alibaba Group

pic
Zan Shuxun

Alibaba Group

pic
Harshit Pande

Amazon

pic
Lai Mincai

ByteDance

pic
Li Zichao

ByteDance

pic
Tan Tingyi

Chongqing University
of Posts and
Telecommunications

================================================ FILE: deepctr/__init__.py ================================================ from .utils import check_version __version__ = '0.9.3' check_version(__version__) ================================================ FILE: deepctr/contrib/__init__.py ================================================ ================================================ FILE: deepctr/contrib/rnn.py ================================================ # Copyright 2015 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """RNN helpers for TensorFlow models. 
@@bidirectional_dynamic_rnn @@dynamic_rnn @@raw_rnn @@static_rnn @@static_state_saving_rnn @@static_bidirectional_rnn """ from __future__ import absolute_import from __future__ import division from __future__ import print_function from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import rnn_cell_impl from tensorflow.python.ops import tensor_array_ops from tensorflow.python.ops import variable_scope as vs from tensorflow.python.util import nest import tensorflow as tf def _like_rnncell_(cell): """Checks that a given object is an RNNCell by using duck typing.""" conditions = [hasattr(cell, "output_size"), hasattr(cell, "state_size"), hasattr(cell, "zero_state"), callable(cell)] return all(conditions) # pylint: disable=protected-access _concat = rnn_cell_impl._concat try: _like_rnncell = rnn_cell_impl._like_rnncell except Exception as e: _like_rnncell = _like_rnncell_ # pylint: enable=protected-access def _transpose_batch_time(x): """Transpose the batch and time dimensions of a Tensor. Retains as much of the static shape information as possible. Args: x: A tensor of rank 2 or higher. Returns: x transposed along the first two dimensions. Raises: ValueError: if `x` is rank 1 or lower. 
""" x_static_shape = x.get_shape() if x_static_shape.ndims is not None and x_static_shape.ndims < 2: raise ValueError( "Expected input tensor %s to have rank at least 2, but saw shape: %s" % (x, x_static_shape)) x_rank = array_ops.rank(x) x_t = array_ops.transpose( x, array_ops.concat( ([1, 0], math_ops.range(2, x_rank)), axis=0)) x_t.set_shape( tensor_shape.TensorShape([ x_static_shape[1].value, x_static_shape[0].value ]).concatenate(x_static_shape[2:])) return x_t def _best_effort_input_batch_size(flat_input): """Get static input batch size if available, with fallback to the dynamic one. Args: flat_input: An iterable of time major input Tensors of shape [max_time, batch_size, ...]. All inputs should have compatible batch sizes. Returns: The batch size in Python integer if available, or a scalar Tensor otherwise. Raises: ValueError: if there is any input with an invalid shape. """ for input_ in flat_input: shape = input_.shape if shape.ndims is None: continue if shape.ndims < 2: raise ValueError( "Expected input tensor %s to have rank at least 2" % input_) batch_size = shape[1].value if batch_size is not None: return batch_size # Fallback to the dynamic batch size of the first input. return array_ops.shape(flat_input[0])[1] def _infer_state_dtype(explicit_dtype, state): """Infer the dtype of an RNN state. Args: explicit_dtype: explicitly declared dtype or None. state: RNN's hidden state. Must be a Tensor or a nested iterable containing Tensors. Returns: dtype: inferred dtype of hidden state. Raises: ValueError: if `state` has heterogeneous dtypes or is empty. """ if explicit_dtype is not None: return explicit_dtype elif nest.is_sequence(state): inferred_dtypes = [element.dtype for element in nest.flatten(state)] if not inferred_dtypes: raise ValueError("Unable to infer dtype from empty state.") all_same = all([x == inferred_dtypes[0] for x in inferred_dtypes]) if not all_same: raise ValueError( "State has tensors of different inferred_dtypes. 
def _rnn_step(
        time, sequence_length, min_sequence_length, max_sequence_length,
        zero_output, state, call_cell, state_size, skip_conditionals=False):
    """Calculate one step of a dynamic RNN minibatch.

    Returns an (output, state) pair conditioned on the sequence_lengths.
    When skip_conditionals=False, the pseudocode is something like:

    if t >= max_sequence_length:
      return (zero_output, state)
    if t < min_sequence_length:
      return call_cell()

    # Selectively output zeros or output, old state or new state depending
    # on if we've finished calculating each row.
    new_output, new_state = call_cell()
    final_output = np.vstack([
      zero_output if time >= sequence_lengths[r] else new_output_r
      for r, new_output_r in enumerate(new_output)
    ])
    final_state = np.vstack([
      state[r] if time >= sequence_lengths[r] else new_state_r
      for r, new_state_r in enumerate(new_state)
    ])
    return (final_output, final_state)

    When skip_conditionals=True, `call_cell()` is always invoked and the
    per-row selection above is applied at every step without any `cond`.

    Args:
      time: Python int, the current time step. (The dynamic loop in this
        file passes its int32 scalar loop-counter tensor here.)
      sequence_length: int32 `Tensor` vector of size [batch_size]
      min_sequence_length: int32 `Tensor` scalar, min of sequence_length
      max_sequence_length: int32 `Tensor` scalar, max of sequence_length
      zero_output: `Tensor` vector of shape [output_size]
      state: Either a single `Tensor` matrix of shape `[batch_size, state_size]`,
        or a list/tuple of such tensors.
      call_cell: lambda returning tuple of (new_output, new_state) where
        new_output is a `Tensor` matrix of shape `[batch_size, output_size]`.
        new_state is a `Tensor` matrix of shape `[batch_size, state_size]`.
      state_size: The `cell.state_size` associated with the state. It is
        accepted for signature compatibility and is not read in this body.
      skip_conditionals: Python bool, whether to skip using the conditional
        calculations.  This is useful for `dynamic_rnn`, where the input tensor
        matches `max_sequence_length`, and using conditionals just slows
        everything down.

    Returns:
      A tuple of (`final_output`, `final_state`) as given by the pseudocode above:
        final_output is a `Tensor` matrix of shape [batch_size, output_size]
        final_state is either a single `Tensor` matrix, or a tuple of such
          matrices (matching length and shapes of input `state`).

    Raises:
      ValueError: If the cell returns a state tuple whose length does not match
        that returned by `state_size`, or if the flattened outputs and states
        do not add up to the expected number of tensors.
    """

    # Convert state to a list for ease of use
    flat_state = nest.flatten(state)
    flat_zero_output = nest.flatten(zero_output)

    def _copy_one_through(output, new_output):
        # Pick `output` for rows that are already finished (time >= length)
        # and `new_output` for the rows that are still running.
        # If the state contains a scalar value we simply pass it through.
        if output.shape.ndims == 0:
            return new_output
        copy_cond = (time >= sequence_length)
        with ops.colocate_with(new_output):
            return array_ops.where(copy_cond, output, new_output)

    def _copy_some_through(flat_new_output, flat_new_state):
        # Use broadcasting select to determine which values should get
        # the previous state & zero output, and which values should get
        # a calculated state & output.
        flat_new_output = [
            _copy_one_through(zero_output, new_output)
            for zero_output, new_output in zip(flat_zero_output, flat_new_output)]
        flat_new_state = [
            _copy_one_through(state, new_state)
            for state, new_state in zip(flat_state, flat_new_state)]
        # Outputs come first, states second; the caller splits on
        # len(flat_zero_output).
        return flat_new_output + flat_new_state

    def _maybe_copy_some_through():
        """Run RNN step.  Pass through either no or some past state."""
        new_output, new_state = call_cell()

        nest.assert_same_structure(state, new_state)

        flat_new_state = nest.flatten(new_state)
        flat_new_output = nest.flatten(new_output)
        return control_flow_ops.cond(
            # if t < min_seq_len: calculate and return everything
            time < min_sequence_length, lambda: flat_new_output + flat_new_state,
            # else copy some of it through
            lambda: _copy_some_through(flat_new_output, flat_new_state))

    # TODO(ebrevdo): skipping these conditionals may cause a slowdown,
    # but benefits from removing cond() and its gradient.  We should
    # profile with and without this switch here.
    if skip_conditionals:
        # Instead of using conditionals, perform the selective copy at all time
        # steps.  This is faster when max_seq_len is equal to the number of unrolls
        # (which is typical for dynamic_rnn).
        new_output, new_state = call_cell()
        nest.assert_same_structure(state, new_state)
        new_state = nest.flatten(new_state)
        new_output = nest.flatten(new_output)
        final_output_and_state = _copy_some_through(new_output, new_state)
    else:
        empty_update = lambda: flat_zero_output + flat_state
        final_output_and_state = control_flow_ops.cond(
            # if t >= max_seq_len: copy all state through, output zeros
            time >= max_sequence_length, empty_update,
            # otherwise calculation is required: copy some or all of it through
            _maybe_copy_some_through)

    # Sanity check on the flat [outputs..., states...] list before splitting it.
    if len(final_output_and_state) != len(flat_zero_output) + len(flat_state):
        raise ValueError("Internal error: state and output were not concatenated "
                         "correctly.")
    final_output = final_output_and_state[:len(flat_zero_output)]
    final_state = final_output_and_state[len(flat_zero_output):]

    # Re-apply the static shapes of the zero output / incoming state to the
    # selected tensors.
    for output, flat_output in zip(final_output, flat_zero_output):
        output.set_shape(flat_output.get_shape())
    for substate, flat_substate in zip(final_state, flat_state):
        substate.set_shape(flat_substate.get_shape())

    # Restore the caller's nesting structure.
    final_output = nest.pack_sequence_as(
        structure=zero_output, flat_sequence=final_output)
    final_state = nest.pack_sequence_as(
        structure=state, flat_sequence=final_state)

    return final_output, final_state
Returns: time-reversed sequence """ if lengths is None: return list(reversed(input_seq)) flat_input_seq = tuple(nest.flatten(input_) for input_ in input_seq) flat_results = [[] for _ in range(len(input_seq))] for sequence in zip(*flat_input_seq): input_shape = tensor_shape.unknown_shape( ndims=sequence[0].get_shape().ndims) for input_ in sequence: input_shape.merge_with(input_.get_shape()) input_.set_shape(input_shape) # Join into (time, batch_size, depth) s_joined = array_ops.stack(sequence) # Reverse along dimension 0 s_reversed = array_ops.reverse_sequence(s_joined, lengths, 0, 1) # Split again into list result = array_ops.unstack(s_reversed) for r, flat_result in zip(result, flat_results): r.set_shape(input_shape) flat_result.append(r) results = [nest.pack_sequence_as(structure=input_, flat_sequence=flat_result) for input_, flat_result in zip(input_seq, flat_results)] return results # # def bidirectional_dynamic_rnn(cell_fw, cell_bw, inputs, sequence_length=None, # # initial_state_fw=None, initial_state_bw=None, # # dtype=None, parallel_iterations=None, # # swap_memory=False, time_major=False, scope=None): # # """Creates a dynamic version of bidirectional recurrent neural network. # # # # Takes input and builds independent forward and backward RNNs. The input_size # # of forward and backward cell must match. The initial state for both directions # # is zero by default (but can be set optionally) and no intermediate states are # # ever returned -- the network is fully unrolled for the given (passed in) # # length(s) of the sequence(s) or completely unrolled if length(s) is not # # given. # # # # Args: # # cell_fw: An instance of RNNCell, to be used for forward direction. # # cell_bw: An instance of RNNCell, to be used for backward direction. # # inputs: The RNN inputs. # # If time_major == False (default), this must be a tensor of shape: # # `[batch_size, max_time, ...]`, or a nested tuple of such elements. 
# # If time_major == True, this must be a tensor of shape: # # `[max_time, batch_size, ...]`, or a nested tuple of such elements. # # sequence_length: (optional) An int32/int64 vector, size `[batch_size]`, # # containing the actual lengths for each of the sequences in the batch. # # If not provided, all batch entries are assumed to be full sequences; and # # time reversal is applied from time `0` to `max_time` for each sequence. # # initial_state_fw: (optional) An initial state for the forward RNN. # # This must be a tensor of appropriate type and shape # # `[batch_size, cell_fw.state_size]`. # # If `cell_fw.state_size` is a tuple, this should be a tuple of # # tensors having shapes `[batch_size, s] for s in cell_fw.state_size`. # # initial_state_bw: (optional) Same as for `initial_state_fw`, but using # # the corresponding properties of `cell_bw`. # # dtype: (optional) The data type for the initial states and expected output. # # Required if initial_states are not provided or RNN states have a # # heterogeneous dtype. # # parallel_iterations: (Default: 32). The number of iterations to run in # # parallel. Those operations which do not have any temporal dependency # # and can be run in parallel, will be. This parameter trades off # # time for space. Values >> 1 use more memory but take less time, # # while smaller values use less memory but computations take longer. # # swap_memory: Transparently swap the tensors produced in forward inference # # but needed for back prop from GPU to CPU. This allows training RNNs # # which would typically not fit on a single GPU, with very minimal (or no) # # performance penalty. # # time_major: The shape format of the `inputs` and `outputs` Tensors. # # If true, these `Tensors` must be shaped `[max_time, batch_size, depth]`. # # If false, these `Tensors` must be shaped `[batch_size, max_time, depth]`. 
# # Using `time_major = True` is a bit more efficient because it avoids # # transposes at the beginning and end of the RNN calculation. However, # # most TensorFlow data is batch-major, so by default this function # # accepts input and emits output in batch-major form. # # scope: VariableScope for the created subgraph; defaults to # # "bidirectional_rnn" # # # # Returns: # # A tuple (outputs, output_states) where: # # outputs: A tuple (output_fw, output_bw) containing the forward and # # the backward rnn output `Tensor`. # # If time_major == False (default), # # output_fw will be a `Tensor` shaped: # # `[batch_size, max_time, cell_fw.output_size]` # # and output_bw will be a `Tensor` shaped: # # `[batch_size, max_time, cell_bw.output_size]`. # # If time_major == True, # # output_fw will be a `Tensor` shaped: # # `[max_time, batch_size, cell_fw.output_size]` # # and output_bw will be a `Tensor` shaped: # # `[max_time, batch_size, cell_bw.output_size]`. # # It returns a tuple instead of a single concatenated `Tensor`, unlike # # in the `bidirectional_rnn`. If the concatenated one is preferred, # # the forward and backward outputs can be concatenated as # # `tf.concat(outputs, 2)`. # # output_states: A tuple (output_state_fw, output_state_bw) containing # # the forward and the backward final states of bidirectional rnn. # # # # Raises: # # TypeError: If `cell_fw` or `cell_bw` is not an instance of `RNNCell`. 
def dynamic_rnn(cell, inputs, att_scores=None, sequence_length=None, initial_state=None,
                dtype=None, parallel_iterations=None, swap_memory=False,
                time_major=False, scope=None):
    """Creates a recurrent neural network specified by RNNCell `cell`.

    Performs fully dynamic unrolling of `inputs`. Compared with the stock
    TensorFlow signature this variant takes an extra `att_scores` argument
    that is forwarded to the cell at every time step.

    Example:

    ```python
    # create a BasicRNNCell
    rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size)

    # 'outputs' is a tensor of shape [batch_size, max_time, cell_state_size]

    # defining initial state
    initial_state = rnn_cell.zero_state(batch_size, dtype=tf.float32)

    # 'state' is a tensor of shape [batch_size, cell_state_size]
    outputs, state = tf.nn.dynamic_rnn(rnn_cell, input_data,
                                       initial_state=initial_state,
                                       dtype=tf.float32)
    ```

    ```python
    # create 2 LSTMCells
    rnn_layers = [tf.nn.rnn_cell.LSTMCell(size) for size in [128, 256]]

    # create a RNN cell composed sequentially of a number of RNNCells
    multi_rnn_cell = tf.nn.rnn_cell.MultiRNNCell(rnn_layers)

    # 'outputs' is a tensor of shape [batch_size, max_time, 256]
    # 'state' is a N-tuple where N is the number of LSTMCells containing a
    # tf.contrib.rnn.LSTMStateTuple for each cell
    outputs, state = tf.nn.dynamic_rnn(cell=multi_rnn_cell,
                                       inputs=data,
                                       dtype=tf.float32)
    ```


    Args:
      cell: An instance of RNNCell.
      inputs: The RNN inputs.
        If `time_major == False` (default), this must be a `Tensor` of shape:
          `[batch_size, max_time, ...]`, or a nested tuple of such
          elements.
        If `time_major == True`, this must be a `Tensor` of shape:
          `[max_time, batch_size, ...]`, or a nested tuple of such
          elements.
        This may also be a (possibly nested) tuple of Tensors satisfying
        this property.  The first two dimensions must match across all the inputs,
        but otherwise the ranks and other shape components may differ.
        In this case, input to `cell` at each time-step will replicate the
        structure of these tuples, except for the time dimension (from which the
        time is taken).
        The input to `cell` at each time step will be a `Tensor` or (possibly
        nested) tuple of Tensors each with dimensions `[batch_size, ...]`.
      att_scores: (optional) Attention scores forwarded unchanged to
        `_dynamic_rnn_loop`. When given, the loop reads
        `att_scores[:, time, :]` at each step and passes it to `cell` as a
        third positional argument, so `cell` must accept
        `(inputs, state, att_score)`. It is not transposed here.
        NOTE(review): the slice indexes time on axis 1 regardless of
        `time_major`, which suggests a batch-major
        `[batch_size, max_time, ...]` layout -- confirm against callers.
      sequence_length: (optional) An int32/int64 vector sized `[batch_size]`.
        Used to copy-through state and zero-out outputs when past a batch
        element's sequence length.  So it's more for correctness than performance.
      initial_state: (optional) An initial state for the RNN.
        If `cell.state_size` is an integer, this must be
        a `Tensor` of appropriate type and shape `[batch_size, cell.state_size]`.
        If `cell.state_size` is a tuple, this should be a tuple of
        tensors having shapes `[batch_size, s] for s in cell.state_size`.
      dtype: (optional) The data type for the initial state and expected output.
        Required if initial_state is not provided or RNN state has a heterogeneous
        dtype.
      parallel_iterations: (Default: 32).  The number of iterations to run in
        parallel.  Those operations which do not have any temporal dependency
        and can be run in parallel, will be.  This parameter trades off
        time for space.  Values >> 1 use more memory but take less time,
        while smaller values use less memory but computations take longer.
      swap_memory: Transparently swap the tensors produced in forward inference
        but needed for back prop from GPU to CPU.  This allows training RNNs
        which would typically not fit on a single GPU, with very minimal (or no)
        performance penalty.
      time_major: The shape format of the `inputs` and `outputs` Tensors.
        If true, these `Tensors` must be shaped `[max_time, batch_size, depth]`.
        If false, these `Tensors` must be shaped `[batch_size, max_time, depth]`.
        Using `time_major = True` is a bit more efficient because it avoids
        transposes at the beginning and end of the RNN calculation.  However,
        most TensorFlow data is batch-major, so by default this function
        accepts input and emits output in batch-major form.
      scope: VariableScope for the created subgraph; defaults to "rnn".
        The scope is always opened with `reuse=tf.AUTO_REUSE`.

    Returns:
      A pair (outputs, state) where:

      outputs: The RNN output `Tensor`.

        If time_major == False (default), this will be a `Tensor` shaped:
          `[batch_size, max_time, cell.output_size]`.

        If time_major == True, this will be a `Tensor` shaped:
          `[max_time, batch_size, cell.output_size]`.

        Note, if `cell.output_size` is a (possibly nested) tuple of integers
        or `TensorShape` objects, then `outputs` will be a tuple having the
        same structure as `cell.output_size`, containing Tensors having shapes
        corresponding to the shape data in `cell.output_size`.

      state: The final state.  If `cell.state_size` is an int, this
        will be shaped `[batch_size, cell.state_size]`.  If it is a
        `TensorShape`, this will be shaped `[batch_size] + cell.state_size`.
        If it is a (possibly nested) tuple of ints or `TensorShape`, this will
        be a tuple having the corresponding shapes. If cells are `LSTMCells`
        `state` will be a tuple containing a `LSTMStateTuple` for each cell.

    Raises:
      TypeError: If `cell` is not an instance of RNNCell.
      ValueError: If inputs is None or an empty list, if `sequence_length`
        has a known rank other than 1, or if neither `initial_state` nor
        `dtype` is provided.
    """

    if not _like_rnncell(cell):
        raise TypeError("cell must be an instance of RNNCell")

    # By default, time_major==False and inputs are batch-major: shaped
    #   [batch, time, depth]
    # For internal calculations, we transpose to [time, batch, depth]
    flat_input = nest.flatten(inputs)

    if not time_major:
        # (B,T,D) => (T,B,D)
        flat_input = [ops.convert_to_tensor(input_) for input_ in flat_input]
        flat_input = tuple(_transpose_batch_time(input_) for input_ in flat_input)

    # Both None and 0 select the default of 32.
    parallel_iterations = parallel_iterations or 32
    if sequence_length is not None:
        sequence_length = math_ops.to_int32(sequence_length)
        if sequence_length.get_shape().ndims not in (None, 1):
            raise ValueError(
                "sequence_length must be a vector of length batch_size, "
                "but saw shape: %s" % sequence_length.get_shape())
        sequence_length = array_ops.identity(  # Just to find it in the graph.
            sequence_length, name="sequence_length")

    # Create a new scope in which the caching device is either
    # determined by the parent scope, or is set to place the cached
    # Variable using the same placement as for the rest of the RNN.
    with vs.variable_scope(scope or "rnn",reuse=tf.AUTO_REUSE) as varscope:#TODO:user defined reuse
        if varscope.caching_device is None:
            varscope.set_caching_device(lambda op: op.device)
        batch_size = _best_effort_input_batch_size(flat_input)

        if initial_state is not None:
            state = initial_state
        else:
            # Without an initial state the zero state must be built, which
            # needs an explicit dtype.
            if not dtype:
                raise ValueError("If there is no initial_state, you must give a dtype.")
            state = cell.zero_state(batch_size, dtype)

        def _assert_has_shape(x, shape):
            # Graph-time assertion that the dynamic shape of `x` equals `shape`.
            x_shape = array_ops.shape(x)
            packed_shape = array_ops.stack(shape)
            return control_flow_ops.Assert(
                math_ops.reduce_all(math_ops.equal(x_shape, packed_shape)),
                ["Expected shape for Tensor %s is " % x.name,
                 packed_shape, " but saw shape: ", x_shape])

        if sequence_length is not None:
            # Perform some shape validation
            with ops.control_dependencies(
                    [_assert_has_shape(sequence_length, [batch_size])]):
                sequence_length = array_ops.identity(
                    sequence_length, name="CheckSeqLen")

        # Re-nest the (now time-major) flat inputs into the caller's structure.
        inputs = nest.pack_sequence_as(structure=inputs, flat_sequence=flat_input)

        (outputs, final_state) = _dynamic_rnn_loop(
            cell,
            inputs,
            state,
            parallel_iterations=parallel_iterations,
            swap_memory=swap_memory,
            att_scores=att_scores,
            sequence_length=sequence_length,
            dtype=dtype)

        # Outputs of _dynamic_rnn_loop are always shaped [time, batch, depth].
        # If we are performing batch-major calculations, transpose output back
        # to shape [batch, time, depth]
        if not time_major:
            # (T,B,D) => (B,T,D)
            outputs = nest.map_structure(_transpose_batch_time, outputs)

        return (outputs, final_state)
initial_state: A `Tensor` of shape `[batch_size, state_size]`, or if `cell.state_size` is a tuple, then this should be a tuple of tensors having shapes `[batch_size, s] for s in cell.state_size`. parallel_iterations: Positive Python int. swap_memory: A Python boolean sequence_length: (optional) An `int32` `Tensor` of shape [batch_size]. dtype: (optional) Expected dtype of output. If not specified, inferred from initial_state. Returns: Tuple `(final_outputs, final_state)`. final_outputs: A `Tensor` of shape `[time, batch_size, cell.output_size]`. If `cell.output_size` is a (possibly nested) tuple of ints or `TensorShape` objects, then this returns a (possibly nsted) tuple of Tensors matching the corresponding shapes. final_state: A `Tensor`, or possibly nested tuple of Tensors, matching in length and shapes to `initial_state`. Raises: ValueError: If the input depth cannot be inferred via shape inference from the inputs. """ state = initial_state assert isinstance(parallel_iterations, int), "parallel_iterations must be int" state_size = cell.state_size flat_input = nest.flatten(inputs) flat_output_size = nest.flatten(cell.output_size) # Construct an initial output input_shape = array_ops.shape(flat_input[0]) time_steps = input_shape[0] batch_size = _best_effort_input_batch_size(flat_input) inputs_got_shape = tuple(input_.get_shape().with_rank_at_least(3) for input_ in flat_input) const_time_steps, const_batch_size = inputs_got_shape[0].as_list()[:2] for shape in inputs_got_shape: if not shape[2:].is_fully_defined(): raise ValueError( "Input size (depth of inputs) must be accessible via shape inference," " but saw value None.") got_time_steps = shape[0].value got_batch_size = shape[1].value if const_time_steps != got_time_steps: raise ValueError( "Time steps is not the same for all the elements in the input in a " "batch.") if const_batch_size != got_batch_size: raise ValueError( "Batch_size is not the same for all the elements in the input.") # Prepare dynamic 
conditional copying of state & output def _create_zero_arrays(size): size = _concat(batch_size, size) return array_ops.zeros( array_ops.stack(size), _infer_state_dtype(dtype, state)) flat_zero_output = tuple(_create_zero_arrays(output) for output in flat_output_size) zero_output = nest.pack_sequence_as(structure=cell.output_size, flat_sequence=flat_zero_output) if sequence_length is not None: min_sequence_length = math_ops.reduce_min(sequence_length) max_sequence_length = math_ops.reduce_max(sequence_length) time = array_ops.constant(0, dtype=dtypes.int32, name="time") with ops.name_scope("dynamic_rnn") as scope: base_name = scope def _create_ta(name, dtype): return tensor_array_ops.TensorArray(dtype=dtype, size=time_steps, tensor_array_name=base_name + name) output_ta = tuple(_create_ta("output_%d" % i, _infer_state_dtype(dtype, state)) for i in range(len(flat_output_size))) input_ta = tuple(_create_ta("input_%d" % i, flat_input[i].dtype) for i in range(len(flat_input))) input_ta = tuple(ta.unstack(input_) for ta, input_ in zip(input_ta, flat_input)) def _time_step(time, output_ta_t, state, att_scores=None): """Take a time step of the dynamic RNN. Args: time: int32 scalar Tensor. output_ta_t: List of `TensorArray`s that represent the output. state: nested tuple of vector tensors that represent the state. Returns: The tuple (time + 1, output_ta_t with updated flow, new_state). 
""" input_t = tuple(ta.read(time) for ta in input_ta) # Restore some shape information for input_, shape in zip(input_t, inputs_got_shape): input_.set_shape(shape[1:]) input_t = nest.pack_sequence_as(structure=inputs, flat_sequence=input_t) if att_scores is not None: att_score = att_scores[:, time, :] call_cell = lambda: cell(input_t, state, att_score) else: call_cell = lambda: cell(input_t, state) if sequence_length is not None: (output, new_state) = _rnn_step( time=time, sequence_length=sequence_length, min_sequence_length=min_sequence_length, max_sequence_length=max_sequence_length, zero_output=zero_output, state=state, call_cell=call_cell, state_size=state_size, skip_conditionals=True) else: (output, new_state) = call_cell() # Pack state if using state tuples output = nest.flatten(output) output_ta_t = tuple( ta.write(time, out) for ta, out in zip(output_ta_t, output)) if att_scores is not None: return (time + 1, output_ta_t, new_state, att_scores) else: return (time + 1, output_ta_t, new_state) if att_scores is not None: _, output_final_ta, final_state, _ = control_flow_ops.while_loop( cond=lambda time, *_: time < time_steps, body=_time_step, loop_vars=(time, output_ta, state, att_scores), parallel_iterations=parallel_iterations, swap_memory=swap_memory) else: _, output_final_ta, final_state = control_flow_ops.while_loop( cond=lambda time, *_: time < time_steps, body=_time_step, loop_vars=(time, output_ta, state), parallel_iterations=parallel_iterations, swap_memory=swap_memory) # Unpack final output if not using output tuples. 
final_outputs = tuple(ta.stack() for ta in output_final_ta) # Restore some shape information for output, output_size in zip(final_outputs, flat_output_size): shape = _concat( [const_time_steps, const_batch_size], output_size, static=True) output.set_shape(shape) final_outputs = nest.pack_sequence_as( structure=cell.output_size, flat_sequence=final_outputs) return (final_outputs, final_state) ================================================ FILE: deepctr/contrib/rnn_v2.py ================================================ # Copyright 2015 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """RNN helpers for TensorFlow models. 
def _like_rnncell_(cell):
    """Duck-typing test for the RNNCell interface.

    Args:
      cell: any object.

    Returns:
      True only if `cell` exposes the `output_size`, `state_size` and
      `zero_state` attributes and is itself callable; False otherwise.
    """
    required_attrs = ("output_size", "state_size", "zero_state")
    # Evaluate every check up front (no short-circuiting) before combining.
    checks = [hasattr(cell, attr) for attr in required_attrs]
    checks.append(callable(cell))
    return all(checks)
""" x_static_shape = x.get_shape() if x_static_shape.ndims is not None and x_static_shape.ndims < 2: raise ValueError( "Expected input tensor %s to have rank at least 2, but saw shape: %s" % (x, x_static_shape)) x_rank = array_ops.rank(x) x_t = array_ops.transpose( x, array_ops.concat( ([1, 0], math_ops.range(2, x_rank)), axis=0)) x_t.set_shape( tensor_shape.TensorShape([ x_static_shape[1], x_static_shape[0] ]).concatenate(x_static_shape[2:])) return x_t def _best_effort_input_batch_size(flat_input): """Get static input batch size if available, with fallback to the dynamic one. Args: flat_input: An iterable of time major input Tensors of shape [max_time, batch_size, ...]. All inputs should have compatible batch sizes. Returns: The batch size in Python integer if available, or a scalar Tensor otherwise. Raises: ValueError: if there is any input with an invalid shape. """ for input_ in flat_input: shape = input_.shape if shape.ndims is None: continue if shape.ndims < 2: raise ValueError( "Expected input tensor %s to have rank at least 2" % input_) batch_size = shape[1] if batch_size is not None: return batch_size # Fallback to the dynamic batch size of the first input. return array_ops.shape(flat_input[0])[1] def _infer_state_dtype(explicit_dtype, state): """Infer the dtype of an RNN state. Args: explicit_dtype: explicitly declared dtype or None. state: RNN's hidden state. Must be a Tensor or a nested iterable containing Tensors. Returns: dtype: inferred dtype of hidden state. Raises: ValueError: if `state` has heterogeneous dtypes or is empty. """ if explicit_dtype is not None: return explicit_dtype elif nest.is_sequence(state): inferred_dtypes = [element.dtype for element in nest.flatten(state)] if not inferred_dtypes: raise ValueError("Unable to infer dtype from empty state.") all_same = all([x == inferred_dtypes[0] for x in inferred_dtypes]) if not all_same: raise ValueError( "State has tensors of different inferred_dtypes. 
# pylint: disable=unused-argument
def _rnn_step(
        time, sequence_length, min_sequence_length, max_sequence_length,
        zero_output, state, call_cell, state_size, skip_conditionals=False):
    """Calculate one step of a dynamic RNN minibatch.

    Returns an (output, state) pair conditioned on the sequence_lengths.
    When skip_conditionals=False, the pseudocode is something like:

    if t >= max_sequence_length:
      return (zero_output, state)
    if t < min_sequence_length:
      return call_cell()

    # Selectively output zeros or output, old state or new state depending
    # on if we've finished calculating each row.
    new_output, new_state = call_cell()
    final_output = np.vstack([
      zero_output if time >= sequence_lengths[r] else new_output_r
      for r, new_output_r in enumerate(new_output)
    ])
    final_state = np.vstack([
      state[r] if time >= sequence_lengths[r] else new_state_r
      for r, new_state_r in enumerate(new_state)
    ])
    return (final_output, final_state)

    When skip_conditionals=True the cell is always called and the per-row
    selection above is applied at every step, without any `cond`.

    Args:
      time: the current time step. It is only compared against the sequence
        length values; the `dynamic_rnn` loop in this file passes an int32
        scalar `Tensor`.
      sequence_length: int32 `Tensor` vector of size [batch_size]
      min_sequence_length: int32 `Tensor` scalar, min of sequence_length
      max_sequence_length: int32 `Tensor` scalar, max of sequence_length
      zero_output: `Tensor` (or nested structure of Tensors) emitted for rows
        that are already past their sequence length.
      state: Either a single `Tensor` matrix of shape `[batch_size, state_size]`,
        or a list/tuple of such tensors.
      call_cell: lambda returning tuple of (new_output, new_state) where
        new_output is a `Tensor` matrix of shape `[batch_size, output_size]`.
        new_state is a `Tensor` matrix of shape `[batch_size, state_size]`.
      state_size: The `cell.state_size` associated with the state. Not read
        by this function (hence the pylint suppression above).
      skip_conditionals: Python bool, whether to skip using the conditional
        calculations.  This is useful for `dynamic_rnn`, where the input tensor
        matches `max_sequence_length`, and using conditionals just slows
        everything down.

    Returns:
      A tuple of (`final_output`, `final_state`) as given by the pseudocode above:
        final_output is a `Tensor` matrix of shape [batch_size, output_size]
        final_state is either a single `Tensor` matrix, or a tuple of such
          matrices (matching length and shapes of input `state`).

    Raises:
      ValueError: If the flattened result does not contain exactly one entry
        per flattened output plus one per flattened state tensor.
    """

    # Convert state to a list for ease of use
    flat_state = nest.flatten(state)
    flat_zero_output = nest.flatten(zero_output)

    def _copy_one_through(output, new_output):
        # If the state contains a scalar value we simply pass it through.
        if output.shape.ndims == 0:
            return new_output
        # Rows whose sequence has already ended keep `output` (the old value);
        # all other rows take the freshly computed `new_output`.
        copy_cond = (time >= sequence_length)
        with ops.colocate_with(new_output):
            return array_ops.where(copy_cond, output, new_output)

    def _copy_some_through(flat_new_output, flat_new_state):
        # Use broadcasting select to determine which values should get
        # the previous state & zero output, and which values should get
        # a calculated state & output.
        flat_new_output = [
            _copy_one_through(zero_output, new_output)
            for zero_output, new_output in zip(flat_zero_output, flat_new_output)]
        flat_new_state = [
            _copy_one_through(state, new_state)
            for state, new_state in zip(flat_state, flat_new_state)]
        # Outputs first, then states: the caller splits on len(flat_zero_output).
        return flat_new_output + flat_new_state

    def _maybe_copy_some_through():
        """Run RNN step. Pass through either no or some past state."""
        new_output, new_state = call_cell()

        nest.assert_same_structure(state, new_state)

        flat_new_state = nest.flatten(new_state)
        flat_new_output = nest.flatten(new_output)
        return control_flow_ops.cond(
            # if t < min_seq_len: calculate and return everything
            time < min_sequence_length, lambda: flat_new_output + flat_new_state,
            # else copy some of it through
            lambda: _copy_some_through(flat_new_output, flat_new_state))

    # TODO(ebrevdo): skipping these conditionals may cause a slowdown,
    # but benefits from removing cond() and its gradient.  We should
    # profile with and without this switch here.
    if skip_conditionals:
        # Instead of using conditionals, perform the selective copy at all time
        # steps.  This is faster when max_seq_len is equal to the number of unrolls
        # (which is typical for dynamic_rnn).
        new_output, new_state = call_cell()
        nest.assert_same_structure(state, new_state)
        new_state = nest.flatten(new_state)
        new_output = nest.flatten(new_output)
        final_output_and_state = _copy_some_through(new_output, new_state)
    else:
        empty_update = lambda: flat_zero_output + flat_state
        final_output_and_state = control_flow_ops.cond(
            # if t >= max_seq_len: copy all state through, output zeros
            time >= max_sequence_length, empty_update,
            # otherwise calculation is required: copy some or all of it through
            _maybe_copy_some_through)

    # Sanity check on the flat [outputs..., states...] layout before splitting.
    if len(final_output_and_state) != len(flat_zero_output) + len(flat_state):
        raise ValueError("Internal error: state and output were not concatenated "
                         "correctly.")
    final_output = final_output_and_state[:len(flat_zero_output)]
    final_state = final_output_and_state[len(flat_zero_output):]

    # Re-attach the static shapes of the zero output / incoming state to the
    # selected results.
    for output, flat_output in zip(final_output, flat_zero_output):
        output.set_shape(flat_output.get_shape())
    for substate, flat_substate in zip(final_state, flat_state):
        substate.set_shape(flat_substate.get_shape())

    # Restore the original nesting of outputs and state.
    final_output = nest.pack_sequence_as(
        structure=zero_output, flat_sequence=final_output)
    final_state = nest.pack_sequence_as(
        structure=state, flat_sequence=final_state)

    return final_output, final_state
Returns: time-reversed sequence """ if lengths is None: return list(reversed(input_seq)) flat_input_seq = tuple(nest.flatten(input_) for input_ in input_seq) flat_results = [[] for _ in range(len(input_seq))] for sequence in zip(*flat_input_seq): input_shape = tensor_shape.unknown_shape( ndims=sequence[0].get_shape().ndims) for input_ in sequence: input_shape.merge_with(input_.get_shape()) input_.set_shape(input_shape) # Join into (time, batch_size, depth) s_joined = array_ops.stack(sequence) # Reverse along dimension 0 s_reversed = array_ops.reverse_sequence(s_joined, lengths, 0, 1) # Split again into list result = array_ops.unstack(s_reversed) for r, flat_result in zip(result, flat_results): r.set_shape(input_shape) flat_result.append(r) results = [nest.pack_sequence_as(structure=input_, flat_sequence=flat_result) for input_, flat_result in zip(input_seq, flat_results)] return results # # def bidirectional_dynamic_rnn(cell_fw, cell_bw, inputs, sequence_length=None, # # initial_state_fw=None, initial_state_bw=None, # # dtype=None, parallel_iterations=None, # # swap_memory=False, time_major=False, scope=None): # # """Creates a dynamic version of bidirectional recurrent neural network. # # # # Takes input and builds independent forward and backward RNNs. The input_size # # of forward and backward cell must match. The initial state for both directions # # is zero by default (but can be set optionally) and no intermediate states are # # ever returned -- the network is fully unrolled for the given (passed in) # # length(s) of the sequence(s) or completely unrolled if length(s) is not # # given. # # # # Args: # # cell_fw: An instance of RNNCell, to be used for forward direction. # # cell_bw: An instance of RNNCell, to be used for backward direction. # # inputs: The RNN inputs. # # If time_major == False (default), this must be a tensor of shape: # # `[batch_size, max_time, ...]`, or a nested tuple of such elements. 
# # If time_major == True, this must be a tensor of shape: # # `[max_time, batch_size, ...]`, or a nested tuple of such elements. # # sequence_length: (optional) An int32/int64 vector, size `[batch_size]`, # # containing the actual lengths for each of the sequences in the batch. # # If not provided, all batch entries are assumed to be full sequences; and # # time reversal is applied from time `0` to `max_time` for each sequence. # # initial_state_fw: (optional) An initial state for the forward RNN. # # This must be a tensor of appropriate type and shape # # `[batch_size, cell_fw.state_size]`. # # If `cell_fw.state_size` is a tuple, this should be a tuple of # # tensors having shapes `[batch_size, s] for s in cell_fw.state_size`. # # initial_state_bw: (optional) Same as for `initial_state_fw`, but using # # the corresponding properties of `cell_bw`. # # dtype: (optional) The data type for the initial states and expected output. # # Required if initial_states are not provided or RNN states have a # # heterogeneous dtype. # # parallel_iterations: (Default: 32). The number of iterations to run in # # parallel. Those operations which do not have any temporal dependency # # and can be run in parallel, will be. This parameter trades off # # time for space. Values >> 1 use more memory but take less time, # # while smaller values use less memory but computations take longer. # # swap_memory: Transparently swap the tensors produced in forward inference # # but needed for back prop from GPU to CPU. This allows training RNNs # # which would typically not fit on a single GPU, with very minimal (or no) # # performance penalty. # # time_major: The shape format of the `inputs` and `outputs` Tensors. # # If true, these `Tensors` must be shaped `[max_time, batch_size, depth]`. # # If false, these `Tensors` must be shaped `[batch_size, max_time, depth]`. 
# # Using `time_major = True` is a bit more efficient because it avoids # # transposes at the beginning and end of the RNN calculation. However, # # most TensorFlow data is batch-major, so by default this function # # accepts input and emits output in batch-major form. # # scope: VariableScope for the created subgraph; defaults to # # "bidirectional_rnn" # # # # Returns: # # A tuple (outputs, output_states) where: # # outputs: A tuple (output_fw, output_bw) containing the forward and # # the backward rnn output `Tensor`. # # If time_major == False (default), # # output_fw will be a `Tensor` shaped: # # `[batch_size, max_time, cell_fw.output_size]` # # and output_bw will be a `Tensor` shaped: # # `[batch_size, max_time, cell_bw.output_size]`. # # If time_major == True, # # output_fw will be a `Tensor` shaped: # # `[max_time, batch_size, cell_fw.output_size]` # # and output_bw will be a `Tensor` shaped: # # `[max_time, batch_size, cell_bw.output_size]`. # # It returns a tuple instead of a single concatenated `Tensor`, unlike # # in the `bidirectional_rnn`. If the concatenated one is preferred, # # the forward and backward outputs can be concatenated as # # `tf.concat(outputs, 2)`. # # output_states: A tuple (output_state_fw, output_state_bw) containing # # the forward and the backward final states of bidirectional rnn. # # # # Raises: # # TypeError: If `cell_fw` or `cell_bw` is not an instance of `RNNCell`. 
def dynamic_rnn(cell, inputs, att_scores=None, sequence_length=None, initial_state=None,
                dtype=None, parallel_iterations=None, swap_memory=False,
                time_major=False, scope=None):
    """Creates a recurrent neural network specified by RNNCell `cell`.

    Performs fully dynamic unrolling of `inputs`.

    Example:

    ```python
    # create a BasicRNNCell
    rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size)

    # 'outputs' is a tensor of shape [batch_size, max_time, cell_state_size]

    # defining initial state
    initial_state = rnn_cell.zero_state(batch_size, dtype=tf.float32)

    # 'state' is a tensor of shape [batch_size, cell_state_size]
    outputs, state = tf.nn.dynamic_rnn(rnn_cell, input_data,
                                       initial_state=initial_state,
                                       dtype=tf.float32)
    ```

    ```python
    # create 2 LSTMCells
    rnn_layers = [tf.nn.rnn_cell.LSTMCell(size) for size in [128, 256]]

    # create a RNN cell composed sequentially of a number of RNNCells
    multi_rnn_cell = tf.nn.rnn_cell.MultiRNNCell(rnn_layers)

    # 'outputs' is a tensor of shape [batch_size, max_time, 256]
    # 'state' is a N-tuple where N is the number of LSTMCells containing a
    # tf.contrib.rnn.LSTMStateTuple for each cell
    outputs, state = tf.nn.dynamic_rnn(cell=multi_rnn_cell,
                                       inputs=data,
                                       dtype=tf.float32)
    ```

    Args:
      cell: An instance of RNNCell.
      inputs: The RNN inputs.

        If `time_major == False` (default), this must be a `Tensor` of shape:
          `[batch_size, max_time, ...]`, or a nested tuple of such
          elements.

        If `time_major == True`, this must be a `Tensor` of shape:
          `[max_time, batch_size, ...]`, or a nested tuple of such
          elements.

        This may also be a (possibly nested) tuple of Tensors satisfying
        this property.  The first two dimensions must match across all the inputs,
        but otherwise the ranks and other shape components may differ.
        In this case, input to `cell` at each time-step will replicate the
        structure of these tuples, except for the time dimension (from which the
        time is taken).

        The input to `cell` at each time step will be a `Tensor` or (possibly
        nested) tuple of Tensors each with dimensions `[batch_size, ...]`.
      att_scores: (optional) Attention scores forwarded unchanged to the RNN
        loop. When given, the loop slices `att_scores[:, time, :]` at every
        step and calls `cell(input_t, state, att_score)`, so `cell` must accept
        a third positional argument. Note that this tensor is NOT transposed
        here: it is indexed with time on axis 1 regardless of `time_major`.
      sequence_length: (optional) An int32/int64 vector sized `[batch_size]`.
        Used to copy-through state and zero-out outputs when past a batch
        element's sequence length.  So it's more for correctness than performance.
      initial_state: (optional) An initial state for the RNN.
        If `cell.state_size` is an integer, this must be
        a `Tensor` of appropriate type and shape `[batch_size, cell.state_size]`.
        If `cell.state_size` is a tuple, this should be a tuple of
        tensors having shapes `[batch_size, s] for s in cell.state_size`.
      dtype: (optional) The data type for the initial state and expected output.
        Required if initial_state is not provided or RNN state has a heterogeneous
        dtype.
      parallel_iterations: (Default: 32).  The number of iterations to run in
        parallel.  Those operations which do not have any temporal dependency
        and can be run in parallel, will be.  This parameter trades off
        time for space.  Values >> 1 use more memory but take less time,
        while smaller values use less memory but computations take longer.
      swap_memory: Transparently swap the tensors produced in forward inference
        but needed for back prop from GPU to CPU.  This allows training RNNs
        which would typically not fit on a single GPU, with very minimal (or no)
        performance penalty.
      time_major: The shape format of the `inputs` and `outputs` Tensors.
        If true, these `Tensors` must be shaped `[max_time, batch_size, depth]`.
        If false, these `Tensors` must be shaped `[batch_size, max_time, depth]`.
        Using `time_major = True` is a bit more efficient because it avoids
        transposes at the beginning and end of the RNN calculation.  However,
        most TensorFlow data is batch-major, so by default this function
        accepts input and emits output in batch-major form.
      scope: VariableScope for the created subgraph; defaults to "rnn".

    Returns:
      A pair (outputs, state) where:

      outputs: The RNN output `Tensor`.

        If time_major == False (default), this will be a `Tensor` shaped:
          `[batch_size, max_time, cell.output_size]`.

        If time_major == True, this will be a `Tensor` shaped:
          `[max_time, batch_size, cell.output_size]`.

        Note, if `cell.output_size` is a (possibly nested) tuple of integers
        or `TensorShape` objects, then `outputs` will be a tuple having the
        same structure as `cell.output_size`, containing Tensors having shapes
        corresponding to the shape data in `cell.output_size`.

      state: The final state.  If `cell.state_size` is an int, this
        will be shaped `[batch_size, cell.state_size]`.  If it is a
        `TensorShape`, this will be shaped `[batch_size] + cell.state_size`.
        If it is a (possibly nested) tuple of ints or `TensorShape`, this will
        be a tuple having the corresponding shapes. If cells are `LSTMCells`
        `state` will be a tuple containing a `LSTMStateTuple` for each cell.

    Raises:
      TypeError: If `cell` is not an instance of RNNCell.
      ValueError: If inputs is None or an empty list, if `sequence_length` has
        a known rank other than 1, or if neither `initial_state` nor `dtype`
        is given.
    """

    if not _like_rnncell(cell):
        raise TypeError("cell must be an instance of RNNCell")

    # By default, time_major==False and inputs are batch-major: shaped
    #   [batch, time, depth]
    # For internal calculations, we transpose to [time, batch, depth]
    flat_input = nest.flatten(inputs)

    if not time_major:
        # (B,T,D) => (T,B,D)
        flat_input = [ops.convert_to_tensor(input_) for input_ in flat_input]
        flat_input = tuple(_transpose_batch_time(input_) for input_ in flat_input)

    parallel_iterations = parallel_iterations or 32
    if sequence_length is not None:
        # `math_ops.to_int32` is deprecated; an explicit cast is equivalent.
        sequence_length = math_ops.cast(sequence_length, dtypes.int32)
        if sequence_length.get_shape().ndims not in (None, 1):
            raise ValueError(
                "sequence_length must be a vector of length batch_size, "
                "but saw shape: %s" % sequence_length.get_shape())
        sequence_length = array_ops.identity(  # Just to find it in the graph.
            sequence_length, name="sequence_length")

    # `AUTO_REUSE` lives at the top level in TF1 and under `compat.v1` in TF2.
    # Only a missing attribute should trigger the fallback, so do not swallow
    # unrelated errors (or KeyboardInterrupt) with a bare `except`.
    try:
        reuse = tf.AUTO_REUSE
    except AttributeError:
        reuse = tf.compat.v1.AUTO_REUSE

    # Create a new scope in which the caching device is either
    # determined by the parent scope, or is set to place the cached
    # Variable using the same placement as for the rest of the RNN.
    with vs.variable_scope(scope or "rnn", reuse=reuse) as varscope:  # TODO:user defined reuse
        if varscope.caching_device is None:
            varscope.set_caching_device(lambda op: op.device)
        batch_size = _best_effort_input_batch_size(flat_input)

        if initial_state is not None:
            state = initial_state
        else:
            if not dtype:
                raise ValueError("If there is no initial_state, you must give a dtype.")
            state = cell.zero_state(batch_size, dtype)

        def _assert_has_shape(x, shape):
            """Build a runtime assertion that `x` has exactly shape `shape`."""
            x_shape = array_ops.shape(x)
            packed_shape = array_ops.stack(shape)
            return control_flow_ops.Assert(
                math_ops.reduce_all(math_ops.equal(x_shape, packed_shape)),
                ["Expected shape for Tensor %s is " % x.name,
                 packed_shape, " but saw shape: ", x_shape])

        if sequence_length is not None:
            # Perform some shape validation
            with ops.control_dependencies(
                    [_assert_has_shape(sequence_length, [batch_size])]):
                sequence_length = array_ops.identity(
                    sequence_length, name="CheckSeqLen")

        inputs = nest.pack_sequence_as(structure=inputs, flat_sequence=flat_input)

        (outputs, final_state) = _dynamic_rnn_loop(
            cell,
            inputs,
            state,
            parallel_iterations=parallel_iterations,
            swap_memory=swap_memory,
            att_scores=att_scores,
            sequence_length=sequence_length,
            dtype=dtype)

        # Outputs of _dynamic_rnn_loop are always shaped [time, batch, depth].
        # If we are performing batch-major calculations, transpose output back
        # to shape [batch, time, depth]
        if not time_major:
            # (T,B,D) => (B,T,D)
            outputs = nest.map_structure(_transpose_batch_time, outputs)

        return (outputs, final_state)
initial_state: A `Tensor` of shape `[batch_size, state_size]`, or if `cell.state_size` is a tuple, then this should be a tuple of tensors having shapes `[batch_size, s] for s in cell.state_size`. parallel_iterations: Positive Python int. swap_memory: A Python boolean sequence_length: (optional) An `int32` `Tensor` of shape [batch_size]. dtype: (optional) Expected dtype of output. If not specified, inferred from initial_state. Returns: Tuple `(final_outputs, final_state)`. final_outputs: A `Tensor` of shape `[time, batch_size, cell.output_size]`. If `cell.output_size` is a (possibly nested) tuple of ints or `TensorShape` objects, then this returns a (possibly nsted) tuple of Tensors matching the corresponding shapes. final_state: A `Tensor`, or possibly nested tuple of Tensors, matching in length and shapes to `initial_state`. Raises: ValueError: If the input depth cannot be inferred via shape inference from the inputs. """ state = initial_state assert isinstance(parallel_iterations, int), "parallel_iterations must be int" state_size = cell.state_size flat_input = nest.flatten(inputs) flat_output_size = nest.flatten(cell.output_size) # Construct an initial output input_shape = array_ops.shape(flat_input[0]) time_steps = input_shape[0] batch_size = _best_effort_input_batch_size(flat_input) inputs_got_shape = tuple(input_.get_shape().with_rank_at_least(3) for input_ in flat_input) const_time_steps, const_batch_size = inputs_got_shape[0].as_list()[:2] for shape in inputs_got_shape: if not shape[2:].is_fully_defined(): raise ValueError( "Input size (depth of inputs) must be accessible via shape inference," " but saw value None.") got_time_steps = shape[0] got_batch_size = shape[1] if const_time_steps != got_time_steps: raise ValueError( "Time steps is not the same for all the elements in the input in a " "batch.") if const_batch_size != got_batch_size: raise ValueError( "Batch_size is not the same for all the elements in the input.") # Prepare dynamic conditional 
copying of state & output def _create_zero_arrays(size): size = _concat(batch_size, size) return array_ops.zeros( array_ops.stack(size), _infer_state_dtype(dtype, state)) flat_zero_output = tuple(_create_zero_arrays(output) for output in flat_output_size) zero_output = nest.pack_sequence_as(structure=cell.output_size, flat_sequence=flat_zero_output) if sequence_length is not None: min_sequence_length = math_ops.reduce_min(sequence_length) max_sequence_length = math_ops.reduce_max(sequence_length) time = array_ops.constant(0, dtype=dtypes.int32, name="time") with ops.name_scope("dynamic_rnn") as scope: base_name = scope def _create_ta(name, dtype): return tensor_array_ops.TensorArray(dtype=dtype, size=time_steps, tensor_array_name=base_name + name) output_ta = tuple(_create_ta("output_%d" % i, _infer_state_dtype(dtype, state)) for i in range(len(flat_output_size))) input_ta = tuple(_create_ta("input_%d" % i, flat_input[i].dtype) for i in range(len(flat_input))) input_ta = tuple(ta.unstack(input_) for ta, input_ in zip(input_ta, flat_input)) def _time_step(time, output_ta_t, state, att_scores=None): """Take a time step of the dynamic RNN. Args: time: int32 scalar Tensor. output_ta_t: List of `TensorArray`s that represent the output. state: nested tuple of vector tensors that represent the state. Returns: The tuple (time + 1, output_ta_t with updated flow, new_state). 
""" input_t = tuple(ta.read(time) for ta in input_ta) # Restore some shape information for input_, shape in zip(input_t, inputs_got_shape): input_.set_shape(shape[1:]) input_t = nest.pack_sequence_as(structure=inputs, flat_sequence=input_t) if att_scores is not None: att_score = att_scores[:, time, :] call_cell = lambda: cell(input_t, state, att_score) else: call_cell = lambda: cell(input_t, state) if sequence_length is not None: (output, new_state) = _rnn_step( time=time, sequence_length=sequence_length, min_sequence_length=min_sequence_length, max_sequence_length=max_sequence_length, zero_output=zero_output, state=state, call_cell=call_cell, state_size=state_size, skip_conditionals=True) else: (output, new_state) = call_cell() # Pack state if using state tuples output = nest.flatten(output) output_ta_t = tuple( ta.write(time, out) for ta, out in zip(output_ta_t, output)) if att_scores is not None: return (time + 1, output_ta_t, new_state, att_scores) else: return (time + 1, output_ta_t, new_state) if att_scores is not None: _, output_final_ta, final_state, _ = control_flow_ops.while_loop( cond=lambda time, *_: time < time_steps, body=_time_step, loop_vars=(time, output_ta, state, att_scores), parallel_iterations=parallel_iterations, swap_memory=swap_memory) else: _, output_final_ta, final_state = control_flow_ops.while_loop( cond=lambda time, *_: time < time_steps, body=_time_step, loop_vars=(time, output_ta, state), parallel_iterations=parallel_iterations, swap_memory=swap_memory) # Unpack final output if not using output tuples. 
class _Linear_(object):
    """Linear map: sum_i(args[i] * W[i]), where W[i] is a variable.

    The weight (and optional bias) variables are created in the constructor,
    inside the variable scope that is current at construction time; calling
    the instance applies them.

    Args:
      args: a 2D Tensor or a list of 2D, batch x n, Tensors.
      output_size: int, second dimension of weight variable.
      build_bias: boolean, whether to build a bias variable.
      bias_initializer: starting value to initialize the bias
        (default is all zeros).
      kernel_initializer: starting value to initialize the weight.

    The variable dtype is taken from the first element of `args`.

    Raises:
      ValueError: if `args` is None or empty, if any argument is not 2D, or if
        the second dimension of any argument is statically unknown.
    """

    def __init__(self,
                 args,
                 output_size,
                 build_bias,
                 bias_initializer=None,
                 kernel_initializer=None):
        self._build_bias = build_bias

        if args is None or (nest.is_sequence(args) and not args):
            raise ValueError("`args` must be specified")
        if not nest.is_sequence(args):
            args = [args]
            self._is_sequence = False
        else:
            self._is_sequence = True

        # Calculate the total size of arguments on dimension 1.
        total_arg_size = 0
        shapes = [a.get_shape() for a in args]
        for shape in shapes:
            if shape.ndims != 2:
                raise ValueError(
                    "linear is expecting 2D arguments: %s" % shapes)
            # With TF1-style shapes `shape[1]` is a `Dimension` wrapper that is
            # never `None` itself; unwrap it so an unknown size is reported by
            # the ValueError below instead of failing inside `int(...)`.
            arg_size = getattr(shape[1], "value", shape[1])
            if arg_size is None:
                raise ValueError("linear expects shape[1] to be provided for shape %s, "
                                 "but saw %s" % (shape, shape[1]))
            total_arg_size += int(arg_size)

        dtype = [a.dtype for a in args][0]

        scope = vs.get_variable_scope()
        with vs.variable_scope(scope) as outer_scope:
            self._weights = vs.get_variable(
                _WEIGHTS_VARIABLE_NAME, [total_arg_size, output_size],
                dtype=dtype,
                initializer=kernel_initializer)
            if build_bias:
                with vs.variable_scope(outer_scope) as inner_scope:
                    # The bias is created without a partitioner.
                    inner_scope.set_partitioner(None)
                    if bias_initializer is None:
                        bias_initializer = init_ops.constant_initializer(
                            0.0, dtype=dtype)
                    self._biases = vs.get_variable(
                        _BIAS_VARIABLE_NAME, [output_size],
                        dtype=dtype,
                        initializer=bias_initializer)

    def __call__(self, args):
        """Apply the linear map to `args` (same structure as at construction)."""
        if not self._is_sequence:
            args = [args]

        if len(args) == 1:
            res = math_ops.matmul(args[0], self._weights)
        else:
            # Multiple inputs share one weight matrix: concatenate on axis 1.
            res = math_ops.matmul(array_ops.concat(args, 1), self._weights)
        if self._build_bias:
            res = nn_ops.bias_add(res, self._biases)
        return res


# Prefer TensorFlow's own `_Linear` when this TF version still ships it;
# otherwise fall back to the local re-implementation above. Only an import
# failure should trigger the fallback.
try:
    from tensorflow.python.ops.rnn_cell_impl import _Linear
except ImportError:
    _Linear = _Linear_
class VecAttGRUCell(RNNCell):
    """GRU cell whose update gate is rescaled by an attention score.

    The reset gate and candidate state are computed as in a standard GRU
    (cf. http://arxiv.org/abs/1406.1078). The update gate ``u`` is then
    replaced by ``(1 - att_score) * u`` before the new state is formed as
    ``u * state + (1 - u) * candidate``.

    Args:
      num_units: int, The number of units in the GRU cell.
      activation: Nonlinearity to use.  Default: `tanh`.
      reuse: (optional) Python boolean describing whether to reuse variables
       in an existing scope.  If not `True`, and the existing scope already has
       the given variables, an error is raised.
      kernel_initializer: (optional) The initializer to use for the weight and
      projection matrices.
      bias_initializer: (optional) The initializer to use for the bias.
    """

    def __init__(self,
                 num_units,
                 activation=None,
                 reuse=None,
                 kernel_initializer=None,
                 bias_initializer=None):
        super(VecAttGRUCell, self).__init__(_reuse=reuse)
        self._num_units = num_units
        self._activation = activation if activation else math_ops.tanh
        self._kernel_initializer = kernel_initializer
        self._bias_initializer = bias_initializer
        # The two linear maps are built lazily on the first call().
        self._gate_linear = None
        self._candidate_linear = None

    @property
    def state_size(self):
        return self._num_units

    @property
    def output_size(self):
        return self._num_units

    def __call__(self, inputs, state, att_score):
        # Forward straight to call() so the extra att_score argument is
        # passed through alongside inputs and state.
        return self.call(inputs, state, att_score)

    def call(self, inputs, state, att_score=None):
        """Run one step and return ``(new_state, new_state)``.

        ``att_score`` is used arithmetically below, so although it defaults
        to None a real value has to be supplied.
        """
        gate_inputs = [inputs, state]
        if self._gate_linear is None:
            # Gate bias starts at 1.0 unless a bias initializer was given.
            if self._bias_initializer is None:
                gate_bias_init = init_ops.constant_initializer(
                    1.0, dtype=inputs.dtype)
            else:
                gate_bias_init = self._bias_initializer
            with vs.variable_scope("gates"):
                # One projection produces both the reset and update gates.
                self._gate_linear = _Linear(
                    gate_inputs,
                    2 * self._num_units,
                    True,
                    bias_initializer=gate_bias_init,
                    kernel_initializer=self._kernel_initializer)

        gates = math_ops.sigmoid(self._gate_linear(gate_inputs))
        reset_gate, update_gate = array_ops.split(
            value=gates, num_or_size_splits=2, axis=1)

        reset_state = reset_gate * state
        candidate_inputs = [inputs, reset_state]
        if self._candidate_linear is None:
            with vs.variable_scope("candidate"):
                self._candidate_linear = _Linear(
                    candidate_inputs,
                    self._num_units,
                    True,
                    bias_initializer=self._bias_initializer,
                    kernel_initializer=self._kernel_initializer)
        candidate = self._activation(self._candidate_linear(candidate_inputs))

        # Attention rescales the update gate before old state and candidate
        # are blended.
        update_gate = (1.0 - att_score) * update_gate
        next_state = update_gate * state + (1 - update_gate) * candidate
        return next_state, next_state
def input_fn_tfrecord(filenames, feature_description, label=None, batch_size=256, num_epochs=1, num_parallel_calls=8,
                      shuffle_factor=10, prefetch_factor=1,
                      ):
    """Build an ``input_fn`` that reads examples from TFRecord files.

    :param filenames: passed unchanged to ``tf.data.TFRecordDataset``.
    :param feature_description: feature spec handed to ``parse_single_example``.
    :param label: optional key; when given, that entry is popped from the parsed
        features and returned separately as the label.
    :param batch_size: int, number of examples per batch.
    :param num_epochs: passed to ``dataset.repeat``.
    :param num_parallel_calls: parallelism of the parsing ``map`` step.
    :param shuffle_factor: when > 0, shuffle with a buffer of ``batch_size * shuffle_factor``.
    :param prefetch_factor: when > 0, prefetch with a buffer of ``batch_size * prefetch_factor``.
    :return: a zero-argument callable returning ``iterator.get_next()`` of a one-shot iterator.
    """

    def _parse_examples(serial_exmp):
        # Use the top-level parser when this TensorFlow exposes it,
        # otherwise fall back to the ``tf.io`` location.
        try:
            parsed = tf.parse_single_example(serial_exmp, features=feature_description)
        except AttributeError:
            parsed = tf.io.parse_single_example(serial_exmp, features=feature_description)
        if label is None:
            return parsed
        target = parsed.pop(label)
        return parsed, target

    def input_fn():
        records = tf.data.TFRecordDataset(filenames)
        records = records.map(_parse_examples, num_parallel_calls=num_parallel_calls)
        if shuffle_factor > 0:
            records = records.shuffle(buffer_size=batch_size * shuffle_factor)

        records = records.repeat(num_epochs)
        records = records.batch(batch_size)

        if prefetch_factor > 0:
            records = records.prefetch(buffer_size=batch_size * prefetch_factor)

        # Use the dataset method when available, otherwise the compat.v1 helper.
        try:
            one_shot = records.make_one_shot_iterator()
        except AttributeError:
            one_shot = tf.compat.v1.data.make_one_shot_iterator(records)

        return one_shot.get_next()

    return input_fn
def AFMEstimator(linear_feature_columns, dnn_feature_columns, use_attention=True, attention_factor=8,
                 l2_reg_linear=1e-5, l2_reg_embedding=1e-5, l2_reg_att=1e-5, afm_dropout=0, seed=1024,
                 task='binary', model_dir=None, config=None, linear_optimizer='Ftrl',
                 dnn_optimizer='Adagrad', training_chief_hooks=None):
    """Build an Estimator for the Attentional Factorization Machine.

    The logit is the sum of the linear logit and a pairwise-interaction logit that
    comes from ``AFMLayer`` (``use_attention=True``) or from a plain ``FM`` layer.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param use_attention: bool, whether to use attention; if ``False`` the model is the same as
        a **standard Factorization Machine**.
    :param attention_factor: positive integer, units in attention net.
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part.
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector.
    :param l2_reg_att: float. L2 regularizer strength applied to attention net.
    :param afm_dropout: float in [0,1), fraction of the attention net output units to dropout.
    :param seed: integer, to use as random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss.
    :param model_dir: Directory to save model parameters, graph and etc. This can
        also be used to load checkpoints from the directory into a estimator
        to continue training a previously saved model.
    :param config: tf.RunConfig object to configure the runtime settings.
    :param linear_optimizer: An instance of `tf.Optimizer` used to apply gradients to
        the linear part of the model. Defaults to FTRL optimizer.
    :param dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to
        the deep part of the model. Defaults to Adagrad optimizer.
    :param training_chief_hooks: Iterable of `tf.train.SessionRunHook` objects to
        run on the chief worker during training.
    :return: A Tensorflow Estimator instance.
    """

    def _model_fn(features, labels, mode, config):
        is_training = mode == tf.estimator.ModeKeys.TRAIN

        wide_logit = get_linear_logit(features, linear_feature_columns, l2_reg_linear=l2_reg_linear)

        with variable_scope(DNN_SCOPE_NAME):
            # Only the embedded (sparse) features take part in the interaction;
            # the dense values are discarded.
            embeddings, _ = input_from_feature_columns(features, dnn_feature_columns,
                                                       l2_reg_embedding=l2_reg_embedding)
            if not use_attention:
                interaction_logit = FM()(concat_func(embeddings, axis=1))
            else:
                attention_layer = AFMLayer(attention_factor, l2_reg_att, afm_dropout, seed)
                interaction_logit = attention_layer(embeddings, training=is_training)

        logits = wide_logit + interaction_logit

        return deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer,
                                training_chief_hooks=training_chief_hooks)

    return tf.estimator.Estimator(_model_fn, model_dir=model_dir, config=config)
arXiv preprint arXiv:1810.11921, 2018.(https://arxiv.org/abs/1810.11921) """ import tensorflow as tf from ..feature_column import get_linear_logit, input_from_feature_columns from ..utils import deepctr_model_fn, DNN_SCOPE_NAME, variable_scope from ...layers.core import DNN from ...layers.interaction import InteractingLayer from ...layers.utils import concat_func, combined_dnn_input def AutoIntEstimator(linear_feature_columns, dnn_feature_columns, att_layer_num=3, att_embedding_size=8, att_head_num=2, att_res=True, dnn_hidden_units=(256, 128, 64), dnn_activation='relu', l2_reg_linear=1e-5, l2_reg_embedding=1e-5, l2_reg_dnn=0, dnn_use_bn=False, dnn_dropout=0, seed=1024, task='binary', model_dir=None, config=None, linear_optimizer='Ftrl', dnn_optimizer='Adagrad', training_chief_hooks=None): """Instantiates the AutoInt Network architecture. :param linear_feature_columns: An iterable containing all the features used by linear part of the model. :param dnn_feature_columns: An iterable containing all the features used by deep part of the model. :param att_layer_num: int.The InteractingLayer number to be used. :param att_embedding_size: int.The embedding size in multi-head self-attention network. :param att_head_num: int.The head number in multi-head self-attention network. :param att_res: bool.Whether or not use standard residual connections before output. :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of DNN :param dnn_activation: Activation function to use in DNN :param l2_reg_linear: float. L2 regularizer strength applied to linear part :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector :param l2_reg_dnn: float. L2 regularizer strength applied to DNN :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate. :param seed: integer ,to use as random seed. 
:param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss :param model_dir: Directory to save model parameters, graph and etc. This can also be used to load checkpoints from the directory into a estimator to continue training a previously saved model. :param config: tf.RunConfig object to configure the runtime settings. :param linear_optimizer: An instance of `tf.Optimizer` used to apply gradients to the linear part of the model. Defaults to FTRL optimizer. :param dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to the deep part of the model. Defaults to Adagrad optimizer. :param training_chief_hooks: Iterable of `tf.train.SessionRunHook` objects to run on the chief worker during training. :return: A Tensorflow Estimator instance. """ def _model_fn(features, labels, mode, config): train_flag = (mode == tf.estimator.ModeKeys.TRAIN) linear_logits = get_linear_logit(features, linear_feature_columns, l2_reg_linear=l2_reg_linear) with variable_scope(DNN_SCOPE_NAME): sparse_embedding_list, dense_value_list = input_from_feature_columns(features, dnn_feature_columns, l2_reg_embedding=l2_reg_embedding) att_input = concat_func(sparse_embedding_list, axis=1) for _ in range(att_layer_num): att_input = InteractingLayer( att_embedding_size, att_head_num, att_res)(att_input) att_output = tf.keras.layers.Flatten()(att_input) dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list) if len(dnn_hidden_units) > 0 and att_layer_num > 0: # Deep & Interacting Layer deep_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)(dnn_input, training=train_flag) stack_out = tf.keras.layers.Concatenate()([att_output, deep_out]) final_logit = tf.keras.layers.Dense( 1, use_bias=False, kernel_initializer=tf.keras.initializers.glorot_normal(seed))(stack_out) elif len(dnn_hidden_units) > 0: # Only Deep deep_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, 
def CCPMEstimator(linear_feature_columns, dnn_feature_columns, conv_kernel_width=(6, 5), conv_filters=(4, 4),
                  dnn_hidden_units=(128, 64), l2_reg_linear=1e-5, l2_reg_embedding=1e-5, l2_reg_dnn=0,
                  dnn_dropout=0, seed=1024, task='binary', model_dir=None, config=None, linear_optimizer='Ftrl',
                  dnn_optimizer='Adagrad', training_chief_hooks=None):
    """Instantiates the Convolutional Click Prediction Model architecture.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param conv_kernel_width: list,list of positive integer or empty list,the width of filter in each conv layer.
    :param conv_filters: list,list of positive integer or empty list,the number of filters in each conv layer.
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of DNN.
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param seed: integer ,to use as random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :param model_dir: Directory to save model parameters, graph and etc. This can
        also be used to load checkpoints from the directory into a estimator
        to continue training a previously saved model.
    :param config: tf.RunConfig object to configure the runtime settings.
    :param linear_optimizer: An instance of `tf.Optimizer` used to apply gradients to
        the linear part of the model. Defaults to FTRL optimizer.
    :param dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to
        the deep part of the model. Defaults to Adagrad optimizer.
    :param training_chief_hooks: Iterable of `tf.train.SessionRunHook` objects to
        run on the chief worker during training.
    :return: A Tensorflow Estimator instance.
    :raises ValueError: if ``conv_kernel_width`` and ``conv_filters`` differ in length.
    """

    if len(conv_kernel_width) != len(conv_filters):
        raise ValueError(
            "conv_kernel_width must have same element with conv_filters")

    def _model_fn(features, labels, mode, config):
        train_flag = (mode == tf.estimator.ModeKeys.TRAIN)

        linear_logits = get_linear_logit(features, linear_feature_columns, l2_reg_linear=l2_reg_linear)

        with variable_scope(DNN_SCOPE_NAME):
            sparse_embedding_list, _ = input_from_feature_columns(features, dnn_feature_columns,
                                                                  l2_reg_embedding=l2_reg_embedding)
            n = len(sparse_embedding_list)
            num_conv_layers = len(conv_filters)

            conv_input = concat_func(sparse_embedding_list, axis=1)
            # Add a trailing axis so the stacked embeddings can be fed to Conv2D.
            pooling_result = tf.keras.layers.Lambda(
                lambda x: tf.expand_dims(x, axis=3))(conv_input)

            for i in range(1, num_conv_layers + 1):
                filters = conv_filters[i - 1]
                width = conv_kernel_width[i - 1]
                if i < num_conv_layers:
                    # k shrinks with depth. float() forces true division: under
                    # Python 2 integer division the ratio would truncate to 0 and
                    # every intermediate layer would keep all n fields.
                    ratio = float(i) / num_conv_layers
                    k = max(1, int((1 - pow(ratio, num_conv_layers - i)) * n))
                else:
                    # The last conv layer always keeps 3 values.
                    k = 3

                conv_result = tf.keras.layers.Conv2D(filters=filters, kernel_size=(width, 1), strides=(1, 1),
                                                     padding='same',
                                                     activation='tanh', use_bias=True, )(pooling_result)
                # Never ask for more values than the pooled axis holds.
                pooling_result = KMaxPooling(
                    k=min(k, int(conv_result.shape[1])), axis=1)(conv_result)

            flatten_result = tf.keras.layers.Flatten()(pooling_result)
            dnn_out = DNN(dnn_hidden_units, l2_reg=l2_reg_dnn, dropout_rate=dnn_dropout, seed=seed)(
                flatten_result, training=train_flag)
            dnn_logit = tf.keras.layers.Dense(1, use_bias=False,
                                              kernel_initializer=tf.keras.initializers.glorot_normal(seed))(dnn_out)

        logits = linear_logits + dnn_logit

        return deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer,
                                training_chief_hooks=training_chief_hooks)

    return tf.estimator.Estimator(_model_fn, model_dir=model_dir, config=config)
def DCNEstimator(linear_feature_columns, dnn_feature_columns, cross_num=2, dnn_hidden_units=(256, 128, 64),
                 l2_reg_linear=1e-5, l2_reg_embedding=1e-5, l2_reg_cross=1e-5, l2_reg_dnn=0, seed=1024,
                 dnn_dropout=0, dnn_use_bn=False, dnn_activation='relu', task='binary', model_dir=None,
                 config=None, linear_optimizer='Ftrl', dnn_optimizer='Adagrad', training_chief_hooks=None):
    """Build an Estimator for the Deep&Cross Network.

    Depending on ``dnn_hidden_units`` and ``cross_num`` the model uses a DNN tower, a
    cross tower, or both (concatenated) in front of a single bias-free output unit.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param cross_num: positive integer, cross layer number.
    :param dnn_hidden_units: list, list of positive integer or empty list, the layer number and
        units in each layer of DNN.
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part.
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector.
    :param l2_reg_cross: float. L2 regularizer strength applied to cross net.
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN.
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN.
    :param dnn_activation: Activation function to use in DNN.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss.
    :param model_dir: Directory to save model parameters, graph and etc. This can
        also be used to load checkpoints from the directory into a estimator
        to continue training a previously saved model.
    :param config: tf.RunConfig object to configure the runtime settings.
    :param linear_optimizer: An instance of `tf.Optimizer` used to apply gradients to
        the linear part of the model. Defaults to FTRL optimizer.
    :param dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to
        the deep part of the model. Defaults to Adagrad optimizer.
    :param training_chief_hooks: Iterable of `tf.train.SessionRunHook` objects to
        run on the chief worker during training.
    :return: A Tensorflow Estimator instance.
    :raises ValueError: if there are no hidden units and ``cross_num`` is 0.
    """
    if not (len(dnn_hidden_units) != 0 or cross_num != 0):
        raise ValueError("Either hidden_layer or cross layer must > 0")

    def _model_fn(features, labels, mode, config):
        is_training = mode == tf.estimator.ModeKeys.TRAIN

        wide_logit = get_linear_logit(features, linear_feature_columns, l2_reg_linear=l2_reg_linear)

        with variable_scope(DNN_SCOPE_NAME):
            embeddings, dense_values = input_from_feature_columns(features, dnn_feature_columns,
                                                                  l2_reg_embedding=l2_reg_embedding)
            tower_input = combined_dnn_input(embeddings, dense_values)

            def _deep_tower():
                # Fully connected tower over the combined input.
                dnn = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)
                return dnn(tower_input, training=is_training)

            def _cross_tower():
                # Cross network over the same combined input.
                return CrossNet(cross_num, l2_reg=l2_reg_cross)(tower_input)

            use_deep = len(dnn_hidden_units) > 0
            use_cross = cross_num > 0

            if use_deep and use_cross:
                # Deep & Cross: build deep first, then cross, concatenate as [cross, deep].
                deep_out = _deep_tower()
                cross_out = _cross_tower()
                tower_out = tf.keras.layers.Concatenate()([cross_out, deep_out])
            elif use_deep:
                # Only Deep
                tower_out = _deep_tower()
            elif use_cross:
                # Only Cross
                tower_out = _cross_tower()
            else:
                # Error
                raise NotImplementedError

            final_logit = tf.keras.layers.Dense(
                1, use_bias=False, kernel_initializer=tf.keras.initializers.glorot_normal(seed))(tower_out)

        logits = wide_logit + final_logit

        return deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer,
                                training_chief_hooks=training_chief_hooks)

    return tf.estimator.Estimator(_model_fn, model_dir=model_dir, config=config)
L2 regularizer strength applied to embedding vector of features :param l2_reg_embedding_field: float, L2 regularizer to field embeddings :param l2_reg_dnn: float. L2 regularizer strength applied to DNN :param seed: integer ,to use as random seed. :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate. :param dnn_activation: Activation function to use in DNN :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss :param model_dir: Directory to save model parameters, graph and etc. This can also be used to load checkpoints from the directory into a estimator to continue training a previously saved model. :param config: tf.RunConfig object to configure the runtime settings. :param linear_optimizer: An instance of `tf.Optimizer` used to apply gradients to the linear part of the model. Defaults to FTRL optimizer. :param dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to the deep part of the model. Defaults to Adagrad optimizer. :param training_chief_hooks: Iterable of `tf.train.SessionRunHook` objects to run on the chief worker during training. :return: A Tensorflow Estimator instance. 
""" def _model_fn(features, labels, mode, config): train_flag = (mode == tf.estimator.ModeKeys.TRAIN) linear_logits = get_linear_logit(features, linear_feature_columns, l2_reg_linear=l2_reg_linear) final_logit_components = [linear_logits] with variable_scope(DNN_SCOPE_NAME): sparse_embedding_list, dense_value_list = input_from_feature_columns(features, dnn_feature_columns, l2_reg_embedding=l2_reg_embedding_feat) fefm_interaction_embedding = FEFMLayer( regularizer=l2_reg_embedding_field)(concat_func(sparse_embedding_list, axis=1)) fefm_logit = tf.keras.layers.Lambda(lambda x: reduce_sum(x, axis=1, keep_dims=True))( fefm_interaction_embedding) final_logit_components.append(fefm_logit) if dnn_hidden_units: dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list) dnn_input = concat_func([dnn_input, fefm_interaction_embedding], axis=1) dnn_output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)( dnn_input, training=train_flag) dnn_logit = tf.keras.layers.Dense( 1, use_bias=False, kernel_initializer=tf.keras.initializers.glorot_normal(seed))(dnn_output) final_logit_components.append(dnn_logit) logits = add_func(final_logit_components) return deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer, training_chief_hooks=training_chief_hooks) return tf.estimator.Estimator(_model_fn, model_dir=model_dir, config=config) ================================================ FILE: deepctr/estimator/models/deepfm.py ================================================ # -*- coding:utf-8 -*- """ Author: Weichen Shen, weichenswc@163.com Reference: [1] Guo H, Tang R, Ye Y, et al. Deepfm: a factorization-machine based neural network for ctr prediction[J]. 
def DeepFMEstimator(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(256, 128, 64),
                    l2_reg_linear=0.00001, l2_reg_embedding=0.00001, l2_reg_dnn=0, seed=1024, dnn_dropout=0,
                    dnn_activation='relu', dnn_use_bn=False, task='binary', model_dir=None, config=None,
                    linear_optimizer='Ftrl',
                    dnn_optimizer='Adagrad', training_chief_hooks=None):
    """Build an Estimator for the DeepFM network.

    The final logit is the sum of three parts: the linear logit, an ``FM`` logit over
    the stacked sparse embeddings, and a DNN logit over the combined dense input.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param dnn_hidden_units: list, list of positive integer or empty list, the layer number and
        units in each layer of DNN.
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part.
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector.
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN.
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN.
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss.
    :param model_dir: Directory to save model parameters, graph and etc. This can
        also be used to load checkpoints from the directory into a estimator
        to continue training a previously saved model.
    :param config: tf.RunConfig object to configure the runtime settings.
    :param linear_optimizer: An instance of `tf.Optimizer` used to apply gradients to
        the linear part of the model. Defaults to FTRL optimizer.
    :param dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to
        the deep part of the model. Defaults to Adagrad optimizer.
    :param training_chief_hooks: Iterable of `tf.train.SessionRunHook` objects to
        run on the chief worker during training.
    :return: A Tensorflow Estimator instance.
    """

    def _model_fn(features, labels, mode, config):
        is_training = mode == tf.estimator.ModeKeys.TRAIN

        wide_logit = get_linear_logit(features, linear_feature_columns, l2_reg_linear=l2_reg_linear)

        with variable_scope(DNN_SCOPE_NAME):
            embeddings, dense_values = input_from_feature_columns(features, dnn_feature_columns,
                                                                  l2_reg_embedding=l2_reg_embedding)
            deep_input = combined_dnn_input(embeddings, dense_values)

            # Second-order interactions over the stacked embeddings.
            fm_logit = FM()(concat_func(embeddings, axis=1))

            # Deep tower followed by a single bias-free output unit.
            deep_tower = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)
            deep_hidden = deep_tower(deep_input, training=is_training)
            output_unit = tf.keras.layers.Dense(
                1, use_bias=False, kernel_initializer=tf.keras.initializers.glorot_normal(seed=seed))
            deep_logit = output_unit(deep_hidden)

        logits = wide_logit + fm_logit + deep_logit

        return deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer,
                                training_chief_hooks=training_chief_hooks)

    return tf.estimator.Estimator(_model_fn, model_dir=model_dir, config=config)
def FiBiNETEstimator(linear_feature_columns, dnn_feature_columns, bilinear_type='interaction', reduction_ratio=3,
                     dnn_hidden_units=(256, 128, 64), l2_reg_linear=1e-5,
                     l2_reg_embedding=1e-5, l2_reg_dnn=0, seed=1024, dnn_dropout=0, dnn_activation='relu',
                     task='binary', model_dir=None, config=None, linear_optimizer='Ftrl',
                     dnn_optimizer='Adagrad', training_chief_hooks=None):
    """Build a ``tf.estimator.Estimator`` implementing FiBiNET.

    Sparse embeddings are fed to two bilinear interaction layers, once after a SENET layer and
    once directly. Both outputs are concatenated, flattened, combined with the dense values and
    passed through a DNN; the resulting logit is added to the linear logit.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param bilinear_type: str,bilinear function type used in Bilinear Interaction Layer,can be ``'all'`` , ``'each'`` or ``'interaction'``
    :param reduction_ratio: integer in [1,inf), reduction ratio used in SENET Layer
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to wide part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :param model_dir: Directory to save model parameters, graph and etc. This can also be used to load
        checkpoints from the directory into a estimator to continue training a previously saved model.
    :param config: tf.RunConfig object to configure the runtime settings.
    :param linear_optimizer: An instance of `tf.Optimizer` used to apply gradients to
        the linear part of the model. Defaults to FTRL optimizer.
    :param dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to
        the deep part of the model. Defaults to Adagrad optimizer.
    :param training_chief_hooks: Iterable of `tf.train.SessionRunHook` objects to
        run on the chief worker during training.
    :return: A Tensorflow Estimator instance.
    """

    def _model_fn(features, labels, mode, config):
        is_training = mode == tf.estimator.ModeKeys.TRAIN

        linear_logits = get_linear_logit(features, linear_feature_columns, l2_reg_linear=l2_reg_linear)

        with variable_scope(DNN_SCOPE_NAME):
            sparse_embedding_list, dense_value_list = input_from_feature_columns(
                features, dnn_feature_columns, l2_reg_embedding=l2_reg_embedding)

            # SENET-reweighted embeddings and their bilinear interactions.
            senet_layer = SENETLayer(reduction_ratio, seed)
            senet_embedding_list = senet_layer(sparse_embedding_list)
            senet_bilinear = BilinearInteraction(bilinear_type=bilinear_type, seed=seed)
            senet_bilinear_out = senet_bilinear(senet_embedding_list)

            # Bilinear interactions on the raw embeddings (separate layer instance).
            raw_bilinear = BilinearInteraction(bilinear_type=bilinear_type, seed=seed)
            bilinear_out = raw_bilinear(sparse_embedding_list)

            flatten = Flatten()
            interaction_features = flatten(concat_func([senet_bilinear_out, bilinear_out]))
            dnn_input = combined_dnn_input([interaction_features], dense_value_list)

            # Batch normalization is always disabled for this model's DNN.
            deep_tower = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, False, seed=seed)
            dnn_out = deep_tower(dnn_input, training=is_training)
            projection = Dense(
                1, use_bias=False, kernel_initializer=tf.keras.initializers.glorot_normal(seed))
            dnn_logit = projection(dnn_out)

        logits = linear_logits + dnn_logit

        return deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer,
                                training_chief_hooks=training_chief_hooks)

    return tf.estimator.Estimator(_model_fn, model_dir=model_dir, config=config)
def FNNEstimator(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(256, 128, 64),
                 l2_reg_embedding=1e-5, l2_reg_linear=1e-5, l2_reg_dnn=0, seed=1024, dnn_dropout=0,
                 dnn_activation='relu', task='binary', model_dir=None, config=None, linear_optimizer='Ftrl',
                 dnn_optimizer='Adagrad', training_chief_hooks=None):
    """Build a ``tf.estimator.Estimator`` for the Factorization-supported Neural Network.

    The model adds the linear logit to the logit of a DNN applied to the combined sparse
    embeddings and dense values.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of deep net
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_linear: float. L2 regularizer strength applied to linear weight
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :param model_dir: Directory to save model parameters, graph and etc. This can also be used to load
        checkpoints from the directory into a estimator to continue training a previously saved model.
    :param config: tf.RunConfig object to configure the runtime settings.
    :param linear_optimizer: An instance of `tf.Optimizer` used to apply gradients to
        the linear part of the model. Defaults to FTRL optimizer.
    :param dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to
        the deep part of the model. Defaults to Adagrad optimizer.
    :param training_chief_hooks: Iterable of `tf.train.SessionRunHook` objects to
        run on the chief worker during training.
    :return: A Tensorflow Estimator instance.
    """

    def _model_fn(features, labels, mode, config):
        is_training = mode == tf.estimator.ModeKeys.TRAIN

        linear_logits = get_linear_logit(features, linear_feature_columns, l2_reg_linear=l2_reg_linear)

        with variable_scope(DNN_SCOPE_NAME):
            sparse_embedding_list, dense_value_list = input_from_feature_columns(
                features, dnn_feature_columns, l2_reg_embedding=l2_reg_embedding)
            dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)

            # Batch normalization is always disabled for this model's DNN.
            deep_tower = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, False, seed=seed)
            deep_out = deep_tower(dnn_input, training=is_training)

            projection = tf.keras.layers.Dense(
                1, use_bias=False, kernel_initializer=tf.keras.initializers.glorot_normal(seed))
            dnn_logit = projection(deep_out)

        logits = linear_logits + dnn_logit

        return deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer,
                                training_chief_hooks=training_chief_hooks)

    return tf.estimator.Estimator(_model_fn, model_dir=model_dir, config=config)
def FwFMEstimator(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(256, 128, 64),
                  l2_reg_linear=0.00001, l2_reg_embedding=0.00001, l2_reg_field_strength=0.00001, l2_reg_dnn=0,
                  seed=1024, dnn_dropout=0, dnn_activation='relu', dnn_use_bn=False, task='binary', model_dir=None,
                  config=None, linear_optimizer='Ftrl', dnn_optimizer='Adagrad', training_chief_hooks=None):
    """Build a ``tf.estimator.Estimator`` implementing the DeepFwFM architecture.

    The logit is the sum of the linear logit, the field-weighted FM logit and, when
    ``dnn_hidden_units`` is non-empty, the logit of a DNN tower.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param dnn_hidden_units: list,list of positive integer or empty list if do not want DNN, the layer number and units
        in each layer of DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_field_strength: float. L2 regularizer strength applied to the field pair strength parameters
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :param model_dir: Directory to save model parameters, graph and etc. This can also be used to load
        checkpoints from the directory into a estimator to continue training a previously saved model.
    :param config: tf.RunConfig object to configure the runtime settings.
    :param linear_optimizer: An instance of `tf.Optimizer` used to apply gradients to
        the linear part of the model. Defaults to FTRL optimizer.
    :param dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to
        the deep part of the model. Defaults to Adagrad optimizer.
    :param training_chief_hooks: Iterable of `tf.train.SessionRunHook` objects to
        run on the chief worker during training.
    :return: A Tensorflow Estimator instance.
    """

    def _model_fn(features, labels, mode, config):
        is_training = mode == tf.estimator.ModeKeys.TRAIN

        linear_logits = get_linear_logit(features, linear_feature_columns, l2_reg_linear=l2_reg_linear)
        final_logit_components = [linear_logits]

        with variable_scope(DNN_SCOPE_NAME):
            sparse_embedding_list, dense_value_list = input_from_feature_columns(
                features, dnn_feature_columns, l2_reg_embedding=l2_reg_embedding)

            # One field per sparse embedding; pair strengths are regularized separately.
            fwfm_layer = FwFMLayer(num_fields=len(sparse_embedding_list), regularizer=l2_reg_field_strength)
            fwfm_logit = fwfm_layer(concat_func(sparse_embedding_list, axis=1))
            final_logit_components.append(fwfm_logit)

            # The deep tower is optional: skipped when no hidden units are given.
            if dnn_hidden_units:
                dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)
                deep_tower = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)
                dnn_output = deep_tower(dnn_input, training=is_training)
                projection = tf.keras.layers.Dense(
                    1, use_bias=False, kernel_initializer=tf.keras.initializers.glorot_normal(seed))
                final_logit_components.append(projection(dnn_output))

        logits = add_func(final_logit_components)

        return deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer,
                                training_chief_hooks=training_chief_hooks)

    return tf.estimator.Estimator(_model_fn, model_dir=model_dir, config=config)
def NFMEstimator(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(256, 128, 64),
                 l2_reg_embedding=1e-5, l2_reg_linear=1e-5, l2_reg_dnn=0, seed=1024, bi_dropout=0,
                 dnn_dropout=0, dnn_activation='relu', task='binary', model_dir=None, config=None,
                 linear_optimizer='Ftrl', dnn_optimizer='Adagrad', training_chief_hooks=None):
    """Instantiates the Neural Factorization Machine architecture.

    The sparse embeddings are pooled by a Bi-Interaction layer, optionally passed through
    dropout, combined with the dense values and fed to a DNN. The DNN logit is added to the
    linear logit.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of deep net
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part.
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param seed: integer, to use as random seed.
    :param bi_dropout: When truthy, the probability we will drop out the output of the BiInteractionPooling
        layer during training. A falsy value (``0`` or ``None``) disables this dropout.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in deep net
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :param model_dir: Directory to save model parameters, graph and etc. This can also be used to load
        checkpoints from the directory into a estimator to continue training a previously saved model.
    :param config: tf.RunConfig object to configure the runtime settings.
    :param linear_optimizer: An instance of `tf.Optimizer` used to apply gradients to
        the linear part of the model. Defaults to FTRL optimizer.
    :param dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to
        the deep part of the model. Defaults to Adagrad optimizer.
    :param training_chief_hooks: Iterable of `tf.train.SessionRunHook` objects to
        run on the chief worker during training.
    :return: A Tensorflow Estimator instance.
    """

    def _model_fn(features, labels, mode, config):
        train_flag = (mode == tf.estimator.ModeKeys.TRAIN)

        linear_logits = get_linear_logit(features, linear_feature_columns, l2_reg_linear=l2_reg_linear)

        with variable_scope(DNN_SCOPE_NAME):
            sparse_embedding_list, dense_value_list = input_from_feature_columns(
                features, dnn_feature_columns, l2_reg_embedding=l2_reg_embedding)

            fm_input = concat_func(sparse_embedding_list, axis=1)
            bi_out = BiInteractionPooling()(fm_input)
            if bi_dropout:
                # Pass the estimator mode explicitly, as is done for the DNN below. With
                # training=None the layer would fall back to the Keras learning phase instead
                # of the estimator mode, so dropout could stay inactive while training.
                bi_out = tf.keras.layers.Dropout(bi_dropout)(bi_out, training=train_flag)

            dnn_input = combined_dnn_input([bi_out], dense_value_list)
            # Batch normalization is always disabled for this model's DNN.
            dnn_output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, False, seed=seed)(
                dnn_input, training=train_flag)
            dnn_logit = tf.keras.layers.Dense(
                1, use_bias=False, kernel_initializer=tf.keras.initializers.glorot_normal(seed))(dnn_output)

        logits = linear_logits + dnn_logit

        return deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer,
                                training_chief_hooks=training_chief_hooks)

    return tf.estimator.Estimator(_model_fn, model_dir=model_dir, config=config)
def PNNEstimator(dnn_feature_columns, dnn_hidden_units=(256, 128, 64), l2_reg_embedding=1e-5, l2_reg_dnn=0,
                 seed=1024, dnn_dropout=0, dnn_activation='relu', use_inner=True, use_outter=False, kernel_type='mat',
                 task='binary', model_dir=None, config=None, linear_optimizer='Ftrl',
                 dnn_optimizer='Adagrad', training_chief_hooks=None):
    """Build a ``tf.estimator.Estimator`` for the Product-based Neural Network.

    The DNN input is the flattened embedding signal, optionally concatenated with the inner
    and/or outer product features, combined with the dense values.

    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of deep net
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param use_inner: bool,whether use inner-product or not.
    :param use_outter: bool,whether use outter-product or not.
    :param kernel_type: str,kernel_type used in outter-product,can be ``'mat'`` , ``'vec'`` or ``'num'``
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :param model_dir: Directory to save model parameters, graph and etc. This can also be used to load
        checkpoints from the directory into a estimator to continue training a previously saved model.
    :param config: tf.RunConfig object to configure the runtime settings.
    :param linear_optimizer: An instance of `tf.Optimizer` used to apply gradients to
        the linear part of the model. Defaults to FTRL optimizer.
    :param dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to
        the deep part of the model. Defaults to Adagrad optimizer.
    :param training_chief_hooks: Iterable of `tf.train.SessionRunHook` objects to
        run on the chief worker during training.
    :return: A Tensorflow Estimator instance.
    :raises ValueError: if ``kernel_type`` is not one of ``'mat'``, ``'vec'`` or ``'num'``.
    """

    if kernel_type not in ['mat', 'vec', 'num']:
        raise ValueError("kernel_type must be mat,vec or num")

    def _model_fn(features, labels, mode, config):
        is_training = mode == tf.estimator.ModeKeys.TRAIN

        # PNN has no linear feature columns; the linear logit is built from an empty list.
        linear_logits = get_linear_logit(features, [], l2_reg_linear=0)

        with variable_scope(DNN_SCOPE_NAME):
            sparse_embedding_list, dense_value_list = input_from_feature_columns(
                features, dnn_feature_columns, l2_reg_embedding=l2_reg_embedding)

            # Both product layers are always built; the flags below only select which
            # outputs are fed to the DNN.
            inner_product = tf.keras.layers.Flatten()(
                InnerProductLayer()(sparse_embedding_list))
            outter_product = OutterProductLayer(kernel_type)(sparse_embedding_list)

            # ipnn deep input
            embedding_width = sum(int(emb.shape[-1]) for emb in sparse_embedding_list)
            linear_signal = tf.keras.layers.Reshape([embedding_width])(concat_func(sparse_embedding_list))

            deep_parts = [linear_signal]
            if use_inner:
                deep_parts.append(inner_product)
            if use_outter:
                deep_parts.append(outter_product)

            if len(deep_parts) > 1:
                deep_input = tf.keras.layers.Concatenate()(deep_parts)
            else:
                deep_input = linear_signal

            dnn_input = combined_dnn_input([deep_input], dense_value_list)
            # Batch normalization is always disabled for this model's DNN.
            deep_tower = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, False, seed=seed)
            dnn_out = deep_tower(dnn_input, training=is_training)
            projection = tf.keras.layers.Dense(
                1, use_bias=False, kernel_initializer=tf.keras.initializers.glorot_normal(seed))
            dnn_logit = projection(dnn_out)

        logits = linear_logits + dnn_logit

        return deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer,
                                training_chief_hooks=training_chief_hooks)

    return tf.estimator.Estimator(_model_fn, model_dir=model_dir, config=config)
def WDLEstimator(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(256, 128, 64), l2_reg_linear=1e-5,
                 l2_reg_embedding=1e-5, l2_reg_dnn=0, seed=1024, dnn_dropout=0, dnn_activation='relu',
                 task='binary', model_dir=None, config=None, linear_optimizer='Ftrl',
                 dnn_optimizer='Adagrad', training_chief_hooks=None):
    """Build a ``tf.estimator.Estimator`` implementing Wide&Deep Learning.

    The logit is the sum of the linear (wide) logit and the logit of a DNN applied to the
    combined sparse embeddings and dense values.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to wide part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :param model_dir: Directory to save model parameters, graph and etc. This can also be used to load
        checkpoints from the directory into a estimator to continue training a previously saved model.
    :param config: tf.RunConfig object to configure the runtime settings.
    :param linear_optimizer: An instance of `tf.Optimizer` used to apply gradients to
        the linear part of the model. Defaults to FTRL optimizer.
    :param dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to
        the deep part of the model. Defaults to Adagrad optimizer.
    :param training_chief_hooks: Iterable of `tf.train.SessionRunHook` objects to
        run on the chief worker during training.
    :return: A Tensorflow Estimator instance.
    """

    def _model_fn(features, labels, mode, config):
        is_training = mode == tf.estimator.ModeKeys.TRAIN

        linear_logits = get_linear_logit(features, linear_feature_columns, l2_reg_linear=l2_reg_linear)

        with variable_scope(DNN_SCOPE_NAME):
            sparse_embedding_list, dense_value_list = input_from_feature_columns(
                features, dnn_feature_columns, l2_reg_embedding=l2_reg_embedding)
            dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)

            # Batch normalization is always disabled for this model's DNN.
            deep_tower = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, False, seed=seed)
            dnn_out = deep_tower(dnn_input, training=is_training)

            projection = Dense(
                1, use_bias=False, kernel_initializer=tf.keras.initializers.glorot_normal(seed))
            dnn_logits = projection(dnn_out)

        logits = linear_logits + dnn_logits

        return deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer,
                                training_chief_hooks=training_chief_hooks)

    return tf.estimator.Estimator(_model_fn, model_dir=model_dir, config=config)
def xDeepFMEstimator(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(256, 128, 64),
                     cin_layer_size=(128, 128,), cin_split_half=True, cin_activation='relu', l2_reg_linear=0.00001,
                     l2_reg_embedding=0.00001, l2_reg_dnn=0, l2_reg_cin=0, seed=1024, dnn_dropout=0,
                     dnn_activation='relu', dnn_use_bn=False, task='binary', model_dir=None, config=None,
                     linear_optimizer='Ftrl', dnn_optimizer='Adagrad', training_chief_hooks=None):
    """Build a ``tf.estimator.Estimator`` implementing the xDeepFM architecture.

    The logit is the sum of the linear logit, the DNN logit and, when ``cin_layer_size`` is
    non-empty, the logit of a Compressed Interaction Network over the stacked embeddings.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of deep net
    :param cin_layer_size: list,list of positive integer or empty list, the feature maps in each hidden layer of
        Compressed Interaction Network
    :param cin_split_half: bool.if set to True, half of the feature maps in each hidden will connect to output unit
    :param cin_activation: activation function used on feature maps
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: L2 regularizer strength applied to deep net
    :param l2_reg_cin: L2 regularizer strength applied to CIN.
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :param model_dir: Directory to save model parameters, graph and etc. This can also be used to load
        checkpoints from the directory into a estimator to continue training a previously saved model.
    :param config: tf.RunConfig object to configure the runtime settings.
    :param linear_optimizer: An instance of `tf.Optimizer` used to apply gradients to
        the linear part of the model. Defaults to FTRL optimizer.
    :param dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to
        the deep part of the model. Defaults to Adagrad optimizer.
    :param training_chief_hooks: Iterable of `tf.train.SessionRunHook` objects to
        run on the chief worker during training.
    :return: A Tensorflow Estimator instance.
    """

    def _model_fn(features, labels, mode, config):
        is_training = mode == tf.estimator.ModeKeys.TRAIN

        linear_logits = get_linear_logit(features, linear_feature_columns, l2_reg_linear=l2_reg_linear)
        logits_list = [linear_logits]

        with variable_scope(DNN_SCOPE_NAME):
            sparse_embedding_list, dense_value_list = input_from_feature_columns(
                features, dnn_feature_columns, l2_reg_embedding=l2_reg_embedding)
            fm_input = concat_func(sparse_embedding_list, axis=1)

            # Implicit interactions: DNN tower with a bias-free projection.
            dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)
            deep_tower = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)
            dnn_output = deep_tower(dnn_input, training=is_training)
            dnn_projection = tf.keras.layers.Dense(
                1, use_bias=False, kernel_initializer=tf.keras.initializers.glorot_normal(seed))
            logits_list.append(dnn_projection(dnn_output))

            # Explicit interactions: CIN is optional; its projection keeps the default bias.
            if len(cin_layer_size) > 0:
                cin_layer = CIN(cin_layer_size, cin_activation, cin_split_half, l2_reg_cin, seed)
                exFM_out = cin_layer(fm_input, training=is_training)
                cin_projection = tf.keras.layers.Dense(
                    1, kernel_initializer=tf.keras.initializers.glorot_normal(seed))
                logits_list.append(cin_projection(exFM_out))

        logits = add_func(logits_list)

        return deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer,
                                training_chief_hooks=training_chief_hooks)

    return tf.estimator.Estimator(_model_fn, model_dir=model_dir, config=config)
# Variable-scope names for the linear and deep parts of the estimator models.
LINEAR_SCOPE_NAME = 'linear'
DNN_SCOPE_NAME = 'dnn'


def _summary_key(head_name, val):
    """Return ``val`` suffixed with ``/head_name`` when a head name is set, else ``val`` unchanged."""
    return '%s/%s' % (val, head_name) if head_name else val


class Head(_Head):
    """Single-logit head producing predictions, loss, metrics and the train op.

    ``task`` selects the behaviour: ``"binary"`` uses a sigmoid prediction with sigmoid
    cross-entropy loss; any other value uses the raw logit with mean squared error.
    """

    def __init__(self, task,
                 name=None):
        self._task = task
        self._name = name

    @property
    def name(self):
        """Name of this head; used as a suffix for metric keys when set."""
        return self._name

    @property
    def logits_dimension(self):
        """This head always consumes a single logit per example."""
        return 1

    def _eval_metric_ops(self, labels, logits, predictions, unweighted_loss, weights=None):
        """Build the metric-op dict and emit matching scalar summaries.

        :param labels: label tensor; cast to float before use.
        :param logits: unused here, kept for signature compatibility.
        :param predictions: prediction tensor; cast to float before use.
        :param unweighted_loss: per-example loss as returned by ``create_loss``.
        :param weights: optional weights forwarded to the metric functions.
        :return: dict mapping metric keys to metric ops.
        """
        labels = to_float(labels)
        predictions = to_float(predictions)

        # with name_scope(None, 'metrics', (labels, logits, predictions,
        # unweighted_loss, weights)):
        metrics = get_metrics()
        losses = get_losses()

        prediction_key = _summary_key(self._name, "prediction/mean")
        label_key = _summary_key(self._name, "label/mean")
        metric_ops = {
            prediction_key: metrics.mean(predictions, weights=weights),
            label_key: metrics.mean(labels, weights=weights),
        }

        # Index [1] selects the second element of each metric pair for the summary.
        summary_scalar("prediction/mean", metric_ops[prediction_key][1])
        summary_scalar("label/mean", metric_ops[label_key][1])

        mean_loss = losses.compute_weighted_loss(
            unweighted_loss, weights=1.0, reduction=losses.Reduction.MEAN)

        if self._task == "binary":
            auc_key = _summary_key(self._name, "AUC")
            metric_ops[_summary_key(self._name, "LogLoss")] = metrics.mean(mean_loss, weights=weights)
            summary_scalar("LogLoss", mean_loss)

            metric_ops[auc_key] = metrics.auc(labels, predictions, weights=weights)
            summary_scalar("AUC", metric_ops[auc_key][1])
        else:
            mae_key = _summary_key(self._name, "MAE")
            metric_ops[_summary_key(self._name, "MSE")] = metrics.mean_squared_error(
                labels, predictions, weights=weights)
            # The MSE summary reuses the mean loss, which is the squared error for this task.
            summary_scalar("MSE", mean_loss)

            metric_ops[mae_key] = metrics.mean_absolute_error(labels, predictions, weights=weights)
            summary_scalar("MAE", metric_ops[mae_key][1])

        return metric_ops

    def create_loss(self, features, mode, logits, labels):
        """Return the unreduced per-example loss for the configured task.

        :param features: unused for this head.
        :param mode: unused for this head.
        :param logits: logits tensor.
        :param labels: labels tensor.
        :return: loss tensor computed with ``Reduction.NONE``.
        """
        del mode, features  # Unused for this head.
        losses = get_losses()
        if self._task == "binary":
            loss = losses.sigmoid_cross_entropy(labels, logits, reduction=losses.Reduction.NONE)
        else:
            loss = losses.mean_squared_error(labels, logits, reduction=losses.Reduction.NONE)
        return loss

    def create_estimator_spec(
            self, features, mode, logits, labels=None, train_op_fn=None, training_chief_hooks=None):
        """Build the ``tf.estimator.EstimatorSpec`` for the given mode.

        :param features: input features, forwarded to ``create_loss``.
        :param mode: one of the ``tf.estimator.ModeKeys`` values.
        :param logits: model logits; reshaped to ``[-1, 1]``.
        :param labels: labels; reshaped to ``[-1, 1]``. Not used in PREDICT mode.
        :param train_op_fn: callable taking the training loss and returning the train op.
            Only invoked in TRAIN mode.
        :param training_chief_hooks: hooks forwarded to the returned spec.
        :return: a ``tf.estimator.EstimatorSpec``.
        """
        # with name_scope('head'):
        logits = tf.reshape(logits, [-1, 1])
        if self._task == 'binary':
            pred = tf.sigmoid(logits)
        else:
            pred = logits

        predictions = {"pred": pred, "logits": logits}
        export_outputs = {"predict": tf.estimator.export.PredictOutput(predictions)}
        if mode == tf.estimator.ModeKeys.PREDICT:
            return tf.estimator.EstimatorSpec(
                mode=mode,
                predictions=predictions,
                export_outputs=export_outputs)

        labels = tf.reshape(labels, [-1, 1])

        unweighted_loss = self.create_loss(features, mode, logits, labels)

        losses = get_losses()
        # Training loss: summed per-example loss plus the collected regularization loss.
        loss = losses.compute_weighted_loss(
            unweighted_loss, weights=1.0, reduction=losses.Reduction.SUM)
        reg_loss = losses.get_regularization_loss()

        training_loss = loss + reg_loss

        eval_metric_ops = self._eval_metric_ops(labels, logits, pred, unweighted_loss)

        # Only build the train op when training. Evaluation does not need optimizer ops,
        # and this keeps the default ``train_op_fn=None`` usable in EVAL mode.
        train_op = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = train_op_fn(training_loss)

        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            loss=training_loss,
            train_op=train_op,
            eval_metric_ops=eval_metric_ops,
            training_chief_hooks=training_chief_hooks)


def deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer, training_chief_hooks):
    """Turn model logits into an ``EstimatorSpec`` using the shared ``Head``.

    :param features: input features passed through to the head.
    :param mode: one of the ``tf.estimator.ModeKeys`` values.
    :param logits: logits produced by the model.
    :param labels: labels tensor.
    :param task: task name forwarded to ``Head``.
    :param linear_optimizer: optimizer (or its name) for the linear part; resolved with a
        learning rate of 0.005.
    :param dnn_optimizer: optimizer (or its name) for the deep part; resolved with a
        learning rate of 0.01.
    :param training_chief_hooks: hooks forwarded to the estimator spec.
    :return: the ``tf.estimator.EstimatorSpec`` built by the head.
    """
    linear_optimizer = get_optimizer_instance(linear_optimizer, 0.005)
    dnn_optimizer = get_optimizer_instance(dnn_optimizer, 0.01)
    train_op_fn = get_train_op_fn(linear_optimizer, dnn_optimizer)

    head = Head(task)
    return head.create_estimator_spec(features=features,
                                      mode=mode,
                                      labels=labels,
                                      train_op_fn=train_op_fn,
                                      logits=logits, training_chief_hooks=training_chief_hooks)
def variable_scope(name_or_scope):
    """Open a variable scope, falling back to ``tf.compat.v1`` on ``AttributeError``."""
    try:
        scope = tf.variable_scope(name_or_scope)
    except AttributeError:
        scope = tf.compat.v1.variable_scope(name_or_scope)
    return scope


def get_collection(key, scope=None):
    """Fetch a graph collection, falling back to ``tf.compat.v1`` on ``AttributeError``."""
    try:
        collection = tf.get_collection(key, scope=scope)
    except AttributeError:
        collection = tf.compat.v1.get_collection(key, scope=scope)
    return collection


def get_GraphKeys():
    """Return ``tf.GraphKeys``, or ``tf.compat.v1.GraphKeys`` when the former is missing."""
    try:
        graph_keys = tf.GraphKeys
    except AttributeError:
        graph_keys = tf.compat.v1.GraphKeys
    return graph_keys


def get_losses():
    """Return the losses module; ``tf.compat.v1.losses`` is tried before ``tf.losses``."""
    try:
        losses_module = tf.compat.v1.losses
    except AttributeError:
        losses_module = tf.losses
    return losses_module


def input_layer(features, feature_columns):
    """Call ``feature_column.input_layer``, falling back to ``tf.compat.v1`` on ``AttributeError``."""
    try:
        dense_tensor = tf.feature_column.input_layer(features, feature_columns)
    except AttributeError:
        dense_tensor = tf.compat.v1.feature_column.input_layer(features, feature_columns)
    return dense_tensor


def get_metrics():
    """Return the metrics module; ``tf.compat.v1.metrics`` is tried before ``tf.metrics``."""
    try:
        metrics_module = tf.compat.v1.metrics
    except AttributeError:
        metrics_module = tf.metrics
    return metrics_module


def to_float(x, name="ToFloat"):
    """Cast ``x`` with ``to_float``, falling back to ``tf.compat.v1`` on ``AttributeError``."""
    try:
        casted = tf.to_float(x, name)
    except AttributeError:
        casted = tf.compat.v1.to_float(x, name)
    return casted


def summary_scalar(name, data):
    """Write a scalar summary, falling back to ``tf.compat.v1.summary`` on ``AttributeError``."""
    try:
        tf.summary.scalar(name, data)
    except AttributeError:
        # tf version 2.5.0+: AttributeError: module 'tensorflow._api.v2.summary'
        # has no attribute 'scalar'
        tf.compat.v1.summary.scalar(name, data)
class SparseFeat(namedtuple(
        'SparseFeat',
        ['name', 'vocabulary_size', 'embedding_dim', 'use_hash', 'vocabulary_path', 'dtype',
         'embeddings_initializer', 'embedding_name', 'group_name', 'trainable'])):
    """Immutable description of one sparse (categorical) feature.

    Args:
        name: feature name; also the sole input of ``__hash__``.
        vocabulary_size: size of the embedding vocabulary.
        embedding_dim: embedding width, or ``"auto"`` to derive it as
            ``6 * int(vocabulary_size ** 0.25)``.
        use_hash: whether the raw input is passed through a ``Hash`` layer
            before the embedding lookup.
        vocabulary_path: forwarded to the ``Hash`` layer.
        dtype: dtype of the Keras ``Input`` built for the feature.
        embeddings_initializer: embedding initializer; defaults to
            ``RandomNormal(mean=0.0, stddev=0.0001, seed=2020)``.
        embedding_name: key of the embedding table; defaults to ``name``.
        group_name: group under which the looked-up embedding is collected.
        trainable: assigned to the ``trainable`` flag of the embedding layer.
    """
    __slots__ = ()

    def __new__(cls, name, vocabulary_size, embedding_dim=4, use_hash=False, vocabulary_path=None, dtype="int32",
                embeddings_initializer=None, embedding_name=None, group_name=DEFAULT_GROUP_NAME, trainable=True):
        if embedding_dim == "auto":
            embedding_dim = 6 * int(vocabulary_size ** 0.25)

        initializer = embeddings_initializer
        if initializer is None:
            initializer = RandomNormal(mean=0.0, stddev=0.0001, seed=2020)

        table_name = name if embedding_name is None else embedding_name

        return super(SparseFeat, cls).__new__(
            cls,
            name=name,
            vocabulary_size=vocabulary_size,
            embedding_dim=embedding_dim,
            use_hash=use_hash,
            vocabulary_path=vocabulary_path,
            dtype=dtype,
            embeddings_initializer=initializer,
            embedding_name=table_name,
            group_name=group_name,
            trainable=trainable)

    def __hash__(self):
        # Hash on the name only; the remaining fields are ignored.
        return hash(self.name)
class DenseFeat(namedtuple('DenseFeat', 'name dimension dtype transform_fn')):
    """Immutable description of one dense (numeric) feature.

    Args:
        name: feature name; also the sole input of ``__hash__``.
        dimension: dimension of the feature, default = 1.
        dtype: dtype of the feature, default = "float32".
        transform_fn: if not ``None``, a function applied to the feature
            values; it takes the input Tensor and returns the output Tensor
            (e.g. ``lambda x: (x - 3.0) / 4.2``).
    """
    __slots__ = ()

    def __new__(cls, name, dimension=1, dtype="float32", transform_fn=None):
        return super(DenseFeat, cls).__new__(
            cls, name=name, dimension=dimension, dtype=dtype, transform_fn=transform_fn)

    def __hash__(self):
        # Hash on the name only; the remaining fields are ignored.
        return hash(self.name)
def get_linear_logit(features, feature_columns, units=1, use_bias=False, seed=1024, prefix='linear',
                     l2_reg=0, sparse_feat_refine_weight=None):
    """Build the linear logit(s) for the given feature columns.

    Every sparse and variable-length sparse column is replaced by a variant
    with ``embedding_dim=1`` and a ``Zeros`` initializer. One ``Linear``
    layer is then applied per output unit.

    Args:
        features: mapping from feature name to input tensor.
        feature_columns: iterable of feature columns. Any iterable is
            accepted (list, tuple, ...); ``None`` is treated as empty. The
            caller's container is never modified.
        units: number of linear outputs to build.
        use_bias: forwarded to ``Linear``.
        seed: random seed forwarded to embedding creation and ``Linear``.
        prefix: name prefix for the embedding tables; the unit index is
            appended for each unit.
        l2_reg: L2 strength forwarded to embedding creation and ``Linear``.
        sparse_feat_refine_weight: optional tensor multiplied (after
            ``expand_dims`` on axis 1) into the concatenated sparse input.

    Returns:
        ``concat_func`` over the per-unit linear logits, or a constant
        ``[[0.0]]`` tensor when there is neither sparse nor dense input.
    """
    # Build a fresh list instead of mutating a copy in place, so tuples and
    # other iterables work too.
    linear_feature_columns = []
    for fc in feature_columns or []:
        if isinstance(fc, SparseFeat):
            fc = fc._replace(embedding_dim=1, embeddings_initializer=Zeros())
        elif isinstance(fc, VarLenSparseFeat):
            fc = fc._replace(
                sparsefeat=fc.sparsefeat._replace(embedding_dim=1, embeddings_initializer=Zeros()))
        linear_feature_columns.append(fc)

    linear_emb_list = [
        input_from_feature_columns(features, linear_feature_columns, l2_reg, seed, prefix=prefix + str(i))[0]
        for i in range(units)]
    # Only the dense part of this call is used; it is identical for all units.
    _, dense_input_list = input_from_feature_columns(features, linear_feature_columns, l2_reg, seed, prefix=prefix)

    def refine(sparse_input):
        # Optionally re-weight the sparse input per field.
        if sparse_feat_refine_weight is None:
            return sparse_input
        return Lambda(lambda x: x[0] * tf.expand_dims(x[1], axis=1))(
            [sparse_input, sparse_feat_refine_weight])

    has_dense = len(dense_input_list) > 0
    linear_logit_list = []
    for linear_emb in linear_emb_list:
        has_sparse = len(linear_emb) > 0
        if has_sparse and has_dense:
            sparse_input = concat_func(linear_emb)
            dense_input = concat_func(dense_input_list)
            sparse_input = refine(sparse_input)
            linear_logit = Linear(l2_reg, mode=2, use_bias=use_bias, seed=seed)([sparse_input, dense_input])
        elif has_sparse:
            sparse_input = refine(concat_func(linear_emb))
            linear_logit = Linear(l2_reg, mode=0, use_bias=use_bias, seed=seed)(sparse_input)
        elif has_dense:
            dense_input = concat_func(dense_input_list)
            linear_logit = Linear(l2_reg, mode=1, use_bias=use_bias, seed=seed)(dense_input)
        else:  # empty feature_columns
            return Lambda(lambda x: tf.constant([[0.0]]))(list(features.values())[0])
        linear_logit_list.append(linear_logit)

    return concat_func(linear_logit_list)
def get_embedding_vec_list(embedding_dict, input_dict, sparse_feature_columns, return_feat_list=(), mask_feat_list=()):
    """Look up the embedding of each selected sparse feature.

    Args:
        embedding_dict: mapping from embedding name to a callable embedding
            table (the layout produced by ``create_embedding_dict``). A
            mapping keyed by feature name is still accepted as a fallback.
        input_dict: mapping from feature name to the raw input.
        sparse_feature_columns: iterable of sparse feature columns.
        return_feat_list: feature names to return; empty means all.
        mask_feat_list: feature names whose ``Hash`` layer uses
            ``mask_zero=True``. Only consulted for ``use_hash`` features.

    Returns:
        List of looked-up embeddings, in the order of
        ``sparse_feature_columns``.

    Raises:
        KeyError: if neither the embedding name nor the feature name is a
            key of ``embedding_dict``, or the feature is missing from
            ``input_dict``.
    """
    embedding_vec_list = []
    for fg in sparse_feature_columns:
        feat_name = fg.name
        if return_feat_list and feat_name not in return_feat_list:
            continue

        if fg.use_hash:
            lookup_idx = Hash(fg.vocabulary_size, mask_zero=(feat_name in mask_feat_list),
                              vocabulary_path=fg.vocabulary_path)(input_dict[feat_name])
        else:
            lookup_idx = input_dict[feat_name]

        # Tables are registered under embedding_name, which differs from the
        # feature name when several features share one table.
        emb_key = fg.embedding_name if fg.embedding_name in embedding_dict else feat_name
        embedding_vec_list.append(embedding_dict[emb_key](lookup_idx))

    return embedding_vec_list
def get_varlen_pooling_list(embedding_dict, features, varlen_sparse_feature_columns, to_list=False):
    """Pool every variable-length sequence embedding into a single vector.

    For each column the sequence embedding is optionally re-weighted by a
    ``WeightedSequenceLayer`` (when ``weight_name`` is set) and then reduced
    by a ``SequencePoolingLayer`` using the column's combiner. When
    ``length_name`` is set the explicit length tensor is passed to the
    layers; otherwise they are built with ``supports_masking=True``.

    Args:
        embedding_dict: mapping from feature name to its sequence embedding.
        features: mapping from input name to tensor; supplies the length
            and weight tensors.
        varlen_sparse_feature_columns: iterable of variable-length columns.
        to_list: if true, return a flat list instead of the grouped mapping.

    Returns:
        ``defaultdict(list)`` mapping group name to pooled vectors, or, when
        ``to_list`` is true, a ``list`` with all pooled vectors.
    """
    pooling_vec_list = defaultdict(list)
    for fc in varlen_sparse_feature_columns:
        feature_name = fc.name
        combiner = fc.combiner
        feature_length_name = fc.length_name
        seq_embedding = embedding_dict[feature_name]

        if feature_length_name is not None:
            seq_length = features[feature_length_name]
            if fc.weight_name is not None:
                seq_input = WeightedSequenceLayer(weight_normalization=fc.weight_norm)(
                    [seq_embedding, seq_length, features[fc.weight_name]])
            else:
                seq_input = seq_embedding
            vec = SequencePoolingLayer(combiner, supports_masking=False)([seq_input, seq_length])
        else:
            if fc.weight_name is not None:
                seq_input = WeightedSequenceLayer(weight_normalization=fc.weight_norm, supports_masking=True)(
                    [seq_embedding, features[fc.weight_name]])
            else:
                seq_input = seq_embedding
            vec = SequencePoolingLayer(combiner, supports_masking=True)(seq_input)

        pooling_vec_list[fc.group_name].append(vec)

    if to_list:
        # Materialize: a bare chain object is a one-shot iterator, which
        # cannot be indexed, measured with len() or iterated twice.
        return list(chain.from_iterable(pooling_vec_list.values()))
    return pooling_vec_list
# Registry mapping a name to each custom layer / helper exported by this
# package, plus the `tf` module itself.
# NOTE(review): presumably meant to be passed as `custom_objects` when
# deserializing saved Keras models — confirm against callers.
custom_objects = {'tf': tf,
                  'InnerProductLayer': InnerProductLayer,
                  'OutterProductLayer': OutterProductLayer,
                  'DNN': DNN,
                  'PredictionLayer': PredictionLayer,
                  'FM': FM,
                  'AFMLayer': AFMLayer,
                  'CrossNet': CrossNet,
                  'CrossNetMix': CrossNetMix,
                  'BiInteractionPooling': BiInteractionPooling,
                  'LocalActivationUnit': LocalActivationUnit,
                  'Dice': Dice,
                  'SequencePoolingLayer': SequencePoolingLayer,
                  'AttentionSequencePoolingLayer': AttentionSequencePoolingLayer,
                  'CIN': CIN,
                  'InteractingLayer': InteractingLayer,
                  'LayerNormalization': LayerNormalization,
                  'BiLSTM': BiLSTM,
                  'Transformer': Transformer,
                  'NoMask': NoMask,
                  'BiasEncoding': BiasEncoding,
                  'KMaxPooling': KMaxPooling,
                  'FGCNNLayer': FGCNNLayer,
                  'Hash': Hash,
                  'Linear': Linear,
                  'Concat': Concat,
                  'DynamicGRU': DynamicGRU,
                  'SENETLayer': SENETLayer,
                  'BilinearInteraction': BilinearInteraction,
                  'WeightedSequenceLayer': WeightedSequenceLayer,
                  '_Add': _Add,
                  'FieldWiseBiInteraction': FieldWiseBiInteraction,
                  'FwFMLayer': FwFMLayer,
                  'softmax': softmax,
                  'FEFMLayer': FEFMLayer,
                  'reduce_sum': reduce_sum,
                  'PositionEncoding': PositionEncoding,
                  'RegulationModule': RegulationModule,
                  'BridgeModule': BridgeModule
                  }
tf.keras.layers.BatchNormalization try: unicode except NameError: unicode = str class Dice(Layer): """The Data Adaptive Activation Function in DIN,which can be viewed as a generalization of PReLu and can adaptively adjust the rectified point according to distribution of input data. Input shape - Arbitrary. Use the keyword argument `input_shape` (tuple of integers, does not include the samples axis) when using this layer as the first layer in a model. Output shape - Same shape as the input. Arguments - **axis** : Integer, the axis that should be used to compute data distribution (typically the features axis). - **epsilon** : Small float added to variance to avoid dividing by zero. References - [Zhou G, Zhu X, Song C, et al. Deep interest network for click-through rate prediction[C]//Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining. ACM, 2018: 1059-1068.](https://arxiv.org/pdf/1706.06978.pdf) """ def __init__(self, axis=-1, epsilon=1e-9, **kwargs): self.axis = axis self.epsilon = epsilon super(Dice, self).__init__(**kwargs) def build(self, input_shape): self.bn = BatchNormalization( axis=self.axis, epsilon=self.epsilon, center=False, scale=False) self.alphas = self.add_weight(shape=(input_shape[-1],), initializer=Zeros( ), dtype=tf.float32, name='dice_alpha') # name='alpha_'+self.name super(Dice, self).build(input_shape) # Be sure to call this somewhere! 
def activation_layer(activation):
    """Instantiate the activation layer described by ``activation``.

    Args:
        activation: ``"dice"`` / ``"Dice"`` for a ``Dice`` layer, any other
            string for ``Activation(activation)``, or a ``Layer`` subclass,
            which is instantiated without arguments.

    Returns:
        A newly created layer instance.

    Raises:
        ValueError: if ``activation`` is neither a string nor a ``Layer``
            subclass (for example a layer instance, a function or ``None``).
    """
    if activation in ("dice", "Dice"):
        act_layer = Dice()
    elif isinstance(activation, (str, unicode)):
        act_layer = Activation(activation)
    # issubclass() raises TypeError for non-class arguments, so check that
    # we really have a class first; otherwise the ValueError below could
    # never be reached for instances or callables.
    elif isinstance(activation, type) and issubclass(activation, Layer):
        act_layer = activation()
    else:
        # One-element tuple keeps %-formatting correct for tuple values.
        raise ValueError(
            "Invalid activation,found %s.You should use a str or a Activation Layer Class." % (activation,))
    return act_layer
def __init__(self, hidden_units=(64, 32), activation='sigmoid', l2_reg=0, dropout_rate=0, use_bn=False, seed=1024,
             **kwargs):
    """Record the attention-network hyper-parameters on the layer.

    Nothing is built here: the values are stored as attributes, later
    forwarded to the inner ``DNN`` in ``build`` and serialized by
    ``get_config``. Remaining keyword arguments go to the base class.
    """
    self.hidden_units = hidden_units
    self.activation = activation
    self.l2_reg = l2_reg
    self.dropout_rate = dropout_rate
    self.use_bn = use_bn
    self.seed = seed
    super(LocalActivationUnit, self).__init__(**kwargs)
    # Assigned only after the base initializer has run.
    # NOTE(review): presumably because the base class sets its own default
    # for this flag — confirm before reordering.
    self.supports_masking = True
class DNN(Layer):
    """The Multi Layer Perceptron

      Input shape
        - nD tensor with shape: ``(batch_size, ..., input_dim)``. The most common situation would be a 2D input with shape ``(batch_size, input_dim)``.

      Output shape
        - nD tensor with shape: ``(batch_size, ..., hidden_size[-1])``. For instance, for a 2D input with shape ``(batch_size, input_dim)``, the output would have shape ``(batch_size, hidden_size[-1])``.

      Arguments
        - **hidden_units**:list of positive integer, the layer number and units in each layer. May be empty, in which case the layer returns its input unchanged.

        - **activation**: Activation function to use.

        - **l2_reg**: float between 0 and 1. L2 regularizer strength applied to the kernel weights matrix.

        - **dropout_rate**: float in [0,1). Fraction of the units to dropout.

        - **use_bn**: bool. Whether use BatchNormalization before activation or not.

        - **output_activation**: Activation function to use in the last layer.If ``None``,it will be same as ``activation``. Ignored when ``hidden_units`` is empty.

        - **seed**: A Python integer to use as random seed.
    """

    def __init__(self, hidden_units, activation='relu', l2_reg=0, dropout_rate=0, use_bn=False, output_activation=None,
                 seed=1024, **kwargs):
        self.hidden_units = hidden_units
        self.activation = activation
        self.l2_reg = l2_reg
        self.dropout_rate = dropout_rate
        self.use_bn = use_bn
        self.output_activation = output_activation
        self.seed = seed

        super(DNN, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the per-layer kernels, biases and helper layers."""
        input_size = input_shape[-1]
        # Layer i maps hidden_units[i] -> hidden_units[i + 1].
        hidden_units = [int(input_size)] + list(self.hidden_units)
        num_layers = len(self.hidden_units)

        self.kernels = [self.add_weight(name='kernel' + str(i),
                                        shape=(hidden_units[i], hidden_units[i + 1]),
                                        initializer=glorot_normal(seed=self.seed),
                                        regularizer=l2(self.l2_reg),
                                        trainable=True) for i in range(num_layers)]
        self.bias = [self.add_weight(name='bias' + str(i),
                                     shape=(self.hidden_units[i],),
                                     initializer=Zeros(),
                                     trainable=True) for i in range(num_layers)]
        if self.use_bn:
            self.bn_layers = [BatchNormalization() for _ in range(num_layers)]

        # A distinct seed per layer so the dropout masks differ.
        self.dropout_layers = [Dropout(self.dropout_rate, seed=self.seed + i) for i in range(num_layers)]

        self.activation_layers = [activation_layer(self.activation) for _ in range(num_layers)]

        # With no hidden layers there is no last activation to replace;
        # indexing [-1] on the empty list would raise IndexError.
        if self.output_activation and self.activation_layers:
            self.activation_layers[-1] = activation_layer(self.output_activation)

        super(DNN, self).build(input_shape)  # Be sure to call this somewhere!

    def call(self, inputs, training=None, **kwargs):
        """Apply dense -> (batch norm) -> activation -> dropout per hidden layer."""
        deep_input = inputs

        for i in range(len(self.hidden_units)):
            fc = tf.nn.bias_add(tf.tensordot(
                deep_input, self.kernels[i], axes=(-1, 0)), self.bias[i])

            if self.use_bn:
                fc = self.bn_layers[i](fc, training=training)
            try:
                fc = self.activation_layers[i](fc, training=training)
            except TypeError as e:  # TypeError: call() got an unexpected keyword argument 'training'
                # Best effort: retry without the flag for activations whose
                # call() does not accept `training`.
                print("make sure the activation function use training flag properly", e)
                fc = self.activation_layers[i](fc)

            fc = self.dropout_layers[i](fc, training=training)
            deep_input = fc

        return deep_input

    def compute_output_shape(self, input_shape):
        """Replace the last dimension by the last hidden size, if there is one."""
        if len(self.hidden_units) > 0:
            shape = input_shape[:-1] + (self.hidden_units[-1],)
        else:
            shape = input_shape

        return tuple(shape)

    def get_config(self, ):
        """Return the constructor arguments merged into the base layer config."""
        config = {'activation': self.activation, 'hidden_units': self.hidden_units,
                  'l2_reg': self.l2_reg, 'use_bn': self.use_bn, 'dropout_rate': self.dropout_rate,
                  'output_activation': self.output_activation, 'seed': self.seed}
        base_config = super(DNN, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
super(PredictionLayer, self).build(input_shape) def call(self, inputs, **kwargs): x = inputs if self.use_bias: x = tf.nn.bias_add(x, self.global_bias, data_format='NHWC') if self.task == "binary": x = tf.sigmoid(x) output = tf.reshape(x, (-1, 1)) return output def compute_output_shape(self, input_shape): return (None, 1) def get_config(self, ): config = {'task': self.task, 'use_bias': self.use_bias} base_config = super(PredictionLayer, self).get_config() return dict(list(base_config.items()) + list(config.items())) class RegulationModule(Layer): """Regulation module used in EDCN. Input shape - 3D tensor with shape: ``(batch_size,field_size,embedding_size)``. Output shape - 2D tensor with shape: ``(batch_size,field_size * embedding_size)``. Arguments - **tau** : Positive float, the temperature coefficient to control distribution of field-wise gating unit. References - [Enhancing Explicit and Implicit Feature Interactions via Information Sharing for Parallel Deep CTR Models.](https://dlp-kdd.github.io/assets/pdf/DLP-KDD_2021_paper_12.pdf) """ def __init__(self, tau=1.0, **kwargs): if tau == 0: raise ValueError("RegulationModule tau can not be zero.") self.tau = 1.0 / tau super(RegulationModule, self).__init__(**kwargs) def build(self, input_shape): self.field_size = int(input_shape[1]) self.embedding_size = int(input_shape[2]) self.g = self.add_weight( shape=(1, self.field_size, 1), initializer=Ones(), name=self.name + '_field_weight') # Be sure to call this somewhere! 
super(RegulationModule, self).build(input_shape) def call(self, inputs, **kwargs): if K.ndim(inputs) != 3: raise ValueError( "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs))) feild_gating_score = tf.nn.softmax(self.g * self.tau, 1) E = inputs * feild_gating_score return tf.reshape(E, [-1, self.field_size * self.embedding_size]) def compute_output_shape(self, input_shape): return (None, self.field_size * self.embedding_size) def get_config(self): config = {'tau': self.tau} base_config = super(RegulationModule, self).get_config() base_config.update(config) return base_config ================================================ FILE: deepctr/layers/interaction.py ================================================ # -*- coding:utf-8 -*- """ Authors: Weichen Shen,weichenswc@163.com, Harshit Pande, Yi He, heyi_jack@163.com """ import itertools import tensorflow as tf from tensorflow.python.keras import backend as K from tensorflow.python.keras.backend import batch_dot try: from tensorflow.python.ops.init_ops import Zeros, Ones, Constant, TruncatedNormal, \ glorot_normal_initializer as glorot_normal, \ glorot_uniform_initializer as glorot_uniform except ImportError: from tensorflow.python.ops.init_ops_v2 import Zeros, Ones, Constant, TruncatedNormal, glorot_normal, glorot_uniform from tensorflow.python.keras.layers import Layer, MaxPooling2D, Conv2D, Dropout, Lambda, Dense, Flatten from tensorflow.python.keras.regularizers import l2 from tensorflow.python.layers import utils from .activation import activation_layer from .utils import concat_func, reduce_sum, softmax, reduce_mean from .core import DNN class AFMLayer(Layer): """Attentonal Factorization Machine models pairwise (order-2) feature interactions without linear term and bias. Input shape - A list of 3D tensor with shape: ``(batch_size,1,embedding_size)``. Output shape - 2D tensor with shape: ``(batch_size, 1)``. 
Arguments - **attention_factor** : Positive integer, dimensionality of the attention network output space. - **l2_reg_w** : float between 0 and 1. L2 regularizer strength applied to attention network. - **dropout_rate** : float between in [0,1). Fraction of the attention net output units to dropout. - **seed** : A Python integer to use as random seed. References - [Attentional Factorization Machines : Learning the Weight of Feature Interactions via Attention Networks](https://arxiv.org/pdf/1708.04617.pdf) """ def __init__(self, attention_factor=4, l2_reg_w=0, dropout_rate=0, seed=1024, **kwargs): self.attention_factor = attention_factor self.l2_reg_w = l2_reg_w self.dropout_rate = dropout_rate self.seed = seed super(AFMLayer, self).__init__(**kwargs) def build(self, input_shape): if not isinstance(input_shape, list) or len(input_shape) < 2: # input_shape = input_shape[0] # if not isinstance(input_shape, list) or len(input_shape) < 2: raise ValueError('A `AttentionalFM` layer should be called ' 'on a list of at least 2 inputs') shape_set = set() reduced_input_shape = [shape.as_list() for shape in input_shape] for i in range(len(input_shape)): shape_set.add(tuple(reduced_input_shape[i])) if len(shape_set) > 1: raise ValueError('A `AttentionalFM` layer requires ' 'inputs with same shapes ' 'Got different shapes: %s' % (shape_set)) if len(input_shape[0]) != 3 or input_shape[0][1] != 1: raise ValueError('A `AttentionalFM` layer requires ' 'inputs of a list with same shape tensor like\ (None, 1, embedding_size)' 'Got different shapes: %s' % (input_shape[0])) embedding_size = int(input_shape[0][-1]) self.attention_W = self.add_weight(shape=(embedding_size, self.attention_factor), initializer=glorot_normal(seed=self.seed), regularizer=l2(self.l2_reg_w), name="attention_W") self.attention_b = self.add_weight( shape=(self.attention_factor,), initializer=Zeros(), name="attention_b") self.projection_h = self.add_weight(shape=(self.attention_factor, 1), 
initializer=glorot_normal(seed=self.seed), name="projection_h") self.projection_p = self.add_weight(shape=( embedding_size, 1), initializer=glorot_normal(seed=self.seed), name="projection_p") self.dropout = Dropout( self.dropout_rate, seed=self.seed) self.tensordot = Lambda( lambda x: tf.tensordot(x[0], x[1], axes=(-1, 0))) # Be sure to call this somewhere! super(AFMLayer, self).build(input_shape) def call(self, inputs, training=None, **kwargs): if K.ndim(inputs[0]) != 3: raise ValueError( "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs))) embeds_vec_list = inputs row = [] col = [] for r, c in itertools.combinations(embeds_vec_list, 2): row.append(r) col.append(c) p = tf.concat(row, axis=1) q = tf.concat(col, axis=1) inner_product = p * q bi_interaction = inner_product attention_temp = tf.nn.relu(tf.nn.bias_add(tf.tensordot( bi_interaction, self.attention_W, axes=(-1, 0)), self.attention_b)) # Dense(self.attention_factor,'relu',kernel_regularizer=l2(self.l2_reg_w))(bi_interaction) self.normalized_att_score = softmax(tf.tensordot( attention_temp, self.projection_h, axes=(-1, 0)), dim=1) attention_output = reduce_sum( self.normalized_att_score * bi_interaction, axis=1) attention_output = self.dropout(attention_output, training=training) # training afm_out = self.tensordot([attention_output, self.projection_p]) return afm_out def compute_output_shape(self, input_shape): if not isinstance(input_shape, list): raise ValueError('A `AFMLayer` layer should be called ' 'on a list of inputs.') return (None, 1) def get_config(self, ): config = {'attention_factor': self.attention_factor, 'l2_reg_w': self.l2_reg_w, 'dropout_rate': self.dropout_rate, 'seed': self.seed} base_config = super(AFMLayer, self).get_config() base_config.update(config) return base_config class BiInteractionPooling(Layer): """Bi-Interaction Layer used in Neural FM,compress the pairwise element-wise product of features into one single vector. 
Input shape - A 3D tensor with shape:``(batch_size,field_size,embedding_size)``. Output shape - 3D tensor with shape: ``(batch_size,1,embedding_size)``. References - [He X, Chua T S. Neural factorization machines for sparse predictive analytics[C]//Proceedings of the 40th International ACM SIGIR conference on Research and Development in Information Retrieval. ACM, 2017: 355-364.](http://arxiv.org/abs/1708.05027) """ def __init__(self, **kwargs): super(BiInteractionPooling, self).__init__(**kwargs) def build(self, input_shape): if len(input_shape) != 3: raise ValueError( "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (len(input_shape))) super(BiInteractionPooling, self).build( input_shape) # Be sure to call this somewhere! def call(self, inputs, **kwargs): if K.ndim(inputs) != 3: raise ValueError( "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs))) concated_embeds_value = inputs square_of_sum = tf.square(reduce_sum( concated_embeds_value, axis=1, keep_dims=True)) sum_of_square = reduce_sum( concated_embeds_value * concated_embeds_value, axis=1, keep_dims=True) cross_term = 0.5 * (square_of_sum - sum_of_square) return cross_term def compute_output_shape(self, input_shape): return (None, 1, input_shape[-1]) class CIN(Layer): """Compressed Interaction Network used in xDeepFM.This implemention is adapted from code that the author of the paper published on https://github.com/Leavingseason/xDeepFM. Input shape - 3D tensor with shape: ``(batch_size,field_size,embedding_size)``. Output shape - 2D tensor with shape: ``(batch_size, featuremap_num)`` ``featuremap_num = sum(self.layer_size[:-1]) // 2 + self.layer_size[-1]`` if ``split_half=True``,else ``sum(layer_size)`` . Arguments - **layer_size** : list of int.Feature maps in each layer. - **activation** : activation function used on feature maps. - **split_half** : bool.if set to False, half of the feature maps in each hidden will connect to output unit. 
- **seed** : A Python integer to use as random seed. References - [Lian J, Zhou X, Zhang F, et al. xDeepFM: Combining Explicit and Implicit Feature Interactions for Recommender Systems[J]. arXiv preprint arXiv:1803.05170, 2018.] (https://arxiv.org/pdf/1803.05170.pdf) """ def __init__(self, layer_size=(128, 128), activation='relu', split_half=True, l2_reg=1e-5, seed=1024, **kwargs): if len(layer_size) == 0: raise ValueError( "layer_size must be a list(tuple) of length greater than 1") self.layer_size = layer_size self.split_half = split_half self.activation = activation self.l2_reg = l2_reg self.seed = seed super(CIN, self).__init__(**kwargs) def build(self, input_shape): if len(input_shape) != 3: raise ValueError( "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (len(input_shape))) self.field_nums = [int(input_shape[1])] self.filters = [] self.bias = [] for i, size in enumerate(self.layer_size): self.filters.append(self.add_weight(name='filter' + str(i), shape=[1, self.field_nums[-1] * self.field_nums[0], size], dtype=tf.float32, initializer=glorot_uniform( seed=self.seed + i), regularizer=l2(self.l2_reg))) self.bias.append(self.add_weight(name='bias' + str(i), shape=[size], dtype=tf.float32, initializer=Zeros())) if self.split_half: if i != len(self.layer_size) - 1 and size % 2 > 0: raise ValueError( "layer_size must be even number except for the last layer when split_half=True") self.field_nums.append(size // 2) else: self.field_nums.append(size) self.activation_layers = [activation_layer( self.activation) for _ in self.layer_size] super(CIN, self).build(input_shape) # Be sure to call this somewhere! 
def call(self, inputs, **kwargs):
    """Apply the stacked CIN layers and sum every feature map over the embedding axis."""
    if K.ndim(inputs) != 3:
        raise ValueError(
            "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs)))

    dim = int(inputs.get_shape()[-1])
    hidden_nn_layers = [inputs]
    final_result = []

    # Split along the embedding axis so interactions are computed per dimension.
    split_tensor0 = tf.split(hidden_nn_layers[0], dim * [1], 2)
    for idx, layer_size in enumerate(self.layer_size):
        split_tensor = tf.split(hidden_nn_layers[-1], dim * [1], 2)

        dot_result_m = tf.matmul(
            split_tensor0, split_tensor, transpose_b=True)

        dot_result_o = tf.reshape(
            dot_result_m, shape=[dim, -1, self.field_nums[0] * self.field_nums[idx]])

        dot_result = tf.transpose(dot_result_o, perm=[1, 0, 2])

        curr_out = tf.nn.conv1d(
            dot_result, filters=self.filters[idx], stride=1, padding='VALID')

        curr_out = tf.nn.bias_add(curr_out, self.bias[idx])

        curr_out = self.activation_layers[idx](curr_out)

        curr_out = tf.transpose(curr_out, perm=[0, 2, 1])

        if self.split_half:
            if idx != len(self.layer_size) - 1:
                # One half feeds the next layer, the other half goes to the output.
                next_hidden, direct_connect = tf.split(
                    curr_out, 2 * [layer_size // 2], 1)
            else:
                direct_connect = curr_out
                next_hidden = 0
        else:
            direct_connect = curr_out
            next_hidden = curr_out

        final_result.append(direct_connect)
        hidden_nn_layers.append(next_hidden)

    result = tf.concat(final_result, axis=1)
    result = reduce_sum(result, -1, keep_dims=False)

    return result

def compute_output_shape(self, input_shape):
    """Return ``(None, featuremap_num)`` as described in the CIN class docstring."""
    if self.split_half:
        featuremap_num = sum(
            self.layer_size[:-1]) // 2 + self.layer_size[-1]
    else:
        featuremap_num = sum(self.layer_size)
    return (None, featuremap_num)

def get_config(self, ):
    """Return every CIN constructor argument merged into the base layer config.

    ``l2_reg`` is included so a layer rebuilt from its config keeps the same
    filter regularization instead of silently falling back to the default.
    """
    config = {'layer_size': self.layer_size, 'split_half': self.split_half, 'activation': self.activation,
              'l2_reg': self.l2_reg, 'seed': self.seed}
    base_config = super(CIN, self).get_config()
    base_config.update(config)
    return base_config


class CrossNet(Layer):
    """The Cross Network part of Deep&Cross Network model,
    which learns both low and high degree cross feature.

      Input shape
        - 2D tensor with shape: ``(batch_size, units)``.

      Output shape
        - 2D tensor with shape: ``(batch_size, units)``.

      Arguments
        - **layer_num**: Positive integer, the cross layer number

        - **l2_reg**: float between 0 and 1. L2 regularizer strength applied to the kernel weights matrix

        - **parameterization**: string, ``"vector"``  or ``"matrix"`` ,  way to parameterize the cross network.

        - **seed**: A Python integer to use as random seed.

      References
        - [Wang R, Fu B, Fu G, et al. Deep & cross network for ad click predictions[C]//Proceedings of the ADKDD'17. ACM, 2017: 12.](https://arxiv.org/abs/1708.05123)
    """

    def __init__(self, layer_num=2, parameterization='vector', l2_reg=0, seed=1024, **kwargs):
        self.layer_num = layer_num
        self.parameterization = parameterization
        self.l2_reg = l2_reg
        self.seed = seed
        print('CrossNet parameterization:', self.parameterization)
        super(CrossNet, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create one kernel and one bias per cross layer.

        Kernels are ``(dim, 1)`` for ``"vector"`` and ``(dim, dim)`` for
        ``"matrix"``; any other parameterization raises ``ValueError``.
        """
        if len(input_shape) != 2:
            raise ValueError(
                "Unexpected inputs dimensions %d, expect to be 2 dimensions" % (len(input_shape),))

        dim = int(input_shape[-1])
        if self.parameterization == 'vector':
            kernel_shape = (dim, 1)
        elif self.parameterization == 'matrix':
            kernel_shape = (dim, dim)
        else:  # error
            raise ValueError("parameterization should be 'vector' or 'matrix'")

        self.kernels = [self.add_weight(name='kernel' + str(i),
                                        shape=kernel_shape,
                                        initializer=glorot_normal(
                                            seed=self.seed),
                                        regularizer=l2(self.l2_reg),
                                        trainable=True) for i in range(self.layer_num)]
        self.bias = [self.add_weight(name='bias' + str(i),
                                     shape=(dim, 1),
                                     initializer=Zeros(),
                                     trainable=True) for i in range(self.layer_num)]
        # Be sure to call this somewhere!
        super(CrossNet, self).build(input_shape)

    def call(self, inputs, **kwargs):
        """Apply ``layer_num`` cross layers to ``inputs`` and return a 2D tensor."""
        if K.ndim(inputs) != 2:
            raise ValueError(
                "Unexpected inputs dimensions %d, expect to be 2 dimensions" % (K.ndim(inputs)))

        x_0 = tf.expand_dims(inputs, axis=2)
        x_l = x_0
        for i in range(self.layer_num):
            if self.parameterization == 'vector':
                xl_w = tf.tensordot(x_l, self.kernels[i], axes=(1, 0))
                dot_ = tf.matmul(x_0, xl_w)
                x_l = dot_ + self.bias[i] + x_l
            elif self.parameterization == 'matrix':
                xl_w = tf.einsum('ij,bjk->bik', self.kernels[i], x_l)  # W * xi  (bs, dim, 1)
                dot_ = xl_w + self.bias[i]  # W * xi + b
                x_l = x_0 * dot_ + x_l  # x0 · (W * xi + b) +xl  Hadamard-product
            else:  # error
                raise ValueError("parameterization should be 'vector' or 'matrix'")
        x_l = tf.squeeze(x_l, axis=2)
        return x_l

    def get_config(self, ):
        """Return the constructor arguments merged into the base layer config."""
        config = {'layer_num': self.layer_num, 'parameterization': self.parameterization,
                  'l2_reg': self.l2_reg, 'seed': self.seed}
        base_config = super(CrossNet, self).get_config()
        base_config.update(config)
        return base_config

    def compute_output_shape(self, input_shape):
        return input_shape


class CrossNetMix(Layer):
    """The Cross Network part of DCN-Mix model, which improves DCN-M by:
      1 add MOE to learn feature interactions in different subspaces
      2 add nonlinear transformations in low-dimensional space

      Input shape
        - 2D tensor with shape: ``(batch_size, units)``.

      Output shape
        - 2D tensor with shape: ``(batch_size, units)``.

      Arguments
        - **low_rank** : Positive integer, dimensionality of low-rank space.

        - **num_experts** : Positive integer, number of experts.

        - **layer_num**: Positive integer, the cross layer number

        - **l2_reg**: float between 0 and 1. L2 regularizer strength applied to the kernel weights matrix

        - **seed**: A Python integer to use as random seed.

      References
        - [Wang R, Shivanna R, Cheng D Z, et al. DCN-M: Improved Deep & Cross Network for Feature Cross Learning in Web-scale Learning to Rank Systems[J]. 2020.](https://arxiv.org/abs/2008.13535)
    """

    def __init__(self, low_rank=32, num_experts=4, layer_num=2, l2_reg=0, seed=1024, **kwargs):
        self.low_rank = low_rank
        self.num_experts = num_experts
        self.layer_num = layer_num
        self.l2_reg = l2_reg
        self.seed = seed
        super(CrossNetMix, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the per-layer expert weights, the gating layers and the biases."""
        if len(input_shape) != 2:
            raise ValueError(
                "Unexpected inputs dimensions %d, expect to be 2 dimensions" % (len(input_shape),))

        dim = int(input_shape[-1])

        # U: (num_experts, dim, low_rank), one tensor per cross layer
        self.U_list = [self.add_weight(name='U_list' + str(i),
                                       shape=(self.num_experts, dim, self.low_rank),
                                       initializer=glorot_normal(
                                           seed=self.seed),
                                       regularizer=l2(self.l2_reg),
                                       trainable=True) for i in range(self.layer_num)]
        # V: (num_experts, dim, low_rank), one tensor per cross layer
        self.V_list = [self.add_weight(name='V_list' + str(i),
                                       shape=(self.num_experts, dim, self.low_rank),
                                       initializer=glorot_normal(
                                           seed=self.seed),
                                       regularizer=l2(self.l2_reg),
                                       trainable=True) for i in range(self.layer_num)]
        # C: (num_experts, low_rank, low_rank), one tensor per cross layer
        self.C_list = [self.add_weight(name='C_list' + str(i),
                                       shape=(self.num_experts, self.low_rank, self.low_rank),
                                       initializer=glorot_normal(
                                           seed=self.seed),
                                       regularizer=l2(self.l2_reg),
                                       trainable=True) for i in range(self.layer_num)]

        # One gate per expert; the same gates are reused by every cross layer.
        self.gating = [Dense(1, use_bias=False) for _ in range(self.num_experts)]

        self.bias = [self.add_weight(name='bias' + str(i),
                                     shape=(dim, 1),
                                     initializer=Zeros(),
                                     trainable=True) for i in range(self.layer_num)]
        # Be sure to call this somewhere!
        super(CrossNetMix, self).build(input_shape)

    def call(self, inputs, **kwargs):
        """Apply ``layer_num`` mixture-of-experts cross layers and return a 2D tensor."""
        if K.ndim(inputs) != 2:
            raise ValueError(
                "Unexpected inputs dimensions %d, expect to be 2 dimensions" % (K.ndim(inputs)))

        x_0 = tf.expand_dims(inputs, axis=2)
        x_l = x_0
        for i in range(self.layer_num):
            output_of_experts = []
            gating_score_of_experts = []
            for expert_id in range(self.num_experts):
                # (1) G(x_l)
                # compute the gating score by x_l
                gating_score_of_experts.append(self.gating[expert_id](tf.squeeze(x_l, axis=2)))

                # (2) E(x_l)
                # project the input x_l to $\mathbb{R}^{r}$
                v_x = tf.einsum('ij,bjk->bik', tf.transpose(self.V_list[i][expert_id]), x_l)  # (bs, low_rank, 1)

                # nonlinear activation in low rank space
                v_x = tf.nn.tanh(v_x)
                v_x = tf.einsum('ij,bjk->bik', self.C_list[i][expert_id], v_x)  # (bs, low_rank, 1)
                v_x = tf.nn.tanh(v_x)

                # project back to $\mathbb{R}^{d}$
                uv_x = tf.einsum('ij,bjk->bik', self.U_list[i][expert_id], v_x)  # (bs, dim, 1)

                dot_ = uv_x + self.bias[i]
                dot_ = x_0 * dot_  # Hadamard-product

                output_of_experts.append(tf.squeeze(dot_, axis=2))

            # (3) mixture of low-rank experts
            output_of_experts = tf.stack(output_of_experts, 2)  # (bs, dim, num_experts)
            gating_score_of_experts = tf.stack(gating_score_of_experts, 1)  # (bs, num_experts, 1)
            moe_out = tf.matmul(output_of_experts, tf.nn.softmax(gating_score_of_experts, 1))
            x_l = moe_out + x_l  # (bs, dim, 1)
        x_l = tf.squeeze(x_l, axis=2)
        return x_l

    def get_config(self, ):
        """Return the constructor arguments merged into the base layer config."""
        config = {'low_rank': self.low_rank, 'num_experts': self.num_experts, 'layer_num': self.layer_num,
                  'l2_reg': self.l2_reg, 'seed': self.seed}
        base_config = super(CrossNetMix, self).get_config()
        base_config.update(config)
        return base_config

    def compute_output_shape(self, input_shape):
        return input_shape


class FM(Layer):
    """Factorization Machine models pairwise (order-2) feature interactions
     without linear term and bias.

      Input shape
        - 3D tensor with shape: ``(batch_size,field_size,embedding_size)``.

      Output shape
        - 2D tensor with shape: ``(batch_size, 1)``.

      References
        - [Factorization Machines](https://www.csie.ntu.edu.tw/~b97053/paper/Rendle2010FM.pdf)
    """

    def __init__(self, **kwargs):

        super(FM, self).__init__(**kwargs)

    def build(self, input_shape):
        if len(input_shape) != 3:
            # Same wording as the other layers; the previous literal carried a
            # stray "% d" flag and a backslash continuation inside the string.
            raise ValueError(
                "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (len(input_shape)))

        super(FM, self).build(input_shape)  # Be sure to call this somewhere!

    def call(self, inputs, **kwargs):
        """Return ``0.5 * sum((sum of fields)^2 - sum of (fields^2))`` per sample."""
        if K.ndim(inputs) != 3:
            raise ValueError(
                "Unexpected inputs dimensions %d, expect to be 3 dimensions"
                % (K.ndim(inputs)))

        concated_embeds_value = inputs

        square_of_sum = tf.square(reduce_sum(
            concated_embeds_value, axis=1, keep_dims=True))
        sum_of_square = reduce_sum(
            concated_embeds_value * concated_embeds_value, axis=1, keep_dims=True)
        cross_term = square_of_sum - sum_of_square
        cross_term = 0.5 * reduce_sum(cross_term, axis=2, keep_dims=False)

        return cross_term

    def compute_output_shape(self, input_shape):
        return (None, 1)


class InnerProductLayer(Layer):
    """InnerProduct Layer used in PNN that compute the element-wise
    product or inner product between feature vectors.

      Input shape
        - a list of 3D tensor with shape: ``(batch_size,1,embedding_size)``.

      Output shape
        - 3D tensor with shape: ``(batch_size, N*(N-1)/2 ,1)`` if use reduce_sum. or 3D tensor with shape: ``(batch_size, N*(N-1)/2, embedding_size )`` if not use reduce_sum.

      Arguments
        - **reduce_sum**: bool. Whether return inner product or element-wise product

      References
            - [Qu Y, Cai H, Ren K, et al. Product-based neural networks for user response prediction[C]//Data Mining (ICDM), 2016 IEEE 16th International Conference on. IEEE, 2016: 1149-1154.](https://arxiv.org/pdf/1611.00144.pdf)
    """

    def __init__(self, reduce_sum=True, **kwargs):
        self.reduce_sum = reduce_sum
        super(InnerProductLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        """Validate that the input is a list of at least two equal ``(None,1,embedding_size)`` shapes."""
        if not isinstance(input_shape, list) or len(input_shape) < 2:
            raise ValueError('A `InnerProductLayer` layer should be called '
                             'on a list of at least 2 inputs')

        shape_set = {tuple(shape.as_list()) for shape in input_shape}

        if len(shape_set) > 1:
            raise ValueError('A `InnerProductLayer` layer requires '
                             'inputs with same shapes '
                             'Got different shapes: %s' % (shape_set,))

        if len(input_shape[0]) != 3 or input_shape[0][1] != 1:
            # 1-tuple so a tuple-like shape is formatted as a single value.
            raise ValueError('A `InnerProductLayer` layer requires '
                             'inputs of a list with same shape tensor like (None,1,embedding_size)'
                             'Got different shapes: %s' % (input_shape[0],))
        super(InnerProductLayer, self).build(
            input_shape)  # Be sure to call this somewhere!
def call(self, inputs, **kwargs):
    """Return the element-wise (or summed) product of every pair of input embeddings."""
    if K.ndim(inputs[0]) != 3:
        # Report the rank of the offending tensor; ``inputs`` itself is a list.
        raise ValueError(
            "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs[0])))

    # All unordered pairs (i < j) in input order.
    pairs = list(itertools.combinations(inputs, 2))
    p = tf.concat([left for left, _ in pairs], axis=1)  # batch num_pairs k
    q = tf.concat([right for _, right in pairs], axis=1)

    inner_product = p * q
    if self.reduce_sum:
        inner_product = reduce_sum(
            inner_product, axis=2, keep_dims=True)
    return inner_product

def compute_output_shape(self, input_shape):
    """Return ``(batch, num_pairs, 1)`` with ``reduce_sum``, else ``(batch, num_pairs, embed_size)``."""
    num_inputs = len(input_shape)
    num_pairs = num_inputs * (num_inputs - 1) // 2
    first_shape = input_shape[0]
    embed_size = first_shape[-1]
    if self.reduce_sum:
        return (first_shape[0], num_pairs, 1)
    else:
        return (first_shape[0], num_pairs, embed_size)

def get_config(self, ):
    """Return ``reduce_sum`` merged into the base layer config."""
    config = {'reduce_sum': self.reduce_sum, }
    base_config = super(InnerProductLayer, self).get_config()
    base_config.update(config)
    return base_config


class InteractingLayer(Layer):
    """A Layer used in AutoInt that model the correlations between different feature fields by multi-head self-attention mechanism.

      Input shape
            - A 3D tensor with shape: ``(batch_size,field_size,embedding_size)``.

      Output shape
            - 3D tensor with shape:``(batch_size,field_size,att_embedding_size * head_num)``.

      Arguments
            - **att_embedding_size**: int.The embedding size in multi-head self-attention network.
            - **head_num**: int.The head number in multi-head  self-attention network.
            - **use_res**: bool.Whether or not use standard residual connections before output.
            - **scaling**: bool.Whether to divide the attention logits by ``sqrt(att_embedding_size)``.
            - **seed**: A Python integer to use as random seed.

      References
            - [Song W, Shi C, Xiao Z, et al. AutoInt: Automatic Feature Interaction Learning via Self-Attentive Neural Networks[J]. arXiv preprint arXiv:1810.11921, 2018.](https://arxiv.org/abs/1810.11921)
    """

    def __init__(self, att_embedding_size=8, head_num=2, use_res=True, scaling=False, seed=1024, **kwargs):
        if head_num <= 0:
            raise ValueError('head_num must be a int > 0')
        self.att_embedding_size = att_embedding_size
        self.head_num = head_num
        self.use_res = use_res
        self.seed = seed
        self.scaling = scaling
        super(InteractingLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the query/key/value projections and, with ``use_res``, the residual projection."""
        if len(input_shape) != 3:
            raise ValueError(
                "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (len(input_shape)))
        embedding_size = int(input_shape[-1])
        projection_shape = [embedding_size, self.att_embedding_size * self.head_num]
        self.W_Query = self.add_weight(name='query', shape=projection_shape,
                                       dtype=tf.float32,
                                       initializer=TruncatedNormal(seed=self.seed))
        self.W_key = self.add_weight(name='key', shape=projection_shape,
                                     dtype=tf.float32,
                                     initializer=TruncatedNormal(seed=self.seed + 1))
        self.W_Value = self.add_weight(name='value', shape=projection_shape,
                                       dtype=tf.float32,
                                       initializer=TruncatedNormal(seed=self.seed + 2))
        if self.use_res:
            self.W_Res = self.add_weight(name='res', shape=projection_shape,
                                         dtype=tf.float32,
                                         initializer=TruncatedNormal(seed=self.seed))

        # Be sure to call this somewhere!
        super(InteractingLayer, self).build(input_shape)

    def call(self, inputs, **kwargs):
        """Apply multi-head self-attention across the field axis."""
        if K.ndim(inputs) != 3:
            raise ValueError(
                "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs)))

        querys = tf.tensordot(inputs, self.W_Query,
                              axes=(-1, 0))  # None F D*head_num
        keys = tf.tensordot(inputs, self.W_key, axes=(-1, 0))
        values = tf.tensordot(inputs, self.W_Value, axes=(-1, 0))

        # head_num None F D
        querys = tf.stack(tf.split(querys, self.head_num, axis=2))
        keys = tf.stack(tf.split(keys, self.head_num, axis=2))
        values = tf.stack(tf.split(values, self.head_num, axis=2))

        inner_product = tf.matmul(
            querys, keys, transpose_b=True)  # head_num None F F
        if self.scaling:
            inner_product /= self.att_embedding_size ** 0.5
        # Attention scores are kept on the layer after each call.
        self.normalized_att_scores = softmax(inner_product)

        result = tf.matmul(self.normalized_att_scores,
                           values)  # head_num None F D
        result = tf.concat(tf.split(result, self.head_num, ), axis=-1)
        result = tf.squeeze(result, axis=0)  # None F D*head_num

        if self.use_res:
            result += tf.tensordot(inputs, self.W_Res, axes=(-1, 0))
        result = tf.nn.relu(result)

        return result

    def compute_output_shape(self, input_shape):

        return (None, input_shape[1], self.att_embedding_size * self.head_num)

    def get_config(self, ):
        """Return every constructor argument merged into the base layer config.

        ``scaling`` is included so a layer rebuilt from its config keeps the
        same attention scaling instead of falling back to the default.
        """
        config = {'att_embedding_size': self.att_embedding_size, 'head_num': self.head_num, 'use_res': self.use_res,
                  'scaling': self.scaling, 'seed': self.seed}
        base_config = super(InteractingLayer, self).get_config()
        base_config.update(config)
        return base_config


class OutterProductLayer(Layer):
    """OutterProduct Layer used in PNN.This implemention is
    adapted from code that the author of the paper published on https://github.com/Atomu2014/product-nets.

      Input shape
            - A list of N 3D tensor with shape: ``(batch_size,1,embedding_size)``.

      Output shape
            - 2D tensor with shape:``(batch_size,N*(N-1)/2 )``.

      Arguments
            - **kernel_type**: str. The kernel weight matrix type to use,can be mat,vec or num

            - **seed**: A Python integer to use as random seed.

      References
            - [Qu Y, Cai H, Ren K, et al. Product-based neural networks for user response prediction[C]//Data Mining (ICDM), 2016 IEEE 16th International Conference on. IEEE, 2016: 1149-1154.](https://arxiv.org/pdf/1611.00144.pdf)
    """

    def __init__(self, kernel_type='mat', seed=1024, **kwargs):
        if kernel_type not in ['mat', 'vec', 'num']:
            raise ValueError("kernel_type must be mat,vec or num")
        self.kernel_type = kernel_type
        self.seed = seed
        super(OutterProductLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        """Validate the list of input shapes and create the kernel for ``kernel_type``."""
        if not isinstance(input_shape, list) or len(input_shape) < 2:
            raise ValueError('A `OutterProductLayer` layer should be called '
                             'on a list of at least 2 inputs')

        shape_set = {tuple(shape.as_list()) for shape in input_shape}

        if len(shape_set) > 1:
            raise ValueError('A `OutterProductLayer` layer requires '
                             'inputs with same shapes '
                             'Got different shapes: %s' % (shape_set,))

        if len(input_shape[0]) != 3 or input_shape[0][1] != 1:
            # 1-tuple so a tuple-like shape is formatted as a single value.
            raise ValueError('A `OutterProductLayer` layer requires '
                             'inputs of a list with same shape tensor like (None,1,embedding_size)'
                             'Got different shapes: %s' % (input_shape[0],))
        num_inputs = len(input_shape)
        num_pairs = num_inputs * (num_inputs - 1) // 2
        embed_size = int(input_shape[0][-1])

        if self.kernel_type == 'mat':
            kernel_shape = (embed_size, num_pairs, embed_size)
        elif self.kernel_type == 'vec':
            kernel_shape = (num_pairs, embed_size,)
        else:  # 'num' -- __init__ rejects every other value
            kernel_shape = (num_pairs, 1)
        self.kernel = self.add_weight(shape=kernel_shape,
                                      initializer=glorot_uniform(seed=self.seed),
                                      name='kernel')

        # Pass the full list of shapes, like the other list-input layers in
        # this file; the original rebound ``input_shape`` to its first element.
        super(OutterProductLayer, self).build(
            input_shape)  # Be sure to call this somewhere!
def call(self, inputs, **kwargs): if K.ndim(inputs[0]) != 3: raise ValueError( "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs))) embed_list = inputs row = [] col = [] num_inputs = len(embed_list) for i in range(num_inputs - 1): for j in range(i + 1, num_inputs): row.append(i) col.append(j) p = tf.concat([embed_list[idx] for idx in row], axis=1) # batch num_pairs k # Reshape([num_pairs, self.embedding_size]) q = tf.concat([embed_list[idx] for idx in col], axis=1) # ------------------------- if self.kernel_type == 'mat': p = tf.expand_dims(p, 1) # k k* pair* k # batch * pair kp = reduce_sum( # batch * pair * k tf.multiply( # batch * pair * k tf.transpose( # batch * k * pair reduce_sum( # batch * k * pair * k tf.multiply( p, self.kernel), -1), [0, 2, 1]), q), -1) else: # 1 * pair * (k or 1) k = tf.expand_dims(self.kernel, 0) # batch * pair kp = reduce_sum(p * q * k, -1) # p q # b * p * k return kp def compute_output_shape(self, input_shape): num_inputs = len(input_shape) num_pairs = int(num_inputs * (num_inputs - 1) / 2) return (None, num_pairs) def get_config(self, ): config = {'kernel_type': self.kernel_type, 'seed': self.seed} base_config = super(OutterProductLayer, self).get_config() base_config.update(config) return base_config class FGCNNLayer(Layer): """Feature Generation Layer used in FGCNN,including Convolution,MaxPooling and Recombination. Input shape - A 3D tensor with shape:``(batch_size,field_size,embedding_size)``. Output shape - 3D tensor with shape: ``(batch_size,new_feture_num,embedding_size)``. References - [Liu B, Tang R, Chen Y, et al. Feature Generation by Convolutional Neural Network for Click-Through Rate Prediction[J]. 
class FGCNNLayer(Layer):
    """Feature Generation Layer used in FGCNN, including Convolution, MaxPooling and Recombination.

      Input shape
        - A 3D tensor with shape: ``(batch_size,field_size,embedding_size)``.

      Output shape
        - 3D tensor with shape: ``(batch_size,new_feture_num,embedding_size)``.

      Arguments
        - **filters**, **kernel_width**, **new_maps**, **pooling_width**: equal-length
          sequences giving, per stage, the convolution filter count, the convolution
          kernel height, the number of new feature maps and the pooling height.

      References
        - [Liu B, Tang R, Chen Y, et al. Feature Generation by Convolutional Neural Network for Click-Through Rate Prediction[J]. arXiv preprint arXiv:1904.04447, 2019.](https://arxiv.org/pdf/1904.04447)
    """

    def __init__(self, filters=(14, 16,), kernel_width=(7, 7,), new_maps=(3, 3,), pooling_width=(2, 2),
                 **kwargs):
        same_length = len(filters) == len(kernel_width) == len(new_maps) == len(pooling_width)
        if not same_length:
            raise ValueError("length of argument must be equal")
        self.filters = filters
        self.kernel_width = kernel_width
        self.new_maps = new_maps
        self.pooling_width = pooling_width

        super(FGCNNLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create one Conv2D / MaxPooling2D / Dense triple per stage."""
        rank = len(input_shape)
        if rank != 3:
            raise ValueError(
                "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (rank))

        self.conv_layers = []
        self.pooling_layers = []
        self.dense_layers = []

        # Track the static 4D shape stage by stage; only its row count is used below.
        pooling_shape = input_shape.as_list() + [1, ]
        embedding_size = int(input_shape[-1])

        stages = zip(self.filters, self.kernel_width, self.new_maps, self.pooling_width)
        for filters, width, new_filters, pooling_width in stages:
            conv_output_shape = self._conv_output_shape(pooling_shape, (width, 1))
            pooling_shape = self._pooling_output_shape(conv_output_shape, (pooling_width, 1))

            conv = Conv2D(filters=filters, kernel_size=(width, 1), strides=(1, 1),
                          padding='same', activation='tanh', use_bias=True, )
            pool = MaxPooling2D(pool_size=(pooling_width, 1))
            dense = Dense(pooling_shape[1] * embedding_size * new_filters,
                          activation='tanh', use_bias=True)

            self.conv_layers.append(conv)
            self.pooling_layers.append(pool)
            self.dense_layers.append(dense)

        self.flatten = Flatten()

        super(FGCNNLayer, self).build(
            input_shape)  # Be sure to call this somewhere!

    def call(self, inputs, **kwargs):
        """Run every stage and concatenate the generated features along the field axis."""
        if K.ndim(inputs) != 3:
            raise ValueError(
                "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs)))

        embedding_size = int(inputs.shape[-1])
        pooling_result = tf.expand_dims(inputs, axis=3)

        new_feature_list = []
        stage_layers = zip(self.conv_layers, self.pooling_layers, self.dense_layers, self.new_maps)
        for conv, pool, dense, new_filters in stage_layers:
            pooling_result = pool(conv(pooling_result))
            recombined = dense(self.flatten(pooling_result))
            target_shape = (-1, int(pooling_result.shape[1]) * new_filters, embedding_size)
            new_feature_list.append(tf.reshape(recombined, target_shape))

        return concat_func(new_feature_list, axis=1)

    def compute_output_shape(self, input_shape):
        total_new_features = 0
        remaining = input_shape[1]

        for new_maps, pool_width in zip(self.new_maps, self.pooling_width):
            remaining = remaining // pool_width
            total_new_features += new_maps * remaining

        return (None, total_new_features, input_shape[-1])

    def get_config(self, ):
        base_config = super(FGCNNLayer, self).get_config()
        base_config.update({'kernel_width': self.kernel_width, 'filters': self.filters,
                            'new_maps': self.new_maps, 'pooling_width': self.pooling_width})
        return base_config

    def _conv_output_shape(self, input_shape, kernel_size):
        # channels_last
        new_space = [
            utils.conv_output_length(dim, size, padding='same', stride=1, dilation=1)
            for dim, size in zip(input_shape[1:-1], kernel_size)
        ]
        # NOTE(review): the channel entry is the whole `filters` sequence rather than
        # this stage's filter count; build() only reads the row entry.
        return ([input_shape[0]] + new_space + [self.filters])

    def _pooling_output_shape(self, input_shape, pool_size):
        # channels_last
        pool_rows, pool_cols = pool_size[0], pool_size[1]
        rows = utils.conv_output_length(input_shape[1], pool_rows, 'valid', pool_rows)
        cols = utils.conv_output_length(input_shape[2], pool_cols, 'valid', pool_cols)
        return [input_shape[0], rows, cols, input_shape[3]]
class SENETLayer(Layer):
    """SENETLayer used in FiBiNET.

      Input shape
        - A list of 3D tensor with shape: ``(batch_size,1,embedding_size)``.

      Output shape
        - A list of 3D tensor with shape: ``(batch_size,1,embedding_size)``.

      Arguments
        - **reduction_ratio** : Positive integer; the hidden size of the excitation
          network is ``max(1, field_size // reduction_ratio)``.

        - **seed** : A Python integer to use as random seed.

      References
        - [FiBiNET: Combining Feature Importance and Bilinear feature Interaction for Click-Through Rate Prediction](https://arxiv.org/pdf/1905.09433.pdf)
    """

    def __init__(self, reduction_ratio=3, seed=1024, **kwargs):
        self.reduction_ratio = reduction_ratio
        self.seed = seed
        super(SENETLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the two excitation weight matrices from the number of input fields."""
        if not isinstance(input_shape, list) or len(input_shape) < 2:
            raise ValueError('A `SENETLayer` layer should be called '
                             'on a list of at least 2 inputs')

        # The misspelt attribute name `filed_size` is kept for backward compatibility.
        self.filed_size = len(input_shape)
        self.embedding_size = input_shape[0][-1]
        reduction_size = max(1, self.filed_size // self.reduction_ratio)

        self.W_1 = self.add_weight(shape=(self.filed_size, reduction_size),
                                   initializer=glorot_normal(seed=self.seed), name="W_1")
        self.W_2 = self.add_weight(shape=(reduction_size, self.filed_size),
                                   initializer=glorot_normal(seed=self.seed), name="W_2")

        self.tensordot = Lambda(
            lambda x: tf.tensordot(x[0], x[1], axes=(-1, 0)))

        # Be sure to call this somewhere!
        super(SENETLayer, self).build(input_shape)

    def call(self, inputs, training=None, **kwargs):
        """Re-weight every field embedding by its learned importance."""
        if K.ndim(inputs[0]) != 3:
            # `inputs` is a list, so report the rank of its first tensor.
            raise ValueError(
                "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs[0])))

        inputs = concat_func(inputs, axis=1)
        # Squeeze: mean over the embedding axis gives one statistic per field.
        Z = reduce_mean(inputs, axis=-1, )

        # Excitation: two ReLU projections, field_size -> reduction_size -> field_size.
        A_1 = tf.nn.relu(self.tensordot([Z, self.W_1]))
        A_2 = tf.nn.relu(self.tensordot([A_1, self.W_2]))
        # Re-weight: scale each field embedding by its weight.
        V = tf.multiply(inputs, tf.expand_dims(A_2, axis=2))

        return tf.split(V, self.filed_size, axis=1)

    def compute_output_shape(self, input_shape):
        return input_shape

    def compute_mask(self, inputs, mask=None):
        return [None] * self.filed_size

    def get_config(self, ):
        config = {'reduction_ratio': self.reduction_ratio, 'seed': self.seed}
        base_config = super(SENETLayer, self).get_config()
        base_config.update(config)
        return base_config
class BilinearInteraction(Layer):
    """BilinearInteraction Layer used in FiBiNET.

      Input shape
        - A list of 3D tensor with shape: ``(batch_size,1,embedding_size)``. Its length is ``filed_size``.

      Output shape
        - 3D tensor with shape: ``(batch_size,filed_size*(filed_size-1)/2,embedding_size)``.

      Arguments
        - **bilinear_type** : String, types of bilinear functions used in this layer:
          ``"all"`` (one shared matrix), ``"each"`` (one matrix per left-hand field) or
          ``"interaction"`` (one matrix per field pair).

        - **seed** : A Python integer to use as random seed.

      References
        - [FiBiNET: Combining Feature Importance and Bilinear feature Interaction for Click-Through Rate Prediction](https://arxiv.org/pdf/1905.09433.pdf)
    """

    def __init__(self, bilinear_type="interaction", seed=1024, **kwargs):
        self.bilinear_type = bilinear_type
        self.seed = seed

        super(BilinearInteraction, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the bilinear weight(s) required by ``bilinear_type``."""
        if not isinstance(input_shape, list) or len(input_shape) < 2:
            raise ValueError('A `BilinearInteraction` layer should be called '
                             'on a list of at least 2 inputs')
        embedding_size = int(input_shape[0][-1])
        weight_shape = (embedding_size, embedding_size)

        if self.bilinear_type == "all":
            self.W = self.add_weight(shape=weight_shape,
                                     initializer=glorot_normal(seed=self.seed),
                                     name="bilinear_weight")
        elif self.bilinear_type == "each":
            # The last field never appears on the left of a pair, hence n - 1 matrices.
            self.W_list = [self.add_weight(shape=weight_shape,
                                           initializer=glorot_normal(seed=self.seed),
                                           name="bilinear_weight" + str(i))
                           for i in range(len(input_shape) - 1)]
        elif self.bilinear_type == "interaction":
            self.W_list = [self.add_weight(shape=weight_shape,
                                           initializer=glorot_normal(seed=self.seed),
                                           name="bilinear_weight" + str(i) + '_' + str(j))
                           for i, j in itertools.combinations(range(len(input_shape)), 2)]
        else:
            raise NotImplementedError

        super(BilinearInteraction, self).build(
            input_shape)  # Be sure to call this somewhere!

    def call(self, inputs, **kwargs):
        """Return ``(v_i . W) * v_j`` for every field pair, concatenated on axis 1."""
        if K.ndim(inputs[0]) != 3:
            # `inputs` is a list, so report the rank of its first tensor.
            raise ValueError(
                "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs[0])))

        n = len(inputs)
        if self.bilinear_type == "all":
            vidots = [tf.tensordot(inputs[i], self.W, axes=(-1, 0)) for i in range(n)]
            p = [tf.multiply(vidots[i], inputs[j]) for i, j in itertools.combinations(range(n), 2)]
        elif self.bilinear_type == "each":
            vidots = [tf.tensordot(inputs[i], self.W_list[i], axes=(-1, 0)) for i in range(n - 1)]
            p = [tf.multiply(vidots[i], inputs[j]) for i, j in itertools.combinations(range(n), 2)]
        elif self.bilinear_type == "interaction":
            # W_list was built in the same combinations order, so zip pairs them correctly.
            p = [tf.multiply(tf.tensordot(v[0], w, axes=(-1, 0)), v[1])
                 for v, w in zip(itertools.combinations(inputs, 2), self.W_list)]
        else:
            raise NotImplementedError
        output = concat_func(p, axis=1)
        return output

    def compute_output_shape(self, input_shape):
        filed_size = len(input_shape)
        embedding_size = input_shape[0][-1]

        return (None, filed_size * (filed_size - 1) // 2, embedding_size)

    def get_config(self, ):
        config = {'bilinear_type': self.bilinear_type, 'seed': self.seed}
        base_config = super(BilinearInteraction, self).get_config()
        base_config.update(config)
        return base_config
class FieldWiseBiInteraction(Layer):
    """Field-Wise Bi-Interaction Layer used in FLEN, compress the
     pairwise element-wise product of features into one single vector.

      Input shape
        - A list of 3D tensor with shape: ``(batch_size,field_size,embedding_size)``.

      Output shape
        - 2D tensor with shape: ``(batch_size,embedding_size)``.

      Arguments
        - **use_bias** : Boolean, if use bias.

        - **seed** : A Python integer to use as random seed.

      References
        - [FLEN: Leveraging Field for Scalable CTR Prediction](https://arxiv.org/pdf/1911.04690)
    """

    def __init__(self, use_bias=True, seed=1024, **kwargs):
        self.use_bias = use_bias
        self.seed = seed

        super(FieldWiseBiInteraction, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the pair (MF) and per-field (FM) weights and the optional biases."""
        if not isinstance(input_shape, list) or len(input_shape) < 2:
            raise ValueError(
                'A `Field-Wise Bi-Interaction` layer should be called '
                'on a list of at least 2 inputs')

        self.num_fields = len(input_shape)
        embedding_size = int(input_shape[0][-1])
        num_pairs = self.num_fields * (self.num_fields - 1) // 2

        self.kernel_mf = self.add_weight(
            name='kernel_mf',
            shape=(num_pairs, 1),
            initializer=Ones(),
            regularizer=None,
            trainable=True)

        self.kernel_fm = self.add_weight(
            name='kernel_fm',
            shape=(self.num_fields, 1),
            initializer=Constant(value=0.5),
            regularizer=None,
            trainable=True)
        if self.use_bias:
            # The shape must be a 1-tuple; `(embedding_size)` is just a parenthesised scalar.
            self.bias_mf = self.add_weight(name='bias_mf',
                                           shape=(embedding_size,),
                                           initializer=Zeros())
            self.bias_fm = self.add_weight(name='bias_fm',
                                           shape=(embedding_size,),
                                           initializer=Zeros())

        super(FieldWiseBiInteraction,
              self).build(input_shape)  # Be sure to call this somewhere!

    def call(self, inputs, **kwargs):
        """Return the sum of the between-field (MF) and within-field (FM) interactions."""
        if K.ndim(inputs[0]) != 3:
            # `inputs` is a list, so report the rank of its first tensor.
            raise ValueError(
                "Unexpected inputs dimensions %d, expect to be 3 dimensions" %
                (K.ndim(inputs[0])))

        field_wise_embeds_list = inputs

        # Per-field sum of embeddings, shared by both modules below.
        field_sums = [
            reduce_sum(field_i_vectors, axis=1, keep_dims=True)
            for field_i_vectors in field_wise_embeds_list
        ]

        # MF module: weighted element-wise product of every pair of field sums.
        field_wise_vectors = tf.concat(field_sums, 1)

        pairs = list(itertools.combinations(range(self.num_fields), 2))
        left = [i for i, _ in pairs]
        right = [j for _, j in pairs]

        embeddings_left = tf.gather(params=field_wise_vectors,
                                    indices=left,
                                    axis=1)
        embeddings_right = tf.gather(params=field_wise_vectors,
                                     indices=right,
                                     axis=1)

        embeddings_prod = embeddings_left * embeddings_right
        field_weighted_embedding = embeddings_prod * self.kernel_mf
        h_mf = reduce_sum(field_weighted_embedding, axis=1)
        if self.use_bias:
            h_mf = tf.nn.bias_add(h_mf, self.bias_mf)

        # FM module: square-of-sum minus sum-of-square inside each field.
        field_fm = tf.concat([
            tf.square(field_sum) - reduce_sum(field_i_vectors * field_i_vectors, axis=1, keep_dims=True)
            for field_sum, field_i_vectors in zip(field_sums, field_wise_embeds_list)
        ], 1)

        h_fm = reduce_sum(field_fm * self.kernel_fm, axis=1)
        if self.use_bias:
            h_fm = tf.nn.bias_add(h_fm, self.bias_fm)

        return h_mf + h_fm

    def compute_output_shape(self, input_shape):
        return (None, input_shape[0][-1])

    def get_config(self, ):
        config = {'use_bias': self.use_bias, 'seed': self.seed}
        base_config = super(FieldWiseBiInteraction, self).get_config()
        base_config.update(config)
        return base_config
class FwFMLayer(Layer):
    """Field-weighted Factorization Machines

      Input shape
        - 3D tensor with shape: ``(batch_size,field_size,embedding_size)``.

      Output shape
        - 2D tensor with shape: ``(batch_size, 1)``.

      Arguments
        - **num_fields** : integer for number of fields

        - **regularizer** : L2 regularizer weight for the field strength parameters of FwFM

      References
        - [Field-weighted Factorization Machines for Click-Through Rate Prediction in Display Advertising](https://arxiv.org/pdf/1806.03514.pdf)
    """

    def __init__(self, num_fields=4, regularizer=0.000001, **kwargs):
        self.num_fields = num_fields
        self.regularizer = regularizer
        super(FwFMLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        """Validate the input shape and create the field-pair strength matrix."""
        if len(input_shape) != 3:
            # Implicit concatenation keeps source indentation out of the message.
            raise ValueError("Unexpected inputs dimensions %d, "
                             "expect to be 3 dimensions" % (len(input_shape)))

        if input_shape[1] != self.num_fields:
            raise ValueError("Mismatch in number of fields {} and "
                             "concatenated embeddings dims {}".format(self.num_fields, input_shape[1]))

        # Only the entries with row index < column index are read in call().
        self.field_strengths = self.add_weight(name='field_pair_strengths',
                                               shape=(self.num_fields, self.num_fields),
                                               initializer=TruncatedNormal(),
                                               regularizer=l2(self.regularizer),
                                               trainable=True)

        super(FwFMLayer, self).build(input_shape)  # Be sure to call this somewhere!

    def call(self, inputs, **kwargs):
        """Return the strength-weighted sum of pairwise inner products, ``(batch, 1)``."""
        if K.ndim(inputs) != 3:
            raise ValueError(
                "Unexpected inputs dimensions %d, expect to be 3 dimensions"
                % (K.ndim(inputs)))

        if inputs.shape[1] != self.num_fields:
            raise ValueError("Mismatch in number of fields {} and "
                             "concatenated embeddings dims {}".format(self.num_fields, inputs.shape[1]))

        pairwise_inner_prods = []
        for fi, fj in itertools.combinations(range(self.num_fields), 2):
            # get field strength for pair fi and fj
            r_ij = self.field_strengths[fi, fj]

            # get embeddings for the features of both the fields, each (batch, embedding)
            feat_embed_i = inputs[:, fi, :]
            feat_embed_j = inputs[:, fj, :]

            f = tf.scalar_mul(r_ij, batch_dot(feat_embed_i, feat_embed_j, axes=1))
            pairwise_inner_prods.append(f)

        sum_ = tf.add_n(pairwise_inner_prods)
        return sum_

    def compute_output_shape(self, input_shape):
        return (None, 1)

    def get_config(self):
        config = super(FwFMLayer, self).get_config().copy()
        config.update({
            'num_fields': self.num_fields,
            'regularizer': self.regularizer
        })
        return config
class FEFMLayer(Layer):
    """Field-Embedded Factorization Machines

      Input shape
        - 3D tensor with shape: ``(batch_size,field_size,embedding_size)``.

      Output shape
        - 2D tensor with shape: ``(batch_size, (num_fields * (num_fields-1))/2)``
          # concatenated FEFM interaction embeddings

      Arguments
        - **regularizer** : L2 regularizer weight for the field pair matrix embeddings parameters of FEFM

      References
        - [Field-Embedded Factorization Machines for Click-through Rate Prediction](https://arxiv.org/pdf/2009.09931.pdf)
    """

    def __init__(self, regularizer, **kwargs):
        self.regularizer = regularizer
        super(FEFMLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create one ``(embedding_size, embedding_size)`` matrix per field pair."""
        if len(input_shape) != 3:
            # Implicit concatenation keeps source indentation out of the message.
            raise ValueError("Unexpected inputs dimensions %d, "
                             "expect to be 3 dimensions" % (len(input_shape)))

        self.num_fields = int(input_shape[1])
        embedding_size = int(input_shape[2])

        self.field_embeddings = {}
        for fi, fj in itertools.combinations(range(self.num_fields), 2):
            field_pair_id = str(fi) + "-" + str(fj)
            self.field_embeddings[field_pair_id] = self.add_weight(name='field_embeddings' + field_pair_id,
                                                                   shape=(embedding_size, embedding_size),
                                                                   initializer=TruncatedNormal(),
                                                                   regularizer=l2(self.regularizer),
                                                                   trainable=True)

        super(FEFMLayer, self).build(input_shape)  # Be sure to call this somewhere!

    def call(self, inputs, **kwargs):
        """Return one interaction score per field pair, concatenated on axis 1."""
        if K.ndim(inputs) != 3:
            raise ValueError(
                "Unexpected inputs dimensions %d, expect to be 3 dimensions"
                % (K.ndim(inputs)))

        pairwise_inner_prods = []
        for fi, fj in itertools.combinations(range(self.num_fields), 2):
            field_pair_id = str(fi) + "-" + str(fj)
            # (batch, embedding) slices of the two fields
            feat_embed_i = inputs[:, fi, :]
            feat_embed_j = inputs[:, fj, :]
            field_pair_embed_ij = self.field_embeddings[field_pair_id]

            # W + W^T makes the pair matrix symmetric.
            feat_embed_i_tr = tf.matmul(feat_embed_i, field_pair_embed_ij + tf.transpose(field_pair_embed_ij))

            f = batch_dot(feat_embed_i_tr, feat_embed_j, axes=1)
            pairwise_inner_prods.append(f)

        concat_vec = tf.concat(pairwise_inner_prods, axis=1)
        return concat_vec

    def compute_output_shape(self, input_shape):
        num_fields = int(input_shape[1])
        # Floor division: a shape dimension must be an int, and n*(n-1) is always even.
        return (None, (num_fields * (num_fields - 1)) // 2)

    def get_config(self):
        config = super(FEFMLayer, self).get_config().copy()
        config.update({
            'regularizer': self.regularizer,
        })
        return config
class BridgeModule(Layer):
    """Bridge Module used in EDCN

      Input shape
        - A list of two 2D tensor with shape: ``(batch_size, units)``.

      Output shape
        - 2D tensor with shape: ``(batch_size, units)``.

      Arguments
        - **bridge_type**: The type of bridge interaction, one of 'pointwise_addition', 'hadamard_product', 'concatenation', 'attention_pooling'

        - **activation**: Activation function to use.

      References
        - [Enhancing Explicit and Implicit Feature Interactions via Information Sharing for Parallel Deep CTR Models.](https://dlp-kdd.github.io/assets/pdf/DLP-KDD_2021_paper_12.pdf)

    """

    def __init__(self, bridge_type='hadamard_product', activation='relu', **kwargs):
        # Reject unknown types here; call() would otherwise silently return None.
        if bridge_type not in ('pointwise_addition', 'hadamard_product', 'concatenation', 'attention_pooling'):
            raise ValueError("bridge_type must be one of 'pointwise_addition', 'hadamard_product', "
                             "'concatenation', 'attention_pooling'")
        self.bridge_type = bridge_type
        self.activation = activation

        super(BridgeModule, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the sub-layers needed by the 'concatenation' / 'attention_pooling' types."""
        # call() unpacks exactly two tensors, so reject any other length up front.
        if not isinstance(input_shape, list) or len(input_shape) != 2:
            raise ValueError(
                'A `BridgeModule` layer should be called '
                'on a list of 2 inputs')

        self.dnn_dim = int(input_shape[0][-1])
        if self.bridge_type == "concatenation":
            self.dense = Dense(self.dnn_dim, self.activation)
        elif self.bridge_type == "attention_pooling":
            self.dense_x = DNN([self.dnn_dim, self.dnn_dim], self.activation, output_activation='softmax')
            self.dense_h = DNN([self.dnn_dim, self.dnn_dim], self.activation, output_activation='softmax')

        super(BridgeModule, self).build(input_shape)  # Be sure to call this somewhere!

    def call(self, inputs, **kwargs):
        """Fuse the two input tensors according to ``bridge_type``."""
        x, h = inputs
        if self.bridge_type == "pointwise_addition":
            return x + h
        elif self.bridge_type == "hadamard_product":
            return x * h
        elif self.bridge_type == "concatenation":
            return self.dense(tf.concat([x, h], axis=-1))
        elif self.bridge_type == "attention_pooling":
            # Softmax-activated DNN outputs act as element-wise attention weights.
            a_x = self.dense_x(x)
            a_h = self.dense_h(h)
            return a_x * x + a_h * h
        # Reached only if bridge_type was changed after construction.
        raise ValueError("Unsupported bridge_type: %s" % (self.bridge_type,))

    def compute_output_shape(self, input_shape):
        return (None, self.dnn_dim)

    def get_config(self):
        base_config = super(BridgeModule, self).get_config().copy()
        base_config.update({
            'bridge_type': self.bridge_type,
            'activation': self.activation
        })
        return base_config
class LayerNormalization(Layer):
    """Layer normalization with optional learned scale (gamma) and shift (beta).

      Arguments
        - **axis**: axis (or axes) over which the mean and variance are computed.

        - **eps**: small constant added to the variance before the square root.

        - **center**: bool. Whether to add the learned ``beta`` offset.

        - **scale**: bool. Whether to multiply by the learned ``gamma`` factor.
    """

    def __init__(self, axis=-1, eps=1e-9, center=True, scale=True, **kwargs):
        self.axis = axis
        self.eps = eps
        self.center = center
        self.scale = scale
        super(LayerNormalization, self).__init__(**kwargs)

    def build(self, input_shape):
        # gamma and beta always span the last input dimension, whatever `axis` is.
        self.gamma = self.add_weight(name='gamma', shape=input_shape[-1:],
                                     initializer=Ones(), trainable=True)
        self.beta = self.add_weight(name='beta', shape=input_shape[-1:],
                                    initializer=Zeros(), trainable=True)
        super(LayerNormalization, self).build(input_shape)

    def call(self, inputs):
        """Normalize ``inputs`` over ``self.axis``, then apply gamma / beta if enabled."""
        mean = K.mean(inputs, axis=self.axis, keepdims=True)
        # The variance must be reduced over the same axis as the mean.
        variance = K.mean(K.square(inputs - mean), axis=self.axis, keepdims=True)
        std = K.sqrt(variance + self.eps)
        outputs = (inputs - mean) / std
        if self.scale:
            outputs *= self.gamma
        if self.center:
            outputs += self.beta
        return outputs

    def compute_output_shape(self, input_shape):
        return input_shape

    def get_config(self, ):
        config = {'axis': self.axis, 'eps': self.eps, 'center': self.center, 'scale': self.scale}
        base_config = super(LayerNormalization, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
class SequencePoolingLayer(Layer):
    """The SequencePoolingLayer is used to apply pooling operation(sum,mean,max) on variable-length sequence feature/multi-value feature.

      Input shape
        - A list of two tensor [seq_value,seq_len]

        - seq_value is a 3D tensor with shape: ``(batch_size, T, embedding_size)``

        - seq_len is a 2D tensor with shape : ``(batch_size, 1)``, indicate valid length of each sequence.

      Output shape
        - 3D tensor with shape: ``(batch_size, 1, embedding_size)``.

      Arguments
        - **mode**: str. Pooling operation to be used, can be sum, mean or max.

        - **supports_masking**: If True, the input need to support masking.
    """

    def __init__(self, mode='mean', supports_masking=False, **kwargs):

        if mode not in ['sum', 'mean', 'max']:
            # The message lists every accepted mode, including 'max'.
            raise ValueError("mode must be sum, mean or max")
        self.mode = mode
        self.eps = tf.constant(1e-8, tf.float32)
        super(SequencePoolingLayer, self).__init__(**kwargs)

        self.supports_masking = supports_masking

    def build(self, input_shape):
        if not self.supports_masking:
            # The static sequence length is needed to build the mask from seq_len.
            self.seq_len_max = int(input_shape[0][1])
        super(SequencePoolingLayer, self).build(
            input_shape)  # Be sure to call this somewhere!

    def call(self, seq_value_len_list, mask=None, **kwargs):
        """Pool the valid time steps of each sequence into one embedding."""
        if self.supports_masking:
            if mask is None:
                raise ValueError(
                    "When supports_masking=True,input must support masking")
            uiseq_embed_list = seq_value_len_list
            mask = tf.cast(mask, tf.float32)  # tf.to_float(mask)
            user_behavior_length = reduce_sum(mask, axis=-1, keep_dims=True)
            mask = tf.expand_dims(mask, axis=2)
        else:
            uiseq_embed_list, user_behavior_length = seq_value_len_list

            mask = tf.sequence_mask(user_behavior_length,
                                    self.seq_len_max, dtype=tf.float32)
            # sequence_mask on a (batch, 1) length gives (batch, 1, T); move T to axis 1.
            mask = tf.transpose(mask, (0, 2, 1))

        embedding_size = uiseq_embed_list.shape[-1]

        mask = tf.tile(mask, [1, 1, embedding_size])

        if self.mode == "max":
            # Push padded positions far below any real value before taking the max.
            hist = uiseq_embed_list - (1 - mask) * 1e9
            return reduce_max(hist, 1, keep_dims=True)

        hist = reduce_sum(uiseq_embed_list * mask, 1, keep_dims=False)

        if self.mode == "mean":
            # eps guards against division by zero for empty sequences.
            hist = div(hist, tf.cast(user_behavior_length, tf.float32) + self.eps)

        hist = tf.expand_dims(hist, axis=1)
        return hist

    def compute_output_shape(self, input_shape):
        if self.supports_masking:
            return (None, 1, input_shape[-1])
        else:
            return (None, 1, input_shape[0][-1])

    def compute_mask(self, inputs, mask):
        return None

    def get_config(self, ):
        config = {'mode': self.mode, 'supports_masking': self.supports_masking}
        base_config = super(SequencePoolingLayer, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
class WeightedSequenceLayer(Layer):
    """The WeightedSequenceLayer is used to apply weight score on variable-length sequence feature/multi-value feature.

      Input shape
        - A list of three tensor [seq_value,seq_len,seq_weight]
          (only [seq_value,seq_weight] when ``supports_masking=True``)

        - seq_value is a 3D tensor with shape: ``(batch_size, T, embedding_size)``

        - seq_len is a 2D tensor with shape : ``(batch_size, 1)``, indicate valid length of each sequence.

        - seq_weight is a 3D tensor with shape: ``(batch_size, T, 1)``

      Output shape
        - 3D tensor with shape: ``(batch_size, T, embedding_size)``.

      Arguments
        - **weight_normalization**: bool. Whether normalize the weight score before applying to sequence.

        - **supports_masking**: If True, the input need to support masking.
    """

    def __init__(self, weight_normalization=True, supports_masking=False, **kwargs):
        super(WeightedSequenceLayer, self).__init__(**kwargs)
        self.weight_normalization = weight_normalization
        self.supports_masking = supports_masking

    def build(self, input_shape):
        if not self.supports_masking:
            # Static sequence length, used to build the mask from seq_len.
            self.seq_len_max = int(input_shape[0][1])
        super(WeightedSequenceLayer, self).build(
            input_shape)  # Be sure to call this somewhere!

    def call(self, input_list, mask=None, **kwargs):
        """Multiply each time step of the sequence by its (optionally softmaxed) weight."""
        if not self.supports_masking:
            key_input, key_length_input, value_input = input_list
            mask = tf.sequence_mask(key_length_input,
                                    self.seq_len_max, dtype=tf.bool)
            # (batch, 1, T) -> (batch, T, 1)
            mask = tf.transpose(mask, (0, 2, 1))
        else:
            if mask is None:
                raise ValueError(
                    "When supports_masking=True,input must support masking")
            key_input, value_input = input_list
            mask = tf.expand_dims(mask[0], axis=2)

        embedding_size = key_input.shape[-1]

        # Padded positions get a very negative score before softmax, or zero otherwise.
        if self.weight_normalization:
            paddings = tf.ones_like(value_input) * (-2 ** 32 + 1)
        else:
            paddings = tf.zeros_like(value_input)
        value_input = tf.where(mask, value_input, paddings)

        if self.weight_normalization:
            value_input = softmax(value_input, dim=1)

        if len(value_input.shape) == 2:
            value_input = tf.expand_dims(value_input, axis=2)
            value_input = tf.tile(value_input, [1, 1, embedding_size])

        return tf.multiply(key_input, value_input)

    def compute_output_shape(self, input_shape):
        return input_shape[0]

    def compute_mask(self, inputs, mask):
        if not self.supports_masking:
            return None
        return mask[0]

    def get_config(self, ):
        own_config = {'weight_normalization': self.weight_normalization,
                      'supports_masking': self.supports_masking}
        parent_config = super(WeightedSequenceLayer, self).get_config()
        return dict(list(parent_config.items()) + list(own_config.items()))
class AttentionSequencePoolingLayer(Layer):
    """The Attentional sequence pooling operation used in DIN.

      Input shape
        - A list of three tensor: [query,keys,keys_length]

        - query is a 3D tensor with shape:  ``(batch_size, 1, embedding_size)``

        - keys is a 3D tensor with shape:   ``(batch_size, T, embedding_size)``

        - keys_length is a 2D tensor with shape: ``(batch_size, 1)``

      Output shape
        - 3D tensor with shape: ``(batch_size, 1, embedding_size)``.

      Arguments
        - **att_hidden_units**: list of positive integer, the attention net layer number and units in each layer.

        - **att_activation**: Activation function to use in attention net.

        - **weight_normalization**: bool. Whether normalize the attention score of local activation unit.

        - **return_score**: bool. If True, return the attention scores instead of the pooled keys.

        - **supports_masking**: If True, the input need to support masking.

      References
        - [Zhou G, Zhu X, Song C, et al. Deep interest network for click-through rate prediction[C]//Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining. ACM, 2018: 1059-1068.](https://arxiv.org/pdf/1706.06978.pdf)
    """

    def __init__(self, att_hidden_units=(80, 40), att_activation='sigmoid', weight_normalization=False,
                 return_score=False,
                 supports_masking=False, **kwargs):

        self.att_hidden_units = att_hidden_units
        self.att_activation = att_activation
        self.weight_normalization = weight_normalization
        self.return_score = return_score
        super(AttentionSequencePoolingLayer, self).__init__(**kwargs)
        self.supports_masking = supports_masking

    def build(self, input_shape):
        """Validate the three input shapes (unless masking is used) and create the attention unit."""
        if not self.supports_masking:
            if not isinstance(input_shape, list) or len(input_shape) != 3:
                raise ValueError('A `AttentionSequencePoolingLayer` layer should be called '
                                 'on a list of 3 inputs')

            if len(input_shape[0]) != 3 or len(input_shape[1]) != 3 or len(input_shape[2]) != 2:
                raise ValueError(
                    "Unexpected inputs dimensions,the 3 tensor dimensions are %d,%d and %d , expect to be 3,3 and 2" % (
                        len(input_shape[0]), len(input_shape[1]), len(input_shape[2])))

            if input_shape[0][-1] != input_shape[1][-1] or input_shape[0][1] != 1 or input_shape[2][1] != 1:
                raise ValueError('A `AttentionSequencePoolingLayer` layer requires '
                                 'inputs of a 3 tensor with shape (None,1,embedding_size),(None,T,embedding_size) and (None,1)'
                                 'Got different shapes: %s' % (input_shape))
        self.local_att = LocalActivationUnit(
            self.att_hidden_units, self.att_activation, l2_reg=0, dropout_rate=0, use_bn=False, seed=1024, )
        super(AttentionSequencePoolingLayer, self).build(
            input_shape)  # Be sure to call this somewhere!

    def call(self, inputs, mask=None, training=None, **kwargs):
        """Score every key against the query and return the weighted sum (or the scores)."""
        if self.supports_masking:
            if mask is None:
                raise ValueError(
                    "When supports_masking=True,input must support masking")
            queries, keys = inputs
            key_masks = tf.expand_dims(mask[-1], axis=1)

        else:
            queries, keys, keys_length = inputs
            hist_len = keys.get_shape()[1]
            key_masks = tf.sequence_mask(keys_length, hist_len)

        attention_score = self.local_att([queries, keys], training=training)

        # Transposed so the key axis is last, matching key_masks.
        outputs = tf.transpose(attention_score, (0, 2, 1))

        # Padded keys get a very negative score before softmax, or zero otherwise.
        if self.weight_normalization:
            paddings = tf.ones_like(outputs) * (-2 ** 32 + 1)
        else:
            paddings = tf.zeros_like(outputs)

        outputs = tf.where(key_masks, outputs, paddings)

        if self.weight_normalization:
            outputs = softmax(outputs)

        if not self.return_score:
            outputs = tf.matmul(outputs, keys)

        # Compare versions numerically: as strings, '1.4.0' < '1.13.0' is False.
        tf_major_minor = tuple(int(part) for part in tf.__version__.split('.')[:2])
        if tf_major_minor < (1, 13):
            # Fall back to the newer-version rule if the score tensor lacks the flag.
            outputs._uses_learning_phase = getattr(
                attention_score, '_uses_learning_phase', training is not None)
        else:
            outputs._uses_learning_phase = training is not None

        return outputs

    def compute_output_shape(self, input_shape):
        if self.return_score:
            return (None, 1, input_shape[1][1])
        else:
            return (None, 1, input_shape[0][-1])

    def compute_mask(self, inputs, mask):
        return None

    def get_config(self, ):

        config = {'att_hidden_units': self.att_hidden_units, 'att_activation': self.att_activation,
                  'weight_normalization': self.weight_normalization, 'return_score': self.return_score,
                  'supports_masking': self.supports_masking}
        base_config = super(AttentionSequencePoolingLayer, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
class BiLSTM(Layer):
    """A multiple layer Bidirectional Residual LSTM Layer.

      Input shape
        - 3D tensor with shape ``(batch_size, timesteps, input_dim)``.

      Output shape
        - 3D tensor with shape: ``(batch_size, timesteps, units)``.

      Arguments
        - **units**: Positive integer, dimensionality of the output space.

        - **layers**: Positive integer, number of LSTM layers to stacked.

        - **res_layers**: Positive integer, number of residual connection to used in last ``res_layers``.

        - **dropout_rate**: Float between 0 and 1. Fraction of the units to drop for the linear transformation of the inputs.

        - **merge_mode**: Mode by which outputs of the forward and backward RNNs will be combined. One of { ``'fw'`` , ``'bw'`` , ``'sum'`` , ``'mul'`` , ``'concat'`` , ``'ave'`` , ``None`` }. If None, the outputs will not be combined, they will be returned as a list.
    """

    def __init__(self, units, layers=2, res_layers=0, dropout_rate=0.2, merge_mode='ave', **kwargs):

        if merge_mode not in ['fw', 'bw', 'sum', 'mul', 'ave', 'concat', None]:
            raise ValueError('Invalid merge mode. '
                             'Merge mode should be one of '
                             '{"fw","bw","sum", "mul", "ave", "concat", None}')

        self.units = units
        self.layers = layers
        self.res_layers = res_layers
        self.dropout_rate = dropout_rate
        self.merge_mode = merge_mode

        super(BiLSTM, self).__init__(**kwargs)
        self.supports_masking = True

    def build(self, input_shape):
        """Create one forward and one backward LSTM per stacked layer."""
        if len(input_shape) != 3:
            raise ValueError(
                "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (len(input_shape)))
        self.fw_lstm = []
        self.bw_lstm = []
        for _ in range(self.layers):
            self.fw_lstm.append(
                LSTM(self.units, dropout=self.dropout_rate, bias_initializer='ones', return_sequences=True,
                     unroll=True))
            self.bw_lstm.append(
                LSTM(self.units, dropout=self.dropout_rate, bias_initializer='ones', return_sequences=True,
                     go_backwards=True, unroll=True))

        super(BiLSTM, self).build(
            input_shape)  # Be sure to call this somewhere!

    def call(self, inputs, mask=None, **kwargs):
        """Run both stacks (residual on the last ``res_layers`` layers) and merge them."""
        input_fw = inputs
        input_bw = inputs
        for i in range(self.layers):
            output_fw = self.fw_lstm[i](input_fw)
            output_bw = self.bw_lstm[i](input_bw)
            # Reverse the time axis of the backward output (it ran with go_backwards=True).
            output_bw = Lambda(lambda x: K.reverse(
                x, 1), mask=lambda inputs, mask: mask)(output_bw)

            if i >= self.layers - self.res_layers:
                output_fw += input_fw
                output_bw += input_bw
            input_fw = output_fw
            input_bw = output_bw

        output_fw = input_fw
        output_bw = input_bw

        if self.merge_mode == "fw":
            output = output_fw
        elif self.merge_mode == "bw":
            output = output_bw
        elif self.merge_mode == 'concat':
            output = tf.concat([output_fw, output_bw], axis=-1)
        elif self.merge_mode == 'sum':
            output = output_fw + output_bw
        elif self.merge_mode == 'ave':
            output = (output_fw + output_bw) / 2
        elif self.merge_mode == 'mul':
            output = output_fw * output_bw
        elif self.merge_mode is None:
            output = [output_fw, output_bw]

        return output

    def compute_output_shape(self, input_shape):
        # NOTE(review): shapes are derived from the input's last dimension, which matches
        # the documented ``units`` output only when units == input_dim - confirm.
        if self.merge_mode is None:
            return [input_shape, input_shape]
        elif self.merge_mode == 'concat':
            return input_shape[:-1] + (input_shape[-1] * 2,)
        else:
            return input_shape

    def compute_mask(self, inputs, mask):
        return mask

    def get_config(self, ):

        config = {'units': self.units, 'layers': self.layers,
                  'res_layers': self.res_layers, 'dropout_rate': self.dropout_rate, 'merge_mode': self.merge_mode}
        base_config = super(BiLSTM, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
class Transformer(Layer):
    """Simplified version of Transformer proposed in "Attention is all you need".

      Input shape
        - a list of two 3D tensors with shape ``(batch_size, timesteps, input_dim)`` if ``supports_masking=True`` .
        - a list of four tensors, first two tensors with shape ``(batch_size, timesteps, input_dim)``, last two tensors with shape ``(batch_size, 1)`` if ``supports_masking=False`` .

      Output shape
        - 3D tensor with shape: ``(batch_size, 1, input_dim)`` if ``output_type='mean'`` or ``output_type='sum'`` , else ``(batch_size, timesteps, input_dim)`` .

      Arguments
        - **att_embedding_size**: int. The embedding size in multi-head self-attention network.
        - **head_num**: int. The head number in multi-head self-attention network.
        - **dropout_rate**: float between 0 and 1. Fraction of the units to drop.
        - **use_positional_encoding**: bool. Whether or not use positional_encoding
        - **use_res**: bool. Whether or not use standard residual connections before output.
        - **use_feed_forward**: bool. Whether or not use pointwise feed forward network.
        - **use_layer_norm**: bool. Whether or not use Layer Normalization.
        - **blinding**: bool. Whether or not use blinding.
        - **seed**: A Python integer to use as random seed.
        - **supports_masking**: bool. Whether or not support masking.
        - **attention_type**: str, Type of attention, the value must be one of { ``'scaled_dot_product'`` , ``'cos'`` , ``'ln'`` , ``'additive'`` }.
        - **output_type**: ``'mean'`` , ``'sum'`` or `None`. Whether or not use average/sum pooling for output.

      References
        - [Vaswani, Ashish, et al. "Attention is all you need." Advances in Neural Information Processing Systems. 2017.](https://papers.nips.cc/paper/7181-attention-is-all-you-need.pdf)
    """

    def __init__(self, att_embedding_size=1, head_num=8, dropout_rate=0.0, use_positional_encoding=True, use_res=True,
                 use_feed_forward=True, use_layer_norm=False, blinding=True, seed=1024, supports_masking=False,
                 attention_type="scaled_dot_product", output_type="mean", **kwargs):
        """Store the hyper-parameters.

        Raises:
            ValueError: if ``head_num`` is not positive.
        """
        if head_num <= 0:
            raise ValueError('head_num must be a int > 0')
        self.att_embedding_size = att_embedding_size
        self.head_num = head_num
        self.num_units = att_embedding_size * head_num
        self.use_res = use_res
        self.use_feed_forward = use_feed_forward
        self.seed = seed
        self.use_positional_encoding = use_positional_encoding
        self.dropout_rate = dropout_rate
        self.use_layer_norm = use_layer_norm
        self.blinding = blinding
        self.attention_type = attention_type
        self.output_type = output_type
        super(Transformer, self).__init__(**kwargs)
        # Set after the base constructor so that it is not reset by it.
        self.supports_masking = supports_masking

    def build(self, input_shape):
        """Create projection, attention and feed-forward weights.

        Raises:
            ValueError: if ``att_embedding_size * head_num`` differs from the
                last dimension of the first input.
        """
        embedding_size = int(input_shape[0][-1])
        if self.num_units != embedding_size:
            raise ValueError(
                "att_embedding_size * head_num must equal the last dimension size of inputs,got %d * %d != %d" % (
                    self.att_embedding_size, self.head_num, embedding_size))
        self.seq_len_max = int(input_shape[0][-2])
        self.W_Query = self.add_weight(name='query', shape=[embedding_size, self.att_embedding_size * self.head_num],
                                       dtype=tf.float32,
                                       initializer=TruncatedNormal(seed=self.seed))
        self.W_key = self.add_weight(name='key', shape=[embedding_size, self.att_embedding_size * self.head_num],
                                     dtype=tf.float32,
                                     initializer=TruncatedNormal(seed=self.seed + 1))
        self.W_Value = self.add_weight(name='value', shape=[embedding_size, self.att_embedding_size * self.head_num],
                                       dtype=tf.float32,
                                       initializer=TruncatedNormal(seed=self.seed + 2))
        if self.attention_type == "additive":
            self.b = self.add_weight('b', shape=[self.att_embedding_size], dtype=tf.float32,
                                     initializer=glorot_uniform(seed=self.seed))
            self.v = self.add_weight('v', shape=[self.att_embedding_size], dtype=tf.float32,
                                     initializer=glorot_uniform(seed=self.seed))
        elif self.attention_type == "ln":
            self.att_ln_q = LayerNormalization()
            self.att_ln_k = LayerNormalization()
        if self.use_feed_forward:
            self.fw1 = self.add_weight('fw1', shape=[self.num_units, 4 * self.num_units], dtype=tf.float32,
                                       initializer=glorot_uniform(seed=self.seed))
            self.fw2 = self.add_weight('fw2', shape=[4 * self.num_units, self.num_units], dtype=tf.float32,
                                       initializer=glorot_uniform(seed=self.seed))

        self.dropout = Dropout(
            self.dropout_rate, seed=self.seed)
        self.ln = LayerNormalization()
        if self.use_positional_encoding:
            self.query_pe = PositionEncoding()
            self.key_pe = PositionEncoding()
        # Be sure to call this somewhere!
        super(Transformer, self).build(input_shape)

    def call(self, inputs, mask=None, training=None, **kwargs):
        """Apply multi-head attention (and optionally the feed-forward block).

        Raises:
            ValueError: if ``supports_masking=True`` but no mask was provided,
                or if ``attention_type`` is not a supported value.
        """
        if self.supports_masking:
            if mask is None:
                # Fail with a clear message instead of an unpacking TypeError.
                raise ValueError(
                    "When supports_masking=True,input must support masking")
            queries, keys = inputs
            query_masks, key_masks = mask
            query_masks = tf.cast(query_masks, tf.float32)
            key_masks = tf.cast(key_masks, tf.float32)
        else:
            # Without Keras masks the valid lengths are passed explicitly.
            queries, keys, query_masks, key_masks = inputs

            query_masks = tf.sequence_mask(
                query_masks, self.seq_len_max, dtype=tf.float32)
            key_masks = tf.sequence_mask(
                key_masks, self.seq_len_max, dtype=tf.float32)
            query_masks = tf.squeeze(query_masks, axis=1)
            key_masks = tf.squeeze(key_masks, axis=1)

        if self.use_positional_encoding:
            queries = self.query_pe(queries)
            keys = self.key_pe(keys)

        Q = tf.tensordot(queries, self.W_Query,
                         axes=(-1, 0))  # N T_q D*h
        K = tf.tensordot(keys, self.W_key, axes=(-1, 0))
        V = tf.tensordot(keys, self.W_Value, axes=(-1, 0))

        # Split the heads along the feature axis and stack them on the batch axis.
        # h*N T_q D
        Q_ = tf.concat(tf.split(Q, self.head_num, axis=2), axis=0)
        K_ = tf.concat(tf.split(K, self.head_num, axis=2), axis=0)
        V_ = tf.concat(tf.split(V, self.head_num, axis=2), axis=0)

        if self.attention_type == "scaled_dot_product":
            # h*N T_q T_k
            outputs = tf.matmul(Q_, K_, transpose_b=True)

            outputs = outputs / (K_.get_shape().as_list()[-1] ** 0.5)
        elif self.attention_type == "cos":
            Q_cos = tf.nn.l2_normalize(Q_, dim=-1)
            K_cos = tf.nn.l2_normalize(K_, dim=-1)

            outputs = tf.matmul(Q_cos, K_cos, transpose_b=True)  # h*N T_q T_k

            outputs = outputs * 20  # Scale
        elif self.attention_type == 'ln':
            Q_ = self.att_ln_q(Q_)
            K_ = self.att_ln_k(K_)

            outputs = tf.matmul(Q_, K_, transpose_b=True)  # h*N T_q T_k
            # Scale
            outputs = outputs / (K_.get_shape().as_list()[-1] ** 0.5)
        elif self.attention_type == "additive":
            Q_reshaped = tf.expand_dims(Q_, axis=-2)
            K_reshaped = tf.expand_dims(K_, axis=-3)
            outputs = tf.tanh(tf.nn.bias_add(Q_reshaped + K_reshaped, self.b))
            outputs = tf.squeeze(tf.tensordot(outputs, tf.expand_dims(self.v, axis=-1), axes=[-1, 0]), axis=-1)
        else:
            raise ValueError("attention_type must be [scaled_dot_product,cos,ln,additive]")

        key_masks = tf.tile(key_masks, [self.head_num, 1])

        # (h*N, T_q, T_k)
        key_masks = tf.tile(tf.expand_dims(key_masks, 1),
                            [1, tf.shape(queries)[1], 1])

        # Padded keys receive a very negative score so softmax ignores them.
        paddings = tf.ones_like(outputs) * (-2 ** 32 + 1)

        # (h*N, T_q, T_k)
        outputs = tf.where(tf.equal(key_masks, 1), outputs, paddings, )
        if self.blinding:
            # Blinding overwrites the diagonal with the padding value, so a
            # position gets (near) zero attention weight on itself.
            try:
                outputs = tf.matrix_set_diag(outputs, tf.ones_like(outputs)[
                                                      :, :, 0] * (-2 ** 32 + 1))
            except AttributeError:
                outputs = tf.compat.v1.matrix_set_diag(outputs, tf.ones_like(outputs)[
                                                                :, :, 0] * (-2 ** 32 + 1))

        # Subtract the row maximum for a numerically stable softmax.
        outputs -= reduce_max(outputs, axis=-1, keep_dims=True)
        outputs = softmax(outputs)
        query_masks = tf.tile(query_masks, [self.head_num, 1])  # (h*N, T_q)
        # (h*N, T_q, T_k)
        query_masks = tf.tile(tf.expand_dims(
            query_masks, -1), [1, 1, tf.shape(keys)[1]])

        # Zero the attention rows that belong to padded query positions.
        outputs *= query_masks

        outputs = self.dropout(outputs, training=training)
        # Weighted sum
        # ( h*N, T_q, C/h)
        result = tf.matmul(outputs, V_)
        # Undo the head stacking: back to (N, T_q, C).
        result = tf.concat(tf.split(result, self.head_num, axis=0), axis=2)

        if self.use_res:
            result += queries
        if self.use_layer_norm:
            result = self.ln(result)

        if self.use_feed_forward:
            fw1 = tf.nn.relu(tf.tensordot(result, self.fw1, axes=[-1, 0]))
            fw1 = self.dropout(fw1, training=training)
            fw2 = tf.tensordot(fw1, self.fw2, axes=[-1, 0])
            # Without a residual connection the feed-forward output replaces
            # the attention output; previously it was computed and then
            # silently discarded when ``use_res`` was False.
            if self.use_res:
                result += fw2
            else:
                result = fw2
            if self.use_layer_norm:
                result = self.ln(result)

        if self.output_type == "mean":
            return reduce_mean(result, axis=1, keep_dims=True)
        elif self.output_type == "sum":
            return reduce_sum(result, axis=1, keep_dims=True)
        else:
            return result

    def compute_output_shape(self, input_shape):
        """Return the output shape, honouring ``output_type``."""
        if self.output_type in ("mean", "sum"):
            # Pooling over the time axis keeps a single step.
            return (None, 1, self.att_embedding_size * self.head_num)
        # No pooling: the query time axis is preserved.
        return (None, input_shape[0][1], self.att_embedding_size * self.head_num)

    def compute_mask(self, inputs, mask=None):
        """The layer consumes the mask; nothing is propagated downstream."""
        return None

    def get_config(self, ):
        """Return the constructor arguments needed to re-create the layer."""
        config = {'att_embedding_size': self.att_embedding_size, 'head_num': self.head_num,
                  'dropout_rate': self.dropout_rate, 'use_res': self.use_res,
                  'use_positional_encoding': self.use_positional_encoding, 'use_feed_forward': self.use_feed_forward,
                  'use_layer_norm': self.use_layer_norm, 'seed': self.seed, 'supports_masking': self.supports_masking,
                  'blinding': self.blinding, 'attention_type': self.attention_type, 'output_type': self.output_type}
        base_config = super(Transformer, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
class PositionEncoding(Layer):
    """Add a position embedding, initialised with sinusoids, to a 3D input.

      Arguments
        - **pos_embedding_trainable**: bool. Whether the position table is trainable.
        - **zero_pad**: bool. If True, the row for position 0 is initialised to zeros.
        - **scale**: bool. If True, the looked-up embedding is multiplied by
          ``num_units ** 0.5`` before being added to the input.
    """

    def __init__(self, pos_embedding_trainable=True, zero_pad=False, scale=True, **kwargs):
        self.pos_embedding_trainable = pos_embedding_trainable
        self.zero_pad = zero_pad
        self.scale = scale
        super(PositionEncoding, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the ``(T, num_units)`` lookup table from the sinusoid formula."""
        _, seq_len, num_units = input_shape.as_list()

        # One divisor per column; columns 2i and 2i+1 share the same divisor.
        divisors = [np.power(10000, 2. * (col // 2) / num_units) for col in range(num_units)]
        table = np.array([[pos / divisor for divisor in divisors] for pos in range(seq_len)])

        # Sine on the even columns, cosine on the odd ones.
        table[:, 0::2] = np.sin(table[:, 0::2])
        table[:, 1::2] = np.cos(table[:, 1::2])
        if self.zero_pad:
            table[0, :] = np.zeros(num_units)

        self.lookup_table = self.add_weight("lookup_table", (seq_len, num_units),
                                            initializer=Constant(table),
                                            trainable=self.pos_embedding_trainable)

        # Be sure to call this somewhere!
        super(PositionEncoding, self).build(input_shape)

    def call(self, inputs, mask=None):
        """Return ``inputs`` plus the (optionally scaled) position embedding."""
        _, seq_len, num_units = inputs.get_shape().as_list()
        positions = tf.expand_dims(tf.range(seq_len), 0)
        pos_embedding = tf.nn.embedding_lookup(self.lookup_table, positions)
        if self.scale:
            pos_embedding = pos_embedding * num_units ** 0.5
        return pos_embedding + inputs

    def compute_output_shape(self, input_shape):
        return input_shape

    def compute_mask(self, inputs, mask=None):
        """Propagate the incoming mask unchanged."""
        return mask

    def get_config(self, ):
        own_config = {'pos_embedding_trainable': self.pos_embedding_trainable, 'zero_pad': self.zero_pad,
                      'scale': self.scale}
        merged = dict(super(PositionEncoding, self).get_config())
        merged.update(own_config)
        return merged
class BiasEncoding(Layer):
    """Add learned session, position and item biases to each session tensor.

      Arguments
        - **sess_max_count**: int. Number of session tensors the layer receives.
        - **seed**: A Python integer to use as random seed.
    """

    def __init__(self, sess_max_count, seed=1024, **kwargs):
        self.sess_max_count = sess_max_count
        self.seed = seed
        super(BiasEncoding, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the three bias tensors.

        With a single session ``input_shape`` is read as one shape; otherwise
        it is read as a list of shapes and the first entry is used.
        """
        session_shape = input_shape if self.sess_max_count == 1 else input_shape[0]
        # Dimensions may be objects exposing ``.value`` or plain ints; accept
        # both in every branch (the single-session branch used to handle
        # only the ``.value`` form).
        embed_size = getattr(session_shape[2], 'value', session_shape[2])
        seq_len_max = getattr(session_shape[1], 'value', session_shape[1])

        self.sess_bias_embedding = self.add_weight('sess_bias_embedding', shape=(self.sess_max_count, 1, 1),
                                                   initializer=TruncatedNormal(
                                                       mean=0.0, stddev=0.0001, seed=self.seed))
        self.seq_bias_embedding = self.add_weight('seq_bias_embedding', shape=(1, seq_len_max, 1),
                                                  initializer=TruncatedNormal(
                                                      mean=0.0, stddev=0.0001, seed=self.seed))
        self.item_bias_embedding = self.add_weight('item_bias_embedding', shape=(1, 1, embed_size),
                                                   initializer=TruncatedNormal(
                                                       mean=0.0, stddev=0.0001, seed=self.seed))

        # Be sure to call this somewhere!
        super(BiasEncoding, self).build(input_shape)

    def call(self, inputs, mask=None):
        """Return a list with the biases added to each of the ``sess_max_count`` inputs.

        :param inputs: indexable collection of session tensors; ``inputs[i]`` is session ``i``.
        :return: list of ``sess_max_count`` tensors, one per session.
        """
        transformer_out = []
        for i in range(self.sess_max_count):
            transformer_out.append(
                inputs[i] + self.item_bias_embedding + self.seq_bias_embedding + self.sess_bias_embedding[i])
        return transformer_out

    def compute_output_shape(self, input_shape):
        return input_shape

    def compute_mask(self, inputs, mask=None):
        """Propagate the incoming mask unchanged."""
        return mask

    def get_config(self, ):
        """Return the constructor arguments needed to re-create the layer."""
        config = {'sess_max_count': self.sess_max_count, 'seed': self.seed, }
        base_config = super(BiasEncoding, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
class DynamicGRU(Layer):
    """GRU over a variable-length sequence, optionally attention-modulated.

      Arguments
        - **num_units**: int or None. Hidden size; when None the last
          dimension of the sequence input is used.
        - **gru_type**: str. ``"AGRU"`` and ``"AUGRU"`` select the attentional
          cells and expect an extra attention-score input; any other value
          uses a plain GRU cell.
        - **return_sequence**: bool. Return every step's output if True, else
          only the final hidden state (with a time axis of length 1).
    """

    def __init__(self, num_units=None, gru_type='GRU', return_sequence=True, **kwargs):

        self.num_units = num_units
        self.return_sequence = return_sequence
        self.gru_type = gru_type
        super(DynamicGRU, self).__init__(**kwargs)

    def build(self, input_shape):
        """Instantiate the recurrent cell that matches ``gru_type``."""
        input_seq_shape = input_shape[0]
        if self.num_units is None:
            self.num_units = input_seq_shape.as_list()[-1]
        if self.gru_type == "AGRU":
            self.gru_cell = QAAttGRUCell(self.num_units)
        elif self.gru_type == "AUGRU":
            self.gru_cell = VecAttGRUCell(self.num_units)
        else:
            try:
                self.gru_cell = tf.nn.rnn_cell.GRUCell(self.num_units)  # GRUCell
            except AttributeError:
                self.gru_cell = tf.compat.v1.nn.rnn_cell.GRUCell(self.num_units)

        # Be sure to call this somewhere!
        super(DynamicGRU, self).build(input_shape)

    def call(self, input_list):
        """Run the recurrent cell over the input sequence.

        :param input_list: ``[rnn_input, sequence_length]`` for ``"GRU"`` /
            ``"AIGRU"``; ``[rnn_input, sequence_length, att_score]`` otherwise.
        :return: the per-step outputs when ``return_sequence`` is True, else
            the final hidden state with an added axis at position 1.
        """
        if self.gru_type == "GRU" or self.gru_type == "AIGRU":
            rnn_input, sequence_length = input_list
            att_score = None
        else:
            rnn_input, sequence_length, att_score = input_list

        # Flatten to a 1-D length vector. An axis-less ``tf.squeeze`` would
        # also drop the batch axis when the batch holds a single example.
        # assumes sequence_length carries one length per example - TODO confirm
        flat_length = tf.reshape(sequence_length, [-1])

        rnn_output, hidden_state = dynamic_rnn(self.gru_cell, inputs=rnn_input, att_scores=att_score,
                                               sequence_length=flat_length, dtype=tf.float32,
                                               scope=self.name)
        if self.return_sequence:
            return rnn_output
        else:
            return tf.expand_dims(hidden_state, axis=1)

    def compute_output_shape(self, input_shape):
        rnn_input_shape = input_shape[0]
        if self.return_sequence:
            return rnn_input_shape
        else:
            return (None, 1, rnn_input_shape[2])

    def get_config(self, ):
        """Return the constructor arguments needed to re-create the layer."""
        config = {'num_units': self.num_units, 'gru_type': self.gru_type, 'return_sequence': self.return_sequence}
        base_config = super(DynamicGRU, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))


class KMaxPooling(Layer):
    """K Max pooling that selects the k biggest value along the specific axis.

      Input shape
        -  nD tensor with shape: ``(batch_size, ..., input_dim)``.

      Output shape
        - nD tensor with shape: ``(batch_size, ..., output_dim)``.

      Arguments
        - **k**: positive integer, number of top elements to look for along the ``axis`` dimension.

        - **axis**: integer, the dimension to look for elements. Negative
          values count from the last dimension; the batch dimension (0) is
          not allowed.
    """

    def __init__(self, k=1, axis=-1, **kwargs):

        self.k = k
        self.axis = axis
        super(KMaxPooling, self).__init__(**kwargs)

    def build(self, input_shape):
        """Validate ``axis`` and ``k`` against the input shape.

        Raises:
            ValueError: if ``axis`` does not address a non-batch dimension or
                ``k`` is outside ``1 .. size of that dimension``.
        """
        dims = len(input_shape)
        # Resolve negative axes so the documented default ``axis=-1`` is
        # accepted; the valid positive range is 1 .. dims - 1.
        resolved_axis = self.axis + dims if self.axis < 0 else self.axis
        if resolved_axis < 1 or resolved_axis >= dims:
            raise ValueError("axis must be 1~%d,now is %d" %
                             (dims - 1, self.axis))

        if self.k < 1 or self.k > input_shape[resolved_axis]:
            raise ValueError("k must be in 1 ~ %d,now k is %d" %
                             (input_shape[resolved_axis], self.k))
        self.dims = dims
        # Be sure to call this somewhere!
        super(KMaxPooling, self).build(input_shape)

    def call(self, inputs):
        """Return the ``k`` largest values along ``axis``, sorted descending."""
        # swap the last and the axis dimensions since top_k will be applied along the last dimension
        perm = list(range(self.dims))
        perm[-1], perm[self.axis] = perm[self.axis], perm[-1]
        shifted_input = tf.transpose(inputs, perm)

        # extract top_k, returns two tensors [values, indices]
        top_k = tf.nn.top_k(shifted_input, k=self.k, sorted=True, name=None)[0]
        # The permutation is a single swap, so applying it again restores the layout.
        output = tf.transpose(top_k, perm)

        return output

    def compute_output_shape(self, input_shape):
        output_shape = list(input_shape)
        output_shape[self.axis] = self.k
        return tuple(output_shape)

    def get_config(self, ):
        """Return the constructor arguments needed to re-create the layer."""
        config = {'k': self.k, 'axis': self.axis}
        base_config = super(KMaxPooling, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
class NoMask(Layer):
    """Identity layer that stops mask propagation."""

    def __init__(self, **kwargs):
        super(NoMask, self).__init__(**kwargs)

    def build(self, input_shape):
        # Be sure to call this somewhere!
        super(NoMask, self).build(input_shape)

    def call(self, x, mask=None, **kwargs):
        """Return the input untouched."""
        return x

    def compute_mask(self, inputs, mask):
        """Always report that the output carries no mask."""
        return None


class Hash(Layer):
    """Map inputs to integer ids, by vocabulary lookup or by hashing.

    When `vocabulary_path` is set the input is looked up in a table loaded
    from that file. Otherwise the input is hashed into `[0, num_buckets)`.
    When `mask_zero` is True, input `0` (or `0.0`) maps to `0` and every other
    value maps into `[1, num_buckets)`.

    The vocabulary file is comma separated; the table is built with the
    SECOND column as the (string) key and the FIRST column as the (int64)
    value:

    * `1,emerson`
    * `2,lake`
    * `3,palmer`

    >>> hash = Hash(
    ...   num_buckets=3+1,
    ...   vocabulary_path=filename,
    ...   default_value=0)
    >>> hash(tf.constant('lake')).numpy()
    2
    >>> hash(tf.constant('lakeemerson')).numpy()
    0

    Args:
        num_buckets: An `int` that is >= 1. The number of buckets or the vocabulary size + 1
            when `vocabulary_path` is setup.
        mask_zero: default is False. The `Hash` value will hash input `0` or `0.0` to value `0` when
            the `mask_zero` is `True`. `mask_zero` is not used when `vocabulary_path` is setup.
        vocabulary_path: default `None`. Path of the comma-delimited vocabulary file
            described above. The path must be accessible from wherever `Hash` is initialized.
        default_value: default '0'. The default value if a key is missing in the table.
        **kwargs: Additional keyword arguments.
    """

    def __init__(self, num_buckets, mask_zero=False, vocabulary_path=None, default_value=0, **kwargs):
        self.num_buckets = num_buckets
        self.mask_zero = mask_zero
        self.vocabulary_path = vocabulary_path
        self.default_value = default_value
        if self.vocabulary_path:
            # key: string taken from column 1, value: int64 taken from column 0
            table_init = TextFileInitializer(vocabulary_path, 'string', 1, 'int64', 0, delimiter=',')
            self.hash_table = StaticHashTable(table_init, default_value=self.default_value)
        super(Hash, self).__init__(**kwargs)

    def build(self, input_shape):
        # Be sure to call this somewhere!
        super(Hash, self).build(input_shape)

    def call(self, x, mask=None, **kwargs):
        """Return the id tensor for `x` (non-string inputs are stringified first)."""
        input_is_string = x.dtype == tf.string
        # String form of zero, compared against the stringified input below.
        zero_dtype = 'int32' if input_is_string else x.dtype
        zero = tf.as_string(tf.zeros([1], dtype=zero_dtype))
        if not input_is_string:
            x = tf.as_string(x, )

        if self.vocabulary_path:
            return self.hash_table.lookup(x)

        # With mask_zero one bucket is reserved for the zero input.
        if self.mask_zero:
            bucket_count = self.num_buckets - 1
        else:
            bucket_count = self.num_buckets

        try:
            hashed = tf.string_to_hash_bucket_fast(x, bucket_count,
                                                   name=None)  # weak hash
        except AttributeError:
            hashed = tf.strings.to_hash_bucket_fast(x, bucket_count,
                                                    name=None)  # weak hash

        if self.mask_zero:
            non_zero = tf.cast(tf.not_equal(x, zero), dtype='int64')
            # Shift by one so that id 0 is used only for the zero input.
            hashed = (hashed + 1) * non_zero

        return hashed

    def compute_output_shape(self, input_shape):
        return input_shape

    def get_config(self, ):
        own_config = {'num_buckets': self.num_buckets, 'mask_zero': self.mask_zero,
                      'vocabulary_path': self.vocabulary_path, 'default_value': self.default_value}
        merged = dict(super(Hash, self).get_config())
        merged.update(own_config)
        return merged
class Linear(Layer):
    """Linear logit over sparse and/or dense inputs.

      Arguments
        - **l2_reg**: float. L2 strength applied to the dense kernel.
        - **mode**: 0 sums the sparse input over its last axis; 1 projects the
          dense input with a learned kernel; 2 takes ``[sparse, dense]`` and
          adds both terms.
        - **use_bias**: bool. Whether to add a scalar bias.
        - **seed**: A Python integer to use as random seed.
    """

    def __init__(self, l2_reg=0.0, mode=0, use_bias=False, seed=1024, **kwargs):
        if mode not in [0, 1, 2]:
            raise ValueError("mode must be 0,1 or 2")

        self.l2_reg = l2_reg
        self.mode = mode
        self.use_bias = use_bias
        self.seed = seed
        super(Linear, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the optional bias and, for modes 1 and 2, the dense kernel."""
        if self.use_bias:
            self.bias = self.add_weight(name='linear_bias',
                                        shape=(1,),
                                        initializer=Zeros(),
                                        trainable=True)

        if self.mode in (1, 2):
            # In mode 2 the dense tensor is the second entry of the input list.
            dense_shape = input_shape if self.mode == 1 else input_shape[1]
            self.kernel = self.add_weight(
                'linear_kernel',
                shape=[int(dense_shape[-1]), 1],
                initializer=glorot_normal(self.seed),
                regularizer=l2(self.l2_reg),
                trainable=True)

        super(Linear, self).build(input_shape)  # Be sure to call this somewhere!

    def call(self, inputs, **kwargs):
        """Compute the linear logit for the configured ``mode``."""
        if self.mode == 0:
            logit = reduce_sum(inputs, axis=-1, keep_dims=True)
        elif self.mode == 1:
            logit = tf.tensordot(inputs, self.kernel, axes=(-1, 0))
        else:
            sparse_part, dense_part = inputs
            dense_logit = tf.tensordot(dense_part, self.kernel, axes=(-1, 0))
            logit = reduce_sum(sparse_part, axis=-1, keep_dims=False) + dense_logit

        if self.use_bias:
            logit = logit + self.bias
        return logit

    def compute_output_shape(self, input_shape):
        return (None, 1)

    def compute_mask(self, inputs, mask):
        """The logit carries no mask."""
        return None

    def get_config(self, ):
        own_config = {'mode': self.mode, 'l2_reg': self.l2_reg, 'use_bias': self.use_bias, 'seed': self.seed}
        merged = dict(super(Linear, self).get_config())
        merged.update(own_config)
        return merged
class Concat(Layer):
    """Concatenate a list of tensors along ``axis``, optionally merging masks."""

    def __init__(self, axis, supports_masking=True, **kwargs):
        super(Concat, self).__init__(**kwargs)
        self.axis = axis
        self.supports_masking = supports_masking

    def call(self, inputs):
        return tf.concat(inputs, axis=self.axis)

    def compute_mask(self, inputs, mask=None):
        """Combine the per-input masks into one mask for the concatenated output.

        Returns None when masking is disabled or no input carries a mask.
        """
        if not self.supports_masking:
            return None
        if mask is None:
            # Fall back to the masks attached to the input tensors, if any.
            mask = [getattr(tensor, "_keras_mask", None) for tensor in inputs]
        if not isinstance(mask, list):
            raise ValueError('`mask` should be a list.')
        if not isinstance(inputs, list):
            raise ValueError('`inputs` should be a list.')
        if len(mask) != len(inputs):
            raise ValueError('The lists `inputs` and `mask` '
                             'should have the same length.')
        if all(m is None for m in mask):
            return None

        def _mask_like(tensor, tensor_mask):
            # Bring each mask to the rank of its input so they can be concatenated.
            if tensor_mask is None:
                # Unmasked input: every element counts as valid.
                return tf.ones_like(tensor, dtype='bool')
            if K.ndim(tensor_mask) < K.ndim(tensor):
                return tf.expand_dims(tensor_mask, axis=-1)
            return tensor_mask

        aligned = [_mask_like(tensor, tensor_mask) for tensor, tensor_mask in zip(inputs, mask)]
        merged = K.concatenate(aligned, axis=self.axis)
        return K.all(merged, axis=-1, keepdims=False)

    def get_config(self, ):
        own_config = {'axis': self.axis, 'supports_masking': self.supports_masking}
        merged = dict(super(Concat, self).get_config())
        merged.update(own_config)
        return merged


def concat_func(inputs, axis=-1, mask=False):
    """Concatenate ``inputs`` along ``axis``.

    A single-element list is returned as that element (mask stripped unless
    ``mask`` is True) instead of going through a ``Concat`` layer.
    """
    if len(inputs) != 1:
        return Concat(axis, supports_masking=mask)(inputs)
    only = inputs[0]
    return only if mask else NoMask()(only)


def reduce_mean(input_tensor,
                axis=None,
                keep_dims=False,
                name=None,
                reduction_indices=None):
    """``tf.reduce_mean`` wrapper accepting the ``keep_dims`` spelling on any TF version."""
    try:
        result = tf.reduce_mean(input_tensor,
                                axis=axis,
                                keep_dims=keep_dims,
                                name=name,
                                reduction_indices=reduction_indices)
    except TypeError:
        # This TensorFlow rejects the arguments above; retry with ``keepdims``.
        result = tf.reduce_mean(input_tensor,
                                axis=axis,
                                keepdims=keep_dims,
                                name=name)
    return result


def reduce_sum(input_tensor,
               axis=None,
               keep_dims=False,
               name=None,
               reduction_indices=None):
    """``tf.reduce_sum`` wrapper accepting the ``keep_dims`` spelling on any TF version."""
    try:
        result = tf.reduce_sum(input_tensor,
                               axis=axis,
                               keep_dims=keep_dims,
                               name=name,
                               reduction_indices=reduction_indices)
    except TypeError:
        # This TensorFlow rejects the arguments above; retry with ``keepdims``.
        result = tf.reduce_sum(input_tensor,
                               axis=axis,
                               keepdims=keep_dims,
                               name=name)
    return result
def reduce_max(input_tensor,
               axis=None,
               keep_dims=False,
               name=None,
               reduction_indices=None):
    """``tf.reduce_max`` wrapper accepting the ``keep_dims`` spelling on any TF version."""
    try:
        result = tf.reduce_max(input_tensor,
                               axis=axis,
                               keep_dims=keep_dims,
                               name=name,
                               reduction_indices=reduction_indices)
    except TypeError:
        # This TensorFlow rejects the arguments above; retry with ``keepdims``.
        result = tf.reduce_max(input_tensor,
                               axis=axis,
                               keepdims=keep_dims,
                               name=name)
    return result


def div(x, y, name=None):
    """Divide ``x`` by ``y`` with ``tf.div``, or ``tf.divide`` where ``tf.div`` is missing."""
    try:
        quotient = tf.div(x, y, name=name)
    except AttributeError:
        quotient = tf.divide(x, y, name=name)
    return quotient


def softmax(logits, dim=-1, name=None):
    """``tf.nn.softmax`` wrapper accepting the ``dim`` spelling on any TF version."""
    try:
        probs = tf.nn.softmax(logits, dim=dim, name=name)
    except TypeError:
        probs = tf.nn.softmax(logits, axis=dim, name=name)
    return probs


class _Add(Layer):
    """``Add`` that tolerates an empty input list."""

    def __init__(self, **kwargs):
        super(_Add, self).__init__(**kwargs)

    def build(self, input_shape):
        # Be sure to call this somewhere!
        super(_Add, self).build(input_shape)

    def call(self, inputs, **kwargs):
        # Nothing to add: return a constant zero of shape (1, 1).
        if len(inputs) == 0:
            return tf.constant([[0.0]])

        return Add()(inputs)


def add_func(inputs):
    """Sum a list of tensors; non-lists and single-element lists pass through."""
    if not isinstance(inputs, list):
        return inputs
    if len(inputs) == 1:
        return inputs[0]
    return _Add()(inputs)


def combined_dnn_input(sparse_embedding_list, dense_value_list):
    """Flatten the sparse embeddings and dense values into one DNN input.

    Raises:
        NotImplementedError: if both lists are empty.
    """
    has_sparse = len(sparse_embedding_list) > 0
    has_dense = len(dense_value_list) > 0
    if not has_sparse and not has_dense:
        raise NotImplementedError("dnn_feature_columns can not be empty list")

    flattened = []
    if has_sparse:
        flattened.append(Flatten()(concat_func(sparse_embedding_list)))
    if has_dense:
        flattened.append(Flatten()(concat_func(dense_value_list)))

    if len(flattened) == 2:
        return concat_func(flattened)
    return flattened[0]
def AFM(linear_feature_columns, dnn_feature_columns, fm_group=DEFAULT_GROUP_NAME, use_attention=True,
        attention_factor=8,
        l2_reg_linear=1e-5, l2_reg_embedding=1e-5, l2_reg_att=1e-5, afm_dropout=0, seed=1024,
        task='binary'):
    """Instantiates the Attentional Factorization Machine architecture.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param fm_group: list, group_name of features that will be used to do feature interactions. A single group name given as a string is also accepted.
    :param use_attention: bool,whether use attention or not,if set to ``False``.it is the same as **standard Factorization Machine**
    :param attention_factor: positive integer,units in attention net
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_att: float. L2 regularizer strength applied to attention net
    :param afm_dropout: float in [0,1), Fraction of the attention net output units to dropout.
    :param seed: integer ,to use as random seed.
    :param task: str, ``"binary"`` for binary logloss or  ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    # The default is a bare string; without wrapping it, ``k in fm_group``
    # below would be a substring test (a group called "default" would match
    # "default_group") instead of a membership test on group names.
    if isinstance(fm_group, str):
        fm_group = [fm_group]

    features = build_input_features(
        linear_feature_columns + dnn_feature_columns)

    inputs_list = list(features.values())

    group_embedding_dict, _ = input_from_feature_columns(features, dnn_feature_columns, l2_reg_embedding,
                                                         seed, support_dense=False, support_group=True)

    linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed, prefix='linear',
                                    l2_reg=l2_reg_linear)

    # One interaction logit per selected group; the per-group logits are summed.
    if use_attention:
        fm_logit = add_func([AFMLayer(attention_factor, l2_reg_att, afm_dropout,
                                      seed)(list(v)) for k, v in group_embedding_dict.items() if k in fm_group])
    else:
        fm_logit = add_func([FM()(concat_func(v, axis=1))
                             for k, v in group_embedding_dict.items() if k in fm_group])

    final_logit = add_func([linear_logit, fm_logit])
    output = PredictionLayer(task)(final_logit)

    model = Model(inputs=inputs_list, outputs=output)
    return model
def AutoInt(linear_feature_columns, dnn_feature_columns, att_layer_num=3, att_embedding_size=8, att_head_num=2,
            att_res=True,
            dnn_hidden_units=(256, 128, 64), dnn_activation='relu', l2_reg_linear=1e-5,
            l2_reg_embedding=1e-5, l2_reg_dnn=0, dnn_use_bn=False, dnn_dropout=0, seed=1024,
            task='binary', ):
    """Instantiates the AutoInt Network architecture.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param att_layer_num: int.The InteractingLayer number to be used.
    :param att_embedding_size: int.The embedding size in multi-head self-attention network.
    :param att_head_num: int.The head number in multi-head  self-attention network.
    :param att_res: bool.Whether or not use standard residual connections before output.
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of DNN
    :param dnn_activation: Activation function to use in DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param dnn_use_bn:  bool. Whether use BatchNormalization before activation or not in DNN
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param seed: integer ,to use as random seed.
    :param task: str, ``"binary"`` for binary logloss or  ``"regression"`` for regression loss
    :return: A Keras model instance.
    :raises ValueError: if both ``dnn_hidden_units`` is empty and ``att_layer_num`` is not positive.
    """

    if len(dnn_hidden_units) <= 0 and att_layer_num <= 0:
        raise ValueError("Either hidden_layer or att_layer_num must > 0")

    # Declare inputs for BOTH column lists: the linear part below looks its
    # features up in ``features``, so a feature used only by the linear part
    # needs an input as well. The DNN columns come first so that the input
    # order is unchanged when the linear features are a subset of them.
    features = build_input_features(
        list(dnn_feature_columns) + list(linear_feature_columns))
    inputs_list = list(features.values())

    linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed, prefix='linear',
                                    l2_reg=l2_reg_linear)

    sparse_embedding_list, dense_value_list = input_from_feature_columns(features, dnn_feature_columns,
                                                                         l2_reg_embedding, seed)

    # Stack the field embeddings on axis 1 and run the interacting layers.
    att_input = concat_func(sparse_embedding_list, axis=1)

    for _ in range(att_layer_num):
        att_input = InteractingLayer(
            att_embedding_size, att_head_num, att_res)(att_input)
    att_output = Flatten()(att_input)

    dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)

    if len(dnn_hidden_units) > 0 and att_layer_num > 0:  # Deep & Interacting Layer
        deep_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)(dnn_input)
        stack_out = Concatenate()([att_output, deep_out])
        final_logit = Dense(1, use_bias=False)(stack_out)
    elif len(dnn_hidden_units) > 0:  # Only Deep
        deep_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)(dnn_input, )
        final_logit = Dense(1, use_bias=False)(deep_out)
    elif att_layer_num > 0:  # Only Interacting Layer
        final_logit = Dense(1, use_bias=False)(att_output)
    else:  # Error
        # Unreachable: this combination is rejected by the check at the top.
        raise NotImplementedError

    final_logit = add_func([final_logit, linear_logit])
    output = PredictionLayer(task)(final_logit)

    model = Model(inputs=inputs_list, outputs=output)

    return model
def CCPM(linear_feature_columns, dnn_feature_columns, conv_kernel_width=(6, 5), conv_filters=(4, 4),
         dnn_hidden_units=(128, 64), l2_reg_linear=1e-5, l2_reg_embedding=1e-5, l2_reg_dnn=0, dnn_dropout=0,
         seed=1024, task='binary'):
    """Instantiates the Convolutional Click Prediction Model architecture.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param conv_kernel_width: list, list of positive integer or empty list, the width of filter in each conv layer.
    :param conv_filters: list, list of positive integer or empty list, the number of filters in each conv layer.
    :param dnn_hidden_units: list, list of positive integer or empty list, the layer number and units in each layer of DNN.
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param seed: integer, to use as random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    :raises ValueError: if ``conv_kernel_width`` and ``conv_filters`` differ in length.
    """
    if len(conv_kernel_width) != len(conv_filters):
        raise ValueError(
            "conv_kernel_width must have same element with conv_filters")

    features = build_input_features(
        linear_feature_columns + dnn_feature_columns)
    inputs_list = list(features.values())

    linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed,
                                    l2_reg=l2_reg_linear)

    sparse_embedding_list, _ = input_from_feature_columns(features, dnn_feature_columns, l2_reg_embedding,
                                                          seed, support_dense=False)

    field_num = len(sparse_embedding_list)
    conv_layer_num = len(conv_filters)

    conv_input = concat_func(sparse_embedding_list, axis=1)
    # Append a trailing axis so the stacked embeddings can be fed to Conv2D.
    pooling_result = Lambda(
        lambda x: tf.expand_dims(x, axis=3))(conv_input)

    for i, (filters, width) in enumerate(zip(conv_filters, conv_kernel_width), start=1):
        # k for the k-max pooling: depends on the layer index for inner layers and is fixed to 3
        # for the last convolution layer.
        if i < conv_layer_num:
            k = max(1, int((1 - pow(i / conv_layer_num, conv_layer_num - i)) * field_num))
        else:
            k = 3

        conv_result = Conv2D(filters=filters, kernel_size=(width, 1), strides=(1, 1), padding='same',
                             activation='tanh', use_bias=True, )(pooling_result)
        # Never ask for more elements than the pooled axis currently has.
        pooling_result = KMaxPooling(
            k=min(k, int(conv_result.shape[1])), axis=1)(conv_result)

    flatten_result = Flatten()(pooling_result)
    # Forward the seed so the DNN honours the caller's ``seed`` like the rest of the model.
    dnn_out = DNN(dnn_hidden_units, l2_reg=l2_reg_dnn, dropout_rate=dnn_dropout, seed=seed)(flatten_result)
    dnn_logit = Dense(1, use_bias=False)(dnn_out)

    final_logit = add_func([dnn_logit, linear_logit])

    output = PredictionLayer(task)(final_logit)
    model = Model(inputs=inputs_list, outputs=output)
    return model
def DCN(linear_feature_columns, dnn_feature_columns, cross_num=2, cross_parameterization='vector',
        dnn_hidden_units=(256, 128, 64), l2_reg_linear=1e-5, l2_reg_embedding=1e-5, l2_reg_cross=1e-5,
        l2_reg_dnn=0, seed=1024, dnn_dropout=0, dnn_use_bn=False, dnn_activation='relu', task='binary'):
    """Instantiates the Deep&Cross Network architecture.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param cross_num: positive integer, cross layer number
    :param cross_parameterization: str, ``"vector"`` or ``"matrix"``, how to parameterize the cross network.
    :param dnn_hidden_units: list, list of positive integer or empty list, the layer number and units in each layer of DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_cross: float. L2 regularizer strength applied to cross net
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param dnn_activation: Activation function to use in DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    :raises ValueError: if ``dnn_hidden_units`` is empty and ``cross_num`` is 0.
    """
    if len(dnn_hidden_units) == 0 and cross_num == 0:
        raise ValueError("Either hidden_layer or cross layer must > 0")

    # Build the inputs from the union of both column sets (as the sibling models do) so that a
    # column referenced only by the linear part still has an input to look up.
    features = build_input_features(
        list(linear_feature_columns) + list(dnn_feature_columns))
    inputs_list = list(features.values())

    linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed, prefix='linear',
                                    l2_reg=l2_reg_linear)

    sparse_embedding_list, dense_value_list = input_from_feature_columns(features, dnn_feature_columns,
                                                                         l2_reg_embedding, seed)

    # Both towers consume the same flattened embedding + dense input.
    dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)

    if len(dnn_hidden_units) > 0 and cross_num > 0:  # Deep & Cross
        deep_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)(dnn_input)
        cross_out = CrossNet(cross_num, parameterization=cross_parameterization, l2_reg=l2_reg_cross)(dnn_input)
        stack_out = Concatenate()([cross_out, deep_out])
        final_logit = Dense(1, use_bias=False)(stack_out)
    elif len(dnn_hidden_units) > 0:  # Only Deep
        deep_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)(dnn_input)
        final_logit = Dense(1, use_bias=False)(deep_out)
    elif cross_num > 0:  # Only Cross
        cross_out = CrossNet(cross_num, parameterization=cross_parameterization, l2_reg=l2_reg_cross)(dnn_input)
        final_logit = Dense(1, use_bias=False)(cross_out)
    else:  # e.g. empty DNN together with a negative cross_num
        raise NotImplementedError

    final_logit = add_func([final_logit, linear_logit])
    output = PredictionLayer(task)(final_logit)

    model = Model(inputs=inputs_list, outputs=output)

    return model
def DCNMix(linear_feature_columns, dnn_feature_columns, cross_num=2,
           dnn_hidden_units=(256, 128, 64), l2_reg_linear=1e-5, l2_reg_embedding=1e-5, low_rank=32, num_experts=4,
           l2_reg_cross=1e-5, l2_reg_dnn=0, seed=1024, dnn_dropout=0, dnn_use_bn=False,
           dnn_activation='relu', task='binary'):
    """Instantiates the Deep&Cross Network with mixture of experts architecture.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param cross_num: positive integer, cross layer number
    :param dnn_hidden_units: list, list of positive integer or empty list, the layer number and units in each layer of DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param low_rank: Positive integer, dimensionality of low-rank space.
    :param num_experts: Positive integer, number of experts.
    :param l2_reg_cross: float. L2 regularizer strength applied to cross net
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param dnn_activation: Activation function to use in DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    :raises ValueError: if ``dnn_hidden_units`` is empty and ``cross_num`` is 0.
    """
    if len(dnn_hidden_units) == 0 and cross_num == 0:
        raise ValueError("Either hidden_layer or cross layer must > 0")

    # Build the inputs from the union of both column sets (as the sibling models do) so that a
    # column referenced only by the linear part still has an input to look up.
    features = build_input_features(
        list(linear_feature_columns) + list(dnn_feature_columns))
    inputs_list = list(features.values())

    linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed, prefix='linear',
                                    l2_reg=l2_reg_linear)

    sparse_embedding_list, dense_value_list = input_from_feature_columns(features, dnn_feature_columns,
                                                                         l2_reg_embedding, seed)

    # Both towers consume the same flattened embedding + dense input.
    dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)

    if len(dnn_hidden_units) > 0 and cross_num > 0:  # Deep & Cross
        deep_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)(dnn_input)
        cross_out = CrossNetMix(low_rank=low_rank, num_experts=num_experts, layer_num=cross_num,
                                l2_reg=l2_reg_cross)(dnn_input)
        stack_out = Concatenate()([cross_out, deep_out])
        final_logit = Dense(1, use_bias=False)(stack_out)
    elif len(dnn_hidden_units) > 0:  # Only Deep
        deep_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)(dnn_input)
        final_logit = Dense(1, use_bias=False, )(deep_out)
    elif cross_num > 0:  # Only Cross
        cross_out = CrossNetMix(low_rank=low_rank, num_experts=num_experts, layer_num=cross_num,
                                l2_reg=l2_reg_cross)(dnn_input)
        final_logit = Dense(1, use_bias=False, )(cross_out)
    else:  # e.g. empty DNN together with a negative cross_num
        raise NotImplementedError

    final_logit = add_func([final_logit, linear_logit])
    output = PredictionLayer(task)(final_logit)

    model = Model(inputs=inputs_list, outputs=output)

    return model
def DeepFEFM(linear_feature_columns, dnn_feature_columns, use_fefm=True,
             dnn_hidden_units=(256, 128, 64), l2_reg_linear=0.00001, l2_reg_embedding_feat=0.00001,
             l2_reg_embedding_field=0.00001, l2_reg_dnn=0, seed=1024, dnn_dropout=0.0,
             exclude_feature_embed_in_dnn=False,
             use_linear=True, use_fefm_embed_in_dnn=True, dnn_activation='relu', dnn_use_bn=False, task='binary'):
    """Instantiates the DeepFEFM Network architecture or the shallow FEFM architecture (Ablation studies supported)

    The final logit is the sum of whichever of the linear, FEFM and DNN logits are enabled, in that
    order; the DNN logit is enabled when ``dnn_hidden_units`` is non-empty.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param use_fefm: bool, use FEFM logit or not (doesn't affect FEFM embeddings in DNN, controls only the use of
        final FEFM logit)
    :param dnn_hidden_units: list, list of positive integer or empty list, the layer number and units in each layer of DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding_feat: float. L2 regularizer strength applied to embedding vector of features
    :param l2_reg_embedding_field: float. L2 regularizer applied to field embeddings
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param exclude_feature_embed_in_dnn: bool, used in ablation studies for removing feature embeddings in DNN
    :param use_linear: bool, used in ablation studies
    :param use_fefm_embed_in_dnn: bool, True if FEFM interaction embeddings are to be used in DNN
        (set False for Ablation)
    :param dnn_activation: Activation function to use in DNN
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    :raises NotImplementedError: if the linear, FEFM and DNN parts are all disabled.
    """
    features = build_input_features(linear_feature_columns + dnn_feature_columns)

    inputs_list = list(features.values())

    linear_logit = get_linear_logit(features, linear_feature_columns, l2_reg=l2_reg_linear, seed=seed,
                                    prefix='linear')

    group_embedding_dict, dense_value_list = input_from_feature_columns(features, dnn_feature_columns,
                                                                        l2_reg_embedding_feat,
                                                                        seed, support_group=True)

    # FEFM interactions are computed on the default feature group only.
    fefm_interaction_embedding = concat_func([FEFMLayer(
        regularizer=l2_reg_embedding_field)(concat_func(v, axis=1))
                                              for k, v in group_embedding_dict.items() if k in [DEFAULT_GROUP_NAME]],
                                             axis=1)

    dnn_input = combined_dnn_input(list(chain.from_iterable(group_embedding_dict.values())), dense_value_list)

    # if use_fefm_embed_in_dnn is set to False it is Ablation4 (Use false only for Ablation)
    if use_fefm_embed_in_dnn:
        if exclude_feature_embed_in_dnn:
            # Ablation3: remove feature vector embeddings from the DNN input
            dnn_input = fefm_interaction_embedding
        else:
            # No ablation
            dnn_input = concat_func([dnn_input, fefm_interaction_embedding], axis=1)

    dnn_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)(dnn_input)

    dnn_logit = Dense(1, use_bias=False, )(dnn_out)

    fefm_logit = Lambda(lambda x: reduce_sum(x, axis=1, keep_dims=True))(fefm_interaction_embedding)

    # Collect the enabled logits in the fixed order linear -> FEFM -> Deep. For boolean flags this
    # yields exactly the seven supported combinations (only linear, linear + FEFM, linear + Deep
    # [Ablation1], linear + FEFM + Deep, only FEFM, only Deep, FEFM + Deep [Ablation2]); flags are
    # evaluated by truthiness so non-bool truthy/falsy values are accepted as well.
    enabled_logits = []
    if use_linear:
        enabled_logits.append(linear_logit)
    if use_fefm:
        enabled_logits.append(fefm_logit)
    if len(dnn_hidden_units) > 0:
        enabled_logits.append(dnn_logit)

    if not enabled_logits:
        raise NotImplementedError

    if len(enabled_logits) == 1:
        final_logit = enabled_logits[0]
    else:
        final_logit = add_func(enabled_logits)

    output = PredictionLayer(task)(final_logit)
    model = Model(inputs=inputs_list, outputs=output)
    return model
def DeepFM(linear_feature_columns, dnn_feature_columns, fm_group=(DEFAULT_GROUP_NAME,), dnn_hidden_units=(256, 128, 64),
           l2_reg_linear=0.00001, l2_reg_embedding=0.00001, l2_reg_dnn=0, seed=1024, dnn_dropout=0,
           dnn_activation='relu', dnn_use_bn=False, task='binary'):
    """Instantiates the DeepFM Network architecture.

    The prediction is made from the sum of three logits: the linear logit, the FM logit (summed
    over every embedding group listed in ``fm_group``) and the DNN logit.

    :param linear_feature_columns: An iterable containing all the features used by the linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by the deep part of the model.
    :param fm_group: list, group_name of features that will be used to do feature interactions.
    :param dnn_hidden_units: list, list of positive integer or empty list, the layer number and units in each layer of DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    all_columns = linear_feature_columns + dnn_feature_columns
    features = build_input_features(all_columns)
    inputs_list = list(features.values())

    linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed, prefix='linear',
                                    l2_reg=l2_reg_linear)

    group_embedding_dict, dense_value_list = input_from_feature_columns(features, dnn_feature_columns,
                                                                        l2_reg_embedding, seed,
                                                                        support_group=True)

    # One FM term per selected embedding group.
    fm_terms = []
    for group_name, group_embeddings in group_embedding_dict.items():
        if group_name not in fm_group:
            continue
        fm_terms.append(FM()(concat_func(group_embeddings, axis=1)))
    fm_logit = add_func(fm_terms)

    # The DNN sees the embeddings of every group, not only those in fm_group.
    every_embedding = list(chain.from_iterable(group_embedding_dict.values()))
    dnn_input = combined_dnn_input(every_embedding, dense_value_list)
    deep_tower = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)
    dnn_output = deep_tower(dnn_input)
    dnn_logit = Dense(1, use_bias=False)(dnn_output)

    final_logit = add_func([linear_logit, fm_logit, dnn_logit])

    output = PredictionLayer(task)(final_logit)
    return Model(inputs=inputs_list, outputs=output)
def DIFM(linear_feature_columns, dnn_feature_columns,
         att_embedding_size=8, att_head_num=8, att_res=True, dnn_hidden_units=(256, 128, 64),
         l2_reg_linear=0.00001, l2_reg_embedding=0.00001, l2_reg_dnn=0, seed=1024, dnn_dropout=0,
         dnn_activation='relu', dnn_use_bn=False, task='binary'):
    """Instantiates the DIFM Network architecture.

    An input-aware factor is computed as the sum of a vector-wise part (self-attention over the
    sparse embeddings) and a bit-wise part (DNN over the same embeddings); it re-weights both the
    linear part and the FM input.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param att_embedding_size: integer, the embedding size in multi-head self-attention network.
    :param att_head_num: int. The head number in multi-head self-attention network.
    :param att_res: bool. Whether or not use standard residual connections before output.
    :param dnn_hidden_units: list, list of positive integer or empty list, the layer number and units in each layer of DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    :raises ValueError: if ``dnn_hidden_units`` is empty or no sparse embeddings are produced.
    """
    if len(dnn_hidden_units) <= 0:
        raise ValueError("dnn_hidden_units is null!")

    features = build_input_features(
        linear_feature_columns + dnn_feature_columns)

    # Number of sparse / variable-length sparse columns: the width of the input-aware factor.
    sparse_feat_num = sum(
        1 for column in dnn_feature_columns
        if isinstance(column, (SparseFeat, VarLenSparseFeat)))

    inputs_list = list(features.values())

    sparse_embedding_list, _ = input_from_feature_columns(features, dnn_feature_columns,
                                                          l2_reg_embedding, seed)

    if len(sparse_embedding_list) <= 0:
        raise ValueError("there are no sparse features")

    # Vector-wise part: self-attention over the stacked embeddings.
    att_input = concat_func(sparse_embedding_list, axis=1)
    attention = InteractingLayer(att_embedding_size, att_head_num, att_res, scaling=True)
    att_out = Flatten()(attention(att_input))
    m_vec = Dense(sparse_feat_num, use_bias=False)(att_out)

    # Bit-wise part: DNN over the flattened embeddings (no dense values).
    dnn_input = combined_dnn_input(sparse_embedding_list, [])
    deep_tower = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)
    dnn_output = deep_tower(dnn_input)
    m_bit = Dense(sparse_feat_num, use_bias=False)(dnn_output)

    input_aware_factor = add_func([m_vec, m_bit])  # the complete input-aware factor m_x

    linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed, prefix='linear',
                                    l2_reg=l2_reg_linear, sparse_feat_refine_weight=input_aware_factor)

    # Scale every field embedding by its entry of the input-aware factor before the FM.
    fm_input = concat_func(sparse_embedding_list, axis=1)
    refine = Lambda(lambda x: x[0] * tf.expand_dims(x[1], axis=-1))
    refined_fm_input = refine([fm_input, input_aware_factor])

    fm_logit = FM()(refined_fm_input)

    final_logit = add_func([linear_logit, fm_logit])

    output = PredictionLayer(task)(final_logit)
    return Model(inputs=inputs_list, outputs=output)
def EDCN(linear_feature_columns,
         dnn_feature_columns,
         cross_num=2,
         cross_parameterization='vector',
         bridge_type='concatenation',
         tau=1.0,
         l2_reg_linear=1e-5,
         l2_reg_embedding=1e-5,
         l2_reg_cross=1e-5,
         l2_reg_dnn=0,
         seed=1024,
         dnn_dropout=0,
         dnn_use_bn=False,
         dnn_activation='relu',
         task='binary'):
    """Instantiates the Enhanced Deep&Cross Network architecture.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param cross_num: positive integer, cross layer number
    :param cross_parameterization: str, ``"vector"`` or ``"matrix"``, how to parameterize the cross network.
    :param bridge_type: The type of bridge interaction, one of ``"pointwise_addition"``, ``"hadamard_product"``,
        ``"concatenation"`` , ``"attention_pooling"``
    :param tau: Positive float, the temperature coefficient to control distribution of field-wise gating unit
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_cross: float. L2 regularizer strength applied to cross net
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param dnn_activation: Activation function to use in DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    :raises ValueError: if ``cross_num`` is not positive.
    """
    # A non-positive cross_num would skip the loop below and leave the tower outputs undefined.
    if cross_num <= 0:
        raise ValueError("Cross layer num must > 0")

    # Build the inputs from the union of both column sets (as the sibling models do) so that a
    # column referenced only by the linear part still has an input to look up.
    features = build_input_features(
        list(linear_feature_columns) + list(dnn_feature_columns))
    inputs_list = list(features.values())

    linear_logit = get_linear_logit(features,
                                    linear_feature_columns,
                                    seed=seed,
                                    prefix='linear',
                                    l2_reg=l2_reg_linear)

    sparse_embedding_list, _ = input_from_feature_columns(
        features, dnn_feature_columns, l2_reg_embedding, seed, support_dense=False)

    emb_input = concat_func(sparse_embedding_list, axis=1)
    # Each tower gets its own regulation module over the shared embeddings.
    deep_in = RegulationModule(tau)(emb_input)
    cross_in = RegulationModule(tau)(emb_input)

    field_size = len(sparse_embedding_list)
    embedding_size = int(sparse_embedding_list[0].shape[-1])
    cross_dim = field_size * embedding_size

    for i in range(cross_num):
        # One cross layer and one equally wide DNN layer per step, fused by the bridge module.
        cross_out = CrossNet(1, parameterization=cross_parameterization, l2_reg=l2_reg_cross)(cross_in)
        deep_out = DNN([cross_dim], dnn_activation, l2_reg_dnn,
                       dnn_dropout, dnn_use_bn, seed=seed)(deep_in)
        bridge_out = BridgeModule(bridge_type)([cross_out, deep_out])
        if i + 1 < cross_num:
            # Feed the fused signal back to both towers for the next step.
            bridge_out_list = Reshape([field_size, embedding_size])(bridge_out)
            deep_in = RegulationModule(tau)(bridge_out_list)
            cross_in = RegulationModule(tau)(bridge_out_list)

    stack_out = Concatenate()([cross_out, deep_out, bridge_out])
    final_logit = Dense(1, use_bias=False)(stack_out)

    final_logit = add_func([final_logit, linear_logit])
    output = PredictionLayer(task)(final_logit)

    model = Model(inputs=inputs_list, outputs=output)

    return model
def unstack(input_tensor):
    """Split ``input_tensor`` along axis 1 into a list of tensors, each keeping a size-1 axis.

    An extra axis is inserted at position 2 first, then the tensor is unstacked along axis 1.
    """
    input_ = tf.expand_dims(input_tensor, axis=2)
    return tf.unstack(input_, input_.shape[1], 1)


def FGCNN(linear_feature_columns, dnn_feature_columns, conv_kernel_width=(7, 7, 7, 7), conv_filters=(14, 16, 18, 20),
          new_maps=(3, 3, 3, 3),
          pooling_width=(2, 2, 2, 2), dnn_hidden_units=(256, 128, 64), l2_reg_linear=1e-5, l2_reg_embedding=1e-5,
          l2_reg_dnn=0,
          dnn_dropout=0,
          seed=1024,
          task='binary', ):
    """Instantiates the Feature Generation by Convolutional Neural Network architecture.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param conv_kernel_width: list, list of positive integer or empty list, the width of filter in each conv layer.
    :param conv_filters: list, list of positive integer or empty list, the number of filters in each conv layer.
    :param new_maps: list, list of positive integer or empty list, the feature maps of generated features.
    :param pooling_width: list, list of positive integer or empty list, the width of pooling layer.
    :param dnn_hidden_units: list, list of positive integer or empty list, the layer number and units in each layer of deep net.
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param seed: integer, to use as random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    :raises ValueError: if the four convolution configuration sequences differ in length.
    """
    if not (len(conv_kernel_width) == len(conv_filters) == len(new_maps) == len(pooling_width)):
        raise ValueError(
            "conv_kernel_width,conv_filters,new_maps  and pooling_width must have same length")

    # Build the inputs from the union of both column sets (as the sibling models do) so that a
    # column referenced only by the linear part still has an input to look up.
    features = build_input_features(
        list(linear_feature_columns) + list(dnn_feature_columns))

    inputs_list = list(features.values())

    linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed, prefix='linear',
                                    l2_reg=l2_reg_linear)

    # Two embedding lookups over the same columns: one for the raw features and a separate,
    # 'fg'-prefixed one that feeds the feature-generation CNN.
    deep_emb_list, _ = input_from_feature_columns(features, dnn_feature_columns, l2_reg_embedding, seed)
    fg_deep_emb_list, _ = input_from_feature_columns(features, dnn_feature_columns, l2_reg_embedding, seed,
                                                     prefix='fg')

    fg_input = concat_func(fg_deep_emb_list, axis=1)
    origin_input = concat_func(deep_emb_list, axis=1)

    if len(conv_filters) > 0:
        new_features = FGCNNLayer(
            conv_filters, conv_kernel_width, new_maps, pooling_width)(fg_input)
        combined_input = concat_func([origin_input, new_features], axis=1)
    else:
        combined_input = origin_input

    # Pairwise inner products over every (raw + generated) feature vector.
    inner_product = Flatten()(
        InnerProductLayer()(Lambda(unstack, mask=[None] * int(combined_input.shape[1]))(combined_input)))
    linear_signal = Flatten()(combined_input)
    dnn_input = Concatenate()([linear_signal, inner_product])
    dnn_input = Flatten()(dnn_input)

    # Forward the seed so the DNN honours the caller's ``seed`` like the rest of the model.
    final_logit = DNN(dnn_hidden_units, l2_reg=l2_reg_dnn, dropout_rate=dnn_dropout, seed=seed)(dnn_input)
    final_logit = Dense(1, use_bias=False)(final_logit)

    final_logit = add_func([final_logit, linear_logit])
    output = PredictionLayer(task)(final_logit)

    model = Model(inputs=inputs_list, outputs=output)
    return model
================================================ FILE: deepctr/models/fibinet.py ================================================ # -*- coding:utf-8 -*- """ Author: Weichen Shen, weichenswc@163.com Reference: [1] Huang T, Zhang Z, Zhang J. FiBiNET: Combining Feature Importance and Bilinear feature Interaction for Click-Through Rate Prediction[J]. arXiv preprint arXiv:1905.09433, 2019. """ from tensorflow.python.keras.models import Model from tensorflow.python.keras.layers import Dense, Flatten from ..feature_column import build_input_features, get_linear_logit, input_from_feature_columns from ..layers.core import PredictionLayer, DNN from ..layers.interaction import SENETLayer, BilinearInteraction from ..layers.utils import concat_func, add_func, combined_dnn_input def FiBiNET(linear_feature_columns, dnn_feature_columns, bilinear_type='interaction', reduction_ratio=3, dnn_hidden_units=(256, 128, 64), l2_reg_linear=1e-5, l2_reg_embedding=1e-5, l2_reg_dnn=0, seed=1024, dnn_dropout=0, dnn_activation='relu', task='binary'): """Instantiates the Feature Importance and Bilinear feature Interaction NETwork architecture. :param linear_feature_columns: An iterable containing all the features used by linear part of the model. :param dnn_feature_columns: An iterable containing all the features used by deep part of the model. :param bilinear_type: str,bilinear function type used in Bilinear Interaction Layer,can be ``'all'`` , ``'each'`` or ``'interaction'`` :param reduction_ratio: integer in [1,inf), reduction ratio used in SENET Layer :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of DNN :param l2_reg_linear: float. L2 regularizer strength applied to wide part :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector :param l2_reg_dnn: float. L2 regularizer strength applied to DNN :param seed: integer ,to use as random seed. 
:param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate. :param dnn_activation: Activation function to use in DNN :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss :return: A Keras model instance. """ features = build_input_features(linear_feature_columns + dnn_feature_columns) inputs_list = list(features.values()) linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed, prefix='linear', l2_reg=l2_reg_linear) sparse_embedding_list, dense_value_list = input_from_feature_columns(features, dnn_feature_columns, l2_reg_embedding, seed) senet_embedding_list = SENETLayer( reduction_ratio, seed)(sparse_embedding_list) senet_bilinear_out = BilinearInteraction( bilinear_type=bilinear_type, seed=seed)(senet_embedding_list) bilinear_out = BilinearInteraction( bilinear_type=bilinear_type, seed=seed)(sparse_embedding_list) dnn_input = combined_dnn_input([Flatten()(concat_func([senet_bilinear_out, bilinear_out]))], dense_value_list) dnn_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, False, seed=seed)(dnn_input) dnn_logit = Dense(1, use_bias=False)(dnn_out) final_logit = add_func([linear_logit, dnn_logit]) output = PredictionLayer(task)(final_logit) model = Model(inputs=inputs_list, outputs=output) return model ================================================ FILE: deepctr/models/flen.py ================================================ # -*- coding:utf-8 -*- """ Author: Tingyi Tan, 5636374@qq.com Reference: [1] Chen W, Zhan L, Ci Y, Lin C. FLEN: Leveraging Field for Scalable CTR Prediction . 
arXiv preprint arXiv:1911.04690, 2019.(https://arxiv.org/pdf/1911.04690) """ from itertools import chain from tensorflow.python.keras.models import Model from tensorflow.python.keras.layers import Dense from ..feature_column import build_input_features, get_linear_logit, input_from_feature_columns from ..layers.core import PredictionLayer, DNN from ..layers.interaction import FieldWiseBiInteraction from ..layers.utils import concat_func, add_func, combined_dnn_input def FLEN(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(256, 128, 64), l2_reg_linear=0.00001, l2_reg_embedding=0.00001, l2_reg_dnn=0, seed=1024, dnn_dropout=0.0, dnn_activation='relu', dnn_use_bn=False, task='binary'): """Instantiates the FLEN Network architecture. :param linear_feature_columns: An iterable containing all the features used by linear part of the model. :param dnn_feature_columns: An iterable containing all the features used by deep part of the model. :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of deep net :param l2_reg_linear: float. L2 regularizer strength applied to linear part :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector :param l2_reg_dnn: float. L2 regularizer strength applied to DNN :param seed: integer ,to use as random seed. :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate. :param dnn_activation: Activation function to use in DNN :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss :return: A Keras model instance. 
""" features = build_input_features(linear_feature_columns + dnn_feature_columns) inputs_list = list(features.values()) group_embedding_dict, dense_value_list = input_from_feature_columns( features, dnn_feature_columns, l2_reg_embedding, seed, support_group=True) linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed, prefix='linear', l2_reg=l2_reg_linear) fm_mf_out = FieldWiseBiInteraction(seed=seed)( [concat_func(v, axis=1) for k, v in group_embedding_dict.items()]) dnn_input = combined_dnn_input( list(chain.from_iterable(group_embedding_dict.values())), dense_value_list) dnn_output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)(dnn_input) dnn_logit = Dense(1, use_bias=False)(concat_func([fm_mf_out, dnn_output])) final_logit = add_func([linear_logit, dnn_logit]) output = PredictionLayer(task)(final_logit) model = Model(inputs=inputs_list, outputs=output) return model ================================================ FILE: deepctr/models/fnn.py ================================================ # -*- coding:utf-8 -*- """ Author: Weichen Shen, weichenswc@163.com Reference: [1] Zhang W, Du T, Wang J. Deep learning over multi-field categorical data[C]//European conference on information retrieval. Springer, Cham, 2016: 45-57.(https://arxiv.org/pdf/1601.02376.pdf) """ from tensorflow.python.keras.models import Model from tensorflow.python.keras.layers import Dense from ..feature_column import build_input_features, get_linear_logit, input_from_feature_columns from ..layers.core import PredictionLayer, DNN from ..layers.utils import add_func, combined_dnn_input def FNN(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(256, 128, 64), l2_reg_embedding=1e-5, l2_reg_linear=1e-5, l2_reg_dnn=0, seed=1024, dnn_dropout=0, dnn_activation='relu', task='binary'): """Instantiates the Factorization-supported Neural Network architecture. 
:param linear_feature_columns: An iterable containing all the features used by linear part of the model. :param dnn_feature_columns: An iterable containing all the features used by deep part of the model. :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of deep net :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector :param l2_reg_linear: float. L2 regularizer strength applied to linear weight :param l2_reg_dnn: float . L2 regularizer strength applied to DNN :param seed: integer ,to use as random seed. :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate. :param dnn_activation: Activation function to use in DNN :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss :return: A Keras model instance. """ features = build_input_features( linear_feature_columns + dnn_feature_columns) inputs_list = list(features.values()) linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed, prefix='linear', l2_reg=l2_reg_linear) sparse_embedding_list, dense_value_list = input_from_feature_columns(features, dnn_feature_columns, l2_reg_embedding, seed) dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list) deep_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, False, seed=seed)(dnn_input) dnn_logit = Dense(1, use_bias=False)(deep_out) final_logit = add_func([dnn_logit, linear_logit]) output = PredictionLayer(task)(final_logit) model = Model(inputs=inputs_list, outputs=output) return model ================================================ FILE: deepctr/models/fwfm.py ================================================ # -*- coding:utf-8 -*- """ Author: Harshit Pande Reference: [1] Field-weighted Factorization Machines for Click-Through Rate Prediction in Display Advertising (https://arxiv.org/pdf/1806.03514.pdf) """ from itertools import chain from 
tensorflow.python.keras.models import Model from tensorflow.python.keras.layers import Dense from ..feature_column import build_input_features, get_linear_logit, DEFAULT_GROUP_NAME, input_from_feature_columns from ..layers.core import PredictionLayer, DNN from ..layers.interaction import FwFMLayer from ..layers.utils import concat_func, add_func, combined_dnn_input def FwFM(linear_feature_columns, dnn_feature_columns, fm_group=(DEFAULT_GROUP_NAME,), dnn_hidden_units=(256, 128, 64), l2_reg_linear=0.00001, l2_reg_embedding=0.00001, l2_reg_field_strength=0.00001, l2_reg_dnn=0, seed=1024, dnn_dropout=0, dnn_activation='relu', dnn_use_bn=False, task='binary'): """Instantiates the FwFM Network architecture. :param linear_feature_columns: An iterable containing all the features used by linear part of the model. :param dnn_feature_columns: An iterable containing all the features used by deep part of the model. :param fm_group: list, group_name of features that will be used to do feature interactions. :param dnn_hidden_units: list,list of positive integer or empty list if do not want DNN, the layer number and units in each layer of DNN :param l2_reg_linear: float. L2 regularizer strength applied to linear part :param l2_reg_field_strength: float. L2 regularizer strength applied to the field pair strength parameters :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector :param l2_reg_dnn: float. L2 regularizer strength applied to DNN :param seed: integer ,to use as random seed. :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate. :param dnn_activation: Activation function to use in DNN :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss :return: A Keras model instance. 
""" features = build_input_features(linear_feature_columns + dnn_feature_columns) inputs_list = list(features.values()) linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed, prefix='linear', l2_reg=l2_reg_linear) group_embedding_dict, dense_value_list = input_from_feature_columns(features, dnn_feature_columns, l2_reg_embedding, seed, support_group=True) fwfm_logit = add_func([FwFMLayer(num_fields=len(v), regularizer=l2_reg_field_strength) (concat_func(v, axis=1)) for k, v in group_embedding_dict.items() if k in fm_group]) final_logit_components = [linear_logit, fwfm_logit] if dnn_hidden_units: dnn_input = combined_dnn_input(list(chain.from_iterable( group_embedding_dict.values())), dense_value_list) dnn_output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)(dnn_input) dnn_logit = Dense(1, use_bias=False)(dnn_output) final_logit_components.append(dnn_logit) final_logit = add_func(final_logit_components) output = PredictionLayer(task)(final_logit) model = Model(inputs=inputs_list, outputs=output) return model ================================================ FILE: deepctr/models/ifm.py ================================================ # -*- coding:utf-8 -*- """ Author: zanshuxun, zanshuxun@aliyun.com Reference: [1] Yu Y, Wang Z, Yuan B. An Input-aware Factorization Machine for Sparse Prediction[C]//IJCAI. 2019: 1466-1472. 
(https://www.ijcai.org/Proceedings/2019/0203.pdf) """ import tensorflow as tf from tensorflow.python.keras.models import Model from tensorflow.python.keras.layers import Dense, Lambda from ..feature_column import build_input_features, get_linear_logit, input_from_feature_columns, SparseFeat, \ VarLenSparseFeat from ..layers.core import PredictionLayer, DNN from ..layers.interaction import FM from ..layers.utils import concat_func, add_func, combined_dnn_input, softmax def IFM(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(256, 128, 64), l2_reg_linear=0.00001, l2_reg_embedding=0.00001, l2_reg_dnn=0, seed=1024, dnn_dropout=0, dnn_activation='relu', dnn_use_bn=False, task='binary'): """Instantiates the IFM Network architecture. :param linear_feature_columns: An iterable containing all the features used by linear part of the model. :param dnn_feature_columns: An iterable containing all the features used by deep part of the model. :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of DNN :param l2_reg_linear: float. L2 regularizer strength applied to linear part :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector :param l2_reg_dnn: float. L2 regularizer strength applied to DNN :param seed: integer ,to use as random seed. :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate. :param dnn_activation: Activation function to use in DNN :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss :return: A Keras model instance. 
""" if not len(dnn_hidden_units) > 0: raise ValueError("dnn_hidden_units is null!") features = build_input_features( linear_feature_columns + dnn_feature_columns) sparse_feat_num = len(list(filter(lambda x: isinstance(x, SparseFeat) or isinstance(x, VarLenSparseFeat), dnn_feature_columns))) inputs_list = list(features.values()) sparse_embedding_list, _ = input_from_feature_columns(features, dnn_feature_columns, l2_reg_embedding, seed) if not len(sparse_embedding_list) > 0: raise ValueError("there are no sparse features") dnn_input = combined_dnn_input(sparse_embedding_list, []) dnn_output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)(dnn_input) # here, dnn_output is the m'_{x} dnn_output = Dense(sparse_feat_num, use_bias=False)(dnn_output) # input_aware_factor m_{x,i} input_aware_factor = Lambda(lambda x: tf.cast(tf.shape(x)[-1], tf.float32) * softmax(x, dim=1))(dnn_output) linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed, prefix='linear', l2_reg=l2_reg_linear, sparse_feat_refine_weight=input_aware_factor) fm_input = concat_func(sparse_embedding_list, axis=1) refined_fm_input = Lambda(lambda x: x[0] * tf.expand_dims(x[1], axis=-1))( [fm_input, input_aware_factor]) fm_logit = FM()(refined_fm_input) final_logit = add_func([linear_logit, fm_logit]) output = PredictionLayer(task)(final_logit) model = Model(inputs=inputs_list, outputs=output) return model ================================================ FILE: deepctr/models/mlr.py ================================================ # -*- coding:utf-8 -*- """ Author: Weichen Shen, weichenswc@163.com Reference: [1] Gai K, Zhu X, Li H, et al. Learning Piece-wise Linear Models from Large Scale Data for Ad Click Prediction[J]. 
arXiv preprint arXiv:1704.05194, 2017.(https://arxiv.org/abs/1704.05194) """ from tensorflow.python.keras.layers import Activation, dot from tensorflow.python.keras.models import Model from ..feature_column import build_input_features, get_linear_logit from ..layers.core import PredictionLayer from ..layers.utils import concat_func def MLR(region_feature_columns, base_feature_columns=None, region_num=4, l2_reg_linear=1e-5, seed=1024, task='binary', bias_feature_columns=None): """Instantiates the Mixed Logistic Regression/Piece-wise Linear Model. :param region_feature_columns: An iterable containing all the features used by region part of the model. :param base_feature_columns: An iterable containing all the features used by base part of the model. :param region_num: integer > 1,indicate the piece number :param l2_reg_linear: float. L2 regularizer strength applied to weight :param seed: integer ,to use as random seed. :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss :param bias_feature_columns: An iterable containing all the features used by bias part of the model. :return: A Keras model instance. 
""" if region_num <= 1: raise ValueError("region_num must > 1") if base_feature_columns is None or len(base_feature_columns) == 0: base_feature_columns = region_feature_columns if bias_feature_columns is None: bias_feature_columns = [] features = build_input_features(region_feature_columns + base_feature_columns + bias_feature_columns) inputs_list = list(features.values()) region_score = get_region_score(features, region_feature_columns, region_num, l2_reg_linear, seed) learner_score = get_learner_score(features, base_feature_columns, region_num, l2_reg_linear, seed, task=task) final_logit = dot([region_score, learner_score], axes=-1) if bias_feature_columns is not None and len(bias_feature_columns) > 0: bias_score = get_learner_score(features, bias_feature_columns, 1, l2_reg_linear, seed, prefix='bias_', task='binary') final_logit = dot([final_logit, bias_score], axes=-1) model = Model(inputs=inputs_list, outputs=final_logit) return model def get_region_score(features, feature_columns, region_number, l2_reg, seed, prefix='region_', seq_mask_zero=True): region_logit = concat_func([get_linear_logit(features, feature_columns, seed=seed + i, prefix=prefix + str(i + 1), l2_reg=l2_reg) for i in range(region_number)]) return Activation('softmax')(region_logit) def get_learner_score(features, feature_columns, region_number, l2_reg, seed, prefix='learner_', seq_mask_zero=True, task='binary'): region_score = [PredictionLayer(task=task, use_bias=False)( get_linear_logit(features, feature_columns, seed=seed + i, prefix=prefix + str(i + 1), l2_reg=l2_reg)) for i in range(region_number)] return concat_func(region_score) ================================================ FILE: deepctr/models/multitask/__init__.py ================================================ from .esmm import ESMM from .mmoe import MMOE from .ple import PLE from .sharedbottom import SharedBottom ================================================ FILE: deepctr/models/multitask/esmm.py 
================================================ """ Author: Mincai Lai, laimc@shanghaitech.edu.cn Weichen Shen, weichenswc@163.com Reference: [1] Ma X, Zhao L, Huang G, et al. Entire space multi-task model: An effective approach for estimating post-click conversion rate[C]//The 41st International ACM SIGIR Conference on Research & Development in Information Retrieval. 2018.(https://arxiv.org/abs/1804.07931) """ from tensorflow.python.keras.models import Model from tensorflow.python.keras.layers import Dense, Multiply from ...feature_column import build_input_features, input_from_feature_columns from ...layers.core import PredictionLayer, DNN from ...layers.utils import combined_dnn_input def ESMM(dnn_feature_columns, tower_dnn_hidden_units=(256, 128, 64), l2_reg_embedding=0.00001, l2_reg_dnn=0, seed=1024, dnn_dropout=0, dnn_activation='relu', dnn_use_bn=False, task_types=('binary', 'binary'), task_names=('ctr', 'ctcvr')): """Instantiates the Entire Space Multi-Task Model architecture. :param dnn_feature_columns: An iterable containing all the features used by deep part of the model. :param tower_dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of task DNN. :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector. :param l2_reg_dnn: float. L2 regularizer strength applied to DNN. :param seed: integer ,to use as random seed. :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate. :param dnn_activation: Activation function to use in DNN :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN :param task_types: str, indicating the loss of each tasks, ``"binary"`` for binary logloss or ``"regression"`` for regression loss. :param task_names: list of str, indicating the predict target of each tasks. default value is ['ctr', 'ctcvr'] :return: A Keras model instance. 
""" if len(task_names) != 2: raise ValueError("the length of task_names must be equal to 2") for task_type in task_types: if task_type != 'binary': raise ValueError("task must be binary in ESMM, {} is illegal".format(task_type)) features = build_input_features(dnn_feature_columns) inputs_list = list(features.values()) sparse_embedding_list, dense_value_list = input_from_feature_columns(features, dnn_feature_columns, l2_reg_embedding, seed) dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list) ctr_output = DNN(tower_dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)( dnn_input) cvr_output = DNN(tower_dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)( dnn_input) ctr_logit = Dense(1, use_bias=False)(ctr_output) cvr_logit = Dense(1, use_bias=False)(cvr_output) ctr_pred = PredictionLayer('binary', name=task_names[0])(ctr_logit) cvr_pred = PredictionLayer('binary')(cvr_logit) ctcvr_pred = Multiply(name=task_names[1])([ctr_pred, cvr_pred]) # CTCVR = CTR * CVR model = Model(inputs=inputs_list, outputs=[ctr_pred, ctcvr_pred]) return model ================================================ FILE: deepctr/models/multitask/mmoe.py ================================================ """ Author: Mincai Lai, laimc@shanghaitech.edu.cn Weichen Shen, weichenswc@163.com Reference: [1] Ma J, Zhao Z, Yi X, et al. Modeling task relationships in multi-task learning with multi-gate mixture-of-experts[C]//Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining. 
2018.(https://dl.acm.org/doi/abs/10.1145/3219819.3220007) """ import tensorflow as tf from tensorflow.python.keras.models import Model from tensorflow.python.keras.layers import Dense, Lambda from ...feature_column import build_input_features, input_from_feature_columns from ...layers.core import PredictionLayer, DNN from ...layers.utils import combined_dnn_input, reduce_sum def MMOE(dnn_feature_columns, num_experts=3, expert_dnn_hidden_units=(256, 128), tower_dnn_hidden_units=(64,), gate_dnn_hidden_units=(), l2_reg_embedding=0.00001, l2_reg_dnn=0, seed=1024, dnn_dropout=0, dnn_activation='relu', dnn_use_bn=False, task_types=('binary', 'binary'), task_names=('ctr', 'ctcvr')): """Instantiates the Multi-gate Mixture-of-Experts multi-task learning architecture. :param dnn_feature_columns: An iterable containing all the features used by deep part of the model. :param num_experts: integer, number of experts. :param expert_dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of expert DNN. :param tower_dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of task-specific DNN. :param gate_dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of gate DNN. :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector :param l2_reg_dnn: float. L2 regularizer strength applied to DNN :param seed: integer ,to use as random seed. :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate. :param dnn_activation: Activation function to use in DNN :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN :param task_types: list of str, indicating the loss of each tasks, ``"binary"`` for binary logloss, ``"regression"`` for regression loss. e.g. 
['binary', 'regression'] :param task_names: list of str, indicating the predict target of each tasks :return: a Keras model instance """ num_tasks = len(task_names) if num_tasks <= 1: raise ValueError("num_tasks must be greater than 1") if num_experts <= 1: raise ValueError("num_experts must be greater than 1") if len(task_types) != num_tasks: raise ValueError("num_tasks must be equal to the length of task_types") for task_type in task_types: if task_type not in ['binary', 'regression']: raise ValueError("task must be binary or regression, {} is illegal".format(task_type)) features = build_input_features(dnn_feature_columns) inputs_list = list(features.values()) sparse_embedding_list, dense_value_list = input_from_feature_columns(features, dnn_feature_columns, l2_reg_embedding, seed) dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list) # build expert layer expert_outs = [] for i in range(num_experts): expert_network = DNN(expert_dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed, name='expert_' + str(i))(dnn_input) expert_outs.append(expert_network) expert_concat = Lambda(lambda x: tf.stack(x, axis=1))(expert_outs) # None,num_experts,dim mmoe_outs = [] for i in range(num_tasks): # one mmoe layer: nums_tasks = num_gates # build gate layers gate_input = DNN(gate_dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed, name='gate_' + task_names[i])(dnn_input) gate_out = Dense(num_experts, use_bias=False, activation='softmax', name='gate_softmax_' + task_names[i])(gate_input) gate_out = Lambda(lambda x: tf.expand_dims(x, axis=-1))(gate_out) # gate multiply the expert gate_mul_expert = Lambda(lambda x: reduce_sum(x[0] * x[1], axis=1, keep_dims=False), name='gate_mul_expert_' + task_names[i])([expert_concat, gate_out]) mmoe_outs.append(gate_mul_expert) task_outs = [] for task_type, task_name, mmoe_out in zip(task_types, task_names, mmoe_outs): # build tower layer tower_output = 
DNN(tower_dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed, name='tower_' + task_name)(mmoe_out) logit = Dense(1, use_bias=False)(tower_output) output = PredictionLayer(task_type, name=task_name)(logit) task_outs.append(output) model = Model(inputs=inputs_list, outputs=task_outs) return model ================================================ FILE: deepctr/models/multitask/ple.py ================================================ """ Author: Mincai Lai, laimc@shanghaitech.edu.cn Weichen Shen, weichenswc@163.com Reference: [1] Tang H, Liu J, Zhao M, et al. Progressive layered extraction (ple): A novel multi-task learning (mtl) model for personalized recommendations[C]//Fourteenth ACM Conference on Recommender Systems. 2020.(https://dl.acm.org/doi/10.1145/3383313.3412236) """ import tensorflow as tf from tensorflow.python.keras.models import Model from tensorflow.python.keras.layers import Dense, Lambda from ...feature_column import build_input_features, input_from_feature_columns from ...layers.core import PredictionLayer, DNN from ...layers.utils import combined_dnn_input, reduce_sum def PLE(dnn_feature_columns, shared_expert_num=1, specific_expert_num=1, num_levels=2, expert_dnn_hidden_units=(256,), tower_dnn_hidden_units=(64,), gate_dnn_hidden_units=(), l2_reg_embedding=0.00001, l2_reg_dnn=0, seed=1024, dnn_dropout=0, dnn_activation='relu', dnn_use_bn=False, task_types=('binary', 'binary'), task_names=('ctr', 'ctcvr')): """Instantiates the multi level of Customized Gate Control of Progressive Layered Extraction architecture. :param dnn_feature_columns: An iterable containing all the features used by deep part of the model. :param shared_expert_num: integer, number of task-shared experts. :param specific_expert_num: integer, number of task-specific experts. :param num_levels: integer, number of CGC levels. :param expert_dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of expert DNN. 
:param tower_dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of task-specific DNN. :param gate_dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of gate DNN. :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector. :param l2_reg_dnn: float. L2 regularizer strength applied to DNN. :param seed: integer ,to use as random seed. :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate. :param dnn_activation: Activation function to use in DNN. :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN. :param task_types: list of str, indicating the loss of each tasks, ``"binary"`` for binary logloss, ``"regression"`` for regression loss. e.g. ['binary', 'regression'] :param task_names: list of str, indicating the predict target of each tasks :return: a Keras model instance. """ num_tasks = len(task_names) if num_tasks <= 1: raise ValueError("num_tasks must be greater than 1") if len(task_types) != num_tasks: raise ValueError("num_tasks must be equal to the length of task_types") for task_type in task_types: if task_type not in ['binary', 'regression']: raise ValueError("task must be binary or regression, {} is illegal".format(task_type)) features = build_input_features(dnn_feature_columns) inputs_list = list(features.values()) sparse_embedding_list, dense_value_list = input_from_feature_columns(features, dnn_feature_columns, l2_reg_embedding, seed) dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list) # single Extraction Layer def cgc_net(inputs, level_name, is_last=False): # inputs: [task1, task2, ... 
taskn, shared task] specific_expert_outputs = [] # build task-specific expert layer for i in range(num_tasks): for j in range(specific_expert_num): expert_network = DNN(expert_dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed, name=level_name + 'task_' + task_names[i] + '_expert_specific_' + str(j))( inputs[i]) specific_expert_outputs.append(expert_network) # build task-shared expert layer shared_expert_outputs = [] for k in range(shared_expert_num): expert_network = DNN(expert_dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed, name=level_name + 'expert_shared_' + str(k))(inputs[-1]) shared_expert_outputs.append(expert_network) # task_specific gate (count = num_tasks) cgc_outs = [] for i in range(num_tasks): # concat task-specific expert and task-shared expert cur_expert_num = specific_expert_num + shared_expert_num # task_specific + task_shared cur_experts = specific_expert_outputs[ i * specific_expert_num:(i + 1) * specific_expert_num] + shared_expert_outputs expert_concat = Lambda(lambda x: tf.stack(x, axis=1))(cur_experts) # build gate layers gate_input = DNN(gate_dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed, name=level_name + 'gate_specific_' + task_names[i])( inputs[i]) # gate[i] for task input[i] gate_out = Dense(cur_expert_num, use_bias=False, activation='softmax', name=level_name + 'gate_softmax_specific_' + task_names[i])(gate_input) gate_out = Lambda(lambda x: tf.expand_dims(x, axis=-1))(gate_out) # gate multiply the expert gate_mul_expert = Lambda(lambda x: reduce_sum(x[0] * x[1], axis=1, keep_dims=False), name=level_name + 'gate_mul_expert_specific_' + task_names[i])( [expert_concat, gate_out]) cgc_outs.append(gate_mul_expert) # task_shared gate, if the level not in last, add one shared gate if not is_last: cur_expert_num = num_tasks * specific_expert_num + shared_expert_num cur_experts = specific_expert_outputs + shared_expert_outputs # all the expert 
include task-specific expert and task-shared expert expert_concat = Lambda(lambda x: tf.stack(x, axis=1))(cur_experts) # build gate layers gate_input = DNN(gate_dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed, name=level_name + 'gate_shared')(inputs[-1]) # gate for shared task input gate_out = Dense(cur_expert_num, use_bias=False, activation='softmax', name=level_name + 'gate_softmax_shared')(gate_input) gate_out = Lambda(lambda x: tf.expand_dims(x, axis=-1))(gate_out) # gate multiply the expert gate_mul_expert = Lambda(lambda x: reduce_sum(x[0] * x[1], axis=1, keep_dims=False), name=level_name + 'gate_mul_expert_shared')( [expert_concat, gate_out]) cgc_outs.append(gate_mul_expert) return cgc_outs # build Progressive Layered Extraction ple_inputs = [dnn_input] * (num_tasks + 1) # [task1, task2, ... taskn, shared task] ple_outputs = [] for i in range(num_levels): if i == num_levels - 1: # the last level ple_outputs = cgc_net(inputs=ple_inputs, level_name='level_' + str(i) + '_', is_last=True) else: ple_outputs = cgc_net(inputs=ple_inputs, level_name='level_' + str(i) + '_', is_last=False) ple_inputs = ple_outputs task_outs = [] for task_type, task_name, ple_out in zip(task_types, task_names, ple_outputs): # build tower layer tower_output = DNN(tower_dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed, name='tower_' + task_name)(ple_out) logit = Dense(1, use_bias=False)(tower_output) output = PredictionLayer(task_type, name=task_name)(logit) task_outs.append(output) model = Model(inputs=inputs_list, outputs=task_outs) return model ================================================ FILE: deepctr/models/multitask/sharedbottom.py ================================================ """ Author: Mincai Lai, laimc@shanghaitech.edu.cn Weichen Shen, weichenswc@163.com Reference: [1] Ruder S. An overview of multi-task learning in deep neural networks[J]. 
def SharedBottom(dnn_feature_columns, bottom_dnn_hidden_units=(256, 128), tower_dnn_hidden_units=(64,),
                 l2_reg_embedding=0.00001, l2_reg_dnn=0, seed=1024, dnn_dropout=0, dnn_activation='relu',
                 dnn_use_bn=False, task_types=('binary', 'binary'), task_names=('ctr', 'ctcvr')):
    """Build the SharedBottom multi-task model: one bottom DNN shared by every task plus one tower per task.

    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param bottom_dnn_hidden_units: list, list of positive integer or empty list, the layer number and units
        in each layer of the shared bottom DNN.
    :param tower_dnn_hidden_units: list, list of positive integer or empty list, the layer number and units
        in each layer of the task-specific DNN.
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector.
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN.
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN.
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN.
    :param task_types: list of str, the loss of each task: ``"binary"`` for binary logloss or
        ``"regression"`` for regression loss. e.g. ['binary', 'regression']
    :param task_names: list of str, the prediction target of each task. Each name is also used to name
        the task tower (``'tower_' + name``) and the task output layer.
    :return: A Keras model instance with one output per task, in ``task_names`` order.
    :raises ValueError: if fewer than two tasks are given, if ``task_types`` and ``task_names`` differ in
        length, or if a task type is neither ``"binary"`` nor ``"regression"``.
    """
    # ---- argument validation (runs before any layer is created) ----
    task_count = len(task_names)
    if task_count <= 1:
        raise ValueError("num_tasks must be greater than 1")
    if task_count != len(task_types):
        raise ValueError("num_tasks must be equal to the length of task_types")

    supported_task_types = ['binary', 'regression']
    for kind in task_types:
        if kind in supported_task_types:
            continue
        raise ValueError("task must be binary or regression, {} is illegal".format(kind))

    # ---- shared input / embedding part ----
    features = build_input_features(dnn_feature_columns)
    model_inputs = list(features.values())

    sparse_embeddings, dense_values = input_from_feature_columns(features, dnn_feature_columns,
                                                                 l2_reg_embedding, seed)
    bottom_input = combined_dnn_input(sparse_embeddings, dense_values)
    shared_representation = DNN(bottom_dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn,
                                seed=seed)(bottom_input)

    def build_task_head(kind, target):
        # Tower DNN -> single logit -> task-specific prediction layer, all fed by the shared bottom.
        hidden = DNN(tower_dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed,
                     name='tower_' + target)(shared_representation)
        logit = Dense(1, use_bias=False)(hidden)
        return PredictionLayer(kind, name=target)(logit)

    # Heads are created task by task, in the order given by the caller.
    task_outputs = [build_task_head(kind, target) for kind, target in zip(task_types, task_names)]

    return Model(inputs=model_inputs, outputs=task_outputs)
def NFM(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(256, 128, 64),
        l2_reg_embedding=1e-5, l2_reg_linear=1e-5, l2_reg_dnn=0, seed=1024, bi_dropout=0,
        dnn_dropout=0, dnn_activation='relu', task='binary'):
    """Build the Neural Factorization Machine: a linear part plus a DNN fed by bi-interaction pooling.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param dnn_hidden_units: list, list of positive integer or empty list, the layer number and units in
        each layer of deep net.
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector.
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part.
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN.
    :param seed: integer, to use as random seed.
    :param bi_dropout: float. When truthy (non-zero), a Dropout layer with this rate is applied to the
        output of the BiInteractionPooling layer; otherwise no dropout layer is added there.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in deep net.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss.
    :return: A Keras model instance.
    """
    features = build_input_features(linear_feature_columns + dnn_feature_columns)
    model_inputs = list(features.values())

    # Wide (first-order) part.
    wide_logit = get_linear_logit(features, linear_feature_columns, seed=seed, prefix='linear',
                                  l2_reg=l2_reg_linear)

    # Deep part: embeddings are concatenated along axis 1 and pooled by the bi-interaction layer.
    embeddings, dense_values = input_from_feature_columns(features, dnn_feature_columns,
                                                          l2_reg_embedding, seed)
    stacked_embeddings = concat_func(embeddings, axis=1)
    pooled = BiInteractionPooling()(stacked_embeddings)
    if bi_dropout:
        pooled = Dropout(bi_dropout)(pooled, training=None)

    deep_features = combined_dnn_input([pooled], dense_values)
    # Batch normalisation is always off for this DNN (fifth positional argument).
    hidden = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, False, seed=seed)(deep_features)
    deep_logit = Dense(1, use_bias=False)(hidden)

    combined_logit = add_func([wide_logit, deep_logit])
    prediction = PredictionLayer(task)(combined_logit)

    return Model(inputs=model_inputs, outputs=prediction)
def ONN(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(256, 128, 64),
        l2_reg_embedding=1e-5, l2_reg_linear=1e-5, l2_reg_dnn=0, dnn_dropout=0,
        seed=1024, use_bn=True, reduce_sum=False, task='binary',
        ):
    """Instantiates the Operation-aware Neural Networks architecture.

    Every sparse / variable-length sparse field owns one embedding table per partner field. For each
    unordered pair of fields the two partner-specific embeddings are multiplied element-wise, and the
    concatenated products feed the DNN.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param dnn_hidden_units: list, list of positive integer or empty list, the layer number and units in
        each layer of deep net.
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector.
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part.
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param seed: integer, to use as random seed (forwarded to the linear part and to the DNN).
    :param use_bn: bool, whether use bn after ffm out or not.
    :param reduce_sum: bool, whether apply reduce_sum on cross vector.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss.
    :return: A Keras model instance.
    """
    features = build_input_features(linear_feature_columns + dnn_feature_columns)
    inputs_list = list(features.values())

    linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed, prefix='linear',
                                    l2_reg=l2_reg_linear)

    sparse_feature_columns = list(
        filter(lambda x: isinstance(x, SparseFeat), dnn_feature_columns)) if dnn_feature_columns else []
    varlen_sparse_feature_columns = list(
        filter(lambda x: isinstance(x, VarLenSparseFeat), dnn_feature_columns)) if dnn_feature_columns else []
    field_columns = sparse_feature_columns + varlen_sparse_feature_columns

    # sparse_embedding[owner][partner] is the table used to embed field `owner` when it interacts with
    # field `partner`. Both levels are keyed by `embedding_name`; `feature_embedding` must use the same keys.
    sparse_embedding = {
        fc_j.embedding_name: {
            fc_i.embedding_name: Embedding(fc_j.vocabulary_size, fc_j.embedding_dim,
                                           embeddings_initializer=fc_j.embeddings_initializer,
                                           embeddings_regularizer=l2(l2_reg_embedding),
                                           mask_zero=isinstance(fc_j, VarLenSparseFeat),
                                           name='sparse_emb_' + str(fc_j.embedding_name) + '_' +
                                                fc_i.embedding_name)
            for fc_i in field_columns}
        for fc_j in field_columns}

    dense_value_list = get_dense_input(features, dnn_feature_columns)

    embed_list = []
    for fc_i, fc_j in itertools.combinations(field_columns, 2):
        i_input = features[fc_i.name]
        if fc_i.use_hash:
            i_input = Hash(fc_i.vocabulary_size)(i_input)
        j_input = features[fc_j.name]
        if fc_j.use_hash:
            j_input = Hash(fc_j.vocabulary_size)(j_input)

        fc_i_embedding = feature_embedding(fc_i, fc_j, sparse_embedding, i_input)
        fc_j_embedding = feature_embedding(fc_j, fc_i, sparse_embedding, j_input)

        element_wise_prod = multiply([fc_i_embedding, fc_j_embedding])
        if reduce_sum:
            # Collapse the embedding axis so each field pair contributes a single value.
            element_wise_prod = Lambda(lambda x: K.sum(x, axis=-1))(element_wise_prod)
        embed_list.append(element_wise_prod)

    ffm_out = Flatten()(concat_func(embed_list, axis=1))
    if use_bn:
        ffm_out = BatchNormalization()(ffm_out)

    dnn_input = combined_dnn_input([ffm_out], dense_value_list)
    # Forward `seed` so the documented random seed also governs the DNN, as in the other models.
    dnn_out = DNN(dnn_hidden_units, l2_reg=l2_reg_dnn, dropout_rate=dnn_dropout, seed=seed)(dnn_input)
    dnn_logit = Dense(1, use_bias=False)(dnn_out)

    final_logit = add_func([dnn_logit, linear_logit])
    output = PredictionLayer(task)(final_logit)

    model = Model(inputs=inputs_list, outputs=output)
    return model


def feature_embedding(fc_i, fc_j, embedding_dict, input_feature):
    """Embed ``input_feature`` of field ``fc_i`` with the table dedicated to its partner field ``fc_j``.

    :param fc_i: feature column that owns ``input_feature``.
    :param fc_j: partner feature column selecting which of ``fc_i``'s tables is used.
    :param embedding_dict: nested dict ``{owner embedding_name: {partner embedding_name: embedding layer}}``,
        as built by ``ONN``.
    :param input_feature: input tensor of ``fc_i`` (already hashed by the caller when required).
    :return: the embedded tensor; mask removed for ``SparseFeat`` columns, sequence-pooled with
        ``fc_i.combiner`` for any other column type.
    :raises KeyError: if ``embedding_dict`` has no table for this (owner, partner) pair.
    """
    # The dict is keyed by `embedding_name` (see ONN). Looking it up by `name` raised KeyError
    # whenever a column was created with an embedding_name different from its name.
    embedding_layer = embedding_dict[fc_i.embedding_name][fc_j.embedding_name]
    fc_i_embedding = embedding_layer(input_feature)
    if isinstance(fc_i, SparseFeat):
        return NoMask()(fc_i_embedding)
    return SequencePoolingLayer(fc_i.combiner, supports_masking=True)(fc_i_embedding)


def PNN(dnn_feature_columns, dnn_hidden_units=(256, 128, 64), l2_reg_embedding=0.00001, l2_reg_dnn=0,
        seed=1024, dnn_dropout=0, dnn_activation='relu', use_inner=True, use_outter=False, kernel_type='mat',
        task='binary'):
    """Instantiates the Product-based Neural Network architecture.

    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param dnn_hidden_units: list, list of positive integer or empty list, the layer number and units in
        each layer of deep net.
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector.
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN.
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN.
    :param use_inner: bool, whether use inner-product or not.
    :param use_outter: bool, whether use outter-product or not.
    :param kernel_type: str, kernel_type used in outter-product, can be ``'mat'``, ``'vec'`` or ``'num'``.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss.
    :return: A Keras model instance.
    :raises ValueError: if ``kernel_type`` is not one of ``'mat'``, ``'vec'`` or ``'num'``.
    """
    if kernel_type not in ['mat', 'vec', 'num']:
        raise ValueError("kernel_type must be mat,vec or num")

    features = build_input_features(dnn_feature_columns)
    inputs_list = list(features.values())

    sparse_embedding_list, dense_value_list = input_from_feature_columns(features, dnn_feature_columns,
                                                                         l2_reg_embedding, seed)

    # Only instantiate the product layers that were requested, so a disabled product adds no layer to
    # the graph. Creation order (inner, then outer) matches the concatenation order below.
    product_signals = []
    if use_inner:
        product_signals.append(Flatten()(InnerProductLayer()(sparse_embedding_list)))
    if use_outter:
        product_signals.append(OutterProductLayer(kernel_type)(sparse_embedding_list))

    # Linear signal: all embeddings concatenated and reshaped to a flat vector per sample.
    linear_signal = Reshape(
        [sum(map(lambda x: int(x.shape[-1]), sparse_embedding_list))])(concat_func(sparse_embedding_list))

    if product_signals:
        deep_input = concat_func([linear_signal] + product_signals)
    else:
        deep_input = linear_signal

    dnn_input = combined_dnn_input([deep_input], dense_value_list)
    dnn_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, False, seed=seed)(dnn_input)
    dnn_logit = Dense(1, use_bias=False)(dnn_out)

    output = PredictionLayer(task)(dnn_logit)

    model = Model(inputs=inputs_list, outputs=output)
    return model
def BST(dnn_feature_columns, history_feature_list, transformer_num=1, att_head_num=8,
        use_bn=False, dnn_hidden_units=(256, 128, 64), dnn_activation='relu', l2_reg_dnn=0,
        l2_reg_embedding=1e-6, dnn_dropout=0.0, seed=1024, task='binary'):
    """Build the Behavior Sequence Transformer (BST) model.

    The behaviour history (variable-length columns named ``"hist_" + <feature>``) goes through a stack of
    Transformer layers, is attention-pooled against the query embedding, and is concatenated with the
    remaining embeddings before the DNN. The input features must contain a ``"seq_length"`` entry.

    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param history_feature_list: list, to indicate sequence sparse field.
    :param transformer_num: int, the number of transformer layer.
    :param att_head_num: int, the number of heads in multi-head self attention.
    :param use_bn: bool. Whether use BatchNormalization before activation or not in deep net.
    :param dnn_hidden_units: list, list of positive integer or empty list, the layer number and units in
        each layer of DNN.
    :param dnn_activation: Activation function to use in DNN.
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN.
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param seed: integer, to use as random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss.
    :return: A Keras model instance.
    """
    features = build_input_features(dnn_feature_columns)
    model_inputs = list(features.values())

    seq_length = features["seq_length"]

    # Split the feature columns by kind.
    sparse_columns = [fc for fc in dnn_feature_columns
                      if isinstance(fc, SparseFeat)] if dnn_feature_columns else []
    dense_columns = [fc for fc in dnn_feature_columns
                     if isinstance(fc, DenseFeat)] if dnn_feature_columns else []
    varlen_columns = [fc for fc in dnn_feature_columns
                      if isinstance(fc, VarLenSparseFeat)] if dnn_feature_columns else []

    # Variable-length columns are either behaviour-history columns or ordinary pooled sequences.
    history_names = ["hist_" + name for name in history_feature_list]
    history_columns = [fc for fc in varlen_columns if fc.name in history_names]
    pooled_seq_columns = [fc for fc in varlen_columns if fc.name not in history_names]

    embedding_dict = create_embedding_matrix(dnn_feature_columns, l2_reg_embedding, seed, prefix="",
                                             seq_mask_zero=True)

    query_embeddings = embedding_lookup(embedding_dict, features, sparse_columns,
                                        return_feat_list=history_feature_list, to_list=True)
    history_embeddings = embedding_lookup(embedding_dict, features, history_columns,
                                          return_feat_list=history_names, to_list=True)
    deep_embeddings = embedding_lookup(embedding_dict, features, sparse_columns,
                                       mask_feat_list=history_feature_list, to_list=True)
    dense_values = get_dense_input(features, dense_columns)

    seq_embedding_dict = varlen_embedding_lookup(embedding_dict, features, pooled_seq_columns)
    pooled_seq_embeddings = get_varlen_pooling_list(seq_embedding_dict, features, pooled_seq_columns,
                                                    to_list=True)
    deep_embeddings += pooled_seq_embeddings

    query = concat_func(query_embeddings)
    deep_part = concat_func(deep_embeddings)
    encoded_history = concat_func(history_embeddings)

    # Stack of self-attention blocks over the behaviour sequence; the per-head size is derived from the
    # current last dimension on every iteration.
    for _ in range(transformer_num):
        head_size = encoded_history.get_shape().as_list()[-1] // att_head_num
        block = Transformer(att_embedding_size=head_size, head_num=att_head_num,
                            dropout_rate=dnn_dropout, use_positional_encoding=True, use_res=True,
                            use_feed_forward=True, use_layer_norm=True, blinding=False, seed=seed,
                            supports_masking=False, output_type=None)
        encoded_history = block([encoded_history, encoded_history, seq_length, seq_length])

    pooled_history = AttentionSequencePoolingLayer(att_hidden_units=(64, 16), weight_normalization=True,
                                                   supports_masking=False)(
        [query, encoded_history, seq_length])

    deep_part = concat_func([deep_part, pooled_history], axis=-1)
    deep_part = Flatten()(deep_part)

    dnn_input = combined_dnn_input([deep_part], dense_values)
    hidden = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, use_bn, seed=seed)(dnn_input)
    logit = Dense(1, use_bias=False)(hidden)
    prediction = PredictionLayer(task)(logit)

    return Model(inputs=model_inputs, outputs=prediction)
def auxiliary_loss(h_states, click_seq, noclick_seq, mask, stag=None):
    """Compute the auxiliary loss between GRU hidden states and the clicked / non-clicked sequences.

    A small sigmoid DNN scores ``[h_state, behaviour]`` pairs; the loss is the masked mean of
    ``-log(p_click) - log(1 - p_noclick)``.

    :param h_states: hidden states, concatenated with each behaviour sequence along the last axis.
    :param click_seq: embeddings of the clicked behaviours; dimensions 1 and 2 of its static shape are read.
    :param noclick_seq: embeddings of the sampled non-clicked behaviours, [B,T-1,E].
    :param mask: sequence lengths used to build the validity mask, [B,1].
    :param stag: forwarded to the auxiliary DNN call.
    :return: scalar loss tensor.
    """
    hist_len, _ = click_seq.get_shape().as_list()[1:]
    mask = tf.sequence_mask(mask, hist_len)
    mask = mask[:, 0, :]
    mask = tf.cast(mask, tf.float32)

    click_input_ = tf.concat([h_states, click_seq], -1)
    noclick_input_ = tf.concat([h_states, noclick_seq], -1)

    # The same network instance scores both the positive and the negative pairs.
    auxiliary_nn = DNN([100, 50, 1], activation='sigmoid')
    click_prop_ = auxiliary_nn(click_input_, stag=stag)[:, :, 0]
    noclick_prop_ = auxiliary_nn(noclick_input_, stag=stag)[:, :, 0]  # [B,T-1]

    # `tf.log` is missing from newer TensorFlow releases; resolve the function once instead of wrapping
    # each loss expression in a broad try/except that could hide unrelated AttributeErrors.
    log = getattr(tf, "log", None)
    if log is None:
        log = tf.compat.v1.log

    click_loss_ = - tf.reshape(log(click_prop_), [-1, tf.shape(click_seq)[1]]) * mask
    noclick_loss_ = - tf.reshape(log(1.0 - noclick_prop_), [-1, tf.shape(noclick_seq)[1]]) * mask

    loss_ = reduce_mean(click_loss_ + noclick_loss_)
    return loss_


def interest_evolution(concat_behavior, deep_input_item, user_behavior_length, gru_type="GRU", use_neg=False,
                       neg_concat_behavior=None, att_hidden_size=(64, 16), att_activation='sigmoid',
                       att_weight_normalization=False, ):
    """Build the interest extractor / interest evolving part of DIEN.

    :param concat_behavior: behaviour sequence embeddings fed to the first GRU.
    :param deep_input_item: query (candidate item) embedding used by the attention layer.
    :param user_behavior_length: sequence lengths passed to the GRU and attention layers.
    :param gru_type: str, one of ``"GRU"``, ``"AIGRU"``, ``"AGRU"``, ``"AUGRU"``.
    :param use_neg: bool, whether negative samples are available for the auxiliary loss.
    :param neg_concat_behavior: negative behaviour sequence embeddings; read only when the auxiliary loss
        is computed.
    :param att_hidden_size: hidden units of the attention net.
    :param att_activation: activation function of the attention net.
    :param att_weight_normalization: bool, whether to normalize the attention scores.
    :return: tuple ``(hist, aux_loss_1)``. ``aux_loss_1`` is ``None`` unless ``gru_type == "AUGRU"`` and
        ``use_neg`` is true.
    :raises ValueError: if ``gru_type`` is not one of the supported values.
    """
    if gru_type not in ["GRU", "AIGRU", "AGRU", "AUGRU"]:
        raise ValueError("gru_type error ")

    aux_loss_1 = None
    embedding_size = None
    rnn_outputs = DynamicGRU(embedding_size, return_sequence=True,
                             name="gru1")([concat_behavior, user_behavior_length])

    if gru_type == "AUGRU" and use_neg:
        # Hidden state at step t is paired with the behaviour at step t + 1, hence the shifted slices
        # and the length reduced by one.
        aux_loss_1 = auxiliary_loss(rnn_outputs[:, :-1, :], concat_behavior[:, 1:, :],
                                    neg_concat_behavior[:, 1:, :],
                                    tf.subtract(user_behavior_length, 1), stag="gru")  # [:, 1:]

    if gru_type == "GRU":
        rnn_outputs2 = DynamicGRU(embedding_size, return_sequence=True,
                                  name="gru2")([rnn_outputs, user_behavior_length])
        hist = AttentionSequencePoolingLayer(att_hidden_units=att_hidden_size, att_activation=att_activation,
                                             weight_normalization=att_weight_normalization,
                                             return_score=False)(
            [deep_input_item, rnn_outputs2, user_behavior_length])
    else:  # AIGRU AGRU AUGRU
        scores = AttentionSequencePoolingLayer(att_hidden_units=att_hidden_size, att_activation=att_activation,
                                               weight_normalization=att_weight_normalization,
                                               return_score=True)(
            [deep_input_item, rnn_outputs, user_behavior_length])

        if gru_type == "AIGRU":
            # Attention scores rescale the GRU inputs; the second GRU itself is a plain GRU.
            hist = multiply([rnn_outputs, Permute([2, 1])(scores)])
            final_state2 = DynamicGRU(embedding_size, gru_type="GRU", return_sequence=False, name='gru2')(
                [hist, user_behavior_length])
        else:  # AGRU AUGRU
            # Attention scores are handed to the GRU as a third input.
            final_state2 = DynamicGRU(embedding_size, gru_type=gru_type, return_sequence=False,
                                      name='gru2')([rnn_outputs, user_behavior_length,
                                                    Permute([2, 1])(scores)])
        hist = final_state2
    return hist, aux_loss_1


def DIEN(dnn_feature_columns, history_feature_list,
         gru_type="GRU", use_negsampling=False, alpha=1.0, use_bn=False, dnn_hidden_units=(256, 128, 64),
         dnn_activation='relu',
         att_hidden_units=(64, 16), att_activation="dice", att_weight_normalization=True,
         l2_reg_dnn=0, l2_reg_embedding=1e-6, dnn_dropout=0, seed=1024, task='binary'):
    """Instantiates the Deep Interest Evolution Network architecture.

    History columns are the variable-length columns named ``"hist_" + <feature>``; their negative samples
    are named ``"neg_hist_" + <feature>``. The input features must contain a ``"seq_length"`` entry.

    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param history_feature_list: list, to indicate sequence sparse field.
    :param gru_type: str, can be GRU AIGRU AUGRU AGRU.
    :param use_negsampling: bool, whether or not use negative sampling. The auxiliary loss is only
        produced when ``gru_type`` is ``"AUGRU"``; for other gru types no auxiliary loss is added.
    :param alpha: float, weight of auxiliary_loss.
    :param use_bn: bool. Whether use BatchNormalization before activation or not in deep net.
    :param dnn_hidden_units: list, list of positive integer or empty list, the layer number and units in
        each layer of DNN.
    :param dnn_activation: Activation function to use in DNN.
    :param att_hidden_units: list, list of positive integer, the layer number and units in each layer of
        attention net.
    :param att_activation: Activation function to use in attention net.
    :param att_weight_normalization: bool. Whether normalize the attention score of local activation unit.
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN.
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param seed: integer, to use as random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss.
    :return: A Keras model instance.
    """
    features = build_input_features(dnn_feature_columns)

    user_behavior_length = features["seq_length"]

    sparse_feature_columns = list(
        filter(lambda x: isinstance(x, SparseFeat), dnn_feature_columns)) if dnn_feature_columns else []
    dense_feature_columns = list(
        filter(lambda x: isinstance(x, DenseFeat), dnn_feature_columns)) if dnn_feature_columns else []
    varlen_sparse_feature_columns = list(
        filter(lambda x: isinstance(x, VarLenSparseFeat), dnn_feature_columns)) if dnn_feature_columns else []

    # Split variable-length columns into history, negative-history and ordinary pooled sequences.
    history_feature_columns = []
    neg_history_feature_columns = []
    sparse_varlen_feature_columns = []
    history_fc_names = list(map(lambda x: "hist_" + x, history_feature_list))
    neg_history_fc_names = list(map(lambda x: "neg_" + x, history_fc_names))
    for fc in varlen_sparse_feature_columns:
        feature_name = fc.name
        if feature_name in history_fc_names:
            history_feature_columns.append(fc)
        elif feature_name in neg_history_fc_names:
            neg_history_feature_columns.append(fc)
        else:
            sparse_varlen_feature_columns.append(fc)

    inputs_list = list(features.values())

    embedding_dict = create_embedding_matrix(dnn_feature_columns, l2_reg_embedding, seed, prefix="",
                                             seq_mask_zero=False)

    query_emb_list = embedding_lookup(embedding_dict, features, sparse_feature_columns,
                                      return_feat_list=history_feature_list, to_list=True)
    keys_emb_list = embedding_lookup(embedding_dict, features, history_feature_columns,
                                     return_feat_list=history_fc_names, to_list=True)
    dnn_input_emb_list = embedding_lookup(embedding_dict, features, sparse_feature_columns,
                                          mask_feat_list=history_feature_list, to_list=True)
    dense_value_list = get_dense_input(features, dense_feature_columns)

    sequence_embed_dict = varlen_embedding_lookup(embedding_dict, features, sparse_varlen_feature_columns)
    sequence_embed_list = get_varlen_pooling_list(sequence_embed_dict, features, sparse_varlen_feature_columns,
                                                  to_list=True)
    dnn_input_emb_list += sequence_embed_list

    keys_emb = concat_func(keys_emb_list)
    deep_input_emb = concat_func(dnn_input_emb_list)
    query_emb = concat_func(query_emb_list)

    if use_negsampling:
        neg_uiseq_embed_list = embedding_lookup(embedding_dict, features, neg_history_feature_columns,
                                                neg_history_fc_names, to_list=True)
        neg_concat_behavior = concat_func(neg_uiseq_embed_list)
    else:
        neg_concat_behavior = None

    hist, aux_loss_1 = interest_evolution(keys_emb, query_emb, user_behavior_length, gru_type=gru_type,
                                          use_neg=use_negsampling, neg_concat_behavior=neg_concat_behavior,
                                          att_hidden_size=att_hidden_units,
                                          att_activation=att_activation,
                                          att_weight_normalization=att_weight_normalization, )

    deep_input_emb = Concatenate()([deep_input_emb, hist])
    deep_input_emb = Flatten()(deep_input_emb)

    dnn_input = combined_dnn_input([deep_input_emb], dense_value_list)
    output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, use_bn, seed=seed)(dnn_input)
    final_logit = Dense(1, use_bias=False,
                        kernel_initializer=tf.keras.initializers.glorot_normal(seed))(output)
    output = PredictionLayer(task)(final_logit)

    model = Model(inputs=inputs_list, outputs=output)

    # interest_evolution returns None as the auxiliary loss unless gru_type is "AUGRU"; multiplying
    # None by alpha would raise a TypeError, so only register the loss when one was actually built.
    if use_negsampling and aux_loss_1 is not None:
        model.add_loss(alpha * aux_loss_1)
    try:
        tf.keras.backend.get_session().run(tf.global_variables_initializer())
    except AttributeError:
        # Newer TensorFlow: the session helpers live under tf.compat.v1.
        tf.compat.v1.keras.backend.get_session().run(tf.compat.v1.global_variables_initializer())
        tf.compat.v1.experimental.output_all_intermediates(True)
    return model
(https://arxiv.org/pdf/1706.06978.pdf) """ from tensorflow.python.keras.layers import Dense, Flatten from tensorflow.python.keras.models import Model from ...feature_column import SparseFeat, VarLenSparseFeat, DenseFeat, build_input_features from ...inputs import create_embedding_matrix, embedding_lookup, get_dense_input, varlen_embedding_lookup, \ get_varlen_pooling_list from ...layers.core import DNN, PredictionLayer from ...layers.sequence import AttentionSequencePoolingLayer from ...layers.utils import concat_func, combined_dnn_input def DIN(dnn_feature_columns, history_feature_list, dnn_use_bn=False, dnn_hidden_units=(256, 128, 64), dnn_activation='relu', att_hidden_size=(80, 40), att_activation="dice", att_weight_normalization=False, l2_reg_dnn=0, l2_reg_embedding=1e-6, dnn_dropout=0, seed=1024, task='binary'): """Instantiates the Deep Interest Network architecture. :param dnn_feature_columns: An iterable containing all the features used by deep part of the model. :param history_feature_list: list,to indicate sequence sparse field :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in deep net :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of deep net :param dnn_activation: Activation function to use in deep net :param att_hidden_size: list,list of positive integer , the layer number and units in each layer of attention net :param att_activation: Activation function to use in attention net :param att_weight_normalization: bool.Whether normalize the attention score of local activation unit. :param l2_reg_dnn: float. L2 regularizer strength applied to DNN :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate. :param seed: integer ,to use as random seed. 
:param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss :return: A Keras model instance. """ features = build_input_features(dnn_feature_columns) sparse_feature_columns = list( filter(lambda x: isinstance(x, SparseFeat), dnn_feature_columns)) if dnn_feature_columns else [] dense_feature_columns = list( filter(lambda x: isinstance(x, DenseFeat), dnn_feature_columns)) if dnn_feature_columns else [] varlen_sparse_feature_columns = list( filter(lambda x: isinstance(x, VarLenSparseFeat), dnn_feature_columns)) if dnn_feature_columns else [] history_feature_columns = [] sparse_varlen_feature_columns = [] history_fc_names = list(map(lambda x: "hist_" + x, history_feature_list)) for fc in varlen_sparse_feature_columns: feature_name = fc.name if feature_name in history_fc_names: history_feature_columns.append(fc) else: sparse_varlen_feature_columns.append(fc) inputs_list = list(features.values()) embedding_dict = create_embedding_matrix(dnn_feature_columns, l2_reg_embedding, seed, prefix="") query_emb_list = embedding_lookup(embedding_dict, features, sparse_feature_columns, history_feature_list, history_feature_list, to_list=True) keys_emb_list = embedding_lookup(embedding_dict, features, history_feature_columns, history_fc_names, history_fc_names, to_list=True) dnn_input_emb_list = embedding_lookup(embedding_dict, features, sparse_feature_columns, mask_feat_list=history_feature_list, to_list=True) dense_value_list = get_dense_input(features, dense_feature_columns) sequence_embed_dict = varlen_embedding_lookup(embedding_dict, features, sparse_varlen_feature_columns) sequence_embed_list = get_varlen_pooling_list(sequence_embed_dict, features, sparse_varlen_feature_columns, to_list=True) dnn_input_emb_list += sequence_embed_list keys_emb = concat_func(keys_emb_list, mask=True) deep_input_emb = concat_func(dnn_input_emb_list) query_emb = concat_func(query_emb_list, mask=True) hist = AttentionSequencePoolingLayer(att_hidden_size, att_activation, 
def DSIN(dnn_feature_columns, sess_feature_list, sess_max_count=5, bias_encoding=False,
         att_embedding_size=1, att_head_num=8, dnn_hidden_units=(256, 128, 64), dnn_activation='relu', dnn_dropout=0,
         dnn_use_bn=False, l2_reg_dnn=0, l2_reg_embedding=1e-6, seed=1024, task='binary',
         ):
    """Instantiates the Deep Session Interest Network architecture.

    Session inputs are read from the built input features under the keys
    ``"sess_<idx>_<feat>"`` for ``idx`` in ``range(sess_max_count)`` and ``feat`` in
    ``sess_feature_list``. An additional ``sess_length`` input of shape ``(1,)`` is created
    here and appended to the model inputs.

    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param sess_feature_list: list,to indicate sequence sparse field
    :param sess_max_count: positive int, to indicate the max number of sessions
    :param bias_encoding: bool. Whether use bias encoding or postional encoding
    :param att_embedding_size: positive int, the embedding size of each attention head
    :param att_head_num: positive int, the number of attention head
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of deep net
    :param dnn_activation: Activation function to use in deep net
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in deep net
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param seed: integer ,to use as random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :raises ValueError: if ``att_embedding_size * att_head_num`` differs from the summed
        ``embedding_dim`` of the feature columns named in ``sess_feature_list``.
    :return: A Keras model instance.

    """
    # Total embedding width of the session features; the Transformer output width
    # (heads * per-head size) has to match it, which is validated right below.
    hist_emb_size = sum(fc.embedding_dim for fc in dnn_feature_columns if fc.name in sess_feature_list)

    if att_embedding_size * att_head_num != hist_emb_size:
        raise ValueError(
            "hist_emb_size must equal to att_embedding_size * att_head_num ,got %d != %d *%d" % (
                hist_emb_size, att_embedding_size, att_head_num))

    features = build_input_features(dnn_feature_columns)

    sparse_feature_columns = [fc for fc in dnn_feature_columns
                              if isinstance(fc, SparseFeat)] if dnn_feature_columns else []
    dense_feature_columns = [fc for fc in dnn_feature_columns
                             if isinstance(fc, DenseFeat)] if dnn_feature_columns else []

    inputs_list = list(features.values())

    # Group the per-session input tensors: {"sess_<idx>": {feat: tensor, ...}, ...}.
    user_behavior_input_dict = {}
    for idx in range(sess_max_count):
        sess_name = "sess_" + str(idx)
        user_behavior_input_dict[sess_name] = OrderedDict(
            (feat, features[sess_name + "_" + feat]) for feat in sess_feature_list)

    user_sess_length = Input(shape=(1,), name='sess_length')

    # One Embedding layer per sparse feature, keyed by embedding_name.
    # mask_zero is switched on only for the features that make up a session.
    embedding_dict = {feat.embedding_name: Embedding(feat.vocabulary_size, feat.embedding_dim,
                                                     embeddings_initializer=feat.embeddings_initializer,
                                                     embeddings_regularizer=l2(l2_reg_embedding),
                                                     name='sparse_emb_' + str(i) + '-' + feat.name,
                                                     mask_zero=(feat.name in sess_feature_list))
                      for i, feat in enumerate(sparse_feature_columns)}

    query_emb_list = embedding_lookup(embedding_dict, features, sparse_feature_columns, sess_feature_list,
                                      sess_feature_list, to_list=True)
    dnn_input_emb_list = embedding_lookup(embedding_dict, features, sparse_feature_columns,
                                          mask_feat_list=sess_feature_list, to_list=True)
    dense_value_list = get_dense_input(features, dense_feature_columns)

    query_emb = concat_func(query_emb_list, mask=True)

    dnn_input_emb = Flatten()(concat_func(dnn_input_emb_list))

    # Session division -> shared self-attention extractor applied to every session.
    tr_input = sess_interest_division(embedding_dict, user_behavior_input_dict, sparse_feature_columns,
                                      sess_feature_list, sess_max_count, bias_encoding=bias_encoding)

    Self_Attention = Transformer(att_embedding_size, att_head_num, dropout_rate=0, use_layer_norm=False,
                                 use_positional_encoding=(not bias_encoding), seed=seed, supports_masking=True,
                                 blinding=True)
    sess_fea = sess_interest_extractor(
        tr_input, sess_max_count, Self_Attention)

    # Attention pooling over the raw session interests, using the query item.
    interest_attention_layer = AttentionSequencePoolingLayer(att_hidden_units=(64, 16), weight_normalization=True,
                                                             supports_masking=False)(
        [query_emb, sess_fea, user_sess_length])

    # Attention pooling over the Bi-LSTM outputs computed from the session interests.
    lstm_outputs = BiLSTM(hist_emb_size,
                          layers=2, res_layers=0, dropout_rate=0.2, )(sess_fea)
    lstm_attention_layer = AttentionSequencePoolingLayer(att_hidden_units=(64, 16), weight_normalization=True)(
        [query_emb, lstm_outputs, user_sess_length])

    dnn_input_emb = Concatenate()(
        [dnn_input_emb, Flatten()(interest_attention_layer), Flatten()(lstm_attention_layer)])

    dnn_input_emb = combined_dnn_input([dnn_input_emb], dense_value_list)
    output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)(dnn_input_emb)
    output = Dense(1, use_bias=False)(output)
    output = PredictionLayer(task)(output)

    # The session-length input is created in this function, so it is added explicitly.
    model = Model(inputs=inputs_list + [user_sess_length], outputs=output)

    return model
def WDL(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(256, 128, 64), l2_reg_linear=0.00001,
        l2_reg_embedding=0.00001, l2_reg_dnn=0, seed=1024, dnn_dropout=0, dnn_activation='relu',
        task='binary', dnn_use_bn=False):
    """Instantiates the Wide&Deep Learning architecture.

    The final logit is the sum of the wide (linear) logit and the logit produced by a
    bias-free ``Dense(1)`` on top of the deep network.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to wide part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param seed: integer ,to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN.
        Defaults to ``False``, which is the value that was previously hard-coded.
    :return: A Keras model instance.
    """

    features = build_input_features(
        linear_feature_columns + dnn_feature_columns)

    inputs_list = list(features.values())

    # Wide part.
    linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed, prefix='linear',
                                    l2_reg=l2_reg_linear)

    # Deep part.
    sparse_embedding_list, dense_value_list = input_from_feature_columns(features, dnn_feature_columns,
                                                                         l2_reg_embedding, seed)

    dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)
    dnn_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)(dnn_input)
    dnn_logit = Dense(1, use_bias=False)(dnn_out)

    final_logit = add_func([dnn_logit, linear_logit])

    output = PredictionLayer(task)(final_logit)

    model = Model(inputs=inputs_list, outputs=output)
    return model
arXiv preprint arXiv:1803.05170, 2018.(https://arxiv.org/pdf/1803.05170.pdf) """ from tensorflow.python.keras.models import Model from tensorflow.python.keras.layers import Dense from ..feature_column import build_input_features, get_linear_logit, input_from_feature_columns from ..layers.core import PredictionLayer, DNN from ..layers.interaction import CIN from ..layers.utils import concat_func, add_func, combined_dnn_input def xDeepFM(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(256, 128, 64), cin_layer_size=(128, 128,), cin_split_half=True, cin_activation='relu', l2_reg_linear=0.00001, l2_reg_embedding=0.00001, l2_reg_dnn=0, l2_reg_cin=0, seed=1024, dnn_dropout=0, dnn_activation='relu', dnn_use_bn=False, task='binary'): """Instantiates the xDeepFM architecture. :param linear_feature_columns: An iterable containing all the features used by linear part of the model. :param dnn_feature_columns: An iterable containing all the features used by deep part of the model. :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of deep net :param cin_layer_size: list,list of positive integer or empty list, the feature maps in each hidden layer of Compressed Interaction Network :param cin_split_half: bool.if set to True, half of the feature maps in each hidden will connect to output unit :param cin_activation: activation function used on feature maps :param l2_reg_linear: float. L2 regularizer strength applied to linear part :param l2_reg_embedding: L2 regularizer strength applied to embedding vector :param l2_reg_dnn: L2 regularizer strength applied to deep net :param l2_reg_cin: L2 regularizer strength applied to CIN. :param seed: integer ,to use as random seed. :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate. :param dnn_activation: Activation function to use in DNN :param dnn_use_bn: bool. 
def check_version(version):
    """Warn, from a background thread, when a newer DeepCTR release exists on PyPI.

    The PyPI JSON endpoint is queried for the released versions; pre-releases and
    post-releases are ignored. If the newest remaining release is greater than
    ``version`` a warning is logged. Any failure (network error, unparsable
    response, ...) results in a printed hint instead of an exception.

    :param version: str, the currently installed DeepCTR version.
    :return: None. The check runs asynchronously and its outcome is only logged/printed.
    """

    def check(current):
        try:
            url_pattern = 'https://pypi.python.org/pypi/deepctr/json'
            # Bound the wait: without a timeout a stalled connection would keep this
            # (non-daemon) thread, and therefore the interpreter, alive indefinitely.
            req = requests.get(url_pattern, timeout=10)
            latest_version = parse('0')
            current_version = parse(current)
            if req.status_code == requests.codes.ok:
                # Decode from text so this works on interpreters whose json.loads rejects bytes.
                j = json.loads(req.text)
                releases = j.get('releases', [])
                for release in releases:
                    ver = parse(release)
                    if ver.is_prerelease or ver.is_postrelease:
                        continue
                    latest_version = max(latest_version, ver)
                if latest_version > current_version:
                    logging.warning(
                        '\nDeepCTR version {0} detected. Your version is {1}.\nUse `pip install -U deepctr` to upgrade.Changelog: https://github.com/shenweichen/DeepCTR/releases/tag/v{0}'.format(
                            latest_version, current_version))
        except Exception:
            # Best-effort check: never let a failure here propagate, but do not
            # swallow SystemExit / KeyboardInterrupt the way a bare except would.
            print("Please check the latest version manually on https://pypi.org/project/deepctr/#history")
            return

    Thread(target=check, args=(version,)).start()
echo.If you don't have Sphinx installed, grab it from echo.http://sphinx-doc.org/ exit /b 1 ) %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% goto end :help %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% :end popd ================================================ FILE: docs/requirements.readthedocs.txt ================================================ tensorflow==2.6.2 recommonmark==0.7.1 ================================================ FILE: docs/source/Estimators.rst ================================================ DeepCTR Estimators API ====================== .. toctree:: CCPM FNN PNN WDL DeepFM NFM AFM DCN xDeepFM AutoInt FiBiNET ================================================ FILE: docs/source/Examples.md ================================================ # Examples ## Classification: Criteo The Criteo Display Ads dataset is for the purpose of predicting ads click-through rate. It has 13 integer features and 26 categorical features where each category has a high cardinality. ![image](../pics/criteo_sample.png) In this example,we simply normailize the dense feature between 0 and 1,you can try other transformation technique like log normalization or discretization.Then we use [SparseFeat](./Features.html#sparsefeat) and [DenseFeat](./Features.html#densefeat) to generate feature columns for sparse features and dense features. This example shows how to use ``DeepFM`` to solve a simple binary classification task. You can get the demo data [criteo_sample.txt](https://github.com/shenweichen/DeepCTR/tree/master/examples/criteo_sample.txt) and run the following codes. 
```python import pandas as pd from sklearn.metrics import log_loss, roc_auc_score from sklearn.model_selection import train_test_split from sklearn.preprocessing import LabelEncoder, MinMaxScaler from deepctr.models import * from deepctr.feature_column import SparseFeat, DenseFeat, get_feature_names if __name__ == "__main__": data = pd.read_csv('./criteo_sample.txt') sparse_features = ['C' + str(i) for i in range(1, 27)] dense_features = ['I' + str(i) for i in range(1, 14)] data[sparse_features] = data[sparse_features].fillna('-1', ) data[dense_features] = data[dense_features].fillna(0, ) target = ['label'] # 1.Label Encoding for sparse features,and do simple Transformation for dense features for feat in sparse_features: lbe = LabelEncoder() data[feat] = lbe.fit_transform(data[feat]) mms = MinMaxScaler(feature_range=(0, 1)) data[dense_features] = mms.fit_transform(data[dense_features]) # 2.count #unique features for each sparse field,and record dense feature field name fixlen_feature_columns = [SparseFeat(feat, vocabulary_size=data[feat].max() + 1, embedding_dim=4) for i, feat in enumerate(sparse_features)] + [DenseFeat(feat, 1, ) for feat in dense_features] dnn_feature_columns = fixlen_feature_columns linear_feature_columns = fixlen_feature_columns feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns) # 3.generate input data for model train, test = train_test_split(data, test_size=0.2, random_state=2020) train_model_input = {name: train[name] for name in feature_names} test_model_input = {name: test[name] for name in feature_names} # 4.Define Model,train,predict and evaluate model = DeepFM(linear_feature_columns, dnn_feature_columns, task='binary') model.compile("adam", "binary_crossentropy", metrics=['binary_crossentropy'], ) history = model.fit(train_model_input, train[target].values, batch_size=256, epochs=10, verbose=2, validation_split=0.2, ) pred_ans = model.predict(test_model_input, batch_size=256) print("test LogLoss", 
round(log_loss(test[target].values, pred_ans), 4)) print("test AUC", round(roc_auc_score(test[target].values, pred_ans), 4)) ``` ## Classification: Criteo with feature hashing on the fly This example shows how to use ``DeepFM`` to solve a simple binary classification task using feature hashing. You can get the demo data [criteo_sample.txt](https://github.com/shenweichen/DeepCTR/tree/master/examples/criteo_sample.txt) and run the following codes. ```python import pandas as pd from sklearn.metrics import log_loss, roc_auc_score from sklearn.model_selection import train_test_split from sklearn.preprocessing import MinMaxScaler from deepctr.models import DeepFM from deepctr.feature_column import SparseFeat, DenseFeat, get_feature_names if __name__ == "__main__": data = pd.read_csv('./criteo_sample.txt') sparse_features = ['C' + str(i) for i in range(1, 27)] dense_features = ['I' + str(i) for i in range(1, 14)] data[sparse_features] = data[sparse_features].fillna('-1', ) data[dense_features] = data[dense_features].fillna(0, ) target = ['label'] # 1.do simple Transformation for dense features mms = MinMaxScaler(feature_range=(0, 1)) data[dense_features] = mms.fit_transform(data[dense_features]) # 2.set hashing space for each sparse field,and record dense feature field name fixlen_feature_columns = [SparseFeat(feat, vocabulary_size=1000, embedding_dim=4, use_hash=True, dtype='string') # since the input is string for feat in sparse_features] + [DenseFeat(feat, 1, ) for feat in dense_features] linear_feature_columns = fixlen_feature_columns dnn_feature_columns = fixlen_feature_columns feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns, ) # 3.generate input data for model train, test = train_test_split(data, test_size=0.2, random_state=2020) train_model_input = {name: train[name] for name in feature_names} test_model_input = {name: test[name] for name in feature_names} # 4.Define Model,train,predict and evaluate model = 
DeepFM(linear_feature_columns, dnn_feature_columns, task='binary') model.compile("adam", "binary_crossentropy", metrics=['binary_crossentropy'], ) history = model.fit(train_model_input, train[target].values, batch_size=256, epochs=10, verbose=2, validation_split=0.2, ) pred_ans = model.predict(test_model_input, batch_size=256) print("test LogLoss", round(log_loss(test[target].values, pred_ans), 4)) print("test AUC", round(roc_auc_score(test[target].values, pred_ans), 4)) ``` ## Regression: Movielens The MovieLens data has been used for personalized tag recommendation,which contains 668, 953 tag applications of users on movies. Here is a small fraction of data include only sparse field. ![image](../pics/movielens_sample.png) This example shows how to use ``DeepFM`` to solve a simple binary regression task. You can get the demo data [movielens_sample.txt](https://github.com/shenweichen/DeepCTR/tree/master/examples/movielens_sample.txt) and run the following codes. ```python import pandas as pd from sklearn.metrics import mean_squared_error from sklearn.model_selection import train_test_split from sklearn.preprocessing import LabelEncoder from deepctr.models import DeepFM from deepctr.feature_column import SparseFeat, get_feature_names if __name__ == "__main__": data = pd.read_csv("./movielens_sample.txt") sparse_features = ["movie_id", "user_id", "gender", "age", "occupation", "zip"] target = ['rating'] # 1.Label Encoding for sparse features,and do simple Transformation for dense features for feat in sparse_features: lbe = LabelEncoder() data[feat] = lbe.fit_transform(data[feat]) # 2.count #unique features for each sparse field fixlen_feature_columns = [SparseFeat(feat, data[feat].max() + 1, embedding_dim=4) for feat in sparse_features] linear_feature_columns = fixlen_feature_columns dnn_feature_columns = fixlen_feature_columns feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns) # 3.generate input data for model train, test = 
train_test_split(data, test_size=0.2, random_state=2020) train_model_input = {name: train[name].values for name in feature_names} test_model_input = {name: test[name].values for name in feature_names} # 4.Define Model,train,predict and evaluate model = DeepFM(linear_feature_columns, dnn_feature_columns, task='regression') model.compile("adam", "mse", metrics=['mse'], ) history = model.fit(train_model_input, train[target].values, batch_size=256, epochs=10, verbose=2, validation_split=0.2, ) pred_ans = model.predict(test_model_input, batch_size=256) print("test MSE", round(mean_squared_error( test[target].values, pred_ans), 4)) ``` ## Multi-value Input : Movielens The MovieLens data has been used for personalized tag recommendation,which contains 668, 953 tag applications of users on movies. Here is a small fraction of data include sparse fields and a multivalent field. ![image](../pics/movielens_sample_with_genres.png) There are 2 additional steps to use DeepCTR with sequence feature input. 1. Generate the paded and encoded sequence feature of sequence input feature(**value 0 is for padding**). 2. Generate config of sequence feature with [VarLenSparseFeat](./Features.html#varlensparsefeat) This example shows how to use ``DeepFM`` with sequence(multi-value) feature. You can get the demo data [movielens_sample.txt](https://github.com/shenweichen/DeepCTR/tree/master/examples/movielens_sample.txt) and run the following codes. 
```python import numpy as np import pandas as pd from sklearn.preprocessing import LabelEncoder from tensorflow.python.keras.preprocessing.sequence import pad_sequences from deepctr.models import DeepFM from deepctr.feature_column import SparseFeat, VarLenSparseFeat, get_feature_names def split(x): key_ans = x.split('|') for key in key_ans: if key not in key2index: # Notice : input value 0 is a special "padding",so we do not use 0 to encode valid feature for sequence input key2index[key] = len(key2index) + 1 return list(map(lambda x: key2index[x], key_ans)) if __name__ == "__main__": data = pd.read_csv("./movielens_sample.txt") sparse_features = ["movie_id", "user_id", "gender", "age", "occupation", "zip", ] target = ['rating'] # 1.Label Encoding for sparse features,and process sequence features for feat in sparse_features: lbe = LabelEncoder() data[feat] = lbe.fit_transform(data[feat]) # preprocess the sequence feature key2index = {} genres_list = list(map(split, data['genres'].values)) genres_length = np.array(list(map(len, genres_list))) max_len = max(genres_length) # Notice : padding=`post` genres_list = pad_sequences(genres_list, maxlen=max_len, padding='post', ) # 2.count #unique features for each sparse field and generate feature config for sequence feature fixlen_feature_columns = [SparseFeat(feat, data[feat].max() + 1, embedding_dim=4) for feat in sparse_features] use_weighted_sequence = False if use_weighted_sequence: varlen_feature_columns = [VarLenSparseFeat(SparseFeat('genres', vocabulary_size=len( key2index) + 1, embedding_dim=4), maxlen=max_len, combiner='mean', weight_name='genres_weight')] # Notice : value 0 is for padding for sequence input feature else: varlen_feature_columns = [VarLenSparseFeat(SparseFeat('genres', vocabulary_size=len( key2index) + 1, embedding_dim=4), maxlen=max_len, combiner='mean', weight_name=None)] # Notice : value 0 is for padding for sequence input feature linear_feature_columns = fixlen_feature_columns + 
varlen_feature_columns dnn_feature_columns = fixlen_feature_columns + varlen_feature_columns feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns) # 3.generate input data for model model_input = {name: data[name] for name in feature_names} # model_input["genres"] = genres_list model_input["genres_weight"] = np.random.randn(data.shape[0], max_len, 1) # 4.Define Model,compile and train model = DeepFM(linear_feature_columns, dnn_feature_columns, task='regression') model.compile("adam", "mse", metrics=['mse'], ) history = model.fit(model_input, data[target].values, batch_size=256, epochs=10, verbose=2, validation_split=0.2, ) ``` ## Multi-value Input : Movielens with feature hashing on the fly ```python import numpy as np import pandas as pd from tensorflow.python.keras.preprocessing.sequence import pad_sequences from deepctr.feature_column import SparseFeat, VarLenSparseFeat, get_feature_names from deepctr.models import DeepFM if __name__ == "__main__": data = pd.read_csv("./movielens_sample.txt") sparse_features = ["movie_id", "user_id", "gender", "age", "occupation", "zip", ] data[sparse_features] = data[sparse_features].astype(str) target = ['rating'] # 1.Use hashing encoding on the fly for sparse features,and process sequence features genres_list = list(map(lambda x: x.split('|'), data['genres'].values)) genres_length = np.array(list(map(len, genres_list))) max_len = max(genres_length) # Notice : padding=`post` genres_list = pad_sequences(genres_list, maxlen=max_len, padding='post', dtype=object, value=0).astype(str) # 2.set hashing space for each sparse field and generate feature config for sequence feature fixlen_feature_columns = [SparseFeat(feat, data[feat].nunique() * 5, embedding_dim=4, use_hash=True, dtype='string') for feat in sparse_features] varlen_feature_columns = [ VarLenSparseFeat(SparseFeat('genres', vocabulary_size=100, embedding_dim=4, use_hash=True, dtype="string"), maxlen=max_len, combiner='mean', )] # Notice : value 0 
is for padding for sequence input feature linear_feature_columns = fixlen_feature_columns + varlen_feature_columns dnn_feature_columns = fixlen_feature_columns + varlen_feature_columns feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns) # 3.generate input data for model model_input = {name: data[name] for name in feature_names} model_input['genres'] = genres_list # 4.Define Model,compile and train model = DeepFM(linear_feature_columns, dnn_feature_columns, task='regression') model.compile("adam", "mse", metrics=['mse'], ) history = model.fit(model_input, data[target].values, batch_size=256, epochs=10, verbose=2, validation_split=0.2, ) ``` ## Hash Layer with pre-defined key-value vocabulary This examples how to use pre-defined key-value vocabulary in `Hash` Layer.`movielens_age_vocabulary.csv` stores the key-value mapping for `age` feature. ```python from deepctr.models import DeepFM from deepctr.feature_column import SparseFeat, VarLenSparseFeat, get_feature_names import numpy as np import pandas as pd from tensorflow.python.keras.preprocessing.sequence import pad_sequences try: import tensorflow.compat.v1 as tf except ImportError as e: import tensorflow as tf if __name__ == "__main__": data = pd.read_csv("./movielens_sample.txt") sparse_features = ["movie_id", "user_id", "gender", "age", "occupation", "zip", ] data[sparse_features] = data[sparse_features].astype(str) target = ['rating'] # 1.Use hashing encoding on the fly for sparse features,and process sequence features genres_list = list(map(lambda x: x.split('|'), data['genres'].values)) genres_length = np.array(list(map(len, genres_list))) max_len = max(genres_length) # Notice : padding=`post` genres_list = pad_sequences(genres_list, maxlen=max_len, padding='post', dtype=object, value=0).astype(str) # 2.set hashing space for each sparse field and generate feature config for sequence feature fixlen_feature_columns = [SparseFeat(feat, data[feat].nunique() * 5, embedding_dim=4, 
use_hash=True, vocabulary_path='./movielens_age_vocabulary.csv' if feat == 'age' else None, dtype='string') for feat in sparse_features] varlen_feature_columns = [ VarLenSparseFeat(SparseFeat('genres', vocabulary_size=100, embedding_dim=4, use_hash=True, dtype="string"), maxlen=max_len, combiner='mean', )] # Notice : value 0 is for padding for sequence input feature linear_feature_columns = fixlen_feature_columns + varlen_feature_columns dnn_feature_columns = fixlen_feature_columns + varlen_feature_columns feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns) # 3.generate input data for model model_input = {name: data[name] for name in feature_names} model_input['genres'] = genres_list # 4.Define Model,compile and train model = DeepFM(linear_feature_columns, dnn_feature_columns, task='regression') model.compile("adam", "mse", metrics=['mse'], ) if not hasattr(tf, 'version') or tf.version.VERSION < '2.0.0': with tf.Session() as sess: sess.run(tf.tables_initializer()) history = model.fit(model_input, data[target].values, batch_size=256, epochs=10, verbose=2, validation_split=0.2, ) else: history = model.fit(model_input, data[target].values, batch_size=256, epochs=10, verbose=2, validation_split=0.2, ) ``` ## Estimator with TFRecord: Classification Criteo This example shows how to use ``DeepFMEstimator`` to solve a simple binary classification task. You can get the demo data [criteo_sample.tr.tfrecords](https://github.com/shenweichen/DeepCTR/tree/master/examples/criteo_sample.tr.tfrecords) and [criteo_sample.te.tfrecords](https://github.com/shenweichen/DeepCTR/tree/master/examples/criteo_sample.te.tfrecords) and run the following codes. 
```python import tensorflow as tf from tensorflow.python.ops.parsing_ops import FixedLenFeature from deepctr.estimator import DeepFMEstimator from deepctr.estimator.inputs import input_fn_tfrecord if __name__ == "__main__": # 1.generate feature_column for linear part and dnn part sparse_features = ['C' + str(i) for i in range(1, 27)] dense_features = ['I' + str(i) for i in range(1, 14)] dnn_feature_columns = [] linear_feature_columns = [] for i, feat in enumerate(sparse_features): dnn_feature_columns.append(tf.feature_column.embedding_column( tf.feature_column.categorical_column_with_identity(feat, 1000), 4)) linear_feature_columns.append(tf.feature_column.categorical_column_with_identity(feat, 1000)) for feat in dense_features: dnn_feature_columns.append(tf.feature_column.numeric_column(feat)) linear_feature_columns.append(tf.feature_column.numeric_column(feat)) # 2.generate input data for model feature_description = {k: FixedLenFeature(dtype=tf.int64, shape=1) for k in sparse_features} feature_description.update( {k: FixedLenFeature(dtype=tf.float32, shape=1) for k in dense_features}) feature_description['label'] = FixedLenFeature(dtype=tf.float32, shape=1) train_model_input = input_fn_tfrecord('./criteo_sample.tr.tfrecords', feature_description, 'label', batch_size=256, num_epochs=1, shuffle_factor=10) test_model_input = input_fn_tfrecord('./criteo_sample.te.tfrecords', feature_description, 'label', batch_size=2 ** 14, num_epochs=1, shuffle_factor=0) # 3.Define Model,train,predict and evaluate model = DeepFMEstimator(linear_feature_columns, dnn_feature_columns, task='binary', config=tf.estimator.RunConfig(tf_random_seed=2021)) model.train(train_model_input) eval_result = model.evaluate(test_model_input) print(eval_result) ``` ## Estimator with Pandas DataFrame: Classification Criteo This example shows how to use ``DeepFMEstimator`` to solve a simple binary classification task. 
You can get the demo data [criteo_sample.txt](https://github.com/shenweichen/DeepCTR/tree/master/examples/criteo_sample.txt) and run the following codes. ```python import pandas as pd import tensorflow as tf from sklearn.metrics import log_loss, roc_auc_score from sklearn.model_selection import train_test_split from sklearn.preprocessing import LabelEncoder, MinMaxScaler from deepctr.estimator import DeepFMEstimator from deepctr.estimator.inputs import input_fn_pandas if __name__ == "__main__": data = pd.read_csv('./criteo_sample.txt') sparse_features = ['C' + str(i) for i in range(1, 27)] dense_features = ['I' + str(i) for i in range(1, 14)] data[sparse_features] = data[sparse_features].fillna('-1', ) data[dense_features] = data[dense_features].fillna(0, ) target = ['label'] # 1.Label Encoding for sparse features,and do simple Transformation for dense features for feat in sparse_features: lbe = LabelEncoder() data[feat] = lbe.fit_transform(data[feat]) mms = MinMaxScaler(feature_range=(0, 1)) data[dense_features] = mms.fit_transform(data[dense_features]) # 2.count #unique features for each sparse field,and record dense feature field name dnn_feature_columns = [] linear_feature_columns = [] for i, feat in enumerate(sparse_features): dnn_feature_columns.append(tf.feature_column.embedding_column( tf.feature_column.categorical_column_with_identity(feat, data[feat].max() + 1), 4)) linear_feature_columns.append(tf.feature_column.categorical_column_with_identity(feat, data[feat].max() + 1)) for feat in dense_features: dnn_feature_columns.append(tf.feature_column.numeric_column(feat)) linear_feature_columns.append(tf.feature_column.numeric_column(feat)) # 3.generate input data for model train, test = train_test_split(data, test_size=0.2, random_state=2021) # Not setting default value for continuous feature. filled with mean. 
train_model_input = input_fn_pandas(train, sparse_features + dense_features, 'label', shuffle=True) test_model_input = input_fn_pandas(test, sparse_features + dense_features, None, shuffle=False) # 4.Define Model,train,predict and evaluate model = DeepFMEstimator(linear_feature_columns, dnn_feature_columns, task='binary', config=tf.estimator.RunConfig(tf_random_seed=2021)) model.train(train_model_input) pred_ans_iter = model.predict(test_model_input) pred_ans = list(map(lambda x: x['pred'], pred_ans_iter)) # print("test LogLoss", round(log_loss(test[target].values, pred_ans), 4)) print("test AUC", round(roc_auc_score(test[target].values, pred_ans), 4)) ``` ## MultiTask Learning:MMOE The UCI census-income dataset is extracted from the 1994 census database. It contains 299,285 instances of demographic information of American adults. There are 40 features in total. We construct a multi-task learning problem from this dataset by setting some of the features as prediction targets : - Task 1: Predict whether the income exceeds $50K; - Task 2: Predict whether this person’s marital status is never married. This example shows how to use ``MMOE`` to solve a multi task learning problem. You can get the demo data [census-income.sample](https://github.com/shenweichen/DeepCTR/tree/master/examples/census-income.sample) and run the following codes. 
```python import pandas as pd from sklearn.metrics import roc_auc_score from sklearn.model_selection import train_test_split from sklearn.preprocessing import LabelEncoder, MinMaxScaler from deepctr.feature_column import SparseFeat, DenseFeat, get_feature_names from deepctr.models import MMOE if __name__ == "__main__": column_names = ['age', 'class_worker', 'det_ind_code', 'det_occ_code', 'education', 'wage_per_hour', 'hs_college', 'marital_stat', 'major_ind_code', 'major_occ_code', 'race', 'hisp_origin', 'sex', 'union_member', 'unemp_reason', 'full_or_part_emp', 'capital_gains', 'capital_losses', 'stock_dividends', 'tax_filer_stat', 'region_prev_res', 'state_prev_res', 'det_hh_fam_stat', 'det_hh_summ', 'instance_weight', 'mig_chg_msa', 'mig_chg_reg', 'mig_move_reg', 'mig_same', 'mig_prev_sunbelt', 'num_emp', 'fam_under_18', 'country_father', 'country_mother', 'country_self', 'citizenship', 'own_or_self', 'vet_question', 'vet_benefits', 'weeks_worked', 'year', 'income_50k'] data = pd.read_csv('./census-income.sample', header=None, names=column_names) data['label_income'] = data['income_50k'].map({' - 50000.': 0, ' 50000+.': 1}) data['label_marital'] = data['marital_stat'].apply(lambda x: 1 if x == ' Never married' else 0) data.drop(labels=['income_50k', 'marital_stat'], axis=1, inplace=True) columns = data.columns.values.tolist() sparse_features = ['class_worker', 'det_ind_code', 'det_occ_code', 'education', 'hs_college', 'major_ind_code', 'major_occ_code', 'race', 'hisp_origin', 'sex', 'union_member', 'unemp_reason', 'full_or_part_emp', 'tax_filer_stat', 'region_prev_res', 'state_prev_res', 'det_hh_fam_stat', 'det_hh_summ', 'mig_chg_msa', 'mig_chg_reg', 'mig_move_reg', 'mig_same', 'mig_prev_sunbelt', 'fam_under_18', 'country_father', 'country_mother', 'country_self', 'citizenship', 'vet_question'] dense_features = [col for col in columns if col not in sparse_features and col not in ['label_income', 'label_marital']] data[sparse_features] = 
data[sparse_features].fillna('-1', ) data[dense_features] = data[dense_features].fillna(0, ) mms = MinMaxScaler(feature_range=(0, 1)) data[dense_features] = mms.fit_transform(data[dense_features]) for feat in sparse_features: lbe = LabelEncoder() data[feat] = lbe.fit_transform(data[feat]) fixlen_feature_columns = [SparseFeat(feat, data[feat].max() + 1, embedding_dim=4) for feat in sparse_features] + [DenseFeat(feat, 1, ) for feat in dense_features] dnn_feature_columns = fixlen_feature_columns linear_feature_columns = fixlen_feature_columns feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns) # 3.generate input data for model train, test = train_test_split(data, test_size=0.2, random_state=2020) train_model_input = {name: train[name] for name in feature_names} test_model_input = {name: test[name] for name in feature_names} # 4.Define Model,train,predict and evaluate model = MMOE(dnn_feature_columns, tower_dnn_hidden_units=[], task_types=['binary', 'binary'], task_names=['label_income', 'label_marital']) model.compile("adam", loss=["binary_crossentropy", "binary_crossentropy"], metrics=['binary_crossentropy'], ) history = model.fit(train_model_input, [train['label_income'].values, train['label_marital'].values], batch_size=256, epochs=10, verbose=2, validation_split=0.2) pred_ans = model.predict(test_model_input, batch_size=256) print("test income AUC", round(roc_auc_score(test['label_income'], pred_ans[0]), 4)) print("test marital AUC", round(roc_auc_score(test['label_marital'], pred_ans[1]), 4)) ``` ================================================ FILE: docs/source/FAQ.md ================================================ # FAQ ## 1. Save or load weights/models ---------------------------------------- To save/load weights,you can write codes just like any other keras models. ```python model = DeepFM() model.save_weights('DeepFM_w.h5') model.load_weights('DeepFM_w.h5') ``` To save/load models,just a little different. 
```python from tensorflow.python.keras.models import save_model,load_model model = DeepFM() save_model(model, 'DeepFM.h5')# save_model, same as before from deepctr.layers import custom_objects model = load_model('DeepFM.h5',custom_objects)# load_model,just add a parameter ``` ## 2. Set learning rate and use earlystopping --------------------------------------------------- You can use any models in DeepCTR like a keras model object. Here is an example of how to set learning rate and earlystopping: ```python import deepctr from tensorflow.python.keras.optimizers import Adam,Adagrad from tensorflow.python.keras.callbacks import EarlyStopping model = deepctr.models.DeepFM(linear_feature_columns,dnn_feature_columns) model.compile(Adagrad(0.1024),'binary_crossentropy',metrics=['binary_crossentropy']) es = EarlyStopping(monitor='val_binary_crossentropy') history = model.fit(model_input, data[target].values,batch_size=256, epochs=10, verbose=2, validation_split=0.2,callbacks=[es] ) ``` If you are using Estimator models, you can set learning rate like: ```python from deepctr.estimator import DeepFMEstimator import tensorflow as tf model = DeepFMEstimator(linear_feature_columns, dnn_feature_columns, task='binary', linear_optimizer=tf.train.FtrlOptimizer(0.05), dnn_optimizer=tf.train.AdagradOptimizer(0.1) ) ``` ## 3. Get the attentional weights of feature interactions in AFM -------------------------------------------------------------------------- First, make sure that you have installed the latest version of deepctr. Then, use the following code; `attentional_weights[:,i,0]` is the attentional weight of `feature_interactions[i]` for all samples. 
```python import itertools import deepctr from deepctr.models import AFM from deepctr.feature_column import get_feature_names from tensorflow.python.keras.models import Model from tensorflow.python.keras.layers import Lambda model = AFM(linear_feature_columns,dnn_feature_columns) model.fit(model_input,target) afmlayer = model.layers[-3] afm_weight_model = Model(model.input,outputs=Lambda(lambda x:afmlayer.normalized_att_score)(model.input)) attentional_weights = afm_weight_model.predict(model_input,batch_size=4096) feature_names = get_feature_names(dnn_feature_columns) feature_interactions = list(itertools.combinations(feature_names ,2)) ``` ## 4. How to extract the embedding vectors in deepfm? ```python feature_columns = [SparseFeat('user_id',120,),SparseFeat('item_id',60,),SparseFeat('cate_id',60,)] def get_embedding_weights(dnn_feature_columns,model): embedding_dict = {} for fc in dnn_feature_columns: if hasattr(fc,'embedding_name'): if fc.embedding_name is not None: name = fc.embedding_name else: name = fc.name embedding_dict[name] = model.get_layer("sparse_emb_"+name).get_weights()[0] return embedding_dict embedding_dict = get_embedding_weights(feature_columns,model) user_id_emb = embedding_dict['user_id'] item_id_emb = embedding_dict['item_id'] ``` ## 5. How to add a long dense feature vector as a input to the model? 
```python from deepctr.models import DeepFM from deepctr.feature_column import SparseFeat, DenseFeat,get_feature_names import numpy as np feature_columns = [SparseFeat('user_id',120,),SparseFeat('item_id',60,),DenseFeat("pic_vec",5)] fixlen_feature_names = get_feature_names(feature_columns) user_id = np.array([[1],[0],[1]]) item_id = np.array([[30],[20],[10]]) pic_vec = np.array([[0.1,0.5,0.4,0.3,0.2],[0.1,0.5,0.4,0.3,0.2],[0.1,0.5,0.4,0.3,0.2]]) label = np.array([1,0,1]) model_input = {'user_id':user_id,'item_id':item_id,'pic_vec':pic_vec} model = DeepFM(feature_columns,feature_columns) model.compile('adagrad','binary_crossentropy') model.fit(model_input,label) ``` ## 6. How to use pretrained weights to initialize embedding weights and frozen embedding weights? ----------------------------------------------------------------------------------------------------- Use `tf.initializers.constant()` to set the `embeddings_initializer` of `SparseFeat`, and set `trainable=False` to freeze the embedding weights. ```python import numpy as np import tensorflow as tf from deepctr.models import DeepFM from deepctr.feature_column import SparseFeat,get_feature_names pretrained_item_weights = np.random.randn(60,4) pretrained_weights_initializer = tf.initializers.constant(pretrained_item_weights) feature_columns = [SparseFeat('user_id',120,),SparseFeat('item_id',60,embedding_dim=4,embeddings_initializer=pretrained_weights_initializer,trainable=False)] fixlen_feature_names = get_feature_names(feature_columns) user_id = np.array([[1],[0],[1]]) item_id = np.array([[30],[20],[10]]) label = np.array([1,0,1]) model_input = {'user_id':user_id,'item_id':item_id,} model = DeepFM(feature_columns,feature_columns) model.compile('adagrad','binary_crossentropy') model.fit(model_input,label) ``` ## 7. How to run the demo with GPU ? just install deepctr with ```bash $ pip install deepctr[gpu] ``` ## 8. 
How to run the demo with multiple GPUs you can use multiple gpus with tensorflow version higher than ``1.4``,see [run_classification_criteo_multi_gpu.py](https://github.com/shenweichen/DeepCTR/blob/master/examples/run_classification_criteo_multi_gpu.py) ================================================ FILE: docs/source/Features.md ================================================ # Features ## Overview With the great success of deep learning,DNN-based techniques have been widely used in CTR prediction task. DNN based CTR prediction models usually have following 4 modules: `Input,Embedding,Low-order&High-order Feature Extractor,Prediction` - Input&Embedding > The data in CTR estimation task usually includes high sparse,high cardinality categorical features and some dense numerical features. > Since DNN are good at handling dense numerical features,we usually map the sparse categorical features to dense numerical through `embedding technique`. > For numerical features,we usually apply `discretization` or `normalization` on them. - Feature Extractor > Low-order Extractor learns feature interaction through product between vectors.Factorization-Machine and it's variants are widely used to learn the low-order feature interaction. > High-order Extractor learns feature combination through complex neural network functions like MLP,Cross Net,etc. ## Feature Columns ### SparseFeat ``SparseFeat`` is a namedtuple with signature ``SparseFeat(name, vocabulary_size, embedding_dim, use_hash, vocabulary_path, dtype, embeddings_initializer, embedding_name, group_name, trainable)`` - name : feature name - vocabulary_size : number of unique feature values for sparse feature or hashing space when `use_hash=True` - embedding_dim : embedding dimension - use_hash : default `False`.If `True` the input will be hashed to space of size `vocabulary_size`. - vocabulary_path : default `None`. 
The `CSV` text file path of the vocabulary table used by `tf.lookup.TextFileInitializer`, which assigns one entry in the table for each line in the file. One entry contains two columns separated by comma, the first is the value column, the second is the key column. The `0` value is reserved to use if a key is missing in the table, so hash value need start from `1`. - dtype : default `int32`.dtype of input tensor. - embeddings_initializer : initializer for the `embeddings` matrix. - embedding_name : default `None`. If None, the embedding_name will be same as `name`. - group_name : feature group of this feature. - trainable: default `True`.Whether or not the embedding is trainable. ### DenseFeat ``DenseFeat`` is a namedtuple with signature ``DenseFeat(name, dimension, dtype, transform_fn)`` - name : feature name - dimension : dimension of dense feature vector. - dtype : default `float32`.dtype of input tensor. - transform_fn : If not `None` , a function that can be used to transform values of the feature. the function takes the input Tensor as its argument, and returns the output Tensor. (e.g. `lambda x: (x - 3.0) / 4.2`). ### VarLenSparseFeat ``VarLenSparseFeat`` is a namedtuple with signature ``VarLenSparseFeat(sparsefeat, maxlen, combiner, length_name, weight_name,weight_norm)`` - sparsefeat : an instance of `SparseFeat` - maxlen : maximum length of this feature for all samples - combiner : pooling method,can be ``sum``,``mean`` or ``max`` - length_name : feature length name,if `None`, value 0 in feature is for padding. - weight_name : default `None`. If not None, the sequence feature will be multiplied by the feature whose name is `weight_name`. - weight_norm : default `True`. Whether normalize the weight score or not. ## Models ### CCPM (Convolutional Click Prediction Model) CCPM can extract local-global key features from an input instance with varied elements, which can be implemented for not only single ad impression but also sequential ad impression. 
[**CCPM Model API**](./deepctr.models.ccpm.html) [**CCPM Estimator API**](./deepctr.estimator.models.ccpm.html) ![CCPM](../pics/CCPM.png) [Liu Q, Yu F, Wu S, et al. A convolutional click prediction model[C]//Proceedings of the 24th ACM International on Conference on Information and Knowledge Management. ACM, 2015: 1743-1746.](http://ir.ia.ac.cn/bitstream/173211/12337/1/A%20Convolutional%20Click%20Prediction%20Model.pdf) ### FNN (Factorization-supported Neural Network) According to the paper, FNN learns embedding vectors of categorical data via a pre-trained FM. It uses the FM's latent vectors to initialize the embedding vectors. During the training stage, it concatenates the embedding vectors and feeds them into a MLP(MultiLayer Perceptron). [**FNN Model API**](./deepctr.models.fnn.html) [**FNN Estimator API**](./deepctr.estimator.models.fnn.html) ![FNN](../pics/FNN.png) [Zhang W, Du T, Wang J. Deep learning over multi-field categorical data[C]//European conference on information retrieval. Springer, Cham, 2016: 45-57.](https://arxiv.org/pdf/1601.02376.pdf) ### PNN (Product-based Neural Network) PNN concatenates sparse feature embeddings and the product between embedding vectors as the input of MLP. [**PNN Model API**](./deepctr.models.pnn.html) [**PNN Estimator API**](./deepctr.estimator.models.pnn.html) ![PNN](../pics/PNN.png) [Qu Y, Cai H, Ren K, et al. Product-based neural networks for user response prediction[C]//Data Mining (ICDM), 2016 IEEE 16th International Conference on. IEEE, 2016: 1149-1154.](https://arxiv.org/pdf/1611.00144.pdf) ### Wide & Deep WDL's deep part concatenates sparse feature embeddings as the input of MLP,the wide part use handcrafted feature as input. The logits of deep part and wide part are added to get the prediction probability. [**WDL Model API**](./deepctr.models.wdl.html) [**WDL Estimator API**](./deepctr.estimator.models.wdl.html) ![WDL](../pics/WDL.png) [Cheng H T, Koc L, Harmsen J, et al. 
Wide & deep learning for recommender systems[C]//Proceedings of the 1st Workshop on Deep Learning for Recommender Systems. ACM, 2016: 7-10.](https://arxiv.org/pdf/1606.07792.pdf) ### DeepFM DeepFM can be seen as an improvement of WDL and FNN.Compared with WDL,DeepFM use FM instead of LR in the wide part and use concatenation of embedding vectors as the input of MLP in the deep part. Compared with FNN,the embedding vector of FM and input to MLP are same. And they do not need a FM pretrained vector to initialize; they are learned end2end. [**DeepFM Model API**](./deepctr.models.deepfm.html) [**DeepFM Estimator API**](./deepctr.estimator.models.deepfm.html) ![DeepFM](../pics/DeepFM.png) [Guo H, Tang R, Ye Y, et al. Deepfm: a factorization-machine based neural network for ctr prediction[J]. arXiv preprint arXiv:1703.04247, 2017.](http://www.ijcai.org/proceedings/2017/0239.pdf) ### MLR(Mixed Logistic Regression/Piece-wise Linear Model) MLR can be viewed as a combination of $2m$ LR models, where $m$ is the piece(region) number. $m$ LR models learn the weight with which the sample belongs to each region, and another $m$ LR models learn the sample's click probability in each region. Finally,the sample's CTR is a weighted sum of each region's click probability.Notice the weight is normalized weight. [**MLR Model API**](./deepctr.models.mlr.html) ![MLR](../pics/MLR.png) [Gai K, Zhu X, Li H, et al. Learning Piece-wise Linear Models from Large Scale Data for Ad Click Prediction[J]. arXiv preprint arXiv:1704.05194, 2017.](http://arxiv.org/abs/1704.05194) ### NFM (Neural Factorization Machine) NFM use a bi-interaction pooling layer to learn feature interaction between embedding vectors and compress the result into a single vector which has the same size as a single embedding vector. This vector is then fed into a MLP. The output logit of MLP and the output logit of linear part are added to get the prediction probability. 
[**NFM Model API**](./deepctr.models.nfm.html) [**NFM Estimator API**](./deepctr.estimator.models.nfm.html) ![NFM](../pics/NFM.png) [He X, Chua T S. Neural factorization machines for sparse predictive analytics[C]//Proceedings of the 40th International ACM SIGIR conference on Research and Development in Information Retrieval. ACM, 2017: 355-364.](https://arxiv.org/pdf/1708.05027.pdf) ### AFM (Attentional Factorization Machine) AFM is a variant of FM. Traditional FM sums the inner products of embedding vectors uniformly, whereas AFM can be seen as a weighted sum of feature interactions. The weight is learned by a small MLP. [**AFM Model API**](./deepctr.models.afm.html) [**AFM Estimator API**](./deepctr.estimator.models.afm.html) ![AFM](../pics/AFM.png) [Xiao J, Ye H, He X, et al. Attentional factorization machines: Learning the weight of feature interactions via attention networks[J]. arXiv preprint arXiv:1708.04617, 2017.](http://www.ijcai.org/proceedings/2017/435) ### DCN (Deep & Cross Network) DCN use a Cross Net to learn both low and high order feature interaction explicitly,and use a MLP to learn feature interaction implicitly. The outputs of Cross Net and MLP are concatenated. The concatenated vector is fed into one fully connected layer to get the prediction probability. [**DCN Model API**](./deepctr.models.dcn.html) [**DCN Estimator API**](./deepctr.estimator.models.dcn.html) ![DCN](../pics/DCN.png) [Wang R, Fu B, Fu G, et al. Deep & cross network for ad click predictions[C]//Proceedings of the ADKDD'17. ACM, 2017: 12.](https://arxiv.org/abs/1708.05123) ### DCN-Mix (Improved Deep & Cross Network with mix of experts and matrix kernel) DCN-Mix uses a matrix kernel instead of vector kernel in CrossNet compared with DCN,and it uses mixture of experts to learn feature interactions. [**DCN-Mix Model API**](./deepctr.models.dcnmix.html) ![DCN-Mix](../pics/DCN-Mix.png) [Wang R, Shivanna R, Cheng D Z, et al. 
DCN V2: Improved Deep & Cross Network and Practical Lessons for Web-scale Learning to Rank Systems[J]. arXiv preprint arXiv:2008.13535, 2020. ](https://arxiv.org/abs/2008.13535) ### xDeepFM xDeepFM use a Compressed Interaction Network (CIN) to learn both low and high order feature interaction explicitly,and use a MLP to learn feature interaction implicitly. In each layer of CIN,first compute outer products between $x^k$ and $x_0$ to get a tensor $Z_{k+1}$,then use a 1DConv to learn feature maps $H_{k+1}$ on this tensor. Finally,apply sum pooling on all the feature maps $H_k$ to get one vector.The vector is used to compute the logit that CIN contributes. [**xDeepFM Model API**](./deepctr.models.xdeepfm.html) [**xDeepFM Estimator API**](./deepctr.estimator.models.xdeepfm.html) ![CIN](../pics/CIN.png) ![xDeepFM](../pics/xDeepFM.png) [Lian J, Zhou X, Zhang F, et al. xDeepFM: Combining Explicit and Implicit Feature Interactions for Recommender Systems[J]. arXiv preprint arXiv:1803.05170, 2018.](https://arxiv.org/pdf/1803.05170.pdf) ### AutoInt(Automatic Feature Interaction) AutoInt use a interacting layer to model the interactions between different features. Within each interacting layer, each feature is allowed to interact with all the other features and is able to automatically identify relevant features to form meaningful higher-order features via the multi-head attention mechanism. By stacking multiple interacting layers,AutoInt is able to model different orders of feature interactions. [**AutoInt Model API**](./deepctr.models.autoint.html) [**AutoInt Estimator API**](./deepctr.estimator.models.autoint.html) ![InteractingLayer](../pics/InteractingLayer.png) ![AutoInt](../pics/AutoInt.png) [Song W, Shi C, Xiao Z, et al. Autoint: Automatic feature interaction learning via self-attentive neural networks[C]//Proceedings of the 28th ACM International Conference on Information and Knowledge Management. 2019: 1161-1170. 
](https://arxiv.org/abs/1810.11921) ### ONN(Operation-aware Neural Networks for User Response Prediction) ONN models second order feature interactions like FFM and preserves second-order interaction information as much as possible. Furthermore, a deep neural network is used to learn higher-order feature interactions. [**ONN Model API**](./deepctr.models.onn.html) ![ONN](../pics/ONN.png) [Yang Y, Xu B, Shen F, et al. Operation-aware Neural Networks for User Response Prediction[J]. arXiv preprint arXiv:1904.12579, 2019.](https://arxiv.org/pdf/1904.12579.pdf) ### FGCNN(Feature Generation by Convolutional Neural Network) FGCNN models with two components: Feature Generation and Deep Classifier. Feature Generation leverages the strength of CNN to generate local patterns and recombine them to generate new features. Deep Classifier adopts the structure of IPNN to learn interactions from the augmented feature space. [**FGCNN Model API**](./deepctr.models.fgcnn.html) ![FGCNN](../pics/FGCNN.png) [Liu B, Tang R, Chen Y, et al. Feature Generation by Convolutional Neural Network for Click-Through Rate Prediction[J]. arXiv preprint arXiv:1904.04447, 2019.](https://arxiv.org/pdf/1904.04447) ### FiBiNET(Feature Importance and Bilinear feature Interaction NETwork) Feature Importance and Bilinear feature Interaction NETwork is proposed to dynamically learn the feature importance and fine-grained feature interactions. On the one hand, the FiBiNET can dynamically learn the importance of features via the Squeeze-Excitation network (SENET) mechanism; on the other hand, it is able to effectively learn the feature interactions via bilinear function. [**FiBiNET Model API**](./deepctr.models.fibinet.html) [**FiBiNET Estimator API**](./deepctr.estimator.models.fibinet.html) ![FiBiNET](../pics/FiBiNET.png) [Huang T, Zhang Z, Zhang J. FiBiNET: Combining Feature Importance and Bilinear feature Interaction for Click-Through Rate Prediction[J]. 
arXiv preprint arXiv:1905.09433, 2019.](https://arxiv.org/pdf/1905.09433.pdf) ### FLEN(Field-Leveraged Embedding Network) A large-scale CTR prediction model with efficient usage of field information to alleviate gradient coupling problem. [**FLEN Model API**](./deepctr.models.flen.html) [FLEN example](https://github.com/shenweichen/DeepCTR/tree/master/examples/run_flen.py) ![FLEN](../pics/FLEN.jpg) [Chen W, Zhan L, Ci Y, Lin C. FLEN: Leveraging Field for Scalable CTR Prediction[J]. arXiv preprint arXiv:1911.04690, 2019.](https://arxiv.org/pdf/1911.04690.pdf) ### IFM(Input-aware Factorization Machine) IFM improves FMs by explicitly considering the impact of each individual input upon the representation of features, which learns a unique input-aware factor for the same feature in different instances via a neural network. [**IFM Model API**](./deepctr.models.ifm.html) ![IFM](../pics/IFM.jpg) [Yu Y, Wang Z, Yuan B. An Input-aware Factorization Machine for Sparse Prediction[C]//IJCAI. 2019: 1466-1472.](https://www.ijcai.org/Proceedings/2019/0203.pdf) ### DIFM(Dual Input-aware Factorization Machine) Dual Input-aware Factorization Machines (DIFMs) can adaptively reweight the original feature representations at the bit-wise and vector-wise levels simultaneously. [**DIFM Model API**](./deepctr.models.difm.html) ![DIFM](../pics/DIFM.jpg) [Lu W, Yu Y, Chang Y, et al. A Dual Input-aware Factorization Machine for CTR Prediction[C]//IJCAI. 2020: 3139-3145.](https://www.ijcai.org/Proceedings/2020/0434.pdf) ### DeepFEFM(Deep Field-Embedded Factorization Machine) FEFM learns symmetric matrix embeddings for each field pair along with the usual single vector embeddings for each feature. FEFM has significantly lower model complexity than FFM and roughly the same complexity as FwFM. [**DeepFEFM Model API**](./deepctr.models.deepfefm.html) ![DeepFEFM](../pics/DeepFEFM.jpg) [Pande H. Field-Embedded Factorization Machines for Click-through rate prediction[J]. 
arXiv preprint arXiv:2009.09931, 2020.](https://arxiv.org/pdf/2009.09931) ### EDCN(Enhancing Explicit and Implicit Feature Interactions DCN) EDCN introduces two advanced modules, namely the bridge module and the regulation module, which work collaboratively to capture the layer-wise interactive signals and learn discriminative feature distributions for each hidden layer of the parallel networks. [**EDCN Model API**](./deepctr.models.edcn.html) ![EDCN](../pics/EDCN.png) [Chen B, Wang Y, Liu Z, et al. Enhancing explicit and implicit feature interactions via information sharing for parallel deep ctr models[C]//Proceedings of the 30th ACM International Conference on Information & Knowledge Management. 2021: 3757-3766.](https://dlp-kdd.github.io/assets/pdf/DLP-KDD_2021_paper_12.pdf) ## Sequence Models ### DIN (Deep Interest Network) DIN introduces an attention method to learn from sequence(multi-valued) features. Traditional methods usually use sum/mean pooling on sequence features. DIN uses a local activation unit to get the activation score between the candidate item and history items. The user's interest is represented by a weighted sum of user behaviors. The user's interest vector and other embedding vectors are concatenated and fed into a MLP to get the prediction. [**DIN Model API**](./deepctr.models.sequence.din.html) [DIN example](https://github.com/shenweichen/DeepCTR/tree/master/examples/run_din.py) ![DIN](../pics/DIN.png) [Zhou G, Zhu X, Song C, et al. Deep interest network for click-through rate prediction[C]//Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining. ACM, 2018: 1059-1068.](https://arxiv.org/pdf/1706.06978.pdf) ### DIEN (Deep Interest Evolution Network) Deep Interest Evolution Network (DIEN) uses interest extractor layer to capture temporal interests from history behavior sequence. At this layer, an auxiliary loss is proposed to supervise interest extracting at each step. 
As user interests are diverse, especially in the e-commerce system, interest evolving layer is proposed to capture interest evolving process that is relative to the target item. At interest evolving layer, attention mechanism is embedded into the sequential structure novelly, and the effects of relative interests are strengthened during interest evolution. [**DIEN Model API**](./deepctr.models.sequence.dien.html) [DIEN example](https://github.com/shenweichen/DeepCTR/tree/master/examples/run_dien.py) ![DIEN](../pics/DIEN.png) [Zhou G, Mou N, Fan Y, et al. Deep Interest Evolution Network for Click-Through Rate Prediction[J]. arXiv preprint arXiv:1809.03672, 2018.](https://arxiv.org/pdf/1809.03672.pdf) ### DSIN(Deep Session Interest Network) Deep Session Interest Network (DSIN) extracts users' multiple historical sessions in their behavior sequences. First it uses self-attention mechanism with bias encoding to extract users' interests in each session. Then apply Bi-LSTM to model how users' interests evolve and interact among sessions. Finally, local activation unit is used to adaptively learn the influences of various session interests on the target item. [**DSIN Model API**](./deepctr.models.sequence.dsin.html) [DSIN example](https://github.com/shenweichen/DeepCTR/tree/master/examples/run_dsin.py) ![DSIN](../pics/DSIN.png) [Feng Y, Lv F, Shen W, et al. Deep Session Interest Network for Click-Through Rate Prediction[J]. arXiv preprint arXiv:1905.06482, 2019.](https://arxiv.org/abs/1905.06482) ### BST(Behavior Sequence Transformer) BST use the powerful Transformer model to capture the sequential signals underlying users’ behavior sequences . [**BST Model API**](./deepctr.models.sequence.bst.html) [BST example](https://github.com/shenweichen/DeepCTR/tree/master/examples/run_din.py) ![BST](../pics/BST.png) [Qiwei Chen, Huan Zhao, Wei Li, Pipei Huang, and Wenwu Ou. 2019. Behavior sequence transformer for e-commerce recommendation in Alibaba. 
In Proceedings of the 1st International Workshop on Deep Learning Practice for High-Dimensional Sparse Data (DLP-KDD '19). Association for Computing Machinery, New York, NY, USA, Article 12, 1–4. DOI: 10.1145/3326937.3341261.](https://arxiv.org/pdf/1905.06874.pdf) ## MultiTask Models ### SharedBottom Hard parameter sharing is the most commonly used approach to MTL in neural networks. It is generally applied by sharing the hidden layers between all tasks, while keeping several task-specific output layers. [**SharedBottom Model API**](./deepctr.models.multitask.sharedbottom.html) ![SharedBottom](../pics/multitaskmodels/SharedBottom.png) [Ruder S. An overview of multi-task learning in deep neural networks[J]. arXiv preprint arXiv:1706.05098, 2017.](https://arxiv.org/pdf/1706.05098.pdf) ### ESMM(Entire Space Multi-task Model) ESMM models CVR in a brand-new perspective by making good use of sequential pattern of user actions, i.e., impression → click → conversion. The proposed Entire Space Multi-task Model (ESMM) can eliminate the two problems of conventional CVR modeling (sample selection bias and data sparsity) simultaneously by i) modeling CVR directly over the entire space, ii) employing a feature representation transfer learning strategy. [**ESMM Model API**](./deepctr.models.multitask.esmm.html) ![ESMM](../pics/multitaskmodels/ESMM.png) [Ma X, Zhao L, Huang G, et al. Entire space multi-task model: An effective approach for estimating post-click conversion rate[C]//The 41st International ACM SIGIR Conference on Research & Development in Information Retrieval. 2018.](https://arxiv.org/abs/1804.07931) ### MMOE(Multi-gate Mixture-of-Experts) Multi-gate Mixture-of-Experts (MMoE) explicitly learns to model task relationships from data. We adapt the Mixture-of-Experts (MoE) structure to multi-task learning by sharing the expert submodels across all tasks, while also having a gating network trained to optimize each task. [**MMOE Model API**](./deepctr.models.multitask.mmoe.html) ![MMOE](../pics/multitaskmodels/MMOE.png) [Ma J, Zhao Z, Yi X, et al.
Modeling task relationships in multi-task learning with multi-gate mixture-of-experts[C]//Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining. 2018.](https://dl.acm.org/doi/abs/10.1145/3219819.3220007) ### PLE(Progressive Layered Extraction) PLE separates shared components and task-specific components explicitly and adopts a progressive routing mechanism to extract and separate deeper semantic knowledge gradually, improving efficiency of joint representation learning and information routing across tasks in a general setup. [**PLE Model API**](./deepctr.models.multitask.ple.html) ![PLE](../pics/multitaskmodels/PLE.png) [Tang H, Liu J, Zhao M, et al. Progressive layered extraction (ple): A novel multi-task learning (mtl) model for personalized recommendations[C]//Fourteenth ACM Conference on Recommender Systems. 2020.](https://dl.acm.org/doi/10.1145/3383313.3412236) ## Layers The models of deepctr are modular, so you can use different modules to build your own models. Each module is a class that inherits from `tf.keras.layers.Layer`, so it has the same attributes and methods as Keras layers such as `tf.keras.layers.Dense()`. You can see the layers API in [Layers](./Layers.html) ================================================ FILE: docs/source/History.md ================================================ # History - 11/10/2022 : [v0.9.3](https://github.com/shenweichen/DeepCTR/releases/tag/v0.9.3) released.Add [EDCN](./Features.html#edcn-enhancing-explicit-and-implicit-feature-interactions-dcn). - 10/15/2022 : [v0.9.2](https://github.com/shenweichen/DeepCTR/releases/tag/v0.9.2) released.Support python `3.9`,`3.10`. - 06/11/2022 : [v0.9.1](https://github.com/shenweichen/DeepCTR/releases/tag/v0.9.1) released.Improve compatibility with tensorflow `2.x`.
- 09/03/2021 : [v0.9.0](https://github.com/shenweichen/DeepCTR/releases/tag/v0.9.0) released.Add multitask learning models:[SharedBottom](./Features.html#sharedbottom),[ESMM](./Features.html#esmm-entire-space-multi-task-model),[MMOE](./Features.html#mmoe-multi-gate-mixture-of-experts) and [PLE](./Features.html#ple-progressive-layered-extraction). [running example](./Examples.html#multitask-learning-mmoe) - 07/18/2021 : [v0.8.7](https://github.com/shenweichen/DeepCTR/releases/tag/v0.8.7) released.Support pre-defined key-value vocabulary in `Hash` Layer. [example](./Examples.html#hash-layer-with-pre-defined-key-value-vocabulary) - 06/14/2021 : [v0.8.6](https://github.com/shenweichen/DeepCTR/releases/tag/v0.8.6) released.Add [IFM](./Features.html#ifm-input-aware-factorization-machine) [DIFM](./Features.html#difm-dual-input-aware-factorization-machine), [FEFM and DeepFEFM](./Features.html#deepfefm-deep-field-embedded-factorization-machine) model. - 03/13/2021 : [v0.8.5](https://github.com/shenweichen/DeepCTR/releases/tag/v0.8.5) released.Add [BST](./Features.html#bst-behavior-sequence-transformer) model. - 02/12/2021 : [v0.8.4](https://github.com/shenweichen/DeepCTR/releases/tag/v0.8.4) released.Fix bug in DCN-Mix. - 01/06/2021 : [v0.8.3](https://github.com/shenweichen/DeepCTR/releases/tag/v0.8.3) released.Add [DCN-Mix](./Features.html#dcn-mix-improved-deep-cross-network-with-mix-of-experts-and-matrix-kernel) model.Support `transform_fn` in `DenseFeat`. - 10/11/2020 : [v0.8.2](https://github.com/shenweichen/DeepCTR/releases/tag/v0.8.2) released.Refactor `DNN` Layer. - 09/12/2020 : [v0.8.1](https://github.com/shenweichen/DeepCTR/releases/tag/v0.8.1) released.Improve the reproducibility & fix some bugs. - 06/27/2020 : [v0.8.0](https://github.com/shenweichen/DeepCTR/releases/tag/v0.8.0) released. - Support `Tensorflow Estimator` for large scale data and distributed training. 
[example: Estimator with TFRecord](./Examples.html#estimator-with-tfrecord-classification-criteo) - Support different initializers for different embedding weights and loading pretrained embeddings. [example](./FAQ.html#how-to-use-pretrained-weights-to-initialize-embedding-weights-and-frozen-embedding-weights) - Add new model `FwFM`. - 05/17/2020 : [v0.7.5](https://github.com/shenweichen/DeepCTR/releases/tag/v0.7.5) released.Fix numerical instability in `LayerNormalization`. - 03/15/2020 : [v0.7.4](https://github.com/shenweichen/DeepCTR/releases/tag/v0.7.4) released.Add [FLEN](./Features.html#flen-field-leveraged-embedding-network) and `FieldWiseBiInteraction`. - 03/04/2020 : [v0.7.3](https://github.com/shenweichen/DeepCTR/releases/tag/v0.7.3) released.Fix the inconsistency of prediction results when the model is loaded with trained weights. - 02/08/2020 : [v0.7.2](https://github.com/shenweichen/DeepCTR/releases/tag/v0.7.2) released.Fix some bugs. - 01/28/2020 : [v0.7.1](https://github.com/shenweichen/DeepCTR/releases/tag/v0.7.1) released.Simplify [VarLenSparseFeat](./Features.html#varlensparsefeat),support setting weight_normalization.Fix problem of embedding size of `SparseFeat` in `linear_feature_columns`. - 11/24/2019 : [v0.7.0](https://github.com/shenweichen/DeepCTR/releases/tag/v0.7.0) released.Refactor [feature columns](./Features.html#feature-columns).Different features can use different `embedding_dim` and group-wise interaction is available by setting `group_name`. - 11/06/2019 : [v0.6.3](https://github.com/shenweichen/DeepCTR/releases/tag/v0.6.3) released.Add `WeightedSequenceLayer` and support [weighted sequence feature input](./Examples.html#multi-value-input-movielens). - 10/03/2019 : [v0.6.2](https://github.com/shenweichen/DeepCTR/releases/tag/v0.6.2) released.Simplify the input logic. - 09/08/2019 : [v0.6.1](https://github.com/shenweichen/DeepCTR/releases/tag/v0.6.1) released.Fix bugs in `CCPM` and `DynamicGRU`. 
- 08/02/2019 : [v0.6.0](https://github.com/shenweichen/DeepCTR/releases/tag/v0.6.0) released.Now DeepCTR is compatible with tensorflow `1.14` and `2.0.0`. - 07/21/2019 : [v0.5.2](https://github.com/shenweichen/DeepCTR/releases/tag/v0.5.2) released.Refactor `Linear` Layer. - 07/10/2019 : [v0.5.1](https://github.com/shenweichen/DeepCTR/releases/tag/v0.5.1) released.Add [FiBiNET](./Features.html#fibinet-feature-importance-and-bilinear-feature-interaction-network). - 06/30/2019 : [v0.5.0](https://github.com/shenweichen/DeepCTR/releases/tag/v0.5.0) released.Refactor inputs module. - 05/19/2019 : [v0.4.1](https://github.com/shenweichen/DeepCTR/releases/tag/v0.4.1) released.Add [DSIN](./Features.html#dsin-deep-session-interest-network). - 05/04/2019 : [v0.4.0](https://github.com/shenweichen/DeepCTR/releases/tag/v0.4.0) released.Support [feature hashing on the fly](./Examples.html#classification-criteo-with-feature-hashing-on-the-fly) and python2.7. - 04/27/2019 : [v0.3.4](https://github.com/shenweichen/DeepCTR/releases/tag/v0.3.4) released.Add [FGCNN](./Features.html#fgcnn-feature-generation-by-convolutional-neural-network) and `FGCNNLayer`. - 04/21/2019 : [v0.3.3](https://github.com/shenweichen/DeepCTR/releases/tag/v0.3.3) released.Add [CCPM](./Features.html#ccpm-convolutional-click-prediction-model). - 03/30/2019 : [v0.3.2](https://github.com/shenweichen/DeepCTR/releases/tag/v0.3.2) released.Add [DIEN](./Features.html#dien-deep-interest-evolution-network) and [ONN](./Features.html#onn-operation-aware-neural-networks-for-user-response-prediction) Model. - 02/17/2019 : [v0.3.1](https://github.com/shenweichen/DeepCTR/releases/tag/v0.3.1) released.Refactor layers ,add `BiLSTM` and `Transformer`. - 01/24/2019 : [v0.2.3](https://github.com/shenweichen/DeepCTR/releases/tag/v0.2.3) released.Use a new feature config generation method and fix bugs. 
- 01/01/2019 : [v0.2.2](https://github.com/shenweichen/DeepCTR/releases/tag/v0.2.2) released.Add [sequence(multi-value) input support](./Examples.html#multi-value-input-movielens) for `AFM,AutoInt,DCN,DeepFM,FNN,NFM,PNN,xDeepFM` models. - 12/27/2018 : [v0.2.1](https://github.com/shenweichen/DeepCTR/releases/tag/v0.2.1) released.Add [AutoInt](./Features.html#autoint-automatic-feature-interaction) Model. - 12/22/2018 : [v0.2.0](https://github.com/shenweichen/DeepCTR/releases/tag/v0.2.0) released.Add [xDeepFM](./Features.html#xdeepfm) and automatic check for new version. - 12/19/2018 : [v0.1.6](https://github.com/shenweichen/DeepCTR/releases/tag/v0.1.6) released.Now DeepCTR is compatible with tensorflow from `1.4-1.12` except for `1.7` and `1.8`. - 11/29/2018 : [v0.1.4](https://github.com/shenweichen/DeepCTR/releases/tag/v0.1.4) released.Add [FAQ](./FAQ.html) in docs - 11/24/2018 : DeepCTR first version v0.1.0 is released on [PyPi](https://pypi.org/project/deepctr/) ================================================ FILE: docs/source/Layers.rst ================================================ DeepCTR Layers API ====================== .. toctree:: :maxdepth: 3 :caption: API: Core Layers Interaction Layers Activation Layers Normalization Layers Sequence Layers ================================================ FILE: docs/source/Model_Methods.md ================================================ # Methods ## compile ```python compile(optimizer, loss=None, metrics=None, loss_weights=None, sample_weight_mode=None, weighted_metrics=None, target_tensors=None) ``` Configures the model for training. **Arguments** - **optimizer**: String (name of optimizer) or optimizer instance. See [optimizers](https://www.tensorflow.org/versions/r1.12/api_docs/python/tf/keras/optimizers/). - **loss**: String (name of objective function) or objective function. See [losses](https://www.tensorflow.org/versions/r1.12/api_docs/python/tf/keras/losses). 
If the model has multiple outputs, you can use a different loss on each output by passing a dictionary or a list of losses. The loss value that will be minimized by the model will then be the sum of all individual losses. - **metrics**: List of metrics to be evaluated by the model during training and testing. Typically you will use `metrics=['accuracy']`. To specify different metrics for different outputs of a multi-output model, you could also pass a dictionary, such as `metrics={'output_a': 'accuracy'}`. - **loss_weights**: Optional list or dictionary specifying scalar coefficients (Python floats) to weight the loss contributions of different model outputs. The loss value that will be minimized by the model will then be the weighted sum of all individual losses, weighted by the `loss_weights` coefficients. If a list, it is expected to have a 1:1 mapping to the model's outputs. If a dict, it is expected to map output names (strings) to scalar coefficients. - **sample_weight_mode**: If you need to do timestep-wise sample weighting (2D weights), set this to `"temporal"`. `None` defaults to sample-wise weights (1D). If the model has multiple outputs, you can use a different `sample_weight_mode` on each output by passing a dictionary or a list of modes. - **weighted_metrics**: List of metrics to be evaluated and weighted by sample_weight or class_weight during training and testing. - **target_tensors**: By default, Keras will create placeholders for the model's target, which will be fed with the target data during training. If instead you would like to use your own target tensors (in turn, Keras will not expect external Numpy data for these targets at training time), you can specify them via the `target_tensors` argument. It can be a single tensor (for a single-output model), a list of tensors, or a dict mapping output names to target tensors. **Raises** - **ValueError**: In case of invalid arguments for `optimizer`, `loss`, `metrics` or `sample_weight_mode`.
## fit ```python fit(x=None, y=None, batch_size=None, epochs=1, verbose=1, callbacks=None, validation_split=0.0, validation_data=None, shuffle=True, class_weight=None, sample_weight=None, initial_epoch=0, steps_per_epoch=None, validation_steps=None, validation_freq=1) ``` Trains the model for a given number of epochs (iterations on a dataset). **Arguments** - **x**: Numpy array of training data (if the model has a single input), or list of Numpy arrays (if the model has multiple inputs). If input layers in the model are named, you can also pass a dictionary mapping input names to Numpy arrays. `x` can be `None` (default) if feeding from framework-native tensors (e.g. TensorFlow data tensors). - **y**: Numpy array of target (label) data (if the model has a single output), or list of Numpy arrays (if the model has multiple outputs). If output layers in the model are named, you can also pass a dictionary mapping output names to Numpy arrays. `y` can be `None` (default) if feeding from framework-native tensors (e.g. TensorFlow data tensors). - **batch_size**: Integer or `None`. Number of samples per gradient update. If unspecified, `batch_size` will default to 32. - **epochs**: Integer. Number of epochs to train the model. An epoch is an iteration over the entire `x` and `y` data provided. Note that in conjunction with `initial_epoch`, `epochs` is to be understood as "final epoch". The model is not trained for a number of iterations given by `epochs`, but merely until the epoch of index `epochs` is reached. - **verbose**: Integer. 0, 1, or 2. Verbosity mode. 0 = silent, 1 = progress bar, 2 = one line per epoch. - **callbacks**: List of `tf.keras.callbacks.Callback` instances. List of callbacks to apply during training and validation (if validation data is provided). See [callbacks](https://www.tensorflow.org/versions/r1.12/api_docs/python/tf/keras/callbacks). - **validation_split**: Float between 0 and 1. Fraction of the training data to be used as validation data.
The model will set apart this fraction of the training data, will not train on it, and will evaluate the loss and any model metrics on this data at the end of each epoch. The validation data is selected from the last samples in the `x` and `y` data provided, before shuffling. - **validation_data**: tuple `(x_val, y_val)` or tuple `(x_val, y_val, val_sample_weights)` on which to evaluate the loss and any model metrics at the end of each epoch. The model will not be trained on this data. `validation_data` will override `validation_split`. - **shuffle**: Boolean (whether to shuffle the training data before each epoch) or str (for 'batch'). 'batch' is a special option for dealing with the limitations of HDF5 data; it shuffles in batch-sized chunks. Has no effect when `steps_per_epoch` is not `None`. - **class_weight**: Optional dictionary mapping class indices (integers) to a weight (float) value, used for weighting the loss function (during training only). This can be useful to tell the model to "pay more attention" to samples from an under-represented class. - **sample_weight**: Optional Numpy array of weights for the training samples, used for weighting the loss function (during training only). You can either pass a flat (1D) Numpy array with the same length as the input samples (1:1 mapping between weights and samples), or in the case of temporal data, you can pass a 2D array with shape `(samples, sequence_length)`, to apply a different weight to every timestep of every sample. In this case you should make sure to specify `sample_weight_mode="temporal"` in `compile()`. - **initial_epoch**: Integer. Epoch at which to start training (useful for resuming a previous training run). - **steps_per_epoch**: Integer or `None`. Total number of steps (batches of samples) before declaring one epoch finished and starting the next epoch. 
When training with input tensors such as TensorFlow data tensors, the default `None` is equal to the number of samples in your dataset divided by the batch size, or 1 if that cannot be determined. - **validation_steps**: Only relevant if `steps_per_epoch` is specified. Total number of steps (batches of samples) to validate before stopping. - **validation_freq**: Only relevant if validation data is provided. Integer or list/tuple/set. If an integer, specifies how many training epochs to run before a new validation run is performed, e.g. `validation_freq=2` runs validation every 2 epochs. If a list, tuple, or set, specifies the epochs on which to run validation, e.g. `validation_freq=[1, 2, 10]` runs validation at the end of the 1st, 2nd, and 10th epochs. **Returns** - A `History` object. Its `History.history` attribute is a record of training loss values and metrics values at successive epochs, as well as validation loss values and validation metrics values (if applicable). **Raises** - **RuntimeError**: If the model was never compiled. - **ValueError**: In case of mismatch between the provided input data and what the model expects. ## evaluate ```python evaluate(x=None, y=None, batch_size=None, verbose=1, sample_weight=None, steps=None, callbacks=None) ``` Returns the loss value & metrics values for the model in test mode. Computation is done in batches. **Arguments** - **x**: Numpy array of test data (if the model has a single input), or list of Numpy arrays (if the model has multiple inputs). If input layers in the model are named, you can also pass a dictionary mapping input names to Numpy arrays. `x` can be `None` (default) if feeding from framework-native tensors (e.g. TensorFlow data tensors). - **y**: Numpy array of target (label) data (if the model has a single output), or list of Numpy arrays (if the model has multiple outputs). If output layers in the model are named, you can also pass a dictionary mapping output names to Numpy arrays.
`y` can be `None` (default) if feeding from framework-native tensors (e.g. TensorFlow data tensors). - **batch_size**: Integer or `None`. Number of samples per evaluation step. If unspecified, `batch_size` will default to 32. - **verbose**: 0 or 1. Verbosity mode. 0 = silent, 1 = progress bar. - **sample_weight**: Optional Numpy array of weights for the test samples, used for weighting the loss function. You can either pass a flat (1D) Numpy array with the same length as the input samples (1:1 mapping between weights and samples), or in the case of temporal data, you can pass a 2D array with shape `(samples, sequence_length)`, to apply a different weight to every timestep of every sample. In this case you should make sure to specify `sample_weight_mode="temporal"` in `compile()`. - **steps**: Integer or `None`. Total number of steps (batches of samples) before declaring the evaluation round finished. Ignored with the default value of `None`. - **callbacks**: List of `tf.keras.callbacks.Callback` instances. List of callbacks to apply during evaluation. See [callbacks](https://www.tensorflow.org/versions/r1.12/api_docs/python/tf/keras/callbacks). **Returns** - Scalar test loss (if the model has a single output and no metrics) or list of scalars (if the model has multiple outputs and/or metrics). The attribute `model.metrics_names` will give you the display labels for the scalar outputs. ## predict ```python predict(x, batch_size=None, verbose=0, steps=None, callbacks=None) ``` Generates output predictions for the input samples. Computation is done in batches. **Arguments** - **x**: The input data, as a Numpy array (or list of Numpy arrays if the model has multiple inputs). - **batch_size**: Integer. If unspecified, it will default to 32. - **verbose**: Verbosity mode, 0 or 1. - **steps**: Total number of steps (batches of samples) before declaring the prediction round finished. Ignored with the default value of None.
- **callbacks**: List of `tf.keras.callbacks.Callback` instances. List of callbacks to apply during prediction. See [callbacks](https://www.tensorflow.org/versions/r1.12/api_docs/python/tf/keras/callbacks). **Returns** - Numpy array(s) of predictions. **Raises** - **ValueError**: In case of mismatch between the provided input data and the model's expectations, or in case a stateful model receives a number of samples that is not a multiple of the batch size. ## train_on_batch ```python train_on_batch(x, y, sample_weight=None, class_weight=None) ``` Runs a single gradient update on a single batch of data. **Arguments** - **x**: Numpy array of training data, or list of Numpy arrays if the model has multiple inputs. If all inputs in the model are named, you can also pass a dictionary mapping input names to Numpy arrays. - **y**: Numpy array of target data, or list of Numpy arrays if the model has multiple outputs. If all outputs in the model are named, you can also pass a dictionary mapping output names to Numpy arrays. - **sample_weight**: Optional array of the same length as x, containing weights to apply to the model's loss for each sample. In the case of temporal data, you can pass a 2D array with shape (samples, sequence_length), to apply a different weight to every timestep of every sample. In this case you should make sure to specify sample_weight_mode="temporal" in compile(). - **class_weight**: Optional dictionary mapping class indices (integers) to a weight (float) to apply to the model's loss for the samples from this class during training. This can be useful to tell the model to "pay more attention" to samples from an under-represented class. **Returns** - Scalar training loss (if the model has a single output and no metrics) or list of scalars (if the model has multiple outputs and/or metrics). The attribute `model.metrics_names` will give you the display labels for the scalar outputs. 
## test_on_batch ```python test_on_batch(x, y, sample_weight=None) ``` Test the model on a single batch of samples. **Arguments** - **x**: Numpy array of test data, or list of Numpy arrays if the model has multiple inputs. If all inputs in the model are named, you can also pass a dictionary mapping input names to Numpy arrays. - **y**: Numpy array of target data, or list of Numpy arrays if the model has multiple outputs. If all outputs in the model are named, you can also pass a dictionary mapping output names to Numpy arrays. - **sample_weight**: Optional array of the same length as x, containing weights to apply to the model's loss for each sample. In the case of temporal data, you can pass a 2D array with shape (samples, sequence_length), to apply a different weight to every timestep of every sample. In this case you should make sure to specify `sample_weight_mode="temporal"` in `compile()`. **Returns** - Scalar test loss (if the model has a single output and no metrics) or list of scalars (if the model has multiple outputs and/or metrics). The attribute `model.metrics_names` will give you the display labels for the scalar outputs. ## predict_on_batch ```python predict_on_batch(x) ``` Returns predictions for a single batch of samples. **Arguments** - **x**: Input samples, as a Numpy array. **Returns** - Numpy array(s) of predictions. ## fit_generator ```python fit_generator(generator, steps_per_epoch=None, epochs=1, verbose=1, callbacks=None, validation_data=None, validation_steps=None, validation_freq=1, class_weight=None, max_queue_size=10, workers=1, use_multiprocessing=False, shuffle=True, initial_epoch=0) ``` Trains the model on data generated batch-by-batch by a Python generator (or an instance of `Sequence`). The generator is run in parallel to the model, for efficiency. For instance, this allows you to do real-time data augmentation on images on CPU in parallel to training your model on GPU. 
The use of `tf.keras.utils.Sequence` guarantees the ordering and guarantees the single use of every input per epoch when using `use_multiprocessing=True`. **Arguments** - **generator**: A generator or an instance of `Sequence` (`tf.keras.utils.Sequence`) object in order to avoid duplicate data when using multiprocessing. The output of the generator must be either a tuple `(inputs, targets)` or a tuple `(inputs, targets, sample_weights)`. This tuple (a single output of the generator) makes a single batch. Therefore, all arrays in this tuple must have the same length (equal to the size of this batch). Different batches may have different sizes. For example, the last batch of the epoch is commonly smaller than the others, if the size of the dataset is not divisible by the batch size. The generator is expected to loop over its data indefinitely. An epoch finishes when `steps_per_epoch` batches have been seen by the model. - **steps_per_epoch**: Integer. Total number of steps (batches of samples) to yield from `generator` before declaring one epoch finished and starting the next epoch. It should typically be equal to `ceil(num_samples / batch_size)` Optional for `Sequence`: if unspecified, will use the `len(generator)` as a number of steps. - **epochs**: Integer. Number of epochs to train the model. An epoch is an iteration over the entire data provided, as defined by `steps_per_epoch`. Note that in conjunction with `initial_epoch`, `epochs` is to be understood as "final epoch". The model is not trained for a number of iterations given by `epochs`, but merely until the epoch of index `epochs` is reached. - **verbose**: Integer. 0, 1, or 2. Verbosity mode. 0 = silent, 1 = progress bar, 2 = one line per epoch. - **callbacks**: List of `tf.keras.callbacks.Callback` instances. List of callbacks to apply during training. See [callbacks](https://www.tensorflow.org/versions/r1.12/api_docs/python/tf/keras/callbacks). 
- **validation_data**: This can be either a generator or a `Sequence` object for the validation data, a tuple `(x_val, y_val)`, or a tuple `(x_val, y_val, val_sample_weights)`, on which to evaluate the loss and any model metrics at the end of each epoch. The model will not be trained on this data. - **validation_steps**: Only relevant if `validation_data` is a generator. Total number of steps (batches of samples) to yield from `validation_data` generator before stopping at the end of every epoch. It should typically be equal to the number of samples of your validation dataset divided by the batch size. Optional for `Sequence`: if unspecified, will use the `len(validation_data)` as a number of steps. - **validation_freq**: Only relevant if validation data is provided. Integer or `collections.Container` instance (e.g. list, tuple, etc.). If an integer, specifies how many training epochs to run before a new validation run is performed, e.g. `validation_freq=2` runs validation every 2 epochs. If a Container, specifies the epochs on which to run validation, e.g. `validation_freq=[1, 2, 10]` runs validation at the end of the 1st, 2nd, and 10th epochs. - **class_weight**: Optional dictionary mapping class indices (integers) to a weight (float) value, used for weighting the loss function (during training only). This can be useful to tell the model to "pay more attention" to samples from an under-represented class. - **max_queue_size**: Integer. Maximum size for the generator queue. If unspecified, `max_queue_size` will default to 10. - **workers**: Integer. Maximum number of processes to spin up when using process-based threading. If unspecified, `workers` will default to 1. If 0, will execute the generator on the main thread. - **use_multiprocessing**: Boolean. If `True`, use process-based threading. If unspecified, `use_multiprocessing` will default to `False`.
Note that because this implementation relies on multiprocessing, you should not pass non-picklable arguments to the generator as they can't be passed easily to children processes. - **shuffle**: Boolean. Whether to shuffle the order of the batches at the beginning of each epoch. Only used with instances of `Sequence` (`tf.keras.utils.Sequence`). Has no effect when `steps_per_epoch` is not `None`. - **initial_epoch**: Integer. Epoch at which to start training (useful for resuming a previous training run). **Returns** - A `History` object. Its `History.history` attribute is a record of training loss values and metrics values at successive epochs, as well as validation loss values and validation metrics values (if applicable). **Raises** - **ValueError**: In case the generator yields data in an invalid format. **Example** ```python def generate_arrays_from_file(path): while True: with open(path) as f: for line in f: # create numpy arrays of input data # and labels, from each line in the file x1, x2, y = process_line(line) yield ({'input_1': x1, 'input_2': x2}, {'output': y}) model.fit_generator(generate_arrays_from_file('/my_file.txt'), steps_per_epoch=10000, epochs=10) ``` ## evaluate_generator ```python evaluate_generator(generator, steps=None, callbacks=None, max_queue_size=10, workers=1, use_multiprocessing=False, verbose=0) ``` Evaluates the model on a data generator. The generator should return the same kind of data as accepted by `test_on_batch`. **Arguments** - **generator**: Generator yielding tuples (inputs, targets) or (inputs, targets, sample_weights) or an instance of Sequence (tf.keras.utils.Sequence) object in order to avoid duplicate data when using multiprocessing. - **steps**: Total number of steps (batches of samples) to yield from `generator` before stopping. Optional for `Sequence`: if unspecified, will use the `len(generator)` as a number of steps. - **callbacks**: List of `tf.keras.callbacks.Callback` instances.
List of callbacks to apply during evaluation. See [callbacks](https://www.tensorflow.org/versions/r1.12/api_docs/python/tf/keras/callbacks). - **max_queue_size**: maximum size for the generator queue - **workers**: Integer. Maximum number of processes to spin up when using process based threading. If unspecified, `workers` will default to 1. If 0, will execute the generator on the main thread. - **use_multiprocessing**: if True, use process based threading. Note that because this implementation relies on multiprocessing, you should not pass non picklable arguments to the generator as they can't be passed easily to children processes. - **verbose**: verbosity mode, 0 or 1. **Returns** - Scalar test loss (if the model has a single output and no metrics) or list of scalars (if the model has multiple outputs and/or metrics). The attribute `model.metrics_names` will give you the display labels for the scalar outputs. **Raises** - **ValueError**: In case the generator yields data in an invalid format. ## predict_generator ```python predict_generator(generator, steps=None, callbacks=None, max_queue_size=10, workers=1, use_multiprocessing=False, verbose=0) ``` Generates predictions for the input samples from a data generator. The generator should return the same kind of data as accepted by `predict_on_batch`. **Arguments** - **generator**: Generator yielding batches of input samples or an instance of Sequence (`tf.keras.utils.Sequence`) object in order to avoid duplicate data when using multiprocessing. - **steps**: Total number of steps (batches of samples) to yield from `generator` before stopping. Optional for `Sequence`: if unspecified, will use the `len(generator)` as a number of steps. - **callbacks**: List of `tf.keras.callbacks.Callback` instances. List of callbacks to apply during prediction. See [callbacks](https://www.tensorflow.org/versions/r1.12/api_docs/python/tf/keras/callbacks). - **max_queue_size**: Maximum size for the generator queue. - **workers**: Integer.
Maximum number of processes to spin up when using process based threading. If unspecified, `workers` will default to 1. If 0, will execute the generator on the main thread. - **use_multiprocessing**: If `True`, use process based threading. Note that because this implementation relies on multiprocessing, you should not pass non picklable arguments to the generator as they can't be passed easily to children processes. - **verbose**: verbosity mode, 0 or 1. **Returns** - Numpy array(s) of predictions. **Raises** - **ValueError**: In case the generator yields data in an invalid format. ## get_layer ```python get_layer(name=None, index=None) ``` Retrieves a layer based on either its name (unique) or index. If `name` and `index` are both provided, `index` will take precedence. Indices are based on order of horizontal graph traversal (bottom-up). **Arguments** - **name**: String, name of layer. - **index**: Integer, index of layer. **Returns** - A layer instance. **Raises** - **ValueError**: In case of invalid layer name or index. ================================================ FILE: docs/source/Models.rst ================================================ DeepCTR Models API ====================== .. toctree:: Model Methods CCPM FNN PNN WDL DeepFM MLR NFM AFM DCN DCNMix DIN DIEN DSIN BST xDeepFM AutoInt ONN FGCNN FiBiNET FLEN IFM DIFM DeepFEFM SharedBottom ESMM MMOE PLE EDCN ================================================ FILE: docs/source/Quick-Start.md ================================================ # Quick-Start [![](https://pai-public-data.oss-cn-beijing.aliyuncs.com/EN-pai-dsw.svg)](https://dsw-dev.data.aliyun.com/#/?fileUrl=https://pai-public-data.oss-cn-beijing.aliyuncs.com/deep-ctr/Getting-started-4-steps-to-DeepCTR.ipynb&fileName=Getting-started-4-steps-to-DeepCTR.ipynb) ## Installation Guide Now `deepctr` is available for python `2.7` and `3.5, 3.6, 3.7`. `deepctr` depends on tensorflow, you can specify to install the cpu version or gpu version through `pip`.
### CPU version ```bash $ pip install deepctr[cpu] ``` ### GPU version ```bash $ pip install deepctr[gpu] ``` ## Getting started: 4 steps to DeepCTR ### Step 1: Import model ```python import pandas as pd from sklearn.preprocessing import LabelEncoder, MinMaxScaler from sklearn.model_selection import train_test_split from deepctr.models import DeepFM from deepctr.feature_column import SparseFeat, DenseFeat,get_feature_names data = pd.read_csv('./criteo_sample.txt') sparse_features = ['C' + str(i) for i in range(1, 27)] dense_features = ['I'+str(i) for i in range(1, 14)] data[sparse_features] = data[sparse_features].fillna('-1', ) data[dense_features] = data[dense_features].fillna(0,) target = ['label'] ``` ### Step 2: Simple preprocessing Usually we have two methods to encode the sparse categorical feature for embedding - Label Encoding: map the features to integer value from 0 ~ len(#unique) - 1 ```python for feat in sparse_features: lbe = LabelEncoder() data[feat] = lbe.fit_transform(data[feat]) ``` - Hash Encoding: map the features to a fixed range, like 0 ~ 9999. We have 2 methods to do that: - Do feature hashing before training ```python for feat in sparse_features: lbe = HashEncoder() data[feat] = lbe.transform(data[feat]) ``` - Do feature hashing on the fly in training process We can do feature hashing by setting `use_hash=True` in `SparseFeat` or `VarlenSparseFeat` in Step3. And for dense numerical features, they are usually discretized to buckets; here we use normalization. ```python mms = MinMaxScaler(feature_range=(0,1)) data[dense_features] = mms.fit_transform(data[dense_features]) ``` ### Step 3: Generate feature columns For sparse features, we transform them into dense vectors by embedding techniques. For dense numerical features, we concatenate them to the input tensors of fully connected layer. And for varlen(multi-valued) sparse features, you can use [VarlenSparseFeat](./Features.html#varlensparsefeat).
Visit [examples](./Examples.html#multi-value-input-movielens) of using `VarlenSparseFeat` - Label Encoding ```python fixlen_feature_columns = [SparseFeat(feat, vocabulary_size=data[feat].max() + 1,embedding_dim=4) for i,feat in enumerate(sparse_features)] + [DenseFeat(feat, 1,) for feat in dense_features] ``` - Feature Hashing on the fly ```python fixlen_feature_columns = [SparseFeat(feat, vocabulary_size=1e6,embedding_dim=4, use_hash=True, dtype='string') # the input is string for feat in sparse_features] + [DenseFeat(feat, 1, ) for feat in dense_features] ``` - generate feature columns ```python dnn_feature_columns = fixlen_feature_columns linear_feature_columns = fixlen_feature_columns feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns) ``` ### Step 4: Generate the training samples and train the model ```python train, test = train_test_split(data, test_size=0.2) train_model_input = {name:train[name].values for name in feature_names} test_model_input = {name:test[name].values for name in feature_names} model = DeepFM(linear_feature_columns,dnn_feature_columns,task='binary') model.compile("adam", "binary_crossentropy", metrics=['binary_crossentropy'], ) history = model.fit(train_model_input, train[target].values, batch_size=256, epochs=10, verbose=2, validation_split=0.2, ) pred_ans = model.predict(test_model_input, batch_size=256) ``` You can check the full code [here](./Examples.html#classification-criteo). You also can run a distributed training job with the keras model on Kubernetes using [ElasticDL](https://github.com/sql-machine-learning/elasticdl/blob/develop/docs/tutorials/elasticdl_deepctr_keras.md). 
## Getting started: 4 steps to DeepCTR Estimator with TFRecord ### Step 1: Import model ```python import tensorflow as tf from tensorflow.python.ops.parsing_ops import FixedLenFeature from deepctr.estimator.inputs import input_fn_tfrecord from deepctr.estimator.models import DeepFMEstimator ``` ### Step 2: Generate feature columns for linear part and dnn part ```python sparse_features = ['C' + str(i) for i in range(1, 27)] dense_features = ['I' + str(i) for i in range(1, 14)] dnn_feature_columns = [] linear_feature_columns = [] for i, feat in enumerate(sparse_features): dnn_feature_columns.append(tf.feature_column.embedding_column( tf.feature_column.categorical_column_with_identity(feat, 1000), 4)) linear_feature_columns.append(tf.feature_column.categorical_column_with_identity(feat, 1000)) for feat in dense_features: dnn_feature_columns.append(tf.feature_column.numeric_column(feat)) linear_feature_columns.append(tf.feature_column.numeric_column(feat)) ``` ### Step 3: Generate the training samples with TFRecord format ```python feature_description = {k: FixedLenFeature(dtype=tf.int64, shape=1) for k in sparse_features} feature_description.update( {k: FixedLenFeature(dtype=tf.float32, shape=1) for k in dense_features}) feature_description['label'] = FixedLenFeature(dtype=tf.float32, shape=1) train_model_input = input_fn_tfrecord('./criteo_sample.tr.tfrecords', feature_description, 'label', batch_size=256, num_epochs=1, shuffle_factor=10) test_model_input = input_fn_tfrecord('./criteo_sample.te.tfrecords', feature_description, 'label', batch_size=2 ** 14, num_epochs=1, shuffle_factor=0) ``` ### Step 4: Train and evaluate the model ```python model = DeepFMEstimator(linear_feature_columns, dnn_feature_columns, task='binary') model.train(train_model_input) eval_result = model.evaluate(test_model_input) print(eval_result) ``` You can check the full code [here](./Examples.html#estimator-with-tfrecord-classification-criteo). 
You also can run a distributed training job with the estimator model on Kubernetes using [ElasticDL](https://github.com/sql-machine-learning/elasticdl/blob/develop/docs/tutorials/elasticdl_deepctr_estimator.md). ================================================ FILE: docs/source/conf.py ================================================ # -*- coding: utf-8 -*- # # Configuration file for the Sphinx documentation builder. # # This file does only contain a selection of the most common options. For a # full list see the documentation: # http://www.sphinx-doc.org/en/master/config # -- Path setup -------------------------------------------------------------- # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # import os import sys sys.path.insert(0, os.path.abspath('../../')) # -- Project information ----------------------------------------------------- project = 'DeepCTR' copyright = '2017-present, Weichen Shen' author = 'Weichen Shen' # The short X.Y version version = '' # The full version, including alpha/beta/rc tags release = '0.9.3' # -- General configuration --------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. # # needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ 'sphinx.ext.autodoc', 'sphinx.ext.mathjax', 'sphinx.ext.ifconfig', 'sphinx.ext.viewcode', 'sphinx.ext.githubpages', ] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # source_suffix = ['.rst', '.md'] #source_suffix = '.rst' # The master toctree document. 
master_doc = 'index' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. language = None # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path . exclude_patterns = [] # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' # -- Options for HTML output ------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # html_theme = 'alabaster' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. # # html_theme_options = {} # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] # Custom sidebar templates, must be a dictionary that maps document names # to template names. # # The default sidebars (for documents that don't match any pattern) are # defined by theme itself. Builtin themes are using these templates by # default: ``['localtoc.html', 'relations.html', 'sourcelink.html', # 'searchbox.html']``. # # html_sidebars = {} # -- Options for HTMLHelp output --------------------------------------------- # Output file base name for HTML help builder. htmlhelp_basename = 'DeepCTRdoc' # -- Options for LaTeX output ------------------------------------------------ latex_elements = { # The paper size ('letterpaper' or 'a4paper'). # # 'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). 
# # 'pointsize': '10pt', # Additional stuff for the LaTeX preamble. # # 'preamble': '', # Latex figure (float) alignment # # 'figure_align': 'htbp', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ (master_doc, 'DeepCTR.tex', 'DeepCTR Documentation', 'Weichen Shen', 'manual'), ] # -- Options for manual page output ------------------------------------------ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ (master_doc, 'deepctr', 'DeepCTR Documentation', [author], 1) ] # -- Options for Texinfo output ---------------------------------------------- # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ (master_doc, 'DeepCTR', 'DeepCTR Documentation', author, 'DeepCTR', 'One line description of project.', 'Miscellaneous'), ] # -- Extension configuration ------------------------------------------------- todo_include_todos = False html_theme = 'sphinx_rtd_theme' source_parsers = { '.md': 'recommonmark.parser.CommonMarkParser', } ================================================ FILE: docs/source/deepctr.contrib.rnn.rst ================================================ deepctr.contrib.rnn module ========================== .. automodule:: deepctr.contrib.rnn :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/deepctr.contrib.rst ================================================ deepctr.contrib package ======================= Submodules ---------- .. toctree:: deepctr.contrib.rnn deepctr.contrib.utils Module contents --------------- .. 
automodule:: deepctr.contrib :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/deepctr.contrib.utils.rst ================================================ deepctr.contrib.utils module ============================ .. automodule:: deepctr.contrib.utils :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/deepctr.estimator.feature_column.rst ================================================ deepctr.estimator.feature\_column module ======================================== .. automodule:: deepctr.estimator.feature_column :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/deepctr.estimator.inputs.rst ================================================ deepctr.estimator.inputs module =============================== .. automodule:: deepctr.estimator.inputs :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/deepctr.estimator.models.afm.rst ================================================ deepctr.estimator.models.afm module =================================== .. automodule:: deepctr.estimator.models.afm :members: :no-undoc-members: :no-show-inheritance: ================================================ FILE: docs/source/deepctr.estimator.models.autoint.rst ================================================ deepctr.estimator.models.autoint module ======================================= .. automodule:: deepctr.estimator.models.autoint :members: :no-undoc-members: :no-show-inheritance: ================================================ FILE: docs/source/deepctr.estimator.models.ccpm.rst ================================================ deepctr.estimator.models.ccpm module ==================================== .. 
automodule:: deepctr.estimator.models.ccpm :members: :no-undoc-members: :no-show-inheritance: ================================================ FILE: docs/source/deepctr.estimator.models.dcn.rst ================================================ deepctr.estimator.models.dcn module =================================== .. automodule:: deepctr.estimator.models.dcn :members: :no-undoc-members: :no-show-inheritance: ================================================ FILE: docs/source/deepctr.estimator.models.deepfefm.rst ================================================ deepctr.estimator.models.deepfefm module ====================================== .. automodule:: deepctr.estimator.models.deepfefm :members: :no-undoc-members: :no-show-inheritance: ================================================ FILE: docs/source/deepctr.estimator.models.deepfm.rst ================================================ deepctr.estimator.models.deepfm module ====================================== .. automodule:: deepctr.estimator.models.deepfm :members: :no-undoc-members: :no-show-inheritance: ================================================ FILE: docs/source/deepctr.estimator.models.fibinet.rst ================================================ deepctr.estimator.models.fibinet module ======================================= .. automodule:: deepctr.estimator.models.fibinet :members: :no-undoc-members: :no-show-inheritance: ================================================ FILE: docs/source/deepctr.estimator.models.fnn.rst ================================================ deepctr.estimator.models.fnn module =================================== .. automodule:: deepctr.estimator.models.fnn :members: :no-undoc-members: :no-show-inheritance: ================================================ FILE: docs/source/deepctr.estimator.models.fwfm.rst ================================================ deepctr.estimator.models.fwfm module ======================================== .. 
automodule:: deepctr.estimator.models.fwfm :members: :no-undoc-members: :no-show-inheritance: ================================================ FILE: docs/source/deepctr.estimator.models.nfm.rst ================================================ deepctr.estimator.models.nfm module =================================== .. automodule:: deepctr.estimator.models.nfm :members: :no-undoc-members: :no-show-inheritance: ================================================ FILE: docs/source/deepctr.estimator.models.pnn.rst ================================================ deepctr.estimator.models.pnn module =================================== .. automodule:: deepctr.estimator.models.pnn :members: :no-undoc-members: :no-show-inheritance: ================================================ FILE: docs/source/deepctr.estimator.models.rst ================================================ deepctr.estimator.models package ================================ Submodules ---------- .. toctree:: deepctr.estimator.models.afm deepctr.estimator.models.autoint deepctr.estimator.models.ccpm deepctr.estimator.models.dcn deepctr.estimator.models.deepfm deepctr.estimator.models.fwfm deepctr.estimator.models.fibinet deepctr.estimator.models.fnn deepctr.estimator.models.nfm deepctr.estimator.models.pnn deepctr.estimator.models.wdl deepctr.estimator.models.xdeepfm Module contents --------------- .. automodule:: deepctr.estimator.models :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/deepctr.estimator.models.wdl.rst ================================================ deepctr.estimator.models.wdl module =================================== ..
automodule:: deepctr.estimator.models.wdl :members: :no-undoc-members: :no-show-inheritance: ================================================ FILE: docs/source/deepctr.estimator.models.xdeepfm.rst ================================================ deepctr.estimator.models.xdeepfm module ======================================= .. automodule:: deepctr.estimator.models.xdeepfm :members: :no-undoc-members: :no-show-inheritance: ================================================ FILE: docs/source/deepctr.estimator.rst ================================================ deepctr.estimator package ========================= Subpackages ----------- .. toctree:: deepctr.estimator.models Submodules ---------- .. toctree:: deepctr.estimator.feature_column deepctr.estimator.inputs deepctr.estimator.utils Module contents --------------- .. automodule:: deepctr.estimator :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/deepctr.estimator.utils.rst ================================================ deepctr.estimator.utils module ============================== .. automodule:: deepctr.estimator.utils :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/deepctr.feature_column.rst ================================================ deepctr.feature\_column module ============================== .. automodule:: deepctr.feature_column :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/deepctr.inputs.rst ================================================ deepctr.inputs module ===================== .. automodule:: deepctr.inputs :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/deepctr.layers.activation.rst ================================================ deepctr.layers.activation module ================================ .. 
automodule:: deepctr.layers.activation :members: :no-undoc-members: :no-show-inheritance: ================================================ FILE: docs/source/deepctr.layers.core.rst ================================================ deepctr.layers.core module ========================== .. automodule:: deepctr.layers.core :members: :no-undoc-members: :no-show-inheritance: ================================================ FILE: docs/source/deepctr.layers.interaction.rst ================================================ deepctr.layers.interaction module ================================= .. automodule:: deepctr.layers.interaction :members: :no-undoc-members: :no-show-inheritance: ================================================ FILE: docs/source/deepctr.layers.normalization.rst ================================================ deepctr.layers.normalization module =================================== .. automodule:: deepctr.layers.normalization :members: :no-undoc-members: :no-show-inheritance: ================================================ FILE: docs/source/deepctr.layers.rst ================================================ deepctr.layers package ====================== Submodules ---------- .. toctree:: deepctr.layers.activation deepctr.layers.core deepctr.layers.interaction deepctr.layers.normalization deepctr.layers.sequence deepctr.layers.utils Module contents --------------- .. automodule:: deepctr.layers :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/deepctr.layers.sequence.rst ================================================ deepctr.layers.sequence module ============================== .. automodule:: deepctr.layers.sequence :members: :no-undoc-members: :no-show-inheritance: ================================================ FILE: docs/source/deepctr.layers.utils.rst ================================================ deepctr.layers.utils module =========================== .. 
automodule:: deepctr.layers.utils :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/deepctr.models.afm.rst ================================================ deepctr.models.afm module ========================= .. automodule:: deepctr.models.afm :members: :no-undoc-members: :no-show-inheritance: ================================================ FILE: docs/source/deepctr.models.autoint.rst ================================================ deepctr.models.autoint module ============================= .. automodule:: deepctr.models.autoint :members: :no-undoc-members: :no-show-inheritance: ================================================ FILE: docs/source/deepctr.models.ccpm.rst ================================================ deepctr.models.ccpm module ========================== .. automodule:: deepctr.models.ccpm :members: :no-undoc-members: :no-show-inheritance: ================================================ FILE: docs/source/deepctr.models.dcn.rst ================================================ deepctr.models.dcn module ========================= .. automodule:: deepctr.models.dcn :members: :no-undoc-members: :no-show-inheritance: ================================================ FILE: docs/source/deepctr.models.dcnmix.rst ================================================ deepctr.models.dcnmix module ========================= .. automodule:: deepctr.models.dcnmix :members: :no-undoc-members: :no-show-inheritance: ================================================ FILE: docs/source/deepctr.models.deepfefm.rst ================================================ deepctr.models.deepfefm module ============================== .. automodule:: deepctr.models.deepfefm :members: :no-undoc-members: :no-show-inheritance: ================================================ FILE: docs/source/deepctr.models.deepfm.rst ================================================ deepctr.models.deepfm module ============================ .. 
automodule:: deepctr.models.deepfm :members: :no-undoc-members: :no-show-inheritance: ================================================ FILE: docs/source/deepctr.models.deepfwfm.rst ================================================ deepctr.models.deepfwfm module ============================== .. automodule:: deepctr.models.deepfwfm :members: :no-undoc-members: :no-show-inheritance: ================================================ FILE: docs/source/deepctr.models.difm.rst ================================================ deepctr.models.difm module ============================= .. automodule:: deepctr.models.difm :members: :no-undoc-members: :no-show-inheritance: ================================================ FILE: docs/source/deepctr.models.edcn.rst ================================================ deepctr.models.edcn module ========================= .. automodule:: deepctr.models.edcn :members: :no-undoc-members: :no-show-inheritance: ================================================ FILE: docs/source/deepctr.models.fgcnn.rst ================================================ deepctr.models.fgcnn module =========================== .. automodule:: deepctr.models.fgcnn :members: :no-undoc-members: :no-show-inheritance: ================================================ FILE: docs/source/deepctr.models.fibinet.rst ================================================ deepctr.models.fibinet module ============================= .. automodule:: deepctr.models.fibinet :members: :no-undoc-members: :no-show-inheritance: ================================================ FILE: docs/source/deepctr.models.flen.rst ================================================ deepctr.models.flen module ============================= .. 
automodule:: deepctr.models.flen :members: :no-undoc-members: :no-show-inheritance: ================================================ FILE: docs/source/deepctr.models.fnn.rst ================================================ deepctr.models.fnn module ========================= .. automodule:: deepctr.models.fnn :members: :no-undoc-members: :no-show-inheritance: ================================================ FILE: docs/source/deepctr.models.ifm.rst ================================================ deepctr.models.ifm module ============================= .. automodule:: deepctr.models.ifm :members: :no-undoc-members: :no-show-inheritance: ================================================ FILE: docs/source/deepctr.models.mlr.rst ================================================ deepctr.models.mlr module ========================= .. automodule:: deepctr.models.mlr :members: :no-undoc-members: :no-show-inheritance: ================================================ FILE: docs/source/deepctr.models.multitask.esmm.rst ================================================ deepctr.models.multitask.esmm module ============================= .. automodule:: deepctr.models.multitask.esmm :members: :no-undoc-members: :no-show-inheritance: ================================================ FILE: docs/source/deepctr.models.multitask.mmoe.rst ================================================ deepctr.models.multitask.mmoe module ============================= .. automodule:: deepctr.models.multitask.mmoe :members: :no-undoc-members: :no-show-inheritance: ================================================ FILE: docs/source/deepctr.models.multitask.ple.rst ================================================ deepctr.models.multitask.ple module ============================= .. 
automodule:: deepctr.models.multitask.ple :members: :no-undoc-members: :no-show-inheritance: ================================================ FILE: docs/source/deepctr.models.multitask.sharedbottom.rst ================================================ deepctr.models.multitask.sharedbottom module ============================= .. automodule:: deepctr.models.multitask.sharedbottom :members: :no-undoc-members: :no-show-inheritance: ================================================ FILE: docs/source/deepctr.models.nfm.rst ================================================ deepctr.models.nfm module ========================= .. automodule:: deepctr.models.nfm :members: :no-undoc-members: :no-show-inheritance: ================================================ FILE: docs/source/deepctr.models.onn.rst ================================================ deepctr.models.onn module ========================== .. automodule:: deepctr.models.onn :members: :no-undoc-members: :no-show-inheritance: ================================================ FILE: docs/source/deepctr.models.pnn.rst ================================================ deepctr.models.pnn module ========================= .. automodule:: deepctr.models.pnn :members: :no-undoc-members: :no-show-inheritance: ================================================ FILE: docs/source/deepctr.models.rst ================================================ deepctr.models package ====================== Submodules ---------- .. 
toctree:: deepctr.models.afm deepctr.models.autoint deepctr.models.ccpm deepctr.models.dcn deepctr.models.dcnmix deepctr.models.edcn deepctr.models.deepfm deepctr.models.sequence.dien deepctr.models.sequence.din deepctr.models.sequence.dsin deepctr.models.fgcnn deepctr.models.fibinet deepctr.models.fnn deepctr.models.mlr deepctr.models.onn deepctr.models.nfm deepctr.models.pnn deepctr.models.wdl deepctr.models.xdeepfm deepctr.models.flen deepctr.models.ifm deepctr.models.difm deepctr.models.deepfefm deepctr.models.multitask.sharedbottom deepctr.models.multitask.esmm deepctr.models.multitask.mmoe deepctr.models.multitask.ple Module contents --------------- .. automodule:: deepctr.models :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/deepctr.models.sequence.bst.rst ================================================ deepctr.models.sequence.bst module ========================= .. automodule:: deepctr.models.sequence.bst :members: :no-undoc-members: :no-show-inheritance: ================================================ FILE: docs/source/deepctr.models.sequence.dien.rst ================================================ deepctr.models.sequence.dien module ========================== .. automodule:: deepctr.models.sequence.dien :members: :no-undoc-members: :no-show-inheritance: ================================================ FILE: docs/source/deepctr.models.sequence.din.rst ================================================ deepctr.models.sequence.din module ========================= .. automodule:: deepctr.models.sequence.din :members: :no-undoc-members: :no-show-inheritance: ================================================ FILE: docs/source/deepctr.models.sequence.dsin.rst ================================================ deepctr.models.sequence.dsin module ========================== ..
automodule:: deepctr.models.sequence.dsin :members: :no-undoc-members: :no-show-inheritance: ================================================ FILE: docs/source/deepctr.models.wdl.rst ================================================ deepctr.models.wdl module ========================= .. automodule:: deepctr.models.wdl :members: :no-undoc-members: :no-show-inheritance: ================================================ FILE: docs/source/deepctr.models.xdeepfm.rst ================================================ deepctr.models.xdeepfm module ============================= .. automodule:: deepctr.models.xdeepfm :members: :no-undoc-members: :no-show-inheritance: ================================================ FILE: docs/source/deepctr.rst ================================================ deepctr package =============== Subpackages ----------- .. toctree:: deepctr.contrib deepctr.layers deepctr.models Submodules ---------- .. toctree:: deepctr.inputs deepctr.utils Module contents --------------- .. automodule:: deepctr :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/deepctr.utils.rst ================================================ deepctr.utils module ==================== .. automodule:: deepctr.utils :members: :undoc-members: :show-inheritance: ================================================ FILE: docs/source/index.rst ================================================ .. DeepCTR documentation master file, created by sphinx-quickstart on Fri Nov 23 21:08:54 2018. You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. Welcome to DeepCTR's documentation! =================================== |Downloads|_ |Stars|_ |Forks|_ |PyPii|_ |Issues|_ |Chat|_ .. |Downloads| image:: https://pepy.tech/badge/deepctr .. _Downloads: https://pepy.tech/project/deepctr .. |Stars| image:: https://img.shields.io/github/stars/shenweichen/deepctr.svg .. 
_Stars: https://github.com/shenweichen/DeepCTR .. |Forks| image:: https://img.shields.io/github/forks/shenweichen/deepctr.svg .. _Forks: https://github.com/shenweichen/DeepCTR/fork .. |PyPii| image:: https://img.shields.io/pypi/v/deepctr.svg .. _PyPii: https://pypi.org/project/deepctr .. |Issues| image:: https://img.shields.io/github/issues/shenweichen/deepctr.svg .. _Issues: https://github.com/shenweichen/deepctr/issues .. |Chat| image:: https://img.shields.io/badge/chat-wechat-brightgreen?style=flat .. _Chat: ./#disscussiongroup DeepCTR is an **Easy-to-use**, **Modular** and **Extendible** package of deep-learning based CTR models along with many core component layers which can be used to easily build custom models. You can use any complex model with ``model.fit()`` and ``model.predict()``. - Provide ``tf.keras.Model`` like interface for **quick experiment**. `example `_ - Provide ``tensorflow estimator`` interface for **large scale data** and **distributed training**. `example `_ - It is compatible with both ``tf 1.x`` and ``tf 2.x``. Let's `Get Started! <./Quick-Start.html>`_ (`Chinese Introduction `_) You can read the latest code and related projects - DeepCTR: https://github.com/shenweichen/DeepCTR - DeepMatch: https://github.com/shenweichen/DeepMatch - DeepCTR-Torch: https://github.com/shenweichen/DeepCTR-Torch News ----- 11/10/2022 : Add `EDCN` . `Changelog `_ 10/15/2022 : Support python `3.9` , `3.10` . `Changelog `_ 06/11/2022 : Improve compatibility with tensorflow `2.x`. `Changelog `_ DisscussionGroup ----------------------- 公众号:**浅梦学习笔记** wechat ID: **deepctrbot** `Discussions `_ `学习小组主题集合 `_ .. image:: ../pics/code2.jpg .. toctree:: :maxdepth: 2 :caption: Home: Quick-Start Features Examples FAQ History ..
toctree:: :maxdepth: 3 :caption: API: Models Estimators Layers Indices and tables ================== * :ref:`genindex` * :ref:`modindex` * :ref:`search` ================================================ FILE: docs/source/modules.rst ================================================ deepctr ======= .. toctree:: :maxdepth: 4 deepctr ================================================ FILE: examples/avazu_sample.txt ================================================ id,click,hour,C1,banner_pos,site_id,site_domain,site_category,app_id,app_domain,app_category,device_id,device_ip,device_model,device_type,device_conn_type,C14,C15,C16,C17,C18,C19,C20,C21 1000009418151094273,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,ddd2926e,44956a24,1,2,15706,320,50,1722,0,35,-1,79 10000169349117863715,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,96809ac8,711ee120,1,0,15704,320,50,1722,0,35,100084,79 10000371904215119486,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,b3cf8def,8a4875bd,1,0,15704,320,50,1722,0,35,100084,79 10000640724480838376,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,e8275b8f,6332421a,1,0,15706,320,50,1722,0,35,100084,79 10000679056417042096,0,14102100,1005,1,fe8cc448,9166c161,0569f928,ecad2386,7801e8d9,07d7df22,a99f214a,9644d0bf,779d90c2,1,0,18993,320,50,2161,0,35,-1,157 10000720757801103869,0,14102100,1005,0,d6137915,bb1ef334,f028772b,ecad2386,7801e8d9,07d7df22,a99f214a,05241af0,8a4875bd,1,0,16920,320,50,1899,0,431,100077,117 10000724729988544911,0,14102100,1005,0,8fda644b,25d4cfcd,f028772b,ecad2386,7801e8d9,07d7df22,a99f214a,b264c159,be6db1d7,1,0,20362,320,50,2333,0,39,-1,157 10000918755742328737,0,14102100,1005,1,e151e245,7e091613,f028772b,ecad2386,7801e8d9,07d7df22,a99f214a,e6f67278,be74e6fe,1,0,20632,320,50,2374,3,39,-1,23 
10000949271186029916,1,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,37e8da74,5db079b5,1,2,15707,320,50,1722,0,35,-1,79 10001264480619467364,0,14102100,1002,0,84c7ba46,c4e18dd6,50e219e0,ecad2386,7801e8d9,07d7df22,c357dbff,f1ac7184,373ecbe6,0,0,21689,320,50,2496,3,167,100191,23 10001868339616595934,0,14102100,1005,1,e151e245,7e091613,f028772b,ecad2386,7801e8d9,07d7df22,a99f214a,5d877109,8f5c9827,1,0,17747,320,50,1974,2,39,100019,33 10001966791793526909,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,6f407810,1f0bc64f,1,0,15701,320,50,1722,0,35,-1,79 10002028568167339219,0,14102100,1005,0,9e8cf15d,0d3cb7be,f028772b,ecad2386,7801e8d9,07d7df22,a99f214a,58811cdf,8326c04b,1,2,20596,320,50,2161,0,35,100148,157 10002044883120869786,0,14102100,1005,0,d6137915,bb1ef334,f028772b,ecad2386,7801e8d9,07d7df22,a99f214a,72aab6df,04258293,1,0,19771,320,50,2227,0,687,100077,48 10002518649031436658,0,14102100,1005,0,85f751fd,c4e18dd6,50e219e0,98fed791,d9b5648e,0f2161f8,a99f214a,6dec2796,aad45b01,1,0,20984,320,50,2371,0,551,-1,46 10003539039235338011,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,a4f47b2e,8a4875bd,1,0,15699,320,50,1722,0,35,100084,79 10003585669470236873,0,14102100,1005,0,d9750ee7,98572c79,f028772b,ecad2386,7801e8d9,07d7df22,a99f214a,9b1fe278,128f4ba1,1,0,17914,320,50,2043,2,39,-1,32 10004105575081229495,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,c26c53cf,be87996b,1,2,15708,320,50,1722,0,35,100084,79 10004181428767727519,0,14102100,1005,1,0c2fe9d6,27e3c518,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,b7a69808,158e4944,1,0,6558,320,50,571,2,39,-1,32 10004482643316086592,0,14102100,1005,0,85f751fd,c4e18dd6,50e219e0,66a5f0f3,d9b5648e,cef3e649,a99f214a,fa60af6b,b4b19c97,1,0,21234,320,50,2434,3,163,100088,61 
10004510652136496837,0,14102100,1005,0,543a539e,c7ca3108,3e814130,ecad2386,7801e8d9,07d7df22,a99f214a,8a308c73,3223bcfe,1,0,20352,320,50,2333,0,39,-1,157 10004574413841529209,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,1b6530bc,1aa0e912,1,0,15706,320,50,1722,0,35,-1,79 10004670021948955159,0,14102100,1005,0,543a539e,c7ca3108,3e814130,ecad2386,7801e8d9,07d7df22,a99f214a,a2d12b33,607e78f2,1,0,20366,320,50,2333,0,39,-1,157 10004765361151096125,1,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,c6563308,7fdd04d2,1,0,15701,320,50,1722,0,35,-1,79 10005249248600843539,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,e99d0c2e,d25693ce,1,0,15706,320,50,1722,0,35,100083,79 10005334911727438633,0,14102100,1010,1,85f751fd,c4e18dd6,50e219e0,ffc6ffd0,7801e8d9,0f2161f8,fb23c543,69890c7f,9fef9da8,4,0,21665,320,50,2493,3,35,-1,117 10005541670676403131,0,14102100,1005,1,e151e245,7e091613,f028772b,ecad2386,7801e8d9,07d7df22,a99f214a,c62f7206,69f9dd0e,1,0,20984,320,50,2371,0,551,100217,46 10005609489911213467,1,14102100,1005,0,85f751fd,c4e18dd6,50e219e0,54c5d545,2347f47a,0f2161f8,9af87478,2a2bfc89,ecf10acf,1,0,21611,320,50,2480,3,297,100111,61 10005649443863261125,0,14102100,1005,0,543a539e,c7ca3108,3e814130,ecad2386,7801e8d9,07d7df22,a99f214a,50d86760,d787e91b,1,0,20366,320,50,2333,0,39,-1,157 10005951398749600249,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,431b3174,f39b265e,1,0,15706,320,50,1722,0,35,-1,79 10006192453619779489,0,14102100,1005,0,85f751fd,c4e18dd6,50e219e0,685d1c4c,2347f47a,8ded1f7a,6a943594,8a014cbb,81b42528,1,3,15708,320,50,1722,0,35,-1,79 10006415976094813740,0,14102100,1005,0,f84e52b6,d7e2f29b,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,a8649089,e9b8d8d7,1,0,16838,320,50,1882,3,35,-1,13 
10006490708516192015,1,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,a4459495,517bef98,1,0,15708,320,50,1722,0,35,100083,79 10006557235872316145,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,ac77b71a,d787e91b,1,0,15699,320,50,1722,0,35,-1,79 10006629065800243858,0,14102100,1005,0,543a539e,c7ca3108,3e814130,ecad2386,7801e8d9,07d7df22,a99f214a,6769bdb2,d787e91b,1,0,20362,320,50,2333,0,39,-1,157 10006777279679619273,0,14102100,1005,0,85f751fd,c4e18dd6,50e219e0,d2bb6502,2347f47a,8ded1f7a,4b2309e9,22c2dcf4,d6e0e6ff,1,3,18987,320,50,2158,3,291,100193,61 10006789981076459409,0,14102100,1005,0,030440fe,08ba7db9,76b2941d,ecad2386,7801e8d9,07d7df22,a99f214a,692824c7,293291c1,1,0,20596,320,50,2161,0,35,-1,157 10006958186789044052,1,14102100,1005,0,85f751fd,c4e18dd6,50e219e0,0acbeaa3,45a51db4,f95efa07,a99f214a,ce6e6bbd,2cd8ff6d,1,0,18993,320,50,2161,0,35,100034,157 10007163879183388340,0,14102100,1005,0,030440fe,08ba7db9,76b2941d,ecad2386,7801e8d9,07d7df22,a99f214a,5035aded,3db9fde9,1,0,18993,320,50,2161,0,35,-1,157 10007164336863914220,1,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,b2b14786,36d749e5,1,0,15706,320,50,1722,0,35,-1,79 10007197383452514432,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,07f39509,49ea3580,1,0,15704,320,50,1722,0,35,100084,79 10007446479189647526,0,14102100,1005,0,6ec06dbd,d262cf1e,f66779e6,ecad2386,7801e8d9,07d7df22,a99f214a,3aea6370,6360f9ec,1,0,19870,320,50,2271,0,687,100075,48 10007768440836622373,0,14102100,1005,0,85f751fd,c4e18dd6,50e219e0,e2a1ca37,2347f47a,8ded1f7a,432cd280,45919d0d,1ccc7835,1,0,15708,320,50,1722,0,35,-1,79 10007830732992705885,0,14102100,1010,1,85f751fd,c4e18dd6,50e219e0,a607e6a7,7801e8d9,0f2161f8,890abcbb,9f02f646,e8c7729d,4,0,21665,320,50,2493,3,35,-1,117 
10007847530896919634,1,14102100,1002,0,84c7ba46,c4e18dd6,50e219e0,ecad2386,7801e8d9,07d7df22,767a174e,3e805b2a,cf19f7f7,0,0,21661,320,50,2446,3,171,100228,156 10007908698866493310,0,14102100,1005,1,0eb72673,d2f72222,f028772b,ecad2386,7801e8d9,07d7df22,a99f214a,834f84b2,76dc4769,1,0,16208,320,50,1800,3,167,100075,23 10007944429976961145,1,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,07875ea4,aaffed8f,1,0,15701,320,50,1722,0,35,-1,79 10009147085943364421,0,14102100,1005,1,e151e245,7e091613,f028772b,ecad2386,7801e8d9,07d7df22,a99f214a,905d2fbc,1b13b020,1,0,17037,320,50,1934,2,39,-1,16 10009190848778773294,0,14102100,1005,1,5ee41ff2,17d996e6,f028772b,ecad2386,7801e8d9,07d7df22,a99f214a,fc7f99ee,70359270,1,0,16920,320,50,1899,0,431,-1,117 10009635774586344851,0,14102100,1005,0,543a539e,c7ca3108,3e814130,ecad2386,7801e8d9,07d7df22,a99f214a,37018b2d,24f6b932,1,0,20352,320,50,2333,0,39,-1,157 10009699694430474960,1,14102100,1005,0,4dd0a958,79cf0c8d,f028772b,ecad2386,7801e8d9,07d7df22,a99f214a,f6a5ae09,88fe1d5d,1,0,20366,320,50,2333,0,39,-1,157 10009807995169380879,0,14102100,1005,0,85f751fd,c4e18dd6,50e219e0,396df801,2347f47a,0f2161f8,a99f214a,554d9f5f,36a30aeb,1,0,15705,320,50,1722,0,35,100084,79 10009910814812262951,1,14102100,1005,0,85f751fd,c4e18dd6,50e219e0,a079ef6b,2347f47a,75d80bbe,a99f214a,f8c8df20,be87996b,1,2,18993,320,50,2161,0,35,100131,157 10010452321736390000,1,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,cede6db1,a0f5f879,1,0,15701,320,50,1722,0,35,100084,79 10010485868773711631,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,1cb5985e,1ccc7835,1,0,15701,320,50,1722,0,35,100084,79 10010504760200486071,0,14102100,1005,1,5ee41ff2,17d996e6,f028772b,ecad2386,7801e8d9,07d7df22,a99f214a,d012a1cb,ecb851b2,1,0,16615,320,50,1863,3,39,100188,23 
10010730108771379386,0,14102100,1005,1,e151e245,7e091613,f028772b,ecad2386,7801e8d9,07d7df22,a99f214a,08dd2eb8,cdf6ea96,1,0,20634,320,50,2374,3,39,-1,23 10010804179216291475,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,9a5911ad,1ccc7835,1,0,15704,320,50,1722,0,35,-1,79 1001082718558099372,0,14102100,1005,0,85f751fd,c4e18dd6,50e219e0,1779deee,2347f47a,f95efa07,a99f214a,5a96d22e,9e3836ff,1,0,18993,320,50,2161,0,35,-1,157 10010924186026106882,0,14102100,1005,0,030440fe,08ba7db9,76b2941d,ecad2386,7801e8d9,07d7df22,a99f214a,8f6c30bb,744ae245,1,0,18993,320,50,2161,0,35,-1,157 10010966574628106108,1,14102100,1005,0,85f751fd,c4e18dd6,50e219e0,0acbeaa3,45a51db4,f95efa07,a99f214a,061893d4,68b900d9,1,0,20596,320,50,2161,0,35,100034,157 10011085150831357375,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,07875ea4,d787e91b,1,0,15699,320,50,1722,0,35,-1,79 10011205200760015892,0,14102100,1005,0,6256f5b4,28f93029,f028772b,ecad2386,7801e8d9,07d7df22,a99f214a,04a1662e,521f95fe,1,0,17212,320,50,1887,3,39,100202,23 1001139595064240144,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,c9758700,76dc4769,1,0,15705,320,50,1722,0,35,-1,79 10011406079394798455,0,14102100,1005,0,543a539e,c7ca3108,3e814130,ecad2386,7801e8d9,07d7df22,a99f214a,9ae68bb9,24f6b932,1,0,20362,320,50,2333,0,39,-1,157 1001156047808171144,1,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,2801fd97,575d0d2a,1,0,15708,320,50,1722,0,35,100084,79 10011561503992804801,0,14102100,1005,1,e151e245,7e091613,f028772b,ecad2386,7801e8d9,07d7df22,a99f214a,931519c4,e9b8d8d7,1,0,17747,320,50,1974,2,39,100021,33 10011650513707909570,0,14102100,1005,0,85f751fd,c4e18dd6,50e219e0,febd1138,82e27996,0f2161f8,a99f214a,1ce4451d,99e427c9,1,0,21611,320,50,2480,3,297,100111,61 
10011658782619041235,1,14102100,1005,0,0aab7161,660aeadc,f028772b,ecad2386,7801e8d9,07d7df22,a99f214a,0086332e,1f0bc64f,1,0,15699,320,50,1722,0,35,-1,79 10011677979251422697,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,82310cab,f39b265e,1,0,15707,320,50,1722,0,35,-1,79 1001179289293608710,0,14102100,1005,1,e023ba3e,75f9ddc3,f028772b,ecad2386,7801e8d9,07d7df22,a99f214a,f7c9ee04,56f254f5,1,0,17914,320,50,2043,2,39,-1,32 10012212068904346443,0,14102100,1005,0,543a539e,c7ca3108,3e814130,ecad2386,7801e8d9,07d7df22,a99f214a,6769bdb2,d787e91b,1,0,20352,320,50,2333,0,39,-1,157 10012222478217629851,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,3738b922,d787e91b,1,0,15705,320,50,1722,0,35,100084,79 10012820175855462623,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,8acb1161,1f0bc64f,1,0,15707,320,50,1722,0,35,-1,79 10013076841337920650,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,ed326aa2,4ceb2e0b,1,0,15702,320,50,1722,0,35,-1,79 10013222055782902774,0,14102100,1005,0,5b08c53b,7687a86e,3e814130,ecad2386,7801e8d9,07d7df22,a99f214a,09b19f16,7eef184d,1,0,17654,300,250,1994,2,39,-1,33 10013330254346467994,0,14102100,1005,0,f5476ff8,00e1b9c0,3e814130,ecad2386,7801e8d9,07d7df22,a99f214a,da162469,8b1aa260,1,0,18993,320,50,2161,0,35,-1,157 10013378798301872145,1,14102100,1005,1,e151e245,7e091613,f028772b,ecad2386,7801e8d9,07d7df22,a99f214a,40fb49ca,be74e6fe,1,0,20362,320,50,2333,0,39,-1,157 10013493678511778479,0,14102100,1005,0,85f751fd,c4e18dd6,50e219e0,39947756,2347f47a,cef3e649,a2cbb1e0,d784a354,9f8d0424,1,2,18993,320,50,2161,0,35,-1,157 10013552540914034684,0,14102100,1005,0,85f751fd,c4e18dd6,50e219e0,e2fcccd2,5c5a694b,0f2161f8,a99f214a,c21a1e56,89416188,1,0,4687,320,50,423,2,39,100148,32 
10013750748974177308,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,8eb51743,a0f5f879,1,0,15703,320,50,1722,0,35,100083,79 1001378691598807810,0,14102100,1002,0,85f751fd,c4e18dd6,50e219e0,a37bf1e4,7801e8d9,07d7df22,1ab3feec,c45c8256,8debacdb,0,0,21691,320,50,2495,2,167,-1,23 10013840276980995258,0,14102100,1005,0,85f751fd,c4e18dd6,50e219e0,e2fcccd2,5c5a694b,0f2161f8,a99f214a,07533d06,76dc4769,1,0,4687,320,50,423,2,39,100148,32 10013846047025246486,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,2e93a860,f39b265e,1,0,15702,320,50,1722,0,35,100083,79 10014026899633599058,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,9cdc12cc,711ee120,1,0,15699,320,50,1722,0,35,100084,79 10014063680973162331,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,665810f3,78d9bd10,1,0,15699,320,50,1722,0,35,100083,79 10014190212266331300,1,14102100,1005,0,85f751fd,c4e18dd6,50e219e0,9c13b419,2347f47a,f95efa07,a99f214a,ed9450c2,1f0bc64f,1,0,20633,320,50,2374,3,39,-1,23 10014285064795240866,1,14102100,1002,0,84c7ba46,c4e18dd6,50e219e0,ecad2386,7801e8d9,07d7df22,c357dbff,06f76b24,373ecbe6,0,0,21682,320,50,2496,3,167,100191,23 10014385711019128754,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,12c3d700,ef726eae,1,0,15704,320,50,1722,0,35,-1,79 10014630626523032142,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,0345a137,3bd9e8e7,1,0,15702,320,50,1722,0,35,100083,79 10014764617325763141,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,4e873691,c6263d8a,1,0,15703,320,50,1722,0,35,-1,79 10014885175555340290,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,27f3fa06,d25693ce,1,0,15705,320,50,1722,0,35,100083,79 
10014887683839786798,1,14102100,1005,0,85f751fd,c4e18dd6,50e219e0,e2fcccd2,5c5a694b,0f2161f8,a99f214a,fac78767,84ebbcd4,1,0,4687,320,50,423,2,39,100148,32 10015140740686523448,0,14102100,1005,0,85f751fd,c4e18dd6,50e219e0,c51f82bc,d9b5648e,0f2161f8,a99f214a,2d227840,9b5ce758,1,0,21611,320,50,2480,3,297,100111,61 10015211672544614902,0,14102100,1005,1,e151e245,7e091613,f028772b,ecad2386,7801e8d9,07d7df22,a99f214a,42606fe6,cb0fb677,1,0,17037,320,50,1934,2,39,-1,16 10015376300289320595,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,03108db9,a0f5f879,1,0,15701,320,50,1722,0,35,100084,79 10015405794859644629,1,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,0b697be1,1f0bc64f,1,0,15701,320,50,1722,0,35,100084,79 10015629448289660116,1,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,58db4f0c,6332421a,1,0,15708,320,50,1722,0,35,-1,79 100156980486870304,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,02b9b0fc,1aa0e912,1,0,15706,320,50,1722,0,35,-1,79 10015745448500295401,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,07d7df22,a99f214a,6b9769f2,4c8aeb60,1,0,15701,320,50,1722,0,35,-1,79 ================================================ FILE: examples/census-income.sample ================================================ 138481,62, Private,43,23, High school graduate,0, Not in universe, Married-civilian spouse present, Education, Adm support including clerical, White, All other, Female, Not in universe, Not in universe, Full-time schedules,0,0,0, Joint both under 65, Not in universe, Not in universe, Householder, Householder,1819.08, ?, ?, ?, Not in universe under 1 year old, ?,4, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, 50000+. 
91960,18, Private,40,19, 11th grade,0, High school, Never married, Entertainment, Sales, Black, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Single, Not in universe, Not in universe, Child 18+ never marr Not in a subfamily, Child 18 or older,645.07, Nonmover, Nonmover, Nonmover, Yes, Not in universe,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,94, - 50000. 112171,19, Not in universe,0,0, High school graduate,0, College or university, Never married, Not in universe or children, Not in universe, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Single, Not in universe, Not in universe, Child 18+ never marr Not in a subfamily, Child 18 or older,396.66, Nonmover, Nonmover, Nonmover, Yes, Not in universe,1, Not in universe, United-States, United-States, United-States, Native- Born in the United States,2, Not in universe,2,16,94, - 50000. 118554,9, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, Mexican-American, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,2052.26, ?, ?, ?, Not in universe under 1 year old, ?,0, Mother only present, Mexico, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,95, - 50000. 
193623,31, Private,45,3, Bachelors degree(BA AB BS),0, Not in universe, Never married, Other professional services, Executive admin and managerial, Black, All other, Male, Not in universe, Not in universe, Full-time schedules,0,0,0, Single, Not in universe, Not in universe, Nonfamily householder, Householder,614.61, ?, ?, ?, Not in universe under 1 year old, ?,3, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, - 50000. 198699,29, Private,33,29, Bachelors degree(BA AB BS),0, Not in universe, Married-civilian spouse present, Retail trade, Other service, White, All other, Female, Not in universe, Not in universe, Full-time schedules,0,0,0, Joint both under 65, Not in universe, Not in universe, Householder, Householder,1971.05, ?, ?, ?, Not in universe under 1 year old, ?,2, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, - 50000. 85495,52, Not in universe,0,0, High school graduate,0, Not in universe, Married-civilian spouse present, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Not in labor force,0,0,0, Nonfiler, Not in universe, Not in universe, Spouse of householder, Spouse of householder,1079.49, ?, ?, ?, Not in universe under 1 year old, ?,0, Not in universe, Peru, Peru, Peru, Foreign born- U S citizen by naturalization,0, Not in universe,2,0,95, - 50000. 
196125,0, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, Asian or Pacific Islander, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,1774.28, Not in universe, Not in universe, Not in universe, Not in universe under 1 year old, Not in universe,0, Both parents present, Taiwan, Taiwan, United-States, Native- Born in the United States,0, Not in universe,0,0,94, - 50000. 132109,16, Private,33,41, 9th grade,0, High school, Never married, Retail trade, Handlers equip cleaners etc , White, All other, Male, Not in universe, Job loser - on layoff, Children or Armed Forces,0,0,0, Single, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,368.31, Nonmover, Nonmover, Nonmover, Yes, Not in universe,4, Both parents present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,94, - 50000. 31996,6, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,1272.86, ?, ?, ?, Not in universe under 1 year old, ?,0, Both parents present, United-States, Italy, United-States, Native- Born in the United States,0, Not in universe,0,0,95, - 50000. 
197276,25, Private,8,36, 12th grade no diploma,0, Not in universe, Married-civilian spouse present, Manufacturing-durable goods, Machine operators assmblrs & inspctrs, White, Central or South American, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Joint both under 65, West, California, Householder, Householder,1964.79, MSA to MSA, Same county, Same county, No, Yes,2, Not in universe, El-Salvador, El-Salvador, El-Salvador, Foreign born- Not a citizen of U S ,0, Not in universe,2,20,94, - 50000. 43637,52, Private,37,31, 11th grade,0, Not in universe, Never married, Business and repair services, Other service, Black, All other, Female, Not in universe, Not in universe, Full-time schedules,0,0,0, Single, Not in universe, Not in universe, Nonfamily householder, Householder,4059.47, ?, ?, ?, Not in universe under 1 year old, ?,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,2, Not in universe,2,52,95, - 50000. 160024,3, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, Mexican-American, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,927.49, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Both parents present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,94, - 50000. 
184841,7, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, NA, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,1516.17, ?, ?, ?, Not in universe under 1 year old, ?,0, Both parents present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,95, - 50000. 90343,2, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, Asian or Pacific Islander, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,890.26, ?, ?, ?, Not in universe under 1 year old, ?,0, Both parents present, Philippines, Philippines, United-States, Native- Born in the United States,0, Not in universe,0,0,95, - 50000. 196773,72, Not in universe,0,0, High school graduate,0, Not in universe, Widowed, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Not in labor force,0,0,0, Nonfiler, Not in universe, Not in universe, Nonfamily householder, Householder,589.54, ?, ?, ?, Not in universe under 1 year old, ?,0, Not in universe, Germany, Germany, Germany, Foreign born- U S citizen by naturalization,0, Not in universe,2,0,95, - 50000. 
102326,61, Private,35,26, High school graduate,0, Not in universe, Divorced, Finance insurance and real estate, Adm support including clerical, White, All other, Female, No, Not in universe, Full-time schedules,0,0,0, Single, Not in universe, Not in universe, Nonfamily householder, Householder,1042.72, ?, ?, ?, Not in universe under 1 year old, ?,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, - 50000. 94179,45, Self-employed-not incorporated,33,19, Associates degree-occup /vocational,0, Not in universe, Divorced, Retail trade, Sales, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,1602,0, Single, Not in universe, Not in universe, Child 18+ ever marr Not in a subfamily, Child 18 or older,4184.67, Nonmover, Nonmover, Nonmover, Yes, Not in universe,1, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,94, - 50000. 115094,45, Private,3,39, Some college but no degree,725, Not in universe, Married-civilian spouse present, Mining, Transportation and material moving, White, All other, Male, No, Not in universe, Children or Armed Forces,0,0,0, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,1361.67, Nonmover, Nonmover, Nonmover, Yes, Not in universe,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,2, Not in universe,2,48,94, - 50000. 
139808,13, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, Other, Mexican-American, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,1749.06, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Both parents present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,94, - 50000. 10547,12, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,2473.12, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Mother only present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,94, - 50000. 140760,27, Not in universe,0,0, 5th or 6th grade,0, Not in universe, Married-civilian spouse present, Not in universe or children, Not in universe, White, Mexican (Mexicano), Female, Not in universe, Not in universe, Not in labor force,0,0,0, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,2523.97, ?, ?, ?, Not in universe under 1 year old, ?,0, Not in universe, Mexico, Mexico, Mexico, Foreign born- Not a citizen of U S ,0, Not in universe,2,0,95, - 50000. 
143136,11, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,2195.61, ?, ?, ?, Not in universe under 1 year old, ?,0, Both parents present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,95, - 50000. 198740,25, Private,37,2, Bachelors degree(BA AB BS),0, Not in universe, Never married, Business and repair services, Executive admin and managerial, Asian or Pacific Islander, All other, Female, Not in universe, Not in universe, Full-time schedules,0,0,10, Single, Not in universe, Not in universe, Other Rel 18+ never marr not in subfamily, Other relative of householder,1152.64, ?, ?, ?, Not in universe under 1 year old, ?,3, Not in universe, Philippines, Philippines, Philippines, Foreign born- Not a citizen of U S ,0, Not in universe,2,50,95, - 50000. 171302,5, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, Black, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,467.65, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Both parents present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,94, - 50000. 
51270,45, Private,38,31, High school graduate,0, Not in universe, Married-civilian spouse present, Business and repair services, Other service, White, All other, Female, Not in universe, Not in universe, Full-time schedules,0,0,0, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,1155.2, ?, ?, ?, Not in universe under 1 year old, ?,4, Not in universe, Poland, Poland, Poland, Foreign born- Not a citizen of U S ,0, Not in universe,2,16,95, - 50000. 102571,16, Private,33,19, 10th grade,0, High school, Never married, Retail trade, Sales, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Single, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,2072.15, Nonmover, Nonmover, Nonmover, Yes, Not in universe,6, Mother only present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,20,94, - 50000. 87901,46, Private,45,4, Bachelors degree(BA AB BS),0, Not in universe, Never married, Other professional services, Professional specialty, White, All other, Male, No, Not in universe, Full-time schedules,0,0,0, Single, Not in universe, Not in universe, Nonfamily householder, Householder,2405.49, ?, ?, ?, Not in universe under 1 year old, ?,2, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,45,95, 50000+. 40034,37, Private,39,2, High school graduate,0, Not in universe, Divorced, Personal services except private HH, Executive admin and managerial, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Head of household, Not in universe, Not in universe, Householder, Householder,1456.55, Nonmover, Nonmover, Nonmover, Yes, Not in universe,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,94, - 50000. 
91671,42, Self-employed-not incorporated,44,32, High school graduate,0, Not in universe, Married-civilian spouse present, Social services, Other service, White, All other, Female, Not in universe, Not in universe, Full-time schedules,0,0,0, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,1141.93, ?, ?, ?, Not in universe under 1 year old, ?,1, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,50,95, - 50000. 97009,14, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,900.5, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Mother only present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,94, - 50000. 178794,76, Not in universe,0,0, 10th grade,0, Not in universe, Married-civilian spouse present, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Not in labor force,0,0,0, Nonfiler, Not in universe, Not in universe, Spouse of householder, Spouse of householder,1131.39, ?, ?, ?, Not in universe under 1 year old, ?,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,95, - 50000. 
84772,30, Not in universe,0,0, Bachelors degree(BA AB BS),0, Not in universe, Married-civilian spouse present, Not in universe or children, Not in universe, White, Mexican (Mexicano), Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Joint both under 65, West, California, Spouse of householder, Spouse of householder,1707.88, MSA to MSA, Same county, Same county, No, Yes,0, Not in universe, Mexico, Mexico, Mexico, Foreign born- Not a citizen of U S ,0, Not in universe,2,0,94, - 50000. 7953,79, Not in universe,0,0, 11th grade,0, Not in universe, Widowed, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,119, Head of household, Not in universe, Not in universe, Householder, Householder,1644.11, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,94, - 50000. 56916,27, Private,39,32, High school graduate,0, Not in universe, Never married, Personal services except private HH, Other service, Black, All other, Female, Not in universe, Not in universe, Full-time schedules,0,0,0, Head of household, Not in universe, Not in universe, RP of unrelated subfamily, Nonrelative of householder,1717.06, ?, ?, ?, Not in universe under 1 year old, ?,2, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, - 50000. 
150887,5, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, Black, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child under 18 of RP of unrel subfamily, Nonrelative of householder,4578.98, ?, ?, ?, Not in universe under 1 year old, ?,0, Mother only present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,95, - 50000. 182649,5, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, Amer Indian Aleut or Eskimo, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,1020.52, ?, ?, ?, Not in universe under 1 year old, ?,0, Both parents present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,95, - 50000. 177755,69, State government,50,28, High school graduate,0, Not in universe, Married-civilian spouse present, Public administration, Protective services, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,5, Joint one under 65 & one 65+, Not in universe, Not in universe, Householder, Householder,404.72, Nonmover, Nonmover, Nonmover, Yes, Not in universe,1, Not in universe, ?, ?, United-States, Native- Born in the United States,0, Not in universe,2,6,94, - 50000. 
143031,69, Not in universe,0,0, 7th and 8th grade,0, Not in universe, Married-civilian spouse present, Not in universe or children, Not in universe, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,400, Nonfiler, Not in universe, Not in universe, Householder, Householder,1723.61, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Not in universe, Poland, Poland, Poland, Foreign born- U S citizen by naturalization,0, Not in universe,2,0,94, - 50000. 17047,46, Local government,43,10, Masters degree(MA MS MEng MEd MSW MBA),0, Not in universe, Divorced, Education, Professional specialty, White, All other, Female, Yes, Not in universe, Children or Armed Forces,0,1876,139, Single, Not in universe, Not in universe, Nonfamily householder, Householder,1722.26, Nonmover, Nonmover, Nonmover, Yes, Not in universe,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,36,94, - 50000. 5446,57, Private,42,13, Associates degree-occup /vocational,1329, Not in universe, Divorced, Medical except hospital, Technicians and related support, White, All other, Female, No, Not in universe, Children or Armed Forces,2202,0,0, Single, Not in universe, Not in universe, Nonfamily householder, Householder,1168.63, Nonmover, Nonmover, Nonmover, Yes, Not in universe,2, Not in universe, United-States, United-States, United-States, Native- Born in the United States,2, Not in universe,2,52,94, - 50000. 
171213,14, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,1793.11, ?, ?, ?, Not in universe under 1 year old, ?,0, Mother only present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,95, - 50000. 173292,43, Private,21,26, High school graduate,0, Not in universe, Married-civilian spouse present, Manufacturing-nondurable goods, Adm support including clerical, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,3762.14, Nonmover, Nonmover, Nonmover, Yes, Not in universe,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,94, - 50000. 79813,5, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,3050.97, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Both parents present, ?, ?, United-States, Native- Born in the United States,0, Not in universe,0,0,94, - 50000. 
181506,57, Private,27,35, High school graduate,0, Not in universe, Married-civilian spouse present, Manufacturing-nondurable goods, Precision production craft & repair, White, Puerto Rican, Male, Not in universe, Not in universe, Full-time schedules,0,0,0, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,1101.85, ?, ?, ?, Not in universe under 1 year old, ?,3, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, - 50000. 67884,0, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,970.2, Not in universe, Not in universe, Not in universe, Not in universe under 1 year old, Not in universe,0, Both parents present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,94, - 50000. 1095,0, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, Asian or Pacific Islander, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,1952.21, ?, ?, ?, Not in universe under 1 year old, ?,0, Both parents present, Poland, ?, United-States, Native- Born in the United States,0, Not in universe,0,0,95, - 50000. 
47621,47, Private,39,31, 11th grade,0, Not in universe, Married-civilian spouse present, Personal services except private HH, Other service, White, Central or South American, Male, Not in universe, Not in universe, Full-time schedules,0,0,0, Joint both under 65, Not in universe, Not in universe, Householder, Householder,791.11, ?, ?, ?, Not in universe under 1 year old, ?,6, Not in universe, United-States, United-States, Columbia, Native- Born abroad of American Parent(s),0, Not in universe,2,52,95, - 50000. 65460,49, State government,43,3, Bachelors degree(BA AB BS),0, Not in universe, Divorced, Education, Executive admin and managerial, White, All other, Male, Not in universe, Not in universe, Full-time schedules,0,0,0, Single, Not in universe, Not in universe, Householder, Householder,251.25, ?, ?, ?, Not in universe under 1 year old, ?,6, Not in universe, Canada, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, - 50000. 140996,47, Private,33,26, 5th or 6th grade,0, Not in universe, Married-civilian spouse present, Retail trade, Adm support including clerical, White, Mexican-American, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Spouse of householder, Spouse of householder,1283.79, Nonmover, Nonmover, Nonmover, Yes, Not in universe,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,1,94, - 50000. 
23431,31, Self-employed-not incorporated,2,43, High school graduate,0, Not in universe, Married-civilian spouse present, Agriculture, Farming forestry and fishing, White, All other, Female, Not in universe, Not in universe, PT for non-econ reasons usually FT,0,0,0, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,823.78, ?, ?, ?, Not in universe under 1 year old, ?,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,95, - 50000. 18488,57, Not in universe,0,0, High school graduate,0, Not in universe, Married-civilian spouse present, Not in universe or children, Not in universe, Black, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Joint both under 65, Not in universe, Not in universe, Householder, Householder,548.37, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,94, - 50000. 63908,19, Private,33,29, Some college but no degree,0, College or university, Never married, Retail trade, Other service, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Single, Not in universe, Not in universe, Grandchild 18+ never marr not in subfamily, Other relative of householder,942.2, Nonmover, Nonmover, Nonmover, Yes, Not in universe,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,2, Not in universe,2,52,94, - 50000. 
147955,25, Not in universe,0,0, Bachelors degree(BA AB BS),0, Not in universe, Never married, Not in universe or children, Not in universe, White, Other Spanish, Male, Not in universe, Not in universe, Not in labor force,0,0,0, Nonfiler, Not in universe, Not in universe, Nonfamily householder, Householder,1087.39, ?, ?, ?, Not in universe under 1 year old, ?,0, Not in universe, Mexico, Puerto-Rico, Mexico, Native- Born abroad of American Parent(s),0, Not in universe,2,0,95, - 50000. 1219,43, Private,33,26, High school graduate,0, Not in universe, Married-civilian spouse present, Retail trade, Adm support including clerical, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,50, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,3440.67, Nonmover, Nonmover, Nonmover, Yes, Not in universe,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,94, - 50000. 98929,44, Private,30,26, Bachelors degree(BA AB BS),0, Not in universe, Never married, Communications, Adm support including clerical, Black, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Head of household, Not in universe, Not in universe, Householder, Householder,1040.96, Nonmover, Nonmover, Nonmover, Yes, Not in universe,5, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,94, - 50000. 
64415,34, Local government,47,28, Some college but no degree,0, Not in universe, Never married, Public administration, Protective services, White, All other, Male, Not in universe, Not in universe, Full-time schedules,0,0,0, Single, Not in universe, Not in universe, Nonfamily householder, Householder,1161.47, ?, ?, ?, Not in universe under 1 year old, ?,3, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, No,1,52,95, - 50000. 197617,14, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,2177.31, ?, ?, ?, Not in universe under 1 year old, ?,0, Both parents present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,95, - 50000. 178368,35, Not in universe,0,0, 9th grade,0, Not in universe, Never married, Not in universe or children, Not in universe, Black, All other, Male, Not in universe, Not in universe, Not in labor force,0,0,0, Nonfiler, Not in universe, Not in universe, Nonfamily householder, Householder,1864.42, ?, ?, ?, Not in universe under 1 year old, ?,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,95, - 50000. 
40399,19, Not in universe,0,0, Some college but no degree,0, College or university, Never married, Not in universe or children, Not in universe, Asian or Pacific Islander, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child 18+ never marr Not in a subfamily, Child 18 or older,598.21, Nonmover, Nonmover, Nonmover, Yes, Not in universe,2, Not in universe, United-States, ?, United-States, Native- Born in the United States,0, Not in universe,2,13,94, - 50000. 157159,22, Self-employed-not incorporated,37,15, Associates degree-occup /vocational,0, Not in universe, Never married, Business and repair services, Technicians and related support, White, All other, Male, Not in universe, Not in universe, Full-time schedules,0,0,0, Single, Not in universe, Not in universe, Secondary individual, Nonrelative of householder,4074.15, ?, ?, ?, Not in universe under 1 year old, ?,1, Not in universe, United-States, United-States, Holand-Netherlands, Native- Born abroad of American Parent(s),0, Not in universe,2,36,95, - 50000. 39951,45, Federal government,49,1, Masters degree(MA MS MEng MEd MSW MBA),0, Not in universe, Divorced, Public administration, Executive admin and managerial, White, All other, Female, Not in universe, Not in universe, Full-time schedules,0,1980,0, Single, Not in universe, Not in universe, Householder, Householder,1632.8, ?, ?, ?, Not in universe under 1 year old, ?,3, Not in universe, United-States, United-States, United-States, Native- Born in the United States,2, Not in universe,2,52,95, - 50000. 
80149,28, Private,39,31, 5th or 6th grade,0, Not in universe, Never married, Personal services except private HH, Other service, White, Mexican (Mexicano), Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Single, Not in universe, Not in universe, Other Rel 18+ never marr not in subfamily, Other relative of householder,2028.73, Nonmover, Nonmover, Nonmover, Yes, Not in universe,6, Not in universe, Mexico, Mexico, Mexico, Foreign born- U S citizen by naturalization,2, Not in universe,2,52,94, - 50000. 33078,70, Not in universe,0,0, High school graduate,0, Not in universe, Widowed, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Not in labor force,401,0,0, Single, Not in universe, Not in universe, Nonfamily householder, Householder,983.2, ?, ?, ?, Not in universe under 1 year old, ?,0, Not in universe, Canada, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,95, - 50000. 118945,6, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,1702.46, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Both parents present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,94, - 50000. 
173073,17, Not in universe,0,0, 11th grade,0, High school, Never married, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Not in labor force,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,1522.83, ?, ?, ?, Not in universe under 1 year old, ?,0, Both parents present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,95, - 50000. 154955,33, Private,42,13, Some college but no degree,0, Not in universe, Divorced, Medical except hospital, Technicians and related support, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,177, Single, Not in universe, Not in universe, Nonfamily householder, Householder,2359.01, Nonmover, Nonmover, Nonmover, Yes, Not in universe,4, Not in universe, United-States, Germany, United-States, Native- Born in the United States,0, Not in universe,2,52,94, - 50000. 22221,63, Not in universe,0,0, 10th grade,0, Not in universe, Married-civilian spouse present, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,7959.51, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,94, - 50000. 
38335,33, Not in universe,0,0, 5th or 6th grade,0, Not in universe, Married-civilian spouse present, Not in universe or children, Not in universe, White, Mexican-American, Female, Not in universe, Not in universe, Not in labor force,0,0,0, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,1363.13, ?, ?, ?, Not in universe under 1 year old, ?,4, Not in universe, Mexico, Mexico, Mexico, Foreign born- Not a citizen of U S ,0, Not in universe,2,52,95, - 50000. 123934,10, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,1778.48, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Both parents present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,94, - 50000. 185904,64, Not in universe,0,0, High school graduate,0, Not in universe, Married-civilian spouse present, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Not in labor force,0,0,24, Joint one under 65 & one 65+, Not in universe, Not in universe, Spouse of householder, Spouse of householder,2461.72, ?, ?, ?, Not in universe under 1 year old, ?,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,95, - 50000. 
71771,39, Private,29,38, Some college but no degree,0, Not in universe, Never married, Transportation, Transportation and material moving, White, Mexican-American, Male, Not in universe, Not in universe, Full-time schedules,0,0,0, Single, Not in universe, Not in universe, Nonfamily householder, Householder,702.43, ?, ?, ?, Not in universe under 1 year old, ?,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, - 50000. 69160,2, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,926.58, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Both parents present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,94, - 50000. 147725,77, Not in universe,0,0, Prof school degree (MD DDS DVM LLB JD),0, Not in universe, Married-civilian spouse present, Not in universe or children, Not in universe, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,1455,0,0, Joint both 65+, Not in universe, Not in universe, Householder, Householder,1623.8, Nonmover, Nonmover, Nonmover, Yes, Not in universe,1, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,1,94, - 50000. 
84225,6, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,2589.81, ?, ?, ?, Not in universe under 1 year old, ?,0, Both parents present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,95, - 50000. 58184,42, Private,5,36, High school graduate,0, Not in universe, Married-civilian spouse present, Manufacturing-durable goods, Machine operators assmblrs & inspctrs, White, All other, Male, Not in universe, Not in universe, Full-time schedules,0,0,0, Joint both under 65, Not in universe, Not in universe, Householder, Householder,2553.09, ?, ?, ?, Not in universe under 1 year old, ?,4, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, - 50000. 191708,30, Private,33,19, High school graduate,0, Not in universe, Never married, Retail trade, Sales, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Single, Midwest, Tennessee, Child 18+ never marr Not in a subfamily, Child 18 or older,433.4, NonMSA to nonMSA, Same county, Same county, No, No,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,94, - 50000. 
73103,48, Private,33,12, Some college but no degree,0, Not in universe, Married-civilian spouse present, Retail trade, Professional specialty, White, All other, Female, Not in universe, Not in universe, Full-time schedules,0,0,0, Joint both under 65, Not in universe, Not in universe, Householder, Householder,281.59, ?, ?, ?, Not in universe under 1 year old, ?,2, Not in universe, United-States, United-States, United-States, Native- Born in the United States,1, Not in universe,2,25,95, - 50000. 25855,20, Never worked,0,0, Some college but no degree,0, College or university, Never married, Not in universe or children, Not in universe, Asian or Pacific Islander, All other, Female, Not in universe, New entrant, Unemployed part- time,0,0,0, Nonfiler, Not in universe, Not in universe, In group quarters, Group Quarters- Secondary individual,1394.7, ?, ?, ?, Not in universe under 1 year old, ?,0, Not in universe, South Korea, South Korea, South Korea, Foreign born- Not a citizen of U S ,0, Not in universe,2,0,95, - 50000. 20809,65, State government,43,9, Doctorate degree(PhD EdD),0, Not in universe, Married-civilian spouse present, Education, Professional specialty, White, All other, Male, Not in universe, Not in universe, Full-time schedules,0,2174,250, Joint both 65+, Not in universe, Not in universe, Householder, Householder,1580.56, ?, ?, ?, Not in universe under 1 year old, ?,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, 50000+. 
121724,31, Local government,43,10, Bachelors degree(BA AB BS),0, Not in universe, Never married, Education, Professional specialty, White, All other, Male, Yes, Not in universe, Children or Armed Forces,0,0,0, Single, Not in universe, Not in universe, Nonfamily householder, Householder,2220.04, Nonmover, Nonmover, Nonmover, Yes, Not in universe,4, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,43,94, - 50000. 87147,51, Not in universe,0,0, 9th grade,0, Not in universe, Widowed, Not in universe or children, Not in universe, Black, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, South, Texas, Nonfamily householder, Householder,2542.38, MSA to MSA, Same county, Same county, No, Yes,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,94, - 50000. 45361,6, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, Black, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,1423.77, ?, ?, ?, Not in universe under 1 year old, ?,0, Both parents present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,95, - 50000. 
10963,42, Private,38,42, Some college but no degree,0, Not in universe, Married-civilian spouse present, Business and repair services, Handlers equip cleaners etc , White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Joint both under 65, Midwest, Montana, Spouse of householder, Spouse of householder,6282.42, MSA to MSA, Different county same state, Different county same state, No, No,6, Not in universe, El-Salvador, United-States, United-States, Native- Born in the United States,2, Not in universe,2,52,94, - 50000. 43878,20, Private,2,44, High school graduate,0, Not in universe, Never married, Agriculture, Farming forestry and fishing, White, All other, Male, Not in universe, Re-entrant, Unemployed full-time,0,0,0, Single, Not in universe, Not in universe, Child 18+ never marr Not in a subfamily, Child 18 or older,258.24, ?, ?, ?, Not in universe under 1 year old, ?,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,2, Not in universe,2,4,95, - 50000. 19256,9, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,1509.08, ?, ?, ?, Not in universe under 1 year old, ?,0, Mother only present, United-States, United-States, Germany, Native- Born abroad of American Parent(s),0, Not in universe,0,0,95, - 50000. 
71391,48, Private,38,42, 1st 2nd 3rd or 4th grade,0, Not in universe, Married-civilian spouse present, Business and repair services, Handlers equip cleaners etc , Asian or Pacific Islander, All other, Male, Not in universe, Not in universe, Full-time schedules,0,0,0, Joint both under 65, Not in universe, Not in universe, Householder, Householder,2395.72, ?, ?, ?, Not in universe under 1 year old, ?,1, Not in universe, ?, ?, ?, Foreign born- Not a citizen of U S ,0, Not in universe,2,52,95, - 50000. 138769,17, Not in universe,0,0, 10th grade,0, High school, Never married, Not in universe or children, Not in universe, White, All other, Male, Not in universe, Not in universe, Not in labor force,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,588.0, ?, ?, ?, Not in universe under 1 year old, ?,0, Both parents present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,95, - 50000. 98200,33, Private,42,30, High school graduate,0, Not in universe, Married-civilian spouse present, Medical except hospital, Other service, White, Chicano, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Joint both under 65, West, New York, Householder, Householder,438.7, MSA to MSA, Same county, Same county, No, Yes,3, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,15,94, - 50000. 
7213,2, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,1043.07, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Mother only present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,94, - 50000. 891,15, Not in universe,0,0, 9th grade,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,1206.13, Nonmover, Nonmover, Nonmover, Yes, Not in universe,1, Both parents present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,2,94, - 50000. 45910,68, Not in universe,0,0, High school graduate,0, Not in universe, Married-civilian spouse present, Not in universe or children, Not in universe, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Joint one under 65 & one 65+, Not in universe, Not in universe, Householder, Householder,1634.16, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,94, - 50000. 
68156,16, Not in universe,0,0, 9th grade,0, High school, Never married, Not in universe or children, Not in universe, White, All other, Male, Not in universe, Not in universe, Not in labor force,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,662.39, ?, ?, ?, Not in universe under 1 year old, ?,0, Both parents present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,95, - 50000. 111042,52, Not in universe,0,0, Masters degree(MA MS MEng MEd MSW MBA),0, Not in universe, Married-civilian spouse present, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Not in labor force,0,0,10000, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,1024.89, ?, ?, ?, Not in universe under 1 year old, ?,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,95, 50000+. 197422,67, Private,34,2, High school graduate,0, Not in universe, Widowed, Finance insurance and real estate, Executive admin and managerial, White, All other, Male, No, Not in universe, Children or Armed Forces,0,0,0, Single, Not in universe, Not in universe, Householder, Householder,1539.89, Nonmover, Nonmover, Nonmover, Yes, Not in universe,4, Not in universe, Ireland, Ireland, United-States, Native- Born in the United States,0, Not in universe,2,52,94, 50000+. 
10440,8, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Grandchild <18 never marr not in subfamily, Other relative of householder,938.92, ?, ?, ?, Not in universe under 1 year old, ?,0, Neither parent present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,95, - 50000. 9427,42, Not in universe,0,0, 10th grade,0, Not in universe, Married-civilian spouse present, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,2701.7, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,94, - 50000. 7449,48, Private,12,2, High school graduate,0, Not in universe, Married-civilian spouse present, Manufacturing-durable goods, Executive admin and managerial, Asian or Pacific Islander, All other, Female, Not in universe, Not in universe, Full-time schedules,0,0,0, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,1965.34, ?, ?, ?, Not in universe under 1 year old, ?,1, Not in universe, China, Vietnam, Vietnam, Foreign born- U S citizen by naturalization,0, Not in universe,2,52,95, - 50000. 
128836,8, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,2298.82, ?, ?, ?, Not in universe under 1 year old, ?,0, Both parents present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,95, - 50000. 48918,72, Not in universe,0,0, 7th and 8th grade,0, Not in universe, Never married, Not in universe or children, Not in universe, Black, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Nonfamily householder, Householder,419.51, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,94, - 50000. 93667,23, Private,33,19, Associates degree-academic program,825, Not in universe, Married-civilian spouse present, Retail trade, Sales, White, All other, Female, No, Not in universe, Full-time schedules,0,0,75, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,2615.23, ?, ?, ?, Not in universe under 1 year old, ?,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, - 50000. 
29020,42, Private,45,15, Associates degree-academic program,0, Not in universe, Widowed, Other professional services, Technicians and related support, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,290, Head of household, Not in universe, Not in universe, Householder, Householder,1552.03, Nonmover, Nonmover, Nonmover, Yes, Not in universe,1, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,94, - 50000. 109337,10, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,1640.4, ?, ?, ?, Not in universe under 1 year old, ?,0, Both parents present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,95, - 50000. 40199,4, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,2397.57, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Both parents present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,94, - 50000. 
39475,37, Private,41,8, Associates degree-occup /vocational,2355, Not in universe, Never married, Hospital services, Professional specialty, White, All other, Female, No, Not in universe, Children or Armed Forces,0,0,0, Single, Not in universe, Not in universe, Nonfamily householder, Householder,1196.52, Nonmover, Nonmover, Nonmover, Yes, Not in universe,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Yes,1,52,94, 50000+. 159112,63, Without pay,6,35, High school graduate,0, Not in universe, Married-civilian spouse present, Manufacturing-durable goods, Precision production craft & repair, White, All other, Male, Not in universe, Not in universe, PT for non-econ reasons usually FT,0,0,0, Joint both under 65, Not in universe, Not in universe, Householder, Householder,4441.94, ?, ?, ?, Not in universe under 1 year old, ?,1, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, - 50000. 152918,41, Not in universe,0,0, 1st 2nd 3rd or 4th grade,0, Not in universe, Separated, Not in universe or children, Not in universe, Black, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, South, ?, Secondary individual, Nonrelative of householder,2745.08, NonMSA to nonMSA, Different county same state, Different county same state, No, No,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,94, - 50000. 
88096,4, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, Asian or Pacific Islander, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,777.43, ?, ?, ?, Not in universe under 1 year old, ?,0, Both parents present, Philippines, Philippines, Philippines, Foreign born- Not a citizen of U S ,0, Not in universe,0,0,95, - 50000. 175317,43, Private,44,12, Masters degree(MA MS MEng MEd MSW MBA),0, Not in universe, Divorced, Social services, Professional specialty, White, All other, Female, Not in universe, Not in universe, Full-time schedules,0,0,0, Head of household, Not in universe, Not in universe, Householder, Householder,2639.54, ?, ?, ?, Not in universe under 1 year old, ?,5, Not in universe, United-States, United-States, United-States, Native- Born in the United States,2, Not in universe,2,52,95, - 50000. 80470,49, Private,34,17, Bachelors degree(BA AB BS),0, Not in universe, Divorced, Finance insurance and real estate, Sales, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,500, Single, Not in universe, Not in universe, Nonfamily householder, Householder,1811.45, Nonmover, Nonmover, Nonmover, Yes, Not in universe,5, Not in universe, United-States, ?, United-States, Native- Born in the United States,0, Not in universe,2,52,94, 50000+. 
161690,6, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,281.98, ?, ?, ?, Not in universe under 1 year old, ?,0, Both parents present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,95, - 50000. 3630,41, Not in universe,0,0, High school graduate,0, Not in universe, Divorced, Not in universe or children, Not in universe, Black, All other, Female, Not in universe, Not in universe, Not in labor force,0,0,0, Nonfiler, Not in universe, Not in universe, Householder, Householder,1689.66, ?, ?, ?, Not in universe under 1 year old, ?,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,95, - 50000. 12305,46, State government,43,29, High school graduate,840, Not in universe, Married-civilian spouse present, Education, Other service, White, All other, Female, No, Not in universe, Full-time schedules,0,0,0, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,1227.32, ?, ?, ?, Not in universe under 1 year old, ?,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,36,95, - 50000. 
100405,33, Not in universe,0,0, Some college but no degree,0, Not in universe, Divorced, Not in universe or children, Not in universe, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child 18+ ever marr Not in a subfamily, Child 18 or older,2798.03, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,94, - 50000. 181953,35, Private,11,37, 11th grade,0, Not in universe, Married-civilian spouse present, Manufacturing-durable goods, Machine operators assmblrs & inspctrs, White, All other, Female, Not in universe, Not in universe, Full-time schedules,0,0,0, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,3603.1, ?, ?, ?, Not in universe under 1 year old, ?,4, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, - 50000. 165427,43, Private,35,23, Some college but no degree,0, Not in universe, Divorced, Finance insurance and real estate, Adm support including clerical, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Single, South, Utah, Secondary individual, Nonrelative of householder,450.49, MSA to MSA, Different region, Different state in South, No, Yes,1, Not in universe, United-States, United-States, United-States, Native- Born in the United States,2, Not in universe,2,48,94, - 50000. 
48964,25, Private,34,3, Bachelors degree(BA AB BS),0, Not in universe, Never married, Finance insurance and real estate, Executive admin and managerial, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,10, Single, South, Utah, Nonfamily householder, Householder,2776.11, MSA to MSA, Same county, Same county, No, Yes,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,2, Not in universe,2,52,94, - 50000. 111549,80, Not in universe,0,0, 11th grade,0, Not in universe, Widowed, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Not in labor force,0,0,0, Single, Not in universe, Not in universe, Nonfamily householder, Householder,2674.96, ?, ?, ?, Not in universe under 1 year old, ?,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,95, - 50000. 12284,37, Local government,40,23, High school graduate,0, Not in universe, Married-civilian spouse present, Entertainment, Adm support including clerical, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,2434.3, Nonmover, Nonmover, Nonmover, Yes, Not in universe,3, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,94, - 50000. 111003,24, Private,1,44, 11th grade,0, Not in universe, Never married, Agriculture, Farming forestry and fishing, White, Puerto Rican, Male, Not in universe, Job loser - on layoff, Children or Armed Forces,2463,0,0, Single, Not in universe, Not in universe, Householder, Householder,895.49, Nonmover, Nonmover, Nonmover, Yes, Not in universe,4, Not in universe, Puerto-Rico, Puerto-Rico, United-States, Native- Born in the United States,0, Not in universe,2,40,94, - 50000. 
4035,52, State government,43,10, Masters degree(MA MS MEng MEd MSW MBA),0, Not in universe, Married-civilian spouse present, Education, Professional specialty, White, All other, Female, Not in universe, Not in universe, Full-time schedules,0,0,3000, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,1559.39, ?, ?, ?, Not in universe under 1 year old, ?,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,2, Not in universe,2,15,95, 50000+. 57559,34, Private,24,26, High school graduate,0, Not in universe, Divorced, Manufacturing-nondurable goods, Adm support including clerical, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Single, Not in universe, Not in universe, Child 18+ ever marr Not in a subfamily, Child 18 or older,2878.31, Nonmover, Nonmover, Nonmover, Yes, Not in universe,4, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,94, - 50000. 197612,6, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, Black, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,1985.13, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Mother only present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,94, - 50000. 
186539,35, Not in universe,0,0, 1st 2nd 3rd or 4th grade,0, Not in universe, Separated, Not in universe or children, Not in universe, White, Mexican (Mexicano), Female, Not in universe, Not in universe, Not in labor force,0,0,0, Nonfiler, Not in universe, Not in universe, Householder, Householder,1346.86, ?, ?, ?, Not in universe under 1 year old, ?,0, Not in universe, Mexico, Mexico, Mexico, Foreign born- Not a citizen of U S ,0, Not in universe,2,0,95, - 50000. 80242,45, Private,22,36, 5th or 6th grade,0, Not in universe, Married-civilian spouse present, Manufacturing-nondurable goods, Machine operators assmblrs & inspctrs, White, Mexican (Mexicano), Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,1108.95, Nonmover, Nonmover, Nonmover, Yes, Not in universe,2, Not in universe, Mexico, Mexico, Mexico, Foreign born- Not a citizen of U S ,0, Not in universe,2,52,94, - 50000. 180617,14, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,1932.0, ?, ?, ?, Not in universe under 1 year old, ?,0, Both parents present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,95, - 50000. 
88587,3, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child under 18 of RP of unrel subfamily, Nonrelative of householder,4108.89, ?, ?, ?, Not in universe under 1 year old, ?,0, Mother only present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,95, - 50000. 7041,45, Not in universe,0,0, Some college but no degree,0, Not in universe, Married-civilian spouse present, Not in universe or children, Not in universe, Black, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Joint both under 65, Midwest, Oklahoma, Spouse of householder, Spouse of householder,1443.81, MSA to MSA, Same county, Same county, No, No,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,94, - 50000. 139291,44, Private,44,41, 5th or 6th grade,0, Not in universe, Never married, Social services, Handlers equip cleaners etc , White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, South, Delaware, Secondary individual, Nonrelative of householder,982.19, NonMSA to nonMSA, Different county same state, Different county same state, No, No,5, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,94, - 50000. 
184023,49, Local government,42,30, High school graduate,0, Not in universe, Widowed, Medical except hospital, Other service, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Single, Not in universe, Not in universe, Nonfamily householder, Householder,993.85, Nonmover, Nonmover, Nonmover, Yes, Not in universe,4, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,94, - 50000. 9438,69, Not in universe,0,0, High school graduate,0, Not in universe, Widowed, Not in universe or children, Not in universe, Black, All other, Female, Not in universe, Not in universe, Not in labor force,0,0,0, Single, Not in universe, Not in universe, Householder, Householder,2296.9, ?, ?, ?, Not in universe under 1 year old, ?,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,95, - 50000. 33628,65, Not in universe,0,0, High school graduate,0, Not in universe, Married-civilian spouse present, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Joint one under 65 & one 65+, Not in universe, Not in universe, Householder, Householder,2588.07, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,94, - 50000. 
129715,43, Private,31,42, High school graduate,0, Not in universe, Married-civilian spouse present, Utilities and sanitary services, Handlers equip cleaners etc , White, All other, Male, Not in universe, Not in universe, Full-time schedules,0,0,0, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,1036.94, ?, ?, ?, Not in universe under 1 year old, ?,3, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, - 50000. 13495,19, Private,33,19, Some college but no degree,0, College or university, Never married, Retail trade, Sales, White, Puerto Rican, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Single, Not in universe, Not in universe, Child 18+ never marr Not in a subfamily, Child 18 or older,1243.04, Nonmover, Nonmover, Nonmover, Yes, Not in universe,2, Not in universe, United-States, ?, United-States, Native- Born in the United States,0, Not in universe,2,52,94, - 50000. 50850,10, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, Asian or Pacific Islander, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,2245.99, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Both parents present, Philippines, Philippines, United-States, Native- Born in the United States,0, Not in universe,0,0,94, - 50000. 
138847,46, Private,34,25, Some college but no degree,0, Not in universe, Married-civilian spouse present, Finance insurance and real estate, Adm support including clerical, Black, All other, Male, Not in universe, Not in universe, Full-time schedules,0,0,0, Joint both under 65, Not in universe, Not in universe, Householder, Householder,688.01, ?, ?, ?, Not in universe under 1 year old, ?,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, - 50000. 150171,34, Private,33,19, Associates degree-academic program,0, Not in universe, Divorced, Retail trade, Sales, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Single, Not in universe, Not in universe, Nonfamily householder, Householder,2227.01, Nonmover, Nonmover, Nonmover, Yes, Not in universe,4, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,94, - 50000. 156089,48, State government,40,23, High school graduate,0, Not in universe, Married-civilian spouse present, Entertainment, Adm support including clerical, White, All other, Female, Not in universe, Not in universe, Full-time schedules,0,0,0, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,607.6, ?, ?, ?, Not in universe under 1 year old, ?,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, - 50000. 
197936,19, Private,33,19, High school graduate,0, College or university, Never married, Retail trade, Sales, White, All other, Female, Not in universe, Not in universe, Full-time schedules,0,0,0, Single, Not in universe, Not in universe, Child 18+ never marr Not in a subfamily, Child 18 or older,2578.61, ?, ?, ?, Not in universe under 1 year old, ?,4, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, - 50000. 78488,45, Local government,48,21, Bachelors degree(BA AB BS),0, Not in universe, Separated, Public administration, Adm support including clerical, Black, All other, Female, Yes, Not in universe, Full-time schedules,0,0,0, Single, Not in universe, Not in universe, Householder, Householder,1569.36, ?, ?, ?, Not in universe under 1 year old, ?,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, - 50000. 44829,38, Private,33,16, High school graduate,0, Not in universe, Never married, Retail trade, Sales, White, All other, Female, Not in universe, Not in universe, Full-time schedules,0,0,268, Single, Not in universe, Not in universe, Child 18+ never marr Not in a subfamily, Child 18 or older,3254.97, ?, ?, ?, Not in universe under 1 year old, ?,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, - 50000. 132454,41, Private,36,27, High school graduate,0, Not in universe, Married-civilian spouse present, Private household services, Private household services, White, Central or South American, Female, No, Not in universe, Full-time schedules,0,0,0, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,812.57, ?, ?, ?, Not in universe under 1 year old, ?,1, Not in universe, ?, ?, ?, Foreign born- Not a citizen of U S ,0, Not in universe,2,52,95, - 50000. 
52840,71, Not in universe,0,0, 5th or 6th grade,0, Not in universe, Married-civilian spouse present, Not in universe or children, Not in universe, Black, All other, Male, Not in universe, Not in universe, Not in labor force,0,0,0, Joint both 65+, Not in universe, Not in universe, Householder, Householder,1823.75, ?, ?, ?, Not in universe under 1 year old, ?,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,95, - 50000. 121217,27, Federal government,29,25, Associates degree-occup /vocational,1575, Not in universe, Married-civilian spouse present, Transportation, Adm support including clerical, White, All other, Male, Yes, Not in universe, Children or Armed Forces,7298,0,0, Joint both under 65, Northeast, Michigan, Householder, Householder,1031.69, MSA to MSA, Same county, Same county, No, No,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,94, - 50000. 198823,29, Not in universe,0,0, Bachelors degree(BA AB BS),0, Not in universe, Married-civilian spouse present, Not in universe or children, Not in universe, Asian or Pacific Islander, All other, Female, Not in universe, Not in universe, Not in labor force,0,0,0, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,1205.55, ?, ?, ?, Not in universe under 1 year old, ?,0, Not in universe, India, India, India, Foreign born- Not a citizen of U S ,0, Not in universe,2,0,95, - 50000. 
148775,36, Not in universe,0,0, High school graduate,0, Not in universe, Separated, Not in universe or children, Not in universe, White, Mexican (Mexicano), Female, Not in universe, Not in universe, Not in labor force,0,0,0, Single, Not in universe, Not in universe, Child 18+ ever marr Not in a subfamily, Child 18 or older,1307.46, ?, ?, ?, Not in universe under 1 year old, ?,3, Not in universe, Mexico, Mexico, United-States, Native- Born in the United States,0, Not in universe,2,45,95, - 50000. 1702,52, Self-employed-not incorporated,39,32, Bachelors degree(BA AB BS),0, Not in universe, Divorced, Personal services except private HH, Other service, White, All other, Female, Not in universe, Not in universe, Full-time schedules,0,0,2000, Single, Not in universe, Not in universe, Nonfamily householder, Householder,984.25, ?, ?, ?, Not in universe under 1 year old, ?,1, Not in universe, United-States, United-States, United-States, Native- Born in the United States,2, Not in universe,2,46,95, - 50000. 120926,2, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,2596.51, ?, ?, ?, Not in universe under 1 year old, ?,0, Mother only present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,95, - 50000. 
125722,40, Self-employed-not incorporated,33,2, Associates degree-occup /vocational,0, Not in universe, Married-civilian spouse present, Retail trade, Executive admin and managerial, White, All other, Male, Not in universe, Not in universe, Full-time schedules,0,0,0, Nonfiler, Not in universe, Not in universe, Spouse of householder, Spouse of householder,198.29, ?, ?, ?, Not in universe under 1 year old, ?,1, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, - 50000. 110416,31, Private,45,12, Bachelors degree(BA AB BS),0, Not in universe, Married-civilian spouse present, Other professional services, Professional specialty, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,300, Joint both under 65, Not in universe, Not in universe, Householder, Householder,1920.41, Nonmover, Nonmover, Nonmover, Yes, Not in universe,4, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,94, - 50000. 47866,5, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,2154.9, ?, ?, ?, Not in universe under 1 year old, ?,0, Both parents present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,95, - 50000. 
35144,31, Not in universe,0,0, Associates degree-occup /vocational,0, Not in universe, Married-civilian spouse present, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Joint both under 65, Northeast, Connecticut, Householder, Householder,2491.83, MSA to MSA, Different county same state, Different county same state, No, No,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,94, - 50000. 167869,1, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, Asian or Pacific Islander, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,2046.83, ?, ?, ?, Not in universe under 1 year old, ?,0, Mother only present, Philippines, Philippines, United-States, Native- Born in the United States,0, Not in universe,0,0,95, - 50000. 12432,32, Federal government,49,26, High school graduate,0, Not in universe, Married-civilian spouse present, Public administration, Adm support including clerical, White, Puerto Rican, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,1020.27, Nonmover, Nonmover, Nonmover, Yes, Not in universe,3, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,94, - 50000. 
71994,35, Self-employed-not incorporated,37,10, Bachelors degree(BA AB BS),0, Not in universe, Married-civilian spouse present, Business and repair services, Professional specialty, White, All other, Male, Not in universe, Other job loser, Children or Armed Forces,0,0,0, Joint both under 65, Not in universe, Not in universe, Householder, Householder,1132.61, Nonmover, Nonmover, Nonmover, Yes, Not in universe,1, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,27,94, - 50000. 190244,34, Not in universe,0,0, Masters degree(MA MS MEng MEd MSW MBA),0, Not in universe, Married-civilian spouse present, Not in universe or children, Not in universe, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Joint both under 65, South, District of Columbia, Householder, Householder,2031.36, MSA to MSA, Different state same division, Different state in South, No, No,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,94, - 50000. 1881,45, Private,33,16, Some college but no degree,0, Not in universe, Never married, Retail trade, Sales, White, Mexican-American, Female, Not in universe, Not in universe, Full-time schedules,0,0,0, Head of household, Not in universe, Not in universe, Householder, Householder,1537.21, ?, ?, ?, Not in universe under 1 year old, ?,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, - 50000. 
48449,40, Private,4,34, Some college but no degree,0, Not in universe, Married-civilian spouse present, Construction, Precision production craft & repair, White, All other, Male, Not in universe, Not in universe, Full-time schedules,0,0,0, Joint both under 65, Not in universe, Not in universe, Householder, Householder,1631.75, ?, ?, ?, Not in universe under 1 year old, ?,4, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, - 50000. 99405,59, Private,4,2, Some college but no degree,2100, Not in universe, Married-civilian spouse present, Construction, Executive admin and managerial, White, All other, Male, Yes, Not in universe, Full-time schedules,0,0,200, Joint both under 65, Not in universe, Not in universe, Householder, Householder,2477.26, ?, ?, ?, Not in universe under 1 year old, ?,1, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, 50000+. 71526,26, State government,43,12, Masters degree(MA MS MEng MEd MSW MBA),0, Not in universe, Married-civilian spouse present, Education, Professional specialty, White, All other, Female, Not in universe, Not in universe, PT for econ reasons usually PT,0,0,0, Joint both under 65, Not in universe, Not in universe, In group quarters, Householder,1108.83, ?, ?, ?, Not in universe under 1 year old, ?,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, - 50000. 
107493,14, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,1651.17, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Both parents present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,94, - 50000. 142743,54, Federal government,45,4, Masters degree(MA MS MEng MEd MSW MBA),0, Not in universe, Married-civilian spouse present, Other professional services, Professional specialty, White, All other, Male, Not in universe, Not in universe, Full-time schedules,0,0,0, Joint both under 65, Not in universe, Not in universe, Householder, Householder,1081.54, ?, ?, ?, Not in universe under 1 year old, ?,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, 50000+. 2258,30, Private,35,2, High school graduate,0, Not in universe, Divorced, Finance insurance and real estate, Executive admin and managerial, White, All other, Female, No, Not in universe, Children or Armed Forces,2354,0,0, Single, Not in universe, Not in universe, Nonfamily householder, Householder,2924.14, Nonmover, Nonmover, Nonmover, Yes, Not in universe,2, Not in universe, United-States, United-States, United-States, Native- Born in the United States,1, Not in universe,2,52,94, - 50000. 
66048,68, Not in universe,0,0, Some college but no degree,0, Not in universe, Married-civilian spouse present, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Not in labor force,0,0,0, Joint both 65+, Not in universe, Not in universe, Spouse of householder, Spouse of householder,2467.44, ?, ?, ?, Not in universe under 1 year old, ?,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,95, - 50000. 174145,57, Local government,50,5, Bachelors degree(BA AB BS),0, Not in universe, Married-civilian spouse present, Public administration, Professional specialty, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,1902,0, Joint both under 65, Not in universe, Not in universe, Householder, Householder,1455.29, Nonmover, Nonmover, Nonmover, Yes, Not in universe,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,94, - 50000. 7609,28, Private,43,44, Associates degree-occup /vocational,0, Not in universe, Separated, Education, Farming forestry and fishing, White, All other, Male, Not in universe, Not in universe, Full-time schedules,0,0,0, Single, Not in universe, Not in universe, Nonfamily householder, Householder,4173.77, ?, ?, ?, Not in universe under 1 year old, ?,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, - 50000. 
1906,33, Private,41,7, Prof school degree (MD DDS DVM LLB JD),0, Not in universe, Married-civilian spouse present, Hospital services, Professional specialty, White, Central or South American, Male, Not in universe, Not in universe, Children or Armed Forces,3103,0,0, Joint both under 65, Not in universe, Not in universe, Householder, Householder,2406.32, Nonmover, Nonmover, Nonmover, Yes, Not in universe,6, Not in universe, ?, ?, ?, Foreign born- Not a citizen of U S ,0, Not in universe,2,52,94, - 50000. 8197,51, Private,14,37, High school graduate,0, Not in universe, Married-civilian spouse present, Manufacturing-durable goods, Machine operators assmblrs & inspctrs, Black, All other, Male, Not in universe, Not in universe, Children or Armed Forces,3137,0,0, Joint both under 65, Not in universe, Not in universe, Householder, Householder,2659.34, Nonmover, Nonmover, Nonmover, Yes, Not in universe,2, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,42,94, - 50000. 7752,59, Private,9,36, High school graduate,0, Not in universe, Married-civilian spouse present, Manufacturing-durable goods, Machine operators assmblrs & inspctrs, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Joint both under 65, Not in universe, Not in universe, Householder, Householder,761.06, Nonmover, Nonmover, Nonmover, Yes, Not in universe,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,94, - 50000. 
74808,19, Private,40,28, Some college but no degree,0, College or university, Never married, Entertainment, Protective services, Asian or Pacific Islander, All other, Male, Not in universe, Not in universe, Full-time schedules,0,0,0, Single, Not in universe, Not in universe, Child 18+ never marr Not in a subfamily, Child 18 or older,1264.75, ?, ?, ?, Not in universe under 1 year old, ?,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,2, Not in universe,2,52,95, - 50000. 194746,64, Not in universe,0,0, High school graduate,0, Not in universe, Divorced, Not in universe or children, Not in universe, White, Other Spanish, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Single, Not in universe, Not in universe, Nonfamily householder, Householder,915.28, Nonmover, Nonmover, Nonmover, Yes, Not in universe,4, Not in universe, ?, United-States, United-States, Native- Born in the United States,0, Not in universe,2,4,94, - 50000. 156141,38, Private,41,8, Bachelors degree(BA AB BS),0, Not in universe, Married-civilian spouse present, Hospital services, Professional specialty, White, All other, Female, Not in universe, Not in universe, Full-time schedules,0,0,0, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,991.45, ?, ?, ?, Not in universe under 1 year old, ?,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,2, Not in universe,2,52,95, - 50000. 
132259,41, Not in universe,0,0, High school graduate,0, Not in universe, Never married, Not in universe or children, Not in universe, Black, All other, Male, Not in universe, Not in universe, Not in labor force,0,0,0, Nonfiler, Not in universe, Not in universe, Child 18+ never marr Not in a subfamily, Child 18 or older,3270.26, ?, ?, ?, Not in universe under 1 year old, ?,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,95, - 50000. 90484,0, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, Black, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,2294.02, Not in universe, Not in universe, Not in universe, Not in universe under 1 year old, Not in universe,0, Both parents present, United-States, ?, United-States, Native- Born in the United States,0, Not in universe,0,0,94, - 50000. 78109,9, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, Black, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,4408.46, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Mother only present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,94, - 50000. 
145093,60, Private,13,37, Some college but no degree,0, Not in universe, Married-spouse absent, Manufacturing-durable goods, Machine operators assmblrs & inspctrs, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,500, Single, Not in universe, Not in universe, Nonfamily householder, Householder,1392.3, Nonmover, Nonmover, Nonmover, Yes, Not in universe,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,94, 50000+. 108692,52, Not in universe,0,0, 11th grade,0, Not in universe, Divorced, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Not in labor force,0,0,0, Nonfiler, Not in universe, Not in universe, Householder, Householder,1476.96, ?, ?, ?, Not in universe under 1 year old, ?,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,95, - 50000. 155779,70, Not in universe,0,0, High school graduate,0, Not in universe, Married-civilian spouse present, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,67, Joint both 65+, Not in universe, Not in universe, Spouse of householder, Spouse of householder,1385.67, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,94, - 50000. 
38262,14, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, Puerto Rican, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Northeast, North Carolina, Child <18 never marr not in subfamily, Child under 18 never married,1153.13, MSA to MSA, Same county, Same county, No, No,0, Mother only present, Puerto-Rico, Puerto-Rico, United-States, Native- Born in the United States,0, Not in universe,0,0,94, - 50000. 89021,30, Not in universe,0,0, High school graduate,0, Not in universe, Married-civilian spouse present, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Joint both under 65, West, California, Spouse of householder, Spouse of householder,463.68, MSA to nonMSA, Different division same region, Different state in West, No, No,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,94, - 50000. 177664,74, Not in universe,0,0, Bachelors degree(BA AB BS),0, Not in universe, Widowed, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,4882, Single, Northeast, ?, Nonfamily householder, Householder,1591.41, MSA to MSA, Different county same state, Different county same state, No, No,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,94, - 50000. 
188163,10, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,776.08, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Both parents present, ?, ?, United-States, Native- Born in the United States,0, Not in universe,0,0,94, - 50000. 125830,46, Local government,43,10, Bachelors degree(BA AB BS),0, Not in universe, Married-civilian spouse present, Education, Professional specialty, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,3103,0,100, Joint both under 65, Not in universe, Not in universe, Householder, Householder,1006.86, Nonmover, Nonmover, Nonmover, Yes, Not in universe,3, Not in universe, United-States, United-States, United-States, Native- Born in the United States,1, Not in universe,2,52,94, - 50000. 78253,26, Federal government,29,25, High school graduate,0, Not in universe, Never married, Transportation, Adm support including clerical, Asian or Pacific Islander, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,1000, Single, Not in universe, Not in universe, Nonfamily householder, Householder,915.75, Nonmover, Nonmover, Nonmover, Yes, Not in universe,6, Not in universe, ?, ?, United-States, Native- Born in the United States,0, Not in universe,2,52,94, - 50000. 
171521,25, Private,31,37, Some college but no degree,0, Not in universe, Never married, Utilities and sanitary services, Machine operators assmblrs & inspctrs, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Single, Midwest, Kentucky, Nonfamily householder, Householder,1417.25, MSA to MSA, Same county, Same county, No, No,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,94, - 50000. 122703,30, Private,45,31, 5th or 6th grade,0, Not in universe, Married-civilian spouse present, Other professional services, Other service, White, Mexican (Mexicano), Male, Not in universe, Not in universe, Full-time schedules,2885,0,0, Joint both under 65, Not in universe, Not in universe, Householder, Householder,1207.48, ?, ?, ?, Not in universe under 1 year old, ?,2, Not in universe, Mexico, Mexico, Mexico, Foreign born- Not a citizen of U S ,0, Not in universe,2,52,95, - 50000. 57986,62, State government,41,36, High school graduate,0, Not in universe, Married-civilian spouse present, Hospital services, Machine operators assmblrs & inspctrs, White, All other, Female, No, Not in universe, Full-time schedules,0,0,0, Joint one under 65 & one 65+, Not in universe, Not in universe, Spouse of householder, Spouse of householder,1252.17, ?, ?, ?, Not in universe under 1 year old, ?,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, - 50000. 
100807,58, Not in universe,0,0, Some college but no degree,0, Not in universe, Married-civilian spouse present, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Not in labor force,0,0,330, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,1550.66, ?, ?, ?, Not in universe under 1 year old, ?,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,95, - 50000. 199197,39, Not in universe,0,0, Bachelors degree(BA AB BS),0, Not in universe, Married-civilian spouse present, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Not in labor force,0,0,0, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,3802.81, ?, ?, ?, Not in universe under 1 year old, ?,0, Not in universe, ?, ?, ?, Foreign born- Not a citizen of U S ,0, Not in universe,2,0,95, - 50000. 44919,55, Private,33,16, High school graduate,1400, Not in universe, Married-civilian spouse present, Retail trade, Sales, White, All other, Female, No, Not in universe, Children or Armed Forces,0,0,100, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,2392.55, Nonmover, Nonmover, Nonmover, Yes, Not in universe,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,94, - 50000. 
48655,74, Not in universe,0,0, Some college but no degree,0, Not in universe, Married-civilian spouse present, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Spouse of householder, Spouse of householder,2367.66, Nonmover, Nonmover, Nonmover, Yes, Not in universe,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,94, - 50000. 37451,76, Not in universe,0,0, High school graduate,0, Not in universe, Widowed, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Not in labor force,0,0,0, Nonfiler, Not in universe, Not in universe, Householder, Householder,1551.72, ?, ?, ?, Not in universe under 1 year old, ?,0, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,0,95, - 50000. 9376,39, Private,37,3, Some college but no degree,0, Not in universe, Never married, Business and repair services, Executive admin and managerial, White, Puerto Rican, Female, Not in universe, Not in universe, Full-time schedules,0,0,0, Head of household, Not in universe, Not in universe, Householder, Householder,774.83, ?, ?, ?, Not in universe under 1 year old, ?,2, Not in universe, Puerto-Rico, Puerto-Rico, Puerto-Rico, Native- Born in Puerto Rico or U S Outlying,0, Not in universe,2,52,95, - 50000. 176075,71, Not in universe,0,0, 9th grade,0, Not in universe, Divorced, Not in universe or children, Not in universe, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Nonfamily householder, Householder,609.05, Nonmover, Nonmover, Nonmover, Yes, Not in universe,1, Not in universe, United-States, United-States, United-States, Native- Born in the United States,2, Not in universe,2,20,94, - 50000. 
40950,37, Private,42,2, Associates degree-academic program,0, Not in universe, Married-civilian spouse present, Medical except hospital, Executive admin and managerial, White, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,4000, Joint both under 65, Midwest, Mississippi, Spouse of householder, Spouse of householder,1532.26, MSA to nonMSA, Different region, Different state in Midwest, No, No,4, Not in universe, United-States, United-States, United-States, Native- Born in the United States,1, Not in universe,2,52,94, - 50000. 187455,31, Private,33,19, 11th grade,0, Not in universe, Married-spouse absent, Retail trade, Sales, Asian or Pacific Islander, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Single, Not in universe, Not in universe, Child 18+ ever marr Not in a subfamily, Child 18 or older,1366.06, Nonmover, Nonmover, Nonmover, Yes, Not in universe,4, Not in universe, India, India, India, Foreign born- Not a citizen of U S ,0, Not in universe,2,52,94, - 50000. 94473,0, Not in universe,0,0, Children,0, Not in universe, Never married, Not in universe or children, Not in universe, Black, All other, Female, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Nonfiler, Not in universe, Not in universe, Child <18 never marr not in subfamily, Child under 18 never married,558.42, Not in universe, Not in universe, Not in universe, Not in universe under 1 year old, Not in universe,0, Mother only present, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,0,0,94, - 50000. 
177027,77, Not in universe,0,0, High school graduate,0, Not in universe, Divorced, Not in universe or children, Not in universe, White, All other, Female, Not in universe, Not in universe, Not in labor force,0,0,0, Nonfiler, Not in universe, Not in universe, Nonfamily householder, Householder,3316.65, ?, ?, ?, Not in universe under 1 year old, ?,0, Not in universe, ?, ?, United-States, Native- Born in the United States,0, Not in universe,2,0,95, - 50000. 98120,76, Private,21,31, 7th and 8th grade,0, Not in universe, Married-civilian spouse present, Manufacturing-nondurable goods, Other service, White, All other, Male, Not in universe, Not in universe, Children or Armed Forces,0,0,0, Joint both 65+, Not in universe, Not in universe, Householder, Householder,785.0, Nonmover, Nonmover, Nonmover, Yes, Not in universe,6, Not in universe, Canada, Canada, United-States, Native- Born in the United States,0, No,1,52,94, - 50000. 179503,34, Private,25,37, High school graduate,0, Not in universe, Married-civilian spouse present, Manufacturing-nondurable goods, Machine operators assmblrs & inspctrs, White, All other, Female, Not in universe, Not in universe, Full-time schedules,0,0,0, Joint both under 65, Not in universe, Not in universe, Spouse of householder, Spouse of householder,1515.34, ?, ?, ?, Not in universe under 1 year old, ?,6, Not in universe, United-States, United-States, United-States, Native- Born in the United States,0, Not in universe,2,52,95, 50000+. 
================================================ FILE: examples/criteo_sample.txt ================================================ label,I1,I2,I3,I4,I5,I6,I7,I8,I9,I10,I11,I12,I13,C1,C2,C3,C4,C5,C6,C7,C8,C9,C10,C11,C12,C13,C14,C15,C16,C17,C18,C19,C20,C21,C22,C23,C24,C25,C26 0,,3,260.0,,17668.0,,,33.0,,,,0.0,,05db9164,08d6d899,9143c832,f56b7dd5,25c83c98,7e0ccccf,df5c2d18,0b153874,a73ee510,8f48ce11,a7b606c4,ae1bb660,eae197fd,b28479f6,bfef54b3,bad5ee18,e5ba7672,87c6f83c,,,0429f84b,,3a171ecb,c0d61a5c,, 0,,-1,19.0,35.0,30251.0,247.0,1.0,35.0,160.0,,1.0,,35.0,68fd1e64,04e09220,95e13fd4,a1e6a194,25c83c98,fe6b92e5,f819e175,062b5529,a73ee510,ab9456b4,6153cf57,8882c6cd,769a1844,b28479f6,69f825dd,23056e4f,d4bb7bd8,6fc84bfb,,,5155d8a3,,be7c41b4,ded4aac9,, 0,0.0,0,2.0,12.0,2013.0,164.0,6.0,35.0,523.0,0.0,3.0,,18.0,05db9164,38a947a1,3f55fb72,5de245c7,30903e74,7e0ccccf,b72ec13d,1f89b562,a73ee510,acce978c,3547565f,a5b0521a,12880350,b28479f6,c12fc269,95a8919c,e5ba7672,675c9258,,,2e01979f,,bcdee96c,6d5d1302,, 0,,13,1.0,4.0,16836.0,200.0,5.0,4.0,29.0,,2.0,,4.0,05db9164,8084ee93,02cf9876,c18be181,25c83c98,,e14874c9,0b153874,7cc72ec2,2462946f,636405ac,8fe001f4,31b42deb,07d13a8f,422c8577,36103458,e5ba7672,52e44668,,,e587c466,,32c7478e,3b183c5c,, 0,0.0,0,104.0,27.0,1990.0,142.0,4.0,32.0,37.0,0.0,1.0,,27.0,05db9164,207b2d81,5d076085,862b5ba0,25c83c98,fbad5c96,17c22666,0b153874,a73ee510,534fc986,feb49a68,f24b551c,8978af5c,64c94865,32ec6582,b6d021e8,e5ba7672,25c88e42,21ddcdc9,b1252a9d,0e8585d2,,32c7478e,0d4a6d1a,001f3601,92c878de 0,0.0,-1,63.0,40.0,1470.0,61.0,4.0,37.0,46.0,0.0,1.0,,40.0,68fd1e64,207b2d81,9dd3c4fc,a09fab49,25c83c98,,271190b7,5b392875,a73ee510,49d5fa15,26a64614,3c5900b5,51351dd6,b28479f6,c38116c9,0decd005,e5ba7672,d3303ea5,21ddcdc9,b1252a9d,7633c7c8,,32c7478e,17f458f7,001f3601,71236095 
0,0.0,370,4.0,1.0,1787.0,65.0,14.0,25.0,489.0,0.0,7.0,,25.0,05db9164,2a69d406,fcae8bfa,13508380,25c83c98,,cd846c62,0b153874,a73ee510,3b08e48b,0ec1e215,18917580,44af41ef,07d13a8f,3b2d8705,51b69881,3486227d,642f2610,55dd3565,b1252a9d,5c8dc711,,423fab69,45ab94c8,2bf691b1,c84c4aec 1,19.0,10,30.0,10.0,1.0,3.0,33.0,47.0,126.0,3.0,5.0,,2.0,05db9164,403ea497,2cbec47f,3e2bfbda,30903e74,,7227c706,0b153874,a73ee510,5fcee6b1,9625b211,21a23bfe,dccbd94b,b28479f6,91f74a64,587267a3,e5ba7672,a78bd508,21ddcdc9,5840adea,c2a93b37,,32c7478e,1793a828,e8b83407,2fede552 0,0.0,0,36.0,22.0,4684.0,217.0,9.0,35.0,135.0,0.0,1.0,0.0,43.0,8cf07265,0aadb108,c798ded6,91e6318a,25c83c98,fe6b92e5,2aef1419,0b153874,a73ee510,3b08e48b,d027c970,1b2022a0,00e20e7b,1adce6ef,2de5271c,b74e1eb0,e5ba7672,7ce63c71,,,af5dc647,,dbb486d7,1793a828,, 0,2.0,11,8.0,23.0,30.0,11.0,2.0,8.0,23.0,1.0,1.0,,11.0,05db9164,58e67aaf,ea997bbe,72bea89f,384874ce,7e0ccccf,5b18f3d9,0b153874,a73ee510,012f45e7,720446f5,33ec1af8,034e5f3b,051219e6,d83fb924,4558136f,07c540c4,c21c3e4c,21ddcdc9,a458ea53,31c8e642,,c7dc6720,3e983c86,9b3e8820,d597922b 0,2.0,1,190.0,25.0,8.0,26.0,2.0,27.0,25.0,1.0,1.0,,25.0,05db9164,e77e5e6e,c23785fe,67dd8a70,25c83c98,7e0ccccf,0c41b6a1,37e4aa92,a73ee510,78d5c363,4ba74619,d8acd6f9,879fa878,07d13a8f,2eb18840,df604f5b,e5ba7672,449d6705,6f3756eb,5840adea,07b6c66f,,423fab69,246f2e7f,e8b83407,350a6bdb 0,,2,2.0,1.0,5533.0,1.0,41.0,1.0,33.0,,5.0,0.0,1.0,05db9164,d7988e72,25111132,d13862c2,25c83c98,6f6d9be8,84c427f0,5b392875,a73ee510,00f2b452,41b3f655,7c5cd1c7,ce5114a2,64c94865,846fb5bd,696fb81d,e5ba7672,0f2f9850,b6baba3f,a458ea53,06e40c52,8ec974f4,32c7478e,3fdb382b,e8b83407,49d68486 0,0.0,5,,,18424.0,461.0,23.0,4.0,231.0,0.0,2.0,,,05db9164,ed7b1c58,b063fe4e,4b972461,25c83c98,7e0ccccf,afa309bd,5b392875,a73ee510,23de5a4a,77212bd7,8cdc4941,7203f04e,b28479f6,298421a5,3084c78b,e5ba7672,8814ed47,,,514b7308,,c7dc6720,2fd70e1c,, 
0,8.0,-1,,,732.0,2.0,22.0,2.0,2.0,1.0,4.0,,,68fd1e64,38a947a1,,,25c83c98,7e0ccccf,1c86e0eb,0b153874,a73ee510,e8f7c7e8,755e4a50,,5978055e,b28479f6,7ba31d46,,e5ba7672,9b82aca5,,,,,32c7478e,,, 1,0.0,0,24.0,36.0,5022.0,436.0,25.0,32.0,192.0,0.0,9.0,0.0,36.0,5bfa8ab5,84b4e42f,45f68c2a,39547932,384874ce,fbad5c96,85e1a170,0b153874,a73ee510,2bf8bed1,a4ea009a,78a16776,1e9339bc,91233270,cdb87fb5,e15ad623,8efede7f,67bd0ece,,,78c1dd4b,,c7dc6720,4f7b7578,, 0,,82,20.0,4.0,507333.0,,0.0,4.0,4.0,,0.0,,4.0,05db9164,38d50e09,5d0ec1e8,e63708e9,25c83c98,fbad5c96,bc324536,0b153874,7cc72ec2,f6540b40,2bcfb78f,506bb280,e6fc496d,07d13a8f,ee569ce2,81db2bec,e5ba7672,582152eb,21ddcdc9,5840adea,4a8f0a7f,c9d4222a,32c7478e,1989e165,001f3601,09929967 0,,24,3.0,2.0,10195.0,,0.0,32.0,55.0,,0.0,,2.0,5a9ed9b0,68b3edbf,b00d1501,d16679b9,4cf72387,7e0ccccf,36b796aa,0b153874,a73ee510,8b7e0638,7373475d,e0d76380,cfbfce5c,b28479f6,f511c49f,1203a270,e5ba7672,752d8b8a,,,73d06dde,,3a171ecb,aee52b6f,, 0,,105,4.0,1.0,2200.0,,0.0,1.0,1.0,,0.0,,1.0,05db9164,38d50e09,fc1cad4b,40ed41e5,25c83c98,7e0ccccf,88afd773,51d76abe,a73ee510,3b08e48b,c6cb726f,153ff04a,176d07bc,b28479f6,42b3012c,1bf03082,776ce399,582152eb,21ddcdc9,5840adea,84ec2c79,,be7c41b4,a415643d,001f3601,c4304c4b 1,5.0,85,52.0,6.0,36.0,36.0,30.0,24.0,281.0,1.0,5.0,2.0,6.0,9a89b36c,1cfdf714,9d427ddf,4eadb673,25c83c98,7e0ccccf,2555b4d9,0b153874,a73ee510,4c89c3af,0e4ebdac,cf724373,779f824b,07d13a8f,f775a6d5,6512dce6,8efede7f,e88ffc9d,21ddcdc9,b1252a9d,361a1080,,423fab69,3fdb382b,cb079c2d,49d68486 0,2.0,3,4.0,1.0,4.0,1.0,2.0,1.0,1.0,1.0,1.0,,1.0,68fd1e64,2eb7b10e,378112d3,684abf7b,25c83c98,fbad5c96,0d15142a,5b392875,a73ee510,ac473633,df7e8e0b,38176faa,84c02464,1adce6ef,0816fba2,f2c6a810,07c540c4,21eb63af,,,8b7fb864,,423fab69,45b2acf4,, 
0,,1,5.0,36.0,239721.0,,0.0,0.0,123.0,,0.0,,62.0,8cf07265,4f25e98b,a68b0bcf,c194aaab,25c83c98,fbad5c96,a2f7459e,0b153874,7cc72ec2,b393caa5,15eced00,ab1307ec,bd251a95,64c94865,40e29d2a,65a31309,e5ba7672,7ef5affa,738584ec,a458ea53,fca82615,,32c7478e,74f7ceeb,9d93af03,d14e41ff 0,,4,,,1572.0,,0.0,17.0,55.0,,0.0,,,05db9164,8947f767,6bbe880c,feb6eb1a,4cf72387,7e0ccccf,3babeb61,0b153874,a73ee510,3b08e48b,565788d0,d06dc48e,8e7ad399,1adce6ef,ba8b8b16,30e6420c,776ce399,bd17c3da,ba92e49d,b1252a9d,65f3080f,,be7c41b4,42a310e6,010f6491,0eabc199 0,0.0,0,,,1464.0,4.0,5.0,3.0,4.0,0.0,1.0,,,68fd1e64,38a947a1,dd8e6407,db4eb846,25c83c98,13718bbd,963d99df,062b5529,a73ee510,3b08e48b,bffe9c30,eb43b195,e62d6c68,07d13a8f,3d2c6113,de815c2d,776ce399,d3c7daaa,,,5def73cb,,32c7478e,aa5529de,, 1,0.0,43,2.0,3.0,1700.0,21.0,6.0,10.0,21.0,0.0,1.0,,7.0,5a9ed9b0,46bbf321,c5d94b65,5cc8f91d,25c83c98,7e0ccccf,4157815a,1f89b562,a73ee510,4e979b5e,7056d78a,75c79158,08775c1b,e8dce07a,80d1ee72,208d4baf,e5ba7672,906ff5cb,,,6a909d9a,,3a171ecb,1f68c81f,, 0,0.0,1,2.0,1.0,2939.0,39.0,17.0,3.0,437.0,0.0,7.0,,1.0,68fd1e64,38a947a1,98351ee6,811ce8e8,25c83c98,fbad5c96,4a6c02fb,37e4aa92,a73ee510,3b08e48b,0cb221d0,617c70e9,ea18ebd8,07d13a8f,31b59ad3,121f63c9,e5ba7672,065917ca,,,c3739d01,,423fab69,d4af2638,, 1,9.0,1,2.0,5.0,18.0,5.0,9.0,5.0,5.0,1.0,1.0,0.0,5.0,5a9ed9b0,9819deea,6813d33b,f922efad,25c83c98,fbad5c96,34cbc0af,0b153874,a73ee510,bac95df6,88196a93,b99ddbc8,1211c647,b28479f6,1150f5ed,87acb535,07c540c4,7e32f7a4,,,a4b7004c,,32c7478e,b34f3128,, 0,,1,2.0,16.0,14404.0,79.0,2.0,16.0,103.0,,1.0,,16.0,05db9164,38a947a1,5492524f,ae59cd56,25c83c98,7e0ccccf,7925e09b,5b392875,7cc72ec2,56c80038,1cba690a,e00462bb,1d0f2da8,64c94865,51c5d5ca,ebbb82d7,07c540c4,be5810bd,,,bd1f6272,c9d4222a,32c7478e,043a382b,, 
0,0.0,26,7.0,1.0,3412.0,104.0,10.0,2.0,6.0,0.0,1.0,1.0,1.0,05db9164,287130e0,5e25fa67,dd47ba3b,25c83c98,13718bbd,412cb2ce,0b153874,a73ee510,3b08e48b,b9ec9192,8ebd48c3,df5886ca,07d13a8f,10040656,e05d680b,3486227d,891589e7,ff6cdd42,a458ea53,a2b7caec,,c7dc6720,1481ceb4,e8b83407,988b0775 0,8.0,-1,60.0,11.0,11.0,7.0,9.0,30.0,39.0,1.0,2.0,,7.0,2d4ea12b,d97d4ce8,c725873a,d0189e5a,25c83c98,fe6b92e5,07d75b52,1f89b562,a73ee510,4f1c6ae7,a2c1d2d9,49fee879,ea31804b,1adce6ef,46218630,3b87fa92,e5ba7672,fb342121,7be4df37,5840adea,d90f665b,,32c7478e,6c1cdd05,ea9a246c,1219b447 0,,1,13.0,1.0,3150.0,163.0,1.0,1.0,32.0,,1.0,,1.0,39af2607,c44e8a72,3f7f3d24,8eb89744,4cf72387,7e0ccccf,86651165,0b153874,a73ee510,3b08e48b,39dd23e7,538a49e7,0159bf9f,b28479f6,1addf65e,0596b5be,07c540c4,456d734d,af1445c4,a458ea53,cf79f8fa,c9d4222a,3a171ecb,d5b4ea7d,010f6491,deffd9e3 0,1.0,302,71.0,3.0,270.0,19.0,1.0,6.0,19.0,1.0,1.0,,19.0,68fd1e64,876465ad,da89f77a,37ee624b,43b19349,fe6b92e5,2b3ce8b7,5b392875,a73ee510,8a99abc1,4352b29b,8065cc64,5f4de855,b28479f6,9c382f7a,a14df6f7,d4bb7bd8,08154af3,21ddcdc9,5840adea,e7f0c6dc,,bcdee96c,3e30919e,f55c04b6,2fede552 1,1.0,0,1.0,0.0,2.0,0.0,4.0,0.0,0.0,1.0,2.0,,0.0,241546e0,6887a43c,9b792af9,9c6d05a0,25c83c98,6f6d9be8,adbcc874,0b153874,a73ee510,fbbf2c95,46031dab,6532318c,377af8aa,1adce6ef,ef6b7bdf,2c9d222f,e5ba7672,8f0f692f,21ddcdc9,a458ea53,cc6a9262,,32c7478e,a5862ce8,445bbe3b,b6a3490e 0,11.0,251,9.0,5.0,21.0,6.0,34.0,5.0,5.0,1.0,4.0,,5.0,05db9164,4322636e,e007dfac,77b99936,4ea20c7d,fe6b92e5,2be44e4e,25239412,a73ee510,18e09007,364e8b48,9c841b74,34cbb1bc,07d13a8f,14674f9b,9b3f7aa2,e5ba7672,9d3171e9,21ddcdc9,a458ea53,61b4555a,ad3062eb,32c7478e,38b97a31,ea9a246c,074bb89f 1,10.0,1,4.0,4.0,1.0,0.0,10.0,4.0,4.0,1.0,1.0,,0.0,09ca0b81,4f25e98b,0b2640f7,4badfc0c,4cf72387,fe6b92e5,df5c2d18,0b153874,a73ee510,da272362,a7b606c4,33c282f5,eae197fd,07d13a8f,dfab705f,635c3e13,e5ba7672,7ef5affa,2f4b9dd2,b1252a9d,cff19dc6,,c7dc6720,8535db9f,001f3601,b98a5b90 
0,0.0,-1,1.0,23.0,3169.0,147.0,62.0,0.0,753.0,0.0,9.0,1.0,39.0,05db9164,942f9a8d,69b028e3,003ceb8c,25c83c98,7e0ccccf,3f4ec687,1f89b562,a73ee510,c5fe5cb9,c4adf918,424ba327,85dbe138,b28479f6,ac182643,169f1150,8efede7f,1f868fdd,1d04f4a4,b1252a9d,15414e28,,32c7478e,aa9b9ab9,9d93af03,c73ed234 0,0.0,35,13.0,5.0,4939.0,140.0,1.0,22.0,61.0,0.0,1.0,,11.0,05db9164,4f25e98b,5e25fa67,dd47ba3b,a9411994,7e0ccccf,2e62d414,0b153874,a73ee510,4b415bb3,258875ea,8ebd48c3,dcc8f90a,07d13a8f,5be89da3,e05d680b,d4bb7bd8,bc5a0ff7,ff6cdd42,a458ea53,a2b7caec,,32c7478e,1481ceb4,e8b83407,988b0775 0,,1,13.0,2.0,59865.0,292.0,0.0,2.0,87.0,,0.0,0.0,2.0,68fd1e64,287130e0,b87cffc0,ffacf4e8,43b19349,,04277bf9,5b392875,7cc72ec2,4ea0d483,7e2c5c15,5ea407f3,91a1b611,b28479f6,9efd8b77,9906d656,07c540c4,891589e7,55dd3565,a458ea53,37a23b2d,,32c7478e,3fdb382b,ea9a246c,49d68486 1,,0,,1.0,16732.0,2.0,1.0,1.0,1.0,,1.0,,1.0,87552397,6e638bbc,598b72ce,3c7eb23c,25c83c98,fbad5c96,675e81f6,0b153874,a73ee510,d9b71390,4a77ddca,f21f7d11,dc1d72e4,07d13a8f,d4525f76,e2e3cf1c,d4bb7bd8,f6a2fc70,21ddcdc9,a458ea53,605776ee,,32c7478e,f93938dd,e8b83407,322cbe58 1,0.0,212,,,1632.0,65.0,24.0,1.0,113.0,0.0,6.0,,,be589b51,b0d4a6f6,50a6bc33,335e428a,25c83c98,7e0ccccf,1171550e,1f89b562,a73ee510,23724df8,031ba22d,4baf63a1,bb7a2c12,32813e21,b0369b63,c73993da,e5ba7672,e01eacde,,,1d14288c,,3a171ecb,c9bc2384,, 0,10.0,11,3.0,3.0,1026.0,3.0,88.0,3.0,131.0,1.0,15.0,0.0,3.0,9a89b36c,1cfdf714,8b14bdd6,3bf2df8b,25c83c98,,e807f153,0b153874,a73ee510,8627508e,1054ae5c,3cd57e51,d7ce3abd,b28479f6,d345b1a0,4d664c70,27c07bd6,e88ffc9d,712d530c,b1252a9d,9ecb9e0d,,bcdee96c,a8380e43,cb079c2d,37c5e077 0,,5,22.0,5.0,10324.0,,0.0,5.0,13.0,,0.0,,5.0,f434fac1,40ed0c67,374195a1,6f5d5092,4cf72387,6f6d9be8,555d7949,1f89b562,a73ee510,3b08e48b,91e8fc27,752343e3,9ff13f22,1adce6ef,f8ebf901,c43b15fe,776ce399,2585827d,21ddcdc9,5840adea,a66e7b01,,be7c41b4,e33735a0,e8b83407,f95af538 
0,,779,1.0,1.0,676.0,,0.0,4.0,4.0,,0.0,,1.0,68fd1e64,e5fb1af3,9b953c56,7be07df9,25c83c98,7e0ccccf,5e4f7d2b,0b153874,a73ee510,3b08e48b,25f4f871,6bca71b1,e67cdf97,07d13a8f,b5de5956,fb8ca891,d4bb7bd8,13145934,55dd3565,b1252a9d,b1ae3ed2,ad3062eb,423fab69,3fdb382b,9b3e8820,49d68486 0,,179,61.0,,3316.0,,,1.0,,,,,,f473b8dc,38a947a1,223b0e16,ca55061c,43b19349,7e0ccccf,7f2c5a6e,64523cfa,a73ee510,f6c6d9f8,d21494f8,156f99ef,f47f13e4,1adce6ef,0e78291e,5fbf4a84,1e88c74f,1999bae9,,,deb9605d,,32c7478e,e448275f,, 0,1.0,1,5.0,7.0,1238.0,13.0,9.0,15.0,89.0,0.0,3.0,0.0,7.0,8cf07265,09e68b86,aa8c1539,85dd697c,25c83c98,7e0ccccf,92ce5a7d,37e4aa92,a73ee510,15fa156b,e0c3cae0,d8c29807,e8df3343,8ceecbc8,d2f03b75,c64d548f,8efede7f,63cdbb21,cf99e5de,5840adea,5f957280,c9d4222a,55dd3565,1793a828,e8b83407,b7d9c3bc 0,2.0,72,20.0,11.0,4.0,11.0,24.0,14.0,69.0,1.0,7.0,,11.0,05db9164,09e68b86,6ef2aa66,20af9140,25c83c98,7e0ccccf,372a0c4c,0b153874,a73ee510,a08eee5a,ec88dd34,4df84614,94881fc3,b28479f6,52baadf5,cf3ec61f,3486227d,5aed7436,7be4df37,b1252a9d,98a79791,,bcdee96c,3fdb382b,e8b83407,49d68486 0,,57,60.0,20.0,11862.0,20.0,1.0,19.0,20.0,,1.0,,20.0,5bfa8ab5,4f25e98b,15363e12,f9e8a6fb,384874ce,,65c53f25,0b153874,a73ee510,3b08e48b,ad2bc6f4,d63df4e6,39ccb769,b28479f6,8ab5b746,a694f6ce,d4bb7bd8,7ef5affa,21ddcdc9,a458ea53,a370fd83,,32c7478e,d5b01f55,9b3e8820,85cebe8c 0,4.0,1,29.0,30.0,112.0,30.0,27.0,33.0,144.0,2.0,4.0,0.0,30.0,05db9164,58e67aaf,99815367,771966f0,4cf72387,6f6d9be8,cdc0ad95,5b392875,a73ee510,b0c25211,69926409,e802f466,2fc3058f,051219e6,d83fb924,f6613e51,e5ba7672,c21c3e4c,21ddcdc9,a458ea53,3aa05bfb,,32c7478e,9f0d87bf,9b3e8820,bde577f6 0,2.0,4,53.0,14.0,1499.0,20.0,11.0,19.0,98.0,0.0,3.0,7.0,14.0,75ac2fe6,287130e0,b264d69e,ce831e6d,25c83c98,,5aef82b1,0b153874,a73ee510,7fdb06fe,010265ac,74138b6d,0e5bc979,f7c1b33f,42793602,b49f63ab,8efede7f,891589e7,55dd3565,b1252a9d,a1229e5f,,32c7478e,3fdb382b,ea9a246c,49d68486 
0,,5,3.0,5.0,17405.0,,0.0,8.0,8.0,,0.0,,6.0,05db9164,c5c1d6ae,8018e37d,d8660950,43b19349,fbad5c96,c1e20400,5b392875,a73ee510,3b08e48b,60a1c175,22cad86a,9b9e44d2,07d13a8f,b25845fd,2a27c935,776ce399,561cabfe,21ddcdc9,5840adea,d479575f,,be7c41b4,9b18ad04,7a402766,67ebe777 0,,49,1.0,1.0,3116.0,72.0,3.0,1.0,48.0,,1.0,,1.0,7e5c2ff4,2c8c5f5d,13cd0697,352cefe6,25c83c98,7e0ccccf,4fb73f5f,985e3fcb,a73ee510,3b08e48b,6a447eb3,c3cdaf85,9dfda2b9,1adce6ef,5edc1a28,08514295,e5ba7672,f5f4ae5b,,,6387fda4,,55dd3565,d36c7dbf,, 0,,2865,23.0,0.0,23584.0,,0.0,2.0,47.0,,0.0,,2.0,05db9164,0468d672,cedcacac,7967fcf5,25c83c98,7e0ccccf,33b15f2c,0b153874,a73ee510,0f6ee8ce,419d31d4,553e02c3,08961fd0,1adce6ef,4f3b3616,91a6eec5,1e88c74f,9880032b,21ddcdc9,5840adea,a97b62ca,,423fab69,727a7cc7,ea9a246c,6935065e 0,,119,4.0,4.0,13528.0,,0.0,7.0,35.0,,0.0,,4.0,87552397,38a947a1,695a85e0,d502349a,25c83c98,7e0ccccf,82f666b6,0b153874,a73ee510,631ddef6,e51ddf94,67b31aac,3516f6e6,cfef1c29,d33de6b0,d2b0336b,07c540c4,48ce336b,,,ea6a0e31,,3a171ecb,da408463,, 0,,25,5.0,4.0,0.0,,0.0,4.0,4.0,,0.0,,1.0,68fd1e64,71ca0a25,44e7b8ec,3b989466,307e775a,7e0ccccf,d0519bab,0b153874,a73ee510,3b08e48b,38914a66,d7cd5e08,c281c227,1adce6ef,ae3a9888,4032eea3,1e88c74f,9bf8ffef,21ddcdc9,5840adea,53def47b,c9d4222a,dbb486d7,8849cfac,001f3601,aa5f0a15 0,2.0,180,94.0,7.0,151.0,38.0,2.0,30.0,26.0,1.0,1.0,,25.0,5bfa8ab5,421b43cd,33ebdbb6,29998ed1,25c83c98,fbad5c96,6ad82e7a,0b153874,a73ee510,451bd4e4,c1ee56d0,6aaba33c,ebd756bd,b28479f6,2d0bb053,b041b04a,e5ba7672,2804effd,,,723b4dfd,,32c7478e,b34f3128,, 0,,2,0.0,,,,,0.0,,,,,,be589b51,38a947a1,4470baf4,8c8a4c47,307e775a,fe6b92e5,ae1dfa39,0b153874,7cc72ec2,3b08e48b,ee26f284,bb669e25,48b975db,b28479f6,717db705,2b2ce127,2005abd1,ade68c22,,,2b796e4a,,be7c41b4,8d365d3b,, 
0,,0,9.0,,17907.0,59.0,2.0,0.0,98.0,,1.0,,,68fd1e64,80e26c9b,ba1947d0,85dd697c,25c83c98,fe6b92e5,3d63f4e6,0b153874,a73ee510,94e68c1d,af6a4ffc,34a238e0,2a1579a2,b28479f6,a785131a,da441c7e,e5ba7672,005c6740,21ddcdc9,5840adea,8717ea07,,32c7478e,1793a828,e8b83407,b9809574 0,7.0,84,,7.0,10.0,6.0,29.0,41.0,288.0,1.0,4.0,,5.0,05db9164,38a947a1,840eeb3a,f7263320,25c83c98,7e0ccccf,3baecfcb,0b153874,a73ee510,98d5faa2,96a54d80,317bfd7d,dbe5226f,07d13a8f,d4a5a2be,1689e4de,e5ba7672,5d961bca,,,dc55d6df,,423fab69,aa0115d2,, 0,0.0,0,1.0,,3667.0,42.0,2.0,30.0,37.0,0.0,1.0,1.0,,05db9164,e5fb1af3,909286bb,252734c9,25c83c98,7e0ccccf,b28fa88b,0b153874,a73ee510,4b8a7639,9f0003f4,233fde4c,5afd9e51,b28479f6,23287566,1871ac47,8efede7f,13145934,1d1eb838,b1252a9d,23da7042,,bcdee96c,1be0cc0a,e8b83407,f89dfbcc 0,5.0,1,46.0,6.0,1046.0,112.0,5.0,43.0,111.0,1.0,1.0,,6.0,05db9164,4f25e98b,f86649de,f56f6045,25c83c98,fe6b92e5,21c0ea1a,0b153874,a73ee510,cfa407de,bc862fb6,b9b3b7ef,4f487d87,07d13a8f,dfab705f,33301a0b,e5ba7672,7ef5affa,92524a76,a458ea53,d5a53bc3,c9d4222a,423fab69,3fdb382b,001f3601,79883c16 0,,7,4.0,3.0,75211.0,,0.0,3.0,3.0,,0.0,,3.0,8cf07265,0468d672,00d3cdb7,d4125c6f,25c83c98,7e0ccccf,71ccc25b,0b153874,7cc72ec2,e89812b3,5cab60cb,d286aff3,ce418dc9,07d13a8f,a888f201,7d9d720d,1e88c74f,9880032b,21ddcdc9,5840adea,8443660f,,3a171ecb,52d7797f,e8b83407,ddf88ddd 1,,54,1.0,1.0,,,0.0,1.0,1.0,,0.0,,1.0,68fd1e64,38a947a1,0d15d9b5,bfe24cb7,b0530c50,,d9aa9d97,0b153874,7cc72ec2,3b08e48b,6e647667,72a52d4c,85dbe138,b28479f6,06809048,58cacba8,2005abd1,670f513e,,,b7ba6151,,32c7478e,7b80ab11,, 0,,0,34.0,3.0,,,0.0,3.0,3.0,,0.0,,3.0,68fd1e64,287130e0,38610f2f,28d2973d,25c83c98,,88002ee1,0b153874,7cc72ec2,3b08e48b,f1b78ab4,b345f76c,6e5da64f,b28479f6,9efd8b77,569a0480,2005abd1,891589e7,712d530c,b1252a9d,c2af6d9f,,32c7478e,58e38a64,ea9a246c,70451962 
1,,1,1.0,,7814.0,119.0,1.0,19.0,30.0,,1.0,,,05db9164,80e26c9b,eb08d440,f922efad,25c83c98,fe6b92e5,41e1828d,0b153874,a73ee510,3b08e48b,b6358cf2,654bb16a,61c65daf,1adce6ef,0f942372,87acb535,d4bb7bd8,005c6740,21ddcdc9,5840adea,a4b7004c,,32c7478e,b34f3128,e8b83407,9904c656 0,2.0,5,11.0,9.0,24.0,9.0,110.0,9.0,148.0,1.0,10.0,0.0,9.0,be30ca83,8f5b4275,b009d929,c7043c4b,30903e74,fbad5c96,a90a99c5,51d76abe,a73ee510,e6003298,c804061c,3563ab62,1cc9ac51,1adce6ef,a6bf53df,b688c8cc,8efede7f,65c9624a,21ddcdc9,5840adea,2754aaf1,c9d4222a,55dd3565,3b183c5c,e8b83407,adb5d234 0,,19,1.0,1.0,7476.0,9.0,9.0,1.0,9.0,,1.0,,1.0,8cf07265,537e899b,5037b88e,9dde01fd,25c83c98,fbad5c96,aafae983,0b153874,a73ee510,dc790dda,c3a20c8d,680d7261,7ce5cdf0,07d13a8f,6d68e99c,c0673b44,e5ba7672,b34aa802,,,e049c839,,32c7478e,6095f986,, 0,4.0,0,131.0,1.0,0.0,1.0,14.0,10.0,40.0,1.0,3.0,,0.0,05db9164,80e26c9b,13193952,f922efad,25c83c98,fe6b92e5,124131fa,1f89b562,a73ee510,a1ee64a6,9ba53fcc,654bb16a,42156eb4,1adce6ef,0f942372,87acb535,e5ba7672,005c6740,21ddcdc9,5840adea,a4b7004c,ad3062eb,bcdee96c,b34f3128,e8b83407,9904c656 1,0.0,5,2.0,1.0,1526.0,3.0,9.0,2.0,2.0,0.0,1.0,,1.0,05db9164,38a947a1,60c37737,8a77aa30,25c83c98,fe6b92e5,1c63b114,1f89b562,a73ee510,f6f942d1,67841877,94a1cc80,781f4d92,b28479f6,962bbefe,3eef319d,e5ba7672,0ad1cc71,,,1c63c71e,c9d4222a,3a171ecb,ad80aaa7,, 0,1.0,1,5.0,18.0,475.0,63.0,15.0,4.0,803.0,1.0,4.0,,63.0,05db9164,3e4b7926,7442ec70,bb8645c3,0942e0a7,7e0ccccf,3a7402e7,51d76abe,a73ee510,aa91245c,b4bb4248,a5ab10e6,3eb2f9dc,07d13a8f,e6863a8e,1cdb3603,e5ba7672,e261f8d8,21ddcdc9,5840adea,1380864e,,32c7478e,be2f0db5,47907db5,68d9ada1 0,,1,1.0,18.0,10791.0,,0.0,1.0,281.0,,0.0,,18.0,05db9164,46bbf321,c5d94b65,5cc8f91d,4cf72387,7e0ccccf,2773eaab,5b392875,a73ee510,1a428761,06474f17,75c79158,2ec4b007,91233270,cddd56a1,208d4baf,1e88c74f,906ff5cb,,,6a909d9a,ad3062eb,3a171ecb,1f68c81f,, 
0,1.0,-1,,,528.0,15.0,8.0,2.0,585.0,1.0,4.0,,,05db9164,ef69887a,3fea0364,9c32fadc,30903e74,,ec1a1856,0b153874,a73ee510,22a99f9d,a04e019f,cc606cbe,07a906b4,b28479f6,902a109f,0ab5ee0c,e5ba7672,4bcc9449,083e89d9,b1252a9d,6c38450e,,32c7478e,394c5a53,47907db5,1d7b6578 0,,18,9.0,0.0,,,0.0,7.0,16.0,,0.0,,7.0,68fd1e64,38a947a1,2273663d,3beb8147,25c83c98,fbad5c96,88002ee1,985e3fcb,7cc72ec2,3b08e48b,f1b78ab4,c47972c1,6e5da64f,1adce6ef,8d3c9c0c,e638c51d,2005abd1,35176a17,,,0370bc83,ad3062eb,55dd3565,cde6fafb,, 0,,5,,13.0,10467.0,170.0,4.0,13.0,96.0,,1.0,,13.0,be589b51,8084ee93,02cf9876,c18be181,0942e0a7,7e0ccccf,ad82323c,37e4aa92,a73ee510,bdfd8a02,7ca25fd2,8fe001f4,d3802338,b28479f6,b2ff8c6b,36103458,e5ba7672,52e44668,,,e587c466,,32c7478e,3b183c5c,, 1,,27,,,27753.0,,,3.0,,,,,,05db9164,efb7db0e,bf05882d,9e3f04df,25c83c98,7e0ccccf,73e2fc5e,062b5529,a73ee510,f8f0e86f,4e46b019,9da0a604,07c072b7,b28479f6,5ab7247d,929eef3c,d4bb7bd8,a863ac26,,,fb19a39b,ad3062eb,3a171ecb,cc4079ea,, 0,0.0,49,,,3732.0,20.0,1.0,3.0,20.0,0.0,1.0,,,17f69355,09e68b86,5be9b239,ace52998,25c83c98,,82cfb145,0b153874,a73ee510,9b8e7680,3f31bb3e,e5b118b4,c6378246,b28479f6,52baadf5,f68bd494,d4bb7bd8,5aed7436,21ddcdc9,a458ea53,ba3c688b,,32c7478e,3fdb382b,b9266ff0,49d68486 1,1.0,19,18.0,16.0,178.0,32.0,34.0,34.0,200.0,0.0,9.0,,16.0,05db9164,ea3a5818,7ee60f5f,bebc14b3,25c83c98,6f6d9be8,4f900c22,f0e5818a,a73ee510,47e01053,7c4f062c,cc22efeb,76dfc898,b28479f6,0a069322,606df1fe,e5ba7672,a1d0cc4f,21ddcdc9,b1252a9d,aebdd3c2,8ec974f4,32c7478e,e4e10900,b9266ff0,7a1ac642 1,0.0,1,2.0,5.0,6613.0,104.0,1.0,17.0,74.0,0.0,1.0,,5.0,8cf07265,8db5bc37,,,25c83c98,7e0ccccf,5a103f30,0b153874,a73ee510,3b08e48b,8487a168,,636195f8,64c94865,00e52733,,d4bb7bd8,821c30b8,,,,,32c7478e,,, 0,,1,,,29111.0,,,0.0,,,,,,ae82ea21,5dac953d,d032c263,c18be181,384874ce,,6b406125,5b392875,a73ee510,f1311559,278636c9,dfbb09fb,b87a829f,b28479f6,78e3b025,84898b2a,e5ba7672,35a9ed38,,,0014c32a,c0061c6d,32c7478e,3b183c5c,, 
0,,58,,20.0,21659.0,1033.0,9.0,1.0,151.0,,2.0,,43.0,05db9164,80e26c9b,,,25c83c98,7e0ccccf,622305e6,5b392875,a73ee510,e70742b0,319687c9,,62036f49,07d13a8f,f3635baf,,e5ba7672,f54016b9,21ddcdc9,5840adea,,,3a171ecb,,e8b83407,00ed90d0 0,0.0,11,11.0,5.0,4325.0,61.0,4.0,14.0,68.0,0.0,2.0,0.0,5.0,68fd1e64,d8fc04df,f652979e,32a55192,25c83c98,7e0ccccf,19d92932,5b392875,a73ee510,f710483a,d54a5851,ed5cfa27,a36387e6,b28479f6,9da6bb5f,3141102a,1e88c74f,cbadff99,21ddcdc9,5840adea,3df2213d,,3a171ecb,42998020,010f6491,dd8b4f5c 1,,2560,2.0,0.0,63552.0,398.0,0.0,7.0,122.0,,0.0,,1.0,9a89b36c,39dfaa0d,a17519ab,5b392af8,25c83c98,fbad5c96,14ba4967,64523cfa,7cc72ec2,9ffc445a,c21c44c8,834b5edc,5b3fc509,07d13a8f,60fa10e5,e66306df,d4bb7bd8,df4fffb7,21ddcdc9,5840adea,9988d803,,c7dc6720,abe3a684,010f6491,f3737bd0 0,0.0,30,2.0,15.0,2712.0,210.0,5.0,43.0,242.0,0.0,2.0,,15.0,05db9164,207b2d81,2b280564,ad5ffc6b,25c83c98,fe6b92e5,559eb1e1,0b153874,a73ee510,51e04895,91875c79,2a064dba,ea519e47,64c94865,11b2ae92,7d9b60c8,e5ba7672,395856b0,21ddcdc9,a458ea53,9c3eb598,,32c7478e,c0b8dfd6,001f3601,81be451e 0,0.0,49,,3.0,1732.0,20.0,1.0,14.0,16.0,0.0,1.0,,3.0,8cf07265,e112a9de,4e1c9eda,22504558,25c83c98,fbad5c96,01620311,0b153874,a73ee510,66c281d9,922bbb91,23bc90a1,ad61640d,1adce6ef,6da7d68c,776f5665,e5ba7672,d495a339,,,5a5953a2,,32c7478e,8f079aa5,, 0,,-1,,,357.0,,0.0,10.0,11.0,,0.0,,,68fd1e64,403ea497,2cbec47f,3e2bfbda,25c83c98,7e0ccccf,9d8d7034,0b153874,a73ee510,b3d657b8,51ef0313,21a23bfe,e8f6ccfe,07d13a8f,e3209fc2,587267a3,e5ba7672,a78bd508,21ddcdc9,5840adea,c2a93b37,,32c7478e,1793a828,e8b83407,2fede552 0,2.0,7,,22.0,37.0,22.0,4.0,1.0,135.0,1.0,3.0,,22.0,98237733,b26462db,dad8b3db,06b1cf6e,25c83c98,7e0ccccf,ade953a9,5b392875,a73ee510,0eca1729,29e4ad33,422e8212,80467802,07d13a8f,72fbc65c,25b075e4,e5ba7672,35ee3e9e,,,a13bd40d,,3a171ecb,0ff91809,, 
0,,68,1.0,1.0,24513.0,43.0,4.0,12.0,62.0,,1.0,,1.0,fc9c62bb,80e26c9b,,,25c83c98,6f6d9be8,e746fe19,1f89b562,a73ee510,c9ac91cb,0bc63bd0,,ef007ecc,b28479f6,4c1df281,,e5ba7672,f54016b9,21ddcdc9,5840adea,,,32c7478e,,e8b83407,c4e4eabb 1,0.0,304,1.0,,13599.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,,68fd1e64,064c8f31,70168f62,585ab217,25c83c98,fe6b92e5,b3a5258d,0b153874,a73ee510,7cda6c86,30b2a438,eb83af8a,aebdb575,07d13a8f,81d3f724,69f67894,3486227d,d4a314a2,21ddcdc9,5840adea,e1627e2c,,32c7478e,a6e7d8d3,001f3601,2fede552 0,0.0,2,4.0,7.0,1568.0,70.0,4.0,42.0,117.0,0.0,1.0,,36.0,de4dac42,b7ca2abd,022a0b3c,d6b6e0bf,25c83c98,13718bbd,33cca6fa,0b153874,a73ee510,fb999b75,9f7c4fc1,05e68866,2b9fb512,07d13a8f,2f453358,6de617d3,e5ba7672,4771e483,,,df66957b,,3a171ecb,b34f3128,, 0,,0,3.0,2.0,,,0.0,3.0,13.0,,0.0,,2.0,05db9164,38a947a1,d125aecd,82a61820,25c83c98,7e0ccccf,d18f8f99,0b153874,7cc72ec2,3b08e48b,6c27619d,49507531,61e43922,07d13a8f,bb1e9ca8,0fd6d3ca,2005abd1,e96a7df2,,,7eefff0d,,be7c41b4,cafb4e4d,, 0,0.0,0,5.0,1.0,1751.0,37.0,1.0,8.0,11.0,0.0,1.0,,1.0,8cf07265,09e68b86,fc25ffd0,991a22ae,25c83c98,fbad5c96,6da2fbd6,f0e5818a,a73ee510,78ed0c4d,7bbe6c06,c35b992b,ea1f21b7,1adce6ef,dbc5e126,068a2c9f,e5ba7672,5aed7436,21ddcdc9,b1252a9d,df9de95c,,423fab69,3fdb382b,cb079c2d,49d68486 1,3.0,22,7.0,9.0,269.0,11.0,12.0,15.0,573.0,1.0,7.0,,9.0,05db9164,558b4efb,1b5e2c32,8a2b280f,25c83c98,13718bbd,6d51a5b0,966033bc,a73ee510,2e48a61d,61af8052,733bbdf2,2f3ee7fb,64c94865,2cd24ac0,8ac5e229,e5ba7672,c68ebaa0,21ddcdc9,5840adea,0be61dd1,,32c7478e,3b183c5c,ea9a246c,9973f80f 1,,1,,,14447.0,328.0,15.0,0.0,432.0,,9.0,0.0,,5bfa8ab5,26ece8a8,58ca7e87,3db5e097,25c83c98,fbad5c96,877d7f71,0b153874,a73ee510,afc4d756,5bd8a4ae,91f87a19,7a3043c0,07d13a8f,102fc449,834b85f5,3486227d,87fd936e,,,e339163e,,423fab69,c9a8db2a,, 
0,,1,4.0,1.0,235065.0,,0.0,3.0,1.0,,0.0,,1.0,5a9ed9b0,a8da270e,6392b1c1,4e1c036b,25c83c98,6f6d9be8,863329da,0b153874,7cc72ec2,fbc2dc95,a89c45cb,4ea4e9d5,a4fafa5b,b28479f6,f2252b1c,b7f61016,e5ba7672,130ebfcd,,,f15fe1ee,,32c7478e,2896ad66,, 0,1.0,4,75.0,21.0,246.0,69.0,1.0,33.0,33.0,1.0,1.0,,31.0,3b65d647,512fdf0c,b3ee24fe,631a0f79,25c83c98,7e0ccccf,86b374da,1f89b562,a73ee510,3b08e48b,07678d3e,9b665b9c,0159bf9f,b28479f6,fc29c5a9,b7a016ed,e5ba7672,fd3919f9,21ddcdc9,5840adea,1df3ad93,,3a171ecb,3aebd96a,724b04da,56be3401 1,,64,3.0,7.0,14747.0,38.0,4.0,16.0,25.0,,3.0,,17.0,05db9164,8b0005b7,62acd884,7736c782,25c83c98,fbad5c96,b01d50d5,5b392875,a73ee510,3b08e48b,cd1b7031,0b7afe9e,4d8657a2,07d13a8f,715f1291,7d0949a5,07c540c4,dff11f14,,,c12eabbb,,3a171ecb,af0cb2c3,, 0,,0,2.0,,4317.0,0.0,8.0,0.0,0.0,,1.0,,,68fd1e64,09e68b86,29dbbee7,15c721d8,4cf72387,,f33e4fa1,5b392875,a73ee510,e5330e23,7b5deffb,526eb908,269889be,b28479f6,52baadf5,e71dfc2d,e5ba7672,5aed7436,39e30682,b1252a9d,b4770b64,,32c7478e,2f34b1ef,e8b83407,4a449e4c 0,0.0,1,5.0,0.0,11738.0,490.0,10.0,13.0,140.0,0.0,1.0,,1.0,52f1e825,9819deea,a2b48926,f922efad,4cf72387,7e0ccccf,d385ea68,0b153874,a73ee510,3b08e48b,7940fc2a,b99ddbc8,00e20e7b,b28479f6,1150f5ed,87acb535,e5ba7672,7e32f7a4,,,a4b7004c,ad3062eb,32c7478e,b34f3128,, 1,0.0,53,17.0,4.0,1517.0,87.0,1.0,5.0,11.0,0.0,1.0,0.0,4.0,05db9164,38d50e09,948ee031,b7ab56a2,384874ce,fbad5c96,879ccac6,0b153874,a73ee510,9ca0fba4,e931c5cd,42bee2f2,580817cd,b28479f6,06373944,67b3c631,e5ba7672,fffe2a63,21ddcdc9,b1252a9d,bd074856,,32c7478e,df487a73,001f3601,c27f155b 0,,0,7.0,14.0,3751.0,646.0,0.0,37.0,432.0,,0.0,,14.0,0e78bd46,ae46a29d,770451b6,f922efad,25c83c98,fe6b92e5,01620311,0b153874,a73ee510,5a01afad,922bbb91,4bba7327,ad61640d,b28479f6,cccdd69e,e2e2fcd9,e5ba7672,e32bf683,,,b964dee0,c9d4222a,32c7478e,b34f3128,, 
0,1.0,1,14.0,1.0,118.0,1.0,4.0,1.0,32.0,1.0,1.0,,1.0,05db9164,4f25e98b,79bdb97a,bdbe850d,43b19349,,38eb9cf4,0b153874,a73ee510,49d1ad89,7f8ffe57,30ed85b5,46f42a63,07d13a8f,dfab705f,e75cb6ea,e5ba7672,7ef5affa,21ddcdc9,a458ea53,72c8ca0c,,32c7478e,3fdb382b,001f3601,49d68486 0,3.0,1,25.0,9.0,1396.0,39.0,5.0,32.0,37.0,0.0,2.0,,10.0,05db9164,dde11b16,c6616b04,e6996139,25c83c98,3bf701e7,2e8a689b,0b153874,a73ee510,efea433b,e51ddf94,3a802941,3516f6e6,07d13a8f,e28388cc,f4944655,3486227d,43dfe9bd,,,81f8278e,,3a171ecb,772b286f,, 0,,0,37.0,10.0,15.0,,0.0,10.0,10.0,,0.0,,10.0,05db9164,95e2d337,da3ad2bd,a95c56ca,25c83c98,fbad5c96,d7f3ff9f,1f89b562,a73ee510,3b08e48b,29473fc8,359d194a,aa902020,051219e6,003cf364,8023d5ba,776ce399,7b06fafe,d913d8f1,a458ea53,15bb899d,,32c7478e,6c25dad0,2bf691b1,59e91663 0,,0,4.0,,11534.0,,0.0,0.0,1.0,,0.0,,,39af2607,78ccd99e,55f298ba,1de19bc2,25c83c98,fbad5c96,63b7fcf7,1f89b562,a73ee510,3b08e48b,779482a8,624029b0,7d65a908,051219e6,9917ad07,270e2a53,1e88c74f,e7e991cb,21ddcdc9,a458ea53,5ff5ac4a,ad3062eb,32c7478e,d65fa724,875ea8a7,86601e0a 0,,498,,0.0,92.0,,0.0,0.0,0.0,,0.0,,0.0,5bfa8ab5,90081f33,fd22e418,36375a46,43b19349,fbad5c96,6c338953,0b153874,a73ee510,3b08e48b,553ebda3,fb991bf5,49fe3d4e,b28479f6,50b07d60,d1a4e968,776ce399,7da6ea7e,,,9fb07dd2,,be7c41b4,359dd977,, 1,8.0,7,20.0,8.0,5.0,22.0,172.0,21.0,568.0,1.0,21.0,,0.0,05db9164,404660bb,97d1681e,ffe40d5f,25c83c98,7e0ccccf,1c86e0eb,1f89b562,a73ee510,f3b83678,755e4a50,7e7a6264,5978055e,1adce6ef,6ddbba94,e7af7559,e5ba7672,4b17f8a2,21ddcdc9,5840adea,5a49c6db,,32c7478e,faf5d8b3,f0f449dd,984e0db0 0,,4,1.0,1.0,270.0,170.0,1.0,19.0,196.0,,1.0,0.0,1.0,3b65d647,4c2bc594,d032c263,c18be181,25c83c98,fbad5c96,cd98cc3d,0b153874,a73ee510,493b74f2,dcc84468,dfbb09fb,b72482f5,8ceecbc8,7ac43a46,84898b2a,e5ba7672,bc48b783,,,0014c32a,,55dd3565,3b183c5c,, 
0,,6,52.0,15.0,383.0,,0.0,21.0,21.0,,0.0,,15.0,05db9164,09e68b86,88290645,0676a23d,25c83c98,fe6b92e5,f14f1abf,0b153874,a73ee510,3b08e48b,7b5deffb,f6d35a1e,269889be,b28479f6,52baadf5,90d6ddcd,776ce399,5aed7436,21ddcdc9,b1252a9d,29d21ab1,,32c7478e,69e4f188,e8b83407,e001324a 0,0.0,57,2.0,6.0,1683.0,550.0,5.0,48.0,412.0,0.0,1.0,0.0,102.0,39af2607,c5fe64d9,fda0b584,13508380,25c83c98,7e0ccccf,295cc387,0b153874,a73ee510,3b08e48b,7d5ece85,ffcedb7a,e4b5ce61,07d13a8f,52b49730,f39f1141,d4bb7bd8,c235abed,4cc48856,a458ea53,fdc724a8,,32c7478e,45ab94c8,46fbac64,c84c4aec 0,,90,,0.0,1455.0,,0.0,6.0,10.0,,0.0,,2.0,05db9164,6f609dc9,d032c263,c18be181,25c83c98,7e0ccccf,315c76f3,37e4aa92,a73ee510,3b08e48b,e51ddf94,dfbb09fb,3516f6e6,07d13a8f,c169c458,84898b2a,776ce399,381bd833,,,0014c32a,,3a171ecb,3b183c5c,, 0,,29,4.0,4.0,12245.0,,0.0,19.0,73.0,,0.0,,4.0,05db9164,3df44d94,d032c263,c18be181,4cf72387,7e0ccccf,81bb0302,5b392875,a73ee510,f918493f,b7094596,dfbb09fb,1f9d2c38,b28479f6,e0052e65,84898b2a,07c540c4,e7648a8f,,,0014c32a,,32c7478e,3b183c5c,, 0,3.0,-1,3.0,2.0,285.0,5.0,6.0,8.0,30.0,1.0,4.0,,5.0,05db9164,73b37f46,cd82408a,eb45e6e4,25c83c98,7e0ccccf,ead731f4,0b153874,a73ee510,3b08e48b,e9c32980,d1fb0874,3fe840eb,ec19f520,f3a94039,6d87c0d4,07c540c4,d1605c46,,,ed01532f,,3a171ecb,8d49fa4b,, 1,,2,3.0,,5091.0,0.0,6.0,0.0,3.0,,5.0,,,5a9ed9b0,4f25e98b,10ee5afb,1d29846e,db679829,,1971812a,0b153874,a73ee510,aed8755c,5307d8e2,5e76bfca,8368e64b,b28479f6,8ab5b746,5fb9ff62,07c540c4,7ef5affa,2e30f394,5840adea,e208a45f,,32c7478e,3fdb382b,001f3601,49d68486 0,,78,8.0,,35203.0,853.0,2.0,0.0,98.0,,1.0,,,05db9164,c41a84c8,d627c43e,759c4a2e,25c83c98,fbad5c96,61beb1aa,0b153874,a73ee510,a5270a71,81a23494,2d15871c,3796b047,b28479f6,55d28d38,9243e635,07c540c4,2b46823a,,,ec5ac7c6,ad3062eb,32c7478e,590b856f,, 
1,37.0,113,2815.0,5.0,2.0,3.0,26.0,49.0,78.0,0.0,1.0,,3.0,05db9164,c5c1d6ae,b2de8002,f9a7e394,25c83c98,7e0ccccf,0d00feb3,0b153874,a73ee510,ff4776d6,640d8b63,76517c94,18041128,b28479f6,29a18ba0,afc96aa6,e5ba7672,836a67dd,21ddcdc9,5840adea,c0cd6339,78e2e389,32c7478e,7e60320b,7a402766,ba14bbcb 0,5.0,1,28.0,22.0,11.0,24.0,5.0,22.0,22.0,3.0,3.0,,21.0,05db9164,89ddfee8,7e4ea1b2,bc17b20f,25c83c98,,a6624a99,5b392875,a73ee510,3b08e48b,f161ec47,49a5dd4f,1e18519e,051219e6,d5223973,9fa82d1c,e5ba7672,5bb2ec8e,4b1019ff,a458ea53,40b11f62,,32c7478e,eaa38671,f0f449dd,8b3e7faa 0,,0,1.0,33.0,11774.0,,0.0,1.0,502.0,,0.0,,33.0,5a9ed9b0,2ae0a573,0739daa8,4fbef8bb,4cf72387,7e0ccccf,ca4fd8f8,0b153874,a73ee510,3b08e48b,a0060bca,9148b680,22d23aac,07d13a8f,413cc8c6,64e0265f,776ce399,f2fc99b1,,,38879cfe,ad3062eb,32c7478e,7836b4d5,, 0,,1,14.0,3.0,3008.0,15.0,6.0,5.0,146.0,,3.0,,3.0,68fd1e64,a0e12995,b3693f43,f888df5a,25c83c98,7e0ccccf,fcf0132a,0b153874,a73ee510,aed3d80e,d650f1bd,63314ad3,863f8f8a,07d13a8f,73e2709e,ea1c4696,e5ba7672,1616f155,21ddcdc9,5840adea,67afd8d0,,c7dc6720,e3aea32f,9b3e8820,e75c9ae9 1,0.0,1,27.0,38.0,1499.0,73.0,14.0,35.0,269.0,0.0,4.0,0.0,38.0,8cf07265,04e09220,b1ecc6c4,5dff9b29,4cf72387,fe6b92e5,53ef84c0,0b153874,a73ee510,267caf03,643327e3,2436ff75,478ebe53,07d13a8f,f6b23a53,f4ead43c,e5ba7672,6fc84bfb,,,4f1aa25f,,423fab69,ded4aac9,, 0,,5,44.0,4.0,12143.0,,0.0,4.0,4.0,,0.0,,4.0,05db9164,38d50e09,0c7bb149,a35517fb,25c83c98,3bf701e7,e14874c9,0b153874,7cc72ec2,3b08e48b,636405ac,96fa9c01,31b42deb,07d13a8f,ee569ce2,7ce58da8,776ce399,582152eb,21ddcdc9,5840adea,d1d4f4a9,ad3062eb,3a171ecb,03955d00,001f3601,4e7af834 1,3.0,2,37.0,87.0,190.0,90.0,3.0,49.0,88.0,2.0,2.0,,88.0,68fd1e64,38a947a1,,,43b19349,,d385ea68,0b153874,a73ee510,3b08e48b,7940fc2a,,00e20e7b,07d13a8f,7f1c4567,,d4bb7bd8,95f5c722,,,,,32c7478e,,, 
0,,8,8.0,5.0,25660.0,,0.0,3.0,5.0,,0.0,,5.0,05db9164,90081f33,fd22e418,36375a46,25c83c98,7e0ccccf,0bdc3959,0b153874,a73ee510,3b08e48b,c6cb726f,fb991bf5,176d07bc,b28479f6,13f8263b,d1a4e968,1e88c74f,c191a3ff,,,9fb07dd2,,32c7478e,359dd977,, 0,0.0,0,35.0,4.0,190.0,85.0,43.0,18.0,177.0,0.0,3.0,1.0,8.0,05db9164,207b2d81,2b280564,ad5ffc6b,5a3e1872,7e0ccccf,4aa938fc,0b153874,a73ee510,efea433b,7e40f08a,2a064dba,1aa94af3,07d13a8f,0c67c4ca,7d9b60c8,3486227d,395856b0,21ddcdc9,a458ea53,9c3eb598,,32c7478e,c0b8dfd6,001f3601,7a2fb9af 1,2.0,1,19.0,20.0,1.0,20.0,2.0,14.0,20.0,1.0,1.0,0.0,12.0,68fd1e64,06174070,a3829614,b0ed6de7,4cf72387,fe6b92e5,71c23d74,0b153874,a73ee510,c6c8dd7c,ae4c531b,3b917db0,01c2bbc7,cfef1c29,73438c3b,12e989e9,07c540c4,836a11e3,a34d2cf6,5840adea,9179411e,,32c7478e,1793a828,e8b83407,fa3124de 0,1.0,1849,4.0,0.0,28.0,0.0,1.0,0.0,0.0,1.0,1.0,,0.0,be589b51,ef69887a,771a1642,2e946ee2,4cf72387,,5d7d417f,0b153874,a73ee510,50c56209,52d28861,77f29381,a4b04123,b28479f6,902a109f,9fe6f065,07c540c4,4bcc9449,566c492c,5840adea,7b6393e8,,32c7478e,3fdb382b,47907db5,2fc5e3d4 0,0.0,65,,7.0,10346.0,67.0,1.0,16.0,67.0,0.0,1.0,0.0,7.0,8cf07265,68b3edbf,77f2f2e5,d16679b9,4cf72387,7e0ccccf,e465eb54,5b392875,a73ee510,f0c8b1be,01a88896,9f32b866,dfb2a8fa,07d13a8f,fd888b80,31ca40b6,d4bb7bd8,cf1cde40,,,dfcfc3fa,,93bad2c0,aee52b6f,, 0,7.0,164,33.0,12.0,84.0,63.0,8.0,19.0,18.0,1.0,2.0,,18.0,87773c45,58e67aaf,104c93d5,90b69619,25c83c98,7e0ccccf,e3b8f237,0b153874,a73ee510,aed3d80e,1aa6cf31,61ea5878,3b03d76e,1adce6ef,d002b6d9,33a55538,e5ba7672,c21c3e4c,444a605d,b1252a9d,37c3d851,,32c7478e,364442f6,9b3e8820,bdc8589e 0,,10,5.0,3.0,8913.0,68.0,2.0,42.0,168.0,,2.0,0.0,3.0,68fd1e64,1cfdf714,3f850fa0,db781543,25c83c98,7e0ccccf,2555b4d9,0b153874,a73ee510,f9065d00,98579192,3317996d,779f824b,d2dfe871,ca8b2a1a,bc3ccba9,27c07bd6,e88ffc9d,e27c6abe,a458ea53,6b4fc63c,,423fab69,c94ffa50,cb079c2d,d5ca783a 
0,,15,9.0,1.0,20553.0,,,12.0,,,,,4.0,05db9164,0b8e9caf,6858baef,3f647607,4cf72387,fbad5c96,b647358a,0b153874,a73ee510,3b08e48b,88731e13,f6148255,2723b688,b28479f6,5340cb84,03b5b1e2,07c540c4,ca6a63cf,,,3b66cfcf,,bcdee96c,08b0ce98,, 0,0.0,-1,,,1539.0,115.0,17.0,20.0,276.0,0.0,5.0,,,68fd1e64,287130e0,9dfde63d,9c9a6068,25c83c98,6f6d9be8,32da4b59,5b392875,a73ee510,eff5602f,9ee336c5,1310a7dd,094e10ad,b28479f6,9efd8b77,b3dc5e07,e5ba7672,891589e7,bdffef68,b1252a9d,33706b2d,,32c7478e,88cba9eb,9b3e8820,1ba54abc 0,0.0,3,,5.0,1920.0,22.0,50.0,5.0,98.0,0.0,4.0,0.0,5.0,68fd1e64,3df44d94,d032c263,c18be181,25c83c98,7e0ccccf,9ec884dc,5b392875,a73ee510,aa6da1ef,5b906b78,dfbb09fb,c95c9034,b28479f6,b96e7224,84898b2a,3486227d,79a92e0a,,,0014c32a,,bcdee96c,3b183c5c,, 0,2.0,0,6.0,2.0,70.0,10.0,248.0,1.0,1034.0,1.0,32.0,,2.0,05db9164,404660bb,f1397040,09003f7b,25c83c98,7e0ccccf,1c86e0eb,0b153874,a73ee510,67eea4ef,755e4a50,0cdb9a18,5978055e,07d13a8f,633f1661,82708081,e5ba7672,4b17f8a2,21ddcdc9,5840adea,4c14738f,,32c7478e,a86c0565,f0f449dd,984e0db0 1,,1,10.0,6.0,11665.0,,0.0,10.0,6.0,,0.0,,6.0,05db9164,38a947a1,7fd859b3,19ae4fbd,25c83c98,,16401b7d,0b153874,a73ee510,3b08e48b,20ec800a,6aa4c9a8,18a5e4b8,cfef1c29,cb0f0e06,b50d9336,1e88c74f,3c4f2d82,,,cc86f2c1,,32c7478e,1793a828,, 0,12.0,1,1.0,15.0,548.0,24.0,12.0,18.0,20.0,2.0,2.0,,16.0,05db9164,0c0567c2,700014ea,560f248f,25c83c98,7e0ccccf,fe4dce68,0b153874,a73ee510,ab9e9acf,68357db6,093a009d,768f6658,07d13a8f,aa39dd42,9e6ff465,e5ba7672,bb983d97,,,5c859cae,,32c7478e,996f5a43,, 1,0.0,152,3.0,3.0,1847.0,96.0,12.0,6.0,11.0,0.0,1.0,0.0,3.0,05db9164,4f25e98b,6d1384bc,74ce146b,4cf72387,7e0ccccf,26817995,a61cc0ef,a73ee510,cf500eab,8b92652b,a4b73157,c5bc951e,b28479f6,8ab5b746,19f6b83c,e5ba7672,7ef5affa,21ddcdc9,b1252a9d,9efd5ec7,,c7dc6720,3fdb382b,001f3601,49d68486 
0,0.0,1,9.0,0.0,6431.0,136.0,2.0,6.0,98.0,0.0,1.0,,2.0,05db9164,6887a43c,9b792af9,9c6d05a0,43b19349,,60d4eb86,e8663cb1,a73ee510,07c7b3f7,0ad37b4b,6532318c,f9d99d81,8ceecbc8,4e06592a,2c9d222f,e5ba7672,8f0f692f,21ddcdc9,b1252a9d,cc6a9262,,32c7478e,a5862ce8,445bbe3b,1793fb3f 0,,-1,,,20646.0,,0.0,5.0,8.0,,0.0,,,9a89b36c,09e68b86,0271c22e,caa16f04,25c83c98,,47aa6d2e,0b153874,a73ee510,9d4b7dce,c30e7b00,f993725b,4f8670dc,1adce6ef,dbc5e126,1c3a7247,e5ba7672,5aed7436,21ddcdc9,5840adea,4d2b0d06,,32c7478e,3fdb382b,e8b83407,8ded0b41 0,,14,3.0,2.0,306036.0,,0.0,2.0,105.0,,0.0,,2.0,68fd1e64,09e68b86,cce54c2c,6e8c7c0e,4cf72387,,c642e324,a6d156f4,7cc72ec2,b6900243,82af9502,9e82f486,90dca23e,07d13a8f,36721ddc,e3a83d5c,d4bb7bd8,5aed7436,2b558521,a458ea53,ebfa4c53,,32c7478e,a9d9c151,e8b83407,3a97b421 0,,-1,,,,,,0.0,,,,,,5a9ed9b0,38a947a1,,,4cf72387,7e0ccccf,e7698644,66f29b89,7cc72ec2,3b08e48b,f9d0f35e,,b55434a9,07d13a8f,681a3f32,,2005abd1,19ef42ad,,,,c9d4222a,be7c41b4,,, 1,1.0,2,6.0,2.0,8.0,9.0,1.0,2.0,2.0,1.0,1.0,0.0,2.0,05db9164,f0cf0024,619e87b2,cfc23926,384874ce,7e0ccccf,02914429,5b392875,a73ee510,575cd9b2,419d31d4,c0d8d575,08961fd0,1adce6ef,55dc357b,29a3715b,e5ba7672,b04e4670,21ddcdc9,a458ea53,e54f0804,,423fab69,936da3dd,ea9a246c,27029e68 0,0.0,17,34.0,11.0,1784.0,50.0,1.0,25.0,102.0,0.0,1.0,0.0,11.0,68fd1e64,e77e5e6e,fdd14ae2,8b7d76a3,25c83c98,fbad5c96,15ce37bc,0b153874,a73ee510,25e9e422,ff78732c,07cecd0e,9b656adc,f862f261,903024b9,d08de474,e5ba7672,449d6705,1d1eb838,a458ea53,26e36622,,55dd3565,3fdb382b,33d94071,49d68486 0,0.0,1,7.0,8.0,4501.0,184.0,2.0,4.0,184.0,0.0,1.0,,46.0,05db9164,58e67aaf,8b376137,270b5720,4cf72387,7e0ccccf,67b7679f,0b153874,a73ee510,19feb952,16faa766,8d526153,4422e246,b28479f6,62eca3c0,23c4fd37,07c540c4,c21c3e4c,6301e460,b1252a9d,632bf881,,bcdee96c,18109ace,9b3e8820,070f6cb2 
0,,183,3.0,3.0,5778.0,,0.0,3.0,9.0,,0.0,,3.0,39af2607,c5c1d6ae,027b4cc5,9affccc2,25c83c98,6f6d9be8,d2bfca2c,5b392875,a73ee510,3b08e48b,f72b4bd1,7e98747a,01f32ac8,07d13a8f,99153e7d,64223df7,776ce399,836a67dd,21ddcdc9,5840adea,301fc194,,be7c41b4,365def8b,7a402766,00efb483 0,,13,3.0,10.0,48.0,16.0,11.0,10.0,163.0,,3.0,0.0,6.0,05db9164,40ed0c67,61b8caf0,5ef5cf67,25c83c98,7e0ccccf,a7565058,d7c4a8f5,a73ee510,567ba666,69afd526,765cb3ea,84def884,07d13a8f,622c34d8,5c646b1e,e5ba7672,2585827d,21ddcdc9,5840adea,c4c42074,,3a171ecb,42df8359,e8b83407,c0fca43d 0,,1,25.0,22.0,39424.0,66.0,1.0,28.0,60.0,,0.0,,29.0,5a9ed9b0,9b25e48b,f25edca2,418ae7fb,25c83c98,7e0ccccf,a5a83bdd,5b392875,a73ee510,5ea6fa93,f697a983,ad46dc69,e5643e9a,07d13a8f,054ebda1,967bc626,3486227d,7d8c03aa,2442feac,a458ea53,30244f84,,c7dc6720,3a6f67d1,010f6491,f4642e0e 0,,1,13.0,3.0,5646.0,49.0,3.0,3.0,59.0,,1.0,,3.0,8cf07265,558b4efb,40361716,f2159098,25c83c98,fbad5c96,6005554a,062b5529,a73ee510,b1442b2a,c19406bc,842839b9,07fdb6cc,07d13a8f,c1ddc990,9f1d1f70,27c07bd6,c68ebaa0,21ddcdc9,5840adea,16f71b82,ad3062eb,32c7478e,3b183c5c,ea9a246c,2f44e540 1,0.0,1,2.0,2.0,1795.0,4.0,1.0,2.0,2.0,0.0,1.0,,2.0,05db9164,38a947a1,bd4d1b8d,097de257,25c83c98,,788ff59f,0b153874,a73ee510,3b08e48b,9c9d4957,3263408b,9325eab4,07d13a8f,456583e6,c57bda3a,d4bb7bd8,4b0f5ddd,,,6fb7987f,,32c7478e,9b7eed78,, 1,1.0,2,603.0,11.0,2.0,11.0,2.0,11.0,11.0,1.0,2.0,,11.0,05db9164,58e67aaf,f5cdf14a,39cc9792,4cf72387,7e0ccccf,9ff9bbde,0b153874,a73ee510,8c8662e4,f89fe102,5d84eb4a,83e6ca2e,1adce6ef,d002b6d9,a98ec356,07c540c4,c21c3e4c,c79aad78,b1252a9d,ec4a835a,,423fab69,b44bd498,9b3e8820,8fd6bdd6 1,9.0,1,39.0,6.0,48.0,14.0,13.0,30.0,68.0,2.0,4.0,,6.0,be589b51,4f25e98b,761d2b40,5f379ae0,4cf72387,fe6b92e5,9b98e9fc,0b153874,a73ee510,2a47dab8,7f8ffe57,beb94e00,46f42a63,07d13a8f,dfab705f,9066bcfb,e5ba7672,7ef5affa,49463d54,b1252a9d,822be048,c9d4222a,32c7478e,3fdb382b,001f3601,49d68486 
0,1.0,12,4.0,2.0,5.0,3.0,25.0,19.0,113.0,1.0,2.0,2.0,2.0,68fd1e64,a5b69ae3,0b793d71,813cb08c,4cf72387,7e0ccccf,468a0854,0b153874,a73ee510,3b08e48b,a60de4e5,f9bf526c,605bbc24,b28479f6,9703aa2f,9ee32e6f,8efede7f,a1654f4f,21ddcdc9,5840adea,7a380bd1,,32c7478e,08b0ce98,2bf691b1,984e0db0 0,0.0,0,21.0,5.0,2865.0,,0.0,31.0,1.0,0.0,0.0,,31.0,ae82ea21,38d50e09,01a0648b,657dc3b9,25c83c98,7e0ccccf,0c41b6a1,0b153874,a73ee510,56ef22e9,4ba74619,11fcf7fa,879fa878,07d13a8f,fa321567,5e1b6b9d,e5ba7672,52b872ed,21ddcdc9,a458ea53,bfeb50f6,,423fab69,df487a73,e8b83407,c27f155b 0,,-1,66.0,29.0,2940.0,87.0,69.0,35.0,82.0,,5.0,0.0,32.0,68fd1e64,1cfdf714,3cb0ff62,9b17f367,43b19349,7e0ccccf,e2de05d6,0b153874,a73ee510,1ce1e29d,b26d847d,59a625a9,38016f21,1adce6ef,f3002fbd,229bf6f4,3486227d,e88ffc9d,edb3d180,a458ea53,5362f5c3,,423fab69,f20c047e,cb079c2d,0facb2ea 1,,370,,3.0,357.0,,0.0,4.0,5.0,,0.0,,3.0,68fd1e64,2ae0a573,af21d90e,dc0a11c7,4cf72387,,ed0714a0,1f89b562,a73ee510,f1b39deb,b85b416c,a4425bd8,c3f71b59,07d13a8f,413cc8c6,41bec2fe,d4bb7bd8,f2fc99b1,,,95ee3d7a,,32c7478e,7836b4d5,, 0,0.0,237,1.0,1.0,4619.0,53.0,17.0,16.0,272.0,0.0,1.0,,1.0,f473b8dc,89ddfee8,f153af65,13508380,25c83c98,3bf701e7,c96de117,37e4aa92,a73ee510,995c2a7f,ad757a5a,99ec4e40,93b18cb5,07d13a8f,59a58e86,13ede1b5,3486227d,ae46962e,55dd3565,b1252a9d,8a93f0a1,ad3062eb,423fab69,45ab94c8,f0f449dd,c84c4aec 0,,0,2.0,3.0,10327.0,648.0,11.0,3.0,127.0,,3.0,,3.0,39af2607,68b3edbf,ad4b77ff,d16679b9,25c83c98,7e0ccccf,b00f5963,c8ddd494,a73ee510,ac82cac0,b91c2548,a2f4e8b5,a03da696,b28479f6,12f48803,89052618,e5ba7672,cf1cde40,,,d4703ebd,,bcdee96c,aee52b6f,, 1,,3,,24.0,1853.0,36.0,10.0,9.0,175.0,,2.0,,24.0,05db9164,38a947a1,03689820,21817e80,25c83c98,7e0ccccf,50a5390e,0b153874,a73ee510,0466803a,159499d1,79b98d3d,4ab361e1,b28479f6,72f85ad5,8e47fca6,e5ba7672,5ba7fffe,,,15fb7955,,32c7478e,71dc4ef2,, 
0,4.0,1,2.0,17.0,7.0,4.0,4.0,18.0,18.0,1.0,1.0,3.0,3.0,05db9164,0a519c5c,77f2f2e5,d16679b9,43b19349,fbad5c96,c78204a1,0b153874,a73ee510,3b08e48b,5f5e6091,9f32b866,aa655a2f,07d13a8f,b812f9f2,31ca40b6,27c07bd6,2efa89c6,,,dfcfc3fa,,3a171ecb,aee52b6f,, 0,0.0,10,1.0,0.0,5781.0,164.0,5.0,6.0,160.0,0.0,5.0,,5.0,8cf07265,e112a9de,af5655e7,22504558,4cf72387,7e0ccccf,133643ef,0b153874,a73ee510,64145819,84bc66d0,252162ec,bcb2e77c,1adce6ef,11da3cff,776f5665,e5ba7672,a7cf409e,,,5c7c443c,,32c7478e,8f079aa5,, 0,,2,2.0,3.0,3379.0,,0.0,5.0,4.0,,0.0,,3.0,09ca0b81,287130e0,20fb5e45,aafb54fa,25c83c98,fbad5c96,bf115338,56563555,a73ee510,3b08e48b,41516dc9,2ea11a49,8b11c4b8,1adce6ef,310d155b,b9a4d133,776ce399,891589e7,f30f7842,a458ea53,86a8e85e,c9d4222a,be7c41b4,bc491035,e8b83407,bd2ec696 0,0.0,1,7.0,12.0,3011.0,126.0,5.0,41.0,121.0,0.0,2.0,,12.0,be589b51,d833535f,77f2f2e5,d16679b9,43b19349,fe6b92e5,6978304f,0b153874,a73ee510,fbbf2c95,78f92234,9f32b866,9be66b48,b28479f6,a66dcf27,31ca40b6,e5ba7672,7b49e3d2,,,dfcfc3fa,,3a171ecb,aee52b6f,, 1,2.0,1,3.0,1.0,63.0,1.0,21.0,2.0,108.0,2.0,9.0,2.0,1.0,68fd1e64,e5fb1af3,be0a348d,e0e934af,25c83c98,13718bbd,372a0c4c,0b153874,a73ee510,e8e8c8ac,ec88dd34,7ac672aa,94881fc3,07d13a8f,b5de5956,e3d99bf0,27c07bd6,13145934,42e59f55,5840adea,8f78192f,,3a171ecb,198d16cc,e8b83407,0e2018ec 0,,1,3.0,1.0,563.0,,0.0,5.0,3.0,,0.0,,1.0,05db9164,55e0a784,5b54e5b4,c5699aad,25c83c98,7e0ccccf,dcab49d9,0b153874,a73ee510,34dd9626,cd3a0eb4,c492212b,715b22a3,07d13a8f,45e17a48,1f55226d,1e88c74f,6c5555bd,21ddcdc9,b1252a9d,99712f38,,423fab69,167193c9,e8b83407,ae5fce01 0,,1,4.0,2.0,8684.0,11.0,1.0,3.0,7.0,,1.0,,2.0,05db9164,e5fb1af3,c8b80f97,311f127a,25c83c98,fe6b92e5,372a0c4c,0b153874,a73ee510,6f0b6a04,2e15139e,9ffdd484,94881fc3,07d13a8f,b5de5956,5891d119,d4bb7bd8,13145934,cc4c70c1,a458ea53,cd11300e,ad3062eb,3a171ecb,cf300ce9,001f3601,814b9a6b 
0,8.0,1,3.0,14.0,351.0,50.0,8.0,35.0,37.0,1.0,1.0,,18.0,05db9164,e9b8a266,be3b6a18,62169fb6,0942e0a7,7e0ccccf,d55d70ca,5b392875,a73ee510,1d56e466,9cf09d42,6647ec34,f66b043c,b28479f6,fb67e61d,236709b9,e5ba7672,d452c287,,,77799c4f,c9d4222a,32c7478e,5fd07f39,, 1,0.0,-1,,,1398.0,0.0,1.0,0.0,0.0,0.0,1.0,,,05db9164,512fdf0c,98bb788f,e0a2ecca,0942e0a7,7e0ccccf,d01ba955,7b6fecd5,a73ee510,3b08e48b,c0edaa76,167ba71f,34fc0029,07d13a8f,aa322bcf,5e622e84,d4bb7bd8,fd3919f9,21ddcdc9,5840adea,43d01030,,c7dc6720,4acb8523,724b04da,c986348f 1,,74,3.0,4.0,17991.0,32.0,11.0,9.0,98.0,,10.0,,4.0,5a9ed9b0,8947f767,9ea04474,2b0aadf8,25c83c98,6f6d9be8,368f84ee,0b153874,a73ee510,3b08e48b,6dc69f41,4640585e,fca56425,f7c1b33f,7f758956,d8831736,e5ba7672,bd17c3da,bf212c4c,b1252a9d,d4f22efc,,32c7478e,0ac1b18a,010f6491,6d73203e 0,,38,14.0,46.0,6426.0,888.0,12.0,9.0,862.0,,1.0,,46.0,05db9164,95e2d337,0d71b822,3fb81b62,30903e74,7e0ccccf,8f572b5e,0b153874,a73ee510,897188be,434d6c13,28283f53,7301027a,b28479f6,17a3bcd8,9e724f87,e5ba7672,7b06fafe,21ddcdc9,5840adea,07b818d7,,c7dc6720,b2df17ed,c243e98b,33757f80 0,0.0,1,,2.0,14496.0,895.0,3.0,7.0,58.0,0.0,1.0,,2.0,05db9164,9a82ab91,d032c263,c18be181,25c83c98,7e0ccccf,d9f4e70f,0b153874,a73ee510,27f4bf82,da89cb9b,dfbb09fb,165642be,07d13a8f,33d2c881,84898b2a,07c540c4,004fdf10,,,0014c32a,,32c7478e,3b183c5c,, 0,0.0,14,15.0,11.0,4108.0,125.0,4.0,35.0,111.0,0.0,1.0,,14.0,05db9164,e3a0dc66,2ba709bb,7be47200,25c83c98,fe6b92e5,8a850658,0b153874,a73ee510,3094253e,d9b1e3ff,fa5eca9d,cd98af01,07d13a8f,c251e774,22283336,e5ba7672,b608c073,,,fd0e41ce,c9d4222a,c7dc6720,f2e9f0dd,, 1,,18,23.0,,42024.0,,,0.0,,,,,,05db9164,09e68b86,aa8c1539,85dd697c,25c83c98,,b87f4a4a,5b392875,a73ee510,e70742b0,319687c9,d8c29807,62036f49,07d13a8f,801ee1ae,c64d548f,e5ba7672,63cdbb21,cf99e5de,5840adea,5f957280,,32c7478e,1793a828,e8b83407,b7d9c3bc 
1,1.0,2,76.0,4.0,0.0,4.0,1.0,4.0,4.0,1.0,1.0,,4.0,05db9164,38a947a1,f1a544c6,9c65ce26,25c83c98,fbad5c96,df5c2d18,0b153874,a73ee510,903f1f14,a7b606c4,8f1a16da,eae197fd,b28479f6,b842e9bb,789e0e3e,e5ba7672,38f08461,,,79fe2943,,bcdee96c,325bcd40,, 0,1.0,0,29.0,5.0,40.0,5.0,1.0,5.0,5.0,1.0,1.0,,5.0,8cf07265,09e68b86,8530c58f,abfc27b2,25c83c98,,197b4575,0b153874,a73ee510,6c47047a,606866a9,8a433ec1,e40e52ae,64c94865,91126f30,cc93bd1d,d4bb7bd8,5aed7436,6d82104d,a458ea53,c1429b47,,3a171ecb,a0634086,e8b83407,9c015713 0,1.0,2921,,0.0,48.0,17.0,20.0,10.0,84.0,1.0,2.0,1.0,0.0,39af2607,4f25e98b,b0874fd0,b696e406,25c83c98,fbad5c96,dc7659bd,0b153874,a73ee510,03e48276,e51ddf94,6536f6f8,3516f6e6,b28479f6,8ab5b746,271d5b6c,27c07bd6,7ef5affa,21ddcdc9,a458ea53,a716bbe2,,3a171ecb,3fdb382b,001f3601,a39e1586 0,,55,10.0,12.0,299.0,,0.0,23.0,26.0,,0.0,,26.0,17f69355,38a947a1,4470baf4,8c8a4c47,25c83c98,7e0ccccf,2a37bb01,5b392875,a73ee510,3b08e48b,61ba19ac,bb669e25,fa17cc68,b28479f6,a3443e75,2b2ce127,776ce399,ade68c22,,,2b796e4a,ad3062eb,be7c41b4,8d365d3b,, 0,2.0,8,6.0,3.0,5.0,3.0,25.0,11.0,722.0,1.0,6.0,,3.0,05db9164,09e68b86,57231f4a,c38a1d7d,25c83c98,fbad5c96,968a6688,0b153874,a73ee510,e851ff7b,f25fe7e9,2849c511,dd183b4c,f7c1b33f,5726b2dc,2b7f6e55,e5ba7672,5aed7436,4a237258,b1252a9d,fd3ca145,c9d4222a,32c7478e,0ea7be91,e8b83407,f610730e 1,1.0,493,155.0,2.0,1.0,0.0,8.0,7.0,45.0,1.0,7.0,,0.0,68fd1e64,78ccd99e,ac203f6f,13508380,25c83c98,7e0ccccf,e24d7cb8,0b153874,a73ee510,6f07d986,03458ded,2d72bfb9,8019075f,07d13a8f,162f3329,eedd265a,e5ba7672,e7e991cb,21ddcdc9,b1252a9d,56b58097,c9d4222a,423fab69,45ab94c8,e8b83407,c84c4aec 0,,35,,,293044.0,,,7.0,,,,,,05db9164,38a947a1,1678e0d8,bd6ffe0f,25c83c98,7e0ccccf,e2ec9176,0b153874,7cc72ec2,3b08e48b,6fc6ad29,704629a2,b0c30eeb,b28479f6,443b0c0b,809c9e0e,e5ba7672,f0959f21,,,6a41d841,,be7c41b4,0ee762c3,, 
0,,8,8.0,12.0,39343.0,1820.0,0.0,19.0,318.0,,0.0,,12.0,05db9164,d57c0709,d032c263,c18be181,25c83c98,7e0ccccf,122c542a,0b153874,a73ee510,801e8634,7fee217f,dfbb09fb,6e2907f1,cfef1c29,487ddf17,84898b2a,e5ba7672,3ae505af,,,0014c32a,,423fab69,3b183c5c,, 0,5.0,0,1.0,,92.0,0.0,5.0,0.0,0.0,1.0,1.0,,,05db9164,78ccd99e,bf30cf68,49c94103,30903e74,7e0ccccf,a1eeac3d,1f89b562,a73ee510,12bb8262,2e9d5aa6,975f89b0,0a9ac04c,f862f261,ada14dd8,a9b56248,e5ba7672,e7e991cb,21ddcdc9,a458ea53,0d7a15fd,,32c7478e,fb890da1,33d94071,86174332 1,,0,1.0,,19088.0,11.0,11.0,0.0,89.0,,2.0,,,68fd1e64,c5fe64d9,01ac13ea,f6dbd8fb,4cf72387,6f6d9be8,6cdb3998,062b5529,a73ee510,b173a655,5874c9c9,16a886e7,740c210d,07d13a8f,52b49730,a249bde3,e5ba7672,c235abed,f30f7842,a458ea53,c4b9fb56,8ec974f4,32c7478e,44aeb111,33d94071,df46df55 0,,248,1.0,1.0,79620.0,,,1.0,,,,,1.0,da4eff0f,d833535f,77f2f2e5,d16679b9,25c83c98,fe6b92e5,8f801a1a,1f89b562,7cc72ec2,3b08e48b,f295b28a,9f32b866,f5df7ab9,07d13a8f,943169c2,31ca40b6,d4bb7bd8,281769c2,,,dfcfc3fa,,3a171ecb,aee52b6f,, 0,0.0,0,3.0,2.0,3150.0,21.0,4.0,3.0,24.0,0.0,2.0,,2.0,05db9164,80e26c9b,e346a5fd,85dd697c,4cf72387,,55fc227e,0b153874,a73ee510,b1aa986c,d8d7567b,539c5644,47d6a934,b28479f6,a785131a,aafa191e,e5ba7672,005c6740,21ddcdc9,5840adea,7e5b7cc4,,32c7478e,1793a828,e8b83407,b9809574 0,,0,10.0,2.0,41706.0,84.0,0.0,5.0,49.0,,0.0,,2.0,8cf07265,942f9a8d,d1ffd05c,9df780c1,25c83c98,7e0ccccf,49b74ebc,1f89b562,a73ee510,0e9ead52,c4adf918,f0c1019c,85dbe138,b28479f6,ac182643,52bee03d,d4bb7bd8,1f868fdd,5b885066,a458ea53,35198a67,ad3062eb,32c7478e,30ab4eb4,e8b83407,85fd868a 1,4.0,-1,6.0,6.0,872.0,31.0,37.0,42.0,334.0,1.0,16.0,,6.0,8cf07265,d4bd9877,a55127b0,90044821,4cf72387,3bf701e7,6a858837,0b153874,a73ee510,3b08e48b,eb9eb939,a0015d5d,2b54e95d,07d13a8f,10139ce3,b458da0e,e5ba7672,62acb0f3,,,d7a43622,,423fab69,dcba8699,, 
0,,38,,,43205.0,680.0,0.0,2.0,20.0,,0.0,0.0,,68fd1e64,2c8c5f5d,0f09a700,38aca36b,4cf72387,fbad5c96,91282309,0b153874,7cc72ec2,dcbc7c2b,9e511730,25644e7d,04e4a7e0,64c94865,c1124d0c,4c7535f3,3486227d,f5f4ae5b,,,5b6b6b73,,3a171ecb,1793a828,, 0,,0,6.0,6.0,124027.0,,0.0,5.0,19.0,,0.0,,6.0,05db9164,38a947a1,acbabfa5,187dc42d,25c83c98,fbad5c96,e14874c9,51d76abe,7cc72ec2,ff5a1549,636405ac,8d2c704a,31b42deb,07d13a8f,55808bb2,c66a58da,e5ba7672,824dcc94,,,9308de7e,ad3062eb,3a171ecb,9d8b4082,, 1,2.0,6,,,300.0,25.0,2.0,25.0,68.0,1.0,1.0,,,5a9ed9b0,38a947a1,b1b6f323,be4cb064,25c83c98,7e0ccccf,00dd27a6,0b153874,a73ee510,98bd7a24,55065437,d28c687a,80dcea18,1adce6ef,fc42663d,f2a191bd,e5ba7672,c9da8737,,,5911ddcb,,32c7478e,1335030a,, 0,,27,,,112878.0,2106.0,0.0,2.0,95.0,,0.0,,,5a9ed9b0,38a947a1,2d8004c4,40ed41e5,25c83c98,7e0ccccf,4d9d55ae,5b392875,7cc72ec2,3b08e48b,55065437,ad972965,80dcea18,07d13a8f,c68ba31d,1206a8a1,d4bb7bd8,e96a7df2,,,54d8bb06,,3a171ecb,a415643d,, 0,0.0,3001,2.0,,3134.0,47.0,1.0,0.0,1.0,0.0,1.0,0.0,,05db9164,403ea497,2cbec47f,3e2bfbda,25c83c98,,19672560,0b153874,a73ee510,a8d1ae09,2591ca7a,21a23bfe,9b7d472e,07d13a8f,e3209fc2,587267a3,3486227d,a78bd508,21ddcdc9,5840adea,c2a93b37,,c7dc6720,1793a828,e8b83407,2fede552 1,0.0,179,5.0,1.0,1464.0,6.0,70.0,6.0,16.0,0.0,10.0,,3.0,68fd1e64,404660bb,f1397040,09003f7b,25c83c98,7e0ccccf,1c86e0eb,5b392875,a73ee510,67eea4ef,755e4a50,0cdb9a18,5978055e,1adce6ef,6ddbba94,82708081,e5ba7672,4b17f8a2,21ddcdc9,5840adea,4c14738f,,32c7478e,a86c0565,f0f449dd,984e0db0 1,,1,7.0,2.0,2910.0,2.0,301.0,3.0,54.0,,15.0,0.0,2.0,8cf07265,942f9a8d,3a3d6eeb,eabe170f,25c83c98,6f6d9be8,49b74ebc,0b153874,a73ee510,0e9ead52,c4adf918,a66cfe4b,85dbe138,07d13a8f,a8e962af,a3d7b1d6,e5ba7672,1f868fdd,fc134659,a458ea53,bbcf650c,,32c7478e,75b9c133,9d93af03,e438a496 
0,0.0,0,8.0,6.0,125.0,122.0,5.0,34.0,107.0,0.0,3.0,,24.0,5a9ed9b0,c5e4f7c9,,,25c83c98,7e0ccccf,95402f9a,64523cfa,a73ee510,5162b19c,c82f1813,,949ea585,b28479f6,b16ae607,,e5ba7672,ac02dc99,,,,c9d4222a,32c7478e,,, 0,0.0,0,5.0,6.0,6461.0,93.0,19.0,7.0,37.0,0.0,1.0,1.0,7.0,68fd1e64,09e68b86,5f8d9359,2628b8d6,25c83c98,13718bbd,53e14bd5,0b153874,a73ee510,97d3ddaa,319687c9,de2ecc9c,62036f49,cfef1c29,18847041,62675893,3486227d,5aed7436,b1fb78cc,a458ea53,be01d6b1,,3a171ecb,b1aad66f,e8b83407,3df61e3d 1,0.0,2,1.0,11.0,2119.0,79.0,6.0,2.0,114.0,0.0,3.0,1.0,11.0,05db9164,2ae0a573,4993b2b2,9ab05b8f,25c83c98,7e0ccccf,9e8dab66,0b153874,a73ee510,5ba575e7,2d9eed4d,bdf9cff8,949ea585,07d13a8f,413cc8c6,fb2ac6b5,3486227d,f2fc99b1,,,0fbced35,ad3062eb,32c7478e,d91ea8bd,, 0,0.0,17,5.0,7.0,6288.0,,0.0,42.0,1.0,0.0,0.0,,35.0,5a9ed9b0,62e9e9bf,,,25c83c98,7e0ccccf,f74ed3c0,0b153874,a73ee510,39046df2,e90cbbe1,,a4c7bffd,07d13a8f,de829bed,,e5ba7672,d2651d6e,,,,,32c7478e,,, 0,,2,23.0,20.0,148.0,,0.0,20.0,20.0,,0.0,,20.0,68fd1e64,09e68b86,7edab412,f1d06e8a,43b19349,,16401b7d,0b153874,a73ee510,3b08e48b,20ec800a,0a02e48e,18a5e4b8,1adce6ef,dbc5e126,e2bc04da,776ce399,5aed7436,0053530c,a458ea53,1de5dd94,,32c7478e,43fe299c,f0f449dd,f3b1f00d 0,,19,535.0,7.0,61968.0,,0.0,7.0,2.0,,0.0,,7.0,05db9164,8ab240be,145f2f75,82a61820,25c83c98,7e0ccccf,ff08f605,0b153874,7cc72ec2,ec4d75ea,6939835e,7161e106,dc1d72e4,1adce6ef,28883800,bb6d240e,e5ba7672,ca533012,21ddcdc9,5840adea,5fe17899,,72592995,cafb4e4d,e8b83407,99f4f64c 0,,0,113.0,3.0,3036.0,575.0,2.0,3.0,214.0,,1.0,,3.0,05db9164,0468d672,628b07b0,b63c0277,25c83c98,7e0ccccf,0d339a25,c8ddd494,a73ee510,1722d4c8,7d756b25,0c87b3e9,6f833c7a,1adce6ef,4f3b3616,48af915a,07c540c4,9880032b,21ddcdc9,5840adea,34cc61bb,c9d4222a,32c7478e,e5ed7da2,ea9a246c,984e0db0 
1,0.0,1,1.0,1.0,1607.0,12.0,1.0,12.0,15.0,0.0,1.0,,12.0,be589b51,aa8fcc21,4255f8fd,7501d94a,25c83c98,fe6b92e5,0492c809,1f89b562,a73ee510,13ba96b0,ba0f9e8a,887a0c20,4e4dd817,07d13a8f,a4f91020,022714ba,1e88c74f,3972b4ed,,,d1aa4512,,32c7478e,9257f75f,, 1,1.0,0,6.0,3.0,0.0,0.0,19.0,3.0,3.0,1.0,9.0,0.0,0.0,05db9164,09e68b86,db151f8b,f1b645fc,25c83c98,,b87f4a4a,0b153874,a73ee510,e70742b0,319687c9,af6ad6b6,62036f49,f862f261,1dca7862,05a97a3c,3486227d,5aed7436,54591762,a458ea53,4a2c3526,,32c7478e,1793a828,e8b83407,1a02cbe1 0,0.0,22,6.0,22.0,203.0,153.0,80.0,18.0,508.0,0.0,11.0,0.0,22.0,05db9164,e5fb1af3,7e1ad1fe,46ec0a38,43b19349,7e0ccccf,24c48926,0b153874,a73ee510,afa26c81,9f0003f4,651d80c6,5afd9e51,07d13a8f,b5de5956,72401022,3486227d,13145934,55dd3565,5840adea,bf647035,,32c7478e,1481ceb4,e8b83407,988b0775 0,1.0,-1,,,138.0,0.0,1.0,0.0,0.0,1.0,1.0,,,be589b51,b46aceb6,,,43b19349,,17cdc396,0b153874,a73ee510,75d852fc,d79cc967,,115d29f4,07d13a8f,217d99f2,,d4bb7bd8,908eaeb8,,,,,32c7478e,,, ================================================ FILE: examples/gen_tfrecords.py ================================================ import tensorflow as tf def make_example(line, sparse_feature_name, dense_feature_name, label_name): features = {feat: tf.train.Feature(int64_list=tf.train.Int64List(value=[int(line[1][feat])])) for feat in sparse_feature_name} features.update( {feat: tf.train.Feature(float_list=tf.train.FloatList(value=[line[1][feat]])) for feat in dense_feature_name}) features[label_name] = tf.train.Feature(float_list=tf.train.FloatList(value=[line[1][label_name]])) return tf.train.Example(features=tf.train.Features(feature=features)) def write_tfrecord(filename, df, sparse_feature_names, dense_feature_names, label_name): writer = tf.python_io.TFRecordWriter(filename) for line in df.iterrows(): ex = make_example(line, sparse_feature_names, dense_feature_names, label_name) writer.write(ex.SerializeToString()) writer.close() # 
write_tfrecord('./criteo_sample.tr.tfrecords',train,sparse_features,dense_features,'label') # write_tfrecord('./criteo_sample.te.tfrecords',test,sparse_features,dense_features,'label') ================================================ FILE: examples/movielens_age_vocabulary.csv ================================================ 1,1 2,18 3,25 4,35 5,45 6,50 7,56 ================================================ FILE: examples/movielens_sample.txt ================================================ user_id,movie_id,rating,timestamp,title,genres,gender,age,occupation,zip 3299,235,4,968035345,Ed Wood (1994),Comedy|Drama,F,25,4,19119 3630,3256,3,966536874,Patriot Games (1992),Action|Thriller,M,18,4,77005 517,105,4,976203603,"Bridges of Madison County, The (1995)",Drama|Romance,F,25,14,55408 785,2115,3,975430389,Indiana Jones and the Temple of Doom (1984),Action|Adventure,M,18,19,29307 5848,909,5,957782527,"Apartment, The (1960)",Comedy|Drama,M,50,20,20009 2996,2799,1,972769867,Problem Child 2 (1991),Comedy,M,18,0,63011 3087,837,5,969738869,Matilda (1996),Children's|Comedy,F,1,1,90802 872,3092,5,975273310,Chushingura (1962),Drama,M,50,1,20815 4094,529,5,966223349,Searching for Bobby Fischer (1993),Drama,M,25,17,49017 1868,3508,3,974694703,"Outlaw Josey Wales, The (1976)",Western,M,50,11,92346 2913,1387,5,971769808,Jaws (1975),Action|Horror,F,35,20,98119 380,3481,5,976316283,High Fidelity (2000),Comedy,M,25,2,92024 2073,1784,5,974759084,As Good As It Gets (1997),Comedy|Drama,F,18,4,13148 80,2059,3,977788576,"Parent Trap, The (1998)",Children's|Drama,M,56,1,49327 3679,2557,1,976298130,I Stand Alone (Seul contre tous) (1998),Drama,M,25,4,68108 2077,788,3,980013556,"Nutty Professor, The (1996)",Comedy|Fantasy|Romance|Sci-Fi,M,18,0,55112 6036,2085,4,956716684,101 Dalmatians (1961),Animation|Children's,F,25,15,32603 3675,532,3,966363610,Serial Mom (1994),Comedy|Crime|Horror,M,35,7,06680 4566,3683,4,964489599,Blood Simple (1984),Drama|Film-Noir,M,35,17,19473 2996,3763,3,972413564,F/X 
(1986),Action|Crime|Thriller,M,18,0,63011 5831,2458,1,957898337,Armed and Dangerous (1986),Comedy|Crime,M,25,1,92120 1869,1244,2,974695654,Manhattan (1979),Comedy|Drama|Romance,M,45,14,95148 5389,2657,3,960328279,"Rocky Horror Picture Show, The (1975)",Comedy|Horror|Musical|Sci-Fi,M,45,7,01905 1391,1535,3,974851275,Love! Valour! Compassion! (1997),Drama|Romance,M,35,15,20723 3123,2407,3,969324381,Cocoon (1985),Comedy|Sci-Fi,M,25,2,90401 4694,159,3,963602574,Clockers (1995),Drama,M,56,7,40505 1680,1988,3,974709821,Hello Mary Lou: Prom Night II (1987),Horror,M,25,20,95380 2002,1945,4,974677761,On the Waterfront (1954),Crime|Drama,F,56,13,02136-1522 3430,2690,4,979949863,"Ideal Husband, An (1999)",Comedy,F,45,1,15208 425,471,4,976284972,"Hudsucker Proxy, The (1994)",Comedy|Romance,M,25,12,55303 1841,2289,2,974699637,"Player, The (1992)",Comedy|Drama,M,18,0,95037 4964,2348,4,962619587,Sid and Nancy (1986),Drama,M,35,0,94110 4520,2160,4,964883648,Rosemary's Baby (1968),Horror|Thriller,M,25,4,45810 1265,2396,4,1011716691,Shakespeare in Love (1998),Comedy|Romance,F,18,20,49321 2496,1278,5,974435324,Young Frankenstein (1974),Comedy|Horror,M,50,1,37932 5511,2174,4,959787754,Beetlejuice (1988),Comedy|Fantasy,M,45,1,92407 621,833,1,975799925,High School High (1996),Comedy,M,18,4,93560 3045,2762,5,970189524,"Sixth Sense, The (1999)",Thriller,M,45,1,90631 2050,2546,4,975522689,"Deep End of the Ocean, The (1999)",Drama,F,35,3,99504 613,32,4,975812238,Twelve Monkeys (1995),Drama|Sci-Fi,M,35,20,10562 366,1077,5,978471241,Sleeper (1973),Comedy|Sci-Fi,M,50,15,55126 5108,367,4,962338215,"Mask, The (1994)",Comedy|Crime|Fantasy,F,25,9,93940 4502,1960,4,965094644,"Last Emperor, The (1987)",Drama|War,M,50,0,01379 5512,1801,5,959713840,"Man in the Iron Mask, The (1998)",Action|Drama|Romance,F,25,17,01701 1861,2642,2,974699627,Superman III (1983),Action|Adventure|Sci-Fi,M,50,16,92129 1667,1240,4,975016698,"Terminator, The (1984)",Action|Sci-Fi|Thriller,M,50,16,98516 
753,434,3,975460449,Cliffhanger (1993),Action|Adventure|Crime,M,1,10,42754 1836,2736,5,974826228,Brighton Beach Memoirs (1986),Comedy,M,25,0,10016 5626,474,5,959052158,In the Line of Fire (1993),Action|Thriller,M,56,16,32043 1601,1396,4,978576948,Sneakers (1992),Crime|Drama|Sci-Fi,M,25,12,83001 4725,1100,4,963369546,Days of Thunder (1990),Action|Romance,M,35,5,96707-1321 2837,2396,5,972571456,Shakespeare in Love (1998),Comedy|Romance,M,18,0,49506 1776,3882,4,1001558470,Bring It On (2000),Comedy,M,25,0,45801 2820,457,2,972662398,"Fugitive, The (1993)",Action|Thriller,F,35,0,02138 1834,2288,3,1038179198,"Thing, The (1982)",Action|Horror|Sci-Fi|Thriller,M,35,5,10990 284,2716,4,976570902,Ghostbusters (1984),Comedy|Horror,M,25,12,91910 2744,588,1,973215985,Aladdin (1992),Animation|Children's|Comedy|Musical,M,18,17,53818 881,4,2,975264028,Waiting to Exhale (1995),Comedy|Drama,M,18,14,76401 2211,916,3,974607067,Roman Holiday (1953),Comedy|Romance,M,45,6,01950 2271,2671,4,1007158806,Notting Hill (1999),Comedy|Romance,M,50,14,13210 1010,2953,1,975222613,Home Alone 2: Lost in New York (1992),Children's|Comedy,M,25,0,10310 1589,2594,4,974735454,Open Your Eyes (Abre los ojos) (1997),Drama|Romance|Sci-Fi,M,25,0,95136 1724,597,5,976441106,Pretty Woman (1990),Comedy|Romance,M,18,4,00961 2590,2097,3,973840056,Something Wicked This Way Comes (1983),Children's|Horror,M,18,4,94044 1717,1352,3,1009256707,Albino Alligator (1996),Crime|Thriller,F,50,6,30307 1391,3160,2,974850796,Magnolia (1999),Drama,M,35,15,20723 1941,1263,3,974954220,"Deer Hunter, The (1978)",Drama|War,M,35,17,94550 3526,2867,4,966906064,Fright Night (1985),Comedy|Horror,M,35,2,62263-3004 5767,198,3,958192148,Strange Days (1995),Action|Crime|Sci-Fi,M,25,2,75287 5355,590,4,960596927,Dances with Wolves (1990),Adventure|Drama|Western,M,56,0,78232 5788,156,4,958108785,Blue in the Face (1995),Comedy,M,25,0,92646 1078,1307,4,974938851,When Harry Met Sally... 
(1989),Comedy|Romance,F,45,9,95661 3808,61,2,965973222,Eye for an Eye (1996),Drama|Thriller,M,25,7,60010 974,3897,4,975106398,Almost Famous (2000),Comedy|Drama,M,35,19,94930 5153,1290,4,961972292,Some Kind of Wonderful (1987),Drama|Romance,M,25,7,60046 5732,2115,3,958434069,Indiana Jones and the Temple of Doom (1984),Action|Adventure,F,25,11,02111 4627,2478,3,964110136,Three Amigos! (1986),Comedy|Western,M,56,1,45224 1884,1831,2,975648062,Lost in Space (1998),Action|Sci-Fi|Thriller,M,45,20,93108 4284,517,4,965277546,Rising Sun (1993),Action|Drama|Mystery,M,50,7,40601 1383,468,2,975979732,"Englishman Who Went Up a Hill, But Came Down a Mountain, The (1995)",Comedy|Romance,F,25,7,19806 2230,2873,3,974599097,Lulu on the Bridge (1998),Drama|Mystery|Romance,F,45,1,60302 2533,2266,4,974055724,"Butcher's Wife, The (1991)",Comedy|Romance,F,25,3,49423 6040,3224,5,956716750,Woman in the Dunes (Suna no onna) (1964),Drama,M,25,6,11106 4384,2918,5,965171739,Ferris Bueller's Day Off (1986),Comedy,M,25,0,43623 5156,3688,3,961946487,Porky's (1981),Comedy,M,18,14,10024 615,296,3,975805801,Pulp Fiction (1994),Crime|Drama,M,50,17,32951 2753,3045,3,973198964,Peter's Friends (1992),Comedy|Drama,F,50,20,27516 2438,1125,5,974259943,"Return of the Pink Panther, The (1974)",Comedy,M,35,1,22903 5746,1242,4,958354460,Glory (1989),Action|Drama|War,M,18,15,94061 5157,3462,5,961944604,Modern Times (1936),Comedy,M,35,1,74012 3402,1252,5,967433929,Chinatown (1974),Film-Noir|Mystery|Thriller,M,35,20,30306 76,593,5,977847255,"Silence of the Lambs, The (1991)",Drama|Thriller,M,35,7,55413 2067,1019,3,974658834,"20,000 Leagues Under the Sea (1954)",Adventure|Children's|Fantasy|Sci-Fi,M,50,16,06430 2181,2020,3,979353437,Dangerous Liaisons (1988),Drama|Romance,M,25,0,45245 3947,593,5,965691680,"Silence of the Lambs, The (1991)",Drama|Thriller,M,25,0,90019 546,218,4,976069421,Boys on the Side (1995),Comedy|Drama,F,25,0,37211 1246,3030,5,1032056405,Yojimbo (1961),Comedy|Drama|Western,M,18,4,98225 
4214,3186,5,965319143,"Girl, Interrupted (1999)",Drama,F,25,0,20121 2841,680,3,982805796,Alphaville (1965),Sci-Fi,M,50,12,98056 4205,3175,4,965321085,Galaxy Quest (1999),Adventure|Comedy|Sci-Fi,F,25,15,87801 1120,1097,4,974911354,E.T. the Extra-Terrestrial (1982),Children's|Drama|Fantasy|Sci-Fi,M,18,4,95616 5371,3194,3,960481000,"Way We Were, The (1973)",Drama,M,25,11,55408 2695,1278,5,973310827,Young Frankenstein (1974),Comedy|Horror,M,35,11,46033 3312,520,2,976673070,Robin Hood: Men in Tights (1993),Comedy,F,18,4,90039 5039,1792,1,962513044,U.S. Marshalls (1998),Action|Thriller,F,35,4,97068 4655,2146,3,963903103,St. Elmo's Fire (1985),Drama|Romance,F,25,1,92037 3558,1580,5,966802528,Men in Black (1997),Action|Adventure|Comedy|Sci-Fi,M,18,17,66044 506,3354,1,976208080,Mission to Mars (2000),Sci-Fi,M,25,16,55103-1006 3568,1230,3,966745594,Annie Hall (1977),Comedy|Romance,M,25,0,98503 2943,1197,5,971319983,"Princess Bride, The (1987)",Action|Adventure|Comedy|Romance,M,35,12,95864 716,737,3,982881364,Barb Wire (1996),Action|Sci-Fi,M,18,4,98188 5964,454,3,956999469,"Firm, The (1993)",Drama|Thriller,M,18,5,97202 4802,1208,4,996034747,Apocalypse Now (1979),Drama|War,M,56,1,40601 1106,3624,4,974920622,Shanghai Noon (2000),Action,M,18,4,90241 3410,2565,3,967419652,"King and I, The (1956)",Musical,M,35,1,20653 1273,3095,5,974814536,"Grapes of Wrath, The (1940)",Drama,M,35,2,19123 1706,1916,4,974709448,Buffalo 66 (1998),Action|Comedy|Drama,M,25,20,19134 4889,590,5,962909224,Dances with Wolves (1990),Adventure|Drama|Western,M,18,4,63108 4966,2100,3,962609782,Splash (1984),Comedy|Fantasy|Romance,M,50,14,55407 4238,1884,4,965343416,Fear and Loathing in Las Vegas (1998),Comedy|Drama,M,35,16,44691 5365,1042,3,960502974,That Thing You Do! 
(1996),Comedy,M,18,12,90250 415,1302,3,977501743,Field of Dreams (1989),Drama,F,35,0,55406 4658,1009,5,963966553,Escape to Witch Mountain (1975),Adventure|Children's|Fantasy,M,25,4,99163 854,345,3,975357801,"Adventures of Priscilla, Queen of the Desert, The (1994)",Comedy|Drama,F,25,16,44092 2857,436,4,972509362,Color of Night (1994),Drama|Thriller,M,25,0,10469 1835,1330,4,974878241,April Fool's Day (1986),Comedy|Horror,M,25,19,11501 1321,2240,3,974778494,My Bodyguard (1980),Drama,F,25,14,34639 3274,3698,2,979767184,"Running Man, The (1987)",Action|Adventure|Sci-Fi,M,25,20,02062 5893,2144,3,957470619,Sixteen Candles (1984),Comedy,M,25,7,02139 3436,2724,3,967328026,Runaway Bride (1999),Comedy|Romance,M,35,0,98503 3315,2918,5,967942960,Ferris Bueller's Day Off (1986),Comedy,M,25,12,78731 5056,2700,5,962488280,"South Park: Bigger, Longer and Uncut (1999)",Animation|Comedy,M,45,1,16673 5256,208,2,961271616,Waterworld (1995),Action|Adventure,M,25,16,30269 4290,1193,4,965274348,One Flew Over the Cuckoo's Nest (1975),Drama,M,25,17,98661 1010,1379,2,975220259,Young Guns II (1990),Action|Comedy|Western,M,25,0,10310 829,904,4,975368038,Rear Window (1954),Mystery|Thriller,M,1,19,53711 5953,480,4,957143581,Jurassic Park (1993),Action|Adventure|Sci-Fi,M,1,10,21030 4732,3016,4,963332896,Creepshow (1982),Horror,M,25,14,24450 4815,3181,5,972240802,Titus (1999),Drama,F,50,18,04849 1164,1894,2,1004486985,Six Days Seven Nights (1998),Adventure|Comedy|Romance,F,25,19,90020 4373,3167,5,965180829,Carnal Knowledge (1971),Drama,M,50,12,32920 5293,1374,4,961055887,Star Trek: The Wrath of Khan (1982),Action|Adventure|Sci-Fi,M,25,12,95030 1579,3101,4,981272057,Fatal Attraction (1987),Thriller,M,25,0,60201 2600,3147,5,973804787,"Green Mile, The (1999)",Drama|Thriller,M,25,14,19312 1283,480,4,974793389,Jurassic Park (1993),Action|Adventure|Sci-Fi,F,18,1,94607 3242,3062,5,968341175,"Longest Day, The (1962)",Action|Drama|War,M,50,13,94089 3618,3374,3,967116272,Daughters of the Dust 
(1992),Drama,M,56,17,22657 3762,1337,4,966434517,"Body Snatcher, The (1945)",Horror,M,50,6,11746 1015,1184,3,975018699,Mediterraneo (1991),Comedy|War,M,35,3,11220 4645,2344,5,963976808,Runaway Train (1985),Action|Adventure|Drama|Thriller,F,50,6,48094 3184,1397,4,968709039,Bastard Out of Carolina (1996),Drama,F,25,18,21214 1285,1794,4,974833328,Love and Death on Long Island (1997),Comedy|Drama,M,35,4,98125 5521,3354,2,959833154,Mission to Mars (2000),Sci-Fi,F,25,6,02118 1472,2278,3,974767792,Ronin (1998),Action|Crime|Thriller,M,25,7,90248 5630,21,4,980085414,Get Shorty (1995),Action|Comedy|Drama,M,35,17,06854 3710,3033,5,966272980,Spaceballs (1987),Comedy|Sci-Fi,M,1,10,02818 192,761,1,977028390,"Phantom, The (1996)",Adventure,M,18,1,10977 1285,1198,5,974880310,Raiders of the Lost Ark (1981),Action|Adventure,M,35,4,98125 2174,1046,4,974613044,Beautiful Thing (1996),Drama|Romance,M,50,12,87505 635,1270,4,975768106,Back to the Future (1985),Comedy|Sci-Fi,M,56,17,33785 910,412,5,975207742,"Age of Innocence, The (1993)",Drama,F,50,0,98226 1752,2021,4,975729332,Dune (1984),Fantasy|Sci-Fi,M,25,3,96813 1408,198,4,974762924,Strange Days (1995),Action|Crime|Sci-Fi,M,25,0,90046 4738,1242,4,963279051,Glory (1989),Action|Drama|War,M,56,1,23608 1503,1971,2,974748897,"Nightmare on Elm Street 4: The Dream Master, A (1988)",Horror,M,25,12,92688 3053,1296,3,970601837,"Room with a View, A (1986)",Drama|Romance,F,25,3,55102 3471,3614,2,973297828,Honeymoon in Vegas (1992),Comedy|Romance,M,18,4,80302 678,1972,3,988638700,"Nightmare on Elm Street 5: The Dream Child, A (1989)",Horror,M,25,0,34952 3483,2561,3,986327282,True Crime (1999),Crime|Thriller,F,45,7,30260 3910,3108,5,965756244,"Fisher King, The (1991)",Comedy|Drama|Romance,M,25,20,91505 182,1089,1,977085647,Reservoir Dogs (1992),Crime|Thriller,M,18,4,03052 1755,1653,3,1036917836,Gattaca (1997),Drama|Sci-Fi|Thriller,F,18,4,77005 3589,70,2,966658567,From Dusk Till Dawn (1996),Action|Comedy|Crime|Horror|Thriller,F,45,0,80010 
471,3481,4,976222483,High Fidelity (2000),Comedy,M,35,7,08904 1141,813,2,974878678,Larger Than Life (1996),Comedy,F,25,3,84770 5227,1196,2,961476022,Star Wars: Episode V - The Empire Strikes Back (1980),Action|Adventure|Drama|Sci-Fi|War,M,18,10,64050 1303,2344,2,974837844,Runaway Train (1985),Action|Adventure|Drama|Thriller,M,25,19,94111 5080,3102,5,962412804,Jagged Edge (1985),Thriller,F,50,12,95472 2023,1012,4,1006290836,Old Yeller (1957),Children's|Drama,M,18,4,56001 3759,2151,5,966094413,"Gods Must Be Crazy II, The (1989)",Comedy,M,35,6,54751 1685,2664,2,974709721,Invasion of the Body Snatchers (1956),Horror|Sci-Fi,M,35,12,95833 4715,1221,4,963508830,"Godfather: Part II, The (1974)",Action|Crime|Drama,M,25,2,97205 1591,350,5,974742941,"Client, The (1994)",Drama|Mystery|Thriller,M,50,7,26501 4227,3635,3,965411938,"Spy Who Loved Me, The (1977)",Action,M,25,19,11414-2520 1908,36,5,974697744,Dead Man Walking (1995),Drama,M,56,13,95129 5365,1892,4,960503255,"Perfect Murder, A (1998)",Mystery|Thriller,M,18,12,90250 1579,2420,4,981272235,"Karate Kid, The (1984)",Drama,M,25,0,60201 1866,3948,5,974753321,Meet the Parents (2000),Comedy,M,25,7,94043 4238,3543,4,965415533,Diner (1982),Comedy|Drama,M,35,16,44691 3590,2000,5,966657892,Lethal Weapon (1987),Action|Comedy|Crime|Drama,F,18,15,02115 3401,3256,5,980115327,Patriot Games (1992),Action|Thriller,M,35,7,76109 3705,540,2,966287116,Sliver (1993),Thriller,M,45,7,30076 4973,1246,3,962607149,Dead Poets Society (1989),Drama,F,56,2,949702 4947,380,4,962651180,True Lies (1994),Action|Adventure|Comedy|Romance,M,35,17,90035 2346,1416,4,974413811,Evita (1996),Drama|Musical,F,1,10,48105 1427,3596,3,974840560,Screwed (2000),Comedy,M,25,12,21401 3868,1626,3,965855033,Fire Down Below (1997),Action|Drama|Thriller,M,18,12,73112 249,2369,3,976730191,Desperately Seeking Susan (1985),Comedy|Romance,F,18,14,48126 5720,349,4,958503395,Clear and Present Danger (1994),Action|Adventure|Thriller,M,25,0,60610 877,1485,3,975270899,Liar Liar 
(1997),Comedy,M,25,0,90631 ================================================ FILE: examples/run_all.sh ================================================
#!/usr/bin/env bash

# Run every *.py example in the current directory with the active interpreter.
# Stops the whole script with a non-zero status on the first failing example.
# Reads $python_version only to label the log lines.
function run_py(){
    code_path=./
    # Glob instead of parsing `ls`: the old unanchored regex `.py` also matched
    # names such as __pycache__, and `$(ls)` split names on whitespace.
    for file in *.py
    do
        # With no match the glob stays as the literal "*.py"; skip it.
        [ -e "$code_path$file" ] || continue
        if python "$code_path$file"
        then
            echo "run $code_path$file succeed in $python_version"
        else
            echo "run $code_path$file failed in $python_version"
            # `exit -1` is not a portable status; any non-zero value signals failure.
            exit 1
        fi
    done
}

## python3
python_version=python3
source activate base
cd ..
python setup.py install
cd ./examples
run_py

#python2
python_version=python2
source activate py27
cd ..
python setup.py install
cd ./examples
run_py

echo "all examples run succeed in python2.7"
echo "all examples run succeed in python3.6"
echo "all examples run succeed in python2.7 and python3.6"
================================================ FILE: examples/run_classification_criteo.py ================================================ import pandas as pd from sklearn.metrics import log_loss, roc_auc_score from sklearn.model_selection import train_test_split from sklearn.preprocessing import LabelEncoder, MinMaxScaler from deepctr.models import DeepFM from deepctr.feature_column import SparseFeat, DenseFeat, get_feature_names if __name__ == "__main__": data = pd.read_csv('./criteo_sample.txt') sparse_features = ['C' + str(i) for i in range(1, 27)] dense_features = ['I' + str(i) for i in range(1, 14)] data[sparse_features] = data[sparse_features].fillna('-1', ) data[dense_features] = data[dense_features].fillna(0, ) target = ['label'] # 1.Label Encoding for sparse features,and do simple Transformation for dense features for feat in sparse_features: lbe = LabelEncoder() data[feat] = lbe.fit_transform(data[feat]) mms = MinMaxScaler(feature_range=(0, 1)) data[dense_features] = mms.fit_transform(data[dense_features]) # 2.count #unique features for each sparse field,and record dense feature field name fixlen_feature_columns = [SparseFeat(feat, vocabulary_size=data[feat].max() + 1,
embedding_dim=4) for i, feat in enumerate(sparse_features)] + [DenseFeat(feat, 1, ) for feat in dense_features] dnn_feature_columns = fixlen_feature_columns linear_feature_columns = fixlen_feature_columns feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns) # 3.generate input data for model train, test = train_test_split(data, test_size=0.2, random_state=2020) train_model_input = {name: train[name] for name in feature_names} test_model_input = {name: test[name] for name in feature_names} # 4.Define Model,train,predict and evaluate model = DeepFM(linear_feature_columns, dnn_feature_columns, task='binary') model.compile("adam", "binary_crossentropy", metrics=['binary_crossentropy'], ) history = model.fit(train_model_input, train[target].values, batch_size=256, epochs=10, verbose=2, validation_split=0.2, ) pred_ans = model.predict(test_model_input, batch_size=256) print("test LogLoss", round(log_loss(test[target].values, pred_ans), 4)) print("test AUC", round(roc_auc_score(test[target].values, pred_ans), 4)) ================================================ FILE: examples/run_classification_criteo_hash.py ================================================ import pandas as pd from sklearn.metrics import log_loss, roc_auc_score from sklearn.model_selection import train_test_split from sklearn.preprocessing import MinMaxScaler from deepctr.models import DeepFM from deepctr.feature_column import SparseFeat, DenseFeat,get_feature_names if __name__ == "__main__": data = pd.read_csv('./criteo_sample.txt') sparse_features = ['C' + str(i) for i in range(1, 27)] dense_features = ['I' + str(i) for i in range(1, 14)] data[sparse_features] = data[sparse_features].fillna('-1', ) data[dense_features] = data[dense_features].fillna(0, ) target = ['label'] # 1.do simple Transformation for dense features mms = MinMaxScaler(feature_range=(0, 1)) data[dense_features] = mms.fit_transform(data[dense_features]) # 2.set hashing space for each sparse field,and record 
dense feature field name fixlen_feature_columns = [SparseFeat(feat, vocabulary_size=1000,embedding_dim=4, use_hash=True, dtype='string') # since the input is string for feat in sparse_features] + [DenseFeat(feat, 1, ) for feat in dense_features] linear_feature_columns = fixlen_feature_columns dnn_feature_columns = fixlen_feature_columns feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns, ) # 3.generate input data for model train, test = train_test_split(data, test_size=0.2, random_state=2020) train_model_input = {name:train[name] for name in feature_names} test_model_input = {name:test[name] for name in feature_names} # 4.Define Model,train,predict and evaluate model = DeepFM(linear_feature_columns,dnn_feature_columns, task='binary') model.compile("adam", "binary_crossentropy", metrics=['binary_crossentropy'], ) history = model.fit(train_model_input, train[target].values, batch_size=256, epochs=10, verbose=2, validation_split=0.2, ) pred_ans = model.predict(test_model_input, batch_size=256) print("test LogLoss", round(log_loss(test[target].values, pred_ans), 4)) print("test AUC", round(roc_auc_score(test[target].values, pred_ans), 4)) ================================================ FILE: examples/run_classification_criteo_multi_gpu.py ================================================ import pandas as pd from sklearn.metrics import log_loss, roc_auc_score from sklearn.model_selection import train_test_split from sklearn.preprocessing import LabelEncoder, MinMaxScaler from tensorflow.python.keras.utils import multi_gpu_model from deepctr.feature_column import SparseFeat, DenseFeat,get_feature_names from deepctr.models import DeepFM if __name__ == "__main__": data = pd.read_csv('./criteo_sample.txt') sparse_features = ['C' + str(i) for i in range(1, 27)] dense_features = ['I' + str(i) for i in range(1, 14)] data[sparse_features] = data[sparse_features].fillna('-1', ) data[dense_features] = data[dense_features].fillna(0, ) target = ['label'] 
# 1.Label Encoding for sparse features,and do simple Transformation for dense features for feat in sparse_features: lbe = LabelEncoder() data[feat] = lbe.fit_transform(data[feat]) mms = MinMaxScaler(feature_range=(0, 1)) data[dense_features] = mms.fit_transform(data[dense_features]) # 2.count #unique features for each sparse field,and record dense feature field name fixlen_feature_columns = [SparseFeat(feat, vocabulary_size=data[feat].max() + 1, embedding_dim=4) for feat in sparse_features] + [DenseFeat(feat, 1, ) for feat in dense_features] dnn_feature_columns = fixlen_feature_columns linear_feature_columns = fixlen_feature_columns feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns) # 3.generate input data for model train, test = train_test_split(data, test_size=0.2, random_state=2020) train_model_input = {name: train[name] for name in feature_names} test_model_input = {name: test[name] for name in feature_names} # 4.Define Model,train,predict and evaluate model = DeepFM(linear_feature_columns, dnn_feature_columns, task='binary') model = multi_gpu_model(model, gpus=2) model.compile("adam", "binary_crossentropy", metrics=['binary_crossentropy'], ) history = model.fit(train_model_input, train[target].values, batch_size=256, epochs=10, verbose=2, validation_split=0.2, ) pred_ans = model.predict(test_model_input, batch_size=256) print("test LogLoss", round(log_loss(test[target].values, pred_ans), 4)) print("test AUC", round(roc_auc_score(test[target].values, pred_ans), 4)) ================================================ FILE: examples/run_dien.py ================================================ import numpy as np import tensorflow as tf from deepctr.feature_column import SparseFeat, VarLenSparseFeat, DenseFeat,get_feature_names from deepctr.models import DIEN def get_xy_fd(use_neg=False, hash_flag=False): feature_columns = [SparseFeat('user', 3, embedding_dim=10, use_hash=hash_flag), SparseFeat('gender', 2, embedding_dim=4, 
use_hash=hash_flag), SparseFeat('item_id', 3 + 1, embedding_dim=8, use_hash=hash_flag), SparseFeat('cate_id', 2 + 1, embedding_dim=4, use_hash=hash_flag), DenseFeat('pay_score', 1)] feature_columns += [ VarLenSparseFeat(SparseFeat('hist_item_id', vocabulary_size=3 + 1, embedding_dim=8, embedding_name='item_id'), maxlen=4, length_name="seq_length"), VarLenSparseFeat(SparseFeat('hist_cate_id', 2 + 1, embedding_dim=4, embedding_name='cate_id'), maxlen=4, length_name="seq_length")] behavior_feature_list = ["item_id", "cate_id"] uid = np.array([0, 1, 2]) ugender = np.array([0, 1, 0]) iid = np.array([1, 2, 3]) # 0 is mask value cate_id = np.array([1, 2, 2]) # 0 is mask value score = np.array([0.1, 0.2, 0.3]) hist_iid = np.array([[1, 2, 3, 0], [1, 2, 3, 0], [1, 2, 0, 0]]) hist_cate_id = np.array([[1, 2, 2, 0], [1, 2, 2, 0], [1, 2, 0, 0]]) behavior_length = np.array([3, 3, 2]) feature_dict = {'user': uid, 'gender': ugender, 'item_id': iid, 'cate_id': cate_id, 'hist_item_id': hist_iid, 'hist_cate_id': hist_cate_id, 'pay_score': score, "seq_length": behavior_length} if use_neg: feature_dict['neg_hist_item_id'] = np.array([[1, 2, 3, 0], [1, 2, 3, 0], [1, 2, 0, 0]]) feature_dict['neg_hist_cate_id'] = np.array([[1, 2, 2, 0], [1, 2, 2, 0], [1, 2, 0, 0]]) feature_columns += [ VarLenSparseFeat(SparseFeat('neg_hist_item_id', vocabulary_size=3 + 1, embedding_dim=8, embedding_name='item_id'), maxlen=4, length_name="seq_length"), VarLenSparseFeat(SparseFeat('neg_hist_cate_id', 2 + 1, embedding_dim=4, embedding_name='cate_id'), maxlen=4, length_name="seq_length")] x = {name: feature_dict[name] for name in get_feature_names(feature_columns)} y = np.array([1, 0, 1]) return x, y, feature_columns, behavior_feature_list if __name__ == "__main__": if tf.__version__ >= '2.0.0': tf.compat.v1.disable_eager_execution() USE_NEG = True x, y, feature_columns, behavior_feature_list = get_xy_fd(use_neg=USE_NEG) model = DIEN(feature_columns, behavior_feature_list, dnn_hidden_units=[4, 4, 4], 
dnn_dropout=0.6, gru_type="AUGRU", use_negsampling=USE_NEG) model.compile('adam', 'binary_crossentropy', metrics=['binary_crossentropy']) history = model.fit(x, y, verbose=1, epochs=10, validation_split=0.5) ================================================ FILE: examples/run_din.py ================================================ import numpy as np from deepctr.models import DIN from deepctr.feature_column import SparseFeat, VarLenSparseFeat, DenseFeat, get_feature_names def get_xy_fd(): feature_columns = [SparseFeat('user', 3, embedding_dim=10), SparseFeat( 'gender', 2, embedding_dim=4), SparseFeat('item_id', 3 + 1, embedding_dim=8), SparseFeat('cate_id', 2 + 1, embedding_dim=4), DenseFeat('pay_score', 1)] feature_columns += [ VarLenSparseFeat(SparseFeat('hist_item_id', vocabulary_size=3 + 1, embedding_dim=8, embedding_name='item_id'), maxlen=4, length_name="seq_length"), VarLenSparseFeat(SparseFeat('hist_cate_id', 2 + 1, embedding_dim=4, embedding_name='cate_id'), maxlen=4, length_name="seq_length")] # Notice: History behavior sequence feature name must start with "hist_". 
behavior_feature_list = ["item_id", "cate_id"] uid = np.array([0, 1, 2]) ugender = np.array([0, 1, 0]) iid = np.array([1, 2, 3]) # 0 is mask value cate_id = np.array([1, 2, 2]) # 0 is mask value pay_score = np.array([0.1, 0.2, 0.3]) hist_iid = np.array([[1, 2, 3, 0], [3, 2, 1, 0], [1, 2, 0, 0]]) hist_cate_id = np.array([[1, 2, 2, 0], [2, 2, 1, 0], [1, 2, 0, 0]]) seq_length = np.array([3, 3, 2]) # the actual length of the behavior sequence feature_dict = {'user': uid, 'gender': ugender, 'item_id': iid, 'cate_id': cate_id, 'hist_item_id': hist_iid, 'hist_cate_id': hist_cate_id, 'pay_score': pay_score, 'seq_length': seq_length} x = {name: feature_dict[name] for name in get_feature_names(feature_columns)} y = np.array([1, 0, 1]) return x, y, feature_columns, behavior_feature_list if __name__ == "__main__": x, y, feature_columns, behavior_feature_list = get_xy_fd() model = DIN(feature_columns, behavior_feature_list) # model = BST(feature_columns, behavior_feature_list,att_head_num=4) model.compile('adam', 'binary_crossentropy', metrics=['binary_crossentropy']) history = model.fit(x, y, verbose=1, epochs=10, validation_split=0.5) ================================================ FILE: examples/run_dsin.py ================================================ import numpy as np import tensorflow as tf from deepctr.feature_column import SparseFeat, VarLenSparseFeat, DenseFeat,get_feature_names from deepctr.models import DSIN def get_xy_fd(hash_flag=False): feature_columns = [SparseFeat('user', 3, embedding_dim=10, use_hash=hash_flag), SparseFeat('gender', 2, embedding_dim=4, use_hash=hash_flag), SparseFeat('item', 3 + 1, embedding_dim=4, use_hash=hash_flag), SparseFeat('cate_id', 2 + 1, embedding_dim=4, use_hash=hash_flag), DenseFeat('pay_score', 1)] feature_columns += [ VarLenSparseFeat(SparseFeat('sess_0_item', 3 + 1, embedding_dim=4, use_hash=hash_flag, embedding_name='item'), maxlen=4), VarLenSparseFeat( SparseFeat('sess_0_cate_id', 2 + 1, embedding_dim=4, 
use_hash=hash_flag, embedding_name='cate_id'), maxlen=4)] feature_columns += [ VarLenSparseFeat(SparseFeat('sess_1_item', 3 + 1, embedding_dim=4, use_hash=hash_flag, embedding_name='item'), maxlen=4), VarLenSparseFeat( SparseFeat('sess_1_cate_id', 2 + 1, embedding_dim=4, use_hash=hash_flag, embedding_name='cate_id'), maxlen=4)] behavior_feature_list = ["item", "cate_id"] uid = np.array([0, 1, 2]) ugender = np.array([0, 1, 0]) iid = np.array([1, 2, 3]) # 0 is mask value cateid = np.array([1, 2, 2]) # 0 is mask value score = np.array([0.1, 0.2, 0.3]) sess1_iid = np.array([[1, 2, 3, 0], [3, 2, 1, 0], [0, 0, 0, 0]]) sess1_cate_id = np.array([[1, 2, 2, 0], [2, 2, 1, 0], [0, 0, 0, 0]]) sess2_iid = np.array([[1, 2, 3, 0], [0, 0, 0, 0], [0, 0, 0, 0]]) sess2_cate_id = np.array([[1, 2, 2, 0], [0, 0, 0, 0], [0, 0, 0, 0]]) sess_number = np.array([2, 1, 0]) feature_dict = {'user': uid, 'gender': ugender, 'item': iid, 'cate_id': cateid, 'sess_0_item': sess1_iid, 'sess_0_cate_id': sess1_cate_id, 'pay_score': score, 'sess_1_item': sess2_iid, 'sess_1_cate_id': sess2_cate_id, } x = {name: feature_dict[name] for name in get_feature_names(feature_columns)} x["sess_length"] = sess_number y = np.array([1, 0, 1]) return x, y, feature_columns, behavior_feature_list if __name__ == "__main__": if tf.__version__ >= '2.0.0': tf.compat.v1.disable_eager_execution() x, y, feature_columns, behavior_feature_list = get_xy_fd(True) model = DSIN(feature_columns, behavior_feature_list, sess_max_count=2, dnn_hidden_units=[4, 4, 4], dnn_dropout=0.5, ) model.compile('adam', 'binary_crossentropy', metrics=['binary_crossentropy']) history = model.fit(x, y, verbose=1, epochs=10, validation_split=0.5) ================================================ FILE: examples/run_estimator_pandas_classification.py ================================================ import pandas as pd import tensorflow as tf from sklearn.metrics import log_loss, roc_auc_score from sklearn.model_selection import train_test_split from 
sklearn.preprocessing import LabelEncoder, MinMaxScaler from deepctr.estimator import DeepFMEstimator from deepctr.estimator.inputs import input_fn_pandas if __name__ == "__main__": data = pd.read_csv('./criteo_sample.txt') sparse_features = ['C' + str(i) for i in range(1, 27)] dense_features = ['I' + str(i) for i in range(1, 14)] data[sparse_features] = data[sparse_features].fillna('-1', ) data[dense_features] = data[dense_features].fillna(0, ) target = ['label'] # 1.Label Encoding for sparse features,and do simple Transformation for dense features for feat in sparse_features: lbe = LabelEncoder() data[feat] = lbe.fit_transform(data[feat]) mms = MinMaxScaler(feature_range=(0, 1)) data[dense_features] = mms.fit_transform(data[dense_features]) # 2.count #unique features for each sparse field,and record dense feature field name dnn_feature_columns = [] linear_feature_columns = [] for i, feat in enumerate(sparse_features): dnn_feature_columns.append(tf.feature_column.embedding_column( tf.feature_column.categorical_column_with_identity(feat, data[feat].max() + 1), 4)) linear_feature_columns.append(tf.feature_column.categorical_column_with_identity(feat, data[feat].max() + 1)) for feat in dense_features: dnn_feature_columns.append(tf.feature_column.numeric_column(feat)) linear_feature_columns.append(tf.feature_column.numeric_column(feat)) # 3.generate input data for model train, test = train_test_split(data, test_size=0.2, random_state=2021) # Not setting default value for continuous feature. filled with mean. 
train_model_input = input_fn_pandas(train, sparse_features + dense_features, 'label', shuffle=True) test_model_input = input_fn_pandas(test, sparse_features + dense_features, None, shuffle=False) # 4.Define Model,train,predict and evaluate model = DeepFMEstimator(linear_feature_columns, dnn_feature_columns, task='binary', config=tf.estimator.RunConfig(tf_random_seed=2021)) model.train(train_model_input) pred_ans_iter = model.predict(test_model_input) pred_ans = list(map(lambda x: x['pred'], pred_ans_iter)) # print("test LogLoss", round(log_loss(test[target].values, pred_ans), 4)) print("test AUC", round(roc_auc_score(test[target].values, pred_ans), 4)) ================================================ FILE: examples/run_estimator_tfrecord_classification.py ================================================ import tensorflow as tf from tensorflow.python.ops.parsing_ops import FixedLenFeature from deepctr.estimator import DeepFMEstimator from deepctr.estimator.inputs import input_fn_tfrecord if __name__ == "__main__": # 1.generate feature_column for linear part and dnn part sparse_features = ['C' + str(i) for i in range(1, 27)] dense_features = ['I' + str(i) for i in range(1, 14)] dnn_feature_columns = [] linear_feature_columns = [] for i, feat in enumerate(sparse_features): dnn_feature_columns.append(tf.feature_column.embedding_column( tf.feature_column.categorical_column_with_identity(feat, 1000), 4)) linear_feature_columns.append(tf.feature_column.categorical_column_with_identity(feat, 1000)) for feat in dense_features: dnn_feature_columns.append(tf.feature_column.numeric_column(feat)) linear_feature_columns.append(tf.feature_column.numeric_column(feat)) # 2.generate input data for model feature_description = {k: FixedLenFeature(dtype=tf.int64, shape=1) for k in sparse_features} feature_description.update( {k: FixedLenFeature(dtype=tf.float32, shape=1) for k in dense_features}) feature_description['label'] = FixedLenFeature(dtype=tf.float32, shape=1) 
train_model_input = input_fn_tfrecord('./criteo_sample.tr.tfrecords', feature_description, 'label', batch_size=256, num_epochs=1, shuffle_factor=10) test_model_input = input_fn_tfrecord('./criteo_sample.te.tfrecords', feature_description, 'label', batch_size=2 ** 14, num_epochs=1, shuffle_factor=0) # 3.Define Model,train,predict and evaluate model = DeepFMEstimator(linear_feature_columns, dnn_feature_columns, task='binary', config=tf.estimator.RunConfig(tf_random_seed=2021)) model.train(train_model_input) eval_result = model.evaluate(test_model_input) print(eval_result) ================================================ FILE: examples/run_flen.py ================================================ import pandas as pd from sklearn.metrics import log_loss, roc_auc_score from sklearn.model_selection import train_test_split from sklearn.preprocessing import LabelEncoder from deepctr.feature_column import SparseFeat,get_feature_names from deepctr.models import FLEN if __name__ == "__main__": data = pd.read_csv('./avazu_sample.txt') data['day'] = data['hour'].apply(lambda x: str(x)[4:6]) data['hour'] = data['hour'].apply(lambda x: str(x)[6:]) sparse_features = ['hour', 'C1', 'banner_pos', 'site_id', 'site_domain', 'site_category', 'app_id', 'app_domain', 'app_category', 'device_id', 'device_model', 'device_type', 'device_conn_type', # 'device_ip', 'C14', 'C15', 'C16', 'C17', 'C18', 'C19', 'C20', 'C21', ] data[sparse_features] = data[sparse_features].fillna('-1', ) target = ['click'] # 1.Label Encoding for sparse features,and do simple Transformation for dense features for feat in sparse_features: lbe = LabelEncoder() data[feat] = lbe.fit_transform(data[feat]) # 2.count #unique features for each sparse field,and record dense feature field name field_info = dict(C14='user', C15='user', C16='user', C17='user', C18='user', C19='user', C20='user', C21='user', C1='user', banner_pos='context', site_id='context', site_domain='context', site_category='context', app_id='item', 
app_domain='item', app_category='item', device_model='user', device_type='user', device_conn_type='context', hour='context', device_id='user' ) fixlen_feature_columns = [ SparseFeat(name, vocabulary_size=data[name].max() + 1, embedding_dim=16, use_hash=False, dtype='int32', group_name=field_info[name]) for name in sparse_features] dnn_feature_columns = fixlen_feature_columns linear_feature_columns = fixlen_feature_columns feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns) # 3.generate input data for model train, test = train_test_split(data, test_size=0.2, random_state=2020) train_model_input = {name: train[name] for name in feature_names} test_model_input = {name: test[name] for name in feature_names} # 4.Define Model,train,predict and evaluate model = FLEN(linear_feature_columns, dnn_feature_columns, task='binary') model.compile("adam", "binary_crossentropy", metrics=['binary_crossentropy'], ) history = model.fit(train_model_input, train[target].values, batch_size=256, epochs=10, verbose=2, validation_split=0.2, ) pred_ans = model.predict(test_model_input, batch_size=256) print("test LogLoss", round(log_loss(test[target].values, pred_ans), 4)) print("test AUC", round(roc_auc_score(test[target].values, pred_ans), 4)) ================================================ FILE: examples/run_mtl.py ================================================ import pandas as pd from sklearn.metrics import roc_auc_score from sklearn.model_selection import train_test_split from sklearn.preprocessing import LabelEncoder, MinMaxScaler from deepctr.feature_column import SparseFeat, DenseFeat, get_feature_names from deepctr.models import MMOE if __name__ == "__main__": column_names = ['age', 'class_worker', 'det_ind_code', 'det_occ_code', 'education', 'wage_per_hour', 'hs_college', 'marital_stat', 'major_ind_code', 'major_occ_code', 'race', 'hisp_origin', 'sex', 'union_member', 'unemp_reason', 'full_or_part_emp', 'capital_gains', 'capital_losses', 
'stock_dividends', 'tax_filer_stat', 'region_prev_res', 'state_prev_res', 'det_hh_fam_stat', 'det_hh_summ', 'instance_weight', 'mig_chg_msa', 'mig_chg_reg', 'mig_move_reg', 'mig_same', 'mig_prev_sunbelt', 'num_emp', 'fam_under_18', 'country_father', 'country_mother', 'country_self', 'citizenship', 'own_or_self', 'vet_question', 'vet_benefits', 'weeks_worked', 'year', 'income_50k'] data = pd.read_csv('./census-income.sample', header=None, names=column_names) data['label_income'] = data['income_50k'].map({' - 50000.': 0, ' 50000+.': 1}) data['label_marital'] = data['marital_stat'].apply(lambda x: 1 if x == ' Never married' else 0) data.drop(labels=['income_50k', 'marital_stat'], axis=1, inplace=True) columns = data.columns.values.tolist() sparse_features = ['class_worker', 'det_ind_code', 'det_occ_code', 'education', 'hs_college', 'major_ind_code', 'major_occ_code', 'race', 'hisp_origin', 'sex', 'union_member', 'unemp_reason', 'full_or_part_emp', 'tax_filer_stat', 'region_prev_res', 'state_prev_res', 'det_hh_fam_stat', 'det_hh_summ', 'mig_chg_msa', 'mig_chg_reg', 'mig_move_reg', 'mig_same', 'mig_prev_sunbelt', 'fam_under_18', 'country_father', 'country_mother', 'country_self', 'citizenship', 'vet_question'] dense_features = [col for col in columns if col not in sparse_features and col not in ['label_income', 'label_marital']] data[sparse_features] = data[sparse_features].fillna('-1', ) data[dense_features] = data[dense_features].fillna(0, ) mms = MinMaxScaler(feature_range=(0, 1)) data[dense_features] = mms.fit_transform(data[dense_features]) for feat in sparse_features: lbe = LabelEncoder() data[feat] = lbe.fit_transform(data[feat]) fixlen_feature_columns = [SparseFeat(feat, data[feat].max() + 1, embedding_dim=4) for feat in sparse_features] \ + [DenseFeat(feat, 1, ) for feat in dense_features] dnn_feature_columns = fixlen_feature_columns linear_feature_columns = fixlen_feature_columns feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns) # 
3.generate input data for model train, test = train_test_split(data, test_size=0.2, random_state=2020) train_model_input = {name: train[name] for name in feature_names} test_model_input = {name: test[name] for name in feature_names} # 4.Define Model,train,predict and evaluate model = MMOE(dnn_feature_columns, tower_dnn_hidden_units=[], task_types=['binary', 'binary'], task_names=['label_income', 'label_marital']) model.compile("adam", loss=["binary_crossentropy", "binary_crossentropy"], metrics=['binary_crossentropy'], ) history = model.fit(train_model_input, [train['label_income'].values, train['label_marital'].values], batch_size=256, epochs=10, verbose=2, validation_split=0.2) pred_ans = model.predict(test_model_input, batch_size=256) print("test income AUC", round(roc_auc_score(test['label_income'], pred_ans[0]), 4)) print("test marital AUC", round(roc_auc_score(test['label_marital'], pred_ans[1]), 4)) ================================================ FILE: examples/run_multivalue_movielens.py ================================================ import numpy as np import pandas as pd from sklearn.preprocessing import LabelEncoder from tensorflow.python.keras.preprocessing.sequence import pad_sequences from deepctr.feature_column import SparseFeat, VarLenSparseFeat,get_feature_names from deepctr.models import DeepFM def split(x): key_ans = x.split('|') for key in key_ans: if key not in key2index: # Notice : input value 0 is a special "padding",so we do not use 0 to encode valid feature for sequence input key2index[key] = len(key2index) + 1 return list(map(lambda x: key2index[x], key_ans)) if __name__ == "__main__": data = pd.read_csv("./movielens_sample.txt") sparse_features = ["movie_id", "user_id", "gender", "age", "occupation", "zip", ] target = ['rating'] # 1.Label Encoding for sparse features,and process sequence features for feat in sparse_features: lbe = LabelEncoder() data[feat] = lbe.fit_transform(data[feat]) # preprocess the sequence feature key2index = {} 
genres_list = list(map(split, data['genres'].values)) genres_length = np.array(list(map(len, genres_list))) max_len = max(genres_length) # Notice : padding=`post` genres_list = pad_sequences(genres_list, maxlen=max_len, padding='post', ) # 2.count #unique features for each sparse field and generate feature config for sequence feature fixlen_feature_columns = [SparseFeat(feat, data[feat].max() + 1, embedding_dim=4) for feat in sparse_features] use_weighted_sequence = False if use_weighted_sequence: varlen_feature_columns = [VarLenSparseFeat(SparseFeat('genres', vocabulary_size=len( key2index) + 1, embedding_dim=4), maxlen=max_len, combiner='mean', weight_name='genres_weight')] # Notice : value 0 is for padding for sequence input feature else: varlen_feature_columns = [VarLenSparseFeat(SparseFeat('genres', vocabulary_size=len( key2index) + 1, embedding_dim=4), maxlen=max_len, combiner='mean', weight_name=None)] # Notice : value 0 is for padding for sequence input feature linear_feature_columns = fixlen_feature_columns + varlen_feature_columns dnn_feature_columns = fixlen_feature_columns + varlen_feature_columns feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns) # 3.generate input data for model model_input = {name: data[name] for name in sparse_features} # model_input["genres"] = genres_list model_input["genres_weight"] = np.random.randn(data.shape[0], max_len, 1) # 4.Define Model,compile and train model = DeepFM(linear_feature_columns, dnn_feature_columns, task='regression') model.compile("adam", "mse", metrics=['mse'], ) history = model.fit(model_input, data[target].values, batch_size=256, epochs=10, verbose=2, validation_split=0.2, ) ================================================ FILE: examples/run_multivalue_movielens_hash.py ================================================ import numpy as np import pandas as pd from tensorflow.python.keras.preprocessing.sequence import pad_sequences from deepctr.feature_column import SparseFeat, 
VarLenSparseFeat,get_feature_names from deepctr.models import DeepFM if __name__ == "__main__": data = pd.read_csv("./movielens_sample.txt") sparse_features = ["movie_id", "user_id", "gender", "age", "occupation", "zip", ] data[sparse_features] = data[sparse_features].astype(str) target = ['rating'] # 1.Use hashing encoding on the fly for sparse features,and process sequence features genres_list = list(map(lambda x: x.split('|'), data['genres'].values)) genres_length = np.array(list(map(len, genres_list))) max_len = max(genres_length) # Notice : padding=`post` genres_list = pad_sequences(genres_list, maxlen=max_len, padding='post', dtype=object, value=0).astype(str) # 2.set hashing space for each sparse field and generate feature config for sequence feature fixlen_feature_columns = [SparseFeat(feat, data[feat].nunique() * 5, embedding_dim=4, use_hash=True, dtype='string') for feat in sparse_features] varlen_feature_columns = [ VarLenSparseFeat(SparseFeat('genres', vocabulary_size=100, embedding_dim=4, use_hash=True, dtype="string"), maxlen=max_len, combiner='mean', )] # Notice : value 0 is for padding for sequence input feature linear_feature_columns = fixlen_feature_columns + varlen_feature_columns dnn_feature_columns = fixlen_feature_columns + varlen_feature_columns feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns) # 3.generate input data for model model_input = {name: data[name] for name in feature_names} model_input['genres'] = genres_list # 4.Define Model,compile and train model = DeepFM(linear_feature_columns, dnn_feature_columns, task='regression') model.compile("adam", "mse", metrics=['mse'], ) history = model.fit(model_input, data[target].values, batch_size=256, epochs=10, verbose=2, validation_split=0.2, ) ================================================ FILE: examples/run_multivalue_movielens_vocab_hash.py ================================================ from deepctr.models import DeepFM from deepctr.feature_column import 
SparseFeat, VarLenSparseFeat, get_feature_names import numpy as np import pandas as pd from tensorflow.python.keras.preprocessing.sequence import pad_sequences try: import tensorflow.compat.v1 as tf except ImportError as e: import tensorflow as tf if __name__ == "__main__": data = pd.read_csv("./movielens_sample.txt") sparse_features = ["movie_id", "user_id", "gender", "age", "occupation", "zip", ] data[sparse_features] = data[sparse_features].astype(str) target = ['rating'] # 1.Use hashing encoding on the fly for sparse features,and process sequence features genres_list = list(map(lambda x: x.split('|'), data['genres'].values)) genres_length = np.array(list(map(len, genres_list))) max_len = max(genres_length) # Notice : padding=`post` genres_list = pad_sequences(genres_list, maxlen=max_len, padding='post', dtype=object, value=0).astype(str) # 2.set hashing space for each sparse field and generate feature config for sequence feature fixlen_feature_columns = [SparseFeat(feat, data[feat].nunique() * 5, embedding_dim=4, use_hash=True, vocabulary_path='./movielens_age_vocabulary.csv' if feat == 'age' else None, dtype='string') for feat in sparse_features] varlen_feature_columns = [ VarLenSparseFeat(SparseFeat('genres', vocabulary_size=100, embedding_dim=4, use_hash=True, dtype="string"), maxlen=max_len, combiner='mean', )] # Notice : value 0 is for padding for sequence input feature linear_feature_columns = fixlen_feature_columns + varlen_feature_columns dnn_feature_columns = fixlen_feature_columns + varlen_feature_columns feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns) # 3.generate input data for model model_input = {name: data[name] for name in feature_names} model_input['genres'] = genres_list # 4.Define Model,compile and train model = DeepFM(linear_feature_columns, dnn_feature_columns, task='regression') model.compile("adam", "mse", metrics=['mse'], ) if not hasattr(tf, 'version') or tf.version.VERSION < '2.0.0': with tf.Session() as 
sess: sess.run(tf.tables_initializer()) history = model.fit(model_input, data[target].values, batch_size=256, epochs=10, verbose=2, validation_split=0.2, ) else: history = model.fit(model_input, data[target].values, batch_size=256, epochs=10, verbose=2, validation_split=0.2, ) ================================================ FILE: examples/run_regression_movielens.py ================================================ import pandas as pd from sklearn.metrics import mean_squared_error from sklearn.model_selection import train_test_split from sklearn.preprocessing import LabelEncoder from deepctr.models import DeepFM from deepctr.feature_column import SparseFeat,get_feature_names if __name__ == "__main__": data = pd.read_csv("./movielens_sample.txt") sparse_features = ["movie_id", "user_id", "gender", "age", "occupation", "zip"] target = ['rating'] # 1.Label Encoding for sparse features,and do simple Transformation for dense features for feat in sparse_features: lbe = LabelEncoder() data[feat] = lbe.fit_transform(data[feat]) # 2.count #unique features for each sparse field fixlen_feature_columns = [SparseFeat(feat, data[feat].max() + 1,embedding_dim=4) for feat in sparse_features] linear_feature_columns = fixlen_feature_columns dnn_feature_columns = fixlen_feature_columns feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns) # 3.generate input data for model train, test = train_test_split(data, test_size=0.2, random_state=2020) train_model_input = {name:train[name].values for name in feature_names} test_model_input = {name:test[name].values for name in feature_names} # 4.Define Model,train,predict and evaluate model = DeepFM(linear_feature_columns, dnn_feature_columns, task='regression') model.compile("adam", "mse", metrics=['mse'], ) history = model.fit(train_model_input, train[target].values, batch_size=256, epochs=10, verbose=2, validation_split=0.2, ) pred_ans = model.predict(test_model_input, batch_size=256) print("test MSE", 
round(mean_squared_error( test[target].values, pred_ans), 4)) ================================================ FILE: setup.cfg ================================================ [metadata] desciption-file = README.md #[coverage:run] #branch = True [coverage:report] exclude_lines = # Have to re-enable the standard pragma pragma: no cover # Don't complain about missing debug-only code: def __repr__ if self\.debug # Don't complain if tests don't hit defensive assertion code: raise ValueError raise AssertionError raise NotImplementedError # Don't complain if non-runnable code isn't run: if 0: if False: if __name__ == .__main__.: [coverage:run] omit = # omit anything in a .local directory anywhere #*/.local/* # omit everything in /usr deepctr/contrib/* # omit this single file #utils/tirefire.py ================================================ FILE: setup.py ================================================ import sys import setuptools with open("README.md", "r") as fh: long_description = fh.read() REQUIRED_PACKAGES = [ 'requests', 'h5py==3.7.0; python_version>="3.9"', 'h5py==2.10.0; python_version<"3.9"' ] setuptools.setup( name="deepctr", version="0.9.3", author="Weichen Shen", author_email="weichenswc@163.com", description="Easy-to-use,Modular and Extendible package of deep learning based CTR(Click Through Rate) prediction models with tensorflow 1.x and 2.x .", long_description=long_description, long_description_content_type="text/markdown", url="https://github.com/shenweichen/deepctr", download_url='https://github.com/shenweichen/deepctr/tags', packages=setuptools.find_packages( exclude=["tests", "tests.models", "tests.layers"]), python_requires=">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*", # '>=3.4', # 3.4.6 install_requires=REQUIRED_PACKAGES, extras_require={ "cpu": ["tensorflow>=1.4.0,!=1.7.*,!=1.8.*"], "gpu": ["tensorflow-gpu>=1.4.0,!=1.7.*,!=1.8.*"], }, entry_points={ }, classifiers=( "License :: OSI Approved :: Apache Software License", "Operating System :: OS 
Independent", 'Intended Audience :: Developers', 'Intended Audience :: Education', 'Intended Audience :: Science/Research', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.10', 'Topic :: Scientific/Engineering', 'Topic :: Scientific/Engineering :: Artificial Intelligence', 'Topic :: Software Development', 'Topic :: Software Development :: Libraries', 'Topic :: Software Development :: Libraries :: Python Modules', ), license="Apache-2.0", keywords=['ctr', 'click through rate', 'deep learning', 'tensorflow', 'tensor', 'keras'], ) ================================================ FILE: tests/README.md ================================================ ================================================ FILE: tests/__init__.py ================================================ ================================================ FILE: tests/feature_test.py ================================================ from deepctr.models import DeepFM from deepctr.feature_column import SparseFeat, DenseFeat, VarLenSparseFeat, get_feature_names import numpy as np def test_long_dense_vector(): feature_columns = [SparseFeat('user_id', 4, ), SparseFeat('item_id', 5, ), DenseFeat("pic_vec", 5)] fixlen_feature_names = get_feature_names(feature_columns) user_id = np.array([[1], [0], [1]]) item_id = np.array([[3], [2], [1]]) pic_vec = np.array([[0.1, 0.5, 0.4, 0.3, 0.2], [0.1, 0.5, 0.4, 0.3, 0.2], [0.1, 0.5, 0.4, 0.3, 0.2]]) label = np.array([1, 0, 1]) input_dict = {'user_id': user_id, 'item_id': item_id, 'pic_vec': pic_vec} model_input = [input_dict[name] for name in fixlen_feature_names] model = DeepFM(feature_columns, feature_columns[:-1]) model.compile('adagrad', 'binary_crossentropy') model.fit(model_input, label) def test_feature_column_sparsefeat_vocabulary_path(): 
@pytest.mark.parametrize(
    'hidden_units,activation',
    [(hidden_units, activation)
     for hidden_units in [(), (10,)]
     for activation in ['sigmoid', Dice, PReLU]
     ]
)
def test_LocalActivationUnit(hidden_units, activation):
    """Run ``layer_test`` on ``LocalActivationUnit`` for one (hidden_units, activation) pair.

    The Dice and PReLU cases are skipped on TensorFlow >= 1.13.0; only the
    'sigmoid' activation is exercised there.
    """
    # Local import: this module does not import ``packaging`` at the top, while
    # sibling test modules (e.g. the sequence layer tests) already rely on it.
    from packaging import version

    # Compare parsed versions, not raw strings: lexicographically
    # '1.4.0' >= '1.13.0' is True, so the old string comparison also skipped
    # these cases on TensorFlow 1.4 - 1.9.
    is_new_tf = version.parse(tf.__version__) >= version.parse('1.13.0')
    if is_new_tf and activation != 'sigmoid':
        return

    with CustomObjectScope({'LocalActivationUnit': layers.LocalActivationUnit}):
        layer_test(layers.LocalActivationUnit,
                   kwargs={'hidden_units': hidden_units, 'activation': activation, 'dropout_rate': 0.5},
                   # Two inputs: a (batch, 1, emb) tensor and a (batch, seq, emb) tensor.
                   input_shape=[(BATCH_SIZE, 1, EMBEDDING_SIZE), (BATCH_SIZE, SEQ_LENGTH, EMBEDDING_SIZE)])
@pytest.mark.parametrize('reduce_sum', [True, False])
def test_InnerProductLayer(reduce_sum):
    """Run ``layer_test`` on ``InnerProductLayer`` with and without ``reduce_sum``."""
    # One (batch, 1, emb) input per field.
    per_field_shapes = [(BATCH_SIZE, 1, EMBEDDING_SIZE) for _ in range(FIELD_SIZE)]
    layer_kwargs = {'reduce_sum': reduce_sum}
    custom_objects = {'InnerProductLayer': layers.InnerProductLayer}

    with CustomObjectScope(custom_objects):
        layer_test(layers.InnerProductLayer, kwargs=layer_kwargs, input_shape=per_field_shapes)
@pytest.mark.parametrize(
    'head_num,use_res',
    [(1, True), (2, False)]
)
def test_InteractingLayer(head_num, use_res):
    """Run ``layer_test`` on ``InteractingLayer`` for one (head_num, use_res) pair."""
    layer_kwargs = {"head_num": head_num, "use_res": use_res}
    field_embedding_shape = (BATCH_SIZE, FIELD_SIZE, EMBEDDING_SIZE)

    with CustomObjectScope({'InteractingLayer': layers.InteractingLayer}):
        layer_test(layers.InteractingLayer, kwargs=layer_kwargs, input_shape=field_embedding_shape)
@pytest.mark.parametrize(
    'mode,supports_masking,input_shape',
    [
        ('sum', False, [(BATCH_SIZE, SEQ_LENGTH, EMBEDDING_SIZE), (BATCH_SIZE, 1)]),
        ('mean', True, (BATCH_SIZE, SEQ_LENGTH, EMBEDDING_SIZE)),
        ('max', True, (BATCH_SIZE, SEQ_LENGTH, EMBEDDING_SIZE)),
    ]
)
def test_SequencePoolingLayer(mode, supports_masking, input_shape):
    """Run ``layer_test`` on ``SequencePoolingLayer`` for one pooling mode.

    Only the 'sum' mode is exercised on TensorFlow >= 1.14.0; the 'mean' and
    'max' cases return early there.
    """
    tf_at_least_1_14 = version.parse(tf.__version__) >= version.parse('1.14.0')
    if mode != 'sum' and tf_at_least_1_14:  # todo check further version
        return

    layer_kwargs = {'mode': mode, 'supports_masking': supports_masking}
    with CustomObjectScope({'SequencePoolingLayer': sequence.SequencePoolingLayer}):
        layer_test(
            sequence.SequencePoolingLayer,
            kwargs=layer_kwargs,
            input_shape=input_shape,
            supports_masking=supports_masking,
        )
@pytest.mark.parametrize(
    'attention_type',
    ['scaled_dot_product', 'cos', 'ln', 'additive']
)
def test_Transformer(attention_type):
    """Run ``layer_test`` on ``sequence.Transformer`` for one attention type."""
    sequence_shape = (BATCH_SIZE, SEQ_LENGTH, EMBEDDING_SIZE)
    length_shape = (BATCH_SIZE, 1)
    # Four inputs: two sequence tensors followed by two (batch, 1) tensors.
    input_shapes = [sequence_shape, sequence_shape, length_shape, length_shape]

    layer_kwargs = {
        'att_embedding_size': 1,
        'head_num': 8,
        'use_layer_norm': True,
        'supports_masking': False,
        'attention_type': attention_type,
        'dropout_rate': 0.5,
        'output_type': 'sum',
    }

    with CustomObjectScope({'Transformer': sequence.Transformer}):
        layer_test(sequence.Transformer, kwargs=layer_kwargs, input_shape=input_shapes)
@pytest.mark.parametrize(
    'num_buckets,mask_zero,vocabulary_path,input_data,expected_output',
    [
        (3 + 1, False, None, ['lakemerson'], None),
        (3 + 1, True, None, ['lakemerson'], None),
        (3 + 1, False, "./tests/layers/vocabulary_example.csv",
         [['lake'], ['johnson'], ['lakemerson']], [[1], [3], [0]]),
    ]
)
def test_Hash(num_buckets, mask_zero, vocabulary_path, input_data, expected_output):
    """Run ``layer_test`` on the ``Hash`` layer with string inputs.

    The test is a no-op when ``tf.version`` is missing or reports a version
    string that compares below '2.0.0'.
    """
    # Plain string comparison, kept exactly as before.
    has_tf2 = hasattr(tf, 'version') and not tf.version.VERSION < '2.0.0'
    if not has_tf2:
        return

    layer_kwargs = {
        'num_buckets': num_buckets,
        'mask_zero': mask_zero,
        'vocabulary_path': vocabulary_path,
    }
    string_inputs = np.array(input_data, dtype='str')

    with CustomObjectScope({'Hash': Hash}):
        layer_test(
            Hash,
            kwargs=layer_kwargs,
            input_dtype=tf.string,
            input_data=string_inputs,
            expected_output_dtype=tf.int64,
            expected_output=expected_output,
        )
@pytest.mark.parametrize(
    'use_attention,sparse_feature_num,dense_feature_num',
    [(True, 3, 0)]
)
def test_AFM(use_attention, sparse_feature_num, dense_feature_num):
    """Build an AFM model on generated test data and hand it to ``check_model``."""
    features, labels, columns = get_test_data(
        SAMPLE_SIZE,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=dense_feature_num,
    )

    # The same columns serve as both the linear and the DNN feature columns.
    afm = AFM(columns, columns, use_attention=use_attention, afm_dropout=0.5)
    check_model(afm, "AFM", features, labels)
def test_BST():
    """Build a BST model on the DIN test fixture and check it, including model I/O."""
    # get_xy_fd is imported from the DIN test module.
    features, labels, columns, history_names = get_xy_fd(hash_flag=True)

    bst = BST(
        dnn_feature_columns=columns,
        history_feature_list=history_names,
        att_head_num=4,
    )
    check_model(bst, "BST", features, labels, check_model_io=True)
@pytest.mark.parametrize(
    'sparse_feature_num,dense_feature_num',
    [(2, 0),
     ]
)
def test_CCPMEstimator_without_seq(sparse_feature_num, dense_feature_num):
    """Build a ``CCPMEstimator`` on generated data and hand it to ``check_estimator``.

    Does nothing when ``TEST_Estimator`` is falsy.
    """
    if not TEST_Estimator:
        return
    # Imported lazily so the estimator package is only loaded when it is tested.
    from deepctr.estimator import CCPMEstimator

    sample_size = SAMPLE_SIZE
    # Honour the parametrized ``dense_feature_num``; it used to be ignored and
    # ``sparse_feature_num`` was passed in its place.
    linear_feature_columns, dnn_feature_columns, input_fn = get_test_data_estimator(
        sample_size,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=dense_feature_num)

    model = CCPMEstimator(linear_feature_columns, dnn_feature_columns, conv_kernel_width=(3, 2),
                          conv_filters=(2, 1), dnn_hidden_units=[32, ], dnn_dropout=0.5)

    check_estimator(model, input_fn)
def test_DCN_2():
    """Check a DCN model built with an empty list of linear feature columns."""
    features, labels, columns = get_test_data(SAMPLE_SIZE, sparse_feature_num=3, dense_feature_num=2)

    no_linear_columns = []
    dcn = DCN(no_linear_columns, columns, cross_num=1, dnn_hidden_units=(8,), dnn_dropout=0.5)
    check_model(dcn, "DCN", features, labels)
def get_xy_fd(use_neg=False, hash_flag=False):
    """Build the three-sample dataset and feature columns used by the DIEN tests.

    :param use_neg: when true, also add the ``neg_hist_item`` and
        ``neg_hist_item_gender`` sequence columns together with their data.
    :param hash_flag: forwarded as the third positional argument of the four
        scalar ``SparseFeat`` columns.
    :return: ``(x, y, feature_columns, behavior_feature_list)`` where ``x`` maps
        every name from ``get_feature_names(feature_columns)`` to its array.
    """
    # NOTE(review): ``hash_flag`` is passed positionally here, whereas the DSIN
    # test fixture passes ``use_hash=hash_flag`` by keyword. Confirm that the
    # third positional parameter of SparseFeat really is ``use_hash``.

    def _history_column(name, vocabulary_size, embedding_dim, shared_embedding):
        # Sequence column of max length 4 whose length comes from "seq_length".
        base = SparseFeat(name, vocabulary_size=vocabulary_size, embedding_dim=embedding_dim,
                          embedding_name=shared_embedding)
        return VarLenSparseFeat(base, maxlen=4, length_name="seq_length")

    scalar_specs = (('user', 3), ('gender', 2), ('item', 3 + 1), ('item_gender', 2 + 1))
    feature_columns = [SparseFeat(name, size, hash_flag) for name, size in scalar_specs]
    feature_columns.append(DenseFeat('score', 1))
    feature_columns.extend([
        _history_column('hist_item', 3 + 1, 8, 'item'),
        _history_column('hist_item_gender', 2 + 1, 4, 'item_gender'),
    ])

    behavior_feature_list = ["item", "item_gender"]

    feature_dict = {
        'user': np.array([0, 1, 2]),
        'gender': np.array([0, 1, 0]),
        'item': np.array([1, 2, 3]),  # 0 is mask value
        'item_gender': np.array([1, 2, 1]),  # 0 is mask value
        'hist_item': np.array([[1, 2, 3, 0], [1, 2, 3, 0], [1, 2, 0, 0]]),
        'hist_item_gender': np.array([[1, 1, 2, 0], [2, 1, 1, 0], [2, 1, 0, 0]]),
        'score': np.array([0.1, 0.2, 0.3]),
        "seq_length": np.array([3, 3, 2]),
    }

    if use_neg:
        feature_dict.update({
            'neg_hist_item': np.array([[1, 2, 3, 0], [1, 2, 3, 0], [1, 2, 0, 0]]),
            'neg_hist_item_gender': np.array([[1, 1, 2, 0], [2, 1, 1, 0], [2, 1, 0, 0]]),
        })
        feature_columns.extend([
            _history_column('neg_hist_item', 3 + 1, 8, 'item'),
            _history_column('neg_hist_item_gender', 2 + 1, 4, 'item_gender'),
        ])

    # Keep only the inputs the feature columns actually name, in that order.
    x = {}
    for name in get_feature_names(feature_columns):
        x[name] = feature_dict[name]
    y = np.array([1, 0, 1])
    return x, y, feature_columns, behavior_feature_list
@pytest.mark.parametrize(
    'att_head_num,dnn_hidden_units,sparse_feature_num',
    [(1, (4,), 2), (2, (4, 4,), 2), (1, (4,), 1)]
)
def test_DIFM(att_head_num, dnn_hidden_units, sparse_feature_num):
    """Build a DIFM model for one parameter combination and hand it to ``check_model``.

    As many dense features are generated as sparse ones.
    """
    model_name = "DIFM"
    sample_size = SAMPLE_SIZE
    x, y, feature_columns = get_test_data(sample_size, sparse_feature_num=sparse_feature_num,
                                          dense_feature_num=sparse_feature_num)

    # Forward the parametrized head count; it was previously accepted by the
    # test but never passed on, so every case ran with the model's default.
    model = DIFM(feature_columns, feature_columns, att_head_num=att_head_num,
                 dnn_hidden_units=dnn_hidden_units, dnn_dropout=0.5)
    check_model(model, model_name, x, y)
def test_DIN():
    """Build a DIN model on the local fixture and hand it to ``check_model``.

    The attention activation is 'sigmoid' on TensorFlow >= 2.8.0 and 'dice' on
    older versions.
    """
    features, labels, columns, history_names = get_xy_fd(True)

    tf_at_least_2_8 = version.parse(tf.__version__) >= version.parse('2.8.0')
    att_activation = 'sigmoid' if tf_at_least_2_8 else 'dice'  # todo:

    din = DIN(
        columns,
        history_names,
        dnn_hidden_units=[4, 4, 4],
        att_activation=att_activation,
        dnn_dropout=0.5,
    )
    # todo test dice
    check_model(din, "DIN", features, labels)
@pytest.mark.parametrize('bias_encoding', [True, False])
def test_DSIN(bias_encoding):
    """Build a two-session DSIN model, with or without bias encoding, and check it."""
    features, labels, columns, session_feature_names = get_xy_fd(True)

    dsin = DSIN(
        columns,
        session_feature_names,
        sess_max_count=2,
        bias_encoding=bias_encoding,
        dnn_hidden_units=[4, 4],
        dnn_dropout=0.5,
    )
    check_model(dsin, "DSIN", features, labels)
@pytest.mark.parametrize(
    'hidden_size,sparse_feature_num',
    [((2,), 2), ((), 2)]
)
def test_DeepFEFMEstimator(hidden_size, sparse_feature_num):
    """Build a ``DeepFEFMEstimator`` and hand it to ``check_estimator``.

    Does nothing when ``TEST_Estimator`` is falsy or TensorFlow is exactly 1.4.0.
    """
    import tensorflow as tf

    should_run = TEST_Estimator and tf.__version__ != "1.4.0"
    if not should_run:
        return

    # Imported lazily so the estimator package is only loaded when it is tested.
    from deepctr.estimator import DeepFEFMEstimator

    # As many dense features are generated as sparse ones.
    linear_columns, dnn_columns, input_fn = get_test_data_estimator(
        SAMPLE_SIZE,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=sparse_feature_num,
    )

    estimator = DeepFEFMEstimator(linear_columns, dnn_columns,
                                  dnn_hidden_units=hidden_size, dnn_dropout=0.5)
    check_estimator(estimator, input_fn)
@pytest.mark.parametrize('hidden_size,sparse_feature_num', [((3,), 2)])
def test_DeepFMEstimator(hidden_size, sparse_feature_num):
    """Train and evaluate a regression ``DeepFMEstimator`` on random data.

    Does nothing when ``TEST_Estimator`` is false for the installed TensorFlow.
    """
    if not TEST_Estimator:
        return
    from deepctr.estimator import DeepFMEstimator

    # Regression labels: the estimator below is created with task="regression".
    linear_columns, dnn_columns, input_fn = get_test_data_estimator(
        SAMPLE_SIZE,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=sparse_feature_num,
        classification=False,
    )
    estimator = DeepFMEstimator(linear_columns, dnn_columns,
                                dnn_hidden_units=hidden_size, dnn_dropout=0.5, task="regression")
    check_estimator(estimator, input_fn)
@pytest.mark.parametrize('sparse_feature_num,dense_feature_num', [(1, 1), (3, 3)])
def test_FGCNN(sparse_feature_num, dense_feature_num):
    """Run ``check_model`` on a two-layer FGCNN without the model save/load step."""
    inputs, labels, columns = get_test_data(SAMPLE_SIZE, embedding_size=8,
                                            sparse_feature_num=sparse_feature_num,
                                            dense_feature_num=dense_feature_num)
    conv_options = dict(conv_kernel_width=(3, 2), conv_filters=(2, 1),
                        new_maps=(2, 2), pooling_width=(2, 2))
    net = FGCNN(columns, columns, dnn_hidden_units=(32,), dnn_dropout=0.5, **conv_options)
    # TODO: add model_io check
    check_model(net, "FGCNN", inputs, labels, check_model_io=False)
@pytest.mark.parametrize(
    'sparse_feature_num,dense_feature_num',
    [(1, 1), (3, 3)
     ]
)
def test_FNN(sparse_feature_num, dense_feature_num):
    """Run ``check_model`` on FNN; does nothing on TensorFlow 2.0.0 and later.

    :param sparse_feature_num: number of sparse features in the random fixture
    :param dense_feature_num: number of dense features in the random fixture
    """
    # Imported locally because this test module has no file-level
    # ``packaging`` import; other test modules in the suite use the same helper.
    from packaging import version

    # Compare parsed versions, not raw strings: a string comparison orders
    # character by character (so "10.0.0" sorts below "2.0.0").
    if version.parse(tf.__version__) >= version.parse("2.0.0"):
        return
    model_name = "FNN"

    sample_size = SAMPLE_SIZE
    x, y, feature_columns = get_test_data(sample_size, sparse_feature_num=sparse_feature_num,
                                          dense_feature_num=dense_feature_num)

    model = FNN(feature_columns, feature_columns, dnn_hidden_units=[8, 8], dnn_dropout=0.5)
    check_model(model, model_name, x, y)
@pytest.mark.parametrize('bilinear_type', ["each", "all", "interaction"])
def test_FiBiNET(bilinear_type):
    """Run ``check_model`` on FiBiNET for each supported ``bilinear_type`` value listed."""
    inputs, labels, columns = get_test_data(SAMPLE_SIZE, sparse_feature_num=2, dense_feature_num=2)
    net = FiBiNET(columns, columns,
                  bilinear_type=bilinear_type,
                  dnn_hidden_units=[4],
                  dnn_dropout=0.5)
    check_model(net, "FiBiNET", inputs, labels)
@pytest.mark.parametrize(
    'region_sparse,region_dense,base_sparse,base_dense,bias_sparse,bias_dense',
    [(0, 2, 0, 2, 0, 1), (0, 2, 0, 1, 0, 2), (0, 2, 0, 0, 1, 0),
     #     (0, 1, 1, 2, 1, 1,), (0, 1, 1, 1, 1, 2), (0, 1, 1, 0, 2, 0),
     #     (1, 0, 2, 2, 2, 1), (2, 0, 2, 1, 2, 2), (2, 0, 2, 0, 0, 0)
     ]
)
def test_MLRs(region_sparse, region_dense, base_sparse, base_dense, bias_sparse, bias_dense):
    """Build and compile an MLR model with separate region, base and bias columns.

    Each group of feature columns is generated with its own sparse/dense
    counts and its own name prefix. The model is only constructed and
    compiled here; it is not fitted.

    :param region_sparse: sparse feature count for the region columns
    :param region_dense: dense feature count for the region columns
    :param base_sparse: sparse feature count for the base columns
    :param base_dense: dense feature count for the base columns
    :param bias_sparse: sparse feature count for the bias columns
    :param bias_dense: dense feature count for the bias columns
    """
    model_name = "MLRs"
    _, _, region_feature_columns = get_test_data(SAMPLE_SIZE, sparse_feature_num=region_sparse,
                                                 dense_feature_num=region_dense, prefix='region')
    # The base and bias groups previously reused the region counts, so the
    # base_*/bias_* parameters had no effect on the test.
    _, _, base_feature_columns = get_test_data(SAMPLE_SIZE, sparse_feature_num=base_sparse,
                                               dense_feature_num=base_dense, prefix='base')
    _, _, bias_feature_columns = get_test_data(SAMPLE_SIZE, sparse_feature_num=bias_sparse,
                                               dense_feature_num=bias_dense, prefix='bias')

    model = MLR(region_feature_columns, base_feature_columns, bias_feature_columns=bias_feature_columns)
    model.compile('adam', 'binary_crossentropy',
                  metrics=['binary_crossentropy'])
    print(model_name + " test pass!")
@pytest.mark.parametrize('num_levels,gate_dnn_hidden_units', [(2, ()), (1, (4,))])
def test_PLE(num_levels, gate_dnn_hidden_units):
    """Run ``check_mtl_model`` on a two-task binary PLE model."""
    if tf.__version__ == "1.15.0":  # slow in tf 1.15
        return

    inputs, labels_per_task, columns = get_mtl_test_data()
    ple_options = dict(
        num_levels=num_levels,
        expert_dnn_hidden_units=(8,),
        tower_dnn_hidden_units=(8,),
        gate_dnn_hidden_units=gate_dnn_hidden_units,
        task_types=['binary', 'binary'],
        task_names=['income', 'marital'],
    )
    net = PLE(columns, **ple_options)
    check_mtl_model(net, "PLE", inputs, labels_per_task, task_types=['binary', 'binary'])
@pytest.mark.parametrize(
    'hidden_size,sparse_feature_num',
    [((8,), 1), ((8, 8,), 2)]
)
def test_FNNEstimator(hidden_size, sparse_feature_num):
    """Train and evaluate ``NFMEstimator`` on random data.

    NOTE(review): despite its name this test exercises ``NFMEstimator``
    (it lives in NFM_test.py); the name is kept so existing test selections
    keep working.

    :param hidden_size: DNN hidden layer sizes passed to the estimator
    :param sparse_feature_num: number of sparse (and dense) input features
    """
    if not TEST_Estimator:
        return
    from deepctr.estimator import NFMEstimator

    sample_size = SAMPLE_SIZE
    linear_feature_columns, dnn_feature_columns, input_fn = get_test_data_estimator(
        sample_size,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=sparse_feature_num)

    # Use the parametrized hidden sizes; a hard-coded [8, 8] made both
    # parametrized cases build the same network.
    model = NFMEstimator(linear_feature_columns, dnn_feature_columns,
                         dnn_hidden_units=hidden_size, dnn_dropout=0.5)

    check_estimator(model, input_fn)
@pytest.mark.parametrize(
    'use_inner, use_outter,sparse_feature_num',
    [(True, True, 3), (False, False, 1)]
)
def test_PNN(use_inner, use_outter, sparse_feature_num):
    """Run ``check_model`` on PNN with both product layers on, then both off."""
    inputs, labels, columns = get_test_data(SAMPLE_SIZE,
                                            sparse_feature_num=sparse_feature_num,
                                            dense_feature_num=sparse_feature_num)
    product_switches = dict(use_inner=use_inner, use_outter=use_outter)
    net = PNN(columns, dnn_hidden_units=[4, 4], dnn_dropout=0.5, **product_switches)
    check_model(net, "PNN", inputs, labels)
@pytest.mark.parametrize(
    'sparse_feature_num,dense_feature_num',
    [(2, 1),  # (0, 2)#,(2, 2)
     ]
)
def test_WDLEstimator(sparse_feature_num, dense_feature_num):
    """Train and evaluate ``WDLEstimator`` on random data.

    Does nothing when ``TEST_Estimator`` is false for the installed TensorFlow.

    :param sparse_feature_num: number of sparse input features
    :param dense_feature_num: number of dense input features
    """
    if not TEST_Estimator:
        return
    from deepctr.estimator import WDLEstimator

    sample_size = SAMPLE_SIZE

    # Pass the counts by keyword: the second positional parameter of
    # get_test_data_estimator is embedding_size, so positional arguments were
    # being interpreted as (embedding_size, sparse_feature_num).
    linear_feature_columns, dnn_feature_columns, input_fn = get_test_data_estimator(
        sample_size,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=dense_feature_num)
    model = WDLEstimator(linear_feature_columns, dnn_feature_columns,
                         dnn_hidden_units=[4, 4], dnn_dropout=0.5)
    check_estimator(model, input_fn)
@pytest.mark.parametrize(
    'dnn_hidden_units,cin_layer_size,cin_split_half,cin_activation,sparse_feature_num,dense_feature_dim',
    [
        ((8,), (8,), False, 'relu', 2, 1),
    ]
)
def test_xDeepFMEstimator(dnn_hidden_units, cin_layer_size, cin_split_half, cin_activation, sparse_feature_num,
                          dense_feature_dim):
    """Train and evaluate ``xDeepFMEstimator`` on random data.

    Does nothing when estimator tests are disabled or on TensorFlow 1.4.0.
    ``dense_feature_dim`` is accepted from the parametrization but not used;
    the dense feature count follows ``sparse_feature_num``.
    """
    import tensorflow as tf

    estimator_tests_enabled = TEST_Estimator and tf.__version__ != "1.4.0"
    if not estimator_tests_enabled:
        return
    from deepctr.estimator import xDeepFMEstimator

    linear_columns, dnn_columns, input_fn = get_test_data_estimator(
        SAMPLE_SIZE,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=sparse_feature_num,
    )
    cin_options = dict(cin_layer_size=cin_layer_size, cin_split_half=cin_split_half,
                       cin_activation=cin_activation)
    estimator = xDeepFMEstimator(linear_columns, dnn_columns,
                                 dnn_hidden_units=dnn_hidden_units, dnn_dropout=0.5, **cin_options)
    check_estimator(estimator, input_fn)
def get_test_data(sample_size=1000, embedding_size=4, sparse_feature_num=1, dense_feature_num=1,
                  sequence_feature=None, classification=True, include_length=False, hash_flag=False, prefix='',
                  use_group=False):
    """Generate random feature columns, matching model inputs and labels.

    :param sample_size: number of samples to generate
    :param embedding_size: embedding dimension of sparse and sequence features
    :param sparse_feature_num: number of ``SparseFeat`` columns
    :param dense_feature_num: number of ``DenseFeat`` columns
    :param sequence_feature: iterable of combiner modes, one sequence column per
        mode; the entry ``'weight'`` adds a weighted sequence column instead.
        Defaults to ``['sum', 'mean', 'max', 'weight']``. The argument itself
        is never modified.
    :param classification: binary integer labels if true, random floats otherwise
    :param include_length: also generate a length input per sequence column
    :param hash_flag: forwarded as ``use_hash`` to the sparse columns
    :param prefix: string prepended to every feature and input name
    :param use_group: spread sparse features over the groups "0", "1" and "2"
        instead of ``DEFAULT_GROUP_NAME``
    :return: ``(model_input, y, feature_columns)``
    """
    if sequence_feature is None:
        sequence_feature = ['sum', 'mean', 'max', 'weight']
    else:
        # Work on a private copy: 'weight' is removed below, which must not
        # alter the caller's list and must also work when a tuple is passed.
        sequence_feature = list(sequence_feature)
    feature_columns = []
    model_input = {}

    if 'weight' in sequence_feature:
        feature_columns.append(
            VarLenSparseFeat(SparseFeat(prefix + "weighted_seq", vocabulary_size=2, embedding_dim=embedding_size),
                             maxlen=3, length_name=prefix + "weighted_seq" + "_seq_length",
                             weight_name=prefix + "weight"))
        s_input, s_len_input = gen_sequence(
            2, 3, sample_size)

        model_input[prefix + "weighted_seq"] = s_input
        model_input[prefix + 'weight'] = np.random.randn(sample_size, 3, 1)
        model_input[prefix + "weighted_seq" + "_seq_length"] = s_len_input
        sequence_feature.remove('weight')

    for i in range(sparse_feature_num):
        if use_group:
            group_name = str(i % 3)
        else:
            group_name = DEFAULT_GROUP_NAME
        dim = np.random.randint(1, 10)
        feature_columns.append(
            SparseFeat(prefix + 'sparse_feature_' + str(i), dim, embedding_size, use_hash=hash_flag, dtype=tf.int32,
                       group_name=group_name))

    for i in range(dense_feature_num):
        def transform_fn(x): return (x - 0.0) / 1.0

        feature_columns.append(
            DenseFeat(
                prefix + 'dense_feature_' + str(i),
                1,
                dtype=tf.float32,
                transform_fn=transform_fn
            )
        )

    # Remember each plain sequence column's position so its length input gets
    # a distinct name below (the stale loop variable used before gave every
    # sequence column the same name).
    sequence_index = {}
    for i, mode in enumerate(sequence_feature):
        dim = np.random.randint(1, 10)
        maxlen = np.random.randint(1, 10)
        sequence_column = VarLenSparseFeat(
            SparseFeat(prefix + 'sequence_' + mode, vocabulary_size=dim, embedding_dim=embedding_size),
            maxlen=maxlen, combiner=mode)
        feature_columns.append(sequence_column)
        sequence_index[sequence_column.name] = i

    for fc in feature_columns:
        if isinstance(fc, SparseFeat):
            model_input[fc.name] = np.random.randint(0, fc.vocabulary_size, sample_size)
        elif isinstance(fc, DenseFeat):
            model_input[fc.name] = np.random.random(sample_size)
        else:
            s_input, s_len_input = gen_sequence(
                fc.vocabulary_size, fc.maxlen, sample_size)
            model_input[fc.name] = s_input
            # The weighted sequence column already has its own length name and
            # length input from the block above, so it is left untouched here.
            if include_length and fc.name in sequence_index:
                length_name = prefix + "sequence_" + str(sequence_index[fc.name]) + '_seq_length'
                # NOTE(review): assumes VarLenSparseFeat permits assigning
                # length_name after construction - confirm against its definition.
                fc.length_name = length_name
                model_input[length_name] = s_len_input

    if classification:
        y = np.random.randint(0, 2, sample_size)
    else:
        y = np.random.random(sample_size)

    return model_input, y, feature_columns
layer.get_weights() layer.set_weights(weights) try: expected_output_shape = layer.compute_output_shape(input_shape) except Exception: expected_output_shape = layer._compute_output_shape(input_shape) # test in functional API if isinstance(input_shape, list): if fixed_batch_size: x = [Input(batch_shape=e, dtype=input_dtype) for e in input_shape] if supports_masking: mask = [Input(batch_shape=e[0:2], dtype=bool) for e in input_shape] else: x = [Input(shape=e[1:], dtype=input_dtype) for e in input_shape] if supports_masking: mask = [Input(shape=(e[1],), dtype=bool) for e in input_shape] else: if fixed_batch_size: x = Input(batch_shape=input_shape, dtype=input_dtype) if supports_masking: mask = Input(batch_shape=input_shape[0:2], dtype=bool) else: x = Input(shape=input_shape[1:], dtype=input_dtype) if supports_masking: mask = Input(shape=(input_shape[1],), dtype=bool) if supports_masking: y = layer(Masking()(x), mask=mask) else: y = layer(x) if not (K.dtype(y) == expected_output_dtype): raise AssertionError() # check with the functional API if supports_masking: model = Model([x, mask], y) actual_output = model.predict([input_data, input_mask[0]]) else: model = Model(x, y) actual_output = model.predict(input_data) actual_output_shape = actual_output.shape for expected_dim, actual_dim in zip(expected_output_shape, actual_output_shape): if expected_dim is not None: if not (expected_dim == actual_dim): raise AssertionError("expected_shape", expected_output_shape, "actual_shape", actual_output_shape) if expected_output is not None: assert_allclose(actual_output, expected_output, rtol=1e-3) # test serialization, weight setting at model level model_config = model.get_config() recovered_model = model.__class__.from_config(model_config) if model.weights: weights = model.get_weights() recovered_model.set_weights(weights) _output = recovered_model.predict(input_data) assert_allclose(_output, actual_output, rtol=1e-3) # test training mode (e.g. 
def has_arg(fn, name, accept_all=False):
    """Tell whether callable `fn` can be invoked with `name` as a keyword argument.

    On Python 2 only the argument names are inspected. On Python 3 the
    parameter must also be passable by keyword, so positional-only and
    ``*args``/``**kwargs`` parameters with that name do not count.

    # Arguments
        fn: Callable to inspect.
        name: Keyword argument name to look for.
        accept_all: Result to give when no parameter is called `name`
            but `fn` takes a `**kwargs` argument.

    # Returns
        bool, whether `fn` accepts a `name` keyword argument.
    """
    interpreter = sys.version_info

    if interpreter < (3,):
        spec = inspect.getargspec(fn)
        if accept_all and spec.keywords is not None:
            return True
        return name in spec.args

    if interpreter < (3, 3):
        spec = inspect.getfullargspec(fn)
        if accept_all and spec.varkw is not None:
            return True
        return name in spec.args or name in spec.kwonlyargs

    params = inspect.signature(fn).parameters
    found = params.get(name)
    if found is not None:
        keyword_capable = (inspect.Parameter.POSITIONAL_OR_KEYWORD,
                           inspect.Parameter.KEYWORD_ONLY)
        return found.kind in keyword_capable
    if not accept_all:
        return False
    # No parameter with that name: only a **kwargs catch-all can accept it.
    return any(p.kind == inspect.Parameter.VAR_KEYWORD for p in params.values())
def get_test_data_estimator(sample_size=1000, embedding_size=4, sparse_feature_num=1, dense_feature_num=1,
                            classification=True):
    """Generate random inputs and TensorFlow feature columns for estimator tests.

    :param sample_size: number of samples to generate
    :param embedding_size: embedding dimension of the sparse features
    :param sparse_feature_num: number of categorical features (named ``s_<i>``)
    :param dense_feature_num: number of numeric features (named ``d_<i>``)
    :param classification: binary integer labels if true, random floats otherwise
    :return: ``(linear_feature_columns, dnn_feature_columns, input_fn)``
    """
    x = {}
    dnn_feature_columns = []
    linear_feature_columns = []
    voc_size = 4
    for i in range(sparse_feature_num):
        name = 's_' + str(i)
        x[name] = np.random.randint(0, voc_size, sample_size)
        dnn_feature_columns.append(
            tf.feature_column.embedding_column(tf.feature_column.categorical_column_with_identity(name, voc_size),
                                               embedding_size))
        linear_feature_columns.append(tf.feature_column.categorical_column_with_identity(name, voc_size))

    for i in range(dense_feature_num):
        name = 'd_' + str(i)
        x[name] = np.random.random(sample_size)
        dnn_feature_columns.append(tf.feature_column.numeric_column(name))
        linear_feature_columns.append(tf.feature_column.numeric_column(name))
    if classification:
        y = np.random.randint(0, 2, sample_size)
    else:
        y = np.random.random(sample_size)

    # Compare parsed versions rather than raw strings; string comparison is
    # lexicographic. `version` is packaging.version, imported at the top of
    # this module.
    if version.parse(tf.__version__) >= version.parse("2.0.0"):
        input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(x, y, shuffle=False)
    else:
        input_fn = tf.estimator.inputs.numpy_input_fn(x, y, shuffle=False)

    return linear_feature_columns, dnn_feature_columns, input_fn
def get_mtl_test_data(sample_size=10, embedding_size=4, sparse_feature_num=1,
                      dense_feature_num=1, task_types=('binary', 'binary'),
                      hash_flag=False, prefix='', use_group=False):
    """Generate random inputs, one label array per task, and feature columns.

    :param sample_size: number of samples to generate
    :param embedding_size: embedding dimension of the sparse features
    :param sparse_feature_num: number of ``SparseFeat`` columns
    :param dense_feature_num: number of ``DenseFeat`` columns
    :param task_types: one entry per task; ``'binary'`` gives integer 0/1 labels,
        anything else gives random floats
    :param hash_flag: forwarded as ``use_hash`` to the sparse columns
    :param prefix: string prepended to every feature name
    :param use_group: spread sparse features over the groups "0", "1" and "2"
        instead of ``DEFAULT_GROUP_NAME``
    :return: ``(model_input, y_list, feature_columns)``
    """
    feature_columns = []

    for idx in range(sparse_feature_num):
        group = str(idx % 3) if use_group else DEFAULT_GROUP_NAME
        vocab = np.random.randint(1, 10)
        sparse_name = prefix + 'sparse_feature_' + str(idx)
        feature_columns.append(
            SparseFeat(sparse_name, vocab, embedding_size, use_hash=hash_flag, dtype=tf.int32, group_name=group))

    for idx in range(dense_feature_num):
        def transform_fn(x):
            return (x - 0.0) / 1.0

        dense_name = prefix + 'dense_feature_' + str(idx)
        feature_columns.append(DenseFeat(dense_name, 1, dtype=tf.float32, transform_fn=transform_fn))

    model_input = {}
    for column in feature_columns:
        if isinstance(column, SparseFeat):
            values = np.random.randint(0, column.vocabulary_size, sample_size)
        elif isinstance(column, DenseFeat):
            values = np.random.random(sample_size)
        else:
            continue
        model_input[column.name] = values

    # multi label: one target array per task, drawn in task order
    y_list = [
        np.random.randint(0, 2, sample_size) if task == 'binary' else np.random.random(sample_size)
        for task in task_types
    ]

    return model_input, y_list, feature_columns
def test_check_version():
    """Call ``check_version`` with a string version and with an integer."""
    for candidate in ('0.1.0', 20191231):
        check_version(candidate)