[
  {
    "path": ".gitignore",
    "content": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packaging\n.Python\nbuild/\ndevelop-eggs/\ndist/\ndownloads/\neggs/\n.eggs/\nlib/\nlib64/\nparts/\nsdist/\nvar/\nwheels/\n*.egg-info/\n.installed.cfg\n*.egg\nMANIFEST\n\n# PyInstaller\n#  Usually these files are written by a python script from a template\n#  before PyInstaller builds the exe, so as to inject date/other infos into it.\n*.manifest\n*.spec\n\n# Installer logs\npip-log.txt\npip-delete-this-directory.txt\n\n# Unit test / coverage reports\nhtmlcov/\n.tox/\n.coverage\n.coverage.*\n.cache\nnosetests.xml\ncoverage.xml\n*.cover\n.hypothesis/\n.pytest_cache/\n\n# Translations\n*.mo\n*.pot\n\n# Django stuff:\n*.log\nlocal_settings.py\ndb.sqlite3\n\n# Flask stuff:\ninstance/\n.webassets-cache\n\n# Scrapy stuff:\n.scrapy\n\n# Sphinx documentation\ndocs/_build/\n\n# PyBuilder\ntarget/\n\n# Jupyter Notebook\n.ipynb_checkpoints\n\n# pyenv\n.python-version\n\n# celery beat schedule file\ncelerybeat-schedule\n\n# SageMath parsed files\n*.sage.py\n\n# Environments\n.env\n.venv\nenv/\nvenv/\nENV/\nenv.bak/\nvenv.bak/\n\n# Spyder project settings\n.spyderproject\n.spyproject\n\n# Rope project settings\n.ropeproject\n\n# mkdocs documentation\n/site\n\n# mypy\n.mypy_cache/\n\n# Data files\ndata/\n\n# Global model files\n**/global\n"
  },
  {
    "path": "LICENSE",
    "content": "                                 Apache License\n                           Version 2.0, January 2004\n                        http://www.apache.org/licenses/\n\n   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n\n   1. Definitions.\n\n      \"License\" shall mean the terms and conditions for use, reproduction,\n      and distribution as defined by Sections 1 through 9 of this document.\n\n      \"Licensor\" shall mean the copyright owner or entity authorized by\n      the copyright owner that is granting the License.\n\n      \"Legal Entity\" shall mean the union of the acting entity and all\n      other entities that control, are controlled by, or are under common\n      control with that entity. For the purposes of this definition,\n      \"control\" means (i) the power, direct or indirect, to cause the\n      direction or management of such entity, whether by contract or\n      otherwise, or (ii) ownership of fifty percent (50%) or more of the\n      outstanding shares, or (iii) beneficial ownership of such entity.\n\n      \"You\" (or \"Your\") shall mean an individual or Legal Entity\n      exercising permissions granted by this License.\n\n      \"Source\" form shall mean the preferred form for making modifications,\n      including but not limited to software source code, documentation\n      source, and configuration files.\n\n      \"Object\" form shall mean any form resulting from mechanical\n      transformation or translation of a Source form, including but\n      not limited to compiled object code, generated documentation,\n      and conversions to other media types.\n\n      \"Work\" shall mean the work of authorship, whether in Source or\n      Object form, made available under the License, as indicated by a\n      copyright notice that is included in or attached to the work\n      (an example is provided in the Appendix below).\n\n      \"Derivative Works\" shall mean any work, whether in Source or Object\n      form, that is based on (or derived from) the Work and for which the\n      editorial revisions, annotations, elaborations, or other modifications\n      represent, as a whole, an original work of authorship. For the purposes\n      of this License, Derivative Works shall not include works that remain\n      separable from, or merely link (or bind by name) to the interfaces of,\n      the Work and Derivative Works thereof.\n\n      \"Contribution\" shall mean any work of authorship, including\n      the original version of the Work and any modifications or additions\n      to that Work or Derivative Works thereof, that is intentionally\n      submitted to Licensor for inclusion in the Work by the copyright owner\n      or by an individual or Legal Entity authorized to submit on behalf of\n      the copyright owner. 
For the purposes of this definition, \"submitted\"\n      means any form of electronic, verbal, or written communication sent\n      to the Licensor or its representatives, including but not limited to\n      communication on electronic mailing lists, source code control systems,\n      and issue tracking systems that are managed by, or on behalf of, the\n      Licensor for the purpose of discussing and improving the Work, but\n      excluding communication that is conspicuously marked or otherwise\n      designated in writing by the copyright owner as \"Not a Contribution.\"\n\n      \"Contributor\" shall mean Licensor and any individual or Legal Entity\n      on behalf of whom a Contribution has been received by Licensor and\n      subsequently incorporated within the Work.\n\n   2. Grant of Copyright License. Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      copyright license to reproduce, prepare Derivative Works of,\n      publicly display, publicly perform, sublicense, and distribute the\n      Work and such Derivative Works in Source or Object form.\n\n   3. Grant of Patent License. Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      (except as stated in this section) patent license to make, have made,\n      use, offer to sell, sell, import, and otherwise transfer the Work,\n      where such license applies only to those patent claims licensable\n      by such Contributor that are necessarily infringed by their\n      Contribution(s) alone or by combination of their Contribution(s)\n      with the Work to which such Contribution(s) was submitted. If You\n      institute patent litigation against any entity (including a\n      cross-claim or counterclaim in a lawsuit) alleging that the Work\n      or a Contribution incorporated within the Work constitutes direct\n      or contributory patent infringement, then any patent licenses\n      granted to You under this License for that Work shall terminate\n      as of the date such litigation is filed.\n\n   4. Redistribution. 
You may reproduce and distribute copies of the\n      Work or Derivative Works thereof in any medium, with or without\n      modifications, and in Source or Object form, provided that You\n      meet the following conditions:\n\n      (a) You must give any other recipients of the Work or\n          Derivative Works a copy of this License; and\n\n      (b) You must cause any modified files to carry prominent notices\n          stating that You changed the files; and\n\n      (c) You must retain, in the Source form of any Derivative Works\n          that You distribute, all copyright, patent, trademark, and\n          attribution notices from the Source form of the Work,\n          excluding those notices that do not pertain to any part of\n          the Derivative Works; and\n\n      (d) If the Work includes a \"NOTICE\" text file as part of its\n          distribution, then any Derivative Works that You distribute must\n          include a readable copy of the attribution notices contained\n          within such NOTICE file, excluding those notices that do not\n          pertain to any part of the Derivative Works, in at least one\n          of the following places: within a NOTICE text file distributed\n          as part of the Derivative Works; within the Source form or\n          documentation, if provided along with the Derivative Works; or,\n          within a display generated by the Derivative Works, if and\n          wherever such third-party notices normally appear. The contents\n          of the NOTICE file are for informational purposes only and\n          do not modify the License. You may add Your own attribution\n          notices within Derivative Works that You distribute, alongside\n          or as an addendum to the NOTICE text from the Work, provided\n          that such additional attribution notices cannot be construed\n          as modifying the License.\n\n      You may add Your own copyright statement to Your modifications and\n      may provide additional or different license terms and conditions\n      for use, reproduction, or distribution of Your modifications, or\n      for any such Derivative Works as a whole, provided Your use,\n      reproduction, and distribution of the Work otherwise complies with\n      the conditions stated in this License.\n\n   5. Submission of Contributions. Unless You explicitly state otherwise,\n      any Contribution intentionally submitted for inclusion in the Work\n      by You to the Licensor shall be under the terms and conditions of\n      this License, without any additional terms or conditions.\n      Notwithstanding the above, nothing herein shall supersede or modify\n      the terms of any separate license agreement you may have executed\n      with Licensor regarding such Contributions.\n\n   6. Trademarks. This License does not grant permission to use the trade\n      names, trademarks, service marks, or product names of the Licensor,\n      except as required for reasonable and customary use in describing the\n      origin of the Work and reproducing the content of the NOTICE file.\n\n   7. Disclaimer of Warranty. 
Unless required by applicable law or\n      agreed to in writing, Licensor provides the Work (and each\n      Contributor provides its Contributions) on an \"AS IS\" BASIS,\n      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n      implied, including, without limitation, any warranties or conditions\n      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A\n      PARTICULAR PURPOSE. You are solely responsible for determining the\n      appropriateness of using or redistributing the Work and assume any\n      risks associated with Your exercise of permissions under this License.\n\n   8. Limitation of Liability. In no event and under no legal theory,\n      whether in tort (including negligence), contract, or otherwise,\n      unless required by applicable law (such as deliberate and grossly\n      negligent acts) or agreed to in writing, shall any Contributor be\n      liable to You for damages, including any direct, indirect, special,\n      incidental, or consequential damages of any character arising as a\n      result of this License or out of the use or inability to use the\n      Work (including but not limited to damages for loss of goodwill,\n      work stoppage, computer failure or malfunction, or any and all\n      other commercial damages or losses), even if such Contributor\n      has been advised of the possibility of such damages.\n\n   9. Accepting Warranty or Additional Liability. While redistributing\n      the Work or Derivative Works thereof, You may choose to offer,\n      and charge a fee for, acceptance of support, warranty, indemnity,\n      or other liability obligations and/or rights consistent with this\n      License. However, in accepting such obligations, You may act only\n      on Your own behalf and on Your sole responsibility, not on behalf\n      of any other Contributor, and only if You agree to indemnify,\n      defend, and hold each Contributor harmless for any liability\n      incurred by, or claims asserted against, such Contributor by reason\n      of your accepting any such warranty or additional liability.\n\n   END OF TERMS AND CONDITIONS\n\n   APPENDIX: How to apply the Apache License to your work.\n\n      To apply the Apache License to your work, attach the following\n      boilerplate notice, with the fields enclosed by brackets \"[]\"\n      replaced with your own identifying information. (Don't include\n      the brackets!)  The text should be enclosed in the appropriate\n      comment syntax for the file format. We also recommend that a\n      file or class name and description of purpose be included on the\n      same \"printed page\" as the copyright notice for easier\n      identification within third-party archives.\n\n   Copyright [yyyy] [name of copyright owner]\n\n   Licensed under the Apache License, Version 2.0 (the \"License\");\n   you may not use this file except in compliance with the License.\n   You may obtain a copy of the License at\n\n       http://www.apache.org/licenses/LICENSE-2.0\n\n   Unless required by applicable law or agreed to in writing, software\n   distributed under the License is distributed on an \"AS IS\" BASIS,\n   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n   See the License for the specific language governing permissions and\n   limitations under the License.\n"
  },
  {
    "path": "README.md",
    "content": "# FLSim\n\n## About\n\nWelcome to **FLSim**, a PyTorch based federated learning simulation framework, created for experimental research in a paper accepted by [IEEE INFOCOM 2020](https://infocom2020.ieee-infocom.org):\n\n[Hao Wang](https://www.haow.ca), Zakhary Kaplan, [Di Niu](https://sites.ualberta.ca/~dniu/Homepage/Home.html), [Baochun Li](http://iqua.ece.toronto.edu/bli/index.html). \"Optimizing Federated Learning on Non-IID Data with Reinforcement Learning,\" in the Proceedings of IEEE INFOCOM, Beijing, China, April 27-30, 2020.\n\n\n\n## Installation\n\nTo install **FLSim**, all that needs to be done is clone this repository to the desired directory.\n\n### Dependencies\n\n**FLSim** uses [Anaconda](https://www.anaconda.com/distribution/) to manage Python and it's dependencies, listed in [`environment.yml`](environment.yml). To install the `fl-py37` Python environment, set up Anaconda (or Miniconda), then download the environment dependencies with:\n\n```shell\nconda env create -f environment.yml\n```\n\n## Usage\n\nBefore using the repository, make sure to activate the `fl-py37` environment with:\n\n```shell\nconda activate fl-py37\n```\n\n### Simulation\n\nTo start a simulation, run [`run.py`](run.py) from the repository's root directory:\n\n```shell\npython run.py\n  --config=config.json\n  --log=INFO\n```\n\n##### `run.py` flags\n\n* `--config` (`-c`): path to the configuration file to be used.\n* `--log` (`-l`): level of logging info to be written to console, defaults to `INFO`.\n\n##### `config.json` files\n\n**FLSim** uses a JSON file to manage the configuration parameters for a federated learning simulation. Provided in the repository is a generic template and three preconfigured simulation files for the CIFAR-10, FashionMNIST, and MNIST datasets.\n\nFor a detailed list of configuration options, see the [wiki page](https://github.com/iQua/flsim/wiki/Configuration).\n\nIf you have any questions, please feel free to contact Hao Wang (haowang@ece.utoronto.ca)\n"
  },
  {
    "path": "client.py",
    "content": "import logging\nimport torch\nimport torch.nn as nn\nimport torch.optim as optim\n\n\nclass Client(object):\n    \"\"\"Simulated federated learning client.\"\"\"\n\n    def __init__(self, client_id):\n        self.client_id = client_id\n\n    def __repr__(self):\n        return 'Client #{}: {} samples in labels: {}'.format(\n            self.client_id, len(self.data), set([label for _, label in self.data]))\n\n    # Set non-IID data configurations\n    def set_bias(self, pref, bias):\n        self.pref = pref\n        self.bias = bias\n\n    def set_shard(self, shard):\n        self.shard = shard\n\n    # Server interactions\n    def download(self, argv):\n        # Download from the server.\n        try:\n            return argv.copy()\n        except:\n            return argv\n\n    def upload(self, argv):\n        # Upload to the server\n        try:\n            return argv.copy()\n        except:\n            return argv\n\n    # Federated learning phases\n    def set_data(self, data, config):\n        # Extract from config\n        do_test = self.do_test = config.clients.do_test\n        test_partition = self.test_partition = config.clients.test_partition\n\n        # Download data\n        self.data = self.download(data)\n\n        # Extract trainset, testset (if applicable)\n        data = self.data\n        if do_test:  # Partition for testset if applicable\n            self.trainset = data[:int(len(data) * (1 - test_partition))]\n            self.testset = data[int(len(data) * (1 - test_partition)):]\n        else:\n            self.trainset = data\n\n    def configure(self, config):\n        import fl_model  # pylint: disable=import-error\n\n        # Extract from config\n        model_path = self.model_path = config.paths.model\n\n        # Download from server\n        config = self.download(config)\n\n        # Extract machine learning task from config\n        self.task = config.fl.task\n        self.epochs = config.fl.epochs\n        self.batch_size = config.fl.batch_size\n\n        # Download most recent global model\n        path = model_path + '/global'\n        self.model = fl_model.Net()\n        self.model.load_state_dict(torch.load(path))\n        self.model.eval()\n\n        # Create optimizer\n        self.optimizer = fl_model.get_optimizer(self.model)\n\n    def run(self):\n        # Perform federated learning task\n        {\n            \"train\": self.train()\n        }[self.task]\n\n    def get_report(self):\n        # Report results to server.\n        return self.upload(self.report)\n\n    # Machine learning tasks\n    def train(self):\n        import fl_model  # pylint: disable=import-error\n\n        logging.info('Training on client #{}'.format(self.client_id))\n\n        # Perform model training\n        trainloader = fl_model.get_trainloader(self.trainset, self.batch_size)\n        fl_model.train(self.model, trainloader,\n                       self.optimizer, self.epochs)\n\n        # Extract model weights and biases\n        weights = fl_model.extract_weights(self.model)\n\n        # Generate report for server\n        self.report = Report(self)\n        self.report.weights = weights\n\n        # Perform model testing if applicable\n        if self.do_test:\n            testloader = fl_model.get_testloader(self.testset, 1000)\n            self.report.accuracy = fl_model.test(self.model, testloader)\n\n    def test(self):\n        # Perform model testing\n        raise NotImplementedError\n\n\nclass Report(object):\n    \"\"\"Federated 
learning client report.\"\"\"\n\n    def __init__(self, client):\n        self.client_id = client.client_id\n        self.num_samples = len(client.data)\n"
  },
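  {
    "path": "examples/client_lifecycle.py",
    "content": "# Hypothetical sketch, not part of the original repository: it illustrates\n# the Client phases that the server drives in each round of client.py.\n# Assumptions: fl_model is importable (the chosen model directory is on\n# sys.path), a global model has already been saved to <paths.model>/global,\n# and `data` is a list of (sample, label) pairs for this client.\n\nimport client\nimport config\n\nfl_config = config.Config('./configs/MNIST/mnist.json')\ndata = []  # placeholder; normally drawn from a load_data Loader partition\n\nc = client.Client(client_id=0)\nc.set_data(data, fl_config)  # split into trainset/testset per config.clients\nc.configure(fl_config)       # load the latest global model and an optimizer\nc.run()                      # dispatch the configured task (here: 'train')\nreport = c.get_report()      # weights (and accuracy when do_test is set)\n"
  },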
  {
    "path": "config.py",
    "content": "from collections import namedtuple\nimport json\n\n\nclass Config(object):\n    \"\"\"Configuration module.\"\"\"\n\n    def __init__(self, config):\n        self.paths = \"\"\n        # Load config file\n        with open(config, 'r') as config:\n            self.config = json.load(config)\n        # Extract configuration\n        self.extract()\n\n    def extract(self):\n        config = self.config\n\n        # -- Clients --\n        fields = ['total', 'per_round', 'label_distribution',\n                  'do_test', 'test_partition']\n        defaults = (0, 0, 'uniform', False, None)\n        params = [config['clients'].get(field, defaults[i])\n                  for i, field in enumerate(fields)]\n        self.clients = namedtuple('clients', fields)(*params)\n\n        assert self.clients.per_round <= self.clients.total\n\n        # -- Data --\n        fields = ['loading', 'partition', 'IID', 'bias', 'shard']\n        defaults = ('static', 0, True, None, None)\n        params = [config['data'].get(field, defaults[i])\n                  for i, field in enumerate(fields)]\n        self.data = namedtuple('data', fields)(*params)\n\n        # Determine correct data loader\n        assert self.data.IID ^ bool(self.data.bias) ^ bool(self.data.shard)\n        if self.data.IID:\n            self.loader = 'basic'\n        elif self.data.bias:\n            self.loader = 'bias'\n        elif self.data.shard:\n            self.loader = 'shard'\n\n        # -- Federated learning --\n        fields = ['rounds', 'target_accuracy', 'task', 'epochs', 'batch_size']\n        defaults = (0, None, 'train', 0, 0)\n        params = [config['federated_learning'].get(field, defaults[i])\n                  for i, field in enumerate(fields)]\n        self.fl = namedtuple('fl', fields)(*params)\n\n        # -- Model --\n        self.model = config['model']\n\n        # -- Paths --\n        fields = ['data', 'model', 'reports']\n        defaults = ('./data', './models', None)\n        params = [config['paths'].get(field, defaults[i])\n                  for i, field in enumerate(fields)]\n        # Set specific model path\n        params[fields.index('model')] += '/' + self.model\n\n        self.paths = namedtuple('paths', fields)(*params)\n\n        # -- Server --\n        self.server = config['server']\n"
  },
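  {
    "path": "examples/config_usage.py",
    "content": "# Hypothetical sketch, not part of the original repository: it shows how\n# config.Config exposes each configuration section as a namedtuple with\n# attribute access. Assumes configs/MNIST/mnist.json exists as in this repo.\n\nimport config\n\nfl_config = config.Config('./configs/MNIST/mnist.json')\n\nprint(fl_config.clients.total)      # 100\nprint(fl_config.clients.per_round)  # 10\nprint(fl_config.fl.rounds)          # 10000\nprint(fl_config.loader)             # 'basic', since data.IID is true\nprint(fl_config.paths.model)        # './models/MNIST' (model name appended)\n"
  },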
  {
    "path": "configs/CIFAR-10/cifar-10.json",
    "content": "{\n    \"clients\": {\n        \"total\": 100,\n        \"per_round\": 10\n    },\n    \"data\": {\n        \"loading\": \"static\",\n        \"partition\": {\n            \"size\": 600\n        },\n        \"IID\": true\n    },\n    \"federated_learning\": {\n        \"rounds\": 10000,\n        \"target_accuracy\": 0.99,\n        \"task\": \"train\",\n        \"epochs\": 5,\n        \"batch_size\": 10\n    },\n    \"model\": \"CIFAR-10\",\n    \"paths\": {\n        \"data\": \"./data\",\n        \"model\": \"./models\"\n    },\n    \"server\": \"basic\"\n}"
  },
  {
    "path": "configs/FashionMNIST/fashionmnist.json",
    "content": "{\n    \"clients\": {\n        \"total\": 100,\n        \"per_round\": 10\n    },\n    \"data\": {\n        \"loading\": \"static\",\n        \"partition\": {\n            \"size\": 600\n        },\n        \"IID\": true\n    },\n    \"federated_learning\": {\n        \"rounds\": 10000,\n        \"target_accuracy\": 0.99,\n        \"task\": \"train\",\n        \"epochs\": 5,\n        \"batch_size\": 10\n    },\n    \"model\": \"FashionMNIST\",\n    \"paths\": {\n        \"data\": \"./data\",\n        \"model\": \"./models\"\n    },\n    \"server\": \"basic\"\n}"
  },
  {
    "path": "configs/MNIST/mnist.json",
    "content": "{\n    \"clients\": {\n        \"total\": 100,\n        \"per_round\": 10\n    },\n    \"data\": {\n        \"loading\": \"static\",\n        \"partition\": {\n            \"size\": 600\n        },\n        \"IID\": true\n    },\n    \"federated_learning\": {\n        \"rounds\": 10000,\n        \"target_accuracy\": 0.99,\n        \"task\": \"train\",\n        \"epochs\": 5,\n        \"batch_size\": 10\n    },\n    \"model\": \"MNIST\",\n    \"paths\": {\n        \"data\": \"./data\",\n        \"model\": \"./models\"\n    },\n    \"server\": \"basic\"\n}"
  },
  {
    "path": "configs/config.json.template",
    "content": "{\n    \"clients\": {\n        \"total\": 1000,\n        \"per_round\": 20,\n        \"label_distribution\": \"uniform\",\n        \"do_test\": false,\n        \"test_partition\": 0.2\n    },\n    \"data\": {\n        \"loading\": \"dynamic\",\n        \"partition\": {\n            \"size\": 600,\n            \"range\": [\n                50,\n                200\n            ]\n        },\n        \"IID\": false,\n        \"bias\": {\n            \"primary\": 0.8,\n            \"secondary\": false\n        }\n    },\n    \"federated_learning\": {\n        \"rounds\": 200,\n        \"target_accuracy\": 0.95,\n        \"task\": \"train\",\n        \"epochs\": 5,\n        \"batch_size\": 10\n    },\n    \"model\": \"MNIST\",\n    \"paths\": {\n        \"data\": \"./data\",\n        \"model\": \"./models\",\n        \"reports\": \"reports.pkl\"\n    },\n    \"server\": \"basic\"\n}\n"
  },
  {
    "path": "environment.yml",
    "content": "name: fl-py37\nchannels:\n  - pytorch\n  - defaults\ndependencies:\n  - astroid=2.2.5\n  - autopep8=1.4.4\n  - blas=1.0\n  - ca-certificates=2019.5.15\n  - certifi=2019.6.16\n  - cffi=1.12.3\n  - cycler=0.10.0\n  - freetype=2.9.1\n  - intel-openmp=2019.4\n  - isort=4.3.20\n  - joblib=0.13.2\n  - jpeg=9b\n  - kiwisolver=1.1.0\n  - lazy-object-proxy=1.4.1\n  - libedit=3.1.20181209\n  - libffi=3.2.1\n  - libpng=1.6.37\n  - libtiff=4.0.10\n  - matplotlib=3.1.0\n  - mccabe=0.6.1\n  - mkl=2019.4\n  - mkl-service=2.0.2\n  - mkl_fft=1.0.12\n  - mkl_random=1.0.2\n  - ncurses=6.1\n  - ninja=1.9.0\n  - numpy=1.16.4\n  - numpy-base=1.16.4\n  - olefile=0.46\n  - openssl=1.1.1c\n  - pandas=0.24.2\n  - pillow=6.0.0\n  - pip=19.1.1\n  - pycodestyle=2.5.0\n  - pycparser=2.19\n  - pylint=2.3.1\n  - pyparsing=2.4.0\n  - python=3.7.3\n  - python-dateutil=2.8.0\n  - pytorch=1.1.0\n  - pytz=2019.1\n  - readline=7.0\n  - rope=0.14.0\n  - scikit-learn=0.21.2\n  - scipy=1.2.1\n  - setuptools=41.0.1\n  - six=1.12.0\n  - sqlite=3.28.0\n  - tk=8.6.8\n  - torchvision=0.3.0\n  - tornado=6.0.2\n  - wheel=0.33.4\n  - wrapt=1.11.1\n  - xz=5.2.4\n  - zlib=1.2.11\n  - zstd=1.3.7\nprefix: /Users/zakharykaplan/.miniconda3/envs/fl-py37\n\n"
  },
  {
    "path": "load_data.py",
    "content": "import logging\nimport random\nfrom torchvision import datasets, transforms\nimport utils.dists as dists\n\n\nclass Generator(object):\n    \"\"\"Generate federated learning training and testing data.\"\"\"\n\n    # Abstract read function\n    def read(self, path):\n        # Read the dataset, set: trainset, testset, labels\n        raise NotImplementedError\n\n    # Group the data by label\n    def group(self):\n        # Create empty dict of labels\n        grouped_data = {label: []\n                        for label in self.labels}  # pylint: disable=no-member\n\n        # Populate grouped data dict\n        for datapoint in self.trainset:  # pylint: disable=all\n            _, label = datapoint  # Extract label\n            label = self.labels[label]\n\n            grouped_data[label].append(  # pylint: disable=no-member\n                datapoint)\n\n        self.trainset = grouped_data  # Overwrite trainset with grouped data\n\n    # Run data generation\n    def generate(self, path):\n        self.read(path)\n        self.trainset_size = len(self.trainset)  # Extract trainset size\n        self.group()\n\n        return self.trainset\n\n\nclass Loader(object):\n    \"\"\"Load and pass IID data partitions.\"\"\"\n\n    def __init__(self, config, generator):\n        # Get data from generator\n        self.config = config\n        self.trainset = generator.trainset\n        self.testset = generator.testset\n        self.labels = generator.labels\n        self.trainset_size = generator.trainset_size\n\n        # Store used data seperately\n        self.used = {label: [] for label in self.labels}\n        self.used['testset'] = []\n\n    def extract(self, label, n):\n        if len(self.trainset[label]) > n:\n            extracted = self.trainset[label][:n]  # Extract data\n            self.used[label].extend(extracted)  # Move data to used\n            del self.trainset[label][:n]  # Remove from trainset\n            return extracted\n        else:\n            logging.warning('Insufficient data in label: {}'.format(label))\n            logging.warning('Dumping used data for reuse')\n\n            # Unmark data as used\n            for label in self.labels:\n                self.trainset[label].extend(self.used[label])\n                self.used[label] = []\n\n            # Extract replenished data\n            return self.extract(label, n)\n\n    def get_partition(self, partition_size):\n        # Get an partition uniform across all labels\n\n        # Use uniform distribution\n        dist = dists.uniform(partition_size, len(self.labels))\n\n        partition = []  # Extract data according to distribution\n        for i, label in enumerate(self.labels):\n            partition.extend(self.extract(label, dist[i]))\n\n        # Shuffle data partition\n        random.shuffle(partition)\n\n        return partition\n\n    def get_testset(self):\n        # Return the entire testset\n        return self.testset\n\n\nclass BiasLoader(Loader):\n    \"\"\"Load and pass 'preference bias' data partitions.\"\"\"\n\n    def get_partition(self, partition_size, pref):\n        # Get a non-uniform partition with a preference bias\n\n        # Extract bias configuration from config\n        bias = self.config.data.bias['primary']\n        secondary = self.config.data.bias['secondary']\n\n       # Calculate sizes of majorty and minority portions\n        majority = int(partition_size * bias)\n        minority = partition_size - majority\n\n        # Calculate number of minor labels\n      
  len_minor_labels = len(self.labels) - 1\n\n        if secondary:\n                # Distribute to random secondary label\n            dist = [0] * len_minor_labels\n            dist[random.randint(0, len_minor_labels - 1)] = minority\n        else:\n            # Distribute among all minority labels\n            dist = dists.uniform(minority, len_minor_labels)\n\n        # Add majority data to distribution\n        dist.insert(self.labels.index(pref), majority)\n\n        partition = []  # Extract data according to distribution\n        for i, label in enumerate(self.labels):\n            partition.extend(self.extract(label, dist[i]))\n\n        # Shuffle data partition\n        random.shuffle(partition)\n\n        return partition\n\n\nclass ShardLoader(Loader):\n    \"\"\"Load and pass 'shard' data partitions.\"\"\"\n\n    def create_shards(self):\n        # Extract shard configuration from config\n        per_client = self.config.data.shard['per_client']\n\n        # Determine correct total shards, shard size\n        total = self.config.clients.total * per_client\n        shard_size = int(self.trainset_size / total)\n\n        data = []  # Flatten data\n        for _, items in self.trainset.items():\n            data.extend(items)\n\n        shards = [data[(i * shard_size):((i + 1) * shard_size)]\n                  for i in range(total)]\n        random.shuffle(shards)\n\n        self.shards = shards\n        self.used = []\n\n        logging.info('Created {} shards of size {}'.format(\n            len(shards), shard_size))\n\n    def extract_shard(self):\n        shard = self.shards[0]\n        self.used.append(shard)\n        del self.shards[0]\n        return shard\n\n    def get_partition(self):\n        # Get a partition shard\n\n        # Extract number of shards per client\n        per_client = self.config.data.shard['per_client']\n\n        # Create data partition\n        partition = []\n        for i in range(per_client):\n            partition.extend(self.extract_shard())\n\n        # Shuffle data partition\n        random.shuffle(partition)\n\n        return partition\n"
  },
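  {
    "path": "examples/loader_usage.py",
    "content": "# Hypothetical sketch, not part of the original repository: it shows how a\n# dataset Generator feeds a Loader and how an IID partition is drawn.\n# Assumptions: models/MNIST is appended to sys.path so fl_model resolves to\n# models/MNIST/fl_model.py, and ./data is writable for the dataset download.\n\nimport sys\nsys.path.append('./models/MNIST')\n\nimport fl_model  # pylint: disable=import-error\nimport config\nimport load_data\n\nfl_config = config.Config('./configs/MNIST/mnist.json')\n\ngenerator = fl_model.Generator()\ngenerator.generate('./data')  # read the dataset and group it by label\n\nloader = load_data.Loader(fl_config, generator)\npartition = loader.get_partition(600)  # 600 samples, uniform across labels\ntestset = loader.get_testset()\nprint(len(partition), len(testset))\n"
  },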
  {
    "path": "models/CIFAR-10/fl_model.py",
    "content": "import load_data\nimport logging\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport torch.optim as optim\nfrom torchvision import datasets, transforms\n\n# Training settings\nlr = 0.01\nmomentum = 0.9\nlog_interval = 10\n\n# Cuda settings\nuse_cuda = torch.cuda.is_available()\ndevice = torch.device(  # pylint: disable=no-member\n    'cuda' if use_cuda else 'cpu')\n\n\nclass Generator(load_data.Generator):\n    \"\"\"Generator for CIFAR-10 dataset.\"\"\"\n\n    # Extract CIFAR-10 data using torchvision datasets\n    def read(self, path):\n        self.trainset = datasets.CIFAR10(\n            path, train=True, download=True, transform=transforms.Compose([\n                transforms.ToTensor(),\n                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))\n            ]))\n        self.testset = datasets.CIFAR10(\n            path, train=False, transform=transforms.Compose([\n                transforms.ToTensor(),\n                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))\n            ]))\n        self.labels = list(self.trainset.classes)\n\n\nclass Net(nn.Module):\n    def __init__(self):\n        super(Net, self).__init__()\n        self.conv1 = nn.Conv2d(3, 6, 5)\n        self.pool = nn.MaxPool2d(2, 2)\n        self.conv2 = nn.Conv2d(6, 16, 5)\n        self.fc1 = nn.Linear(16 * 5 * 5, 120)\n        self.fc2 = nn.Linear(120, 84)\n        self.fc3 = nn.Linear(84, 10)\n\n    def forward(self, x):\n        x = self.pool(F.relu(self.conv1(x)))\n        x = self.pool(F.relu(self.conv2(x)))\n        x = x.view(-1, 16 * 5 * 5)\n        x = F.relu(self.fc1(x))\n        x = F.relu(self.fc2(x))\n        x = self.fc3(x)\n        return x\n\n\ndef get_optimizer(model):\n    return optim.SGD(model.parameters(), lr=lr, momentum=momentum)\n\n\ndef get_trainloader(trainset, batch_size):\n    return torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True)\n\n\ndef get_testloader(testset, batch_size):\n    return torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=True)\n\n\ndef extract_weights(model):\n    weights = []\n    for name, weight in model.to(torch.device('cpu')).named_parameters():  # pylint: disable=no-member\n        if weight.requires_grad:\n            weights.append((name, weight.data))\n\n    return weights\n\n\ndef load_weights(model, weights):\n    updated_state_dict = {}\n    for name, weight in weights:\n        updated_state_dict[name] = weight\n\n    model.load_state_dict(updated_state_dict, strict=False)\n\n\ndef train(model, trainloader, optimizer, epochs):\n    model.to(device)\n    model.train()\n    criterion = nn.CrossEntropyLoss()\n\n    for epoch in range(1, epochs + 1):\n        for batch_id, data in enumerate(trainloader):\n            # get the inputs; data is a list of [inputs, labels]\n            inputs, labels = data\n            inputs, labels = inputs.to(device), labels.to(device)\n\n            # zero the parameter gradients\n            optimizer.zero_grad()\n\n            # forward + backward + optimize\n            outputs = model(inputs)\n            loss = criterion(outputs, labels)\n            loss.backward()\n            optimizer.step()\n\n            if batch_id % log_interval == 0:\n                logging.debug('Epoch: [{}/{}]\\tLoss: {:.6f}'.format(\n                    epoch, epochs, loss.item()))\n\n\ndef test(model, testloader):\n    model.to(device)\n    model.eval()\n    correct = 0\n    total = 0\n    with torch.no_grad():\n        for data in 
testloader:\n            images, labels = data\n            images, labels = images.to(device), labels.to(device)\n            outputs = model(images)\n            _, predicted = torch.max(  # pylint: disable=no-member\n                outputs.data, 1)\n            total += labels.size(0)\n            correct += (predicted == labels).sum().item()\n\n    accuracy = correct / total\n    logging.debug('Accuracy: {:.2f}%'.format(100 * accuracy))\n\n    return accuracy\n"
  },
  {
    "path": "models/FashionMNIST/fl_model.py",
    "content": "import load_data\nimport logging\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport torch.optim as optim\nfrom torchvision import datasets, transforms\n\n# Training settings\nlr = 0.01\nmomentum = 0.5\nlog_interval = 10\n\n# Cuda settings\nuse_cuda = torch.cuda.is_available()\ndevice = torch.device (  # pylint: disable=no-member\n    'cuda' if use_cuda else 'cpu')\n\n\nclass Generator(load_data.Generator):\n    \"\"\"Generator for FashionMNIST dataset.\"\"\"\n\n    # Extract FashionMNIST data using torchvision datasets\n    def read(self, path):\n        self.trainset = datasets.FashionMNIST(\n            path, train=True, download=True, transform=transforms.Compose([\n                transforms.ToTensor(),\n                transforms.Normalize(\n                    (0.1307,), (0.3081,))\n            ]))\n        self.testset = datasets.FashionMNIST(\n            path, train=False, transform=transforms.Compose([\n                transforms.ToTensor(),\n                transforms.Normalize(\n                    (0.1307,), (0.3081,))\n            ]))\n        self.labels = list(self.trainset.classes)\n\n\nclass Net(nn.Module):\n    def __init__(self):\n        super(Net, self).__init__()\n        self.layer1 = nn.Sequential(\n            nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2),\n            nn.BatchNorm2d(16),\n            nn.ReLU(),\n            nn.MaxPool2d(kernel_size=2, stride=2))\n        self.layer2 = nn.Sequential(\n            nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2),\n            nn.BatchNorm2d(32),\n            nn.ReLU(),\n            nn.MaxPool2d(kernel_size=2, stride=2))\n        self.fc = nn.Linear(7 * 7 * 32, 10)\n\n    def forward(self, x):\n        out = self.layer1(x)\n        out = self.layer2(out)\n        out = out.reshape(out.size(0), -1)\n        out = self.fc(out)\n        return out\n\n\ndef get_optimizer(model):\n    return optim.Adam(model.parameters(), lr=lr)\n\n\ndef get_trainloader(trainset, batch_size):\n    return torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True)\n\n\ndef get_testloader(testset, batch_size):\n    return torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=True)\n\n\ndef extract_weights(model):\n    weights = []\n    for name, weight in model.to(torch.device('cpu')).named_parameters():  # pylint: disable=no-member\n        if weight.requires_grad:\n            weights.append((name, weight.data))\n\n    return weights\n\n\ndef load_weights(model, weights):\n    updated_state_dict = {}\n    for name, weight in weights:\n        updated_state_dict[name] = weight\n\n    model.load_state_dict(updated_state_dict, strict=False)\n\n\ndef train(model, trainloader, optimizer, epochs):\n    model.to(device)\n    model.train()\n    criterion = nn.CrossEntropyLoss()\n\n    for epoch in range(1, epochs + 1):\n        for batch_id, data in enumerate(trainloader):\n            inputs, labels = data\n            inputs, labels = inputs.to(device), labels.to(device)\n\n            outputs = model(inputs)\n            loss = criterion(outputs, labels)\n\n            optimizer.zero_grad()\n            loss.backward()\n            optimizer.step()\n            if batch_id % log_interval == 0:\n                logging.debug('Epoch: [{}/{}]\\tLoss: {:.6f}'.format(\n                    epoch, epochs, loss.item()))\n\n\ndef test(model, testloader):\n    model.to(device)\n    model.eval()\n\n    with torch.no_grad():\n        correct = 0\n        total = 0\n       
 for data in testloader:\n            images, labels = data\n            images, labels = images.to(device), labels.to(device)\n            outputs = model(images)\n            predicted = torch.argmax(  # pylint: disable=no-member\n                outputs, dim=1)\n            total += labels.size(0)\n            correct += (predicted == labels).sum().item()\n\n    accuracy = correct / total\n    logging.debug('Accuracy: {:.2f}%'.format(100 * accuracy))\n\n    return accuracy\n"
  },
  {
    "path": "models/MNIST/fl_model.py",
    "content": "import load_data\nimport logging\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport torch.optim as optim\nfrom torchvision import datasets, transforms\n\n# Training settings\nlr = 0.01\nmomentum = 0.5\nlog_interval = 10\n\n# Cuda settings\nuse_cuda = torch.cuda.is_available()\ndevice = torch.device(  # pylint: disable=no-member\n    'cuda' if use_cuda else 'cpu')\n\n\nclass Generator(load_data.Generator):\n    \"\"\"Generator for MNIST dataset.\"\"\"\n\n    # Extract MNIST data using torchvision datasets\n    def read(self, path):\n        self.trainset = datasets.MNIST(\n            path, train=True, download=True, transform=transforms.Compose([\n                transforms.ToTensor(),\n                transforms.Normalize(\n                    (0.1307,), (0.3081,))\n            ]))\n        self.testset = datasets.MNIST(\n            path, train=False, transform=transforms.Compose([\n                transforms.ToTensor(),\n                transforms.Normalize(\n                    (0.1307,), (0.3081,))\n            ]))\n        self.labels = list(self.trainset.classes)\n\n\nclass Net(nn.Module):\n    def __init__(self):\n        super(Net, self).__init__()\n        self.conv1 = nn.Conv2d(1, 20, 5, 1)\n        self.conv2 = nn.Conv2d(20, 50, 5, 1)\n        self.fc1 = nn.Linear(4 * 4 * 50, 500)\n        self.fc2 = nn.Linear(500, 10)\n\n    def forward(self, x):\n        x = F.relu(self.conv1(x))\n        x = F.max_pool2d(x, 2, 2)\n        x = F.relu(self.conv2(x))\n        x = F.max_pool2d(x, 2, 2)\n        x = x.view(-1, 4 * 4 * 50)\n        x = F.relu(self.fc1(x))\n        x = self.fc2(x)\n        return F.log_softmax(x, dim=1)\n\n\ndef get_optimizer(model):\n    return optim.SGD(model.parameters(), lr=lr, momentum=momentum)\n\n\ndef get_trainloader(trainset, batch_size):\n    return torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True)\n\n\ndef get_testloader(testset, batch_size):\n    return torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=True)\n\n\ndef extract_weights(model):\n    weights = []\n    for name, weight in model.to(torch.device('cpu')).named_parameters():  # pylint: disable=no-member\n        if weight.requires_grad:\n            weights.append((name, weight.data))\n\n    return weights\n\n\ndef load_weights(model, weights):\n    updated_state_dict = {}\n    for name, weight in weights:\n        updated_state_dict[name] = weight\n\n    model.load_state_dict(updated_state_dict, strict=False)\n\n\ndef train(model, trainloader, optimizer, epochs):\n    model.to(device)\n    model.train()\n    for epoch in range(1, epochs + 1):\n        for batch_id, (image, label) in enumerate(trainloader):\n            image, label = image.to(device), label.to(device)\n            optimizer.zero_grad()\n            output = model(image)\n            loss = F.nll_loss(output, label)\n            loss.backward()\n            optimizer.step()\n            if batch_id % log_interval == 0:\n                logging.debug('Epoch: [{}/{}]\\tLoss: {:.6f}'.format(\n                    epoch, epochs, loss.item()))\n\n\ndef test(model, testloader):\n    model.to(device)\n    model.eval()\n    test_loss = 0\n    correct = 0\n    total = len(testloader.dataset)\n    with torch.no_grad():\n        for image, label in testloader:\n            image, label = image.to(device), label.to(device)\n            output = model(image)\n            # sum up batch loss\n            test_loss += F.nll_loss(output, label, 
reduction='sum').item()\n            # get the index of the max log-probability\n            pred = output.argmax(dim=1, keepdim=True)\n            correct += pred.eq(label.view_as(pred)).sum().item()\n\n    accuracy = correct / total\n    logging.debug('Accuracy: {:.2f}%'.format(100 * accuracy))\n\n    return accuracy\n"
  },
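  {
    "path": "examples/weights_roundtrip.py",
    "content": "# Hypothetical sketch, not part of the original repository: it demonstrates\n# the extract_weights/load_weights round trip that the federated servers use\n# when exchanging model updates. Assumes models/MNIST is on sys.path so\n# fl_model resolves to models/MNIST/fl_model.py.\n\nimport sys\nsys.path.append('./models/MNIST')\n\nimport fl_model  # pylint: disable=import-error\n\nmodel_a = fl_model.Net()\nmodel_b = fl_model.Net()\n\n# extract_weights returns (name, tensor) pairs; load_weights rebuilds a state\n# dict from them and loads it with strict=False, so only matching names copy.\nweights = fl_model.extract_weights(model_a)\nfl_model.load_weights(model_b, weights)\n"
  },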
  {
    "path": "models/fl_model.py",
    "content": "# pylint: skip-file\n\nimport load_data\nimport logging\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport torch.optim as optim\nfrom torchvision import datasets, transforms\n\n# Training settings\nlr = 0.01  # CHECKME\nmomentum = 0.5  # CHECKME\nlog_interval = 10  # CHECKME\n\n\nclass Generator(load_data.Generator):  # CHECKME\n    \"\"\"Generator for UNNAMED dataset.\"\"\"\n\n    # Extract UNNAMED data using torchvision datasets\n    def read(self, path):\n        self.trainset = datasets.UNNAMED(\n            path, train=True, download=True, transform=transforms.Compose([\n                \"\"\"\n                    Add transforms here...\n                \"\"\"\n            ]))\n        self.testset = datasets.UNNAMED(\n            path, train=False, transform=transforms.Compose([\n                \"\"\"\n                    Add transforms here...\n                \"\"\"\n            ]))\n        self.labels = list(self.trainset.classes)\n\n\nclass Net(nn.Module):  # CHECKME\n    def __init__(self):\n        super(Net, self).__init__()\n        raise NotImplementedError\n\n    def forward(self, x):\n        raise NotImplementedError\n\n\ndef get_optimizer(model):  # CHECKME\n    return optim.SGD(model.parameters(), lr=lr, momentum=momentum)\n\n\ndef get_trainloader(trainset, batch_size):  # CHECKME\n    return torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True)\n\n\ndef get_testloader(testset, batch_size):  # CHECKME\n    return torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=True)\n\n\ndef extract_weights(model):  # CHECKME\n    weights = []\n    for UNNAMED, weight in model.UNNAMEDd_parameters():\n        if weight.requires_grad:\n            weights.append((UNNAMED, weight.data))\n\n    return weights\n\n\ndef load_weights(model, weights):  # CHECKME\n    updated_weights_dict = {}\n    for UNNAMED, weight in weights:\n        updated_weights_dictUNNAMED = weight\n\n    model.load_state_dict(updated_weights_dict, strict=False)\n\n\ndef train(model, trainloader, optimizer, epochs):  # CHECKME\n    \"\"\"\n        Set up for training here...\n    \"\"\"\n\n    for epoch in range(1, epochs + 1):\n        for batch_id, (image, label) in enumerate(trainloader):\n            \"\"\"\n                Train model here...\n            \"\"\"\n\n            if batch_id % log_interval == 0:\n                logging.debug('Epoch: [{}/{}]\\tLoss: {:.6f}'.format(\n                    epoch, epochs, loss.item()))\n\n\ndef test(model, testloader):  # CHECKME\n    \"\"\"\n        Set up for testing here...\n    \"\"\"\n\n    correct = 0\n    total = 0\n    with torch.no_grad():\n        for image, label in testloader:\n            \"\"\"\n                Test model here...\n            \"\"\"\n\n    accuracy = correct / total\n    logging.debug('Accuracy: {:.2f}%'.format(100 * accuracy))\n\n    return accuracy\n"
  },
  {
    "path": "run.py",
    "content": "import argparse\nimport client\nimport config\nimport logging\nimport os\nimport server\n\n\n# Set up parser\nparser = argparse.ArgumentParser()\nparser.add_argument('-c', '--config', type=str, default='./config.json',\n                    help='Federated learning configuration file.')\nparser.add_argument('-l', '--log', type=str, default='INFO',\n                    help='Log messages level.')\n\nargs = parser.parse_args()\n\n# Set logging\nlogging.basicConfig(\n    format='[%(levelname)s][%(asctime)s]: %(message)s', level=getattr(logging, args.log.upper()), datefmt='%H:%M:%S')\n\n\ndef main():\n    \"\"\"Run a federated learning simulation.\"\"\"\n\n    # Read configuration file\n    fl_config = config.Config(args.config)\n\n    # Initialize server\n    fl_server = {\n        \"basic\": server.Server(fl_config),\n        \"accavg\": server.AccAvgServer(fl_config),\n        \"directed\": server.DirectedServer(fl_config),\n        \"kcenter\": server.KCenterServer(fl_config),\n        \"kmeans\": server.KMeansServer(fl_config),\n        \"magavg\": server.MagAvgServer(fl_config),\n        # \"dqn\": server.DQNServer(fl_config), # DQN server disabled\n        # \"dqntrain\": server.DQNTrainServer(fl_config), # DQN server disabled\n    }[fl_config.server]\n    fl_server.boot()\n\n    # Run federated learning\n    fl_server.run()\n\n    # Delete global model\n    os.remove(fl_config.paths.model + '/global')\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "scripts/analyze_logs.py",
    "content": "import argparse\nfrom datetime import datetime\nimport re\n\n\n# Set up parser\nparser = argparse.ArgumentParser()\nparser.add_argument('--log', type=str, help='Simmulation log file.')\nargs = parser.parse_args()\n\n# Read log\nwith open(args.log, 'r') as f:\n    log = [x for x in f.readlines() if x != '\\n']\n\n# Extract time\ndef extract_time(line):\n    return datetime.strptime([x for x in re.split('\\[|\\]', line) if x][1], '%H:%M:%S')\n\n# Extract lines\ntraining = []\n\nfor line in log:\n    if 'Round 1/' in line:\n        training.append(line)\n\ntraining.append(log[-1])\n\n# Calculate duration\ntraining_duration = (extract_time(training[1]) - extract_time(training[0])).seconds\nprint('{}: training time: {} s'.format(args.log, training_duration))\n"
  },
  {
    "path": "scripts/pca.py",
    "content": "import argparse\nimport client\nimport config\nimport logging\nimport os\nimport pickle\nfrom sklearn.decomposition import PCA\nimport server\n\n\n# Set logging\nlogging.basicConfig(\n    format='[%(levelname)s][%(asctime)s]: %(message)s', level=logging.INFO, datefmt='%H:%M:%S')\n\n# Set up parser\nparser = argparse.ArgumentParser()\nparser.add_argument('-c', '--config', type=str, default='./template.json',\n                    help='Configuration file for server.')\nparser.add_argument('-o', '--output', type=str, default='./output.pkl',\n                    help='Output pickle file')\n\nargs = parser.parse_args()\n\n\ndef main():\n    \"\"\"Extract PCA vectors from FL clients.\"\"\"\n\n    # Read configuration file\n    fl_config = config.Config(args.config)\n\n    # Initialize server\n    fl_server = server.KMeansServer(fl_config)\n    fl_server.boot()\n\n    # Run client profiling\n    fl_server.profile_clients()\n\n    # Extract clients, reports, weights\n    clients = [client for client in group for group in [\n        fl_server.clients[profile] for profile in fl_server.clients.keys()]]\n    reports = [client.get_report() for client in clients]\n    weights = [report.weights for report in reports]\n\n    # Flatten weights\n    def flatten_weights(weights):\n        weight_vecs = []\n        for _, weight in weights:\n            weight_vecs.extend(weight.flatten())\n        return weight_vecs\n\n    logging.info('Flattening weights...')\n    weight_vecs = [flatten_weights(weight) for weight in weights]\n\n    # Perform PCA on weight vectors\n    logging.info('Assembling output...')\n    output = [(clients[i].client_id, clients[i].pref, weight) for i, weight in enumerate(weight_vecs)]\n    logging.into('Writing output to binary...')\n    with open(args.output, 'wb') as f:\n        pickle.dump(output, f)\n\n    logging.info('Done!')\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "server/__init__.py",
    "content": "from .server import Server\nfrom .accavg import AccAvgServer\nfrom .directed import DirectedServer\nfrom .kcenter import KCenterServer\nfrom .kmeans import KMeansServer\nfrom .magavg import MagAvgServer\n# from .dqn import DQNServer # DQN server disbled\n# from .dqn import DQNTrainServer # DQN server disabled\n"
  },
  {
    "path": "server/accavg.py",
    "content": "from server import Server\nimport numpy as np\nimport torch\n\n\nclass AccAvgServer(Server):\n    \"\"\"Federated learning server that performs accuracy weighted federated averaging.\"\"\"\n\n    # Federated learning phases\n    def aggregation(self, reports):\n        return self.accuracy_fed_avg(reports)\n\n    # Report aggregation\n    def accuracy_fed_avg(self, reports):\n        import fl_model  # pylint: disable=import-error\n\n        # Extract updates from reports\n        updates = self.extract_client_updates(reports)\n\n        # Extract client accuracies\n        accuracies = np.array([report.accuracy for report in reports])\n\n        # Determine weighting based on accuracies\n        factor = 8  # Exponentiation factor\n        w = accuracies**factor / sum(accuracies**factor)\n\n        # Perform weighted averaging\n        avg_update = [torch.zeros(x.size())  # pylint: disable=no-member\n                      for _, x in updates[0]]\n        for i, update in enumerate(updates):\n            for j, (_, delta) in enumerate(update):\n                # Use weighted average by magnetude of updates\n                avg_update[j] += delta * w[i]\n\n        # Extract baseline model weights\n        baseline_weights = fl_model.extract_weights(self.model)\n\n        # Load updated weights into model\n        updated_weights = []\n        for i, (name, weight) in enumerate(baseline_weights):\n            updated_weights.append((name, weight + avg_update[i]))\n\n        return updated_weights\n\n    # Server operations\n    def set_client_data(self, client):\n        super().set_client_data(client)\n\n        # Send each client a testing partition\n        client.testset = client.download(self.loader.get_testset())\n        client.do_test = True  # Tell client to perform testing\n"
  },
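  {
    "path": "examples/acc_weighting_demo.py",
    "content": "# Hypothetical sketch, not part of the original repository: it reproduces the\n# accuracy weighting from AccAvgServer.accuracy_fed_avg on toy numbers.\n# Raising accuracies to a power before normalizing concentrates the averaging\n# weight on the most accurate clients.\n\nimport numpy as np\n\naccuracies = np.array([0.50, 0.80, 0.90])\nfactor = 8  # same exponentiation factor as server/accavg.py\n\nw = accuracies**factor / sum(accuracies**factor)\nprint(w.round(3))  # [0.006 0.279 0.715]: the 0.90 client dominates\n"
  },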
  {
    "path": "server/directed.py",
    "content": "import logging\nfrom server import Server\nimport numpy as np\nfrom threading import Thread\n\n\nclass DirectedServer(Server):\n    \"\"\"Federated learning server that uses profiles to direct during selection.\"\"\"\n\n    # Run federated learning\n    def run(self):\n        # Perform profiling on all clients\n        self.profiling()\n\n        # Continue federated learning\n        super().run()\n\n    # Federated learning phases\n    def selection(self):\n        import fl_model  # pylint: disable=import-error\n\n        clients = self.clients\n        clients_per_round = self.config.clients.per_round\n        profiles = self.profiles\n        w_previous = self.w_previous\n\n        # Extract directors from profiles\n        directors = [d for _, d in profiles]\n\n        # Extract most recent model weights\n        w_current = self.flatten_weights(fl_model.extract_weights(self.model))\n        model_direction = w_current - w_previous\n        # Normalize model direction\n        model_direction = model_direction / \\\n            np.sqrt(np.dot(model_direction, model_direction))\n\n        # Update previous model weights\n        self.w_previous = w_current\n\n        # Generate client director scores (closer direction is better)\n        scores = [np.dot(director, model_direction) for director in directors]\n        # Apply punishment for repeatedly selected clients\n        p = self.punishment\n        scores = [x * (0.9)**p[i] for i, x in enumerate(scores)]\n\n        # Select clients with highest scores\n        sample_clients_index = []\n        for _ in range(clients_per_round):\n            top_score_index = scores.index(max(scores))\n            sample_clients_index.append(top_score_index)\n            # Overwrite to avoid reselection\n            scores[top_score_index] = min(scores) - 1\n\n        # Extract selected sample clients\n        sample_clients = [clients[i] for i in sample_clients_index]\n\n        # Update punishment factors\n        self.punishment = [\n            p[i] + 1 if i in sample_clients_index else 0 for i in range(len(clients))]\n\n        return sample_clients\n\n    def profiling(self):\n        import fl_model  # pylint: disable=import-error\n\n        # Use all clients for profiling\n        clients = self.clients\n\n        # Configure clients for training\n        self.configuration(clients)\n\n        # Train on clients to generate profile weights\n        threads = [Thread(target=client.train) for client in self.clients]\n        [t.start() for t in threads]\n        [t.join() for t in threads]\n\n        # Recieve client reports\n        reports = self.reporting(clients)\n\n        # Extract weights from reports\n        weights = [report.weights for report in reports]\n        weights = [self.flatten_weights(weight) for weight in weights]\n\n        # Extract initial model weights\n        w0 = self.flatten_weights(fl_model.extract_weights(self.model))\n\n        # Save as initial previous model weights\n        self.w_previous = w0.copy()\n\n        # Update initial model using results of profiling\n        # Perform weight aggregation\n        logging.info('Aggregating updates')\n        updated_weights = self.aggregation(reports)\n\n        # Load updated weights\n        fl_model.load_weights(self.model, updated_weights)\n\n        # Calculate direction vectors (directors)\n        directors = [(w - w0) for w in weights]\n        # Normalize directors to unit length\n        directors = [d / np.sqrt(np.dot(d, d)) for d in 
directors]\n\n        # Initialize punishment factors\n        self.punishment = [0 for _ in range(len(clients))]\n\n        # Use directors for client profiles\n        self.profiles = [(client, directors[i])\n                         for i, client in enumerate(clients)]\n        return self.profiles\n"
  },
  {
    "path": "server/kcenter.py",
    "content": "import logging\nimport random\nfrom server import Server\nfrom threading import Thread\nfrom utils.kcenter import GreedyKCenter  # pylint: disable=no-name-in-module\n\n\nclass KCenterServer(Server):\n    \"\"\"Federated learning server that performs KCenter profiling during selection.\"\"\"\n\n    # Run federated learning\n    def run(self):\n        # Perform profiling on all clients\n        self.profiling()\n\n        # Designate space for storing used client profiles\n        self.used_profiles = []\n\n        # Continue federated learning\n        super().run()\n\n    # Federated learning phases\n    def selection(self):\n        # Select devices to participate in round\n\n        profiles = self.profiles\n        k = self.config.clients.per_round\n\n        if len(profiles) < k:  # Reuse clients when needed\n            logging.warning('Not enough unused clients')\n            logging.warning('Dumping clients for reuse')\n            self.profiles.extend(self.used_profiles)\n            self.used_profiles = []\n\n        # Shuffle profiles\n        random.shuffle(profiles)\n\n        # Cluster clients based on profile weights\n        weights = [weight for _, weight in profiles]\n        KCenter = GreedyKCenter()\n        KCenter.fit(weights, k)\n\n        logging.info('KCenter: {} clients, {} centers'.format(\n            len(profiles), k))\n\n        # Select clients marked as cluster centers\n        centers_index = KCenter.centers_index\n        sample_profiles = [profiles[i] for i in centers_index]\n        sample_clients = [client for client, _ in sample_profiles]\n\n        # Mark sample profiles as used\n        self.used_profiles.extend(sample_profiles)\n        for i in sorted(centers_index, reverse=True):\n            del self.profiles[i]\n\n        return sample_clients\n\n    def profiling(self):\n        # Use all clients for profiling\n        clients = self.clients\n\n        # Configure clients for training\n        self.configuration(clients)\n\n        # Train on clients to generate profile weights\n        threads = [Thread(target=client.train) for client in self.clients]\n        [t.start() for t in threads]\n        [t.join() for t in threads]\n\n        # Recieve client reports\n        reports = self.reporting(clients)\n\n        # Extract weights from reports\n        weights = [report.weights for report in reports]\n        weights = [self.flatten_weights(weight) for weight in weights]\n\n        # Use weights for client profiles\n        self.profiles = [(client, weights[i])\n                         for i, client in enumerate(clients)]\n        return self.profiles\n"
  },
  {
    "path": "server/kmeans.py",
    "content": "import logging\nimport random\nfrom server import Server\nfrom sklearn.cluster import KMeans\nfrom threading import Thread\nimport utils.dists as dists  # pylint: disable=no-name-in-module\n\n\nclass KMeansServer(Server):\n    \"\"\"Federated learning server that performs KMeans profiling during selection.\"\"\"\n\n    # Run federated learning\n    def run(self):\n        # Perform profiling on all clients\n        self.profile_clients()\n\n        # Continue federated learning\n        super().run()\n\n    # Federated learning phases\n    def selection(self):\n        # Select devices to participate in round\n        clients_per_round = self.config.clients.per_round\n        cluster_labels = self.clients.keys()\n\n        # Generate uniform distribution for selecting clients\n        dist = dists.uniform(clients_per_round, len(cluster_labels))\n\n        # Select clients from KMeans clusters\n        sample_clients = []\n        for i, cluster in enumerate(cluster_labels):\n            # Select clients according to distribution\n            if len(self.clients[cluster]) >= dist[i]:\n                k = dist[i]\n            else:  # If not enough clients in cluster, use all avaliable\n                k = len(self.clients[cluster])\n\n            sample_clients.extend(random.sample(\n                self.clients[cluster], k))\n\n         # Shuffle selected sample clients\n        random.shuffle(sample_clients)\n\n        return sample_clients\n\n    # Output model weights\n    def model_weights(self, clients):\n        # Configure clients to train on local data\n        self.configuration(clients)\n\n        # Train on local data for profiling purposes\n        threads = [Thread(target=client.train) for client in self.clients]\n        [t.start() for t in threads]\n        [t.join() for t in threads]\n\n        # Recieve client reports\n        reports = self.reporting(clients)\n\n        # Extract weights from reports\n        weights = [report.weights for report in reports]\n\n        return [self.flatten_weights(weight) for weight in weights]\n\n    def prefs_to_weights(self):\n        prefs = [client.pref for client in self.clients]\n        return list(zip(prefs, self.model_weights(self.clients)))\n\n    def profiling(self, clients):\n        # Perform clustering\n\n        weight_vecs = self.model_weights(clients)\n\n        # Use the number of clusters as there are labels\n        n_clusters = len(self.loader.labels)\n\n        logging.info('KMeans: {} clients, {} clusters'.format(\n            len(weight_vecs), n_clusters))\n        kmeans = KMeans(  # Use KMeans clustering algorithm\n            n_clusters=n_clusters).fit(weight_vecs)\n\n        return kmeans.labels_\n\n    # Server operations\n    def profile_clients(self):\n        # Perform profiling on all clients\n        kmeans = self.profiling(self.clients)\n\n        # Group clients by profile\n        grouped_clients = {cluster: [] for cluster in\n                           range(len(self.loader.labels))}\n        for i, client in enumerate(self.clients):\n            grouped_clients[kmeans[i]].append(client)\n\n        self.clients = grouped_clients  # Replace linear client list with dict\n\n    def add_client(self):\n        # Add a new client to the server\n        raise NotImplementedError\n"
  },
  {
    "path": "server/magavg.py",
    "content": "from server import Server\nimport numpy as np\nimport torch\n\n\nclass MagAvgServer(Server):\n    \"\"\"Federated learning server that performs magnetude weighted federated averaging.\"\"\"\n\n    # Federated learning phases\n    def aggregation(self, reports):\n        return self.magnetude_fed_avg(reports)\n\n    # Report aggregation\n    def magnetude_fed_avg(self, reports):\n        import fl_model  # pylint: disable=import-error\n\n        # Extract updates from reports\n        updates = self.extract_client_updates(reports)\n\n        # Extract update magnetudes\n        magnetudes = []\n        for update in updates:\n            magnetude = 0\n            for _, weight in update:\n                magnetude += weight.norm() ** 2\n            magnetudes.append(np.sqrt(magnetude))\n\n        # Perform weighted averaging\n        avg_update = [torch.zeros(x.size())  # pylint: disable=no-member\n                      for _, x in updates[0]]\n        for i, update in enumerate(updates):\n            for j, (_, delta) in enumerate(update):\n                # Use weighted average by magnetude of updates\n                avg_update[j] += delta * (magnetudes[i] / sum(magnetudes))\n\n        # Extract baseline model weights\n        baseline_weights = fl_model.extract_weights(self.model)\n\n        # Load updated weights into model\n        updated_weights = []\n        for i, (name, weight) in enumerate(baseline_weights):\n            updated_weights.append((name, weight + avg_update[i]))\n\n        return updated_weights\n"
  },
  {
    "path": "server/server.py",
    "content": "import client\nimport load_data\nimport logging\nimport numpy as np\nimport pickle\nimport random\nimport sys\nfrom threading import Thread\nimport torch\nimport utils.dists as dists  # pylint: disable=no-name-in-module\n\n\nclass Server(object):\n    \"\"\"Basic federated learning server.\"\"\"\n\n    def __init__(self, config):\n        self.config = config\n\n    # Set up server\n    def boot(self):\n        logging.info('Booting {} server...'.format(self.config.server))\n\n        model_path = self.config.paths.model\n        total_clients = self.config.clients.total\n\n        # Add fl_model to import path\n        sys.path.append(model_path)\n\n        # Set up simulated server\n        self.load_data()\n        self.load_model()\n        self.make_clients(total_clients)\n\n    def load_data(self):\n        import fl_model  # pylint: disable=import-error\n\n        # Extract config for loaders\n        config = self.config\n\n        # Set up data generator\n        generator = fl_model.Generator()\n\n        # Generate data\n        data_path = self.config.paths.data\n        data = generator.generate(data_path)\n        labels = generator.labels\n\n        logging.info('Dataset size: {}'.format(\n            sum([len(x) for x in [data[label] for label in labels]])))\n        logging.debug('Labels ({}): {}'.format(\n            len(labels), labels))\n\n        # Set up data loader\n        self.loader = {\n            'basic': load_data.Loader(config, generator),\n            'bias': load_data.BiasLoader(config, generator),\n            'shard': load_data.ShardLoader(config, generator)\n        }[self.config.loader]\n\n        logging.info('Loader: {}, IID: {}'.format(\n            self.config.loader, self.config.data.IID))\n\n    def load_model(self):\n        import fl_model  # pylint: disable=import-error\n\n        model_path = self.config.paths.model\n        model_type = self.config.model\n\n        logging.info('Model: {}'.format(model_type))\n\n        # Set up global model\n        self.model = fl_model.Net()\n        self.save_model(self.model, model_path)\n\n        # Extract flattened weights (if applicable)\n        if self.config.paths.reports:\n            self.saved_reports = {}\n            self.save_reports(0, [])  # Save initial model\n\n    def make_clients(self, num_clients):\n        IID = self.config.data.IID\n        labels = self.loader.labels\n        loader = self.config.loader\n        loading = self.config.data.loading\n\n        if not IID:  # Create distribution for label preferences if non-IID\n            dist = {\n                \"uniform\": dists.uniform(num_clients, len(labels)),\n                \"normal\": dists.normal(num_clients, len(labels))\n            }[self.config.clients.label_distribution]\n            random.shuffle(dist)  # Shuffle distribution\n\n        # Make simulated clients\n        clients = []\n        for client_id in range(num_clients):\n\n            # Create new client\n            new_client = client.Client(client_id)\n\n            if not IID:  # Configure clients for non-IID data\n                if self.config.data.bias:\n                    # Bias data partitions\n                    bias = self.config.data.bias\n                    # Choose weighted random preference\n                    pref = random.choices(labels, dist)[0]\n\n                    # Assign preference, bias config\n                    new_client.set_bias(pref, bias)\n                elif self.config.data.shard:\n                    # 
Shard data partitions\n                    shard = self.config.data.shard\n\n                    # Assign shard config\n                    new_client.set_shard(shard)\n\n            clients.append(new_client)\n\n        logging.info('Total clients: {}'.format(len(clients)))\n\n        if loader == 'bias':\n            logging.info('Label distribution: {}'.format(\n                [[client.pref for client in clients].count(label) for label in labels]))\n\n        if loading == 'static':\n            if loader == 'shard':  # Create data shards\n                self.loader.create_shards()\n\n            # Send data partition to all clients\n            [self.set_client_data(client) for client in clients]\n\n        self.clients = clients\n\n    # Run federated learning\n    def run(self):\n        rounds = self.config.fl.rounds\n        target_accuracy = self.config.fl.target_accuracy\n        reports_path = self.config.paths.reports\n\n        if target_accuracy:\n            logging.info('Training: {} rounds or {}% accuracy\\n'.format(\n                rounds, 100 * target_accuracy))\n        else:\n            logging.info('Training: {} rounds\\n'.format(rounds))\n\n        # Perform rounds of federated learning\n        for round in range(1, rounds + 1):\n            logging.info('**** Round {}/{} ****'.format(round, rounds))\n\n            # Run the federated learning round\n            accuracy = self.round()\n\n            # Break loop when target accuracy is met\n            if target_accuracy and (accuracy >= target_accuracy):\n                logging.info('Target accuracy reached.')\n                break\n\n        if reports_path:\n            with open(reports_path, 'wb') as f:\n                pickle.dump(self.saved_reports, f)\n            logging.info('Saved reports: {}'.format(reports_path))\n\n    def round(self):\n        import fl_model  # pylint: disable=import-error\n\n        # Select clients to participate in the round\n        sample_clients = self.selection()\n\n        # Configure sample clients\n        self.configuration(sample_clients)\n\n        # Run clients using multithreading for better parallelism\n        threads = [Thread(target=client.run) for client in sample_clients]\n        [t.start() for t in threads]\n        [t.join() for t in threads]\n\n        # Recieve client updates\n        reports = self.reporting(sample_clients)\n\n        # Perform weight aggregation\n        logging.info('Aggregating updates')\n        updated_weights = self.aggregation(reports)\n\n        # Load updated weights\n        fl_model.load_weights(self.model, updated_weights)\n\n        # Extract flattened weights (if applicable)\n        if self.config.paths.reports:\n            self.save_reports(round, reports)\n\n        # Save updated global model\n        self.save_model(self.model, self.config.paths.model)\n\n        # Test global model accuracy\n        if self.config.clients.do_test:  # Get average accuracy from client reports\n            accuracy = self.accuracy_averaging(reports)\n        else:  # Test updated model on server\n            testset = self.loader.get_testset()\n            batch_size = self.config.fl.batch_size\n            testloader = fl_model.get_testloader(testset, batch_size)\n            accuracy = fl_model.test(self.model, testloader)\n\n        logging.info('Average accuracy: {:.2f}%\\n'.format(100 * accuracy))\n        return accuracy\n\n    # Federated learning phases\n\n    def selection(self):\n        # Select devices to participate in 
round\n        clients_per_round = self.config.clients.per_round\n\n        # Select clients randomly\n        sample_clients = [client for client in random.sample(\n            self.clients, clients_per_round)]\n\n        return sample_clients\n\n    def configuration(self, sample_clients):\n        loader_type = self.config.loader\n        loading = self.config.data.loading\n\n        if loading == 'dynamic':\n            # Create shards if applicable\n            if loader_type == 'shard':\n                self.loader.create_shards()\n\n        # Configure selected clients for federated learning task\n        for client in sample_clients:\n            if loading == 'dynamic':\n                self.set_client_data(client)  # Send data partition to client\n\n            # Extract config for client\n            config = self.config\n\n            # Continue configuraion on client\n            client.configure(config)\n\n    def reporting(self, sample_clients):\n        # Recieve reports from sample clients\n        reports = [client.get_report() for client in sample_clients]\n\n        logging.info('Reports recieved: {}'.format(len(reports)))\n        assert len(reports) == len(sample_clients)\n\n        return reports\n\n    def aggregation(self, reports):\n        return self.federated_averaging(reports)\n\n    # Report aggregation\n    def extract_client_updates(self, reports):\n        import fl_model  # pylint: disable=import-error\n\n        # Extract baseline model weights\n        baseline_weights = fl_model.extract_weights(self.model)\n\n        # Extract weights from reports\n        weights = [report.weights for report in reports]\n\n        # Calculate updates from weights\n        updates = []\n        for weight in weights:\n            update = []\n            for i, (name, weight) in enumerate(weight):\n                bl_name, baseline = baseline_weights[i]\n\n                # Ensure correct weight is being updated\n                assert name == bl_name\n\n                # Calculate update\n                delta = weight - baseline\n                update.append((name, delta))\n            updates.append(update)\n\n        return updates\n\n    def federated_averaging(self, reports):\n        import fl_model  # pylint: disable=import-error\n\n        # Extract updates from reports\n        updates = self.extract_client_updates(reports)\n\n        # Extract total number of samples\n        total_samples = sum([report.num_samples for report in reports])\n\n        # Perform weighted averaging\n        avg_update = [torch.zeros(x.size())  # pylint: disable=no-member\n                      for _, x in updates[0]]\n        for i, update in enumerate(updates):\n            num_samples = reports[i].num_samples\n            for j, (_, delta) in enumerate(update):\n                # Use weighted average by number of samples\n                avg_update[j] += delta * (num_samples / total_samples)\n\n        # Extract baseline model weights\n        baseline_weights = fl_model.extract_weights(self.model)\n\n        # Load updated weights into model\n        updated_weights = []\n        for i, (name, weight) in enumerate(baseline_weights):\n            updated_weights.append((name, weight + avg_update[i]))\n\n        return updated_weights\n\n    def accuracy_averaging(self, reports):\n        # Get total number of samples\n        total_samples = sum([report.num_samples for report in reports])\n\n        # Perform weighted averaging\n        accuracy = 0\n        for report in 
reports:\n            accuracy += report.accuracy * (report.num_samples / total_samples)\n\n        return accuracy\n\n    # Server operations\n    @staticmethod\n    def flatten_weights(weights):\n        # Flatten weights into vectors\n        weight_vecs = []\n        for _, weight in weights:\n            weight_vecs.extend(weight.flatten().tolist())\n\n        return np.array(weight_vecs)\n\n    def set_client_data(self, client):\n        loader = self.config.loader\n\n        # Get data partition size\n        if loader != 'shard':\n            if self.config.data.partition.get('size'):\n                partition_size = self.config.data.partition.get('size')\n            elif self.config.data.partition.get('range'):\n                start, stop = self.config.data.partition.get('range')\n                partition_size = random.randint(start, stop)\n\n        # Extract data partition for client\n        if loader == 'basic':\n            data = self.loader.get_partition(partition_size)\n        elif loader == 'bias':\n            data = self.loader.get_partition(partition_size, client.pref)\n        elif loader == 'shard':\n            data = self.loader.get_partition()\n        else:\n            logging.critical('Unknown data loader type')\n\n        # Send data to client\n        client.set_data(data, self.config)\n\n    def save_model(self, model, path):\n        path += '/global'\n        torch.save(model.state_dict(), path)\n        logging.info('Saved global model: {}'.format(path))\n\n    def save_reports(self, round, reports):\n        import fl_model  # pylint: disable=import-error\n\n        if reports:\n            self.saved_reports['round{}'.format(round)] = [(report.client_id, self.flatten_weights(\n                report.weights)) for report in reports]\n\n        # Extract global weights\n        self.saved_reports['w{}'.format(round)] = self.flatten_weights(\n            fl_model.extract_weights(self.model))\n"
  },
  {
    "path": "utils/dists.py",
    "content": "import numpy as np\nimport random\n\n\ndef uniform(N, k):\n    \"\"\"Uniform distribution of 'N' items into 'k' groups.\"\"\"\n    dist = []\n    avg = N / k\n    # Make distribution\n    for i in range(k):\n        dist.append(int((i + 1) * avg) - int(i * avg))\n    # Return shuffled distribution\n    random.shuffle(dist)\n    return dist\n\n\ndef normal(N, k):\n    \"\"\"Normal distribution of 'N' items into 'k' groups.\"\"\"\n    dist = []\n    # Make distribution\n    for i in range(k):\n        x = i - (k - 1) / 2\n        dist.append(int(N * (np.exp(-x) / (np.exp(-x) + 1)**2)))\n    # Add remainders\n    remainder = N - sum(dist)\n    dist = list(np.add(dist, uniform(remainder, k)))\n    # Return non-shuffled distribution\n    return dist\n"
  },
  {
    "path": "utils/kcenter.py",
    "content": "import numpy as np\n\n\nclass GreedyKCenter(object):\n    def fit(self, points, k):\n        centers = []\n        centers_index = []\n        # Initialize distances\n        distances = [np.inf for u in points]\n        # Initialize cluster labels\n        labels = [np.inf for u in points]\n\n        for cluster in range(k):\n            # Let u be the point of P such that d[u] is maximum\n            u_index = distances.index(max(distances))\n            u = points[u_index]\n            # u is the next cluster center\n            centers.append(u)\n            centers_index.append(u_index)\n\n            # Update distance to nearest center\n            for i, v in enumerate(points):\n                distance_to_u = self.distance(u, v)  # Calculate from v to u\n                if distance_to_u < distances[i]:\n                    distances[i] = distance_to_u\n                    labels[i] = cluster\n\n            # Update the bottleneck distance\n            max_distance = max(distances)\n\n        # Return centers, labels, max delta, labels\n        self.centers = centers\n        self.centers_index = centers_index\n        self.max_distance = max_distance\n        self.labels = labels\n\n    @staticmethod\n    def distance(u, v):\n        displacement = u - v\n        return np.sqrt(displacement.dot(displacement))\n"
  }
]