Repository: eleweek/inside_python_dict Branch: master Commit: 8d7cda3a5324 Files: 70 Total size: 470.3 KB Directory structure: gitextract__zrkfi9_/ ├── .babelrc ├── .gitignore ├── .prettierignore ├── LICENSE ├── README.md ├── package.json ├── patches/ │ ├── python32_debug.diff │ ├── smooth-scrollbar+8.3.1.patch │ └── subscribe-ui-event+2.0.4.patch ├── python_code/ │ ├── actual_dict_factory_test.py │ ├── build_autogenerated_chapter1_hash.py │ ├── build_autogenerated_chapter2.py │ ├── build_autogenerated_chapter3_chapter4.py │ ├── chapter1_linear_search_reimplementation_test.py │ ├── chapter4_probing_python_reimplementation_test.py │ ├── common.py │ ├── dict32_reimplementation_test_v2.py │ ├── dict_reimpl_common.py │ ├── dict_reimplementation.py │ ├── dictinfo.py │ ├── dictinfo32.py │ ├── dictinfo33.py │ ├── hash_chapter1_impl.py │ ├── hash_chapter1_reimpl_js.py │ ├── hash_chapter1_reimplementation_test.py │ ├── hash_chapter2_impl.py │ ├── hash_chapter2_impl_test.py │ ├── hash_chapter2_reimpl_js.py │ ├── hash_chapter2_reimplementation_test.py │ ├── hash_chapter3_class_impl.py │ ├── hash_chapter3_class_impl_test.py │ ├── interface_test.py │ ├── js_reimpl_common.py │ └── js_reimplementation_interface.py ├── scripts/ │ ├── extractPythonCode.js │ ├── pyReimplWrapper.js │ └── ssr.js ├── src/ │ ├── app.js │ ├── autogenerated/ │ │ ├── chapter1.html │ │ ├── chapter2.html │ │ ├── chapter3.html │ │ └── chapter4.html │ ├── chapter1_simplified_hash.js │ ├── chapter2_hash_table_functions.js │ ├── chapter3_and_4_common.js │ ├── chapter3_hash_class.js │ ├── chapter4_real_python_dict.js │ ├── code_blocks.js │ ├── common_formatters.js │ ├── hash_impl_common.js │ ├── hash_impl_common.test.js │ ├── index.js │ ├── inputs.js │ ├── mustache/ │ │ ├── chapter1.json │ │ ├── chapter2.json │ │ ├── chapter3.json │ │ └── chapter4.json │ ├── page.html.template │ ├── probing_visualization.js │ ├── py_obj_parsing.js │ ├── py_obj_parsing.test.js │ ├── store.js │ ├── styles.css │ └── util.js ├── 
ssr-all.sh ├── stress_test_python.sh ├── unittest_python.sh ├── webpack.common.js ├── webpack.dev.js └── webpack.prod.js ================================================ FILE CONTENTS ================================================ ================================================ FILE: .babelrc ================================================ { "presets": [ [ "@babel/preset-env", { "modules": false } ], "@babel/preset-react" ], "plugins": [ [ "@babel/plugin-proposal-decorators", { "legacy": true } ], "@babel/plugin-proposal-class-properties", "@babel/plugin-syntax-dynamic-import", "@babel/plugin-syntax-import-meta", "@babel/plugin-proposal-json-strings", "@babel/plugin-proposal-function-sent", "@babel/plugin-proposal-export-namespace-from", "@babel/plugin-proposal-numeric-separator", "@babel/plugin-proposal-throw-expressions", "@babel/plugin-proposal-optional-chaining" ], "env": { "test": { "presets": [ [ "@babel/preset-env", { "modules": "auto" } ], "@babel/preset-react" ] } } } ================================================ FILE: .gitignore ================================================ ### PYTHON # # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. 
*.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover .hypothesis/ # Translations *.mo *.pot # Django stuff: *.log .static_storage/ .media/ local_settings.py # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # Jupyter Notebook .ipynb_checkpoints # pyenv .python-version # celery beat schedule file celerybeat-schedule # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ ### NODE # Logs logs *.log npm-debug.log* yarn-debug.log* yarn-error.log* # Runtime data pids *.pid *.seed *.pid.lock # Directory for instrumented libs generated by jscoverage/JSCover lib-cov # Coverage directory used by tools like istanbul coverage # nyc test coverage .nyc_output # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) .grunt # Bower dependency directory (https://bower.io/) bower_components # node-waf configuration .lock-wscript # Compiled binary addons (https://nodejs.org/api/addons.html) build/Release # Dependency directories node_modules/ jspm_packages/ # Typescript v1 declaration files typings/ # Optional npm cache directory .npm # Optional eslint cache .eslintcache # Optional REPL history .node_repl_history # Output of 'npm pack' *.tgz # Yarn Integrity file .yarn-integrity # dotenv environment variables file .env # next.js build output .next # vim custom added *.swp *.swo ================================================ FILE: .prettierignore ================================================ package-lock.json package.json ================================================ FILE: LICENSE ================================================ The MIT License (MIT) Copyright (c) 
2017-2018 Alexander Putilin Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: README.md ================================================ # Inside Python Dict - an explorable explanation This repository contains the code for ["Inside Python Dict"](https://just-taking-a-ride.com/inside_python_dict/), an explorable explanation of Python dicts. ## Code The code is quite messy, the build system works on my laptop but might not work on your machine. That's all fixable, but I am not sure if there is any interest from people in using the actual code. If so, let me know, I'll clean things up a bit and write an overview of the codebase. Meanwhile, try running `npm install && npm start` and see if it works. Bugfixes will be gladly accepted. ### Disclaimer I am providing code in the repository to you under an open source license. 
Because this is my personal repository, the license you receive to my code is from me and not my employer (Facebook) ================================================ FILE: package.json ================================================ { "name": "inside_python_dict", "version": "1.0.0", "description": "Inside python dict - an explorable explanation", "main": "index.js", "scripts": { "jest": "jest --env=node", "test:pystress": "npm run extractcode && ./stress_test_python.sh", "test:pyunit": "npm run extractcode && ./unittest_python.sh", "test": "npm run jest && npm run test:pyunit && npm run test:pystress", "build:ssr": "./ssr-all.sh", "update:html": "mkdir -p build && (for i in {chapter1,chapter2,chapter3,chapter4}; do mustache src/mustache/$i.json src/page.html.template > src/autogenerated/$i.html; done)", "start": "webpack-dev-server --config webpack.dev.js --host 0.0.0.0", "serve": "http-server -p 9090 dist/", "build": "npm run build:ssr && webpack --config webpack.prod.js", "babel-node": "npx babel-node --presets '@babel/env'", "extractcode": "mkdir -p build && npm run babel-node scripts/extractPythonCode.js", "dictserver": "rm pynode.sock ; npm run babel-node scripts/pyReimplWrapper.js; rm pynode.sock", "postinstall": "patch-package" }, "repository": { "type": "git", "url": "git+https://github.com/eleweek/inside_python_dict.git" }, "author": "Alexander Putilin", "license": "MIT", "bugs": { "url": "https://github.com/eleweek/inside_python_dict/issues" }, "homepage": "https://github.com/eleweek/inside_python_dict#readme", "prettier": { "printWidth": 120, "tabWidth": 4, "useTabs": false, "singleQuote": true, "bracketSpacing": false, "semi": true, "trailingComma": "es5" }, "dependencies": { "@fortawesome/fontawesome-svg-core": "^1.2.10", "@fortawesome/free-brands-svg-icons": "^5.6.1", "@fortawesome/free-solid-svg-icons": "^5.6.1", "@fortawesome/react-fontawesome": "^0.1.3", "bignumber.js": "^8.0.1", "bootstrap": "^4.1.3", "bowser": "^2.0.0-beta.3", "classnames": 
"^2.2.6", "d3": "^5.7.0", "d3-selection-multi": "^1.0.1", "i": "^0.3.6", "immutable": "^4.0.0-rc.12", "lodash": "^4.17.11", "lowlight": "^1.11.0", "memoize-one": "^4.1.0", "mobx": "^5.8.0", "mobx-react": "^5.4.3", "rc-slider": "^8.6.4", "react": "^16.6.3", "react-css-transition-replace": "^3.0.3", "react-dom": "^16.6.3", "react-error-boundary": "^1.2.3", "react-input-autosize": "^2.2.1", "react-popper": "^1.3.2", "react-smooth-scrollbar": "^8.0.6", "react-stickynode": "^2.1.0", "rehype": "^7.0.0", "smooth-scrollbar": "8.3.1" }, "devDependencies": { "@babel/cli": "^7.2.0", "@babel/core": "^7.2.2", "@babel/node": "^7.2.2", "@babel/plugin-proposal-class-properties": "^7.2.1", "@babel/plugin-proposal-decorators": "^7.2.2", "@babel/plugin-proposal-export-namespace-from": "^7.2.0", "@babel/plugin-proposal-function-sent": "^7.2.0", "@babel/plugin-proposal-json-strings": "^7.2.0", "@babel/plugin-proposal-numeric-separator": "^7.2.0", "@babel/plugin-proposal-object-rest-spread": "^7.2.0", "@babel/plugin-proposal-optional-chaining": "^7.2.0", "@babel/plugin-proposal-throw-expressions": "^7.2.0", "@babel/plugin-syntax-dynamic-import": "^7.2.0", "@babel/plugin-syntax-import-meta": "^7.2.0", "@babel/plugin-transform-destructuring": "^7.2.0", "@babel/preset-env": "^7.2.0", "@babel/preset-react": "^7.0.0", "babel-core": "^7.0.0-bridge.0", "babel-jest": "^23.4.2", "babel-loader": "^8.0.0", "babel-plugin-lodash": "^3.3.4", "clean-webpack-plugin": "^1.0.0", "css-loader": "^2.0.1", "dotenv": "^6.2.0", "html-webpack-plugin": "^3.2.0", "http-server": "^0.11.1", "husky": "^1.2.1", "ignore-styles": "^5.0.1", "jest": "^23.6.0", "mini-css-extract-plugin": "^0.5.0", "mustache": "^3.0.1", "npm": "^6.5.0", "patch-package": "^5.1.1", "prettier": "^1.15.3", "pretty-quick": "^1.8.0", "split": "^1.0.1", "style-loader": "^0.23.1", "unminified-webpack-plugin": "^2.0.0", "webpack": "^4.27.1", "webpack-bundle-analyzer": "^3.0.3", "webpack-cli": "^3.1.2", "webpack-dev-server": "^3.1.10", 
"webpack-merge": "^4.1.5" }, "husky": { "hooks": { "pre-commit": "pretty-quick --staged" } } } ================================================ FILE: patches/python32_debug.diff ================================================ diff --git a/Objects/dictobject.c b/Objects/dictobject.c index c10bfccdce..3734a08281 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -321,6 +321,8 @@ lookdict(PyDictObject *mp, PyObject *key, register Py_hash_t hash) PyObject *startkey; i = (size_t)hash & mask; + fprintf(stderr, "lookdict hash = %ld\n", hash); + fprintf(stderr, "initial i = %zu\n", i); ep = &ep0[i]; if (ep->me_key == NULL || ep->me_key == key) return ep; @@ -355,7 +357,9 @@ lookdict(PyDictObject *mp, PyObject *key, register Py_hash_t hash) least likely outcome, so test for that last. */ for (perturb = hash; ; perturb >>= PERTURB_SHIFT) { i = (i << 2) + i + perturb + 1; + fprintf(stderr, "next i = %zu perturb = %zu\n", i, perturb); ep = &ep0[i & mask]; + fprintf(stderr, "next i & mask = %zu perturb = %zu\n", i & mask, perturb); if (ep->me_key == NULL) return freeslot == NULL ? 
ep : freeslot; if (ep->me_key == key) @@ -648,6 +652,7 @@ dictresize(PyDictObject *mp, Py_ssize_t minused) } } else { + fprintf(stderr, "PyMem_NEW branch"); newtable = PyMem_NEW(PyDictEntry, newsize); if (newtable == NULL) { PyErr_NoMemory(); @@ -693,6 +698,7 @@ PyObject * _PyDict_NewPresized(Py_ssize_t minused) { PyObject *op = PyDict_New(); + fprintf(stderr, "_PyDict_NewPresized() %p %d\n", op, (int)minused); if (minused>5 && op != NULL && dictresize((PyDictObject *)op, minused) == -1) { Py_DECREF(op); diff --git a/Objects/longobject.c b/Objects/longobject.c index e2a4ef9c5e..7d72c88417 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -2611,6 +2611,7 @@ long_hash(PyLongObject *v) sign = -1; i = -(i); } + fprintf(stderr, "i = %ld\n", i); while (--i >= 0) { /* Here x is a quantity in the range [0, _PyHASH_MODULUS); we want to compute x * 2**PyLong_SHIFT + v->ob_digit[i] modulo ================================================ FILE: patches/smooth-scrollbar+8.3.1.patch ================================================ patch-package --- a/node_modules/smooth-scrollbar/events/touch.js +++ b/node_modules/smooth-scrollbar/events/touch.js @@ -1,7 +1,7 @@ import { eventScope, TouchRecord, } from '../utils/'; var activeScrollbar; export function touchHandler(scrollbar) { - var MIN_EAING_MOMENTUM = 50; + var MIN_EAING_MOMENTUM = 3; var EASING_MULTIPLIER = /Android/.test(navigator.userAgent) ? 
3 : 2; var target = scrollbar.options.delegateTo || scrollbar.containerEl; var touchRecord = new TouchRecord(); --- a/node_modules/smooth-scrollbar/geometry/update.js +++ b/node_modules/smooth-scrollbar/geometry/update.js @@ -4,6 +4,9 @@ export function update(scrollbar) { x: Math.max(newSize.content.width - newSize.container.width, 0), y: Math.max(newSize.content.height - newSize.container.height, 0), }; + // hack for a weird chrome on windows bug + if (limit.x <= 2) limit.x = 0; + if (limit.y <= 2) limit.y = 0; // metrics var containerBounding = scrollbar.containerEl.getBoundingClientRect(); var bounding = { --- a/node_modules/smooth-scrollbar/scrollbar.js +++ b/node_modules/smooth-scrollbar/scrollbar.js @@ -322,6 +322,10 @@ var Scrollbar = /** @class */ (function () { if (limit.x === 0 && limit.y === 0) { this._updateDebounced(); } + if (Math.abs(deltaY) > Math.abs(deltaX)) { + if (deltaY > 0 && offset.y === limit.y) return true; + if (deltaY < 0 && offset.y === 0) return true; + } var destX = clamp(deltaX + offset.x, 0, limit.x); var destY = clamp(deltaY + offset.y, 0, limit.y); var res = true; --- a/node_modules/smooth-scrollbar/track/track.js +++ b/node_modules/smooth-scrollbar/track/track.js @@ -41,8 +41,9 @@ var ScrollbarTrack = /** @class */ (function () { this.element.classList.remove('show'); }; ScrollbarTrack.prototype.update = function (scrollOffset, containerSize, pageSize) { + // -2 is a hack for a weird chrome on windows bug setStyle(this.element, { - display: pageSize <= containerSize ? 'none' : 'block', + display: pageSize - 2 <= containerSize ? 
'none' : 'block', }); this.thumb.update(scrollOffset, containerSize, pageSize); }; deleted file mode 100644 --- a/node_modules/smooth-scrollbar/track/track.js.map +++ /dev/null @@ -1 +0,0 @@ -{"version":3,"file":"track.js","sourceRoot":"","sources":["../src/track/track.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,cAAc,EAAE,MAAM,SAAS,CAAC;AAEzC,OAAO,EACL,QAAQ,GACT,MAAM,WAAW,CAAC;AAEnB;IAUE,wBACE,SAAyB,EACzB,YAAwB;QAAxB,6BAAA,EAAA,gBAAwB;QAT1B;;WAEG;QACM,YAAO,GAAG,QAAQ,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;QAEzC,aAAQ,GAAG,KAAK,CAAC;QAMvB,IAAI,CAAC,OAAO,CAAC,SAAS,GAAG,qCAAmC,SAAW,CAAC;QAExE,IAAI,CAAC,KAAK,GAAG,IAAI,cAAc,CAC7B,SAAS,EACT,YAAY,CACb,CAAC;QAEF,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACpC,CAAC;IAED;;;;OAIG;IACH,iCAAQ,GAAR,UAAS,kBAA+B;QACtC,kBAAkB,CAAC,WAAW,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAC/C,CAAC;IAED;;OAEG;IACH,6BAAI,GAAJ;QACE,EAAE,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC;YAClB,MAAM,CAAC;QACT,CAAC;QAED,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC;QACrB,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;IACrC,CAAC;IAED;;OAEG;IACH,6BAAI,GAAJ;QACE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC;YACnB,MAAM,CAAC;QACT,CAAC;QAED,IAAI,CAAC,QAAQ,GAAG,KAAK,CAAC;QACtB,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;IACxC,CAAC;IAED,+BAAM,GAAN,UACE,YAAoB,EACpB,aAAqB,EACrB,QAAgB;QAEhB,QAAQ,CAAC,IAAI,CAAC,OAAO,EAAE;YACrB,OAAO,EAAE,QAAQ,IAAI,aAAa,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO;SACtD,CAAC,CAAC;QAEH,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,YAAY,EAAE,aAAa,EAAE,QAAQ,CAAC,CAAC;IAC3D,CAAC;IACH,qBAAC;AAAD,CAAC,AApED,IAoEC"} \ No newline at end of file ================================================ FILE: patches/subscribe-ui-event+2.0.4.patch ================================================ patch-package new file mode 100644 Binary files /dev/null and b/node_modules/subscribe-ui-event/.index.es.js.swp differ --- a/node_modules/subscribe-ui-event/index.es.js +++ b/node_modules/subscribe-ui-event/index.es.js @@ -7,7 +7,7 @@ import listenLib from './dist-es/lib/listen'; import 
subscribeLib from './dist-es/subscribe'; import unsubscribeLib from './dist-es/unsubscribe'; -const IS_CLIENT = typeof window !== 'undefined'; +var IS_CLIENT = typeof window !== 'undefined'; function warn() { if (process.env.NODE_ENV !== 'production') { @@ -15,6 +15,6 @@ function warn() { } } -export const listen = IS_CLIENT ? listenLib : warn; -export const subscribe = IS_CLIENT ? subscribeLib : warn; -export const unsubscribe = IS_CLIENT ? unsubscribeLib : warn; +export var listen = IS_CLIENT ? listenLib : warn; +export var subscribe = IS_CLIENT ? subscribeLib : warn; +export var unsubscribe = IS_CLIENT ? unsubscribeLib : warn; ================================================ FILE: python_code/actual_dict_factory_test.py ================================================ import unittest from dict32_reimplementation_test_v2 import dict_factory from dictinfo import dump_py_dict def table_size(d): return len(dump_py_dict(d)[0]) class TestDictFactory(unittest.TestCase): def test_dict_factory(self): self.assertEqual(table_size(dict_factory([])), 8) self.assertEqual(table_size(dict_factory([(1, 1)])), 8) self.assertEqual(table_size(dict_factory([(1, 1), (1, 2), (1, 3), (1, 4)])), 8) self.assertEqual(table_size(dict_factory([(1, 1), (1, 2), (1, 3), (1, 4), (1, 5)])), 8) self.assertEqual(table_size(dict_factory([(1, 1), (1, 2), (1, 3), (1, 4), (1, 5), (1, 6), (1, 7), (1, 8)])), 16) self.assertEqual(table_size({1: 1, 1: 2, 1: 3, 1: 4, 1: 5, 1: 6, 1: 7, 1: 8}), 16) self.assertEqual(table_size(dict([(1, 1), (1, 2), (1, 3), (1, 4), (1, 5), (1, 6), (1, 7), (1, 8)])), 8) self.assertEqual(table_size({"x": "y", "abde": 1, "cdef": 4, "world": 9, "hmmm": 16, "hello": 25, "xxx": 36, "ya": 49, "hello,world!": 64, "well": 81, "meh": 100}), 64) def main(): unittest.main() if __name__ == "__main__": main() ================================================ FILE: python_code/build_autogenerated_chapter1_hash.py ================================================ import os import sys 
sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'build')) from hash_chapter1_extracted import * def create_new(numbers): return build_insert_all(numbers) def create_new_broken(numbers): return build_not_quite_what_we_want(numbers) def has_key(keys, key): return has_number(keys, key) def linear_search(numbers, number): return simple_search(numbers, number) ================================================ FILE: python_code/build_autogenerated_chapter2.py ================================================ import os import sys sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'build')) from hash_chapter2_extracted import * ================================================ FILE: python_code/build_autogenerated_chapter3_chapter4.py ================================================ import os import sys sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'build')) from dict32js_extracted import Dict32Extracted from hash_class_recycling_extracted import HashClassRecyclingExtracted from hash_class_no_recycling_extracted import HashClassNoRecyclingExtracted ================================================ FILE: python_code/chapter1_linear_search_reimplementation_test.py ================================================ import random import argparse import hash_chapter1_reimpl_js import hash_chapter1_impl import build_autogenerated_chapter1_hash IMPLEMENTATIONS = { 'ref': hash_chapter1_impl.linear_search, 'js': hash_chapter1_reimpl_js.linear_search, 'py_extracted': build_autogenerated_chapter1_hash.linear_search } def run(test_implementation, size): MAX_VAL = 5000 ref_search = IMPLEMENTATIONS['ref'] test_search = IMPLEMENTATIONS[test_implementation] numbers = [random.randint(-MAX_VAL, MAX_VAL) for _ in range(size)] for number in numbers: assert ref_search(numbers, number) assert test_search(numbers, number) for i in range(size * 3): number = random.randint(-MAX_VAL, MAX_VAL) assert 
ref_search(numbers, number) == test_search(numbers, number) if __name__ == "__main__": parser = argparse.ArgumentParser(description='Stress-test chapter1 reimplementation') parser.add_argument('--test-implementation', choices=['py_extracted', 'js'], required=True) parser.add_argument('--size', type=int, default=100) args = parser.parse_args() run(test_implementation=args.test_implementation, size=args.size) ================================================ FILE: python_code/chapter4_probing_python_reimplementation_test.py ================================================ import json from common import AllKeyValueFactory from js_reimpl_common import _init_sock_stuff, dump_simple_py_obj from pprint import pprint sock, sockfile = _init_sock_stuff() def probe_all_js(key, slots_count): global sockfile global sock data = { "dict": "pythonProbing", "args": { 'key': dump_simple_py_obj(key), 'slotsCount': slots_count }, } sock.send(bytes(json.dumps(data) + "\n", 'UTF-8')) response = json.loads(sockfile.readline()) return response['result'] def probe_all(key, slots_count=8): PERTURB_SHIFT = 5 links = [[] for _ in range(slots_count)] hash_code = hash(key) perturb = 2**64 + hash_code if hash_code < 0 else hash_code idx = hash_code % slots_count start_idx = idx visited = set() while len(visited) < slots_count: visited.add(idx) next_idx = (idx * 5 + perturb + 1) % slots_count links[idx].append({'nextIdx': next_idx, 'perturbLink': perturb != 0}) idx = next_idx perturb >>= PERTURB_SHIFT return {'startIdx': start_idx, 'links': links} def test(): factory = AllKeyValueFactory(100) for slots_count in [8, 16, 32]: for i in range(300): key = factory.generate_key() assert probe_all(key, slots_count) == probe_all_js(key, slots_count) if __name__ == "__main__": test() ================================================ FILE: python_code/common.py ================================================ import random import string class EmptyValueClass(object): def __str__(self): return "EMPTY" def 
__repr__(self): return "" class DummyValueClass(object): def __str__(self): return "" def __repr__(self): return "" EMPTY = EmptyValueClass() DUMMY = DummyValueClass() def get_object_field_or_null(obj, field_name): try: return getattr(obj, field_name) except ValueError: return EMPTY def get_object_field_or_none(obj, field_name): try: return getattr(obj, field_name) except ValueError: return None def generate_random_string(str_len=5): # FROM: https://stackoverflow.com/questions/2257441/random-string-generation-with-upper-case-letters-and-digits-in-python return ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(str_len)) _unicode_chars = string.ascii_uppercase + string.digits + "йцукенгшщзхъфывапролджэячсмитьбю" def generate_random_unicode(str_len): # FROM: https://stackoverflow.com/questions/2257441/random-string-generation-with-upper-case-letters-and-digits-in-python return ''.join(random.choice(_unicode_chars) for _ in range(str_len)) class IntKeyValueFactory(object): def __init__(self, n_inserts): self.n_inserts = n_inserts self._insert_count = 0 self._key_range = list(range(n_inserts)) def generate_key(self): return random.choice(self._key_range) def generate_value(self): self._insert_count += 1 return self._insert_count class AllKeyValueFactory(object): def __init__(self, n_inserts, int_chance=0.1, long_chance=0.1, len0_chance=0.01, len1_chance=0.1, len2_chance=0.3, len3_chance=0.2, len_random_chance=0.17): self.int_pbf = int_chance self.long_pbf = self.int_pbf + long_chance self.len0_pbf = self.int_pbf + len0_chance self.len1_pbf = self.len0_pbf + len1_chance self.len2_pbf = self.len1_pbf + len2_chance self.len3_pbf = self.len2_pbf + len3_chance self.len_random_pbf = self.len3_pbf + len_random_chance assert 0.0 <= self.len3_pbf <= 1.0 half_range = int(n_inserts / 2) self._int_range = [i - half_range for i in range(2 * half_range)] def _generate_obj(self): r = random.random() if r <= self.int_pbf: return random.choice(self._int_range) 
if r <= self.long_pbf: sign = "-" if random.random() < 0.5 else "" first_digit = random.choice("123456789") return sign + first_digit + ''.join(random.choice("0123456789") for _ in range(random.randint(20, 50))) if r <= self.len0_pbf: return "" if r <= self.len1_pbf: return generate_random_unicode(1) if r <= self.len2_pbf: return generate_random_unicode(2) if r <= self.len3_pbf: return generate_random_unicode(3) if r <= self.len_random_pbf: return generate_random_unicode(random.randint(4, 25)) return None def generate_key(self): return self._generate_obj() def generate_value(self): return self._generate_obj() ================================================ FILE: python_code/dict32_reimplementation_test_v2.py ================================================ import random import argparse import json from common import EMPTY, AllKeyValueFactory, IntKeyValueFactory from dictinfo import dump_py_dict from dict_reimplementation import PyDictReimplementation32, dump_reimpl_dict from js_reimplementation_interface import Dict32JsImpl, AlmostPythonDictRecyclingJsImpl, AlmostPythonDictNoRecyclingJsImpl import hash_chapter3_class_impl import build_autogenerated_chapter3_chapter4 def dict_factory(pairs=None): if not pairs: return {} # quick&dirty def to_string(x): return json.dumps(x) if x is not None else "None" d = eval("{" + ", ".join("{}:{}".format(to_string(k), to_string(v)) for [k, v] in pairs) + "}") return d IMPLEMENTATIONS = { "dict_actual": (dict_factory, dump_py_dict), "dict32_reimpl_py": (PyDictReimplementation32, dump_reimpl_dict), "dict32_reimpl_js": (Dict32JsImpl, dump_reimpl_dict), "dict32_reimpl_py_extracted": (build_autogenerated_chapter3_chapter4.Dict32Extracted, dump_reimpl_dict), "almost_python_dict_recycling_py": (hash_chapter3_class_impl.AlmostPythonDictImplementationRecycling, dump_reimpl_dict), "almost_python_dict_no_recycling_py": (hash_chapter3_class_impl.AlmostPythonDictImplementationNoRecycling, dump_reimpl_dict), 
"almost_python_dict_no_recycling_py_simpler": (hash_chapter3_class_impl.AlmostPythonDictImplementationNoRecyclingSimplerVersion, dump_reimpl_dict), "almost_python_dict_recycling_js": (AlmostPythonDictRecyclingJsImpl, dump_reimpl_dict), "almost_python_dict_no_recycling_js": (AlmostPythonDictNoRecyclingJsImpl, dump_reimpl_dict), "almost_python_dict_recycling_py_extracted": (build_autogenerated_chapter3_chapter4.HashClassRecyclingExtracted, dump_reimpl_dict), "almost_python_dict_no_recycling_py_extracted": (build_autogenerated_chapter3_chapter4.HashClassNoRecyclingExtracted, dump_reimpl_dict), } def verify_same(d, dump_d_func, dreimpl, dump_dreimpl_func): dump_d = dump_d_func(d) dump_reimpl = dump_dreimpl_func(dreimpl) if dump_d != dump_reimpl: hashes_orig, keys_orig, values_orig, fill_orig, used_orig = dump_d hashes_new, keys_new, values_new, fill_new, used_new = dump_reimpl print("ORIG SIZE", len(hashes_orig)) print("NEW SIZE", len(hashes_new)) print("ORIG fill/used: ", fill_orig, used_orig) print("NEW fill/used: ", fill_new, used_new) if len(hashes_orig) == len(hashes_new): size = len(hashes_orig) print("NEW | ORIG") for i in range(size): if hashes_new[i] is not EMPTY or hashes_orig[i] is not EMPTY: print(i, " " * 3, hashes_new[i], keys_new[i], values_new[i], " " * 3, hashes_orig[i], keys_orig[i], values_orig[i]) assert dump_d == dump_reimpl def run(ref_impl_factory, ref_impl_dump, test_impl_factory, test_impl_dump, n_inserts, extra_checks, key_value_factory, initial_state, verbose): SINGLE_REMOVE_CHANCE = 0.3 MASS_REMOVE_CHANCE = 0.002 MASS_REMOVE_COEFF = 0.8 removed = set() if initial_state: d = ref_impl_factory(initial_state) else: d = ref_impl_factory() if initial_state: dreimpl = test_impl_factory(initial_state) else: dreimpl = test_impl_factory() if verbose: print("Starting test") for i in range(n_inserts): should_remove = (random.random() < SINGLE_REMOVE_CHANCE) if should_remove and d and d.keys(): # TODO: ugly, written while on a plane to_remove = 
random.choice(list(d.keys())) if verbose: print("Removing {}".format(to_remove)) del d[to_remove] del dreimpl[to_remove] if verbose: print(d) verify_same(d, ref_impl_dump, dreimpl, test_impl_dump) removed.add(to_remove) should_mass_remove = (random.random() < MASS_REMOVE_CHANCE) if should_mass_remove and len(d) > 10: to_remove_list = random.sample(list(d.keys()), int(MASS_REMOVE_COEFF * len(d))) if verbose: print("Mass-Removing {} elements".format(len(to_remove_list))) for k in to_remove_list: del d[k] del dreimpl[k] removed.add(k) if extra_checks: for k in d.keys(): assert d[k] == dreimpl[k] for r in removed: try: dreimpl[r] assert False except KeyError: pass key_to_insert = key_value_factory.generate_key() value_to_insert = key_value_factory.generate_value() _keys_set = getattr(d, '_keys_set', None) # TODO: ugly code written on a plane # TODO: properly implement in/not in when I land if _keys_set is not None: key_present = key_to_insert in _keys_set else: key_present = key_to_insert in d if not key_present: if verbose: print("Inserting ({key}, {value})".format(key=key_to_insert, value=value_to_insert)) try: dreimpl[key_to_insert] assert False except KeyError: pass else: if verbose: print("Replacing ({key}, {value1}) with ({key}, {value2})".format(key=key_to_insert, value1=d[key_to_insert], value2=value_to_insert)) removed.discard(key_to_insert) d[key_to_insert] = value_to_insert dreimpl[key_to_insert] = value_to_insert if verbose: print(d) verify_same(d, ref_impl_dump, dreimpl, test_impl_dump) assert dreimpl[key_to_insert] == value_to_insert if __name__ == "__main__": parser = argparse.ArgumentParser(description='Stress-test dict-like reimplementations') parser.add_argument('--reference-implementation', choices=IMPLEMENTATIONS.keys(), required=True) parser.add_argument('--test-implementation', choices=IMPLEMENTATIONS.keys(), required=True) parser.add_argument('--no-extra-getitem-checks', dest='extra_checks', action='store_false') 
parser.add_argument('--num-inserts', type=int, default=500) parser.add_argument('--forever', action='store_true') parser.add_argument('--kv', choices=["numbers", "all"], required=True) parser.add_argument('--initial-size', type=int, default=-1) parser.add_argument('--verbose', action='store_true') args = parser.parse_args() if args.kv == "numbers": kv_factory = IntKeyValueFactory(args.num_inserts) elif args.kv == "all": kv_factory = AllKeyValueFactory(args.num_inserts) ref_impl = IMPLEMENTATIONS[args.reference_implementation] test_impl = IMPLEMENTATIONS[args.test_implementation] def test_iteration(): initial_size = args.initial_size if args.initial_size >= 0 else random.randint(0, 100) initial_state = [(kv_factory.generate_key(), kv_factory.generate_value()) for _ in range(initial_size)] run(*(ref_impl + test_impl), n_inserts=args.num_inserts, extra_checks=args.extra_checks, key_value_factory=kv_factory, initial_state=initial_state, verbose=args.verbose) if args.forever: while True: test_iteration() else: test_iteration() ================================================ FILE: python_code/dict_reimpl_common.py ================================================ from common import EMPTY class Slot(object): def __init__(self, hash_code=EMPTY, key=EMPTY, value=EMPTY): self.hash_code = hash_code self.key = key self.value = value class BaseDictImpl(object): def __init__(self): self.slots = [Slot() for _ in range(self.START_SIZE)] self.fill = 0 self.used = 0 def find_nearest_size(self, minused): new_size = 8 while new_size <= minused: new_size *= 2 return new_size ================================================ FILE: python_code/dict_reimplementation.py ================================================ from common import DUMMY, EMPTY from dict_reimpl_common import BaseDictImpl, Slot from operator import attrgetter class PyDictReimplementationBase(BaseDictImpl): START_SIZE = 8 PERTURB_SHIFT = 5 def __init__(self, pairs=None): BaseDictImpl.__init__(self) start_size = 
self.find_nearest_size(len(pairs)) if pairs else self.START_SIZE self.slots = [Slot() for _ in range(start_size)] if pairs: for k, v in pairs: self[k] = v def __setitem__(self, key, value): hash_code = hash(key) perturb = self.signed_to_unsigned(hash_code) idx = hash_code % len(self.slots) target_idx = None while self.slots[idx].key is not EMPTY: if self.slots[idx].hash_code == hash_code and self.slots[idx].key == key: target_idx = idx break if target_idx is None and self.slots[idx].key is DUMMY: target_idx = idx idx = (idx * 5 + perturb + 1) % len(self.slots) perturb >>= self.PERTURB_SHIFT if target_idx is None: target_idx = idx if self.slots[target_idx].key is EMPTY: self.used += 1 self.fill += 1 elif self.slots[target_idx].key is DUMMY: self.used += 1 self.slots[target_idx] = Slot(hash_code, key, value) if self.fill * 3 >= len(self.slots) * 2: self.resize() def __delitem__(self, key): idx = self.lookdict(key) self.used -= 1 self.slots[idx].key = DUMMY self.slots[idx].value = EMPTY def __getitem__(self, key): idx = self.lookdict(key) return self.slots[idx].value @staticmethod def signed_to_unsigned(hash_code): return 2**64 + hash_code if hash_code < 0 else hash_code def lookdict(self, key): hash_code = hash(key) perturb = self.signed_to_unsigned(hash_code) idx = hash_code % len(self.slots) while self.slots[idx].key is not EMPTY: if self.slots[idx].hash_code == hash_code and self.slots[idx].key == key: return idx idx = (idx * 5 + perturb + 1) % len(self.slots) perturb >>= self.PERTURB_SHIFT raise KeyError() def resize(self): old_slots = self.slots new_size = self.find_nearest_size(self._next_size()) self.slots = [Slot() for _ in range(new_size)] self.fill = self.used for slot in old_slots: if slot.key is not EMPTY and slot.key is not DUMMY: perturb = self.signed_to_unsigned(slot.hash_code) idx = slot.hash_code % len(self.slots) while self.slots[idx].key is not EMPTY: idx = (idx * 5 + perturb + 1) % len(self.slots) perturb >>= self.PERTURB_SHIFT self.slots[idx] = 
def dump_py_dict(d):
    """Dump the internals of ``d`` using the helper module for the running interpreter.

    Only Python 3.2 and 3.3 are handled: 3.2 is delegated to ``dictinfo32``
    and 3.3 to ``dictinfo33``.

    Raises:
        Exception: if the interpreter is not Python 3, or its minor version
            is below 2 or above 3.
    """
    major, minor = sys.version_info[:2]
    if major != 3:
        raise Exception("Unsupported major version")
    if minor < 2:
        raise Exception("Unsupported minor version (too old)")
    if minor > 3:
        raise Exception("Unsupported minor version (too new)")

    # Import lazily so only the module matching this interpreter is loaded.
    if minor == 2:
        import dictinfo32 as backend
    else:
        import dictinfo33 as backend
    return backend.dump_py_dict(d)
def usable_fraction(size):
    """Return ``(2 * size + 1) // 3`` for a table of ``size`` entries.

    ``dump_py_dict`` subtracts ``dk_usable`` from this value to obtain the
    fill count it reports.
    """
    doubled_plus_one = 2 * size + 1
    return doubled_plus_one // 3
def has_key(keys, key):
    """Return True if ``key`` is stored in the linear-probing table ``keys``.

    Probing starts at ``key % len(keys)`` and moves forward one slot at a
    time, wrapping around, until the key or an empty (``None``) slot is found.

    At most ``len(keys)`` slots are inspected. Tables built by
    ``create_new_broken`` have exactly as many slots as keys and may contain
    no ``None`` at all, in which case an unbounded probe loop would never
    terminate for a missing key. An empty table contains nothing, so the
    answer is False rather than a ``ZeroDivisionError`` from the modulo.
    """
    size = len(keys)
    if size == 0:
        return False

    idx = key % size
    for _ in range(size):
        if keys[idx] is None:
            # An empty slot ends the probe sequence: the key is not present.
            return False
        if keys[idx] == key:
            return True
        idx = (idx + 1) % size
    # Every slot was visited without finding the key.
    return False
def create_new(from_keys):
    """Build a linear-probing table holding every key of ``from_keys``.

    Two parallel lists of length ``2 * len(from_keys)`` are returned:
    ``hash_codes`` and ``keys``. Unused positions hold ``EMPTY``. A key that
    occurs more than once in ``from_keys`` occupies a single slot.
    """
    capacity = 2 * len(from_keys)
    hash_codes = [EMPTY] * capacity
    keys = [EMPTY] * capacity

    for key in from_keys:
        hash_code = hash(key)
        idx = hash_code % capacity
        while True:
            occupant = keys[idx]
            if occupant is EMPTY:
                break
            # Same hash and equal key: reuse this slot instead of adding a duplicate.
            if hash_codes[idx] == hash_code and occupant == key:
                break
            idx = (idx + 1) % capacity
        hash_codes[idx] = hash_code
        keys[idx] = key

    return hash_codes, keys
def resize(hash_codes, keys):
    """Return new ``(hash_codes, keys)`` lists of twice the length.

    Every entry whose key is neither ``EMPTY`` nor ``DUMMY`` is re-inserted
    with linear probing using its stored hash code; the input lists are not
    modified.
    """
    doubled_hashes = [EMPTY] * (2 * len(hash_codes))
    doubled_keys = [EMPTY] * (2 * len(keys))
    capacity = len(doubled_keys)

    # Only live entries survive a resize; EMPTY and DUMMY slots are dropped.
    live_entries = (
        (stored_hash, stored_key)
        for stored_hash, stored_key in zip(hash_codes, keys)
        if stored_key is not EMPTY and stored_key is not DUMMY
    )
    for stored_hash, stored_key in live_entries:
        position = stored_hash % capacity
        while doubled_hashes[position] is not EMPTY:
            position = (position + 1) % capacity
        doubled_hashes[position] = stored_hash
        doubled_keys[position] = stored_key

    return doubled_hashes, doubled_keys
self.assertEqual(hashes[3], 0) self.assertEqual(keys[3], "") self.assertTrue(has_key(hashes, keys, "")) self.assertTrue(has_key(hashes, keys, 42)) insert(hashes, keys, "aba") # hash("aba") % 6 == 5 self.assertEqual(hashes[5], hash("aba")) self.assertEqual(keys[5], "aba") self.assertTrue(has_key(hashes, keys, 12)) remove(hashes, keys, 12) self.assertFalse(has_key(hashes, keys, 12)) self.assertEqual(hashes[12 % expected_len], 42) self.assertEqual(keys[12 % expected_len], 42) self.assertEqual(keys[12 % expected_len + 2], DUMMY) with self.assertRaises(KeyError): remove(hashes, keys, 12) with self.assertRaises(KeyError): remove(hashes, keys, 45) self.assertFalse(has_key(hashes, keys, 12)) self.assertFalse(has_key(hashes, keys, 45)) self.assertTrue(has_key(hashes, keys, 42)) self.assertTrue(has_key(hashes, keys, 43)) self.assertTrue(has_key(hashes, keys, "")) self.assertTrue(has_key(hashes, keys, "aba")) insert(hashes, keys, "abg") self.assertTrue(has_key(hashes, keys, "abg")) self.assertEqual(hashes[4], hash("abg")) self.assertEqual(keys[4], "abg") hashes, keys = resize(hashes, keys) self.assertTrue(has_key(hashes, keys, 42)) self.assertTrue(has_key(hashes, keys, 43)) self.assertTrue(has_key(hashes, keys, "")) self.assertTrue(has_key(hashes, keys, "aba")) self.assertTrue(has_key(hashes, keys, "abg")) self.assertFalse(has_key(hashes, keys, 12)) self.assertFalse(has_key(hashes, keys, 45)) self.assertEqual(hashes[6], 42) self.assertEqual(keys[6], 42) self.assertEqual(hashes[7], 43) self.assertEqual(keys[7], 43) self.assertEqual(hashes[0], 0) self.assertEqual(keys[0], "") for h in hashes: self.assertTrue(h != 12) self.assertEqual(hashes[5], hash("aba")) self.assertEqual(keys[5], "aba") self.assertEqual(hashes[11], hash("abg")) self.assertEqual(keys[11], "abg") def test_all(self): n = 10 initial_keys = [generate_random_string() for _ in range(n)] more_keys = [generate_random_string() for _ in range(n // 3)] myhashes, mykeys = create_new(initial_keys) for key in more_keys: 
def insert(hash_codes, keys, key):
    """Run the "insert" operation through ``run_op`` and update both lists in place.

    The lists returned by ``run_op`` replace the contents of ``hash_codes``
    and ``keys`` via slice assignment, so callers holding references to the
    original list objects observe the change.
    """
    updated = run_op(hash_codes, keys, "insert", key=key)
    fresh_hash_codes, fresh_keys = updated
    hash_codes[:] = fresh_hash_codes
    keys[:] = fresh_keys
def verify_same(ref_hash_codes, ref_keys, hash_codes, keys):
    """Check that the tested table is identical to the reference table.

    On a mismatch, diagnostic output is printed first: the sizes of all four
    lists and, when the sizes agree, one row per index at which either hash
    code list is not ``EMPTY``. Each row shows the reference (ORIG) entry
    followed by the tested (NEW) entry.

    Raises:
        AssertionError: if the hash codes or the keys differ. The error is
            raised explicitly so the check is not stripped by ``python -O``.
    """
    if ref_hash_codes == hash_codes and ref_keys == keys:
        return

    print("ORIG SIZES", len(ref_hash_codes), len(ref_keys))
    print("NEW SIZES", len(hash_codes), len(keys))
    if len(ref_hash_codes) == len(hash_codes) == len(ref_keys) == len(keys):
        size = len(hash_codes)
        # Header order matches the row layout below: reference first, tested second.
        print("ORIG | NEW")
        for i in range(size):
            if ref_hash_codes[i] is not EMPTY or hash_codes[i] is not EMPTY:
                print(i, " " * 3, ref_hash_codes[i], ref_keys[i], " " * 3, hash_codes[i], keys[i], " " * 3)

    raise AssertionError("reference and tested hash tables differ")
ref_impl.has_key(ref_hash_codes, ref_keys, key_to_insert) assert (key_to_insert in existing_keys) == is_key_present if not is_key_present: if verbose: print("Inserting {}".format(key_to_insert)) assert not test_impl.has_key(test_hash_codes, test_keys, key_to_insert) else: if verbose: print("Re-Inserting {}".format(key_to_insert)) ref_impl.insert(ref_hash_codes, ref_keys, key_to_insert) test_impl.insert(test_hash_codes, test_keys, key_to_insert) vs() assert test_impl.has_key(test_hash_codes, test_keys, key_to_insert) assert ref_impl.has_key(ref_hash_codes, ref_keys, key_to_insert) if fill / len(ref_keys) > 0.66: ref_hash_codes, ref_keys = ref_impl.resize(ref_hash_codes, ref_keys) test_hash_codes, test_keys = test_impl.resize(test_hash_codes, test_keys) vs() if extra_checks: for k in existing_keys: assert test_impl.has_key(test_hash_codes, test_keys, k) assert ref_impl.has_key(ref_hash_codes, ref_keys, k) if __name__ == "__main__": parser = argparse.ArgumentParser(description='Stress-test chapter2 reimplementation') parser.add_argument('--test-implementation', choices=TEST_IMPLEMENTATIONS.keys(), required=True) parser.add_argument('--num-inserts', type=int, default=500) parser.add_argument('--forever', action='store_true') parser.add_argument('--kv', choices=["numbers", "all"], required=True) parser.add_argument('--initial-size', type=int, default=-1) parser.add_argument('--extra-getitem-checks', action='store_true', default=False) parser.add_argument('--verbose', action='store_true', default=False) args = parser.parse_args() if args.kv == "numbers": kv_factory = IntKeyValueFactory(args.num_inserts) elif args.kv == "all": kv_factory = AllKeyValueFactory(args.num_inserts) def test_iteration(): initial_size = args.initial_size if args.initial_size >= 0 else random.randint(0, 100) initial_state = [kv_factory.generate_key() for _ in range(initial_size)] run(hash_chapter2_impl, TEST_IMPLEMENTATIONS[args.test_implementation], n_inserts=args.num_inserts, 
class AlmostPythonDictBase(BaseDictImpl):
    """Linear-probing dict base class; subclasses supply ``__setitem__``.

    Besides the slot table, a plain ``set`` of the live keys is kept in
    ``_keys_set``; ``keys()`` and ``__len__`` are answered from that set.
    """

    START_SIZE = 8

    def __init__(self, pairs=None):
        """Create an empty table and insert the optional ``(key, value)`` pairs."""
        BaseDictImpl.__init__(self)
        self._keys_set = set()
        if pairs:
            for k, v in pairs:
                self[k] = v

    def lookdict(self, key):
        """Return the index of the slot holding ``key``; raise KeyError if absent."""
        hash_code = hash(key)

        idx = hash_code % len(self.slots)
        while self.slots[idx].key is not EMPTY:
            if self.slots[idx].hash_code == hash_code and self.slots[idx].key == key:
                return idx

            idx = (idx + 1) % len(self.slots)

        raise KeyError()

    def __getitem__(self, key):
        """Return the value stored for ``key``; raise KeyError if absent."""
        idx = self.lookdict(key)

        return self.slots[idx].value

    def __delitem__(self, key):
        """Remove ``key``; raise KeyError if absent.

        The slot is marked with DUMMY rather than EMPTY so probe sequences
        passing through it keep working; ``fill`` is therefore left unchanged.
        """
        idx = self.lookdict(key)

        self.used -= 1
        self.slots[idx].key = DUMMY
        self.slots[idx].value = EMPTY
        self._keys_set.remove(key)

    def __contains__(self, key):
        """Return True if ``key`` is stored in the table.

        Without this method ``key in d`` falls back to calling
        ``__getitem__`` with 0, 1, 2, ..., which raises KeyError or gives
        unrelated answers for a mapping.
        """
        try:
            self.lookdict(key)
        except KeyError:
            return False
        return True

    def resize(self):
        """Rebuild the table with ``find_nearest_size(2 * used)`` slots, dropping DUMMY entries."""
        old_slots = self.slots
        new_size = self.find_nearest_size(2 * self.used)
        self.slots = [Slot() for _ in range(new_size)]

        for slot in old_slots:
            if slot.key is not EMPTY and slot.key is not DUMMY:
                idx = slot.hash_code % len(self.slots)
                while self.slots[idx].key is not EMPTY:
                    idx = (idx + 1) % len(self.slots)

                self.slots[idx] = Slot(slot.hash_code, slot.key, slot.value)

        # No DUMMY slots survive the rebuild, so fill equals used again.
        self.fill = self.used

    def keys(self):
        """Return the set of live keys (the internal set object itself, not a copy)."""
        return self._keys_set

    def __len__(self):
        """Return the number of live keys."""
        return len(self.keys())
class AlmostPythonDictImplementationNoRecycling(AlmostPythonDictBase):
    """Variant whose ``__setitem__`` never reuses DUMMY slots."""

    def __setitem__(self, key, value):
        """Store ``value`` under ``key``, overwriting an existing entry for an equal key.

        A new key is always placed in the first EMPTY slot of its probe
        sequence. The table is resized once ``fill`` reaches two thirds of
        the slot count.
        """
        hash_code = hash(key)
        table = self.slots
        size = len(table)

        probe = hash_code % size
        while True:
            occupant = table[probe]
            if occupant.key is EMPTY:
                # First EMPTY slot: this is a brand-new entry.
                self.used += 1
                self.fill += 1
                break
            if occupant.hash_code == hash_code and occupant.key == key:
                # Existing entry for this key: overwrite without touching the counters.
                break
            probe = (probe + 1) % size

        table[probe] = Slot(hash_code, key, value)
        if 3 * self.fill >= 2 * size:
            self.resize()
        self._keys_set.add(key)
k) self.assertEqual(d.slots[i].value, v) d[""] = 1 d[17] = 2 d[18] = 3 self.assertEqual(d[""], 1) self.assertEqual(d[17], 2) self.assertEqual(d[18], 3) assert_contains(0, 0, "", 1) assert_contains(1, 17, 17, 2) assert_contains(2, 18, 18, 3) self.assertEqual(d.fill, 3) self.assertEqual(d.used, 3) with self.assertRaises(KeyError): del d[1] del d[17] assert_contains(1, 17, DUMMY, EMPTY) self.assertEqual(d.fill, 3) self.assertEqual(d.used, 2) # hash("abcd") % 8 == 0 # py 3.2 hash() d["abcd"] = 4 self.assertEqual(d["abcd"], 4) assert_contains(1, -2835746963027601024, "abcd", 4) self.assertEqual(d.fill, 3) self.assertEqual(d.used, 3) d["abcd"] = 5 self.assertEqual(d["abcd"], 5) assert_contains(1, -2835746963027601024, "abcd", 5) self.assertEqual(d.fill, 3) self.assertEqual(d.used, 3) del d["abcd"] with self.assertRaises(KeyError): d["abcd"] d[15] = 6 d[14] = 7 assert_contains(7, 15, 15, 6) assert_contains(6, 14, 14, 7) self.assertEqual(len(d.slots), 8) self.assertEqual(d.fill, 5) self.assertEqual(d.used, 4) d[13] = 8 self.assertEqual(len(d.slots), 16) self.assertEqual(d.fill, 5) self.assertEqual(d.used, 5) assert_contains(0, 0, "", 1) assert_contains(2, 18, 18, 3) assert_contains(13, 13, 13, 8) assert_contains(14, 14, 14, 7) assert_contains(15, 15, 15, 6) def test_handcrafted_simple_setitem(self): d = AlmostPythonDictImplementationNoRecycling() self.assertEqual(len(d.slots), 8) def assert_contains(i, h, k, v): self.assertEqual(d.slots[i].hash_code, h) self.assertEqual(d.slots[i].key, k) self.assertEqual(d.slots[i].value, v) d[""] = 1 d[17] = 2 d[18] = 3 self.assertEqual(d[""], 1) self.assertEqual(d[17], 2) self.assertEqual(d[18], 3) assert_contains(0, 0, "", 1) assert_contains(1, 17, 17, 2) assert_contains(2, 18, 18, 3) self.assertEqual(d.fill, 3) self.assertEqual(d.used, 3) with self.assertRaises(KeyError): del d[1] del d[17] assert_contains(1, 17, DUMMY, EMPTY) self.assertEqual(d.fill, 3) self.assertEqual(d.used, 2) # hash("abcd") % 8 == 0 # py 3.2 hash() d["abcd"] = 4 
import unittest

# The dict32 reimplementation is PyDictReimplementation32 in dict_reimplementation.py;
# the repository has no dict32_reimplementation module.
from dict_reimplementation import PyDictReimplementation32
from hash_chapter3_class_impl import AlmostPythonDictImplementationRecycling, AlmostPythonDictImplementationNoRecycling
from js_reimplementation_interface import Dict32JsImpl, AlmostPythonDictRecyclingJsImpl, AlmostPythonDictNoRecyclingJsImpl


class Interface(unittest.TestCase):
    """Smoke test: every dict implementation supports set, get and delete."""

    def test_all(self):
        """Run the same simple scenario against each Python and JS-backed class."""
        self.do_simple_test_single_class(PyDictReimplementation32)
        self.do_simple_test_single_class(AlmostPythonDictImplementationRecycling)
        self.do_simple_test_single_class(AlmostPythonDictImplementationNoRecycling)

        self.do_simple_test_single_class(Dict32JsImpl)
        self.do_simple_test_single_class(AlmostPythonDictRecyclingJsImpl)
        self.do_simple_test_single_class(AlmostPythonDictNoRecyclingJsImpl)

    def do_simple_test_single_class(self, klass):
        """Insert 100 keys, delete the first 50, then overwrite/insert 200 keys."""
        d = klass()

        for i in range(100):
            d[i] = i
            self.assertEqual(d[i], i)

        for i in range(50):
            del d[i]
            # A deleted key must no longer be retrievable.
            with self.assertRaises(KeyError):
                d[i]

        for i in range(200):
            d[i] = i + 1
            self.assertEqual(d[i], i + 1)
def parse_simple_py_obj(obj):
    """Convert one decoded JSON value back into the Python object it stands for.

    * ``None`` (JSON null) becomes ``EMPTY``.
    * A dict is a tagged value: ``{"type": "DUMMY"}`` becomes ``DUMMY``,
      ``{"type": "None"}`` becomes ``None`` and ``{"type": "int", "value": s}``
      becomes ``int(s)``.
    * Anything else is returned unchanged.
    """
    if obj is None:
        return EMPTY
    if not isinstance(obj, dict):
        return obj

    # Only these three tags are understood.
    assert obj["type"] in ["DUMMY", "None", "int"]
    tag = obj["type"]
    if tag == "DUMMY":
        return DUMMY
    if tag == "None":
        return None
    return int(obj["value"])
def run_op_chapter1_chapter2(chapter, hash_codes, keys, op, **kwargs):
    """Send one chapter 1 / chapter 2 operation over the socket and decode the reply.

    The request is a single JSON line with the fields ``dict`` (the chapter
    name), ``op``, ``args``, ``hashCodes`` and ``keys``. The ``array``
    keyword argument is serialised with ``dump_array``; every other keyword
    argument goes through ``dump_simple_py_obj``.

    Returns:
        ``response["result"]`` when it is present and not None; otherwise
        ``(hash_codes, keys)`` when the reply carries ``hashCodes``;
        otherwise just the parsed ``keys`` list.

    Raises:
        KeyError: if the reply has a truthy ``exception`` field.
    """
    _init_sock_stuff()

    # Build a fresh dict rather than rewriting kwargs while iterating over it.
    args = {
        name: dump_array(value) if name == 'array' else dump_simple_py_obj(value)
        for name, value in kwargs.items()
    }

    data = {
        "dict": chapter,
        "op": op,
        "args": args,
        "hashCodes": dump_array(hash_codes) if hash_codes is not None else None,
        "keys": dump_array(keys) if keys is not None else None,
    }

    # sendall() keeps writing until the whole request has been handed to the
    # socket; send() may transmit only a prefix of a large payload.
    sock.sendall(bytes(json.dumps(data) + "\n", 'UTF-8'))
    response = json.loads(sockfile.readline())

    if response.get("exception"):
        raise KeyError()

    if response.get('result') is not None:
        # TODO: this is pretty hacky
        return response["result"]
    elif "hashCodes" in response:
        return parse_array(response["hashCodes"]), parse_array(response["keys"])
    else:
        return parse_array(response["keys"])
SOCK_FILENAME = 'pynode.sock' def __init__(self, pairs=None): pairs = pairs or [] self.sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) self.sock.connect(self.SOCK_FILENAME) self.sockfile = self.sock.makefile('r') self.slots = None self.fill = None self.used = None self.run_op("__init__", pairs=pairs) def __del__(self): self.sock.close() def dump_slots(self): def dump_slot(slot): key = dump_simple_py_obj(slot.key) value = dump_simple_py_obj(slot.value) hash_code = slot.hash_code if hash_code is EMPTY: hash_code = None return { "hashCode": str(hash_code) if hash_code is not None else None, "key": key, "value": value, } if self.slots is None: return None return list(map(dump_slot, self.slots)) def restore_slots(self, slots): def restore_slot(slot): key = parse_simple_py_obj(slot["key"]) value = parse_simple_py_obj(slot["value"]) assert value is not DUMMY hash_code = int(slot["hashCode"]) if slot["hashCode"] is not None else None if hash_code is None: hash_code = EMPTY return Slot(hash_code, key, value) self.slots = list(map(restore_slot, slots)) def run_op(self, op, **kwargs): for name in kwargs: if name != 'pairs': kwargs[name] = dump_simple_py_obj(kwargs[name]) else: kwargs[name] = dump_pairs(kwargs[name]) data = { "dict": self.dict_type, "op": op, "args": kwargs, "self": { "slots": self.dump_slots(), "used": self.used, "fill": self.fill } } # pprint(("<< sending", data, op, kwargs)) self.sock.send(bytes(json.dumps(data) + "\n", 'UTF-8')) response = json.loads(self.sockfile.readline()) # pprint((">> receiving", response)) self.restore_slots(response["self"]["slots"]) self.fill = response["self"]["fill"] self.used = response["self"]["used"] if response["exception"]: raise KeyError("whatever") return parse_simple_py_obj(response["result"]) class Dict32JsImpl(JsImplBase): dict_type = "dict32" def __setitem__(self, key, value): return self.run_op("__setitem__", key=key, value=value) def __delitem__(self, key): return self.run_op("__delitem__", key=key) def 
__getitem__(self, key): return self.run_op("__getitem__", key=key) class AlmostPythonDictBaseJsImpl(JsImplBase): dict_type = "almost_python_dict" def __delitem__(self, key): return self.run_op("__delitem__", key=key) def __getitem__(self, key): return self.run_op("__getitem__", key=key) class AlmostPythonDictRecyclingJsImpl(AlmostPythonDictBaseJsImpl): def __setitem__(self, key, value): return self.run_op("__setitem__recycling", key=key, value=value) class AlmostPythonDictNoRecyclingJsImpl(AlmostPythonDictBaseJsImpl): def __setitem__(self, key, value): return self.run_op("__setitem__no_recycling", key=key, value=value) ================================================ FILE: scripts/extractPythonCode.js ================================================ import 'ignore-styles'; import { DICT32_INIT, DICT32_SETITEM, DICT32_RESIZE_CODE, _DICT32_GETITEM_ONLY, _DICT32_DELITEM_ONLY, DICT32_LOOKDICT, STATICMETHOD_SIGNED_TO_UNSIGNED, PROBING_PYTHON_CODE, } from '../src/chapter4_real_python_dict'; import { HASH_CLASS_INIT_CODE, HASH_CLASS_SETITEM_RECYCLING_CODE, HASH_CLASS_SETITEM_SIMPLIFIED_CODE, _HASH_CLASS_GETITEM_ONLY, _HASH_CLASS_DELITEM_ONLY, HASH_CLASS_LOOKDICT, HASH_CLASS_RESIZE_CODE, FIND_NEAREST_SIZE_CODE_STRING, SLOT_CLASS_CODE_STRING, } from '../src/chapter3_hash_class'; import { HASH_CREATE_NEW_CODE, HASH_SEARCH_CODE, HASH_REMOVE_CODE, HASH_RESIZE_CODE, HASH_INSERT_CODE, } from '../src/chapter2_hash_table_functions'; import { SIMPLIFIED_INSERT_ALL_BROKEN_CODE, SIMPLIFIED_INSERT_ALL_CODE, SIMPLIFIED_SEARCH_CODE, SIMPLE_LIST_SEARCH, } from '../src/chapter1_simplified_hash'; import fs from 'fs'; import * as path from 'path'; function extractCodeLines(codeWithBpAndLevels) { return codeWithBpAndLevels.map(([line, bp, level]) => line); } function outputCode(filename, headers, importedCode, indent4 = true) { let allLines = []; for (let part of importedCode) { let lines; if (typeof part !== 'string') { lines = extractCodeLines(part); } else { lines = part.split('\n'); } 
allLines.push(...lines); if (lines[lines.length - 1] !== '') { allLines.push(''); } } const joinedLines = allLines.map(line => (line.length > 0 && indent4 ? ' ' + line : line)).join('\n'); fs.writeFileSync(filename, headers.join('\n') + '\n' + joinedLines); } const commonImports = `import sys import os sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'python_code')) from common import DUMMY, EMPTY `; const dict32def = ` class Dict32Extracted(object):`; const DIR = 'build'; outputCode( path.join(DIR, 'dict32js_extracted.py'), [commonImports, SLOT_CLASS_CODE_STRING, dict32def], [ DICT32_INIT, FIND_NEAREST_SIZE_CODE_STRING, STATICMETHOD_SIGNED_TO_UNSIGNED, DICT32_SETITEM, DICT32_RESIZE_CODE, _DICT32_GETITEM_ONLY, _DICT32_DELITEM_ONLY, DICT32_LOOKDICT, ] ); const hashClassRecyclingDef = ` class HashClassRecyclingExtracted(object):`; outputCode( path.join(DIR, 'hash_class_recycling_extracted.py'), [commonImports, SLOT_CLASS_CODE_STRING, hashClassRecyclingDef], [ HASH_CLASS_INIT_CODE, FIND_NEAREST_SIZE_CODE_STRING, HASH_CLASS_SETITEM_RECYCLING_CODE, HASH_CLASS_RESIZE_CODE, _HASH_CLASS_GETITEM_ONLY, _HASH_CLASS_DELITEM_ONLY, HASH_CLASS_LOOKDICT, ] ); const hashClassNoRecyclingDef = ` class HashClassNoRecyclingExtracted(object):`; outputCode( path.join(DIR, 'hash_class_no_recycling_extracted.py'), [commonImports, SLOT_CLASS_CODE_STRING, hashClassNoRecyclingDef], [ HASH_CLASS_INIT_CODE, FIND_NEAREST_SIZE_CODE_STRING, HASH_CLASS_SETITEM_SIMPLIFIED_CODE, HASH_CLASS_RESIZE_CODE, _HASH_CLASS_GETITEM_ONLY, _HASH_CLASS_DELITEM_ONLY, HASH_CLASS_LOOKDICT, ] ); outputCode( path.join(DIR, 'hash_chapter2_extracted.py'), [commonImports], [HASH_CREATE_NEW_CODE, HASH_SEARCH_CODE, HASH_REMOVE_CODE, HASH_RESIZE_CODE, HASH_INSERT_CODE], false ); outputCode( path.join(DIR, 'hash_chapter1_extracted.py'), [commonImports], [SIMPLIFIED_INSERT_ALL_CODE, SIMPLIFIED_INSERT_ALL_BROKEN_CODE, SIMPLIFIED_SEARCH_CODE, SIMPLE_LIST_SEARCH], false ); outputCode(path.join(DIR, 
'chapter4_probing_python_code.py'), [commonImports], [PROBING_PYTHON_CODE], false); ================================================ FILE: scripts/pyReimplWrapper.js ================================================ const net = require('net'); const split = require('split'); import 'ignore-styles'; import {BigNumber} from 'bignumber.js'; import {DUMMY, EMPTY, None} from '../src/hash_impl_common'; import {Dict32} from '../src/chapter4_real_python_dict'; import {GenerateProbingLinks} from '../src/probing_visualization.js'; import {AlmostPythonDict} from '../src/chapter3_hash_class'; import {Ops as Chapter2Ops} from '../src/chapter2_hash_table_functions'; import {Ops as Chapter1Ops} from '../src/chapter1_simplified_hash'; import {Slot} from '../src/chapter3_and_4_common'; import {List as ImmutableList} from 'immutable'; function parseSimplePyObj(obj) { if (obj === null || typeof obj === 'string') { return obj; } else if (typeof obj === 'object' && obj.type === 'None') { let res = None; // TODO FIXME: this does not support multiple clients res._hashCode = obj.hash; return res; } else if (typeof obj === 'object' && obj.type === 'DUMMY') { return DUMMY; } else if (typeof obj === 'object' && obj.type === 'EMPTY') { return EMPTY; } else if (typeof obj === 'object' && obj.type === 'int') { return BigNumber(obj.value); } else { throw new Error(`Unknown obj ${JSON.stringify(obj)}`); } } function parseArray(array) { return array.map(parseSimplePyObj); } function dumpArray(array) { return array.map(dumpSimplePyObj); } function parsePairs(pairs) { return pairs.map(([k, v]) => [parseSimplePyObj(k), parseSimplePyObj(v)]); } function dumpSimplePyObj(obj) { if (obj === DUMMY) { return { type: 'DUMMY', }; } else if (obj === None) { return { type: 'None', }; } else if (BigNumber.isBigNumber(obj)) { return { type: 'int', value: obj.toString(), }; } else { return obj; } } function restorePyDictState(state) { let {pySelf} = Dict32.__init__(); if (state.slots != null) { pySelf = 
pySelf.set( 'slots', new ImmutableList( state.slots.map(slot => { let key = parseSimplePyObj(slot.key); let value = parseSimplePyObj(slot.value); return Slot({ pyHashCode: slot.hashCode ? new BigNumber(slot.hashCode) : null, key: key, value: value, }); }) ) ); } else { pySelf = pySelf.set('slots', null); } pySelf = pySelf.set('used', state.used); pySelf = pySelf.set('fill', state.fill); return pySelf; } function dumpPyDictState(pySelf) { let data = {}; data.slots = pySelf.get('slots').map(slot => { return { hashCode: slot.pyHashCode != null ? slot.pyHashCode.toString() : null, key: dumpSimplePyObj(slot.key), value: dumpSimplePyObj(slot.value), }; }); data.used = pySelf.get('used'); data.fill = pySelf.get('fill'); return data; } function dict32RunOp(pySelf, op, key, value, pairs) { switch (op) { case '__init__': pySelf = Dict32.__init__(pairs).pySelf; return {pySelf}; case '__getitem__': { const {result, isException} = Dict32.__getitem__(pySelf, key); return {pySelf, result, isException}; } case '__setitem__': { ({pySelf} = Dict32.__setitem__(pySelf, key, value)); return {pySelf}; } case '__delitem__': { let isException; ({pySelf, isException} = Dict32.__delitem__(pySelf, key)); return {pySelf, isException}; } default: throw new Error('Unknown op: ' + op); } } function almostPyDictRunOp(pySelf, op, key, value, pairs) { switch (op) { case '__init__': pySelf = AlmostPythonDict.__init__(pairs).pySelf; return {pySelf}; case '__getitem__': { const {result, isException} = AlmostPythonDict.__getitem__(pySelf, key); return {pySelf, result, isException}; } case '__setitem__recycling': { ({pySelf} = AlmostPythonDict.__setitem__recycling(pySelf, key, value)); return {pySelf}; } case '__setitem__no_recycling': { ({pySelf} = AlmostPythonDict.__setitem__no_recycling(pySelf, key, value)); return {pySelf}; } case '__delitem__': { let isException; ({pySelf, isException} = AlmostPythonDict.__delitem__(pySelf, key)); return {pySelf, isException}; } default: throw new Error('Unknown 
op: ' + op); } } function chapter1run(keys, op, key, numbers) { switch (op) { case 'create_new': ({keys} = Chapter1Ops.createNew(numbers)); return {keys}; case 'create_new_broken': ({keys} = Chapter1Ops.createNewBroken(numbers)); return {keys}; case 'has_key': { let result; ({keys, result} = Chapter1Ops.hasKey(keys, key)); return {keys, result}; } case 'linear_search': { let {result} = Chapter1Ops.linearSearch(numbers, key); return {result}; } default: throw new Error('Unknown op: ' + op); } } function chapter2run(hashCodes, keys, op, key, array) { switch (op) { case 'create_new': ({hashCodes, keys} = Chapter2Ops.createNew(array)); return {hashCodes, keys}; case 'insert': ({hashCodes, keys} = Chapter2Ops.insert(hashCodes, keys, key)); return {hashCodes, keys}; case 'remove': { let isException; ({hashCodes, keys, isException} = Chapter2Ops.remove(hashCodes, keys, key)); return {hashCodes, keys, isException}; } case 'has_key': { let result; ({hashCodes, keys, result} = Chapter2Ops.hasKey(hashCodes, keys, key)); return {hashCodes, keys, result}; } case 'resize': ({hashCodes, keys} = Chapter2Ops.resize(hashCodes, keys)); return {hashCodes, keys}; default: throw new Error('Unknown op: ' + op); } } const server = net.createServer(c => { console.log('Client connected'); c.on('end', () => { console.log('Client disconnected'); }); c.pipe(split()).on('data', line => { console.log('Received line of length ' + line.length); if (!line) return; const data = JSON.parse(line); const dictType = data.dict; const op = data.op; let {key, value, pairs, array} = data.args; if (key !== undefined) { key = parseSimplePyObj(key); } if (value !== undefined) { value = parseSimplePyObj(value); } if (pairs !== undefined) { pairs = parsePairs(pairs); } if (array !== undefined) { array = parseArray(array); } console.log(op, data.args); let isException, result; let response; if (dictType === 'dict32' || dictType === 'almost_python_dict') { let pySelf = restorePyDictState(data.self); if (dictType 
=== 'dict32') { ({pySelf, isException, result} = dict32RunOp(pySelf, op, key, value, pairs)); } else if (dictType === 'almost_python_dict') { ({pySelf, isException, result} = almostPyDictRunOp(pySelf, op, key, value, pairs)); } else { throw new Error('Unknown dict type'); } response = { exception: isException || false, result: result !== undefined ? dumpSimplePyObj(result) : null, self: dumpPyDictState(pySelf), }; } else if (dictType === 'chapter2') { let hashCodes = data.hashCodes != null ? new ImmutableList(parseArray(data.hashCodes)) : undefined; let keys = data.keys != null ? new ImmutableList(parseArray(data.keys)) : undefined; ({hashCodes, keys, isException, result} = chapter2run(hashCodes, keys, op, key, array)); response = { exception: isException || false, result: result !== undefined ? result : null, hashCodes: dumpArray(hashCodes), keys: dumpArray(keys), }; } else if (dictType === 'chapter1') { let keys = data.keys != null ? new ImmutableList(parseArray(data.keys)) : undefined; ({keys, result} = chapter1run(keys, op, key, array)); response = { result: result !== undefined ? result : null, keys: keys !== undefined ? 
dumpArray(keys) : null, }; } else if (dictType === 'pythonProbing') { let g = new GenerateProbingLinks(); const result = g.run(data.args.slotsCount, key, 'python'); response = { result, }; } else { throw new Error('Unknown dict type'); } c.write(JSON.stringify(response) + '\n'); }); }); server.on('error', err => { throw err; }); server.on('listening', () => { console.log(`Listening`); }); server.listen('pynode.sock', () => { console.log('Starting listening...'); }); ================================================ FILE: scripts/ssr.js ================================================ require('dotenv').config(); process.env.NODE_ENV = 'ssr'; console.log = () => {}; // Do not log to stdout global.performance = {now: () => 0}; import 'ignore-styles'; import * as React from 'react'; import ReactDOMServer from 'react-dom/server'; import {CHAPTER_ID_TO_COMPONENT} from '../src/index'; import {App} from '../src/app'; import fs from 'fs'; const filename = process.argv[2]; const chapterIds = JSON.parse(process.argv[3]); const chapters = chapterIds.map(id => CHAPTER_ID_TO_COMPONENT[id]); let selectedChapterId; if (chapterIds.length === 1) { selectedChapterId = chapterIds[0]; } fs.readFile(filename, 'utf8', function(err, file) { if (err) { throw new Error(`Cannot read source html: ${err}`); } const renderedComponent = ReactDOMServer.renderToString( ); let fullHtml = file.replace(/
<\/div>/, `
${renderedComponent}
`); const gaId = process.env.GA_ID; console.warn('Google analytics ID is', gaId); if (gaId) { let GA_SCRIPT = ` `; GA_SCRIPT = GA_SCRIPT.replace(/__GA_CODE_HERE__/g, gaId); fullHtml = fullHtml.replace('', `${GA_SCRIPT}`); } process.stdout.write(fullHtml); }); ================================================ FILE: src/app.js ================================================ import _ from 'lodash'; import Bootstrap from 'bootstrap/dist/css/bootstrap.min.css'; import stylesCss from './styles.css'; import * as React from 'react'; import ReactDOM from 'react-dom'; import {MyErrorBoundary, initUxSettings, getUxSettings, BootstrapAlert, doubleRAF} from './util'; import {win, globalSettings} from './store'; import {faDesktop} from '@fortawesome/free-solid-svg-icons/faDesktop'; import {faSpinner} from '@fortawesome/free-solid-svg-icons/faSpinner'; import {faSyncAlt} from '@fortawesome/free-solid-svg-icons/faSyncAlt'; import {faEnvelope} from '@fortawesome/free-solid-svg-icons/faEnvelope'; import {faChevronRight} from '@fortawesome/free-solid-svg-icons/faChevronRight'; import {faFirefox} from '@fortawesome/free-brands-svg-icons/faFirefox'; import {faGithub} from '@fortawesome/free-brands-svg-icons/faGithub'; import {faTwitter} from '@fortawesome/free-brands-svg-icons/faTwitter'; import {faMailchimp} from '@fortawesome/free-brands-svg-icons/faMailchimp'; import {library, config as fontAwesomeConfig} from '@fortawesome/fontawesome-svg-core'; fontAwesomeConfig.autoAddCss = false; library.add(faDesktop); library.add(faFirefox); library.add(faSpinner); library.add(faSyncAlt); library.add(faEnvelope); library.add(faChevronRight); library.add(faGithub); library.add(faMailchimp); library.add(faTwitter); import {FontAwesomeIcon} from '@fortawesome/react-fontawesome'; import '@fortawesome/fontawesome-svg-core/styles.css'; function getWindowDimensions() { const width = document.documentElement.clientWidth; const height = document.documentElement.clientHeight; return {width, height}; } 
function logViewportStats() { console.log(`DIMENSIONS: window inner: ${window.innerWidth}x${window.innerHeight}`); console.log( `DIMENSIONS: document.documentElement: ${document.documentElement.clientWidth}x${ document.documentElement.clientHeight }` ); const vv = window.visualViewport; console.log(`DIMENSIONS: visualViewport: ${vv != null ? vv.width + 'x' + vv.height : vv}`); const {width, height} = getWindowDimensions(); console.log(`DIMENSIONS: used: ${width}x${height}`); // TODO FIXME: this is for debugging only /*const url = `/viewports?wi=${window.innerWidth}x${window.innerHeight}&de=${document.documentElement.clientWidth}x${document.documentElement.clientHeight}&vv=${vv.width}x${vv.height}`; const Http = new XMLHttpRequest(); Http.open("GET", url); Http.send();*/ } const GITHUB_REPO_URL = 'https://github.com/eleweek/inside_python_dict'; const MAILCHIMP_URL = 'http://eepurl.com/gbzhvn'; const TWITTER_LINK = 'https://twitter.com/SashaPutilin'; const EMAIL = 'avp-13@yandex.ru'; function GithubRibbon() { return ( Fork me on GitHub ); } function GithubCorner() { // FROM: http://tholman.com/github-corners/ return (
`, }} /> ); } function GithubForkMe({windowWidth}) { /*if (windowWidth != null && windowWidth > 1150) { return ; } else {*/ return ; /*}*/ } const CONTENTS_DATA = [ [1, 'chapter1.html', 'Searching efficiently in a list'], [2, 'chapter2.html', 'Why are hash tables called hash tables?'], [3, 'chapter3.html', 'Putting it all together to make an "almost"-python-dict'], [4, 'chapter4.html', 'How python dict *really* works internally'], ]; function chapterIdDotHtml(chapterId) { if (chapterId && !chapterId.endsWith('.html')) { return chapterId + '.html'; } else { return null; } } function NextPrev({selectedChapterId}) { const selectedChapter = chapterIdDotHtml(selectedChapterId); if (selectedChapter == null) { return null; } let prevHref, prevTitle; let nextHref, nextTitle; for (let i = 0; i < CONTENTS_DATA.length; ++i) { if (CONTENTS_DATA[i][1] === selectedChapter) { if (i > 0) { prevHref = CONTENTS_DATA[i - 1][1]; prevTitle = CONTENTS_DATA[i - 1][2]; } if (i < CONTENTS_DATA.length - 1) { nextHref = CONTENTS_DATA[i + 1][1]; nextTitle = CONTENTS_DATA[i + 1][2]; } break; } } if (nextHref) { return ( ); } else { return null; } } class Contents extends React.PureComponent { static EXTRA_ERROR_BOUNDARY = true; render() { const {selectedChapterId} = this.props; const selectedChapter = chapterIdDotHtml(selectedChapterId); const CIRCLE_SIZE = 30; return (
{CONTENTS_DATA.map(([i, href, title]) => { const contentRow = (
{i}
{title}
); return (
{selectedChapter === href ? ( contentRow ) : ( {contentRow} )}
); })}
); } } class LoadingAlert extends React.PureComponent { constructor() { super(); this.state = { loaded: false, }; } render() { return ( JavaScript code is loading... ); } componentDidMount() { this.setState({loaded: true}); } } class Alerts extends React.Component { constructor() { super(); this.state = { mounted: false, }; } render() { const alerts = []; const isRunningInBrowser = typeof window !== 'undefined'; alerts.push(); if (this.state.mounted) { const {browser, windowWidth, windowHeight} = this.props; if (browser) { if (browser.platform.type === 'mobile') { alerts.push( Mobile device detected. For the best experience use a desktop browser ); if (windowWidth < windowHeight) { alerts.push( Rotating your device is recommended{' '} - animations are better with a wider viewport ); } } else if (browser.browser.name === 'Firefox' && browser.os.name !== 'Linux') { alerts.push( Firefox detected. Heavy animations may lag sometimes. If this happens, Chrome or Safari is recommended. ); } } } return {alerts}; } componentDidMount() { this.setState({mounted: true}); } } function Footer() { return ( ); } // mainly to prevent addressbar stuff on mobile changing things excessively const SIGNIFICANT_HEIGHT_CHANGE = 100; export class App extends React.Component { constructor() { super(); this.state = { mounted: false, windowWidth: null, windowHeight: null, }; } windowSizeChangeHandle = () => { logViewportStats(); const dimensions = getWindowDimensions(); const windowWidth = dimensions.width; const windowHeight = dimensions.height; if (this.state.windowWidth !== windowWidth || this.state.windowHeight !== windowHeight) { console.log('Processing window size change', windowWidth, windowHeight); if ( this.state.windowWidth != windowWidth || this.state.windowHeight > windowHeight || windowHeight - this.state.windowHeight > SIGNIFICANT_HEIGHT_CHANGE ) { console.log('App size changed from', this.state); this.setState({ windowWidth, windowHeight, }); if (win.width !== windowWidth || 
win.height !== windowHeight) { win.setWH(windowWidth, windowHeight); } } fixStickyResize(windowWidth, windowHeight); } }; componentDidMount() { const MEANINGFUL_Y_DIFF = 50; // components that depend on scroll should allow some leeway let lastScrollY = null; const onScroll = _.throttle(() => { if (!lastScrollY || Math.abs(lastScrollY - window.scrollY) > MEANINGFUL_Y_DIFF) { console.log('onScroll triggered', window.scrollY); win.setScrollY(window.scrollY); lastScrollY = window.scrollY; } }, 100); window.addEventListener('scroll', onScroll); const dimensions = getWindowDimensions(); const windowWidth = dimensions.width; const windowHeight = dimensions.height; console.log('componentDidMount() window geometry', windowWidth, windowHeight); window.addEventListener('resize', _.throttle(this.windowSizeChangeHandle, 500)); globalSettings.maxCodePlaySpeed = getUxSettings().MAX_CODE_PLAY_SPEED; this.setState({ windowWidth, windowHeight, mounted: true, }); win.setAll(windowWidth, windowHeight, window.scrollY, true); } componentWillUnmount() { window.removeEventListener('resize', this.windowSizeChangeHandle); } render() { console.log('App.render()'); const contents = ; const independentContents = this.props.selectedChapterId === 'chapter1'; // Make sure SSR works const {windowWidth, windowHeight} = this.state.mounted ? this.state : {}; let chapters = []; for (let [i, Chapter] of this.props.chapters.entries()) { chapters.push( ); } return (

Inside python dict — an explorable explanation

{!independentContents && {contents}} {chapters}