[
  {
    "path": ".gitignore",
    "content": "# Compiled Object files, Static and Dynamic libs (Shared Objects)\n*.o\n*.a\n*.so\n\n*.gor\n\n# Folders\n_obj\n_test\n\n# Architecture specific extensions/prefixes\n*.[568vq]\n[568vq].out\n\n*.cgo1.go\n*.cgo2.c\n_cgo_defun.c\n_cgo_gotypes.go\n_cgo_export.*\n\n_testmain.go\n\n*.exe\n*.bin\n*.test\n\n\n*.sublime-workspace\n*.sw*\n*.un*\n"
  },
  {
    "path": "LICENSE",
    "content": "Attribution-NonCommercial-ShareAlike 4.0 International\n\n=======================================================================\n\nCreative Commons Corporation (\"Creative Commons\") is not a law firm and\ndoes not provide legal services or legal advice. Distribution of\nCreative Commons public licenses does not create a lawyer-client or\nother relationship. Creative Commons makes its licenses and related\ninformation available on an \"as-is\" basis. Creative Commons gives no\nwarranties regarding its licenses, any material licensed under their\nterms and conditions, or any related information. Creative Commons\ndisclaims all liability for damages resulting from their use to the\nfullest extent possible.\n\nUsing Creative Commons Public Licenses\n\nCreative Commons public licenses provide a standard set of terms and\nconditions that creators and other rights holders may use to share\noriginal works of authorship and other material subject to copyright\nand certain other rights specified in the public license below. The\nfollowing considerations are for informational purposes only, are not\nexhaustive, and do not form part of our licenses.\n\n     Considerations for licensors: Our public licenses are\n     intended for use by those authorized to give the public\n     permission to use material in ways otherwise restricted by\n     copyright and certain other rights. Our licenses are\n     irrevocable. Licensors should read and understand the terms\n     and conditions of the license they choose before applying it.\n     Licensors should also secure all rights necessary before\n     applying our licenses so that the public can reuse the\n     material as expected. Licensors should clearly mark any\n     material not subject to the license. This includes other CC-\n     licensed material, or material used under an exception or\n     limitation to copyright. 
More considerations for licensors:\n\twiki.creativecommons.org/Considerations_for_licensors\n\n     Considerations for the public: By using one of our public\n     licenses, a licensor grants the public permission to use the\n     licensed material under specified terms and conditions. If\n     the licensor's permission is not necessary for any reason--for\n     example, because of any applicable exception or limitation to\n     copyright--then that use is not regulated by the license. Our\n     licenses grant only permissions under copyright and certain\n     other rights that a licensor has authority to grant. Use of\n     the licensed material may still be restricted for other\n     reasons, including because others have copyright or other\n     rights in the material. A licensor may make special requests,\n     such as asking that all changes be marked or described.\n     Although not required by our licenses, you are encouraged to\n     respect those requests where reasonable. More_considerations\n     for the public: \n\twiki.creativecommons.org/Considerations_for_licensees\n\n=======================================================================\n\nCreative Commons Attribution-NonCommercial-ShareAlike 4.0 International\nPublic License\n\nBy exercising the Licensed Rights (defined below), You accept and agree\nto be bound by the terms and conditions of this Creative Commons\nAttribution-NonCommercial-ShareAlike 4.0 International Public License\n(\"Public License\"). To the extent this Public License may be\ninterpreted as a contract, You are granted the Licensed Rights in\nconsideration of Your acceptance of these terms and conditions, and the\nLicensor grants You such rights in consideration of benefits the\nLicensor receives from making the Licensed Material available under\nthese terms and conditions.\n\n\nSection 1 -- Definitions.\n\n  a. 
Adapted Material means material subject to Copyright and Similar\n     Rights that is derived from or based upon the Licensed Material\n     and in which the Licensed Material is translated, altered,\n     arranged, transformed, or otherwise modified in a manner requiring\n     permission under the Copyright and Similar Rights held by the\n     Licensor. For purposes of this Public License, where the Licensed\n     Material is a musical work, performance, or sound recording,\n     Adapted Material is always produced where the Licensed Material is\n     synched in timed relation with a moving image.\n\n  b. Adapter's License means the license You apply to Your Copyright\n     and Similar Rights in Your contributions to Adapted Material in\n     accordance with the terms and conditions of this Public License.\n\n  c. BY-NC-SA Compatible License means a license listed at\n     creativecommons.org/compatiblelicenses, approved by Creative\n     Commons as essentially the equivalent of this Public License.\n\n  d. Copyright and Similar Rights means copyright and/or similar rights\n     closely related to copyright including, without limitation,\n     performance, broadcast, sound recording, and Sui Generis Database\n     Rights, without regard to how the rights are labeled or\n     categorized. For purposes of this Public License, the rights\n     specified in Section 2(b)(1)-(2) are not Copyright and Similar\n     Rights.\n\n  e. Effective Technological Measures means those measures that, in the\n     absence of proper authority, may not be circumvented under laws\n     fulfilling obligations under Article 11 of the WIPO Copyright\n     Treaty adopted on December 20, 1996, and/or similar international\n     agreements.\n\n  f. Exceptions and Limitations means fair use, fair dealing, and/or\n     any other exception or limitation to Copyright and Similar Rights\n     that applies to Your use of the Licensed Material.\n\n  g. 
License Elements means the license attributes listed in the name\n     of a Creative Commons Public License. The License Elements of this\n     Public License are Attribution, NonCommercial, and ShareAlike.\n\n  h. Licensed Material means the artistic or literary work, database,\n     or other material to which the Licensor applied this Public\n     License.\n\n  i. Licensed Rights means the rights granted to You subject to the\n     terms and conditions of this Public License, which are limited to\n     all Copyright and Similar Rights that apply to Your use of the\n     Licensed Material and that the Licensor has authority to license.\n\n  j. Licensor means the individual(s) or entity(ies) granting rights\n     under this Public License.\n\n  k. NonCommercial means not primarily intended for or directed towards\n     commercial advantage or monetary compensation. For purposes of\n     this Public License, the exchange of the Licensed Material for\n     other material subject to Copyright and Similar Rights by digital\n     file-sharing or similar means is NonCommercial provided there is\n     no payment of monetary compensation in connection with the\n     exchange.\n\n  l. Share means to provide material to the public by any means or\n     process that requires permission under the Licensed Rights, such\n     as reproduction, public display, public performance, distribution,\n     dissemination, communication, or importation, and to make material\n     available to the public including in ways that members of the\n     public may access the material from a place and at a time\n     individually chosen by them.\n\n  m. Sui Generis Database Rights means rights other than copyright\n     resulting from Directive 96/9/EC of the European Parliament and of\n     the Council of 11 March 1996 on the legal protection of databases,\n     as amended and/or succeeded, as well as other essentially\n     equivalent rights anywhere in the world.\n\n  n. 
You means the individual or entity exercising the Licensed Rights\n     under this Public License. Your has a corresponding meaning.\n\n\nSection 2 -- Scope.\n\n  a. License grant.\n\n       1. Subject to the terms and conditions of this Public License,\n          the Licensor hereby grants You a worldwide, royalty-free,\n          non-sublicensable, non-exclusive, irrevocable license to\n          exercise the Licensed Rights in the Licensed Material to:\n\n            a. reproduce and Share the Licensed Material, in whole or\n               in part, for NonCommercial purposes only; and\n\n            b. produce, reproduce, and Share Adapted Material for\n               NonCommercial purposes only.\n\n       2. Exceptions and Limitations. For the avoidance of doubt, where\n          Exceptions and Limitations apply to Your use, this Public\n          License does not apply, and You do not need to comply with\n          its terms and conditions.\n\n       3. Term. The term of this Public License is specified in Section\n          6(a).\n\n       4. Media and formats; technical modifications allowed. The\n          Licensor authorizes You to exercise the Licensed Rights in\n          all media and formats whether now known or hereafter created,\n          and to make technical modifications necessary to do so. The\n          Licensor waives and/or agrees not to assert any right or\n          authority to forbid You from making technical modifications\n          necessary to exercise the Licensed Rights, including\n          technical modifications necessary to circumvent Effective\n          Technological Measures. For purposes of this Public License,\n          simply making modifications authorized by this Section 2(a)\n          (4) never produces Adapted Material.\n\n       5. Downstream recipients.\n\n            a. Offer from the Licensor -- Licensed Material. 
Every\n               recipient of the Licensed Material automatically\n               receives an offer from the Licensor to exercise the\n               Licensed Rights under the terms and conditions of this\n               Public License.\n\n            b. Additional offer from the Licensor -- Adapted Material.\n               Every recipient of Adapted Material from You\n               automatically receives an offer from the Licensor to\n               exercise the Licensed Rights in the Adapted Material\n               under the conditions of the Adapter's License You apply.\n\n            c. No downstream restrictions. You may not offer or impose\n               any additional or different terms or conditions on, or\n               apply any Effective Technological Measures to, the\n               Licensed Material if doing so restricts exercise of the\n               Licensed Rights by any recipient of the Licensed\n               Material.\n\n       6. No endorsement. Nothing in this Public License constitutes or\n          may be construed as permission to assert or imply that You\n          are, or that Your use of the Licensed Material is, connected\n          with, or sponsored, endorsed, or granted official status by,\n          the Licensor or others designated to receive attribution as\n          provided in Section 3(a)(1)(A)(i).\n\n  b. Other rights.\n\n       1. Moral rights, such as the right of integrity, are not\n          licensed under this Public License, nor are publicity,\n          privacy, and/or other similar personality rights; however, to\n          the extent possible, the Licensor waives and/or agrees not to\n          assert any such rights held by the Licensor to the limited\n          extent necessary to allow You to exercise the Licensed\n          Rights, but not otherwise.\n\n       2. Patent and trademark rights are not licensed under this\n          Public License.\n\n       3. 
To the extent possible, the Licensor waives any right to\n          collect royalties from You for the exercise of the Licensed\n          Rights, whether directly or through a collecting society\n          under any voluntary or waivable statutory or compulsory\n          licensing scheme. In all other cases the Licensor expressly\n          reserves any right to collect such royalties, including when\n          the Licensed Material is used other than for NonCommercial\n          purposes.\n\n\nSection 3 -- License Conditions.\n\nYour exercise of the Licensed Rights is expressly made subject to the\nfollowing conditions.\n\n  a. Attribution.\n\n       1. If You Share the Licensed Material (including in modified\n          form), You must:\n\n            a. retain the following if it is supplied by the Licensor\n               with the Licensed Material:\n\n                 i. identification of the creator(s) of the Licensed\n                    Material and any others designated to receive\n                    attribution, in any reasonable manner requested by\n                    the Licensor (including by pseudonym if\n                    designated);\n\n                ii. a copyright notice;\n\n               iii. a notice that refers to this Public License;\n\n                iv. a notice that refers to the disclaimer of\n                    warranties;\n\n                 v. a URI or hyperlink to the Licensed Material to the\n                    extent reasonably practicable;\n\n            b. indicate if You modified the Licensed Material and\n               retain an indication of any previous modifications; and\n\n            c. indicate the Licensed Material is licensed under this\n               Public License, and include the text of, or the URI or\n               hyperlink to, this Public License.\n\n       2. 
You may satisfy the conditions in Section 3(a)(1) in any\n          reasonable manner based on the medium, means, and context in\n          which You Share the Licensed Material. For example, it may be\n          reasonable to satisfy the conditions by providing a URI or\n          hyperlink to a resource that includes the required\n          information.\n       3. If requested by the Licensor, You must remove any of the\n          information required by Section 3(a)(1)(A) to the extent\n          reasonably practicable.\n\n  b. ShareAlike.\n\n     In addition to the conditions in Section 3(a), if You Share\n     Adapted Material You produce, the following conditions also apply.\n\n       1. The Adapter's License You apply must be a Creative Commons\n          license with the same License Elements, this version or\n          later, or a BY-NC-SA Compatible License.\n\n       2. You must include the text of, or the URI or hyperlink to, the\n          Adapter's License You apply. You may satisfy this condition\n          in any reasonable manner based on the medium, means, and\n          context in which You Share Adapted Material.\n\n       3. You may not offer or impose any additional or different terms\n          or conditions on, or apply any Effective Technological\n          Measures to, Adapted Material that restrict exercise of the\n          rights granted under the Adapter's License You apply.\n\n\nSection 4 -- Sui Generis Database Rights.\n\nWhere the Licensed Rights include Sui Generis Database Rights that\napply to Your use of the Licensed Material:\n\n  a. for the avoidance of doubt, Section 2(a)(1) grants You the right\n     to extract, reuse, reproduce, and Share all or a substantial\n     portion of the contents of the database for NonCommercial purposes\n     only;\n\n  b. 
if You include all or a substantial portion of the database\n     contents in a database in which You have Sui Generis Database\n     Rights, then the database in which You have Sui Generis Database\n     Rights (but not its individual contents) is Adapted Material,\n     including for purposes of Section 3(b); and\n\n  c. You must comply with the conditions in Section 3(a) if You Share\n     all or a substantial portion of the contents of the database.\n\nFor the avoidance of doubt, this Section 4 supplements and does not\nreplace Your obligations under this Public License where the Licensed\nRights include other Copyright and Similar Rights.\n\n\nSection 5 -- Disclaimer of Warranties and Limitation of Liability.\n\n  a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE\n     EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS\n     AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF\n     ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS,\n     IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION,\n     WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR\n     PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS,\n     ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT\n     KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT\n     ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU.\n\n  b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE\n     TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION,\n     NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT,\n     INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES,\n     COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR\n     USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN\n     ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR\n     DAMAGES. 
WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR\n     IN PART, THIS LIMITATION MAY NOT APPLY TO YOU.\n\n  c. The disclaimer of warranties and limitation of liability provided\n     above shall be interpreted in a manner that, to the extent\n     possible, most closely approximates an absolute disclaimer and\n     waiver of all liability.\n\n\nSection 6 -- Term and Termination.\n\n  a. This Public License applies for the term of the Copyright and\n     Similar Rights licensed here. However, if You fail to comply with\n     this Public License, then Your rights under this Public License\n     terminate automatically.\n\n  b. Where Your right to use the Licensed Material has terminated under\n     Section 6(a), it reinstates:\n\n       1. automatically as of the date the violation is cured, provided\n          it is cured within 30 days of Your discovery of the\n          violation; or\n\n       2. upon express reinstatement by the Licensor.\n\n     For the avoidance of doubt, this Section 6(b) does not affect any\n     right the Licensor may have to seek remedies for Your violations\n     of this Public License.\n\n  c. For the avoidance of doubt, the Licensor may also offer the\n     Licensed Material under separate terms or conditions or stop\n     distributing the Licensed Material at any time; however, doing so\n     will not terminate this Public License.\n\n  d. Sections 1, 5, 6, 7, and 8 survive termination of this Public\n     License.\n\n\nSection 7 -- Other Terms and Conditions.\n\n  a. The Licensor shall not be bound by any additional or different\n     terms or conditions communicated by You unless expressly agreed.\n\n  b. Any arrangements, understandings, or agreements regarding the\n     Licensed Material not stated herein are separate from and\n     independent of the terms and conditions of this Public License.\n\n\nSection 8 -- Interpretation.\n\n  a. 
For the avoidance of doubt, this Public License does not, and\n     shall not be interpreted to, reduce, limit, restrict, or impose\n     conditions on any use of the Licensed Material that could lawfully\n     be made without permission under this Public License.\n\n  b. To the extent possible, if any provision of this Public License is\n     deemed unenforceable, it shall be automatically reformed to the\n     minimum extent necessary to make it enforceable. If the provision\n     cannot be reformed, it shall be severed from this Public License\n     without affecting the enforceability of the remaining terms and\n     conditions.\n\n  c. No term or condition of this Public License will be waived and no\n     failure to comply consented to unless expressly agreed to by the\n     Licensor.\n\n  d. Nothing in this Public License constitutes or may be interpreted\n     as a limitation upon, or waiver of, any privileges and immunities\n     that apply to the Licensor or You, including from the legal\n     processes of any jurisdiction or authority.\n\n=======================================================================\n\nCreative Commons is not a party to its public\nlicenses. Notwithstanding, Creative Commons may elect to apply one of\nits public licenses to material it publishes and in those instances\nwill be considered the “Licensor.” The text of the Creative Commons\npublic licenses is dedicated to the public domain under the CC0 Public\nDomain Dedication. 
Except for the limited purpose of indicating that\nmaterial is shared under a Creative Commons public license or as\notherwise permitted by the Creative Commons policies published at\ncreativecommons.org/policies, Creative Commons does not authorize the\nuse of the trademark \"Creative Commons\" or any other trademark or logo\nof Creative Commons without its prior written consent including,\nwithout limitation, in connection with any unauthorized modifications\nto any of its public licenses or any other arrangements,\nunderstandings, or agreements concerning use of licensed material. For\nthe avoidance of doubt, this paragraph does not form part of the\npublic licenses.\n\nCreative Commons may be contacted at creativecommons.org.\n"
  },
  {
    "path": "Makefile",
    "content": ".PHONY: toc\n\ntoc:\n\tdocker run --rm -it -v ${PWD}:/usr/src jorgeandrada/doctoc --github\n\t$(shell tail -n +`grep -n '# \\`go-internals\\`' README.md | tr ':' ' ' | awk '{print $$1}'` README.md > /tmp/README2.md)\n\tcp /tmp/README2.md README.md\n"
  },
  {
    "path": "README.md",
    "content": "# `go-internals`\n\n`go-internals` is a work-in-progress book about the internals of the Go (1.10+) programming language.\n\n---\n\n## Table of Contents\n\n- [Chapter I: A Primer on Go Assembly](./chapter1_assembly_primer/README.md)\n- [Chapter II: Interfaces](./chapter2_interfaces/README.md)\n- [Chapter III (soon!): The Garbage Collector](./chapter3_garbage_collector/README.md)\n\n---\n\n*Click [here](https://cmc.gitbook.io/go-internals/) for the GitBook version.*\n\n## Goals\n\n- **Concise**: The book aims to be as concise as possible, encouraging code and diagrams over lengthy prose.\n- **Community-effort**: I myself am learning as I go through the writing of this book. I *will* make mistakes along the way. Hopefully the community can help in pointing out and correcting these mistakes.\n- **Beyond the theory**: The book will not just cover the theory, but the actual implementation too. Assumptions will be proven or invalidated via experiments and measurements.\n- **Up-to-Date**: The book will try to keep up-to-date with new Go versions being released.\n- **Experienced audience**: The Go community has created *loads* of great introductory material for newcomers. Unfortunately we're still lacking good resources when it comes to the more advanced stuff. 
This book hopes to help solve this issue.\n\n## Translations\n\n- [Chinese](https://github.com/go-internals-cn/go-internals)\n\n## Contributing\n\nAll kinds of contributions are very much welcome.\n\nDon't hesitate to open an issue for e.g...:\n- pointing out technical or English mistakes\n- suggesting improvements and/or additions to existing chapters\n- suggesting external links that might be of interest\n- ...and pretty much anything else you can think of, really!\n\n## Author\n\nClement Rey <<cr.rey.clement@gmail.com>> ([@teh_cmc](https://twitter.com/teh_cmc))\n\n## License\n\nLicensed under the [BY-NC-SA Creative Commons 4.0 International Public License](http://creativecommons.org/licenses/by-nc-sa/4.0/)\n"
  },
  {
    "path": "chapter1_assembly_primer/Makefile",
    "content": "GOOS=linux\nGOARCH=amd64\n\nSOURCES := $(wildcard *.go)\nOBJECTS = $(SOURCES:.go=.o)\nEXECUTABLES = $(OBJECTS:.o=.bin)\n\n.SECONDARY: ${OBJECTS}\n\nall: ${EXECUTABLES}\n\n%.o: %.go\n\tGOOS=${GOOS} GOARCH=${GOARCH} go tool compile $<\n\n%.bin: %.o\n\tGOOS=${GOOS} GOARCH=${GOARCH} go tool link -o $@ $<\n\nclean:\n\trm -f ${OBJECTS}\n\trm -f ${EXECUTABLES}\n"
  },
  {
    "path": "chapter1_assembly_primer/README.md",
    "content": "<!-- Copyright © 2018 Clement Rey <cr.rey.clement@gmail.com>. -->\n<!-- Licensed under the BY-NC-SA Creative Commons 4.0 International Public License. -->\n\n```Bash\n$ go version\ngo version go1.10 linux/amd64\n```\n\n# Chapter I: A Primer on Go Assembly\n\nDeveloping some familiarity with Go's abstract assembly language is a must before we can start delving into the implementation of the runtime & standard library.  \nThis quick guide should hopefully get you up-to-speed.\n\n---\n\n**Table of Contents**\n<!-- START doctoc generated TOC please keep comment here to allow auto update -->\n<!-- DON'T EDIT THIS SECTION, INSTEAD RE-RUN doctoc TO UPDATE -->\n\n\n- [\"Pseudo-assembly\"](#pseudo-assembly)\n- [Decomposing a simple program](#decomposing-a-simple-program)\n  - [Dissecting `add`](#dissecting-add)\n  - [Dissecting `main`](#dissecting-main)\n- [A word about goroutines, stacks and splits](#a-word-about-goroutines-stacks-and-splits)\n  - [Stacks](#stacks)\n  - [Splits](#splits)\n  - [Minus some subtleties](#minus-some-subtleties)\n- [Conclusion](#conclusion)\n- [Links](#links)\n\n<!-- END doctoc generated TOC please keep comment here to allow auto update -->\n\n---\n\n- *This chapter assumes some basic knowledge of any kind of assembler.*\n- *If and when running into architecture-specific matters, always assume `linux/amd64`.*\n- *We will always work with compiler optimizations **enabled**.*\n- *Quoted text and/or comments always come from the official documentation and/or codebase, unless stated otherwise.*\n\n## \"Pseudo-assembly\"\n\nThe Go compiler outputs an abstract, portable form of assembly that doesn't actually map to any real hardware. The Go assembler then uses this pseudo-assembly output in order to generate concrete, machine-specific instructions for the targeted hardware.  \nThis extra layer has many benefits, the main one being how easy it makes porting Go to new architectures. 
For more information, have a look at Rob Pike's *The Design of the Go Assembler*, listed in the links at the end of this chapter.\n\n> The most important thing to know about Go's assembler is that it is not a direct representation of the underlying machine. Some of the details map precisely to the machine, but some do not. This is because the compiler suite needs no assembler pass in the usual pipeline. Instead, the compiler operates on a kind of semi-abstract instruction set, and instruction selection occurs partly after code generation. The assembler works on the semi-abstract form, so when you see an instruction like MOV what the toolchain actually generates for that operation might not be a move instruction at all, perhaps a clear or load. Or it might correspond exactly to the machine instruction with that name. In general, machine-specific operations tend to appear as themselves, while more general concepts like memory move and subroutine call and return are more abstract. The details vary with architecture, and we apologize for the imprecision; the situation is not well-defined.\n\n> The assembler program is a way to parse a description of that semi-abstract instruction set and turn it into instructions to be input to the linker.\n\n## Decomposing a simple program\n\nConsider the following Go code ([direct_topfunc_call.go](./direct_topfunc_call.go)):\n```Go\n//go:noinline\nfunc add(a, b int32) (int32, bool) { return a + b, true }\n\nfunc main() { add(10, 32) }\n```\n*(Note the `//go:noinline` compiler-directive here... 
Don't get bitten.)*\n\nLet's compile this down to assembly:\n```\n$ GOOS=linux GOARCH=amd64 go tool compile -S direct_topfunc_call.go\n```\n```Assembly\n0x0000 TEXT\t\t\"\".add(SB), NOSPLIT, $0-16\n  0x0000 FUNCDATA\t$0, gclocals·f207267fbf96a0178e8758c6e3e0ce28(SB)\n  0x0000 FUNCDATA\t$1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)\n  0x0000 MOVL\t\t\"\".b+12(SP), AX\n  0x0004 MOVL\t\t\"\".a+8(SP), CX\n  0x0008 ADDL\t\tCX, AX\n  0x000a MOVL\t\tAX, \"\".~r2+16(SP)\n  0x000e MOVB\t\t$1, \"\".~r3+20(SP)\n  0x0013 RET\n\n0x0000 TEXT\t\t\"\".main(SB), $24-0\n  ;; ...omitted stack-split prologue...\n  0x000f SUBQ\t\t$24, SP\n  0x0013 MOVQ\t\tBP, 16(SP)\n  0x0018 LEAQ\t\t16(SP), BP\n  0x001d FUNCDATA\t$0, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)\n  0x001d FUNCDATA\t$1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)\n  0x001d MOVQ\t\t$137438953482, AX\n  0x0027 MOVQ\t\tAX, (SP)\n  0x002b PCDATA\t\t$0, $0\n  0x002b CALL\t\t\"\".add(SB)\n  0x0030 MOVQ\t\t16(SP), BP\n  0x0035 ADDQ\t\t$24, SP\n  0x0039 RET\n  ;; ...omitted stack-split epilogue...\n```\n\nWe'll dissect those 2 functions line-by-line in order to get a better understanding of what the compiler is doing.\n\n### Dissecting `add`\n\n```Assembly\n0x0000 TEXT \"\".add(SB), NOSPLIT, $0-16\n```\n\n- `0x0000`: Offset of the current instruction, relative to the start of the function.\n\n- `TEXT \"\".add`: The `TEXT` directive declares the `\"\".add` symbol as part of the `.text` section (i.e. runnable code) and indicates that the instructions that follow are the body of the function.  \nThe empty string `\"\"` will be replaced by the name of the current package at link-time: i.e., `\"\".add` will become `main.add` once linked into our final binary.\n\n- `(SB)`: `SB` is the virtual register that holds the \"static-base\" pointer, i.e. the address of the beginning of the address-space of our program.  
\n`\"\".add(SB)` declares that our symbol is located at some constant offset (computed by the linker) from the start of our address-space. Put differently, it has an absolute, direct address: it's a global function symbol.  \nGood ol' `objdump` will confirm all of that for us:\n```\n$ objdump -j .text -t direct_topfunc_call | grep 'main.add'\n000000000044d980 g     F .text\t000000000000000f main.add\n```\n> All user-defined symbols are written as offsets to the pseudo-registers FP (arguments and locals) and SB (globals).  \n> The SB pseudo-register can be thought of as the origin of memory, so the symbol foo(SB) is the name foo as an address in memory.\n\n- `NOSPLIT`: Indicates to the compiler that it should *not* insert the *stack-split* preamble, which checks whether the current stack needs to be grown.  \nIn the case of our `add` function, the compiler has set the flag by itself: it is smart enough to figure out that, since `add` has no local variables and no stack-frame of its own, it simply cannot outgrow the current stack; thus it'd be a complete waste of CPU cycles to run these checks at each call site.  \n> \"NOSPLIT\": Don't insert the preamble to check if the stack must be split. The frame for the routine, plus anything it calls, must fit in the spare space at the top of the stack segment. Used to protect routines such as the stack splitting code itself.  \nWe'll have a quick word about goroutines and stack-splits at the end of this chapter.\n\n- `$0-16`: `$0` denotes the size in bytes of the stack-frame that will be allocated; while `$16` specifies the size of the arguments passed in by the caller.  \n> In the general case, the frame size is followed by an argument size, separated by a minus sign. (It's not a subtraction, just idiosyncratic syntax.) The frame size $24-8 states that the function has a 24-byte frame and is called with 8 bytes of argument, which live on the caller's frame. 
If NOSPLIT is not specified for the TEXT, the argument size must be provided. For assembly functions with Go prototypes, go vet will check that the argument size is correct.\n\n```Assembly\n0x0000 FUNCDATA $0, gclocals·f207267fbf96a0178e8758c6e3e0ce28(SB)\n0x0000 FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)\n```\n\n> The FUNCDATA and PCDATA directives contain information for use by the garbage collector; they are introduced by the compiler.\n\nDon't worry about this for now; we'll come back to it when diving into garbage collection later in the book.\n\n```Assembly\n0x0000 MOVL \"\".b+12(SP), AX\n0x0004 MOVL \"\".a+8(SP), CX\n```\n\nThe Go calling convention mandates that every argument must be passed on the stack, using the pre-reserved space on the caller's stack-frame.  \nIt is the caller's responsibility to grow (and shrink back) the stack appropriately so that arguments can be passed to the callee, and potential return-values passed back to the caller.\n\nThe Go compiler never generates instructions from the PUSH/POP family: the stack is grown or shrunk by respectively decrementing or incrementing the ~virtual~ hardware stack pointer `SP`.  \n*[UPDATE: We've discussed this matter in [issue #21: about SP register](https://github.com/teh-cmc/go-internals/issues/21).]*  \n> The SP pseudo-register is a virtual stack pointer used to refer to frame-local variables and the arguments being prepared for function calls. It points to the top of the local stack frame, so references should use negative offsets in the range [−framesize, 0): x-8(SP), y-4(SP), and so on.\n\nAlthough the official documentation states that \"*All user-defined symbols are written as offsets to the pseudo-register FP (arguments and locals)*\", this is only ever true for hand-written code.  \nLike most recent compilers, the Go tool suite always references arguments and locals using offsets from the stack-pointer directly in the code it generates. 
This allows for the frame-pointer to be used as an extra general-purpose register on platforms with fewer registers (e.g. x86).  \nHave a look at *Stack frame layout on x86-64* in the links at the end of this chapter if you enjoy these kinds of nitty-gritty details.  \n*[UPDATE: We've discussed this matter in [issue #2: Frame pointer](https://github.com/teh-cmc/go-internals/issues/2).]*\n\n`\"\".b+12(SP)` and `\"\".a+8(SP)` respectively refer to the addresses 12 bytes and 8 bytes below the top of the stack (remember: it grows downwards!).  \n`.a` and `.b` are arbitrary aliases given to the referred locations; although *they have absolutely no semantic meaning* whatsoever, they are mandatory when using relative addressing on virtual registers.\nThe documentation about the virtual frame-pointer has something to say about this:\n> The FP pseudo-register is a virtual frame pointer used to refer to function arguments. The compilers maintain a virtual frame pointer and refer to the arguments on the stack as offsets from that pseudo-register. Thus 0(FP) is the first argument to the function, 8(FP) is the second (on a 64-bit machine), and so on. However, when referring to a function argument this way, it is necessary to place a name at the beginning, as in first_arg+0(FP) and second_arg+8(FP). (The meaning of the offset —offset from the frame pointer— is distinct from its use with SB, where it is an offset from the symbol.) The assembler enforces this convention, rejecting plain 0(FP) and 8(FP). The actual name is semantically irrelevant but should be used to document the argument's name.\n\nFinally, there are two important things to note here:\n1. The first argument `a` is not located at `0(SP)`, but rather at `8(SP)`; that's because the caller stores its return-address in `0(SP)` via the `CALL` pseudo-instruction.\n2. Arguments are passed in reverse-order; i.e. 
the first argument is the closest to the top of the stack.\n\n```Assembly\n0x0008 ADDL CX, AX\n0x000a MOVL AX, \"\".~r2+16(SP)\n0x000e MOVB $1, \"\".~r3+20(SP)\n```\n\n`ADDL` does the actual addition of the two **L**ong-words (i.e. 4-byte values) stored in `AX` and `CX`, then stores the final result in `AX`.  \nThat result is then moved over to `\"\".~r2+16(SP)`, where the caller had previously reserved some stack space and expects to find its return values. Once again, `\"\".~r2` has no semantic meaning here.\n\nTo demonstrate how Go handles multiple return-values, we're also returning a constant `true` boolean value.  \nThe mechanics at play are exactly the same as for our first return value; only the offset relative to `SP` changes.\n\n```Assembly\n0x0013 RET\n```\n\nA final `RET` pseudo-instruction tells the Go assembler to insert whatever instructions are required by the calling convention of the target platform in order to properly return from a subroutine call.  \nMost likely this will cause the code to pop off the return-address stored at `0(SP)` then jump back to it.\n\n> The last instruction in a TEXT block must be some sort of jump, usually a RET (pseudo-)instruction.\n> (If it's not, the linker will append a jump-to-itself instruction; there is no fallthrough in TEXTs.)\n\nThat's a lot of syntax and semantics to ingest all at once. 
Here's a quick inlined summary of what we've just covered:\n```Assembly\n;; Declare global function symbol \"\".add (actually main.add once linked)\n;; Do not insert stack-split preamble\n;; 0 bytes of stack-frame, 16 bytes of arguments passed in\n;; func add(a, b int32) (int32, bool)\n0x0000 TEXT\t\"\".add(SB), NOSPLIT, $0-16\n  ;; ...omitted FUNCDATA stuff...\n  0x0000 MOVL\t\"\".b+12(SP), AX\t    ;; move second Long-word (4B) argument from caller's stack-frame into AX\n  0x0004 MOVL\t\"\".a+8(SP), CX\t    ;; move first Long-word (4B) argument from caller's stack-frame into CX\n  0x0008 ADDL\tCX, AX\t\t    ;; compute AX=CX+AX\n  0x000a MOVL\tAX, \"\".~r2+16(SP)   ;; move addition result (AX) into caller's stack-frame\n  0x000e MOVB\t$1, \"\".~r3+20(SP)   ;; move `true` boolean (constant) into caller's stack-frame\n  0x0013 RET\t\t\t    ;; jump to return address stored at 0(SP)\n```\n\nAll in all, here's a visual representation of what the stack looks like when `main.add` has finished executing:\n```\n   |    +-------------------------+ <-- 32(SP)              \n   |    |                         |                         \n G |    |                         |                         \n R |    |                         |                         \n O |    | main.main's saved       |                         \n W |    |     frame-pointer (BP)  |                         \n S |    |-------------------------| <-- 24(SP)              \n   |    |      [alignment]        |                         \n D |    | \"\".~r3 (bool) = 1/true  | <-- 21(SP)              \n O |    |-------------------------| <-- 20(SP)              \n W |    |                         |                         \n N |    | \"\".~r2 (int32) = 42     |                         \n W |    |-------------------------| <-- 16(SP)              \n A |    |                         |                         \n R |    | \"\".b (int32) = 32       |                         \n D |    |-------------------------| <-- 
12(SP)              \n S |    |                         |                         \n   |    | \"\".a (int32) = 10       |                         \n   |    |-------------------------| <-- 8(SP)               \n   |    |                         |                         \n   |    |                         |                         \n   |    |                         |                         \n \\ | /  | return address to       |                         \n  \\|/   |     main.main + 0x30    |                         \n   -    +-------------------------+ <-- 0(SP) (TOP OF STACK)\n\n(diagram made with https://textik.com)\n```\n<!-- https://textik.com/#af55d3485eaa56f2 -->\n\n### Dissecting `main`\n\nWe'll spare you some unnecessary scrolling, here's a reminder of what our `main` function looks like:\n```Assembly\n0x0000 TEXT\t\t\"\".main(SB), $24-0\n  ;; ...omitted stack-split prologue...\n  0x000f SUBQ\t\t$24, SP\n  0x0013 MOVQ\t\tBP, 16(SP)\n  0x0018 LEAQ\t\t16(SP), BP\n  ;; ...omitted FUNCDATA stuff...\n  0x001d MOVQ\t\t$137438953482, AX\n  0x0027 MOVQ\t\tAX, (SP)\n  ;; ...omitted PCDATA stuff...\n  0x002b CALL\t\t\"\".add(SB)\n  0x0030 MOVQ\t\t16(SP), BP\n  0x0035 ADDQ\t\t$24, SP\n  0x0039 RET\n  ;; ...omitted stack-split epilogue...\n```\n\n```Assembly\n0x0000 TEXT \"\".main(SB), $24-0\n```\n\nNothing new here:\n- `\"\".main` (`main.main` once linked) is a global function symbol in the `.text` section, whose address is some constant offset from the beginning of our address-space.\n- It allocates a 24 bytes stack-frame and doesn't receive any argument nor does it return any value.\n\n```Assembly\n0x000f SUBQ     $24, SP\n0x0013 MOVQ     BP, 16(SP)\n0x0018 LEAQ     16(SP), BP\n```\n\nAs we mentioned above, the Go calling convention mandates that every argument must be passed on the stack.\n\nOur caller, `main`, grows its stack-frame by 24 bytes (*remember that the stack grows downwards, so `SUBQ` here actually makes the stack-frame bigger*) by decrementing the 
virtual stack-pointer.\nOf those 24 bytes:\n- 8 bytes (`16(SP)`-`24(SP)`) are used to store the current value of the frame-pointer `BP` (*the real one!*) to allow for stack-unwinding and facilitate debugging\n- 1+3 bytes (`12(SP)`-`16(SP)`) are reserved for the second return value (`bool`) plus 3 bytes of necessary alignment on `amd64`\n- 4 bytes (`8(SP)`-`12(SP)`) are reserved for the first return value (`int32`)\n- 4 bytes (`4(SP)`-`8(SP)`) are reserved for the value of argument `b (int32)`\n- 4 bytes (`0(SP)`-`4(SP)`) are reserved for the value of argument `a (int32)`\n\nFinally, following the growth of the stack, `LEAQ` computes the new address of the frame-pointer and stores it in `BP`.\n\n```Assembly\n0x001d MOVQ     $137438953482, AX\n0x0027 MOVQ     AX, (SP)\n```\n\nThe caller pushes the arguments for the callee as a **Q**uad word (i.e. an 8-byte value) at the top of the stack that it has just grown.  \nAlthough it might look like random garbage at first, `137438953482` actually corresponds to the `10` and `32` 4-byte values concatenated into one 8-byte value:\n```\n$ echo 'obase=2;137438953482' | bc\n10000000000000000000000000000000001010\n\\____/\\______________________________/\n   32                              10\n```\n\n```Assembly\n0x002b CALL     \"\".add(SB)\n```\n\nWe `CALL` our `add` function as an offset relative to the static-base pointer: i.e. this is a straightforward jump to a direct address.\n\nNote that `CALL` also pushes the return-address (8-byte value) at the top of the stack; so every reference to `SP` made from within our `add` function ends up being offset by 8 bytes!  \nE.g. `\"\".a` is not at `0(SP)` anymore, but at `8(SP)`.\n\n```Assembly\n0x0030 MOVQ     16(SP), BP\n0x0035 ADDQ     $24, SP\n0x0039 RET\n```\n\nFinally, we:\n1. Unwind the frame-pointer by one stack-frame (i.e. we \"go down\" one level)\n2. Shrink the stack by 24 bytes to reclaim the stack space we had previously allocated\n3. 
Ask the Go assembler to insert subroutine-return related stuff\n\n## A word about goroutines, stacks and splits\n\nNow is not the time nor place to delve into goroutines' internals (*..that comes later*), but as we start looking at assembly dumps more and more, instructions related to stack management will rapidly become a very familiar sight.  \nWe should be able to quickly recognize these patterns, and, while we're at it, understand the general idea of what they do and why they do it.\n\n### Stacks\n\nSince the number of goroutines in a Go program is non-deterministic, and can go up to several million in practice, the runtime must take the conservative route when allocating stack space for goroutines to avoid eating up all of the available memory.  \nAs such, every new goroutine is given an initial tiny 2kB stack by the runtime (said stack is actually allocated on the heap behind the scenes).\n\nAs a goroutine runs along doing its job, it might end up outgrowing its contrived, initial stack-space (i.e. stack-overflow).  \nTo prevent this from happening, the runtime makes sure that when a goroutine is running out of stack, a new, bigger stack with two times the size of the old one gets allocated, and that the content of the original stack gets copied over to the new one.  \nThis process is known as a *stack-split* and effectively makes goroutine stacks dynamically-sized.\n\n### Splits\n\nFor stack-splitting to work, the compiler inserts a few instructions at the beginning and end of every function that could potentially overflow its stack.  
\nAs we've seen earlier in this chapter, and to avoid unnecessary overhead, functions that cannot possibly outgrow their stack are marked as `NOSPLIT` as a hint for the compiler not to insert these checks.\n\nLet's look at our main function from earlier, this time without omitting the stack-split preamble:\n```Assembly\n0x0000 TEXT\t\"\".main(SB), $24-0\n  ;; stack-split prologue\n  0x0000 MOVQ\t(TLS), CX\n  0x0009 CMPQ\tSP, 16(CX)\n  0x000d JLS\t58\n\n  0x000f SUBQ\t$24, SP\n  0x0013 MOVQ\tBP, 16(SP)\n  0x0018 LEAQ\t16(SP), BP\n  ;; ...omitted FUNCDATA stuff...\n  0x001d MOVQ\t$137438953482, AX\n  0x0027 MOVQ\tAX, (SP)\n  ;; ...omitted PCDATA stuff...\n  0x002b CALL\t\"\".add(SB)\n  0x0030 MOVQ\t16(SP), BP\n  0x0035 ADDQ\t$24, SP\n  0x0039 RET\n\n  ;; stack-split epilogue\n  0x003a NOP\n  ;; ...omitted PCDATA stuff...\n  0x003a CALL\truntime.morestack_noctxt(SB)\n  0x003f JMP\t0\n```\n\nAs you can see, the stack-split preamble is divided into a prologue and an epilogue:\n- The prologue checks whether the goroutine is running out of space and, if it's the case, jumps to the epilogue.\n- The epilogue, on the other hand, triggers the stack-growth machinery and then jumps back to the prologue.\n\nThis creates a feedback loop that goes on for as long as a large enough stack hasn't been allocated for our starved goroutine.\n\n**Prologue**\n```Assembly\n0x0000 MOVQ\t(TLS), CX   ;; store current *g in CX\n0x0009 CMPQ\tSP, 16(CX)  ;; compare SP and g.stackguard0\n0x000d JLS\t58\t    ;; jumps to 0x3a if SP <= g.stackguard0\n```\n\n`TLS` is a virtual register maintained by the runtime that holds a pointer to the current `g`, i.e. 
the data-structure that keeps track of all the state of a goroutine.\n\nLooking at the definition of `g` from the source code of the runtime:\n```Go\ntype g struct {\n\tstack       stack   // 16 bytes\n\t// stackguard0 is the stack pointer compared in the Go stack growth prologue.\n\t// It is stack.lo+StackGuard normally, but can be StackPreempt to trigger a preemption.\n\tstackguard0 uintptr\n\tstackguard1 uintptr\n\n\t// ...omitted dozens of fields...\n}\n```\nWe can see that `16(CX)` corresponds to `g.stackguard0`, which is the threshold value maintained by the runtime that, when compared to the stack-pointer, indicates whether or not a goroutine is about to run out of space.  \nThe prologue thus checks if the current `SP` value is less than or equal to the `stackguard0` threshold (that is, the stack has grown down to or past the threshold; remember that a lower `SP` means a bigger stack), then jumps to the epilogue if it happens to be the case.\n\n**Epilogue**\n```Assembly\n0x003a NOP\n0x003a CALL\truntime.morestack_noctxt(SB)\n0x003f JMP\t0\n```\n\nThe body of the epilogue is pretty straightforward: it calls into the runtime, which will do the actual work of growing the stack, then jumps back to the first instruction of the function (i.e. to the prologue).\n\nThe `NOP` instruction just before the `CALL` exists so that the prologue doesn't jump directly onto a `CALL` instruction. On some platforms, doing so can lead to very dark places; it's a common practice to set up a no-op instruction right before the actual call and land on this `NOP` instead.  \n*[UPDATE: We've discussed this matter in [issue #4: Clarify \"nop before call\" paragraph](https://github.com/teh-cmc/go-internals/issues/4).]*\n\n### Minus some subtleties\n\nWe've merely covered the tip of the iceberg here.  
\nThe inner mechanics of stack-growth have many more subtleties that we haven't even mentioned here: the whole process is quite a complex machinery overall, and will require a chapter of its own.\n\nWe'll come back to these matters in time.\n\n## Conclusion\n\nThis quick introduction to Go's assembler should give you enough material to start toying around.\n\nAs we dig deeper and deeper into Go's internals for the rest of this book, Go assembly will be one of our most relied-on tool to understand what goes on behind the scenes and connect the, at first sight, not-always-so-obvious dots.\n\nIf you have any questions or suggestions, don't hesitate to open an issue with the `chapter1:` prefix!\n\n## Links\n\n- [[Official] A Quick Guide to Go's Assembler](https://golang.org/doc/asm)\n- [[Official] Go Compiler Directives](https://golang.org/cmd/compile/#hdr-Compiler_Directives)\n- [[Official] The design of the Go Assembler](https://www.youtube.com/watch?v=KINIAgRpkDA)\n- [[Official] Contiguous stacks Design Document](https://docs.google.com/document/d/1wAaf1rYoM4S4gtnPh0zOlGzWtrZFQ5suE8qr2sD8uWQ/pub)\n- [[Official] The `_StackMin` constant](https://github.com/golang/go/blob/bf86aec25972f3a100c3aa58a6abcbcc35bdea49/src/runtime/stack.go#L70-L71)\n- [[Discussion] Issue #2: *Frame pointer*](https://github.com/teh-cmc/go-internals/issues/2)\n- [[Discussion] Issue #4: *Clarify \"nop before call\" paragraph*](https://github.com/teh-cmc/go-internals/issues/4)\n- [A Foray Into Go Assembly Programming](https://blog.sgmansfield.com/2017/04/a-foray-into-go-assembly-programming/)\n- [Dropping Down Go Functions in Assembly](https://www.youtube.com/watch?v=9jpnFmJr2PE)\n- [What is the purpose of the EBP frame pointer register?](https://stackoverflow.com/questions/579262/what-is-the-purpose-of-the-ebp-frame-pointer-register)\n- [Stack frame layout on x86-64](https://eli.thegreenplace.net/2011/09/06/stack-frame-layout-on-x86-64)\n- [How Stacks are Handled in 
Go](https://blog.cloudflare.com/how-stacks-are-handled-in-go/)\n- [Why stack grows down](https://gist.github.com/cpq/8598782)\n"
  },
  {
    "path": "chapter1_assembly_primer/direct_topfunc_call.go",
    "content": "package main\n\n//go:noinline\nfunc add(a, b int32) (int32, bool) { return a + b, true }\n\nfunc main() { add(10, 32) }\n"
  },
  {
    "path": "chapter2_interfaces/Makefile",
    "content": "GOOS=linux\nGOARCH=amd64\n\nSOURCES := $(wildcard *.go)\nOBJECTS = $(SOURCES:.go=.o)\nEXECUTABLES = $(OBJECTS:.o=.bin)\n\n.SECONDARY: ${OBJECTS}\n\nall: ${EXECUTABLES}\n\n%.o: %.go\n\tGOOS=${GOOS} GOARCH=${GOARCH} go tool compile $<\n\n%.bin: %.o\n\tGOOS=${GOOS} GOARCH=${GOARCH} go tool link -o $@ $<\n\nclean:\n\trm -f ${OBJECTS}\n\trm -f ${EXECUTABLES}\n"
  },
  {
    "path": "chapter2_interfaces/README.md",
    "content": "<!-- Copyright © 2018 Clement Rey <cr.rey.clement@gmail.com>. -->\n<!-- Licensed under the BY-NC-SA Creative Commons 4.0 International Public License. -->\n\n```Bash\n$ go version\ngo version go1.10 linux/amd64\n```\n\n# Chapter II: Interfaces\n\nThis chapter covers the inner workings of Go's interfaces.\n\nSpecifically, we'll look at:\n- How functions & methods are called at run time.\n- How interfaces are built and what they're made of.\n- How, when and at what cost does dynamic dispatch work.\n- How the empty interface & other special cases differ from their peers.\n- How interface composition works.\n- How and at what cost do type assertions work.\n\nAs we dig deeper and deeper, we'll also poke at miscellaneous low-level concerns, such as some implementation details of modern CPUs as well as various optimizations techniques used by the Go compiler.\n\n---\n\n**Table of Contents**\n<!-- START doctoc generated TOC please keep comment here to allow auto update -->\n<!-- DON'T EDIT THIS SECTION, INSTEAD RE-RUN doctoc TO UPDATE -->\n\n\n- [Function and method calls](#function-and-method-calls)\n  - [Overview of direct calls](#overview-of-direct-calls)\n  - [Implicit dereferencing](#implicit-dereferencing)\n- [Anatomy of an interface](#anatomy-of-an-interface)\n  - [Overview of the datastructures](#overview-of-the-datastructures)\n  - [Creating an interface](#creating-an-interface)\n  - [Reconstructing an `itab` from an executable](#reconstructing-an-itab-from-an-executable)\n- [Dynamic dispatch](#dynamic-dispatch)\n  - [Indirect method call on interface](#indirect-method-call-on-interface)\n  - [Overhead](#overhead)\n    - [The theory: quick refresher on modern CPUs](#the-theory-quick-refresher-on-modern-cpus)\n    - [The practice: benchmarks](#the-practice-benchmarks)\n- [Special cases & compiler tricks](#special-cases--compiler-tricks)\n  - [The empty interface](#the-empty-interface)\n  - [Interface holding a scalar 
type](#interface-holding-a-scalar-type)\n  - [A word about zero-values](#a-word-about-zero-values)\n  - [A tangent about zero-size variables](#a-tangent-about-zero-size-variables)\n- [Interface composition](#interface-composition)\n- [Assertions](#assertions)\n  - [Type assertions](#type-assertions)\n  - [Type-switches](#type-switches)\n- [Conclusion](#conclusion)\n- [Links](#links)\n\n<!-- END doctoc generated TOC please keep comment here to allow auto update -->\n\n---\n\n- *This chapter assumes you're familiar with Go's assembler ([chapter I](../chapter1_assembly_primer/README.md)).*\n- *If and when running into architecture-specific matters, always assume `linux/amd64`.*\n- *We will always work with compiler optimizations **enabled**.*\n- *Quoted text and/or comments always come from the official documentation (including Russ Cox \"Function Calls\" design document) and/or codebase, unless stated otherwise.*\n\n## Function and method calls\n\nAs pointed out by Russ Cox in his design document about function calls (listed at the end of this chapter), Go has..:\n\n..4 different kinds of functions..:\n> - top-level func\n> - method with value receiver\n> - method with pointer receiver\n> - func literal\n\n..and 5 different kinds of calls:\n> - direct call of top-level func (`func TopLevel(x int) {}`)\n> - direct call of method with value receiver (`func (Value) M(int) {}`)\n> - direct call of method with pointer receiver (`func (*Pointer) M(int) {}`)\n> - indirect call of method on interface (`type Interface interface { M(int) }`)\n> - indirect call of func value (`var literal = func(x int) {}`)\n\nMixed together, these make up for 10 possible combinations of function and call types:\n> - direct call of top-level func /\n> - direct call of method with value receiver /\n> - direct call of method with pointer receiver /\n> - indirect call of method on interface / containing value with value method\n> - indirect call of method on interface / containing pointer with 
value method\n> - indirect call of method on interface / containing pointer with pointer method\n> - indirect call of func value / set to top-level func\n> - indirect call of func value / set to value method\n> - indirect call of func value / set to pointer method\n> - indirect call of func value / set to func literal\n>\n> (A slash separates what is known at compile time from what is only found out at run time.)\n\nWe'll first take a few minutes to review the three kinds of direct calls, then we'll shift our focus towards interfaces and indirect method calls for the rest of this chapter.  \nWe won't cover function literals in this chapter, as doing so would first require us to become familiar with the mechanics of closures.. which we'll inevitably do, in due time.\n\n### Overview of direct calls\n\nConsider the following code ([direct_calls.go](./direct_calls.go)):\n```Go\n//go:noinline\nfunc Add(a, b int32) int32 { return a + b }\n\ntype Adder struct{ id int32 }\n//go:noinline\nfunc (adder *Adder) AddPtr(a, b int32) int32 { return a + b }\n//go:noinline\nfunc (adder Adder) AddVal(a, b int32) int32 { return a + b }\n\nfunc main() {\n    Add(10, 32) // direct call of top-level function\n\n    adder := Adder{id: 6754}\n    adder.AddPtr(10, 32) // direct call of method with pointer receiver\n    adder.AddVal(10, 32) // direct call of method with value receiver\n\n    (&adder).AddVal(10, 32) // implicit dereferencing\n}\n```\n\nLet's have a quick look at the code generated for each of those 4 calls.\n\n**Direct call of a top-level function**\n\nLooking at the assembly output for `Add(10, 32)`:\n```Assembly\n0x0000 TEXT\t\"\".main(SB), $40-0\n  ;; ...omitted everything but the actual function call...\n  0x0021 MOVQ\t$137438953482, AX\n  0x002b MOVQ\tAX, (SP)\n  0x002f CALL\t\"\".Add(SB)\n  ;; ...omitted everything but the actual function call...\n```\nWe see that, as we already knew from chapter I, this translates into a direct jump to a global function symbol in the 
`.text` section, with the arguments and return values stored on the caller's stack-frame.  \nIt's as straightforward as it gets.\n\nRuss Cox wraps it up as such in his document:\n> Direct call of top-level func:\n> A direct call of a top-level func passes all arguments on the stack, expecting results to occupy the successive stack positions.\n\n**Direct call of a method with pointer receiver**\n\nFirst things first, the receiver is initialized via `adder := Adder{id: 6754}`:\n```Assembly\n0x0034 MOVL\t$6754, \"\".adder+28(SP)\n```\n*(The extra-space on our stack-frame was pre-allocated as part of the frame-pointer preamble, which we haven't shown here for conciseness.)*\n\nThen comes the actual method call to `adder.AddPtr(10, 32)`:\n```Assembly\n0x0057 LEAQ\t\"\".adder+28(SP), AX\t;; move &adder to..\n0x005c MOVQ\tAX, (SP)\t\t;; ..the top of the stack (argument #1)\n0x0060 MOVQ\t$137438953482, AX\t;; move (32,10) to..\n0x006a MOVQ\tAX, 8(SP)\t\t;; ..the top of the stack (arguments #3 & #2)\n0x006f CALL\t\"\".(*Adder).AddPtr(SB)\n```\n\nLooking at the assembly output, we can clearly see that a call to a method (whether it has a value or pointer receiver) is almost identical to a function call, the only difference being that the receiver is passed as first argument.  \nIn this case, we do so by loading the effective address (`LEAQ`) of `\"\".adder+28(SP)` at the top of the frame, so that argument #1 becomes `&adder` (if you're a bit confused regarding the semantics of `LEA` vs. 
`MOV`, you may want to have a look at the links at the end of this chapter for some pointers).\n\nNote how the compiler encodes the type of the receiver and whether it's a value or pointer directly into the name of the symbol: `\"\".(*Adder).AddPtr`.\n\n> Direct call of method:\n> In order to use the same generated code for both an indirect call of a func value and for a direct call, the code generated for a method (both value and pointer receivers) is chosen to have the same calling convention as a top-level function with the receiver as a leading argument.\n\n**Direct call of a method with value receiver**\n\nAs we'd expect, using a value receiver yields very similar code as above.  \nConsider `adder.AddVal(10, 32)`:\n```Assembly\n0x003c MOVQ\t$42949679714, AX\t;; move (10,6754) to..\n0x0046 MOVQ\tAX, (SP)\t\t;; ..the top of the stack (arguments #2 & #1)\n0x004a MOVL\t$32, 8(SP)\t\t;; move 32 to the top of the stack (argument #3)\n0x0052 CALL\t\"\".Adder.AddVal(SB)\n```\n\nLooks like something a bit trickier is going on here, though: the generated assembly isn't even referencing `\"\".adder+28(SP)` anywhere, even though that is where our receiver currently resides.  \nSo what's really going on here? Well, since the receiver is a value, and since the compiler is able to statically infer that value, it doesn't bother with copying the existing value from its current location (`28(SP)`): instead, it simply creates a new, identical `Adder` value directly on the stack, and merges this operation with the creation of the second argument to save one more instruction in the process.\n\nOnce again, notice how the symbol name of the method explicitly denotes that it expects a value receiver.\n\n### Implicit dereferencing\n\nThere's one final call that we haven't looked at yet: `(&adder).AddVal(10, 32)`.  \nIn that case, we're using a pointer variable to call a method that instead expects a value receiver. 
Somehow, Go automagically dereferences our pointer and manages to make the call. How so?\n\nHow the compiler handles this kind of situation depends on whether or not the receiver being pointed to has escaped to the heap or not.\n\n**Case A: The receiver is on the stack**\n\nIf the receiver is still on the stack and its size is sufficiently small that it can be copied in a few instructions, as is the case here, the compiler simply copies its value over to the top of the stack then does a straightforward method call to `\"\".Adder.AddVal` (i.e. the one with a value receiver).\n\n`(&adder).AddVal(10, 32)` thus looks like this in this situation:\n```Assembly\n0x0074 MOVL\t\"\".adder+28(SP), AX\t;; move (i.e. copy) adder (note the MOV instead of a LEA) to..\n0x0078 MOVL\tAX, (SP)\t\t;; ..the top of the stack (argument #1)\n0x007b MOVQ\t$137438953482, AX\t;; move (32,10) to..\n0x0085 MOVQ\tAX, 4(SP)\t\t;; ..the top of the stack (arguments #3 & #2)\n0x008a CALL\t\"\".Adder.AddVal(SB)\n```\n\nBoring (although efficient). Let's move on to case B.\n\n**Case B: The receiver is on the heap**\n\nIf the receiver has escaped to the heap then the compiler has to take a cleverer route: it generates a new method (with a pointer receiver, this time) that wraps `\"\".Adder.AddVal`, and replaces the original call to `\"\".Adder.AddVal` (the wrappee) with a call to `\"\".(*Adder).AddVal` (the wrapper).  
\nThe wrapper's sole mission, then, is to make sure that the receiver gets properly dereferenced before being passed to the wrappee, and that any arguments and return values involved are properly copied back and forth between the caller and the wrappee.\n\n(*NOTE: In assembly outputs, these wrapper methods are marked as `<autogenerated>`.*)\n\nHere's an annotated listing of the generated wrapper that should hopefully clear things up a bit:\n```Assembly\n0x0000 TEXT\t\"\".(*Adder).AddVal(SB), DUPOK|WRAPPER, $32-24\n  ;; ...omitted preambles...\n\n  0x0026 MOVQ\t\"\"..this+40(SP), AX ;; check whether the receiver..\n  0x002b TESTQ\tAX, AX\t\t    ;; ..is nil\n  0x002e JEQ\t92\t\t    ;; if it is, jump to 0x005c (panic)\n\n  0x0030 MOVL\t(AX), AX            ;; dereference pointer receiver..\n  0x0032 MOVL\tAX, (SP)            ;; ..and move (i.e. copy) the resulting value to argument #1\n\n  ;; forward (copy) arguments #2 & #3 then call the wrappee\n  0x0035 MOVL\t\"\".a+48(SP), AX\n  0x0039 MOVL\tAX, 4(SP)\n  0x003d MOVL\t\"\".b+52(SP), AX\n  0x0041 MOVL\tAX, 8(SP)\n  0x0045 CALL\t\"\".Adder.AddVal(SB) ;; call the wrapped method\n\n  ;; copy return value from wrapped method then return\n  0x004a MOVL\t16(SP), AX\n  0x004e MOVL\tAX, \"\".~r2+56(SP)\n  ;; ...omitted frame-pointer stuff...\n  0x005b RET\n\n  ;; throw a panic with a detailed error\n  0x005c CALL\truntime.panicwrap(SB)\n\n  ;; ...omitted epilogues...\n```\n\nObviously, this kind of wrapper can induce quite a bit of overhead considering all the copying that needs to be done in order to pass the arguments back and forth; especially if the wrappee is just a few instructions.  
\nFortunately, in practice, the compiler would have inlined the wrappee directly into the wrapper to amortize these costs (when feasible, at least).\n\nNote the `WRAPPER` directive in the definition of the symbol, which indicates that this method shouldn't appear in backtraces (so as not to confuse the end-user), nor should it be able to recover from panics that might be thrown by the wrappee.\n> WRAPPER: This is a wrapper function and should not count as disabling recover.\n\nThe `runtime.panicwrap` function, which throws a panic if the wrapper's receiver is `nil`, is pretty self-explanatory; here's its complete listing for reference ([src/runtime/error.go](https://github.com/golang/go/blob/bf86aec25972f3a100c3aa58a6abcbcc35bdea49/src/runtime/error.go#L132-L157)):\n```Go\n// panicwrap generates a panic for a call to a wrapped value method\n// with a nil pointer receiver.\n//\n// It is called from the generated wrapper code.\nfunc panicwrap() {\n    pc := getcallerpc()\n    name := funcname(findfunc(pc))\n    // name is something like \"main.(*T).F\".\n    // We want to extract pkg (\"main\"), typ (\"T\"), and meth (\"F\").\n    // Do it by finding the parens.\n    i := stringsIndexByte(name, '(')\n    if i < 0 {\n        throw(\"panicwrap: no ( in \" + name)\n    }\n    pkg := name[:i-1]\n    if i+2 >= len(name) || name[i-1:i+2] != \".(*\" {\n        throw(\"panicwrap: unexpected string after package name: \" + name)\n    }\n    name = name[i+2:]\n    i = stringsIndexByte(name, ')')\n    if i < 0 {\n        throw(\"panicwrap: no ) in \" + name)\n    }\n    if i+2 >= len(name) || name[i:i+2] != \").\" {\n        throw(\"panicwrap: unexpected string after type name: \" + name)\n    }\n    typ := name[:i]\n    meth := name[i+2:]\n    panic(plainError(\"value method \" + pkg + \".\" + typ + \".\" + meth + \" called using nil *\" + typ + \" pointer\"))\n}\n```\n\nThat's all for function and method calls, we'll now focus on the main course: interfaces.\n\n## Anatomy of 
an interface\n\n### Overview of the datastructures\n\nBefore we can understand how they work, we first need to build a mental model of the datastructures that make up interfaces and how they're laid out in memory.  \nTo that end, we'll have a quick peek into the runtime package to see what an interface actually looks like from the standpoint of the Go implementation.\n\n**The `iface` structure**\n\n`iface` is the root type that represents an interface within the runtime ([src/runtime/runtime2.go](https://github.com/golang/go/blob/bf86aec25972f3a100c3aa58a6abcbcc35bdea49/src/runtime/runtime2.go#L143-L146)).  \nIts definition goes like this:\n```Go\ntype iface struct { // 16 bytes on a 64bit arch\n    tab  *itab\n    data unsafe.Pointer\n}\n```\n\nAn interface is thus a very simple structure that maintains 2 pointers:\n- `tab` holds the address of an `itab` object, which embeds the datastructures that describe both the type of the interface as well as the type of the data it points to.\n- `data` is a raw (i.e. `unsafe`) pointer to the value held by the interface.\n\nWhile extremely simple, this definition already gives us some valuable information: since interfaces can only hold pointers, *any concrete value that we wrap into an interface will have to have its address taken*.  \nMore often than not, this will result in a heap allocation as the compiler takes the conservative route and forces the receiver to escape.  
\nThis holds true even for scalar types!\n\nWe can prove that with a few lines of code ([escape.go](./escape.go)):\n```Go\ntype Addifier interface{ Add(a, b int32) int32 }\n\ntype Adder struct{ name string }\n//go:noinline\nfunc (adder Adder) Add(a, b int32) int32 { return a + b }\n\nfunc main() {\n    adder := Adder{name: \"myAdder\"}\n    adder.Add(10, 32)\t      // doesn't escape\n    Addifier(adder).Add(10, 32) // escapes\n}\n```\n```Bash\n$ GOOS=linux GOARCH=amd64 go tool compile -m escape.go\nescape.go:13:10: Addifier(adder) escapes to heap\n# ...\n```\n\nOne could even visualize the resulting heap allocation using a simple benchmark ([escape_test.go](./escape_test.go)):\n```Go\nfunc BenchmarkDirect(b *testing.B) {\n    adder := Adder{id: 6754}\n    for i := 0; i < b.N; i++ {\n        adder.Add(10, 32)\n    }\n}\n\nfunc BenchmarkInterface(b *testing.B) {\n    adder := Adder{id: 6754}\n    for i := 0; i < b.N; i++ {\n        Addifier(adder).Add(10, 32)\n    }\n}\n```\n```Bash\n$ GOOS=linux GOARCH=amd64 go tool compile -m escape_test.go \n# ...\nescape_test.go:22:11: Addifier(adder) escapes to heap\n# ...\n```\n```Bash\n$ GOOS=linux GOARCH=amd64 go test -bench=. -benchmem ./escape_test.go\nBenchmarkDirect-8      \t2000000000\t         1.60 ns/op\t       0 B/op\t       0 allocs/op\nBenchmarkInterface-8   \t100000000\t         15.0 ns/op\t       4 B/op\t       1 allocs/op\n```\n\nWe can clearly see how each time we create a new `Addifier` interface and initialize it with our `adder` variable, a heap allocation of `sizeof(Adder)` actually takes place. 
\nLater in this chapter, we'll see how even simple scalar types can lead to heap allocations when used with interfaces.\n\nLet's turn our attention towards the next datastructure: `itab`.\n\n**The `itab` structure**\n\n`itab` is defined thusly ([src/runtime/runtime2.go](https://github.com/golang/go/blob/bf86aec25972f3a100c3aa58a6abcbcc35bdea49/src/runtime/runtime2.go#L648-L658)):\n```Go\ntype itab struct { // 40 bytes on a 64bit arch\n    inter *interfacetype\n    _type *_type\n    hash  uint32 // copy of _type.hash. Used for type switches.\n    _     [4]byte\n    fun   [1]uintptr // variable sized. fun[0]==0 means _type does not implement inter.\n}\n```\n\nAn `itab` is the heart & brain of an interface.  \n\nFirst, it embeds a `_type`, which is the internal representation of any Go type within the runtime.  \nA `_type` describes every facets of a type: its name, its characteristics (e.g. size, alignment...), and to some extent, even how it behaves (e.g. comparison, hashing...)!  \nIn this instance, the `_type` field describes the type of the value held by the interface, i.e. the value that the `data` pointer points to.\n\nSecond, we find a pointer to an `interfacetype`, which is merely a wrapper around `_type` with some extra information that are specific to interfaces.  \nAs you'd expect, the `inter` field describes the type of the interface itself.\n\nFinally, the `fun` array holds the function pointers that make up the virtual/dispatch table of the interface.  \nNotice the comment that says `// variable sized`, meaning that the size with which this array is declared is *irrelevant*.  \nWe'll see later in this chapter that the compiler is responsible for allocating the memory that backs this array, and does so independently of the size indicated here. 
Likewise, the runtime always accesses this array using raw pointers, thus bounds-checking does not apply here.\n\n**The `_type` structure**\n\nAs we said above, the `_type` structure gives a complete description of a Go type.  \nIt's defined as such ([src/runtime/type.go](https://github.com/golang/go/blob/bf86aec25972f3a100c3aa58a6abcbcc35bdea49/src/runtime/type.go#L25-L43)):\n```Go\ntype _type struct { // 48 bytes on a 64bit arch\n    size       uintptr\n    ptrdata    uintptr // size of memory prefix holding all pointers\n    hash       uint32\n    tflag      tflag\n    align      uint8\n    fieldalign uint8\n    kind       uint8\n    alg        *typeAlg\n    // gcdata stores the GC type data for the garbage collector.\n    // If the KindGCProg bit is set in kind, gcdata is a GC program.\n    // Otherwise it is a ptrmask bitmap. See mbitmap.go for details.\n    gcdata    *byte\n    str       nameOff\n    ptrToThis typeOff\n}\n```\n\nThankfully, most of these fields are quite self-explanatory.\n\nThe `nameOff` & `typeOff` types are `int32` offsets into the metadata embedded into the final executable by the linker. This metadata is loaded into `runtime.moduledata` structures at run time ([src/runtime/symtab.go](https://github.com/golang/go/blob/bf86aec25972f3a100c3aa58a6abcbcc35bdea49/src/runtime/symtab.go#L352-L393)), which should look fairly similar if you've ever had to look at the content of an ELF file.  \nThe runtime provide helpers that implement the necessary logic for following these offsets through the `moduledata` structures, such as e.g. 
`resolveNameOff` ([src/runtime/type.go](https://github.com/golang/go/blob/bf86aec25972f3a100c3aa58a6abcbcc35bdea49/src/runtime/type.go#L168-L196)) and `resolveTypeOff` ([src/runtime/type.go](https://github.com/golang/go/blob/bf86aec25972f3a100c3aa58a6abcbcc35bdea49/src/runtime/type.go#L202-L236)):\n```Go\nfunc resolveNameOff(ptrInModule unsafe.Pointer, off nameOff) name {}\nfunc resolveTypeOff(ptrInModule unsafe.Pointer, off typeOff) *_type {}\n```\nI.e., assuming `t` is a `_type`, calling `resolveTypeOff(t, t.ptrToThis)` returns a copy of `t`.\n\n**The `interfacetype` structure**\n\nFinally, here's the `interfacetype` structure ([src/runtime/type.go](https://github.com/golang/go/blob/bf86aec25972f3a100c3aa58a6abcbcc35bdea49/src/runtime/type.go#L342-L346)):\n```Go\ntype interfacetype struct { // 80 bytes on a 64bit arch\n    typ     _type\n    pkgpath name\n    mhdr    []imethod\n}\n\ntype imethod struct {\n    name nameOff\n    ityp typeOff\n}\n```\n\nAs mentioned, an `interfacetype` is just a wrapper around a `_type` with some extra interface-specific metadata added on top.  
\nIn the current implementation, this metadata is mostly composed of a list of offsets that points to the respective names and types of the methods exposed by the interface (`[]imethod`).\n\n**Conclusion**\n\nHere's an overview of what an `iface` looks like when represented with all of its sub-types inlined; this hopefully should help connect all the dots:\n```Go\ntype iface struct { // `iface`\n    tab *struct { // `itab`\n        inter *struct { // `interfacetype`\n            typ struct { // `_type`\n                size       uintptr\n                ptrdata    uintptr\n                hash       uint32\n                tflag      tflag\n                align      uint8\n                fieldalign uint8\n                kind       uint8\n                alg        *typeAlg\n                gcdata     *byte\n                str        nameOff\n                ptrToThis  typeOff\n            }\n            pkgpath name\n            mhdr    []struct { // `imethod`\n                name nameOff\n                ityp typeOff\n            }\n        }\n        _type *struct { // `_type`\n            size       uintptr\n            ptrdata    uintptr\n            hash       uint32\n            tflag      tflag\n            align      uint8\n            fieldalign uint8\n            kind       uint8\n            alg        *typeAlg\n            gcdata     *byte\n            str        nameOff\n            ptrToThis  typeOff\n        }\n        hash uint32\n        _    [4]byte\n        fun  [1]uintptr\n    }\n    data unsafe.Pointer\n}\n```\n\nThis section glossed over the different data-types that make up an interface to help us to start building a mental model of the various cogs involved in the overall machinery, and how they all work with each other.  
\nIn the next section, we'll learn how these datastructures actually get computed.\n\n### Creating an interface\n\nNow that we've had a quick look at all the datastructures involved, we'll focus on how they actually get allocated and initiliazed.\n\nConsider the following program ([iface.go](./iface.go)):\n```Go\ntype Mather interface {\n    Add(a, b int32) int32\n    Sub(a, b int64) int64\n}\n\ntype Adder struct{ id int32 }\n//go:noinline\nfunc (adder Adder) Add(a, b int32) int32 { return a + b }\n//go:noinline\nfunc (adder Adder) Sub(a, b int64) int64 { return a - b }\n\nfunc main() {\n    m := Mather(Adder{id: 6754})\n\n    // This call just makes sure that the interface is actually used.\n    // Without this call, the linker would see that the interface defined above\n    // is in fact never used, and thus would optimize it out of the final\n    // executable.\n    m.Add(10, 32)\n}\n```\n\n*NOTE: For the remainder of this chapter, we will denote an interface `I` that holds a type `T` as `<I,T>`. E.g. 
`Mather(Adder{id: 6754})` instantiates an `iface<Mather, Adder>`.*\n\nLet's zoom in on the instantiation of `iface<Mather, Adder>`:\n```Go\nm := Mather(Adder{id: 6754})\n```\nThis single line of Go code actually sets off quite a bit of machinery, as the assembly listing generated by the compiler can attest:  \n```Assembly\n;; part 1: allocate the receiver\n0x001d MOVL\t$6754, \"\"..autotmp_1+36(SP)\n;; part 2: set up the itab\n0x0025 LEAQ\tgo.itab.\"\".Adder,\"\".Mather(SB), AX\n0x002c MOVQ\tAX, (SP)\n;; part 3: set up the data\n0x0030 LEAQ\t\"\"..autotmp_1+36(SP), AX\n0x0035 MOVQ\tAX, 8(SP)\n0x003a CALL\truntime.convT2I32(SB)\n0x003f MOVQ\t16(SP), AX\n0x0044 MOVQ\t24(SP), CX\n```\n\nAs you can see, we've split the output into three logical parts.\n\n**Part 1: Allocate the receiver**\n\n```Assembly\n0x001d MOVL\t$6754, \"\"..autotmp_1+36(SP)\n```\n\nA constant decimal value of `6754`, corresponding to the ID of our `Adder`, is stored at the beginning of the current stack-frame.  \nIt's stored there so that the compiler will later be able to reference it by its address; we'll see why in part 3.\n\n**Part 2: Set up the itab**\n\n```Assembly\n0x0025 LEAQ\tgo.itab.\"\".Adder,\"\".Mather(SB), AX\n0x002c MOVQ\tAX, (SP)\n```\n\nIt looks like the compiler has already created the necessary `itab` for representing our `iface<Mather, Adder>` interface, and made it available to us via a global symbol: `go.itab.\"\".Adder,\"\".Mather`.  \n\nWe're in the process of building an `iface<Mather, Adder>` interface and, in order to do so, we're loading the effective address of this global `go.itab.\"\".Adder,\"\".Mather` symbol at the top of the current stack-frame.  
\nOnce again, we'll see why in part 3.\n\nSemantically, this gives us something along the lines of the following pseudo-code:\n```Go\ntab := getSymAddr(`go.itab.main.Adder,main.Mather`).(*itab)\n```\nThat's half of our interface right there!\n\nNow, while we're at it, let's have a deeper look at that `go.itab.\"\".Adder,\"\".Mather` symbol.  \nAs usual, the `-S` flag of the compiler can tell us a lot:\n```\n$ GOOS=linux GOARCH=amd64 go tool compile -S iface.go | grep -A 7 '^go.itab.\"\".Adder,\"\".Mather'\ngo.itab.\"\".Adder,\"\".Mather SRODATA dupok size=40\n    0x0000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................\n    0x0010 8a 3d 5f 61 00 00 00 00 00 00 00 00 00 00 00 00  .=_a............\n    0x0020 00 00 00 00 00 00 00 00                          ........\n    rel 0+8 t=1 type.\"\".Mather+0\n    rel 8+8 t=1 type.\"\".Adder+0\n    rel 24+8 t=1 \"\".(*Adder).Add+0\n    rel 32+8 t=1 \"\".(*Adder).Sub+0\n```\n\nNeat. Let's analyze this piece by piece.\n\nThe first piece declares the symbol and its attributes:\n```\ngo.itab.\"\".Adder,\"\".Mather SRODATA dupok size=40\n```\nAs usual, since we're looking directly at the intermediate object file generated by the compiler (i.e. the linker hasn't run yet), symbol names are still missing package names. Nothing new on that front.  \nOther than that, what we've got here is a 40-byte global object symbol that will be stored in the `.rodata` section of our binary.\n\nNote the `dupok` directive, which tells the linker that it is legal for this symbol to appear multiple times at link-time: the linker will have to arbitrarily choose one of them over the others.  \nWhat makes the Go authors think that this symbol might end up duplicated, I'm not sure. Feel free to file an issue if you know more.  
\n*[UPDATE: We've discussed about this matter in [issue #7: How you can get duplicated go.itab interface definitions](https://github.com/teh-cmc/go-internals/issues/7).]*\n\nThe second piece is a hexdump of the 40 bytes of data associated with the symbol. I.e., it's a serialized representation of an `itab` structure:\n```\n0x0000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................\n0x0010 8a 3d 5f 61 00 00 00 00 00 00 00 00 00 00 00 00  .=_a............\n0x0020 00 00 00 00 00 00 00 00                          ........\n```\nAs you can see, most of this data is just a bunch of zeros at this point. The linker will take care of filling them up, as we'll see in a minute.\n\nNotice how, among all these zeros, 4 bytes actually have been set though, at offset `0x10+4`.  \nIf we take a look back at the declaration of the `itab` structure and annotate the respective offsets of its fields:\n```Go\ntype itab struct { // 40 bytes on a 64bit arch\n    inter *interfacetype // offset 0x00 ($00)\n    _type *_type\t // offset 0x08 ($08)\n    hash  uint32\t // offset 0x10 ($16)\n    _     [4]byte\t // offset 0x14 ($20)\n    fun   [1]uintptr\t // offset 0x18 ($24)\n\t\t\t // offset 0x20 ($32)\n}\n```\nWe see that offset `0x10+4` matches the `hash uint32` field: i.e., the hash value that corresponds to our `main.Adder` type is already right there in our object file.\n\nThe third and final piece lists a bunch of relocation directives for the linker:\n```\nrel 0+8 t=1 type.\"\".Mather+0\nrel 8+8 t=1 type.\"\".Adder+0\nrel 24+8 t=1 \"\".(*Adder).Add+0\nrel 32+8 t=1 \"\".(*Adder).Sub+0\n```\n\n`rel 0+8 t=1 type.\"\".Mather+0` tells the linker to fill up the first 8 bytes (`0+8`) of the contents with the address of the global object symbol `type.\"\".Mather`.  
\n`rel 8+8 t=1 type.\"\".Adder+0` then fills the next 8 bytes with the address of `type.\"\".Adder`, and so on and so forth.\n\nOnce the linker has done its job and followed all of these directives, our 40-byte serialized `itab` will be complete.  \nOverall, we're now looking at something akin to the following pseudo-code:\n```Go\ntab := getSymAddr(`go.itab.main.Adder,main.Mather`).(*itab)\n\n// NOTE: The linker strips the `type.` prefix from these symbols when building\n// the executable, so the final symbol names in the .rodata section of the\n// binary will actually be `main.Mather` and `main.Adder` rather than\n// `type.main.Mather` and `type.main.Adder`.\n// Don't get tripped up by this when toying around with objdump.\ntab.inter = getSymAddr(`type.main.Mather`).(*interfacetype)\ntab._type = getSymAddr(`type.main.Adder`).(*_type)\n\ntab.fun[0] = getSymAddr(`main.(*Adder).Add`).(uintptr)\ntab.fun[1] = getSymAddr(`main.(*Adder).Sub`).(uintptr)\n```\n\nWe've got ourselves a ready-to-use `itab`, now if we just had some data to go along with it, that'd make for a nice, complete interface.\n\n**Part 3: Set up the data**\n\n```Assembly\n0x0030 LEAQ\t\"\"..autotmp_1+36(SP), AX\n0x0035 MOVQ\tAX, 8(SP)\n0x003a CALL\truntime.convT2I32(SB)\n0x003f MOVQ\t16(SP), AX\n0x0044 MOVQ\t24(SP), CX\n```\n\nRemember from part 2 that the top of the stack `(SP)` currently holds the address of `go.itab.\"\".Adder,\"\".Mather` (argument #1).  \nAlso remember from part 1 that we had stored a `$6754` decimal constant in `\"\"..autotmp_1+36(SP)`: we now load the effective address of this constant just below the top of the stack-frame, at 8(SP) (argument #2).\n\nThese two pointers are the two arguments that we pass into `runtime.convT2I32`, which will apply the final touches of glue to create and return our complete interface.  
\nLet's have a closer look at it ([src/runtime/iface.go](https://github.com/golang/go/blob/bf86aec25972f3a100c3aa58a6abcbcc35bdea49/src/runtime/iface.go#L433-L451)):\n```Go\nfunc convT2I32(tab *itab, elem unsafe.Pointer) (i iface) {\n    t := tab._type\n    /* ...omitted debug stuff... */\n    var x unsafe.Pointer\n    if *(*uint32)(elem) == 0 {\n        x = unsafe.Pointer(&zeroVal[0])\n    } else {\n        x = mallocgc(4, t, false)\n        *(*uint32)(x) = *(*uint32)(elem)\n    }\n    i.tab = tab\n    i.data = x\n    return\n}\n```\n\nSo `runtime.convT2I32` does 4 things:\n1. It creates a new `iface` structure `i` (to be pedantic, its caller creates it.. same difference).\n2. It assigns the `itab` pointer we just gave it to `i.tab`.\n3. It **allocates a new object of type `i.tab._type` on the heap**, then copies the value pointed to by the second argument `elem` into that new object.\n4. It returns the final interface.\n\nThis process is quite straightforward overall, although the 3rd step does involve some tricky implementation details in this specific case, which are caused by the fact that our `Adder` type is effectively a scalar type.  
\nWe'll look at the interactions of scalar types and interfaces in more detail in the section about [the special cases of interfaces](#interface-holding-a-scalar-type).\n\nConceptually, we've now accomplished the following (pseudo-code):\n```Go\ntab := getSymAddr(`go.itab.main.Adder,main.Mather`).(*itab)\nelem := getSymAddr(`\"\"..autotmp_1+36(SP)`).(*int32)\n\ni := runtime.convT2I32(tab, unsafe.Pointer(elem))\n\nassert(i.tab == tab)\nassert(*(*int32)(i.data) == 6754) // same value..\nassert((*int32)(i.data) != elem)  // ..but different (al)locations!\n```\n\nTo summarize all that just went down, here's a complete, annotated version of the assembly code for all 3 parts:\n```Assembly\n0x001d MOVL\t$6754, \"\"..autotmp_1+36(SP)         ;; create an addressable $6754 value at 36(SP)\n0x0025 LEAQ\tgo.itab.\"\".Adder,\"\".Mather(SB), AX  ;; set up go.itab.\"\".Adder,\"\".Mather..\n0x002c MOVQ\tAX, (SP)                            ;; ..as first argument (tab *itab)\n0x0030 LEAQ\t\"\"..autotmp_1+36(SP), AX            ;; set up &36(SP)..\n0x0035 MOVQ\tAX, 8(SP)                           ;; ..as second argument (elem unsafe.Pointer)\n0x003a CALL\truntime.convT2I32(SB)               ;; call convT2I32(go.itab.\"\".Adder,\"\".Mather, &$6754)\n0x003f MOVQ\t16(SP), AX                          ;; AX now holds i.tab (go.itab.\"\".Adder,\"\".Mather)\n0x0044 MOVQ\t24(SP), CX                          ;; CX now holds i.data (&$6754, somewhere on the heap)\n```\nKeep in mind that all of this started with just one single line: `m := Mather(Adder{id: 6754})`.\n\nWe finally got ourselves a complete, working interface.\n\n### Reconstructing an `itab` from an executable\n\nIn the previous section, we dumped the contents of `go.itab.\"\".Adder,\"\".Mather` directly from the object files generated by the compiler and ended up looking at what was mostly a blob of zeros (except for the `hash` value):\n```\n$ GOOS=linux GOARCH=amd64 go tool compile -S iface.go | grep -A 3 
'^go.itab.\"\".Adder,\"\".Mather'\ngo.itab.\"\".Adder,\"\".Mather SRODATA dupok size=40\n    0x0000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................\n    0x0010 8a 3d 5f 61 00 00 00 00 00 00 00 00 00 00 00 00  .=_a............\n    0x0020 00 00 00 00 00 00 00 00                          ........\n```\n\nTo get a better picture of how the data is laid out into the final executable produced by the linker, we'll walk through the generated ELF file and manually reconstruct the bytes that make up the `itab` of our `iface<Mather, Adder>`.  \nHopefully, this'll enable us to observe what our `itab` looks like once the linker has done its job.\n\nFirst things first, let's build the `iface` binary: `GOOS=linux GOARCH=amd64 go build -o iface.bin iface.go`.\n\n**Step 1: Find `.rodata`**\n\nLet's print the section headers in search of `.rodata`, `readelf` can help with that:\n```Bash\n$ readelf -St -W iface.bin\nThere are 22 section headers, starting at offset 0x190:\n\nSection Headers:\n  [Nr] Name\n       Type            Address          Off    Size   ES   Lk Inf Al\n       Flags\n  [ 0] \n       NULL            0000000000000000 000000 000000 00   0   0  0\n       [0000000000000000]: \n  [ 1] .text\n       PROGBITS        0000000000401000 001000 04b3cf 00   0   0 16\n       [0000000000000006]: ALLOC, EXEC\n  [ 2] .rodata\n       PROGBITS        000000000044d000 04d000 028ac4 00   0   0 32\n       [0000000000000002]: ALLOC\n## ...omitted rest of output...\n```\nWhat we really need here is the (decimal) offset of the section, so let's apply some pipe-foo:\n```Bash\n$ readelf -St -W iface.bin | \\\n  grep -A 1 .rodata | \\\n  tail -n +2 | \\\n  awk '{print \"ibase=16;\"toupper($3)}' | \\\n  bc\n315392\n```\n\nWhich means that `fseek`-ing 315392 bytes into our binary should place us right at the start of the `.rodata` section.  
\nNow what we need to do is map this file location to a virtual-memory address.\n\n**Step 2: Find the virtual-memory address (VMA) of `.rodata`**\n\nThe VMA is the virtual address at which the section will be mapped once the binary has been loaded in memory by the OS. I.e., this is the address that we'll use to reference a symbol at runtime.\n\nThe reason we care about the VMA in this case is that we cannot directly ask `readelf` or `objdump` for the offset of a specific symbol (AFAIK). What we can do, on the other hand, is ask for the VMA of a specific symbol.  \nCoupled with some simple maths, we should be able to build a mapping between VMAs and offsets and finally find the offsets of the symbols that we're looking for.\n\nFinding the VMA of `.rodata` is no different than finding its offset, it's just a different column is all:\n```Bash\n$ readelf -St -W iface.bin | \\\n  grep -A 1 .rodata | \\\n  tail -n +2 | \\\n  awk '{print \"ibase=16;\"toupper($2)}' | \\\n  bc\n4509696\n```\n\nSo here's what we know so far: the `.rodata` section is located at offset `$315392` (= `0x04d000`) into the ELF file, which will be mapped at virtual address `$4509696` (= `0x44d000`) at run time.\n\nNow we need the VMA as well as the size of the symbol we're looking for:\n- Its VMA will (indirectly) allow us to locate it within the executable.\n- Its size will tell us how much data to extract once we've found the correct offset.\n\n**Step 3: Find the VMA & size of `go.itab.main.Adder,main.Mather`**\n\n`objdump` has those available for us.\n\nFirst, find the symbol:\n```Bash\n$ objdump -t -j .rodata iface.bin | grep \"go.itab.main.Adder,main.Mather\"\n0000000000475140 g     O .rodata\t0000000000000028 go.itab.main.Adder,main.Mather\n```\n\nThen, get its VMA in decimal form:\n```Bash\n$ objdump -t -j .rodata iface.bin | \\\n  grep \"go.itab.main.Adder,main.Mather\" | \\\n  awk '{print \"ibase=16;\"toupper($1)}' | \\\n  bc\n4673856\n```\n\nAnd finally, get its size in decimal 
form:\n```Bash\n$ objdump -t -j .rodata iface.bin | \\\n  grep \"go.itab.main.Adder,main.Mather\" | \\\n  awk '{print \"ibase=16;\"toupper($5)}' | \\\n  bc\n40\n```\n\nSo `go.itab.main.Adder,main.Mather` will be mapped at virtual address `$4673856` (= `0x475140`) at run time, and has a size of 40 bytes (which we already knew, as it's the size of an `itab` structure).\n\n**Step 4: Find & extract `go.itab.main.Adder,main.Mather`**\n\nWe now have all the elements we need in order to locate `go.itab.main.Adder,main.Mather` within our binary.  \n\nHere's a reminder of what we know so far:\n```\n.rodata offset: 0x04d000 == $315392\n.rodata VMA: 0x44d000 == $4509696\n\ngo.itab.main.Adder,main.Mather VMA: 0x475140 == $4673856\ngo.itab.main.Adder,main.Mather size: 0x28 == $40\n```\n\nIf `$315392` (`.rodata`'s offset) maps to `$4509696` (`.rodata`'s VMA) and `go.itab.main.Adder,main.Mather`'s VMA is `$4673856`, then `go.itab.main.Adder,main.Mather`'s offset within the executable is:  \n`sym.offset = sym.vma - section.vma + section.offset = $4673856 - $4509696 + $315392 = $479552`.\n\nNow that we know both the offset and size of the data, we can take out good ol' `dd` and extract the raw bytes straight out of the executable:  \n```Bash\n$ dd if=iface.bin of=/dev/stdout bs=1 count=40 skip=479552 2>/dev/null | hexdump\n0000000 bd20 0045 0000 0000 ed40 0045 0000 0000\n0000010 3d8a 615f 0000 0000 c2d0 0044 0000 0000\n0000020 c350 0044 0000 0000                    \n0000028\n```\n\nThis certainly does look like a clear-cut victory.. but is it, really? Maybe we've just dumped 40 totally random, unrelated bytes? Who knows?  
\nThere's at least one way to be sure: let's compare the type hash found in our binary dump (at offset `0x10+4` -> `0x615f3d8a`) with the one loaded by the runtime ([iface_type_hash.go](./iface_type_hash.go)):\n```Go\n// simplified definitions of runtime's iface & itab types\ntype iface struct {\n    tab  *itab\n    data unsafe.Pointer\n}\ntype itab struct {\n    inter uintptr\n    _type uintptr\n    hash  uint32\n    _     [4]byte\n    fun   [1]uintptr\n}\n\nfunc main() {\n    m := Mather(Adder{id: 6754})\n\n    iface := (*iface)(unsafe.Pointer(&m))\n    fmt.Printf(\"iface.tab.hash = %#x\\n\", iface.tab.hash) // 0x615f3d8a\n}\n```\n\nIt's a match! `fmt.Printf(\"iface.tab.hash = %#x\\n\", iface.tab.hash)` gives us `0x615f3d8a`, which corresponds to the value that we've extracted from the contents of the ELF file.\n\n**Conclusion**\n\nWe've reconstructed the complete `itab` for our `iface<Mather, Adder>` interface; it's all there in the executable, just waiting to be used, and already contains all the information that the runtime will need to make the interface behave as we expect.\n\nOf course, since an `itab` is mostly composed of a bunch of pointers to other datastructures, we'd have to follow the virtual addresses present in the contents that we've extracted via `dd` in order to reconstruct the complete picture.  
\nSpeaking of pointers, we can now have a clear view of the virtual-table for `iface<Mather, Adder>`; here's an annotated version of the contents of `go.itab.main.Adder,main.Mather`:\n```Bash\n$ dd if=iface.bin of=/dev/stdout bs=1 count=40 skip=479552 2>/dev/null | hexdump\n0000000 bd20 0045 0000 0000 ed40 0045 0000 0000\n0000010 3d8a 615f 0000 0000 c2d0 0044 0000 0000\n#                           ^^^^^^^^^^^^^^^^^^^\n#                       offset 0x18+8: itab.fun[0]\n0000020 c350 0044 0000 0000                    \n#       ^^^^^^^^^^^^^^^^^^^\n# offset 0x20+8: itab.fun[1]\n0000028\n```\n```Bash\n$ objdump -t -j .text iface.bin | grep 000000000044c2d0\n000000000044c2d0 g     F .text\t0000000000000079 main.(*Adder).Add\n```\n```Bash\n$ objdump -t -j .text iface.bin | grep 000000000044c350\n000000000044c350 g     F .text\t000000000000007f main.(*Adder).Sub\n```\n\nWithout surprise, the virtual table for `iface<Mather, Adder>` holds two method pointers: `main.(*Adder).Add` and `main.(*Adder).Sub`.  \nWell, actually, this *is* a bit surprising: we've never defined these two methods to have pointer receivers.  
\nThe compiler has generated these wrapper methods on our behalf (as we've described in the [\"Implicit dereferencing\" section](#implicit-dereferencing)) because it knows that we're going to need them: since an interface can only hold pointers, and since our `Adder` implementation of said interface only provides methods with value-receivers, we'll have to go through a wrapper at some point if we're going to call either of these methods via the virtual table of the interface.\n\nThis should already give you a pretty good idea of how dynamic dispatch is handled at run time; which is what we will look at in the next section.\n\n**Bonus**\n\nI've hacked up a generic bash script that you can use to dump the contents of any symbol in any section of an ELF file ([dump_sym.sh](./dump_sym.sh)):\n```Bash\n# ./dump_sym.sh bin_path section_name sym_name\n$ ./dump_sym.sh iface.bin .rodata go.itab.main.Adder,main.Mather\n.rodata file-offset: 315392\n.rodata VMA: 4509696\ngo.itab.main.Adder,main.Mather VMA: 4673856\ngo.itab.main.Adder,main.Mather SIZE: 40\n\n0000000 bd20 0045 0000 0000 ed40 0045 0000 0000\n0000010 3d8a 615f 0000 0000 c2d0 0044 0000 0000\n0000020 c350 0044 0000 0000                    \n0000028\n```\n\nI'd imagine there must exist an easier way to do what this script does, maybe some arcane flags or an obscure gem hidden inside the `binutils` distribution.. who knows.  \nIf you've got some hints, don't hesitate to say so in the issues.\n\n## Dynamic dispatch\n\nIn this section we'll finally cover the main feature of interfaces: dynamic dispatch.  
\nSpecifically, we'll look at how dynamic dispatch works under the hood, and how much we got to pay for it.\n\n### Indirect method call on interface\n\nLet's have a look back at our code from earlier ([iface.go](./iface.go)):\n```Go\ntype Mather interface {\n    Add(a, b int32) int32\n    Sub(a, b int64) int64\n}\n\ntype Adder struct{ id int32 }\n//go:noinline\nfunc (adder Adder) Add(a, b int32) int32 { return a + b }\n//go:noinline\nfunc (adder Adder) Sub(a, b int64) int64 { return a - b }\n\nfunc main() {\n    m := Mather(Adder{id: 6754})\n    m.Add(10, 32)\n}\n```\n\nWe've already had a deeper look into most of what happens in this piece of code: how the `iface<Mather, Adder>` interface gets created, how it's laid out in the final exectutable, and how it ends up being loaded by the runtime.  \nThere's only one thing left for us to look at, and that is the actual indirect method call that follows: `m.Add(10, 32)`.\n\nTo refresh our memory, we'll zoom in on both the creation of the interface as well as on the method call itself:\n```Go\nm := Mather(Adder{id: 6754})\nm.Add(10, 32)\n```\nThankfully, we already have a fully annotated version of the assembly generated by the instantiation done on the first line (`m := Mather(Adder{id: 6754})`):\n```Assembly\n;; m := Mather(Adder{id: 6754})\n0x001d MOVL\t$6754, \"\"..autotmp_1+36(SP)         ;; create an addressable $6754 value at 36(SP)\n0x0025 LEAQ\tgo.itab.\"\".Adder,\"\".Mather(SB), AX  ;; set up go.itab.\"\".Adder,\"\".Mather..\n0x002c MOVQ\tAX, (SP)                            ;; ..as first argument (tab *itab)\n0x0030 LEAQ\t\"\"..autotmp_1+36(SP), AX            ;; set up &36(SP)..\n0x0035 MOVQ\tAX, 8(SP)                           ;; ..as second argument (elem unsafe.Pointer)\n0x003a CALL\truntime.convT2I32(SB)               ;; runtime.convT2I32(go.itab.\"\".Adder,\"\".Mather, &$6754)\n0x003f MOVQ\t16(SP), AX                          ;; AX now holds i.tab (go.itab.\"\".Adder,\"\".Mather)\n0x0044 MOVQ\t24(SP), CX   
                       ;; CX now holds i.data (&$6754, somewhere on the heap)\n```\nAnd now, here's the assembly listing for the indirect method call that follows (`m.Add(10, 32)`):\n```Assembly\n;; m.Add(10, 32)\n0x0049 MOVQ\t24(AX), AX\n0x004d MOVQ\t$137438953482, DX\n0x0057 MOVQ\tDX, 8(SP)\n0x005c MOVQ\tCX, (SP)\n0x0060 CALL\tAX\n```\n\nWith the knowledge accumulated from the previous sections, these few instructions should be straightforward to understand.\n\n```Assembly\n0x0049 MOVQ\t24(AX), AX\n```\nOnce `runtime.convT2I32` has returned, `AX` holds `i.tab`, which as we know is a pointer to an `itab`; and more specifically a pointer to `go.itab.\"\".Adder,\"\".Mather` in this case.  \nBy dereferencing `AX` and offsetting 24 bytes forward, we reach `i.tab.fun`, which corresponds to the first entry of the virtual table.  \nHere's a reminder of what the offset table for `itab` looks like:\n```Go\ntype itab struct { // 40 bytes on a 64bit arch\n    inter *interfacetype // offset 0x00 ($00)\n    _type *_type\t // offset 0x08 ($08)\n    hash  uint32\t // offset 0x10 ($16)\n    _     [4]byte\t // offset 0x14 ($20)\n    fun   [1]uintptr\t // offset 0x18 ($24)\n\t\t\t // offset 0x20 ($32)\n}\n```\n\nAs we've seen in the previous section where we've reconstructed the final `itab` directly from the executable, `iface.tab.fun[0]` is a pointer to `main.(*Adder).Add`, which is the compiler-generated wrapper-method that wraps our original value-receiver `main.Adder.Add` method.\n\n```Assembly\n0x004d MOVQ\t$137438953482, DX\n0x0057 MOVQ\tDX, 8(SP)\n```\nWe store `10` and `32` at the top of the stack, as arguments #2 & #3.\n\n```Assembly\n0x005c MOVQ\tCX, (SP)\n0x0060 CALL\tAX\n```\nOnce `runtime.convT2I32` has returned, `CX` holds `i.data`, which is a pointer to our `Adder` instance.  
\nWe move this pointer to the top of stack, as argument #1, to satisfy the calling convention: the receiver for a method should always be passed as the first argument.\n\nFinally, with our stack all set up, we can do the actual call.\n\nWe'll close this section with a complete annotated assembly listing of the entire process:\n```Assembly\n;; m := Mather(Adder{id: 6754})\n0x001d MOVL\t$6754, \"\"..autotmp_1+36(SP)         ;; create an addressable $6754 value at 36(SP)\n0x0025 LEAQ\tgo.itab.\"\".Adder,\"\".Mather(SB), AX  ;; set up go.itab.\"\".Adder,\"\".Mather..\n0x002c MOVQ\tAX, (SP)                            ;; ..as first argument (tab *itab)\n0x0030 LEAQ\t\"\"..autotmp_1+36(SP), AX            ;; set up &36(SP)..\n0x0035 MOVQ\tAX, 8(SP)                           ;; ..as second argument (elem unsafe.Pointer)\n0x003a CALL\truntime.convT2I32(SB)               ;; runtime.convT2I32(go.itab.\"\".Adder,\"\".Mather, &$6754)\n0x003f MOVQ\t16(SP), AX                          ;; AX now holds i.tab (go.itab.\"\".Adder,\"\".Mather)\n0x0044 MOVQ\t24(SP), CX                          ;; CX now holds i.data (&$6754, somewhere on the heap)\n;; m.Add(10, 32)\n0x0049 MOVQ\t24(AX), AX                          ;; AX now holds (*iface.tab)+0x18, i.e. iface.tab.fun[0]\n0x004d MOVQ\t$137438953482, DX                   ;; move (32,10) to..\n0x0057 MOVQ\tDX, 8(SP)                           ;; ..the top of the stack (arguments #3 & #2)\n0x005c MOVQ\tCX, (SP)                            ;; CX, which holds &$6754 (i.e., our receiver), gets moved to\n                                                    ;; ..the top of stack (argument #1 -> receiver)\n0x0060 CALL\tAX                                  ;; you know the drill\n```\n\nWe now have a clear picture of the entire machinery required for interfaces and virtual method calls to work.  
\nIn the next section, we'll measure the actual cost of this machinery, in theory as well as in practice.\n\n### Overhead\n\nAs we've seen, the implementation of interfaces delegates most of the work to both the compiler and the linker. From a performance standpoint, this is obviously very good news: we effectively want to relieve the runtime of as much work as possible.  \nThere do exist some specific cases where instantiating an interface may also require the runtime to get to work (e.g. the `runtime.convT2*` family of functions), though they are not so prevalent in practice.  \nWe'll learn more about these edge cases in the [section dedicated to the special cases of interfaces](#special-cases--compiler-tricks). In the meantime, we'll concentrate purely on the overhead of virtual method calls and ignore the one-time costs related to instantiation.\n\nOnce an interface has been properly instantiated, calling methods on it is nothing more than going through one more layer of indirection compared to the usual statically dispatched call (i.e. dereferencing `itab.fun` at the desired index).  \nAs such, one would imagine this process to be virtually free.. and one would be kind of right, but not quite: the theory is a bit tricky, and the reality even trickier still.\n\n#### The theory: quick refresher on modern CPUs\n\nThe extra indirection inherent to virtual calls is, in and of itself, effectively free *for as long as it is somewhat predictable from the standpoint of the CPU*.  \nModern CPUs are very aggressive beasts: they cache aggressively, they aggressively pre-fetch both instructions & data, they pre-execute code aggressively, they even reorder and parallelize it as they see fit.  
\nAll of this extra work is done whether we want it or not, hence we should always strive not to get in the way of the CPU's efforts to be extra smart, so all of these precious cycles don't go needlessly wasted.\n\nThis is where virtual method calls can quickly become a problem.\n\nIn the case of a statically dispatched call, the CPU has foreknowledge of the upcoming branch in the program and pre-fetches the necessary instructions accordingly. This makes for a smooth, transparent transition from one branch of the program to the other as far as performance is concerned.  \nWith dynamic dispatch, on the other hand, the CPU cannot know in advance where the program is heading: it all depends on computations whose results are, by definition, not known until run time. To counter-balance this, the CPU applies various algorithms and heuristics in order to guess where the program is going to branch next (i.e. \"branch prediction\").\n\nIf the processor guesses correctly, we can expect a dynamic branch to be almost as efficient as a static one, since the instructions of the landing site have already been pre-fetched into the processor's caches anyway.\n\nIf it gets things wrong, though, things can get a bit rough: first, of course, we'll have to pay for the extra indirection plus the corresponding (slow) load from main memory (i.e. the CPU is effectively stalled) to load the right instructions into the L1i cache. Even worse, we'll have to pay the price of the CPU backtracking on its own mistakes and flushing its instruction pipeline following the branch misprediction.  
\nAnother important downside of dynamic dispatch is that it makes inlining impossible by definition: one simply cannot inline what they don't know is coming.\n\nAll in all, it should, at least in theory, be very possible to end up with massive differences in performance between a direct call to an inlined function F, and a call to that same function that couldn't be inlined and had to go through some extra layers of indirection, and maybe even got hit by a branch misprediction on its way.\n\nThat's mostly it for the theory.  \nWhen it comes to modern hardware, though, one should always be wary of the theory.\n\nLet's measure this stuff.\n\n#### The practice: benchmarks\n\nFirst things first, some information about the CPU we're running on:\n```Bash\n$ lscpu | sed -nr '/Model name/ s/.*:\\s*(.* @ .*)/\\1/p'\nIntel(R) Core(TM) i7-7700HQ CPU @ 2.80GHz\n```\n\nWe'll define the interface used for our benchmarks as such ([iface_bench_test.go](./iface_bench_test.go)):\n```Go\ntype identifier interface {\n    idInline() int32\n    idNoInline() int32\n}\n\ntype id32 struct{ id int32 }\n\n// NOTE: Use pointer receivers so we don't measure the extra overhead incurred by\n// autogenerated wrappers as part of our results.\n\nfunc (id *id32) idInline() int32 { return id.id }\n//go:noinline\nfunc (id *id32) idNoInline() int32 { return id.id }\n```\n\n**Benchmark suite A: single instance, many calls, inlined & non-inlined**\n\nFor our first two benchmarks, we'll try calling a non-inlined method inside a busy-loop, on both an `*id32` value and an `iface<identifier, *id32>` interface:\n```Go\nvar escapeMePlease *id32\n// escapeToHeap makes sure that `id` escapes to the heap.\n//\n// In simple situations such as some of the benchmarks present in this file,\n// the compiler is able to statically infer the underlying type of the\n// interface (or rather the type of the data it points to, to be pedantic) and\n// ends up replacing what should have been a dynamic method call by a\n// static 
call.\n// This anti-optimization prevents this extra cleverness.\n//\n//go:noinline\nfunc escapeToHeap(id *id32) identifier {\n    escapeMePlease = id\n    return escapeMePlease\n}\n\nvar myID int32\n\nfunc BenchmarkMethodCall_direct(b *testing.B) {\n    b.Run(\"single/noinline\", func(b *testing.B) {\n        m := escapeToHeap(&id32{id: 6754}).(*id32)\n        for i := 0; i < b.N; i++ {\n            // CALL \"\".(*id32).idNoInline(SB)\n            // MOVL 8(SP), AX\n            // MOVQ \"\".&myID+40(SP), CX\n            // MOVL AX, (CX)\n            myID = m.idNoInline()\n        }\n    })\n}\n\nfunc BenchmarkMethodCall_interface(b *testing.B) {\n    b.Run(\"single/noinline\", func(b *testing.B) {\n        m := escapeToHeap(&id32{id: 6754})\n        for i := 0; i < b.N; i++ {\n            // MOVQ 32(AX), CX\n            // MOVQ \"\".m.data+40(SP), DX\n            // MOVQ DX, (SP)\n            // CALL CX\n            // MOVL 8(SP), AX\n            // MOVQ \"\".&myID+48(SP), CX\n            // MOVL AX, (CX)\n            myID = m.idNoInline()\n        }\n    })\n}\n```\n\nWe expect both benchmarks to run A) extremely fast and B) at almost the same speeds.\n\nGiven the tightness of the loop, we can expect both benchmarks to have their data (receiver & vtable) and instructions (`\"\".(*id32).idNoInline`) already be present in the L1d/L1i caches of the CPU for each iteration of the loop. I.e., performance should be purely CPU-bound.\n\n`BenchmarkMethodCall_interface` should run a bit slower (on the nanosecond scale) though, as it has to deal with the overhead of finding & copying the right pointer from the virtual table (which is already in the L1 cache, though).  \nSince the `CALL CX` instruction has a strong dependency on the output of these few extra instructions required to consult the vtable, the processor has no choice but to execute all of this extra logic as a sequential stream, leaving any chance of instruction-level parallelization on the table.  
\nThis is ultimately the main reason why we would expect the \"interface\" version to run a bit slower.\n\nWe end up with the following results for the \"direct\" version:\n```Bash\n$ go test -run=NONE -o iface_bench_test.bin iface_bench_test.go && \\\n  perf stat --cpu=1 \\\n  taskset 2 \\\n  ./iface_bench_test.bin -test.cpu=1 -test.benchtime=1s -test.count=3 \\\n      -test.bench='BenchmarkMethodCall_direct/single/noinline'\nBenchmarkMethodCall_direct/single/noinline         \t2000000000\t         1.81 ns/op\nBenchmarkMethodCall_direct/single/noinline         \t2000000000\t         1.80 ns/op\nBenchmarkMethodCall_direct/single/noinline         \t2000000000\t         1.80 ns/op\n\n Performance counter stats for 'CPU(s) 1':\n\n      11702.303843      cpu-clock (msec)          #    1.000 CPUs utilized          \n             2,481      context-switches          #    0.212 K/sec                  \n                 1      cpu-migrations            #    0.000 K/sec                  \n             7,349      page-faults               #    0.628 K/sec                  \n    43,726,491,825      cycles                    #    3.737 GHz                    \n   110,979,100,648      instructions              #    2.54  insn per cycle         \n    19,646,440,556      branches                  # 1678.852 M/sec                  \n           566,424      branch-misses             #    0.00% of all branches        \n\n      11.702332281 seconds time elapsed\n```\nAnd here's for the \"interface\" version:\n```Bash\n$ go test -run=NONE -o iface_bench_test.bin iface_bench_test.go && \\\n  perf stat --cpu=1 \\\n  taskset 2 \\\n  ./iface_bench_test.bin -test.cpu=1 -test.benchtime=1s -test.count=3 \\\n      -test.bench='BenchmarkMethodCall_interface/single/noinline'\nBenchmarkMethodCall_interface/single/noinline         \t2000000000\t         1.95 ns/op\nBenchmarkMethodCall_interface/single/noinline         \t2000000000\t         1.96 
ns/op\nBenchmarkMethodCall_interface/single/noinline         \t2000000000\t         1.96 ns/op\n\n Performance counter stats for 'CPU(s) 1':\n\n      12709.383862      cpu-clock (msec)          #    1.000 CPUs utilized          \n             3,003      context-switches          #    0.236 K/sec                  \n                 1      cpu-migrations            #    0.000 K/sec                  \n            10,524      page-faults               #    0.828 K/sec                  \n    47,301,533,147      cycles                    #    3.722 GHz                    \n   124,467,105,161      instructions              #    2.63  insn per cycle         \n    19,878,711,448      branches                  # 1564.097 M/sec                  \n           761,899      branch-misses             #    0.00% of all branches        \n\n      12.709412950 seconds time elapsed\n```\n\nThe results match our expectations: the \"interface\" version is indeed a bit slower, with approximately 0.15 extra nanoseconds per iteration, or a ~8% slowdown.  \n8% might sound like a noticeable difference at first, but we have to keep in mind that A) these are nanosecond-scale measurements and B) the method being called does so little work that it magnifies even more the overhead of the call.\n\nLooking at the number of instructions per benchmark, we see that the interface-based version has had to execute for ~14 billion more instructions compared to the \"direct\" version (`110,979,100,648` vs. `124,467,105,161`), even though both benchmarks were run for `6,000,000,000` (`2,000,000,000\\*3`) iterations.  \nAs we've mentioned before, the CPU cannot parallelize these extra instructions due to the `CALL` depending on them, which gets reflected quite clearly in the instruction-per-cycle ratio: both benchmarks end up with a similar IPC ratio (`2.54` vs. `2.63`) even though the \"interface\" version has much more work to do overall.  
\nThis lack of parallelism piles up to an extra ~3.5 billion CPU cycles for the \"interface\" version, which is where those extra 0.15ns that we've measured are actually spent.\n\nNow what happens when we let the compiler inline the method call?\n\n```Go\nvar myID int32\n\nfunc BenchmarkMethodCall_direct(b *testing.B) {\n    b.Run(\"single/inline\", func(b *testing.B) {\n        m := escapeToHeap(&id32{id: 6754}).(*id32)\n        b.ResetTimer()\n        for i := 0; i < b.N; i++ {\n            // MOVL (DX), SI\n            // MOVL SI, (CX)\n            myID = m.idInline()\n        }\n    })\n}\n\nfunc BenchmarkMethodCall_interface(b *testing.B) {\n    b.Run(\"single/inline\", func(b *testing.B) {\n        m := escapeToHeap(&id32{id: 6754})\n        b.ResetTimer()\n        for i := 0; i < b.N; i++ {\n            // MOVQ 32(AX), CX\n            // MOVQ \"\".m.data+40(SP), DX\n            // MOVQ DX, (SP)\n            // CALL CX\n            // MOVL 8(SP), AX\n            // MOVQ \"\".&myID+48(SP), CX\n            // MOVL AX, (CX)\n            myID = m.idNoInline()\n        }\n    })\n}\n```\n\nTwo things jump out at us:\n- `BenchmarkMethodCall_direct`: Thanks to inlining, the call has been reduced to a simple pair of memory moves.\n- `BenchmarkMethodCall_interface`: Due to dynamic dispatch, the compiler has been unable to inline the call, thus the generated assembly ends up being exactly the same as before.\n\nWe won't even bother running `BenchmarkMethodCall_interface` since the code hasn't changed a bit.  
\nLet's have a quick look at the \"direct\" version though:\n```Bash\n$ go test -run=NONE -o iface_bench_test.bin iface_bench_test.go && \\\n  perf stat --cpu=1 \\\n  taskset 2 \\\n  ./iface_bench_test.bin -test.cpu=1 -test.benchtime=1s -test.count=3 \\\n      -test.bench='BenchmarkMethodCall_direct/single/inline'\nBenchmarkMethodCall_direct/single/inline         \t2000000000\t         0.35 ns/op\nBenchmarkMethodCall_direct/single/inline         \t2000000000\t         0.34 ns/op\nBenchmarkMethodCall_direct/single/inline         \t2000000000\t         0.34 ns/op\n\n Performance counter stats for 'CPU(s) 1':\n\n       2464.353001      cpu-clock (msec)          #    1.000 CPUs utilized          \n               629      context-switches          #    0.255 K/sec                  \n                 1      cpu-migrations            #    0.000 K/sec                  \n             7,322      page-faults               #    0.003 M/sec                  \n     9,026,867,915      cycles                    #    3.663 GHz                    \n    41,580,825,875      instructions              #    4.61  insn per cycle         \n     7,027,066,264      branches                  # 2851.485 M/sec                  \n         1,134,955      branch-misses             #    0.02% of all branches        \n\n       2.464386341 seconds time elapsed\n```\n\nAs expected, this runs ridiculously fast now that the overhead of the call is gone.  
\nWith ~0.34ns per op for the \"direct\" inlined version, the \"interface\" version is now ~475% slower, quite a steep drop from the ~8% difference that we've measured earlier with inlining disabled.\n\nNotice how, with the branching inherent to the method call now gone, the CPU is able to parallelize and speculatively execute the remaining instructions much more efficiently, reaching an IPC ratio of 4.61.\n\n**Benchmark suite B: many instances, many non-inlined calls, small/big/pseudo-random iterations**\n\nFor this second benchmark suite, we'll look at a more real-world-like situation in which an iterator goes through a slice of objects that all expose a common method and calls it for each object.  \nTo better mimic reality, we'll disable inlining, as most methods called this way in a real program would most likely be sufficiently complex not to be inlined by the compiler (YMMV; a good counter-example of this is the `sort.Interface` interface from the standard library).\n\nWe'll define 3 similar benchmarks that just differ in the way they access this slice of objects; the goal being to simulate decreasing levels of cache friendliness:\n1. In the first case, the iterator walks the array in order, calls the method, then gets incremented by the size of one object for each iteration.\n1. In the second case, the iterator still walks the slice in order, but this time gets incremented by a value that's larger than the size of a single cache-line.\n1. 
Finally, in the third case, the iterator will pseudo-randomly step through the slice.\n\nIn all three cases, we'll make sure that the array is big enough not to fit entirely in any of the processor's caches in order to simulate (not-so-accurately) a very busy server that's putting a lot of pressure on both its CPU caches and main memory.\n\nHere's a quick recap of the processor's attributes; we'll design the benchmarks accordingly:\n```Bash\n$ lscpu | sed -nr '/Model name/ s/.*:\\s*(.* @ .*)/\\1/p'\nIntel(R) Core(TM) i7-7700HQ CPU @ 2.80GHz\n$ lscpu | grep cache\nL1d cache:           32K\nL1i cache:           32K\nL2 cache:            256K\nL3 cache:            6144K\n$ getconf LEVEL1_DCACHE_LINESIZE\n64\n$ getconf LEVEL1_ICACHE_LINESIZE\n64\n$ find /sys/devices/system/cpu/cpu0/cache/index{1,2,3} -name \"shared_cpu_list\" -exec cat {} \\;\n# (annotations are mine)\n0,4 # L1 (hyperthreading)\n0,4 # L2 (hyperthreading)\n0-7 # L3 (shared + hyperthreading)\n```\n\nHere's what the benchmark suite looks like for the \"direct\" version (the benchmarks marked as `baseline` compute the cost of retrieving the receiver in isolation, so that we can subtract that cost from the final measurements):\n```Go\nconst _maxSize = 2097152             // 2^21\nconst _maxSizeModMask = _maxSize - 1 // avoids a mod (%) in the hot path\n\nvar _randIndexes = [_maxSize]int{}\nfunc init() {\n    rand.Seed(42)\n    for i := range _randIndexes {\n        _randIndexes[i] = rand.Intn(_maxSize)\n    }\n}\n\nfunc BenchmarkMethodCall_direct(b *testing.B) {\n    adders := make([]*id32, _maxSize)\n    for i := range adders {\n        adders[i] = &id32{id: int32(i)}\n    }\n    runtime.GC()\n\n    var myID int32\n\n    b.Run(\"many/noinline/small_incr\", func(b *testing.B) {\n        var m *id32\n        b.Run(\"baseline\", func(b *testing.B) {\n            for i := 0; i < b.N; i++ {\n                m = adders[i&_maxSizeModMask]\n            }\n        })\n        b.Run(\"call\", func(b *testing.B) {\n 
           for i := 0; i < b.N; i++ {\n                m = adders[i&_maxSizeModMask]\n                myID = m.idNoInline()\n            }\n        })\n    })\n    b.Run(\"many/noinline/big_incr\", func(b *testing.B) {\n        var m *id32\n        b.Run(\"baseline\", func(b *testing.B) {\n            j := 0\n            for i := 0; i < b.N; i++ {\n                m = adders[j&_maxSizeModMask]\n                j += 32\n            }\n        })\n        b.Run(\"call\", func(b *testing.B) {\n            j := 0\n            for i := 0; i < b.N; i++ {\n                m = adders[j&_maxSizeModMask]\n                myID = m.idNoInline()\n                j += 32\n            }\n        })\n    })\n    b.Run(\"many/noinline/random_incr\", func(b *testing.B) {\n        var m *id32\n        b.Run(\"baseline\", func(b *testing.B) {\n            for i := 0; i < b.N; i++ {\n                m = adders[_randIndexes[i&_maxSizeModMask]]\n            }\n        })\n        b.Run(\"call\", func(b *testing.B) {\n            for i := 0; i < b.N; i++ {\n                m = adders[_randIndexes[i&_maxSizeModMask]]\n                myID = m.idNoInline()\n            }\n        })\n    })\n}\n```\nThe benchmark suite for the \"interface\" version is identical, except that the array is initialized with interface values instead of pointers to the concrete type, as one would expect:\n```Go\nfunc BenchmarkMethodCall_interface(b *testing.B) {\n    adders := make([]identifier, _maxSize)\n    for i := range adders {\n        adders[i] = identifier(&id32{id: int32(i)})\n    }\n    runtime.GC()\n\n    /* ... 
*/\n}\n```\n\nFor the \"direct\" suite, we get the following results:\n```Bash\n$ go test -run=NONE -o iface_bench_test.bin iface_bench_test.go && \\\n  benchstat <(\n    taskset 2 ./iface_bench_test.bin -test.cpu=1 -test.benchtime=1s -test.count=3 \\\n      -test.bench='BenchmarkMethodCall_direct/many/noinline')\nname                                                  time/op\nMethodCall_direct/many/noinline/small_incr/baseline   0.99ns ± 3%\nMethodCall_direct/many/noinline/small_incr/call       2.32ns ± 1% # 2.32 - 0.99 = 1.33ns\nMethodCall_direct/many/noinline/big_incr/baseline     5.86ns ± 0%\nMethodCall_direct/many/noinline/big_incr/call         17.1ns ± 1% # 17.1 - 5.86 = 11.24ns\nMethodCall_direct/many/noinline/random_incr/baseline  8.80ns ± 0%\nMethodCall_direct/many/noinline/random_incr/call      30.8ns ± 0% # 30.8 - 8.8 = 22ns\n```\nThere really are no surprises here:\n1. `small_incr`: By being *extremely* cache-friendly, we obtain results similar to the previous benchmark that looped over a single instance.\n2. `big_incr`: By forcing the CPU to fetch a new cache-line at every iteration, we do see a noticeable bump in latencies, which is completely unrelated to the cost of doing the call, though: ~6ns are attributable to the baseline while the rest is a combination of the cost of dereferencing the receiver in order to get to its `id` field and copying around the return value.\n3. 
`random_incr`: Same remarks as with `big_incr`, except that the bump in latencies is even more pronounced due to A) the pseudo-random accesses and B) the cost of retrieving the next index from the pre-computed array of indexes (which triggers cache misses in and of itself).\n\nAs logic would dictate, thrashing the CPU d-caches doesn't seem to influence the latency of the actual direct method call (inlined or not) by any means, although it does make everything that surrounds it slower.\n\nWhat about dynamic dispatch?\n```Bash\n$ go test -run=NONE -o iface_bench_test.bin iface_bench_test.go && \\\n  benchstat <(\n    taskset 2 ./iface_bench_test.bin -test.cpu=1 -test.benchtime=1s -test.count=3 \\\n      -test.bench='BenchmarkMethodCall_interface/many/noinline')\nname                                                     time/op\nMethodCall_interface/many/noinline/small_incr/baseline   1.38ns ± 0%\nMethodCall_interface/many/noinline/small_incr/call       3.48ns ± 0% # 3.48 - 1.38 = 2.1ns\nMethodCall_interface/many/noinline/big_incr/baseline     6.86ns ± 0%\nMethodCall_interface/many/noinline/big_incr/call         19.6ns ± 1% # 19.6 - 6.86 = 12.74ns\nMethodCall_interface/many/noinline/random_incr/baseline  11.0ns ± 0%\nMethodCall_interface/many/noinline/random_incr/call      34.7ns ± 0% # 34.7 - 11.0 = 23.7ns\n```\nThe results are extremely similar, albeit a tiny bit slower overall simply due to the fact that we're copying two quad-words (i.e. both fields of an `identifier` interface) out of the slice at each iteration instead of one (a pointer to `id32`).\n\nThe reason this runs almost as fast as its \"direct\" counterpart is that, since all the interfaces in the slice share a common `itab` (i.e. they're all `iface<identifier, *id32>` interfaces), their associated virtual table never leaves the L1d cache and so fetching the right method pointer at each iteration is virtually free.  
\nLikewise, the instructions that make up the body of the `main.(*id32).idNoInline` method never leave the L1i cache.\n\nOne might think that, in practice, a slice of interfaces would encompass many different underlying types (and thus vtables), which would result in thrashing of both the L1i and L1d caches due to the varying vtables pushing each other out.  \nWhile that holds true in theory, these kinds of thoughts tend to be the result of years of experience using older OOP languages such as C++ that (used to, at least) encourage the use of deeply-nested hierarchies of inherited classes and virtual calls as their main tool of abstraction.  \nWith big enough hierarchies, the number of associated vtables could sometimes get large enough to thrash the CPU caches when iterating over a datastructure holding various implementations of a virtual class (think e.g. of a GUI framework where everything is a `Widget` stored in a graph-like datastructure); especially since, in C++ at least, virtual classes tend to specify quite complex behaviors, sometimes with dozens of methods, resulting in quite big vtables and even more pressure on the L1d cache.\n\nGo, on the other hand, has very different idioms: OOP has been completely thrown out of the window, the type system flattened, and interfaces are most often used to describe minimal, constrained behaviors (a few methods at most on average, helped by the fact that interfaces are implicitly satisfied) instead of being used as an abstraction on top of a more complex, layered type hierarchy.  \nIn practice, in Go, I've found it's very rare to have to iterate over a set of interfaces that carry many different underlying types. 
YMMV, of course.\n\nFor the curious-minded, here's what the results of the \"direct\" version would have looked like with inlining enabled:\n```Bash\nname                                                time/op\nMethodCall_direct/many/inline/small_incr            0.97ns ± 1% # 0.97ns\nMethodCall_direct/many/inline/big_incr/baseline     5.96ns ± 1%\nMethodCall_direct/many/inline/big_incr/call         11.9ns ± 1% # 11.9 - 5.96 = 5.94ns\nMethodCall_direct/many/inline/random_incr/baseline  9.20ns ± 1%\nMethodCall_direct/many/inline/random_incr/call      16.9ns ± 1% # 16.9 - 9.2 = 7.7ns\n```\nWhich would have made the \"direct\" version around 2 to 3 times faster than the \"interface\" version in cases where the compiler would have been able to inline the call.  \nThen again, as we've mentioned earlier, the limited capabilities of the current compiler with regard to inlining mean that, in practice, these kinds of wins would rarely be seen. And of course, there often are times when you really don't have a choice but to resort to virtual calls anyway.\n\n**Conclusion**\n\nEffectively measuring the latency of a virtual call turned out to be quite a complex endeavor, as most of it is the direct consequence of many intertwined side-effects that result from the very complex implementation details of modern hardware.\n\n*In Go*, thanks to the idioms encouraged by the design of the language, and taking into account the (current) limitations of the compiler with regard to inlining, one could effectively consider dynamic dispatch as virtually free.  
\nStill, when in doubt, one should always measure their hot paths and look at the relevant performance counters to determine with certainty whether dynamic dispatch ends up being an issue or not.\n\n*(NOTE: We will look at the inlining capabilities of the compiler in a later chapter of this book.)*\n\n## Special cases & compiler tricks\n\nThis section will review some of the most common special cases that we encounter every day when dealing with interfaces.\n\nBy now you should have a pretty clear idea of how interfaces work, so we'll try and aim for conciseness here.\n\n### The empty interface\n\nThe datastructure for the empty interface is what you'd intuitively think it would be: an `iface` without an `itab`.  \nThere are two reasons for that:\n1. Since the empty interface has no methods, everything related to dynamic dispatch can safely be dropped from the datastructure.\n1. With the virtual table gone, the type of the empty interface itself, not to be confused with the type of the data it holds, is always the same (i.e. we talk about *the* empty interface rather than *an* empty interface).\n\n*NOTE: Similar to the notation we used for `iface`, we'll denote the empty interface holding a type T as `eface<T>`.*\n\n`eface` is the root type that represents the empty interface within the runtime ([src/runtime/runtime2.go](https://github.com/golang/go/blob/bf86aec25972f3a100c3aa58a6abcbcc35bdea49/src/runtime/runtime2.go#L148-L151)).  \nIts definition goes like this:\n```Go\ntype eface struct { // 16 bytes on a 64bit arch\n    _type *_type\n    data  unsafe.Pointer\n}\n```\nWhere `_type` holds the type information of the value pointed to by `data`.  
\nAs expected, the `itab` has been dropped entirely.\n\nWhile the empty interface could just reuse the `iface` datastructure (it is a superset of `eface` after all), the runtime chooses to distinguish the two for two main reasons: space efficiency and code clarity.\n\n### Interface holding a scalar type\n\nEarlier in this chapter ([#Anatomy of an Interface](#overview-of-the-datastructures)), we've mentioned that even storing a simple scalar type such as an integer into an interface will result in a heap allocation.  \nIt's time we see why, and how.\n\nConsider these two benchmarks ([eface_scalar_test.go](./eface_scalar_test.go)):\n```Go\nfunc BenchmarkEfaceScalar(b *testing.B) {\n    var Uint uint32\n    b.Run(\"uint32\", func(b *testing.B) {\n        for i := 0; i < b.N; i++ {\n            Uint = uint32(i)\n        }\n    })\n    var Eface interface{}\n    b.Run(\"eface32\", func(b *testing.B) {\n        for i := 0; i < b.N; i++ {\n            Eface = uint32(i)\n        }\n    })\n}\n```\n```Bash\n$ go test -benchmem -bench=. ./eface_scalar_test.go\nBenchmarkEfaceScalar/uint32-8         \t2000000000\t   0.54 ns/op\t  0 B/op     0 allocs/op\nBenchmarkEfaceScalar/eface32-8        \t 100000000\t   12.3 ns/op\t  4 B/op     1 allocs/op\n```\n1. That's a ~23x difference (more than an order of magnitude) in performance for a simple assignment operation, and\n1. 
we can see that the second benchmark has to allocate 4 extra bytes at each iteration.\n\nClearly, some hidden heavy machinery is being set off in the second case: we need to have a look at the generated assembly.\n\nFor the first benchmark, the compiler generates exactly what you'd expect it to with regard to the assignment operation:\n```Assembly\n;; Uint = uint32(i)\n0x000d MOVL\tDX, (AX)\n```\n\nIn the second benchmark, though, things get far more complex:\n```Assembly\n;; Eface = uint32(i)\n0x0050 MOVL\tCX, \"\"..autotmp_3+36(SP)\n0x0054 LEAQ\ttype.uint32(SB), AX\n0x005b MOVQ\tAX, (SP)\n0x005f LEAQ\t\"\"..autotmp_3+36(SP), DX\n0x0064 MOVQ\tDX, 8(SP)\n0x0069 CALL\truntime.convT2E32(SB)\n0x006e MOVQ\t24(SP), AX\n0x0073 MOVQ\t16(SP), CX\n0x0078 MOVQ\t\"\".&Eface+48(SP), DX\n0x007d MOVQ\tCX, (DX)\n0x0080 MOVL\truntime.writeBarrier(SB), CX\n0x0086 LEAQ\t8(DX), DI\n0x008a TESTL\tCX, CX\n0x008c JNE\t148\n0x008e MOVQ\tAX, 8(DX)\n0x0092 JMP\t46\n0x0094 CALL\truntime.gcWriteBarrier(SB)\n0x0099 JMP\t46\n```\nThis is *just* the assignment, not the complete benchmark!  
\nWe'll have to study this code piece by piece.\n\n**Step 1: Create the interface**\n\n```Assembly\n0x0050 MOVL\tCX, \"\"..autotmp_3+36(SP)\n0x0054 LEAQ\ttype.uint32(SB), AX\n0x005b MOVQ\tAX, (SP)\n0x005f LEAQ\t\"\"..autotmp_3+36(SP), DX\n0x0064 MOVQ\tDX, 8(SP)\n0x0069 CALL\truntime.convT2E32(SB)\n0x006e MOVQ\t24(SP), AX\n0x0073 MOVQ\t16(SP), CX\n```\n\nThis first piece of the listing instantiates the empty interface `eface<uint32>` that we will later assign to `Eface`.\n\nWe've already studied similar code in the section about creating interfaces ([#Creating an interface](#creating-an-interface)), except that that code was calling `runtime.convT2I32`, whereas here we call `runtime.convT2E32`; nonetheless, this should look very familiar.\n\nIt turns out that `runtime.convT2I32` and `runtime.convT2E32` are part of a larger family of functions whose job is to instantiate either a specific interface or the empty interface from a scalar value (or a string or slice, as special cases).  \nThis family is composed of 10 symbols, one for each combination of `(eface/iface, 16/32/64/string/slice)`:\n```Go\n// empty interface from scalar value\nfunc convT2E16(t *_type, elem unsafe.Pointer) (e eface) {}\nfunc convT2E32(t *_type, elem unsafe.Pointer) (e eface) {}\nfunc convT2E64(t *_type, elem unsafe.Pointer) (e eface) {}\nfunc convT2Estring(t *_type, elem unsafe.Pointer) (e eface) {}\nfunc convT2Eslice(t *_type, elem unsafe.Pointer) (e eface) {}\n\n// interface from scalar value\nfunc convT2I16(tab *itab, elem unsafe.Pointer) (i iface) {}\nfunc convT2I32(tab *itab, elem unsafe.Pointer) (i iface) {}\nfunc convT2I64(tab *itab, elem unsafe.Pointer) (i iface) {}\nfunc convT2Istring(tab *itab, elem unsafe.Pointer) (i iface) {}\nfunc convT2Islice(tab *itab, elem unsafe.Pointer) (i iface) {}\n```\n(*You'll notice that there is no `convT2E8` nor `convT2I8` function; this is due to a compiler optimization that we'll examine at the end of this section.*)\n\nAll of these functions do 
almost the exact same thing; they only differ in the type of their return value (`iface` vs. `eface`) and the size of the memory that they allocate on the heap.  \nLet's take a look at e.g. `runtime.convT2E32` more closely ([src/runtime/iface.go](https://github.com/golang/go/blob/bf86aec25972f3a100c3aa58a6abcbcc35bdea49/src/runtime/iface.go#L308-L325)):\n```Go\nfunc convT2E32(t *_type, elem unsafe.Pointer) (e eface) {\n    /* ...omitted debug stuff... */\n    var x unsafe.Pointer\n    if *(*uint32)(elem) == 0 {\n        x = unsafe.Pointer(&zeroVal[0])\n    } else {\n        x = mallocgc(4, t, false)\n        *(*uint32)(x) = *(*uint32)(elem)\n    }\n    e._type = t\n    e.data = x\n    return\n}\n```\n\nThe function initializes the `_type` field of the `eface` structure \"passed\" in by the caller (remember: return values are allocated by the caller on its own stack-frame) with the `_type` given as first parameter.  \nFor the `data` field of the `eface`, it all depends on the value pointed to by the second parameter `elem`:\n- If `*elem` is zero, `e.data` is initialized to point to `runtime.zeroVal`, which is a special global variable defined by the runtime that represents the zero value. We'll discuss this special variable a bit more in the next section.\n- If `*elem` is non-zero, the function allocates 4 bytes on the heap (`x = mallocgc(4, t, false)`), initializes the contents of those 4 bytes with the value pointed to by `elem` (`*(*uint32)(x) = *(*uint32)(elem)`), then sticks the resulting pointer into `e.data`.\n\nIn this case, `e._type` holds the address of `type.uint32` (`LEAQ type.uint32(SB), AX`), which is implemented by the standard library and whose address will only be known when linking against said stdlib:\n```Bash\n$ go tool nm eface_scalar_test.o | grep 'type\\.uint32'\n         U type.uint32\n```\n(`U` denotes that the symbol is not defined in this object file, and will (hopefully) be provided by another object at link-time (i.e. 
the standard library in this case).)\n\n**Step 2: Assign the result (part 1)**\n\n```Assembly\n0x0078 MOVQ\t\"\".&Eface+48(SP), DX\n0x007d MOVQ\tCX, (DX)\t\t;; Eface._type = ret._type\n```\n\nThe result of `runtime.convT2E32` gets assigned to our `Eface` variable.. or does it?\n\nActually, for now, only the `_type` field of the returned value is being assigned to `Eface._type`, the `data` field cannot be copied over just yet.\n\n**Step 3: Assign the result (part 2) or ask the garbage collector to**\n\n```Assembly\n0x0080 MOVL\truntime.writeBarrier(SB), CX\n0x0086 LEAQ\t8(DX), DI\t;; Eface.data = ret.data (indirectly via runtime.gcWriteBarrier)\n0x008a TESTL\tCX, CX\n0x008c JNE\t148\n0x008e MOVQ\tAX, 8(DX)\t;; Eface.data = ret.data (direct)\n0x0092 JMP\t46\n0x0094 CALL\truntime.gcWriteBarrier(SB)\n0x0099 JMP\t46\n```\n\nThe apparent complexity of this last piece is a side-effect of assigning the `data` pointer of the returned `eface` to `Eface.data`: since we're manipulating the memory graph of our program (i.e. which part of memory holds references to which part of memory), we may have to notify the garbage collector of this change, just in case a garbage collection were to be currently running in the background.\n\nThis is known as a write barrier, and is a direct consequence of Go's *concurrent* garbage collector.  \nDon't worry if this sounds a bit vague for now; the next chapter of this book will offer a thorough review of garbage collection in Go.  
\nFor now, it's enough to remember that when we see some assembly code calling into `runtime.gcWriteBarrier`, it has to do with pointer manipulation and notifying the garbage collector.\n\nAll in all, this final piece of code can do one of two things:\n- If the write-barrier is currently inactive, it assigns `ret.data` to `Eface.data` (`MOVQ AX, 8(DX)`).\n- If the write-barrier is active, it politely asks the garbage-collector to do the assignment on our behalf  \n(`LEAQ 8(DX), DI` + `CALL runtime.gcWriteBarrier(SB)`).\n\n(*Once again, try not to worry too much about this for now.*)\n\nVoila, we've got a complete interface holding a simple scalar type (`uint32`).\n\n**Conclusion**\n\nWhile sticking a scalar value into an interface is not something that happens that often in practice, it can be a costly operation for various reasons, and as such it's important to be aware of the machinery behind it.\n\nSpeaking of cost, we've mentioned that the compiler implements various tricks to avoid allocating in some specific situations; we'll close this section with a quick look at 3 of those tricks.\n\n**Interface trick 1: Byte-sized values**\n\nConsider this benchmark that instanciates an `eface<uint8>` ([eface_scalar_test.go](./eface_scalar_test.go)):\n```Go\nfunc BenchmarkEfaceScalar(b *testing.B) {\n    b.Run(\"eface8\", func(b *testing.B) {\n        for i := 0; i < b.N; i++ {\n            // LEAQ    type.uint8(SB), BX\n            // MOVQ    BX, (CX)\n            // MOVBLZX AL, SI\n            // LEAQ    runtime.staticbytes(SB), R8\n            // ADDQ    R8, SI\n            // MOVL    runtime.writeBarrier(SB), R9\n            // LEAQ    8(CX), DI\n            // TESTL   R9, R9\n            // JNE     100\n            // MOVQ    SI, 8(CX)\n            // JMP     40\n            // MOVQ    AX, R9\n            // MOVQ    SI, AX\n            // CALL    runtime.gcWriteBarrier(SB)\n            // MOVQ    R9, AX\n            // JMP     40\n            Eface = uint8(i)\n       
 }\n    })\n}\n```\n```Bash\n$ go test -benchmem -bench=BenchmarkEfaceScalar/eface8 ./eface_scalar_test.go\nBenchmarkEfaceScalar/eface8-8         \t2000000000\t   0.88 ns/op\t  0 B/op     0 allocs/op\n```\n\nWe notice that in the case of a byte-sized value, the compiler avoids the call to `runtime.convT2E`/`runtime.convT2I` and the associated heap allocation, and instead re-uses the address of a global variable exposed by the runtime that already holds the 1-byte value we're looking for: `LEAQ    runtime.staticbytes(SB), R8`.\n\n`runtime.staticbytes` can be found in [src/runtime/iface.go](https://github.com/golang/go/blob/bf86aec25972f3a100c3aa58a6abcbcc35bdea49/src/runtime/iface.go#L619-L653) and looks like this:\n```Go\n// staticbytes is used to avoid convT2E for byte-sized values.\nvar staticbytes = [...]byte{\n    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,\n    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,\n    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,\n    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,\n    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,\n    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,\n    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,\n    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,\n    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,\n    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,\n    0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,\n    0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 
0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,\n    0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,\n    0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,\n    0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,\n    0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,\n}\n```\nUsing the right offset into this array, the compiler can effectively avoid an extra heap allocation and still reference any value representable as a single byte.\n\nSomething feels wrong here, though.. can you tell?  \nThe generated code still embeds all the machinery related to the write-barrier, even though the pointer we're manipulating holds the address of a global variable whose lifetime is the same as the entire program's anyway.  \nI.e. `runtime.staticbytes` can never be garbage collected, no matter which part of memory holds a reference to it or not, so we shouldn't have to pay for the overhead of a write-barrier in this case.\n\n**Interface trick 2: Static inference**\n\nConsider this benchmark that instanciates an `eface<uint64>` from a value known at compile time ([eface_scalar_test.go](./eface_scalar_test.go)):\n```Go\nfunc BenchmarkEfaceScalar(b *testing.B) {\n    b.Run(\"eface-static\", func(b *testing.B) {\n        for i := 0; i < b.N; i++ {\n            // LEAQ  type.uint64(SB), BX\n            // MOVQ  BX, (CX)\n            // MOVL  runtime.writeBarrier(SB), SI\n            // LEAQ  8(CX), DI\n            // TESTL SI, SI\n            // JNE   92\n            // LEAQ  \"\".statictmp_0(SB), SI\n            // MOVQ  SI, 8(CX)\n            // JMP   40\n            // MOVQ  AX, SI\n            // LEAQ  \"\".statictmp_0(SB), AX\n            // CALL  runtime.gcWriteBarrier(SB)\n            // MOVQ  SI, AX\n            // LEAQ  \"\".statictmp_0(SB), SI\n            // JMP   40\n            Eface = uint64(42)\n 
       }\n    })\n}\n```\n```Bash\n$ go test -benchmem -bench=BenchmarkEfaceScalar/eface-static ./eface_scalar_test.go\nBenchmarkEfaceScalar/eface-static-8    \t2000000000\t   0.81 ns/op\t  0 B/op     0 allocs/op\n```\n\nWe can see from the generated assembly that the compiler completely optimizes out the call to `runtime.convT2E64`, and instead directly constructs the empty interface by loading the address of an autogenerated global variable that already holds the value we're looking for: `LEAQ \"\".statictmp_0(SB), SI` (note the `(SB)` part, indicating a global variable).\n\nWe can better visualize what's going on using the script that we've hacked up earlier: `dump_sym.sh`.\n```Bash\n$ GOOS=linux GOARCH=amd64 go tool compile eface_scalar_test.go\n$ GOOS=linux GOARCH=amd64 go tool link -o eface_scalar_test.bin eface_scalar_test.o\n$ ./dump_sym.sh eface_scalar_test.bin .rodata main.statictmp_0\n.rodata file-offset: 655360\n.rodata VMA: 4849664\nmain.statictmp_0 VMA: 5145768\nmain.statictmp_0 SIZE: 8\n\n0000000 002a 0000 0000 0000                    \n0000008\n```\nAs expected, `main.statictmp_0` is a 8-byte variable whose value is `0x000000000000002a`, i.e. 
`$42`.\n\n**Interface trick 3: Zero-values**\n\nFor this final trick, consider the following benchmark that instantiates an `eface<uint32>` from a zero-value ([eface_scalar_test.go](./eface_scalar_test.go)):\n```Go\nfunc BenchmarkEfaceScalar(b *testing.B) {\n    b.Run(\"eface-zeroval\", func(b *testing.B) {\n        for i := 0; i < b.N; i++ {\n            // MOVL  $0, \"\"..autotmp_3+36(SP)\n            // LEAQ  type.uint32(SB), AX\n            // MOVQ  AX, (SP)\n            // LEAQ  \"\"..autotmp_3+36(SP), CX\n            // MOVQ  CX, 8(SP)\n            // CALL  runtime.convT2E32(SB)\n            // MOVQ  16(SP), AX\n            // MOVQ  24(SP), CX\n            // MOVQ  \"\".&Eface+48(SP), DX\n            // MOVQ  AX, (DX)\n            // MOVL  runtime.writeBarrier(SB), AX\n            // LEAQ  8(DX), DI\n            // TESTL AX, AX\n            // JNE   152\n            // MOVQ  CX, 8(DX)\n            // JMP   46\n            // MOVQ  CX, AX\n            // CALL  runtime.gcWriteBarrier(SB)\n            // JMP   46\n            Eface = uint32(i - i) // outsmart the compiler (avoid static inference)\n        }\n    })\n}\n```\n```Bash\n$ go test -benchmem -bench=BenchmarkEfaceScalar/eface-zero ./eface_scalar_test.go\nBenchmarkEfaceScalar/eface-zeroval-8  \t 500000000\t   3.14 ns/op\t  0 B/op     0 allocs/op\n```\n\nFirst, notice how we make use of `uint32(i - i)` instead of `uint32(0)` to prevent the compiler from falling back to optimization #2 (static inference).  \n(*Sure, we could just have declared a global zero variable and the compiler would have been forced to take the conservative route too.. but then again, we're trying to have some fun here. Don't be that guy.*)  \nThe generated code now looks exactly like the normal, allocating case.. and still, it doesn't allocate. 
What's going on?\n\nAs we've mentioned earlier back when we were dissecting `runtime.convT2E32`, the allocation here can be optimized out using a trick similar to #1 (byte-sized values): when some code needs to reference a variable that holds a zero-value, the compiler simply gives it the address of a global variable exposed by the runtime whose value is always zero.  \nSimilarly to `runtime.staticbytes`, we can find this variable in the runtime code ([src/runtime/hashmap.go](https://github.com/golang/go/blob/bf86aec25972f3a100c3aa58a6abcbcc35bdea49/src/runtime/hashmap.go#L1248-L1249)):\n```Go\nconst maxZero = 1024 // must match value in ../cmd/compile/internal/gc/walk.go\nvar zeroVal [maxZero]byte\n```\n\nThis ends our little tour of optimizations.  \nWe'll close this section with a summary of all the benchmarks that we've just looked at:\n```Bash\n$ go test -benchmem -bench=. ./eface_scalar_test.go\nBenchmarkEfaceScalar/uint32-8         \t2000000000\t   0.54 ns/op\t  0 B/op     0 allocs/op\nBenchmarkEfaceScalar/eface32-8        \t 100000000\t   12.3 ns/op\t  4 B/op     1 allocs/op\nBenchmarkEfaceScalar/eface8-8         \t2000000000\t   0.88 ns/op\t  0 B/op     0 allocs/op\nBenchmarkEfaceScalar/eface-zeroval-8  \t 500000000\t   3.14 ns/op\t  0 B/op     0 allocs/op\nBenchmarkEfaceScalar/eface-static-8    \t2000000000\t   0.81 ns/op\t  0 B/op     0 allocs/op\n```\n\n### A word about zero-values\n\nAs we've just seen, the `runtime.convT2*` family of functions avoids a heap allocation when the data to be held by the resulting interface happens to reference a zero-value.  
\nThis optimization is not specific to interfaces and is actually part of a broader effort by the Go runtime to make sure that, when in need of a pointer to a zero-value, unnecessary allocations are avoided by taking the address of a special, always-zero variable exposed by the runtime.\n\nWe can confirm this with a simple program ([zeroval.go](./zeroval.go)):\n```Go\n//go:linkname zeroVal runtime.zeroVal\nvar zeroVal uintptr\n\ntype eface struct{ _type, data unsafe.Pointer }\n\nfunc main() {\n    x := 42\n    var i interface{} = x - x // outsmart the compiler (avoid static inference)\n\n    fmt.Printf(\"zeroVal = %p\\n\", &zeroVal)\n    fmt.Printf(\"      i = %p\\n\", ((*eface)(unsafe.Pointer(&i))).data)\n}\n```\n```Bash\n$ go run zeroval.go\nzeroVal = 0x5458e0\n      i = 0x5458e0\n```\nAs expected.\n\nNote the `//go:linkname` directive which allows us to reference an external symbol:\n> The //go:linkname directive instructs the compiler to use “importpath.name” as the object file symbol name for the variable or function declared as “localname” in the source code. Because this directive can subvert the type system and package modularity, it is only enabled in files that have imported \"unsafe\".\n\n### A tangent about zero-size variables\n\nIn a similar vein to zero-values, a very common trick in Go programs is to rely on the fact that instantiating an object of size 0 (such as `struct{}{}`) doesn't result in an allocation.  
\nThe official Go specification (linked at the end of this chapter) ends on a note that explains this:\n> A struct or array type has size zero if it contains no fields (or elements, respectively) that have a size greater than zero.\n> Two distinct zero-size variables may have the same address in memory.\n\nThe \"may\" in \"may have the same address in memory\" implies that the compiler doesn't guarantee this fact to be true, although it has always been and continues to be the case in the current implementation of the official Go compiler (`gc`).\n\nAs usual, we can confirm this with a simple program ([zerobase.go](./zerobase.go)):\n```Go\nfunc main() {\n    var s struct{}\n    var a [42]struct{}\n\n    fmt.Printf(\"s = % p\\n\", &s)\n    fmt.Printf(\"a = % p\\n\", &a)\n}\n```\n```Bash\n$ go run zerobase.go\ns = 0x546fa8\na = 0x546fa8\n```\n\nIf we'd like to know what hides behind this address, we can simply have a peek inside the binary:\n```Bash\n$ go build -o zerobase.bin zerobase.go && objdump -t zerobase.bin | grep 546fa8\n0000000000546fa8 g     O .noptrbss\t0000000000000008 runtime.zerobase\n```\nThen it's just a matter of finding this `runtime.zerobase` variable within the runtime source code ([src/runtime/malloc.go](https://github.com/golang/go/blob/bf86aec25972f3a100c3aa58a6abcbcc35bdea49/src/runtime/malloc.go#L516-L517)):\n```Go\n// base address for all 0-byte allocations\nvar zerobase uintptr\n```\n\nAnd if we'd rather be really, really sure indeed:\n```Go\n//go:linkname zerobase runtime.zerobase\nvar zerobase uintptr\n\nfunc main() {\n    var s struct{}\n    var a [42]struct{}\n\n    fmt.Printf(\"zerobase = %p\\n\", &zerobase)\n    fmt.Printf(\"       s = %p\\n\", &s)\n    fmt.Printf(\"       a = %p\\n\", &a)\n}\n```\n```Bash\n$ go run zerobase.go\nzerobase = 0x546fa8\n       s = 0x546fa8\n       a = 0x546fa8\n```\n\n## Interface composition\n\nThere really is nothing special about interface composition; it is merely syntactic sugar exposed by the 
compiler.\n\nConsider the following program ([compound_interface.go](./compound_interface.go)):\n```Go\ntype Adder interface{ Add(a, b int32) int32 }\ntype Subber interface{ Sub(a, b int32) int32 }\ntype Mather interface {\n    Adder\n    Subber\n}\n\ntype Calculator struct{ id int32 }\nfunc (c *Calculator) Add(a, b int32) int32 { return a + b }\nfunc (c *Calculator) Sub(a, b int32) int32 { return a - b }\n\nfunc main() {\n    calc := Calculator{id: 6754}\n    var m Mather = &calc\n    m.Sub(10, 32)\n}\n```\n\nAs usual, the compiler generates the corresponding `itab` for `iface<Mather, *Calculator>`:\n```Bash\n$ GOOS=linux GOARCH=amd64 go tool compile -S compound_interface.go | \\\n  grep -A 7 '^go.itab.\\*\"\".Calculator,\"\".Mather'\ngo.itab.*\"\".Calculator,\"\".Mather SRODATA dupok size=40\n    0x0000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................\n    0x0010 5e 33 ca c8 00 00 00 00 00 00 00 00 00 00 00 00  ^3..............\n    0x0020 00 00 00 00 00 00 00 00                          ........\n    rel 0+8 t=1 type.\"\".Mather+0\n    rel 8+8 t=1 type.*\"\".Calculator+0\n    rel 24+8 t=1 \"\".(*Calculator).Add+0\n    rel 32+8 t=1 \"\".(*Calculator).Sub+0\n```\nWe can see from the relocation directives that the virtual table generated by the compiler holds both the methods of `Adder` as well as those belonging to `Subber`, as we'd expect:\n```\nrel 24+8 t=1 \"\".(*Calculator).Add+0\nrel 32+8 t=1 \"\".(*Calculator).Sub+0\n```\n\nLike we said, there's no secret sauce when it comes to interface composition.\n\nOn an unrelated note, this little program demonstrates something that we had not seen up until now: since the generated `itab` is specifically tailored to *a pointer to* a `Calculator`, as opposed to a concrete value, this fact gets reflected both in its symbol-name (`go.itab.*\"\".Calculator,\"\".Mather`) as well as in the `_type` that it embeds (`type.*\"\".Calculator`).  
\nThis is consistent with the semantics used for naming method symbols, like we've seen earlier at the beginning of this chapter.\n\n## Assertions\n\nWe'll close this chapter by looking at type assertions, both from an implementation and a cost standpoint.\n\n### Type assertions\n\nConsider this short program ([eface_to_type.go](./eface_to_type.go)):\n```Go\nvar j uint32\nvar Eface interface{} // outsmart compiler (avoid static inference)\n\nfunc assertion() {\n    i := uint64(42)\n    Eface = i\n    j = Eface.(uint32)\n}\n```\n\nHere's the annotated assembly listing for `j = Eface.(uint32)`:\n```Assembly\n0x0065 00101 MOVQ\t\"\".Eface(SB), AX\t\t;; AX = Eface._type\n0x006c 00108 MOVQ\t\"\".Eface+8(SB), CX\t\t;; CX = Eface.data\n0x0073 00115 LEAQ\ttype.uint32(SB), DX\t\t;; DX = type.uint32\n0x007a 00122 CMPQ\tAX, DX\t\t\t\t;; Eface._type == type.uint32 ?\n0x007d 00125 JNE\t162\t\t\t\t;; no? panic our way outta here\n0x007f 00127 MOVL\t(CX), AX\t\t\t;; AX = *Eface.data\n0x0081 00129 MOVL\tAX, \"\".j(SB)\t\t\t;; j = AX = *Eface.data\n;; exit\n0x0087 00135 MOVQ\t40(SP), BP\n0x008c 00140 ADDQ\t$48, SP\n0x0090 00144 RET\n;; panic: interface conversion: <iface> is <have>, not <want>\n0x00a2 00162 MOVQ\tAX, (SP)\t\t\t;; have: Eface._type\n0x00a6 00166 MOVQ\tDX, 8(SP)\t\t\t;; want: type.uint32\n0x00ab 00171 LEAQ\ttype.interface {}(SB), AX\t;; AX = type.interface{} (eface)\n0x00b2 00178 MOVQ\tAX, 16(SP)\t\t\t;; iface: AX\n0x00b7 00183 CALL\truntime.panicdottypeE(SB)\t;; func panicdottypeE(have, want, iface *_type)\n0x00bc 00188 UNDEF\n0x00be 00190 NOP\n```\n\nNothing surprising in there: the code compares the address held by `Eface._type` with the address of `type.uint32`, which, as we've seen before, is the global symbol exposed by the standard library that holds the content of the `_type` structure which describes an `uint32`.  
\nIf the `_type` pointers match, then all is good and we're free to assign `*Eface.data` to `j`; otherwise, we call `runtime.panicdottypeE` to throw a panic message that precisely describes the mismatch.\n\n`runtime.panicdottypeE` is a _very_ simple function that does no more than you'd expect ([src/runtime/iface.go](https://github.com/golang/go/blob/bf86aec25972f3a100c3aa58a6abcbcc35bdea49/src/runtime/iface.go#L235-L245)):\n```Go\n// panicdottypeE is called when doing an e.(T) conversion and the conversion fails.\n// have = the dynamic type we have.\n// want = the static type we're trying to convert to.\n// iface = the static type we're converting from.\nfunc panicdottypeE(have, want, iface *_type) {\n    haveString := \"\"\n    if have != nil {\n        haveString = have.string()\n    }\n    panic(&TypeAssertionError{iface.string(), haveString, want.string(), \"\"})\n}\n```\n\n**What about performance?**\n\nWell, let's see what we've got here: a bunch of `MOV`s from main memory, a *very* predictable branch and, last but not least, a pointer dereference (`j = *Eface.data`) (which is only there because we've initialized our interface with a concrete value in the first place, otherwise we could just have copied the `Eface.data` pointer directly).\n\nIt's not even worth micro-benchmarking this, really.  \nSimilarly to the overhead of dynamic dispatch that we've measured earlier, this is in and of itself, in theory, almost free. How much it'll really cost you in practice will most likely be a matter of how your code-path is designed with regard to cache-friendliness & al.  \nA simple micro-benchmark would probably be too skewed to tell us anything useful here, anyway.\n\nAll in all, we end up with the same old advice as usual: measure for your specific use case, check your processor's performance counters, and assert whether or not this has a visible impact on your hot path.  \nIt might. It might not. 
It most likely doesn't.\n\n### Type-switches\n\nType-switches are a bit trickier, of course. Consider the following code ([eface_to_type.go](./eface_to_type.go)):\n```Go\nvar j uint32\nvar Eface interface{} // outsmart compiler (avoid static inference)\n\nfunc typeSwitch() {\n    i := uint32(42)\n    Eface = i\n    switch v := Eface.(type) {\n    case uint16:\n        j = uint32(v)\n    case uint32:\n        j = v\n    }\n}\n```\n\nThis quite simple type-switch statement translates into the following assembly (annotated):\n```Assembly\n;; switch v := Eface.(type)\n0x0065 00101 MOVQ\t\"\".Eface(SB), AX\t;; AX = Eface._type\n0x006c 00108 MOVQ\t\"\".Eface+8(SB), CX\t;; CX = Eface.data\n0x0073 00115 TESTQ\tAX, AX\t\t\t;; Eface._type == nil ?\n0x0076 00118 JEQ\t153\t\t\t;; yes? exit the switch\n0x0078 00120 MOVL\t16(AX), DX\t\t;; DX = Eface.type._hash\n;; case uint32\n0x007b 00123 CMPL\tDX, $-800397251\t\t;; Eface.type._hash == type.uint32.hash ?\n0x0081 00129 JNE\t163\t\t\t;; no? go to next case (uint16)\n0x0083 00131 LEAQ\ttype.uint32(SB), BX\t;; BX = type.uint32\n0x008a 00138 CMPQ\tBX, AX\t\t\t;; type.uint32 == Eface._type ? (hash collision?)\n0x008d 00141 JNE\t206\t\t\t;; no? clear BX and go to next case (uint16)\n0x008f 00143 MOVL\t(CX), BX\t\t;; BX = *Eface.data\n0x0091 00145 JNE\t163\t\t\t;; landsite for indirect jump starting at 0x00d3\n0x0093 00147 MOVL\tBX, \"\".j(SB)\t\t;; j = BX = *Eface.data\n;; exit\n0x0099 00153 MOVQ\t40(SP), BP\n0x009e 00158 ADDQ\t$48, SP\n0x00a2 00162 RET\n;; case uint16\n0x00a3 00163 CMPL\tDX, $-269349216\t\t;; Eface.type._hash == type.uint16.hash ?\n0x00a9 00169 JNE\t153\t\t\t;; no? exit the switch\n0x00ab 00171 LEAQ\ttype.uint16(SB), DX\t;; DX = type.uint16\n0x00b2 00178 CMPQ\tDX, AX\t\t\t;; type.uint16 == Eface._type ? (hash collision?)\n0x00b5 00181 JNE\t199\t\t\t;; no? 
clear AX and exit the switch\n0x00b7 00183 MOVWLZX\t(CX), AX\t\t;; AX = uint16(*Eface.data)\n0x00ba 00186 JNE\t153\t\t\t;; landsite for indirect jump starting at 0x00cc\n0x00bc 00188 MOVWLZX\tAX, AX\t\t\t;; AX = uint16(AX) (redundant)\n0x00bf 00191 MOVL\tAX, \"\".j(SB)\t\t;; j = AX = *Eface.data\n0x00c5 00197 JMP\t153\t\t\t;; we're done, exit the switch\n;; indirect jump table\n0x00c7 00199 MOVL\t$0, AX\t\t\t;; AX = $0\n0x00cc 00204 JMP\t186\t\t\t;; indirect jump to 153 (exit)\n0x00ce 00206 MOVL\t$0, BX\t\t\t;; BX = $0\n0x00d3 00211 JMP\t145\t\t\t;; indirect jump to 163 (case uint16)\n```\n\nOnce again, if you meticulously step through the generated code and carefully read the corresponding annotations, you'll find that there's no dark magic in there.  \nThe control flow might look a bit convoluted at first, as it jumps back and forth a lot, but other than that, it's a pretty faithful rendition of the original Go code.\n\nThere are quite a few interesting things to note, though.\n\n**Note 1: Layout**\n\nFirst, notice the high-level layout of the generated code, which matches pretty closely the original switch statement:\n1. We find an initial block of instructions that loads the `_type` of the variable we're interested in, and checks for `nil` pointers, just in case.\n1. Then, we get N logical blocks that each correspond to one of the cases described in the original switch statement.\n1. And finally, one last block defines a kind of indirect jump table that allows the control flow to jump from one case to the next while making sure to properly reset dirty registers on the way.\n\nWhile obvious in hindsight, that second point is pretty important, as it implies that the number of instructions generated by a type-switch statement is purely a function of the number of cases that it describes.  
\nIn practice, this could lead to surprising performance issues as, for example, a massive type-switch statement with plenty of cases could generate a ton of instructions and end up thrashing the L1i cache if used on the wrong path.\n\nAnother interesting fact regarding the layout of our simple switch-statement above is the order in which the cases are set up in the generated code. In our original Go code, `case uint16` came first, followed by `case uint32`. In the assembly generated by the compiler, though, their orders have been reversed, with `case uint32` now being first and `case uint16` coming in second.  \nThat this reordering is a net win for us in this particular case is nothing but mere luck, AFAICT. In fact, if you take the time to experiment a bit with type-switches, especially ones with more than two cases, you'll find that the compiler always shuffles the cases using some kind of deterministic heuristics.  \nWhat those heuristics are, I don't know (but as always, I'd love to if you do).\n\n**Note 2: O(n)**\n\nSecond, notice how the control flow blindly jumps from one case to the next, until it either lands on one that evaluates to true or finally reaches the end of the switch statement.\n\nOnce again, while obvious when one actually stops to think about it (\"how else could it work?\"), this is easy to overlook when reasoning at a higher-level. In practice, this means that the cost of evaluating a type-switch statement grows linearly with its number of cases: it's `O(n)`.  \nLikewise, evaluating a type-switch statement with N cases effectively has the same time-complexity as evaluating N type-assertions. 
As we've said, there's no magic here.\n\nIt's easy to confirm this with a bunch of benchmarks ([eface_to_type_test.go](./eface_to_type_test.go)):\n```Go\nvar j uint32\nvar eface interface{} = uint32(42)\n\nfunc BenchmarkEfaceToType(b *testing.B) {\n    b.Run(\"switch-small\", func(b *testing.B) {\n        for i := 0; i < b.N; i++ {\n            switch v := eface.(type) {\n            case int8:\n                j = uint32(v)\n            case int16:\n                j = uint32(v)\n            default:\n                j = v.(uint32)\n            }\n        }\n    })\n    b.Run(\"switch-big\", func(b *testing.B) {\n        for i := 0; i < b.N; i++ {\n            switch v := eface.(type) {\n            case int8:\n                j = uint32(v)\n            case int16:\n                j = uint32(v)\n            case int32:\n                j = uint32(v)\n            case int64:\n                j = uint32(v)\n            case uint8:\n                j = uint32(v)\n            case uint16:\n                j = uint32(v)\n            case uint64:\n                j = uint32(v)\n            default:\n                j = v.(uint32)\n            }\n        }\n    })\n}\n```\n```Bash\nbenchstat <(go test -benchtime=1s -bench=. -count=3 ./eface_to_type_test.go)\nname                        time/op\nEfaceToType/switch-small-8  1.91ns ± 2%\nEfaceToType/switch-big-8    3.52ns ± 1%\n```\nWith all its extra cases, the second type-switch does take almost twice as long per iteration indeed.\n\nAs an interesting exercise for the reader, try adding a `case uint32` in either one of the benchmarks above (anywhere), you'll see their performances improve drastically:\n```Bash\nbenchstat <(go test -benchtime=1s -bench=. 
-count=3 ./eface_to_type_test.go)\nname                        time/op\nEfaceToType/switch-small-8  1.63ns ± 1%\nEfaceToType/switch-big-8    2.17ns ± 1%\n```\nUsing all the tools and knowledge that we've gathered during this chapter, you should be able to explain the rationale behind the numbers. Have fun!\n\n**Note 3: Type hashes & pointer comparisons**\n\nFinally, notice how the type comparisons in each case are always done in two phases:\n1. The types' hashes (`_type.hash`) are compared, and then\n2. if they match, the respective memory addresses of the `_type` structures are compared directly.\n\nSince each `_type` structure is generated once by the compiler and stored in a global variable in the `.rodata` section, we are guaranteed that each type gets assigned a unique address for the lifetime of the program.\n\nIn that context, it makes sense to do this extra pointer comparison in order to make sure that the successful match wasn't simply the result of a hash collision.. but then this raises an obvious question: why not just compare the pointers directly in the first place, and drop the notion of type hashes altogether? Especially when simple type assertions, as we've seen earlier, don't use type hashes at all.  \nThe answer is I don't have the slightest clue, and certainly would love some enlightenment on this. As always, feel free to open an issue if you know more.\n\nSpeaking of type hashes, how is it that we know that `$-800397251` corresponds to `type.uint32.hash` and `$-269349216` to `type.uint16.hash`, you might wonder? 
The hard way, of course ([eface_type_hash.go](./eface_type_hash.go)):\n```Go\n// simplified definitions of runtime's eface & _type types\ntype eface struct {\n    _type *_type\n    data  unsafe.Pointer\n}\ntype _type struct {\n    size    uintptr\n    ptrdata uintptr\n    hash    uint32\n    /* omitted lotta fields */\n}\n\nvar Eface interface{}\nfunc main() {\n    Eface = uint32(42)\n    fmt.Printf(\"eface<uint32>._type.hash = %d\\n\",\n        int32((*eface)(unsafe.Pointer(&Eface))._type.hash))\n\n    Eface = uint16(42)\n    fmt.Printf(\"eface<uint16>._type.hash = %d\\n\",\n        int32((*eface)(unsafe.Pointer(&Eface))._type.hash))\n}\n```\n```\n$ go run eface_type_hash.go\neface<uint32>._type.hash = -800397251\neface<uint16>._type.hash = -269349216\n```\n\n## Conclusion\n\nThat's it for interfaces.\n\nI hope this chapter has given you most of the answers you were looking for when it comes to interfaces and their innards. Most importantly, it should have provided you with all the necessary tools and skills required to dig further whenever you'd need to.\n\nIf you have any questions or suggestions, don't hesitate to open an issue with the `chapter2:` prefix!\n\n## Links\n\n- [[Official] Go 1.1 Function Calls](https://docs.google.com/document/d/1bMwCey-gmqZVTpRax-ESeVuZGmjwbocYs1iHplK-cjo/pub)\n- [[Official] The Go Programming Language Specification](https://golang.org/ref/spec)\n- [The Gold linker by Ian Lance Taylor](https://lwn.net/Articles/276782/)\n- [ELF: a linux executable walkthrough](https://i.imgur.com/EL7lT1i.png)\n- [VMA vs LMA?](https://www.embeddedrelated.com/showthread/comp.arch.embedded/77071-1.php)\n- [In C++ why and how are virtual functions slower?](https://softwareengineering.stackexchange.com/questions/191637/in-c-why-and-how-are-virtual-functions-slower)\n- [The cost of dynamic (virtual calls) vs. 
static (CRTP) dispatch in C++](https://eli.thegreenplace.net/2013/12/05/the-cost-of-dynamic-virtual-calls-vs-static-crtp-dispatch-in-c)\n- [Why is it faster to process a sorted array than an unsorted array?](https://stackoverflow.com/a/11227902)\n- [Is accessing data in the heap faster than from the stack?](https://stackoverflow.com/a/24057744)\n- [CPU cache](https://en.wikipedia.org/wiki/CPU_cache)\n- [CppCon 2014: Mike Acton \"Data-Oriented Design and C++\"](https://www.youtube.com/watch?v=rX0ItVEVjHc)\n- [CppCon 2017: Chandler Carruth \"Going Nowhere Faster\"](https://www.youtube.com/watch?v=2EWejmkKlxs)\n- [What is the difference between MOV and LEA?](https://stackoverflow.com/a/1699778)\n- [Issue #24631 (golang/go): *testing: don't truncate allocs/op*](https://github.com/golang/go/issues/24631)\n"
  },
  {
    "path": "chapter2_interfaces/compound_interface.go",
    "content": "package main\n\ntype Adder interface{ Add(a, b int32) int32 }\ntype Subber interface{ Sub(a, b int32) int32 }\n\ntype Mather interface {\n\tAdder\n\tSubber\n}\n\ntype Calculator struct{ id int32 }\n\nfunc (c *Calculator) Add(a, b int32) int32 { return a + b }\nfunc (c *Calculator) Sub(a, b int32) int32 { return a - b }\n\nfunc main() {\n\tcalc := Calculator{id: 6754}\n\tvar m Mather = &calc\n\tm.Sub(10, 32)\n}\n"
  },
  {
    "path": "chapter2_interfaces/direct_calls.go",
    "content": "package main\n\n//go:noinline\nfunc Add(a, b int32) int32 { return a + b }\n\ntype Adder struct{ id int32 }\n\n//go:noinline\nfunc (adder *Adder) AddPtr(a, b int32) int32 { return a + b }\n\n//go:noinline\nfunc (adder Adder) AddVal(a, b int32) int32 { return a + b }\n\nfunc main() {\n\tAdd(10, 32) // direct call of top-level function\n\n\tadder := Adder{id: 6754}\n\tadder.AddPtr(10, 32) // direct call of method with pointer receiver\n\tadder.AddVal(10, 32) // direct call of method with value receiver\n\n\t(&adder).AddVal(10, 32) // implicit dereferencing\n}\n"
  },
  {
    "path": "chapter2_interfaces/dump_sym.sh",
    "content": "#!/usr/bin/env bash\n\nBIN=\"$1\"\ntest \"$BIN\"\nSECTION=\"$2\"\ntest \"$SECTION\"\nSYM=\"$3\"\ntest \"$SYM\"\n\nsection_offset=$(\n  readelf -St -W \"$BIN\" | \\\n  grep -A 1 \"$SECTION\" | \\\n  tail -n +2 | \\\n  awk '{print toupper($3)}'\n)\nsection_offset_dec=$(echo \"ibase=16;$section_offset\" | bc)\necho \"$SECTION file-offset: $section_offset_dec\"\n\nsection_vma=$(\n  readelf -St -W \"$BIN\" | \\\n  grep -A 1 \"$SECTION\" | \\\n  tail -n +2 | \\\n  awk '{print toupper($2)}'\n)\nsection_vma_dec=$(echo \"ibase=16;$section_vma\" | bc)\necho \"$SECTION VMA: $section_vma_dec\"\n\nsym_vma=$(objdump -t -j \"$SECTION\" \"$BIN\" | grep \"$SYM\" | awk '{print toupper($1)}')\nsym_vma_dec=$(echo \"ibase=16;$sym_vma\" | bc)\necho \"$SYM VMA: $sym_vma_dec\"\nsym_size=$(objdump -t -j \"$SECTION\" \"$BIN\" | grep \"$SYM\" | awk '{print toupper($5)}')\nsym_size_dec=$(echo \"ibase=16;$sym_size\" | bc)\necho -e \"$SYM SIZE: $sym_size_dec\\n\"\n\nsym_offset=$(( $sym_vma_dec - $section_vma_dec + $section_offset_dec ))\ndd if=\"$BIN\" of=/dev/stdout bs=1 count=$sym_size_dec skip=\"$sym_offset\" 2>/dev/null | hexdump\n"
  },
  {
    "path": "chapter2_interfaces/eface_scalar_test.go",
    "content": "package main\n\nimport (\n\t\"testing\"\n)\n\nfunc BenchmarkEfaceScalar(b *testing.B) {\n\tvar Uint uint32\n\tb.Run(\"uint32\", func(b *testing.B) {\n\t\tfor i := 0; i < b.N; i++ {\n\t\t\t// MOVL DX, (AX)\n\t\t\tUint = uint32(i)\n\t\t}\n\t})\n\tvar Eface interface{}\n\tb.Run(\"eface32\", func(b *testing.B) {\n\t\tfor i := 0; i < b.N; i++ {\n\t\t\t// MOVL  CX, \"\"..autotmp_3+36(SP)\n\t\t\t// LEAQ  type.uint32(SB), AX\n\t\t\t// MOVQ  AX, (SP)\n\t\t\t// LEAQ  \"\"..autotmp_3+36(SP), DX\n\t\t\t// MOVQ  DX, 8(SP)\n\t\t\t// CALL  runtime.convT2E32(SB)\n\t\t\t// MOVQ  24(SP), AX\n\t\t\t// MOVQ  16(SP), CX\n\t\t\t// MOVQ  \"\".&Eface+48(SP), DX\n\t\t\t// MOVQ  CX, (DX)\n\t\t\t// MOVL  runtime.writeBarrier(SB), CX\n\t\t\t// LEAQ  8(DX), DI\n\t\t\t// TESTL CX, CX\n\t\t\t// JNE   148\n\t\t\t// MOVQ  AX, 8(DX)\n\t\t\t// JMP   46\n\t\t\t// CALL  runtime.gcWriteBarrier(SB)\n\t\t\t// JMP   46\n\t\t\tEface = uint32(i)\n\t\t}\n\t})\n\tb.Run(\"eface8\", func(b *testing.B) {\n\t\tfor i := 0; i < b.N; i++ {\n\t\t\t// LEAQ    type.uint8(SB), BX\n\t\t\t// MOVQ    BX, (CX)\n\t\t\t// MOVBLZX AL, SI\n\t\t\t// LEAQ    runtime.staticbytes(SB), R8\n\t\t\t// ADDQ    R8, SI\n\t\t\t// MOVL    runtime.writeBarrier(SB), R9\n\t\t\t// LEAQ    8(CX), DI\n\t\t\t// TESTL   R9, R9\n\t\t\t// JNE     100\n\t\t\t// MOVQ    SI, 8(CX)\n\t\t\t// JMP     40\n\t\t\t// MOVQ    AX, R9\n\t\t\t// MOVQ    SI, AX\n\t\t\t// CALL    runtime.gcWriteBarrier(SB)\n\t\t\t// MOVQ    R9, AX\n\t\t\t// JMP     40\n\t\t\tEface = uint8(i)\n\t\t}\n\t})\n\tb.Run(\"eface-zeroval\", func(b *testing.B) {\n\t\tfor i := 0; i < b.N; i++ {\n\t\t\t// MOVL  $0, \"\"..autotmp_3+36(SP)\n\t\t\t// LEAQ  type.uint32(SB), AX\n\t\t\t// MOVQ  AX, (SP)\n\t\t\t// LEAQ  \"\"..autotmp_3+36(SP), CX\n\t\t\t// MOVQ  CX, 8(SP)\n\t\t\t// CALL  runtime.convT2E32(SB)\n\t\t\t// MOVQ  16(SP), AX\n\t\t\t// MOVQ  24(SP), CX\n\t\t\t// MOVQ  \"\".&Eface+48(SP), DX\n\t\t\t// MOVQ  AX, (DX)\n\t\t\t// MOVL  runtime.writeBarrier(SB), AX\n\t\t\t// LEAQ  
8(DX), DI\n\t\t\t// TESTL AX, AX\n\t\t\t// JNE   152\n\t\t\t// MOVQ  CX, 8(DX)\n\t\t\t// JMP   46\n\t\t\t// MOVQ  CX, AX\n\t\t\t// CALL  runtime.gcWriteBarrier(SB)\n\t\t\t// JMP   46\n\t\t\tEface = uint32(i - i) // outsmart the compiler\n\t\t}\n\t})\n\tb.Run(\"eface-static\", func(b *testing.B) {\n\t\tfor i := 0; i < b.N; i++ {\n\t\t\t// LEAQ  type.uint64(SB), BX\n\t\t\t// MOVQ  BX, (CX)\n\t\t\t// MOVL  runtime.writeBarrier(SB), SI\n\t\t\t// LEAQ  8(CX), DI\n\t\t\t// TESTL SI, SI\n\t\t\t// JNE   92\n\t\t\t// LEAQ  \"\".statictmp_0(SB), SI\n\t\t\t// MOVQ  SI, 8(CX)\n\t\t\t// JMP   40\n\t\t\t// MOVQ  AX, SI\n\t\t\t// LEAQ  \"\".statictmp_0(SB), AX\n\t\t\t// CALL  runtime.gcWriteBarrier(SB)\n\t\t\t// MOVQ  SI, AX\n\t\t\t// LEAQ  \"\".statictmp_0(SB), SI\n\t\t\t// JMP   40\n\t\t\tEface = uint64(42)\n\t\t}\n\t})\n}\n\nfunc main() {\n\t// So that we can easily compile this and retrieve `main.statictmp_0`\n\t// from the final executable.\n\tBenchmarkEfaceScalar(&testing.B{})\n}\n"
  },
  {
    "path": "chapter2_interfaces/eface_to_type.go",
    "content": "package main\n\nvar j uint32\nvar Eface interface{} // outsmart compiler (avoid static inference)\n\nfunc assertion() {\n\ti := uint32(42)\n\tEface = i\n\n\t// 0x0065 00101 MOVQ  \"\".Eface(SB), AX          ;; AX = Eface._type\n\t// 0x006c 00108 MOVQ  \"\".Eface+8(SB), CX        ;; CX = Eface.data\n\t// 0x0073 00115 LEAQ  type.uint32(SB), DX       ;; DX = type.uint32\n\t// 0x007a 00122 CMPQ  AX, DX                    ;; Eface._type == type.uint32 ?\n\t// 0x007d 00125 JNE   162                       ;; no? panic our way outta here\n\t// 0x007f 00127 MOVL  (CX), AX                  ;; AX = *Eface.data\n\t// 0x0081 00129 MOVL  AX, \"\".j(SB)              ;; j = AX = *Eface.data\n\t// ;; exit\n\t// 0x0087 00135 MOVQ  40(SP), BP\n\t// 0x008c 00140 ADDQ  $48, SP\n\t// 0x0090 00144 RET\n\t// ;; panic: interface conversion: <iface> is <have>, not <want>\n\t// 0x00a2 00162 MOVQ  AX, (SP)                  ;; have: Eface._type\n\t// 0x00a6 00166 MOVQ  DX, 8(SP)                 ;; want: type.uint32\n\t// 0x00ab 00171 LEAQ  type.interface {}(SB), AX ;; AX = type.interface{} (eface)\n\t// 0x00b2 00178 MOVQ  AX, 16(SP)                ;; iface: AX\n\t// 0x00b7 00183 CALL  runtime.panicdottypeE(SB) ;; func panicdottypeE(have, want, iface *_type)\n\t// 0x00bc 00188 UNDEF\n\t// 0x00be 00190 NOP\n\tj = Eface.(uint32)\n}\n\nfunc typeSwitch() {\n\ti := uint32(42)\n\tEface = i\n\n\t// ;; switch v := Eface.(type)\n\t// 0x0065 00101 MOVQ    \"\".Eface(SB), AX    ;; AX = Eface._type\n\t// 0x006c 00108 MOVQ    \"\".Eface+8(SB), CX  ;; CX = Eface.data\n\t// 0x0073 00115 TESTQ   AX, AX              ;; Eface._type == nil ?\n\t// 0x0076 00118 JEQ     153                 ;; yes? exit the switch\n\t// 0x0078 00120 MOVL    16(AX), DX          ;; DX = Eface._type.hash\n\t// ;; case uint32\n\t// 0x007b 00123 CMPL    DX, $-800397251     ;; Eface._type.hash == type.uint32.hash ?\n\t// 0x0081 00129 JNE     163                 ;; no? 
go to next case (uint16)\n\t// 0x0083 00131 LEAQ    type.uint32(SB), BX ;; BX = type.uint32\n\t// 0x008a 00138 CMPQ    BX, AX              ;; type.uint32 == Eface._type ? (HASH COLLISION?)\n\t// 0x008d 00141 JNE     206                 ;; no? clear BX and go to next case (uint16)\n\t// 0x008f 00143 MOVL    (CX), BX            ;; BX = *Eface.data\n\t// 0x0091 00145 JNE     163                 ;; landsite for indirect jump starting at 0x00d3\n\t// 0x0093 00147 MOVL    BX, \"\".j(SB)        ;; j = BX = *Eface.data\n\t// ;; exit\n\t// 0x0099 00153 MOVQ    40(SP), BP\n\t// 0x009e 00158 ADDQ    $48, SP\n\t// 0x00a2 00162 RET\n\t// ;; case uint16\n\t// 0x00a3 00163 CMPL    DX, $-269349216     ;; Eface._type.hash == type.uint16.hash ?\n\t// 0x00a9 00169 JNE     153                 ;; no? exit the switch\n\t// 0x00ab 00171 LEAQ    type.uint16(SB), DX ;; DX = type.uint16\n\t// 0x00b2 00178 CMPQ    DX, AX              ;; type.uint16 == Eface._type ? (HASH COLLISION?)\n\t// 0x00b5 00181 JNE     199                 ;; no? clear AX and exit the switch\n\t// 0x00b7 00183 MOVWLZX (CX), AX            ;; AX = uint16(*Eface.data)\n\t// 0x00ba 00186 JNE     153                 ;; landsite for indirect jump starting at 0x00cc\n\t// 0x00bc 00188 MOVWLZX AX, AX              ;; AX = uint16(AX) (redundant)\n\t// 0x00bf 00191 MOVL    AX, \"\".j(SB)        ;; j = AX = *Eface.data\n\t// 0x00c5 00197 JMP     153                 ;; we're done, exit the switch\n\t// ;; indirect jump table\n\t// 0x00c7 00199 MOVL    $0, AX              ;; AX = $0\n\t// 0x00cc 00204 JMP     186                 ;; indirect jump to 153 (exit)\n\t// 0x00ce 00206 MOVL    $0, BX              ;; BX = $0\n\t// 0x00d3 00211 JMP     145                 ;; indirect jump to 163 (case uint16)\n\tswitch v := Eface.(type) {\n\tcase uint16:\n\t\tj = uint32(v)\n\tcase uint32:\n\t\tj = v\n\t}\n}\n\nfunc main() {\n\tassertion()\n\ttypeSwitch()\n}\n"
  },
  {
    "path": "chapter2_interfaces/eface_to_type_test.go",
    "content": "package main\n\nimport \"testing\"\n\nvar j uint32\nvar eface interface{} = uint32(42)\n\nfunc BenchmarkEfaceToType(b *testing.B) {\n\tb.Run(\"switch-small\", func(b *testing.B) {\n\t\tfor i := 0; i < b.N; i++ {\n\t\t\tswitch v := eface.(type) {\n\t\t\tcase int8:\n\t\t\t\tj = uint32(v)\n\t\t\tcase uint32:\n\t\t\t\tj = uint32(v)\n\t\t\tcase int16:\n\t\t\t\tj = uint32(v)\n\t\t\tdefault:\n\t\t\t\tj = v.(uint32)\n\t\t\t}\n\t\t}\n\t})\n\tb.Run(\"switch-big\", func(b *testing.B) {\n\t\tfor i := 0; i < b.N; i++ {\n\t\t\tswitch v := eface.(type) {\n\t\t\tcase int8:\n\t\t\t\tj = uint32(v)\n\t\t\tcase int16:\n\t\t\t\tj = uint32(v)\n\t\t\tcase int32:\n\t\t\t\tj = uint32(v)\n\t\t\tcase uint32:\n\t\t\t\tj = uint32(v)\n\t\t\tcase int64:\n\t\t\t\tj = uint32(v)\n\t\t\tcase uint8:\n\t\t\t\tj = uint32(v)\n\t\t\tcase uint16:\n\t\t\t\tj = uint32(v)\n\t\t\tcase uint64:\n\t\t\t\tj = uint32(v)\n\t\t\tdefault:\n\t\t\t\tj = v.(uint32)\n\t\t\t}\n\t\t}\n\t})\n}\n\nfunc main() {}\n"
  },
  {
    "path": "chapter2_interfaces/eface_type_hash.go",
    "content": "package main\n\nimport (\n\t\"fmt\"\n\t\"unsafe\"\n)\n\n// simplified definitions of runtime's eface & _type types\ntype eface struct {\n\t_type *_type\n\tdata  unsafe.Pointer\n}\ntype _type struct {\n\tsize    uintptr\n\tptrdata uintptr\n\thash    uint32\n\t/* omitted lotta fields */\n}\n\n// -----------------------------------------------------------------------------\n\nvar Eface interface{}\n\nfunc main() {\n\tEface = uint32(42)\n\tfmt.Printf(\"eface<uint32>._type.hash = %d\\n\",\n\t\tint32((*eface)(unsafe.Pointer(&Eface))._type.hash))\n\n\tEface = uint16(42)\n\tfmt.Printf(\"eface<uint16>._type.hash = %d\\n\",\n\t\tint32((*eface)(unsafe.Pointer(&Eface))._type.hash))\n}\n"
  },
  {
    "path": "chapter2_interfaces/escape.go",
    "content": "package main\n\ntype Addifier interface{ Add(a, b int32) int32 }\n\ntype Adder struct{ id int32 }\n\n//go:noinline\nfunc (adder Adder) Add(a, b int32) int32 { return a + b }\n\nfunc main() {\n\tadder := Adder{id: 6754}\n\tadder.Add(10, 32)\n\tAddifier(adder).Add(10, 32)\n}\n"
  },
  {
    "path": "chapter2_interfaces/escape_test.go",
    "content": "package main\n\nimport \"testing\"\n\ntype Addifier interface{ Add(a, b int32) int32 }\n\ntype Adder struct{ id int32 }\n\n//go:noinline\nfunc (adder Adder) Add(a, b int32) int32 { return a + b }\n\nfunc BenchmarkDirect(b *testing.B) {\n\tadder := Adder{id: 6754}\n\tfor i := 0; i < b.N; i++ {\n\t\tadder.Add(10, 32)\n\t}\n}\n\nfunc BenchmarkInterface(b *testing.B) {\n\tadder := Adder{id: 6754}\n\tfor i := 0; i < b.N; i++ {\n\t\tAddifier(adder).Add(10, 32)\n\t}\n}\n\nfunc main() {}\n"
  },
  {
    "path": "chapter2_interfaces/iface.go",
    "content": "package main\n\ntype Mather interface {\n\tAdd(a, b int32) int32\n\tSub(a, b int64) int64\n}\n\ntype Adder struct{ id int32 }\n\n//go:noinline\nfunc (adder Adder) Add(a, b int32) int32 { return a + b }\n\n//go:noinline\nfunc (adder Adder) Sub(a, b int64) int64 { return a - b }\n\nfunc main() {\n\tm := Mather(Adder{id: 6754})\n\n\t// This call just makes sure that the interface is actually used.\n\t// Without this call, the linker would see that the interface defined above\n\t// is in fact never used, and thus would optimize it out of the final\n\t// executable.\n\tm.Add(10, 32)\n}\n"
  },
  {
    "path": "chapter2_interfaces/iface_bench_test.go",
    "content": "package main\n\nimport (\n\t\"math/rand\"\n\t\"runtime\"\n\t\"testing\"\n)\n\n// -----------------------------------------------------------------------------\n\ntype identifier interface {\n\tidInline() int32\n\tidNoInline() int32\n}\n\ntype id32 struct{ id int32 }\n\n// NOTE: Use pointer receivers so we don't measure the extra overhead incurred by\n// autogenerated wrappers as part of our results.\n\nfunc (id *id32) idInline() int32 { return id.id }\n\n//go:noinline\nfunc (id *id32) idNoInline() int32 { return id.id }\n\n// -----------------------------------------------------------------------------\n\nconst _maxSize = 2097152             // 2^21\nconst _maxSizeModMask = _maxSize - 1 // avoids a mod (%) in the hot path\n\nvar _randIndexes = [_maxSize]int{}\n\nfunc init() {\n\trand.Seed(42)\n\tfor i := range _randIndexes {\n\t\t_randIndexes[i] = rand.Intn(_maxSize)\n\t}\n}\n\nvar escapeMePlease *id32\n\n// escapeToHeap makes sure that `id` escapes to the heap.\n//\n// In simple situations such as some of the benchmarks present in this file,\n// the compiler is able to statically infer the underlying type of the\n// interface (or rather the type of the data it points to, to be pedantic) and\n// ends up replacing what should have been a dynamic method call by a\n// static call.\n// This anti-optimization prevents this extra cleverness.\n//\n//go:noinline\nfunc escapeToHeap(id *id32) identifier {\n\tescapeMePlease = id\n\treturn escapeMePlease\n}\n\nfunc BenchmarkMethodCall_direct(b *testing.B) {\n\tadders := make([]*id32, _maxSize)\n\tfor i := range adders {\n\t\tadders[i] = escapeToHeap(&id32{id: int32(i)}).(*id32)\n\t}\n\truntime.GC()\n\n\tvar myID int32\n\n\tb.Run(\"single/noinline\", func(b *testing.B) {\n\t\tm := escapeToHeap(&id32{id: 6754}).(*id32)\n\t\tb.ResetTimer()\n\t\tfor i := 0; i < b.N; i++ {\n\t\t\t// CALL \"\".(*id32).idNoInline(SB)\n\t\t\t// MOVL 8(SP), AX\n\t\t\t// MOVQ \"\".&myID+40(SP), CX\n\t\t\t// MOVL AX, (CX)\n\t\t\tmyID = 
m.idNoInline()\n\t\t}\n\t})\n\tb.Run(\"single/inline\", func(b *testing.B) {\n\t\tm := escapeToHeap(&id32{id: 6754}).(*id32)\n\t\tb.ResetTimer()\n\t\tfor i := 0; i < b.N; i++ {\n\t\t\t// MOVL (DX), SI\n\t\t\t// MOVL SI, (CX)\n\t\t\tmyID = m.idInline()\n\t\t}\n\t})\n\n\tb.Run(\"many/noinline/small_incr\", func(b *testing.B) {\n\t\tvar m *id32\n\t\tb.Run(\"baseline\", func(b *testing.B) {\n\t\t\tfor i := 0; i < b.N; i++ {\n\t\t\t\tm = adders[i&_maxSizeModMask]\n\t\t\t}\n\t\t})\n\t\tb.Run(\"call\", func(b *testing.B) {\n\t\t\tfor i := 0; i < b.N; i++ {\n\t\t\t\tm = adders[i&_maxSizeModMask]\n\t\t\t\tmyID = m.idNoInline()\n\t\t\t}\n\t\t})\n\t})\n\tb.Run(\"many/noinline/big_incr\", func(b *testing.B) {\n\t\tvar m *id32\n\t\tb.Run(\"baseline\", func(b *testing.B) {\n\t\t\tj := 0\n\t\t\tfor i := 0; i < b.N; i++ {\n\t\t\t\tm = adders[j&_maxSizeModMask]\n\t\t\t\tj += 32\n\t\t\t}\n\t\t})\n\t\tb.Run(\"call\", func(b *testing.B) {\n\t\t\tj := 0\n\t\t\tfor i := 0; i < b.N; i++ {\n\t\t\t\tm = adders[j&_maxSizeModMask]\n\t\t\t\tmyID = m.idNoInline()\n\t\t\t\tj += 32\n\t\t\t}\n\t\t})\n\t})\n\tb.Run(\"many/noinline/random_incr\", func(b *testing.B) {\n\t\tvar m *id32\n\t\tb.Run(\"baseline\", func(b *testing.B) {\n\t\t\tfor i := 0; i < b.N; i++ {\n\t\t\t\tm = adders[_randIndexes[i&_maxSizeModMask]]\n\t\t\t}\n\t\t})\n\t\tb.Run(\"call\", func(b *testing.B) {\n\t\t\tfor i := 0; i < b.N; i++ {\n\t\t\t\tm = adders[_randIndexes[i&_maxSizeModMask]]\n\t\t\t\tmyID = m.idNoInline()\n\t\t\t}\n\t\t})\n\t})\n}\n\nfunc BenchmarkMethodCall_interface(b *testing.B) {\n\tadders := make([]identifier, _maxSize)\n\tfor i := range adders {\n\t\tadders[i] = escapeToHeap(&id32{id: int32(i)})\n\t}\n\truntime.GC()\n\n\tvar myID int32\n\n\tb.Run(\"single/noinline\", func(b *testing.B) {\n\t\tm := escapeToHeap(&id32{id: 6754})\n\t\tb.ResetTimer()\n\t\tfor i := 0; i < b.N; i++ {\n\t\t\t// MOVQ 32(AX), CX\n\t\t\t// MOVQ \"\".m.data+40(SP), DX\n\t\t\t// MOVQ DX, (SP)\n\t\t\t// CALL CX\n\t\t\t// MOVL 8(SP), 
AX\n\t\t\t// MOVQ \"\".&myID+48(SP), CX\n\t\t\t// MOVL AX, (CX)\n\t\t\tmyID = m.idNoInline()\n\t\t}\n\t})\n\tb.Run(\"single/inline\", func(b *testing.B) {\n\t\tm := escapeToHeap(&id32{id: 6754})\n\t\tb.ResetTimer()\n\t\tfor i := 0; i < b.N; i++ {\n\t\t\t// MOVQ 24(AX), CX\n\t\t\t// MOVQ \"\".m.data+40(SP), DX\n\t\t\t// MOVQ DX, (SP)\n\t\t\t// CALL CX\n\t\t\t// MOVL 8(SP), AX\n\t\t\t// MOVQ \"\".&myID+48(SP), CX\n\t\t\t// MOVL AX, (CX)\n\t\t\tmyID = m.idInline()\n\t\t}\n\t})\n\n\tb.Run(\"many/noinline/small_incr\", func(b *testing.B) {\n\t\tvar m identifier\n\t\tb.Run(\"baseline\", func(b *testing.B) {\n\t\t\tfor i := 0; i < b.N; i++ {\n\t\t\t\tm = adders[i&_maxSizeModMask]\n\t\t\t}\n\t\t})\n\t\tb.Run(\"call\", func(b *testing.B) {\n\t\t\tfor i := 0; i < b.N; i++ {\n\t\t\t\tm = adders[i&_maxSizeModMask]\n\t\t\t\tmyID = m.idNoInline()\n\t\t\t}\n\t\t})\n\t})\n\tb.Run(\"many/noinline/big_incr\", func(b *testing.B) {\n\t\tvar m identifier\n\t\tb.Run(\"baseline\", func(b *testing.B) {\n\t\t\tj := 0\n\t\t\tfor i := 0; i < b.N; i++ {\n\t\t\t\tm = adders[j&_maxSizeModMask]\n\t\t\t\tj += 32\n\t\t\t}\n\t\t})\n\t\tb.Run(\"call\", func(b *testing.B) {\n\t\t\tj := 0\n\t\t\tfor i := 0; i < b.N; i++ {\n\t\t\t\tm = adders[j&_maxSizeModMask]\n\t\t\t\tmyID = m.idNoInline()\n\t\t\t\tj += 32\n\t\t\t}\n\t\t})\n\t})\n\tb.Run(\"many/noinline/random_incr\", func(b *testing.B) {\n\t\tvar m identifier\n\t\tb.Run(\"baseline\", func(b *testing.B) {\n\t\t\tfor i := 0; i < b.N; i++ {\n\t\t\t\tm = adders[_randIndexes[i&_maxSizeModMask]]\n\t\t\t}\n\t\t})\n\t\tb.Run(\"call\", func(b *testing.B) {\n\t\t\tfor i := 0; i < b.N; i++ {\n\t\t\t\tm = adders[_randIndexes[i&_maxSizeModMask]]\n\t\t\t\tmyID = m.idNoInline()\n\t\t\t}\n\t\t})\n\t})\n}\n\nfunc main() {}\n"
  },
  {
    "path": "chapter2_interfaces/iface_type_hash.go",
    "content": "package main\n\nimport (\n\t\"fmt\"\n\t\"unsafe\"\n)\n\ntype Mather interface {\n\tAdd(a, b int32) int32\n\tSub(a, b int64) int64\n}\n\ntype Adder struct{ id int32 }\n\n//go:noinline\nfunc (adder Adder) Add(a, b int32) int32 { return a + b }\n\n//go:noinline\nfunc (adder Adder) Sub(a, b int64) int64 { return a - b }\n\nfunc main() {\n\tm := Mather(Adder{id: 6754})\n\n\tiface := (*iface)(unsafe.Pointer(&m))\n\tfmt.Printf(\"iface.tab.hash = %#x\\n\", iface.tab.hash)\n\n}\n\n// simplified definitions of runtime's iface & itab types\n\ntype iface struct {\n\ttab  *itab\n\tdata unsafe.Pointer\n}\ntype itab struct {\n\tinter uintptr\n\t_type uintptr\n\thash  uint32\n\t_     [4]byte\n\tfun   [1]uintptr\n}\n"
  },
  {
    "path": "chapter2_interfaces/zerobase.go",
    "content": "package main\n\nimport (\n\t\"fmt\"\n\t_ \"unsafe\"\n)\n\n//go:linkname zerobase runtime.zerobase\nvar zerobase uintptr\n\nfunc main() {\n\tvar s struct{}\n\tvar a [42]struct{}\n\n\tfmt.Printf(\"zerobase = %p\\n\", &zerobase)\n\tfmt.Printf(\"       s = %p\\n\", &s)\n\tfmt.Printf(\"       a = %p\\n\", &a)\n}\n"
  },
  {
    "path": "chapter2_interfaces/zeroval.go",
    "content": "package main\n\nimport (\n\t\"fmt\"\n\t\"unsafe\"\n)\n\n//go:linkname zeroVal runtime.zeroVal\nvar zeroVal uintptr\n\ntype eface struct{ _type, data unsafe.Pointer }\n\nfunc main() {\n\tx := 42\n\tvar i interface{} = x - x // outsmart the compiler (avoid static inference)\n\n\tfmt.Printf(\"zeroVal = %p\\n\", &zeroVal)\n\tfmt.Printf(\"      i = %p\\n\", ((*eface)(unsafe.Pointer(&i))).data)\n}\n"
  },
  {
    "path": "chapter3_garbage_collector/README.md",
    "content": "<!-- Copyright © 2018 Clement Rey <cr.rey.clement@gmail.com>. -->\n<!-- Licensed under the BY-NC-SA Creative Commons 4.0 International Public License. -->\n\nSoon!\n"
  }
]